Megatest

Diff
Login

Differences From Artifact [dbe1379c23]:

To Artifact [0cd899f860]:


27
28
29
30
31
32
33

34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52

53
54
55
56
57
58
59
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61







+



















+







(declare (uses tests))
(declare (uses server))
(declare (uses mt))
(declare (uses archive))
(declare (uses mtargs))
(declare (uses rmtmod))
(declare (uses dbfile))
(declare (uses tcp-transportmod))

(use (prefix sqlite3 sqlite3:) srfi-1 posix regex regex-case srfi-69 (srfi 18) 
     posix-extras directory-utils pathname-expand typed-records format  sxml-serializer
     sxml-modifications matchable)



(include "common_records.scm")
(include "key_records.scm")
(include "db_records.scm")
(include "run_records.scm")
(include "test_records.scm")

;; (include "debugger.scm")

(import commonmod
	debugprint
	rmtmod
	dbfile
	tcp-transportmod
	(prefix mtargs args:))

;; use this struct to facilitate refactoring
;;

(defstruct runs:dat
  reglen regfull
809
810
811
812
813
814
815
816

817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836


837
838
839
840
841
842
843
811
812
813
814
815
816
817

818









819
820

821
822
823
824
825
826


827
828
829
830
831
832
833
834
835







-
+
-
-
-
-
-
-
-
-
-


-






-
-
+
+








    (if (not (null? required-tests))
	(debug:print-info 1 *default-log-port* "Adding \"" (string-intersperse required-tests " ") "\" to the run queue"))
    ;; NOTE: these are all parent tests, items are not expanded yet.
    (debug:print-info 4 *default-log-port* "test-records=" (hash-table->alist test-records))
    (let ((reglen (configf:lookup *configdat* "setup" "runqueue")))
      (if (> (length (hash-table-keys test-records)) 0)
	  (let* ((keep-going        #t)
	  (let* ()
		 (run-queue-retries 5)
		 (run-ids (rmt:get-all-run-ids)))
	    #;(for-each (lambda (run-id)
			(if keep-going
			    (handle-exceptions
				exn
			      (debug:print 0 *default-log-port* "error in calling find-and-mark-incomplete for run-id " run-id ", exn=" exn)
			      (rmt:find-and-mark-incomplete run-id #f)))) ;; ovr-deadtime))) ;; could be root of https://hsdes.intel.com/appstore/article/#/220546828/main -- Title: Megatest jobs show DEAD even though they are still running (1.64/27)
		      run-ids)
	    (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests
				  (any->number reglen) all-tests-registry)
	    (set! keep-going #f)
	    (if (> run-count 0) ;; handle reruns
		(begin
		  (if (not (hash-table-ref/default flags "-preclean" #f))
		      (hash-table-set! flags "-preclean" #t))
		  (if (not (hash-table-ref/default flags "-rerun" #f))
		      (hash-table-set! flags "-rerun" "ABORT,STUCK/DEAD,n/a,ZERO_ITEMS"))
      (runs:run-tests target runname test-patts user flags run-count: (- run-count 1)))
                (launch:end-of-run-check run-id)))
                  (runs:run-tests target runname test-patts user flags run-count: (- run-count 1)))
                 (launch:end-of-run-check run-id)))
	  (debug:print-info 0 *default-log-port* "No tests to run")))
    (debug:print-info 4 *default-log-port* "All done by here")
    ;; TODO: try putting post hook call here
      
    ;  (debug:print-info 2 *default-log-port* " run-count " run-count)
    ;  (runs:run-post-hook run-id))
    ;  (debug:print-info 2 *default-log-port* "Not calling post hook runcount = " run-count ))   
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208







1209
1210
1211
1212
1213
1214
1215
1191
1192
1193
1194
1195
1196
1197



1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211







-
-
-
+
+
+
+
+
+
+







	   (if (and (not (common:on-homehost?))
		    maxload) ;; only gate if maxload is specified, NOTE: maxload is normalized, i.e. load=1 means all cpus fully utilized
	       (common:wait-for-normalized-load maxload "Waiting for load to drop before starting more tests" #f))
	   
	   ;; jobtools maxhomehostload is intended to prevent overloading on the homehost which can cause database corruption issues
	   (if maxhomehostload
	       (common:wait-for-homehost-load maxhomehostload
					      (conc "Waiting for homehost load to drop below normalized value of " maxhomehostload))))))
    
 
					      (conc "Waiting for homehost load to drop below normalized value of " maxhomehostload)))

	   ;; lastly lets check the servers are not overloaded by looking at threads
	   (tt:wait-on-server-load run-id *ttdat*)
	   
	   )))
    
    
    (if (and (not (null? prereqs-not-met))
	     (runs:lownoise (conc "waiting on tests " prereqs-not-met hed) 60))
	(debug:print-info 2 *default-log-port* "waiting on tests; " (string-intersperse (runs:mixed-list-testname-and-testrec->list-of-strings prereqs-not-met) ", ")))

    ;; Don't know at this time if the test have been launched at some time in the past
    ;; i.e. is this a re-launch?
1379
1380
1381
1382
1383
1384
1385



1386
1387



1388
1389
1390
1391
1392
1393
1394
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384


1385
1386
1387
1388
1389
1390
1391
1392
1393
1394







+
+
+
-
-
+
+
+







		      (if (eq? nth-try 'removed) ;; removed is removed - drop it NOW
			  (if (null? tal)
			      #f ;; yes, really
			      (list (car tal)(cdr tal) reg reruns))
			  (begin
			    (if (runs:lownoise (conc "FAILED prerequisites or other issue" hed) 60)
				(debug:print 0 *default-log-port* "WARNING: test " hed " has FAILED prerequisites or other issue. Internal state >" nth-try "< will be overridden and we'll retry."))
			    (let* ((test-id      (rmt:get-test-id run-id testname item-path))
				   (test-info    (rmt:get-testinfo-state-status run-id test-id)) ;; we need *current* info
				   (status       (db:test-status test-info)))
			    ;; was: (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "KEEP_TRYING" #f)
                            (mt:test-set-state-status-by-testname-unless-completed run-id test-name item-path "COMPLETED" "PREQ_FAIL" #f)
			      (if (equal? status "KEEP_TRYING")
				  (mt:test-set-state-status-by-testname-unless-completed run-id test-name item-path "COMPLETED" "PREQ_FAIL" #f)
				  (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "KEEP_TRYING" #f)))
			    (hash-table-set! test-registry hed 'removed) ;; was 0
                            (if (not (and (null? reg) (null? tal)))
                                (runs:loop-values tal reg reglen regfull reruns)
                                #f))))
		     (else
		      (if (runs:lownoise (conc "FAILED prerequitests and we tried" hed) 60)
			  (debug:print 0 *default-log-port* "WARNING: test " hed " has FAILED prerequitests and we've tried at least 10 times to run it. Giving up now."))