Megatest

Diff
Login

Differences From Artifact [1ba5251696]:

To Artifact [d6f3c96faf]:


1
2
3

4
5
6

7
8

9
10
11
12
13
14
15
16
17








18
19
20
21
22
23
24
25
26
27
28

29
30
31
32
33
34
35

36
37







38
39
40
41
42
43
44
1
2

3
4
5

6
7

8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61


-
+


-
+

-
+









+
+
+
+
+
+
+
+











+







+


+
+
+
+
+
+
+








;; test-records is a hash table testname:item_path => vector < testname testconfig waitons priority items-info ... >
(define (runs:run-tests-queue-new run-id runname test-records keyvallst flags test-patts required-tests reglen)
(define (runs:run-tests-queue-new run-id runname test-records keyvals flags test-patts required-tests reglen)
  ;; At this point the list of parent tests is expanded 
  ;; NB// Should expand items here and then insert into the run queue.
  (debug:print 5 "test-records: " test-records ", keyvallst: " keyvallst " flags: " (hash-table->alist flags))
  (debug:print 5 "test-records: " test-records ", flags: " (hash-table->alist flags))
  (let ((run-info              (cdb:remote-run db:get-run-info #f run-id))
	(key-vals              (cdb:remote-run db:get-key-vals #f run-id))
	(tests-info            (mt:get-tests-for-run run-id #f '() '())) ;;  qryvals: "id,testname,item_path"))
	(sorted-test-names     (tests:sort-by-priority-and-waiton test-records))
	(test-registry         (make-hash-table))
	(registry-mutex        (make-mutex))
	(num-retries           0)
	(max-retries           (config-lookup *configdat* "setup" "maxretries"))
	(max-concurrent-jobs   (let ((mcj (config-lookup *configdat* "setup"     "max_concurrent_jobs")))
				 (if (and mcj (string->number mcj))
				     (string->number mcj)
				     1)))) ;; length of the register queue ahead
    ;; Initialize the test-registery hash with tests that already have a record
    (for-each (lambda (trec)
		(let ((id (db:test-get-id        trec))
		      (tn (db:test-get-testname  trec))
		      (ip (db:test-get-item-path trec))
		      (st (db:test-get-state     trec)))
		  (hash-table-set! test-registry (runs:make-full-test-name tn ip) (string->symbol st))))
	      tests-info)
    (set! max-retries (if (and max-retries (string->number max-retries))(string->number max-retries) 100))
    (if (not (null? sorted-test-names))
	(let loop ((hed         (car sorted-test-names))
		   (tal         (cdr sorted-test-names))
		   (reg         '()) ;; registered, put these at the head of tal 
		   (reruns      '()))
	  (if (not (null? reruns))(debug:print-info 4 "reruns=" reruns))
	  ;; (print "Top of loop, hed=" hed ", tal=" tal " ,reruns=" reruns)
	  (let* ((test-record (hash-table-ref test-records hed))
		 (test-name   (tests:testqueue-get-testname test-record))
		 (tconfig     (tests:testqueue-get-testconfig test-record))
		 (jobgroup    (config-lookup tconfig "requirements" "jobgroup"))
		 (testmode    (let ((m (config-lookup tconfig "requirements" "mode")))
				(if m (string->symbol m) 'normal)))
		 (waitons     (tests:testqueue-get-waitons    test-record))
		 (priority    (tests:testqueue-get-priority   test-record))
		 (itemdat     (tests:testqueue-get-itemdat    test-record)) ;; itemdat can be a string, list or #f
		 (items       (tests:testqueue-get-items      test-record))
		 (item-path   (item-list->path itemdat))
		 (tfullname   (runs:make-full-test-name test-name item-path))
		 (newtal      (append tal (list hed)))
		 (regfull     (> (length reg) reglen)))

	    ;; Fast skip of tests that are already "COMPLETED"
	    (if (equal? (hash-table-ref/default test-registry tfullname #f) 'COMPLETED)
		(begin
		  (debug:print-info 0 "Skipping COMPLETED test " tfullname)
		  (if (not (null? tal))
		      (loop (car tal)(cdr tal) reg reruns))))
	    ;; (if (> (length reg) 10)
	    ;;     (begin
	    ;;       (set! tal (cons hed tal))
	    ;;       (set! hed (car reg))
	    ;;       (set! reg (cdr reg))
	    ;;       (set! newtal tal)))
	    (debug:print 6
57
58
59
60
61
62
63
64

65
66

67

68
69
70
71
72
73

74
75
76
77
78
79
80
81
82






83
84
85
86
87
88
89
90

91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112



113
114
115
116
117

118
119
120


121
122

123
124
125
126
127
128
129
74
75
76
77
78
79
80

81
82
83
84

85
86
87
88
89
90

91
92
93
94






95
96
97
98
99
100
101
102
103
104
105
106
107

108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127



128
129
130
131
132
133
134

135
136


137
138
139

140
141
142
143
144
145
146
147







-
+


+
-
+





-
+



-
-
-
-
-
-
+
+
+
+
+
+







-
+



















-
-
-
+
+
+




-
+

-
-
+
+

-
+







	    (if (member test-name waitons)
		(begin
		  (debug:print 0 "ERROR: test " test-name " has listed itself as a waiton, please correct this!")
		  (set! waiton (filter (lambda (x)(not (equal? x hed))) waitons))))

	    (cond ;; OUTER COND
	     ((not items) ;; when false the test is ok to be handed off to launch (but not before)
	      (if (and (not (tests:match test-patts (tests:testqueue-get-testname test-record) item-path))
	      (if (and (not (tests:match test-patts (tests:testqueue-get-testname test-record) item-path required: required-tests))
	               (not (null? tal)))
	          (loop (car tal)(cdr tal) reg reruns))
	      (let* ((run-limits-info         (cdb:remote-run runs:can-run-more-tests #f jobgroup max-concurrent-jobs)) ;; look at the test jobgroup and tot jobs running
	      (let* ((run-limits-info         (runs:can-run-more-tests test-record max-concurrent-jobs)) ;; look at the test jobgroup and tot jobs running
		      ;; (open-run-close runs:can-run-more-tests #f jobgroup max-concurrent-jobs)) ;; look at the test jobgroup and tot jobs running
		     (have-resources          (car run-limits-info))
		     (num-running             (list-ref run-limits-info 1))
		     (num-running-in-jobgroup (list-ref run-limits-info 2))
		     (max-concurrent-jobs     (list-ref run-limits-info 3))
		     (job-group-limit         (list-ref run-limits-info 4))
		     (prereqs-not-met         (db:get-prereqs-not-met run-id waitons item-path mode: testmode))
		     (prereqs-not-met         (mt:get-prereqs-not-met run-id waitons item-path mode: testmode))
		     (fails                   (runs:calc-fails prereqs-not-met))
		     (non-completed           (runs:calc-not-completed prereqs-not-met)))
		(debug:print-info 8 "have-resources: " have-resources " prereqs-not-met: " 
				  (string-intersperse 
				   (map (lambda (t)
					  (if (vector? t)
					      (conc (db:test-get-state t) "/" (db:test-get-status t))
					      (conc " WARNING: t is not a vector=" t )))
					prereqs-not-met) ", ") " fails: " fails)
			     (string-intersperse 
			      (map (lambda (t)
				     (if (vector? t)
					 (conc (db:test-get-state t) "/" (db:test-get-status t))
					 (conc " WARNING: t is not a vector=" t )))
				   prereqs-not-met) ", ") " fails: " fails)
		(debug:print-info 4 "hed=" hed "\n  test-record=" test-record "\n  test-name: " test-name "\n  item-path: " item-path "\n  test-patts: " test-patts)

		;; Don't know at this time if the test have been launched at some time in the past
		;; i.e. is this a re-launch?
		(debug:print-info 4 "run-limits-info = " run-limits-info)
		(cond ;; INNER COND #1 for a launchable test
		 ;; Check item path against item-patts
		 ((not (tests:match test-patts (tests:testqueue-get-testname test-record) item-path)) ;; This test/itempath is not to be run
		 ((not (tests:match test-patts (tests:testqueue-get-testname test-record) item-path required: required-tests)) ;; This test/itempath is not to be run
		  ;; else the run is stuck, temporarily or permanently
		  ;; but should check if it is due to lack of resources vs. prerequisites
		  (debug:print-info 1 "Skipping " (tests:testqueue-get-testname test-record) " " item-path " as it doesn't match " test-patts)
		  ;; (thread-sleep! *global-delta*)
		  (if (not (null? tal))
		      (loop (runs:queue-next-hed tal reg reglen regfull)
			    (runs:queue-next-tal tal reg reglen regfull)
			    (runs:queue-next-reg tal reg reglen regfull)
			    reruns)))
		 ;; Registry has been started for this test but has not yet completed
		 ;; this should be rare, the case where there are only a couple of tests and the db is slow
		 ;; delay a short while and continue
		 ;; ((eq? (hash-table-ref/default test-registry (runs:make-full-test-name test-name item-path) #f) 'start)
		 ;;  (thread-sleep! 0.01)
		 ;;  (loop (car newtal)(cdr newtal) reruns))
		 ;; count number of 'done, if more than 100 then skip on through.
		 ((not (hash-table-ref/default test-registry (runs:make-full-test-name test-name item-path) #f)) ;; ) ;; too many changes required. Implement later.
		  (debug:print-info 4 "Pre-registering test " test-name "/" item-path " to create placeholder" )
		  (let ((th (make-thread (lambda ()
					   (mutex-lock! registry-mutex)
					   (hash-table-set! test-registry (runs:make-full-test-name test-name item-path) 'start)
					   (mutex-unlock! registry-mutex)
		        		   (mutex-lock! registry-mutex)
		        		   (hash-table-set! test-registry (runs:make-full-test-name test-name item-path) 'start)
		        		   (mutex-unlock! registry-mutex)
					   ;; If haven't done it before register a top level test if this is an itemized test
					   (if (not (eq? (hash-table-ref/default test-registry (runs:make-full-test-name test-name "") #f) 'done))
					       (cdb:tests-register-test *runremote* run-id test-name ""))
					   (cdb:tests-register-test *runremote* run-id test-name item-path)
					   (mutex-lock! registry-mutex)
		        		   (mutex-lock! registry-mutex)
					   (hash-table-set! test-registry (runs:make-full-test-name test-name item-path) 'done)
					   (mutex-unlock! registry-mutex))
					 (conc test-name "/" item-path))))
		        		   (mutex-unlock! registry-mutex))
		        		 (conc test-name "/" item-path))))
		    (thread-start! th))
		  (runs:shrink-can-run-more-tests-count)   ;; DELAY TWEAKER (still needed?)
		  (cdb:remote-run runs:shrink-can-run-more-tests-count #f)   ;; DELAY TWEAKER (still needed?)
		  (if (and (null? tal)(null? reg))
		      (loop hed tal reg reruns)
		      (loop (runs:queue-next-hed tal reg reglen regfull)
			    (runs:queue-next-tal tal reg reglen regfull)
			    (let ((newl (append reg (list hed))))
			      (if regfull 
				  (cdr newl)
146
147
148
149
150
151
152
153

154
155

156
157
158
159
160
161
162
164
165
166
167
168
169
170

171
172

173
174
175
176
177
178
179
180







-
+

-
+







		  (thread-sleep! 1) ;; (+ 2 *global-delta*))
		  ;; could have done hed tal here but doing car/cdr of newtal to rotate tests
		  (loop (car newtal)(cdr newtal) reg reruns))
		 ((and have-resources
		       (or (null? prereqs-not-met)
			   (and (eq? testmode 'toplevel)
				(null? non-completed))))
		  (run:test run-id run-info key-vals runname keyvallst test-record flags #f)
		  (run:test run-id run-info keyvals runname test-record flags #f)
		  (hash-table-set! test-registry (runs:make-full-test-name test-name item-path) 'running)
		  (runs:shrink-can-run-more-tests-count)  ;; DELAY TWEAKER (still needed?)
		  (cdb:remote-run runs:shrink-can-run-more-tests-count #f)  ;; DELAY TWEAKER (still needed?)
		  ;; (thread-sleep! *global-delta*)
		  (if (not (null? tal))
		      (loop (runs:queue-next-hed tal reg reglen regfull)
			    (runs:queue-next-tal tal reg reglen regfull)
			    (runs:queue-next-reg tal reg reglen regfull)
			    reruns)))
		 (else ;; must be we have unmet prerequisites
172
173
174
175
176
177
178
179

180
181
182
183
184
185
186
187
188

189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205

206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225

226
227
228

229
230
231
232
233
234
235
190
191
192
193
194
195
196

197
198
199
200
201
202
203
204
205

206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222

223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242

243
244
245

246
247
248
249
250
251
252
253







-
+








-
+
















-
+



















-
+


-
+







			(loop (car newtal)(cdr newtal) reg reruns))
		      ;; the waiton is FAIL so no point in trying to run hed ever again
		      (if (not (null? tal))
			  (if (vector? hed)
			      (begin 
				(debug:print 1 "WARN: Dropping test " (db:test-get-testname hed) "/" (db:test-get-item-path hed)
					     " from the launch list as it has prerequistes that are FAIL")
				(runs:shrink-can-run-more-tests-count) ;; DELAY TWEAKER (still needed?)
				(cdb:remote-run runs:shrink-can-run-more-tests-count #f) ;; DELAY TWEAKER (still needed?)
				;; (thread-sleep! *global-delta*)
				(hash-table-set! test-registry (runs:make-full-test-name test-name item-path) 'removed)
				(loop (runs:queue-next-hed tal reg reglen regfull)
				      (runs:queue-next-tal tal reg reglen regfull)
				      (runs:queue-next-reg tal reg reglen regfull)
				      (cons hed reruns)))
			      (begin
				(debug:print 1 "WARN: Test not processed correctly. Could be a race condition in your test implementation? " hed) ;;  " as it has prerequistes that are FAIL. (NOTE: hed is not a vector)")
				(runs:shrink-can-run-more-tests-count) ;; DELAY TWEAKER (still needed?)
				(cdb:remote-run runs:shrink-can-run-more-tests-count #f) ;; DELAY TWEAKER (still needed?)
				;; (thread-sleep! (+ 0.01 *global-delta*))
				(loop hed tal reg reruns))))))))) ;; END OF INNER COND
	     
	     ;; case where an items came in as a list been processed
	     ((and (list? items)     ;; thus we know our items are already calculated
		   (not   itemdat)) ;; and not yet expanded into the list of things to be done
	      (if (and (debug:debug-mode 1) ;; (>= *verbosity* 1)
		       (> (length items) 0)
		       (> (length (car items)) 0))
		  (pp items))
	      (for-each
	       (lambda (my-itemdat)
		 (let* ((new-test-record (let ((newrec (make-tests:testqueue)))
					   (vector-copy! test-record newrec)
					   newrec))
			(my-item-path (item-list->path my-itemdat)))
		   (if (tests:match test-patts hed my-item-path) ;; (patt-list-match my-item-path item-patts)           ;; yes, we want to process this item, NOTE: Should not need this check here!
		   (if (tests:match test-patts hed my-item-path required: required-tests) ;; (patt-list-match my-item-path item-patts)           ;; yes, we want to process this item, NOTE: Should not need this check here!
		       (let ((newtestname (runs:make-full-test-name hed my-item-path)))    ;; test names are unique on testname/item-path
			 (tests:testqueue-set-items!     new-test-record #f)
			 (tests:testqueue-set-itemdat!   new-test-record my-itemdat)
			 (tests:testqueue-set-item_path! new-test-record my-item-path)
			 (hash-table-set! test-records newtestname new-test-record)
			 (set! tal (cons newtestname tal)))))) ;; since these are itemized create new test names testname/itempath
	       items)
	      (if (not (null? tal))
		  (begin
		    (debug:print-info 4 "End of items list, looping with next after short delay")
		    ;; (thread-sleep! (+ 0.01 *global-delta*))
		    (loop (runs:queue-next-hed tal reg reglen regfull)
			  (runs:queue-next-tal tal reg reglen regfull)
			  (runs:queue-next-reg tal reg reglen regfull)
			  reruns))))

	     ;; if items is a proc then need to run items:get-items-from-config, get the list and loop 
	     ;;    - but only do that if resources exist to kick off the job
	     ((or (procedure? items)(eq? items 'have-procedure))
	      (let ((can-run-more    (runs:can-run-more-tests test-record max-concurrent-jobs)))
	      (let ((can-run-more    (cdb:remote-run runs:can-run-more-tests #f jobgroup max-concurrent-jobs)))
		(if (and (list? can-run-more)
			 (car can-run-more))
		    (let* ((prereqs-not-met (db:get-prereqs-not-met run-id waitons item-path mode: testmode))
		    (let* ((prereqs-not-met (mt:get-prereqs-not-met run-id waitons item-path mode: testmode))
			   (fails           (runs:calc-fails prereqs-not-met))
			   (non-completed   (runs:calc-not-completed prereqs-not-met)))
		      (debug:print-info 8 "can-run-more: " can-run-more
					"\n testname:        " hed
					"\n prereqs-not-met: " (runs:pretty-string prereqs-not-met)
					"\n non-completed:   " (runs:pretty-string non-completed) 
					"\n fails:           " (runs:pretty-string fails)
245
246
247
248
249
250
251
252

253
254
255
256
257
258
259
263
264
265
266
267
268
269

270
271
272
273
274
275
276
277







-
+







		       ((or (null? prereqs-not-met) ;; all prereqs met, fire off the test
			    ;; or, if it is a 'toplevel test and all prereqs not met are COMPLETED then launch
			    (and (eq? testmode 'toplevel)
				 (null? non-completed)))
			(let ((test-name (tests:testqueue-get-testname test-record)))
			  (setenv "MT_TEST_NAME" test-name) ;; 
			  (setenv "MT_RUNNAME"   runname)
			  (set-megatest-env-vars run-id) ;; these may be needed by the launching process
			  (set-megatest-env-vars run-id inrunname: runname) ;; these may be needed by the launching process
			  (let ((items-list (items:get-items-from-config tconfig)))
			    (if (list? items-list)
				(begin
				  (tests:testqueue-set-items! test-record items-list)
				  ;; (thread-sleep! *global-delta*)
				  (loop hed tal reg reruns))
				(begin
273
274
275
276
277
278
279
280
281


282
283
284
285
286
287
288
291
292
293
294
295
296
297


298
299
300
301
302
303
304
305
306







-
-
+
+







				(loop (runs:queue-next-hed tal reg reglen regfull)
				      (runs:queue-next-tal tal reg reglen regfull)
				      (runs:queue-next-reg tal reg reglen regfull)
				      reruns))
			    (loop (car newtal)(cdr newtal) reg reruns))) ;; an issue with prereqs not yet met?
		       ((and (not (null? fails))(eq? testmode 'normal))
			(debug:print-info 1 "test "  hed " (mode=" testmode ") has failed prerequisite(s); "
					  (string-intersperse (map (lambda (t)(conc (db:test-get-testname t) ":" (db:test-get-state t)"/"(db:test-get-status t))) fails) ", ")
					  ", removing it from to-do list")
				     (string-intersperse (map (lambda (t)(conc (db:test-get-testname t) ":" (db:test-get-state t)"/"(db:test-get-status t))) fails) ", ")
				     ", removing it from to-do list")
			(if (not (null? tal))
			    (begin
			      ;; (thread-sleep! *global-delta*)
			      (loop (runs:queue-next-hed tal reg reglen regfull)
				    (runs:queue-next-tal tal reg reglen regfull)
				    (runs:queue-next-reg tal reg reglen regfull)
				    (cons hed reruns)))))