︙ | | |
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
|
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
|
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
+
+
+
+
-
+
-
+
-
+
-
+
-
+
|
(include "test_records.scm")
(define (runs:test-get-full-path test)
(let* ((testname (db:test-get-testname test))
(itempath (db:test-get-item-path test)))
(conc testname (if (equal? itempath "") "" (conc "(" itempath ")")))))
;; This is the *new* methodology. One record to inform them and in the chaos, organise them.
;;
(define (runs:create-run-record)
(let* ((mconfig (if *configdat*
*configdat*
(if (launch:setup-for-run)
*configdat*
(begin
(debug:print 0 "ERROR: Called setup in a non-megatest area, exiting")
(exit 1)))))
(runrec (runs:runrec-make-record))
(target (common:args-get-target))
(runname (or (args:get-arg "-runname")
(args:get-arg ":runname")))
(testpatt (or (args:get-arg "-testpatt")
(args:get-arg "-runtests")))
(keys (keys:config-get-fields mconfig))
(keyvals (keys:target->keyval keys target))
;;;;;; ;; This is the *new* methodology. One record to inform them and in the chaos, organise them.
;;;;;; ;;
;;;;;; (define (runs:create-run-record area-dat) ;; #!key (remote #f))
;;;;;; (let* ((remote (megatest:area-remote area-dat))
;;;;;; (configdat (megatest:area-configdat area-dat))
;;;;;; (toppath (megatest:area-path area-dat)))
;;;;;; (mconfig (if configdat
;;;;;; configdat
;;;;;; (if (launch:setup-for-run)
;;;;;; configdat
;;;;;; (begin
;;;;;; (debug:print 0 "ERROR: Called setup in a non-megatest area, exiting")
;;;;;; (exit 1)))))
;;;;;; (runrec (runs:runrec-make-record))
;;;;;; (target (common:args-get-target))
;;;;;; (runname (or (args:get-arg "-runname")
;;;;;; (args:get-arg ":runname")))
;;;;;; (testpatt (or (args:get-arg "-testpatt")
;;;;;; (args:get-arg "-runtests")))
;;;;;; (keys (keys:config-get-fields mconfig))
;;;;;; (keyvals (keys:target->keyval keys target))
(toppath *toppath*)
(envdat keyvals) ;; initial values start with keyvals
(runconfig #f)
;;;;;; (envdat keyvals) ;; initial values start with keyvals
;;;;;; (runconfig #f)
(serverdat (if (args:get-arg "-server")
*runremote*
#f)) ;; to be used later
(transport (or (args:get-arg "-transport") 'http))
(run-id #f))
;; Set all the environment vars we know so far, start with keys
(for-each (lambda (keyval)
(setenv (car keyval)(cadr keyval)))
keyvals)
;; Set up various and sundry known vars here
(setenv "MT_RUN_AREA_HOME" toppath)
(setenv "MT_RUNNAME" runname)
(setenv "MT_TARGET" target)
(setenv "MT_TESTSUITENAME" (common:get-testsuite-name))
(set! envdat (append
envdat
(list (list "MT_RUN_AREA_HOME" toppath)
(list "MT_RUNNAME" runname)
(list "MT_TARGET" target))))
;; Now can read the runconfigs file
;;
(set! runconfig (read-config (conc *toppath* "/runconfigs.config") #f #t sections: (list "default" target)))
(if (not (hash-table-ref/default runconfig (args:get-arg "-reqtarg") #f))
(begin
(debug:print 0 "ERROR: [" (args:get-arg "-reqtarg") "] not found in " runconfigf)
(if db (sqlite3:finalize! db))
(exit 1)))
;; Now have runconfigs data loaded, set environment vars
(for-each (lambda (section)
(for-each (lambda (varval)
(set! envdat (append envdat (list varval)))
(safe-setenv (car varval)(cadr varval)))
(configf:get-section runconfig section)))
(list "default" target))
(vector target runname testpatt keys keyvals envdat mconfig runconfig serverdat transport db toppath run-id)))
;;;;;; (transport (or (args:get-arg "-transport") 'http))
;;;;;; (run-id #f))
;;;;;; ;; Set all the environment vars we know so far, start with keys
;;;;;; (for-each (lambda (keyval)
;;;;;; (setenv (car keyval)(cadr keyval)))
;;;;;; keyvals)
;;;;;; ;; Set up various and sundry known vars here
;;;;;; (setenv "MT_RUN_AREA_HOME" toppath)
;;;;;; (setenv "MT_RUNNAME" runname)
;;;;;; (setenv "MT_TARGET" target)
;;;;;; (setenv "MT_TESTSUITENAME" (common:get-testsuite-name))
;;;;;; (set! envdat (append
;;;;;; envdat
;;;;;; (list (list "MT_RUN_AREA_HOME" toppath)
;;;;;; (list "MT_RUNNAME" runname)
;;;;;; (list "MT_TARGET" target))))
;;;;;; ;; Now can read the runconfigs file
;;;;;; ;;
;;;;;; (set! runconfig (read-config (conc toppath "/runconfigs.config") #f #t sections: (list "default" target)))
;;;;;; (if (not (hash-table-ref/default runconfig (args:get-arg "-reqtarg") #f))
;;;;;; (begin
;;;;;; (debug:print 0 "ERROR: [" (args:get-arg "-reqtarg") "] not found in " runconfigf)
;;;;;; (if db (sqlite3:finalize! db))
;;;;;; (exit 1)))
;;;;;; ;; Now have runconfigs data loaded, set environment vars
;;;;;; (for-each (lambda (section)
;;;;;; (for-each (lambda (varval)
;;;;;; (set! envdat (append envdat (list varval)))
;;;;;; (safe-setenv (car varval)(cadr varval)))
;;;;;; (configf:get-section runconfig section)))
;;;;;; (list "default" target))
;;;;;; (vector target runname testpatt keys keyvals envdat mconfig runconfig (common:get-remote remote run-id) transport db toppath run-id)))
(define (runs:set-megatest-env-vars run-id #!key (inkeys #f)(inrunname #f)(inkeyvals #f))
(let* ((target (or (common:args-get-target)
(define (runs:set-megatest-env-vars run-id area-dat #!key (inkeys #f)(inrunname #f)(inkeyvals #f))
(let* ((configdat (megatest:area-configdat area-dat))
(toppath (megatest:area-path area-dat))
(target (or (common:args-get-target)
(get-environment-variable "MT_TARGET")))
(keys (if inkeys inkeys (rmt:get-keys)))
(keys (if inkeys inkeys (rmt:get-keys area-dat)))
(keyvals (if inkeyvals inkeyvals (keys:target->keyval keys target)))
(vals (hash-table-ref/default *env-vars-by-run-id* run-id #f))
(link-tree (configf:lookup *configdat* "setup" "linktree")))
(link-tree (configf:lookup configdat "setup" "linktree")))
;; get the info from the db and put it in the cache
(if link-tree
(setenv "MT_LINKTREE" link-tree)
(debug:print 0 "ERROR: linktree not set, should be set in megatest.config in [setup] section."))
(if (not vals)
(let ((ht (make-hash-table)))
(hash-table-set! *env-vars-by-run-id* run-id ht)
(set! vals ht)
(for-each
(lambda (key)
(hash-table-set! vals (car key) (cadr key)))
keyvals)))
;; from the cached data set the vars
(hash-table-for-each
vals
(lambda (key val)
(debug:print 2 "setenv " key " " val)
(safe-setenv key val)))
(if (not (get-environment-variable "MT_TARGET"))(setenv "MT_TARGET" target))
(alist->env-vars (hash-table-ref/default *configdat* "env-override" '()))
(alist->env-vars (hash-table-ref/default configdat "env-override" '()))
;; Lets use this as an opportunity to put MT_RUNNAME in the environment
(let ((runname (if inrunname inrunname (rmt:get-run-name-from-id run-id))))
(let ((runname (if inrunname inrunname (rmt:get-run-name-from-id run-id area-dat))))
(if runname
(setenv "MT_RUNNAME" runname)
(debug:print 0 "ERROR: no value for runname for id " run-id)))
(setenv "MT_RUN_AREA_HOME" *toppath*)))
(setenv "MT_RUN_AREA_HOME" toppath)))
(define (set-item-env-vars itemdat)
(for-each (lambda (item)
(debug:print 2 "setenv " (car item) " " (cadr item))
(setenv (car item) (cadr item)))
itemdat))
|
︙ | | |
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
|
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
|
-
+
+
-
-
-
+
+
+
|
(currtime (current-seconds)))
(if (> (- currtime lasttime) waitval)
(begin
(hash-table-set! *runs:denoise* key currtime)
#t)
#f)))
(define (runs:can-run-more-tests run-id jobgroup max-concurrent-jobs)
(define (runs:can-run-more-tests run-id jobgroup max-concurrent-jobs area-dat)
(thread-sleep! (cond
((> *runs:can-run-more-tests-count* 20)
(if (runs:lownoise "waiting on tasks" 60)
(debug:print-info 2 "waiting for tasks to complete, sleeping briefly ..."))
2);; obviously haven't had any work to do for a while
(else 0)))
(let* ((configdat (megatest:area-configdat area-dat))
(let* ((num-running (rmt:get-count-tests-running run-id))
(num-running-in-jobgroup (rmt:get-count-tests-running-in-jobgroup run-id jobgroup))
(job-group-limit (let ((jobg-count (config-lookup *configdat* "jobgroups" jobgroup)))
(num-running (rmt:get-count-tests-running run-id area-dat))
(num-running-in-jobgroup (rmt:get-count-tests-running-in-jobgroup run-id jobgroup area-dat))
(job-group-limit (let ((jobg-count (config-lookup configdat "jobgroups" jobgroup)))
(if (string? jobg-count)
(string->number jobg-count)
jobg-count))))
(if (> (+ num-running num-running-in-jobgroup) 0)
(set! *runs:can-run-more-tests-count* (+ *runs:can-run-more-tests-count* 1)))
(if (not (eq? *last-num-running-tests* num-running))
(begin
|
︙ | | |
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
|
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
|
-
-
+
+
+
+
-
+
-
+
-
+
-
+
-
+
-
-
-
+
+
+
-
+
-
-
+
+
-
-
+
+
-
+
-
+
-
+
|
(list (not can-not-run-more) num-running num-running-in-jobgroup max-concurrent-jobs job-group-limit)))))
;; test-names: Comma separated patterns same as test-patts but used in selection
;; of tests to run. The item portions are not respected.
;; FIXME: error out if /patt specified
;;
(define (runs:run-tests target runname test-patts user flags #!key (run-count 3)) ;; test-names
(let* ((keys (keys:config-get-fields *configdat*))
(define (runs:run-tests target runname test-patts user flags area-dat #!key (run-count 3)) ;; test-names
(let* ((configdat (megatest:area-configdat area-dat))
(toppath (megatest:area-path area-dat))
(keys (keys:config-get-fields configdat))
(keyvals (keys:target->keyval keys target))
(run-id (rmt:register-run keyvals runname "new" "n/a" user)) ;; test-name)))
(run-id (rmt:register-run keyvals runname "new" "n/a" user area-dat)) ;; test-name)))
(deferred '()) ;; delay running these since they have a waiton clause
(runconfigf (conc *toppath* "/runconfigs.config"))
(runconfigf (conc toppath "/runconfigs.config"))
(test-records (make-hash-table))
;; need to process runconfigs before generating these lists
(all-tests-registry #f) ;; (tests:get-all)) ;; (tests:get-valid-tests (make-hash-table) test-search-path)) ;; all valid tests to check waiton names
(all-test-names #f) ;; (hash-table-keys all-tests-registry))
(test-names #f) ;; (tests:filter-test-names all-test-names test-patts))
(required-tests #f) ;;(lset-intersection equal? (string-split test-patts ",") test-names))) ;; test-names)) ;; Added test-names as initial for required-tests but that failed to work
(task-key (conc (hash-table->alist flags) " " (get-host-name) " " (current-process-id)))
(tdbdat (tasks:open-db)))
(tdbdat (tasks:open-db area-dat)))
(if (tasks:need-server run-id)(tasks:start-and-wait-for-server tdbdat run-id 10))
(if (tasks:need-server run-id area-dat)(tasks:start-and-wait-for-server tdbdat run-id 10))
(set-signal-handler! signal/int
(lambda (signum)
(signal-mask! signum)
(print "Received signal " signum ", cleaning up before exit. Please wait...")
(let ((tdbdat (tasks:open-db)))
(let ((tdbdat (tasks:open-db area-dat)))
(rmt:tasks-set-state-given-param-key task-key "killed"))
(print "Killed by signal " signum ". Exiting")
(exit)))
;; register this run in monitor.db
(rmt:tasks-add "run-tests" user target runname test-patts task-key) ;; params)
(rmt:tasks-set-state-given-param-key task-key "running")
(runs:set-megatest-env-vars run-id inkeys: keys inrunname: runname) ;; these may be needed by the launching process
(rmt:tasks-add "run-tests" user target runname test-patts task-key area-dat) ;; params)
(rmt:tasks-set-state-given-param-key task-key "running" area-dat)
(runs:set-megatest-env-vars run-id area-dat inkeys: keys inrunname: runname) ;; these may be needed by the launching process
(if (file-exists? runconfigf)
(setup-env-defaults runconfigf run-id *already-seen-runconfig-info* keyvals target)
(debug:print 0 "WARNING: You do not have a run config file: " runconfigf))
;; Now generate all the tests lists
(set! all-tests-registry (tests:get-all))
(set! all-tests-registry (tests:get-all area-dat))
(set! all-test-names (hash-table-keys all-tests-registry))
(set! test-names (tests:filter-test-names all-test-names test-patts))
(set! required-tests (lset-intersection equal? (string-split test-patts ",") test-names))
;; look up all tests matching the comma separated list of globs in
;; test-patts (using % as wildcard)
;; (set! test-names (delete-duplicates (tests:get-valid-tests *toppath* test-patts)))
(debug:print-info 0 "tests search path: " (tests:get-tests-search-path *configdat*))
;; (set! test-names (delete-duplicates (tests:get-valid-tests toppath test-patts)))
(debug:print-info 0 "tests search path: " (tests:get-tests-search-path configdat area-dat))
(debug:print-info 0 "all tests: " (string-intersperse (sort all-test-names string<) " "))
(debug:print-info 0 "test names: " (string-intersperse (sort test-names string<) " "))
;; on the first pass or call to run-tests set FAILS to NOT_STARTED if
;; -keepgoing is specified
(if (eq? *passnum* 0)
(begin
;; Is this still necessary? I think not. Unreachable tests are marked as such and
;; should not cause problems here.
;;
;; have to delete test records where NOT_STARTED since they can cause -keepgoing to
;; get stuck due to becoming inaccessible from a failed test. I.e. if test B depends
;; on test A but test B reached the point on being registered as NOT_STARTED and test
;; A failed for some reason then on re-run using -keepgoing the run can never complete.
;;
;; (rmt:general-call 'delete-tests-in-state run-id "NOT_STARTED")
;; Now convert FAIL and anything in allow-auto-rerun to NOT_STARTED
;;
(for-each (lambda (state)
(rmt:set-tests-state-status run-id test-names state #f "NOT_STARTED" state))
(string-split (or (configf:lookup *configdat* "setup" "allow-auto-rerun") "")))))
(rmt:set-tests-state-status run-id test-names state #f "NOT_STARTED" state area-dat))
(string-split (or (configf:lookup configdat "setup" "allow-auto-rerun") "")))))
;; Ensure all tests are registered in the test_meta table
(runs:update-all-test_meta #f)
(runs:update-all-test_meta #f area-dat)
;; now add non-directly referenced dependencies (i.e. waiton)
;;======================================================================
;; refactoring this block into tests:get-full-data
;;
;; What happended, this code is now duplicated in tests!?
;;
;;======================================================================
(if (not (null? test-names))
(let loop ((hed (car test-names))
(tal (cdr test-names))) ;; 'return-procs tells the config reader to prep running system but return a proc
(change-directory *toppath*) ;; PLEASE OPTIMIZE ME!!! I think this should be a no-op but there are several places where change-directories could be happening.
(change-directory toppath) ;; PLEASE OPTIMIZE ME!!! I think this should be a no-op but there are several places where change-directories could be happening.
(setenv "MT_TEST_NAME" hed) ;;
(let* ((config (tests:get-testconfig hed all-tests-registry 'return-procs))
(let* ((config (tests:get-testconfig hed all-tests-registry 'return-procs area-dat))
(waitons (let ((instr (if config
(config-lookup config "requirements" "waiton")
(begin ;; No config means this is a non-existant test
(debug:print 0 "ERROR: non-existent required test \"" hed "\"")
(exit 1)))))
(debug:print-info 8 "waitons string is " instr)
(let ((newwaitons
|
︙ | | |
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
|
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
|
-
+
-
-
+
+
-
+
-
+
-
+
-
+
|
(if (not (null? remtests))
(loop (car remtests)(cdr remtests)))))))
(if (not (null? required-tests))
(debug:print-info 1 "Adding " required-tests " to the run queue"))
;; NOTE: these are all parent tests, items are not expanded yet.
(debug:print-info 4 "test-records=" (hash-table->alist test-records))
(let ((reglen (configf:lookup *configdat* "setup" "runqueue")))
(let ((reglen (configf:lookup configdat "setup" "runqueue")))
(if (> (length (hash-table-keys test-records)) 0)
(let* ((keep-going #t)
(run-queue-retries 5)
(th1 (make-thread (lambda ()
(handle-exceptions
exn
(begin
(print-call-chain (current-error-port))
(debug:print 0 "ERROR: failure in runs:run-tests-queue thread, error: " ((condition-property-accessor 'exn 'message) exn))
(if (> run-queue-retries 0)
(begin
(set! run-queue-retries (- run-queue-retries 1))
(runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests (any->number reglen) all-tests-registry))))
(runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests (any->number reglen) all-tests-registry)))
(runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests (any->number reglen) all-tests-registry area-dat))))
(runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests (any->number reglen) all-tests-registry area-dat)))
"runs:run-tests-queue"))
(th2 (make-thread (lambda ()
;; (rmt:find-and-mark-incomplete-all-runs))))) CAN'T INTERRUPT IT ...
(let ((run-ids (rmt:get-all-run-ids)))
(let ((run-ids (rmt:get-all-run-ids area-dat)))
(for-each (lambda (run-id)
(if keep-going
(handle-exceptions
exn
(debug:print 0 "error in calling find-and-mark-incomplete for run-id " run-id)
(rmt:find-and-mark-incomplete run-id #f)))) ;; ovr-deadtime)))
(rmt:find-and-mark-incomplete run-id #f area-dat)))) ;; ovr-deadtime)))
run-ids)))
"runs: mark-incompletes")))
(thread-start! th1)
(thread-start! th2)
(thread-join! th1)
(set! keep-going #f)
(thread-join! th2)
;; if run-count > 0 call, set -preclean and -rerun STUCK/DEAD
(if (> run-count 0)
(begin
(if (not (hash-table-ref/default flags "-preclean" #f))
(hash-table-set! flags "-preclean" #t))
(if (not (hash-table-ref/default flags "-rerun" #f))
(hash-table-set! flags "-rerun" "STUCK/DEAD,n/a,ZERO_ITEMS"))
(runs:run-tests target runname test-patts user flags run-count: (- run-count 1)))))
(runs:run-tests target runname test-patts user flags area-dat run-count: (- run-count 1)))))
(debug:print-info 0 "No tests to run")))
(debug:print-info 4 "All done by here")
(rmt:tasks-set-state-given-param-key task-key "done")
(rmt:tasks-set-state-given-param-key task-key "done" area-dat)
;; (sqlite3:finalize! tasks-db)
))
;; loop logic. These are used in runs:run-tests-queue to make it a bit more readable.
;;
;; If reg not full and have items in tal then loop with (car tal)(cdr tal) reg reruns
|
︙ | | |
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
|
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
|
-
+
-
+
|
(cdr reg)
(if (null? tal) ;; if tal is null and reg not full then '() as reg contents moved to tal
'()
reg)))
(define runs:nothing-left-in-queue-count 0)
(define (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry test-records itemmap)
(define (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry test-records itemmap area-dat)
(let* ((loop-list (list hed tal reg reruns))
(prereqs-not-met (rmt:get-prereqs-not-met run-id waitons item-path testmode itemmap: itemmap))
(prereqs-not-met (rmt:get-prereqs-not-met run-id waitons item-path area-dat itemmap: itemmap))
;; (prereqs-not-met (mt:lazy-get-prereqs-not-met run-id waitons item-path mode: testmode itemmap: itemmap))
(fails (runs:calc-fails prereqs-not-met))
(prereq-fails (runs:calc-prereq-fail prereqs-not-met))
(non-completed (runs:calc-not-completed prereqs-not-met))
(runnables (runs:calc-runnable prereqs-not-met)))
(debug:print-info 4 "START OF INNER COND #2 "
"\n can-run-more: " can-run-more
|
︙ | | |
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
|
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
|
-
+
-
+
|
((or (null? prereqs-not-met)
(and (member 'toplevel testmode)
(null? non-completed)))
(debug:print-info 4 "runs:expand-items: (or (null? prereqs-not-met) (and (member 'toplevel testmode)(null? non-completed)))")
(let ((test-name (tests:testqueue-get-testname test-record)))
(setenv "MT_TEST_NAME" test-name) ;;
(setenv "MT_RUNNAME" runname)
(runs:set-megatest-env-vars run-id inrunname: runname) ;; these may be needed by the launching process
(runs:set-megatest-env-vars run-id area-dat inrunname: runname) ;; these may be needed by the launching process
(let ((items-list (items:get-items-from-config tconfig)))
(if (list? items-list)
(begin
(if (null? items-list)
(let ((test-id (rmt:get-test-id run-id test-name "")))
(let ((test-id (rmt:get-test-id run-id test-name "" area-dat)))
(if test-id (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "ZERO_ITEMS" "Failed to run due to failed prerequisites"))))
(tests:testqueue-set-items! test-record items-list)
(list hed tal reg reruns))
(begin
(debug:print 0 "ERROR: The proc from reading the items table did not yield a list - please report this")
(exit 1))))))
|
︙ | | |
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
|
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
|
-
+
|
(if (and give-up
(not (and (null? tal)(null? reg))))
(let ((trimmed-tal (mt:discard-blocked-tests run-id hed tal test-records))
(trimmed-reg (mt:discard-blocked-tests run-id hed reg test-records)))
(debug:print 1 "WARNING: test " hed " has discarded prerequisites, removing it from the queue")
(let ((test-id (rmt:get-test-id run-id hed "")))
(let ((test-id (rmt:get-test-id run-id hed "" area-dat)))
(if test-id (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_DISCARDED" "Failed to run due to discarded prerequisites")))
(if (and (null? trimmed-tal)
(null? trimmed-reg))
#f
(list (runs:queue-next-hed trimmed-tal trimmed-reg reglen regfull)
(runs:queue-next-tal trimmed-tal trimmed-reg reglen regfull)
|
︙ | | |
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
|
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
|
-
+
-
+
|
;; getting here likely means the system is way overloaded, kill a full minute before continuing
(thread-sleep! 60)
;; num-retries code was here
;; we use this opportunity to move contents of reg to tal
(list (car newtal)(append (cdr newtal) reg) '() reruns)) ;; an issue with prereqs not yet met?
(begin
(debug:print-info 1 "no fails in prerequisites for " hed " but nothing seen running in a while, dropping test " hed " from the run queue")
(let ((test-id (rmt:get-test-id run-id hed "")))
(let ((test-id (rmt:get-test-id run-id hed "" area-dat)))
(if test-id (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "TIMED_OUT" "Nothing seen running in a while.")))
(list (runs:queue-next-hed tal reg reglen regfull)
(runs:queue-next-tal tal reg reglen regfull)
(runs:queue-next-reg tal reg reglen regfull)
reruns))))
((and
(or (not (null? fails))
(not (null? prereq-fails)))
(member 'normal testmode))
(debug:print-info 1 "test " hed " (mode=" testmode ") has failed prerequisite(s); "
(string-intersperse (map (lambda (t)(conc (db:test-get-testname t) ":" (db:test-get-state t)"/"(db:test-get-status t))) fails) ", ")
", removing it from to-do list")
(let ((test-id (rmt:get-test-id run-id hed "")))
(let ((test-id (rmt:get-test-id run-id hed "" area-dat)))
(if test-id
(if (not (null? prereq-fails))
(mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_DISCARDED" "Failed to run due to prior failed prerequisites")
(mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_FAIL" "Failed to run due to failed prerequisites"))))
(if (or (not (null? reg))(not (null? tal)))
(begin
(hash-table-set! test-registry hed 'CANNOTRUN)
|
︙ | | |
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
|
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
|
-
-
+
+
+
+
-
+
-
-
+
+
|
(conc test-name (if (equal? item-path "") "" "/") item-path ":" test-state "/" test-status)))
((string? t)
t)
(else
(conc t))))
inlst)))
(define (runs:process-expanded-tests hed tal reg reruns reglen regfull test-record runname test-name item-path jobgroup max-concurrent-jobs run-id waitons item-path testmode test-patts required-tests test-registry registry-mutex flags keyvals run-info newtal all-tests-registry itemmap)
(let* ((run-limits-info (runs:can-run-more-tests run-id jobgroup max-concurrent-jobs)) ;; look at the test jobgroup and tot jobs running
(define (runs:process-expanded-tests hed tal reg reruns reglen regfull test-record runname test-name item-path jobgroup max-concurrent-jobs run-id waitons item-path testmode test-patts required-tests test-registry registry-mutex flags keyvals run-info newtal all-tests-registry itemmap area-dat)
(let* ((configdat (megatest:area-configdat area-dat))
(toppath (megatest:area-path area-dat))
(run-limits-info (runs:can-run-more-tests run-id jobgroup max-concurrent-jobs area-dat)) ;; look at the test jobgroup and tot jobs running
(have-resources (car run-limits-info))
(num-running (list-ref run-limits-info 1))
(num-running-in-jobgroup (list-ref run-limits-info 2))
(max-concurrent-jobs (list-ref run-limits-info 3))
(job-group-limit (list-ref run-limits-info 4))
(prereqs-not-met (rmt:get-prereqs-not-met run-id waitons item-path testmode itemmap: itemmap))
(prereqs-not-met (rmt:get-prereqs-not-met run-id waitons item-path testmode area-dat itemmap: itemmap))
;; (prereqs-not-met (mt:lazy-get-prereqs-not-met run-id waitons item-path mode: testmode itemmap: itemmap))
(fails (runs:calc-fails prereqs-not-met))
(non-completed (runs:calc-not-completed prereqs-not-met))
(loop-list (list hed tal reg reruns))
;; configure the load runner
(numcpus (common:get-num-cpus))
(maxload (string->number (or (configf:lookup *configdat* "jobtools" "maxload") "3")))
(waitdelay (string->number (or (configf:lookup *configdat* "jobtools" "waitdelay") "60"))))
(maxload (string->number (or (configf:lookup configdat "jobtools" "maxload") "3")))
(waitdelay (string->number (or (configf:lookup configdat "jobtools" "waitdelay") "60"))))
(debug:print-info 4 "have-resources: " have-resources " prereqs-not-met: ("
(string-intersperse
(map (lambda (t)
(if (vector? t)
(conc (db:test-get-state t) "/" (db:test-get-status t))
(conc " WARNING: t is not a vector=" t )))
prereqs-not-met) ", ") ") fails: " fails)
|
︙ | | |
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
|
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
|
-
+
-
+
|
;; Register tests
;;
((not (hash-table-ref/default test-registry (db:test-make-full-name test-name item-path) #f))
(debug:print-info 4 "Pre-registering test " test-name "/" item-path " to create placeholder" )
;; always do firm registration now in v1.60 and greater ;; (eq? *transport-type* 'fs) ;; no point in parallel registration if use fs
(let register-loop ((numtries 15))
(rmt:general-call 'register-test run-id run-id test-name item-path)
(rmt:general-call 'register-test run-id area-dat run-id test-name item-path)
(if (rmt:get-test-id run-id test-name item-path)
(hash-table-set! test-registry (db:test-make-full-name test-name item-path) 'done)
(if (> numtries 0)
(begin
(thread-sleep! 0.5)
(register-loop (- numtries 1)))
(debug:print 0 "ERROR: failed to register test " (db:test-make-full-name test-name item-path)))))
(if (not (eq? (hash-table-ref/default test-registry (db:test-make-full-name test-name "") #f) 'done))
(begin
(rmt:general-call 'register-test run-id run-id test-name "")
(rmt:general-call 'register-test run-id area-dat run-id test-name "")
(if (rmt:get-test-id run-id test-name "")
(hash-table-set! test-registry (db:test-make-full-name test-name "") 'done))))
(runs:shrink-can-run-more-tests-count) ;; DELAY TWEAKER (still needed?)
(if (and (null? tal)(null? reg))
(list hed tal (append reg (list hed)) reruns)
(list (runs:queue-next-hed tal reg reglen regfull)
(runs:queue-next-tal tal reg reglen regfull)
|
︙ | | |
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
|
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
|
-
+
-
+
|
(null? non-completed))))
;; (hash-table-delete! *max-tries-hash* (db:test-make-full-name test-name item-path))
;; we are going to reset all the counters for test retries by setting a new hash table
;; this means they will increment only when nothing can be run
(set! *max-tries-hash* (make-hash-table))
;; well, first lets see if cpu load throttling is enabled. If so wait around until the
;; average cpu load is under the threshold before continuing
(if (configf:lookup *configdat* "jobtools" "maxload") ;; only gate if maxload is specified
(if (configf:lookup configdat "jobtools" "maxload") ;; only gate if maxload is specified
(common:wait-for-cpuload maxload numcpus waitdelay))
(run:test run-id run-info keyvals runname test-record flags #f test-registry all-tests-registry)
(run:test run-id run-info keyvals runname test-record flags #f test-registry all-tests-registry area-dat)
(hash-table-set! test-registry (db:test-make-full-name test-name item-path) 'running)
(runs:shrink-can-run-more-tests-count) ;; DELAY TWEAKER (still needed?)
;; (thread-sleep! *global-delta*)
(if (or (not (null? tal))(not (null? reg)))
(list (runs:queue-next-hed tal reg reglen regfull)
(runs:queue-next-tal tal reg reglen regfull)
(runs:queue-next-reg tal reg reglen regfull)
|
︙ | | |
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
|
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
|
-
+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
|
;; every time though the loop increment the test/itempatt val.
;; when the min is > max-allowed and none running then force exit
;;
(define *max-tries-hash* (make-hash-table))
;; test-records is a hash table testname:item_path => vector < testname testconfig waitons priority items-info ... >
(define (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests reglen-in all-tests-registry)
(define (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests reglen-in all-tests-registry area-dat)
;; At this point the list of parent tests is expanded
;; NB// Should expand items here and then insert into the run queue.
(debug:print 5 "test-records: " test-records ", flags: " (hash-table->alist flags))
;; Do mark-and-find clean up of db before starting runing of quue
;;
;; (rmt:find-and-mark-incomplete)
(let* ((configdat (megatest:area-configdat area-dat))
(toppath (megatest:area-path area-dat))
(let ((run-info (rmt:get-run-info run-id))
(tests-info (mt:get-tests-for-run run-id #f '() '())) ;; qryvals: "id,testname,item_path"))
(sorted-test-names (tests:sort-by-priority-and-waiton test-records))
(test-registry (make-hash-table))
(registry-mutex (make-mutex))
(num-retries 0)
(max-retries (config-lookup *configdat* "setup" "maxretries"))
(max-concurrent-jobs (let ((mcj (config-lookup *configdat* "setup" "max_concurrent_jobs")))
(if (and mcj (string->number mcj))
(string->number mcj)
1))) ;; length of the register queue ahead
(reglen (if (number? reglen-in) reglen-in 1))
(last-time-incomplete (- (current-seconds) 900)) ;; force at least one clean up cycle
(last-time-some-running (current-seconds))
(tdbdat (tasks:open-db)))
(run-info (rmt:get-run-info run-id area-dat))
(tests-info (mt:get-tests-for-run run-id #f '() '() area-dat)) ;; qryvals: "id,testname,item_path"))
(sorted-test-names (tests:sort-by-priority-and-waiton test-records))
(test-registry (make-hash-table))
(registry-mutex (make-mutex))
(num-retries 0)
(max-retries (config-lookup configdat "setup" "maxretries"))
(max-concurrent-jobs (let ((mcj (config-lookup configdat "setup" "max_concurrent_jobs")))
(if (and mcj (string->number mcj))
(string->number mcj)
1))) ;; length of the register queue ahead
(reglen (if (number? reglen-in) reglen-in 1))
(last-time-incomplete (- (current-seconds) 900)) ;; force at least one clean up cycle
(last-time-some-running (current-seconds))
(tdbdat (tasks:open-db area-dat)))
;; Initialize the test-registery hash with tests that already have a record
;; convert state to symbol and use that as the hash value
(for-each (lambda (trec)
(let ((id (db:test-get-id trec))
(tn (db:test-get-testname trec))
(ip (db:test-get-item-path trec))
(st (db:test-get-state trec)))
|
︙ | | |
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
|
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
|
-
+
-
+
-
+
|
(priority (tests:testqueue-get-priority test-record))
(itemdat (tests:testqueue-get-itemdat test-record)) ;; itemdat can be a string, list or #f
(items (tests:testqueue-get-items test-record))
(item-path (item-list->path itemdat))
(tfullname (db:test-make-full-name test-name item-path))
(newtal (append tal (list hed)))
(regfull (>= (length reg) reglen))
(num-running (rmt:get-count-tests-running-for-run-id run-id)))
(num-running (rmt:get-count-tests-running-for-run-id run-id area-dat)))
;; every couple minutes verify the server is there for this run
(if (and (common:low-noise-print 60 "try start server" run-id)
(tasks:need-server run-id))
(tasks:need-server run-id area-dat))
(tasks:start-and-wait-for-server tdbdat run-id 10)) ;; NOTE: delay and wait is done under the hood
(if (> num-running 0)
(set! last-time-some-running (current-seconds)))
(if (> (current-seconds)(+ last-time-some-running 240))
(hash-table-set! *max-tries-hash* tfullname (+ (hash-table-ref/default *max-tries-hash* tfullname 0) 1)))
;; (debug:print 0 "max-tries-hash: " (hash-table->alist *max-tries-hash*))
;; Ensure all top level tests get registered. This way they show up as "NOT_STARTED" on the dashboard
;; and it is clear they *should* have run but did not.
(if (not (hash-table-ref/default test-registry (db:test-make-full-name test-name "") #f))
(begin
(rmt:general-call 'register-test run-id run-id test-name "")
(rmt:general-call 'register-test run-id area-dat run-id test-name "" area-dat)
(hash-table-set! test-registry (db:test-make-full-name test-name "") 'done)))
;; Fast skip of tests that are already "COMPLETED" - NO! Cannot do that as the items may not have been expanded yet :(
;;
(if (member (hash-table-ref/default test-registry tfullname #f)
'(DONOTRUN removed)) ;; *common:cant-run-states-sym*) ;; '(COMPLETED KILLED WAIVED UNKNOWN INCOMPLETE))
(begin
|
︙ | | |
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
|
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
|
-
+
|
;; items is #f then the test is ok to be handed off to launch (but not before)
;;
((not items)
(debug:print-info 4 "OUTER COND: (not items)")
(if (and (not (tests:match test-patts (tests:testqueue-get-testname test-record) item-path required: required-tests))
(not (null? tal)))
(loop (car tal)(cdr tal) reg reruns))
(let ((loop-list (runs:process-expanded-tests hed tal reg reruns reglen regfull test-record runname test-name item-path jobgroup max-concurrent-jobs run-id waitons item-path testmode test-patts required-tests test-registry registry-mutex flags keyvals run-info newtal all-tests-registry itemmap)))
(let ((loop-list (runs:process-expanded-tests hed tal reg reruns reglen regfull test-record runname test-name item-path jobgroup max-concurrent-jobs run-id waitons item-path testmode test-patts required-tests test-registry registry-mutex flags keyvals run-info newtal all-tests-registry itemmap area-dat)))
(if loop-list (apply loop loop-list))))
;; items processed into a list but not came in as a list been processed
;;
((and (list? items) ;; thus we know our items are already calculated
(not itemdat)) ;; and not yet expanded into the list of things to be done
(debug:print-info 4 "OUTER COND: (and (list? items)(not itemdat))")
|
︙ | | |
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
|
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
|
-
+
-
+
|
#f
(loop (car tal)(cdr tal) reg reruns)))
;; if items is a proc then need to run items:get-items-from-config, get the list and loop
;; - but only do that if resources exist to kick off the job
;; EXPAND ITEMS
((or (procedure? items)(eq? items 'have-procedure))
(let ((can-run-more (runs:can-run-more-tests run-id jobgroup max-concurrent-jobs)))
(let ((can-run-more (runs:can-run-more-tests run-id jobgroup max-concurrent-jobs area-dat)))
(if (and (list? can-run-more)
(car can-run-more))
(let ((loop-list (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry test-records itemmap)))
(let ((loop-list (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry test-records itemmap area-dat)))
(if loop-list
(apply loop loop-list)))
;; if can't run more just loop with next possible test
(loop (car newtal)(cdr newtal) reg reruns))))
;; this case should not happen, added to help catch any bugs
((and (list? items) itemdat)
|
︙ | | |
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
|
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
|
-
+
-
+
-
+
-
+
|
(debug:print-info 0 "Have leftovers!")
(loop (car reg)(cdr reg) '() reruns))
(else
(debug:print-info 4 "Exiting loop with...\n hed=" hed "\n tal=" tal "\n reruns=" reruns))
)))
;; now *if* -run-wait we wait for all tests to be done
;; Now wait for any RUNNING tests to complete (if in run-wait mode)
(let wait-loop ((num-running (rmt:get-count-tests-running-for-run-id run-id))
(let wait-loop ((num-running (rmt:get-count-tests-running-for-run-id run-id area-dat))
(prev-num-running 0))
;; (debug:print 0 "num-running=" num-running ", prev-num-running=" prev-num-running)
(if (and (or (args:get-arg "-run-wait")
(equal? (configf:lookup *configdat* "setup" "run-wait") "yes"))
(equal? (configf:lookup configdat "setup" "run-wait") "yes"))
(> num-running 0))
(begin
;; Here we mark any old defunct tests as incomplete. Do this every fifteen minutes
;; (debug:print 0 "Got here eh! num-running=" num-running " (> num-running 0) " (> num-running 0))
(if (> (current-seconds)(+ last-time-incomplete 900))
(begin
(debug:print-info 0 "Marking stuck tests as INCOMPLETE while waiting for run " run-id ". Running as pid " (current-process-id) " on " (get-host-name))
(set! last-time-incomplete (current-seconds))
(rmt:find-and-mark-incomplete run-id #f)))
(rmt:find-and-mark-incomplete run-id #f area-dat)))
(if (not (eq? num-running prev-num-running))
(debug:print-info 0 "run-wait specified, waiting on " num-running " tests in RUNNING, REMOTEHOSTSTART or LAUNCHED state at " (time->string (seconds->local-time (current-seconds)))))
(thread-sleep! 5)
;; (wait-loop (rmt:get-count-tests-running-for-run-id run-id) num-running))))
(wait-loop (rmt:get-count-tests-running-for-run-id run-id) num-running))))
(wait-loop (rmt:get-count-tests-running-for-run-id run-id area-dat) num-running))))
;; LET* ((test-record
;; we get here on "drop through". All done!
(debug:print-info 1 "All tests launched")))
(define (runs:calc-fails prereqs-not-met)
(filter (lambda (test)
(and (vector? test) ;; not (string? test))
|
︙ | | |
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
|
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
|
-
+
+
-
+
-
+
-
+
-
-
+
+
-
+
-
-
+
+
-
+
-
-
+
+
-
+
-
+
|
(map (lambda (t)
(if (not (vector? t))
(conc t)
(conc (db:test-get-testname t) ":" (db:test-get-state t) "/" (db:test-get-status t))))
lst))
;; parent-test is there as a placeholder for when parent-tests can be run as a setup step
(define (run:test run-id run-info keyvals runname test-record flags parent-test test-registry all-tests-registry)
(define (run:test run-id run-info keyvals runname test-record flags parent-test test-registry all-tests-registry area-dat)
;; All these vars might be referenced by the testconfig file reader
(let* ((toppath (megatest:area-path area-dat))
(let* ((test-name (tests:testqueue-get-testname test-record))
(test-name (tests:testqueue-get-testname test-record))
(test-waitons (tests:testqueue-get-waitons test-record))
(test-conf (tests:testqueue-get-testconfig test-record))
(itemdat (tests:testqueue-get-itemdat test-record))
(test-path (hash-table-ref all-tests-registry test-name)) ;; (conc *toppath* "/tests/" test-name)) ;; could use tests:get-testconfig here ...
(test-path (hash-table-ref all-tests-registry test-name))
(force (hash-table-ref/default flags "-force" #f))
(rerun (hash-table-ref/default flags "-rerun" #f))
(keepgoing (hash-table-ref/default flags "-keepgoing" #f))
(incomplete-timeout (string->number (or (configf:lookup *configdat* "setup" "incomplete-timeout") "x")))
(incomplete-timeout (string->number (or (configf:lookup configdat "setup" "incomplete-timeout") "x")))
(item-path "")
(db #f)
(full-test-name #f))
;; setting itemdat to a list if it is #f
(if (not itemdat)(set! itemdat '()))
(set! item-path (item-list->path itemdat))
(set! full-test-name (db:test-make-full-name test-name item-path))
(debug:print-info 4
"\nTESTNAME: " full-test-name
"\n test-config: " (hash-table->alist test-conf)
"\n itemdat: " itemdat
)
(debug:print 2 "Attempting to launch test " full-test-name)
(setenv "MT_TEST_NAME" test-name) ;;
(setenv "MT_ITEMPATH" item-path)
(setenv "MT_RUNNAME" runname)
(runs:set-megatest-env-vars run-id inrunname: runname) ;; these may be needed by the launching process
(change-directory *toppath*)
(runs:set-megatest-env-vars run-id area-dat inrunname: runname) ;; these may be needed by the launching process
(change-directory toppath)
;; Here is where the test_meta table is best updated
;; Yes, another use of a global for caching. Need a better way?
;;
;; There is now a single call to runs:update-all-test_meta and this
;; per-test call is not needed. Given the delicacy of the move to
;; v1.55 this code is being left in place for the time being.
;;
(if (not (hash-table-ref/default *test-meta-updated* test-name #f))
(begin
(hash-table-set! *test-meta-updated* test-name #t)
(runs:update-test_meta test-name test-conf)))
(runs:update-test_meta test-name test-conf area-dat)))
;; itemdat => ((ripeness "overripe") (temperature "cool") (season "summer"))
(let* ((new-test-path (string-intersperse (cons test-path (map cadr itemdat)) "/"))
(test-id (rmt:get-test-id run-id test-name item-path))
(testdat (if test-id (rmt:get-test-info-by-id run-id test-id) #f)))
(test-id (rmt:get-test-id run-id test-name item-path area-dat))
(testdat (if test-id (rmt:get-test-info-by-id run-id test-id area-dat) #f)))
(if (not testdat)
(let loop ()
;; ensure that the path exists before registering the test
;; NOPE: Cannot! Don't know yet which disk area will be assigned....
;; (system (conc "mkdir -p " new-test-path))
;;
;; (open-run-close tests:register-test db run-id test-name item-path)
;;
;; NB// for the above line. I want the test to be registered long before this routine gets called!
;;
(if (not test-id)(set! test-id (rmt:get-test-id run-id test-name item-path)))
(if (not test-id)(set! test-id (rmt:get-test-id run-id test-name item-path area-dat)))
(if (not test-id)
(begin
(debug:print 2 "WARN: Test not pre-created? test-name=" test-name ", item-path=" item-path ", run-id=" run-id)
(rmt:general-call 'register-test run-id run-id test-name item-path)
(set! test-id (rmt:get-test-id run-id test-name item-path))))
(rmt:general-call 'register-test run-id area-dat run-id test-name item-path area-dat)
(set! test-id (rmt:get-test-id run-id test-name item-path area-dat))))
(debug:print-info 4 "test-id=" test-id ", run-id=" run-id ", test-name=" test-name ", item-path=\"" item-path "\"")
(set! testdat (rmt:get-test-info-by-id run-id test-id))
(set! testdat (rmt:get-test-info-by-id run-id test-id area-dat))
(if (not testdat)
(begin
(debug:print-info 0 "WARNING: server is overloaded, trying again in one second")
(thread-sleep! 1)
(loop)))))
(if (not testdat) ;; should NOT happen
(debug:print 0 "ERROR: failed to get test record for test-id " test-id))
(set! test-id (db:test-get-id testdat))
(if (file-exists? test-path)
(change-directory test-path)
(begin
(debug:print "ERROR: test run path not created before attempting to run the test. Perhaps you are running -remove-runs at the same time?")
(change-directory *toppath*)))
(change-directory toppath)))
(case (if force ;; (args:get-arg "-force")
'NOT_STARTED
(if testdat
(string->symbol (test:get-state testdat))
'failed-to-insert))
((failed-to-insert)
(debug:print 0 "ERROR: Failed to insert the record into the db"))
|
︙ | | |
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
|
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
|
-
+
|
(skip-check (configf:get-section test-conf "skip")))
(cond
;; Have to check for skip conditions. This one skips if there are same-named tests
;; currently running
((and skip-check
(configf:lookup test-conf "skip" "prevrunning"))
;; run-ids = #f means *all* runs
(let ((running-tests (rmt:get-tests-for-runs-mindata #f full-test-name '("RUNNING" "REMOTEHOSTSTART" "LAUNCHED") '() #f)))
(let ((running-tests (rmt:get-tests-for-runs-mindata #f full-test-name '("RUNNING" "REMOTEHOSTSTART" "LAUNCHED") '() #f area-dat)))
(if (not (null? running-tests)) ;; have to skip
(set! skip-test "Skipping due to previous tests running"))))
((and skip-check
(configf:lookup test-conf "skip" "fileexists"))
(if (file-exists? (configf:lookup test-conf "skip" "fileexists"))
(set! skip-test (conc "Skipping due to existance of file " (configf:lookup test-conf "skip" "fileexists"))))))
(if skip-test
|
︙ | | |
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
|
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
|
-
+
-
-
+
+
-
+
-
+
-
+
|
;; fields are passing in through
;; action:
;; 'remove-runs
;; 'set-state-status
;;
;; NB// should pass in keys?
;;
(define (runs:operate-on action target runnamepatt testpatt #!key (state #f)(status #f)(new-state-status #f)(mode 'remove-all)(options '()))
(define (runs:operate-on action target runnamepatt testpatt area-dat #!key (state #f)(status #f)(new-state-status #f)(mode 'remove-all)(options '()))
(common:clear-caches) ;; clear all caches
(let* ((db #f)
(tdbdat (tasks:open-db))
(keys (rmt:get-keys))
(tdbdat (tasks:open-db area-dat))
(keys (rmt:get-keys area-dat))
(rundat (mt:get-runs-by-patt keys runnamepatt target))
(header (vector-ref rundat 0))
(runs (vector-ref rundat 1))
(states (if state (string-split state ",") '()))
(statuses (if status (string-split status ",") '()))
(state-status (if (string? new-state-status) (string-split new-state-status ",") '(#f #f))))
(debug:print-info 4 "runs:operate-on => Header: " header " action: " action " new-state-status: " new-state-status)
(if (> 2 (length state-status))
(begin
(debug:print 0 "ERROR: the parameter to -set-state-status is a comma delimited string. E.g. COMPLETED,FAIL")
(exit)))
(for-each
(lambda (run)
(let ((runkey (string-intersperse (map (lambda (k)
(db:get-value-by-header run header k)) keys) "/"))
(dirs-to-remove (make-hash-table))
(proc-get-tests (lambda (run-id)
(mt:get-tests-for-run run-id
testpatt states statuses
testpatt states statuses area-dat
not-in: #f
sort-by: (case action
((remove-runs) 'rundir)
(else 'event_time))))))
(let* ((run-id (db:get-value-by-header run header "id"))
(run-state (db:get-value-by-header run header "state"))
(run-name (db:get-value-by-header run header "runname"))
(tests (if (not (equal? run-state "locked"))
(proc-get-tests run-id)
'()))
(lasttpath "/does/not/exist/I/hope")
(worker-thread #f))
(debug:print-info 4 "runs:operate-on run=" run ", header=" header)
(if (not (null? tests))
(begin
(case action
((remove-runs)
(if (tasks:need-server run-id)(tasks:start-and-wait-for-server tdbdat run-id 10))
(if (tasks:need-server run-id area-dat)(tasks:start-and-wait-for-server tdbdat run-id 10))
;; seek and kill in flight -runtests with % as testpatt here
(if (equal? testpatt "%")
(tasks:kill-runner target run-name)
(debug:print 0 "not attempting to kill any run launcher processes as testpatt is " testpatt))
(debug:print 1 "Removing tests for run: " runkey " " (db:get-value-by-header run header "runname")))
((set-state-status)
(if (tasks:need-server run-id)(tasks:start-and-wait-for-server tdbdat run-id 10))
(if (tasks:need-server run-id area-dat)(tasks:start-and-wait-for-server tdbdat run-id 10))
(debug:print 1 "Modifying state and staus for tests for run: " runkey " " (db:get-value-by-header run header "runname")))
((print-run)
(debug:print 1 "Printing info for run " runkey ", run=" run ", tests=" tests ", header=" header)
action)
((run-wait)
(debug:print 1 "Waiting for run " runkey ", run=" runnamepatt " to complete"))
((archive)
|
︙ | | |
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
|
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
|
-
+
-
+
|
#f))))))
(toplevel-retries (make-hash-table)) ;; try three times to loop through and remove top level tests
(test-retry-time (make-hash-table))
(allow-run-time 10)) ;; seconds to allow for killing tests before just brutally killing 'em
(let loop ((test (car sorted-tests))
(tal (cdr sorted-tests)))
(let* ((test-id (db:test-get-id test))
(new-test-dat (rmt:get-test-info-by-id run-id test-id)))
(new-test-dat (rmt:get-test-info-by-id run-id test-id area-dat)))
(if (not new-test-dat)
(begin
(debug:print 0 "ERROR: We have a test-id of " test-id " but no record was found. NOTE: No locking of records is done between processes, do not simultaneously remove the same run from two processes!")
(if (not (null? tal))
(loop (car tal)(cdr tal))))
(let* ((item-path (db:test-get-item-path new-test-dat))
(test-name (db:test-get-testname new-test-dat))
(run-dir ;;(filedb:get-path *fdb*
;; (rmt:sdb-qry 'getid
(db:test-get-rundir new-test-dat)) ;; ) ;; run dir is from the link tree
(test-state (db:test-get-state new-test-dat))
(test-fulln (db:test-get-fullname new-test-dat))
(uname (db:test-get-uname new-test-dat))
(toplevel-with-children (and (db:test-get-is-toplevel test)
(> (rmt:test-toplevel-num-items run-id test-name) 0))))
(> (rmt:test-toplevel-num-items run-id test-name area-dat) 0))))
(case action
((remove-runs)
;; if the test is a toplevel-with-children issue an error and do not remove
(if toplevel-with-children
(begin
(debug:print 0 "WARNING: skipping removal of " test-fulln " with run-id " run-id " as it has sub tests")
(hash-table-set! toplevel-retries test-fulln (+ (hash-table-ref/default toplevel-retries test-fulln 0) 1))
|
︙ | | |
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
|
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
|
-
+
-
-
+
+
|
(if (not (null? tal))
(loop (car tal)(cdr tal))))
)))
)
(if worker-thread (thread-join! worker-thread))))))
;; remove the run if zero tests remain
(if (eq? action 'remove-runs)
(let ((remtests (mt:get-tests-for-run (db:get-value-by-header run header "id") #f '("DELETED") '("n/a") not-in: #t)))
(let ((remtests (mt:get-tests-for-run (db:get-value-by-header run header "id") #f '("DELETED") '("n/a") area-dat not-in: #t)))
(if (null? remtests) ;; no more tests remaining
(let* ((dparts (string-split lasttpath "/"))
(runpath (conc "/" (string-intersperse
(take dparts (- (length dparts) 1))
"/"))))
(debug:print 1 "Removing run: " runkey " " (db:get-value-by-header run header "runname") " and related record")
(rmt:delete-run run-id)
(rmt:delete-old-deleted-test-records)
(rmt:delete-run run-id area-dat)
(rmt:delete-old-deleted-test-records area-dat)
;; (rmt:set-var "DELETED_TESTS" (current-seconds))
;; need to figure out the path to the run dir and remove it if empty
;; (if (null? (glob (conc runpath "/*")))
;; (begin
;; (debug:print 1 "Removing run dir " runpath)
;; (system (conc "rmdir -p " runpath))))
)))))
|
︙ | | |
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
|
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
|
-
+
-
-
-
+
+
+
+
+
+
-
-
+
+
-
+
-
+
-
+
|
(debug:print 0 "WARNING: not removing " run-dir " as it either doesn't exist or is not a symlink")
(debug:print 0 "NOTE: the run dir for this test is undefined. Test may have already been deleted."))
))
;; Only delete the records *after* removing the directory. If things fail we have a record
(case mode
((remove-data-only)(mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "NOT_STARTED" "n/a" #f))
((archive-remove) (mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "ARCHIVED" #f #f))
(else (rmt:delete-test-records (db:test-get-run_id test) (db:test-get-id test))))))
(else (rmt:delete-test-records (db:test-get-run_id test) (db:test-get-id test) area-dat)))))
;;======================================================================
;; Routines for manipulating runs
;;======================================================================
;; Since many calls to a run require pretty much the same setup
;; this wrapper is used to reduce the replication of code
(define (general-run-call switchname action-desc proc)
(let ((runname (or (args:get-arg "-runname")(args:get-arg ":runname")))
(target (common:args-get-target)))
(define (general-run-call switchname action-desc proc area-dat)
(let ((runname (or (args:get-arg "-runname")(args:get-arg ":runname")))
(target (common:args-get-target))
(toppath (megatest:area-path area-dat))
(configdat (megatest:area-configdat area-dat))
(configinfo (megatest:area-configinfo area-dat)))
(cond
((not target)
(debug:print 0 "ERROR: Missing required parameter for " switchname ", you must specify the target with -target")
(exit 3))
((not runname)
(debug:print 0 "ERROR: Missing required parameter for " switchname ", you must specify the run name with -runname runname")
(exit 3))
(else
(let (;; (db #f)
(keys #f))
(if (launch:setup-for-run)
(launch:cache-config)
(if (launch:setup-for-run area-dat)
(launch:cache-config area-dat)
(begin
(debug:print 0 "Failed to setup, exiting")
(exit 1)))
(set! keys (keys:config-get-fields *configdat*))
(set! keys (keys:config-get-fields configdat))
;; have enough to process -target or -reqtarg here
(if (args:get-arg "-reqtarg")
(let* ((runconfigf (conc *toppath* "/runconfigs.config")) ;; DO NOT EVALUATE ALL
(let* ((runconfigf (conc toppath "/runconfigs.config")) ;; DO NOT EVALUATE ALL
(runconfig (read-config runconfigf #f #t environ-patt: #f)))
(if (hash-table-ref/default runconfig (args:get-arg "-reqtarg") #f)
(keys:target-set-args keys (args:get-arg "-reqtarg") args:arg-hash)
(begin
(debug:print 0 "ERROR: [" (args:get-arg "-reqtarg") "] not found in " runconfigf)
;; (if db (sqlite3:finalize! db))
(exit 1)
)))
(if (args:get-arg "-target")
(keys:target-set-args keys (args:get-arg "-target" args:arg-hash) args:arg-hash)))
(if (not (car *configinfo*))
(if (not (car configinfo))
(begin
(debug:print 0 "ERROR: Attempted to " action-desc " but run area config file not found")
(exit 1))
;; Extract out stuff needed in most or many calls
;; here then call proc
(let* ((keyvals (keys:target->keyval keys target)))
(proc target runname keys keyvals)))
|
︙ | | |
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
|
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
|
-
+
-
-
+
+
-
+
-
+
-
-
+
+
-
-
+
+
-
+
-
-
-
+
+
+
-
+
-
+
|
(for-each (lambda (run)
(let ((run-id (db:get-value-by-header run header "id")))
(if (or lock
(and unlock
(begin
(print "Do you really wish to unlock run " run-id "?\n y/n: ")
(equal? "y" (read-line)))))
(rmt:lock/unlock-run run-id lock unlock user)
(rmt:lock/unlock-run run-id lock unlock user area-dat)
(debug:print-info 0 "Skipping lock/unlock on " run-id))))
runs)))
;;======================================================================
;; Rollup runs
;;======================================================================
;; Update the test_meta table for this test
(define (runs:update-test_meta test-name test-conf)
(let ((currrecord (rmt:testmeta-get-record test-name)))
(define (runs:update-test_meta test-name test-conf area-dat)
(let ((currrecord (rmt:testmeta-get-record test-name area-dat)))
(if (not currrecord)
(begin
(set! currrecord (make-vector 11 #f))
(rmt:testmeta-add-record test-name)))
(rmt:testmeta-add-record test-name area-dat)))
(for-each
(lambda (key)
(let* ((idx (cadr key))
(fld (car key))
(val (config-lookup test-conf "test_meta" fld)))
;; (debug:print 5 "idx: " idx " fld: " fld " val: " val)
(if (and val (not (equal? (vector-ref currrecord idx) val)))
(begin
(print "Updating " test-name " " fld " to " val)
(rmt:testmeta-update-field test-name fld val)))))
(rmt:testmeta-update-field test-name fld val area-dat)))))
'(("author" 2)("owner" 3)("description" 4)("reviewed" 5)("tags" 9)("jobgroup" 10)))))
;; Update test_meta for all tests
(define (runs:update-all-test_meta db)
(let ((test-names (tests:get-all))) ;; (tests:get-valid-tests)))
(define (runs:update-all-test_meta db area-dat)
(let ((test-names (tests:get-all area-dat))) ;; (tests:get-valid-tests)))
(for-each
(lambda (test-name)
(let* ((test-conf (mt:lazy-read-test-config test-name)))
(if test-conf (runs:update-test_meta test-name test-conf))))
(let* ((test-conf (mt:lazy-read-test-config test-name area-dat)))
(if test-conf (runs:update-test_meta test-name test-conf area-dat))))
(hash-table-keys test-names))))
;; This could probably be refactored into one complex query ...
;; NOT PORTED - DO NOT USE YET
;;
(define (runs:rollup-run keys runname user keyvals)
(define (runs:rollup-run keys runname user keyvals area-dat)
(debug:print 4 "runs:rollup-run, keys: " keys " -runname " runname " user: " user)
(let* ((db #f)
;; register run operates on the main db
(new-run-id (rmt:register-run keyvals runname "new" "n/a" user))
(prev-tests (rmt:get-matching-previous-test-run-records new-run-id "%" "%"))
(curr-tests (mt:get-tests-for-run new-run-id "%/%" '() '()))
(new-run-id (rmt:register-run keyvals runname "new" "n/a" user area-dat))
(prev-tests (rmt:get-matching-previous-test-run-records new-run-id "%" "%" area-dat))
(curr-tests (mt:get-tests-for-run new-run-id "%/%" '() '() area-dat))
(curr-tests-hash (make-hash-table)))
(rmt:update-run-event_time new-run-id)
(rmt:update-run-event_time new-run-id area-dat)
;; index the already saved tests by testname and itemdat in curr-tests-hash
(for-each
(lambda (testdat)
(let* ((testname (db:test-get-testname testdat))
(item-path (db:test-get-item-path testdat))
(full-name (conc testname "/" item-path)))
(hash-table-set! curr-tests-hash full-name testdat)))
curr-tests)
;; NOPE: Non-optimal approach. Try this instead.
;; 1. tests are received in a list, most recent first
;; 2. replace the rollup test with the new *always*
(for-each
(lambda (testdat)
(let* ((testname (db:test-get-testname testdat))
(item-path (db:test-get-item-path testdat))
(full-name (conc testname "/" item-path))
(prev-test-dat (hash-table-ref/default curr-tests-hash full-name #f))
(test-steps (rmt:get-steps-for-test (db:test-get-id testdat)))
(test-steps (rmt:get-steps-for-test (db:test-get-id testdat) area-dat))
(new-test-record #f))
;; replace these with insert ... select
(apply sqlite3:execute
db
(conc "INSERT OR REPLACE INTO tests (run_id,testname,state,status,event_time,host,cpuload,diskfree,uname,rundir,item_path,run_duration,final_logf,comment) "
"VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?);")
new-run-id (cddr (vector->list testdat)))
|
︙ | | |