︙ | | | ︙ | |
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
|
(common:check-db-dir-and-exit-if-insufficient)
;; override the number of reruns from the configs
(if (and config-reruns
(> run-count config-reruns))
(set! run-count config-reruns))
(if (tasks:need-server run-id)(tasks:start-and-wait-for-server tdbdat run-id 10))
(let ((sighand (lambda (signum)
;; (signal-mask! signum) ;; to mask or not? seems to cause issues in exiting
(set! *time-to-exit* #t)
(print "Received signal " signum ", cleaning up before exit. Please wait...")
(let ((th1 (make-thread (lambda ()
(let ((tdbdat (tasks:open-db)))
|
|
|
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
|
(common:check-db-dir-and-exit-if-insufficient)
;; override the number of reruns from the configs
(if (and config-reruns
(> run-count config-reruns))
(set! run-count config-reruns))
;; (if (tasks:need-server run-id)(tasks:start-and-wait-for-server tdbdat run-id 10))
(let ((sighand (lambda (signum)
;; (signal-mask! signum) ;; to mask or not? seems to cause issues in exiting
(set! *time-to-exit* #t)
(print "Received signal " signum ", cleaning up before exit. Please wait...")
(let ((th1 (make-thread (lambda ()
(let ((tdbdat (tasks:open-db)))
|
︙ | | | ︙ | |
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
|
(debug:print-error 0 *default-log-port* "prereqs-not-met is not a list! " prereqs-not-met)
'())))
(non-completed (filter (lambda (x) ;; remove hed from not completed list, duh, of course it is not completed!
(not (equal? x hed)))
(runs:calc-not-completed prereqs-not-met)))
(loop-list (list hed tal reg reruns))
;; configure the load runner
(numcpus (common:get-num-cpus))
(maxload (string->number (or (configf:lookup *configdat* "jobtools" "maxload") "3")))
(waitdelay (string->number (or (configf:lookup *configdat* "jobtools" "waitdelay") "60"))))
(debug:print-info 4 *default-log-port* "have-resources: " have-resources " prereqs-not-met: ("
(string-intersperse
(map (lambda (t)
(if (vector? t)
(conc (db:test-get-state t) "/" (db:test-get-status t))
|
|
|
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
|
(debug:print-error 0 *default-log-port* "prereqs-not-met is not a list! " prereqs-not-met)
'())))
(non-completed (filter (lambda (x) ;; remove hed from not completed list, duh, of course it is not completed!
(not (equal? x hed)))
(runs:calc-not-completed prereqs-not-met)))
(loop-list (list hed tal reg reruns))
;; configure the load runner
(numcpus (common:get-num-cpus #f))
(maxload (string->number (or (configf:lookup *configdat* "jobtools" "maxload") "3")))
(waitdelay (string->number (or (configf:lookup *configdat* "jobtools" "waitdelay") "60"))))
(debug:print-info 4 *default-log-port* "have-resources: " have-resources " prereqs-not-met: ("
(string-intersperse
(map (lambda (t)
(if (vector? t)
(conc (db:test-get-state t) "/" (db:test-get-status t))
|
︙ | | | ︙ | |
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
|
(else
(if (runs:lownoise (conc "FAILED prerequitests and we tried" hed) 60)
(debug:print 0 *default-log-port* "WARNING: test " hed " has FAILED prerequitests and we've tried at least 10 times to run it. Giving up now."))
;; (debug:print 0 *default-log-port* " prereqs: " prereqs-not-met)
(hash-table-set! test-registry hed 'removed)
(mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "TEN_STRIKES" #f)
;; It is unclear whether this roll-up is needed - it may be the root cause of the "all set to FAIL" bug.
(rmt:roll-up-pass-fail-counts run-id test-name item-path #f "FAIL") ;; treat as FAIL
(list (if (null? tal)(car newtal)(car tal))
tal
reg
reruns)))))
;; can't drop this - maybe running? Just keep trying
(let ((runable-tests (runs:runable-tests prereqs-not-met)))
(if (null? runable-tests)
|
|
|
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
|
(else
(if (runs:lownoise (conc "FAILED prerequitests and we tried" hed) 60)
(debug:print 0 *default-log-port* "WARNING: test " hed " has FAILED prerequitests and we've tried at least 10 times to run it. Giving up now."))
;; (debug:print 0 *default-log-port* " prereqs: " prereqs-not-met)
(hash-table-set! test-registry hed 'removed)
(mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "TEN_STRIKES" #f)
;; It is unclear whether this roll-up is needed - it may be the root cause of the "all set to FAIL" bug.
(rmt:roll-up-pass-fail-counts run-id test-name item-path #f "FAIL" #f) ;; treat as FAIL
(list (if (null? tal)(car newtal)(car tal))
tal
reg
reruns)))))
;; can't drop this - maybe running? Just keep trying
(let ((runable-tests (runs:runable-tests prereqs-not-met)))
(if (null? runable-tests)
|
︙ | | | ︙ | |
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
|
(debug:print-info 4 *default-log-port* "Exiting loop with...\n hed=" hed "\n tal=" tal "\n reruns=" reruns))
)))
;; Now, *if* -run-wait is in effect, we wait for all tests to be done,
;; i.e. wait for any RUNNING tests to complete (only when in run-wait mode)
(thread-sleep! 5) ;; I think there is a race condition here. Let states/statuses settle
(let wait-loop ((num-running (rmt:get-count-tests-running-for-run-id run-id))
(prev-num-running 0))
;; (debug:print 0 *default-log-port* "num-running=" num-running ", prev-num-running=" prev-num-running)
(if (and (or (args:get-arg "-run-wait")
(equal? (configf:lookup *configdat* "setup" "run-wait") "yes"))
(> num-running 0))
(begin
;; Here we mark any old defunct tests as incomplete. Do this every fifteen minutes
;; (debug:print 0 *default-log-port* "Got here eh! num-running=" num-running " (> num-running 0) " (> num-running 0))
(if (> (current-seconds)(+ last-time-incomplete 900))
|
|
|
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
|
(debug:print-info 4 *default-log-port* "Exiting loop with...\n hed=" hed "\n tal=" tal "\n reruns=" reruns))
)))
;; Now, *if* -run-wait is in effect, we wait for all tests to be done,
;; i.e. wait for any RUNNING tests to complete (only when in run-wait mode)
(thread-sleep! 5) ;; I think there is a race condition here. Let states/statuses settle
(let wait-loop ((num-running (rmt:get-count-tests-running-for-run-id run-id))
(prev-num-running 0))
;; (BB> "num-running=" num-running ", prev-num-running=" prev-num-running)
(if (and (or (args:get-arg "-run-wait")
(equal? (configf:lookup *configdat* "setup" "run-wait") "yes"))
(> num-running 0))
(begin
;; Here we mark any old defunct tests as incomplete. Do this every fifteen minutes
;; (debug:print 0 *default-log-port* "Got here eh! num-running=" num-running " (> num-running 0) " (> num-running 0))
(if (> (current-seconds)(+ last-time-incomplete 900))
|
︙ | | | ︙ | |
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
|
((and skip-check
(configf:lookup test-conf "skip" "rundelay"))
;; run-ids = #f means *all* runs
(let* ((numseconds (common:hms-string->seconds (configf:lookup test-conf "skip" "rundelay")))
(running-tests (rmt:get-tests-for-runs-mindata #f full-test-name '("RUNNING" "REMOTEHOSTSTART" "LAUNCHED") '() #f))
(completed-tests (rmt:get-tests-for-runs-mindata #f full-test-name '("COMPLETED" "INCOMPLETE") '("PASS" "FAIL" "ABORT") #f)) ;; ironically INCOMPLETE is same as COMPLETED in this contex
(last-run-times (map db:mintest-get-event_time completed-tests))
(time-since-last (- (current-seconds) (if (null? last-run-times) 0 (apply max last-run-times)))))
(if (or (not (null? running-tests)) ;; have to skip if test is running
(> numseconds time-since-last))
(set! skip-test (conc "Skipping due to previous test run less than " (configf:lookup test-conf "skip" "rundelay") " ago"))))))
(if skip-test
(begin
(mt:test-set-state-status-by-id run-id test-id "COMPLETED" "SKIP" skip-test)
|
|
|
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
|
((and skip-check
(configf:lookup test-conf "skip" "rundelay"))
;; run-ids = #f means *all* runs
(let* ((numseconds (common:hms-string->seconds (configf:lookup test-conf "skip" "rundelay")))
(running-tests (rmt:get-tests-for-runs-mindata #f full-test-name '("RUNNING" "REMOTEHOSTSTART" "LAUNCHED") '() #f))
(completed-tests (rmt:get-tests-for-runs-mindata #f full-test-name '("COMPLETED" "INCOMPLETE") '("PASS" "FAIL" "ABORT") #f)) ;; ironically INCOMPLETE is same as COMPLETED in this contex
(last-run-times (map db:mintest-get-event_time completed-tests))
(time-since-last (- (current-seconds) (if (null? last-run-times) 0 (common:max last-run-times)))))
(if (or (not (null? running-tests)) ;; have to skip if test is running
(> numseconds time-since-last))
(set! skip-test (conc "Skipping due to previous test run less than " (configf:lookup test-conf "skip" "rundelay") " ago"))))))
(if skip-test
(begin
(mt:test-set-state-status-by-id run-id test-id "COMPLETED" "SKIP" skip-test)
|
︙ | | | ︙ | |
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
|
(lasttpath "/does/not/exist/I/hope")
(worker-thread #f))
(debug:print-info 4 *default-log-port* "runs:operate-on run=" run ", header=" header)
(if (not (null? tests))
(begin
(case action
((remove-runs)
(if (tasks:need-server run-id)(tasks:start-and-wait-for-server tdbdat run-id 10))
;; seek and kill in flight -runtests with % as testpatt here
;; (if (equal? testpatt "%")
(tasks:kill-runner target run-name testpatt)
;; (debug:print 0 *default-log-port* "not attempting to kill any run launcher processes as testpatt is " testpatt))
(debug:print 1 *default-log-port* "Removing tests for run: " runkey " " (db:get-value-by-header run header "runname")))
((set-state-status)
(if (tasks:need-server run-id)(tasks:start-and-wait-for-server tdbdat run-id 10))
|
|
|
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
|
(lasttpath "/does/not/exist/I/hope")
(worker-thread #f))
(debug:print-info 4 *default-log-port* "runs:operate-on run=" run ", header=" header)
(if (not (null? tests))
(begin
(case action
((remove-runs)
;; (if (tasks:need-server run-id)(tasks:start-and-wait-for-server tdbdat run-id 10))
;; seek and kill in flight -runtests with % as testpatt here
;; (if (equal? testpatt "%")
(tasks:kill-runner target run-name testpatt)
;; (debug:print 0 *default-log-port* "not attempting to kill any run launcher processes as testpatt is " testpatt))
(debug:print 1 *default-log-port* "Removing tests for run: " runkey " " (db:get-value-by-header run header "runname")))
((set-state-status)
(if (tasks:need-server run-id)(tasks:start-and-wait-for-server tdbdat run-id 10))
|
︙ | | | ︙ | |