︙ | | | ︙ | |
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
|
((< age 10) #t)
(else #f))))
lock-files)))
(if fresh-locks
(begin
(if (runs:lownoise "runners-softlock-wait" 360)
(debug:print-info 0 *default-log-port* "Other runners in flight, giving up some time..."))
(thread-sleep! 10))
(begin
(if (runs:lownoise "runners-softlock-nowait" 360)
(debug:print-info 0 *default-log-port* "No runners in flight, updating softlock"))
(let* ((ouf (open-output-file my-lock-file)))
(with-output-to-port ouf
(lambda ()(print (current-seconds))))
(close-output-port ouf))))
|
|
|
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
|
((< age 10) #t)
(else #f))))
lock-files)))
(if fresh-locks
(begin
(if (runs:lownoise "runners-softlock-wait" 360)
(debug:print-info 0 *default-log-port* "Other runners in flight, giving up some time..."))
(thread-sleep! 2))
(begin
(if (runs:lownoise "runners-softlock-nowait" 360)
(debug:print-info 0 *default-log-port* "No runners in flight, updating softlock"))
(let* ((ouf (open-output-file my-lock-file)))
(with-output-to-port ouf
(lambda ()(print (current-seconds))))
(close-output-port ouf))))
|
︙ | | | ︙ | |
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
|
(define (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs)
;; Take advantage of a good place to exit if running the one-pass methodology
(if (and (> (runs:dat-can-run-more-tests-count runsdat) 20)
(args:get-arg "-one-pass"))
(exit 0))
(thread-sleep! (cond ;; BB: check with Matt. Should this sleep move
;; to cond clauses below where we determine we
;; have too many jobs running rather than each
;; time the and condition above is true (which
;; seems like always)?
((< (- (current-seconds)(runs:dat-beginning-of-time runsdat)) 30) ;; for the first 30 seconds do not throttle in any way
0)
((> (runs:dat-can-run-more-tests-count runsdat) 20) ;; original intent was - save cycles, wait a long time
(if (runs:lownoise "waiting on tasks" 60)(debug:print-info 2 *default-log-port* "waiting for tasks to complete, sleeping briefly ..."))
10) ;; obviously haven't had any work to do for a while
(else 0)))
;; ;; if have a number for inter-test-delay, use it, else don't delay much, maybe even zero?
;; (configf:lookup-number *configdat* "setup" "inter-test-delay" default: 0.01)
;; )))
(let* ((num-running (rmt:get-count-tests-running run-id #f)) ;; fastmode=no
(num-running-in-jobgroup (rmt:get-count-tests-running-in-jobgroup run-id jobgroup))
(job-group-limit (let ((jobg-count (configf:lookup *configdat* "jobgroups" jobgroup)))
|
|
|
|
|
|
|
|
|
|
|
|
|
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
|
(define (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs)
;; Take advantage of a good place to exit if running the one-pass methodology
(if (and (> (runs:dat-can-run-more-tests-count runsdat) 20)
(args:get-arg "-one-pass"))
(exit 0))
;; (thread-sleep! (cond ;; BB: check with Matt. Should this sleep move
;; ;; to cond clauses below where we determine we
;; ;; have too many jobs running rather than each
;; ;; time the and condition above is true (which
;; ;; seems like always)?
;; ((< (- (current-seconds)(runs:dat-beginning-of-time runsdat)) 30) ;; for the first 30 seconds do not throttle in any way
;; 0)
;; ((> (runs:dat-can-run-more-tests-count runsdat) 20) ;; original intent was - save cycles, wait a long time
;; (if (runs:lownoise "waiting on tasks" 60)(debug:print-info 2 *default-log-port* "waiting for tasks to complete, sleeping briefly ..."))
;; 10) ;; obviously haven't had any work to do for a while
;; (else 0)))
;; ;; if have a number for inter-test-delay, use it, else don't delay much, maybe even zero?
;; (configf:lookup-number *configdat* "setup" "inter-test-delay" default: 0.01)
;; )))
(let* ((num-running (rmt:get-count-tests-running run-id #f)) ;; fastmode=no
(num-running-in-jobgroup (rmt:get-count-tests-running-in-jobgroup run-id jobgroup))
(job-group-limit (let ((jobg-count (configf:lookup *configdat* "jobgroups" jobgroup)))
|
︙ | | | ︙ | |
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
|
(null? non-completed))
(debug:print-info 4 *default-log-port* "cond branch - " "ei-4")
(if (runs:can-keep-running? hed 20)
(begin
(runs:inc-cant-run-tests hed)
(debug:print-info 0 *default-log-port* "no fails in prerequisites for " hed " but also none running, keeping " hed " for now. Try count: " (hash-table-ref/default *seen-cant-run-tests* hed 0)) ;;
;; getting here likely means the system is way overloaded, kill a full minute before continuing
(thread-sleep! 60) ;; TODO: gate by normalized server load > 1.0 (maxload config thing)
;; num-retries code was here
;; we use this opportunity to move contents of reg to tal
(list (car newtal)(append (cdr newtal) reg) '() reruns)) ;; an issue with prereqs not yet met?
(begin
(debug:print-info 1 *default-log-port* "no fails in prerequisites for " hed " but nothing seen running in a while, dropping test " hed " from the run queue")
(let ((test-id (rmt:get-test-id run-id hed "")))
(if test-id (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "TIMED_OUT" "Nothing seen running in a while.")))
|
|
|
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
|
(null? non-completed))
(debug:print-info 4 *default-log-port* "cond branch - " "ei-4")
(if (runs:can-keep-running? hed 20)
(begin
(runs:inc-cant-run-tests hed)
(debug:print-info 0 *default-log-port* "no fails in prerequisites for " hed " but also none running, keeping " hed " for now. Try count: " (hash-table-ref/default *seen-cant-run-tests* hed 0)) ;;
;; getting here likely means the system is way overloaded, kill a full minute before continuing
(thread-sleep! 5) ;; TODO: gate by normalized server load > 1.0 (maxload config thing)
;; num-retries code was here
;; we use this opportunity to move contents of reg to tal
(list (car newtal)(append (cdr newtal) reg) '() reruns)) ;; an issue with prereqs not yet met?
(begin
(debug:print-info 1 *default-log-port* "no fails in prerequisites for " hed " but nothing seen running in a while, dropping test " hed " from the run queue")
(let ((test-id (rmt:get-test-id run-id hed "")))
(if test-id (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "TIMED_OUT" "Nothing seen running in a while.")))
|
︙ | | | ︙ | |
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
|
(debug:print-info 0 *default-log-port* "Marking stuck tests as INCOMPLETE while waiting for run " run-id
". Running as pid " (current-process-id) " on " (get-host-name))
(set! last-time-incomplete (current-seconds)) ;; FIXME, this might be causing slow down - use of set!
(rmt:find-and-mark-incomplete run-id #f)
(debug:print-info 0 *default-log-port* "run-wait specified, waiting on " actual-num-running
" tests in RUNNING, REMOTEHOSTSTART or LAUNCHED state at "
(time->string (seconds->local-time (current-seconds))))))
(thread-sleep! 5)
(wait-loop (rmt:get-count-tests-running-for-run-id run-id #t) ;; fastmode=yes
num-running))))
;; LET* ((test-record
;; we get here on "drop through". All done!
;; this is moved to runs:run-testes since this function is getting called twice to ensure everthing is completed.
;; (debug:print-info 0 *default-log-port* "Calling Post Hook")
;; (runs:run-post-hook run-id)
|
|
|
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
|
(debug:print-info 0 *default-log-port* "Marking stuck tests as INCOMPLETE while waiting for run " run-id
". Running as pid " (current-process-id) " on " (get-host-name))
(set! last-time-incomplete (current-seconds)) ;; FIXME, this might be causing slow down - use of set!
(rmt:find-and-mark-incomplete run-id #f)
(debug:print-info 0 *default-log-port* "run-wait specified, waiting on " actual-num-running
" tests in RUNNING, REMOTEHOSTSTART or LAUNCHED state at "
(time->string (seconds->local-time (current-seconds))))))
(thread-sleep! 1) ;; (if (>= num-running max-concurrent-jobs) 5 1))
(wait-loop (rmt:get-count-tests-running-for-run-id run-id #t) ;; fastmode=yes
num-running))))
;; LET* ((test-record
;; we get here on "drop through". All done!
;; this is moved to runs:run-testes since this function is getting called twice to ensure everthing is completed.
;; (debug:print-info 0 *default-log-port* "Calling Post Hook")
;; (runs:run-post-hook run-id)
|
︙ | | | ︙ | |
2533
2534
2535
2536
2537
2538
2539
2540
2541
2542
2543
2544
2545
2546
2547
|
(debug:print-info 2 *default-log-port* "new state " new-state ", new status " new-status )
(mt:test-set-state-status-by-id run-id test-id new-state new-status #f))
(if (not (null? tal))
(loop (car tal)(cdr tal))))
((run-wait)
;; BB TODO - manage has-subrun case
(debug:print-info 2 *default-log-port* "still waiting, " (length tests) " tests still running")
(thread-sleep! 10)
(let ((new-tests (proc-get-tests run-id)))
(if (null? new-tests)
(debug:print-info 1 *default-log-port* "Run completed according to zero tests matching provided criteria.")
(loop (car new-tests)(cdr new-tests)))))
((archive)
;; BB TODO - manage has-subrun case
(if (and run-dir (not toplevel-with-children))
|
|
|
2533
2534
2535
2536
2537
2538
2539
2540
2541
2542
2543
2544
2545
2546
2547
|
(debug:print-info 2 *default-log-port* "new state " new-state ", new status " new-status )
(mt:test-set-state-status-by-id run-id test-id new-state new-status #f))
(if (not (null? tal))
(loop (car tal)(cdr tal))))
((run-wait)
;; BB TODO - manage has-subrun case
(debug:print-info 2 *default-log-port* "still waiting, " (length tests) " tests still running")
(thread-sleep! 5)
(let ((new-tests (proc-get-tests run-id)))
(if (null? new-tests)
(debug:print-info 1 *default-log-port* "Run completed according to zero tests matching provided criteria.")
(loop (car new-tests)(cdr new-tests)))))
((archive)
;; BB TODO - manage has-subrun case
(if (and run-dir (not toplevel-with-children))
|
︙ | | | ︙ | |