︙ | | | ︙ | |
27
28
29
30
31
32
33
34
35
36
37
38
39
40
|
(declare (uses items))
(declare (uses runconfig))
(declare (uses tests))
(declare (uses server))
(declare (uses mt))
(declare (uses archive))
;; (declare (uses filedb))
(include "common_records.scm")
(include "key_records.scm")
(include "db_records.scm")
(include "run_records.scm")
(include "test_records.scm")
|
>
>
>
>
>
>
>
>
>
>
>
>
|
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
|
(declare (uses items))
(declare (uses runconfig))
(declare (uses tests))
(declare (uses server))
(declare (uses mt))
(declare (uses archive))
;; (declare (uses filedb))
(declare (uses commonmod))
(import commonmod)
(declare (uses configfmod))
(import configfmod)
(declare (uses dbmod))
(import dbmod)
(declare (uses servermod))
(import servermod)
(include "common_records.scm")
(include "key_records.scm")
(include "db_records.scm")
(include "run_records.scm")
(include "test_records.scm")
|
︙ | | | ︙ | |
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
|
;; (rmt:find-and-mark-incomplete-all-runs))))) CAN'T INTERRUPT IT ...
(let ((run-ids (rmt:get-all-run-ids)))
(for-each (lambda (run-id)
(if keep-going
(handle-exceptions
exn
(debug:print 0 *default-log-port* "error in calling find-and-mark-incomplete for run-id " run-id ", exn=" exn)
(rmt:find-and-mark-incomplete run-id #f)))) ;; ovr-deadtime))) ;; could be root of https://hsdes.intel.com/appstore/article/#/220546828/main -- Title: Megatest jobs show DEAD even though they are still running (1.64/27)
run-ids)))
"runs: mark-incompletes")))
;; (thread-start! th1)
(thread-start! th2)
;; (thread-join! th1)
;; just do the main stuff in the main thread
(runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests
|
>
>
>
|
|
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
|
;; (rmt:find-and-mark-incomplete-all-runs))))) CAN'T INTERRUPT IT ...
(let ((run-ids (rmt:get-all-run-ids)))
(for-each (lambda (run-id)
(if keep-going
(handle-exceptions
exn
(debug:print 0 *default-log-port* "error in calling find-and-mark-incomplete for run-id " run-id ", exn=" exn)
(rmt:find-and-mark-incomplete run-id #f)
(launch:end-of-run-check run-id)
))) ;; ovr-deadtime))) ;; could be root of https://hsdes.intel.com/appstore/article/#/220546828/main -- Title: Megatest jobs show DEAD even though they are still running (1.64/27)
run-ids)))
"runs: mark-incompletes")))
;; (thread-start! th1)
(thread-start! th2)
;; (thread-join! th1)
;; just do the main stuff in the main thread
(runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests
|
︙ | | | ︙ | |
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
|
;; If no resources are available just kill time and loop again
;;
((not have-resources) ;; simply try again after waiting a second
(if (runs:lownoise "no resources" 60)
(debug:print-info 1 *default-log-port* "no resources to run new tests, waiting ..."))
;; Have gone back and forth on this but db starvation is an issue.
;; wait one second before looking again to run jobs.
(thread-sleep! 0.25)
;; could have done hed tal here but doing car/cdr of newtal to rotate tests
(list (car newtal)(cdr newtal) reg reruns))
;; This is the final stage, everything is in place so launch the test
;;
((and have-resources
(or (null? prereqs-not-met)
|
|
|
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
|
;; If no resources are available just kill time and loop again
;;
((not have-resources) ;; simply try again after waiting a second
(if (runs:lownoise "no resources" 60)
(debug:print-info 1 *default-log-port* "no resources to run new tests, waiting ..."))
;; Have gone back and forth on this but db starvation is an issue.
;; wait one second before looking again to run jobs.
(thread-sleep! 1) ;; changed back to 1 from 0.25
;; could have done hed tal here but doing car/cdr of newtal to rotate tests
(list (car newtal)(cdr newtal) reg reruns))
;; This is the final stage, everything is in place so launch the test
;;
((and have-resources
(or (null? prereqs-not-met)
|
︙ | | | ︙ | |
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
|
;; (debug:print 0 *default-log-port* "Got here eh! num-running=" num-running " (> num-running 0) " (> num-running 0))
(if (> (current-seconds)(+ last-time-incomplete 900))
(let ((actual-num-running (rmt:get-count-tests-running-for-run-id run-id)))
(debug:print-info 0 *default-log-port* "Marking stuck tests as INCOMPLETE while waiting for run " run-id
". Running as pid " (current-process-id) " on " (get-host-name))
(set! last-time-incomplete (current-seconds)) ;; FIXME, this might be causing slow down - use of set!
(rmt:find-and-mark-incomplete run-id #f)
(debug:print-info 0 *default-log-port* "run-wait specified, waiting on " actual-num-running
" tests in RUNNING, REMOTEHOSTSTART or LAUNCHED state at "
(time->string (seconds->local-time (current-seconds))))))
;; (if (runs:dat-load-mgmt-function runsdat)((runs:dat-load-mgmt-function runsdat)))
(thread-sleep! 5) ;; (if (>= num-running max-concurrent-jobs) 5 1))
(wait-loop (rmt:get-count-tests-running-for-run-id run-id)
num-running))))
|
>
>
|
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
|
;; (debug:print 0 *default-log-port* "Got here eh! num-running=" num-running " (> num-running 0) " (> num-running 0))
(if (> (current-seconds)(+ last-time-incomplete 900))
(let ((actual-num-running (rmt:get-count-tests-running-for-run-id run-id)))
(debug:print-info 0 *default-log-port* "Marking stuck tests as INCOMPLETE while waiting for run " run-id
". Running as pid " (current-process-id) " on " (get-host-name))
(set! last-time-incomplete (current-seconds)) ;; FIXME, this might be causing slow down - use of set!
(rmt:find-and-mark-incomplete run-id #f)
;;call end of eud of run detection for posthook
(launch:end-of-run-check run-id)
(debug:print-info 0 *default-log-port* "run-wait specified, waiting on " actual-num-running
" tests in RUNNING, REMOTEHOSTSTART or LAUNCHED state at "
(time->string (seconds->local-time (current-seconds))))))
;; (if (runs:dat-load-mgmt-function runsdat)((runs:dat-load-mgmt-function runsdat)))
(thread-sleep! 5) ;; (if (>= num-running max-concurrent-jobs) 5 1))
(wait-loop (rmt:get-count-tests-running-for-run-id run-id)
num-running))))
|
︙ | | | ︙ | |