375
376
377
378
379
380
381
382
383
384
385
386
387
388
|
(-
(current-seconds)
start-seconds)))))
(kill-tries 0))
;; (tests:set-full-meta-info #f test-id run-id (calc-minutes) work-area)
;; (tests:set-full-meta-info test-id run-id (calc-minutes) work-area)
(tests:set-full-meta-info #f test-id run-id (calc-minutes) work-area 10)
(let loop ((minutes (calc-minutes))
(cpu-load (alist-ref 'adj-core-load (common:get-normalized-cpu-load #f)))
(disk-free (get-df (current-directory)))
(last-sync (current-seconds)))
(BB> "launch:monitor-job - top of loop encountered at "(current-seconds)" with last-sync="last-sync)
(let* ((over-time (> (current-seconds) (+ last-sync update-period)))
(new-cpu-load (let* ((load (alist-ref 'adj-core-load (common:get-normalized-cpu-load #f)))
|
>
|
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
|
(-
(current-seconds)
start-seconds)))))
(kill-tries 0))
;; (tests:set-full-meta-info #f test-id run-id (calc-minutes) work-area)
;; (tests:set-full-meta-info test-id run-id (calc-minutes) work-area)
(tests:set-full-meta-info #f test-id run-id (calc-minutes) work-area 10)
(let loop ((minutes (calc-minutes))
(cpu-load (alist-ref 'adj-core-load (common:get-normalized-cpu-load #f)))
(disk-free (get-df (current-directory)))
(last-sync (current-seconds)))
(BB> "launch:monitor-job - top of loop encountered at "(current-seconds)" with last-sync="last-sync)
(let* ((over-time (> (current-seconds) (+ last-sync update-period)))
(new-cpu-load (let* ((load (alist-ref 'adj-core-load (common:get-normalized-cpu-load #f)))
|
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
|
(test-pid (db:test-get-process_id test-info)))
(cond
;; -mrw- I'm removing KILLREQ from this list so that a test in KILLREQ state is treated as a "do not run" flag.
((member (db:test-get-state test-info) '("INCOMPLETE" "KILLED" "UNKNOWN" "STUCK")) ;; prior run of this test didn't complete, go ahead and try to rerun
(debug:print 0 *default-log-port* "INFO: test is INCOMPLETE or KILLED, treat this execute call as a rerun request")
;; (tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a")
(db:general-call dbstruct 'set-test-start-time (list test-id))
(rmt:test-set-state-status run-id test-id "REMOTEHOSTSTART" "n/a" #f)
) ;; prime it for running
((member (db:test-get-state test-info) '("RUNNING" "REMOTEHOSTSTART"))
(if (process:alive-on-host? test-host test-pid)
(debug:print-error 0 *default-log-port* "test state is " (db:test-get-state test-info) " and process " test-pid " is still running on host " test-host ", cannot proceed")
(exit)))
((not (member (db:test-get-state test-info) '("REMOVING" "REMOTEHOSTSTART" "RUNNING" "KILLREQ")))
;; (tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a")
(db:general-call dbstruct 'set-test-start-time (list test-id))
(rmt:test-set-state-status run-id test-id "REMOTEHOSTSTART" "n/a" #f)
)
(else ;; (member (db:test-get-state test-info) '("REMOVING" "REMOTEHOSTSTART" "RUNNING" "KILLREQ"))
(debug:print-error 0 *default-log-port* "test state is " (db:test-get-state test-info) ", cannot proceed")
(exit))))
(debug:print 2 *default-log-port* "Executing " test-name " (id: " test-id ") on " (get-host-name))
|
|
|
|
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
|
(test-pid (db:test-get-process_id test-info)))
(cond
;; -mrw- I'm removing KILLREQ from this list so that a test in KILLREQ state is treated as a "do not run" flag.
((member (db:test-get-state test-info) '("INCOMPLETE" "KILLED" "UNKNOWN" "STUCK")) ;; prior run of this test didn't complete, go ahead and try to rerun
(debug:print 0 *default-log-port* "INFO: test is INCOMPLETE or KILLED, treat this execute call as a rerun request")
;; (tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a")
(rmt:general-call 'set-test-start-time #f test-id)
(rmt:test-set-state-status run-id test-id "REMOTEHOSTSTART" "n/a" #f)
) ;; prime it for running
((member (db:test-get-state test-info) '("RUNNING" "REMOTEHOSTSTART"))
(if (process:alive-on-host? test-host test-pid)
(debug:print-error 0 *default-log-port* "test state is " (db:test-get-state test-info) " and process " test-pid " is still running on host " test-host ", cannot proceed")
(exit)))
((not (member (db:test-get-state test-info) '("REMOVING" "REMOTEHOSTSTART" "RUNNING" "KILLREQ")))
;; (tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a")
(rmt:general-call 'set-test-start-time #f test-id)
(rmt:test-set-state-status run-id test-id "REMOTEHOSTSTART" "n/a" #f)
)
(else ;; (member (db:test-get-state test-info) '("REMOVING" "REMOTEHOSTSTART" "RUNNING" "KILLREQ"))
(debug:print-error 0 *default-log-port* "test state is " (db:test-get-state test-info) ", cannot proceed")
(exit))))
(debug:print 2 *default-log-port* "Executing " test-name " (id: " test-id ") on " (get-host-name))
|