269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
|
;; (set-signal-handler! signal/int (lambda ()
;; Do not run the test if it is REMOVING, RUNNING, KILLREQ or REMOTEHOSTSTART,
;; Mark the test as REMOTEHOSTSTART *IMMEDIATELY*
;;
(let ((test-info (rmt:get-testinfo-state-status run-id test-id)))
(cond
((member (db:test-get-state test-info) '("INCOMPLETE" "KILLED" "UNKNOWN" "KILLREQ" "STUCK")) ;; prior run of this test didn't complete, go ahead and try to rerun
(debug:print 0 "INFO: test is INCOMPLETE or KILLED, treat this execute call as a rerun request")
(tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a")) ;; prime it for running
((not (member (db:test-get-state test-info) '("REMOVING" "REMOTEHOSTSTART" "RUNNING" "KILLREQ")))
(tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a"))
(else ;; (member (db:test-get-state test-info) '("REMOVING" "REMOTEHOSTSTART" "RUNNING" "KILLREQ"))
(debug:print 0 "ERROR: test state is " (db:test-get-state test-info) ", cannot proceed")
(exit))))
(debug:print 2 "Exectuing " test-name " (id: " test-id ") on " (get-host-name))
(set! keys (rmt:get-keys))
;; (runs:set-megatest-env-vars run-id inkeys: keys inkeyvals: keyvals) ;; these may be needed by the launching process
;; one of these is defunct/redundant ...
(if (not (launch:setup-for-run force: #t))
|
|
|
|
|
|
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
|
;; (set-signal-handler! signal/int (lambda ()
;; Do not run the test if it is REMOVING, RUNNING, KILLREQ or REMOTEHOSTSTART,
;; Mark the test as REMOTEHOSTSTART *IMMEDIATELY*
;;
(let ((test-info (rmt:get-testinfo-state-status run-id test-id)))
(cond
((member (db:test-state test-info) '("INCOMPLETE" "KILLED" "UNKNOWN" "KILLREQ" "STUCK")) ;; prior run of this test didn't complete, go ahead and try to rerun
(debug:print 0 "INFO: test is INCOMPLETE or KILLED, treat this execute call as a rerun request")
(tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a")) ;; prime it for running
((not (member (db:test-state test-info) '("REMOVING" "REMOTEHOSTSTART" "RUNNING" "KILLREQ")))
(tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a"))
(else ;; (member (db:test-state test-info) '("REMOVING" "REMOTEHOSTSTART" "RUNNING" "KILLREQ"))
(debug:print 0 "ERROR: test state is " (db:test-state test-info) ", cannot proceed")
(exit))))
(debug:print 2 "Exectuing " test-name " (id: " test-id ") on " (get-host-name))
(set! keys (rmt:get-keys))
;; (runs:set-megatest-env-vars run-id inkeys: keys inkeyvals: keyvals) ;; these may be needed by the launching process
;; one of these is defunct/redundant ...
(if (not (launch:setup-for-run force: #t))
|
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
|
(thread-join! th1)
(thread-sleep! 1) ;; givbe thread th1 a chance to be done TODO: Verify this is needed. At 0.1 I was getting fail to stop, increased to total of 1.1 sec.
(mutex-lock! m)
(let* ((item-path (item-list->path itemdat))
;; only state and status needed - use lazy routine
(testinfo (rmt:get-testinfo-state-status run-id test-id)))
;; Am I completed?
(if (member (db:test-get-state testinfo) '("REMOTEHOSTSTART" "RUNNING")) ;; NOTE: It should *not* be REMOTEHOSTSTART but for reasons I don't yet understand it sometimes gets stuck in that state ;; (not (equal? (db:test-get-state testinfo) "COMPLETED"))
(let ((new-state (if kill-job? "KILLED" "COMPLETED") ;; (if (eq? (vector-ref exit-info 2) 0) ;; exited with "good" status
;; "COMPLETED"
;; (db:test-get-state testinfo))) ;; else preserve the state as set within the test
)
(new-status (cond
((not (launch:einf-exit-status exit-info)) "FAIL") ;; job failed to run ... (vector-ref exit-info 1)
((eq? (launch:einf-rollup-status exit-info) 0) ;; (vector-ref exit-info 3)
;; if the current status is AUTO then defer to the calculated value (i.e. leave this AUTO)
(if (equal? (db:test-get-status testinfo) "AUTO") "AUTO" "PASS"))
((eq? (launch:einf-rollup-status exit-info) 1) "FAIL") ;; (vector-ref exit-info 3)
((eq? (launch:einf-rollup-status exit-info) 2) ;; (vector-ref exit-info 3)
;; if the current status is AUTO then defer to the calculated value but qualify (i.e. make this AUTO-WARN)
(if (equal? (db:test-get-status testinfo) "AUTO") "AUTO-WARN" "WARN"))
(else "FAIL")))) ;; (db:test-get-status testinfo)))
(debug:print-info 1 "Test exited in state=" (db:test-get-state testinfo) ", setting state/status based on exit code of " (launch:einf-exit-status exit-info) " and rollup-status of " (launch:einf-rollup-status exit-info))
(tests:test-set-status! run-id
test-id
new-state
new-status
(args:get-arg "-m") #f)
;; need to update the top test record if PASS or FAIL and this is a subtest
;; NO NEED TO CALL roll-up-pass-fail-counts HERE, THIS IS DONE IN roll-up-pass-fail-counts called by tests:test-set-status!
|
|
|
|
|
|
|
|
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
|
(thread-join! th1)
(thread-sleep! 1) ;; givbe thread th1 a chance to be done TODO: Verify this is needed. At 0.1 I was getting fail to stop, increased to total of 1.1 sec.
(mutex-lock! m)
(let* ((item-path (item-list->path itemdat))
;; only state and status needed - use lazy routine
(testinfo (rmt:get-testinfo-state-status run-id test-id)))
;; Am I completed?
(if (member (db:test-state testinfo) '("REMOTEHOSTSTART" "RUNNING")) ;; NOTE: It should *not* be REMOTEHOSTSTART but for reasons I don't yet understand it sometimes gets stuck in that state ;; (not (equal? (db:test-state testinfo) "COMPLETED"))
(let ((new-state (if kill-job? "KILLED" "COMPLETED") ;; (if (eq? (vector-ref exit-info 2) 0) ;; exited with "good" status
;; "COMPLETED"
;; (db:test-state testinfo))) ;; else preserve the state as set within the test
)
(new-status (cond
((not (launch:einf-exit-status exit-info)) "FAIL") ;; job failed to run ... (vector-ref exit-info 1)
((eq? (launch:einf-rollup-status exit-info) 0) ;; (vector-ref exit-info 3)
;; if the current status is AUTO then defer to the calculated value (i.e. leave this AUTO)
(if (equal? (db:test-status testinfo) "AUTO") "AUTO" "PASS"))
((eq? (launch:einf-rollup-status exit-info) 1) "FAIL") ;; (vector-ref exit-info 3)
((eq? (launch:einf-rollup-status exit-info) 2) ;; (vector-ref exit-info 3)
;; if the current status is AUTO then defer to the calculated value but qualify (i.e. make this AUTO-WARN)
(if (equal? (db:test-status testinfo) "AUTO") "AUTO-WARN" "WARN"))
(else "FAIL")))) ;; (db:test-status testinfo)))
(debug:print-info 1 "Test exited in state=" (db:test-state testinfo) ", setting state/status based on exit code of " (launch:einf-exit-status exit-info) " and rollup-status of " (launch:einf-rollup-status exit-info))
(tests:test-set-status! run-id
test-id
new-state
new-status
(args:get-arg "-m") #f)
;; need to update the top test record if PASS or FAIL and this is a subtest
;; NO NEED TO CALL roll-up-pass-fail-counts HERE, THIS IS DONE IN roll-up-pass-fail-counts called by tests:test-set-status!
|
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
|
;; tree is damaged or lost.
;;
(if (not (hash-table-ref/default *toptest-paths* testname #f))
(let* ((testinfo (rmt:get-test-info-by-id run-id test-id)) ;; run-id testname item-path))
(curr-test-path (if testinfo ;; (filedb:get-path *fdb*
;; (db:get-path dbstruct
;; (rmt:sdb-qry 'getstr
(db:test-get-rundir testinfo) ;; ) ;; )
#f)))
(hash-table-set! *toptest-paths* testname curr-test-path)
;; NB// Was this for the test or for the parent in an iterated test?
(rmt:general-call 'test-set-rundir-shortdir run-id lnkpath
(if (file-exists? lnkpath)
(resolve-pathname lnkpath)
lnkpath)
|
|
|
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
|
;; tree is damaged or lost.
;;
(if (not (hash-table-ref/default *toptest-paths* testname #f))
(let* ((testinfo (rmt:get-test-info-by-id run-id test-id)) ;; run-id testname item-path))
(curr-test-path (if testinfo ;; (filedb:get-path *fdb*
;; (db:get-path dbstruct
;; (rmt:sdb-qry 'getstr
(db:test-rundir testinfo) ;; ) ;; )
#f)))
(hash-table-set! *toptest-paths* testname curr-test-path)
;; NB// Was this for the test or for the parent in an iterated test?
(rmt:general-call 'test-set-rundir-shortdir run-id lnkpath
(if (file-exists? lnkpath)
(resolve-pathname lnkpath)
lnkpath)
|
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
|
(if hosts (set! hosts (string-split hosts)))
;; set the megatest to be called on the remote host
(if (not remote-megatest)(set! remote-megatest local-megatest)) ;; "megatest"))
(set! mt-bindir-path (pathname-directory remote-megatest))
(if launcher (set! launcher (string-split launcher)))
;; set up the run work area for this test
(if (and (args:get-arg "-preclean") ;; user has requested to preclean for this run
(not (member (db:test-get-rundir testinfo)(list "n/a" "/tmp/badname")))) ;; n/a is a placeholder and thus not a read dir
(begin
(debug:print-info 0 "attempting to preclean directory " (db:test-get-rundir testinfo) " for test " test-name "/" item-path)
(runs:remove-test-directory testinfo 'remove-data-only))) ;; remove data only, do not perturb the record
;; prevent overlapping actions - set to LAUNCHED as early as possible
;;
(tests:test-set-status! run-id test-id "LAUNCHED" "n/a" #f #f) ;; (if launch-results launch-results "FAILED"))
(rmt:roll-up-pass-fail-counts run-id test-name item-path #f "LAUNCHED")
(set! diskpath (get-best-disk *configdat* tconfig))
|
|
|
|
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
|
(if hosts (set! hosts (string-split hosts)))
;; set the megatest to be called on the remote host
(if (not remote-megatest)(set! remote-megatest local-megatest)) ;; "megatest"))
(set! mt-bindir-path (pathname-directory remote-megatest))
(if launcher (set! launcher (string-split launcher)))
;; set up the run work area for this test
(if (and (args:get-arg "-preclean") ;; user has requested to preclean for this run
(not (member (db:test-rundir testinfo)(list "n/a" "/tmp/badname")))) ;; n/a is a placeholder and thus not a read dir
(begin
(debug:print-info 0 "attempting to preclean directory " (db:test-rundir testinfo) " for test " test-name "/" item-path)
(runs:remove-test-directory testinfo 'remove-data-only))) ;; remove data only, do not perturb the record
;; prevent overlapping actions - set to LAUNCHED as early as possible
;;
(tests:test-set-status! run-id test-id "LAUNCHED" "n/a" #f #f) ;; (if launch-results launch-results "FAILED"))
(rmt:roll-up-pass-fail-counts run-id test-name item-path #f "LAUNCHED")
(set! diskpath (get-best-disk *configdat* tconfig))
|