︙ | | | ︙ | |
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
|
(set! tconfigreg (tests:get-all)) ;; mapping of testname => test source path
(let ((sighand (lambda (signum)
;; (signal-mask! signum) ;; to mask or not? seems to cause issues in exiting
(if (eq? signum signal/stop)
(debug:print-error 0 *default-log-port* "attempt to STOP process. Exiting."))
(set! *time-to-exit* #t)
(print "Received signal " signum ", cleaning up before exit (set this test to COMPLETED/ABORT) . Please wait...")
(let ((th1 (make-thread (lambda ()
(print "set test to COMPLETED/ABORT begin.")
(rmt:test-set-state-status run-id test-id "COMPLETED" "ABORT" "received kill signal")
(print "set test to COMPLETED/ABORT complete.")
(print "Killed by signal " signum ". Exiting")
(exit 1))))
(th2 (make-thread (lambda ()
(thread-sleep! 20)
(debug:print 0 *default-log-port* "Done")
(exit 4)))))
(thread-start! th2)
(thread-start! th1)
|
|
|
|
|
|
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
|
(set! tconfigreg (tests:get-all)) ;; mapping of testname => test source path
(let ((sighand (lambda (signum)
;; (signal-mask! signum) ;; to mask or not? seems to cause issues in exiting
(if (eq? signum signal/stop)
(debug:print-error 0 *default-log-port* "attempt to STOP process. Exiting."))
(set! *time-to-exit* #t)
(debug:print 0 *default-log-port* "Received signal " signum ", cleaning up before exit (set this test to COMPLETED/ABORT) . Please wait...")
(let ((th1 (make-thread (lambda ()
(debug:print 0 *default-log-port* "set test to COMPLETED/ABORT begin.")
(rmt:test-set-state-status run-id test-id "COMPLETED" "ABORT" "received kill signal")
(debug:print 0 *default-log-port* "set test to COMPLETED/ABORT complete.")
(debug:print 0 *default-log-port* "Killed by signal " signum ". Exiting")
(exit 1))))
(th2 (make-thread (lambda ()
(thread-sleep! 20)
(debug:print 0 *default-log-port* "Done")
(exit 4)))))
(thread-start! th2)
(thread-start! th1)
|
︙ | | | ︙ | |
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
|
(test-pid (db:test-get-process_id test-info)))
(cond
;; -mrw- I'm removing KILLREQ from this list so that a test in KILLREQ state is treated as a "do not run" flag.
((member (db:test-get-state test-info) '("INCOMPLETE" "KILLED" "UNKNOWN" "STUCK")) ;; prior run of this test didn't complete, go ahead and try to rerun
(debug:print 0 *default-log-port* "INFO: test is INCOMPLETE or KILLED, treat this execute call as a rerun request")
;; (tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a")
(rmt:general-call 'set-test-start-time #f test-id)
(rmt:test-set-state-status run-id test-id "REMOTEHOSTSTART" "n/a" #f)
) ;; prime it for running
((member (db:test-get-state test-info) '("RUNNING" "REMOTEHOSTSTART"))
(if (process:alive-on-host? test-host test-pid)
(debug:print-error 0 *default-log-port* "test state is " (db:test-get-state test-info) " and process " test-pid " is still running on host " test-host ", cannot proceed")
(exit 1)))
((member (db:test-get-state test-info) '("COMPLETED")) ;; we do NOT want to re-run COMPLETED jobs. Mark as NOT_STARTED to run!
(debug:print 0 *default-log-port* "test state is " (db:test-get-state test-info) ", cannot proceed")
(debug:print 0 *default-log-port* "exiting with status 1")
(exit 1))
((not (member (db:test-get-state test-info) '("REMOVING" "REMOTEHOSTSTART" "RUNNING" "KILLREQ")))
;; (tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a")
(rmt:general-call 'set-test-start-time #f test-id)
(rmt:test-set-state-status run-id test-id "REMOTEHOSTSTART" "n/a" #f))
(else ;; (member (db:test-get-state test-info) '("REMOVING" "REMOTEHOSTSTART" "RUNNING" "KILLREQ"))
(debug:print 0 *default-log-port* "test state is " (db:test-get-state test-info) ", cannot proceed")
(debug:print 0 *default-log-port* "exiting with status 1")
(exit 1))))
;; cleanup prior execution's steps
|
|
|
|
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
|
(test-pid (db:test-get-process_id test-info)))
(cond
;; -mrw- I'm removing KILLREQ from this list so that a test in KILLREQ state is treated as a "do not run" flag.
((member (db:test-get-state test-info) '("INCOMPLETE" "KILLED" "UNKNOWN" "STUCK")) ;; prior run of this test didn't complete, go ahead and try to rerun
(debug:print 0 *default-log-port* "INFO: test is INCOMPLETE or KILLED, treat this execute call as a rerun request")
;; (tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a")
(rmt:general-call 'set-test-start-time run-id test-id)
(rmt:test-set-state-status run-id test-id "REMOTEHOSTSTART" "n/a" #f)
) ;; prime it for running
((member (db:test-get-state test-info) '("RUNNING" "REMOTEHOSTSTART"))
(if (process:alive-on-host? test-host test-pid)
(debug:print-error 0 *default-log-port* "test state is " (db:test-get-state test-info) " and process " test-pid " is still running on host " test-host ", cannot proceed")
(exit 1)))
((member (db:test-get-state test-info) '("COMPLETED")) ;; we do NOT want to re-run COMPLETED jobs. Mark as NOT_STARTED to run!
(debug:print 0 *default-log-port* "test state is " (db:test-get-state test-info) ", cannot proceed")
(debug:print 0 *default-log-port* "exiting with status 1")
(exit 1))
((not (member (db:test-get-state test-info) '("REMOVING" "REMOTEHOSTSTART" "RUNNING" "KILLREQ")))
;; (tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a")
(rmt:general-call 'set-test-start-time run-id test-id)
(rmt:test-set-state-status run-id test-id "REMOTEHOSTSTART" "n/a" #f))
(else ;; (member (db:test-get-state test-info) '("REMOVING" "REMOTEHOSTSTART" "RUNNING" "KILLREQ"))
(debug:print 0 *default-log-port* "test state is " (db:test-get-state test-info) ", cannot proceed")
(debug:print 0 *default-log-port* "exiting with status 1")
(exit 1))))
;; cleanup prior execution's steps
|
︙ | | | ︙ | |
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
|
(list "MT_RUNNAME" runname)
(list "MT_MEGATEST" megatest)
(list "MT_TARGET" target)
(list "MT_LINKTREE" (common:get-linktree)) ;; (configf:lookup *configdat* "setup" "linktree"))
(list "MT_TESTSUITENAME" (common:get-testsuite-name))))
;;(bb-check-path msg: "launch:execute post block 3")
(if mt-bindir-path (setenv "PATH" (conc (getenv "PATH") ":" mt-bindir-path)))
;;(bb-check-path msg: "launch:execute post block 4")
;; (change-directory top-path)
;; Can setup as client for server mode now
;; (client:setup)
;; environment overrides are done *before* the remaining critical envars.
|
>
>
>
|
|
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
|
(list "MT_RUNNAME" runname)
(list "MT_MEGATEST" megatest)
(list "MT_TARGET" target)
(list "MT_LINKTREE" (common:get-linktree)) ;; (configf:lookup *configdat* "setup" "linktree"))
(list "MT_TESTSUITENAME" (common:get-testsuite-name))))
;;(bb-check-path msg: "launch:execute post block 3")
(let ((tmppath (getenv "PATH")))
(if (string-search tmppath " ")
(debug:print 0 *default-log-port* "WARNING: spaces in PATH are not supported."))
(if mt-bindir-path (setenv "PATH" (conc tmppath":"mt-bindir-path))))
;;(bb-check-path msg: "launch:execute post block 4")
;; (change-directory top-path)
;; Can setup as client for server mode now
;; (client:setup)
;; environment overrides are done *before* the remaining critical envars.
|
︙ | | | ︙ | |
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
|
(output (with-input-from-pipe cmd read-lines)))
(debug:print 2 *default-log-port* "Running " cmd " received " output)
(if (eq? (length output) 0)
#f
#t))
#t))
(define (launch:is-test-alive host pid)
(let* ((same-host (equal? host (get-host-name)))
(cmd (conc
(if same-host "" (conc "ssh "host" "))
"pstree -A "pid)))
(if (and host pid
(not (equal? host "n/a")))
(let* ((output (with-input-from-pipe cmd read-lines)))
(debug:print 2 *default-log-port* "Running " cmd " received " output)
(if (eq? (length output) 0)
#f
#t))
#t))) ;; assuming bad query is about a live test is likely not the right thing to do?
(define (launch:kill-tests-if-dead run-id)
|
>
>
>
>
>
|
>
|
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
|
(output (with-input-from-pipe cmd read-lines)))
(debug:print 2 *default-log-port* "Running " cmd " received " output)
(if (eq? (length output) 0)
#f
#t))
#t))
;; this is a close duplicate of:
;; process:alist-on-host?
;; process:alive
;;
(define (launch:is-test-alive host pid)
(let* ((same-host (equal? host (get-host-name)))
(cmd (conc
(if same-host "" (conc "ssh "host" "))
"pstree -A "pid)))
(if (and host pid
(not (equal? host "n/a")))
(let* ((output (if same-host
(with-input-from-pipe cmd read-lines)
(common:generic-ssh cmd read-lines '())))) ;; (with-input-from-pipe cmd read-lines)))
(debug:print 2 *default-log-port* "Running " cmd " received " output)
(if (eq? (length output) 0)
#f
#t))
#t))) ;; assuming bad query is about a live test is likely not the right thing to do?
(define (launch:kill-tests-if-dead run-id)
|
︙ | | | ︙ | |
908
909
910
911
912
913
914
915
916
917
918
919
920
921
|
;; side effects:
;; sets; *configdat* (megatest.config info)
;; *runconfigdat* (runconfigs.config info)
;; *configstatus* (status of the read data)
;;
(define (launch:setup #!key (force-reread #f) (areapath #f))
(mutex-lock! *launch-setup-mutex*)
(if (and *toppath*
(eq? *configstatus* 'fulldata) (not force-reread)) ;; got it all
(begin
(debug:print 2 *default-log-port* "NOTE: skipping launch:setup-body call since we have fulldata")
(mutex-unlock! *launch-setup-mutex*)
*toppath*)
(let ((res (launch:setup-body force-reread: force-reread areapath: areapath)))
|
>
>
>
>
>
>
|
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
|
;; side effects:
;; sets; *configdat* (megatest.config info)
;; *runconfigdat* (runconfigs.config info)
;; *configstatus* (status of the read data)
;;
(define (launch:setup #!key (force-reread #f) (areapath #f))
(mutex-lock! *launch-setup-mutex*)
;; this stops the train quickly for new processes
(if (and *toppath*
(file-exists? (conc *toppath*"/stop-the-train")))
(begin
(debug:print 0 *default-log-port* "ERROR: found file "*toppath*"/stop-the-train, exiting immediately")
(exit 1)))
(if (and *toppath*
(eq? *configstatus* 'fulldata) (not force-reread)) ;; got it all
(begin
(debug:print 2 *default-log-port* "NOTE: skipping launch:setup-body call since we have fulldata")
(mutex-unlock! *launch-setup-mutex*)
*toppath*)
(let ((res (launch:setup-body force-reread: force-reread areapath: areapath)))
|
︙ | | | ︙ | |
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
|
;; (loop (rmt:no-sync-get-lock lock-key) expire-time))))))
(item-path (item-list->path itemdat))
(contour #f)) ;; NOT READY FOR THIS (args:get-arg "-contour")))
(let loop ((delta (- (current-seconds) *last-launch*))
(launch-delay (configf:lookup-number *configdat* "setup" "launch-delay" default: 0)))
(if (> launch-delay delta)
(begin
(if (common:low-noise-print 1200 "test launch delay") ;; every two hours or so remind the user about launch delay.
(debug:print-info 0 *default-log-port* "NOTE: test launches are delayed by " launch-delay " seconds. See megatest.config launch-delay setting to adjust.")) ;; launch of " test-name " for " (- launch-delay delta) " seconds"))
(thread-sleep! (- launch-delay delta))
(loop (- (current-seconds) *last-launch*) launch-delay))))
(change-directory *toppath*)
(alist->env-vars ;; consolidate this code with the code in megatest.scm for "-execute", *maybe* - the longer they are set the longer each launch takes (must be non-overlapping with the vars)
(append
(list
(list "MT_RUN_AREA_HOME" *toppath*)
|
|
|
|
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
|
;; (loop (rmt:no-sync-get-lock lock-key) expire-time))))))
(item-path (item-list->path itemdat))
(contour #f)) ;; NOT READY FOR THIS (args:get-arg "-contour")))
(let loop ((delta (- (current-seconds) *last-launch*))
(launch-delay (configf:lookup-number *configdat* "setup" "launch-delay" default: 0)))
(if (> launch-delay delta)
(begin
;; (if (common:low-noise-print 1200 "test launch delay") ;; every two hours or so remind the user about launch delay.
;; (debug:print-info 0 *default-log-port* "NOTE: test launches are delayed by " launch-delay " seconds. See megatest.config launch-delay setting to adjust.")) ;; launch of " test-name " for " (- launch-delay delta) " seconds"))
(thread-sleep! (- launch-delay delta))
(loop (- (current-seconds) *last-launch*) launch-delay))))
(change-directory *toppath*)
(alist->env-vars ;; consolidate this code with the code in megatest.scm for "-execute", *maybe* - the longer they are set the longer each launch takes (must be non-overlapping with the vars)
(append
(list
(list "MT_RUN_AREA_HOME" *toppath*)
|
︙ | | | ︙ | |
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
|
(apply print launch-results)
(print "NOTE: launched \"" fullcmd "\"\n but did not wait for it to proceed. Add the following to megatest.config \n[setup]\nlaunchwait yes\n if you have problems with this"))
#:append))
(debug:print 2 *default-log-port* "Launching completed, updating db")
(debug:print 2 *default-log-port* "Launch results: " launch-results)
(if (not launch-results)
(begin
(print "ERROR: Failed to run " (string-intersperse fullcmd " ") ", exiting now")
;; (sqlite3:finalize! db)
;; good ole "exit" seems not to work
;; (_exit 9)
;; but this hack will work! Thanks go to Alan Post of the Chicken email list
;; NB// Is this still needed? Should be safe to go back to "exit" now?
(process-signal (current-process-id) signal/kill)
))
|
|
|
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
|
(apply print launch-results)
(print "NOTE: launched \"" fullcmd "\"\n but did not wait for it to proceed. Add the following to megatest.config \n[setup]\nlaunchwait yes\n if you have problems with this"))
#:append))
(debug:print 2 *default-log-port* "Launching completed, updating db")
(debug:print 2 *default-log-port* "Launch results: " launch-results)
(if (not launch-results)
(begin
(debug:print 0 *default-log-port* "ERROR: Failed to run " (string-intersperse fullcmd " ") ", exiting now")
;; (sqlite3:finalize! db)
;; good ole "exit" seems not to work
;; (_exit 9)
;; but this hack will work! Thanks go to Alan Post of the Chicken email list
;; NB// Is this still needed? Should be safe to go back to "exit" now?
(process-signal (current-process-id) signal/kill)
))
|
︙ | | | ︙ | |