Index: common.scm ================================================================== --- common.scm +++ common.scm @@ -38,13 +38,15 @@ (import commonmod debugprint rmtmod (prefix mtargs args:)) - + +(define (remove-server-files directory-path) + (let ((files (glob (string-append directory-path "/server*")))) + (for-each delete-file* files))) (include "common_records.scm") - (define (remove-files filespec) (let ((files (glob filespec))) (for-each delete-file* files))) (define (stop-the-train) @@ -53,10 +55,11 @@ (if (and *toppath* (file-exists? (conc *toppath*"/stop-the-train"))) (let* ((msg (conc "ERROR: found file "*toppath*"/stop-the-train, exiting immediately"))) ;; yes, print to current-output-port AND *default-log-port*, annoying but necessary I think (print msg) + (remove-server-files (conc *toppath* "/logs")) (debug:print 0 *default-log-port* msg) (remove-files (conc *toppath* "/logs/server*")) (remove-files (conc *toppath* "/.servinfo/*")) (remove-files (conc *toppath* "/.mtdb/*lock")) (exit 1))) Index: megatest.scm ================================================================== --- megatest.scm +++ megatest.scm @@ -113,11 +113,13 @@ ;; (let ((debugcontrolf (conc (get-environment-variable "HOME") "/.megatestrc"))) (if (common:file-exists? debugcontrolf) (begin ;; for some reason, debug:print does not work here. Had to use print. - (print (conc "WARNING: loading " debugcontrolf)) + (with-output-to-port (current-error-port) + (lambda () + (print (conc "WARNING: loading " debugcontrolf)))) (load debugcontrolf) ) ) ) Index: portlogger.scm ================================================================== --- portlogger.scm +++ portlogger.scm @@ -15,10 +15,11 @@ ;; ;; You should have received a copy of the GNU General Public License ;; along with Megatest. If not, see . ;; +(require-extension (srfi 18) extras tcp s11n) (declare (unit portlogger)) (declare (uses debugprint)) (declare (uses dbmod)) Index: rmt.scm ================================================================== --- rmt.scm +++ rmt.scm @@ -790,5 +790,8 @@ (let* ((cfg-deadtime (configf:lookup-number *configdat* "setup" "deadtime")) (test-stats-update-period (configf:lookup-number *configdat* "setup" "test-stats-update-period"))) (rmt:find-and-mark-incomplete-engine run-id ovr-deadtime cfg-deadtime test-stats-update-period) ;;call end of eud of run detection for posthook (launch:end-of-run-check run-id))) + +;; orphaned from cherrypick merge +;; (debug:print 0 *default-log-port* "Inserting " (length tests-data) " tests in run " runname) Index: runs.scm ================================================================== --- runs.scm +++ runs.scm @@ -811,31 +811,21 @@ (debug:print-info 1 *default-log-port* "Adding \"" (string-intersperse required-tests " ") "\" to the run queue")) ;; NOTE: these are all parent tests, items are not expanded yet. (debug:print-info 4 *default-log-port* "test-records=" (hash-table->alist test-records)) (let ((reglen (configf:lookup *configdat* "setup" "runqueue"))) (if (> (length (hash-table-keys test-records)) 0) - (let* ((keep-going #t) - (run-queue-retries 5) - (run-ids (rmt:get-all-run-ids))) - #;(for-each (lambda (run-id) - (if keep-going - (handle-exceptions - exn - (debug:print 0 *default-log-port* "error in calling find-and-mark-incomplete for run-id " run-id ", exn=" exn) - (rmt:find-and-mark-incomplete run-id #f)))) ;; ovr-deadtime))) ;; could be root of https://hsdes.intel.com/appstore/article/#/220546828/main -- Title: Megatest jobs show DEAD even though they are still running (1.64/27) - run-ids) + (let* () (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests (any->number reglen) all-tests-registry) - (set! keep-going #f) (if (> run-count 0) ;; handle reruns (begin (if (not (hash-table-ref/default flags "-preclean" #f)) (hash-table-set! flags "-preclean" #t)) (if (not (hash-table-ref/default flags "-rerun" #f)) (hash-table-set! flags "-rerun" "ABORT,STUCK/DEAD,n/a,ZERO_ITEMS")) - (runs:run-tests target runname test-patts user flags run-count: (- run-count 1))) - (launch:end-of-run-check run-id))) + (runs:run-tests target runname test-patts user flags run-count: (- run-count 1))) + (launch:end-of-run-check run-id))) (debug:print-info 0 *default-log-port* "No tests to run"))) (debug:print-info 4 *default-log-port* "All done by here") ;; TODO: try putting post hook call here ; (debug:print-info 2 *default-log-port* " run-count " run-count) @@ -1381,12 +1371,16 @@ #f ;; yes, really (list (car tal)(cdr tal) reg reruns)) (begin (if (runs:lownoise (conc "FAILED prerequisites or other issue" hed) 60) (debug:print 0 *default-log-port* "WARNING: test " hed " has FAILED prerequisites or other issue. Internal state >" nth-try "< will be overridden and we'll retry.")) - ;; was: (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "KEEP_TRYING" #f) - (mt:test-set-state-status-by-testname-unless-completed run-id test-name item-path "COMPLETED" "PREQ_FAIL" #f) + (let* ((test-id (rmt:get-test-id run-id testname item-path)) + (test-info (rmt:get-testinfo-state-status run-id test-id)) ;; we need *current* info + (status (db:test-status test-info))) + (if (equal? status "KEEP_TRYING") + (mt:test-set-state-status-by-testname-unless-completed run-id test-name item-path "COMPLETED" "PREQ_FAIL" #f) + (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "KEEP_TRYING" #f))) (hash-table-set! test-registry hed 'removed) ;; was 0 (if (not (and (null? reg) (null? tal))) (runs:loop-values tal reg reglen regfull reruns) #f)))) (else