Index: launch.scm ================================================================== --- launch.scm +++ launch.scm @@ -252,14 +252,18 @@ (> (/ delta df) 0.1)) ;; (> delta 200) ;; ignore changes under 200 Meg df #f))) (do-sync (or new-cpu-load new-disk-free over-time)) - (test-info (rmt:get-test-state-status-by-id run-id test-id)) - (state (car test-info));; (db:test-get-state test-info)) - (status (cdr test-info));; (db:test-get-status test-info)) - (killreq (equal? state "KILLREQ")) + ;; + ;; MOVE THIS TO A FILE FLAG BASED APPROACH (FOR NOW) + ;; + + ;; (test-info (rmt:get-test-state-status-by-id run-id test-id)) + ;; (state (car test-info));; (db:test-get-state test-info)) + ;; (status (cdr test-info));; (db:test-get-status test-info)) + (killreq (file-exists? (conc work-area"/kill-test"))) ;; (equal? state "KILLREQ")) (kill-reason "no kill reason specified") (kill-job? #f)) ;; (common:telemetry-log "zombie" (conc "launch:monitor-job - decision time encountered at "(current-seconds)" with last-sync="last-sync" do-sync="do-sync" over-time="over-time" update-period="update-period)) (cond (killreq @@ -266,21 +270,33 @@ (set! kill-reason "KILLING TEST since received kill request (KILLREQ)") (set! kill-job? #t)) ((and runtlim (> (- (current-seconds) start-seconds) runtlim)) (set! kill-reason (conc "KILLING TEST DUE TO TIME LIMIT EXCEEDED! Runtime=" (- (current-seconds) start-seconds) " seconds, limit=" runtlim)) (set! kill-job? #t)) - ((equal? status "DEAD") + #;((equal? status "DEAD") ;; NEED ALTERNATIVE MECHANISM FOR THIS. (tests:update-central-meta-info run-id test-id new-cpu-load new-disk-free (calc-minutes) #f #f) (rmt:set-state-status-and-roll-up-items run-id test-id 'foo "RUNNING" "n/a" "was marked dead; really still running.") ;;(set! kill-reason "KILLING TEST because it was marked as DEAD by launch:handle-zombie-tests (might indicate really overloaded server or else overzealous setup.deadtime)") ;; MARK RUNNING (set! kill-job? #f))) (debug:print 4 *default-log-port* "cpu: " new-cpu-load " disk: " new-disk-free " last-sync: " last-sync " do-sync: " do-sync) + + ;; revisit logic in zombie handling. + ;; (if (common:low-noise-print 600 "run zombie") ;; every five minutes is plenty (launch:handle-zombie-tests run-id)) + (when do-sync - (tests:update-central-meta-info run-id test-id new-cpu-load new-disk-free (calc-minutes) #f #f)) + ;; (tests:update-central-meta-info run-id test-id new-cpu-load new-disk-free (calc-minutes) #f #f) + + (let ((oup (open-output-file (conc work-area"/.run-logging-stats.csv") :append)) + (csv (conc run-id","test-id","new-cpu-load","new-disk-free","(calc-minutes)))) + (debug:print 0 *default-log-port* "Updating run log, csv="csv) + (with-output-to-port oup + (lambda () + (print csv))) + (close-output-port oup))) (if kill-job? (begin (debug:print-info 0 *default-log-port* "proceeding to kill test: "kill-reason) (mutex-lock! m)