Index: common.scm ================================================================== --- common.scm +++ common.scm @@ -71,10 +71,14 @@ ;; Awful. Please FIXME (define *env-vars-by-run-id* (make-hash-table)) (define *current-run-name* #f) +;; Testconfig and runconfig caches. +(define *testconfigs* (make-hash-table)) ;; test-id => testconfig +(define *runconfigs* (make-hash-table)) ;; target => runconfig + (define (common:clear-caches) (set! *target* (make-hash-table)) (set! *keys* (make-hash-table)) (set! *keyvals* (make-hash-table)) (set! *toptest-paths* (make-hash-table)) Index: configf.scm ================================================================== --- configf.scm +++ configf.scm @@ -141,11 +141,12 @@ (define (read-config path ht allow-system #!key (environ-patt #f)(curr-section #f)(sections #f)) (debug:print-info 5 "read-config " path " allow-system " allow-system " environ-patt " environ-patt " curr-section: " curr-section " sections: " sections " pwd: " (current-directory)) (if (not (file-exists? path)) (begin (debug:print-info 1 "read-config - file not found " path " current path: " (current-directory)) - (if (not ht)(make-hash-table) ht)) + ;; WARNING: This is a risky change but really, we should not return an empty hash table if no file read? + #f) ;; (if (not ht)(make-hash-table) ht)) (let ((inp (open-input-file path)) (res (if (not ht)(make-hash-table) ht))) (let loop ((inl (configf:read-line inp res allow-system)) ;; (read-line inp)) (curr-section-name (if curr-section curr-section "default")) (var-flag #f);; turn on for key-var-pr and cont-ln-rx, turn off elsewhere Index: db.scm ================================================================== --- db.scm +++ db.scm @@ -1107,13 +1107,10 @@ (cdb:client-call serverdat 'update-run-duration #t *default-numtries* minutes test-id)) (define (cdb:tests-update-uname-host serverdat test-id uname hostname) (cdb:client-call serverdat 'update-uname-host #t *default-numtries* uname hostname test-id)) -(define (db:process-triggers test-id newstate newstatus) - #t) - ;; speed up for common cases with a little logic ;; NB// Ultimately this will be deprecated in deference to mt:test-set-state-status-by-id ;; (define (db:test-set-state-status-by-id db test-id newstate newstatus newcomment) (cond @@ -1123,11 +1120,11 @@ (sqlite3:execute db "UPDATE tests SET state=?,status=? WHERE id=?;" newstate newstatus test-id)) (else (if newstate (sqlite3:execute db "UPDATE tests SET state=? WHERE id=?;" newstate test-id)) (if newstatus (sqlite3:execute db "UPDATE tests SET status=? WHERE id=?;" newstatus test-id)) (if newcomment (sqlite3:execute db "UPDATE tests SET comment=? WHERE id=?;" newcomment test-id)))) - (db:process-triggers test-id newstate newstatus)) + (mt:process-triggers test-id newstate newstatus)) ;; Never used ;; (define (db:test-set-state-status-by-run-id-testname db run-id test-name item-path status state) ;; (sqlite3:execute db "UPDATE tests SET state=?,status=?,event_time=strftime('%s','now') WHERE run_id=? AND testname=? AND item_path=?;" ;; state status run-id test-name item-path)) @@ -1586,11 +1583,13 @@ (define (cdb:get-test-info serverdat run-id test-name item-path) (cdb:client-call serverdat 'immediate #f *default-numtries* open-run-close db:get-test-info #f run-id test-name item-path)) (define (cdb:get-test-info-by-id serverdat test-id) - (cdb:client-call serverdat 'immediate #f *default-numtries* open-run-close db:get-test-info-by-id #f test-id)) + (let ((test-dat (cdb:client-call serverdat 'immediate #f *default-numtries* open-run-close db:get-test-info-by-id #f test-id))) + (hash-table-set! *test-info* test-id test-dat) ;; cached for use where up-to-date info is not needed + test-dat)) ;; db should be db open proc or #f (define (cdb:remote-run proc db . params) (apply cdb:client-call *runremote* 'immediate #f *default-numtries* open-run-close proc #f params)) Index: launch.scm ================================================================== --- launch.scm +++ launch.scm @@ -178,10 +178,11 @@ ;; then, if runscript ran ok (or did not get called) ;; do all the ezsteps (if any) (if ezsteps (let* ((testconfig (read-config (conc work-area "/testconfig") #f #t environ-patt: "pre-launch-env-vars")) ;; FIXME??? is allow-system ok here? (ezstepslst (hash-table-ref/default testconfig "ezsteps" '()))) + (hash-table-set! *testconfigs* test-id testconfig) ;; cached for lazy reads later ... (if (not (file-exists? ".ezsteps"))(create-directory ".ezsteps")) ;; if ezsteps was defined then we are sure to have at least one step but check anyway (if (not (> (length ezstepslst) 0)) (debug:print 0 "ERROR: ezsteps defined but ezstepslst is zero length") (let loop ((ezstep (car ezstepslst)) Index: mt.scm ================================================================== --- mt.scm +++ mt.scm @@ -86,10 +86,33 @@ (db:get-prereqs-not-met run-id waitons ref-item-path mode: mode)) (define (mt:get-run-stats) (cdb:remote-run db:get-run-stats #f)) +;;====================================================================== +;; T R I G G E R S +;;====================================================================== + +(define (mt:process-triggers test-id newstate newstatus) + (let* ((test-dat (mt:lazy-get-test-info-by-id test-id)) + (test-rundir (db:test-get-rundir test-dat)) + (tconfig #f)) + (if (and (file-exists? test-rundir) + (directory? test-rundir)) + (begin + (push-directory test-rundir) + (set! tconfig (mt:lazy-read-test-config test-dat)) + (pop-directory) + (for-each (lambda (trigger) + (let ((cmd (configf:lookup tconfig "triggers" trigger))) + (if cmd + (system (conc cmd " " test-id " " test-rundir " " trigger " 2&>1 " test-rundir "/last-trigger.log"))))) + (list + (conc newstate "/" newstatus) + (conc newstate "/") + (conc "/" newstatus))))))) + ;;====================================================================== ;; S T A T E A N D S T A T U S F O R T E S T S ;;====================================================================== (define (mt:roll-up-pass-fail-counts run-id test-name item-path status) @@ -112,6 +135,25 @@ (cdb:client-call *runremote* 'state-status #t *default-numtries* newstate newstatus test-id)) (else (if newstate (cdb:client-call *runremote* 'set-test-state #t *default-numtries* newstate test-id)) (if newstatus (cdb:client-call *runremote* 'set-test-status #t *default-numtries* newstatus test-id)) (if newcomment (cdb:client-call *runremote* 'set-test-comment #t *default-numtries* newcomment test-id)))) - (db:process-triggers test-id newstate newstatus)) + (mt:process-triggers test-id newstate newstatus) + #t) + +(define (mt:lazy-get-test-info-by-id test-id) + (let ((tdat (hash-table-ref/default *test-info* test-id #f))) + (if tdat + tdat + ;; no need to update *test-info* as that is done in cdb:get-test-info-by-id + (cdb:get-test-info-by-id *runremote* test-id)))) + +(define (mt:lazy-read-test-config test-dat) + (let* ((test-id (db:test-get-id test-dat)) + (test-rundir (db:test-get-rundir test-dat)) + (tconfig (hash-table-ref/default *testconfigs* test-id #f))) + (if tconfig + tconfig + (let ((newtcfg (read-config (conc test-rundir "/testconfig") #f #f))) ;; NOTE: Does NOT run [system ...] + (hash-table-set! *testconfigs* test-id newtcfg) + newtcfg)))) + Index: runs.scm ================================================================== --- runs.scm +++ runs.scm @@ -1005,92 +1005,97 @@ (test-retry-time (make-hash-table)) (allow-run-time 10)) ;; seconds to allow for killing tests before just brutally killing 'em (let loop ((test (car sorted-tests)) (tal (cdr sorted-tests))) (let* ((test-id (db:test-get-id test)) - (new-test-dat (cdb:remote-run db:get-test-info-by-id #f test-id)) - (item-path (db:test-get-item-path new-test-dat)) - (test-name (db:test-get-testname new-test-dat)) - (run-dir (db:test-get-rundir new-test-dat)) ;; run dir is from the link tree - (real-dir (if (file-exists? run-dir) - (resolve-pathname run-dir) - #f)) - (test-state (db:test-get-state new-test-dat)) - (test-fulln (db:test-get-fullname new-test-dat))) - (case action - ((remove-runs) - (debug:print-info 0 "test: " test-name " itest-state: " test-state) - (if (member test-state (list "RUNNING" "LAUNCHED" "REMOTEHOSTSTART" "KILLREQ")) - (begin - (if (not (hash-table-ref/default test-retry-time test-fulln #f)) - (begin - ;; want to set to REMOVING BUT CANNOT do it here? - (hash-table-set! test-retry-time test-fulln (current-seconds)))) - (if (> (- (current-seconds)(hash-table-ref test-retry-time test-fulln)) allow-run-time) - ;; This test is not in a correct state for cleaning up. Let's try some graceful shutdown steps first - ;; Set the test to "KILLREQ" and wait five seconds then try again. Repeat up to five times then give - ;; up and blow it away. - (begin - (debug:print 0 "WARNING: could not gracefully remove test " test-fulln ", tried to kill it to no avail. Forcing state to FAILEDKILL and continuing") - (mt:test-set-state-status-by-id (db:test-get-id test) "FAILEDKILL" "n/a" #f) - (thread-sleep! 1)) - (begin - (mt:test-set-state-status-by-id (db:test-get-id test) "KILLREQ" "n/a" #f) - (thread-sleep! 1))) - ;; NOTE: This is suboptimal as the testdata will be used later and the state/status may have changed ... - (if (null? tal) - (loop new-test-dat tal) - (loop (car tal)(append tal (list new-test-dat))))) - (begin - (mt:test-set-state-status-by-id (db:test-get-id test) "REMOVING" "LOCKED" #f) - (debug:print-info 1 "Attempting to remove " (if real-dir (conc " dir " real-dir " and ") "") " link " run-dir) - (if (and real-dir - (> (string-length real-dir) 5) - (file-exists? real-dir)) ;; bad heuristic but should prevent /tmp /home etc. - (begin ;; let* ((realpath (resolve-pathname run-dir))) - (debug:print-info 1 "Recursively removing " real-dir) - (if (file-exists? real-dir) - (runs:safe-delete-test-dir real-dir) - (debug:print 0 "WARNING: test dir " real-dir " appears to not exist or is not readable"))) - (if real-dir - (debug:print 0 "WARNING: directory " real-dir " does not exist") - (debug:print 0 "WARNING: no real directory corrosponding to link " run-dir ", nothing done"))) - (if (symbolic-link? run-dir) - (begin - (debug:print-info 1 "Removing symlink " run-dir) - (handle-exceptions - exn - (debug:print 0 "ERROR: Failed to remove symlink " run-dir ((condition-property-accessor 'exn 'message) exn) ", attempting to continue") - (delete-file run-dir))) - (if (directory? run-dir) - (if (> (directory-fold (lambda (f x)(+ 1 x)) 0 run-dir) 0) - (debug:print 0 "WARNING: refusing to remove " run-dir " as it is not empty") - (handle-exceptions - exn - (debug:print 0 "ERROR: Failed to remove directory " run-dir ((condition-property-accessor 'exn 'message) exn) ", attempting to continue") - (delete-directory run-dir))) - (if run-dir - (debug:print 0 "WARNING: not removing " run-dir " as it either doesn't exist or is not a symlink") - (debug:print 0 "NOTE: the run dir for this test is undefined. Test may have already been deleted.")) - )) - ;; Only delete the records *after* removing the directory. If things fail we have a record - (cdb:remote-run db:delete-test-records db #f (db:test-get-id test)) - (if (not (null? tal)) - (loop (car tal)(cdr tal)))))) - ((set-state-status) - (debug:print-info 2 "new state " (car state-status) ", new status " (cadr state-status)) - (mt:test-set-state-status-by-id (db:test-get-id test) (car state-status)(cadr state-status) #f) - (if (not (null? tal)) - (loop (car tal)(cdr tal)))) - ((run-wait) - (debug:print-info 2 "still waiting, " (length tests) " tests still running") - (thread-sleep! 10) - (let ((new-tests (proc-get-tests run-id))) - (if (null? new-tests) - (debug:print-info 1 "Run completed according to zero tests matching provided criteria.") - (loop (car new-tests)(cdr new-tests)))))))) - ))) + (new-test-dat (cdb:get-test-info-by-id *runremote* test-id))) + (if (not new-test-dat) + (begin + (debug:print 0 "ERROR: We have a test-id of " test-id " but no record was found. NOTE: No locking of records is done between processes, do not simultaneously remove the same run from two processes!") + (if (not (null? tal)) + (loop (car tal)(cdr tal)))) + (let* ((item-path (db:test-get-item-path new-test-dat)) + (test-name (db:test-get-testname new-test-dat)) + (run-dir (db:test-get-rundir new-test-dat)) ;; run dir is from the link tree + (real-dir (if (file-exists? run-dir) + (resolve-pathname run-dir) + #f)) + (test-state (db:test-get-state new-test-dat)) + (test-fulln (db:test-get-fullname new-test-dat))) + (case action + ((remove-runs) + (debug:print-info 0 "test: " test-name " itest-state: " test-state) + (if (member test-state (list "RUNNING" "LAUNCHED" "REMOTEHOSTSTART" "KILLREQ")) + (begin + (if (not (hash-table-ref/default test-retry-time test-fulln #f)) + (begin + ;; want to set to REMOVING BUT CANNOT do it here? + (hash-table-set! test-retry-time test-fulln (current-seconds)))) + (if (> (- (current-seconds)(hash-table-ref test-retry-time test-fulln)) allow-run-time) + ;; This test is not in a correct state for cleaning up. Let's try some graceful shutdown steps first + ;; Set the test to "KILLREQ" and wait five seconds then try again. Repeat up to five times then give + ;; up and blow it away. + (begin + (debug:print 0 "WARNING: could not gracefully remove test " test-fulln ", tried to kill it to no avail. Forcing state to FAILEDKILL and continuing") + (mt:test-set-state-status-by-id (db:test-get-id test) "FAILEDKILL" "n/a" #f) + (thread-sleep! 1)) + (begin + (mt:test-set-state-status-by-id (db:test-get-id test) "KILLREQ" "n/a" #f) + (thread-sleep! 1))) + ;; NOTE: This is suboptimal as the testdata will be used later and the state/status may have changed ... + (if (null? tal) + (loop new-test-dat tal) + (loop (car tal)(append tal (list new-test-dat))))) + (begin + (mt:test-set-state-status-by-id (db:test-get-id test) "REMOVING" "LOCKED" #f) + (debug:print-info 1 "Attempting to remove " (if real-dir (conc " dir " real-dir " and ") "") " link " run-dir) + (if (and real-dir + (> (string-length real-dir) 5) + (file-exists? real-dir)) ;; bad heuristic but should prevent /tmp /home etc. + (begin ;; let* ((realpath (resolve-pathname run-dir))) + (debug:print-info 1 "Recursively removing " real-dir) + (if (file-exists? real-dir) + (runs:safe-delete-test-dir real-dir) + (debug:print 0 "WARNING: test dir " real-dir " appears to not exist or is not readable"))) + (if real-dir + (debug:print 0 "WARNING: directory " real-dir " does not exist") + (debug:print 0 "WARNING: no real directory corrosponding to link " run-dir ", nothing done"))) + (if (symbolic-link? run-dir) + (begin + (debug:print-info 1 "Removing symlink " run-dir) + (handle-exceptions + exn + (debug:print 0 "ERROR: Failed to remove symlink " run-dir ((condition-property-accessor 'exn 'message) exn) ", attempting to continue") + (delete-file run-dir))) + (if (directory? run-dir) + (if (> (directory-fold (lambda (f x)(+ 1 x)) 0 run-dir) 0) + (debug:print 0 "WARNING: refusing to remove " run-dir " as it is not empty") + (handle-exceptions + exn + (debug:print 0 "ERROR: Failed to remove directory " run-dir ((condition-property-accessor 'exn 'message) exn) ", attempting to continue") + (delete-directory run-dir))) + (if run-dir + (debug:print 0 "WARNING: not removing " run-dir " as it either doesn't exist or is not a symlink") + (debug:print 0 "NOTE: the run dir for this test is undefined. Test may have already been deleted.")) + )) + ;; Only delete the records *after* removing the directory. If things fail we have a record + (cdb:remote-run db:delete-test-records db #f (db:test-get-id test)) + (if (not (null? tal)) + (loop (car tal)(cdr tal)))))) + ((set-state-status) + (debug:print-info 2 "new state " (car state-status) ", new status " (cadr state-status)) + (mt:test-set-state-status-by-id (db:test-get-id test) (car state-status)(cadr state-status) #f) + (if (not (null? tal)) + (loop (car tal)(cdr tal)))) + ((run-wait) + (debug:print-info 2 "still waiting, " (length tests) " tests still running") + (thread-sleep! 10) + (let ((new-tests (proc-get-tests run-id))) + (if (null? new-tests) + (debug:print-info 1 "Run completed according to zero tests matching provided criteria.") + (loop (car new-tests)(cdr new-tests)))))))) + ))))) ;; remove the run if zero tests remain (if (eq? action 'remove-runs) (let ((remtests (mt:get-tests-for-run (db:get-value-by-header run header "id") #f '("DELETED") '("n/a") not-in: #t))) (if (null? remtests) ;; no more tests remaining (let* ((dparts (string-split lasttpath "/"))