Index: dashboard-tests.scm ================================================================== --- dashboard-tests.scm +++ dashboard-tests.scm @@ -32,10 +32,23 @@ ;; (declare (uses filedb)) (include "common_records.scm") (include "db_records.scm") (include "run_records.scm") + +;;====================================================================== +;; C O M M O N +;;====================================================================== + +(define (dtests:get-pre-command #!key (default-override #f)) + (let ((cfg-ovrd (configf:lookup *configdat* "dashboard" "pre-command"))) + (or cfg-ovrd default-override "xterm -geometry 180x20 -e \""))) + +(define (dtests:get-post-command #!key (default-override #f)) + (let ((cfg-ovrd (configf:lookup *configdat* "dashboard" "post-command"))) + (or cfg-ovrd default-override ";echo Press any key to continue;bash -c 'read -n 1 -s'\" &"))) + (define (test-info-panel testdat store-label widgets) (iup:frame #:title "Test Info" ; #:expand "YES" (iup:hbox ; #:expand "YES" @@ -609,55 +622,57 @@ ))))) lbl)) (store-button store-label) (command-text-box (iup:textbox #:expand "HORIZONTAL" #:font "Courier New, -10")) (command-launch-button (iup:button "Execute!" #:action (lambda (x) - (let ((cmd (iup:attribute command-text-box "VALUE"))) - (system (conc cmd " &")))))) + (let* ((cmd (iup:attribute command-text-box "VALUE")) + (fullcmd (conc (dtests:get-pre-command) + cmd + (dtests:get-post-command)))) + (debug:print-info 02 "Running command: " fullcmd) + (system fullcmd))))) (kill-jobs (lambda (x) (iup:attribute-set! command-text-box "VALUE" - (conc "xterm -geometry 180x20 -e \"megatest -target " keystring " -runname " runname + (conc "megatest -target " keystring " -runname " runname " -set-state-status KILLREQ,n/a -testpatt %/% " - ;; (conc testname "/" (if (equal? item-path "") "%" item-path)) - " -state RUNNING ;echo Press any key to continue;bash -c 'read -n 1 -s'\"")))) + " -state RUNNING")))) (run-test (lambda (x) (iup:attribute-set! command-text-box "VALUE" - (conc "xterm -geometry 180x20 -e \"megatest -target " keystring " -runname " runname + (conc "megatest -target " keystring " -runname " runname " -runtests " (conc testname "/" (if (equal? item-path "") "%" item-path)) - " ;echo Press any key to continue;bash -c 'read -n 1 -s'\"")))) + )))) (remove-test (lambda (x) (iup:attribute-set! command-text-box "VALUE" - (conc "xterm -geometry 180x20 -e \"megatest -remove-runs -target " keystring " -runname " runname + (conc "megatest -remove-runs -target " keystring " -runname " runname " -testpatt " (conc testname "/" (if (equal? item-path "") "%" item-path)) - " -v ;echo Press any key to continue;bash -c 'read -n 1 -s'\"")))) + " -v")))) (clean-run-execute (lambda (x) - (let ((cmd (conc "xterm -geometry 180x20 -e \"" - "megatest -remove-runs -target " keystring " -runname " runname + (let ((cmd (conc "megatest -remove-runs -target " keystring " -runname " runname " -testpatt " (conc testname "/" (if (equal? item-path "") "%" item-path)) ";megatest -target " keystring " -runname " runname " -runtests " (conc testname "/" (if (equal? item-path "") "%" item-path)) - " ;echo Press any key to continue;bash -c 'read -n 1 -s'\""))) + ))) (system (conc cmd " &"))))) (remove-test (lambda (x) (iup:attribute-set! command-text-box "VALUE" - (conc "xterm -geometry 180x20 -e \"megatest -remove-runs -target " keystring " -runname " runname + (conc "megatest -remove-runs -target " keystring " -runname " runname " -testpatt " (conc testname "/" (if (equal? item-path "") "%" item-path)) - " -v ;echo Press any key to continue;bash -c 'read -n 1 -s'\"")) + " -v")) ))) (cond ((not testdat)(begin (print "ERROR: bad test info for " test-id)(exit 1))) ((not rundat)(begin (print "ERROR: found test info but there is a problem with the run info for " run-id)(exit 1))) (else Index: dashboard.scm ================================================================== --- dashboard.scm +++ dashboard.scm @@ -1454,11 +1454,13 @@ (file-modification-time filen)) (glob (conc *dbdir* "/*.db"))))) (define (dashboard:run-update x) (let* ((modtime (dashboard:get-youngest-run-db-mod-time)) ;; (file-modification-time *db-file-path*)) - (monitor-modtime (file-modification-time *monitor-db-path*)) + (monitor-modtime (if (file-exists? *monitor-db-path*) + (file-modification-time *monitor-db-path*) + -1)) (run-update-time (current-seconds)) (recalc (dashboard:recalc modtime *please-update-buttons* *last-db-update-time*))) (if (and (eq? *current-tab-number* 0) (> monitor-modtime *last-monitor-update-time*)) (begin Index: db.scm ================================================================== --- db.scm +++ db.scm @@ -674,13 +674,14 @@ ;; select end_time-now from ;; (select testname,item_path,event_time+run_duration as ;; end_time,strftime('%s','now') as now from tests where state in ;; ('RUNNING','REMOTEHOSTSTART','LAUNCED')); - (define (db:find-and-mark-incomplete db #!key (ovr-deadtime #f)) (let* ((incompleted '()) + (oldlaunched '()) + (toplevels '()) (deadtime-str (configf:lookup *configdat* "setup" "deadtime")) (deadtime (if (and deadtime-str (string->number deadtime-str)) (string->number deadtime-str) 7200)) ;; two hours @@ -696,37 +697,68 @@ ;; ;; HOWEVER: this code in run:test seems to work fine ;; (> (- (current-seconds)(+ (db:test-get-event_time testdat) ;; (db:test-get-run_duration testdat))) ;; 600) + (db:delay-if-busy) (sqlite3:for-each-row - (lambda (test-id) - (set! incompleted (cons test-id incompleted))) + (lambda (test-id run-dir uname testname item-path) + (if (and (equal? uname "n/a") + (equal? item-path "")) ;; this is a toplevel test + ;; what to do with toplevel? call rollup? + (begin + (set! toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels)) + (debug:print-info 0 "Found old toplevel test in RUNNING state, test-id=" test-id)) + (set! incompleted (cons (list test-id run-dir uname testname item-path run-id) incompleted)))) db - "SELECT id FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time - run_duration) > ? AND state IN ('RUNNING','REMOTEHOSTSTART');" - run-id deadtime) + "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > 600 AND state IN ('RUNNING','REMOTEHOSTSTART');" + run-id) ;; in LAUNCHED for more than one day. Could be long due to job queues TODO/BUG: Need override for this in config ;; + (db:delay-if-busy) (sqlite3:for-each-row - (lambda (test-id) - (set! incompleted (cons test-id incompleted))) + (lambda (test-id run-dir uname testname item-path) + (if (and (equal? uname "n/a") + (equal? item-path "")) ;; this is a toplevel test + ;; what to do with toplevel? call rollup? + (set! toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels)) + (set! oldlaunched (cons (list test-id run-dir uname testname item-path run-id) oldlaunched)))) db - "SELECT id FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time - run_duration) > ? AND state IN ('LAUNCHED');" - run-id (* 60 60 24))) + "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > 86400 AND state IN ('LAUNCHED');" + run-id)) run-ids) - + ;; These are defunct tests, do not do all the overhead of set-state-status. Force them to INCOMPLETE. ;; - (if (> (length incompleted) 0) - (begin - (debug:print 0 "WARNING: Marking test(s); " (string-intersperse (map conc incompleted) ", ") " as INCOMPLETE") - (sqlite3:execute - db - (conc "UPDATE tests SET state='INCOMPLETE' WHERE id IN (" - (string-intersperse (map conc incompleted) ",") - ");")))))) + (db:delay-if-busy) + (let* ((min-incompleted (filter (lambda (x) + (let* ((testpath (cadr x)) + (tdatpath (conc testpath "/testdat.db")) + (dbexists (file-exists? tdatpath))) + (or (not dbexists) ;; if no file then something wrong - mark as incomplete + (> (- (current-seconds)(file-modification-time tdatpath)) 600)))) ;; no change in 10 minutes to testdat.db - she's dead Jim + incompleted)) + (min-incompleted-ids (map car min-incompleted)) + (all-ids (append min-incompleted-ids (map car oldlaunched)))) + (if (> (length all-ids) 0) + (begin + (debug:print 0 "WARNING: Marking test(s); " (string-intersperse (map conc all-ids) ", ") " as INCOMPLETE") + (sqlite3:execute + db + (conc "UPDATE tests SET state='INCOMPLETE' WHERE id IN (" + (string-intersperse (map conc all-ids) ",") + ");"))))) + + ;; Now do rollups for the toplevel tests + ;; + (for-each + (lambda (toptest) + (let ((test-name (list-ref toptest 3)) + (run-id (list-ref toptest 5))) + (cdb:top-test-set-per-pf-counts *runremote* run-id test-name))) + toplevels))) ;; Clean out old junk and vacuum the database ;; ;; Ultimately do something like this: ;; @@ -1476,10 +1508,12 @@ (let ((res 0)) (sqlite3:for-each-row (lambda (count) (set! res count)) (db:get-db dbstruct run-id) + ;; WARNING BUG EDIT ME - merged from v1.55 - not sure what is right here ... + ;; "SELECT count(id) FROM tests WHERE state in ('RUNNING','LAUNCHED','REMOTEHOSTSTART') AND run_id NOT IN (SELECT id FROM runs WHERE state='deleted') AND NOT (uname = 'n/a' AND item_path = '');") "SELECT count(id) FROM tests WHERE state in ('RUNNING','LAUNCHED','REMOTEHOSTSTART') AND run_id=?;" run-id) ;; NOT IN (SELECT id FROM runs WHERE state='deleted');") res)) ;; NEW BEHAVIOR: Look only at single run with run-id @@ -1487,13 +1521,13 @@ ;; (define (db:get-running-stats dbstruct run-id) (define (db:get-count-tests-running-for-run-id dbstruct run-id) (let ((res 0)) (sqlite3:for-each-row (lambda (count) - (set! res count)) + (set! res count)) ;; select * from tests where run_id=1 and uname = 'n/a' and item_path=''; (db:get-db dbstruct run-id) - "SELECT count(id) FROM tests WHERE state in ('RUNNING','LAUNCHED','REMOTEHOSTSTART') AND run_id=?;" run-id) + "SELECT count(id) FROM tests WHERE state in ('RUNNING','LAUNCHED','REMOTEHOSTSTART') AND run_id=? AND NOT (uname = 'n/a' AND item_path = '');" run-id) res)) (define (db:get-count-tests-running-in-jobgroup dbstruct run-id jobgroup) (if (not jobgroup) 0 ;; @@ -1513,10 +1547,13 @@ (set! res count)) (db:get-db dbstruct run-id) (conc "SELECT count(id) FROM tests WHERE state in ('RUNNING','LAUNCHED','REMOTEHOSTSTART') AND testname in ('" (string-intersperse testnames "','") "');"))) + ;; DEBUG FIXME - need to merge this v.155 query correctly + ;; AND testname in (SELECT testname FROM test_meta WHERE jobgroup=?) + ;; AND NOT (uname = 'n/a' AND item_path = '');" res))) ;; done with run when: ;; 0 tests in LAUNCHED, NOT_STARTED, REMOTEHOSTSTART, RUNNING (define (db:estimated-tests-remaining dbstruct run-id) @@ -1779,10 +1816,21 @@ (lambda (p) (set! res (cons p res))) (db:get-db dbstruct run-id) tstsqry) res)) + +(define (db:test-toplevel-num-items db run-id testname) + (let ((res 0)) + (sqlite3:for-each-row + (lambda (num-items) + (set! res num-items)) + db + "SELECT count(id) FROM tests WHERE run_id=? AND testname=? AND item_path != '' AND state NOT IN ('DELETED');" + run-id + testname) + res)) ;;====================================================================== ;; QUEUE UP META, TEST STATUS AND STEPS REMOTE ACCESS ;;====================================================================== @@ -2037,32 +2085,35 @@ ;; (proc (- remtries 1))))) ;; (apply sqlite3:execute db query params)) ;; (debug:print 0 "ERROR: too many attempts to access db were made and no sucess. query: " ;; query ", params: " params)))) ;; (proc remtries)) -(define (db:delay-if-busy #!key (count 5)) +(define (db:delay-if-busy #!key (count 6)) (let ((dbfj (conc *toppath* "/megatest.db-journal"))) (if (file-exists? dbfj) (case count + ((6) + (thread-sleep! 0.2) + (db:delay-if-busy count: 5)) ((5) - (thread-sleep! 0.1) + (thread-sleep! 0.4) (db:delay-if-busy count: 4)) ((4) - (thread-sleep! 0.4) + (thread-sleep! 0.8) (db:delay-if-busy count: 3)) ((3) - (thread-sleep! 1.0) + (thread-sleep! 1.6) (db:delay-if-busy count: 2)) ((2) - (thread-sleep! 2.0) + (thread-sleep! 3.2) (db:delay-if-busy count: 1)) ((1) - (thread-sleep! 5.0) + (thread-sleep! 6.4) (db:delay-if-busy count: 0)) (else (debug:print-info 0 "delaying db access due to high database load.") - (thread-sleep! 10)))))) + (thread-sleep! 12.8)))))) ;; (db:delay-if-busy) ;; (apply sqlite3:execute db query params))) ;; (db:delay-if-busy) (define (db:test-get-records-for-index-file dbstruct run-id test-name) Index: db_records.scm ================================================================== --- db_records.scm +++ db_records.scm @@ -92,10 +92,18 @@ (define-inline (db:test-set-state! vec val)(vector-set! vec 3 val)) (define-inline (db:test-set-status! vec val)(vector-set! vec 4 val)) (define-inline (db:test-set-run_duration! vec val)(vector-set! vec 12 val)) (define-inline (db:test-set-final_logf! vec val)(vector-set! vec 13 val)) +;; Test record utility functions + +;; Is a test a toplevel? +;; +(define (db:test-get-is-toplevel vec) + (and (equal? (db:test-get-item-path vec) "") ;; test is not an item + (equal? (db:test-get-uname vec) "n/a"))) ;; test has never been run + ;; make-vector-record "" db mintest id run_id testname state status event_time item_path ;; (define (make-db:mintest)(make-vector 7)) (define-inline (db:mintest-get-id vec) (vector-ref vec 0)) (define-inline (db:mintest-get-run_id vec) (vector-ref vec 1)) Index: launch.scm ================================================================== --- launch.scm +++ launch.scm @@ -689,18 +689,18 @@ (case (string->symbol exe) ((dboard) "../megatest") ((mtest) "../megatest") ((dashboard) "megatest") (else exe))))) - (test-sig (conc test-name ":" (item-list->path itemdat))) ;; test-path is the full path including the item-path + (item-path (item-list->path itemdat)) + (test-sig (conc test-name ":" item-path)) ;; (item-list->path itemdat))) ;; test-path is the full path including the item-path (work-area #f) (toptest-work-area #f) ;; for iterated tests the top test contains data relevant for all (diskpath #f) (cmdparms #f) (fullcmd #f) ;; (define a (with-output-to-string (lambda ()(write x)))) (mt-bindir-path #f) - (item-path (item-list->path itemdat)) ;; (test-id (cdb:remote-run db:get-test-id #f run-id test-name item-path)) (testinfo (rmt:get-test-info-by-id run-id test-id)) (mt_target (string-intersperse (map cadr keyvals) "/")) (debug-param (append (if (args:get-arg "-debug") (list "-debug" (args:get-arg "-debug")) '()) (if (args:get-arg "-logging")(list "-logging") '())))) @@ -709,10 +709,14 @@ ;; set the megatest to be called on the remote host (if (not remote-megatest)(set! remote-megatest local-megatest)) ;; "megatest")) (set! mt-bindir-path (pathname-directory remote-megatest)) (if launcher (set! launcher (string-split launcher))) ;; set up the run work area for this test + (if (args:get-arg "-preclean") ;; user has requested to preclean for this run + (begin + (debug:print-info 0 "attempting to preclean directory " (db:test-get-rundir testinfo) " for test " test-name "/" item-path) + (runs:remove-test-directory #f testinfo #t))) ;; remove data only, do not perturb the record (set! diskpath (get-best-disk *configdat*)) (if diskpath (let ((dat (create-work-area run-id run-info keyvals test-id test-path diskpath test-name itemdat))) (set! work-area (car dat)) (set! toptest-work-area (cadr dat)) Index: megatest.scm ================================================================== --- megatest.scm +++ megatest.scm @@ -70,10 +70,11 @@ -lock : lock run specified by target and runname -unlock : unlock run specified by target and runname -set-run-status status : sets status for run to status, requires -target and -runname -get-run-status : gets status for run specified by target and runname -run-wait : wait on run specified by target and runname + -preclean : remove the existing test directory before running the test Selectors (e.g. use for -runtests, -remove-runs, -set-state-status, -list-runs etc.) -target key1/key2/... : run for key1, key2, etc. -reqtarg key1/key2/... : run for key1, key2, etc. but key1/key2 must be in runconfig -testpatt patt1/patt2,patt3/... : % is wildcard @@ -232,10 +233,11 @@ "-set-values" "-load-test-data" "-summarize-items" "-gui" "-daemonize" + "-preclean" ;; misc "-archive" "-repl" "-lock" "-unlock" @@ -1193,23 +1195,10 @@ (debug:print 0 "Failed to setup, exiting") (exit 1))) (open-run-close db:find-and-mark-incomplete #f) (set! *didsomething* #t))) -;;====================================================================== -;; Wait on a run to complete -;;====================================================================== - -(if (args:get-arg "-run-wait") - (begin - (if (not (setup-for-run)) - (begin - (debug:print 0 "Failed to setup, exiting") - (exit 1))) - (operate-on 'run-wait) - (set! *didsomething* #t))) - ;;====================================================================== ;; Update the tests meta data from the testconfig files ;;====================================================================== (if (args:get-arg "-update-meta") @@ -1246,10 +1235,23 @@ (repl) (load (args:get-arg "-load"))) (db:close-all dbstruct)) (exit)) (set! *didsomething* #t))) + +;;====================================================================== +;; Wait on a run to complete +;;====================================================================== + +(if (args:get-arg "-run-wait") + (begin + (if (not (setup-for-run)) + (begin + (debug:print 0 "Failed to setup, exiting") + (exit 1))) + (operate-on 'run-wait) + (set! *didsomething* #t))) ;; Not converted to use dbstruct yet ;; (if (args:get-arg "-convert-to-norm") (let* ((toppath (setup-for-run)) Index: runs.scm ================================================================== --- runs.scm +++ runs.scm @@ -853,11 +853,11 @@ (max-concurrent-jobs (let ((mcj (config-lookup *configdat* "setup" "max_concurrent_jobs"))) (if (and mcj (string->number mcj)) (string->number mcj) 1))) ;; length of the register queue ahead (reglen (if (number? reglen-in) reglen-in 1)) - (last-time-incomplete (current-seconds)) + (last-time-incomplete (- (current-seconds) 900)) ;; force at least one clean up cycle (last-time-some-running (current-seconds))) ;; Initialize the test-registery hash with tests that already have a record ;; convert state to symbol and use that as the hash value (for-each (lambda (trec) @@ -872,17 +872,18 @@ (let loop ((hed (car sorted-test-names)) (tal (cdr sorted-test-names)) (reg '()) ;; registered, put these at the head of tal (reruns '())) + (if (not (null? reruns))(debug:print-info 4 "reruns=" reruns)) ;; Here we mark any old defunct tests as incomplete. Do this every fifteen minutes - ;; (if (> (current-seconds)(+ last-time-incomplete 900)) - ;; (begin - ;; (set! last-time-incomplete (current-seconds)) - ;; (cdb:remote-run db:find-and-mark-incomplete #f))) + (if (> (current-seconds)(+ last-time-incomplete 900)) + (begin + (set! last-time-incomplete (current-seconds)) + (cdb:remote-run db:find-and-mark-incomplete #f))) ;; (print "Top of loop, hed=" hed ", tal=" tal " ,reruns=" reruns) (let* ((test-record (hash-table-ref test-records hed)) (test-name (tests:testqueue-get-testname test-record)) (tconfig (tests:testqueue-get-testconfig test-record)) @@ -1304,11 +1305,11 @@ ;; 'remove-runs ;; 'set-state-status ;; ;; NB// should pass in keys? ;; -(define (runs:operate-on action target runnamepatt testpatt #!key (state #f)(status #f)(new-state-status #f)) +(define (runs:operate-on action target runnamepatt testpatt #!key (state #f)(status #f)(new-state-status #f)(remove-data-only #f)) (common:clear-caches) ;; clear all caches (let* ((db #f) (keys (rmt:get-keys)) (rundat (mt:get-runs-by-patt keys runnamepatt target)) (header (vector-ref rundat 0)) @@ -1375,75 +1376,47 @@ (let* ((item-path (db:test-get-item-path new-test-dat)) (test-name (db:test-get-testname new-test-dat)) (run-dir ;;(filedb:get-path *fdb* ;; (rmt:sdb-qry 'getid (db:test-get-rundir new-test-dat)) ;; ) ;; run dir is from the link tree - (real-dir (if (file-exists? run-dir) - (resolve-pathname run-dir) - #f)) (test-state (db:test-get-state new-test-dat)) - (test-fulln (db:test-get-fullname new-test-dat))) + (test-fulln (db:test-get-fullname new-test-dat)) + (uname (db:test-get-uname new-test-dat)) + (toplevel-with-children (and (db:test-get-is-toplevel test) + (> (cdb:remote-run db:test-toplevel-num-items db run-id test-name) 0)))) (case action ((remove-runs) - (debug:print-info 0 "test: " test-name " itest-state: " test-state) - (if (member test-state (list "RUNNING" "LAUNCHED" "REMOTEHOSTSTART" "KILLREQ")) - (begin - (if (not (hash-table-ref/default test-retry-time test-fulln #f)) - (begin - ;; want to set to REMOVING BUT CANNOT do it here? - (hash-table-set! test-retry-time test-fulln (current-seconds)))) - (if (> (- (current-seconds)(hash-table-ref test-retry-time test-fulln)) allow-run-time) - ;; This test is not in a correct state for cleaning up. Let's try some graceful shutdown steps first - ;; Set the test to "KILLREQ" and wait five seconds then try again. Repeat up to five times then give - ;; up and blow it away. - (begin - (debug:print 0 "WARNING: could not gracefully remove test " test-fulln ", tried to kill it to no avail. Forcing state to FAILEDKILL and continuing") - (mt:test-set-state-status-by-id run-id (db:test-get-id test) "FAILEDKILL" "n/a" #f) - (thread-sleep! 1)) - (begin + ;; if the test is a toplevel-with-children issue an error and do not remove + (if toplevel-with-children + (debug:print 0 "WARNING: skipping removal of " test-fulln " with run-id " run-id " as it has sub tests") + (begin + (debug:print-info 0 "test: " test-name " itest-state: " test-state) + (if (member test-state (list "RUNNING" "LAUNCHED" "REMOTEHOSTSTART" "KILLREQ")) + (begin + (if (not (hash-table-ref/default test-retry-time test-fulln #f)) + (begin + ;; want to set to REMOVING BUT CANNOT do it here? + (hash-table-set! test-retry-time test-fulln (current-seconds)))) + (if (> (- (current-seconds)(hash-table-ref test-retry-time test-fulln)) allow-run-time) + ;; This test is not in a correct state for cleaning up. Let's try some graceful shutdown steps first + ;; Set the test to "KILLREQ" and wait five seconds then try again. Repeat up to five times then give + ;; up and blow it away. + (begin + (debug:print 0 "WARNING: could not gracefully remove test " test-fulln ", tried to kill it to no avail. Forcing state to FAILEDKILL and continuing") + (mt:test-set-state-status-by-id run-id (db:test-get-id test) "FAILEDKILL" "n/a" #f) + (thread-sleep! 1)) + (begin (mt:test-set-state-status-by-id run-id (db:test-get-id test) "KILLREQ" "n/a" #f) - (thread-sleep! 1))) - ;; NOTE: This is suboptimal as the testdata will be used later and the state/status may have changed ... - (if (null? tal) - (loop new-test-dat tal) - (loop (car tal)(append tal (list new-test-dat))))) - (begin - (mt:test-set-state-status-by-id run-id (db:test-get-id test) "REMOVING" "LOCKED" #f) - (debug:print-info 1 "Attempting to remove " (if real-dir (conc " dir " real-dir " and ") "") " link " run-dir) - (if (and real-dir - (> (string-length real-dir) 5) - (file-exists? real-dir)) ;; bad heuristic but should prevent /tmp /home etc. - (begin ;; let* ((realpath (resolve-pathname run-dir))) - (debug:print-info 1 "Recursively removing " real-dir) - (if (file-exists? real-dir) - (runs:safe-delete-test-dir real-dir) - (debug:print 0 "WARNING: test dir " real-dir " appears to not exist or is not readable"))) - (if real-dir - (debug:print 0 "WARNING: directory " real-dir " does not exist") - (debug:print 0 "WARNING: no real directory corrosponding to link " run-dir ", nothing done"))) - (if (symbolic-link? run-dir) - (begin - (debug:print-info 1 "Removing symlink " run-dir) - (handle-exceptions - exn - (debug:print 0 "ERROR: Failed to remove symlink " run-dir ((condition-property-accessor 'exn 'message) exn) ", attempting to continue") - (delete-file run-dir))) - (if (directory? run-dir) - (if (> (directory-fold (lambda (f x)(+ 1 x)) 0 run-dir) 0) - (debug:print 0 "WARNING: refusing to remove " run-dir " as it is not empty") - (handle-exceptions - exn - (debug:print 0 "ERROR: Failed to remove directory " run-dir ((condition-property-accessor 'exn 'message) exn) ", attempting to continue") - (delete-directory run-dir))) - (if run-dir - (debug:print 0 "WARNING: not removing " run-dir " as it either doesn't exist or is not a symlink") - (debug:print 0 "NOTE: the run dir for this test is undefined. Test may have already been deleted.")) - )) - ;; Only delete the records *after* removing the directory. If things fail we have a record - (rmt:delete-test-records (db:test-get-run_id test)(db:test-get-id test)) - (if (not (null? tal)) - (loop (car tal)(cdr tal)))))) + (thread-sleep! 1))) + ;; NOTE: This is suboptimal as the testdata will be used later and the state/status may have changed ... + (if (null? tal) + (loop new-test-dat tal) + (loop (car tal)(append tal (list new-test-dat))))) + (begin + (runs:remove-test-directory db new-test-dat remove-data-only) + (if (not (null? tal)) + (loop (car tal)(cdr tal)))))))) ((set-state-status) (debug:print-info 2 "new state " (car state-status) ", new status " (cadr state-status)) (mt:test-set-state-status-by-id run-id (db:test-get-id test) (car state-status)(cadr state-status) #f) (if (not (null? tal)) (loop (car tal)(cdr tal)))) @@ -1475,10 +1448,51 @@ ))))) )) runs)) #t) +(define (runs:remove-test-directory db test remove-data-only) + (let* ((run-dir (db:test-get-rundir test)) ;; run dir is from the link tree + (real-dir (if (file-exists? run-dir) + (resolve-pathname run-dir) + #f))) + (if (not remove-data-only) + (mt:test-set-state-status-by-id (db:test-get-id test) "REMOVING" "LOCKED" #f)) + (debug:print-info 1 "Attempting to remove " (if real-dir (conc " dir " real-dir " and ") "") " link " run-dir) + (if (and real-dir + (> (string-length real-dir) 5) + (file-exists? real-dir)) ;; bad heuristic but should prevent /tmp /home etc. + (begin ;; let* ((realpath (resolve-pathname run-dir))) + (debug:print-info 1 "Recursively removing " real-dir) + (if (file-exists? real-dir) + (runs:safe-delete-test-dir real-dir) + (debug:print 0 "WARNING: test dir " real-dir " appears to not exist or is not readable"))) + (if real-dir + (debug:print 0 "WARNING: directory " real-dir " does not exist") + (debug:print 0 "WARNING: no real directory corrosponding to link " run-dir ", nothing done"))) + (if (symbolic-link? run-dir) + (begin + (debug:print-info 1 "Removing symlink " run-dir) + (handle-exceptions + exn + (debug:print 0 "ERROR: Failed to remove symlink " run-dir ((condition-property-accessor 'exn 'message) exn) ", attempting to continue") + (delete-file run-dir))) + (if (directory? run-dir) + (if (> (directory-fold (lambda (f x)(+ 1 x)) 0 run-dir) 0) + (debug:print 0 "WARNING: refusing to remove " run-dir " as it is not empty") + (handle-exceptions + exn + (debug:print 0 "ERROR: Failed to remove directory " run-dir ((condition-property-accessor 'exn 'message) exn) ", attempting to continue") + (delete-directory run-dir))) + (if run-dir + (debug:print 0 "WARNING: not removing " run-dir " as it either doesn't exist or is not a symlink") + (debug:print 0 "NOTE: the run dir for this test is undefined. Test may have already been deleted.")) + )) + ;; Only delete the records *after* removing the directory. If things fail we have a record + (if (not remove-data-only) + (cdb:remote-run db:delete-test-records db #f (db:test-get-id test))))) + ;;====================================================================== ;; Routines for manipulating runs ;;====================================================================== ;; Since many calls to a run require pretty much the same setup Index: tasks.scm ================================================================== --- tasks.scm +++ tasks.scm @@ -25,11 +25,13 @@ (define (tasks:open-db) (let* ((linktree (configf:lookup *configdat* "setup" "linktree")) (dbpath (conc linktree "/.db/monitor.db")) (exists (file-exists? dbpath)) (write-access (file-write-access? dbpath)) - (mdb (sqlite3:open-database dbpath)) ;; (never-give-up-open-db dbpath)) + (mdb (if (file-write-access? *toppath*) + (sqlite3:open-database dbpath) + (sqlite3:open-database ":memory:"))) ;; (never-give-up-open-db dbpath)) (handler (make-busy-timeout 36000))) (if (and exists (not write-access)) (set! *db-write-access* write-access)) ;; only unset so other db's also can use this control (sqlite3:set-busy-handler! mdb handler) Index: tests/Makefile ================================================================== --- tests/Makefile +++ tests/Makefile @@ -38,109 +38,109 @@ cd simplerun ; $(MEGATEST) -server - -debug $(DEBUG) test1 : cleanprep test2 : fullprep - cd fullrun;$(MEGATEST) -runtests ez_pass,runfirst/a/% -reqtarg ubuntu/nfs/none :runname $(RUNNAME) -debug $(DEBUG) $(LOGGING) - cd fullrun;megatest -runtests % -target ubuntu/nfs/none :runname $(RUNNAME)_01 -testpatt %/,%/ai -debug $(DEBUG) - cd fullrun;megatest -runtests %/,%/ai -target ubuntu/nfs/none :runname $(RUNAME)_02 -debug $(DEBUG) - cd fullrun;megatest -runtests runfirst/%,%/ai -target ubuntu/nfs/none :runname $(RUNNAME)_02 -debug $(DEBUG) + cd fullrun;$(MEGATEST) -preclean -runtests ez_pass,runfirst/a/% -reqtarg ubuntu/nfs/none :runname $(RUNNAME) -debug $(DEBUG) $(LOGGING) + cd fullrun;megatest -preclean -runtests % -target ubuntu/nfs/none :runname $(RUNNAME)_01 -testpatt %/,%/ai -debug $(DEBUG) + cd fullrun;megatest -preclean -runtests %/,%/ai -target ubuntu/nfs/none :runname $(RUNAME)_02 -debug $(DEBUG) + cd fullrun;megatest -preclean -runtests runfirst/%,%/ai -target ubuntu/nfs/none :runname $(RUNNAME)_02 -debug $(DEBUG) cd fullrun;megatest -runtests %/,%/winter -target ubuntu/nfs/none :runname $(RUNNAME)_03 -debug $(DEBUG) sleep 40;cd fullrun;megatest -target ubuntu/nfs/none :runname $(RUNNAME) -set-state-status COMPLETED,FORCED :state COMPLETED :status PASS -testpatt ez_p%s,runfirst/ -debug $(DEBUG) $(LOGGING) test3 : fullprep test3a test3b test3a : @echo Run runfirst and any waitons. - cd fullrun;$(MEGATEST) -runtests runfirst -reqtarg ubuntu/nfs/none :runname $(RUNNAME)_b + cd fullrun;$(MEGATEST) -preclean -runtests runfirst -reqtarg ubuntu/nfs/none :runname $(RUNNAME)_b test3b : @echo Run all_toplevel and all waitons - cd fullrun;$(MEGATEST) -runtests all_toplevel -reqtarg ubuntu/nfs/none :runname $(RUNNAME)_c + cd fullrun;$(MEGATEST) -preclean -runtests all_toplevel -reqtarg ubuntu/nfs/none :runname $(RUNNAME)_c test4 : cleanprep @echo "WARNING: No longer running fullprep, test converage may be lessened" - cd fullrun;time $(MEGATEST) -debug $(DEBUG) -runtests % -reqtarg ubuntu/nfs/none :runname $(RUNNAME)_b -m "This is a comment specific to a run" -v $(LOGGING) + cd fullrun;time $(MEGATEST) -debug $(DEBUG) -preclean -runtests % -reqtarg ubuntu/nfs/none :runname $(RUNNAME)_b -m "This is a comment specific to a run" -v $(LOGGING) test4a : cleanprep - cd fullrun;time $(MEGATEST) -debug $(DEBUG) -runtests all_toplevel -reqtarg ubuntu/nfs/none :runname $(RUNNAME)_b -m "This is a comment specific to a run" -v $(LOGGING) + cd fullrun;time $(MEGATEST) -debug $(DEBUG) -preclean -runtests all_toplevel -reqtarg ubuntu/nfs/none :runname $(RUNNAME)_b -m "This is a comment specific to a run" -v $(LOGGING) # NOTE: Only one instance can be a server test5 : cleanprep @echo "WARNING: No longer running fullprep, test converage may be lessened" - cd fullrun;sleep 0;$(MEGATEST) -runtests % -target $(TARGET) :runname $(RUNNAME)_aa -debug $(DEBUG) $(LOGGING) > aa.log 2> aa.log & - cd fullrun;sleep 3;$(MEGATEST) -runtests % -target $(TARGET) :runname $(RUNNAME)_ab -debug $(DEBUG) $(LOGGING) > ab.log 2> ab.log & - cd fullrun;sleep 6;$(MEGATEST) -runtests % -target $(TARGET) :runname $(RUNNAME)_ac -debug $(DEBUG) $(LOGGING) > ac.log 2> ac.log & - cd fullrun;sleep 9;$(MEGATEST) -runtests % -target $(TARGET) :runname $(RUNNAME)_ad -debug $(DEBUG) $(LOGGING) > ad.log 2> ad.log & - cd fullrun;sleep 12;$(MEGATEST) -runtests % -target $(TARGET) :runname $(RUNNAME)_ae -debug $(DEBUG) $(LOGGING) > ae.log 2> ae.log & - cd fullrun;sleep 15;$(MEGATEST) -runtests % -target $(TARGET) :runname $(RUNNAME)_af -debug $(DEBUG) $(LOGGING) > af.log 2> af.log & + cd fullrun;sleep 0;$(MEGATEST) -preclean -runtests % -target $(TARGET) :runname $(RUNNAME)_aa -debug $(DEBUG) $(LOGGING) > aa.log 2> aa.log & + cd fullrun;sleep 0;$(MEGATEST) -preclean -runtests % -target $(TARGET) :runname $(RUNNAME)_ab -debug $(DEBUG) $(LOGGING) > ab.log 2> ab.log & + cd fullrun;sleep 5;$(MEGATEST) -preclean -runtests % -target $(TARGET) :runname $(RUNNAME)_ac -debug $(DEBUG) $(LOGGING) > ac.log 2> ac.log & + cd fullrun;sleep 8;$(MEGATEST) -preclean -runtests % -target $(TARGET) :runname $(RUNNAME)_ad -debug $(DEBUG) $(LOGGING) > ad.log 2> ad.log & +# cd fullrun;sleep 0;$(MEGATEST) -preclean -runtests % -target $(TARGET) :runname $(RUNNAME)_ae -debug $(DEBUG) $(LOGGING) > ae.log 2> ae.log & +# cd fullrun;sleep 0;$(MEGATEST) -preclean -runtests % -target $(TARGET) :runname $(RUNNAME)_af -debug $(DEBUG) $(LOGGING) > af.log 2> af.log & # MUST ADD THIS BACK IN ASAP!!!! # cd fullrun;sleep 10;$(MEGATEST) -run-wait -target $(TARGET) :runname % -testpatt % :state RUNNING,LAUNCHED,NOT_STARTED,REMOTEHOSTSTART;echo ALL DONE test6: fullprep - cd fullrun;$(MEGATEST) -runtests runfirst -testpatt %/1 -reqtarg ubuntu/nfs/none :runname $(RUNNAME)_itempatt -v - cd fullrun;$(MEGATEST) -runtests runfirst -testpatt %blahha% -reqtarg ubuntu/nfs/none :runname $(RUNNAME)_itempatt -debug 10 + cd fullrun;$(MEGATEST) -preclean -runtests runfirst -testpatt %/1 -reqtarg ubuntu/nfs/none :runname $(RUNNAME)_itempatt -v + cd fullrun;$(MEGATEST) -preclean -runtests runfirst -testpatt %blahha% -reqtarg ubuntu/nfs/none :runname $(RUNNAME)_itempatt -debug 10 cd fullrun;$(MEGATEST) -rollup :runname newrun -target ubuntu/nfs/none -debug 10 test7: @echo Only a/c testname c should remain. If there is a run a/b/c then there is a cache issue. (cd simplerun; \ $(MEGATEST) -server - -daemonize; \ $(MEGATEST) -remove-runs -target %/% :runname % -testpatt %; \ - $(MEGATEST) -runtests % -target a/b :runname c; sleep 5; \ + $(MEGATEST) -preclean -runtests % -target a/b :runname c; sleep 5; \ $(MEGATEST) -remove-runs -target a/c :runname c; \ - $(MEGATEST) -runtests % -target a/c :runname c; \ + $(MEGATEST) -preclean -runtests % -target a/c :runname c; \ $(MEGATEST) -remove-runs -target a/b :runname c -testpatt % ; \ - $(MEGATEST) -runtests % -target a/d :runname c;$(MEGATEST) -list-runs %|egrep ^Run:) > test7.log 2> test7.log + $(MEGATEST) -preclean -runtests % -target a/d :runname c;$(MEGATEST) -list-runs %|egrep ^Run:) > test7.log 2> test7.log logpro test7.logpro test7.html < test7.log @echo @echo Run \"firefox test7.html\" to see the results. # This one failed with v1.55 test8a : - cd fullrun;$(MEGATEST) -runtests priority_10_waiton_1 -target ubuntu/nfs/none :runname $(RUNNAME)_waiton_single + cd fullrun;$(MEGATEST) -preclean -runtests priority_10_waiton_1 -target ubuntu/nfs/none :runname $(RUNNAME)_waiton_single test8 : test8a - cd fullrun;$(MEGATEST) -runtests lineitem_fail 1 -target ubuntu/nfs/none :runname $(RUNNAME)_singletest - cd fullrun;$(MEGATEST) -runtests runfirst/fall 1 -target ubuntu/nfs/none :runname $(RUNNAME)_singleitem - cd fullrun;$(MEGATEST) -runtests test_mt_vars/2 -target ubuntu/nfs/none :runname $(RUNNAME)_singleitem_waiton + cd fullrun;$(MEGATEST) -preclean -runtests lineitem_fail 1 -target ubuntu/nfs/none :runname $(RUNNAME)_singletest + cd fullrun;$(MEGATEST) -preclean -runtests runfirst/fall 1 -target ubuntu/nfs/none :runname $(RUNNAME)_singleitem + cd fullrun;$(MEGATEST) -preclean -runtests test_mt_vars/2 -target ubuntu/nfs/none :runname $(RUNNAME)_singleitem_waiton # Some simple checks for bootstrapping and run loop logic test9 : minsetup test9a test9b test9c test9d test9e test9a : @echo Run super-simple mintest e, no waitons. - cd mintest;$(MEGATEST) -runtests e -target $(VER) :runname `date +%H.%M.%S` -debug $(DEBUG) + cd mintest;$(MEGATEST) -preclean -runtests e -target $(VER) :runname `date +%H.%M.%S` -debug $(DEBUG) test9b : @echo Run simple mintest d with one waiton c - cd mintest;$(MEGATEST) -runtests d -target $(VER) :runname `date +%H.%M.%S` -debug $(DEBUG) + cd mintest;$(MEGATEST) -preclean -runtests d -target $(VER) :runname `date +%H.%M.%S` -debug $(DEBUG) test9c : @echo Run mintest a with full waiton chain a -> b -> c -> d -> e - cd mintest;$(MEGATEST) -runtests a -target $(VER) :runname `date +%H.%M.%S` -debug $(DEBUG) + cd mintest;$(MEGATEST) -preclean -runtests a -target $(VER) :runname `date +%H.%M.%S` -debug $(DEBUG) test9d : @echo Run an itemized test with no items - cd mintest;$(MEGATEST) -runtests g -target $(VER) :runname `date +%H.%M.%S` -debug $(DEBUG) + cd mintest;$(MEGATEST) -preclean -runtests g -target $(VER) :runname `date +%H.%M.%S` -debug $(DEBUG) test9e : @echo Run mintest a1 with full waiton chain with d1fail: a1 -> b1 -> c1 -> d1fail -> e1 - cd mintest;$(MEGATEST) -runtests a1 -target $(VER) :runname `date +%H.%M.%S` -debug $(DEBUG) + cd mintest;$(MEGATEST) -preclean -runtests a1 -target $(VER) :runname `date +%H.%M.%S` -debug $(DEBUG) test10 : @echo Run a bunch of different targets simultaneously (cd fullrun;$(MEGATEST) -server - ;sleep 2)& for targ in mint/btrfs/mintdir sunos/sshfs/loc; do \ - (cd fullrun;$(MEGATEST) -runtests priority_10_waiton_1 -target $$targ :runname $(RUNNAME) &); done + (cd fullrun;$(MEGATEST) -preclean -runtests priority_10_waiton_1 -target $$targ :runname $(RUNNAME) &); done for sys in ubuntu suse redhat debian;do \ for fs in afs nfs zfs; do \ for dpath in none tmp; do \ - (cd fullrun;$(MEGATEST) -runtests priority_10_waiton_1 -target $$sys/$$fs/$$dpath :runname $(RUNNAME) &);\ + (cd fullrun;$(MEGATEST) -preclean -runtests priority_10_waiton_1 -target $$sys/$$fs/$$dpath :runname $(RUNNAME) &);\ done;done;done test11 : cd fullrun;time (for a in 1 2 3 4 5 6 7 8 9 10 1 2 3 4 5 6 7 8 9 10 1 2 3 4 5 6 7 8 9 10 1 2 3 4 5 6 7 8 9 10 ;do (megatest -test-paths -target %/%/% > /dev/null ) & done; wait; ) Index: tests/fullrun/megatest.config ================================================================== --- tests/fullrun/megatest.config +++ tests/fullrun/megatest.config @@ -8,10 +8,14 @@ [refareas] area1 /tmp/oldarea/megatest [include config/mt_include_1.config] +[dashboard] +pre-command xterm -geometry 180x20 -e " +post-command |& tee results.log ;echo Press any key to continue;bash -c 'read -n 1 -s'" & + [misc] home #{shell readlink -f $MT_RUN_AREA_HOME} parent #{shell readlink -f $MT_RUN_AREA_HOME/..} [tests-paths]