Index: api.scm ================================================================== --- api.scm +++ api.scm @@ -72,13 +72,13 @@ ;; TESTS test-set-state-status-by-id delete-test-records delete-old-deleted-test-records - test-set-status-state + test-set-state-status test-set-top-process-pid - roll-up-pass-fail-counts + set-state-status-and-roll-up-items update-pass-fail-counts top-test-set-per-pf-counts ;; (db:top-test-set-per-pf-counts (db:get-db *db* 5) 5 "runfirst") ;; RUNS register-run @@ -140,13 +140,13 @@ ;; TESTS ((test-set-state-status-by-id) (apply db:test-set-state-status-by-id dbstruct params)) ((delete-test-records) (apply db:delete-test-records dbstruct params)) ((delete-old-deleted-test-records) (apply db:delete-old-deleted-test-records dbstruct params)) - ((test-set-status-state) (apply db:test-set-status-state dbstruct params)) + ((test-set-state-status) (apply db:test-set-state-status dbstruct params)) ((test-set-top-process-pid) (apply db:test-set-top-process-pid dbstruct params)) - ((roll-up-pass-fail-counts) (apply db:roll-up-pass-fail-counts dbstruct params)) + ((set-state-status-and-roll-up-items) (apply db:set-state-status-and-roll-up-items dbstruct params)) ;; ((update-pass-fail-counts) (apply db:general-call dbstruct 'update-pass-fail-counts params)) ((top-test-set-per-pf-counts) (apply db:top-test-set-per-pf-counts dbstruct params)) ((test-set-archive-block-id) (apply db:test-set-archive-block-id dbstruct params)) ;; RUNS Index: common.scm ================================================================== --- common.scm +++ common.scm @@ -97,10 +97,11 @@ (define *db-multi-sync-mutex* (make-mutex)) ;; protect access to *db-sync-in-progress*, *db-last-sync* ;; task db (define *task-db* #f) ;; (vector db path-to-db) (define *db-access-allowed* #t) ;; flag to allow access (define *db-access-mutex* (make-mutex)) +(define *db-transaction-mutex* (make-mutex)) (define *db-cache-path* #f) ;; SERVER (define *my-client-signature* #f) (define *transport-type* 'http) ;; override with [server] transport http|rpc|nmsg Index: dashboard-tests.scm ================================================================== --- dashboard-tests.scm +++ dashboard-tests.scm @@ -271,13 +271,14 @@ (iup:frame #:title "Set fields" (iup:vbox (iup:hbox (iup:label "Comment:") (let ((txtbox (iup:textbox #:action (lambda (val a b) - (rmt:test-set-state-status-by-id run-id test-id #f #f b) + ;; (rmt:test-set-state-status-by-id run-id test-id #f #f b) + (rmt:test-set-state-status run-id test-id #f #f b) ;; IDEA: Just set a variable with the proc to call? - (rmt:test-set-state-status-by-id run-id test-id #f #f b) + ;; (rmt:test-set-state-status-by-id run-id test-id #f #f b) (set! newcomment b)) #:value (db:test-get-comment testdat) #:expand "HORIZONTAL"))) (set! wtxtbox txtbox) txtbox)) @@ -287,11 +288,11 @@ (let* ((btns (map (lambda (state) (let ((btn (iup:button state #:expand "HORIZONTAL" #:size "50x" #:font "Courier New, -10" #:action (lambda (x) ;; (rmt:test-set-state-status-by-id run-id test-id state #f #f) - (rmt:roll-up-pass-fail-counts run-id test-id #f state #f #f) ;; test-name passed in as test-id is respected + (rmt:set-state-status-and-roll-up-items run-id test-id #f state #f #f) ;; test-name passed in as test-id is respected (db:test-set-state! testdat state))))) btn)) (map cadr *common:std-states*)))) ;; (list "COMPLETED" "NOT_STARTED" "RUNNING" "REMOTEHOSTSTART" "LAUNCHED" "KILLED" "KILLREQ")))) (vector-set! *state-status* 0 (lambda (state color) @@ -321,11 +322,11 @@ (if (not *dashboard-comment-share-slot*) (set! *dashboard-comment-share-slot* wtxtbox))) )))) (begin ;; (rmt:test-set-state-status-by-id run-id test-id #f status #f) - (rmt:roll-up-pass-fail-counts run-id test-id #f #f status #f) ;; test-name passed in as test-id is respected + (rmt:set-state-status-and-roll-up-items run-id test-id #f #f status #f) ;; test-name passed in as test-id is respected (db:test-set-status! testdat status)))))))) btn)) (map cadr *common:std-statuses*)))) ;; (list "PASS" "WARN" "FAIL" "CHECK" "n/a" "WAIVED" "SKIP")))) (vector-set! *state-status* 1 (lambda (status color) @@ -402,11 +403,12 @@ (let ((comment (iup:attribute comnt "VALUE")) (test-id (db:test-get-id testdat))) (if (or (not wpatt) (string-match wregx comment)) (begin - (rmt:test-set-state-status-by-id run-id test-id #f "WAIVED" comment) + ;; (rmt:test-set-state-status-by-id run-id test-id #f "WAIVED" comment) + (rmt:test-set-state-status-by run-id test-id #f "WAIVED" comment) (db:test-set-status! testdat "WAIVED") (cmtcmd comment) (iup:destroy! dlog)))))) (iup:button "Cancel" #:expand "HORIZONTAL" Index: db.scm ================================================================== --- db.scm +++ db.scm @@ -626,11 +626,11 @@ ;; first pass implementation, just insert all changed rows (for-each (lambda (targdb) (let* ((db (db:dbdat-get-db targdb)) (stmth (sqlite3:prepare db full-ins))) - ;; (db:delay-if-busy targdb) ;; NO WAITING + (db:delay-if-busy targdb) ;; NO WAITING (for-each (lambda (fromdat-lst) (sqlite3:with-transaction db (lambda () @@ -1500,11 +1500,11 @@ (if (> (length all-ids) 0) (begin (debug:print 0 *default-log-port* "WARNING: Marking test(s); " (string-intersperse (map conc all-ids) ", ") " as INCOMPLETE") (for-each (lambda (test-id) - (db:test-set-status-state dbstruct run-id test-id "COMPLETE" "DEAD" "Test failed to complete")) + (db:test-set-state-status dbstruct run-id test-id "COMPLETE" "DEAD" "Test failed to complete")) all-ids)))))) ;; ALL REPLACED BY THE BLOCK ABOVE ;; ;; (sqlite3:execute @@ -1961,16 +1961,19 @@ ;; Update run_stats for given run_id ;; input data is a list (state status count) ;; (define (db:update-run-stats dbstruct run-id stats) + ;; (mutex-lock! *db-transaction-mutex*) (db:with-db dbstruct #f #f + (lambda (db) ;; remove previous data + (let* ((stmt1 (sqlite3:prepare db "DELETE FROM run_stats WHERE run_id=? AND state=? AND status=?;")) (stmt2 (sqlite3:prepare db "INSERT INTO run_stats (run_id,state,status,count) VALUES (?,?,?,?);")) (res (sqlite3:with-transaction db @@ -1980,10 +1983,11 @@ (sqlite3:execute stmt1 run-id (car dat)(cadr dat)) (apply sqlite3:execute stmt2 run-id dat)) stats))))) (sqlite3:finalize! stmt1) (sqlite3:finalize! stmt2) + ;; (mutex-unlock! *db-transaction-mutex*) res)))) (define (db:get-main-run-stats dbstruct run-id) (db:with-db dbstruct @@ -2483,18 +2487,19 @@ run-id #t (lambda (db) (let ((test-id (db:get-test-id dbstruct run-id testname ""))) (sqlite3:execute db qry newstate newstatus run-id testname) - (if test-id (mt:process-triggers run-id test-id newstate newstatus))) + (if test-id (mt:process-triggers dbstruct run-id test-id newstate newstatus)) + ) )))) testnames)) -;; speed up for common cases with a little logic -;; NB// Ultimately this will be deprecated in deference to mt:test-set-state-status-by-id -;; -(define (db:test-set-state-status-by-id dbstruct run-id test-id newstate newstatus newcomment) +;; ;; speed up for common cases with a little logic +;; ;; NB// Ultimately this will be deprecated in deference to mt:test-set-state-status-by-id +;; ;; +(define (db:test-set-state-status dbstruct run-id test-id newstate newstatus newcomment) (db:with-db dbstruct run-id #t (lambda (db) @@ -2507,11 +2512,11 @@ (else (if newstate (sqlite3:execute db "UPDATE tests SET state=? WHERE id=?;" newstate test-id)) (if newstatus (sqlite3:execute db "UPDATE tests SET status=? WHERE id=?;" newstatus test-id)) (if newcomment (sqlite3:execute db "UPDATE tests SET comment=? WHERE id=?;" newcomment ;; (sdb:qry 'getid newcomment) test-id)))) - (mt:process-triggers run-id test-id newstate newstatus)))) + (mt:process-triggers dbstruct run-id test-id newstate newstatus)))) ;; NEW BEHAVIOR: Count tests running in all runs! ;; (define (db:get-count-tests-running dbstruct run-id) (db:with-db @@ -3138,24 +3143,24 @@ (print-call-chain (current-error-port)) msg))) ;; crude reply for when things go awry ((zmq nmsg)(with-input-from-string msg (lambda ()(deserialize)))) (else msg))) ;; rpc -;; This is to be the big daddy call - -(define (db:test-set-status-state dbstruct run-id test-id status state msg) - (let ((dbdat (db:get-db dbstruct run-id))) - (if (member state '("LAUNCHED" "REMOTEHOSTSTART")) - (db:general-call dbdat 'set-test-start-time (list test-id))) - ;; (if msg - ;; (db:general-call dbdat 'state-status-msg (list state status msg test-id)) - ;; (db:general-call dbdat 'state-status (list state status test-id))) - (db:set-state-status-and-roll-up-items dbstruct run-id test-id #f state status msg) - ;; process the test_data table - (if (and test-id state status (equal? status "AUTO")) - (db:test-data-rollup dbstruct run-id test-id status)) - (mt:process-triggers run-id test-id state status))) +;; ; This is to be the big daddy call NOPE: Replaced by db:set-state-status-and-roll-up-items +;; ; +;; define (db:test-set-state-status dbstruct run-id test-id state status msg) +;; (let ((dbdat (db:get-db dbstruct run-id))) +;; (if (member state '("LAUNCHED" "REMOTEHOSTSTART")) +;; (db:general-call dbdat 'set-test-start-time (list test-id))) +;; ;; (if msg +;; ;; (db:general-call dbdat 'state-status-msg (list state status msg test-id)) +;; ;; (db:general-call dbdat 'state-status (list state status test-id))) +;; (db:set-state-status-and-roll-up-items dbstruct run-id test-id #f state status msg) +;; ;; process the test_data table +;; (if (and test-id state status (equal? status "AUTO")) +;; (db:test-data-rollup dbstruct run-id test-id status)) +;; (mt:process-triggers dbstruct run-id test-id state status))) ;; state is the priority rollup of all states ;; status is the priority rollup of all completed statesfu ;; ;; if test-name is an integer work off that instead of test-name test-path @@ -3170,85 +3175,53 @@ (test-name (if (number? test-name) (db:test-get-testname testdat) test-name)) (item-path (db:test-get-item-path testdat)) (tl-testdat (db:get-test-info dbstruct run-id test-name "")) - (tl-test-id (db:test-get-id tl-testdat))) - (sqlite3:with-transaction - db - (lambda () - (db:test-set-state-status-by-id dbstruct run-id test-id state status comment) - (if (not (equal? item-path "")) ;; only roll up IF incoming test is an item - (let* ((state-status-counts (db:get-all-state-status-counts-for-test db run-id test-name item-path)) ;; item-path is used to exclude current state/status of THIS test - (running (length (filter (lambda (x) - (member (dbr:counts-state x) *common:running-states*)) - state-status-counts))) - (bad-not-started (length (filter (lambda (x) - (and (equal? (dbr:counts-state x) "NOT_STARTED") - (not (member (dbr:counts-status x) - *common:not-started-ok-statuses*)))) - state-status-counts))) - (all-curr-states (common:special-sort ;; worst -> best (sort of) - (delete-duplicates - (cons state (map dbr:counts-state state-status-counts))) - *common:std-states* >)) - (all-curr-statuses (common:special-sort ;; worst -> best - (delete-duplicates - (cons status (map dbr:counts-status state-status-counts))) - *common:std-statuses* >)) - (newstate (if (> running 0) - "RUNNING" - (if (> bad-not-started 0) - "COMPLETED" - (car all-curr-states)))) - (newstatus (if (> bad-not-started 0) - "CHECK" - (car all-curr-statuses)))) - ;; (print "Setting toplevel to: " newstate "/" newstatus) - (db:test-set-state-status-by-id dbstruct run-id tl-test-id newstate newstatus #f))))))) - -(define db:roll-up-pass-fail-counts db:set-state-status-and-roll-up-items) - -;; call with state = #f to roll up with out accounting for state/status of this item -;; -;; (define (db:roll-up-pass-fail-counts dbstruct run-id test-name item-path state status) -;; (if (not (equal? item-path "")) ;; if asked to do this for a specific item then do an incremental update -;; (let* ((dbdat (db:get-db dbstruct run-id)) -;; (toptestdat (db:get-test-info dbstruct run-id test-name item-path)) -;; (currtopstate (db:test-get-state toptestdat)) -;; (currtopstatus (db:test-get-status toptestdat)) -;; (nextss (common:apply-state-status currtopstate currtopstatus state status)) -;; (newtopstate (car nextss)) ;; #f or a symbol -;; (newtopstatus (cdr nextss))) ;; #f or a symbol -;; (if (not newtopstate) ;; need to calculate it -;; -;; ;; We rely on the toplevel to track status as state varies. I.e. preserve an ABORT -;; -;; -;; ;; (db (db:dbdat-get-db dbdat))) -;; (db:general-call dbdat 'update-pass-fail-counts (list test-name test-name test-name)) -;; (db:top-test-set-per-pf-counts dbstruct run-id test-name)))) -;; -;; ;; (case (string->symbol status) -;; ;; ((RUNNING) (db:general-call dbdat 'top-test-set-running (list test-name))) -;; ;; ((LAUNCHED) (db:general-call dbdat 'top-test-set (list "LAUNCHED" test-name))) -;; ;; ((ABORT INCOMPLETE) (db:general-call dbdat 'top-test-set (list status test-name)))) -;; -;; ;; (if (or (not state) -;; ;; (not (equal? item-path ""))) -;; ;; ;; just do a rollup -;; ;; (begin -;; ;; (db:top-test-set-per-pf-counts dbdat run-id test-name) -;; ;; #f) -;; ;; (begin -;; ;; ;; NOTE: No else clause needed for this case -;; ;; (case (string->symbol status) -;; ;; ((RUNNING) (db:general-call dbdat 'top-test-set-running (list test-name))) -;; ;; ((LAUNCHED) (db:general-call dbdat 'top-test-set (list "LAUNCHED" test-name))) -;; ;; ((ABORT INCOMPLETE) (db:general-call dbdat 'top-test-set (list status test-name)))) -;; ;; #f) -;; ;; ))) + (tl-test-id (db:test-get-id tl-testdat)) + (dbdat (db:get-db dbstruct run-id))) + (if (member state '("LAUNCHED" "REMOTEHOSTSTART")) + (db:general-call dbdat 'set-test-start-time (list test-id))) + (mutex-lock! *db-transaction-mutex*) + (let ((tr-res + (sqlite3:with-transaction + db + (lambda () + ;; (db:test-set-state-status-by-id dbstruct run-id test-id state status comment) + (db:test-set-state-status dbstruct run-id test-id state status comment) + (if (not (equal? item-path "")) ;; only roll up IF incoming test is an item + (let* ((state-status-counts (db:get-all-state-status-counts-for-test db run-id test-name item-path)) ;; item-path is used to exclude current state/status of THIS test + (running (length (filter (lambda (x) + (member (dbr:counts-state x) *common:running-states*)) + state-status-counts))) + (bad-not-started (length (filter (lambda (x) + (and (equal? (dbr:counts-state x) "NOT_STARTED") + (not (member (dbr:counts-status x) + *common:not-started-ok-statuses*)))) + state-status-counts))) + (all-curr-states (common:special-sort ;; worst -> best (sort of) + (delete-duplicates + (cons state (map dbr:counts-state state-status-counts))) + *common:std-states* >)) + (all-curr-statuses (common:special-sort ;; worst -> best + (delete-duplicates + (cons status (map dbr:counts-status state-status-counts))) + *common:std-statuses* >)) + (newstate (if (> running 0) + "RUNNING" + (if (> bad-not-started 0) + "COMPLETED" + (car all-curr-states)))) + (newstatus (if (> bad-not-started 0) + "CHECK" + (car all-curr-statuses)))) + ;; (print "Setting toplevel to: " newstate "/" newstatus) + (db:test-set-state-status dbstruct run-id tl-test-id newstate newstatus #f))))))) + (mutex-unlock! *db-transaction-mutex*) + (if (and test-id state status (equal? status "AUTO")) + (db:test-data-rollup dbstruct run-id test-id status)) + tr-res))) (define (db:get-all-state-status-counts-for-test db run-id test-name item-path) (sqlite3:map-row (lambda (state status count) (make-dbr:counts state: state status: status count: count)) @@ -3329,11 +3302,11 @@ '(update-cpuload-diskfree "UPDATE tests SET cpuload=?,diskfree=? WHERE id=?;") ;; DONE '(update-uname-host "UPDATE tests SET uname=?,host=? WHERE id=?;") ;; DONE '(update-test-rundat "INSERT INTO test_rundat (test_id,update_time,cpuload,diskfree,diskusage,run_duration) VALUES (?,?,?,?,?,?);") '(update-test-state "UPDATE tests SET state=? WHERE state=? AND run_id=? AND testname=? AND NOT (item_path='' AND testname IN (SELECT DISTINCT testname FROM tests WHERE testname=? AND item_path != ''));") '(update-test-status "UPDATE tests SET status=? WHERE status like ? AND run_id=? AND testname=? AND NOT (item_path='' AND testname IN (SELECT DISTINCT testname FROM tests WHERE testname=? AND item_path != ''));") - ;; stuff for roll-up-pass-fail-counts + ;; stuff for set-state-status-and-roll-up-items '(update-pass-fail-counts "UPDATE tests SET fail_count=(SELECT count(id) FROM tests WHERE testname=? AND item_path != '' AND status IN ('FAIL','CHECK','INCOMPLETE','ABORT')), pass_count=(SELECT count(id) FROM tests WHERE testname=? AND item_path != '' AND status IN ('PASS','WARN','WAIVED')) WHERE testname=? AND item_path='' AND run_id=?;") ;; DONE ;; BROKEN!!! NEEDS run-id '(top-test-set-running "UPDATE tests SET state='RUNNING' WHERE testname=? AND item_path='' AND run_id=?;") ;; DONE ;; BROKEN!!! NEEDS run-id @@ -3437,11 +3410,11 @@ (let ((q (alist-ref qry-name db:queries))) (if q (car q) #f))) ;; do not run these as part of the transaction (define db:special-queries '(rollup-tests-pass-fail - ;; db:roll-up-pass-fail-counts ;; WHY NOT!? + ;; db:set-state-status-and-roll-up-items ;; WHY NOT!? login immediate flush sync set-verbosity Index: ezsteps.scm ================================================================== --- ezsteps.scm +++ ezsteps.scm @@ -164,11 +164,11 @@ new-state new-status (args:get-arg "-m") #f) ;; need to update the top test record if PASS or FAIL and this is a subtest (if (not (equal? item-path "")) - (cdb:roll-up-pass-fail-counts *runremote* run-id test-name item-path new-status)))) + (cdb:set-state-status-and-roll-up-items *runremote* run-id test-name item-path new-status)))) ;; for automated creation of the rollup html file this is a good place... (if (not (equal? item-path "")) (tests:summarize-items #f run-id test-id test-name #f)) ;; don't force - just update if no ))) (pop-directory) Index: launch.scm ================================================================== --- launch.scm +++ launch.scm @@ -64,18 +64,19 @@ (define (launch:load-logpro-dat run-id test-id stepname) (let ((cname (conc stepname ".dat"))) (if (file-exists? cname) (let* ((dat (read-config cname #f #f)) (csvr (db:logpro-dat->csv dat stepname)) - (csvt (let-values (( (fmt-cell fmt-record fmt-csv) (make-format ","))) - (fmt-csv (map list->csv-record csvr)))) + (csvt (let-values (((fmt-cell fmt-record fmt-csv) (make-format ","))) + (fmt-csv (map list->csv-record csvr)))) (status (configf:lookup dat "final" "exit-status")) (msg (configf:lookup dat "final" "message"))) - ;;(if csvt ;; this if blocked stack dump caused by .dat file from logpro being 0-byte. fixed by upgrading logpro + (if csvt ;; this if blocked stack dump caused by .dat file from logpro being 0-byte. fixed by upgrading logpro (rmt:csv->test-data run-id test-id csvt) - ;; (BB> "Error: run-id/test-id/stepname="run-id"/"test-id"/"stepname" => bad csvr="csvr) - ;; ) + (debug:print 0 *default-log-port* "ERROR: no csvdat exists for run-id: " run-id " test-id: " test-id " stepname: " stepname ", check that logpro version is 1.15 or newer")) + ;; (BB> "Error: run-id/test-id/stepname="run-id"/"test-id"/"stepname" => bad csvr="csvr) + ;; ) (cond ((equal? status "PASS") "PASS") ;; skip the message part if status is pass (status (conc (configf:lookup dat "final" "exit-status") ": " (if msg msg "no message"))) (else #f))) #f))) @@ -253,11 +254,11 @@ ;; any of the other stuff that tests:test-set-status! does. Let's just ;; force RUNNING/n/a ;; (thread-sleep! 0.3) ;; (tests:test-force-state-status! run-id test-id "RUNNING" "n/a") - (rmt:roll-up-pass-fail-counts run-id test-name item-path #f "RUNNING" #f) + (rmt:set-state-status-and-roll-up-items run-id test-name item-path #f "RUNNING" #f) ;; (thread-sleep! 0.3) ;; NFS slowness has caused grief here ;; if there is a runscript do it first (if fullrunscript (let ((pid (process-run fullrunscript))) @@ -457,11 +458,11 @@ (if (eq? signum signal/stop) (debug:print-error 0 *default-log-port* "attempt to STOP process. Exiting.")) (set! *time-to-exit* #t) (print "Received signal " signum ", cleaning up before exit. Please wait...") (let ((th1 (make-thread (lambda () - (tests:test-force-state-status! run-id test-id "INCOMPLETE" "KILLED") + (rmt:test-set-state-status run-id test-id "INCOMPLETE" "KILLED" #f) (print "Killed by signal " signum ". Exiting") (thread-sleep! 1) (exit 1)))) (th2 (make-thread (lambda () (thread-sleep! 2) @@ -481,17 +482,23 @@ (test-host (db:test-get-host test-info)) (test-pid (db:test-get-process_id test-info))) (cond ((member (db:test-get-state test-info) '("INCOMPLETE" "KILLED" "UNKNOWN" "KILLREQ" "STUCK")) ;; prior run of this test didn't complete, go ahead and try to rerun (debug:print 0 *default-log-port* "INFO: test is INCOMPLETE or KILLED, treat this execute call as a rerun request") - (tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a")) ;; prime it for running + ;; (tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a") + (rmt:test-set-state-status run-id test-id "REMOTEHOSTSTART" "n/a" #f) + ) ;; prime it for running ((member (db:test-get-state test-info) '("RUNNING" "REMOTEHOSTSTART")) (if (process:alive-on-host? test-host test-pid) (debug:print-error 0 *default-log-port* "test state is " (db:test-get-state test-info) " and process " test-pid " is still running on host " test-host ", cannot proceed") - (tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a"))) + ;; (tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a") + (rmt:test-set-state-status run-id test-id "REMOTEHOSTSTART" "n/a" #f) + )) ((not (member (db:test-get-state test-info) '("REMOVING" "REMOTEHOSTSTART" "RUNNING" "KILLREQ"))) - (tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a")) + ;; (tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a") + (rmt:test-set-state-status run-id test-id "REMOTEHOSTSTART" "n/a" #f) + ) (else ;; (member (db:test-get-state test-info) '("REMOVING" "REMOTEHOSTSTART" "RUNNING" "KILLREQ")) (debug:print-error 0 *default-log-port* "test state is " (db:test-get-state test-info) ", cannot proceed") (exit)))) (debug:print 2 *default-log-port* "Exectuing " test-name " (id: " test-id ") on " (get-host-name)) @@ -651,11 +658,11 @@ test-id new-state new-status (args:get-arg "-m") #f) ;; need to update the top test record if PASS or FAIL and this is a subtest - ;; NO NEED TO CALL roll-up-pass-fail-counts HERE, THIS IS DONE IN roll-up-pass-fail-counts called by tests:test-set-status! + ;; NO NEED TO CALL set-state-status-and-roll-up-items HERE, THIS IS DONE IN set-state-status-and-roll-up-items called by tests:test-set-status! )) ;; for automated creation of the rollup html file this is a good place... (if (not (equal? item-path "")) (tests:summarize-items run-id test-id test-name #f)) (tests:summarize-test run-id test-id) ;; don't force - just update if no @@ -1121,12 +1128,12 @@ ((dboard) "../megatest") ((mtest) "../megatest") ((dashboard) "megatest") (else exe))))) (launcher (common:get-launcher *configdat* test-name item-path)) ;; (config-lookup *configdat* "jobtools" "launcher")) - (test-sig (conc (common:get-testsuite-name) ":" test-name ":" item-path)) ;; (item-list->path itemdat))) ;; test-path is the full path including the item-path - (work-area #f) + (test-sig (conc (common:get-testsuite-name) ":" test-name ":" item-path)) ;; (item-list->path itemdat))) ;; test-path is the full path including the item-path + (work-area #f) (toptest-work-area #f) ;; for iterated tests the top test contains data relevant for all (diskpath #f) (cmdparms #f) (fullcmd #f) ;; (define a (with-output-to-string (lambda ()(write x)))) (mt-bindir-path #f) @@ -1148,11 +1155,11 @@ ;; prevent overlapping actions - set to LAUNCHED as early as possible ;; ;; the following call handles waiver propogation. cannot yet condense into roll-up-pass-fail (tests:test-set-status! run-id test-id "LAUNCHED" "n/a" #f #f) ;; (if launch-results launch-results "FAILED")) - (rmt:roll-up-pass-fail-counts run-id test-name item-path #f "LAUNCHED" #f) + (rmt:set-state-status-and-roll-up-items run-id test-name item-path #f "LAUNCHED" #f) ;; (pp (hash-table->alist tconfig)) (set! diskpath (get-best-disk *configdat* tconfig)) (if diskpath (let ((dat (create-work-area run-id run-info keyvals test-id test-path diskpath test-name itemdat))) (set! work-area (car dat)) @@ -1227,11 +1234,11 @@ process-run) (if useshell (let ((cmdstr (string-intersperse fullcmd " "))) (if launchwait cmdstr - (conc cmdstr " >> mt_launch.log 2>&1"))) + (conc cmdstr " >> mt_launch.log 2>&1 &"))) (car fullcmd)) (if useshell '() (cdr fullcmd))))) (mutex-unlock! *launch-setup-mutex*) ;; yes, really should mutex all the way to here. Need to put this entire process into a fork. Index: mt.scm ================================================================== --- mt.scm +++ mt.scm @@ -128,12 +128,12 @@ ;;====================================================================== ;; T R I G G E R S ;;====================================================================== -(define (mt:process-triggers run-id test-id newstate newstatus) - (let* ((test-dat (rmt:get-test-info-by-id run-id test-id))) +(define (mt:process-triggers dbstruct run-id test-id newstate newstatus) + (let* ((test-dat (db:get-test-info-by-id dbstruct run-id test-id))) (if test-dat (let* ((test-rundir ;; (rmt:sdb-qry 'getstr ;; (filedb:get-path *fdb* (db:test-get-rundir test-dat)) ;; ) ;; ) (test-name (db:test-get-testname test-dat)) (tconfig #f) @@ -186,18 +186,18 @@ ;; (rmt:general-call 'state-status run-id newstate newstatus test-id)) ;; (else ;; (if newstate (rmt:general-call 'set-test-state run-id newstate test-id)) ;; (if newstatus (rmt:general-call 'set-test-status run-id newstatus test-id)) ;; (if newcomment (rmt:general-call 'set-test-comment run-id newcomment test-id)))) - (rmt:roll-up-pass-fail-counts run-id test-id #f newstate newstatus newcomment) - (mt:process-triggers run-id test-id newstate newstatus) + (rmt:set-state-status-and-roll-up-items run-id test-id #f newstate newstatus newcomment) + ;; (mt:process-triggers run-id test-id newstate newstatus) #t))) (define (mt:test-set-state-status-by-testname run-id test-name item-path new-state new-status new-comment) (let ((test-id (rmt:get-test-id run-id test-name item-path))) - (rmt:roll-up-pass-fail-counts run-id test-name item-path new-state new-status new-comment) - (mt:process-triggers run-id test-id new-state new-status) + (rmt:set-state-status-and-roll-up-items run-id test-name item-path new-state new-status new-comment) + ;; (mt:process-triggers run-id test-id new-state new-status) #t)) ;;(mt:test-set-state-status-by-id run-id test-id new-state new-status new-comment))) (define (mt:lazy-read-test-config test-name) (let ((tconf (hash-table-ref/default *testconfigs* test-name #f))) Index: rmt.scm ================================================================== --- rmt.scm +++ rmt.scm @@ -75,11 +75,11 @@ (set! *runremote* (make-remote)) (mutex-unlock! *rmt-mutex*) (debug:print-info 12 *default-log-port* "rmt:send-receive, case 1") (rmt:send-receive cmd rid params attemptnum: attemptnum)) ;; ensure we have a homehost record - ((not (pair? (remote-hh-dat *runremote*))) ;; have a homehost record? + ((not (pair? (remote-hh-dat *runremote*))) ;; not on homehost (thread-sleep! 0.1) ;; since we shouldn't get here, delay a little (remote-hh-dat-set! *runremote* (common:get-homehost)) (mutex-unlock! *rmt-mutex*) (debug:print-info 12 *default-log-port* "rmt:send-receive, case 2") (rmt:send-receive cmd rid params attemptnum: attemptnum)) @@ -141,18 +141,20 @@ ;; (rmt:send-receive cmd rid params attemptnum: attemptnum)) ;) ;) ;;;; ;; if not on homehost ensure we have a connection to a live server ;; NOTE: we *have* a homehost record by now - ((and (not (cdr (remote-hh-dat *runremote*))) ;; not on a homehost - (not (remote-conndat *runremote*)) ;; and no connection - (server:read-dotserver *toppath*)) ;; .server file exists - ;; something caused the server entry in tdb to disappear, but the server is still running - (server:remove-dotserver-file *toppath* ".*") - (mutex-unlock! *rmt-mutex*) - (debug:print-info 12 *default-log-port* "rmt:send-receive, case 20") - (rmt:send-receive cmd rid params attemptnum: (add1 attemptnum))) + + ;; ((and (not (cdr (remote-hh-dat *runremote*))) ;; not on a homehost + ;; (not (remote-conndat *runremote*)) ;; and no connection + ;; (server:read-dotserver *toppath*)) ;; .server file exists + ;; ;; something caused the server entry in tdb to disappear, but the server is still running + ;; (server:remove-dotserver-file *toppath* ".*") + ;; (mutex-unlock! *rmt-mutex*) + ;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 20") + ;; (rmt:send-receive cmd rid params attemptnum: (add1 attemptnum))) + ((and (not (cdr (remote-hh-dat *runremote*))) ;; not on a homehost (not (remote-conndat *runremote*))) ;; and no connection (debug:print-info 12 *default-log-port* "rmt:send-receive, case 6 hh-dat: " (remote-hh-dat *runremote*) " conndat: " (remote-conndat *runremote*)) (mutex-unlock! *rmt-mutex*) (tasks:start-and-wait-for-server (tasks:open-db) 0 15) @@ -166,10 +168,11 @@ ;; not on homehost, do server query (else (mutex-unlock! *rmt-mutex*) (debug:print-info 12 *default-log-port* "rmt:send-receive, case 9") + (mutex-lock! *rmt-mutex*) (let* ((conninfo (remote-conndat *runremote*)) (dat (case (remote-transport *runremote*) ((http) (condition-case ;; handling here has caused a lot of problems. However it is needed to deal with attemtped communication to servers that have gone away (http-transport:client-api-send-receive 0 conninfo cmd params) ((commfail)(vector #f "communications fail")) @@ -178,22 +181,27 @@ (debug:print 0 *default-log-port* "ERROR: transport " (remote-transport *runremote*) " not supported") (exit)))) (success (if (vector? dat) (vector-ref dat 0) #f)) (res (if (vector? dat) (vector-ref dat 1) #f))) (if (vector? conninfo)(http-transport:server-dat-update-last-access conninfo)) ;; refresh access time - (debug:print-info 12 *default-log-port* "rmt:send-receive, case 9. conninfo=" conninfo " dat=" dat) + ;; (mutex-unlock! *rmt-mutex*) + (debug:print-info 12 *default-log-port* "rmt:send-receive, case 9. conninfo=" conninfo " dat=" dat " *runremote* = "*runremote*) (if success (case (remote-transport *runremote*) - ((http) res) + ((http) + (mutex-unlock! *rmt-mutex*) + res) (else (debug:print 0 *default-log-port* "ERROR: transport " (remote-transport *runremote*) " is unknown") + (mutex-unlock! *rmt-mutex*) (exit 1))) (begin (debug:print 0 *default-log-port* "WARNING: communication failed. Trying again, try num: " attemptnum) (remote-conndat-set! *runremote* #f) (remote-server-url-set! *runremote* #f) (debug:print-info 12 *default-log-port* "rmt:send-receive, case 9.1") + (mutex-unlock! *rmt-mutex*) (tasks:start-and-wait-for-server (tasks:open-db) 0 15) (rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1))))))))) (define (rmt:update-db-stats run-id rawcmd params duration) (mutex-lock! *db-stats-mutex*) @@ -499,12 +507,12 @@ ;; This is not needed as test steps are deleted on test delete call ;; ;; (define (rmt:delete-test-step-records run-id test-id) ;; (rmt:send-receive 'delete-test-step-records run-id (list run-id test-id))) -(define (rmt:test-set-status-state run-id test-id status state msg) - (rmt:send-receive 'test-set-status-state run-id (list run-id test-id status state msg))) +(define (rmt:test-set-state-status run-id test-id state status msg) + (rmt:send-receive 'test-set-state-status run-id (list run-id test-id state status msg))) (define (rmt:test-toplevel-num-items run-id test-name) (rmt:send-receive 'test-toplevel-num-items run-id (list run-id test-name))) ;; (define (rmt:get-previous-test-run-record run-id test-name item-path) @@ -563,12 +571,12 @@ (define (rmt:get-count-tests-running-in-jobgroup run-id jobgroup) (rmt:send-receive 'get-count-tests-running-in-jobgroup run-id (list run-id jobgroup))) ;; state and status are extra hints not usually used in the calculation ;; -(define (rmt:roll-up-pass-fail-counts run-id test-name item-path state status comment) - (rmt:send-receive 'roll-up-pass-fail-counts run-id (list run-id test-name item-path state status comment))) +(define (rmt:set-state-status-and-roll-up-items run-id test-name item-path state status comment) + (rmt:send-receive 'set-state-status-and-roll-up-items run-id (list run-id test-name item-path state status comment))) (define (rmt:update-pass-fail-counts run-id test-name) (rmt:general-call 'update-pass-fail-counts run-id test-name test-name test-name)) (define (rmt:top-test-set-per-pf-counts run-id test-name) Index: runs.scm ================================================================== --- runs.scm +++ runs.scm @@ -928,11 +928,11 @@ (debug:print 0 *default-log-port* "WARNING: test " hed " has FAILED prerequitests and we've tried at least 10 times to run it. Giving up now.")) ;; (debug:print 0 *default-log-port* " prereqs: " prereqs-not-met) (hash-table-set! test-registry hed 'removed) (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "TEN_STRIKES" #f) ;; I'm unclear on if this roll up is needed - it may be the root cause of the "all set to FAIL" bug. - (rmt:roll-up-pass-fail-counts run-id test-name item-path #f "FAIL" #f) ;; treat as FAIL + (rmt:set-state-status-and-roll-up-items run-id test-name item-path #f "FAIL" #f) ;; treat as FAIL (list (if (null? tal)(car newtal)(car tal)) tal reg reruns))))) ;; can't drop this - maybe running? Just keep trying Index: tasks.scm ================================================================== --- tasks.scm +++ tasks.scm @@ -448,14 +448,16 @@ (reverse res))) ;; no elegance here ... ;; (define (tasks:kill-server hostname pid #!key (kill-switch "")) + (server:remove-dotserver-file *toppath* ".*") (debug:print-info 0 *default-log-port* "Attempting to kill server process " pid " on host " hostname) (setenv "TARGETHOST" hostname) (setenv "TARGETHOST_LOGF" "server-kills.log") (system (conc "nbfake kill "kill-switch" "pid)) + (unsetenv "TARGETHOST_LOGF") (unsetenv "TARGETHOST")) ;; look up a server by run-id and send it a kill, also delete the record for that server ;; @@ -466,10 +468,11 @@ (let ((hostname (vector-ref sdat 6)) (pid (vector-ref sdat 5)) (server-id (vector-ref sdat 0))) (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "killed") (debug:print-info 0 *default-log-port* "Killing server " server-id " for run-id " run-id " on host " hostname " with pid " pid) + (server:remove-dotserver-file *toppath* ".*") (tasks:kill-server hostname pid) (tasks:server-delete-record (db:delay-if-busy tdbdat) server-id tag) ) (debug:print-info 0 *default-log-port* "No server found for run-id " run-id ", nothing to kill")) ;; (sqlite3:finalize! tdb) )) Index: tests.scm ================================================================== --- tests.scm +++ tests.scm @@ -351,15 +351,10 @@ (loop (car tal)(cdr tal))) #f)))))) (pop-directory) result))))) -(define (tests:test-force-state-status! run-id test-id state status) - (rmt:test-set-status-state run-id test-id status state #f) - ;; (rmt:roll-up-pass-fail-counts run-id test-name item - (mt:process-triggers run-id test-id state status)) - ;; Do not rpc this one, do the underlying calls!!! (define (tests:test-set-status! run-id test-id state status comment dat #!key (work-area #f)) (let* ((real-status status) (otherdat (if dat dat (make-hash-table))) (testdat (rmt:get-test-info-by-id run-id test-id)) @@ -396,12 +391,12 @@ (debug:print 4 *default-log-port* "real-status " real-status ", waived " waived ", status " status) ;; update the primary record IF state AND status are defined (if (and state status) (begin - (rmt:test-set-status-state run-id test-id real-status state (if waived waived comment)) - ;; (mt:process-triggers run-id test-id state real-status) ;; triggers are called in test-set-status-state + (rmt:test-set-state-status run-id test-id state real-status (if waived waived comment)) + ;; (mt:process-triggers run-id test-id state real-status) ;; triggers are called in test-set-state-status )) ;; if status is "AUTO" then call rollup (note, this one modifies data in test ;; run area, it does remote calls under the hood. ;; (if (and test-id state status (equal? status "AUTO")) @@ -443,11 +438,11 @@ (rmt:csv->test-data run-id test-id dat)))) ;; need to update the top test record if PASS or FAIL and this is a subtest (if (not (equal? item-path "")) - (rmt:roll-up-pass-fail-counts run-id test-name item-path state status #f)) + (rmt:set-state-status-and-roll-up-items run-id test-name item-path state status #f)) (if (or (and (string? comment) (string-match (regexp "\\S+") comment)) waived) (let ((cmt (if waived waived comment))) @@ -481,12 +476,11 @@ (lockf (conc outputfilename ".lock"))) (let loop ((have-lock (common:simple-file-lock lockf))) (if have-lock (let ((script (configf:lookup *configdat* "testrollup" test-name))) (print "Obtained lock for " outputfilename) - (rmt:roll-up-pass-fail-counts run-id test-name "" #f #f #f) - ;; (rmt:test-set-status-state run-id test-name #f #f #f) ;; (rmt:top-test-set-per-pf-counts run-id test-name) + (rmt:set-state-status-and-roll-up-items run-id test-name "" #f #f #f) (if script (system (conc script " > " outputfilename " & ")) (tests:generate-html-summary-for-iterated-test run-id test-id test-name outputfilename)) (common:simple-file-release-lock lockf) (change-directory orig-dir) Index: tests/fdktestqa/testqa/Makefile ================================================================== --- tests/fdktestqa/testqa/Makefile +++ tests/fdktestqa/testqa/Makefile @@ -5,16 +5,16 @@ NEWDASHBOARD = $(BINDIR)/newdashboard RUNNAME = a NUMTESTS = 20 all : - $(MEGATEST) -remove-runs -target a/b :runname c -testpatt %/% - $(MEGATEST) -runtests % -target a/b :runname c + $(MEGATEST) -remove-runs -target a/b -runname c -testpatt %/% + $(MEGATEST) -run -testpatt % -target a/b -runname c bigbig : for tn in a b c d;do \ - ($(MEGATEST) -runtests % -target a/b :runname $tn & ) ; \ + ($(MEGATEST) -run -testpatt % -target a/b -runname $tn & ) ; \ done waitonpatt : megatest -remove-runs -runname waitonpatt -target a/b -testpatt % NUMTESTS=15 megatest -run -target a/b -runname waitonpatt -testpatt bigrun3/%8 @@ -22,17 +22,17 @@ waitonall : megatest -remove-runs -runname waitonall -target a/b -testpatt % NUMTESTS=20 megatest -run -target a/b -runname waitonall -testpatt alltop bigrun : - NUMTESTS=$(NUMTESTS) $(MEGATEST) -runtests bigrun -target a/bigrun :runname a$(shell date +%V) + NUMTESTS=$(NUMTESTS) $(MEGATEST) -run -testpatt bigrun -target a/bigrun -runname a$(shell date +%V) bigrun2 : - NUMTESTS=$(NUMTESTS) $(MEGATEST) -runtests bigrun2 -target a/bigrun2 :runname a$(shell date +%V) + NUMTESTS=$(NUMTESTS) $(MEGATEST) -run -testpatt bigrun2 -target a/bigrun2 -runname a$(shell date +%V) bigrun3 : - NUMTESTS=$(NUMTESTS) $(MEGATEST) -runtests bigrun3 -target a/bigrun3 :runname $(RUNNAME) + NUMTESTS=$(NUMTESTS) $(MEGATEST) -run -testpatt bigrun3 -target a/bigrun3 -runname $(RUNNAME) dashboard : mkdir -p ../simpleruns $(DASHBOARD) -rows 20 & @@ -41,6 +41,6 @@ compile : (cd ../../..;make -j && make install) clean : - rm -rf ../simple*/*/* megatest.db db/* ../simple*/.db/* logs/* monitor.db + rm -rf ../simple*/*/* megatest.db db/* ../simple*/.db/* logs/* monitor.db /tmp/$(USER)/megatest_localdb/testqa .server ADDED tests/fdktestqa/testqa/local.config.example Index: tests/fdktestqa/testqa/local.config.example ================================================================== --- /dev/null +++ tests/fdktestqa/testqa/local.config.example @@ -0,0 +1,13 @@ +[host-types] +general #MTLOWESTLOAD xena zeus + +[jobtools] +launcher nbfake +maxload 1.5 +flexi-launcher yes + +[setup] +launch-delay 0 + +[launchers] +% general Index: tests/fdktestqa/testqa/megatest.config ================================================================== --- tests/fdktestqa/testqa/megatest.config +++ tests/fdktestqa/testqa/megatest.config @@ -1,11 +1,13 @@ [setup] testcopycmd cp --remove-destination -rlv TEST_SRC_PATH/. TEST_TARG_PATH/. >> TEST_TARG_PATH/mt_launch.log 2>> TEST_TARG_PATH/mt_launch.log # launchwait no +launch-delay 0 # All these are overridden in ../fdk.config # [jobtools] # launcher nbfake # launcher bsub -q priority -o $MT_TEST_RUN_DIR/openlava.log [include ../fdk.config] +[include local.config] Index: utils/nbfake ================================================================== --- utils/nbfake +++ utils/nbfake @@ -70,7 +70,7 @@ if [[ -z "$MY_NBFAKE_HOST" ]]; then # Run locally sh -c "cd $CURRWD;export DISPLAY=$DISPLAY; export PATH=$PATH; nohup $* >> $MY_NBFAKE_LOG 2>&1 &" else # run remotely - ssh -n -f $MY_NBFAKE_HOST "sh -c \"cd $CURRWD;export DISPLAY=$DISPLAY; export PATH=$PATH; nohup $* >> $MY_NBFAKE_LOG 2>&1 &\"" + ssh -X -n -f $MY_NBFAKE_HOST "sh -c \"cd $CURRWD;export DISPLAY=$DISPLAY; export PATH=$PATH; nohup $* >> $MY_NBFAKE_LOG 2>&1 &\"" fi Index: utils/plot-code.scm ================================================================== --- utils/plot-code.scm +++ utils/plot-code.scm @@ -8,17 +8,19 @@ ;; third param is list of files to scan (use regex srfi-69 srfi-13) (define targs #f) -(define files (cddddr (argv))) +(define files (cdr (cddddr (argv)))) (let ((targdat (cadddr (argv)))) (if (equal? targdat "-") (set! targs files) (set! targs (string-split targdat ",")))) +(define function-patt (car (cdr (cdddr (argv))))) +(define function-rx (regexp function-patt)) (define filedat-defns (make-hash-table)) (define filedat-usages (make-hash-table)) (define defn-rx (regexp "^\\s*\\(define\\s+\\(([^\\s\\)]+).*")) (define all-regexs (make-hash-table)) @@ -32,10 +34,11 @@ (lambda () (apply print data)))) (print-err "Making graph for files: " (string-intersperse targs ", ")) (print-err "Looking at files: " (string-intersperse files ", ")) +(print-err "Function regex: " function-patt) ;; Gather the functions ;; (for-each (lambda (fname) @@ -46,16 +49,18 @@ (if (not (eof-object? inl)) (let ((match (string-match defn-rx inl))) (if match (let ((fnname (cadr match))) ;; (print " " fnname) - (set! all-fns (cons fnname all-fns)) - (hash-table-set! - filedat-defns - fname - (cons fnname (hash-table-ref/default filedat-defns fname '()))) - )) + (if (string-match function-rx fnname) + (begin + (set! all-fns (cons fnname all-fns))) + (hash-table-set! + filedat-defns + fname + (cons fnname (hash-table-ref/default filedat-defns fname '()))) + ))) (loop (read-line)))))))) files) ;; fill up the regex hash (print-err "Make the huge regex hash")