Index: db.scm ================================================================== --- db.scm +++ db.scm @@ -282,12 +282,14 @@ (begin (debug:print-info 11 "open-test-db END (unsucessful)" testpath) #f))) ;; find and open the testdat.db file for an existing test -(define (db:open-test-db-by-test-id db test-id) - (let* ((test-path (cdb:remote-run db:test-get-rundir-from-test-id db test-id))) +(define (db:open-test-db-by-test-id db test-id #!key (testpath #f)) + (let* ((test-path (if testpath + testpath + (cdb:remote-run db:test-get-rundir-from-test-id db test-id)))) (debug:print 3 "TEST PATH: " test-path) (open-test-db test-path))) (define (db:testdb-initialize db) (debug:print 11 "db:testdb-initialize START") @@ -540,10 +542,43 @@ (conc fieldname " " wildtype " '" patt "'"))) (if (null? patts) '("") patts)) comparator))) + + +;; register a test run with the db +(define (db:register-run db keys keyvallst runname state status user) + (debug:print 3 "runs:register-run, keys: " keys " keyvallst: " keyvallst " runname: " runname " state: " state " status: " status " user: " user) + (let* ((keystr (keys->keystr keys)) + (comma (if (> (length keys) 0) "," "")) + (andstr (if (> (length keys) 0) " AND " "")) + (valslots (keys->valslots keys)) ;; ?,?,? ... + (keyvals (map cadr keyvallst)) + (allvals (append (list runname state status user) keyvals)) + (qryvals (append (list runname) keyvals)) + (key=?str (string-intersperse (map (lambda (k)(conc (key:get-fieldname k) "=?")) keys) " AND "))) + (debug:print 3 "keys: " keys " allvals: " allvals " keyvals: " keyvals) + (debug:print 2 "NOTE: using target " (string-intersperse keyvals "/") " for this run") + (if (and runname (null? (filter (lambda (x)(not x)) keyvals))) ;; there must be a better way to "apply and" + (let ((res #f)) + (apply sqlite3:execute db (conc "INSERT OR IGNORE INTO runs (runname,state,status,owner,event_time" comma keystr ") VALUES (?,?,?,?,strftime('%s','now')" comma valslots ");") + allvals) + (apply sqlite3:for-each-row + (lambda (id) + (set! res id)) + db + (let ((qry (conc "SELECT id FROM runs WHERE (runname=? " andstr key=?str ");"))) + ;(debug:print 4 "qry: " qry) + qry) + qryvals) + (sqlite3:execute db "UPDATE runs SET state=?,status=? WHERE id=?;" state status res) + res) + (begin + (debug:print 0 "ERROR: Called without all necessary keys") + #f)))) + ;; replace header and keystr with a call to runs:get-std-run-fields ;; ;; keypatts: ( (KEY1 "abc%def")(KEY2 "%") ) ;; runpatts: patt1,patt2 ... @@ -701,25 +736,10 @@ ;;====================================================================== ;; T E S T S ;;====================================================================== -(define (db:tests-register-test db run-id test-name item-path) - (debug:print-info 11 "db:tests-register-test START db=" db ", run-id=" run-id ", test-name=" test-name ", item-path=\"" item-path "\"") - (let ((item-paths (if (equal? item-path "") - (list item-path) - (list item-path "")))) - (for-each - (lambda (pth) - (sqlite3:execute db "INSERT OR IGNORE INTO tests (run_id,testname,event_time,item_path,state,status) VALUES (?,?,strftime('%s','now'),?,'NOT_STARTED','n/a');" - run-id - test-name - pth)) - item-paths) - (debug:print-info 11 "db:tests-register-test END db=" db ", run-id=" run-id ", test-name=" test-name ", item-path=\"" item-path "\"") - #f)) - ;; states and statuses are lists, turn them into ("PASS","FAIL"...) and use NOT IN ;; i.e. these lists define what to NOT show. ;; states and statuses are required to be lists, empty is ok ;; not-in #t = above behaviour, #f = must match (define (db:get-tests-for-run db run-id testpatt states statuses @@ -824,13 +844,13 @@ ) (debug:print-info 11 "db:get-tests-for-run START run-ids=" run-ids ", testpatt=" testpatt ", states=" states ", statuses=" statuses ", not-in=" not-in ", sort-by=" sort-by) res)) ;; this one is a bit broken BUG FIXME -(define (db:delete-test-step-records db test-id) +(define (db:delete-test-step-records db test-id #!key (testpath #f)) ;; Breaking it into two queries for better file access interleaving - (let* ((tdb (db:open-test-db-by-test-id db test-id))) + (let* ((tdb (db:open-test-db-by-test-id db test-id testpath: testpath))) ;; test db's can go away - must check every time (if tdb (begin (sqlite3:execute tdb "DELETE FROM test_steps;") (sqlite3:execute tdb "DELETE FROM test_data;") @@ -874,10 +894,19 @@ testnames)) (define (cdb:delete-tests-in-state serverdat run-id state) (cdb:client-call serverdat 'delete-tests-in-state #t *default-numtries* run-id state)) +(define (cdb:tests-update-cpuload-diskfree serverdat test-id cpuload diskfree) + (cdb:client-call serverdat 'update-cpuload-diskfree #t *default-numtries* cpuload diskfree test-id)) + +(define (cdb:tests-update-run-duration serverdat test-id minutes) + (cdb:client-call serverdat 'update-run-duration #t *default-numtries* minutes test-id)) + +(define (cdb:tests-update-uname-host serverdat test-id uname hostname) + (cdb:client-call serverdat 'update-uname-host #t *default-numtries* test-id uname hostname)) + ;; speed up for common cases with a little logic (define (db:test-set-state-status-by-id db test-id newstate newstatus newcomment) (cond ((and newstate newstatus newcomment) (sqlite3:exectute db "UPDATE tests SET state=?,status=?,comment=? WHERE id=?;" newstate newstatus test-id)) @@ -954,12 +983,15 @@ (define db:get-test-id db:get-test-id-not-cached) ;; given a test-info record, patch in the latest data from the testdat.db file ;; found in the test run directory -(define (db:patch-tdb-data-into-test-info db test-id res) - (let ((tdb (db:open-test-db-by-test-id db test-id))) +;; +;; NOT USED +;; +(define (db:patch-tdb-data-into-test-info db test-id res #!key (testpath #f)) + (let ((tdb (db:open-test-db-by-test-id db test-id testpath: testpath))) ;; get state and status from megatest.db in real time ;; other fields that perhaps should be updated: ;; fail_count ;; pass_count ;; final_logf @@ -1196,10 +1228,11 @@ (vector-ref tmp 2)))) ((zmq) (handle-exceptions exn (begin + (debug:print-info 0 "cdb:client-call timeout or error. Trying again in 5 seconds") (thread-sleep! 5) (if (> numretries 0)(apply cdb:client-call serverdat qtype immediate (- numretries 1) params))) (let* ((push-socket (vector-ref serverdat 0)) (sub-socket (vector-ref serverdat 1)) (client-sig (client:get-signature)) @@ -1217,31 +1250,32 @@ (receive-message* sub-socket) ;; now get the actual message (let ((myres (db:string->obj (receive-message* sub-socket)))) (if (equal? query-sig (vector-ref myres 1)) (set! res (vector-ref myres 2)) - (loop)))))) - (timeout (lambda () - (let loop ((n numretries)) - (thread-sleep! 15) - (if (not res) - (if (> numretries 0) - (begin - (debug:print 2 "WARNING: no reply to query " params ", trying resend") - (debug:print-info 11 "re-sending message") - (send-message push-socket zdat) - (debug:print-info 11 "message re-sent") - (loop (- n 1))) - ;; (apply cdb:client-call *runremote* qtype immediate (- numretries 1) params)) - (begin - (debug:print 0 "ERROR: cdb:client-call timed out " params ", exiting.") - (exit 5)))))))) + (loop))))))) + ;; (timeout (lambda () + ;; (let loop ((n numretries)) + ;; (thread-sleep! 15) + ;; (if (not res) + ;; (if (> numretries 0) + ;; (begin + ;; (debug:print 2 "WARNING: no reply to query " params ", trying resend") + ;; (debug:print-info 11 "re-sending message") + ;; (send-message push-socket zdat) + ;; (debug:print-info 11 "message re-sent") + ;; (loop (- n 1))) + ;; ;; (apply cdb:client-call *runremote* qtype immediate (- numretries 1) params)) + ;; (begin + ;; (debug:print 0 "ERROR: cdb:client-call timed out " params ", exiting.") + ;; (exit 5)))))))) (debug:print-info 11 "Starting threads") (let ((th1 (make-thread send-receive "send receive")) - (th2 (make-thread timeout "timeout"))) + ;; (th2 (make-thread timeout "timeout")) + ) (thread-start! th1) - (thread-start! th2) + ;; (thread-start! th2) (thread-join! th1) (debug:print-info 11 "cdb:client-call returning res=" res) res)))))) (define (cdb:set-verbosity serverdat val) @@ -1266,14 +1300,11 @@ (define (cdb:pass-fail-counts serverdat test-id fail-count pass-count) (cdb:client-call serverdat 'pass-fail-counts #t *default-numtries* fail-count pass-count test-id)) (define (cdb:tests-register-test serverdat run-id test-name item-path) - (let ((item-paths (if (equal? item-path "") - (list item-path) - (list item-path "")))) - (cdb:client-call serverdat 'register-test #t *default-numtries* run-id test-name item-path))) + (cdb:client-call serverdat 'register-test #t *default-numtries* run-id test-name item-path)) (define (cdb:flush-queue serverdat) (cdb:client-call serverdat 'flush #f *default-numtries*)) (define (cdb:kill-server serverdat) @@ -1304,10 +1335,14 @@ db "SELECT rundir,final_logf FROM tests WHERE run_id=? AND testname=? AND item_path='';" run-id test-name) res)) +;;====================================================================== +;; A G R E G A T E D T R A N S A C T I O N D B W R I T E S +;;====================================================================== + (define db:queries (list '(register-test "INSERT OR IGNORE INTO tests (run_id,testname,event_time,item_path,state,status) VALUES (?,?,strftime('%s','now'),?,'NOT_STARTED','n/a');") '(state-status "UPDATE tests SET state=?,status=? WHERE id=?;") '(state-status-msg "UPDATE tests SET state=?,status=?,comment=? WHERE id=?;") '(pass-fail-counts "UPDATE tests SET fail_count=?,pass_count=? WHERE id=?;") @@ -1323,10 +1358,13 @@ '(test-set-log "UPDATE tests SET final_logf=? WHERE id=?;") '(test-set-rundir-by-test-id "UPDATE tests SET rundir=? WHERE id=?") '(test-set-rundir "UPDATE tests SET rundir=? WHERE run_id=? AND testname=? AND item_path=?;") '(delete-tests-in-state "DELETE FROM tests WHERE state=? AND run_id=?;") '(tests:test-set-toplog "UPDATE tests SET final_logf=? WHERE run_id=? AND testname=? AND item_path='';") + '(update-cpuload-diskfree "UPDATE tests SET cpuload=?,diskfree=? WHERE id=?;") + '(update-run-duration "UPDATE tests SET run_duration=? WHERE id=?;") + '(update-uname-host "UPDATE tests SET uname=?,host=? WHERE id=?;") )) ;; do not run these as part of the transaction (define db:special-queries '(rollup-tests-pass-fail db:roll-up-pass-fail-counts @@ -1593,13 +1631,13 @@ ;;====================================================================== ;; T E S T D A T A ;;====================================================================== -(define (db:csv->test-data db test-id csvdata) +(define (db:csv->test-data db test-id csvdata #!key (testpath #f)) (debug:print 4 "test-id " test-id ", csvdata: " csvdata) - (let ((tdb (db:open-test-db-by-test-id db test-id))) + (let ((tdb (db:open-test-db-by-test-id db test-id testpath: testpath))) (if tdb (let ((csvlist (csv->list (make-csv-reader (open-input-string csvdata) '((strip-leading-whitespace? #t) (strip-trailing-whitespace? #t)) )))) ;; (csv->list csvdata))) @@ -1654,12 +1692,12 @@ test-id category variable value expected tol units (if comment comment "") status type) (sqlite3:finalize! tdb))) csvlist))))) ;; get a list of test_data records matching categorypatt -(define (db:read-test-data db test-id categorypatt) - (let ((tdb (db:open-test-db-by-test-id db test-id))) +(define (db:read-test-data db test-id categorypatt #!key (testpath #f)) + (let ((tdb (db:open-test-db-by-test-id db test-id testpath: testpath))) (if tdb (let ((res '())) (sqlite3:for-each-row (lambda (id test_id category variable value expected tol units comment status type) (set! res (cons (vector id test_id category variable value expected tol units comment status type) res))) @@ -1668,28 +1706,28 @@ (sqlite3:finalize! tdb) (reverse res)) '()))) ;; NOTE: Run this local with #f for db !!! -(define (db:load-test-data db test-id) +(define (db:load-test-data db test-id #!key (testpath #f)) (let loop ((lin (read-line))) (if (not (eof-object? lin)) (begin (debug:print 4 lin) (db:csv->test-data db test-id lin) (loop (read-line))))) ;; roll up the current results. ;; FIXME: Add the status to - (db:test-data-rollup db test-id #f)) + (db:test-data-rollup db test-id #f testpath: testpath)) ;; WARNING: Do NOT call this for the parent test on an iterated test ;; Roll up test_data pass/fail results ;; look at the test_data status field, ;; if all are pass (any case) and the test status is PASS or NULL or '' then set test status to PASS. ;; if one or more are fail (any case) then set test status to PASS, non "pass" or "fail" are ignored -(define (db:test-data-rollup db test-id status) - (let ((tdb (db:open-test-db-by-test-id db test-id)) +(define (db:test-data-rollup db test-id status #!key (testpath #f)) + (let ((tdb (db:open-test-db-by-test-id db test-id testpath: testpath)) (fail-count 0) (pass-count 0)) (if tdb (begin (sqlite3:for-each-row @@ -1738,12 +1776,12 @@ (define (db:step-get-time-as-string vec) (seconds->time-string (db:step-get-event_time vec))) ;; db-get-test-steps-for-run -(define (db:get-steps-for-test db test-id) - (let* ((tdb (db:open-test-db-by-test-id db test-id)) +(define (db:get-steps-for-test db test-id #!key (testpath #f)) + (let* ((tdb (db:open-test-db-by-test-id db test-id testpath: testpath)) (res '())) (if tdb (begin (sqlite3:for-each-row (lambda (id test-id stepname state status event-time logfile) @@ -1755,12 +1793,12 @@ (reverse res)) '()))) ;; get a pretty table to summarize steps ;; -(define (db:get-steps-table db test-id) - (let ((steps (db:get-steps-for-test db test-id))) +(define (db:get-steps-table db test-id #!key (testpath #f)) + (let ((steps (db:get-steps-for-test db test-id testpath: testpath))) ;; organise the steps for better readability (let ((res (make-hash-table))) (for-each (lambda (step) (debug:print 6 "step=" step) @@ -1815,12 +1853,12 @@ (else #f))))) res))) ;; get a pretty table to summarize steps ;; -(define (db:get-steps-table-list db test-id) - (let ((steps (db:get-steps-for-test db test-id))) +(define (db:get-steps-table-list db test-id #!key (testpath #f)) + (let ((steps (db:get-steps-for-test db test-id testpath: testpath))) ;; organise the steps for better readability (let ((res (make-hash-table))) (for-each (lambda (step) (debug:print 6 "step=" step) @@ -1957,14 +1995,14 @@ (if (not ever-seen) (set! result (append (if (null? tests)(list waitontest-name) tests) result))))) waitons) (delete-duplicates result)))) -(define (db:teststep-set-status! db test-id teststep-name state-in status-in comment logfile) +(define (db:teststep-set-status! db test-id teststep-name state-in status-in comment logfile #!key (testpath #f)) (debug:print 4 "test-id: " test-id " teststep-name: " teststep-name) ;; db:open-test-db-by-test-id does cdb:remote-run - (let* ((tdb (db:open-test-db-by-test-id db test-id)) + (let* ((tdb (db:open-test-db-by-test-id db test-id testpath: testpath)) (state (items:check-valid-items "state" state-in)) (status (items:check-valid-items "status" status-in))) (if (or (not state)(not status)) (debug:print 3 "WARNING: Invalid " (if status "status" "state") " value \"" (if status state-in status-in) "\", update your validvalues section in megatest.config")) Index: http-transport.scm ================================================================== --- http-transport.scm +++ http-transport.scm @@ -202,11 +202,11 @@ (serverdat (list iface port))) (set! login-res (client:login serverdat)) (if (and (not (null? login-res)) (car login-res)) (begin - (debug:print-info 0 "Logged in and connected to " iface ":" port) + (debug:print-info 2 "Logged in and connected to " iface ":" port) (set! *runremote* serverdat) serverdat) (begin (debug:print-info 0 "Failed to login or connect to " iface ":" port) (set! *runremote* #f) Index: launch.scm ================================================================== --- launch.scm +++ launch.scm @@ -133,11 +133,11 @@ (alist->env-vars env-ovrd) (set-megatest-env-vars run-id) (set-item-env-vars itemdat) (save-environment-as-files "megatest") ;; open-run-close not needed for test-set-meta-info - (test-set-meta-info #f test-id run-id test-name itemdat 0) + (tests:set-meta-info #f test-id run-id test-name itemdat 0 testpath) (tests:test-set-status! test-id "REMOTEHOSTSTART" "n/a" (args:get-arg "-m") #f) (if (args:get-arg "-xterm") (set! fullrunscript "xterm") (if (and fullrunscript (not (file-execute-access? fullrunscript))) (system (conc "chmod ug+x " fullrunscript)))) @@ -208,11 +208,11 @@ ;; call the command using mt_ezstep (set! script (conc "mt_ezstep " stepname " " (if prevstep prevstep "-") " " stepcmd)) (debug:print 4 "script: " script) ;; DO NOT remote - (db:teststep-set-status! #f test-id stepname "start" "-" #f #f) + (db:teststep-set-status! #f test-id stepname "start" "-" #f #f testpath: testpath) ;; now launch (let ((pid (process-run script))) (let processloop ((i 0)) (let-values (((pid-val exit-status exit-code)(process-wait pid #t))) (mutex-lock! m) @@ -226,11 +226,11 @@ (processloop (+ i 1)))) )) (let ((exinfo (vector-ref exit-info 2)) (logfna (if logpro-used (conc stepname ".html") ""))) ;; testing if procedures called in a remote call cause problems (ans: no or so I suspect) - (db:teststep-set-status! #f test-id stepname "end" exinfo #f logfna)) + (db:teststep-set-status! #f test-id stepname "end" exinfo #f logfna testpath: testpath)) (if logpro-used (cdb:test-set-log! *runremote* test-id (conc stepname ".html"))) ;; set the test final status (let* ((this-step-status (cond ((and (eq? (vector-ref exit-info 2) 2) logpro-used) 'warn) @@ -276,11 +276,11 @@ (kill-tries 0)) (let loop ((minutes (calc-minutes))) (begin (set! kill-job? (test-get-kill-request test-id)) ;; run-id test-name itemdat)) ;; open-run-close not needed for test-set-meta-info - (test-set-meta-info #f test-id run-id test-name itemdat minutes) + (tests:set-meta-info #f test-id run-id test-name itemdat minutes testpath) (if kill-job? (begin (mutex-lock! m) (let* ((pid (vector-ref exit-info 0))) (if (number? pid) @@ -537,15 +537,16 @@ (begin (let* ((ovrcmd (let ((cmd (config-lookup *configdat* "setup" "testcopycmd"))) (if cmd ;; substitute the TEST_SRC_PATH and TEST_TARG_PATH (string-substitute "TEST_TARG_PATH" test-path - (string-substitute "TEST_SRC_PATH" test-src-path cmd)) + (string-substitute "TEST_SRC_PATH" test-src-path cmd #t) #t) #f))) (cmd (if ovrcmd ovrcmd - (conc "rsync -av" (if (debug:debug-mode 1) "" "q") " " test-src-path "/ " test-path "/"))) + (conc "rsync -av" (if (debug:debug-mode 1) "" "q") " " test-src-path "/ " test-path "/" + " >> " test-path "/mt_launch.log >>2 " test-path "/mt_launch.log"))) (status (system cmd))) (if (not (eq? status 0)) (debug:print 2 "ERROR: problem with running \"" cmd "\""))) (list lnkpathf lnkpath )) (list #f #f)))) @@ -668,34 +669,37 @@ (list "MT_ITEM_INFO" (conc itemdat)) (list "MT_RUNNAME" runname) (list "MT_TARGET" mt_target) ) itemdat))) - (launch-results (apply cmd-run-with-stderr->list ;; cmd-run-proc-each-line + (launch-results (apply (if (equal? (configf:lookup *configdat* "setup" "launchwait") "yes") + cmd-run-with-stderr->list + process-run) (if useshell (string-intersperse fullcmd " ") (car fullcmd)) - ;; conc (if useshell '() - (cdr fullcmd))))) ;; launcher fullcmd)));; (apply cmd-run-proc-each-line launcher print fullcmd))) ;; (cmd-run->list fullcmd)) - (with-output-to-file "mt_launch.log" - (lambda () - (apply print launch-results))) + (cdr fullcmd))))) + (if (list? launch-results) + (with-output-to-file "mt_launch.log" + (lambda () + (apply print launch-results)) + #:append)) (debug:print 2 "Launching completed, updating db") (debug:print 2 "Launch results: " launch-results) (if (not launch-results) - (begin - (print "ERROR: Failed to run " (string-intersperse fullcmd " ") ", exiting now") - ;; (sqlite3:finalize! db) - ;; good ole "exit" seems not to work - ;; (_exit 9) - ;; but this hack will work! Thanks go to Alan Post of the Chicken email list - ;; NB// Is this still needed? Should be safe to go back to "exit" now? - (process-signal (current-process-id) signal/kill) - )) + (begin + (print "ERROR: Failed to run " (string-intersperse fullcmd " ") ", exiting now") + ;; (sqlite3:finalize! db) + ;; good ole "exit" seems not to work + ;; (_exit 9) + ;; but this hack will work! Thanks go to Alan Post of the Chicken email list + ;; NB// Is this still needed? Should be safe to go back to "exit" now? + (process-signal (current-process-id) signal/kill) + )) (alist->env-vars miscprevvals) (alist->env-vars testprevvals) (alist->env-vars commonprevvals) launch-results)) (change-directory *toppath*)) Index: megatest.scm ================================================================== --- megatest.scm +++ megatest.scm @@ -34,12 +34,19 @@ (include "db_records.scm") (include "megatest-fossil-hash.scm") ;; (use trace dot-locking) ;; (trace -;; cdb:client-call -;; cdb:remote-run +;; db:teststep-set-status! +;; db:open-test-db-by-test-id +;; db:test-get-rundir-from-test-id +;; cdb:tests-register-test +;; cdb:tests-update-uname-host +;; cdb:tests-update-run-duration +;; ;; cdb:client-call +;; ;; cdb:remote-run +;; ) ;; cdb:test-set-status-state ;; change-directory ;; db:process-queue-item ;; db:test-get-logfile-info ;; db:teststep-set-status! @@ -806,12 +813,12 @@ (if (not (setup-for-run)) (begin (debug:print 0 "Failed to setup, exiting") (exit 1))) (if (and state status) - ;; DO NOT remote run - (db:teststep-set-status! db test-id step state status msg logfile) + ;; DO NOT remote run, makes calls to the testdat.db test db. + (db:teststep-set-status! db test-id step state status msg logfile testpath: testpath) (begin (debug:print 0 "ERROR: You must specify :state and :status with every call to -step") (exit 6)))))) (if (args:get-arg "-step") @@ -823,11 +830,12 @@ (args:get-arg "-setlog") (args:get-arg "-m")) ;; (if db (sqlite3:finalize! db)) (set! *didsomething* #t))) -(if (or (args:get-arg "-setlog") ;; since setting up is so costly lets piggyback on -test-status +(if (or (and (args:get-arg "-setlog") ;; since setting up is so costly lets piggyback on -test-status + (not (args:get-arg "-step"))) ;; -setlog may have been processed already in the "-step" previous (args:get-arg "-set-toplog") (args:get-arg "-test-status") (args:get-arg "-set-values") (args:get-arg "-load-test-data") (args:get-arg "-runstep") @@ -894,11 +902,11 @@ (fullcmd (conc "(" (string-intersperse (cons cmd params) " ") ") " redir " " logfile))) ;; mark the start of the test ;; DO NOT run remote - (db:teststep-set-status! db test-id stepname "start" "n/a" (args:get-arg "-m") logfile) + (db:teststep-set-status! db test-id stepname "start" "n/a" (args:get-arg "-m") logfile testpath: testpath) ;; run the test step (debug:print-info 2 "Running \"" fullcmd "\"") (change-directory startingdir) (set! exitstat (system fullcmd)) ;; cmd params)) (set! *globalexitstatus* exitstat) @@ -914,11 +922,11 @@ (set! *globalexitstatus* exitstat) ;; no necessary (change-directory testpath) (cdb:test-set-log! *runremote* test-id htmllogfile))) (let ((msg (args:get-arg "-m"))) ;; DO NOT run remote - (db:teststep-set-status! db test-id stepname "end" exitstat msg logfile)) + (db:teststep-set-status! db test-id stepname "end" exitstat msg logfile testpath: testpath)) ))) (if (or (args:get-arg "-test-status") (args:get-arg "-set-values")) (let ((newstatus (cond ((number? status) (if (equal? status 0) "PASS" "FAIL")) Index: runs.scm ================================================================== --- runs.scm +++ runs.scm @@ -171,40 +171,10 @@ ;; New methodology. These routines will replace the above in time. For ;; now the code is duplicated. This stuff is initially used in the monitor ;; based code. ;;====================================================================== -;; register a test run with the db -(define (runs:register-run db keys keyvallst runname state status user) - (debug:print 3 "runs:register-run, keys: " keys " keyvallst: " keyvallst " runname: " runname " state: " state " status: " status " user: " user) - (let* ((keystr (keys->keystr keys)) - (comma (if (> (length keys) 0) "," "")) - (andstr (if (> (length keys) 0) " AND " "")) - (valslots (keys->valslots keys)) ;; ?,?,? ... - (keyvals (map cadr keyvallst)) - (allvals (append (list runname state status user) keyvals)) - (qryvals (append (list runname) keyvals)) - (key=?str (string-intersperse (map (lambda (k)(conc (key:get-fieldname k) "=?")) keys) " AND "))) - (debug:print 3 "keys: " keys " allvals: " allvals " keyvals: " keyvals) - (debug:print 2 "NOTE: using target " (string-intersperse keyvals "/") " for this run") - (if (and runname (null? (filter (lambda (x)(not x)) keyvals))) ;; there must be a better way to "apply and" - (let ((res #f)) - (apply sqlite3:execute db (conc "INSERT OR IGNORE INTO runs (runname,state,status,owner,event_time" comma keystr ") VALUES (?,?,?,?,strftime('%s','now')" comma valslots ");") - allvals) - (apply sqlite3:for-each-row - (lambda (id) - (set! res id)) - db - (let ((qry (conc "SELECT id FROM runs WHERE (runname=? " andstr key=?str ");"))) - ;(debug:print 4 "qry: " qry) - qry) - qryvals) - (sqlite3:execute db "UPDATE runs SET state=?,status=? WHERE id=?;" state status res) - res) - (begin - (debug:print 0 "ERROR: Called without all necessary keys") - #f)))) ;; This is a duplicate of run-tests (which has been deprecated). Use this one instead of run tests. ;; keyvals. ;; ;; test-names: Comma separated patterns same as test-patts but used in selection @@ -214,11 +184,11 @@ (define (runs:run-tests target runname test-names test-patts user flags) (common:clear-caches) ;; clear all caches (let* ((db #f) (keys (cdb:remote-run db:get-keys #f)) (keyvallst (keys:target->keyval keys target)) - (run-id (cdb:remote-run runs:register-run #f keys keyvallst runname "new" "n/a" user)) ;; test-name))) + (run-id (cdb:remote-run db:register-run #f keys keyvallst runname "new" "n/a" user)) ;; test-name))) (keyvals (if run-id (cdb:remote-run db:get-key-vals #f run-id) #f)) (deferred '()) ;; delay running these since they have a waiton clause ;; keepgoing is the defacto modality now, will add hit-n-run a bit later ;; (keepgoing (hash-table-ref/default flags "-keepgoing" #f)) (runconfigf (conc *toppath* "/runconfigs.config")) @@ -364,10 +334,11 @@ ;; At this point the list of parent tests is expanded ;; NB// Should expand items here and then insert into the run queue. (debug:print 5 "test-records: " test-records ", keyvallst: " keyvallst " flags: " (hash-table->alist flags)) (let ((sorted-test-names (tests:sort-by-priority-and-waiton test-records)) (test-registery (make-hash-table)) + (registery-mutex (make-mutex)) (num-retries 0) (max-retries (config-lookup *configdat* "setup" "maxretries"))) (set! max-retries (if (and max-retries (string->number max-retries))(string->number max-retries) 100)) (if (not (null? sorted-test-names)) (let loop ((hed (car sorted-test-names)) @@ -405,17 +376,17 @@ (debug:print 0 "ERROR: test " test-name " has listed itself as a waiton, please correct this!") (set! waiton (filter (lambda (x)(not (equal? x hed))) waitons)))) (cond ;; OUTER COND ((not items) ;; when false the test is ok to be handed off to launch (but not before) - (let* ((run-limits-info (open-run-close runs:can-run-more-tests test-record)) ;; look at the test jobgroup and tot jobs running + (let* ((run-limits-info (runs:can-run-more-tests test-record)) ;; look at the test jobgroup and tot jobs running (have-resources (car run-limits-info)) (num-running (list-ref run-limits-info 1)) (num-running-in-jobgroup (list-ref run-limits-info 2)) (max-concurrent-jobs (list-ref run-limits-info 3)) (job-group-limit (list-ref run-limits-info 4)) - (prereqs-not-met (open-run-close db:get-prereqs-not-met #f run-id waitons item-path mode: testmode)) + (prereqs-not-met (cdb:remote-run db:get-prereqs-not-met #f run-id waitons item-path mode: testmode)) (fails (runs:calc-fails prereqs-not-met)) (non-completed (runs:calc-not-completed prereqs-not-met))) (debug:print-info 8 "have-resources: " have-resources " prereqs-not-met: " (string-intersperse (map (lambda (t) @@ -435,30 +406,60 @@ ;; but should check if it is due to lack of resources vs. prerequisites (debug:print-info 1 "Skipping " (tests:testqueue-get-testname test-record) " " item-path " as it doesn't match " test-patts) ;; (thread-sleep! *global-delta*) (if (not (null? tal)) (loop (car tal)(cdr tal) reruns))) - ( ;; (and - (not (hash-table-ref/default test-registery (runs:make-full-test-name test-name item-path) #f)) - ;; (and max-concurrent-jobs (> (- max-concurrent-jobs num-running) 5))) + ;; Registery has been started for this test but has not yet completed + ;; this should be rare, the case where there are only a couple of tests and the db is slow + ;; delay a short while and continue + ;; ((eq? (hash-table-ref/default test-registery (runs:make-full-test-name test-name item-path) #f) 'start) + ;; (thread-sleep! 0.01) + ;; (loop (car newtal)(cdr newtal) reruns)) + ;; count number of 'done, if more than 100 then skip on through. + (;; (and (< (length (filter (lambda (x)(eq? x 'done))(hash-table-values test-registery))) 100) ;; why get more than 200 ahead? + (not (hash-table-ref/default test-registery (runs:make-full-test-name test-name item-path) #f)) ;; ) ;; too many changes required. Implement later. (debug:print-info 4 "Pre-registering test " test-name "/" item-path " to create placeholder" ) - (open-run-close db:tests-register-test #f run-id test-name item-path) - (hash-table-set! test-registery (runs:make-full-test-name test-name item-path) #t) - ;; (thread-sleep! *global-delta*) -(runs:shrink-can-run-more-tests-delay) + ;; NEED TO THREADIFY THIS + (let ((th (make-thread (lambda () + (mutex-lock! registery-mutex) + (hash-table-set! test-registery (runs:make-full-test-name test-name item-path) 'start) + (mutex-unlock! registery-mutex) + ;; If haven't done it before register a top level test if this is an itemized test + (if (not (eq? (hash-table-ref/default test-registery (runs:make-full-test-name test-name "") #f) 'done)) + (cdb:tests-register-test *runremote* run-id test-name "")) + (cdb:tests-register-test *runremote* run-id test-name item-path) + (mutex-lock! registery-mutex) + (hash-table-set! test-registery (runs:make-full-test-name test-name item-path) 'done) + (mutex-unlock! registery-mutex)) + (conc test-name "/" item-path)))) + (thread-start! th)) + (thread-sleep! *global-delta*) + (runs:shrink-can-run-more-tests-delay) ;; DELAY TWEAKER (still needed?) (loop (car newtal)(cdr newtal) reruns)) + ;; At this point *all* test registrations must be completed. + ((not (null? (filter (lambda (x)(eq? 'start x))(hash-table-values test-registery)))) + (debug:print-info 0 "Waiting on test registrations: " (string-intersperse + (filter (lambda (x) + (eq? (hash-table-ref/default test-registery x #f) 'start)) + (hash-table-keys test-registery)) + ", ")) + (thread-sleep! 0.1) + (loop hed tal reruns)) ((not have-resources) ;; simply try again after waiting a second (debug:print-info 1 "no resources to run new tests, waiting ...") - ;; (thread-sleep! (+ 2 *global-delta*)) + ;; Have gone back and forth on this but db starvation is an issue. + ;; wait one second before looking again to run jobs. + (thread-sleep! 1) ;; (+ 2 *global-delta*)) ;; could have done hed tal here but doing car/cdr of newtal to rotate tests (loop (car newtal)(cdr newtal) reruns)) ((and have-resources (or (null? prereqs-not-met) (and (eq? testmode 'toplevel) (null? non-completed)))) (run:test run-id runname keyvallst test-record flags #f) -(runs:shrink-can-run-more-tests-delay) + (hash-table-set! test-registery (runs:make-full-test-name test-name item-path) 'running) + (runs:shrink-can-run-more-tests-delay) ;; DELAY TWEAKER (still needed?) ;; (thread-sleep! *global-delta*) (if (not (null? tal)) (loop (car tal)(cdr tal) reruns))) (else ;; must be we have unmet prerequisites (debug:print 4 "FAILS: " fails) @@ -472,18 +473,20 @@ ;; we made new tal by sticking hed at the back of the list (loop (car newtal)(cdr newtal) reruns)) ;; the waiton is FAIL so no point in trying to run hed ever again (if (not (null? tal)) (if (vector? hed) - (begin (debug:print 1 "WARN: Dropping test " (db:test-get-testname hed) "/" (db:test-get-item-path hed) - " from the launch list as it has prerequistes that are FAIL") -(runs:shrink-can-run-more-tests-delay) - ;; (thread-sleep! *global-delta*) - (loop (car tal)(cdr tal) (cons hed reruns))) + (begin + (debug:print 1 "WARN: Dropping test " (db:test-get-testname hed) "/" (db:test-get-item-path hed) + " from the launch list as it has prerequistes that are FAIL") + (runs:shrink-can-run-more-tests-delay) ;; DELAY TWEAKER (still needed?) + ;; (thread-sleep! *global-delta*) + (hash-table-set! test-registery (runs:make-full-test-name test-name item-path) 'removed) + (loop (car tal)(cdr tal) (cons hed reruns))) (begin (debug:print 1 "WARN: Test not processed correctly. Could be a race condition in your test implementation? " hed) ;; " as it has prerequistes that are FAIL. (NOTE: hed is not a vector)") -(runs:shrink-can-run-more-tests-delay) + (runs:shrink-can-run-more-tests-delay) ;; DELAY TWEAKER (still needed?) ;; (thread-sleep! (+ 0.01 *global-delta*)) (loop hed tal reruns))))))))) ;; END OF INNER COND ;; case where an items came in as a list been processed ((and (list? items) ;; thus we know our items are already calculated @@ -667,11 +670,11 @@ ;; (set! test-id (open-run-close db:get-test-id db run-id test-name item-path)) (if (not test-id) (begin (debug:print 2 "WARN: Test not pre-created? test-name=" test-name ", item-path=" item-path ", run-id=" run-id) - (open-run-close db:tests-register-test #f run-id test-name item-path) + (cdb:tests-register-test *runremote* run-id test-name item-path) (set! test-id (open-run-close db:get-test-id db run-id test-name item-path)))) (debug:print-info 4 "test-id=" test-id ", run-id=" run-id ", test-name=" test-name ", item-path=\"" item-path "\"") (set! testdat (cdb:get-test-info-by-id *runremote* test-id)))) (set! test-id (db:test-get-id testdat)) (change-directory test-path) @@ -719,10 +722,11 @@ (debug:print 1 "NOTE: Not starting test " new-test-name " as it is state \"" (test:get-state testdat) "\" and status \"" (test:get-status testdat) "\", use -rerun \"" (test:get-status testdat) "\" or -force to override")) ;; NOTE: No longer be checking prerequisites here! Will never get here unless prereqs are ;; already met. + ;; This would be a great place to do the process-fork (if (not (launch-test #f run-id runname test-conf keyvallst test-name test-path itemdat flags)) (begin (print "ERROR: Failed to launch the test. Exiting as soon as possible") (set! *globalexitstatus* 1) ;; (process-signal (current-process-id) signal/kill)))))) @@ -958,25 +962,25 @@ ;; Rollup runs ;;====================================================================== ;; Update the test_meta table for this test (define (runs:update-test_meta db test-name test-conf) - (let ((currrecord (open-run-close db:testmeta-get-record db test-name))) + (let ((currrecord (cdb:remote-run db:testmeta-get-record db test-name))) (if (not currrecord) (begin (set! currrecord (make-vector 10 #f)) - (open-run-close db:testmeta-add-record db test-name))) + (cdb:remote-run db:testmeta-add-record db test-name))) (for-each (lambda (key) (let* ((idx (cadr key)) (fld (car key)) (val (config-lookup test-conf "test_meta" fld))) ;; (debug:print 5 "idx: " idx " fld: " fld " val: " val) (if (and val (not (equal? (vector-ref currrecord idx) val))) (begin (print "Updating " test-name " " fld " to " val) - (open-run-close db:testmeta-update-field db test-name fld val))))) + (cdb:remote-run db:testmeta-update-field db test-name fld val))))) '(("author" 2)("owner" 3)("description" 4)("reviewed" 5)("tags" 9))))) ;; Update test_meta for all tests (define (runs:update-all-test_meta db) (let ((test-names (get-all-legal-tests))) @@ -993,11 +997,11 @@ ;; This could probably be refactored into one complex query ... (define (runs:rollup-run keys keyvallst runname user) ;; was target, now keyvallst (debug:print 4 "runs:rollup-run, keys: " keys " keyvallst: " keyvallst " :runname " runname " user: " user) (let* ((db #f) ;; (keyvalllst (keys:target->keyval keys target)) - (new-run-id (open-run-close runs:register-run db keys keyvallst runname "new" "n/a" user)) + (new-run-id (cdb:remote-run db:register-run #f keys keyvallst runname "new" "n/a" user)) (prev-tests (open-run-close test:get-matching-previous-test-run-records db new-run-id "%" "%")) (curr-tests (open-run-close db:get-tests-for-run db new-run-id "%/%" '() '())) (curr-tests-hash (make-hash-table))) (open-run-close db:update-run-event_time db new-run-id) ;; index the already saved tests by testname and itemdat in curr-tests-hash Index: tests.scm ================================================================== --- tests.scm +++ tests.scm @@ -553,32 +553,41 @@ tdb "SELECT count(id) FROM test_rundat;") res)) 0) -(define (db:update-central-meta-info db test-id cpuload diskfree minutes num-records uname hostname) - (sqlite3:execute db "UPDATE tests SET cpuload=?,diskfree=? WHERE id=?;" - cpuload - diskfree - test-id) - (if minutes (sqlite3:execute db "UPDATE tests SET run_duration=? WHERE id=?;" minutes test-id)) - (if (eq? num-records 0) - (sqlite3:execute db "UPDATE tests SET uname=?,host=? WHERE id=?;" - uname hostname test-id))) - -(define (test-set-meta-info db test-id run-id testname itemdat minutes) +(define (tests:update-central-meta-info test-id cpuload diskfree minutes num-records uname hostname) + ;; This is a good candidate for threading the requests to enable + ;; transactionized write at the server + (cdb:tests-update-cpuload-diskfree *runremote* test-id cpuload diskfree) + ;; (let ((db (open-db))) + ;; (sqlite3:execute db "UPDATE tests SET cpuload=?,diskfree=? WHERE id=?;" + ;; cpuload + ;; diskfree + ;; test-id) + (if minutes + (cdb:tests-update-run-duration *runremote* test-id minutes)) + ;; (sqlite3:execute db "UPDATE tests SET run_duration=? WHERE id=?;" minutes test-id)) + (if (eq? num-records 0) + (cdb:tests-update-uname-host *runremote* test-id uname hostname)) + ;;(sqlite3:execute db "UPDATE tests SET uname=?,host=? WHERE id=?;" uname hostname test-id)) + ;;(sqlite3:finalize! db)) + ) + +(define (tests:set-meta-info db test-id run-id testname itemdat minutes testpath) ;; DOES cdb:remote-run under the hood! - (let* ((tdb (db:open-test-db-by-test-id db test-id)) + (let* ((tdb (db:open-test-db-by-test-id db test-id testpath: testpath)) (num-records (test:tdb-get-rundat-count tdb)) (cpuload (get-cpu-load)) (diskfree (get-df (current-directory)))) (if (eq? (modulo num-records 10) 0) ;; every ten records update central (let ((uname (get-uname "-srvpio")) (hostname (get-host-name))) - (cdb:remote-run db:update-central-meta-info db test-id cpuload diskfree minutes num-records uname hostname))) + (tests:update-central-meta-info test-id cpuload diskfree minutes num-records uname hostname))) (sqlite3:execute tdb "INSERT INTO test_rundat (update_time,cpuload,diskfree,run_duration) VALUES (strftime('%s','now'),?,?,?);" - cpuload diskfree minutes))) + cpuload diskfree minutes) + (sqlite3:finalize! tdb))) ;;====================================================================== ;; A R C H I V I N G ;;====================================================================== Index: tests/Makefile ================================================================== --- tests/Makefile +++ tests/Makefile @@ -44,15 +44,17 @@ test3 : fullprep cd fullrun;$(MEGATEST) -runtests runfirst -reqtarg ubuntu/nfs/none :runname $(RUNNAME)_b -debug 10 -test4 : fullprep - cd fullrun;$(MEGATEST) -debug $(DEBUG) -runall -reqtarg ubuntu/nfs/none :runname $(RUNNAME)_b -m "This is a comment specific to a run" -v $(LOGGING) +test4 : cleanprep + @echo "WARNING: No longer running fullprep, test converage may be lessened" + cd fullrun;time $(MEGATEST) -debug $(DEBUG) -runtests % -reqtarg ubuntu/nfs/none :runname $(RUNNAME)_b -m "This is a comment specific to a run" -v $(LOGGING) # NOTE: Only one instance can be a server -test5 : fullprep +test5 : cleanprep + @echo "WARNING: No longer running fullprep, test converage may be lessened" cd fullrun;sleep 0;$(MEGATEST) -runtests % -target $(TARGET) :runname $(RUNNAME)_aa -debug $(DEBUG) $(LOGGING) > aa.log 2> aa.log & cd fullrun;sleep 0;$(MEGATEST) -runtests % -target $(TARGET) :runname $(RUNNAME)_ab -debug $(DEBUG) $(LOGGING) > ab.log 2> ab.log & cd fullrun;sleep 0;$(MEGATEST) -runtests % -target $(TARGET) :runname $(RUNNAME)_ac -debug $(DEBUG) $(LOGGING) > ac.log 2> ac.log & cd fullrun;sleep 0;$(MEGATEST) -runtests % -target $(TARGET) :runname $(RUNNAME)_ad -debug $(DEBUG) $(LOGGING) > ad.log 2> ad.log & # cd fullrun;sleep 0;$(MEGATEST) -runtests % -target $(TARGET) :runname $(RUNNAME)_ae -debug $(DEBUG) $(LOGGING) > ae.log 2> ae.log & @@ -63,11 +65,11 @@ cd fullrun;$(MEGATEST) -runtests runfirst -testpatt %blahha% -reqtarg ubuntu/nfs/none :runname $(RUNNAME)_itempatt -debug 10 cd fullrun;$(MEGATEST) -rollup :runname newrun -target ubuntu/nfs/none -debug 10 cleanprep : ../*.scm Makefile */*.config - mkdir -p /tmp/mt_runs /tmp/mt_links + mkdir -p fullrun/tmp/mt_runs fullrun/tmp/mt_links cd ..;make;make install rm -f */logging.db touch cleanprep fullprep : cleanprep Index: tests/fdktestqa/testqa/megatest.config ================================================================== --- tests/fdktestqa/testqa/megatest.config +++ tests/fdktestqa/testqa/megatest.config @@ -1,5 +1,7 @@ [setup] testcopycmd cp --remove-destination -rlv TEST_SRC_PATH/. TEST_TARG_PATH/. [include ../fdk.config] +[server] +timeout 0.01 ADDED tests/fdktestqa/testqa/runsuite.sh Index: tests/fdktestqa/testqa/runsuite.sh ================================================================== --- /dev/null +++ tests/fdktestqa/testqa/runsuite.sh @@ -0,0 +1,18 @@ +#!/bin/bash + +(cd ../../..;make && make install) || exit 1 +export PATH=$PWD/../../../bin:$PATH + +for i in a b c d e f;do + # g h i j k l m n o p q r s t u v w x y z;do + megatest -runtests % -target a/b :runname $i & +done + +echo "" > num-running.log +while true; do + foo=`megatest -list-runs % | grep RUNNING | wc -l` + echo "Num running at `date` $foo" + echo "$foo at `date`" >> num-running.log + # to make the test go at a reasonable clip only gather this info ever minute + sleep 1m +done Index: tests/fdktestqa/testqa/tests/bigrun/step1.sh ================================================================== --- tests/fdktestqa/testqa/tests/bigrun/step1.sh +++ tests/fdktestqa/testqa/tests/bigrun/step1.sh @@ -1,3 +1,8 @@ #!/bin/sh -sleep 10 +if [ $NUMBER -lt 200 ];then + sleep $NUMBER +else + sleep 200 +fi + exit 0 Index: tests/fdktestqa/testqa/tests/bigrun/testconfig ================================================================== --- tests/fdktestqa/testqa/tests/bigrun/testconfig +++ tests/fdktestqa/testqa/tests/bigrun/testconfig @@ -7,11 +7,11 @@ # waiton setup priority 0 # Iteration for your tests are controlled by the items section [items] -NUMBER #{scheme (string-intersperse (map number->string (sort (let loop ((a 0)(res '()))(if (< a 120)(loop (+ a 1)(cons a res)) res)) >)) " ")} +NUMBER #{scheme (string-intersperse (map number->string (sort (let loop ((a 0)(res '()))(if (< a 150)(loop (+ a 1)(cons a res)) res)) >)) " ")} # test_meta is a section for storing additional data on your test [test_meta] author matt owner matt Index: tests/fdktestqa/testqa/tests/bigrun2/testconfig ================================================================== --- tests/fdktestqa/testqa/tests/bigrun2/testconfig +++ tests/fdktestqa/testqa/tests/bigrun2/testconfig @@ -9,11 +9,11 @@ mode itemmatch # Iteration for your tests are controlled by the items section [items] -NUMBER #{scheme (string-intersperse (map number->string (sort (let loop ((a 0)(res '()))(if (< a 120)(loop (+ a 1)(cons a res)) res)) >)) " ")} +NUMBER #{scheme (string-intersperse (map number->string (sort (let loop ((a 0)(res '()))(if (< a 150)(loop (+ a 1)(cons a res)) res)) >)) " ")} # test_meta is a section for storing additional data on your test [test_meta] author matt owner matt Index: tests/fullrun/config/mt_include_1.config ================================================================== --- tests/fullrun/config/mt_include_1.config +++ tests/fullrun/config/mt_include_1.config @@ -1,8 +1,8 @@ [setup] # exectutable /path/to/megatest -max_concurrent_jobs 200 +max_concurrent_jobs 25 linktree #{getenv MT_RUN_AREA_HOME}/tmp/mt_links [jobtools] useshell yes Index: tests/fullrun/megatest.config ================================================================== --- tests/fullrun/megatest.config +++ tests/fullrun/megatest.config @@ -9,16 +9,20 @@ area1 /tmp/oldarea/megatest [include config/mt_include_1.config] [setup] +# Set launchwait to yes to use the old launch run code that waits for the launch process to return before +# proceeding. +# launchwait yes + # It is possible (but not recommended) to override the rsync command used # to populate the test directories. For test development the following # example can be useful # -testcopycmd cp --remove-destination -rsv TEST_SRC_PATH/. TEST_TARG_PATH/. +testcopycmd cp --remove-destination -rsv TEST_SRC_PATH/. TEST_TARG_PATH/. >> TEST_TARG_PATH/mt_launch.log 2>> TEST_TARG_PATH/mt_launch.log # or for hard links # testcopycmd cp --remove-destination -rlv TEST_SRC_PATH/. TEST_TARG_PATH/.