Index: dashboard-guimonitor.scm ================================================================== --- dashboard-guimonitor.scm +++ dashboard-guimonitor.scm @@ -179,11 +179,11 @@ ;; (print "On-exit called") (tasks:remove-monitor-record tdb) (sqlite3:finalize! tdb)))) (define (gui-monitor db) - (let ((keys (rdb:get-keys db)) + (let ((keys (db:get-keys db)) (tdb (tasks:open-db))) (tasks:register-monitor db tdb) ;;; let the other monitors know we are here (control-panel db tdb keys) ;(tasks:remove-monitor-record db) ;(sqlite3:finalize! db) Index: db.scm ================================================================== --- db.scm +++ db.scm @@ -70,36 +70,32 @@ db)) ;; keeping it around for debugging purposes only (define (open-run-close-no-exception-handling proc idb . params) (debug:print-info 11 "open-run-close-no-exception-handling START given a db=" (if idb "yes " "no ") ", params=" params) - (let* ((db (if idb idb (open-db))) + (let* ((db (if idb + (if (procedure? idb) + (idb) + idb) + (open-db))) (res #f)) (set! res (apply proc db params)) (if (not idb)(sqlite3:finalize! db)) (debug:print-info 11 "open-run-close-no-exception-handling END" ) res)) (define (open-run-close-exception-handling proc idb . params) - (debug:print-info 11 "open-run-close-exception-handling START, idb=" idb ", params=" params) - (let ((runner (lambda () - (let* ((db (if idb idb (open-db))) - (res #f)) - (set! res (apply proc db params)) - (if (not idb)(sqlite3:finalize! db)) - (debug:print-info 11 "open-run-close-no-exception-handling END" ) - res)))) - (handle-exceptions - exn - (begin - (debug:print 0 "EXCEPTION: database probably overloaded?") - (debug:print 0 " " ((condition-property-accessor 'exn 'message) exn)) - (print-call-chain) - (thread-sleep! (random 120)) - (debug:print-info 0 "trying db call one more time....") - (runner)) - (runner)))) + (handle-exceptions + exn + (begin + (debug:print 0 "EXCEPTION: database probably overloaded?") + (debug:print 0 " " ((condition-property-accessor 'exn 'message) exn)) + (print-call-chain) + (thread-sleep! (random 120)) + (debug:print-info 0 "trying db call one more time....") + (apply open-run-close-no-exception-handling proc idb params)) + (apply open-run-close-no-exception-handling proc idb params))) (define open-run-close open-run-close-exception-handling) (define *global-delta* 0) (define *last-global-delta-printed* 0) @@ -257,10 +253,11 @@ #f))) ;; find and open the testdat.db file for an existing test (define (db:open-test-db-by-test-id db test-id) (let* ((test-path (db:test-get-rundir-from-test-id db test-id))) + (debug:print 3 "TEST PATH: " test-path) (open-test-db test-path))) (define (db:testdb-initialize db) (debug:print 11 "db:testdb-initialize START") (for-each @@ -776,12 +773,12 @@ " run_id=? AND testname=? AND NOT (item_path='' AND testname in (SELECT DISTINCT testname FROM tests WHERE testname=? AND item_path != ''));"))) ;;(debug:print 0 "QRY: " qry) (sqlite3:execute db qry run-id newstate newstatus testname testname))) testnames)) -(define (db:delete-tests-in-state db run-id state) - (sqlite3:execute db "DELETE FROM tests WHERE state=? AND run_id=?;" state run-id)) +(define (cdb:delete-tests-in-state zmqsocket run-id state) + (cdb:client-call zmqsocket 'delete-tests-in-state #t run-id state)) ;; speed up for common cases with a little logic (define (db:test-set-state-status-by-id db test-id newstate newstatus newcomment) (cond ((and newstate newstatus newcomment) @@ -855,11 +852,11 @@ db "SELECT id FROM tests WHERE run_id=? AND testname=? AND item_path=?;" run-id testname item-path) res)) -(define db:get-test-id db:get-test-id-cached) +(define db:get-test-id db:get-test-id-not-cached) ;; given a test-info record, patch in the latest data from the testdat.db file ;; found in the test run directory (define (db:patch-tdb-data-into-test-info db test-id res) (let ((tdb (db:open-test-db-by-test-id db test-id))) @@ -955,43 +952,32 @@ (sqlite3:execute db "UPDATE tests SET comment=? WHERE id=?;" comment test-id)) -;; -(define (db:test-set-rundir! db run-id test-name item-path rundir) - (sqlite3:execute - db - "UPDATE tests SET rundir=? WHERE run_id=? AND testname=? AND item_path=?;" - rundir run-id test-name item-path)) - -(define (db:test-set-rundir-by-test-id! db test-id rundir) - (sqlite3:execute - db - "UPDATE tests SET rundir=? WHERE id=?" - rundir test-id)) - -;; +(define (cdb:test-set-rundir! zmqsocket run-id test-name item-path rundir) + (cdb:client-call zmqsocket 'test-set-rundir #t rundir run-id test-name item-path)) + +(define (cdb:test-set-rundir-by-test-id zmqsocket test-id rundir) + (cdb:client-call zmqsocket 'test-set-rundir-by-test-id #t rundir test-id)) + (define (db:test-get-rundir-from-test-id db test-id) - (let ((res (hash-table-ref/default *test-paths* test-id #f))) - (if res - res - (begin - (sqlite3:for-each-row - (lambda (tpath) - (set! res tpath)) - db - "SELECT rundir FROM tests WHERE id=?;" - test-id) - (hash-table-set! *test-paths* test-id res) - res)))) - -(define (db:test-set-log! db test-id logf) - (if (string? logf) - (sqlite3:execute db "UPDATE tests SET final_logf=? WHERE id=?;" - logf test-id) - (debug:print 0 "ERROR: db:test-set-log! called with non-string log file name " logf))) + (let ((res #f)) ;; (hash-table-ref/default *test-paths* test-id #f))) + ;; (if res + ;; res + ;; (begin + (sqlite3:for-each-row + (lambda (tpath) + (set! res tpath)) + db + "SELECT rundir FROM tests WHERE id=?;" + test-id) + ;; (hash-table-set! *test-paths* test-id res) + res)) ;; )) + +(define (cdb:test-set-log! zmqsocket test-id logf) + (if (string? logf)(cdb:client-call zmqsocket 'test-set-log #f logf test-id))) ;;====================================================================== ;; Misc. test related queries ;;====================================================================== @@ -1091,20 +1077,20 @@ db qrystr) res)) ;;====================================================================== -;; QUEUE UP META, TEST STATUS AND STEPS +;; QUEUE UP META, TEST STATUS AND STEPS REMOTE ACCESS ;;====================================================================== ;; db:updater is run in a thread to write out the cached data periodically -(define (db:updater) - (debug:print-info 4 "Starting cache processing") - (let loop () - (thread-sleep! 10) ;; move save time around to minimize regular collisions? - (db:write-cached-data) - (loop))) +;; (define (db:updater) +;; (debug:print-info 4 "Starting cache processing") +;; (let loop () +;; (thread-sleep! 10) ;; move save time around to minimize regular collisions? +;; (db:write-cached-data) +;; (loop))) ;; cdb:cached-access is called by the server loop to dispatch commands or queue up ;; db accesses ;; ;; params := qry-name cached? val1 val2 val3 ... @@ -1114,10 +1100,11 @@ "ERROR" (let ((qry-name (car params)) (cached? (cadr params)) (remparam (list-tail params 2))) (debug:print-info 12 "cdb:cached-access qry-name=" qry-name " params=" params) + (if (not cached?)(db:write-cached-data)) ;; Any special calls are dispatched here. ;; Remainder are put in the db queue (case qry-name ((login) ;; login checks that the megatest path matches (if (null? remparam) @@ -1127,10 +1114,25 @@ #t ;; path matches - pass! Should vet the caller at this time ... #f)))) ;; else fail to login ((flush) (db:write-cached-data) #t) + ((immediate) + (db:write-cached-data) + (if (not (null? remparam)) + (apply (car remparam) (cdr remparam)) + "ERROR")) + ((killserver) + (db:write-cached-data) + (debug:print-info 0 "Remotely killed server on host " (get-host-name) " pid " (current-process-id)) + (set! *time-to-exit* #t) + #t) + ((set-verbosity) + (set! *verbosity* (caddr params)) + *verbosity*) + ((get-verbosity) + *verbosity*) (else (mutex-lock! *incoming-mutex*) (set! *last-db-access* (current-seconds)) (set! *incoming-data* (cons (vector qry-name @@ -1159,10 +1161,13 @@ (send-message zmq-socket zdat) (set! res (db:string->obj (receive-message zmq-socket zdat))) (debug:print-info 11 "zmq-socket " (car params) " res=" res) res)) +(define (cdb:set-verbosity zmqsocket val) + (cdb:client-call zmqsocket 'set-verbosity #f val)) + (define (cdb:test-set-status-state zmqsocket test-id status state msg) (if msg (cdb:client-call zmqsocket 'state-status-msg #t state status msg test-id) (cdb:client-call zmqsocket 'state-status #t state status test-id))) ;; run-id test-name item-path minutes cpuload diskfree tmpfree) @@ -1178,33 +1183,68 @@ (list item-path "")))) (cdb:client-call zmqsocket 'register-test #t run-id test-name item-path))) (define (cdb:flush-queue zmqsocket) (cdb:client-call zmqsocket 'flush #f)) + +(define (cdb:kill-server zmqsocket) + (cdb:client-call zmqsocket 'killserver #f)) + +(define (cdb:roll-up-pass-fail-counts zmqsocket run-id test-name item-path status) + (cdb:client-call zmqsocket 'immediate #f open-run-close db:roll-up-pass-fail-counts #f run-id test-name item-path status)) + +(define (cdb:get-test-info zmqsocket run-id test-name item-path) + (cdb:client-call zmqsocket 'immediate #f open-run-close db:get-test-info #f run-id test-name item-path)) + +(define (cdb:get-test-info-by-id zmqsocket test-id) + (cdb:client-call zmqsocket 'immediate #f open-run-close db:get-test-info-by-id #f test-id)) + +;; db should be db open proc or #f +(define (cdb:remote-run proc db . params) + (apply cdb:client-call *runremote* 'immediate #f open-run-close proc #f params)) + +(define (db:test-get-logfile-info db run-id test-name) + (let ((res #f)) + (sqlite3:for-each-row + (lambda (path final_logf) + (set! logf final_logf) + (set! res (list path final_logf)) + (if (directory? path) + (print "Found path: " path) + (print "No such path: " path))) + db + "SELECT rundir,final_logf FROM tests WHERE run_id=? AND testname=? AND item_path='';" + run-id test-name) + res)) (define db:queries - '((register-test "INSERT OR IGNORE INTO tests (run_id,testname,event_time,item_path,state,status) VALUES (?,?,strftime('%s','now'),?,'NOT_STARTED','n/a');") - (state-status "UPDATE tests SET state=?,status=? WHERE id=?;") - (state-status-msg "UPDATE tests SET state=?,status=?,comment=? WHERE id=?;") - (pass-fail-counts "UPDATE tests SET fail_count=?,pass_count=? WHERE id=?;") - (test_data-pf-rollup "UPDATE tests - SET status=CASE WHEN (SELECT fail_count FROM tests WHERE id=?) > 0 - THEN 'FAIL' - WHEN (SELECT pass_count FROM tests WHERE id=?) > 0 AND - (SELECT status FROM tests WHERE id=?) NOT IN ('WARN','FAIL') - THEN 'PASS' - ELSE status - END WHERE id=?;") - (rollup-tests-pass-fail "UPDATE tests - SET fail_count=(SELECT count(id) FROM tests WHERE - run_id=? AND testname=? AND item_path != '' AND status='FAIL'), - pass_count=(SELECT count(id) FROM tests WHERE - run_id=? AND testname=? AND item_path != '' AND (status='PASS' OR status='WARN' OR status='WAIVED')) - WHERE run_id=? AND testname=? AND item_path='';"))) - -(define db:special-queries '(rollup-tests-pass-fail)) -(define db:run-local-queries '(rollup-tests-pass-fail)) + (list '(register-test "INSERT OR IGNORE INTO tests (run_id,testname,event_time,item_path,state,status) VALUES (?,?,strftime('%s','now'),?,'NOT_STARTED','n/a');") + '(state-status "UPDATE tests SET state=?,status=? WHERE id=?;") + '(state-status-msg "UPDATE tests SET state=?,status=?,comment=? WHERE id=?;") + '(pass-fail-counts "UPDATE tests SET fail_count=?,pass_count=? WHERE id=?;") + ;; test_data-pf-rollup is used to set a tests PASS/FAIL based on the pass/fail info from the steps + '(test_data-pf-rollup "UPDATE tests + SET status=CASE WHEN (SELECT fail_count FROM tests WHERE id=?) > 0 + THEN 'FAIL' + WHEN (SELECT pass_count FROM tests WHERE id=?) > 0 AND + (SELECT status FROM tests WHERE id=?) NOT IN ('WARN','FAIL') + THEN 'PASS' + ELSE status + END WHERE id=?;") + '(test-set-log "UPDATE tests SET final_logf=? WHERE id=?;") + '(test-set-rundir-by-test-id "UPDATE tests SET rundir=? WHERE id=?") + '(test-set-rundir "UPDATE tests SET rundir=? WHERE run_id=? AND testname=? AND item_path=?;") + '(delete-tests-in-state "DELETE FROM tests WHERE state=? AND run_id=?;") + '(tests:test-set-toplog "UPDATE tests SET final_logf=? WHERE run_id=? AND testname=? AND item_path='';") + )) + +;; do not run these as part of the transaction +(define db:special-queries '(rollup-tests-pass-fail + db:roll-up-pass-fail-counts)) + +;; not used, intended to indicate to run in calling process +(define db:run-local-queries '()) ;; rollup-tests-pass-fail)) ;; The queue is a list of vectors where the zeroth slot indicates the type of query to ;; apply and the second slot is the time of the query and the third entry is a list of ;; values to be applied ;; @@ -1217,57 +1257,91 @@ (set! data (sort *incoming-data* (lambda (a b)(< (vector-ref a 1)(vector-ref b 1))))) (set! *incoming-data* '()) (mutex-unlock! *incoming-mutex*) (if (> (length data) 0) (debug:print-info 4 "Writing cached data " data)) - ;; prepare the needed statements + + ;; prepare the needed statements, do each only once (for-each (lambda (request-item) (let ((stmt-key (vector-ref request-item 0))) (if (not (hash-table-ref/default queries stmt-key #f)) (let ((stmt (alist-ref stmt-key db:queries))) (if stmt (hash-table-set! queries stmt-key (sqlite3:prepare db (car stmt))) - (debug:print 0 "ERROR: Missing query spec for " stmt-key "!")))))) + (if (procedure? stmt-key) + (hash-table-set! queries stmt-key #f) + (debug:print 0 "ERROR: Missing query spec for " stmt-key "!"))))))) data) + + ;; outer loop to handle special queries that cannot be handled in the + ;; transaction. (let outerloop ((special-qry #f) (stmts data)) (if special-qry + ;; handle a query that cannot be part of the grouped queries (let* ((stmt-key (vector-ref special-qry 0)) (qry (hash-table-ref queries stmt-key)) - (params (vector-ref speical-qry 2))) - (apply sqlite3:execute db qry params) + (params (vector-ref special-qry 2))) + (if (string? qry) + (apply sqlite3:execute db qry params) + (if (procedure? stmt-key) + (begin + ;; we are being handed a procedure so call it + (debug:print-info 11 "Running (apply " stmt-key " " db " " params ")") + (apply stmt-key db params)) + (debug:print 0 "ERROR: Unrecognised queued call " qry " " params))) (if (not (null? stmts)) (outerloop #f stmts))) + ;; handle normal queries - (sqlite3:with-transaction - db - (lambda () - (debug:print-info 11 "flushing " stmts " to db") - (if (not (null? stmts)) - (let innerloop ((hed (car stmts)) - (tal (cdr stmts))) - (let ((params (vector-ref hed 2)) - (stmt-key (vector-ref hed 0))) - (if (not (member stmt-key db:special-queries)) - (begin - (debug:print-info 11 "Executing " stmt-key " for " params) - (apply sqlite3:execute (hash-table-ref queries stmt-key) params) - (if (not (null? tal)) - (innerloop (car tal)(cdr tal)))) - (outerloop hed tal))))))))) + (let ((rem (sqlite3:with-transaction + db + (lambda () + (debug:print-info 11 "flushing " stmts " to db") + (if (null? stmts) + stmts + (let innerloop ((hed (car stmts)) + (tal (cdr stmts))) + (let ((params (vector-ref hed 2)) + (stmt-key (vector-ref hed 0))) + (if (or (procedure? stmt-key) + (member stmt-key db:special-queries)) + (begin + (debug:print-info 11 "Handling special statement " stmt-key) + (cons hed tal)) + (begin + (debug:print-info 11 "Executing " stmt-key " for " params) + (apply sqlite3:execute (hash-table-ref queries stmt-key) params) + (if (not (null? tal)) + (innerloop (car tal)(cdr tal)) + '())) + )))))))) + (if (not (null? rem)) + (outerloop (car rem)(cdr rem)))))) (for-each (lambda (stmt-key) (sqlite3:finalize! (hash-table-ref queries stmt-key))) (hash-table-keys queries)) (let ((cache-size (length data))) (if (> cache-size *max-cache-size*) (set! *max-cache-size* cache-size))) )) #f)) +(define (db:test-get-records-for-index-file db run-id test-name) + (let ((res '())) + (sqlite3:for-each-row + (lambda (id itempath state status run_duration logf comment) + (set! res (cons (vector id itempath state status run_duration logf comment) res))) + db + "SELECT id,item_path,state,status,run_duration,final_logf,comment FROM tests WHERE run_id=? AND testname=? AND item_path != '';" + run-id test-name) + res)) + +;; Rollup the pass/fail counts from itemized tests into fail_count and pass_count (define (db:roll-up-pass-fail-counts db run-id test-name item-path status) - (cdb:flush-queue *runremote*) + ;; (cdb:flush-queue *runremote*) (if (and (not (equal? item-path "")) (or (equal? status "PASS") (equal? status "WARN") (equal? status "FAIL") (equal? status "WAIVED") @@ -1291,11 +1365,10 @@ ELSE 'COMPLETED' END, status=CASE WHEN fail_count > 0 THEN 'FAIL' WHEN pass_count > 0 AND fail_count=0 THEN 'PASS' ELSE 'UNKNOWN' END WHERE run_id=? AND testname=? AND item_path='';" run-id test-name run-id test-name)) #f) - #f)) ;;====================================================================== ;; Tests meta data ;;====================================================================== @@ -1430,12 +1503,12 @@ ;; Now rollup the counts to the central megatest.db (cdb:pass-fail-counts *runremote* test-id fail-count pass-count) ;; (sqlite3:execute db "UPDATE tests SET fail_count=?,pass_count=? WHERE id=?;" ;; fail-count pass-count test-id) - - (thread-sleep! 1) ;; play nice with the queue by ensuring the rollup is at least 10ms later than the set + (cdb:flush-queue *runremote*) + ;; (thread-sleep! 1) ;; play nice with the queue by ensuring the rollup is at least 10ms later than the set ;; if the test is not FAIL then set status based on the fail and pass counts. (cdb:test-rollup-test_data-pass-fail *runremote* test-id) ;; (sqlite3:execute ;; db ;;; NOTE: Should this be WARN,FAIL? A WARN is not a FAIL????? BUG FIXME @@ -1730,54 +1803,5 @@ results) ;; brutal clean up (system "rm -rf tempdir"))) ;; (db:extract-ods-file db "outputfile.ods" '(("sysname" "%")("fsname" "%")("datapath" "%")) "%") - - -;;====================================================================== -;; REMOTE DB ACCESS VIA RPC -;;====================================================================== - -;; (define (rdb:test-set-status-state test-id status state msg) -;; (if *runremote* -;; (let ((host (vector-ref *runremote* 0)) -;; (port (vector-ref *runremote* 1))) -;; (handle-exceptions -;; exn -;; (begin -;; (debug:print 0 "EXCEPTION: rpc call failed?") -;; (debug:print 0 " " ((condition-property-accessor 'exn 'message) exn)) -;; (print-call-chain) -;; (cdb:test-set-status-state test-id status state msg)) -;; ((rpc:procedure 'cdb:test-set-status-state host port) test-id status state msg))) -;; (cdb:test-set-status-state test-id status state msg))) -;; -;; (define (rdb:test-rollup-test_data-pass-fail test-id) -;; (if *runremote* -;; (let ((host (vector-ref *runremote* 0)) -;; (port (vector-ref *runremote* 1))) -;; ((rpc:procedure 'cdb:test-rollup-test_data-pass-fail host port) test-id)) -;; (cdb:test-rollup-test_data-pass-fail test-id))) -;; -;; (define (rdb:pass-fail-counts test-id fail-count pass-count) -;; (if *runremote* -;; (let ((host (vector-ref *runremote* 0)) -;; (port (vector-ref *runremote* 1))) -;; ((rpc:procedure 'cdb:pass-fail-counts host port) test-id fail-count pass-count)) -;; (cdb:pass-fail-counts test-id fail-count pass-count))) -;; -;; ;; currently forces a flush of the queue -;; (define (rdb:tests-register-test db run-id test-name item-path) -;; (if *runremote* -;; (let ((host (vector-ref *runremote* 0)) -;; (port (vector-ref *runremote* 1))) -;; ((rpc:procedure 'cdb:tests-register-test host port) db run-id test-name item-path force-write: #t)) -;; (cdb:tests-register-test db run-id test-name item-path force-write: #t))) -;; -;; (define (rdb:flush-queue) -;; (if *runremote* -;; (let ((host (vector-ref *runremote* 0)) -;; (port (vector-ref *runremote* 1))) -;; ((rpc:procedure 'cdb:flush-queue host port))) -;; (cdb:flush-queue))) -;; Index: launch.scm ================================================================== --- launch.scm +++ launch.scm @@ -104,17 +104,17 @@ (exit 1))) ;; Can setup as client for server mode now (server:client-setup) (change-directory *toppath*) - (open-run-close set-megatest-env-vars #f run-id) ;; these may be needed by the launching process + (set-megatest-env-vars run-id) ;; these may be needed by the launching process (change-directory work-area) (open-run-close set-run-config-vars #f run-id) ;; environment overrides are done *before* the remaining critical envars. (alist->env-vars env-ovrd) - (open-run-close set-megatest-env-vars #f run-id) + (set-megatest-env-vars run-id) (set-item-env-vars itemdat) (save-environment-as-files "megatest") (open-run-close test-set-meta-info #f test-id run-id test-name itemdat 0) (tests:test-set-status! test-id "REMOTEHOSTSTART" "n/a" (args:get-arg "-m") #f) (if (args:get-arg "-xterm") @@ -188,11 +188,11 @@ ;; call the command using mt_ezstep (set! script (conc "mt_ezstep " stepname " " (if prevstep prevstep "-") " " stepcmd)) (debug:print 4 "script: " script) - (open-run-close db:teststep-set-status! #f test-id stepname "start" "-" #f #f) + (cdb:remote-run db:teststep-set-status! #f test-id stepname "start" "-" #f #f) ;; now launch (let ((pid (process-run script))) (let processloop ((i 0)) (let-values (((pid-val exit-status exit-code)(process-wait pid #t))) (mutex-lock! m) @@ -206,13 +206,13 @@ (processloop (+ i 1)))) )) (let ((exinfo (vector-ref exit-info 2)) (logfna (if logpro-used (conc stepname ".html") ""))) ;; testing if procedures called in a remote call cause problems (ans: no or so I suspect) - (open-run-close db:teststep-set-status! #f test-id stepname "end" exinfo #f logfna)) + (cdb:remote-run db:teststep-set-status! #f test-id stepname "end" exinfo #f logfna)) (if logpro-used - (open-run-close db:test-set-log! #f test-id (conc stepname ".html"))) + (cdb:test-set-log! *runremote* test-id (conc stepname ".html"))) ;; set the test final status (let* ((this-step-status (cond ((and (eq? (vector-ref exit-info 2) 2) logpro-used) 'warn) ((eq? (vector-ref exit-info 2) 0) 'pass) (else 'fail))) @@ -254,11 +254,11 @@ (current-seconds) start-seconds))))) (kill-tries 0)) (let loop ((minutes (calc-minutes))) (begin - (set! kill-job? (open-run-close test-get-kill-request #f test-id)) ;; run-id test-name itemdat)) + (set! kill-job? (test-get-kill-request test-id)) ;; run-id test-name itemdat)) (open-run-close test-set-meta-info #f test-id run-id test-name itemdat minutes) (if kill-job? (begin (mutex-lock! m) (let* ((pid (vector-ref exit-info 0))) @@ -295,11 +295,11 @@ (thread-start! th1) (thread-start! th2) (thread-join! th2) (mutex-lock! m) (let* ((item-path (item-list->path itemdat)) - (testinfo (open-run-close db:get-test-info-by-id #f test-id))) ;; )) ;; run-id test-name item-path))) + (testinfo (cdb:get-test-info-by-id *runremote* test-id))) ;; )) ;; run-id test-name item-path))) ;; Am I completed? (if (not (equal? (db:test-get-state testinfo) "COMPLETED")) (begin (debug:print 2 "Test NOT logged as COMPLETED, (state=" (db:test-get-state testinfo) "), updating result, rollup-status is " rollup-status) (tests:test-set-status! test-id @@ -386,17 +386,17 @@ ;; All log file links should be stored relative to the top of link path ;; ;; - [ - ] ;; (define (create-work-area db run-id test-id test-src-path disk-path testname itemdat) - (let* ((run-info (db:get-run-info db run-id)) + (let* ((run-info (cdb:remote-run db:get-run-info #f run-id)) (item-path (item-list->path itemdat)) (runname (db:get-value-by-header (db:get-row run-info) (db:get-header run-info) "runname")) ;; convert back to db: from rdb: - this is always run at server end - (key-vals (db:get-key-vals db run-id)) + (key-vals (cdb:remote-run db:get-key-vals #f run-id)) (target (string-intersperse key-vals "/")) (not-iterated (equal? "" item-path)) ;; all tests are found at /test-base or /test-base @@ -414,11 +414,11 @@ (lnkbase (conc linktree "/" target "/" runname)) (lnkpath (conc lnkbase "/" testname)) (lnkpathf (conc lnkpath (if not-iterated "" "/") item-path))) ;; Update the rundir path in the test record for all - (db:test-set-rundir-by-test-id! db test-id lnkpathf) + (cdb:test-set-rundir-by-test-id *runremote* test-id lnkpathf) (debug:print 2 "INFO:\n lnkbase=" lnkbase "\n lnkpath=" lnkpath "\n toptest-path=" toptest-path "\n test-path=" test-path) (if (not (file-exists? linktree)) (begin (debug:print 0 "WARNING: linktree did not exist! Creating it now at " linktree) @@ -433,14 +433,15 @@ ;; again. ;; NB - This is not working right - some top tests are not getting the path set!!! (if (not (hash-table-ref/default *toptest-paths* testname #f)) - (let* ((testinfo (db:get-test-info-by-id db test-id)) ;; run-id testname item-path)) + (let* ((testinfo (cdb:get-test-info-by-id *runremote* test-id)) ;; run-id testname item-path)) (curr-test-path (if testinfo (db:test-get-rundir testinfo) #f))) (hash-table-set! *toptest-paths* testname curr-test-path) - (db:test-set-rundir! db run-id testname "" lnkpath) ;; toptest-path) + ;; NB// Was this for the test or for the parent in an iterated test? + (cdb:test-set-rundir! *runremote* run-id testname "" lnkpath) ;; toptest-path) (if (or (not curr-test-path) (not (directory-exists? toptest-path))) (begin (debug:print-info 2 "Creating " toptest-path " and link " lnkpath) (create-directory toptest-path #t) @@ -538,12 +539,12 @@ (diskpath #f) (cmdparms #f) (fullcmd #f) ;; (define a (with-output-to-string (lambda ()(write x)))) (mt-bindir-path #f) (item-path (item-list->path itemdat)) - (test-id (open-run-close db:get-test-id db run-id test-name item-path)) - (testinfo (open-run-close db:get-test-info-by-id db test-id)) + (test-id (cdb:remote-run db:get-test-id #f run-id test-name item-path)) + (testinfo (cdb:get-test-info-by-id *runremote* test-id)) (mt_target (string-intersperse (map cadr keyvallst) "/")) (debug-param (append (if (args:get-arg "-debug") (list "-debug" (args:get-arg "-debug")) '()) (if (args:get-arg "-logging")(list "-logging") '())))) (if hosts (set! hosts (string-split hosts))) ;; set the megatest to be called on the remote host @@ -577,12 +578,12 @@ (list 'env-ovrd (hash-table-ref/default *configdat* "env-override" '())) (list 'set-vars (if params (hash-table-ref/default params "-setvars" #f))) (list 'runname runname) (list 'mt-bindir-path mt-bindir-path))))))) ;; (string-intersperse keyvallst " ")))) ;; clean out step records from previous run if they exist - (debug:print-info 4 "FIXMEEEEE!!!! This can be removed some day, perhaps move all test records to the test db?") - (open-run-close db:delete-test-step-records db test-id) + ;; (debug:print-info 4 "FIXMEEEEE!!!! This can be removed some day, perhaps move all test records to the test db?") + ;; (open-run-close db:delete-test-step-records db test-id) (change-directory work-area) ;; so that log files from the launch process don't clutter the test dir (tests:test-set-status! test-id "LAUNCHED" "n/a" #f #f) ;; (if launch-results launch-results "FAILED")) (cond ((and launcher hosts) ;; must be using ssh hostname (set! fullcmd (append launcher (car hosts)(list remote-megatest test-sig "-execute" cmdparms) debug-param))) Index: megatest.scm ================================================================== --- megatest.scm +++ megatest.scm @@ -257,12 +257,22 @@ ;;====================================================================== ;; Start the server - can be done in conjunction with -runall or -runtests (one day...) ;; we start the server if not running else start the client thread ;;====================================================================== + (if (args:get-arg "-server") - (server:launch) + (server:launch)) + +(if (or (let ((res #f)) + (for-each + (lambda (key) + (if (args:get-arg key)(set! res #t))) + (list "-h" "-version" "-gen-megatest-area" "-gen-megatest-test")) + res) + (eq? (length (hash-table-keys args:arg-hash)) 0)) + (debug:print-info 1 "No server needed") (server:client-launch)) ;;====================================================================== ;; Remove old run(s) ;;====================================================================== @@ -673,11 +683,11 @@ (if (args:get-arg "-load-test-data") ;; has sub commands that are rdb: (open-run-close db:load-test-data db test-id)) (if (args:get-arg "-setlog") (let ((logfname (args:get-arg "-setlog"))) - (open-run-close db:test-set-log! db test-id logfname))) + (cdb:test-set-log! *runremote* test-id logfname))) (if (args:get-arg "-set-toplog") (open-run-close tests:test-set-toplog! db run-id test-name (args:get-arg "-set-toplog"))) (if (args:get-arg "-summarize-items") (open-run-close tests:summarize-items db run-id test-name #t)) ;; do force here (if (args:get-arg "-runstep") @@ -716,11 +726,11 @@ (debug:print-info 2 "running \"" cmd "\"") (change-directory startingdir) (set! exitstat (system cmd)) (set! *globalexitstatus* exitstat) ;; no necessary (change-directory testpath) - (open-run-close db:test-set-log! db test-id htmllogfile))) + (cdb:test-set-log! *runremote* test-id htmllogfile))) (let ((msg (args:get-arg "-m"))) (open-run-close db:teststep-set-status! db test-id stepname "end" exitstat msg logfile)) ))) (if (or (args:get-arg "-test-status") (args:get-arg "-set-values")) Index: runs.scm ================================================================== --- runs.scm +++ runs.scm @@ -71,44 +71,51 @@ ;; Awful. Please FIXME (define *env-vars-by-run-id* (make-hash-table)) (define *current-run-name* #f) -(define (set-megatest-env-vars db run-id) - (let ((keys (db:get-keys db)) +(define (db:get-run-key-val db run-id key) + (let ((res #f)) + (sqlite3:for-each-row + (lambda (val) + (set! res val)) + db + (conc "SELECT " (key:get-fieldname key) " FROM runs WHERE id=?;") + run-id) + res)) + +(define (db:get-run-name-from-id db run-id) + (let ((res #f)) + (sqlite3:for-each-row + (lambda (runname) + (set! res runname)) + db + "SELECT runname FROM runs WHERE id=?;" + run-id) + res)) + +(define (set-megatest-env-vars run-id) + (let ((keys (cdb:remote-run db:get-keys #f)) (vals (hash-table-ref/default *env-vars-by-run-id* run-id #f))) ;; get the info from the db and put it in the cache (if (not vals) (let ((ht (make-hash-table))) (hash-table-set! *env-vars-by-run-id* run-id ht) (set! vals ht) (for-each (lambda (key) - (sqlite3:for-each-row - (lambda (val) - (hash-table-set! vals key val)) - db - (conc "SELECT " (key:get-fieldname key) " FROM runs WHERE id=?;") - run-id)) + (hash-table-set! vals key (cdb:remote-run db:get-run-key-val #f run-id key))) keys))) ;; from the cached data set the vars (hash-table-for-each vals (lambda (key val) (debug:print 2 "setenv " (key:get-fieldname key) " " val) (setenv (key:get-fieldname key) val))) (alist->env-vars (hash-table-ref/default *configdat* "env-override" '())) ;; Lets use this as an opportunity to put MT_RUNNAME in the environment - (if (not *current-run-name*) - (sqlite3:for-each-row - (lambda (runname) - (set! *current-run-name* runname)) - - db - "SELECT runname FROM runs WHERE id=?;" - run-id)) - (setenv "MT_RUNNAME" *current-run-name*) + (setenv "MT_RUNNAME" (cdb:remote-run db:get-run-name-from-id #f run-id)) (setenv "MT_RUN_AREA_HOME" *toppath*) )) (define (set-item-env-vars itemdat) (for-each (lambda (item) @@ -115,15 +122,15 @@ (debug:print 2 "setenv " (car item) " " (cadr item)) (setenv (car item) (cadr item))) itemdat)) (define *last-num-running-tests* 0) -(define (runs:can-run-more-tests db test-record) +(define (runs:can-run-more-tests test-record) (let* ((tconfig (tests:testqueue-get-testconfig test-record)) (jobgroup (config-lookup tconfig "requirements" "jobgroup")) - (num-running (db:get-count-tests-running db)) - (num-running-in-jobgroup (db:get-count-tests-running-in-jobgroup db jobgroup)) + (num-running (cdb:remote-run db:get-count-tests-running #f)) + (num-running-in-jobgroup (cdb:remote-run db:get-count-tests-running-in-jobgroup #f jobgroup)) (max-concurrent-jobs (let ((mcj (config-lookup *configdat* "setup" "max_concurrent_jobs"))) (if (and mcj (string->number mcj)) (string->number mcj) #f))) (job-group-limit (config-lookup *configdat* "jobgroups" jobgroup))) @@ -189,22 +196,22 @@ ;; This is a duplicate of run-tests (which has been deprecated). Use this one instead of run tests. ;; keyvals (define (runs:run-tests target runname test-patts user flags) (let* ((db #f) - (keys (open-run-close db:get-keys db)) + (keys (cdb:remote-run db:get-keys #f)) (keyvallst (keys:target->keyval keys target)) - (run-id (open-run-close runs:register-run db keys keyvallst runname "new" "n/a" user)) ;; test-name))) + (run-id (cdb:remote-run runs:register-run #f keys keyvallst runname "new" "n/a" user)) ;; test-name))) (deferred '()) ;; delay running these since they have a waiton clause ;; keepgoing is the defacto modality now, will add hit-n-run a bit later ;; (keepgoing (hash-table-ref/default flags "-keepgoing" #f)) (test-names '()) (runconfigf (conc *toppath* "/runconfigs.config")) (required-tests '()) (test-records (make-hash-table))) - (open-run-close set-megatest-env-vars db run-id) ;; these may be needed by the launching process + (set-megatest-env-vars run-id) ;; these may be needed by the launching process (if (file-exists? runconfigf) (open-run-close setup-env-defaults db runconfigf run-id *already-seen-runconfig-info* "pre-launch-env-vars") (debug:print 0 "WARNING: You do not have a run config file: " runconfigf)) @@ -222,12 +229,12 @@ (begin ;; have to delete test records where NOT_STARTED since they can cause -keepgoing to ;; get stuck due to becoming inaccessible from a failed test. I.e. if test B depends ;; on test A but test B reached the point on being registered as NOT_STARTED and test ;; A failed for some reason then on re-run using -keepgoing the run can never complete. - (open-run-close db:delete-tests-in-state db run-id "NOT_STARTED") - (open-run-close db:set-tests-state-status db run-id test-names #f "FAIL" "NOT_STARTED" "FAIL"))) + (cdb:delete-tests-in-state *runremote* run-id "NOT_STARTED") + (cdb:remote-run db:set-tests-state-status #f run-id test-names #f "FAIL" "NOT_STARTED" "FAIL"))) ;; from here on out the db will be opened and closed on every call runs:run-tests-queue ;; (sqlite3:finalize! db) ;; now add non-directly referenced dependencies (i.e. waiton) (if (not (null? test-names)) @@ -371,11 +378,11 @@ (debug:print 0 "ERROR: test " test-name " has listed itself as a waiton, please correct this!") (set! waiton (filter (lambda (x)(not (equal? x hed))) waitons)))) (cond ;; OUTER COND ((not items) ;; when false the test is ok to be handed off to launch (but not before) - (let* ((run-limits-info (open-run-close runs:can-run-more-tests #f test-record)) ;; look at the test jobgroup and tot jobs running + (let* ((run-limits-info (open-run-close runs:can-run-more-tests test-record)) ;; look at the test jobgroup and tot jobs running (have-resources (car run-limits-info)) (num-running (list-ref run-limits-info 1)) (num-running-in-jobgroup (list-ref run-limits-info 2)) (max-concurrent-jobs (list-ref run-limits-info 3)) (job-group-limit (list-ref run-limits-info 4)) @@ -474,11 +481,11 @@ (loop (car tal)(cdr tal) reruns)))) ;; if items is a proc then need to run items:get-items-from-config, get the list and loop ;; - but only do that if resources exist to kick off the job ((or (procedure? items)(eq? items 'have-procedure)) - (let ((can-run-more (open-run-close runs:can-run-more-tests #f test-record))) + (let ((can-run-more (runs:can-run-more-tests test-record))) (if can-run-more (let* ((prereqs-not-met (open-run-close db:get-prereqs-not-met #f run-id waitons item-path mode: testmode)) (fails (runs:calc-fails prereqs-not-met)) (non-completed (runs:calc-not-completed prereqs-not-met))) (debug:print-info 8 "can-run-more: " can-run-more @@ -500,11 +507,11 @@ (and (eq? testmode 'toplevel) (null? non-completed))) (let ((test-name (tests:testqueue-get-testname test-record))) (setenv "MT_TEST_NAME" test-name) ;; (setenv "MT_RUNNAME" runname) - (open-run-close set-megatest-env-vars #f run-id) ;; these may be needed by the launching process + (set-megatest-env-vars run-id) ;; these may be needed by the launching process (let ((items-list (items:get-items-from-config tconfig))) (if (list? items-list) (begin (tests:testqueue-set-items! test-record items-list) (thread-sleep! *global-delta*) @@ -547,11 +554,11 @@ ;; this case should not happen, added to help catch any bugs ((and (list? items) itemdat) (debug:print 0 "ERROR: Should not have a list of items in a test and the itemspath set - please report this") (exit 1)) ((not (null? reruns)) - (let* ((newlst (open-run-close tests:filter-non-runnable #f run-id tal test-records)) ;; i.e. not FAIL, WAIVED, INCOMPLETE, PASS, KILLED, + (let* ((newlst (tests:filter-non-runnable run-id tal test-records)) ;; i.e. not FAIL, WAIVED, INCOMPLETE, PASS, KILLED, (junked (lset-difference equal? tal newlst))) (debug:print-info 4 "full drop through, if reruns is less than 100 we will force retry them, reruns=" reruns ", tal=" tal) (if (< num-retries max-retries) (set! newlst (append reruns newlst))) (set! num-retries (+ num-retries 1)) @@ -598,11 +605,11 @@ (if (not itemdat)(set! itemdat '())) (set! item-path (item-list->path itemdat)) (debug:print 2 "Attempting to launch test " test-name (if (equal? item-path "/") "/" item-path)) (setenv "MT_TEST_NAME" test-name) ;; (setenv "MT_RUNNAME" runname) - (open-run-close set-megatest-env-vars db run-id) ;; these may be needed by the launching process + (set-megatest-env-vars run-id) ;; these may be needed by the launching process (change-directory *toppath*) ;; Here is where the test_meta table is best updated ;; Yes, another use of a global for caching. Need a better way? (if (not (hash-table-ref/default *test-meta-updated* test-name #f)) @@ -611,12 +618,12 @@ (open-run-close runs:update-test_meta db test-name test-conf))) ;; (lambda (itemdat) ;;; ((ripeness "overripe") (temperature "cool") (season "summer")) (let* ((new-test-path (string-intersperse (cons test-path (map cadr itemdat)) "/")) (new-test-name (if (equal? item-path "") test-name (conc test-name "/" item-path))) ;; just need it to be unique - (test-id (open-run-close db:get-test-id db run-id test-name item-path)) - (testdat (open-run-close db:get-test-info-by-id db test-id))) + (test-id (cdb:remote-run db:get-test-id #f run-id test-name item-path)) + (testdat (cdb:get-test-info-by-id *runremote* test-id))) (if (not testdat) (begin ;; ensure that the path exists before registering the test ;; NOPE: Cannot! Don't know yet which disk area will be assigned.... ;; (system (conc "mkdir -p " new-test-path)) @@ -630,11 +637,11 @@ (begin (debug:print 2 "WARN: Test not pre-created? test-name=" test-name ", item-path=" item-path ", run-id=" run-id) (open-run-close db:tests-register-test #f run-id test-name item-path) (set! test-id (open-run-close db:get-test-id db run-id test-name item-path)))) (debug:print-info 4 "test-id=" test-id ", run-id=" run-id ", test-name=" test-name ", item-path=\"" item-path "\"") - (set! testdat (open-run-close db:get-test-info-by-id db test-id)))) + (set! testdat (cdb:get-test-info-by-id *runremote* test-id)))) (set! test-id (db:test-get-id testdat)) (change-directory test-path) (case (if force ;; (args:get-arg "-force") 'NOT_STARTED (if testdat Index: server.scm ================================================================== --- server.scm +++ server.scm @@ -17,44 +17,55 @@ (declare (unit server)) (declare (uses common)) (declare (uses db)) (declare (uses tests)) +(declare (uses tasks)) ;; tasks are where stuff is maintained about what is running. (include "common_records.scm") (include "db_records.scm") +(define (server:make-server-url hostport) + (if (not hostport) + #f + (conc "tcp://" (car hostport) ":" (cadr hostport)))) +(define *time-to-exit* #f) + (define (server:run hostn) (debug:print 0 "Attempting to start the server ...") - (let ((host:port (open-run-close db:get-var #f "SERVER"))) ;; do whe already have a server running? + (if (not *toppath*)(setup-for-run)) + (let* ((hostport (open-run-close tasks:get-best-server tasks:open-db)) ;; do whe already have a server running? + (host:port (server:make-server-url hostport))) (if host:port (begin (debug:print 0 "NOTE: server already running.") (if (server:client-setup) (begin - (debug:print-info 0 "Server is alive, not starting another") - ;;(exit) - ) + (debug:print-info 0 "Server is alive, not starting another")) (begin - (debug:print-info 0 "Server is dead, removing flag and trying again") - (open-run-close db:del-var #f "SERVER") - (server:run hostn)))) + (debug:print-info 0 "Server is dead, deregistering it, please try again") + (open-run-close tasks:server-deregister tasks:open-db (car hostport) port: (cadr port)) + ;; (server:run hostn) + (debug:print 0 "WOULD NORMALLY START ANOTHER SERVER HERE") + ) + ) + ) (let* ((zmq-socket #f) (hostname (if (string=? "-" hostn) (get-host-name) hostn)) (ipaddrstr (let ((ipstr (if (string=? "-" hostn) (string-intersperse (map number->string (u8vector->list (hostname->ip hostname))) ".") #f))) (if ipstr ipstr hostname)))) - (set! zmq-socket (server:find-free-port-and-open ipaddrstr zmq-socket 5555)) + (set! zmq-socket (server:find-free-port-and-open ipaddrstr zmq-socket 5555 0)) (set! *cache-on* #t) ;; what to do when we quit ;; (on-exit (lambda () - (open-run-close db:del-var #f "SERVER") + (open-run-close tasks:server-deregister-self tasks:open-db) (let loop () (let ((queue-len 0)) (thread-sleep! (random 5)) (mutex-lock! *incoming-mutex*) (set! queue-len (length *incoming-data*)) @@ -72,10 +83,11 @@ (res #f)) (debug:print-info 12 "server=> received params=" params) (set! res (cdb:cached-access params)) (debug:print-info 12 "server=> processed res=" res) (send-message zmq-socket (db:obj->string res)) + (if *time-to-exit* (exit)) (loop))))))) ;; run server:keep-running in a parallel thread to monitor that the db is being ;; used and to shutdown after sometime if it is not. ;; @@ -101,29 +113,33 @@ (debug:print-info 0 "Max cached queries was " *max-cache-size*) (debug:print-info 0 "Server shutdown complete. Exiting") ;; (exit))) )))))) -(define (server:find-free-port-and-open host s port) +(define (server:find-free-port-and-open host s port trynum) (let ((s (if s s (make-socket 'rep))) (p (if (number? port) port 5555))) (handle-exceptions exn (begin (debug:print 0 "Failed to bind to port " p ", trying next port") (debug:print 0 " EXCEPTION: " ((condition-property-accessor 'exn 'message) exn)) - (server:find-free-port-and-open host s (+ p 1))) + (if (< trynum 100) + (server:find-free-port-and-open host s (+ p 1) (+ trynum 1)) + (debug:print-info 0 "Tried ports from " (- p trynum) " to " p + " but all were in use. Please try a different port range by starting the server with parameter \" -port N\" where N is the starting port number to use"))) (let ((zmq-url (conc "tcp://" host ":" p))) (print "Trying to start server on " zmq-url) (bind-socket s zmq-url) (set! *runremote* #f) (debug:print 0 "Server started on " zmq-url) - (open-run-close db:set-var #f "SERVER" zmq-url) + (open-run-close tasks:server-register tasks:open-db (current-process-id) host p 0 'live) s)))) (define (server:client-setup) - (let* ((hostinfo (open-run-close db:get-var #f "SERVER")) + (if (not *toppath*)(setup-for-run)) + (let* ((hostinfo (open-run-close tasks:get-best-server tasks:open-db)) (zmq-socket (make-socket 'req))) (if hostinfo (begin (debug:print-info 2 "Setting up to connect to " hostinfo) (handle-exceptions @@ -130,33 +146,36 @@ exn (begin (debug:print 0 "ERROR: Failed to open a connection to the server at: " hostinfo) (debug:print 0 " EXCEPTION: " ((condition-property-accessor 'exn 'message) exn)) (debug:print 0 " perhaps jobs killed with -9? Removing server records") - (open-run-close db:del-var #f "SERVER") - (exit) + (open-run-close tasks:server-deregister tasks:open-db (car hostinfo) port: (cadr hostinfo)) + ;; (exit) ;; why forced exit? #f) - (let ((connect-ok #f)) - (connect-socket zmq-socket hostinfo) + (let ((connect-ok #f) + (conurl (server:make-server-url hostinfo))) + (connect-socket zmq-socket conurl) (set! connect-ok (cdb:client-call zmq-socket 'login #t *toppath*)) (if connect-ok (begin - (debug:print-info 2 "Logged in and connected to " hostinfo) + (debug:print-info 2 "Logged in and connected to " conurl) (set! *runremote* zmq-socket) #t) (begin - (debug:print-info 2 "Failed to login or connect to " hostinfo) + (debug:print-info 2 "Failed to login or connect to " conurl) (set! *runremote* #f) #f))))) (begin - (debug:print-info 2 "No server available, attempting to start one...") - (system (conc "megatest -server - " (if (args:get-arg "-debug") - (conc "-debug " (args:get-arg "-debug")) - "") - " &")) - (sleep 5) - (server:client-setup))))) + (debug:print-info 0 "NO SERVER RUNNING! PLEASE START ONE! E.g. \"megatest -server - &\"") + ;; (debug:print-info 2 "No server available, attempting to start one...") + ;; (system (conc (car (argv)) " -server - " (if (args:get-arg "-debug") + ;; (conc "-debug " (args:get-arg "-debug")) + ;; "") + ;; " &")) + ;; (sleep 5) + ;; (server:client-setup) + )))) (define (server:launch) (let* ((toppath (setup-for-run))) (debug:print-info 0 "Starting the standalone server") (if *toppath* @@ -164,15 +183,15 @@ (server:run (args:get-arg "-server"))))) (th3 (make-thread (lambda () (server:keep-running))))) (thread-start! th3) (thread-start! th2) - (thread-join! th3) + (thread-join! th2) (set! *didsomething* #t)) (debug:print 0 "ERROR: Failed to setup for megatest")))) (define (server:client-launch) (if (server:client-setup) (debug:print-info 0 "connected as client") (begin (debug:print 0 "ERROR: Failed to connect as client") (exit)))) Index: tasks.scm ================================================================== --- tasks.scm +++ tasks.scm @@ -23,16 +23,16 @@ ;;====================================================================== (define (tasks:open-db) (let* ((dbpath (conc *toppath* "/monitor.db")) (exists (file-exists? dbpath)) - (tdb (sqlite3:open-database dbpath)) ;; (never-give-up-open-db dbpath)) + (mdb (sqlite3:open-database dbpath)) ;; (never-give-up-open-db dbpath)) (handler (make-busy-timeout 36000))) - (sqlite3:set-busy-handler! tdb handler) + (sqlite3:set-busy-handler! mdb handler) (if (not exists) (begin - (sqlite3:execute tdb "CREATE TABLE IF NOT EXISTS tasks_queue (id INTEGER PRIMARY KEY, + (sqlite3:execute mdb "CREATE TABLE IF NOT EXISTS tasks_queue (id INTEGER PRIMARY KEY, action TEXT DEFAULT '', owner TEXT, state TEXT DEFAULT 'new', target TEXT DEFAULT '', name TEXT DEFAULT '', @@ -40,19 +40,96 @@ item TEXT DEFAULT '', keylock TEXT, params TEXT, creation_time TIMESTAMP, execution_time TIMESTAMP);") - (sqlite3:execute tdb "CREATE TABLE IF NOT EXISTS monitors (id INTEGER PRIMARY KEY, + (sqlite3:execute mdb "CREATE TABLE IF NOT EXISTS monitors (id INTEGER PRIMARY KEY, pid INTEGER, start_time TIMESTAMP, last_update TIMESTAMP, hostname TEXT, username TEXT, - CONSTRAINT monitors_constraint UNIQUE (pid,hostname));"))) - tdb)) + CONSTRAINT monitors_constraint UNIQUE (pid,hostname));") + (sqlite3:execute mdb "CREATE TABLE IF NOT EXISTS servers (id INTEGER PRIMARY KEY, + pid INTEGER, + hostname TEXT, + port INTEGER, + start_time TIMESTAMP, + priority INTEGER, + state TEXT, + CONSTRAINT servers_constraint UNIQUE (pid,hostname));") + (sqlite3:execute mdb "CREATE TABLE IF NOT EXISTS clients (id INTEGER PRIMARY KEY, + server_id INTEGER, + pid INTEGER, + hostname TEXT, + cmdline TEXT, + login_time TIMESTAMP, + logout_time TIMESTAMP DEFAULT -1, + CONSTRAINT clients_constraint UNIQUE (pid,hostname));") + + )) + mdb)) +;;====================================================================== +;; Server and client management +;;====================================================================== + +;; state: 'live, 'shutting-down, 'dead +(define (tasks:server-register mdb pid hostname port priority state) + (sqlite3:execute + mdb + "INSERT OR REPLACE INTO servers (pid,hostname,port,start_time,priority,state) VALUES(?,?,?,strftime('%s','now'),?,?);" + pid hostname port priority (conc state))) + +(define (tasks:server-deregister mdb hostname #!key (port #f)(pid #f)) + (if pid + (sqlite3:execute mdb "DELETE FROM servers WHERE hostname=? AND pid=?;" hostname pid) + (if port + (sqlite3:execute mdb "DELETE FROM servers WHERE hostname=? AND port=?;" hostname port) + (debug:print 0 "ERROR: tasks:server-deregister called with neither pid nor port specified")))) + +(define (tasks:server-deregister-self mdb) + (tasks:server-deregister mdb (get-host-name) pid: (current-process-id))) + +(define (tasks:server-get-server-id mdb) + ;; dunno yet + 0) + +(define (tasks:client-register mdb pid hostname cmdline) + (sqlite3:execute + mdb + "INSERT OR REPLACE INTO clients (server_id,pid,hostname,cmdline,login_time) VALUES(?,?,?,?,strftime('%s','now'));") + (tasks:server-get-server-id mdb) + pid hostname cmdline) + +(define (tasks:client-logout mdb pid hostname cmdline) + (sqlite3:execute + mdb + "UPDATE clients SET logout_time=strftime('%s','now') WHERE pid=? AND hostname=? AND cmdline=?;" + pid hostname cmdline)) + +(define (tasks:get-logged-in-clients mdb server-id) + (let ((res '())) + (sqlite3:for-each-row + (lambda (id server-id pid hostname cmdline login-time logout-time) + (set! res (cons (vector id server-id pid hostname cmdline login-time lougout-time) res))) + mdb + "SELECT id,server_id,pid,hostname,cmdline,login_time,logout_time FROM clients WHERE server_id=?;" + server-id))) + +(define (tasks:have-clients? mdb server-id) + (null? (tasks:get-logged-in-clients mdb server-id))) + +(define (tasks:get-best-server mdb) + (let ((res #f)) + (sqlite3:for-each-row + (lambda (id hostname port) + (set! res (list hostname port))) + mdb + "SELECT id,hostname,port FROM servers WHERE state='live' ORDER BY start_time DESC LIMIT 1;") + res)) + ;;====================================================================== ;; Tasks and Task monitors ;;====================================================================== @@ -65,32 +142,32 @@ ;;====================================================================== ;; Task Monitors ;;====================================================================== -(define (tasks:register-monitor db tdb) +(define (tasks:register-monitor db mdb) (let* ((pid (current-process-id)) (hostname (get-host-name)) (userinfo (user-information (current-user-id))) (username (car userinfo))) (print "Register monitor, pid: " pid ", hostname: " hostname ", username: " username) - (sqlite3:execute tdb "INSERT INTO monitors (pid,start_time,last_update,hostname,username) VALUES (?,strftime('%s','now'),strftime('%s','now'),?,?);" + (sqlite3:execute mdb "INSERT INTO monitors (pid,start_time,last_update,hostname,username) VALUES (?,strftime('%s','now'),strftime('%s','now'),?,?);" pid hostname username))) -(define (tasks:get-num-alive-monitors tdb) +(define (tasks:get-num-alive-monitors mdb) (let ((res 0)) (sqlite3:for-each-row (lambda (count) (set! res count)) - tdb + mdb "SELECT count(id) FROM monitors WHERE last_update < (strftime('%s','now') - 300) AND username=?;" (car (user-information (current-user-id)))) res)) ;; register a task -(define (tasks:add tdb action owner target runname test item params) - (sqlite3:execute tdb "INSERT INTO tasks_queue (action,owner,state,target,name,test,item,params,creation_time,execution_time) +(define (tasks:add mdb action owner target runname test item params) + (sqlite3:execute mdb "INSERT INTO tasks_queue (action,owner,state,target,name,test,item,params,creation_time,execution_time) VALUES (?,?,'new',?,?,?,?,?,strftime('%s','now'),0);" action owner target runname @@ -105,28 +182,28 @@ (set! tmp (conc tmp "/" (hash-table-ref/default key-params (vector-ref key 0) "")))) (cdr keys))) tmp)) ;; for use from the gui -(define (tasks:add-from-params tdb action keys key-params var-params) +(define (tasks:add-from-params mdb action keys key-params var-params) (let ((target (keys:key-vals-hash->target keys key-params)) (owner (car (user-information (current-user-id)))) (runname (hash-table-ref/default var-params "runname" #f)) (testpatts (hash-table-ref/default var-params "testpatts" "%")) (itempatts (hash-table-ref/default var-params "itempatts" "%")) (params (hash-table-ref/default var-params "params" ""))) - (tasks:add tdb action owner target runname testpatts itempatts params))) + (tasks:add mdb action owner target runname testpatts itempatts params))) ;; return one task from those who are 'new' OR 'waiting' AND more than 10sec old ;; -(define (tasks:snag-a-task tdb) +(define (tasks:snag-a-task mdb) (let ((res #f) (keytxt (conc (current-process-id) "-" (get-host-name) "-" (car (user-information (current-user-id)))))) ;; first randomly set a new to pid-hostname-hostname (sqlite3:execute - tdb + mdb "UPDATE tasks_queue SET keylock=? WHERE id IN (SELECT id FROM tasks_queue WHERE state='new' OR (state='waiting' AND (strftime('%s','now')-execution_time) > 10) OR state='reset' @@ -133,92 +210,92 @@ ORDER BY RANDOM() LIMIT 1);" keytxt) (sqlite3:for-each-row (lambda (id . rem) (set! res (apply vector id rem))) - tdb + mdb "SELECT id,action,owner,state,target,name,test,item,params,creation_time,execution_time FROM tasks_queue WHERE keylock=? ORDER BY execution_time ASC LIMIT 1;" keytxt) (if res ;; yep, have work to be done (begin - (sqlite3:execute tdb "UPDATE tasks_queue SET state='inprogress',execution_time=strftime('%s','now') WHERE id=?;" + (sqlite3:execute mdb "UPDATE tasks_queue SET state='inprogress',execution_time=strftime('%s','now') WHERE id=?;" (tasks:task-get-id res)) res) #f))) -(define (tasks:reset-stuck-tasks tdb) +(define (tasks:reset-stuck-tasks mdb) (let ((res '())) (sqlite3:for-each-row (lambda (id delta) (set! res (cons id res))) - tdb + mdb "SELECT id,strftime('%s','now')-execution_time AS delta FROM tasks_queue WHERE state='inprogress' AND delta>700 ORDER BY delta DESC LIMIT 2;") (sqlite3:execute - tdb + mdb (conc "UPDATE tasks_queue SET state='reset' WHERE id IN ('" (string-intersperse (map conc res) "','") "');")))) ;; return all tasks in the tasks_queue table ;; -(define (tasks:get-tasks tdb types states) +(define (tasks:get-tasks mdb types states) (let ((res '())) (sqlite3:for-each-row (lambda (id . rem) (set! res (cons (apply vector id rem) res))) - tdb + mdb (conc "SELECT id,action,owner,state,target,name,test,item,params,creation_time,execution_time FROM tasks_queue " ;; WHERE ;; state IN " statesstr " AND ;; action IN " actionsstr " ORDER BY creation_time DESC;")) res)) ;; remove tasks given by a string of numbers comma separated -(define (tasks:remove-queue-entries tdb task-ids) - (sqlite3:execute tdb (conc "DELETE FROM tasks_queue WHERE id IN (" task-ids ");"))) +(define (tasks:remove-queue-entries mdb task-ids) + (sqlite3:execute mdb (conc "DELETE FROM tasks_queue WHERE id IN (" task-ids ");"))) ;; -(define (tasks:start-monitor db tdb) - (if (> (tasks:get-num-alive-monitors tdb) 2) ;; have two running, no need for more +(define (tasks:start-monitor db mdb) + (if (> (tasks:get-num-alive-monitors mdb) 2) ;; have two running, no need for more (debug:print-info 1 "Not starting monitor, already have more than two running") (let* ((megatestdb (conc *toppath* "/megatest.db")) (monitordbf (conc *toppath* "/monitor.db")) (last-db-update 0)) ;; (file-modification-time megatestdb))) - (task:register-monitor tdb) + (task:register-monitor mdb) (let loop ((count 0) (next-touch 0)) ;; next-touch is the time where we need to update last_update ;; if the db has been modified we'd best look at the task queue (let ((modtime (file-modification-time megatestdbpath ))) (if (> modtime last-db-update) - (tasks:process-queue db tdb last-db-update megatestdb next-touch)) + (tasks:process-queue db mdb last-db-update megatestdb next-touch)) ;; WARNING: Possible race conditon here!! ;; should this update be immediately after the task-get-action call above? (if (> (current-seconds) next-touch) (begin - (tasks:monitors-update tdb) + (tasks:monitors-update mdb) (loop (+ count 1)(+ (current-seconds) 240))) (loop (+ count 1) next-touch))))))) -(define (tasks:process-queue db tdb) - (let* ((task (tasks:snag-a-task tdb)) +(define (tasks:process-queue db mdb) + (let* ((task (tasks:snag-a-task mdb)) (action (if task (tasks:task-get-action task) #f))) (if action (print "tasks:process-queue task: " task)) (if action (case (string->symbol action) - ((run) (tasks:start-run db tdb task)) - ((remove) (tasks:remove-runs db tdb task)) - ((lock) (tasks:lock-runs db tdb task)) + ((run) (tasks:start-run db mdb task)) + ((remove) (tasks:remove-runs db mdb task)) + ((lock) (tasks:lock-runs db mdb task)) ;; ((monitor) (tasks:start-monitor db task)) - ((rollup) (tasks:rollup-runs db tdb task)) - ((updatemeta)(tasks:update-meta db tdb task)) - ((kill) (tasks:kill-monitors db tdb task)))))) + ((rollup) (tasks:rollup-runs db mdb task)) + ((updatemeta)(tasks:update-meta db mdb task)) + ((kill) (tasks:kill-monitors db mdb task)))))) -(define (tasks:get-monitors tdb) +(define (tasks:get-monitors mdb) (let ((res '())) (sqlite3:for-each-row (lambda (a . rem) (set! res (cons (apply vector a rem) res))) - tdb + mdb "SELECT id,pid,strftime('%m/%d/%Y %H:%M',datetime(start_time,'unixepoch'),'localtime'),strftime('%m/%d/%Y %H:%M:%S',datetime(last_update,'unixepoch'),'localtime'),hostname,username FROM monitors ORDER BY last_update ASC;") (reverse res) )) (define (tasks:tasks->text tasks) @@ -253,31 +330,31 @@ monitors) "\n")))) ;; update the last_update field with the current time and ;; if any monitors appear dead, remove them -(define (tasks:monitors-update tdb) - (sqlite3:execute tdb "UPDATE monitors SET last_update=strftime('%s','now') WHERE pid=? AND hostname=?;" +(define (tasks:monitors-update mdb) + (sqlite3:execute mdb "UPDATE monitors SET last_update=strftime('%s','now') WHERE pid=? AND hostname=?;" (current-process-id) (get-host-name)) (let ((deadlist '())) (sqlite3:for-each-row (lambda (id pid host last-update delta) (print "Going to delete stale record for monitor with pid " pid " on host " host " last updated " delta " seconds ago") (set! deadlist (cons id deadlist))) - tdb + mdb "SELECT id,pid,hostname,last_update,strftime('%s','now')-last_update AS delta FROM monitors WHERE delta > 700;") - (sqlite3:execute tdb (conc "DELETE FROM monitors WHERE id IN ('" (string-intersperse (map conc deadlist) "','") "');"))) + (sqlite3:execute mdb (conc "DELETE FROM monitors WHERE id IN ('" (string-intersperse (map conc deadlist) "','") "');"))) ) -(define (tasks:remove-monitor-record tdb) - (sqlite3:execute tdb "DELETE FROM monitors WHERE pid=? AND hostname=?;" +(define (tasks:remove-monitor-record mdb) + (sqlite3:execute mdb "DELETE FROM monitors WHERE pid=? AND hostname=?;" (current-process-id) (get-host-name))) -(define (tasks:set-state tdb task-id state) - (sqlite3:execute tdb "UPDATE tasks_queue SET state=? WHERE id=?;" +(define (tasks:set-state mdb task-id state) + (sqlite3:execute mdb "UPDATE tasks_queue SET state=? WHERE id=?;" state task-id)) ;;====================================================================== ;; The routines to process tasks @@ -284,11 +361,11 @@ ;;====================================================================== ;; NOTE: It might be good to add one more layer of checking to ensure ;; that no task gets run in parallel. -(define (tasks:start-run db tdb task) +(define (tasks:start-run db mdb task) (let ((flags (make-hash-table))) (hash-table-set! flags "-rerun" "NOT_STARTED") (if (not (string=? (tasks:task-get-params task) "")) (hash-table-set! flags "-setvars" (tasks:task-get-params task))) (print "Starting run " task) @@ -298,13 +375,13 @@ (tasks:task-get-name task) (tasks:task-get-test task) (tasks:task-get-item task) (tasks:task-get-owner task) flags) - (tasks:set-state tdb (tasks:task-get-id task) "waiting"))) + (tasks:set-state mdb (tasks:task-get-id task) "waiting"))) -(define (tasks:rollup-runs db tdb task) +(define (tasks:rollup-runs db mdb task) (let* ((flags (make-hash-table)) (keys (db:get-keys db)) (keyvallst (keys:target->keyval keys (tasks:task-get-target task)))) ;; (hash-table-set! flags "-rerun" "NOT_STARTED") (print "Starting rollup " task) @@ -312,6 +389,6 @@ (runs:rollup-run db keys keyvallst (tasks:task-get-name task) (tasks:task-get-owner task)) - (tasks:set-state tdb (tasks:task-get-id task) "waiting"))) + (tasks:set-state mdb (tasks:task-get-id task) "waiting"))) Index: tests.scm ================================================================== --- tests.scm +++ tests.scm @@ -108,11 +108,11 @@ #f)) ;; get the previous record for when this test was run where all keys match but runname ;; returns #f if no such test found, returns a single test record if found (define (test:get-previous-test-run-record db run-id test-name item-path) - (let* ((keys (db:get-keys db)) + (let* ((keys (cdb:remote-run db:get-keys #f)) (selstr (string-intersperse (map (lambda (x)(vector-ref x 0)) keys) ",")) (qrystr (string-intersperse (map (lambda (x)(conc (vector-ref x 0) "=?")) keys) " AND ")) (keyvals #f)) ;; first look up the key values from the run selected by run-id (sqlite3:for-each-row @@ -132,11 +132,11 @@ ;; if found then return that matching test record (debug:print 4 "selstr: " selstr ", qrystr: " qrystr ", keyvals: " keyvals ", previous run ids found: " prev-run-ids) (if (null? prev-run-ids) #f (let loop ((hed (car prev-run-ids)) (tal (cdr prev-run-ids))) - (let ((results (db:get-tests-for-run db hed (conc test-name "/" item-path)'() '()))) + (let ((results (cdb:remote-run db:get-tests-for-run #f hed (conc test-name "/" item-path)'() '()))) (debug:print 4 "Got tests for run-id " run-id ", test-name " test-name ", item-path " item-path ": " results) (if (and (null? results) (not (null? tal))) (loop (car tal)(cdr tal)) (if (null? results) #f @@ -144,11 +144,11 @@ ;; get the previous records for when these tests were run where all keys match but runname ;; NB// Merge this with test:get-previous-test-run-records? This one looks for all matching tests ;; can use wildcards. Also can likely be factored in with get test paths? (define (test:get-matching-previous-test-run-records db run-id test-name item-path) - (let* ((keys (db:get-keys db)) + (let* ((keys (cdb:remote-run db:get-keys #f)) (selstr (string-intersperse (map (lambda (x)(vector-ref x 0)) keys) ",")) (qrystr (string-intersperse (map (lambda (x)(conc (vector-ref x 0) "=?")) keys) " AND ")) (keyvals #f) (tests-hash (make-hash-table))) ;; first look up the key values from the run selected by run-id @@ -170,11 +170,11 @@ (debug:print 4 "selstr: " selstr ", qrystr: " qrystr ", keyvals: " keyvals ", previous run ids found: " prev-run-ids) (if (null? prev-run-ids) '() ;; no previous runs? return null (let loop ((hed (car prev-run-ids)) (tal (cdr prev-run-ids))) - (let ((results (db:get-tests-for-run db hed (conc test-name "/" item-path) '() '()))) + (let ((results (cdb:remote-run db:get-tests-for-run #f hed (conc test-name "/" item-path) '() '()))) (debug:print 4 "Got tests for run-id " run-id ", test-name " test-name ", item-path " item-path " results: " (intersperse results "\n")) ;; Keep only the youngest of any test/item combination (for-each (lambda (testdat) @@ -194,11 +194,11 @@ (define (tests:test-set-status! test-id state status comment dat) (debug:print-info 4 "tests:test-set-status! test-id=" test-id ", state=" state ", status=" status ", dat=" dat) (let* ((db #f) (real-status status) (otherdat (if dat dat (make-hash-table))) - (testdat (open-run-close db:get-test-info-by-id db test-id)) + (testdat (cdb:get-test-info-by-id *runremote* test-id)) (run-id (db:test-get-run_id testdat)) (test-name (db:test-get-testname testdat)) (item-path (db:test-get-item-path testdat)) ;; before proceeding we must find out if the previous test (where all keys matched except runname) ;; was WAIVED if this test is FAIL @@ -257,48 +257,45 @@ expected "," tol "," units "," dcomment ",," ;; extra comma for status type ))) - (open-run-close db:csv->test-data db test-id + (cdb:remote-run db:csv->test-data #f test-id dat)))) ;; need to update the top test record if PASS or FAIL and this is a subtest - (open-run-close db:roll-up-pass-fail-counts db run-id test-name item-path status) + (cdb:remote-run db:roll-up-pass-fail-counts #f run-id test-name item-path status) (if (or (and (string? comment) (string-match (regexp "\\S+") comment)) waived) (let ((cmt (if waived waived comment))) - (open-run-close db:test-set-comment db test-id cmt))) + (cdb:remote-run db:test-set-comment #f test-id cmt))) )) + (define (tests:test-set-toplog! db run-id test-name logf) - (sqlite3:execute db "UPDATE tests SET final_logf=? WHERE run_id=? AND testname=? AND item_path='';" - logf run-id test-name)) + (cdb:client-call *runremote* 'tests:test-set-toplog #t logf run-id test-name)) (define (tests:summarize-items db run-id test-name force) ;; if not force then only update the record if one of these is true: ;; 1. logf is "log/final.log ;; 2. logf is same as outputfilename - (let ((outputfilename (conc "megatest-rollup-" test-name ".html")) - (orig-dir (current-directory)) - (logf #f)) + (let* ((outputfilename (conc "megatest-rollup-" test-name ".html")) + (orig-dir (current-directory)) + (logf-info (cdb:remote-run db:test-get-logfile-info #f run-id test-name)) + (logf (if logf-info (cadr logf-info) #f)) + (path (if logf-info (car logf-info) #f))) ;; This query finds the path and changes the directory to it for the test - (sqlite3:for-each-row - (lambda (path final_logf) - (set! logf final_logf) - (if (directory? path) - (begin - (print "Found path: " path) - (change-directory path)) - ;; (set! outputfilename (conc path "/" outputfilename))) - (print "No such path: " path))) - db - "SELECT rundir,final_logf FROM tests WHERE run_id=? AND testname=? AND item_path='';" - run-id test-name) - (print "summarize-items with logf " logf) + (set! logf (car logf-info)) + (if (directory? path) + (begin + (print "Found path: " path) + (change-directory path)) + ;; (set! outputfilename (conc path "/" outputfilename))) + (print "No such path: " path)) + (debug:print 1 "summarize-items with logf " logf) (if (or (equal? logf "logs/final.log") (equal? logf outputfilename) force) (begin (if (obtain-dot-lock outputfilename 1 20 30) ;; retry every second for 20 seconds, call it dead after 30 seconds and steal the lock @@ -306,33 +303,38 @@ (print "Failed to obtain lock for " outputfilename)) (let ((oup (open-output-file outputfilename)) (counts (make-hash-table)) (statecounts (make-hash-table)) (outtxt "") - (tot 0)) + (tot 0) + (testdat (cdb:remote-run db:test-get-records-for-index-file run-id test-name))) (with-output-to-port oup (lambda () (set! outtxt (conc outtxt "Summary: " test-name "

Summary for " test-name "

")) - (sqlite3:for-each-row - (lambda (id itempath state status run_duration logf comment) - (hash-table-set! counts status (+ 1 (hash-table-ref/default counts status 0))) - (hash-table-set! statecounts state (+ 1 (hash-table-ref/default statecounts state 0))) - (set! outtxt (conc outtxt "" - " " itempath "" - "" state "" - "" status "" - "" (if (equal? comment "") - " " - comment) "" - ""))) - db - "SELECT id,item_path,state,status,run_duration,final_logf,comment FROM tests WHERE run_id=? AND testname=? AND item_path != '';" - run-id test-name) - + (for-each + (lambda (testrecord) + (let ((id (vector-ref testrecord 0)) + (itempath (vector-ref testrecord 1)) + (state (vector-ref testrecord 2)) + (status (vector-ref testrecord 3)) + (run_duration (vector-ref testrecord 4)) + (logf (vector-ref testrecord 5)) + (comment (vector-ref testrecord 6))) + (hash-table-set! counts status (+ 1 (hash-table-ref/default counts status 0))) + (hash-table-set! statecounts state (+ 1 (hash-table-ref/default statecounts state 0))) + (set! outtxt (conc outtxt "" + " " itempath "" + "" state "" + "" status "" + "" (if (equal? comment "") + " " + comment) "" + "")))) + testdat) (print "
") ;; Print out stats for status (set! tot 0) (print "") (for-each (lambda (state) @@ -356,10 +358,11 @@ "" outtxt "

State stats

ItemStateStatusComment
") (release-dot-lock outputfilename))) (close-output-port oup) (change-directory orig-dir) + ;; NB// tests:test-set-toplog! is remote internal... (tests:test-set-toplog! db run-id test-name outputfilename) ))))) (define (get-all-legal-tests) (let* ((tests (glob (conc *toppath* "/tests/*"))) @@ -423,22 +426,22 @@ #t ;; if a is a higher priority than b then we are good to go #f)))))))) ;; for each test: ;; -(define (tests:filter-non-runnable db run-id testkeynames testrecordshash) +(define (tests:filter-non-runnable run-id testkeynames testrecordshash) (let ((runnables '())) (for-each (lambda (testkeyname) (let* ((test-record (hash-table-ref testrecordshash testkeyname)) (test-name (tests:testqueue-get-testname test-record)) (itemdat (tests:testqueue-get-itemdat test-record)) (item-path (tests:testqueue-get-item_path test-record)) (waitons (tests:testqueue-get-waitons test-record)) (keep-test #t) - (test-id (db:get-test-id db run-id test-name item-path)) - (tdat (db:get-test-info-by-id db test-id))) + (test-id (cdb:remote-run db:get-test-id #f run-id test-name item-path)) + (tdat (cdb:get-test-info-by-id *runremote* test-id))) (if tdat (begin ;; Look at the test state and status (if (or (member (db:test-get-status tdat) '("PASS" "WARN" "WAIVED" "CHECK")) @@ -449,12 +452,12 @@ ;; examine waitons for any fails. If it is FAIL or INCOMPLETE then eliminate this test ;; from the runnable list (if keep-test (for-each (lambda (waiton) ;; for now we are waiting only on the parent test - (let* ((parent-test-id (db:get-test-id db run-id waiton "")) - (wtdat (db:get-test-info-by-id db test-id))) + (let* ((parent-test-id (cdb:remote-run db:get-test-id #f run-id waiton "")) + (wtdat (cdb:get-test-info-by-id *runremote* test-id))) (if (or (member (db:test-get-status wtdat) '("FAIL" "KILLED")) (member (db:test-get-state wtdat) '("INCOMPETE"))) (set! keep-test #f)))) ;; no point in running this one again @@ -467,13 +470,13 @@ ;; test steps ;;====================================================================== ;; teststep-set-status! used to be here -(define (test-get-kill-request db test-id) ;; run-id test-name itemdat) +(define (test-get-kill-request test-id) ;; run-id test-name itemdat) (let* (;; (item-path (item-list->path itemdat)) - (testdat (db:get-test-info-by-id db test-id))) ;; run-id test-name item-path))) + (testdat (cdb:get-test-info-by-id *runremote* test-id))) ;; run-id test-name item-path))) (equal? (test:get-state testdat) "KILLREQ"))) (define (test:tdb-get-rundat-count tdb) (if tdb (let ((res 0)) Index: tests/Makefile ================================================================== --- tests/Makefile +++ tests/Makefile @@ -18,10 +18,13 @@ NEWTARGET = "-target $(OS)/$(FS)/$(VER)" TARGET = "-target ubuntu/nfs/none" all : test1 test2 test3 test4 test5 +test0 : cleanprep + cd simplerun ; $(MEGATEST) -server - -debug $(DEBUG)& + test1 : cleanprep rm -f simplerun/megatest.db rm -rf simplelinks/ simpleruns/ mkdir -p simplelinks simpleruns cd simplerun;cp ../../*_records.scm .;perl -pi.bak -e 's/define-inline/define/' *_records.scm @@ -48,12 +51,12 @@ cd fullrun;$(MEGATEST) $(SERVER) $(LOGGING) & cd fullrun;sleep 10;$(MEGATEST) -runall $(TARGET) :runname $(RUNNAME)_aa -debug $(DEBUG) $(LOGGING) > aa.log 2> aa.log & cd fullrun;sleep 10;$(MEGATEST) -runall $(TARGET) :runname $(RUNNAME)_ab -debug $(DEBUG) $(LOGGING) > ab.log 2> ab.log & cd fullrun;sleep 10;$(MEGATEST) -runall $(TARGET) :runname $(RUNNAME)_ac -debug $(DEBUG) $(LOGGING) > ac.log 2> ac.log & cd fullrun;sleep 10;$(MEGATEST) -runall $(TARGET) :runname $(RUNNAME)_ad -debug $(DEBUG) $(LOGGING) > ad.log 2> ad.log & - # cd fullrun;sleep 10;$(MEGATEST) -runall $(TARGET) :runname $(RUNNAME)_ae -debug $(DEBUG) $(LOGGING) > ae.log 2> ae.log & - # cd fullrun;sleep 10;$(MEGATEST) -runall $(TARGET) :runname $(RUNNAME)_af -debug $(DEBUG) $(LOGGING) > af.log 2> af.log & + cd fullrun;sleep 10;$(MEGATEST) -runall $(TARGET) :runname $(RUNNAME)_ae -debug $(DEBUG) $(LOGGING) > ae.log 2> ae.log & + cd fullrun;sleep 10;$(MEGATEST) -runall $(TARGET) :runname $(RUNNAME)_af -debug $(DEBUG) $(LOGGING) > af.log 2> af.log & test6: fullprep cd fullrun;$(MEGATEST) -runtests runfirst -testpatt %/1 -reqtarg ubuntu/nfs/none :runname $(RUNNAME)_itempatt -v cd fullrun;$(MEGATEST) -runtests runfirst -testpatt %blahha% -reqtarg ubuntu/nfs/none :runname $(RUNNAME)_itempatt -debug 10 cd fullrun;$(MEGATEST) -rollup :runname newrun -target ubuntu/nfs/none -debug 10 @@ -61,11 +64,11 @@ cleanprep : ../*.scm Makefile */*.config # if [ -e fullrun/megatest.db ]; then sqlite3 fullrun/megatest.db "delete from metadat where var='SERVER';";fi mkdir -p /tmp/mt_runs /tmp/mt_links cd ..;make install - rm -f fullrun/logging.db + rm -f */logging.db */monitor.db touch cleanprep fullprep : cleanprep cd fullrun;$(MEGATEST) -server - -debug $(DEBUG) & sleep 5;cd fullrun;$(MEGATEST) -remove-runs :runname $(RUNNAME)% -target %/%/% -testpatt %/% @@ -80,10 +83,13 @@ clean : rm cleanprep kill : killall -v mtest main.sh dboard || true - rm -f fullrun/megatest.db fullrun/logging.db || true - killall -v -9 mtest dboard || true + rm -f */megatest.db */logging.db */monitor.db || true + killall -v mtest dboard || true + +hardkill : kill + sleep 5;killall -v mtest main.sh dboard -9 runforever : while(ls); do runname=`date +%F-%R:%S`;(cd fullrun;$(MEGATEST) -runall -target ubuntu/nfs/none :runname $$runname;/home/matt/data/megatest/megatest -runall -target ubuntu/nfs/none :runname $$runname;/home/matt/data/megatest/megatest -runall -target ubuntu/nfs/none :runname $$runname);done Index: tests/tests.scm ================================================================== --- tests/tests.scm +++ tests/tests.scm @@ -1,7 +1,22 @@ +;; Copyright 2006-2012, Matthew Welland. +;; +;; This program is made available under the GNU GPL version 2.0 or +;; greater. See the accompanying file COPYING for details. +;; +;; This program is distributed WITHOUT ANY WARRANTY; without even the +;; implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR +;; PURPOSE. + +;; strftime('%m/%d/%Y %H:%M:%S','now','localtime') + (require-extension test) (require-extension regex) +(require-extension srfi-18) +(import srfi-18) +(require-extension zmq) +(import zmq) (define test-work-dir (current-directory)) ;; read in all the _record files (let ((files (glob "*_records.scm"))) @@ -8,10 +23,12 @@ (for-each (lambda (file) (print "Loading " file) (load file)) files)) + +(define *runremote* #f) ;;====================================================================== ;; P R O C E S S E S ;;====================================================================== @@ -53,12 +70,38 @@ ;; test:match->sqlqry (test #f "(testname GLOB 'a' AND item_path GLOB 'b') OR (testname LIKE 'a%' AND item_path LIKE '%') OR (testname GLOB '' AND item_path LIKE 'b%')" (tests:match->sqlqry "a/b,a%,/b%")) (test #f "(testname GLOB 'a' AND item_path GLOB 'b') OR (testname LIKE 'a%' AND item_path LIKE '%') OR (testname LIKE '%' AND item_path LIKE 'b%')" (tests:match->sqlqry "a/b,a%,%/b%")) + +;;====================================================================== +;; S E R V E R +;;====================================================================== + +(test "setup for run" #t (begin (setup-for-run) + (string? (getenv "MT_RUN_AREA_HOME")))) + +(test "server-register, get-best-server" '("bob" 1234) (let ((res #f)) + (open-run-close tasks:server-register tasks:open-db 1 "bob" 1234 100 'live) + (set! res (open-run-close tasks:get-best-server tasks:open-db)) + res)) +(test "de-register server" #f (let ((res #f)) + (open-run-close tasks:server-deregister tasks:open-db "bob" port: 1234) + (open-run-close tasks:get-best-server tasks:open-db))) + ;; (exit) + +(set! *verbosity* 3) ;; enough to trigger turning off exception handling in db accesses +(define server-pid (process-run "../../bin/megatest" (list "-server" "-" "-debug" (conc *verbosity*)))) +(sleep 3) +(set! *verbosity* 1) + +(define th1 (make-thread (lambda ()(server:client-setup)))) +(thread-start! th1) + +(test #f #t (socket? *runremote*)) ;;====================================================================== ;; C O N F I G F I L E S ;;====================================================================== @@ -78,12 +121,10 @@ (define header (list "col1" "col2" "col3" "col4")) (test "Get row by header" "blah" (db:get-value-by-header row header "col4")) ;; (define *toppath* "tests") (define *db* #f) -(test "setup for run" #t (begin (setup-for-run) - (string? (getenv "MT_RUN_AREA_HOME")))) (test "open-db" #t (begin (set! *db* (open-db)) (if *db* #t #f))) ;; quit wasting time, I'm changing *db* to db @@ -107,24 +148,21 @@ (test "write env files" "nada.csh" (begin (save-environment-as-files "nada") (and (file-exists? "nada.sh") (file-exists? "nada.csh")))) + +(test #f #t (cdb:client-call *runremote* 'immediate #f (lambda ()(display "Got here eh!?") #t))) + +;; (set! *verbosity* 20) +(test #f *verbosity* (cdb:set-verbosity *runremote* *verbosity*)) +(test #f #f (cdb:roll-up-pass-fail-counts *runremote* 1 "test1" "" "PASS")) +;; (set! *verbosity* 1) +;; (cdb:set-verbosity *runremote* *verbosity*) (test "get all legal tests" (list "test1" "test2") (sort (get-all-legal-tests) string<=?)) -(test "register-test, test info" "NOT_STARTED" - (begin - (cdb:tests-register-test *remoterun* 1 "nada" "") - ;; (rdb:flush-queue) - (vector-ref (db:get-test-info *db* 1 "nada" "") 3))) - -(test #f "NOT_STARTED" - (begin - (rdb:tests-register-test #f 1 "nada" "") - ;; (rdb:flush-queue) - (vector-ref (open-run-close db:get-test-info #f 1 "nada" "") 3))) (test "get-keys" "SYSTEM" (vector-ref (car (db:get-keys *db*)) 0));; (key:get-fieldname (car (sort (db-get-keys *db*)(lambda (a b)(string>=? (vector-ref a 0)(vector-ref b 0))))))) (define remargs (args:get-args '("bar" "foo" ":runname" "bob" ":SYSTEM" "ubuntu" ":RELEASE" "v1.2" ":datapath" "blah/foo" "nada") @@ -138,10 +176,16 @@ '(("SYSTEM" "key1")("RELEASE" "key2")) "myrun" "new" "n/a" "bob"))) + +(test #f "CACHED" (cdb:tests-register-test *runremote* 1 "nada" "")) +(test #f 1 (cdb:remote-run db:get-test-id #f 1 "nada" "")) +(test #f "NOT_STARTED" (vector-ref (open-run-close db:get-test-info #f 1 "nada" "") 3)) +(test #f "NOT_STARTED" (vector-ref (cdb:get-test-info *runremote* 1 "nada" "") 3)) + (define keys (db:get-keys *db*)) ;;====================================================================== ;; D B ;;====================================================================== @@ -233,46 +277,43 @@ (test "Add a step" #t (begin (db:teststep-set-status! db 2 "step1" "start" 0 "This is a comment" "mylogfile.html") (sleep 2) (db:teststep-set-status! db 2 "step1" "end" "pass" "This is a different comment" "finallogfile.html") - (set! test-id (db:test-get-id (car (db:get-tests-for-run db 1 "test1" '() '())))) + (set! test-id (db:test-get-id (car (cdb:remote-run db:get-tests-for-run #f 1 "test1" '() '())))) (number? test-id))) -(test "Get rundir" #t (let ((rundir (db:test-get-rundir-from-test-id db test-id))) - (print "Rundir" rundir) +(test "Get rundir" #t (let ((rundir (cdb:remote-run db:test-get-rundir-from-test-id #f test-id))) + (print "Rundir " rundir) + (system (conc "mkdir -p " rundir)) (string? rundir))) -(test "Create a test db" "../simpleruns/key1/key2/myrun/test1/testdat.db" (let ((tdb (db:open-test-db-by-test-id db test-id))) - (sqlite3#finalize! tdb) - (file-exists? "../simpleruns/key1/key2/myrun/test1/testdat.db"))) -(test "Get steps for test" #t (> (length (db:get-steps-for-test db test-id)) 0)) +(test #f #t (sqlite3#database? (open-test-db "./"))) +(test "Create a test db" "../simpleruns/key1/key2/myrun/test1/testdat.db" + (let ((tdb (open-run-close db:open-test-db-by-test-id db test-id))) + (if tdb (sqlite3#finalize! tdb)) + (file-exists? "../simpleruns/key1/key2/myrun/test1/testdat.db"))) + +(test "Get steps for test" #t (let ((steps (cdb:remote-run db:get-steps-for-test #f test-id))) + (print steps) + (> (length steps) 0))) (test "Get nice table for steps" "2.0s" (begin - (vector-ref (hash-table-ref (db:get-steps-table db test-id) "step1") 4))) + (vector-ref (hash-table-ref (open-run-close db:get-steps-table #f test-id) "step1") 4))) ;; (exit) ;;====================================================================== ;; R E M O T E C A L L S ;;====================================================================== -;; start a server process -(set! *verbosity* 10) -;; (define server-pid (process-run "../../bin/megatest" (list "-server" "-" "-debug" (conc *verbosity*)))) -;; (sleep 2) - -(define th1 (make-thread server:launch)) -(thread-start! th1) - (define start-wait (current-seconds)) -(server:client-setup) (print "Starting intensive cache and rpc test") (for-each (lambda (params) - ;;; (rdb:tests-register-test #f 1 (conc "test" (random 20)) "") - (apply cdb:test-set-status-state *remoterun* test-id params) - (rdb:pass-fail-counts test-id (random 100) (random 100)) - (rdb:test-rollup-test_data-pass-fail test-id) + (cdb:tests-register-test *runremote* 1 (conc "test" (random 20)) "") + (apply cdb:test-set-status-state *runremote* test-id params) + (cdb:pass-fail-counts *runremote* test-id (random 100) (random 100)) + (cdb:test-rollup-test_data-pass-fail *runremote* test-id) (thread-sleep! 0.01)) ;; cache ordering granularity is at the second level. Should really be at the ms level '(("COMPLETED" "PASS" #f) ("NOT_STARTED" "FAIL" "Just testing") ("NOT_STARTED" "FAIL" "Just testing") ("NOT_STARTED" "FAIL" "Just testing") @@ -311,20 +352,21 @@ ("NOT_STARTED" "FAIL" "Just testing") ("KILLED" "UNKNOWN" "More testing") ("KILLED" "UNKNOWN" "More testing") )) ;; now set all tests to completed -(rdb:flush-queue) -(let ((tests (open-run-close db:get-tests-for-run #f 1 "%" '() '()))) +(cdb:flush-queue *runremote*) +(let ((tests (cdb:remote-run db:get-tests-for-run #f 1 "%" '() '()))) (print "Setting " (length tests) " to COMPLETED/PASS") (for-each (lambda (test) - (rdb:test-set-status-state (db:test-get-id test) "COMPLETED" "PASS" "Forced pass")) + (cdb:test-set-status-state *runremote* (db:test-get-id test) "COMPLETED" "PASS" "Forced pass")) tests)) (print "Waiting for server to be done, should be about 20 seconds") -(process-wait server-pid) +(cdb:kill-server *runremote*) +;; (process-wait server-pid) (test "Server wait time" #t (let ((run-delta (- (current-seconds) start-wait))) (print "Server ran for " run-delta " seconds") (> run-delta 20))) (test "Rollup the run(s)" #t (begin @@ -332,8 +374,10 @@ #t)) (hash-table-set! args:arg-hash ":runname" "%") (test "Remove the rollup run" #t (begin (operate-on 'remove-runs))) + +(thread-join! th1 th2 th3) ;; ADD ME!!!! (db:get-prereqs-not-met *db* 1 '("runfirst") "" mode: 'normal) ;; ADD ME!!!! (rdb:get-tests-for-run *db* 1 "runfirst" #f '() '())