Index: db.scm ================================================================== --- db.scm +++ db.scm @@ -92,11 +92,11 @@ (define (db:hoh-get dat key1 key2) (let* ((subhash (hash-table-ref/default dat key1 #f))) (and subhash (hash-table-ref/default subhash key2 #f)))) -(define (db:get-cache-stmth dbdat run-id db stmt) +(define (db:get-cache-stmth dbdat db stmt) (let* (;; (dbdat (dbfile:get-dbdat dbstruct run-id)) (stmt-cache (dbr:dbdat-stmt-cache dbdat)) (stmth (db:hoh-get stmt-cache db stmt))) (or stmth (let* ((newstmth (sqlite3:prepare db stmt))) @@ -1113,21 +1113,21 @@ (db:with-db dbstruct run-id #f (lambda (dbdat db) (let* ((stmth1 (db:get-cache-stmth - dbdat run-id db + dbdat db "SELECT id,rundir,uname,testname,item_path,event_time,run_duration FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > (run_duration + ?) AND state IN ('RUNNING');")) (stmth2 (db:get-cache-stmth - dbdat run-id db + dbdat db "SELECT id,rundir,uname,testname,item_path,event_time,run_duration FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > (run_duration + ?) AND state IN ('REMOTEHOSTSTART');")) (stmth3 (db:get-cache-stmth - dbdat run-id db + dbdat db "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > 86400 AND state IN ('LAUNCHED');"))) ;; in RUNNING or REMOTEHOSTSTART for more than 10 minutes ;; @@ -1359,12 +1359,11 @@ dbstruct #f #f ;; for the moment vars are only stored in main.db (lambda (dbdat db) (sqlite3:for-each-row (lambda (val) (set! res val)) - db - "SELECT val FROM metadat WHERE var=?;" var) + (db:get-cache-stmth dbdat db "SELECT val FROM metadat WHERE var=?;") var) ;; convert to number if can (if (string? res) (let ((valnum (string->number res))) (if valnum (set! res valnum)))) res)))) @@ -1392,11 +1391,12 @@ ;; (set! *last-global-delta-printed* *global-delta*))) (define (db:set-var dbstruct var val) (db:with-db dbstruct #f #t (lambda (dbdat db) - (sqlite3:execute db "INSERT OR REPLACE INTO metadat (var,val) VALUES (?,?);" var val)))) + (sqlite3:execute (db:get-cache-stmth dbdat db "INSERT OR REPLACE INTO metadat (var,val) VALUES (?,?);") + var val)))) (define (db:add-var dbstruct var val) (db:with-db dbstruct #f #t (lambda (dbdat db) (sqlite3:execute db "UPDATE metadat SET val=val+? WHERE var=?;" val var)))) @@ -1814,23 +1814,23 @@ #f (lambda (dbdat db) ;; remove previous data - (let* ((stmt1 (sqlite3:prepare db "DELETE FROM run_stats WHERE run_id=? AND state=? AND status=?;")) - (stmt2 (sqlite3:prepare db "INSERT INTO run_stats (run_id,state,status,count) VALUES (?,?,?,?);")) + (let* ((stmt1 (db:get-cache-stmth dbdat db "DELETE FROM run_stats WHERE run_id=? AND state=? AND status=?;")) + (stmt2 (db:get-cache-stmth dbdat db "INSERT INTO run_stats (run_id,state,status,count) VALUES (?,?,?,?);")) (res (sqlite3:with-transaction db (lambda () (for-each (lambda (dat) (sqlite3:execute stmt1 run-id (car dat)(cadr dat)) (apply sqlite3:execute stmt2 run-id dat)) stats))))) - (sqlite3:finalize! stmt1) - (sqlite3:finalize! stmt2) + ;; (sqlite3:finalize! stmt1) + ;; (sqlite3:finalize! stmt2) ;; (mutex-unlock! *db-transaction-mutex*) res)))) (define (db:get-main-run-stats dbstruct run-id) (db:with-db @@ -2432,23 +2432,24 @@ (define (db:test-set-state-status dbstruct run-id test-id newstate newstatus newcomment) (db:with-db dbstruct run-id #f (lambda (dbdat db) - (db:test-set-state-status-db db run-id test-id newstate newstatus newcomment)))) + (db:test-set-state-status-db dbdat db run-id test-id newstate newstatus newcomment)))) -(define (db:test-set-state-status-db db run-id test-id newstate newstatus newcomment) +;; dbdat needed for cached prepared statements +(define (db:test-set-state-status-db dbdat db run-id test-id newstate newstatus newcomment) (cond ((and newstate newstatus newcomment) - (sqlite3:execute db "UPDATE tests SET state=?,status=?,comment=? WHERE id=?;" newstate newstatus newcomment ;; (sdb:qry 'getid newcomment) + (sqlite3:execute (db:get-cache-stmth dbdat db "UPDATE tests SET state=?,status=?,comment=? WHERE id=?;") newstate newstatus newcomment ;; (sdb:qry 'getid newcomment) test-id)) ((and newstate newstatus) - (sqlite3:execute db "UPDATE tests SET state=?,status=? WHERE id=?;" newstate newstatus test-id)) + (sqlite3:execute (db:get-cache-stmth dbdat db "UPDATE tests SET state=?,status=? WHERE id=?;") newstate newstatus test-id)) (else - (if newstate (sqlite3:execute db "UPDATE tests SET state=? WHERE id=?;" newstate test-id)) - (if newstatus (sqlite3:execute db "UPDATE tests SET status=? WHERE id=?;" newstatus test-id)) - (if newcomment (sqlite3:execute db "UPDATE tests SET comment=? WHERE id=?;" newcomment ;; (sdb:qry 'getid newcomment) + (if newstate (sqlite3:execute (db:get-cache-stmth dbdat db "UPDATE tests SET state=? WHERE id=?;") newstate test-id)) + (if newstatus (sqlite3:execute (db:get-cache-stmth dbdat db "UPDATE tests SET status=? WHERE id=?;") newstatus test-id)) + (if newcomment (sqlite3:execute (db:get-cache-stmth dbdat db "UPDATE tests SET comment=? WHERE id=?;") newcomment ;; (sdb:qry 'getid newcomment) test-id)))) ;; (mt:process-triggers dbstruct run-id test-id newstate newstatus)) ;; NOTE: Moved into calling function ) ;; NEW BEHAVIOR: Count tests running in all runs! @@ -2460,11 +2461,11 @@ (db:with-db dbstruct run-id #f (lambda (dbdat db) - (let* ((stmth (db:get-cache-stmth dbdat run-id db qry))) + (let* ((stmth (db:get-cache-stmth dbdat db qry))) (sqlite3:first-result stmth)))))) ;; NEW BEHAVIOR: Count tests running in only one run! ;; (define (db:get-count-tests-actually-running dbstruct run-id) @@ -2490,11 +2491,11 @@ (db:with-db dbstruct run-id #f (lambda (dbdat db) - (let* ((stmth (db:get-cache-stmth dbdat run-id db qry))) + (let* ((stmth (db:get-cache-stmth dbdat db qry))) (sqlite3:first-result stmth run-id)))))) ;; For a given testname how many items are running? Used to determine ;; probability for regenerating html ;; @@ -2503,11 +2504,11 @@ dbstruct run-id #f (lambda (dbdat db) (let* ((stmt "SELECT count(id) FROM tests WHERE state in ('RUNNING','LAUNCHED','REMOTEHOSTSTART') AND run_id=? AND NOT (uname = 'n/a' AND item_path = '') AND testname=?;") - (stmth (db:get-cache-stmth dbdat run-id db stmt))) + (stmth (db:get-cache-stmth dbdat db stmt))) (sqlite3:first-result stmth run-id testname))))) (define (db:get-not-completed-cnt dbstruct run-id) (db:with-db @@ -2799,12 +2800,12 @@ dbstruct run-id #t (lambda (dbdat db) (sqlite3:execute - db - "INSERT OR REPLACE into test_steps (test_id,stepname,state,status,event_time,comment,logfile) VALUES(?,?,?,?,?,?,?);" + (db:get-cache-stmth dbdat db + "INSERT OR REPLACE into test_steps (test_id,stepname,state,status,event_time,comment,logfile) VALUES(?,?,?,?,?,?,?);") test-id teststep-name state-in status-in (current-seconds) (if comment comment "") (if logfile logfile ""))))) @@ -2877,11 +2878,11 @@ (db:with-db dbstruct run-id #f (lambda (dbdat db) - (let* ((stmth (db:get-cache-stmth dbdat #f db stmt)) + (let* ((stmth (db:get-cache-stmth dbdat db stmt)) (res (sqlite3:fold-row (lambda (res id test-id category variable value expected tol units comment status type last-update) (vector id test-id category variable value expected tol units comment status type last-update)) (vector #f #f #f #f #f #f #f #f #f #f #f #f) stmth @@ -3225,11 +3226,11 @@ (let ((tr-res (sqlite3:with-transaction db (lambda () ;; NB// Pass the db so it is part fo the transaction - (db:test-set-state-status-db db run-id test-id state status comment) ;; this call sets the item state/status + (db:test-set-state-status-db dbdat db run-id test-id state status comment) ;; this call sets the item state/status (if (not (equal? item-path "")) ;; only roll up IF incoming test is an item (let* ((state-status-counts (db:get-all-state-status-counts-for-test db run-id test-name item-path state status)) ;; item-path is used to exclude current state/status of THIS test (state-statuses (db:roll-up-rules state-status-counts state status)) (newstate (car state-statuses)) (newstatus (cadr state-statuses))) @@ -3240,11 +3241,11 @@ (map (lambda (x) (conc (with-output-to-string (lambda () (pp (dbr:counts->alist x)))) " | ")) state-status-counts))); end debug:print (if tl-test-id - (db:test-set-state-status-db db run-id tl-test-id newstate newstatus #f)) ;; we are still in the transaction - must access the db and not the dbstruct + (db:test-set-state-status-db dbdat db run-id tl-test-id newstate newstatus #f)) ;; we are still in the transaction - must access the db and not the dbstruct )))))) (mutex-unlock! *db-transaction-mutex*) (if (and test-id state status (equal? status "AUTO")) (db:test-data-rollup dbstruct run-id test-id status)) (if new-state-eh ;; moved from db:test-set-state-status @@ -3587,11 +3588,11 @@ db:queries))) (if q (car q) #f)))) (db:with-db dbstruct run-id #f (lambda (dbdat db) - (apply sqlite3:execute db query params) + (apply sqlite3:execute (db:get-cache-stmth dbdat db query) params) #t)))) ;; get a summary of state and status counts to calculate a rollup ;; (define (db:get-state-status-summary dbstruct run-id testname) @@ -4651,11 +4652,12 @@ (set! *task-db* #f))))) (if (and (not (args:get-arg "-server")) *runremote*) (begin (debug:print-info 0 *default-log-port* "Closing all client connections...") - (http-client#close-all-connections!))) + (http-transport:close-connections *runremote*) + #;(http-client#close-all-connections!))) ;; (if (and *runremote* ;; (remote-conndat *runremote*)) ;; (begin ;; (http-client#close-all-connections!))) ;; for http-client (if (not (eq? *default-log-port* (current-error-port))) Index: http-transport.scm ================================================================== --- http-transport.scm +++ http-transport.scm @@ -296,11 +296,12 @@ ;; (tasks:kill-server-run-id run-id) ;; better to kill the server in the logic that called this routine? (mutex-unlock! *http-mutex*) ;; (signal (make-composite-condition ;; (make-property-condition 'commfail 'message "failed to connect to server"))) ;; "communications failed" - (close-all-connections!) + ;; (close-all-connections!) + (close-connection! fullurl) (db:obj->string #f)) (with-input-from-request ;; was dat fullurl (list (cons 'key (or server-id "thekey")) (cons 'cmd cmd) @@ -357,12 +358,14 @@ (handle-exceptions exn (begin (print-call-chain *default-log-port*) (debug:print-error 0 *default-log-port* " closing connection failed with error: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn)) + (if (args:any-defined? "-server" "-execute" "-run") + (debug:print-info 0 *default-log-port* "Closing connections to "api-dat)) (close-connection! api-dat) - ;; (close-idle-connections!) + (close-connection! (http-transport:server-dat-make-url server-dat)) (remote-conndat-set! runremote #f) #t)) #f))) Index: launch.scm ================================================================== --- launch.scm +++ launch.scm @@ -222,11 +222,10 @@ (let loop ((minutes (calc-minutes)) (cpu-load (alist-ref 'adj-core-load (common:get-normalized-cpu-load #f))) (disk-free (get-df (current-directory))) (last-sync (current-seconds))) - ;; (common:telemetry-log "zombie" (conc "launch:monitor-job - top of loop encountered at "(current-seconds)" with last-sync="last-sync)) (let* ((over-time (> (current-seconds) (+ last-sync update-period))) (new-cpu-load (let* ((load (alist-ref 'adj-core-load (common:get-normalized-cpu-load #f))) (delta (abs (- load cpu-load)))) (if (> delta 0.1) ;; don't bother updating with small changes load @@ -238,38 +237,35 @@ (if (and (> df 0) (> (/ delta df) 0.1)) ;; (> delta 200) ;; ignore changes under 200 Meg df #f))) (do-sync (or new-cpu-load new-disk-free over-time)) - - (test-info (rmt:get-test-info-by-id run-id test-id)) - (state (db:test-get-state test-info)) - (status (db:test-get-status test-info)) + ;; instead of looking for KILLREQ we are looking for a file KILLREQUEST - see tests.scm test-get-kill-request + ;; (test-info (rmt:get-test-info-by-id run-id test-id)) + ;; (state (db:test-get-state test-info)) + ;; (status (db:test-get-status test-info)) (kill-reason "no kill reason specified") (kill-job? #f)) - ;; (common:telemetry-log "zombie" (conc "launch:monitor-job - decision time encountered at "(current-seconds)" with last-sync="last-sync" do-sync="do-sync" over-time="over-time" update-period="update-period)) (cond ((test-get-kill-request run-id test-id) (set! kill-reason "KILLING TEST since received kill request (KILLREQ)") (set! kill-job? #t)) ((and runtlim (> (- (current-seconds) start-seconds) runtlim)) (set! kill-reason (conc "KILLING TEST DUE TO TIME LIMIT EXCEEDED! Runtime=" (- (current-seconds) start-seconds) " seconds, limit=" runtlim)) (set! kill-job? #t)) - ((equal? status "DEAD") + #;((equal? status "DEAD") (tests:update-central-meta-info run-id test-id new-cpu-load new-disk-free (calc-minutes) #f #f) (rmt:set-state-status-and-roll-up-items run-id test-id 'foo "RUNNING" "n/a" "was marked dead; really still running.") ;;(set! kill-reason "KILLING TEST because it was marked as DEAD by launch:handle-zombie-tests (might indicate really overloaded server or else overzealous setup.deadtime)") ;; MARK RUNNING (set! kill-job? #f))) (debug:print 4 *default-log-port* "cpu: " new-cpu-load " disk: " new-disk-free " last-sync: " last-sync " do-sync: " do-sync) (launch:handle-zombie-tests run-id) (when do-sync - ;;(with-output-to-file (conc (getenv "MT_TEST_RUN_DIR") "/last-loadinfo.log" #:append) - ;; (lambda () (pp (list (current-seconds) new-cpu-load new-disk-free (calc-minutes))))) - ;; (common:telemetry-log "zombie" (conc "launch:monitor-job - dosync started at "(current-seconds))) - (tests:update-central-meta-info run-id test-id new-cpu-load new-disk-free (calc-minutes) #f #f) - ;; (common:telemetry-log "zombie" (conc "launch:monitor-job - dosync finished at "(current-seconds))) + (with-output-to-file (conc (getenv "MT_TEST_RUN_DIR") "/last-loadinfo.log" #:append) + (lambda () (pp (list (current-seconds) new-cpu-load new-disk-free (calc-minutes))))) + ;; (tests:update-central-meta-info run-id test-id new-cpu-load new-disk-free (calc-minutes) #f #f) ) (if kill-job? (begin (debug:print-info 0 *default-log-port* "proceeding to kill test: "kill-reason) Index: rmt.scm ================================================================== --- rmt.scm +++ rmt.scm @@ -42,13 +42,12 @@ ;;====================================================================== ;; if a server is either running or in the process of starting call client:setup ;; else return #f to let the calling proc know that there is no server available ;; -(define (rmt:get-connection-info areapath #!key (area-dat #f)) ;; TODO: push areapath down. - (let* ((runremote (or area-dat *runremote*)) - (cinfo (if (remote? runremote) +(define (rmt:get-connection-info areapath runremote) ;; TODO: push areapath down. + (let* ((cinfo (if (remote? runremote) (remote-conndat runremote) #f))) (if cinfo cinfo (if (server:check-if-running areapath) @@ -262,11 +261,11 @@ (not (remote-conndat runremote)))) ;; and no connection (debug:print-info 12 *default-log-port* "rmt:send-receive, case 9, hh-dat: " (remote-hh-dat runremote) " conndat: " (remote-conndat runremote)) (mutex-unlock! *rmt-mutex*) (if (not (server:check-if-running *toppath*)) ;; who knows, maybe one has started up? (server:start-and-wait *toppath*)) - (remote-conndat-set! runremote (rmt:get-connection-info *toppath*)) ;; calls client:setup which calls client:setup-http + (remote-conndat-set! runremote (rmt:get-connection-info *toppath* runremote)) ;; calls client:setup which calls client:setup-http (rmt:send-receive cmd rid params attemptnum: attemptnum)) ;; TODO: add back-off timeout as ;;DOT CASE10 [label="on homehost"]; ;;DOT MUTEXLOCK -> CASE10 [label="server not required,\non homehost"]; {rank=same "case 10" CASE10}; ;;DOT CASE10 -> "rmt:open-qry-close-locally"; Index: tests.scm ================================================================== --- tests.scm +++ tests.scm @@ -1966,11 +1966,13 @@ ;;====================================================================== ;; teststep-set-status! used to be here (define (test-get-kill-request run-id test-id) ;; run-id test-name itemdat) - (let* ((testdat (rmt:get-test-info-by-id run-id test-id))) + (or (file-exists? (conc (getenv "MT_TEST_RUN_DIR")"/KILLREQUEST")) + (file-exists? (conc (getenv "MT_LINKTREE")"/"(getenv "MT_TARGET")"/"(getenv "MT_RUNNAME")"/KILLREQUEST"))) + #;(let* ((testdat (rmt:get-test-info-by-id run-id test-id))) (and testdat (equal? (test:get-state testdat) "KILLREQ")))) (define (test:tdb-get-rundat-count tdb) (if tdb