Index: db.scm ================================================================== --- db.scm +++ db.scm @@ -3079,23 +3079,21 @@ test-id)))))) (mt:process-triggers dbstruct run-id test-id newstate newstatus)) ;; NEW BEHAVIOR: Count tests running in all runs! ;; -(define (db:get-count-tests-running dbstruct run-id) +(define (db:get-count-tests-running dbstruct run-id fastmode) + (let* ((qry (if fastmode + "SELECT count(id) FROM tests WHERE state in ('RUNNING','LAUNCHED','REMOTEHOSTSTART') AND NOT (uname = 'n/a' AND item_path = '') LIMIT 1;" + "SELECT count(id) FROM tests WHERE state in ('RUNNING','LAUNCHED','REMOTEHOSTSTART') AND NOT (uname = 'n/a' AND item_path = '');"))) (db:with-db dbstruct run-id #f (lambda (db) - (sqlite3:first-result - db - ;; WARNING BUG EDIT ME - merged from v1.55 - not sure what is right here ... - ;; AND run_id NOT IN (SELECT id FROM runs WHERE state='deleted') - "SELECT count(id) FROM tests WHERE state in ('RUNNING','LAUNCHED','REMOTEHOSTSTART') AND NOT (uname = 'n/a' AND item_path = '');" - ;; "SELECT count(id) FROM tests WHERE state in ('RUNNING','LAUNCHED','REMOTEHOSTSTART') AND run_id=?;" - )))) + (let* ((stmth (db:get-cache-stmth dbstruct db qry))) + (sqlite3:first-result stmth)))))) ;; NEW BEHAVIOR: Count tests running in only one run! ;; (define (db:get-count-tests-actually-running dbstruct run-id) (db:with-db @@ -3111,19 +3109,21 @@ run-id)))) ;; NOT IN (SELECT id FROM runs WHERE state='deleted');") ;; NEW BEHAVIOR: Look only at single run with run-id ;; ;; (define (db:get-running-stats dbstruct run-id) -(define (db:get-count-tests-running-for-run-id dbstruct run-id) - (db:with-db - dbstruct - run-id - #f - (lambda (db) - (sqlite3:first-result - db - "SELECT count(id) FROM tests WHERE state in ('RUNNING','LAUNCHED','REMOTEHOSTSTART') AND run_id=?;" run-id)))) +(define (db:get-count-tests-running-for-run-id dbstruct run-id fastmode) + (let* ((qry (if fastmode + "SELECT count(id) FROM tests WHERE state in ('RUNNING','LAUNCHED','REMOTEHOSTSTART') AND run_id=? LIMIT 1;" + "SELECT count(id) FROM tests WHERE state in ('RUNNING','LAUNCHED','REMOTEHOSTSTART') AND run_id=?;"))) + (db:with-db + dbstruct + run-id + #f + (lambda (db) + (let* ((stmth (db:get-cache-stmth dbstruct db qry))) + (sqlite3:first-result stmth run-id)))))) ;; For a given testname how many items are running? Used to determine ;; probability for regenerating html ;; (define (db:get-count-tests-running-for-testname dbstruct run-id testname) Index: launch.scm ================================================================== --- launch.scm +++ launch.scm @@ -904,12 +904,12 @@ ;; > 3 RUNNING with not test_dead do nothing (run should already be RUNNING/ na ;; > 0 RUNNING and test_dead then send KILLREQ ==> COMPLETED ;; 0 RUNNING ==> this is actually the first condition, should not get here (define (launch:end-of-run-check run-id ) - (let* ((not-completed-cnt (rmt:get-not-completed-cnt run-id)) - (running-cnt (rmt:get-count-tests-running-for-run-id run-id)) + (let* ((not-completed-cnt (rmt:get-not-completed-cnt run-id)) + (running-cnt (rmt:get-count-tests-running-for-run-id run-id #f)) ;; fastmode=no (all-test-launched (rmt:get-var (conc "lunch-complete-" run-id))) (current-state (rmt:get-run-state run-id)) (current-status (rmt:get-run-status run-id))) ;;get-vars run-id to query metadata table to check if all completed. if all-test-launched = yes then only not-completed-cnt = 0 means everyting is completed if no entry found in the table do nothing (debug:print 0 *default-log-port* "Running test cnt :" running-cnt) Index: rmt.scm ================================================================== --- rmt.scm +++ rmt.scm @@ -691,21 +691,21 @@ ;; (rmt:send-receive #f 'get-run-ids-matching (list keynames target res))) (define (rmt:get-prereqs-not-met run-id waitons ref-test-name ref-item-path #!key (mode '(normal))(itemmaps #f)) (rmt:send-receive 'get-prereqs-not-met run-id (list run-id waitons ref-test-name ref-item-path mode itemmaps))) -(define (rmt:get-count-tests-running-for-run-id run-id) - (rmt:send-receive 'get-count-tests-running-for-run-id run-id (list run-id))) +(define (rmt:get-count-tests-running-for-run-id run-id fastmode) + (rmt:send-receive 'get-count-tests-running-for-run-id run-id (list run-id fastmode))) (define (rmt:get-not-completed-cnt run-id) (rmt:send-receive 'get-not-completed-cnt run-id (list run-id))) ;; Statistical queries -(define (rmt:get-count-tests-running run-id) - (rmt:send-receive 'get-count-tests-running run-id (list run-id))) +(define (rmt:get-count-tests-running run-id fastmode) + (rmt:send-receive 'get-count-tests-running run-id (list run-id fastmode))) (define (rmt:get-count-tests-running-for-testname run-id testname) (rmt:send-receive 'get-count-tests-running-for-testname run-id (list run-id testname))) (define (rmt:get-count-tests-running-in-jobgroup run-id jobgroup) Index: runs.scm ================================================================== --- runs.scm +++ runs.scm @@ -519,11 +519,11 @@ 10) ;; obviously haven't had any work to do for a while (else ;; if have a number for inter-test-delay, use it, else don't delay much, maybe even zero? (configf:lookup-number *configdat* "setup" "inter-test-delay" default: 0.01)))) - (let* ((num-running (rmt:get-count-tests-running run-id)) + (let* ((num-running (rmt:get-count-tests-running run-id #f)) ;; fastmode=no (num-running-in-jobgroup (rmt:get-count-tests-running-in-jobgroup run-id jobgroup)) (job-group-limit (let ((jobg-count (configf:lookup *configdat* "jobgroups" jobgroup))) (if (string? jobg-count) (string->number jobg-count) jobg-count)))) @@ -1744,11 +1744,11 @@ extras) '()))) (waitons (delete-duplicates (append (tests:testqueue-get-waitons test-record) extra-waits) equal?)) (newtal (append tal (list hed))) (regfull (>= (length reg) reglen)) - (num-running (rmt:get-count-tests-running-for-run-id run-id)) + (num-running (rmt:get-count-tests-running-for-run-id run-id #t)) ;; fastmode=yes (testdat (make-runs:testdat hed: hed tal: tal reg: reg reruns: reruns @@ -1964,29 +1964,31 @@ (rmt:set-var (conc "lunch-complete-" run-id) "yes") ;; now *if* -run-wait we wait for all tests to be done ;; Now wait for any RUNNING tests to complete (if in run-wait mode) (thread-sleep! 10) ;; I think there is a race condition here. Let states/statuses settle - (let wait-loop ((num-running (rmt:get-count-tests-running-for-run-id run-id)) + (let wait-loop ((num-running (rmt:get-count-tests-running-for-run-id run-id #t)) ;; fastmode=yes (prev-num-running 0)) ;; (debug:print-info 13 *default-log-port* "num-running=" num-running ", prev-num-running=" prev-num-running) (if (and (or (args:get-arg "-run-wait") (equal? (configf:lookup *configdat* "setup" "run-wait") "yes")) (> num-running 0)) (begin ;; Here we mark any old defunct tests as incomplete. Do this every fifteen minutes ;; (debug:print 0 *default-log-port* "Got here eh! num-running=" num-running " (> num-running 0) " (> num-running 0)) (if (> (current-seconds)(+ last-time-incomplete 900)) - (begin - (debug:print-info 0 *default-log-port* "Marking stuck tests as INCOMPLETE while waiting for run " run-id ". Running as pid " (current-process-id) " on " (get-host-name)) - (set! last-time-incomplete (current-seconds)) - (rmt:find-and-mark-incomplete run-id #f))) - (if (not (eq? num-running prev-num-running)) - (debug:print-info 0 *default-log-port* "run-wait specified, waiting on " num-running " tests in RUNNING, REMOTEHOSTSTART or LAUNCHED state at " (time->string (seconds->local-time (current-seconds))))) + (let ((actual-num-running (rmt:get-count-tests-running-for-run-id run-id #f))) ;; fastmode=no + (debug:print-info 0 *default-log-port* "Marking stuck tests as INCOMPLETE while waiting for run " run-id + ". Running as pid " (current-process-id) " on " (get-host-name)) + (set! last-time-incomplete (current-seconds)) ;; FIXME, this might be causing slow down - use of set! + (rmt:find-and-mark-incomplete run-id #f) + (debug:print-info 0 *default-log-port* "run-wait specified, waiting on " actual-num-running + " tests in RUNNING, REMOTEHOSTSTART or LAUNCHED state at " + (time->string (seconds->local-time (current-seconds)))))) (thread-sleep! 5) - ;; (wait-loop (rmt:get-count-tests-running-for-run-id run-id) num-running)))) - (wait-loop (rmt:get-count-tests-running-for-run-id run-id) num-running)))) + (wait-loop (rmt:get-count-tests-running-for-run-id run-id #t) ;; fastmode=yes + num-running)))) ;; LET* ((test-record ;; we get here on "drop through". All done! ;; this is moved to runs:run-testes since this function is getting called twice to ensure everthing is completed. ;; (debug:print-info 0 *default-log-port* "Calling Post Hook") ;; (runs:run-post-hook run-id)