Index: dashboard.scm ================================================================== --- dashboard.scm +++ dashboard.scm @@ -168,10 +168,12 @@ ;; updaters: (make-hash-table) ;; updating: #f ;; hide-not-hide-tabs: #f ;; target: "" ;; )) + +(set! *journal-stats-enable* #f) ;;====================================================================== ;; buttons color using image ;;====================================================================== Index: rmt.scm ================================================================== --- rmt.scm +++ rmt.scm @@ -102,12 +102,12 @@ (tt:start-stats dbdir)) ;; fixme - find the right call to get the db directory ;; check the load on dbfname and add some delay using a droop curve of sorts (if *journal-stats* (let* ((load (tt:get-journal-stats dbfname))) - (if (> load 0) - (let ((dely (* 10 load))) + (if (> load 0.1) ;; start activating delay at 10% journal load time + (let ((dely (* 50 (* load load)))) ;; 100% journal time=50sec delay (debug:print 0 *default-log-port* "Journal load "load" delaying queries "dely"s.") (thread-sleep! dely))))) (case (rmt:transport-mode) ((tcp) Index: runs.scm ================================================================== --- runs.scm +++ runs.scm @@ -1154,14 +1154,16 @@ (dbfile:run-id->dbnum run-id) ".db")) (load (tt:get-journal-stats dbfname))) (if (> load 0.1) ;; dbs too busy to start more tests (begin - (debug:print-info 0 *default-log-port* "Gating launch due to db load "load" based on journal file observations for "dbfname) + (debug:print-info 0 *default-log-port* "Gating launch due to db load "load" based on journal file observations for "dbfname) #f) #t)) - #t) ;; if journal monitoring not started do not gate + (begin + (debug:print-info 0 *default-log-port* "Journal gating not started for "run-id) + #t)) ;; if journal monitoring not started do not gate (car run-limits-info))) (num-running (list-ref run-limits-info 1)) (num-running-in-jobgroup(list-ref run-limits-info 2)) (max-concurrent-jobs (list-ref run-limits-info 3)) (job-group-limit (list-ref run-limits-info 4)) @@ -1388,11 +1390,11 @@ #f ;; yes, really (list (car tal)(cdr tal) reg reruns)) (begin (if (runs:lownoise (conc "FAILED prerequisites or other issue" hed) 60) (debug:print 0 *default-log-port* "WARNING: test " hed " has FAILED prerequisites or other issue. Internal state >" nth-try "< will be overridden and we'll retry.")) - (let* ((test-id (rmt:get-test-id run-id testname item-path)) + (let* ((test-id (rmt:get-test-id run-id hed item-path)) (test-info (rmt:get-testinfo-state-status run-id test-id)) ;; we need *current* info (status (db:test-status test-info))) (if (equal? status "KEEP_TRYING") (mt:test-set-state-status-by-testname-unless-completed run-id test-name item-path "COMPLETED" "PREQ_FAIL" #f) (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "KEEP_TRYING" #f))) Index: tcp-transportmod.scm ================================================================== --- tcp-transportmod.scm +++ tcp-transportmod.scm @@ -1141,51 +1141,53 @@ (jcount (make-hash-table)) ;; 1.db => journal_count ) ;; timeblk => jstats (define *journal-stats* #f) ;; (make-hash-table)) +(define *journal-stats-enable* #t) ;; change to #f to turn off ;; monte-carlo-esque random sampling of journal files ;; for all the files: ;; if .journal ;; update stats +1 +1 ;; update stats +1 0 ;; (define (tt:write-load-tracking dbdir) - (let* ((cs (current-seconds)) - (key (inexact->exact (quotient cs 10))) - (old (- key 5)) ;; 4 x 10 seconds ago - (jstat (if (hash-table-exists? *journal-stats* key) - (hash-table-ref *journal-stats* key ) - (let ((new (make-jstats))) - (hash-table-set! *journal-stats* key new) - new)))) - ;; clear out old records - (for-each - (lambda (key) - (if (< key old) - (hash-table-delete! *journal-stats* key))) - (hash-table-keys *journal-stats*)) - - ;; increment our count of observations - (jstats-count-set! jstat (+ (jstats-count jstat) 1)) - - ;; now find and increment journal file counts - (directory-fold - (lambda (fname res) - ;; is it a journal file? - (let ((parts (string-match "^(.*\\.db)-journal.*" fname))) - (match parts - ((_ dbfname) - (hash-table-set! (jstats-jcount jstat) dbfname - (+ (hash-table-ref/default (jstats-jcount jstat) dbfname 0) 1.0) - )) - (else #f) - ))) - '() - dbdir - ))) + (if *journal-stats-enable* + (let* ((cs (current-seconds)) + (key (inexact->exact (quotient cs 10))) + (old (- key 5)) ;; 4 x 10 seconds ago + (jstat (if (hash-table-exists? *journal-stats* key) + (hash-table-ref *journal-stats* key ) + (let ((new (make-jstats))) + (hash-table-set! *journal-stats* key new) + new)))) + ;; clear out old records + (for-each + (lambda (key) + (if (< key old) + (hash-table-delete! *journal-stats* key))) + (hash-table-keys *journal-stats*)) + + ;; increment our count of observations + (jstats-count-set! jstat (+ (jstats-count jstat) 1)) + + ;; now find and increment journal file counts + (directory-fold + (lambda (fname res) + ;; is it a journal file? + (let ((parts (string-match "^(.*\\.db)-journal.*" fname))) + (match parts + ((_ dbfname) + (hash-table-set! (jstats-jcount jstat) dbfname + (+ (hash-table-ref/default (jstats-jcount jstat) dbfname 0) 1.0) + )) + (else #f) + ))) + '() + dbdir + )))) (define *journal-stats-mutex* (make-mutex)) (define (tt:journal-stats-run dbdir) (if (not *journal-stats*)(set! *journal-stats* (make-hash-table))) @@ -1204,11 +1206,12 @@ (lambda ()(tt:journal-stats-run dbdir)) "Journal stats collection thread"))) (define (tt:get-journal-stats #!optional (dbfname #f)) (let* ((result (make-jstats)) (hitcounts (jstats-jcount result))) - (if *journal-stats* + (if (and *journal-stats* + *journal-stats-enable*) (begin (mutex-lock! *journal-stats-mutex*) (hash-table-for-each *journal-stats* (lambda (k v) ;; key jstats @@ -1224,11 +1227,11 @@ (+ hit-count (hash-table-ref/default hitcounts dbfname 0)))))))) (mutex-unlock! *journal-stats-mutex*)) (debug:print 0 *default-log-port* "INFO: *journal-stats* not set.")) ;; convert to normalized alist - (let* ((tot (min (jstats-count result) 1)) ;; avoid divide by zero + (let* ((tot (max (jstats-count result) 1)) ;; avoid divide by zero (hits (jstats-jcount result)) ;; 1.db => count (res (hash-table-map hits (lambda (fname hitcount) (cons fname (/ hitcount tot))))))