Overview
Comment: | Turn off throttling in dashboard. Tweaked values and got proper output from the journal sensor |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | v1.81-journal-based-throttling |
Files: | files | file ages | folders |
SHA1: | b1c53d218a69287cda4a3271be31a88b |
User & Date: | mrwellan on 2024-07-11 11:06:09 |
Other Links: | branch diff | manifest | tags |
Context
2024-07-11
| ||
13:28 | Added setcicd script for registering branches ready to merge check-in: 3ced0f4705 user: mrwellan tags: v1.81-journal-based-throttling | |
11:06 | Turn off throttling in dashboard. Tweaked values and got proper output from the journal sensor check-in: b1c53d218a user: mrwellan tags: v1.81-journal-based-throttling | |
06:06 | Merged in latest from v1.81 check-in: afc4721a06 user: mrwellan tags: v1.81-journal-based-throttling | |
Changes
Modified dashboard.scm from [e5b27b795a] to [89bc431aeb].
︙ | ︙ | |||
166 167 168 169 170 171 172 173 174 175 176 177 178 179 | ;; please-update: #t ;; update-mutex: (make-mutex) ;; updaters: (make-hash-table) ;; updating: #f ;; hide-not-hide-tabs: #f ;; target: "" ;; )) ;;====================================================================== ;; buttons color using image ;;====================================================================== (define *images* (make-hash-table)) | > > | 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 | ;; please-update: #t ;; update-mutex: (make-mutex) ;; updaters: (make-hash-table) ;; updating: #f ;; hide-not-hide-tabs: #f ;; target: "" ;; )) (set! *journal-stats-enable* #f) ;;====================================================================== ;; buttons color using image ;;====================================================================== (define *images* (make-hash-table)) |
︙ | ︙ |
Modified rmt.scm from [07ade4ba5d] to [e8bc4b391f].
︙ | ︙ | |||
100 101 102 103 104 105 106 | (if (and (not *journal-stats*) (file-exists? dbdir)) (tt:start-stats dbdir)) ;; fixme - find the right call to get the db directory ;; check the load on dbfname and add some delay using a droop curve of sorts (if *journal-stats* (let* ((load (tt:get-journal-stats dbfname))) | | | | 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 | (if (and (not *journal-stats*) (file-exists? dbdir)) (tt:start-stats dbdir)) ;; fixme - find the right call to get the db directory ;; check the load on dbfname and add some delay using a droop curve of sorts (if *journal-stats* (let* ((load (tt:get-journal-stats dbfname))) (if (> load 0.1) ;; start activating delay at 10% journal load time (let ((dely (* 50 (* load load)))) ;; 100% journal time=50sec delay (debug:print 0 *default-log-port* "Journal load "load" delaying queries "dely"s.") (thread-sleep! dely))))) (case (rmt:transport-mode) ((tcp) (let* ((start-time (current-seconds)) ;; snapshot time so all use cases get same value (attemptnum (+ 1 attemptnum)) |
︙ | ︙ |
Modified runs.scm from [832a86263e] to [adfae1025a].
︙ | ︙ | |||
1152 1153 1154 1155 1156 1157 1158 | (have-resources (and (if *journal-stats* (let* ((dbfname (conc (dbfile:run-id->dbnum run-id) ".db")) (load (tt:get-journal-stats dbfname))) (if (> load 0.1) ;; dbs too busy to start more tests (begin | | > > | | 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 | (have-resources (and (if *journal-stats* (let* ((dbfname (conc (dbfile:run-id->dbnum run-id) ".db")) (load (tt:get-journal-stats dbfname))) (if (> load 0.1) ;; dbs too busy to start more tests (begin (debug:print-info 0 *default-log-port* "Gating launch due to db load "load" based on journal file observations for "dbfname) #f) #t)) (begin (debug:print-info 0 *default-log-port* "Journal gating not started for "run-id) #t)) ;; if journal monitoring not started do not gate (car run-limits-info))) (num-running (list-ref run-limits-info 1)) (num-running-in-jobgroup(list-ref run-limits-info 2)) (max-concurrent-jobs (list-ref run-limits-info 3)) (job-group-limit (list-ref run-limits-info 4)) ;; (prereqs-not-met (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps)) ;; (prereqs-not-met (mt:lazy-get-prereqs-not-met run-id waitons item-path mode: testmode itemmap: itemmap)) |
︙ | ︙ | |||
1386 1387 1388 1389 1390 1391 1392 | (if (eq? nth-try 'removed) ;; removed is removed - drop it NOW (if (null? tal) #f ;; yes, really (list (car tal)(cdr tal) reg reruns)) (begin (if (runs:lownoise (conc "FAILED prerequisites or other issue" hed) 60) (debug:print 0 *default-log-port* "WARNING: test " hed " has FAILED prerequisites or other issue. Internal state >" nth-try "< will be overridden and we'll retry.")) | | | 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 | (if (eq? nth-try 'removed) ;; removed is removed - drop it NOW (if (null? tal) #f ;; yes, really (list (car tal)(cdr tal) reg reruns)) (begin (if (runs:lownoise (conc "FAILED prerequisites or other issue" hed) 60) (debug:print 0 *default-log-port* "WARNING: test " hed " has FAILED prerequisites or other issue. Internal state >" nth-try "< will be overridden and we'll retry.")) (let* ((test-id (rmt:get-test-id run-id hed item-path)) (test-info (rmt:get-testinfo-state-status run-id test-id)) ;; we need *current* info (status (db:test-status test-info))) (if (equal? status "KEEP_TRYING") (mt:test-set-state-status-by-testname-unless-completed run-id test-name item-path "COMPLETED" "PREQ_FAIL" #f) (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "KEEP_TRYING" #f))) (hash-table-set! test-registry hed 'removed) ;; was 0 (if (not (and (null? reg) (null? tal))) |
︙ | ︙ |
Modified tcp-transportmod.scm from [172e93584b] to [d0258d10eb].
︙ | ︙ | |||
1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 | (defstruct jstats (count 0) (jcount (make-hash-table)) ;; 1.db => journal_count ) ;; timeblk => jstats (define *journal-stats* #f) ;; (make-hash-table)) ;; monte-carlo-esque random sampling of journal files ;; for all the files: ;; if .journal ;; update stats +1 +1 ;; update stats +1 0 ;; (define (tt:write-load-tracking dbdir) | > > | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 | (defstruct jstats (count 0) (jcount (make-hash-table)) ;; 1.db => journal_count ) ;; timeblk => jstats (define *journal-stats* #f) ;; (make-hash-table)) (define *journal-stats-enable* #t) ;; change to #f to turn off ;; monte-carlo-esque random sampling of journal files ;; for all the files: ;; if .journal ;; update stats +1 +1 ;; update stats +1 0 ;; (define (tt:write-load-tracking dbdir) (if *journal-stats-enable* (let* ((cs (current-seconds)) (key (inexact->exact (quotient cs 10))) (old (- key 5)) ;; 4 x 10 seconds ago (jstat (if (hash-table-exists? *journal-stats* key) (hash-table-ref *journal-stats* key ) (let ((new (make-jstats))) (hash-table-set! *journal-stats* key new) new)))) ;; clear out old records (for-each (lambda (key) (if (< key old) (hash-table-delete! *journal-stats* key))) (hash-table-keys *journal-stats*)) ;; increment our count of observations (jstats-count-set! jstat (+ (jstats-count jstat) 1)) ;; now find and increment journal file counts (directory-fold (lambda (fname res) ;; is it a journal file? (let ((parts (string-match "^(.*\\.db)-journal.*" fname))) (match parts ((_ dbfname) (hash-table-set! 
(jstats-jcount jstat) dbfname (+ (hash-table-ref/default (jstats-jcount jstat) dbfname 0) 1.0) )) (else #f) ))) '() dbdir )))) (define *journal-stats-mutex* (make-mutex)) (define (tt:journal-stats-run dbdir) (if (not *journal-stats*)(set! *journal-stats* (make-hash-table))) (let loop () (mutex-lock! *journal-stats-mutex*) |
︙ | ︙ | |||
1202 1203 1204 1205 1206 1207 1208 | (thread-start! (make-thread (lambda ()(tt:journal-stats-run dbdir)) "Journal stats collection thread"))) (define (tt:get-journal-stats #!optional (dbfname #f)) (let* ((result (make-jstats)) (hitcounts (jstats-jcount result))) | | > | | 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 | (thread-start! (make-thread (lambda ()(tt:journal-stats-run dbdir)) "Journal stats collection thread"))) (define (tt:get-journal-stats #!optional (dbfname #f)) (let* ((result (make-jstats)) (hitcounts (jstats-jcount result))) (if (and *journal-stats* *journal-stats-enable*) (begin (mutex-lock! *journal-stats-mutex*) (hash-table-for-each *journal-stats* (lambda (k v) ;; key jstats (let* ((count (jstats-count v)) (jcount (jstats-jcount v))) ;; dbfname => hit count (jstats-count-set! result (+ (jstats-count result) (jstats-count v))) (hash-table-for-each jcount (lambda (dbfname hit-count) (hash-table-set! hitcounts dbfname (+ hit-count (hash-table-ref/default hitcounts dbfname 0)))))))) (mutex-unlock! *journal-stats-mutex*)) (debug:print 0 *default-log-port* "INFO: *journal-stats* not set.")) ;; convert to normalized alist (let* ((tot (max (jstats-count result) 1)) ;; avoid divide by zero (hits (jstats-jcount result)) ;; 1.db => count (res (hash-table-map hits (lambda (fname hitcount) (cons fname (/ hitcount tot)))))) (if dbfname (or (alist-ref dbfname res equal?) 0) |
︙ | ︙ |