Overview
Comment: | Force values to be real in journal stats collection. still broken though |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | v1.81-journal-based-throttling |
Files: | files | file ages | folders |
SHA1: |
c906466bb072b9a51bb8ae23bac47e31 |
User & Date: | matt on 2024-07-10 20:11:42 |
Other Links: | branch diff | manifest | tags |
Context
2024-07-11
| ||
06:06 | Merged in latest from v1.81 check-in: afc4721a06 user: mrwellan tags: v1.81-journal-based-throttling | |
2024-07-10
| ||
20:11 | Force values to be real in journal stats collection. still broken though check-in: c906466bb0 user: matt tags: v1.81-journal-based-throttling | |
18:10 | Gate test launch based on journal load. Values from load calc seem wrong. Should be 0-1.0 but seeing integers 0, 1, 2 ... check-in: 2635b582e7 user: mrwellan tags: v1.81-journal-based-throttling | |
Changes
Modified rmt.scm from [a86edf18db] to [07ade4ba5d].
︙ | ︙ | |||
99 100 101 102 103 104 105 | (dbdir (conc areapath "/.mtdb"))) (if (and (not *journal-stats*) (file-exists? dbdir)) (tt:start-stats dbdir)) ;; fixme - find the right call to get the db directory ;; check the load on dbfname and add some delay using a droop curve of sorts (if *journal-stats* | | < | 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 | (dbdir (conc areapath "/.mtdb"))) (if (and (not *journal-stats*) (file-exists? dbdir)) (tt:start-stats dbdir)) ;; fixme - find the right call to get the db directory ;; check the load on dbfname and add some delay using a droop curve of sorts (if *journal-stats* (let* ((load (tt:get-journal-stats dbfname))) (if (> load 0) (let ((dely (* 10 load))) (debug:print 0 *default-log-port* "Journal load "load" delaying queries "dely"s.") (thread-sleep! dely))))) (case (rmt:transport-mode) ((tcp) |
︙ | ︙ |
Modified runs.scm from [dadc9aecb3] to [832a86263e].
︙ | ︙ | |||
1149 1150 1151 1152 1153 1154 1155 | (all-tests-registry (runs:dat-all-tests-registry runsdat)) (run-limits-info (runs:dat-can-run-more-tests runsdat)) ;; (runs:can-run-more-tests run-id jobgroup max-concurrent-jobs)) ;; look at the test jobgroup and tot jobs running (have-resources (and (if *journal-stats* (let* ((dbfname (conc (dbfile:run-id->dbnum run-id) ".db")) | | < | 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 | (all-tests-registry (runs:dat-all-tests-registry runsdat)) (run-limits-info (runs:dat-can-run-more-tests runsdat)) ;; (runs:can-run-more-tests run-id jobgroup max-concurrent-jobs)) ;; look at the test jobgroup and tot jobs running (have-resources (and (if *journal-stats* (let* ((dbfname (conc (dbfile:run-id->dbnum run-id) ".db")) (load (tt:get-journal-stats dbfname))) (if (> load 0.1) ;; dbs too busy to start more tests (begin (debug:print-info 0 *default-log-port* "Gating launch due to db load "load" based on journal file observations for "dbfname) #f) #t)) #t) ;; if journal monitoring not started do not gate (car run-limits-info))) |
︙ | ︙ |
Modified tcp-transportmod.scm from [8195ca9d01] to [c97bb376ec].
︙ | ︙ | |||
1173 1174 1175 1176 1177 1178 1179 | (directory-fold (lambda (fname res) ;; is it a journal file? (let ((parts (string-match "^(.*\\.db)-journal.*" fname))) (match parts ((_ dbfname) (hash-table-set! (jstats-jcount jstat) dbfname | | | 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 | (directory-fold (lambda (fname res) ;; is it a journal file? (let ((parts (string-match "^(.*\\.db)-journal.*" fname))) (match parts ((_ dbfname) (hash-table-set! (jstats-jcount jstat) dbfname (+ (hash-table-ref/default (jstats-jcount jstat) dbfname 0) 1.0) )) (else #f) ))) '() dbdir ))) |
︙ | ︙ | |||
1199 1200 1201 1202 1203 1204 1205 | ;; call this to start a thread that is keeping the journal-stats up to date. (define (tt:start-stats dbdir) (thread-start! (make-thread (lambda ()(tt:journal-stats-run dbdir)) "Journal stats collection thread"))) | | | 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 | ;; call this to start a thread that is keeping the journal-stats up to date. (define (tt:start-stats dbdir) (thread-start! (make-thread (lambda ()(tt:journal-stats-run dbdir)) "Journal stats collection thread"))) (define (tt:get-journal-stats #!optional (dbfname #f)) (let* ((result (make-jstats)) (hitcounts (jstats-jcount result))) (if *journal-stats* (begin (mutex-lock! *journal-stats-mutex*) (hash-table-for-each *journal-stats* |
︙ | ︙ | |||
1222 1223 1224 1225 1226 1227 1228 | (lambda (dbfname hit-count) (hash-table-set! hitcounts dbfname (+ hit-count (hash-table-ref/default hitcounts dbfname 0)))))))) (mutex-unlock! *journal-stats-mutex*)) (debug:print 0 *default-log-port* "INFO: *journal-stats* not set.")) ;; convert to normalized alist | | | | | | | > > > | 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 | (lambda (dbfname hit-count) (hash-table-set! hitcounts dbfname (+ hit-count (hash-table-ref/default hitcounts dbfname 0)))))))) (mutex-unlock! *journal-stats-mutex*)) (debug:print 0 *default-log-port* "INFO: *journal-stats* not set.")) ;; convert to normalized alist (let* ((tot (min (jstats-count result) 1)) ;; avoid divide by zero (hits (jstats-jcount result)) ;; 1.db => count (res (hash-table-map hits (lambda (fname hitcount) (cons fname (/ hitcount tot)))))) (if dbfname (or (alist-ref dbfname res equal?) 0) res)))) ;; megatest> (import tcp-transportmod) ;; megatest> (tt:write-load-tracking ".mtdb") ;; megatest> (hash-table-keys *journal-stats*) ;; (172060297) ;; megatest> (jstats->alist (hash-table-ref *journal-stats* 172060297)) ;; ((count . 1) (jcount . #<hash-table (1)>)) ;; megatest> (jstats-jcount (hash-table-ref *journal-stats* 172060297)) ;; #<hash-table (1)> ;; megatest> (hash-table->alist (jstats-jcount (hash-table-ref *journal-stats* 172060297))) ;; (("1.db" . 4)) ) |