Overview
Comment: | added qif |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | v1.80 |
Files: | files | file ages | folders |
SHA1: |
7e09817061438a0bb50e84485d1d728d |
User & Date: | matt on 2023-02-01 23:41:12 |
Other Links: | branch diff | manifest | tags |
Context
2023-02-02
| ||
06:39 | improved droop curved for queries in flight (qif). check-in: d1dc75ffab user: matt tags: v1.80 | |
2023-02-01
| ||
23:41 | added qif check-in: 7e09817061 user: matt tags: v1.80 | |
20:38 | Convert one statement to prepared, test and do more check-in: 87e172c693 user: matt tags: v1.80 | |
Changes
Modified api.scm from [1fa92fd71b] to [9f9940bc49].
︙ | ︙ | |||
187 188 189 190 191 192 193 | newmutex))) (start-t (current-milliseconds)) (readonly-mode (dbr:dbstruct-read-only dbstruct)) (readonly-command (member cmd api:read-only-queries)) (writecmd-in-readonly-mode (and readonly-mode (not readonly-command)))) (if (not readonly-command) (mutex-lock! write-mutex)) | > > | > | 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 | newmutex))) (start-t (current-milliseconds)) (readonly-mode (dbr:dbstruct-read-only dbstruct)) (readonly-command (member cmd api:read-only-queries)) (writecmd-in-readonly-mode (and readonly-mode (not readonly-command)))) (if (not readonly-command) (mutex-lock! write-mutex)) (let* ((tmppath (dbr:dbstruct-tmppath dbstruct)) (crumbfile (dbfile:wait-for-qif tmppath run-id (cons cmd params))) (res (if writecmd-in-readonly-mode (conc "attempt to run write command "cmd" on a read-only database") (api:dispatch-request dbstruct cmd run-id params)))) (delete-file* crumbfile) (if (not readonly-command) (mutex-unlock! write-mutex)) ;; save all stats (let ((delta-t (- (current-milliseconds) start-t)) (modified-cmd (if (eq? cmd 'general-call) |
︙ | ︙ |
Modified db.scm from [cfe2b3bd89] to [93e4d59dca].
︙ | ︙ | |||
1006 1007 1008 1009 1010 1011 1012 | ;; D B U T I L S ;;====================================================================== ;;====================================================================== ;; M A I N T E N A N C E ;;====================================================================== | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 | ;; D B U T I L S ;;====================================================================== ;;====================================================================== ;; M A I N T E N A N C E ;;====================================================================== ;; (define (db:have-incompletes? dbstruct run-id ovr-deadtime) ;; (let* ((incompleted '()) ;; (oldlaunched '()) ;; (toplevels '()) ;; (deadtime-str (configf:lookup *configdat* "setup" "deadtime")) ;; (deadtime (if (and deadtime-str ;; (string->number deadtime-str)) ;; (string->number deadtime-str) ;; 72000))) ;; twenty hours ;; (db:with-db ;; dbstruct run-id #f ;; (lambda (dbdat db) ;; (if (number? ovr-deadtime)(set! deadtime ovr-deadtime)) ;; ;; ;; in RUNNING or REMOTEHOSTSTART for more than 10 minutes ;; ;; ;; ;; HOWEVER: this code in run:test seems to work fine ;; ;; (> (- (current-seconds)(+ (db:test-get-event_time testdat) ;; ;; (db:test-get-run_duration testdat))) ;; ;; 600) ;; ;; (db:delay-if-busy dbdat) ;; (sqlite3:for-each-row ;; (lambda (test-id run-dir uname testname item-path) ;; (if (and (equal? uname "n/a") ;; (equal? item-path "")) ;; this is a toplevel test ;; ;; what to do with toplevel? call rollup? ;; (begin ;; (set! toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels)) ;; (debug:print-info 0 *default-log-port* "Found old toplevel test in RUNNING state, test-id=" test-id)) ;; (set! incompleted (cons (list test-id run-dir uname testname item-path run-id) incompleted)))) ;; (db:get-cache-stmth dbdat db ;; "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > (run_duration + ?) AND state IN ('RUNNING','REMOTEHOSTSTART');") ;; run-id deadtime) ;; ;; ;; in LAUNCHED for more than one day. Could be long due to job queues TODO/BUG: Need override for this in config ;; ;; ;; ;; (db:delay-if-busy dbdat) ;; (sqlite3:for-each-row ;; (lambda (test-id run-dir uname testname item-path) ;; (if (and (equal? uname "n/a") ;; (equal? item-path "")) ;; this is a toplevel test ;; ;; what to do with toplevel? call rollup? ;; (set! toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels)) ;; (set! oldlaunched (cons (list test-id run-dir uname testname item-path run-id) oldlaunched)))) ;; (db:get-cache-stmth dbdat db ;; "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > 86400 AND state IN ('LAUNCHED');") ;; run-id) ;; ;; (debug:print-info 18 *default-log-port* "Found " (length oldlaunched) " old LAUNCHED items, " (length toplevels) " old LAUNCHED toplevel tests and " (length incompleted) " tests marked RUNNING but apparently dead.") ;; (if (and (null? incompleted) ;; (null? oldlaunched) ;; (null? toplevels)) ;; #f ;; #t))))) (define (db:get-status-from-final-status-file run-dir) (let ((infile (conc run-dir "/.final-status"))) ;; first verify we are able to write the output file (if (not (file-read-access? infile)) (begin (debug:print 2 *default-log-port* "ERROR: cannot read " infile) |
︙ | ︙ | |||
1102 1103 1104 1105 1106 1107 1108 | (debug:print-info 4 *default-log-port* "running-deadtime = " running-deadtime) (debug:print-info 4 *default-log-port* "deadtime-trim = " deadtime-trim) (db:with-db dbstruct run-id #f (lambda (dbdat db) (let* ((stmth1 (db:get-cache-stmth | | | | | 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 | (debug:print-info 4 *default-log-port* "running-deadtime = " running-deadtime) (debug:print-info 4 *default-log-port* "deadtime-trim = " deadtime-trim) (db:with-db dbstruct run-id #f (lambda (dbdat db) (let* ((stmth1 (db:get-cache-stmth dbdat db "SELECT id,rundir,uname,testname,item_path,event_time,run_duration FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > (run_duration + ?) AND state IN ('RUNNING');")) (stmth2 (db:get-cache-stmth dbdat db "SELECT id,rundir,uname,testname,item_path,event_time,run_duration FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > (run_duration + ?) AND state IN ('REMOTEHOSTSTART');")) (stmth3 (db:get-cache-stmth dbdat db "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > 86400 AND state IN ('LAUNCHED');"))) ;; in RUNNING or REMOTEHOSTSTART for more than 10 minutes ;; ;; HOWEVER: this code in run:test seems to work fine ;; (> (- (current-seconds)(+ (db:test-get-event_time testdat) |
︙ | ︙ | |||
1381 1382 1383 1384 1385 1386 1387 | ;; (begin ;; (debug:print-info 4 *default-log-port* "launch throttle factor=" *global-delta*) ;; (set! *last-global-delta-printed* *global-delta*))) (define (db:set-var dbstruct var val) (db:with-db dbstruct #f #t (lambda (dbdat db) | | | | | 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 | ;; (begin ;; (debug:print-info 4 *default-log-port* "launch throttle factor=" *global-delta*) ;; (set! *last-global-delta-printed* *global-delta*))) (define (db:set-var dbstruct var val) (db:with-db dbstruct #f #t (lambda (dbdat db) (sqlite3:execute (db:get-cache-stmth dbdat db "INSERT OR REPLACE INTO metadat (var,val) VALUES (?,?);") var val)))) (define (db:add-var dbstruct var val) (db:with-db dbstruct #f #t (lambda (dbdat db) (sqlite3:execute (db:get-cache-stmth dbdat db "UPDATE metadat SET val=val+? WHERE var=?;") val var)))) (define (db:del-var dbstruct var) (db:with-db dbstruct #f #t (lambda (dbdat db) (sqlite3:execute (db:get-cache-stmth dbdat db "DELETE FROM metadat WHERE var=?;") var)))) ;;====================================================================== ;; no-sync.db - small bits of data to be shared between servers ;;====================================================================== (define (db:no-sync-db db-in) (if db-in |
︙ | ︙ | |||
2449 2450 2451 2452 2453 2454 2455 | ;; "SELECT count(id) FROM tests WHERE state in ('RUNNING','LAUNCHED','REMOTEHOSTSTART') AND NOT (uname = 'n/a' AND item_path = '') LIMIT 1;" "SELECT count(id) FROM tests WHERE state in ('RUNNING','LAUNCHED','REMOTEHOSTSTART') AND NOT (uname = 'n/a' AND item_path = '');")) ;; ) (db:with-db dbstruct run-id #f (lambda (dbdat db) | | | 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 | ;; "SELECT count(id) FROM tests WHERE state in ('RUNNING','LAUNCHED','REMOTEHOSTSTART') AND NOT (uname = 'n/a' AND item_path = '') LIMIT 1;" "SELECT count(id) FROM tests WHERE state in ('RUNNING','LAUNCHED','REMOTEHOSTSTART') AND NOT (uname = 'n/a' AND item_path = '');")) ;; ) (db:with-db dbstruct run-id #f (lambda (dbdat db) (let* ((stmth (db:get-cache-stmth dbdat db qry))) (sqlite3:first-result stmth)))))) ;; NEW BEHAVIOR: Count tests running in only one run! ;; (define (db:get-count-tests-actually-running dbstruct run-id) (db:with-db dbstruct |
︙ | ︙ | |||
2479 2480 2481 2482 2483 2484 2485 | ;; "SELECT count(id) FROM tests WHERE state in ('RUNNING','LAUNCHED','REMOTEHOSTSTART') AND run_id=? LIMIT 1;" "SELECT count(id) FROM tests WHERE state in ('RUNNING','LAUNCHED','REMOTEHOSTSTART') AND run_id=?;")) ;; ) (db:with-db dbstruct run-id #f (lambda (dbdat db) | | | | 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 | ;; "SELECT count(id) FROM tests WHERE state in ('RUNNING','LAUNCHED','REMOTEHOSTSTART') AND run_id=? LIMIT 1;" "SELECT count(id) FROM tests WHERE state in ('RUNNING','LAUNCHED','REMOTEHOSTSTART') AND run_id=?;")) ;; ) (db:with-db dbstruct run-id #f (lambda (dbdat db) (let* ((stmth (db:get-cache-stmth dbdat db qry))) (sqlite3:first-result stmth run-id)))))) ;; For a given testname how many items are running? Used to determine ;; probability for regenerating html ;; (define (db:get-count-tests-running-for-testname dbstruct run-id testname) (db:with-db dbstruct run-id #f (lambda (dbdat db) (let* ((stmt "SELECT count(id) FROM tests WHERE state in ('RUNNING','LAUNCHED','REMOTEHOSTSTART') AND run_id=? AND NOT (uname = 'n/a' AND item_path = '') AND testname=?;") (stmth (db:get-cache-stmth dbdat db stmt))) (sqlite3:first-result stmth run-id testname))))) (define (db:get-not-completed-cnt dbstruct run-id) (db:with-db dbstruct run-id |
︙ | ︙ | |||
2866 2867 2868 2869 2870 2871 2872 | (define (db:get-data-info-by-id dbstruct run-id test-data-id) (let* ((stmt "SELECT id,test_id, category, variable, value, expected, tol, units, comment, status, type, last_update FROM test_data WHERE id=? ORDER BY id ASC;")) ;; event_time DESC,id ASC; (db:with-db dbstruct run-id #f (lambda (dbdat db) | | | 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 | (define (db:get-data-info-by-id dbstruct run-id test-data-id) (let* ((stmt "SELECT id,test_id, category, variable, value, expected, tol, units, comment, status, type, last_update FROM test_data WHERE id=? ORDER BY id ASC;")) ;; event_time DESC,id ASC; (db:with-db dbstruct run-id #f (lambda (dbdat db) (let* ((stmth (db:get-cache-stmth dbdat db stmt)) (res (sqlite3:fold-row (lambda (res id test-id category variable value expected tol units comment status type last-update) (vector id test-id category variable value expected tol units comment status type last-update)) (vector #f #f #f #f #f #f #f #f #f #f #f #f) stmth test-data-id))) res))))) |
︙ | ︙ |
Modified dbfile.scm from [ddd005b3a5] to [94b42101fa].
︙ | ︙ | |||
1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 | (let* ((dbdat (dbfile:open-db dbstruct run-id dbinit))) (set! *db-write-access* (not (dbr:dbdat-read-only dbdat))) ;; (mutex-unlock! *db-open-mutex*) dbdat)) (define dbfile:db-init-proc (make-parameter #f)) ;; (db:with-db dbstruct run-id sqlite3:exec "select blah fgrom blaz;") ;; r/w is a flag to indicate if the db is modified by this query #t = yes, #f = no ;; (define (db:with-db dbstruct run-id r/w proc . params) (assert dbstruct "FATAL: db:with-db called with dbstruct "#f) (assert (dbr:dbstruct? dbstruct) "FATAL: dbstruct is "dbstruct) (let* ((use-mutex (> *api-process-request-count* 25)) ;; risk of db corruption (have-struct (dbr:dbstruct? dbstruct)) (dbdat (if have-struct ;; this stuff just allows us to call with a db handle directly (db:open-db dbstruct run-id (dbfile:db-init-proc)) ;; (dbfile:get-subdb dbstruct run-id) #f)) (db (if have-struct ;; this stuff just allows us to call with a db handle directly (dbr:dbdat-dbh dbdat) dbstruct)) (fname (if dbdat (dbr:dbdat-dbfile dbdat) "nofilenameavailable")) | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | < | < < > | 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 | (let* ((dbdat (dbfile:open-db dbstruct run-id dbinit))) (set! *db-write-access* (not (dbr:dbdat-read-only dbdat))) ;; (mutex-unlock! *db-open-mutex*) dbdat)) (define dbfile:db-init-proc (make-parameter #f)) ;; create a dropping near the db file in a qif dir ;; use count of such files to gate queries (queries in flight) ;; (define (dbfile:wait-for-qif fname run-id params) (let* ((thedir (pathname-directory fname)) (destdir (conc thedir"/qif")) (uniqn (get-area-path-signature (conc run-id params))) (crumbn (conc destdir"/"(current-seconds)"-"uniqn"."(current-process-id)))) (if (not (file-exists? destdir))(create-directory destdir #t)) (let loop ((count 0)) (let* ((currlks (glob (conc destdir"/*"))) (numqrys (length currlks)) (delayval (cond ((> numqrys 50) (if (> numqrys 50) (for-each (lambda (f) (if (> (- (current-seconds) (file-modification-time f)) 10) (begin (dbfile:print-err "Removing qif file "f" older than 10 seconds") (delete-file* f)))) currlks)) 1) ((> numqrys 25) 0.25) ((> numqrys 10) 0.1) (else #f)))) (if (and delayval (< count 5)) (begin (thread-sleep! delayval) (loop (+ count 1)))))) (with-output-to-file crumbn (lambda () (print fname" "run-id" "params))) crumbn)) ;; (db:with-db dbstruct run-id sqlite3:exec "select blah fgrom blaz;") ;; r/w is a flag to indicate if the db is modified by this query #t = yes, #f = no ;; (define (db:with-db dbstruct run-id r/w proc . params) (assert dbstruct "FATAL: db:with-db called with dbstruct "#f) (assert (dbr:dbstruct? dbstruct) "FATAL: dbstruct is "dbstruct) (let* ((use-mutex (> *api-process-request-count* 25)) ;; risk of db corruption (have-struct (dbr:dbstruct? dbstruct)) (dbdat (if have-struct ;; this stuff just allows us to call with a db handle directly (db:open-db dbstruct run-id (dbfile:db-init-proc)) ;; (dbfile:get-subdb dbstruct run-id) #f)) (db (if have-struct ;; this stuff just allows us to call with a db handle directly (dbr:dbdat-dbh dbdat) dbstruct)) (fname (if dbdat (dbr:dbdat-dbfile dbdat) "nofilenameavailable")) (jfile (conc fname"-journal"))) ;; (crumbfile (dbfile:wait-for-qif fname run-id params))) (assert (sqlite3:database? db) "FATAL: db:with-db, db is not a database, db="db", fname="fname) (if (file-exists? jfile) (begin (dbfile:print-err "INFO: "jfile" exists, delaying to reduce database load") (thread-sleep! 0.2))) (if (and use-mutex (common:low-noise-print 120 "over-50-parallel-api-requests")) (dbfile:print-err *api-process-request-count* " parallel api requests being processed in process " (current-process-id))) ;; ", throttling access")) (condition-case (begin (if use-mutex (mutex-lock! *db-with-db-mutex*)) (let ((res (apply proc dbdat db params))) ;; the actual call is here. (if use-mutex (mutex-unlock! *db-with-db-mutex*)) ;; (if (vector? dbstruct)(db:done-with dbstruct run-id r/w)) (if dbdat (dbfile:add-dbdat dbstruct run-id dbdat)) ;; (delete-file* crumbfile) res)) (exn (io-error) (db:generic-error-printout exn "ERROR: i/o error with " fname ". Check permissions, disk space etc. and try again.")) (exn (corrupt) (db:generic-error-printout exn "ERROR: database " fname " is corrupt. Repair it to proceed.")) (exn (busy) (db:generic-error-printout exn "ERROR: database " fname |
︙ | ︙ | |||
1183 1184 1185 1186 1187 1188 1189 | (let ((gotlock (dbfile:simple-file-lock-and-wait fname expire-time: expire-time))) (if gotlock (let ((res (proc))) (dbfile:simple-file-release-lock fname) res) (assert #t "FATAL: simple file lock never got a lock.")))) | | > > > > > > > > > | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 | (let ((gotlock (dbfile:simple-file-lock-and-wait fname expire-time: expire-time))) (if gotlock (let ((res (proc))) (dbfile:simple-file-release-lock fname) res) (assert #t "FATAL: simple file lock never got a lock.")))) (define (db:get-cache-stmth dbdat db stmt) (let* (;; (dbdat (dbfile:get-dbdat dbstruct run-id)) (stmt-cache (dbr:dbdat-stmt-cache dbdat)) ;; (stmth (db:hoh-get stmt-cache db stmt)) (stmth (hash-table-ref/default stmt-cache stmt #f))) (or stmth (let* ((newstmth (sqlite3:prepare db stmt))) ;; (db:hoh-set! stmt-cache db stmt newstmth) (hash-table-set! stmt-cache stmt newstmth) newstmth)))) (define (db:have-incompletes? dbstruct run-id ovr-deadtime) (let* ((incompleted '()) (oldlaunched '()) (toplevels '()) ;; (deadtime-str (configf:lookup *configdat* "setup" "deadtime")) (deadtime (or ovr-deadtime 72000))) ;; twenty hours (db:with-db dbstruct run-id #f (lambda (dbdat db) ;; in RUNNING or REMOTEHOSTSTART for more than 10 minutes ;; ;; HOWEVER: this code in run:test seems to work fine ;; (> (- (current-seconds)(+ (db:test-get-event_time testdat) ;; (db:test-get-run_duration testdat))) ;; 600) ;; (db:delay-if-busy dbdat) (sqlite3:for-each-row (lambda (test-id run-dir uname testname item-path) (if (and (equal? uname "n/a") (equal? item-path "")) ;; this is a toplevel test ;; what to do with toplevel? call rollup? (begin (set! toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels))) ;; (print-info "Found old toplevel test in RUNNING state, test-id=" test-id)) (set! incompleted (cons (list test-id run-dir uname testname item-path run-id) incompleted)))) (db:get-cache-stmth dbdat db "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > (run_duration + ?) AND state IN ('RUNNING','REMOTEHOSTSTART');") run-id deadtime) ;; in LAUNCHED for more than one day. Could be long due to job queues TODO/BUG: Need override for this in config ;; ;; (db:delay-if-busy dbdat) (sqlite3:for-each-row (lambda (test-id run-dir uname testname item-path) (if (and (equal? uname "n/a") (equal? item-path "")) ;; this is a toplevel test ;; what to do with toplevel? call rollup? (set! toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels)) (set! oldlaunched (cons (list test-id run-dir uname testname item-path run-id) oldlaunched)))) (db:get-cache-stmth dbdat db "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > 86400 AND state IN ('LAUNCHED');") run-id) ;; (print-info "Found " (length oldlaunched) " old LAUNCHED items, " (length toplevels) " old LAUNCHED toplevel tests and " (length incompleted) " tests marked RUNNING but apparently dead.") (if (and (null? incompleted) (null? oldlaunched) (null? toplevels)) #f #t))))) ) |