Overview
Comment: | fixed obscure bug when db is slightly malformed due to ^C. Tweak server gating, it is still not quite right... |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | v1.65-cleanup |
Files: | files | file ages | folders |
SHA1: |
58bd90c5bc09ba63e6bda955e36fdc79 |
User & Date: | mrwellan on 2020-08-26 22:56:54 |
Other Links: | branch diff | manifest | tags |
Context
2020-10-04
| ||
22:21 | Trial 3. Backout server throttle code. Closed-Leaf check-in: 293027ea36 user: matt tags: v1.65-cleanup-try-3 | |
2020-08-26
| ||
23:04 | Tweaked server gate - still not right :( ==/FAIL/orion/== check-in: 7e26fb2f0c user: mrwellan tags: v1.65-cleanup | |
22:56 | fixed obscure bug when db is slightly malformed due to ^C. Tweak server gating, it is still not quite right... check-in: 58bd90c5bc user: mrwellan tags: v1.65-cleanup | |
21:14 | Added instrumentation for server start throttle. ==/3.18/0.6/PASS/1201/orion/== check-in: 56b3986bbb user: mrwellan tags: v1.65-cleanup | |
Changes
Modified db.scm from [f437d94aa0] to [2f649dc1fb].
︙ | ︙ | |||
457 458 459 460 461 462 463 | exn (begin (print "Attempt to safely close sqlite3 db failed. Trying again. exn=" exn) (thread-sleep! 3) (sqlite3:interrupt! db) (db:safely-close-sqlite3-db db stmtcache try-num: (- try-num 1))) (if (sqlite3:database? db) | | | 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 | exn (begin (print "Attempt to safely close sqlite3 db failed. Trying again. exn=" exn) (thread-sleep! 3) (sqlite3:interrupt! db) (db:safely-close-sqlite3-db db stmtcache try-num: (- try-num 1))) (if (sqlite3:database? db) (let* ((stmts (and stmt-cache (hash-table-ref/default stmt-cache db #f)))) (if stmts (map sqlite3:finalize! (hash-table-values stmts))) (sqlite3:finalize! db) #t) #f)))) ;; close all opened run-id dbs (define (db:close-all dbstruct) |
︙ | ︙ | |||
824 825 826 827 828 829 830 | (thread-sleep! delay-handicap) (debug:print-info 0 *default-log-port* "synthetic sync delay of "delay-handicap" seconds completed") ) ;; first pass implementation, just insert all changed rows (for-each (lambda (targdb) | | | | | | | > | 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 | (thread-sleep! delay-handicap) (debug:print-info 0 *default-log-port* "synthetic sync delay of "delay-handicap" seconds completed") ) ;; first pass implementation, just insert all changed rows (for-each (lambda (targdb) (let* ((db (db:dbdat-get-db targdb)) (drp-trigger (if (member "last_update" field-names) (db:drop-trigger db tablename) #f)) (is-trigger-dropped (if (member "last_update" field-names) (db:is-trigger-dropped db tablename) #f)) (stmth (sqlite3:prepare db full-ins))) ;; (db:delay-if-busy targdb) ;; NO WAITING (if (member "last_update" field-names) (debug:print-info 3 *default-log-port* "is-trigger-dropped: " is-trigger-dropped)) (for-each (lambda (fromdat-lst) (sqlite3:with-transaction |
︙ | ︙ | |||
1222 1223 1224 1225 1226 1227 1228 | (define db:trigger-list (list (list "update_runs_trigger" "CREATE TRIGGER IF NOT EXISTS update_runs_trigger AFTER UPDATE ON runs FOR EACH ROW BEGIN UPDATE runs SET last_update=(strftime('%s','now')) WHERE id=old.id; END;" ) | | | | | | | | | | > | | | | | | | | > | | | | | | > | | | | 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 | (define db:trigger-list (list (list "update_runs_trigger" "CREATE TRIGGER IF NOT EXISTS update_runs_trigger AFTER UPDATE ON runs FOR EACH ROW BEGIN UPDATE runs SET last_update=(strftime('%s','now')) WHERE id=old.id; END;" ) (list "update_run_stats_trigger" "CREATE TRIGGER IF NOT EXISTS update_run_stats_trigger AFTER UPDATE ON run_stats FOR EACH ROW BEGIN UPDATE run_stats SET last_update=(strftime('%s','now')) WHERE id=old.id; END;" ) (list "update_tests_trigger" "CREATE TRIGGER IF NOT EXISTS update_tests_trigger AFTER UPDATE ON tests FOR EACH ROW BEGIN UPDATE tests SET last_update=(strftime('%s','now')) WHERE id=old.id; END;" ) (list "update_teststeps_trigger" "CREATE TRIGGER IF NOT EXISTS update_teststeps_trigger AFTER UPDATE ON test_steps FOR EACH ROW BEGIN UPDATE test_steps SET last_update=(strftime('%s','now')) WHERE id=old.id; END;" ) (list "update_test_data_trigger" "CREATE TRIGGER IF NOT EXISTS update_test_data_trigger AFTER UPDATE ON test_data FOR EACH ROW BEGIN UPDATE test_data SET last_update=(strftime('%s','now')) WHERE id=old.id; END;" ))) (define (db:create-all-triggers dbstruct) (db:with-db dbstruct #f #f (lambda (db) (db:create-triggers db)))) (define (db:create-triggers db) (for-each (lambda (key) (sqlite3:execute db (cadr key))) db:trigger-list)) (define (db:drop-all-triggers dbstruct) (db:with-db dbstruct #f #f (lambda (db) (db:drop-triggers db)))) (define (db:is-trigger-dropped db tbl-name) (let* ((trigger-name (if (equal? tbl-name "test_steps") "update_teststeps_trigger" (conc "update_" tbl-name "_trigger"))) (res #f)) (sqlite3:for-each-row (lambda (name) (if (equal? name trigger-name) (set! res #t))) db "SELECT name FROM sqlite_master WHERE type = 'trigger' ;" ))) (define (db:drop-triggers db) (for-each (lambda (key) (sqlite3:execute db (conc "drop trigger if exists " (car key)))) db:trigger-list)) (define (db:drop-trigger db tbl-name) (let* ((trigger-name (if (equal? tbl-name "test_steps") "update_teststeps_trigger" (conc "update_" tbl-name "_trigger")))) (for-each (lambda (key) (if (equal? (car key) trigger-name) (sqlite3:execute db (conc "drop trigger if exists " trigger-name)))) db:trigger-list))) (define (db:create-trigger db tbl-name) (let* ((trigger-name (if (equal? tbl-name "test_steps") "update_teststeps_trigger" (conc "update_" tbl-name "_trigger")))) (for-each (lambda (key) (if (equal? (car key) trigger-name) |
︙ | ︙ | |||
2178 2179 2180 2181 2182 2183 2184 | (string->number res) #f))) (if newres newres res)) res))) | | | | > | | | 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 | (string->number res) #f))) (if newres newres res)) res))) (define (db:no-sync-close-db db stmt-cache) (db:safely-close-sqlite3-db db stmt-cache)) ;; transaction protected lock aquisition ;; either: ;; fails returns (#f . lock-creation-time) ;; succeeds (returns (#t . lock-creation-time) ;; use (db:no-sync-del! db keyname) to release the lock ;; (define (db:no-sync-get-lock db-in keyname) (let ((db (db:no-sync-db db-in))) (sqlite3:with-transaction db (lambda () (handle-exceptions exn (let ((lock-time (current-seconds))) (debug:print-info 2 *default-log-port* "db:no-sync-get-lock keyname=" keyname ", lock-time=" lock-time ", exn=" exn) (sqlite3:execute db "INSERT INTO no_sync_metadat (var,val) VALUES(?,?);" keyname lock-time) `(#t . ,lock-time)) `(#f . ,(sqlite3:first-result db "SELECT val FROM no_sync_metadat WHERE var=?;" keyname))))))) ;; use a global for some primitive caching, it is just silly to ;; re-read the db over and over again for the keys since they never ;; change |
︙ | ︙ | |||
2236 2237 2238 2239 2240 2241 2242 | (let loop ((hed (car header)) (tal (cdr header)) (n 0)) (if (equal? hed field) (handle-exceptions exn (begin | | > | 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 | (let loop ((hed (car header)) (tal (cdr header)) (n 0)) (if (equal? hed field) (handle-exceptions exn (begin (debug:print 0 *default-log-port* "WARNING: attempt to read non-existant field, row=" row " header=" header " field=" field ", exn=" exn) #f) (vector-ref row n)) (if (null? tal) #f (loop (car tal)(cdr tal)(+ n 1))))))) ;; Accessors for the header/data structure ;; get rows and header from (define (db:get-header vec)(vector-ref vec 0)) |
︙ | ︙ | |||
4477 4478 4479 4480 4481 4482 4483 | (if dbdat (let* ((dbpath (db:dbdat-get-path dbdat)) (db (db:dbdat-get-db dbdat)) ;; we'll return this so (db:delay--if-busy can be called inline (dbfj (conc dbpath "-journal"))) (if (handle-exceptions exn (begin | | | 4483 4484 4485 4486 4487 4488 4489 4490 4491 4492 4493 4494 4495 4496 4497 | (if dbdat (let* ((dbpath (db:dbdat-get-path dbdat)) (db (db:dbdat-get-db dbdat)) ;; we'll return this so (db:delay--if-busy can be called inline (dbfj (conc dbpath "-journal"))) (if (handle-exceptions exn (begin (debug:print-info 0 *default-log-port* "WARNING: failed to test for existance of " dbfj ", exn=" exn) (thread-sleep! 1) (db:delay-if-busy count (- count 1))) (common:file-exists? dbfj)) (case count ((6) (thread-sleep! 0.2) (db:delay-if-busy count: 5)) |
︙ | ︙ |
Modified server.scm from [1606531507] to [0ee248d5ed].
︙ | ︙ | |||
322 323 324 325 326 327 328 | ;; wait for server=start-last to be three seconds old ;; (define (server:wait-for-server-start-last-flag areapath) (let* ((start-flag (conc areapath "/logs/server-start-last"))) (if (file-exists? start-flag) (let* ((fmodtime (file-modification-time start-flag)) | | | | > > | | < < | < | > | 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 | ;; wait for server=start-last to be three seconds old ;; (define (server:wait-for-server-start-last-flag areapath) (let* ((start-flag (conc areapath "/logs/server-start-last"))) (if (file-exists? start-flag) (let* ((fmodtime (file-modification-time start-flag)) (reftime (+ 2 (random 3))) (delta (- (current-seconds) fmodtime)) (all-go (> delta reftime))) (if all-go #t ;; (system (conc "touch " start-flag)) ;; lazy but safe (begin (debug:print-info 0 *default-log-port* "Gating server start, last start: " fmodtime ", delta: " delta ", reftime: " reftime ", all-go=" all-go) (thread-sleep! 5) (server:wait-for-server-start-last-flag areapath)))) #;(system (conc "touch " start-flag))))) ;; kind start up of servers, wait 40 seconds before allowing another server for a given ;; run-id to be launched ;; (define (server:kind-run areapath) ;; look for $MT_RUN_AREA_HOME/logs/server-start-last ;; and wait for it to be at least 3 seconds old (server:wait-for-server-start-last-flag areapath) (if (not (server:check-if-running areapath)) ;; why try if there is already a server running? (let* ((last-run-dat (hash-table-ref/default *server-kind-run* areapath '(0 0))) ;; callnum, whenrun (call-num (car last-run-dat)) (when-run (cadr last-run-dat)) (run-delay (+ (case call-num ((0) 0) ((1) 20) ((2) 300) (else 600)) (random 5))) ;; add a small random number just in case a lot of jobs hit the work hosts simultaneously (lock-file (conc areapath "/logs/server-start.lock"))) (if (> (- (current-seconds) when-run) run-delay) (let* ((start-flag (conc areapath "/logs/server-start-last"))) (common:simple-file-lock-and-wait lock-file expire-time: 15) (system (conc "touch " start-flag)) ;; lazy but safe (server:run areapath) (thread-sleep! 2) ;; don't release the lock for at least a few seconds (common:simple-file-release-lock lock-file))) (hash-table-set! *server-kind-run* areapath (list (+ call-num 1)(current-seconds)))))) ;; this one seems to be the general entry point ;; |
︙ | ︙ |