Overview
Comment: | wip |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | v1.7001-multi-db-wip2 | v1.7001-multi-db-rb01 |
Files: | files | file ages | folders |
SHA1: |
bd65c1e66194dce5a99a9ca31567c196 |
User & Date: | matt on 2022-04-11 21:43:16 |
Other Links: | branch diff | manifest | tags |
Context
2022-04-12
| ||
07:15 | no-sync db and db open locking working. Closed-Leaf check-in: a82e2bb971 user: matt tags: v1.7001-multi-db-wip2, v1.7001-multi-db-rb01 | |
2022-04-11
| ||
21:43 | wip check-in: bd65c1e661 user: matt tags: v1.7001-multi-db-wip2, v1.7001-multi-db-rb01 | |
2022-04-07
| ||
07:04 | wip check-in: 5209afd099 user: matt tags: v1.7001-multi-db-rb01 | |
Changes
Modified api.scm from [fcdb3b5c5f] to [b65cdceb6b].
︙ | ︙ | |||
144 145 146 147 148 149 150 151 152 153 154 155 156 157 | ;; These are called by the server on recipt of /api calls ;; - keep it simple, only return the actual result of the call, i.e. no meta info here ;; ;; - returns #( flag result ) ;; (define (api:execute-requests dbstruct dat) (handle-exceptions exn (let ((call-chain (get-call-chain))) (debug:print 0 *default-log-port* "WARNING: api:execute-requests received an exception from peer, dat=" dat ", exn=" exn) (print-call-chain (current-error-port)) (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) (vector #f (vector exn call-chain dat))) ;; return some stuff for debug if an exception happens | > | 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 | ;; These are called by the server on recipt of /api calls ;; - keep it simple, only return the actual result of the call, i.e. no meta info here ;; ;; - returns #( flag result ) ;; (define (api:execute-requests dbstruct dat) (db:open-no-sync-db) ;; sets *no-sync-db* (handle-exceptions exn (let ((call-chain (get-call-chain))) (debug:print 0 *default-log-port* "WARNING: api:execute-requests received an exception from peer, dat=" dat ", exn=" exn) (print-call-chain (current-error-port)) (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) (vector #f (vector exn call-chain dat))) ;; return some stuff for debug if an exception happens |
︙ | ︙ |
Modified db.scm from [40daf428a9] to [5c6fcf9a46].
︙ | ︙ | |||
210 211 212 213 214 215 216 | (use-mutex (> *api-process-request-count* 25))) ;; was 25 (if (and use-mutex (common:low-noise-print 120 "over-50-parallel-api-requests")) (debug:print-info 0 *default-log-port* *api-process-request-count* " parallel api requests being processed in process " (current-process-id) ", throttling access")) (if (common:low-noise-print 600 (conc "parallel-api-requests" *max-api-process-requests*)) (debug:print-info 2 *default-log-port* "Parallel api request count: " *api-process-request-count* " max parallel requests: " *max-api-process-requests*)) (condition-case | | | | | | | | | | | | | | | | | | | | 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 | (use-mutex (> *api-process-request-count* 25))) ;; was 25 (if (and use-mutex (common:low-noise-print 120 "over-50-parallel-api-requests")) (debug:print-info 0 *default-log-port* *api-process-request-count* " parallel api requests being processed in process " (current-process-id) ", throttling access")) (if (common:low-noise-print 600 (conc "parallel-api-requests" *max-api-process-requests*)) (debug:print-info 2 *default-log-port* "Parallel api request count: " *api-process-request-count* " max parallel requests: " *max-api-process-requests*)) (condition-case (begin (if use-mutex (mutex-lock! *db-with-db-mutex*)) (let ((res (apply proc dbdat db params))) (if use-mutex (mutex-unlock! *db-with-db-mutex*)) ;; (if (vector? dbstruct)(db:done-with dbstruct run-id r/w)) (if dbdat (stack-push! (dbr:subdb-dbstack subdb) dbdat)) res)) (exn (io-error) (db:generic-error-printout exn "ERROR: i/o error with " fname ". Check permissions, disk space etc. and try again.")) (exn (corrupt) (db:generic-error-printout exn "ERROR: database " fname " is corrupt. Repair it to proceed.")) (exn (busy) (db:generic-error-printout exn "ERROR: database " fname " is locked. Try copying to another location, remove original and copy back.")) (exn (permission)(db:generic-error-printout exn "ERROR: database " fname " has some permissions problem.")) (exn () (db:generic-error-printout exn "ERROR: Unknown error with database " fname " message: " ((condition-property-accessor 'exn 'message) exn)))))) ;; NB// #f => return dbdir only ;; (was planned to be; zeroth db with name=main.db) ;; ;; If run-id is #f return to create and retrieve the path where the db will live. ;; |
︙ | ︙ | |||
2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 | (db:with-db dbstruct #f #t (lambda (dbdat db) (sqlite3:execute db "DELETE FROM metadat WHERE var=?;" var)))) ;;====================================================================== ;; no-sync.db - small bits of data to be shared between servers ;;====================================================================== (define (db:open-no-sync-db) (dbfile:open-no-sync-db (db:dbfile-path))) (define (db:no-sync-close-db db stmt-cache) (db:safely-close-sqlite3-db db stmt-cache)) | > > > > > > > > > > > > > > > > > | 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 | (db:with-db dbstruct #f #t (lambda (dbdat db) (sqlite3:execute db "DELETE FROM metadat WHERE var=?;" var)))) ;;====================================================================== ;; no-sync.db - small bits of data to be shared between servers ;;====================================================================== (define (db:no-sync-db db-in) (if db-in db-in (if *no-sync-db* *no-sync-db* (begin (mutex-lock! *db-access-mutex*) (let ((dbpath (common:get-db-tmp-area)) (db (dbfile:open-no-sync-db dbpath))) (set! *no-sync-db* db) (mutex-unlock! *db-access-mutex*) db))))) (define (with-no-sync-db proc) (let* ((db (db:no-sync-db *no-sync-db*))) (proc db))) (define (db:open-no-sync-db) (dbfile:open-no-sync-db (db:dbfile-path))) (define (db:no-sync-close-db db stmt-cache) (db:safely-close-sqlite3-db db stmt-cache)) |
︙ | ︙ | |||
5007 5008 5009 5010 5011 5012 5013 | (dbfiles (glob (conc tmp-area"/.db/*.db"))) (sync-durations (make-hash-table))) ;; (debug:print-info 0 *default-log-port* "lock-and-sync, dbfiles: "dbfiles) (for-each (lambda (file) (let* ((fname (conc (pathname-file file) ".db")) (fulln (conc *toppath*"/.db/"fname)) | > | > > > > | > > > | 5024 5025 5026 5027 5028 5029 5030 5031 5032 5033 5034 5035 5036 5037 5038 5039 5040 5041 5042 5043 5044 5045 5046 5047 | (dbfiles (glob (conc tmp-area"/.db/*.db"))) (sync-durations (make-hash-table))) ;; (debug:print-info 0 *default-log-port* "lock-and-sync, dbfiles: "dbfiles) (for-each (lambda (file) (let* ((fname (conc (pathname-file file) ".db")) (fulln (conc *toppath*"/.db/"fname)) (time1 (if (file-exists? file) (file-modification-time file) (begin (debug:print-info 0 *default-log-port* "Sync - I do not see file "file) 1))) (time2 (if (file-exists? fulln) (file-modification-time fulln) (begin (debug:print-info 0 *default-log-port* "Sync - I do not see file "fulln) 0))) (changed (> time1 time2)) (do-cp (cond ((not (file-exists? fulln)) ;; shouldn't happen, but this might recover (debug:print-info 0 *default-log-port* "File "fulln" not found! Copying "fname" to "fulln) #t) (changed ;; (and changed ;; (> (- (current-seconds) time1) 3)) ;; if file is changed and three seconds have passed. |
︙ | ︙ |
Modified dbfile.scm from [c5ec4d8a51] to [13bb3140b5].
︙ | ︙ | |||
442 443 444 445 446 447 448 | ;; no-sync.db - small bits of data to be shared between servers ;;====================================================================== ;; if we are not a server create a db handle. this is not finalized ;; so watch for problems. I'm still not clear if it is needed to manually ;; finalize sqlite3 dbs with the sqlite3 egg. ;; | | > > > | > | > | > | > > > | < | > | > > > > | | > > | | | | | | | | < | | | | | | | 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 | ;; no-sync.db - small bits of data to be shared between servers ;;====================================================================== ;; if we are not a server create a db handle. this is not finalized ;; so watch for problems. I'm still not clear if it is needed to manually ;; finalize sqlite3 dbs with the sqlite3 egg. ;; (define (dbfile:cautious-open-database fname #!optional (tries-left 5)) (let* ((retry (lambda () (thread-sleep! 0.5) (if (> tries-left 0) (dbfile:cautious-open-database fname (- tries-left 1)))))) (condition-case (sqlite3:open-database fname) (exn (io-error) (dbfile:print-err exn "ERROR: i/o error with " fname ". Check permissions, disk space etc. and try again.") (retry)) (exn (corrupt) (dbfile:print-err exn "ERROR: database " fname " is corrupt. Repair it to proceed.") (retry)) (exn (busy) (dbfile:print-err exn "ERROR: database " fname " is locked. Try copying to another location, remove original and copy back.") (retry)) (exn (permission)(dbfile:print-err exn "ERROR: database " fname " has some permissions problem.") (retry)) (exn () (dbfile:print-err exn "ERROR: Unknown error with database " fname " message: " ((condition-property-accessor 'exn 'message) exn)) (retry))))) (define (dbfile:open-no-sync-db dbpath) (if (not (file-exists? dbpath)) (create-directory dbpath #t)) (let* ((dbname (conc dbpath "/no-sync.db")) (db-exists (file-exists? dbname)) (db (dbfile:cautious-open-database dbname))) ;; (sqlite3:open-database dbname))) (sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 136000)) (if (not db-exists) (begin (sqlite3:execute db "PRAGMA synchronous = 0;") (sqlite3:execute db "CREATE TABLE IF NOT EXISTS no_sync_metadat (var TEXT,val TEXT, CONSTRAINT no_sync_metadat_constraint UNIQUE (var));") #;(sqlite3:execute db "PRAGMA journal_mode=WAL;"))) db)) (define (db:no-sync-set db var val) (sqlite3:execute db "INSERT OR REPLACE INTO no_sync_metadat (var,val) VALUES (?,?);" var val)) (define (db:no-sync-del! db var) (sqlite3:execute db "DELETE FROM no_sync_metadat WHERE var=?;" var)) (define (db:no-sync-get/default db var default) (let ((res default)) (sqlite3:for-each-row (lambda (val) (set! res val)) db "SELECT val FROM no_sync_metadat WHERE var=?;" var) (if res (let ((newres (if (string? res) (string->number res) #f))) (if newres newres res)) res))) ;; transaction protected lock aquisition ;; either: ;; fails returns (#f . lock-creation-time) ;; succeeds (returns (#t . lock-creation-time) ;; use (db:no-sync-del! db keyname) to release the lock ;; (define (db:no-sync-get-lock db keyname) (sqlite3:with-transaction db (lambda () (handle-exceptions exn (let ((lock-time (current-seconds))) ;; (debug:print-info 2 *default-log-port* "db:no-sync-get-lock keyname=" keyname ", lock-time=" lock-time ", exn=" exn) (sqlite3:execute db "INSERT INTO no_sync_metadat (var,val) VALUES(?,?);" keyname lock-time) `(#t . ,lock-time)) `(#f . ,(sqlite3:first-result db "SELECT val FROM no_sync_metadat WHERE var=?;" keyname)))))) ;;====================================================================== ;; file utils ;;====================================================================== ;;====================================================================== |
︙ | ︙ |
Modified http-transport.scm from [df8bc9e37e] to [4a4c885dbb].
︙ | ︙ | |||
401 402 403 404 405 406 407 408 409 410 411 412 413 414 | ;; (define (http-transport:keep-running) ;; if none running or if > 20 seconds since ;; server last used then start shutdown ;; This thread waits for the server to come alive (debug:print-info 0 *default-log-port* "Starting the sync-back, keep alive thread in server") (let* ((sdat #f) (tmp-area (common:get-db-tmp-area)) (started-file (conc tmp-area "/.server-started")) (server-start-time (current-seconds)) (server-info (let loop ((start-time (current-seconds)) (changed #t) (last-sdat "not this")) (begin ;; let ((sdat #f)) | > | 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 | ;; (define (http-transport:keep-running) ;; if none running or if > 20 seconds since ;; server last used then start shutdown ;; This thread waits for the server to come alive (debug:print-info 0 *default-log-port* "Starting the sync-back, keep alive thread in server") (let* ((sdat #f) (no-sync-db (db:open-no-sync-db)) (tmp-area (common:get-db-tmp-area)) (started-file (conc tmp-area "/.server-started")) (server-start-time (current-seconds)) (server-info (let loop ((start-time (current-seconds)) (changed #t) (last-sdat "not this")) (begin ;; let ((sdat #f)) |
︙ | ︙ |
Modified tasks.scm from [19e9ab848e] to [e54ee89bd7].
︙ | ︙ | |||
512 513 514 515 516 517 518 | exn '() (sqlite3:first-row db "SELECT id,action,owner,state,target,name,testpatt,keylock,params WHERE params LIKE ? AND state LIKE ? AND action LIKE ? AND testpatt LIKE ?;" param-key state-patt action-patt test-patt))))) (define (tasks:find-task-queue-records dbstruct target run-name test-patt state-patt action-patt) | < < < | | > > | | | | | | | | 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 | exn '() (sqlite3:first-row db "SELECT id,action,owner,state,target,name,testpatt,keylock,params WHERE params LIKE ? AND state LIKE ? AND action LIKE ? AND testpatt LIKE ?;" param-key state-patt action-patt test-patt))))) (define (tasks:find-task-queue-records dbstruct target run-name test-patt state-patt action-patt) (db:with-db dbstruct #f #f (lambda (dbdat db) (let ((res '())) (sqlite3:for-each-row (lambda (a . b) (set! res (cons (cons a b) res))) db "SELECT id,action,owner,state,target,name,testpatt,keylock,params FROM tasks_queue WHERE target = ? AND name = ? AND state LIKE ? AND action LIKE ? AND testpatt LIKE ?;" target run-name state-patt action-patt test-patt) res)))) ;; kill any runner processes (i.e. processes handling -runtests) that match target/runname ;; ;; do a remote call to get the task queue info but do the killing as self here. ;; (define (tasks:kill-runner target run-name testpatt) (let ((records (rmt:tasks-find-task-queue-records target run-name testpatt "running" "run-tests")) |
︙ | ︙ |
Modified tests/simplerun/Makefile from [115e15e0c2] to [d958b22b7d].
1 2 3 | cleanup : killall mtest dboard -v -9 || true | | | 1 2 3 4 5 | cleanup : killall mtest dboard -v -9 || true rm -rf *.log *.bak NB* logs/* .meta .db /tmp/$(USER)/megatest_localdb/simplerun ../simpleruns/* lt |