Overview
Comment: | Added switchable support for db on /tmp instead of inmem. Added a couple of asserts to help find why run-id and servers are not aligned in some cases. |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | v1.80 |
Files: | files | file ages | folders |
SHA1: |
cfcc13973cae9eeaff6bacca9085fe94 |
User & Date: | matt on 2023-04-07 08:25:40 |
Other Links: | branch diff | manifest | tags |
Context
2023-04-09
| ||
13:27 | Added host name to messages about server not started check-in: 21f45d51cf user: mmgraham tags: v1.80 | |
2023-04-08
| ||
02:45 | Removed assert from simple lock. Removed use of statement cache which fixed bind issues check-in: 532f050f93 user: matt tags: v1.80 | |
2023-04-07
| ||
08:25 | Added switchable support for db on /tmp instead of inmem. Added a couple of asserts to help find why run-id and servers are not aligned in some cases. check-in: cfcc13973c user: matt tags: v1.80 | |
03:56 | For /tmp db cache disambiguate the db with pid check-in: 481acc5191 user: matt tags: v1.80 | |
Changes
Modified dbfile.scm from [50f8ec9e6b] to [cf63c9cd5f].
︙ | ︙ | |||
192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 | #;(db:safely-close-sqlite3-db rdb #f))) ;; stmt-cache))))) ;; (if (sqlite3:database? rdb) (sqlite3:finalize! rdb)))))) subdbs) #t ) #f ) ) (define (dbfile:run-id->path apath run-id) (conc apath"/"(dbfile:run-id->dbname run-id))) (define (db:dbname->path apath dbname) (conc apath"/"dbname)) (define (dbfile:run-id->dbnum run-id) (cond ((number? run-id) (modulo run-id (num-run-dbs))) | > | | > > > > | > | > | 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 | #;(db:safely-close-sqlite3-db rdb #f))) ;; stmt-cache))))) ;; (if (sqlite3:database? rdb) (sqlite3:finalize! rdb)))))) subdbs) #t ) #f ) ) (define (dbfile:run-id->path apath run-id) (conc apath"/"(dbfile:run-id->dbname run-id))) (define (db:dbname->path apath dbname) (conc apath"/"dbname)) (define (dbfile:run-id->dbnum run-id) (cond ((number? run-id) (modulo run-id (num-run-dbs))) ((not run-id) "main") ;; 0 or main? No, not 0. (else (assert #f "FATAL: run-id is required to be a number or #f")))) ;; just the filename (define (dbfile:run-id->dbfname run-id) (conc (dbfile:run-id->dbnum run-id)".db")) ;; the path in MTRAH with the filename (define (dbfile:run-id->dbname run-id) (conc ".mtdb/"(dbfile:run-id->dbfname run-id))) ;; Make the dbstruct, setup up auxillary db's and call for main db at least once ;; ;; called in http-transport and replicated in rmt.scm for *local* access. ;; (define (dbfile:setup do-sync areapath tmppath) (cond |
︙ | ︙ |
Modified dbmod.scm from [722704066d] to [e3fed73fc8].
︙ | ︙ | |||
39 40 41 42 43 44 45 46 47 48 49 50 51 52 | srfi-18 srfi-69 commonmod dbfile debugprint ) ;; NOTE: This returns only the name "1.db", "main.db", not the path ;; (define (dbmod:run-id->dbfname run-id) (conc (dbfile:run-id->dbnum run-id)".db")) (define (dbmod:get-dbdir dbstruct) | > > | 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 | srfi-18 srfi-69 commonmod dbfile debugprint ) (define dbcache-mode (make-parameter 'tmp)) ;; 'inmem, 'tmp ;; NOTE: This returns only the name "1.db", "main.db", not the path ;; (define (dbmod:run-id->dbfname run-id) (conc (dbfile:run-id->dbnum run-id)".db")) (define (dbmod:get-dbdir dbstruct) |
︙ | ︙ | |||
163 164 165 166 167 168 169 170 | #!key (dbstruct-in #f) (syncdir 'todisk)) (let* ((dbstruct (or dbstruct-in (make-dbr:dbstruct areapath: areapath))) (dbfname (or dbfname-in (dbmod:run-id->dbfname run-id))) (dbpath (dbmod:get-dbdir dbstruct)) ;; directory where all the .db files are kept (dbfullname (conc dbpath"/"dbfname)) ;; (dbmod:run-id->full-dbfname dbstruct run-id)) (dbexists (file-exists? dbfullname)) (inmem (dbmod:open-inmem-db init-proc | > > > > > > < > > > | 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 | #!key (dbstruct-in #f) (syncdir 'todisk)) (let* ((dbstruct (or dbstruct-in (make-dbr:dbstruct areapath: areapath))) (dbfname (or dbfname-in (dbmod:run-id->dbfname run-id))) (dbpath (dbmod:get-dbdir dbstruct)) ;; directory where all the .db files are kept (dbfullname (conc dbpath"/"dbfname)) ;; (dbmod:run-id->full-dbfname dbstruct run-id)) (dbexists (file-exists? dbfullname)) (tmpdir (conc "/tmp/"(current-user-name))) (tmpdb (let* ((fname (conc tmpdir"/"(current-process-id)"-"dbfname))) (if (not (file-exists? tmpdir))(create-directory tmpdir)) ;; check if tmpdb already exists, either delete it or ;; add something to the name fname)) (inmem (dbmod:open-inmem-db init-proc (if (eq? (dbcache-mode) 'inmem) #f tmpdb) )) (write-access (file-write-access? dbpath)) (db (dbmod:safely-open-db dbfullname init-proc write-access)) (tables (db:sync-all-tables-list keys))) (assert (sqlite3:database? inmem) "FATAL: open-dbmoddb: inmem is not a db") (assert (sqlite3:database? db) "FATAL: open-dbmoddb: db is not a db") (dbr:dbstruct-inmem-set! dbstruct inmem) |
︙ | ︙ |
Modified rmt.scm from [759d7a27d6] to [35e00a1063].
︙ | ︙ | |||
126 127 128 129 130 131 132 | (if (not runremote) (let* ((newremote (make-and-init-remote areapath))) (set! *runremote* newremote) (set! runremote newremote))) (let* ((dbfname (conc (dbfile:run-id->dbnum run-id)".db"))) ;;(dbfile:run-id->path areapath run-id))) (tt:handler runremote cmd run-id params attemptnum area-dat areapath readonly-mode dbfname testsuite mtexe))) | < < | 126 127 128 129 130 131 132 133 134 135 136 137 138 139 | (if (not runremote) (let* ((newremote (make-and-init-remote areapath))) (set! *runremote* newremote) (set! runremote newremote))) (let* ((dbfname (conc (dbfile:run-id->dbnum run-id)".db"))) ;;(dbfile:run-id->path areapath run-id))) (tt:handler runremote cmd run-id params attemptnum area-dat areapath readonly-mode dbfname testsuite mtexe))) (define (rmt:print-db-stats) (let ((fmtstr "~40a~7-d~9-d~20,2-f")) ;; "~20,2-f" (debug:print 18 *default-log-port* "DB Stats\n========") (debug:print 18 *default-log-port* (format #f "~40a~8a~10a~10a" "Cmd" "Count" "TotTime" "Avg")) (for-each (lambda (cmd) (let ((cmd-dat (hash-table-ref *db-stats* cmd))) (debug:print 18 *default-log-port* (format #f fmtstr cmd (vector-ref cmd-dat 0) (vector-ref cmd-dat 1) (/ (vector-ref cmd-dat 1)(vector-ref cmd-dat 0)))))) |
︙ | ︙ |
Modified tcp-transportmod.scm from [09d461be9f] to [a71da4bf27].
︙ | ︙ | |||
113 114 115 116 117 118 119 | ;; make ttdat visible (define *server-info* #f) (define (tt:make-remote areapath) (make-tt areapath: areapath)) ;; 1 ... or #f | > > > | | | > | | 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 | ;; make ttdat visible (define *server-info* #f) (define (tt:make-remote areapath) (make-tt areapath: areapath)) ;; 1 ... or #f ;; and check that dbfname matches. FIXME: the propagation of dbfname and run-id ;; might not make the best sense ;; (define (tt:valid-run-id run-id dbfname) (and (or (number? run-id) (not run-id)) (equal? (dbfile:run-id->dbfname run-id) dbfname))) (tcp-buffer-size 2048) ;; (max-connections 4096) ;; do all the busy work of finding and setting up conn for ;; connecting to a server ;; (define (tt:client-connect-to-server ttdat dbfname run-id testsuite) (assert (tt:valid-run-id run-id dbfname) "FATAL: invalid run-id "run-id) (let* ((conn (hash-table-ref/default (tt-conns ttdat) dbfname #f)) (server-start-proc (lambda () (tt:server-process-run (tt-areapath ttdat) testsuite ;; (dbfile:testsuite-name) (common:find-local-megatest) run-id)))) |
︙ | ︙ | |||
174 175 176 177 178 179 180 | (if (> (- (current-seconds) (tt-last-serv-start ttdat)) 5) ;; really do not want to swamp the machine with servers (begin (debug:print-info 0 *default-log-port* "No server found. Starting one for run-id "run-id" in dbfile "dbfname) (server-start-proc) (tt-last-serv-start-set! ttdat (current-seconds)))) (thread-sleep! 1) (tt:client-connect-to-server ttdat dbfname run-id testsuite))))))) | | | 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 | (if (> (- (current-seconds) (tt-last-serv-start ttdat)) 5) ;; really do not want to swamp the machine with servers (begin (debug:print-info 0 *default-log-port* "No server found. Starting one for run-id "run-id" in dbfile "dbfname) (server-start-proc) (tt-last-serv-start-set! ttdat (current-seconds)))) (thread-sleep! 1) (tt:client-connect-to-server ttdat dbfname run-id testsuite))))))) (define (tt:ping host port server-id) (let* ((res (tt:send-receive-direct host port `(ping #f #f #f)))) ;; please send me your server-id ;; ;; need two threads, one a 5 second timer ;; (match res ((status errmsg result meta) |
︙ | ︙ | |||
263 264 265 266 267 268 269 | ;; returns list of (host port startseconds server-id servinfofile) ;; (define (tt:get-server-info-sorted ttdat dbfname) (let* ((areapath (tt-areapath ttdat)) (sfiles (tt:find-server areapath dbfname)) (sdats (filter car (map tt:server-get-info sfiles))) ;; first element is #f if the file disappeared while being read (sorted (sort sdats (lambda (a b) | > | > > > | 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 | ;; returns list of (host port startseconds server-id servinfofile) ;; (define (tt:get-server-info-sorted ttdat dbfname) (let* ((areapath (tt-areapath ttdat)) (sfiles (tt:find-server areapath dbfname)) (sdats (filter car (map tt:server-get-info sfiles))) ;; first element is #f if the file disappeared while being read (sorted (sort sdats (lambda (a b) (let* ((starta (list-ref a 2)) (startb (list-ref b 2))) (if (eq? starta startb) (string>? (list-ref a 3)(list-ref b 3)) ;; if servers started at same time look at server-id (< starta startb)))))) (count 0)) (for-each (lambda (rec) (if (or (> (length sorted) 1) (common:low-noise-print 120 "server info sorted")) (debug:print 0 *default-log-port* "SERVER #"count": "(string-intersperse (map conc sorted) ", "))) (set! count (+ count 1))) |
︙ | ︙ | |||
427 428 429 430 431 432 433 | (> (- (current-seconds)(file-modification-time servinfofile)) 30)) (begin ;; can't ping and file has been on disk 15 seconds, go ahead and try to remove it (debug:print-info 0 *default-log-port* "Removing apparently dead server info file: "servinfofile) (delete-file* servinfofile) #t) ;; not the server but the server is not reachable (begin | | > | 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 | (> (- (current-seconds)(file-modification-time servinfofile)) 30)) (begin ;; can't ping and file has been on disk 15 seconds, go ahead and try to remove it (debug:print-info 0 *default-log-port* "Removing apparently dead server info file: "servinfofile) (delete-file* servinfofile) #t) ;; not the server but the server is not reachable (begin (debug:print 0 *default-log-port* "I'm not the server but could not ping "host":"port", will try again.") (thread-sleep! 1) ;; just because #t))))) (else ;; should never get here (debug:print 0 *default-log-port* "BAD SERVER RECORD: "leadsrv) (assert #f "Bad server record "leadsrv)))))))) (if ok ;; (if (> *api-process-request-count* 0) ;; have requests in flight ;; (tt-last-access-set! ttdat (current-seconds))) (tt-last-access-set! ttdat *db-last-access*) ;; bit silly, just use db-last-access (begin (debug:print 0 *default-log-port* "Exiting immediately") (cleanup) (exit))) (let* ((last-update (dbr:dbstruct-last-update dbstruct)) (curr-secs (current-seconds))) (if (and (eq? (tt-state ttdat) 'running) (> (- curr-secs last-update) 3)) ;; every 3-4 seconds update the db? maybe this should be refresh the inmem? |
︙ | ︙ |