Changes In Branch v1.80 Through [dbb24dafce] Excluding Merge-Ins
This is equivalent to a diff from 60b31fb56a to dbb24dafce
2023-10-24
| ||
12:56 | Changed megatest version to v1.8018 check-in: 431016c344 user: mmgraham tags: v1.80, v1.8018 | |
12:47 | Allow 1 server per db file check-in: dbb24dafce user: mmgraham tags: v1.80 | |
12:40 | merged fork check-in: e51e15945e user: mmgraham tags: v1.80 | |
2023-10-20
| ||
05:12 |
Merged fork
This node ran run-core-tests.sh through kill-rerun - a pretty good result. Dashboard comes up quickly also. check-in: e607892c7d user: mrwellan tags: v1.80 | |
04:57 | Merged fork check-in: 53900a0d02 user: mrwellan tags: v1.80-start-all | |
2023-10-19
| ||
18:55 | changed a debug msg to level2, increased delay from 0.5 to 2 secs Leaf check-in: 60b31fb56a user: mmgraham tags: v1.80-processes | |
16:09 | moved make-tmpdir-name into commonmod check-in: 1624c400a9 user: mmgraham tags: v1.80-processes | |
Modified api.scm from [5fa313076b] to [13a08c65d1].
︙ | ︙ | |||
369 370 371 372 373 374 375 | ((dec-var) (apply db:dec-var dbstruct params)) ((del-var) (apply db:del-var dbstruct params)) ((add-var) (apply db:add-var dbstruct params)) ((insert-run) (apply db:insert-run dbstruct params)) ;; STEPS | > | > | 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 | ((dec-var) (apply db:dec-var dbstruct params)) ((del-var) (apply db:del-var dbstruct params)) ((add-var) (apply db:add-var dbstruct params)) ((insert-run) (apply db:insert-run dbstruct params)) ;; STEPS ((teststep-set-status!) ;; (apply db:teststep-set-status! dbstruct params)) (db:add-cached-write dbstruct db:teststep-set-status! run-id params)) ((delete-steps-for-test!) (apply db:delete-steps-for-test! dbstruct params)) ;; TEST DATA ((test-data-rollup) (apply db:test-data-rollup dbstruct params)) ((csv->test-data) (apply db:csv->test-data dbstruct params)) ;; MISC |
︙ | ︙ |
Modified db.scm from [b1837f1312] to [0a367c507f].
︙ | ︙ | |||
2737 2738 2739 2740 2741 2742 2743 | (sqlite3:execute db "INSERT OR REPLACE into test_steps (test_id,stepname,state,status,event_time,comment,logfile) VALUES(?,?,?,?,?,?,?);" test-id teststep-name state-in status-in (current-seconds) (if comment comment "") (if logfile logfile ""))))) | < < | 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 | (sqlite3:execute db "INSERT OR REPLACE into test_steps (test_id,stepname,state,status,event_time,comment,logfile) VALUES(?,?,?,?,?,?,?);" test-id teststep-name state-in status-in (current-seconds) (if comment comment "") (if logfile logfile ""))))) (define (db:delete-steps-for-test! dbstruct run-id test-id) ;; TODO: figure out why status is the key field rather than state (note: CONSTRAINT test_steps_constraint UNIQUE (test_id,stepname,state) ) (db:with-db dbstruct run-id #t (lambda (dbdat db) |
︙ | ︙ | |||
4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 | (begin (debug:print 0 *default-log-port* "WARNING: path given, " outputfile " is relative, prefixing with current directory") (conc (current-directory) "/" outputfile))) results) ;; brutal clean up (dbfile:add-dbdat dbstruct #f dbdat) (system "rm -rf tempdir"))) ;; (db:extract-ods-file db "outputfile.ods" '(("sysname" "%")("fsname" "%")("datapath" "%")) "%") ;;====================================================================== ;; moving watch dogs here due to dependencies ;;====================================================================== | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 | (begin (debug:print 0 *default-log-port* "WARNING: path given, " outputfile " is relative, prefixing with current directory") (conc (current-directory) "/" outputfile))) results) ;; brutal clean up (dbfile:add-dbdat dbstruct #f dbdat) (system "rm -rf tempdir"))) ;;====================================================================== ;; cached writes stuff ;;====================================================================== (define (db:add-cached-write dbstruct proc run-id params) (debug:print 0 *default-log-port* "Adding cached write for run-id "run-id" params " params) (mutex-lock! *cached-writes-mutex*) (let* ((hkey (cons dbstruct run-id)) (cached-writes-queue (hash-table-ref/default *cached-writes-queues* hkey '()))) (hash-table-set! *cached-writes-queues* hkey (cons (list proc params) cached-writes-queue))) (if (not *cached-writes-flag*) (begin (set! *cached-writes-flag* #t) (thread-start! (make-thread (lambda () (debug:print 0 *default-log-port* "process cached writes thread started.") (thread-sleep! 1) (db:process-cached-writes-queue)))))) (mutex-unlock! *cached-writes-mutex*)) (define (db:process-cached-writes-queue) (mutex-lock! *cached-writes-mutex*) (hash-table-for-each *cached-writes-queues* (lambda (hkey writes-list) (let* ((dbstruct (car hkey)) (run-id (cdr hkey))) (debug:print 0 *default-log-port* "Processing "(length writes-list)" cached writes for run "run-id) (db:with-db dbstruct run-id #t (lambda (dbdat db) (sqlite3:with-transaction db (lambda () (for-each (lambda (queued-write) (match queued-write ((proc params)(apply proc dbstruct params)) (else (assert #f "BAD queued-write")))) writes-list))) (hash-table-delete! *cached-writes-queues* hkey)))))) (set! *cached-writes-flag* #f) (mutex-unlock! *cached-writes-mutex*)) ;; (db:extract-ods-file db "outputfile.ods" '(("sysname" "%")("fsname" "%")("datapath" "%")) "%") ;;====================================================================== ;; moving watch dogs here due to dependencies ;;====================================================================== |
︙ | ︙ |
Modified dbfile.scm from [b5eea0764a] to [5eebf6582a].
︙ | ︙ | |||
1577 1578 1579 1580 1581 1582 1583 1584 1585 | (result (or stmth (let* ((newstmth (sqlite3:prepare db stmt))) ;; (db:hoh-set! stmt-cache db stmt newstmth) (hash-table-set! stmt-cache stmt newstmth) newstmth)))) (mutex-unlock! *get-cache-stmth-mutex*) result)) ) | > > > > > > > > > > | 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 | (result (or stmth (let* ((newstmth (sqlite3:prepare db stmt))) ;; (db:hoh-set! stmt-cache db stmt newstmth) (hash-table-set! stmt-cache stmt newstmth) newstmth)))) (mutex-unlock! *get-cache-stmth-mutex*) result)) ;;====================================================================== ;; cached writes - run list of procs inside transaction ;; NOTE: this only works because we have once database per process ;;====================================================================== (define *cached-writes-mutex* (make-mutex)) (define *cached-writes-flag* #f) (define *cached-writes-queues* (make-hash-table)) ;; dbstruct->list of writes ) |
Modified dbmod.scm from [9f0ce614a3] to [00037bb607].
︙ | ︙ | |||
117 118 119 120 121 122 123 | (debug:print-info 0 *default-log-port* "dbmod:with-db, database is busy, will try "count" more times.") (thread-sleep! 1) (loop (- count 1))) (begin (debug:print-info 0 *default-log-port* "dbmod:with-db, database is busy, giving up.") (exit 1)))) (exn () | | > | > | 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 | (debug:print-info 0 *default-log-port* "dbmod:with-db, database is busy, will try "count" more times.") (thread-sleep! 1) (loop (- count 1))) (begin (debug:print-info 0 *default-log-port* "dbmod:with-db, database is busy, giving up.") (exit 1)))) (exn () (dbfile:print-err exn "ERROR: Unknown error with db for run-id " run-id", message: " ((condition-property-accessor 'exn 'message) exn) ", details: "(condition->list exn)) (exit 2)))))) (if use-mutex (mutex-unlock! *db-with-db-mutex*)) res))) (define (db:with-db dbstruct run-id w/r proc . params) (dbmod:with-db dbstruct run-id w/r proc params)) |
︙ | ︙ |
Modified rmt.scm from [564930aec3] to [b6600b2d34].
︙ | ︙ | |||
70 71 72 73 74 75 76 | (define *send-receive-mutex* (make-mutex)) ;; should have separate mutex per run-id ;; RA => e.g. usage (rmt:send-receive 'get-var #f (list varname)) ;; (define (rmt:send-receive cmd rid params #!key (attemptnum 1)(area-dat #f)) ;; start attemptnum at 1 so the modulo below works as expected (assert *toppath* "FATAL: rmt:send-receive called with *toppath* not set.") | | < < < < < < < < < < < < < < < < < < < < < < < | 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 | (define *send-receive-mutex* (make-mutex)) ;; should have separate mutex per run-id ;; RA => e.g. usage (rmt:send-receive 'get-var #f (list varname)) ;; (define (rmt:send-receive cmd rid params #!key (attemptnum 1)(area-dat #f)) ;; start attemptnum at 1 so the modulo below works as expected (assert *toppath* "FATAL: rmt:send-receive called with *toppath* not set.") ;; 1. check if server is started IFF cmd is a write OR if we are not on the homehost, store in runremote ;; 2. check the age of the connections. refresh the connection if it is older than timeout-20 seconds. ;; 3. do the query, if on homehost use local access ;; (let* ((start-time (current-seconds)) ;; snapshot time so all use cases get same value (areapath *toppath*);; TODO - resolve from dbstruct to be compatible with multiple areas (runremote (or area-dat |
︙ | ︙ |
Modified tcp-transportmod.scm from [cc561d90e9] to [44aa462a83].
︙ | ︙ | |||
230 231 232 233 234 235 236 237 238 239 240 241 242 243 | ;; client side handler ;; ;;(tt:handler #<tt> get-keys #f () 2 #f "/home/matt/data/megatest/ext-tests" #f "main.db" "ext-tests" "/home/matt/data/megatest/bin/.22.04/../megatest") ;; (define (tt:handler ttdat cmd run-id params attemptnum area-dat areapath readonly-mode dbfname testsuite mtexe) (debug:print 2 *default-log-port* "tt:handler cmd: " cmd " run-id: " run-id " attemptnum: " attemptnum) ;; NOTE: areapath is passed in and in tt struct. We'll use passed in value for now. (let* ((conn (tt:client-connect-to-server ttdat dbfname run-id testsuite))) ;; (hash-table-ref/default (tt-conns ttdat) dbfname #f))) (if conn ;; have connection, call the server (let* ((res (tt:send-receive ttdat conn cmd run-id params))) ;; res is (status errmsg result meta) ; (debug:print 0 *default-log-port* "conn:" conn " res: " res) (match res | > | 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 | ;; client side handler ;; ;;(tt:handler #<tt> get-keys #f () 2 #f "/home/matt/data/megatest/ext-tests" #f "main.db" "ext-tests" "/home/matt/data/megatest/bin/.22.04/../megatest") ;; (define (tt:handler ttdat cmd run-id params attemptnum area-dat areapath readonly-mode dbfname testsuite mtexe) (debug:print 2 *default-log-port* "tt:handler cmd: " cmd " run-id: " run-id " attemptnum: " attemptnum) ;; NOTE: areapath is passed in and in tt struct. We'll use passed in value for now. ;; connect-to-server will start a server if needed. (let* ((conn (tt:client-connect-to-server ttdat dbfname run-id testsuite))) ;; (hash-table-ref/default (tt-conns ttdat) dbfname #f))) (if conn ;; have connection, call the server (let* ((res (tt:send-receive ttdat conn cmd run-id params))) ;; res is (status errmsg result meta) ; (debug:print 0 *default-log-port* "conn:" conn " res: " res) (match res |
︙ | ︙ | |||
258 259 260 261 262 263 264 | ((loaded) (debug:print 0 *default-log-port* "WARNING: server for "dbfname" is loaded, slowing queries.") (tt:backoff-incr (tt-conn-host conn)(tt-conn-port conn)) result) ;; (tt:handler ttdat cmd run-id params (+ attemptnum 1) area-dat areapath readonly-mode dbfname testsuite mtexe)) (else result))) (else ;; did not receive properly formated result | | | 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 | ((loaded) (debug:print 0 *default-log-port* "WARNING: server for "dbfname" is loaded, slowing queries.") (tt:backoff-incr (tt-conn-host conn)(tt-conn-port conn)) result) ;; (tt:handler ttdat cmd run-id params (+ attemptnum 1) area-dat areapath readonly-mode dbfname testsuite mtexe)) (else result))) (else ;; did not receive properly formated result (if (not res) ;; tt:send-receive telling us that communication failed (let* ((host (tt-conn-host conn)) (port (tt-conn-port conn)) ;; (dbfname (tt-conn-port conn)) ;; 192.168.0.127:4242-726924:4.db (pid (tt-conn-pid conn)) ;;(servinf (tt-conn-servinf-file conn))) (servinf (tt-servinf-file ttdat))) ;; (conc areapath"/.servinfo/"host":"port"-"pid":"dbfname))) ;; TODO, use (server:get-servinfo-dir areapath) (hash-table-set! (tt-conns ttdat) dbfname #f) |
︙ | ︙ | |||
291 292 293 294 295 296 297 | ;; start server - addressed in client-connect-to-server ;; delay - addressed in client-connect-to-server ;; try again (thread-sleep! 0.25) ;; dunno, I think this needs to be here (tt:handler ttdat cmd run-id params (+ attemptnum 1) area-dat areapath readonly-mode dbfname testsuite mtexe)) )))) (begin ;; no server file, delay and try again | | | 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 | ;; start server - addressed in client-connect-to-server ;; delay - addressed in client-connect-to-server ;; try again (thread-sleep! 0.25) ;; dunno, I think this needs to be here (tt:handler ttdat cmd run-id params (+ attemptnum 1) area-dat areapath readonly-mode dbfname testsuite mtexe)) )))) (begin ;; no server file, delay and try again (debug:print 2 *default-log-port* "INFO: connection to server "host":"port" broken for "dbfname", no servinf file. Server exited? ") (thread-sleep! 0.5) (tt:handler ttdat cmd run-id params (+ attemptnum 1) area-dat areapath readonly-mode dbfname testsuite mtexe)))) (begin ;; this case is where res is malformed. Probably should abort (assert #f "FATAL: tt:handler received bad data "res) ;; (debug:print 0 *default-log-port* "INFO: got corrupt data from server "host":"port", "res", for "dbfname", will try again.") ;; (tt:handler ttdat cmd run-id params (+ attemptnum 1) area-dat areapath readonly-mode dbfname testsuite mtexe) ))))) |
︙ | ︙ | |||
474 475 476 477 478 479 480 | (define (tt:start-server areapath run-id dbfname-in handler keys) (assert areapath "FATAL: areapath not provided for tt:start-server") ;; is there already a server for this dbfile? Then exit. (debug:print 2 *default-log-port* "tt:start-server: " dbfname-in) (let* ((ttdat (make-tt areapath: areapath)) (dbfname (or dbfname-in (dbmod:run-id->dbfname run-id))) (servers (tt:find-server areapath dbfname))) ;; should use tt:get-current-server-info instead | > | | 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 | (define (tt:start-server areapath run-id dbfname-in handler keys) (assert areapath "FATAL: areapath not provided for tt:start-server") ;; is there already a server for this dbfile? Then exit. (debug:print 2 *default-log-port* "tt:start-server: " dbfname-in) (let* ((ttdat (make-tt areapath: areapath)) (dbfname (or dbfname-in (dbmod:run-id->dbfname run-id))) (servers (tt:find-server areapath dbfname))) ;; should use tt:get-current-server-info instead (debug:print 0 *default-log-port* "Found " (length servers) " already running for " dbfname) (if (> (length servers) 0) (begin (debug:print 0 *default-log-port* "INFO: found server(s) already running for db "dbfname", "(string-intersperse servers ",")" Exiting.") (exit)) (let* ((dbstruct (dbmod:open-dbmoddb areapath run-id dbfname (dbfile:db-init-proc) keys))) (tt-handler-set! ttdat (handler dbstruct)) (let* ((tcp-thread (make-thread (lambda () |
︙ | ︙ | |||
535 536 537 538 539 540 541 | (if (tt-cleanup-proc ttdat) ((tt-cleanup-proc ttdat))) (dbfile:with-no-sync-db nosyncdbpath (lambda (db) (let* ((dbtmpname (dbr:dbstruct-dbtmpname dbstruct))) (debug:print-info 0 *default-log-port* "Running clean up, including removing db file "dbtmpname) (db:no-sync-del! db dbfname) | < | | 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 | (if (tt-cleanup-proc ttdat) ((tt-cleanup-proc ttdat))) (dbfile:with-no-sync-db nosyncdbpath (lambda (db) (let* ((dbtmpname (dbr:dbstruct-dbtmpname dbstruct))) (debug:print-info 0 *default-log-port* "Running clean up, including removing db file "dbtmpname) (db:no-sync-del! db dbfname) )))))) (set! *server-info* ttdat) (let loop ((count 0)) (if (> count 240) (begin (debug:print 0 *default-log-port* "FATAL: Could not start a tcp server, giving up.") (exit 1)) (if (not (tt-port ttdat)) ;; no connection yet |
︙ | ︙ | |||
583 584 585 586 587 588 589 590 591 592 593 594 595 596 | (debug:print 0 *default-log-port* "Failed to get server lock for "dbfname) #f)))))) (if (and res (common:low-noise-print 120 "top server message")) (debug:print-info 0 *default-log-port* "Keep running, I'm the top server for " dbfname" on "(tt-host ttdat)":"(tt-port ttdat))) res)) (else (debug:print-info 0 *default-log-port* "I'm not the lead server: "servers) (let* ((leadsrv (car servers))) (match leadsrv ((host port startseconds server-id pid dbfname servinfofile) (let* ((result (tt:timed-ping host port server-id)) (res (car result)) (ping (cdr result))) | > | 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 | (debug:print 0 *default-log-port* "Failed to get server lock for "dbfname) #f)))))) (if (and res (common:low-noise-print 120 "top server message")) (debug:print-info 0 *default-log-port* "Keep running, I'm the top server for " dbfname" on "(tt-host ttdat)":"(tt-port ttdat))) res)) (else ;; wrong servinfo file (debug:print-info 0 *default-log-port* "I'm not the lead server: "servers) (let* ((leadsrv (car servers))) (match leadsrv ((host port startseconds server-id pid dbfname servinfofile) (let* ((result (tt:timed-ping host port server-id)) (res (car result)) (ping (cdr result))) |
︙ | ︙ |