Overview
Comment: | wip |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | v1.6584-ck5 |
Files: | files | file ages | folders |
SHA1: |
58cf8acf440c8ff545936813718305d6 |
User & Date: | matt on 2021-05-16 23:22:01 |
Other Links: | branch diff | manifest | tags |
Context
2021-05-16
| ||
23:26 | wip check-in: 84869b5b12 user: matt tags: v1.6584-ck5 | |
23:22 | wip check-in: 58cf8acf44 user: matt tags: v1.6584-ck5 | |
2021-05-15
| ||
21:57 | wip check-in: db4714b500 user: matt tags: v1.6584-ck5 | |
Changes
Modified dbmod.scm from [df51662aee] to [43fca71bc1].
︙ | ︙ | |||
226 227 228 229 230 231 232 | ;; Make the dbstruct, call for main db at least once ;; sync disk db to inmem ;; ;; called in http-transport and replicated in rmt.scm for *local* access. ;; (define (db:setup run-id) (assert *toppath* "FATAL: db:setup called before toppath is available.") | | | | 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 | ;; Make the dbstruct, call for main db at least once ;; sync disk db to inmem ;; ;; called in http-transport and replicated in rmt.scm for *local* access. ;; (define (db:setup run-id) (assert *toppath* "FATAL: db:setup called before toppath is available.") (let* ((dbstruct (or *dbstruct-db* (make-dbr:dbstruct))) (db-file (db:run-id->path *toppath* run-id))) (db:get-dbdat dbstruct *toppath* db-file) (if (not *dbstruct-db*)(set! *dbstruct-db* dbstruct)) dbstruct)) ;;====================================================================== ;; setting/getting a lock on the db for only one server per db ;; ;; NOTE: ;; These operate directly on the disk file, NOT on the inmemory db |
︙ | ︙ | |||
411 412 413 414 415 416 417 | ;; ;; (set! *db-last-access* start-t) ;; ;; (mutex-unlock! *db-multi-sync-mutex*) ;; ;; (stack-push! (dbr:dbstruct-dbstack dbstruct) tmpdb))) ;; NOTE: touched logic is disabled/not done ;; sync run to disk if touched ;; | | | | | < | < | > | 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 | ;; ;; (set! *db-last-access* start-t) ;; ;; (mutex-unlock! *db-multi-sync-mutex*) ;; ;; (stack-push! (dbr:dbstruct-dbstack dbstruct) tmpdb))) ;; NOTE: touched logic is disabled/not done ;; sync run to disk if touched ;; (define (db:sync-inmem->disk dbstruct apath dbfile #!key (force-sync #f)) (let* ((dbdat (db:get-dbdat dbstruct apath dbfile)) (db (dbr:dbdat-db dbdat)) (inmem (dbr:dbdat-inmem dbdat)) (start-t (current-seconds)) (last-update (dbr:dbdat-last-write dbdat)) (last-sync (dbr:dbdat-last-sync dbdat))) (debug:print-info 4 *default-log-port* "Syncing for dbfile: " dbfile) (mutex-lock! *db-multi-sync-mutex*) (let* ((update_info (cons (if force-sync 0 last-update) "last_update")) (need-sync (or force-sync (>= last-update last-sync)))) (if need-sync (db:sync-tables (db:sync-all-tables-list) update_info inmem db) (debug:print 0 *default-log-port* "Skipping sync as nothing touched."))) (dbr:dbdat-last-sync-set! dbdat start-t) (mutex-unlock! *db-multi-sync-mutex*))) ;; TODO: Add final sync to this ;; (define (db:safely-close-sqlite3-db db stmt-cache #!key (try-num 3)) (if (<= try-num 0) #f (handle-exceptions exn (begin (print "Attempt to safely close sqlite3 db failed. Trying again. exn=" exn) |
︙ | ︙ |
Modified launchmod.scm from [c24b724e9b] to [c2309e8637].
︙ | ︙ | |||
2279 2280 2281 2282 2283 2284 2285 | )) ;; we can safely cache megatest.config since we have a valid runconfig data)))) ;;====================================================================== ;; TODO: for multiple areas, we will have multiple watchdogs; and multiple threads to manage (define (common:watchdog) (debug:print-info 13 *default-log-port* "common:watchdog entered.") | > > > > > > > | > > > | > | | | | | | | | | | | | | | | | | | | | | | 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 | )) ;; we can safely cache megatest.config since we have a valid runconfig data)))) ;;====================================================================== ;; TODO: for multiple areas, we will have multiple watchdogs; and multiple threads to manage (define (common:watchdog) (debug:print-info 13 *default-log-port* "common:watchdog entered.") (assert *toppath* "common:watchdog started before *toppath* is set") (let* ((start-time (current-seconds)) (am-server (args:get-arg "-server")) (dbfile (args:get-arg "-db")) (apath *toppath*)) (let loop () (thread-sleep! 5) ;; add control / setting for this (if am-server (if (not *dbstruct-db*) (loop) (db:sync-inmem->disk *dbstruct-db* *toppath* dbfile)))))) ;; ;; (let ((dbstruct ;; (debug:print-info 13 *default-log-port* "after db:setup with dbstruct=" dbstruct) ;; (cond ;; ((dbr:dbstruct-read-only dbstruct) ;; (debug:print-info 13 *default-log-port* "loading read-only watchdog") ;; (common:readonly-watchdog dbstruct)) ;; (else ;; (debug:print-info 13 *default-log-port* "loading writable-watchdog.") ;; (let* ((syncer (or (configf:lookup *configdat* "server" "sync-method") "brute-force-sync"))) ;; (cond ;; ((equal? syncer "brute-force-sync") ;; (server:writable-watchdog-bruteforce dbstruct)) ;; ((equal? syncer "delta-sync") ;; (server:writable-watchdog-deltasync dbstruct)) ;; (else ;; (debug:print-error 0 *default-log-port* "Unknown server/sync-method specified ("syncer") - valid values are brute-force-sync and delta-sync.") ;; (exit 1))) ;; ;;(debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] Syncer started (method="syncer")") ;; ))) ;; (debug:print-info 13 *default-log-port* "watchdog done.")) ;; (debug:print-info 13 *default-log-port* "no need for watchdog on non-homehost")))) ;;====================================================================== ;; currently the primary job of the watchdog is to run the sync back to megatest.db from the db in /tmp ;; if we are on the homehost and we are a server (by definition we are on the homehost if we are a server) ;; (define (common:readonly-watchdog dbstruct) #f) |
︙ | ︙ | |||
2522 2523 2524 2525 2526 2527 2528 | (delay-loop (+ count 1)))) (if (not (bdat-time-to-exit *bdat*)) (loop)))) ;; time to exit, close the no-sync db here (db:no-sync-close-db no-sync-db stmt-cache) (if (common:low-noise-print 30) (debug:print-info 0 *default-log-port* "Exiting watchdog timer, *time-to-exit* = " (bdat-time-to-exit *bdat*)" pid="(current-process-id) ))))))) ;;" this-wd-num="this-wd-num))))))) | | | | 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 | (delay-loop (+ count 1)))) (if (not (bdat-time-to-exit *bdat*)) (loop)))) ;; time to exit, close the no-sync db here (db:no-sync-close-db no-sync-db stmt-cache) (if (common:low-noise-print 30) (debug:print-info 0 *default-log-port* "Exiting watchdog timer, *time-to-exit* = " (bdat-time-to-exit *bdat*)" pid="(current-process-id) ))))))) ;;" this-wd-num="this-wd-num))))))) #;(define (server:writable-watchdog-bruteforce dbstruct) (thread-sleep! 1) ;; delay for startup (let* ((do-a-sync (server:get-bruteforce-syncer dbstruct)) (final-sync (server:get-bruteforce-syncer dbstruct fork-to-background: #t persist-until-sync: #t))) (when (and (not (args:get-arg "-sync-to-megatest.db")) ;; conditions under which we do not run the sync (args:get-arg "-server")) (let loop () (do-a-sync) (if (not (bdat-time-to-exit *bdat*)) (loop))) ;; keep going unless time to exit ;; time to exit, close the no-sync db here (final-sync) (if (common:low-noise-print 30) (debug:print-info 0 *default-log-port* "Exiting watchdog timer, *time-to-exit* = "(bdat-time-to-exit *bdat*)" pid="(current-process-id) ))))) ;; moving this here as it needs access to db and cannot be in common. ;; #;(define (server:get-bruteforce-syncer dbstruct #!key (fork-to-background #f) (persist-until-sync #f)) (let* ((sqlite-exe (or (get-environment-variable "MT_SQLITE3_EXE"))) ;; defined in cfg.sh (sync-log (or (args:get-arg "-sync-log") (conc *toppath* "/logs/sync-" (current-process-id) "-" (get-host-name) ".log"))) (tmp-area (common:get-db-tmp-area)) (tmp-db (conc tmp-area "/megatest.db")) (staging-file (conc *toppath* "/.megatest.db")) (mtdbfile (conc *toppath* "/megatest.db")) (lockfile (common:get-sync-lock-filepath)) |
︙ | ︙ |
Modified megatest.scm from [3e262da95d] to [2adacf8022].
︙ | ︙ | |||
741 742 743 744 745 746 747 | "-create-megatest-area" "-mark-incompletes" "-convert-to-norm" "-convert-to-old" "-import-megatest.db" "-sync-to-megatest.db" | | | 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 | "-create-megatest-area" "-mark-incompletes" "-convert-to-norm" "-convert-to-old" "-import-megatest.db" "-sync-to-megatest.db" "-sync-brute-force" "-logging" "-v" ;; verbose 2, more than normal (normal is 1) "-q" ;; quiet 0, errors/warnings only "-diff-rep" "-syscheck" |
︙ | ︙ | |||
2579 2580 2581 2582 2583 2584 2585 | 'dejunk 'adj-testids 'old2new ;; 'new2old ) (set! *didsomething* #t))) | | | 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 | 'dejunk 'adj-testids 'old2new ;; 'new2old ) (set! *didsomething* #t))) #;(when (args:get-arg "-sync-brute-force") ((server:get-bruteforce-syncer (db:setup #t) persist-until-sync: #t)) (set! *didsomething* #t)) #;(if (args:get-arg "-sync-to-megatest.db") (let* ((dbstruct (db:setup #f)) (tmpdbpth (cdr (dbr:dbstruct-tmpdb dbstruct))) (lockfile (conc tmpdbpth ".lock")) |
︙ | ︙ |
Modified rmtmod.scm from [f26d9abd38] to [93f26ad84f].
︙ | ︙ | |||
642 643 644 645 646 647 648 | (rmt:send-receive 'get-num-runs #f (list runpatt))) (define (rmt:get-runs-cnt-by-patt runpatt targetpatt keys) (rmt:send-receive 'get-runs-cnt-by-patt #f (list runpatt targetpatt keys))) ;; Use the special run-id == #f scenario here since there is no run yet (define (rmt:register-run keyvals runname state status user contour) | > | > > > > | 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 | (rmt:send-receive 'get-num-runs #f (list runpatt))) (define (rmt:get-runs-cnt-by-patt runpatt targetpatt keys) (rmt:send-receive 'get-runs-cnt-by-patt #f (list runpatt targetpatt keys))) ;; Use the special run-id == #f scenario here since there is no run yet (define (rmt:register-run keyvals runname state status user contour) ;; first register in main.db (thus the #f) (let* ((run-id (rmt:send-receive 'register-run #f (list keyvals runname state status user contour)))) ;; now register in the run db itself (rmt:send-receive 'register-run run-id (list keyvals runname state status user contour)) run-id)) (define (rmt:get-run-name-from-id run-id) (rmt:send-receive 'get-run-name-from-id run-id (list run-id))) (define (rmt:delete-run run-id) (rmt:send-receive 'delete-run run-id (list run-id))) |
︙ | ︙ | |||
1543 1544 1545 1546 1547 1548 1549 | ;; (exit 1)))) (define (common:run-sync?) ;; (and (common:on-homehost?) (args:get-arg "-server")) | < < | | 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 | ;; (exit 1)))) (define (common:run-sync?) ;; (and (common:on-homehost?) (args:get-arg "-server")) ;; this one seems to be the general entry point ;; #;(define (server:start-and-wait areapath #!key (timeout 60)) (let ((give-up-time (+ (current-seconds) timeout))) (let loop ((server-info (server:check-if-running areapath)) (try-num 0)) (if (or server-info (> (current-seconds) give-up-time)) ;; server-url will be #f if no server available. (server:record->url server-info) (let ((num-ok (length (server:get-best (server:get-list areapath))))) |
︙ | ︙ | |||
1584 1585 1586 1587 1588 1589 1590 | (if res server-url #f))) ;; no longer care if multiple servers are started by accident. older ;; servers will drop off in time. ;; | > > | > > | | 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 | (if res server-url #f))) ;; no longer care if multiple servers are started by accident. older ;; servers will drop off in time. ;; ;; defunct ;; #;(define (server:check-if-running areapath) ;; #!key (numservers "2")) (let* ((ns (server:get-num-servers)) (servers (server:get-best (server:get-list areapath)))) (if (or (and servers (null? servers)) (not servers) (and (list? servers) (< (length servers) (pseudo-random-integer ns)))) ;; somewhere between 0 and numservers #f (let loop ((hed (car servers)) (tal (cdr servers))) (let ((res (server:check-server hed))) (if res hed (if (null? tal) #f (loop (car tal)(cdr tal))))))))) ;; kind start up of servers, wait 40 seconds before allowing another server for a given ;; run-id to be launched ;; ;; defunct ;; #;(define (server:kind-run areapath) ;; look for $MT_RUN_AREA_HOME/logs/server-start-last ;; and wait for it to be at least 3 seconds old ;; (server:wait-for-server-start-last-flag areapath) (if (not (server:check-if-running areapath)) ;; why try if there is already a server running? (let* ((last-run-dat (hash-table-ref/default *server-kind-run* areapath '(0 0))) ;; callnum, whenrun (call-num (car last-run-dat)) (when-run (cadr last-run-dat)) |
︙ | ︙ | |||
1638 1639 1640 1641 1642 1643 1644 | (http-transport:client-connect iface port) #;(case (server:get-transport) ((rpc) (rpc:client-connect iface port)) ((http) (http:client-connect iface port)) ((zmq) (zmq:client-connect iface port)) (else (rpc:client-connect iface port)))) | > > > | | 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 | (http-transport:client-connect iface port) #;(case (server:get-transport) ((rpc) (rpc:client-connect iface port)) ((http) (http:client-connect iface port)) ((zmq) (zmq:client-connect iface port)) (else (rpc:client-connect iface port)))) ;; ;; defunct ;; #;(define (client:setup areapath #!key (remaining-tries 100) (failed-connects 0)) (print "got here") ;; (client:setup-http areapath remaining-tries: remaining-tries failed-connects: failed-connects) #;(case (server:get-transport) ((rpc) (rpc-transport:client-setup remaining-tries: remaining-tries failed-connects: failed-connects)) ;;(client:setup-rpc run-id)) ((http)(client:setup-http areapath remaining-tries: remaining-tries failed-connects: failed-connects)) (else (rpc-transport:client-setup remaining-tries: remaining-tries failed-connects: failed-connects)))) ;; (client:setup-rpc run-id)))) |
︙ | ︙ |
Modified tests/unittests/basicserver.scm from [983ffc6ad7] to [4ef17a5055].
︙ | ︙ | |||
53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 | ;; with-input-from-request ;; rmt:get-connection ;; with-input-from-request ) (define *db* (db:setup #f)) (test #f 'server-started (api:execute-requests *db* 'get-server (list *toppath* ".db/1.db"))) (test #f 'server-started (rmt:general-open-connection *rmt:remote* *toppath* ".db/1.db")) (test #f '("SYSTEM" "RELEASE") (rmt:get-keys)) ;; (delete-file* "logs/1.log") ;; (define run-id 1) ;; (test "setup for run" #t (begin (launch:setup) ;; (string? (getenv "MT_RUN_AREA_HOME")))) ;; | > > > | 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 | ;; with-input-from-request ;; rmt:get-connection ;; with-input-from-request ) (define *db* (db:setup #f)) (test #f 'server-started (api:execute-requests *db* 'get-server (list *toppath* ".db/1.db"))) (set! *dbstruct-db* #f) (test #f 'server-started (rmt:general-open-connection *rmt:remote* *toppath* ".db/1.db")) (test #f '("SYSTEM" "RELEASE") (rmt:get-keys)) (test #f 1 (rmt:register-run '(("SYSTEM" "a")("RELEASE" "b")) "run1" "new" "n/a" "justme" #f)) ;; (delete-file* "logs/1.log") ;; (define run-id 1) ;; (test "setup for run" #t (begin (launch:setup) ;; (string? (getenv "MT_RUN_AREA_HOME")))) ;; |
︙ | ︙ |