Index: common.scm ================================================================== --- common.scm +++ common.scm @@ -584,19 +584,48 @@ (define *wdnum* 0) (define *wdnum*mutex (make-mutex)) ;; currently the primary job of the watchdog is to run the sync back to megatest.db from the db in /tmp ;; if we are on the homehost and we are a server (by definition we are on the homehost if we are a server) ;; -(define (common:watchdog) + + +(define (common:readonly-watchdog dbstruct) + (thread-sleep! 0.05) ;; delay for startup + + ;; sync megatest.db to /tmp/.../megatst.db + (let ((sync-cool-off-duration 3) + (golden-mtdb (dbr:dbstruct-mtdb dbstruct)) + (golden-mtpath (db:dbdat-get-path mtdb)) + (tmp-mtdb (dbr:dbstruct-tmpdb dbstruct)) + (tmp-mtpath (db:dbdat-get-path mtdb))) + (debug:print-info 0 *default-log-port* "Read-only periodic sync thread started.") + (let loop ((last-sync-time 0)) + (let* ((duration-since-last-sync (- (current-seconds) last-sync-time))) + (if (and (not *time-to-exit*) + (< duration-since-last-sync sync-cool-off-duration)) + (thread-sleep! (- sync-cool-off-duration duration-since-last-sync))) + (if (not *time-to-exit*) + (let ((golden-mtdb-mtime (file-modification-time golden-mtpath)) + (tmp-mtdb-mtime (file-modification-time tmp-mtpath))) + (if (> golden-mtdb-mtime tmp-mtdb-mtime) + (let ((res (db:multi-db-sync dbstruct 'old2new))) + (debug:print-info 0 *default-log-port* "rosync called, " res " records transferred.")) + (loop (current-seconds))) + #t)))) + (debug:print-info 0 *default-log-port* "Exiting readonly-watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id)" mtpath="golden-mtpath))) + + + +(define (common:writable-watchdog dbstruct) (thread-sleep! 0.05) ;; delay for startup (let ((legacy-sync (common:run-sync?)) (debug-mode (debug:debug-mode 1)) (last-time (current-seconds)) (this-wd-num (begin (mutex-lock! *wdnum*mutex) (let ((x *wdnum*)) (set! *wdnum* (add1 *wdnum*)) (mutex-unlock! *wdnum*mutex) x)))) (debug:print-info 3 *default-log-port* "watchdog starting. legacy-sync is " legacy-sync" pid="(current-process-id)" this-wd-num="this-wd-num) (if (and legacy-sync (not *time-to-exit*)) - (let* ((dbstruct (db:setup)) + (let* (;;(dbstruct (db:setup)) (mtdb (dbr:dbstruct-mtdb dbstruct)) (mtpath (db:dbdat-get-path mtdb))) (debug:print-info 0 *default-log-port* "Server running, periodic sync started.") (let loop () ;; sync for filesystem local db writes @@ -649,10 +678,18 @@ (thread-sleep! 1) (delay-loop (+ count 1)))) (if (not *time-to-exit*) (loop)))) (if (common:low-noise-print 30) (debug:print-info 0 *default-log-port* "Exiting watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id)" this-wd-num="this-wd-num))))))) + +;; TODO: for multiple areas, we will have multiple watchdogs; and multiple threads to manage +(define (common:watchdog) + (let ((dbstruct (db:setup))) + (if (dbstruct-readonly dbstruct) + (common:readonly-watchdog dbstruct) + (common:writable-watchdog dbstruct)))) + (define (std-exit-procedure) (on-exit (lambda () 0)) ;;(BB> "std-exit-procedure called; *time-to-exit*="*time-to-exit*) (let ((no-hurry (if *time-to-exit* ;; hurry up Index: db.scm ================================================================== --- db.scm +++ db.scm @@ -42,16 +42,17 @@ ;; each db entry is a pair ( db . dbfilepath ) ;; I propose this record evolves into the area record ;; (defstruct dbr:dbstruct - ;; (tmpdb #f) + (tmpdb #f) (dbstack #f) ;; stack for tmp db handles, do not initialize with a stack (mtdb #f) (refndb #f) (homehost #f) ;; not used yet (on-homehost #f) ;; not used yet + (read-only #f) ) ;; goal is to converge on one struct for an area but for now it is too confusing ;; record for keeping state,status and count for doing roll-ups in ;; iterated tests @@ -269,11 +270,11 @@ ;; (db:sync-tables db:sync-tests-only *megatest-db* db) ;; db)) ;; This routine creates the db if not already present. It is only called if the db is not already opened ;; -(define (db:open-db dbstruct #!key (areapath #f)) +(define (db:open-db dbstruct #!key (areapath #f)) ;; TODO: actually use areapath (let ((tmpdb-stack (dbr:dbstruct-dbstack dbstruct))) ;; RA => Returns the first reference in dbstruct (if (stack? tmpdb-stack) (db:get-db tmpdb-stack) ;; get previously opened db (will create new db handle if all in the stack are already used (let* ((dbpath (db:dbfile-path)) ;; 0)) (dbexists (file-exists? dbpath)) @@ -281,13 +282,15 @@ (tmpdb (db:open-megatest-db path: dbpath)) ;; lock-create-open dbpath db:initialize-main-db)) (mtdb (db:open-megatest-db)) (refndb (db:open-megatest-db path: dbpath name: "megatest_ref.db")) (write-access (file-write-access? dbpath))) (if (and dbexists (not write-access)) - (set! *db-write-access* #f)) + (begin (set! *db-write-access* #f) + (dbr:dbstruct-readonly-set! dbstruct #t))) (dbr:dbstruct-mtdb-set! dbstruct mtdb) - (dbr:dbstruct-dbstack-set! dbstruct (make-stack)) + (dbr:dbstruct-tmpdb-set! dbstruct tmpdb) + (dbr:dbstruct-dbstack-set! dbstruct (make-stack)) ;; BB: why a stack? Why would the number of db's be indeterminate? Is this a legacy of 1.db 2.db .. ? (stack-push! (dbr:dbstruct-dbstack dbstruct) tmpdb) ;; olddb is already a (cons db path) (dbr:dbstruct-refndb-set! dbstruct refndb) ;; (mutex-unlock! *rundb-mutex*) (if (and (not dbfexists) write-access) ;; *db-write-access*) ;; did not have a prior db and do have write access @@ -301,19 +304,20 @@ ;; Make the dbstruct, setup up auxillary db's and call for main db at least once ;; ;; called in http-transport and replicated in rmt.scm for *local* access. ;; (define (db:setup #!key (areapath #f)) - (or *dbstruct-db* - (if (common:on-homehost?) - (let* ((dbstruct (make-dbr:dbstruct))) - (db:open-db dbstruct areapath: areapath) - (set! *dbstruct-db* dbstruct) - dbstruct) - (begin - (debug:print 0 *default-log-port* "ERROR: attempt to open database when not on homehost. Exiting. Homehost: " (common:get-homehost)) - (exit 1))))) + (cond + (*dbstruct-db* *dbstruct-db*);; TODO: when multiple areas are supported, this optimization will be a hazard + (else ;;(common:on-homehost?) + (let* ((dbstruct (make-dbr:dbstruct))) + (db:open-db dbstruct areapath: areapath) + (set! *dbstruct-db* dbstruct) + dbstruct)))) + ;; (else + ;; (debug:print 0 *default-log-port* "ERROR: attempt to open database when not on homehost. Exiting. Homehost: " (common:get-homehost)) + ;; (exit 1)))) ;; Open the classic megatest.db file (defaults to open in toppath) ;; ;; NOTE: returns a dbdat not a dbstruct! ;; @@ -561,10 +565,25 @@ ((not todb) (debug:print 3 *default-log-port* "WARNING: db:sync-tables called with todb missing") -2) ((not (sqlite3:database? (db:dbdat-get-db fromdb))) (debug:print-error 0 *default-log-port* "db:sync-tables called with fromdb not a database " fromdb) -3) ((not (sqlite3:database? (db:dbdat-get-db todb))) (debug:print-error 0 *default-log-port* "db:sync-tables called with todb not a database " todb) -4) + + ((not (file-write-access? (cadr todb))) + (debug:print-error 0 *default-log-port* "db:sync-tables called with todb not a read-only database " todb) -5) + ((not (null? (let ((readonly-slave-dbs + (filter + (lambda (dbdat) + (not (file-write-access? (cadr todb)))) + + slave-dbs))) + (for-each + (lambda (bad-dbdat) + (debug:print-error + 0 *default-log-port* "db:sync-tables called with todb not a read-only database " bad-dbdat)) + readonly-slave-dbs) + readonly-slave-dbs))) -6) (else (let ((stmts (make-hash-table)) ;; table-field => stmt (all-stmts '()) ;; ( ( stmt1 value1 ) ( stml2 value2 )) (numrecs (make-hash-table)) (start-time (current-milliseconds)) @@ -811,15 +830,14 @@ ;; options: ;; ;; 'killservers - kills all servers ;; 'dejunk - removes junk records ;; 'adj-testids - move test-ids into correct ranges -;; 'old2new - sync megatest.db records to .db/{main,1,2 ...}.db -;; 'new2old - sync .db/{main,1,2,3 ...}.db to megatest.db +;; 'old2new - sync megatest.db to /tmp/.../megatest.db and /tmp/.../megatest_ref.db +;; 'new2old - sync /tmp/.../megatest.db to megatest.db and /tmp/.../megatest_ref.db (and update data_synced) ;; 'closeall - close all opened dbs ;; 'schema - attempt to apply schema changes -;; ;; run-ids: '(1 2 3 ...) or #f (for all) ;; (define (db:multi-db-sync dbstruct . options) (if (not (launch:setup)) (debug:print 0 *default-log-port* "ERROR: not able to setup up for megatest.") @@ -858,11 +876,13 @@ ;; sync runs, test_meta etc. ;; (if (member 'old2new options) ;; (begin - (db:sync-tables (db:sync-all-tables-list dbstruct) #f mtdb tmpdb refndb)) + (set! data-synced + (+ (db:sync-tables (db:sync-all-tables-list dbstruct) #f mtdb tmpdb refndb) + data-synced))) ;; (db:sync-main-list mtdb) mtdb (db:get-db dbstruct #f)) ;; (for-each ;; (lambda (run-id) ;; (db:delay-if-busy mtdb) ;; (let ((testrecs (db:get-all-tests-info-by-run-id mtdb run-id))) @@ -877,10 +897,11 @@ ;; (if (member 'new2old options) (set! data-synced (+ (db:sync-tables (db:sync-all-tables-list dbstruct) #f tmpdb refndb mtdb) data-synced))) + (if (member 'fixschema options) (begin (db:patch-schema-maindb (db:dbdat-get-db mtdb)) ADDED docs/api_access_methods_evolution.ods Index: docs/api_access_methods_evolution.ods ================================================================== --- /dev/null +++ docs/api_access_methods_evolution.ods cannot compute difference between binary files Index: megatest.scm ================================================================== --- megatest.scm +++ megatest.scm @@ -364,10 +364,12 @@ (let ((targ (or (args:get-arg "-reqtarg")(args:get-arg "-target")))) (if targ (setenv "MT_TARGET" targ))) ;; The watchdog is to keep an eye on things like db sync etc. ;; + +;; TODO: for multiple areas, we will have multiple watchdogs; and multiple threads to manage (define *watchdog* (make-thread common:watchdog "Watchdog thread")) (if (not (args:get-arg "-server")) (thread-start! *watchdog*)) ;; if starting a server; wait till we get to running state before kicking off watchdog @@ -2019,10 +2021,11 @@ (debug:print 0 *default-log-port* help)) ;;(BB> "thread-join! watchdog") ;; join the watchdog thread if it has been thread-start!ed (it may not have been started in the case of a server that never enters running state) ;; (symbols returned by thread-state: created ready running blocked suspended sleeping terminated dead) +;; TODO: for multiple areas, we will have multiple watchdogs; and multiple threads to manage (if (thread? *watchdog*) (case (thread-state *watchdog*) ((ready running blocked sleeping terminated dead) (thread-join! *watchdog*)))) Index: rmt.scm ================================================================== --- rmt.scm +++ rmt.scm @@ -55,16 +55,35 @@ ;; 1. check if server is started IFF cmd is a write OR if we are not on the homehost, store in runremote ;; 2. check the age of the connections. refresh the connection if it is older than timeout-20 seconds. ;; 3. do the query, if on homehost use local access ;; (let* ((start-time (current-seconds)) ;; snapshot time so all use cases get same value + (areapath *toppath*);; TODO - resolve from dbstruct to be compatible with multiple areas + (readonly-mode (file-write-access? (conc *toppath* "/megatest.db") )) ;; TODO: use dbstruct or runremote to figure this out in future (runremote (or area-dat *runremote*))) (cond ;; give up if more than 15 attempts ((> attemptnum 15) (debug:print 0 *default-log-port* "ERROR: 15 tries to start/connect to server. Giving up.") (exit 1)) + + ;; readonly mode, read request- handle it - case 20 + ((and readonly-mode + (member cmd api:read-only-queries)) + (mutex-unlock! *rmt-mutex*) + (debug:print-info 12 *default-log-port* "rmt:send-receive, case 20") + (rmt:open-qry-close-locally cmd 0 params) + ) + + ;; readonly mode, write request. Do nothing, return #f + (readonly-mode + (mutex-unlock! *rmt-mutex*) + (debug:print-info 12 *default-log-port* "rmt:send-receive, case 21") + (debug:print 0 *default-log-port* "WARNING: write transaction requested on a readonly area. cmd="cmd" params="params) + #f + ) + ;; reset the connection if it has been unused too long ((and runremote (remote-conndat runremote) (let ((expire-time (+ (- start-time (remote-server-timeout runremote))(random 30)))) ;; add 30 seconds of noise so that not all running tests expire at the same time causing a storm of server starts (< (http-transport:server-dat-get-last-access (remote-conndat runremote)) expire-time)))