Index: Makefile ================================================================== --- Makefile +++ Makefile @@ -28,11 +28,15 @@ ezsteps.scm lock-queue.scm sdb.scm rmt.scm api.scm \ subrun.scm portlogger.scm archive.scm env.scm \ diff-report.scm cgisetup/models/pgdb.scm # module source files -MSRCFILES = dbmod.scm +MSRCFILES = dbmod.scm dbfile.scm debugprint.scm mtargs.scm + +mofiles/dbfile.o : mofiles/debugprint.o +mofiles/debugprint.o : mofiles/mtargs.o + # ftail.scm rmtmod.scm commonmod.scm removed # MSRCFILES = ducttape-lib.scm pkts.scm stml2.scm cookie.scm mutils.scm \ # mtargs.scm commonmod.scm dbmod.scm adjutant.scm ulex.scm \ # rmtmod.scm apimod.scm @@ -162,19 +166,19 @@ monitor.o dashboard.o archive.o megatest.o : db_records.scm megatest-fossil-hash.scm tests.o runs.o dashboard.o dashboard-tests.o dashboard-context-menu.o dashboard-main.o : run_records.scm db.o ezsteps.o keys.o launch.o megatest.o monitor.o runs-for-ref.o runs.o tests.o : key_records.scm -db.o api.o : mofiles/dbmod.o +db.o api.o : mofiles/dbmod.o mofiles/dbfile.o mofiles/debugprint.o tests.o tasks.o dashboard-tasks.o : task_records.scm runs.o : test_records.scm megatest.o : megatest-fossil-hash.scm megatest-version.scm -rmt.scm client.scm common.scm configf.scm dashboard-guimonitor.scm dashboard-tests.scm dashboard.scm db.scm dcommon.scm ezsteps.scm fs-transport.scm http-transport.scm index-tree.scm items.scm keys.scm launch.scm megatest.scm monitor.scm mt.scm newdashboard.scm runconfig.scm runs.scm server.scm tdb.scm tests.scm tree.scm : common_records.scm megatest-version.scm +rmt.scm client.scm common.scm configf.scm dashboard-guimonitor.scm dashboard-tests.scm dashboard.scm db.scm dcommon.scm ezsteps.scm http-transport.scm index-tree.scm items.scm keys.scm launch.scm megatest.scm monitor.scm mt.scm newdashboard.scm runconfig.scm runs.scm server.scm tdb.scm tests.scm tree.scm : common_records.scm megatest-version.scm common_records.scm : altdb.scm # mofiles/stml2.o : mofiles/cookie.o # configf.o : mofiles/commonmod.o Index: api.scm ================================================================== --- api.scm +++ api.scm @@ -24,13 +24,15 @@ (declare (unit api)) (declare (uses rmt)) (declare (uses db)) (declare (uses dbmod)) +(declare (uses dbfile)) (declare (uses tasks)) (import dbmod) +(import dbfile) ;; allow these queries through without starting a server ;; (define api:read-only-queries '(get-key-val-pairs Index: archive.scm ================================================================== --- archive.scm +++ archive.scm @@ -397,11 +397,11 @@ (bup-restore-params (list "-d" archive-path "restore" "-C" *toppath* archive-internal-path))) (debug:print-info 2 *default-log-port* "Restoring archived data to " *toppath* " from archive in " archive-path " ... " archive-internal-path) (run-n-wait bup-exe params: bup-restore-params print-cmd: "Running:")) (sleep 2) (db:multi-db-sync - (db:setup #f) + (db:setup #t) ;; (db:setup-db *dbstruct-dbs* *toppath* #f) 'killservers ;'dejunk ;'adj-testids 'old2new ) Index: common.scm ================================================================== --- common.scm +++ common.scm @@ -135,11 +135,11 @@ (define *default-log-port* (current-error-port)) (define *time-zero* (current-seconds)) ;; for the watchdog (define *default-area-tag* "local") ;; DATABASE -(define *dbstruct-db* #f) ;; used to cache the dbstruct in db:setup. Goal is to remove this. +(define *dbstruct-dbs* #f) ;; used to cache the dbstruct in db:setup. Goal is to remove this. ;; db stats (define *db-stats* (make-hash-table)) ;; hash of vectors < count duration-total > (define *db-stats-mutex* (make-mutex)) ;; db access (define *db-last-access* (current-seconds)) ;; last db access, used in server @@ -591,13 +591,13 @@ ;; (define (common:exit-on-version-changed) (if (common:on-homehost?) (if (common:api-changed?) (let* ((mtconf (conc (get-environment-variable "MT_RUN_AREA_HOME") "/megatest.config")) - (dbfile (conc (get-environment-variable "MT_RUN_AREA_HOME") "/megatest.db")) - (read-only (not (file-write-access? dbfile))) - (dbstruct (db:setup #t))) + (dbfile (conc (get-environment-variable "MT_RUN_AREA_HOME") "/megatest.db")) + (read-only (not (file-write-access? dbfile))) + (dbstruct (db:setup #t))) ;; (db:setup-db *dbstruct-dbs* *toppath* #f))) ;; #t))) (debug:print 0 *default-log-port* "WARNING: Version mismatch!\n" " expected: " (common:version-signature) "\n" " got: " (common:get-last-run-version)) (cond @@ -979,10 +979,18 @@ "/megatest_localdb/" tsname (string-translate *toppath* "/" ".")) )))) (set! *db-cache-path* dbpath) + ;; ensure megatest area has .db + (let ((dbarea (conc *toppath* "/.db"))) + (if (not (file-exists? dbarea)) + (create-directory dbarea))) + ;; ensure tmp area has .db + (let ((dbarea (conc dbpath "/.db"))) + (if (not (file-exists? dbarea)) + (create-directory dbarea))) dbpath)) #f))) (define (common:get-area-path-signature) (message-digest-string (md5-primitive) *toppath*)) @@ -999,118 +1007,10 @@ (args:get-arg "-server"))) (define (common:human-time) (time->string (seconds->local-time (current-seconds)) "%Y-%m-%d %H:%M:%S")) -;;====================================================================== -;; currently the primary job of the watchdog is to run the sync back to megatest.db from the db in /tmp -;; if we are on the homehost and we are a server (by definition we are on the homehost if we are a server) -;; -(define (common:readonly-watchdog dbstruct) - (thread-sleep! 0.05) ;; delay for startup - (debug:print-info 13 *default-log-port* "common:readonly-watchdog entered.") - ;; sync megatest.db to /tmp/.../megatst.db - (let* ((sync-cool-off-duration 3) - (golden-mtdb (dbr:dbstruct-mtdb dbstruct)) - (golden-mtpath (db:dbdat-get-path golden-mtdb)) - (tmp-mtdb (dbr:dbstruct-tmpdb dbstruct)) - (tmp-mtpath (db:dbdat-get-path tmp-mtdb))) - (debug:print-info 0 *default-log-port* "Read-only periodic sync thread started.") - (let loop ((last-sync-time 0)) - (debug:print-info 13 *default-log-port* "loop top tmp-mtpath="tmp-mtpath" golden-mtpath="golden-mtpath) - (let* ((duration-since-last-sync (- (current-seconds) last-sync-time))) - (debug:print-info 13 *default-log-port* "duration-since-last-sync="duration-since-last-sync) - (if (and (not *time-to-exit*) - (< duration-since-last-sync sync-cool-off-duration)) - (thread-sleep! (- sync-cool-off-duration duration-since-last-sync))) - (if (not *time-to-exit*) - (let ((golden-mtdb-mtime (file-modification-time golden-mtpath)) - (tmp-mtdb-mtime (file-modification-time tmp-mtpath))) - (if (> golden-mtdb-mtime tmp-mtdb-mtime) - (if (< golden-mtdb-mtime (- (current-seconds) 3)) ;; file has NOT been touched in past three seconds, this way multiple servers won't fight to sync back - (let ((res (db:multi-db-sync dbstruct 'old2new))) - (debug:print-info 13 *default-log-port* "rosync called, " res " records transferred.")))) - (loop (current-seconds))) - #t))) - (debug:print-info 0 *default-log-port* "Exiting readonly-watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id)" mtpath="golden-mtpath))) - -;;====================================================================== -;; TODO: for multiple areas, we will have multiple watchdogs; and multiple threads to manage -(define (common:watchdog) - (debug:print-info 13 *default-log-port* "common:watchdog entered.") - (if (launch:setup) - (if (common:on-homehost?) - (let ((dbstruct (db:setup #t))) - (debug:print-info 13 *default-log-port* "after db:setup with dbstruct=" dbstruct) - (cond - ((dbr:dbstruct-read-only dbstruct) - (debug:print-info 13 *default-log-port* "loading read-only watchdog") - (common:readonly-watchdog dbstruct)) - (else - (debug:print-info 13 *default-log-port* "loading writable-watchdog.") - (let* ((syncer (or (configf:lookup *configdat* "server" "sync-method") "brute-force-sync"))) - (cond - ((equal? syncer "brute-force-sync") - (server:writable-watchdog-bruteforce dbstruct)) - ((equal? syncer "delta-sync") - (server:writable-watchdog-deltasync dbstruct)) - (else - (debug:print-error 0 *default-log-port* "Unknown server/sync-method specified ("syncer") - valid values are brute-force-sync and delta-sync.") - (exit 1))) - ;;(debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] Syncer started (method="syncer")") - ))) - (debug:print-info 13 *default-log-port* "watchdog done.")) - (debug:print-info 13 *default-log-port* "no need for watchdog on non-homehost")))) - - -(define (std-exit-procedure) - ;;(common:telemetry-log-close) - (on-exit (lambda () 0)) - ;;(debug:print-info 13 *default-log-port* "std-exit-procedure called; *time-to-exit*="*time-to-exit*) - (let ((no-hurry (if *time-to-exit* ;; hurry up - #f - (begin - (set! *time-to-exit* #t) - #t)))) - (debug:print-info 4 *default-log-port* "starting exit process, finalizing databases.") - (if (and no-hurry (debug:debug-mode 18)) - (rmt:print-db-stats)) - (let ((th1 (make-thread (lambda () ;; thread for cleaning up, give it five seconds - (if *dbstruct-db* (db:close-all *dbstruct-db*)) ;; one second allocated - (if *task-db* - (let ((db (cdr *task-db*))) - (if (sqlite3:database? db) - (begin - (sqlite3:interrupt! db) - (sqlite3:finalize! db #t) - ;; (vector-set! *task-db* 0 #f) - (set! *task-db* #f))))) - (http-client#close-all-connections!) - ;; (if (and *runremote* - ;; (remote-conndat *runremote*)) - ;; (begin - ;; (http-client#close-all-connections!))) ;; for http-client - (if (not (eq? *default-log-port* (current-error-port))) - (close-output-port *default-log-port*)) - (set! *default-log-port* (current-error-port))) "Cleanup db exit thread")) - (th2 (make-thread (lambda () - (debug:print 4 *default-log-port* "Attempting clean exit. Please be patient and wait a few seconds...") - (if no-hurry - (begin - (thread-sleep! 5)) ;; give the clean up few seconds to do it's stuff - (begin - (thread-sleep! 2))) - (debug:print 4 *default-log-port* " ... done") - ) - "clean exit"))) - (thread-start! th1) - (thread-start! th2) - (thread-join! th1) - ) - ) - - 0) (define (std-signal-handler signum) ;; (signal-mask! signum) (set! *time-to-exit* #t) ;;(debug:print-info 13 *default-log-port* "got signal "signum) ADDED configfmod.scm Index: configfmod.scm ================================================================== --- /dev/null +++ configfmod.scm @@ -0,0 +1,75 @@ +;;====================================================================== +;; Copyright 2017, Matthew Welland. +;; +;; This file is part of Megatest. +;; +;; Megatest is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. +;; +;; Megatest is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with Megatest. If not, see . + +;;====================================================================== + +(declare (unit configfmod)) +;; (declare (uses mtargs)) +;; (declare (uses debugprint)) +;; (declare (uses keysmod)) + +(module configfmod +* + +(import srfi-1 + +;; scheme +;; +;; big-chicken ;; more of a reminder than anything ... +;; chicken.base +;; chicken.condition +;; chicken.file +;; chicken.io +;; chicken.pathname +;; chicken.port +;; chicken.pretty-print +;; chicken.process +;; chicken.process-context +;; chicken.process-context.posix +;; chicken.sort +;; chicken.string +;; chicken.time +;; chicken.eval +;; +;; debugprint +;; (prefix mtargs args:) +;; pkts +;; keysmod +;; +;; (prefix base64 base64:) +;; (prefix dbi dbi:) +;; (prefix sqlite3 sqlite3:) +;; (srfi 18) +;; directory-utils +;; format +;; matchable +;; md5 +;; message-digest +;; regex +;; regex-case +;; sparse-vectors +;; srfi-1 +;; srfi-13 +;; srfi-69 +;; stack +;; typed-records +;; z3 + + ) +) + Index: db.scm ================================================================== --- db.scm +++ db.scm @@ -22,17 +22,36 @@ ;; Database access ;;====================================================================== ;; dbstruct vector containing all the relevant dbs like main.db, megatest.db, run.db etc -(use (srfi 18) extras tcp stack) -(use sqlite3 srfi-1 posix regex regex-case srfi-69 csv-xml s11n md5 message-digest base64 format dot-locking z3 typed-records matchable) -(import (prefix sqlite3 sqlite3:)) -(import (prefix base64 base64:)) +(use (srfi 18) + extras + tcp + stack + (prefix sqlite3 sqlite3:) + srfi-1 + posix + regex + regex-case + srfi-69 + csv-xml + s11n + md5 + message-digest + (prefix base64 base64:) + format + dot-locking + z3 + typed-records + matchable) (declare (unit db)) (declare (uses common)) +(declare (uses dbmod)) +;; (declare (uses debugprint)) +(declare (uses dbfile)) (declare (uses keys)) (declare (uses ods)) (declare (uses client)) (declare (uses mt)) @@ -42,44 +61,21 @@ (include "run_records.scm") (define *number-of-writes* 0) (define *number-non-write-queries* 0) -;;====================================================================== -;; R E C O R D S -;;====================================================================== - -;; each db entry is a pair ( db . dbfilepath ) -;; I propose this record evolves into the area record -;; -(defstruct dbr:dbstruct - (tmpdb #f) - (dbstack #f) ;; stack for tmp db handles, do not initialize with a stack - (mtdb #f) - (refndb #f) - (homehost #f) ;; not used yet - (on-homehost #f) ;; not used yet - (read-only #f) - (stmt-cache (make-hash-table)) - ) ;; goal is to converge on one struct for an area but for now it is too confusing - +(import dbmod) +(import dbfile) ;; record for keeping state,status and count for doing roll-ups in ;; iterated tests ;; (defstruct dbr:counts (state #f) (status #f) (count 0)) -;;====================================================================== -;; alist-of-alists -;;====================================================================== -;; -;; (define (db:aa-set! dat key1 key2 val) -;; (let loop (( - ;;====================================================================== ;; hash of hashs ;;====================================================================== @@ -127,37 +123,40 @@ (begin (debug:print-error 0 *default-log-port* " query " stmt " failed, params: " params ", error: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn) (print-call-chain (current-error-port)) default))) (apply sqlite3:first-result db stmt params))) + +(define (db:generic-error-printout exn . message) + (print-call-chain (current-error-port)) + (apply debug:print-error 0 *default-log-port* message) + (debug:print-error 0 *default-log-port* ;; " params: " params + ", error: " ((condition-property-accessor 'exn 'message) exn) + ", arguments: " ((condition-property-accessor 'exn 'arguments) exn) + ", location: " ((condition-property-accessor 'exn 'location) exn) + )) ;; Get/open a database ;; if run-id => get run specific db ;; if #f => get main db +;; if run-id is a string treat it as a filename ;; if db already open - return inmem ;; if db not open, open inmem, rundb and sync then return inmem ;; inuse gets set automatically for rundb's ;; -(define (db:get-db dbstruct) ;; run-id) - (if (stack? (dbr:dbstruct-dbstack dbstruct)) - (if (stack-empty? (dbr:dbstruct-dbstack dbstruct)) - (let ((newdb (db:open-megatest-db path: (db:dbfile-path)))) - ;; (stack-push! (dbr:dbstruct-dbstack dbstruct) newdb) - newdb) - (stack-pop! (dbr:dbstruct-dbstack dbstruct))) - (db:open-db dbstruct))) - -;; ;; legacy handling of structure for managing db's. Refactor this into dbr:? -(define (db:dbdat-get-db dbdat) - (if (pair? dbdat) - (car dbdat) - dbdat)) - -(define (db:dbdat-get-path dbdat) - (if (pair? dbdat) - (cdr dbdat) - #f)) +(define (db:get-db dbstruct run-id) ;; RENAME TO db:get-dbh + (let* ((subdb (dbfile:get-subdb dbstruct run-id))) + (if (stack? (dbr:subdb-dbstack subdb)) + (if (stack-empty? (dbr:subdb-dbstack subdb)) + (let* ((dbname (db:run-id->dbname run-id)) + (newdb (db:open-megatest-db path: (db:dbfile-path) + name: dbname))) + ;; NOTE: pushing on the stack only happens AFTER the handle has been used + ;; (stack-push! (dbr:dbstruct-dbstack dbstruct) newdb) + newdb) + (stack-pop! (dbr:subdb-dbstack subdb))) + (db:open-db subdb run-id)))) (define-inline (db:generic-error-printout exn . message) (print-call-chain (current-error-port)) (apply debug:print-error 0 *default-log-port* message) (debug:print-error 0 *default-log-port* " params: " params @@ -169,17 +168,22 @@ ;; (db:with-db dbstruct run-id sqlite3:exec "select blah fgrom blaz;") ;; r/w is a flag to indicate if the db is modified by this query #t = yes, #f = no ;; (define (db:with-db dbstruct run-id r/w proc . params) (let* ((have-struct (dbr:dbstruct? dbstruct)) - (dbdat (if have-struct - (db:get-db dbstruct) + (dbdat (if have-struct ;; this stuff just allows us to call with a db handle directly + (db:get-db dbstruct run-id) #f)) - (db (if have-struct - (db:dbdat-get-db dbdat) + (db (if have-struct ;; this stuff just allows us to call with a db handle directly + (dbr:dbdat-dbh dbdat) dbstruct)) - (fname (db:dbdat-get-path dbdat)) + (fname (if dbdat + (dbr:dbdat-dbfile dbdat) + "nofilenameavailable")) + (subdb (if have-struct + (dbfile:get-subdb dbstruct run-id) + #f)) (use-mutex (> *api-process-request-count* 25))) ;; was 25 (if (and use-mutex (common:low-noise-print 120 "over-50-parallel-api-requests")) (debug:print-info 0 *default-log-port* *api-process-request-count* " parallel api requests being processed in process " (current-process-id) ", throttling access")) (if (common:low-noise-print 600 (conc "parallel-api-requests" *max-api-process-requests*)) @@ -188,11 +192,11 @@ (begin (if use-mutex (mutex-lock! *db-with-db-mutex*)) (let ((res (apply proc db params))) (if use-mutex (mutex-unlock! *db-with-db-mutex*)) ;; (if (vector? dbstruct)(db:done-with dbstruct run-id r/w)) - (if dbdat (stack-push! (dbr:dbstruct-dbstack dbstruct) dbdat)) + (if dbdat (stack-push! (dbr:subdb-dbstack subdb) dbdat)) res)) (exn (io-error) (db:generic-error-printout exn "ERROR: i/o error with " fname ". Check permissions, disk space etc. and try again.")) (exn (corrupt) (db:generic-error-printout exn "ERROR: database " fname " is corrupt. Repair it to proceed.")) @@ -202,33 +206,10 @@ (exn (permission)(db:generic-error-printout exn "ERROR: database " fname " has some permissions problem.")) (exn () (db:generic-error-printout exn "ERROR: Unknown error with database " fname " message: " ((condition-property-accessor 'exn 'message) exn)))))) -;;====================================================================== -;; K E E P F I L E D B I N dbstruct -;;====================================================================== - -;; (define (db:get-filedb dbstruct run-id) -;; (let ((db (vector-ref dbstruct 2))) -;; (if db -;; db -;; (let ((fdb (filedb:open-db (conc *toplevel* "/db/files.db")))) -;; (vector-set! dbstruct 2 fdb) -;; fdb)))) -;; -;; ;; Can also be used to save arbitrary strings -;; ;; -;; (define (db:save-path dbstruct path) -;; (let ((fdb (db:get-filedb dbstruct)))b -;; (filedb:register-path fdb path))) -;; -;; ;; Use to get a path. To get an arbitrary string see next define -;; ;; -;; (define (db:get-path dbstruct id) -;; (let ((fdb (db:get-filedb dbstruct))) -;; (filedb:get-path db id))) ;; NB// #f => return dbdir only ;; (was planned to be; zeroth db with name=main.db) ;; ;; If run-id is #f return to create and retrieve the path where the db will live. @@ -312,182 +293,148 @@ (exn (busy) (debug:print 0 *default-log-port* "ERROR: database " fname " is locked. Try copying to another location, remove original and copy back.")) (exn (permission)(debug:print 0 *default-log-port* "ERROR: database " fname " has some permissions problem.")) (exn () (debug:print 0 *default-log-port* "ERROR: Unknown error with database " fname " message: " ((condition-property-accessor 'exn 'message) exn)))) ))) - ;; This routine creates the db if not already present. It is only called if the db is not already opened ;; -(define (db:open-db dbstruct #!key (areapath #f)(do-sync #t)) ;; TODO: actually use areapath - (let ((tmpdb-stack (dbr:dbstruct-dbstack dbstruct))) ;; RA => Returns the first reference in dbstruct +(define (db:open-db subdb run-id #!key (areapath #f)(do-sync #t)) ;; TODO: actually use areapath + (let* ((tmpdb-stack (dbr:subdb-dbstack subdb))) ;; RA => Returns the first reference in dbstruct (if (stack? tmpdb-stack) - (db:get-db tmpdb-stack) ;; get previously opened db (will create new db handle if all in the stack are already used + (db:get-db tmpdb-stack run-id) ;; get previously opened db (will create new db handle if all in the stack are already used (let* ((max-stale-tmp (configf:lookup-number *configdat* "server" "filling-db-max-stale-seconds" default: 10)) - (dbpath (db:dbfile-path )) ;; path to tmp db area + (dbpath (db:dbfile-path)) ;; path to tmp db area + (dbname (db:run-id->dbname run-id)) (dbexists (common:file-exists? dbpath)) - (tmpdbfname (conc dbpath "/megatest.db")) + (mtdbfname (conc *toppath* "/"dbname)) + (mtdbexists (common:file-exists? mtdbfname)) + (mtdbmodtime (if mtdbexists (common:lazy-sqlite-db-modification-time mtdbfname) #f)) + (mtdb (db:open-megatest-db mtdbfname)) + ;; the reference db for syncing + (refdbfname (conc dbpath "/"dbname"_ref")) + (refndb (db:open-megatest-db refdbfname)) + ;; (mtdbpath (dbr:dbdat-dbfile mtdb)) + ;; the tmpdb + (tmpdbfname (conc dbpath"/"dbname)) ;; /tmp//.db/[main|1,2...].db + (tmpdb (db:open-megatest-db tmpdbfname)) ;; lock-create-open dbpath db:initialize-main-db)) (dbfexists (common:file-exists? tmpdbfname)) ;; (conc dbpath "/megatest.db"))) - (mtdbexists (common:file-exists? (conc *toppath* "/megatest.db"))) - - (mtdbmodtime (if mtdbexists (common:lazy-sqlite-db-modification-time (conc *toppath* "/megatest.db")) #f)) - (tmpdbmodtime (if dbfexists (common:lazy-sqlite-db-modification-time tmpdbfname) #f)) - (mtdb (db:open-megatest-db)) - (mtdbpath (db:dbdat-get-path mtdb)) - (tmpdb (db:open-megatest-db path: dbpath)) ;; lock-create-open dbpath db:initialize-main-db)) - (refndb (db:open-megatest-db path: dbpath name: "megatest_ref.db")) - (write-access (file-write-access? mtdbpath)) + (tmpdbmodtime (if dbfexists (common:lazy-sqlite-db-modification-time tmpdbfname) #f)) + + (write-access (file-write-access? mtdbfname)) ;(mtdbmodtime (if mtdbexists (common:lazy-sqlite-db-modification-time mtdbpath) #f)) ; moving this before db:open-megatest-db is called. if wal mode is on -WAL and -shm file get created with causing the tmpdbmodtime timestamp always greater than mtdbmodtime ;(tmpdbmodtime (if dbfexists (common:lazy-sqlite-db-modification-time tmpdbfname) #f)) ;if wal mode is on -WAL and -shm file get created when db:open-megatest-db is called. modtimedelta will always be < 10 so db in tmp not get synced ;(tmpdbmodtime (if dbfexists (db:get-last-update-time (car tmpdb)) #f)) ;(fmt (file-modification-time tmpdbfname)) (modtimedelta (and mtdbmodtime tmpdbmodtime (- mtdbmodtime tmpdbmodtime)))) (when write-access - (sqlite3:execute (car mtdb) "drop trigger if exists update_tests_trigger") - (sqlite3:execute (car mtdb) "drop trigger if exists update_runs_trigger")) + (sqlite3:execute (dbr:dbdat-dbh mtdb) "drop trigger if exists update_tests_trigger") + (sqlite3:execute (dbr:dbdat-dbh mtdb) "drop trigger if exists update_runs_trigger")) - ;(print "mtdbmodtime " mtdbmodtime " tmpdbmodtime " tmpdbmodtime " mtdbpath " mtdbpath " " (conc *toppath* "/megatest.db")) - ;;(debug:print-info 13 *default-log-port* "db:open-db>> mtdbpath="mtdbpath" mtdbexists="mtdbexists" and write-access="write-access) + ;; (print "mtdbmodtime " mtdbmodtime " tmpdbmodtime " tmpdbmodtime " mtdbpath " mtdbpath " " (conc *toppath* "/megatest.db")) + ;; (debug:print-info 13 *default-log-port* "db:open-db>> mtdbpath="mtdbpath" mtdbexists="mtdbexists" and write-access="write-access) (if (and dbexists (not write-access)) (begin (set! *db-write-access* #f) - (dbr:dbstruct-read-only-set! dbstruct #t))) - (dbr:dbstruct-mtdb-set! dbstruct mtdb) - (dbr:dbstruct-tmpdb-set! dbstruct tmpdb) - (dbr:dbstruct-dbstack-set! dbstruct (make-stack)) ;; BB: why a stack? Why would the number of db's be indeterminate? Is this a legacy of 1.db 2.db .. ? - (stack-push! (dbr:dbstruct-dbstack dbstruct) tmpdb) ;; olddb is already a (cons db path) - (dbr:dbstruct-refndb-set! dbstruct refndb) + (dbr:subdb-read-only-set! subdb #t))) + (dbr:subdb-mtdb-set! subdb mtdb) + (dbr:subdb-tmpdb-set! subdb tmpdb) + (dbr:subdb-dbstack-set! subdb (make-stack)) ;; BB: why a stack? Why would the number of db's be indeterminate? Is this a legacy of 1.db 2.db .. ? + (stack-push! (dbr:subdb-dbstack subdb) tmpdb) ;; olddb is already a (cons db path) + (dbr:subdb-refndb-set! subdb refndb) (if (and (or (not dbfexists) (and modtimedelta (> modtimedelta max-stale-tmp))) ;; if db in tmp is over ten seconds older than the file in MTRA then do a sync back do-sync) (begin - (debug:print 1 *default-log-port* "filling db " (db:dbdat-get-path tmpdb) " with data \n from " (db:dbdat-get-path mtdb) " mod time delta: " modtimedelta) - (db:sync-tables (db:sync-all-tables-list dbstruct) #f mtdb refndb tmpdb) - ;touch tmp db to avoid wal mode wierdness - (set! (file-modification-time tmpdbfname) (current-seconds)) + (debug:print 1 *default-log-port* "filling db " (dbr:dbdat-dbfile tmpdb) " with data \n from " (dbr:dbdat-dbfile mtdb) " mod time delta: " modtimedelta) + (db:sync-tables (db:sync-all-tables-list subdb) #f mtdb refndb tmpdb) + ;; touch tmp db to avoid wal mode wierdness + (set! (file-modification-time tmpdbfname) (current-seconds)) (debug:print-info 13 *default-log-port* "db:sync-all-tables-list done.") ) - (debug:print 4 *default-log-port* " db, " (db:dbdat-get-path tmpdb) " already exists or fresh enough, not propogating data from\n " (db:dbdat-get-path mtdb) " mod time delta: " modtimedelta) ) - ;; (db:multi-db-sync dbstruct 'old2new)) ;; migrate data from megatest.db automatically + (debug:print 4 *default-log-port* " db, " (dbr:dbdat-dbfile tmpdb) " already exists or fresh enough, not propogating data from\n " (dbr:dbdat-dbfile mtdb) " mod time delta: " modtimedelta) ) + ;; (db:multi-db-sync subdb 'old2new)) ;; migrate data from megatest.db automatically tmpdb)))) (define (db:get-last-update-time db) -; (db:with-db -; dbstruct #f #f -; (lambda (db) - (let ((last-update-time #f)) - (sqlite3:for-each-row - (lambda (lup) - (set! last-update-time lup)) - db - "select max(lup) from ( select max(last_update) as lup from tests union select max(last_update) as lup from runs);") - last-update-time)) -;)) + (let ((last-update-time #f)) + (sqlite3:for-each-row + (lambda (lup) + (set! last-update-time lup)) + db + "select max(lup) from ( select max(last_update) as lup from tests union select max(last_update) as lup from runs);") + last-update-time)) + ;; Make the dbstruct, setup up auxillary db's and call for main db at least once ;; ;; called in http-transport and replicated in rmt.scm for *local* access. ;; (define (db:setup do-sync #!key (areapath #f)) ;; (cond - (*dbstruct-db* *dbstruct-db*);; TODO: when multiple areas are supported, this optimization will be a hazard + (*dbstruct-dbs* *dbstruct-dbs*);; TODO: when multiple areas are supported, this optimization will be a hazard (else ;;(common:on-homehost?) - (debug:print-info 13 *default-log-port* "db:setup entered (first time, not cached.)") - (let* ((dbstruct (make-dbr:dbstruct))) + (let* ((dbstructs (make-dbr:dbstruct))) (when (not *toppath*) - (debug:print-info 13 *default-log-port* "in db:setup, *toppath* not set; calling launch:setup") + (debug:print-info 0 *default-log-port* "in db:setup, *toppath* not set; calling launch:setup") (launch:setup areapath: areapath)) - (debug:print-info 13 *default-log-port* "Begin db:open-db") - (db:open-db dbstruct areapath: areapath do-sync: do-sync) - (debug:print-info 13 *default-log-port* "Done db:open-db") - (set! *dbstruct-db* dbstruct) - ;;(debug:print-info 13 *default-log-port* "new dbstruct = "(dbr:dbstruct->alist dbstruct)) - dbstruct)))) - ;; (else - ;; (debug:print 0 *default-log-port* "ERROR: attempt to open database when not on homehost. Exiting. Homehost: " (common:get-homehost)) - ;; (exit 1)))) + (set! *dbstruct-dbs* dbstructs) + (dbr:dbstruct-areapath-set! dbstructs *toppath*) + dbstructs)))) + +(define (dbfile:get-subdb dbstruct run-id) + (let* ((res (hash-table-ref/default (dbr:dbstruct-subdbs dbstruct) (dbfile:run-id->key run-id) #f))) + (if res + res + (let* ((newsubdb (make-dbr:subdb))) + (db:open-db newsubdb run-id areapath: (dbr:dbstruct-areapath dbstruct) do-sync: #t) + (hash-table-set! (dbr:dbstruct-subdbs dbstruct) (dbfile:run-id->key run-id) newsubdb) + newsubdb)))) ;; Open the classic megatest.db file (defaults to open in toppath) ;; ;; NOTE: returns a dbdat not a dbstruct! ;; ;;(define (db:reopen-megatest-db -(define (db:open-megatest-db #!key (path #f)(name #f)) - (let* ((dbdir (or path *toppath*)) - (dbpath (conc dbdir "/" (or name "megatest.db"))) - (dbexists (common:file-exists? dbpath)) +(define (db:open-megatest-db dbpath) + (let* ((dbexists (common:file-exists? dbpath)) (db (db:lock-create-open dbpath (lambda (db) - (db:initialize-main-db db) - ;;(db:initialize-run-id-db db) - ))) + (db:initialize-main-db db)))) (write-access (file-write-access? dbpath))) (debug:print-info 13 *default-log-port* "db:open-megatest-db "dbpath) (if (and dbexists (not write-access)) (set! *db-write-access* #f)) - (cons db dbpath))) + ;; (cons db dbpath))) + (make-dbr:dbdat dbfile: dbpath dbh: db read-only: (not write-access)))) ;; sync run to disk if touched ;; (define (db:sync-touched dbstruct run-id #!key (force-sync #f)) - (let ((tmpdb (db:get-db dbstruct)) - (mtdb (dbr:dbstruct-mtdb dbstruct)) - (refndb (dbr:dbstruct-refndb dbstruct)) - (start-t (current-seconds))) + (let* ((subdb (dbfile:get-subdb dbstruct run-id)) + (tmpdb (db:get-db dbstruct run-id)) + (mtdb (dbr:subdb-mtdb subdb)) + (refndb (dbr:subdb-refndb subdb)) + (start-t (current-seconds))) (debug:print-info 4 *default-log-port* "Syncing for run-id: " run-id) (mutex-lock! *db-multi-sync-mutex*) (let ((update_info (cons (if force-sync 0 *db-last-sync*) "last_update"))) (mutex-unlock! *db-multi-sync-mutex*) (db:sync-tables (db:sync-all-tables-list dbstruct) update_info tmpdb refndb mtdb)) (mutex-lock! *db-multi-sync-mutex*) (set! *db-last-sync* start-t) (set! *db-last-access* start-t) (mutex-unlock! *db-multi-sync-mutex*) - (stack-push! (dbr:dbstruct-dbstack dbstruct) tmpdb))) - -(define (db:safely-close-sqlite3-db db stmt-cache #!key (try-num 3)) - (if (<= try-num 0) - #f - (handle-exceptions - exn - (begin - (print "Attempt to safely close sqlite3 db failed. Trying again. exn=" exn) - (thread-sleep! 3) - (sqlite3:interrupt! db) - (db:safely-close-sqlite3-db db stmt-cache try-num: (- try-num 1))) - (if (sqlite3:database? db) - (let* ((stmts (and stmt-cache (hash-table-ref/default stmt-cache db #f)))) - (if stmts (map sqlite3:finalize! (hash-table-values stmts))) - (sqlite3:finalize! db) - #t) - #f)))) - -;; close all opened run-id dbs -(define (db:close-all dbstruct) - (if (dbr:dbstruct? dbstruct) - (handle-exceptions - exn - (begin - (debug:print 0 *default-log-port* "WARNING: Finalizing failed, " ((condition-property-accessor 'exn 'message) exn) ", note - exn=" exn) - (print-call-chain *default-log-port*)) - ;; (db:sync-touched dbstruct 0 force-sync: #t) ;; NO. Do not do this here. Instead we rely on a server to be started when there are writes, even if the server itself is not going to be used as a server. - (let ((tdbs (map db:dbdat-get-db - (stack->list (dbr:dbstruct-dbstack dbstruct)))) - (mdb (db:dbdat-get-db (dbr:dbstruct-mtdb dbstruct))) - (rdb (db:dbdat-get-db (dbr:dbstruct-refndb dbstruct))) - (stmt-cache (dbr:dbstruct-stmt-cache dbstruct))) - (map (lambda (db) - (db:safely-close-sqlite3-db db stmt-cache)) - tdbs) - (db:safely-close-sqlite3-db mdb stmt-cache) ;; (if (sqlite3:database? mdb) (sqlite3:finalize! mdb)) - (db:safely-close-sqlite3-db rdb stmt-cache))))) ;; (if (sqlite3:database? rdb) (sqlite3:finalize! rdb)))))) + (stack-push! (dbr:subdb-dbstack subdb) tmpdb))) + +;; db:safely-close-sqlite3-db and db:close-all were here, moved to dbfile ;; (let ((locdbs (dbr:dbstruct-locdbs dbstruct))) ;; (if (hash-table? locdbs) ;; (for-each (lambda (run-id) ;; (db:close-run-db dbstruct run-id)) @@ -601,11 +548,11 @@ db:sync-tests-only)) ;; use bunch of Unix commands to try to break the lock and recreate the db ;; (define (db:move-and-recreate-db dbdat) - (let* ((dbpath (db:dbdat-get-path dbdat)) + (let* ((dbpath (dbr:dbdat-dbfile dbdat)) (dbdir (pathname-directory dbpath)) (fname (pathname-strip-directory dbpath)) (fnamejnl (conc fname "-journal")) (tmpname (conc fname "." (current-process-id))) (tmpjnl (conc fnamejnl "." (current-process-id)))) @@ -622,11 +569,11 @@ ;; return #f to indicate the dbdat should be closed/reopened ;; else return dbdat ;; (define (db:repair-db dbdat #!key (numtries 1)) - (let* ((dbpath (db:dbdat-get-path dbdat)) + (let* ((dbpath (dbr:dbdat-dbfile dbdat)) (dbdir (pathname-directory dbpath)) (fname (pathname-strip-directory dbpath))) (debug:print-info 0 *default-log-port* "Checking db " dbpath " for errors.") (cond ((not (file-write-access? dbdir)) @@ -691,13 +638,13 @@ (debug:print 0 *default-log-port* "EXCEPTION: database probably overloaded or unreadable in db:sync-tables.") (print-call-chain (current-error-port)) (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) (debug:print 5 *default-log-port* "exn=" (condition->list exn)) (debug:print 0 *default-log-port* " status: " ((condition-property-accessor 'sqlite3 'status) exn)) - (debug:print 0 *default-log-port* " src db: " (db:dbdat-get-path fromdb)) + (debug:print 0 *default-log-port* " src db: " (dbr:dbdat-dbfile fromdb)) (for-each (lambda (dbdat) - (let ((dbpath (db:dbdat-get-path dbdat))) + (let ((dbpath (dbr:dbdat-dbfile dbdat))) (debug:print 0 *default-log-port* " dbpath: " dbpath) (if (not (db:repair-db dbdat)) (begin (debug:print-error 0 *default-log-port* "Failed to rebuild " dbpath ", exiting now.") (exit))))) @@ -708,24 +655,24 @@ (cond ((not fromdb) (debug:print 3 *default-log-port* "WARNING: db:sync-tables called with fromdb missing") -1) ((not todb) (debug:print 3 *default-log-port* "WARNING: db:sync-tables called with todb missing") -2) - ((not (sqlite3:database? (db:dbdat-get-db fromdb))) + ((not (sqlite3:database? (dbr:dbdat-dbh fromdb))) (debug:print-error 0 *default-log-port* "db:sync-tables called with fromdb not a database " fromdb) -3) - ((not (sqlite3:database? (db:dbdat-get-db todb))) + ((not (sqlite3:database? (dbr:dbdat-dbh todb))) (debug:print-error 0 *default-log-port* "db:sync-tables called with todb not a database " todb) -4) - ((not (file-write-access? (db:dbdat-get-path todb))) + ((not (file-write-access? (dbr:dbdat-dbfile todb))) (debug:print-error 0 *default-log-port* "db:sync-tables called with todb not a read-only database " todb) -5) ((not (null? (let ((readonly-slave-dbs (filter (lambda (dbdat) - (not (file-write-access? (db:dbdat-get-path todb)))) + (not (file-write-access? (dbr:dbdat-dbfile todb)))) slave-dbs))) (for-each (lambda (bad-dbdat) (debug:print-error 0 *default-log-port* "db:sync-tables called with todb not a read-only database " bad-dbdat)) @@ -800,11 +747,11 @@ (if (> (length fromdat) batch-len) (begin (set! fromdats (cons fromdat fromdats)) (set! fromdat '()) (set! totrecords (+ totrecords 1))))) - (db:dbdat-get-db fromdb) + (dbr:dbdat-dbh fromdb) full-sel) ;; tack on remaining records in fromdat (if (not (null? fromdat)) (set! fromdats (cons fromdat fromdats))) @@ -814,11 +761,11 @@ ;; read the target table; BBHERE (sqlite3:for-each-row (lambda (a . b) (hash-table-set! todat a (apply vector a b))) - (db:dbdat-get-db todb) + (dbr:dbdat-dbh todb) full-sel) (when (and delay-handicap (> delay-handicap 0)) (debug:print-info 0 *default-log-port* "imposing synthetic sync delay of "delay-handicap" seconds since sync/delay-handicap is configured") (thread-sleep! delay-handicap) @@ -826,11 +773,11 @@ ) ;; first pass implementation, just insert all changed rows (for-each (lambda (targdb) - (let* ((db (db:dbdat-get-db targdb)) + (let* ((db (dbr:dbdat-dbh targdb)) (drp-trigger (if (member "last_update" field-names) (db:drop-trigger db tablename) #f)) (is-trigger-dropped (if (member "last_update" field-names) (db:is-trigger-dropped db tablename) @@ -1066,85 +1013,97 @@ ;; run-ids: '(1 2 3 ...) or #f (for all) ;; (define (db:multi-db-sync dbstruct . options) ;; (if (not (launch:setup)) ;; (debug:print 0 *default-log-port* "ERROR: not able to setup up for megatest.") - (let* ((mtdb (dbr:dbstruct-mtdb dbstruct)) - (tmpdb (db:get-db dbstruct)) - (refndb (dbr:dbstruct-refndb dbstruct)) - (allow-cleanup #t) ;; (if run-ids #f #t)) - (servers (server:get-list *toppath*)) ;; (tasks:get-all-servers (db:delay-if-busy tdbdat))) - (data-synced 0)) ;; count of changed records (I hope) - - (for-each - (lambda (option) - - (case option - ;; kill servers - ((killservers) - (for-each - (lambda (server) - (handle-exceptions - exn - (begin - (debug:print-info 0 *default-log-port* "Unable to get host and/or port from " server ", exn=" exn) - #f) - (match-let (((mod-time host port start-time server-id pid) server)) - (if (and host pid) - (tasks:kill-server host pid))))) - servers) - - ;; /tmp/bjbarcla/megatest_localdb/fullrun/.nfs.pdx.disks.icf_env_disk001.bjbarcla.gwa.issues.mtdev.matt-bisect.megatest.ext-tests.runs.all.v1.65.1.6524.dev.bb-24-justrollup-f8.rollup.fullrun/megatest.db.lock - (delete-file* (common:get-sync-lock-filepath)) - ) - - ;; clear out junk records - ;; - ((dejunk) - ;; (db:delay-if-busy mtdb) ;; ok to delay on mtdb - (when (file-write-access? (db:dbdat-get-path mtdb)) (db:clean-up mtdb)) - (db:clean-up tmpdb) - (db:clean-up refndb)) - - ;; sync runs, test_meta etc. - ;; - ((old2new) - (set! data-synced - (+ (db:sync-tables (db:sync-all-tables-list dbstruct) #f mtdb tmpdb refndb) - data-synced))) - - ;; now ensure all newdb data are synced to megatest.db - ;; do not use the run-ids list passed in to the function - ;; - ((new2old) - (set! data-synced - (+ (db:sync-tables (db:sync-all-tables-list dbstruct) #f tmpdb refndb mtdb) - data-synced))) - - ((adj-target) - (db:adj-target (db:dbdat-get-db mtdb)) - (db:adj-target (db:dbdat-get-db tmpdb)) - (db:adj-target (db:dbdat-get-db refndb))) - - ((schema) - (db:patch-schema-maindb (db:dbdat-get-db mtdb)) - (db:patch-schema-maindb (db:dbdat-get-db tmpdb)) - (db:patch-schema-maindb (db:dbdat-get-db refndb)) - (db:patch-schema-rundb (db:dbdat-get-db mtdb)) - (db:patch-schema-rundb (db:dbdat-get-db tmpdb)) - (db:patch-schema-rundb (db:dbdat-get-db refndb)))) - - (stack-push! (dbr:dbstruct-dbstack dbstruct) tmpdb)) - options) + (assert #f "FATAL: Call to db:multi-db-sync which is not completed yet.") + (let* ((data-synced 0)) ;; count of changed records (I hope) + (for-each + (lambda (subdb) + (let* ((mtdb (dbr:subdb-mtdb subdb)) + (tmpdb (dbr:subdb-tmpdb subdb)) + (refndb (dbr:subdb-refndb subdb)) + (allow-cleanup #t) ;; (if run-ids #f #t)) + (servers (server:get-list *toppath*)) ;; (tasks:get-all-servers (db:delay-if-busy tdbdat))) + ) + (for-each + (lambda (option) + + (case option + ;; kill servers + ((killservers) + (for-each + (lambda (server) + (handle-exceptions + exn + (begin + (debug:print-info 0 *default-log-port* "Unable to get host and/or port from " server ", exn=" exn) + #f) + (match-let (((mod-time host port start-time server-id pid) server)) + (if (and host pid) + (tasks:kill-server host pid))))) + servers) + + ;; /tmp/bjbarcla/megatest_localdb/fullrun/.nfs.pdx.disks.icf_env_disk001.bjbarcla.gwa.issues.mtdev.matt-bisect.megatest.ext-tests.runs.all.v1.65.1.6524.dev.bb-24-justrollup-f8.rollup.fullrun/megatest.db.lock + (delete-file* (common:get-sync-lock-filepath))) + + ;; clear out junk records + ;; + ((dejunk) + ;; (db:delay-if-busy mtdb) ;; ok to delay on mtdb + (when (file-write-access? (dbr:dbdat-dbfile mtdb)) (db:clean-up mtdb)) + (db:clean-up tmpdb) + (db:clean-up refndb)) + + ;; sync runs, test_meta etc. + ;; + ((old2new) + (set! data-synced + (+ (db:sync-tables (db:sync-all-tables-list dbstruct) #f mtdb tmpdb refndb) + data-synced))) + + ;; now ensure all newdb data are synced to megatest.db + ;; do not use the run-ids list passed in to the function + ;; + ((new2old) + (set! data-synced + (+ (db:sync-tables (db:sync-all-tables-list dbstruct) #f tmpdb refndb mtdb) + data-synced))) + + ((adj-target) + (db:adj-target (dbr:dbdat-dbh mtdb)) + (db:adj-target (dbr:dbdat-dbh tmpdb)) + (db:adj-target (dbr:dbdat-dbh refndb))) + + ((schema) + (db:patch-schema-maindb (dbr:dbdat-dbh mtdb)) + (db:patch-schema-maindb (dbr:dbdat-dbh tmpdb)) + (db:patch-schema-maindb (dbr:dbdat-dbh refndb)) + (db:patch-schema-rundb (dbr:dbdat-dbh mtdb)) + (db:patch-schema-rundb (dbr:dbdat-dbh tmpdb)) + (db:patch-schema-rundb (dbr:dbdat-dbh refndb)))) + + (stack-push! (dbr:subdb-dbstack subdb) tmpdb)) + options))) + (hash-table-values (dbr:dbstruct-subdbs dbstruct))) data-synced)) -(define (db:tmp->megatest.db-sync dbstruct last-update) - (let* ((mtdb (dbr:dbstruct-mtdb dbstruct)) - (tmpdb (db:get-db dbstruct)) - (refndb (dbr:dbstruct-refndb dbstruct)) - (res (db:sync-tables (db:sync-all-tables-list dbstruct) last-update tmpdb refndb mtdb))) - (stack-push! (dbr:dbstruct-dbstack dbstruct) tmpdb) +;; Sync all changed db's +;; +(define (db:tmp->megatest.db-sync dbstruct run-id last-update) + (let* ((subdbs (hash-table-values (dbr:dbstruct-subdbs dbstruct))) + (res '())) + (for-each + (lambda (subdb) + (let* ((dbname (db:run-id->dbname run-id)) + (mtdb (dbr:subdb-mtdb subdb)) + (tmpdb (db:get-db dbstruct run-id)) + (refndb (dbr:subdb-refndb subdb)) + (newres (db:sync-tables (db:sync-all-tables-list dbstruct) last-update tmpdb refndb mtdb))) + (stack-push! (dbr:subdb-dbstack subdb) tmpdb) + (set! res (cons newres res)))) + subdbs) res)) ;;;; run-ids ;; if #f use *db-local-sync* : or 'local-sync-flags ;; if #t use timestamps : or 'timestamps @@ -1186,11 +1145,11 @@ (print "I don't work anymore. open-run-close-no-exception-handling needs fixing or removing...") (exit) (if (or *db-write-access* (not #t)) ;; was: (member proc * db:all-write-procs *))) (let* ((db (cond - ((pair? idb) (db:dbdat-get-db idb)) + ((pair? idb) (dbr:dbdat-dbh idb)) ((sqlite3:database? idb) idb) ((not idb) (debug:print-error 0 *default-log-port* "cannot open-run-close with #f anymore")) ((procedure? idb) (idb)) (else (debug:print-error 0 *default-log-port* "cannot open-run-close with #f anymore")))) (res #f)) @@ -1319,11 +1278,11 @@ (let* ((configdat (car *configinfo*)) ;; tut tut, global warning... (keys (keys:config-get-fields configdat)) (havekeys (> (length keys) 0)) (keystr (keys->keystr keys)) (fieldstr (keys:make-key/field-string configdat)) - (db (db:dbdat-get-db dbdat))) + (db (dbr:dbdat-dbh dbdat))) (for-each (lambda (key) (let ((keyn key)) (if (member (string-downcase keyn) (list "runname" "state" "status" "owner" "event_time" "comment" "fail_count" "pass_count" "contour")) @@ -1552,12 +1511,12 @@ ;; dneeded is minimum space needed, scan for existing archives that ;; are on disks with adequate space and already have this test/itempath ;; archived ;; (define (db:archive-get-allocations dbstruct testname itempath dneeded) - (let* ((dbdat (db:get-db dbstruct)) ;; archive tables are in main.db - (db (db:dbdat-get-db dbdat)) + (let* ((dbdat (db:get-db dbstruct #f)) ;; archive tables are in main.db + (db (dbr:dbdat-dbh dbdat)) (res '()) (blocks '())) ;; a block is an archive chunck that can be added too if there is space (sqlite3:for-each-row (lambda (id archive-disk-id disk-path last-du last-du-time) (set! res (cons (vector id archive-disk-id disk-path last-du last-du-time) res))) @@ -1584,12 +1543,12 @@ ;; returns id of the record, register a disk allocated to archiving and record it's last known ;; available space ;; (define (db:archive-register-disk dbstruct bdisk-name bdisk-path df) - (let* ((dbdat (db:get-db dbstruct)) ;; archive tables are in main.db - (db (db:dbdat-get-db dbdat)) + (let* ((dbdat (db:get-db dbstruct #f)) ;; archive tables are in main.db + (db (dbr:dbdat-dbh dbdat)) (res #f)) (sqlite3:for-each-row (lambda (id) (set! res id)) db @@ -1614,12 +1573,12 @@ ;; record an archive path created on a given archive disk (identified by it's bdisk-id) ;; if path starts with / then it is full, otherwise it is relative to the archive disk ;; preference is to store the relative path. ;; (define (db:archive-register-block-name dbstruct bdisk-id archive-path #!key (du #f)) - (let* ((dbdat (db:get-db dbstruct)) ;; archive tables are in main.db - (db (db:dbdat-get-db dbdat)) + (let* ((dbdat (db:get-db dbstruct #f)) ;; archive tables are in main.db + (db (dbr:dbdat-dbh dbdat)) (res #f)) ;; first look to see if this path is already registered (sqlite3:for-each-row (lambda (id) (set! res id)) @@ -1668,11 +1627,11 @@ archive-block-id) res)))) ;; (define (db:archive-allocate-testsuite/area-to-block block-id testsuite-name areakey) ;; (let* ((dbdat (db:get-db dbstruct #f)) ;; archive tables are in main.db -;; (db (db:dbdat-get-db dbdat)) +;; (db (dbr:dbdat-dbh dbdat)) ;; (res '()) ;; (blocks '())) ;; a block is an archive chunck that can be added too if there is space ;; (sqlite3:for-each-row #f) ;;====================================================================== @@ -1944,11 +1903,11 @@ ;; b. .... ;; (define (db:clean-up dbdat) ;; (debug:print 0 *default-log-port* "WARNING: db clean up not fully ported to v1.60, cleanup action will be on megatest.db") (let* ((keep-record-age ( - (current-seconds) (common:hms-string->seconds (or (configf:lookup *configdat* "setup" "delete-record-age") "30d")))) - (db (db:dbdat-get-db dbdat)) + (db (dbr:dbdat-dbh dbdat)) (count-stmt (sqlite3:prepare db "SELECT (SELECT count(id) FROM tests)+(SELECT count(id) FROM runs);")) (statements (map (lambda (stmt) (sqlite3:prepare db stmt)) (list @@ -1999,11 +1958,11 @@ ;; a. If have tests that are not deleted, set state='unknown' ;; b. .... ;; (define (db:clean-up-rundb dbdat) ;; (debug:print 0 *default-log-port* "WARNING: db clean up not fully ported to v1.60, cleanup action will be on megatest.db") - (let* ((db (db:dbdat-get-db dbdat)) + (let* ((db (dbr:dbdat-dbh dbdat)) (count-stmt (sqlite3:prepare db "SELECT (SELECT count(id) FROM tests);")) (statements (map (lambda (stmt) (sqlite3:prepare db stmt)) (list @@ -2040,11 +1999,11 @@ ;; a. If have tests that are not deleted, set state='unknown' ;; b. .... ;; (define (db:clean-up-maindb dbdat) ;; (debug:print 0 *default-log-port* "WARNING: db clean up not fully ported to v1.60, cleanup action will be on megatest.db") - (let* ((db (db:dbdat-get-db dbdat)) + (let* ((db (dbr:dbdat-dbh dbdat)) (count-stmt (sqlite3:prepare db "SELECT (SELECT count(id) FROM runs);")) (statements (map (lambda (stmt) (sqlite3:prepare db stmt)) (list @@ -2140,21 +2099,11 @@ ;;====================================================================== ;; no-sync.db - small bits of data to be shared between servers ;;====================================================================== (define (db:open-no-sync-db) - (let* ((dbpath (db:dbfile-path)) - (dbname (conc dbpath "/no-sync.db")) - (db-exists (common:file-exists? dbname)) - (db (sqlite3:open-database dbname))) - (sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 136000)) - (if (not db-exists) - (begin - (sqlite3:execute db "PRAGMA synchronous = 0;") - (sqlite3:execute db "CREATE TABLE IF NOT EXISTS no_sync_metadat (var TEXT,val TEXT, CONSTRAINT no_sync_metadat_constraint UNIQUE (var));") - (sqlite3:execute db "PRAGMA journal_mode=WAL;"))) - db)) + (dbfile:open-no-syncd-db (db:dbfile-path))) ;; if we are not a server create a db handle. this is not finalized ;; so watch for problems. I'm still not clear if it is needed to manually ;; finalize sqlite3 dbs with the sqlite3 egg. ;; @@ -3440,11 +3389,11 @@ (let loop ((new-id min-test-id)) (let ((test-id-found #f)) (sqlite3:for-each-row (lambda (id) (set! test-id-found id)) - (db:dbdat-get-db mtdb) + (dbr:dbdat-dbh mtdb) "SELECT id FROM tests WHERE id=?;" new-id) ;; if test-id-found then need to try again (if test-id-found (loop (+ new-id 1)) @@ -3458,11 +3407,11 @@ (debug:print-info 0 *default-log-port* "Adjusting test ids in megatest.db for run " run-id) (let ((min-test-id (* run-id 30000))) (for-each (lambda (testrec) (let* ((test-id (vector-ref testrec (db:field->number "id" db:test-record-fields)))) - (db:adj-test-id (db:dbdat-get-db mtdb) min-test-id test-id))) + (db:adj-test-id (dbr:dbdat-dbh mtdb) min-test-id test-id))) testrecs))) ;; 1. move test ids into the 30k * run_id range ;; 2. move step ids into the 30k * run_id range ;; @@ -3469,11 +3418,11 @@ (define (db:prep-megatest.db-for-migration mtdb) (let* ((run-ids (db:get-all-run-ids mtdb))) (for-each (lambda (run-id) (let ((testrecs (db:get-all-tests-info-by-run-id mtdb run-id))) - (db:prep-megatest.db-adj-test-ids (db:dbdat-get-db mtdb) run-id testrecs))) + (db:prep-megatest.db-adj-test-ids (dbr:dbdat-dbh mtdb) run-id testrecs))) run-ids))) ;; Get test data using test_id, run-id is not used ;; (define (db:get-test-info-by-id dbstruct run-id test-id) @@ -4467,14 +4416,14 @@ ;; Function recursively checks if .journal exists; if yes means db busy; call itself after delayed interval ;; return the sqlite3 db handle if possible ;; (define (db:delay-if-busy dbdat #!key (count 6)) (if (not (configf:lookup *configdat* "server" "delay-on-busy")) - (and dbdat (db:dbdat-get-db dbdat)) + (and dbdat (dbr:dbdat-dbh dbdat)) (if dbdat - (let* ((dbpath (db:dbdat-get-path dbdat)) - (db (db:dbdat-get-db dbdat)) ;; we'll return this so (db:delay--if-busy can be called inline + (let* ((dbpath (dbr:dbdat-dbfile dbdat)) + (db (dbr:dbdat-dbh dbdat)) ;; we'll return this so (db:delay--if-busy can be called inline (dbfj (conc dbpath "-journal"))) (if (handle-exceptions exn (begin (debug:print-info 0 *default-log-port* "WARNING: failed to test for existance of " dbfj ", exn=" exn) @@ -4880,16 +4829,17 @@ ;; NOT REWRITTEN YET!!!!! ;; runspatt is a comma delimited list of run patterns ;; keypatt-alist must contain *all* keys with an associated pattern: '( ("KEY1" "%") .. ) (define (db:extract-ods-file dbstruct outputfile keypatt-alist runspatt pathmod) + (assert #f "FATAL: call to db:extract-ods-file which is not ported yet.") (let* ((keysstr (string-intersperse (map car keypatt-alist) ",")) (keyqry (string-intersperse (map (lambda (p)(conc (car p) " LIKE ? ")) keypatt-alist) " AND ")) (numkeys (length keypatt-alist)) (test-ids '()) (dbdat (db:get-db dbstruct)) - (db (db:dbdat-get-db dbdat)) + (db (dbr:dbdat-dbh dbdat)) (windows (and pathmod (substring-index "\\" pathmod))) (tempdir (conc "/tmp/" (current-user-name) "/" runspatt "_" (random 10000) "_" (current-process-id))) (runsheader (append (list "Run Id" "Runname") ; 0 1 (map car keypatt-alist) ; + N = length keypatt-alist (list "Testname" ; 2 @@ -5001,6 +4951,283 @@ (stack-push! (dbr:dbstruct-dbstack dbstruct) dbdat) (system "rm -rf tempdir"))) ;; (db:extract-ods-file db "outputfile.ods" '(("sysname" "%")("fsname" "%")("datapath" "%")) "%") +;;====================================================================== +;; moving watch dogs here due to dependencies +;;====================================================================== + +;;====================================================================== +;; currently the primary job of the watchdog is to run the sync back to megatest.db from the db in /tmp +;; if we are on the homehost and we are a server (by definition we are on the homehost if we are a server) +;; +(define (common:readonly-watchdog dbstruct) + (thread-sleep! 0.05) ;; delay for startup + (debug:print-info 13 *default-log-port* "common:readonly-watchdog entered.") + ;; sync megatest.db to /tmp/.../megatst.db + (let* ((sync-cool-off-duration 3) + (golden-mtdb (dbr:dbstruct-mtdb dbstruct)) + (golden-mtpath (db:dbdat-get-path golden-mtdb)) + (tmp-mtdb (dbr:dbstruct-tmpdb dbstruct)) + (tmp-mtpath (db:dbdat-get-path tmp-mtdb))) + (debug:print-info 0 *default-log-port* "Read-only periodic sync thread started.") + (let loop ((last-sync-time 0)) + (debug:print-info 13 *default-log-port* "loop top tmp-mtpath="tmp-mtpath" golden-mtpath="golden-mtpath) + (let* ((duration-since-last-sync (- (current-seconds) last-sync-time))) + (debug:print-info 13 *default-log-port* "duration-since-last-sync="duration-since-last-sync) + (if (and (not *time-to-exit*) + (< duration-since-last-sync sync-cool-off-duration)) + (thread-sleep! (- sync-cool-off-duration duration-since-last-sync))) + (if (not *time-to-exit*) + (let ((golden-mtdb-mtime (file-modification-time golden-mtpath)) + (tmp-mtdb-mtime (file-modification-time tmp-mtpath))) + (if (> golden-mtdb-mtime tmp-mtdb-mtime) + (if (< golden-mtdb-mtime (- (current-seconds) 3)) ;; file has NOT been touched in past three seconds, this way multiple servers won't fight to sync back + (let ((res (db:multi-db-sync dbstruct 'old2new))) + (debug:print-info 13 *default-log-port* "rosync called, " res " records transferred.")))) + (loop (current-seconds))) + #t))) + (debug:print-info 0 *default-log-port* "Exiting readonly-watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id)" mtpath="golden-mtpath))) + +;;====================================================================== +;; TODO: for multiple areas, we will have multiple watchdogs; and multiple threads to manage +(define (common:watchdog) + (debug:print-info 13 *default-log-port* "common:watchdog entered.") + (if (launch:setup) + (if (common:on-homehost?) + (let ((dbstruct (db:setup #t))) ;; (db:setup-db *dbstruct-dbs* *toppath* #f))) ;; #t))) + (debug:print-info 13 *default-log-port* "after db:setup with dbstruct=" dbstruct) + (cond + ((dbr:dbstruct-read-only dbstruct) + (debug:print-info 13 *default-log-port* "loading read-only watchdog") + (common:readonly-watchdog dbstruct)) + (else + (debug:print-info 13 *default-log-port* "loading writable-watchdog.") + (let* ((syncer (or (configf:lookup *configdat* "server" "sync-method") "delta-sync"))) ;; "brute-force-sync"))) + (cond + ((equal? syncer "brute-force-sync") + (server:writable-watchdog-bruteforce dbstruct)) + ((equal? syncer "delta-sync") + (server:writable-watchdog-deltasync dbstruct)) + (else + (debug:print-error 0 *default-log-port* "Unknown server/sync-method specified ("syncer") - valid values are brute-force-sync and delta-sync.") + (exit 1))) + ;;(debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] Syncer started (method="syncer")") + ))) + (debug:print-info 13 *default-log-port* "watchdog done.")) + (debug:print-info 13 *default-log-port* "no need for watchdog on non-homehost")))) + +(define (server:writable-watchdog-bruteforce dbstruct) + (thread-sleep! 1) ;; delay for startup + #;(let* ((do-a-sync (server:get-bruteforce-syncer dbstruct)) + (final-sync (server:get-bruteforce-syncer dbstruct fork-to-background: #t persist-until-sync: #t))) + (when (and (not (args:get-arg "-sync-to-megatest.db")) ;; conditions under which we do not run the sync + (args:get-arg "-server")) + + (let loop () + (do-a-sync) + (if (not *time-to-exit*) (loop))) ;; keep going unless time to exit + + ;; time to exit, close the no-sync db here + (final-sync) + + (if (common:low-noise-print 30) + (debug:print-info 0 *default-log-port* "Exiting watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id) + ))))) + +(define (server:writable-watchdog-deltasync dbstruct) + ;; This is awful complex and convoluted. Plan to redo? + ;; for now ... skip it. +;; ==> +;; ==> (thread-sleep! 0.05) ;; delay for startup +;; ==> (let ((legacy-sync (common:run-sync?)) +;; ==> (sync-stale-seconds (configf:lookup-number *configdat* "server" "sync-stale-seconds" default: 300)) +;; ==> (debug-mode (debug:debug-mode 1)) +;; ==> (last-time (current-seconds)) +;; ==> (no-sync-db (db:open-no-sync-db)) +;; ==> (stmt-cache #f) ;; (dbr:dbstruct-stmt-cache dbstruct)) +;; ==> (sync-duration 0) ;; run time of the sync in milliseconds +;; ==> (subdbs (hash-table-values (dbr:dbstruct-subdbs dbstruct)))) +;; ==> (set! *no-sync-db* no-sync-db) ;; make the no sync db available to api calls +;; ==> (debug:print-info 2 *default-log-port* "Periodic sync thread started.") +;; ==> (debug:print-info 3 *default-log-port* "watchdog starting. legacy-sync is " legacy-sync" pid="(current-process-id) );; " this-wd-num="this-wd-num) +;; ==> +;; ==> (if (and legacy-sync (not *time-to-exit*)) +;; ==> (begin +;; ==> (debug:print-info 0 *default-log-port* "Server running, periodic sync started.") + (let loop () +;; ==> ;; sync for filesystem local db writes +;; ==> ;; +;; ==> (mutex-lock! *db-multi-sync-mutex*) +;; ==> (let* ((start-file (conc tmp-area "/.start-sync")) +;; ==> (end-file (conc tmp-area "/.end-sync")) +;; ==> +;; ==> (need-sync (>= *db-last-access* *db-last-sync*)) ;; no sync since last write +;; ==> (sync-in-progress *db-sync-in-progress*) +;; ==> (min-intersync-delay (configf:lookup-number *configdat* "server" "minimum-intersync-delay" default: 5)) +;; ==> (should-sync (and (not *time-to-exit*) +;; ==> (> (- (current-seconds) *db-last-sync*) min-intersync-delay))) ;; sync every five seconds minimum, deprecated logic, can probably be removed +;; ==> (start-time (current-seconds)) +;; ==> (cpu-load-adj (alist-ref 'adj-proc-load (common:get-normalized-cpu-load #f))) +;; ==> (mt-mod-time (file-modification-time mtpath)) +;; ==> (last-sync-start (if (common:file-exists? start-file) +;; ==> (file-modification-time start-file) +;; ==> 0)) +;; ==> (last-sync-end (if (common:file-exists? end-file) +;; ==> (file-modification-time end-file) +;; ==> 10)) +;; ==> (sync-period (+ 3 (* cpu-load-adj 30))) ;; as adjusted load increases increase the sync period +;; ==> (recently-synced (and (< (- start-time mt-mod-time) sync-period) ;; not useful if sync didn't modify megatest.db! +;; ==> (< mt-mod-time last-sync-start))) +;; ==> (sync-done (<= last-sync-start last-sync-end)) +;; ==> (sync-stale (> start-time (+ last-sync-start sync-stale-seconds))) +;; ==> (will-sync (and (not *time-to-exit*) ;; do not start a sync if we are in the process of exiting +;; ==> (or need-sync should-sync) +;; ==> (or sync-done sync-stale) +;; ==> (not sync-in-progress) +;; ==> (not recently-synced)))) +;; ==> (debug:print-info 13 *default-log-port* "WD writable-watchdog top of loop. need-sync="need-sync" sync-in-progress=" sync-in-progress +;; ==> " should-sync="should-sync" start-time="start-time" mt-mod-time="mt-mod-time" recently-synced="recently-synced" will-sync="will-sync +;; ==> " sync-done=" sync-done " sync-period=" sync-period) +;; ==> (if (and (> sync-period 5) +;; ==> (common:low-noise-print 30 "sync-period")) +;; ==> (debug:print-info 0 *default-log-port* "Increased sync period due to long sync times, sync took: " sync-period " seconds.")) +;; ==> ;; (if recently-synced (debug:print-info 0 *default-log-port* "Skipping sync due to recently-synced flag=" recently-synced)) +;; ==> ;; (debug:print-info 0 *default-log-port* "need-sync: " need-sync " sync-in-progress: " sync-in-progress " should-sync: " should-sync " will-sync: " will-sync) +;; ==> (if will-sync (set! *db-sync-in-progress* #t)) +;; ==> (mutex-unlock! *db-multi-sync-mutex*) +;; ==> (if will-sync +;; ==> (let (;; (max-sync-duration (configf:lookup-number *configdat* "server" "max-sync-duration")) ;; KEEPING THIS AVAILABLE BUT SHOULD NOT USE, I'M PRETTY SURE IT DOES NOT WORK! +;; ==> (sync-start (current-milliseconds))) +;; ==> (with-output-to-file start-file (lambda ()(print (current-process-id)))) +;; ==> +;; ==> ;; put lock here +;; ==> +;; ==> ;; (if (or (not max-sync-duration) +;; ==> ;; (< sync-duration max-sync-duration)) ;; NOTE: db:sync-to-megatest.db keeps track of time of last sync and syncs incrementally +;; ==> +;; ==> ;; +;; ==> +;; ==> (for-each +;; ==> (lambda (subdb) +;; ==> (let* (;;(dbstruct (db:setup)) +;; ==> (mtdb (dbr:subdb-mtdb subdb)) +;; ==> (mtpath (db:dbdat-get-path mtdb)) +;; ==> (tmp-area (common:get-db-tmp-area)) +;; ==> (res (db:sync-to-megatest.db dbstruct no-sync-db: no-sync-db))) ;; did we sync any data? If so need to set the db touched flag to keep the server alive +;; ==> (set! sync-duration (- (current-milliseconds) sync-start)) +;; ==> (if (> res 0) ;; some records were transferred, keep the db alive +;; ==> (begin +;; ==> (mutex-lock! *heartbeat-mutex*) +;; ==> (set! *db-last-access* (current-seconds)) +;; ==> (mutex-unlock! *heartbeat-mutex*) +;; ==> (debug:print-info 0 *default-log-port* "sync called, " res " records transferred.")) +;; ==> (debug:print-info 2 *default-log-port* "sync called but zero records transferred"))) +;; ==> ) +;; ==> subdbs))) +;; ==> ;; ;; TODO: factor this next routine out into a function +;; ==> ;; (with-input-from-pipe ;; this should not block other threads but need to verify this +;; ==> ;; (conc "megatest -sync-to-megatest.db -m testsuite:" (common:get-area-name) ":" *toppath*) +;; ==> ;; (lambda () +;; ==> ;; (let loop ((inl (read-line)) +;; ==> ;; (res #f)) +;; ==> ;; (if (eof-object? inl) +;; ==> ;; (begin +;; ==> ;; (set! sync-duration (- (current-milliseconds) sync-start)) +;; ==> ;; (cond +;; ==> ;; ((not res) +;; ==> ;; (debug:print 0 *default-log-port* "ERROR: sync from /tmp db to megatest.db appears to have failed. Recommended that you stop your runs and run \"megatest -cleanup-db\"")) +;; ==> ;; ((> res 0) +;; ==> ;; (mutex-lock! *heartbeat-mutex*) +;; ==> ;; (set! *db-last-access* (current-seconds)) +;; ==> ;; (mutex-unlock! *heartbeat-mutex*)))) +;; ==> ;; (let ((num-synced (let ((matches (string-match "^Synced (\\d+).*$" inl))) +;; ==> ;; (if matches +;; ==> ;; (string->number (cadr matches)) +;; ==> ;; #f)))) +;; ==> ;; (loop (read-line) +;; ==> ;; (or num-synced res)))))))))) +;; ==> +;; ==> (if will-sync +;; ==> (begin +;; ==> (mutex-lock! *db-multi-sync-mutex*) +;; ==> (set! *db-sync-in-progress* #f) +;; ==> (set! *db-last-sync* start-time) +;; ==> (with-output-to-file end-file (lambda ()(print (current-process-id)))) +;; ==> +;; ==> ;; release lock here +;; ==> +;; ==> (mutex-unlock! *db-multi-sync-mutex*))) +;; ==> (if (and debug-mode +;; ==> (> (- start-time last-time) 60)) +;; ==> (begin +;; ==> (set! last-time start-time) +;; ==> (debug:print-info 4 *default-log-port* "timestamp -> " (seconds->time-string (current-seconds)) ", time since start -> " (seconds->hr-min-sec (- (current-seconds) *time-zero*)))))) +;; ==> + ;; keep going unless time to exit + ;; + (if (not *time-to-exit*) + (let delay-loop ((count 0)) + ;;(debug:print-info 13 *default-log-port* "delay-loop top; count="count" pid="(current-process-id)" this-wd-num="this-wd-num" *time-to-exit*="*time-to-exit*) + + (if (and (not *time-to-exit*) + (< count 6)) ;; was 11, changing to 4. + (begin + (thread-sleep! 1) + (delay-loop (+ count 1)))) + (if (not *time-to-exit*) (loop)))) + +;; ==> ;; time to exit, close the no-sync db here +;; ==> (db:no-sync-close-db no-sync-db stmt-cache) + (if (common:low-noise-print 30) + (debug:print-info 0 *default-log-port* "Exiting watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id) )))) ;; ))) ;;" this-wd-num="this-wd-num))))))) + + +(define (std-exit-procedure) + ;;(common:telemetry-log-close) + (on-exit (lambda () 0)) + ;;(debug:print-info 13 *default-log-port* "std-exit-procedure called; *time-to-exit*="*time-to-exit*) + (let ((no-hurry (if *time-to-exit* ;; hurry up + #f + (begin + (set! *time-to-exit* #t) + #t)))) + (debug:print-info 4 *default-log-port* "starting exit process, finalizing databases.") + (if (and no-hurry (debug:debug-mode 18)) + (rmt:print-db-stats)) + (let ((th1 (make-thread (lambda () ;; thread for cleaning up, give it five seconds + (if *dbstruct-dbs* (db:close-all *dbstruct-dbs*)) ;; one second allocated + (if *task-db* + (let ((db (cdr *task-db*))) + (if (sqlite3:database? db) + (begin + (sqlite3:interrupt! db) + (sqlite3:finalize! db #t) + ;; (vector-set! *task-db* 0 #f) + (set! *task-db* #f))))) + (http-client#close-all-connections!) + ;; (if (and *runremote* + ;; (remote-conndat *runremote*)) + ;; (begin + ;; (http-client#close-all-connections!))) ;; for http-client + (if (not (eq? *default-log-port* (current-error-port))) + (close-output-port *default-log-port*)) + (set! *default-log-port* (current-error-port))) "Cleanup db exit thread")) + (th2 (make-thread (lambda () + (debug:print 4 *default-log-port* "Attempting clean exit. Please be patient and wait a few seconds...") + (if no-hurry + (begin + (thread-sleep! 5)) ;; give the clean up few seconds to do it's stuff + (begin + (thread-sleep! 2))) + (debug:print 4 *default-log-port* " ... done") + ) + "clean exit"))) + (thread-start! th1) + (thread-start! th2) + (thread-join! th1) + ) + ) + + 0) ADDED dbfile.scm Index: dbfile.scm ================================================================== --- /dev/null +++ dbfile.scm @@ -0,0 +1,204 @@ +;;====================================================================== +;; Copyright 2017, Matthew Welland. +;; +;; This file is part of Megatest. +;; +;; Megatest is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. +;; +;; Megatest is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with Megatest. If not, see . + +;;====================================================================== + +(declare (unit dbfile)) +;; (declare (uses debugprint)) + +(module dbfile + * + +(import scheme chicken data-structures extras) +(import (prefix sqlite3 sqlite3:) + posix typed-records srfi-18 + srfi-69 + stack + ) + +;; (import debugprint) + +;;====================================================================== +;; R E C O R D S +;;====================================================================== + +;; a single Megatest area with it's multiple dbs is +;; managed in a dbstruct +;; +(defstruct dbr:dbstruct + (areapath #f) + (homehost #f) + (read-only #f) + (subdbs (make-hash-table)) + ) + +;; NOTE: Need one dbr:subdb per main.db, 1.db ... +;; +(defstruct dbr:subdb + (dbname #f) ;; .db/1.db + (mtdb #f) ;; mtrah/.db/1.db + ;; (dbdats (make-hash-table)) ;; id => dbdat + (tmpdb #f) ;; /tmp/.../.db/1.db + (refndb #f) ;; /tmp/.../.db/1.db_ref + (dbstack #f) ;; stack for tmp db handles, do not initialize with a stack + (homehost #f) ;; not used yet + (on-homehost #f) ;; not used yet + (read-only #f) + (last-sync 0) + (last-write (current-seconds)) + ) ;; goal is to converge on one struct for an area but for now it is too confusing + +;; need to keep dbhandles and cached statements together +(defstruct dbr:dbdat + (dbfile #f) + (dbh #f) + (stmt-cache (make-hash-table)) + (read-only #f)) + +(define (dbfile:run-id->key run-id) + (or run-id 'main)) + +(define (db:safely-close-sqlite3-db db stmt-cache #!key (try-num 3)) + (if (<= try-num 0) + #f + (handle-exceptions + exn + (begin + (print "Attempt to safely close sqlite3 db failed. Trying again. exn=" exn) + (thread-sleep! 3) + (sqlite3:interrupt! db) + (db:safely-close-sqlite3-db db stmt-cache try-num: (- try-num 1))) + (if (sqlite3:database? db) + (let* ((stmts (and stmt-cache (hash-table-ref/default stmt-cache db #f)))) + (if stmts (map sqlite3:finalize! (hash-table-values stmts))) + (sqlite3:finalize! db) + #t) + #f)))) + +;; close all opened run-id dbs +(define (db:close-all dbstruct) + (if (dbr:dbstruct? dbstruct) +;; (handle-exceptions +;; exn +;; (begin +;; (debug:print 0 *default-log-port* "WARNING: Finalizing failed, " ((condition-property-accessor 'exn 'message) exn) ", note - exn=" exn) +;; (print-call-chain *default-log-port*)) + ;; (db:sync-touched dbstruct 0 force-sync: #t) ;; NO. Do not do this here. Instead we rely on a server to be started when there are writes, even if the server itself is not going to be used as a server. + (let* ((subdbs (hash-table-values (dbr:dbstruct-subdbs dbstruct)))) + (for-each + (lambda (subdb) + (let* ((tdbs (stack->list (dbr:subdb-dbstack subdb))) + (mdb (dbr:dbdat-dbh (dbr:subdb-mtdb subdb))) + (rdb (dbr:dbdat-dbh (dbr:subdb-refndb subdb)))) + + (map (lambda (dbdat) + (let* ((stmt-cache (dbr:dbdat-stmt-cache dbdat)) + (dbh (dbr:dbdat-dbh dbdat))) + (db:safely-close-sqlite3-db dbh stmt-cache))) + tdbs) + (db:safely-close-sqlite3-db mdb #f) ;; stmt-cache) ;; (if (sqlite3:database? mdb) (sqlite3:finalize! mdb)) + (db:safely-close-sqlite3-db rdb #f))) ;; stmt-cache))))) ;; (if (sqlite3:database? rdb) (sqlite3:finalize! rdb)))))) + subdbs)))) +;; ) + +;; ;; set up a single db (e.g. main.db, 1.db ... etc.) +;; ;; +;; (define (db:setup-db dbstruct areapath run-id) +;; (let* ((dbname (db:run-id->dbname run-id)) +;; (dbstruct (hash-table-ref/default dbstructs dbname #f))) +;; (if dbstruct +;; dbstruct +;; (let* ((dbstruct-new (make-dbr:dbstruct))) +;; (db:open-db dbstruct-new run-id areapath: areapath do-sync: #t) +;; (hash-table-set! dbstructs dbname dbstruct-new) +;; dbstruct-new)))) + +;; ; Returns the dbdat for a particular dbfile inside the area +;; ;; +;; (define (dbr:dbstruct-get-dbdat dbstruct dbfile) +;; (hash-table-ref/default (dbr:dbstruct-dbdats dbstruct) dbfile #f)) +;; +;; (define (dbr:dbstruct-dbdat-put! dbstruct dbfile db) +;; (hash-table-set! (dbr:dbstruct-dbdats dbstruct) dbfile db)) +;; +;; (define (db:run-id->first-num run-id) +;; (let* ((s (number->string run-id)) +;; (l (string-length s))) +;; (substring s (- l 1) l))) + +;; 1234 => 4/1234.db +;; #f => 0/main.db +;; (abandoned the idea of num/db) +;; +(define (db:run-id->path apath run-id) + (conc apath"/"(db:run-id->dbname run-id))) + +(define (db:dbname->path apath dbname) + (conc apath"/"dbname)) + +(define (db:run-id->dbname run-id) + (cond + ((number? run-id) (conc ".db/" (modulo run-id 100) ".db")) + ((not run-id) (conc ".db/main.db")) + (else run-id))) + +(define (dbfile:get-subdb dbstruct run-id) + (let* ((dbfname (db:run-id->dbname run-id))) + (hash-table-ref/default (dbr:dbstruct-subdbs dbstruct) dbfname #f))) + +;; Get/open a database +;; if run-id => get run specific db +;; if #f => get main db +;; if run-id is a string treat it as a filename +;; if db already open - return inmem +;; if db not open, open inmem, rundb and sync then return inmem +;; inuse gets set automatically for rundb's +;; +(define (dbfile:get-dbh dbstruct run-id) ;; RENAME TO db:get-dbh + (let* ((subdb (dbfile:get-subdb dbstruct run-id))) + (if (stack? (dbr:subdb-dbstack subdb)) + (if (stack-empty? (dbr:subdb-dbstack subdb)) + #f + (stack-pop! (dbr:subdb-dbstack subdb))) + #f))) + +(define (dbfile:add-dbh dbstruct run-id dbh) + (let* ((subdb (dbfile:get-subdb dbstruct run-id))) + (if (not (stack? (dbr:subdb-dbstack subdb))) + (dbr:subdb-dbstack-set! subdb (make-stack))) + (stack-push! (dbr:subdb-dbstack subdb) dbh))) + +;;====================================================================== +;; no-sync.db - small bits of data to be shared between servers +;;====================================================================== + +(define (dbfile:open-no-sync-db dbpath) + (let* (;; (dbpath (db:dbfile-path)) + (dbname (conc dbpath "/no-sync.db")) + (db-exists (file-exists? dbname)) + (db (sqlite3:open-database dbname))) + (sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 136000)) + (if (not db-exists) + (begin + (sqlite3:execute db "PRAGMA synchronous = 0;") + (sqlite3:execute db "CREATE TABLE IF NOT EXISTS no_sync_metadat (var TEXT,val TEXT, CONSTRAINT no_sync_metadat_constraint UNIQUE (var));") + (sqlite3:execute db "PRAGMA journal_mode=WAL;"))) + db)) + + +) ADDED debugprint.scm Index: debugprint.scm ================================================================== --- /dev/null +++ debugprint.scm @@ -0,0 +1,175 @@ + +(declare (unit debugprint)) +(declare (uses mtargs)) + +(module debugprint + * + +;;(import scheme chicken data-structures extras files ports) + (import + scheme + chicken + data-structures + posix + ports + extras + + ;; scheme + ;; chicken.base + ;; chicken.string + ;; chicken.time + ;; chicken.time.posix + ;; chicken.port + ;; chicken.process-context + ;; chicken.process-context.posix + + (prefix mtargs args:) + srfi-1 + ;; system-information + ) + +;;====================================================================== +;; debug stuff +;;====================================================================== + +(define verbosity (make-parameter '())) +(define *default-log-port* (current-error-port)) +(define debug:print-logger (make-parameter #f)) ;; set to a proc to call on every logging print + +(define (debug:setup) + (let ((debugstr (or (args:get-arg "-debug") + (args:get-arg "-debug-noprop") + (get-environment-variable "MT_DEBUG_MODE")))) + (verbosity (debug:calc-verbosity debugstr 'q)) + (debug:check-verbosity (verbosity) debugstr) + ;; if we were handed a bad verbosity rule then we will override it with 1 and continue + (if (not (verbosity))(verbosity 1)) + (if (and (not (args:get-arg "-debug-noprop")) + (or (args:get-arg "-debug") + (not (get-environment-variable "MT_DEBUG_MODE")))) + (setenv #;set-environment-variable! "MT_DEBUG_MODE" (if (list? (verbosity)) + (string-intersperse (map conc (verbosity)) ",") + (conc (verbosity))))))) + +;; check verbosity, #t is ok +(define (debug:check-verbosity verbosity vstr) + (if (not (or (number? verbosity) + (list? verbosity))) + (begin + (print "ERROR: Invalid debug value \"" vstr "\"") + #f) + #t)) + +;;====================================================================== +;; (define (debug:print . params) #f) +;; (define (debug:print-info . params) #f) +;; +;; (define (set-functions dbgp dbgpinfo) +;; (set! debug:print dbgp) +;; (set! debug:print-info dbgpinfo)) + +;;====================================================================== +;; this was cached based on results from profiling but it turned out the profiling +;; somehow went wrong - perhaps too many processes writing to it. Leaving the caching +;; in for now but can probably take it out later. +;; +(define (debug:calc-verbosity vstr arg) ;; arg is 'v (verbose) or 'q (quiet) + (let* ((res (cond + ((number? vstr) vstr) + ((not (string? vstr)) 1) + ;; ((string-match "^\\s*$" vstr) 1) + (vstr (let ((debugvals (filter number? (map string->number (string-split vstr ","))))) + (cond + ((> (length debugvals) 1) debugvals) + ((> (length debugvals) 0)(car debugvals)) + (else 1)))) + ((eq? arg 'v) 2) ;; verbose + ((eq? arg 'q) 0) ;; quiet + (else 1)))) + (verbosity res) + res)) + +;;====================================================================== +;; check verbosity, #t is ok +#;(define (debug-check-verbosity verbosity vstr) + (if (not (or (number? verbosity) + (list? verbosity))) + (begin + (print "ERROR: Invalid debug value \"" vstr "\"") + #f) + #t)) + +(define (debug:debug-mode n) + (let* ((vb (verbosity))) + (cond + ((and (number? vb) ;; number number + (number? n)) + (<= n vb)) + ((and (list? vb) ;; list number + (number? n)) + (member n vb)) + ((and (list? vb) ;; list list + (list? n)) + (not (null? (lset-intersection! eq? vb n)))) + ((and (number? vb) + (list? n)) + (member vb n)) + (else #f)))) + +(define (debug:handle-remote-logging params) + (if (debug:print-logger) ;; NOTE: turn params into string to keep things simple for now + ((debug:print-logger)(conc "REMOTE ("(get-host-name)", pid="(current-process-id)") " + (string-intersperse (map conc params) " ") "; " + (string-intersperse (command-line-arguments) " "))))) + +(define debug:enable-timestamp (make-parameter #t)) + +(define (debug:timestamp) + (if (debug:enable-timestamp) + (conc (time->string + (seconds->local-time (current-seconds)) "%H:%M:%S") " ") + "")) + + (define (debug:print n e . params) + (if (debug:debug-mode n) + (with-output-to-port (or e (current-error-port)) + (lambda () + ;; (if *logging* + ;; (db:log-event (apply conc params)) + (apply print (debug:timestamp) params) + ;; (debug:handle-remote-logging params) + ))) + #t ;; only here to make remote stuff happy. It'd be nice to fix that ... + ) + +(define (debug:print-error n e . params) + ;; normal print + (if (debug:debug-mode n) + (with-output-to-port (if (port? e) e (current-error-port)) + (lambda () + (apply print "ERROR: " (debug:timestamp) params) + ;; (debug:handle-remote-logging (cons "ERROR: " params)) + ))) + ;; pass important messages to stderr + (if (and (eq? n 0)(not (eq? e (current-error-port)))) + (with-output-to-port (current-error-port) + (lambda () + (apply print "ERROR: " (debug:timestamp) params) + )))) + +(define (debug:print-info n e . params) + (if (debug:debug-mode n) + (with-output-to-port (if (port? e) e (current-error-port)) + (lambda () + (apply print "INFO: (" n ") "(debug:timestamp) params) ;; res) + ;; (debug:handle-remote-logging (cons "INFO: " params)) + )))) + +(define (debug:print-warn n e . params) + (if (debug:debug-mode n) + (with-output-to-port (if (port? e) e (current-error-port)) + (lambda () + (apply print "WARN: (" n ") " (debug:timestamp) params) ;; res) + ;; (debug:handle-remote-logging (cons "WARN: " params)) + )))) +) DELETED fs-transport.scm Index: fs-transport.scm ================================================================== --- fs-transport.scm +++ /dev/null @@ -1,52 +0,0 @@ - -;; Copyright 2006-2012, Matthew Welland. -;; -;; This file is part of Megatest. -;; -;; Megatest is free software: you can redistribute it and/or modify -;; it under the terms of the GNU General Public License as published by -;; the Free Software Foundation, either version 3 of the License, or -;; (at your option) any later version. -;; -;; Megatest is distributed in the hope that it will be useful, -;; but WITHOUT ANY WARRANTY; without even the implied warranty of -;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -;; GNU General Public License for more details. -;; -;; You should have received a copy of the GNU General Public License -;; along with Megatest. If not, see . - -(require-extension (srfi 18) extras tcp s11n) - -(use sqlite3 srfi-1 posix regex regex-case srfi-69 hostinfo md5 message-digest) -(import (prefix sqlite3 sqlite3:)) - -(use spiffy uri-common intarweb http-client spiffy-request-vars) - -(tcp-buffer-size 2048) - -(declare (unit fs-transport)) - -(declare (uses common)) -(declare (uses db)) -(declare (uses tests)) -(declare (uses tasks)) ;; tasks are where stuff is maintained about what is running. - -(include "common_records.scm") -(include "db_records.scm") - - -;;====================================================================== -;; F S T R A N S P O R T S E R V E R -;;====================================================================== - -;; There is no "server" per se but a convience routine to make it non -;; necessary to be reopening the db over and over again. -;; - -(define (fs:process-queue-item packet) - (if (not *dbstruct-db*) ;; we will require that (setup-for-run) has already been called - (set! *dbstruct-db* (db:setup-db))) - (debug:print-info 11 *default-log-port* "fs:process-queue-item called with packet=" packet) - (db:process-queue-item *dbstruct-db* packet)) - Index: http-transport.scm ================================================================== --- http-transport.scm +++ http-transport.scm @@ -97,11 +97,11 @@ (dat ($ 'dat)) (res #f)) (cond ((equal? (uri-path (request-uri (current-request))) '(/ "api")) - (send-response body: (api:process-request *dbstruct-db* $) ;; the $ is the request vars proc + (send-response body: (api:process-request *dbstruct-dbs* $) ;; the $ is the request vars proc headers: '((content-type text/plain))) (mutex-lock! *heartbeat-mutex*) (set! *db-last-access* (current-seconds)) (mutex-unlock! *heartbeat-mutex*)) ((equal? (uri-path (request-uri (current-request))) @@ -458,14 +458,14 @@ (let loop ((count 0) (server-state 'available) (bad-sync-count 0) (start-time (current-milliseconds))) ;; Use this opportunity to sync the tmp db to megatest.db - (if (not server-going) ;; *dbstruct-db* + (if (not server-going) ;; *dbstruct-dbs* (begin (debug:print 0 *default-log-port* "SERVER: dbprep") - (set! *dbstruct-db* (db:setup #t)) ;; run-id)) + (set! *dbstruct-dbs* (db:setup #t)) ;; run-id)) FIXME!!! (set! server-going #t) (debug:print 0 *default-log-port* "SERVER: running, megatest version: " (common:get-full-version)) ;; NOTE: the server is NOT yet marked as running in the log. We do that in the keep-running routine. (thread-start! *watchdog*))) ;; when things go wrong we don't want to be doing the various queries too often Index: megatest-version.scm ================================================================== --- megatest-version.scm +++ megatest-version.scm @@ -18,6 +18,6 @@ ;; Always use two or four digit decimal ;; 1.01, 1.02...1.10,1.11,1.1101 ... 1.99,2.00.. ;; (declare (unit megatest-version)) -(define megatest-version 1.6591) +(define megatest-version 1.7001) Index: megatest.scm ================================================================== --- megatest.scm +++ megatest.scm @@ -30,21 +30,34 @@ (declare (uses server)) (declare (uses client)) (declare (uses tests)) (declare (uses genexample)) ;; (declare (uses daemon)) + (declare (uses db)) ;; (declare (uses dcommon)) (declare (uses tdb)) (declare (uses mt)) (declare (uses api)) (declare (uses tasks)) ;; only used for debugging. (declare (uses env)) (declare (uses diff-report)) +(declare (uses dbmod)) +(declare (uses dbmod.import)) +(declare (uses dbfile)) +(declare (uses dbfile.import)) +;; (declare (uses debugprint)) +;; (declare (uses debugprint.import)) +;; (declare (uses mtargs)) +;; (declare (uses mtargs.import)) + ;; (declare (uses ftail)) ;; (import ftail) + +(import dbmod + dbfile) (define *db* #f) ;; this is only for the repl, do not use in general!!!! (include "common_records.scm") (include "key_records.scm") @@ -2290,22 +2303,22 @@ (begin (debug:print 0 *default-log-port* "Failed to setup, exiting") (exit 1))) ;; keep this one local ;; (open-run-close patch-db #f) - (let ((dbstruct (db:setup #f areapath: *toppath*))) - (common:cleanup-db dbstruct full: #t)) + (let ((dbstructs (db:setup #f areapath: *toppath*))) + (common:cleanup-db dbstructs full: #t)) (set! *didsomething* #t))) (if (args:get-arg "-cleanup-db") (begin (if (not (launch:setup)) (begin (debug:print 0 *default-log-port* "Failed to setup, exiting") (exit 1))) - (let ((dbstruct (db:setup #f areapath: *toppath*))) - (common:cleanup-db dbstruct)) + (let ((dbstructs (db:setup #f areapath: *toppath*))) + (common:cleanup-db dbstructs)) (set! *didsomething* #t))) (if (args:get-arg "-mark-incompletes") (begin (if (not (launch:setup)) @@ -2357,14 +2370,14 @@ (if (or (getenv "MT_RUNSCRIPT") (args:get-arg "-repl") (args:get-arg "-load")) (let* ((toppath (launch:setup)) - (dbstruct (if (and toppath - (common:on-homehost?)) - (db:setup #t) - #f))) ;; make-dbr:dbstruct path: toppath local: (args:get-arg "-local")) #f))) + (dbstructs (if (and toppath + (common:on-homehost?)) + (db:setup #t) + #f))) ;; make-dbr:dbstruct path: toppath local: (args:get-arg "-local")) #f))) (if *toppath* (cond ((getenv "MT_RUNSCRIPT") ;; How to run megatest scripts ;; @@ -2377,15 +2390,16 @@ ;; EOF (repl)) (else (begin - (set! *db* dbstruct) + (set! *db* dbstructs) (import extras) ;; might not be needed ;; (import csi) (import readline) (import apropos) + (import dbfile) ;; (import (prefix sqlite3 sqlite3:)) ;; doesn't work ... (if *use-new-readline* (begin (install-history-file (get-environment-variable "HOME") ".megatest_history") ;; [homedir] [filename] [nlines]) Index: mtargs/mtargs.scm ================================================================== --- mtargs/mtargs.scm +++ mtargs/mtargs.scm @@ -56,10 +56,20 @@ (if (string? help) (print help) (print "Usage: " (car (argv)) " ... ")) (exit 0)) + ;; one-of args defined +(define (args:any-defined? . param) + (let ((res #f)) + (for-each + (lambda (arg) + (if (get-arg arg)(set! res #t))) + param) + res)) + +;; args: (define (get-args args params switches arg-hash num-needed) (let* ((numtargs (length args)) (adj-num-needed (if num-needed (+ num-needed 2) #f))) (if (< numtargs (if adj-num-needed adj-num-needed 2)) (if (>= num-needed 1) Index: rmt.scm ================================================================== --- rmt.scm +++ rmt.scm @@ -368,28 +368,28 @@ (loop (car tal)(cdr tal) newmax-cmd currmax))))))) (mutex-unlock! *db-stats-mutex*) res)) (define (rmt:open-qry-close-locally cmd run-id params #!key (remretries 5)) - (let* ((qry-is-write (not (member cmd api:read-only-queries))) - (db-file-path (db:dbfile-path)) ;; 0)) - (dbstruct-local (db:setup #t)) ;; make-dbr:dbstruct path: dbdir local: #t))) - (read-only (not (file-write-access? db-file-path))) - (start (current-milliseconds)) - (resdat (if (not (and read-only qry-is-write)) - (let ((v (api:execute-requests dbstruct-local (vector (symbol->string cmd) params)))) - (handle-exceptions ;; there has been a long history of receiving strange errors from values returned by the client when things go wrong.. - exn ;; This is an attempt to detect that situation and recover gracefully - (begin - (debug:print 0 *default-log-port* "ERROR: bad data from server " v " message: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn) - (vector #t '())) ;; should always get a vector but if something goes wrong return a dummy - (if (and (vector? v) - (> (vector-length v) 1)) - (let ((newvec (vector (vector-ref v 0)(vector-ref v 1)))) - newvec) ;; by copying the vector while inside the error handler we should force the detection of a corrupted record - (vector #t '())))) ;; we could also check that the returned types are valid - (vector #t '()))) + (let* ((qry-is-write (not (member cmd api:read-only-queries))) + (db-file-path (db:dbfile-path)) ;; 0)) + (dbstructs-local (db:setup #t)) ;; make-dbr:dbstruct path: dbdir local: #t))) + (read-only (not (file-write-access? db-file-path))) + (start (current-milliseconds)) + (resdat (if (not (and read-only qry-is-write)) + (let ((v (api:execute-requests dbstructs-local (vector (symbol->string cmd) params)))) + (handle-exceptions ;; there has been a long history of receiving strange errors from values returned by the client when things go wrong.. + exn ;; This is an attempt to detect that situation and recover gracefully + (begin + (debug:print 0 *default-log-port* "ERROR: bad data from server " v " message: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn) + (vector #t '())) ;; should always get a vector but if something goes wrong return a dummy + (if (and (vector? v) + (> (vector-length v) 1)) + (let ((newvec (vector (vector-ref v 0)(vector-ref v 1)))) + newvec) ;; by copying the vector while inside the error handler we should force the detection of a corrupted record + (vector #t '())))) ;; we could also check that the returned types are valid + (vector #t '()))) (success (vector-ref resdat 0)) (res (vector-ref resdat 1)) (duration (- (current-milliseconds) start))) (if (and read-only qry-is-write) (debug:print 0 *default-log-port* "ERROR: attempt to write to read-only database ignored. cmd=" cmd)) Index: server.scm ================================================================== --- server.scm +++ server.scm @@ -606,11 +606,14 @@ ;; moving this here as it needs access to db and cannot be in common. ;; (define (server:get-bruteforce-syncer dbstruct #!key (fork-to-background #f) (persist-until-sync #f)) - (let* ((sqlite-exe (or (get-environment-variable "MT_SQLITE3_EXE"))) ;; defined in cfg.sh + (debug:print "WARNING: bruteforce-syncer is called but has been disabled!") + (lambda () + (debug:print "WARNING: bruteforce-syncer is called but has been disabled!")) + #;(let* ((sqlite-exe (or (get-environment-variable "MT_SQLITE3_EXE"))) ;; defined in cfg.sh (sync-log (or (args:get-arg "-sync-log") (conc *toppath* "/logs/sync-" (current-process-id) "-" (get-host-name) ".log"))) (tmp-area (common:get-db-tmp-area)) (tmp-db (conc tmp-area "/megatest.db")) (staging-file (conc *toppath* "/.megatest.db")) (mtdbfile (conc *toppath* "/megatest.db")) @@ -703,155 +706,5 @@ finalres) ) ;; end lambda )) do-a-sync)) -(define (server:writable-watchdog-bruteforce dbstruct) - (thread-sleep! 1) ;; delay for startup - (let* ((do-a-sync (server:get-bruteforce-syncer dbstruct)) - (final-sync (server:get-bruteforce-syncer dbstruct fork-to-background: #t persist-until-sync: #t))) - (when (and (not (args:get-arg "-sync-to-megatest.db")) ;; conditions under which we do not run the sync - (args:get-arg "-server")) - - (let loop () - (do-a-sync) - (if (not *time-to-exit*) (loop))) ;; keep going unless time to exit - - ;; time to exit, close the no-sync db here - (final-sync) - - (if (common:low-noise-print 30) - (debug:print-info 0 *default-log-port* "Exiting watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id) - ))))) - -(define (server:writable-watchdog-deltasync dbstruct) - (thread-sleep! 0.05) ;; delay for startup - (let ((legacy-sync (common:run-sync?)) - (sync-stale-seconds (configf:lookup-number *configdat* "server" "sync-stale-seconds" default: 300)) - (debug-mode (debug:debug-mode 1)) - (last-time (current-seconds)) - (no-sync-db (db:open-no-sync-db)) - (stmt-cache (dbr:dbstruct-stmt-cache dbstruct)) - (sync-duration 0) ;; run time of the sync in milliseconds - ) - (set! *no-sync-db* no-sync-db) ;; make the no sync db available to api calls - (debug:print-info 2 *default-log-port* "Periodic sync thread started.") - (debug:print-info 3 *default-log-port* "watchdog starting. legacy-sync is " legacy-sync" pid="(current-process-id) );; " this-wd-num="this-wd-num) - (if (and legacy-sync (not *time-to-exit*)) - (let* (;;(dbstruct (db:setup)) - (mtdb (dbr:dbstruct-mtdb dbstruct)) - (mtpath (db:dbdat-get-path mtdb)) - (tmp-area (common:get-db-tmp-area)) - (start-file (conc tmp-area "/.start-sync")) - (end-file (conc tmp-area "/.end-sync"))) - (debug:print-info 0 *default-log-port* "Server running, periodic sync started.") - (let loop () - ;; sync for filesystem local db writes - ;; - (mutex-lock! *db-multi-sync-mutex*) - (let* ((need-sync (>= *db-last-access* *db-last-sync*)) ;; no sync since last write - (sync-in-progress *db-sync-in-progress*) - (min-intersync-delay (configf:lookup-number *configdat* "server" "minimum-intersync-delay" default: 5)) - (should-sync (and (not *time-to-exit*) - (> (- (current-seconds) *db-last-sync*) min-intersync-delay))) ;; sync every five seconds minimum, deprecated logic, can probably be removed - (start-time (current-seconds)) - (cpu-load-adj (alist-ref 'adj-proc-load (common:get-normalized-cpu-load #f))) - (mt-mod-time (file-modification-time mtpath)) - (last-sync-start (if (common:file-exists? start-file) - (file-modification-time start-file) - 0)) - (last-sync-end (if (common:file-exists? end-file) - (file-modification-time end-file) - 10)) - (sync-period (+ 3 (* cpu-load-adj 30))) ;; as adjusted load increases increase the sync period - (recently-synced (and (< (- start-time mt-mod-time) sync-period) ;; not useful if sync didn't modify megatest.db! - (< mt-mod-time last-sync-start))) - (sync-done (<= last-sync-start last-sync-end)) - (sync-stale (> start-time (+ last-sync-start sync-stale-seconds))) - (will-sync (and (not *time-to-exit*) ;; do not start a sync if we are in the process of exiting - (or need-sync should-sync) - (or sync-done sync-stale) - (not sync-in-progress) - (not recently-synced)))) - (debug:print-info 13 *default-log-port* "WD writable-watchdog top of loop. need-sync="need-sync" sync-in-progress=" sync-in-progress - " should-sync="should-sync" start-time="start-time" mt-mod-time="mt-mod-time" recently-synced="recently-synced" will-sync="will-sync - " sync-done=" sync-done " sync-period=" sync-period) - (if (and (> sync-period 5) - (common:low-noise-print 30 "sync-period")) - (debug:print-info 0 *default-log-port* "Increased sync period due to long sync times, sync took: " sync-period " seconds.")) - ;; (if recently-synced (debug:print-info 0 *default-log-port* "Skipping sync due to recently-synced flag=" recently-synced)) - ;; (debug:print-info 0 *default-log-port* "need-sync: " need-sync " sync-in-progress: " sync-in-progress " should-sync: " should-sync " will-sync: " will-sync) - (if will-sync (set! *db-sync-in-progress* #t)) - (mutex-unlock! *db-multi-sync-mutex*) - (if will-sync - (let (;; (max-sync-duration (configf:lookup-number *configdat* "server" "max-sync-duration")) ;; KEEPING THIS AVAILABLE BUT SHOULD NOT USE, I'M PRETTY SURE IT DOES NOT WORK! - (sync-start (current-milliseconds))) - (with-output-to-file start-file (lambda ()(print (current-process-id)))) - - ;; put lock here - - ;; (if (or (not max-sync-duration) - ;; (< sync-duration max-sync-duration)) ;; NOTE: db:sync-to-megatest.db keeps track of time of last sync and syncs incrementally - (let ((res (db:sync-to-megatest.db dbstruct no-sync-db: no-sync-db))) ;; did we sync any data? If so need to set the db touched flag to keep the server alive - (set! sync-duration (- (current-milliseconds) sync-start)) - (if (> res 0) ;; some records were transferred, keep the db alive - (begin - (mutex-lock! *heartbeat-mutex*) - (set! *db-last-access* (current-seconds)) - (mutex-unlock! *heartbeat-mutex*) - (debug:print-info 0 *default-log-port* "sync called, " res " records transferred.")) - (debug:print-info 2 *default-log-port* "sync called but zero records transferred"))))) -;; ;; TODO: factor this next routine out into a function -;; (with-input-from-pipe ;; this should not block other threads but need to verify this -;; (conc "megatest -sync-to-megatest.db -m testsuite:" (common:get-area-name) ":" *toppath*) -;; (lambda () -;; (let loop ((inl (read-line)) -;; (res #f)) -;; (if (eof-object? inl) -;; (begin -;; (set! sync-duration (- (current-milliseconds) sync-start)) -;; (cond -;; ((not res) -;; (debug:print 0 *default-log-port* "ERROR: sync from /tmp db to megatest.db appears to have failed. Recommended that you stop your runs and run \"megatest -cleanup-db\"")) -;; ((> res 0) -;; (mutex-lock! *heartbeat-mutex*) -;; (set! *db-last-access* (current-seconds)) -;; (mutex-unlock! *heartbeat-mutex*)))) -;; (let ((num-synced (let ((matches (string-match "^Synced (\\d+).*$" inl))) -;; (if matches -;; (string->number (cadr matches)) -;; #f)))) -;; (loop (read-line) -;; (or num-synced res)))))))))) - (if will-sync - (begin - (mutex-lock! *db-multi-sync-mutex*) - (set! *db-sync-in-progress* #f) - (set! *db-last-sync* start-time) - (with-output-to-file end-file (lambda ()(print (current-process-id)))) - - ;; release lock here - - (mutex-unlock! *db-multi-sync-mutex*))) - (if (and debug-mode - (> (- start-time last-time) 60)) - (begin - (set! last-time start-time) - (debug:print-info 4 *default-log-port* "timestamp -> " (seconds->time-string (current-seconds)) ", time since start -> " (seconds->hr-min-sec (- (current-seconds) *time-zero*)))))) - - ;; keep going unless time to exit - ;; - (if (not *time-to-exit*) - (let delay-loop ((count 0)) - ;;(debug:print-info 13 *default-log-port* "delay-loop top; count="count" pid="(current-process-id)" this-wd-num="this-wd-num" *time-to-exit*="*time-to-exit*) - - (if (and (not *time-to-exit*) - (< count 6)) ;; was 11, changing to 4. - (begin - (thread-sleep! 1) - (delay-loop (+ count 1)))) - (if (not *time-to-exit*) (loop)))) - ;; time to exit, close the no-sync db here - (db:no-sync-close-db no-sync-db stmt-cache) - (if (common:low-noise-print 30) - (debug:print-info 0 *default-log-port* "Exiting watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id) ))))))) ;;" this-wd-num="this-wd-num))))))) - ADDED tests/simplerun/Makefile Index: tests/simplerun/Makefile ================================================================== --- /dev/null +++ tests/simplerun/Makefile @@ -0,0 +1,5 @@ + +cleanup : + killall mtest dboard -v -9 || true + rm -rf *.log *.bak NB* logs/* .meta .db ../simpleruns/* lt + ADDED tests/simplerun/debug.scm Index: tests/simplerun/debug.scm ================================================================== --- /dev/null +++ tests/simplerun/debug.scm @@ -0,0 +1,61 @@ + +(module junk + * + +(import big-chicken + rmtmod + apimod + dbmod + srfi-18 + trace) + +(trace-call-sites #t) +(trace + ;; db:get-tests-for-run + ;; rmt:general-open-connection + ;; rmt:open-main-connection + ;; rmt:drop-conn + ;; rmt:send-receive + ;; rmt:log-to-main + ) + +(define (make-run-id) + (let* ((s (conc (current-process-id))) + (l (string-length s))) + (string->number (substring s (- l 3) l)) + )) + +(define (run) + (let* ((th1 (make-thread + (lambda () + (let loop ((r 0) + (i 1) + (s 0)) ;; sum + (let ((start-time (current-milliseconds)) + (run-id (+ r (make-run-id)))) + (rmt:register-test run-id "test1" (conc "item_" i)) + (thread-sleep! 0.01) + (let* ((qry-time (- (current-milliseconds) start-time)) + (tot-query-time (+ qry-time s)) + (avg-query-time (* 1.0 (/ tot-query-time (max i 1))))) + (if (> qry-time 500) + (print "WARNING: rmt:register-test took more than 500ms, "qry-time"ms, i="i", avg-query-time="avg-query-time)) + (if (eq? (modulo i 100) 0) + (print "For run-id="run-id", "(rmt:get-keys-write)" num tests registered="i" avg-query-time="avg-query-time)) + (if (< i 500) + (loop r (+ i 1) tot-query-time) + (if (< r 100) + (let* ((start-time (current-milliseconds))) + (print "rmt:get-keys "(rmt:get-keys)" in "(- (current-milliseconds) start-time)) + ;; run-id testpatt states statuses offset limit not-in sort-by sort-order qryvals last-update mode + (print "Got "(length (rmt:get-tests-for-run run-id "%" '() '() #f #f #f #f #f #f 0 #f))" tests for run "run-id) + (print "Average query time: "avg-query-time) + (loop (+ r 1) 0 tot-query-time)))))))) + ))) + (thread-start! th1) + (thread-join! th1))) + +(run) +) + + Index: tests/simplerun/megatest.config ================================================================== --- tests/simplerun/megatest.config +++ tests/simplerun/megatest.config @@ -21,10 +21,12 @@ [setup] # Adjust max_concurrent_jobs to limit how much you load your machines max_concurrent_jobs 50 +[server] +timeout 3600 # Uncomment this to make the in-mem db into a disk based db (slower but good for debug) # be aware that some unit tests will fail with this due to persistent data # # tmpdb /tmp @@ -35,15 +37,15 @@ [validvalues] state start end completed # Job tools are more advanced ways to control how your jobs are launched [jobtools] -useshell yes -launcher nbfind +# useshell yes +launcher nbfake # You can override environment variables for all your tests here [env-override] EXAMPLE_VAR example value # As you run more tests you may need to add additional disks, the names are arbitrary but must be unique [disks] disk0 #{getenv MT_RUN_AREA_HOME}/../simpleruns Index: tests/simplerun/tests/test1/testconfig ================================================================== --- tests/simplerun/tests/test1/testconfig +++ tests/simplerun/tests/test1/testconfig @@ -24,11 +24,11 @@ [requirements] # waiton setup priority 0 # Iteration for your tests are controlled by the items section -[items] +# [items] # PARTOFDAY morning noon afternoon evening night # test_meta is a section for storing additional data on your test [test_meta] author matt