Index: Makefile
==================================================================
--- Makefile
+++ Makefile
@@ -28,11 +28,12 @@
ezsteps.scm lock-queue.scm rmt.scm api.scm \
subrun.scm portlogger.scm archive.scm env.scm \
diff-report.scm cgisetup/models/pgdb.scm
# module source files
-MSRCFILES = dbfile.scm debugprint.scm mtargs.scm commonmod.scm dbmod.scm
+MSRCFILES = dbfile.scm debugprint.scm mtargs.scm commonmod.scm dbmod.scm \
+ dbmemmod.scm tcp-transportmod.scm
all : $(PREFIX)/bin/.$(ARCHSTR) mtest dboard mtut tcmt
# dbmod.import.o is just a hack here
mofiles/dbfile.o : mofiles/debugprint.o mofiles/commonmod.o dbmod.import.o
ADDED dbmemmod.scm
Index: dbmemmod.scm
==================================================================
--- /dev/null
+++ dbmemmod.scm
@@ -0,0 +1,1322 @@
+;;======================================================================
+;; Copyright 2017, Matthew Welland.
+;;
+;; This file is part of Megatest.
+;;
+;; Megatest is free software: you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation, either version 3 of the License, or
+;; (at your option) any later version.
+;;
+;; Megatest is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with Megatest. If not, see .
+
+;;======================================================================
+
+(declare (unit dbmemmod))
+;; (declare (uses debugprint))
+(declare (uses commonmod))
+
+(module dbmemmod
+ *
+
+ (import scheme
+ chicken
+ data-structures
+ extras
+ matchable)
+
+(import (prefix sqlite3 sqlite3:)
+ posix typed-records srfi-18 srfi-1
+ srfi-69
+ stack
+ files
+ ports
+
+ commonmod
+ ;; debugprint
+ )
+
+(define keep-age-param (make-parameter 10)) ;; qif file age, if over move to attic
+(define num-run-dbs (make-parameter 10)) ;; number of db's in .megatest
+
+;;======================================================================
+;; R E C O R D S
+;;======================================================================
+
+;; a single Megatest area with it's multiple dbs is
+;; managed in a dbstruct
+;;
+(defstruct dbr:dbstruct
+ (areapath #f)
+ (homehost #f)
+ (tmppath #f)
+ (read-only #f)
+ (subdbs (make-hash-table))
+ )
+
+;; NOTE: Need one dbr:subdb per main.db, 1.db ...
+;;
+(defstruct dbr:subdb
+ (dbname #f) ;; .megatest/1.db
+ (mtdbfile #f) ;; mtrah/.megatest/1.db
+ (mtdbdat #f) ;; only need one of these for syncing
+ ;; (dbdats (make-hash-table)) ;; id => dbdat
+ (tmpdbfile #f) ;; /tmp/.../.megatest/1.db
+ ;; (refndbfile #f) ;; /tmp/.../.megatest/1.db_ref
+ (dbstack (make-stack)) ;; stack for tmp dbr:dbdat,
+ (homehost #f) ;; not used yet
+ (on-homehost #f) ;; not used yet
+ (read-only #f)
+ (last-sync 0)
+ (last-write (current-seconds))
+ ) ;; goal is to converge on one struct for an area but for now it is too confusing
+
+;; need to keep dbhandles and cached statements together
+(defstruct dbr:dbdat
+ (dbfile #f)
+ (dbh #f)
+ (stmt-cache (make-hash-table))
+ (read-only #f)
+ (birth-sec (current-seconds)))
+
+(define *dbstruct-dbs* #f)
+(define *db-open-mutex* (make-mutex))
+(define *db-access-mutex* (make-mutex)) ;; used in common.scm
+(define *no-sync-db* #f)
+(define *db-sync-in-progress* #f)
+(define *db-with-db-mutex* (make-mutex))
+(define *max-api-process-requests* 0)
+(define *api-process-request-count* 0)
+(define *db-write-access* #t)
+(define *db-last-sync* 0) ;; last time the sync to megatest.db happened
+(define *db-multi-sync-mutex* (make-mutex)) ;; protect access to *db-sync-in-progress*, *db-last-sync*
+
+(define (db:generic-error-printout exn . message)
+ (print-call-chain (current-error-port))
+ (apply dbfile:print-err message)
+ (dbfile:print-err
+ ", error: " ((condition-property-accessor 'exn 'message) exn)
+ ", arguments: " ((condition-property-accessor 'exn 'arguments) exn)
+ ", location: " ((condition-property-accessor 'exn 'location) exn)
+ ))
+
+(define (dbfile:run-id->key run-id)
+ (or run-id 'main))
+
+(define (db:safely-close-sqlite3-db db stmt-cache #!key (try-num 3))
+ (if (<= try-num 0)
+ #f
+ (handle-exceptions
+ exn
+ (begin
+ (print "Attempt to safely close sqlite3 db failed. Trying again. exn=" exn)
+ (thread-sleep! 3)
+ (sqlite3:interrupt! db)
+ (db:safely-close-sqlite3-db db stmt-cache try-num: (- try-num 1)))
+ (if (sqlite3:database? db)
+ (let* ((stmts (and stmt-cache (hash-table-ref/default stmt-cache db #f))))
+ (if stmts (map sqlite3:finalize! (hash-table-values stmts)))
+ (sqlite3:finalize! db)
+ #t)
+ (begin
+ (dbfile:print-err "db:safely-close-sqlite3-db: " db " is not an sqlite3 db")
+ #f
+ )
+ ))))
+
+;; close all opened run-id dbs
+(define (db:close-all dbstruct)
+ (if (dbr:dbstruct? dbstruct)
+;; (handle-exceptions
+;; exn
+;; (begin
+;; (debug:print 0 *default-log-port* "WARNING: Finalizing failed, " ((condition-property-accessor 'exn 'message) exn) ", note - exn=" exn)
+;; (print-call-chain *default-log-port*))
+ ;; (db:sync-touched dbstruct 0 force-sync: #t) ;; NO. Do not do this here. Instead we rely on a server to be started when there are writes, even if the server itself is not going to be used as a server.
+ (let* ((subdbs (hash-table-values (dbr:dbstruct-subdbs dbstruct))))
+ (for-each
+ (lambda (subdb)
+ (let* ((tdbs (stack->list (dbr:subdb-dbstack subdb)))
+ (mtdbdat (dbr:dbdat-dbh (dbr:subdb-mtdbdat subdb)))
+ #;(rdb (dbr:dbdat-dbh (dbr:subdb-refndb subdb))))
+
+ (map (lambda (dbdat)
+ (let* ((stmt-cache (dbr:dbdat-stmt-cache dbdat))
+ (dbh (dbr:dbdat-dbh dbdat)))
+ (db:safely-close-sqlite3-db dbh stmt-cache)))
+ tdbs)
+ (db:safely-close-sqlite3-db mtdbdat (dbr:dbdat-stmt-cache (dbr:subdb-mtdbdat subdb)))
+ ;; (if (sqlite3:database? mdb) (sqlite3:finalize! mdb))
+ #;(db:safely-close-sqlite3-db rdb #f))) ;; stmt-cache))))) ;; (if (sqlite3:database? rdb) (sqlite3:finalize! rdb))))))
+ subdbs)
+ #t
+ )
+ #f
+ )
+)
+
+;; ;; set up a single db (e.g. main.db, 1.db ... etc.)
+;; ;;
+;; (define (db:setup-db dbstruct areapath run-id)
+;; (let* ((dbname (db:run-id->dbname run-id))
+;; (dbstruct (hash-table-ref/default dbstructs dbname #f)))
+;; (if dbstruct
+;; dbstruct
+;; (let* ((dbstruct-new (make-dbr:dbstruct)))
+;; (db:open-db dbstruct-new run-id areapath: areapath do-sync: #t)
+;; (hash-table-set! dbstructs dbname dbstruct-new)
+;; dbstruct-new))))
+
+;; ; Returns the dbdat for a particular dbfile inside the area
+;; ;;
+;; (define (dbr:dbstruct-get-dbdat dbstruct dbfile)
+;; (hash-table-ref/default (dbr:dbstruct-dbdats dbstruct) dbfile #f))
+;;
+;; (define (dbr:dbstruct-dbdat-put! dbstruct dbfile db)
+;; (hash-table-set! (dbr:dbstruct-dbdats dbstruct) dbfile db))
+;;
+;; (define (db:run-id->first-num run-id)
+;; (let* ((s (number->string run-id))
+;; (l (string-length s)))
+;; (substring s (- l 1) l)))
+
+;; 1234 => 4/1234.db
+;; #f => 0/main.db
+;; (abandoned the idea of num/db)
+;;
+(define (dbfile:run-id->path apath run-id)
+ (conc apath"/"(dbfile:run-id->dbname run-id)))
+
+(define (db:dbname->path apath dbname)
+ (conc apath"/"dbname))
+
+(define (dbfile:run-id->dbnum run-id)
+ (cond
+ ((number? run-id)
+ (modulo run-id (num-run-dbs)))
+ ((not run-id) "main") ;; 0 or main?
+ (else run-id)))
+
+;; POTENTIAL BUG: this implementation could produce a db file if run-id is neither #f or a number
+(define (dbfile:run-id->dbname run-id)
+ (conc ".megatest/"(dbfile:run-id->dbnum run-id)".db"))
+
+;; Make the dbstruct, setup up auxillary db's and call for main db at least once
+;;
+;; called in http-transport and replicated in rmt.scm for *local* access.
+;;
+(define (dbfile:setup do-sync areapath tmppath)
+ (cond
+ (*dbstruct-dbs*
+ (dbfile:print-err "WARNING: dbfile:setup called when *dbstruct-dbs* is already initialized")
+ *dbstruct-dbs*) ;; TODO: when multiple areas are supported, this optimization will be a hazard
+ (else
+ (let* ((dbstruct (make-dbr:dbstruct)))
+ (set! *dbstruct-dbs* dbstruct)
+ (dbr:dbstruct-areapath-set! dbstruct areapath)
+ (dbr:dbstruct-tmppath-set! dbstruct tmppath)
+ dbstruct))))
+
+(define (dbfile:get-subdb dbstruct run-id)
+ (let* ((dbfname (dbfile:run-id->dbname run-id)))
+ (hash-table-ref/default (dbr:dbstruct-subdbs dbstruct) dbfname #f)))
+
+(define (dbfile:set-subdb dbstruct run-id subdb)
+ (hash-table-set! (dbr:dbstruct-subdbs dbstruct) (dbfile:run-id->dbname run-id) subdb))
+
+;; (define *dbfile:num-handles-in-use* 0)
+
+;; Get/open a database
+;; if run-id => get run specific db
+;; if #f => get main db
+;; if run-id is a string treat it as a filename
+;; if db already open - return inmem
+;; if db not open, open inmem, rundb and sync then return inmem
+;; inuse gets set automatically for rundb's
+;;
+(define (dbfile:get-dbdat dbstruct run-id)
+ (let* ((subdb (dbfile:get-subdb dbstruct run-id)))
+ (if (stack-empty? (dbr:subdb-dbstack subdb))
+ #f
+ (begin
+ (stack-pop! (dbr:subdb-dbstack subdb))))))
+
+;; return a previously opened db handle to the stack of available handles
+(define (dbfile:add-dbdat dbstruct run-id dbdat)
+ (let* ((subdb (dbfile:get-subdb dbstruct run-id))
+ (dbstk (dbr:subdb-dbstack subdb))
+ (count (stack-count dbstk)))
+ (if (> count 15)
+ (dbfile:print-err "WARNING: stack for "run-id".db is "count"."))
+ (stack-push! dbstk dbdat)
+ dbdat))
+
+;; set up a subdb
+;;
+(define (dbfile:init-subdb dbstruct run-id init-proc)
+ (let* ((dbname (dbfile:run-id->dbname run-id))
+ (areapath (dbr:dbstruct-areapath dbstruct))
+ (tmppath (dbr:dbstruct-tmppath dbstruct))
+ (mtdbpath (dbfile:run-id->path areapath run-id))
+ (tmpdbpath (dbfile:run-id->path tmppath run-id))
+ (mtdbdat (dbfile:open-sqlite3-db mtdbpath init-proc sync-mode: 0 journal-mode: #f)) ;; "WAL"))
+ (newsubdb (make-dbr:subdb dbname: dbname
+ mtdbfile: mtdbpath
+ tmpdbfile: tmpdbpath
+ mtdbdat: mtdbdat)))
+ (dbfile:set-subdb dbstruct run-id newsubdb)
+ newsubdb)) ;; return the new subdb - but shouldn't really use it
+
+;; returns dbdat with dbh and dbfilepath
+;;
+;; NOTE: the handle is on /tmp db file!
+;;
+;; 1. if needed setup the subdb for the given run-id
+;; 2. if there is no existing db handle in the stack
+;; create a new handle and return it (do NOT add
+;; it to the stack).
+;;
+(define (dbfile:open-db dbstruct run-id init-proc)
+ (let* ((subdb (dbfile:get-subdb dbstruct run-id)))
+ (if (not subdb) ;; not yet defined
+ (begin
+ (dbfile:init-subdb dbstruct run-id init-proc)
+ (dbfile:open-db dbstruct run-id init-proc))
+ (let* ((dbdat (dbfile:get-dbdat dbstruct run-id)))
+ (if dbdat
+ dbdat
+ (let* ((tmppath (dbr:dbstruct-tmppath dbstruct))
+ (tmpdbpath (dbfile:run-id->path tmppath run-id))
+ (dbdat (dbfile:open-sqlite3-db tmpdbpath init-proc sync-mode: 0 journal-mode: "WAL")))
+ ;; the following line short-circuits the "one db handle per thread" model
+ ;;
+ ;; (dbfile:add-dbdat dbstruct run-id dbdat)
+ ;;
+ dbdat))))))
+
+;; COMBINE dbfile:open-sqlite-db and dbfile:lock-create-open
+;;
+
+;; this stuff is for initial debugging, please remove it when
+;; this code stabilizes
+(define *dbopens* (make-hash-table))
+(define (dbfile:inc-db-open dbfile)
+ (let* ((curr-opens-count (+ (hash-table-ref/default *dbopens* dbfile 0) 1)))
+ (if (and (> curr-opens-count 1) ;; this should NOT be happening
+ (common:low-noise-print 15 "db-opens"))
+ (dbfile:print-err "INFO: db "dbfile" has been opened "curr-opens-count" times!"))
+ (hash-table-set! *dbopens* dbfile curr-opens-count)
+ curr-opens-count))
+
+;; Open the classic megatest.db file (defaults to open in toppath)
+;;
+;; NOTE: returns a dbdat not a dbstruct!
+;;
+(define (dbfile:open-sqlite3-db dbpath init-proc #!key (sync-mode 0)(journal-mode #f))
+ (let* ((dbexists (file-exists? dbpath))
+ (write-access (file-write-access? dbpath))
+ (db (dbfile:cautious-open-database dbpath init-proc sync-mode journal-mode)))
+ (dbfile:inc-db-open dbpath)
+ ;; (init-proc db)
+ (make-dbr:dbdat dbfile: dbpath dbh: db read-only: (not write-access))))
+
+(define (dbfile:print-and-exit . params)
+ (with-output-to-port
+ (current-error-port)
+ (lambda ()
+ (apply print params)))
+ (exit 1))
+
+(define (dbfile:print-err . params)
+ (with-output-to-port
+ (current-error-port)
+ (lambda ()
+ (apply print params))))
+
+(define (dbfile:cautious-open-database fname init-proc sync-mode journal-mode #!optional (tries-left 500))
+ (let* ((busy-file (conc fname "-journal"))
+ (delay-time (* (- 51 tries-left) 1.1))
+ (write-access (file-write-access? fname))
+ (dir-access (file-write-access? (pathname-directory fname)))
+ (retry (lambda ()
+ (thread-sleep! delay-time)
+ (if (> tries-left 0)
+ (dbfile:cautious-open-database fname init-proc
+ sync-mode journal-mode
+ (- tries-left 1))))))
+ (assert (>= tries-left 0) (conc "FATAL: too many attempts in dbfile:cautious-open-database of "fname", giving up."))
+
+ (if (and (file-write-access? fname)
+ (file-exists? busy-file))
+ (begin
+ (if (common:low-noise-print 120 busy-file)
+ (dbfile:print-err "INFO: dbfile:cautious-open-database: journal file "
+ busy-file" exists, trying again in few seconds."))
+ (thread-sleep! 1)
+ (if (eq? tries-left 2)
+ (begin
+ (dbfile:print-err "INFO: forcing journal rollup "busy-file)
+ (dbfile:brute-force-salvage-db fname)))
+ (dbfile:cautious-open-database fname init-proc sync-mode journal-mode (- tries-left 1)))
+
+ (let* ((result (condition-case
+ (if dir-access
+ (dbfile:with-simple-file-lock
+ (conc fname ".lock")
+ (lambda ()
+ (let* ((db-exists (file-exists? fname))
+ (db (sqlite3:open-database fname))) ;; creates an empty db if it did not already exist.
+ (sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 30000))
+ (if sync-mode
+ (sqlite3:execute db (conc "PRAGMA synchronous = "sync-mode";")))
+ (if journal-mode
+ (sqlite3:execute db (conc "PRAGMA journal_mode = "journal-mode";")))
+ (if (and init-proc (not db-exists))
+ (init-proc db))
+ db)))
+ (begin
+ (if (file-exists? fname )
+ (let ((db (sqlite3:open-database fname)))
+ ;; pragmas synchronous not needed because this db is used read-only
+ ;; (sqlite3:execute db (conc "PRAGMA synchronous = "mode";")
+ (sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 30000)) ;; read-only but still need timeout
+ db )
+ (print "file doesn't exist: " fname))))
+ (exn (io-error)
+ (dbfile:print-err exn "ERROR: i/o error with " fname ". Check permissions, disk space etc. and try again.")
+ (retry))
+ (exn (corrupt)
+ (dbfile:print-err exn "ERROR: database " fname " is corrupt. Repair it to proceed.")
+ (retry))
+ (exn (busy)
+ (dbfile:print-err exn "ERROR: database " fname
+ " is locked. Try copying to another location, remove original and copy back.")
+ (retry))
+ (exn (permission)(dbfile:print-err exn "ERROR: database " fname " has some permissions problem.")
+ (retry))
+ (exn ()
+ (dbfile:print-err exn "ERROR: Unknown error with database " fname " message: "
+ ((condition-property-accessor 'exn 'message) exn))
+ (retry)))))
+ result))))
+
+(define (dbfile:brute-force-salvage-db fname)
+ (let* ((backupfname (conc fname"-"(current-process-id)".bak"))
+ (cmd (conc "cp "fname" "backupfname";mv "fname" "(conc fname ".delme;")
+ "cp "backupfname" "fname)))
+ (dbfile:print-err "WARNING: attempting recovery of file "fname" by running commands:\n"
+ " "cmd)
+ (system cmd)))
+
+
+(define (dbfile:open-no-sync-db dbpath)
+ (if *no-sync-db*
+ *no-sync-db*
+ (begin
+ (if (not (file-exists? dbpath))
+ (create-directory dbpath #t))
+ (let* ((dbname (conc dbpath "/no-sync.db"))
+ (db-exists (file-exists? dbname))
+ (init-proc (lambda (db)
+ (if (not db-exists)
+ (begin
+ (sqlite3:execute db "CREATE TABLE IF NOT EXISTS no_sync_metadat (var TEXT,val TEXT, CONSTRAINT no_sync_metadat_constraint UNIQUE (var));"))
+ )))
+ (db (dbfile:cautious-open-database dbname init-proc 0 "WAL"))) ;; (sqlite3:open-database dbname)))
+ ;; (sqlite3:execute db "PRAGMA synchronous = 0;")
+ ;; (sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 136000)) ;; done in cautious-open-database
+ (set! *no-sync-db* db)
+ db))))
+
+(define (db:no-sync-set db var val)
+ (sqlite3:execute db "INSERT OR REPLACE INTO no_sync_metadat (var,val) VALUES (?,?);" var val))
+
+(define (db:no-sync-del! db var)
+ (sqlite3:execute db "DELETE FROM no_sync_metadat WHERE var=?;" var))
+
+(define (db:no-sync-get/default db var default)
+ (let ((res default))
+ (sqlite3:for-each-row
+ (lambda (val)
+ (set! res val))
+ db
+ "SELECT val FROM no_sync_metadat WHERE var=?;"
+ var)
+ (if res
+ (let ((newres (if (string? res)
+ (string->number res)
+ #f)))
+ (if newres
+ newres
+ res))
+ res)))
+
+;; transaction protected lock aquisition
+;; either:
+;; fails returns (#f . lock-creation-time)
+;; succeeds (returns (#t . lock-creation-time)
+;; use (db:no-sync-del! db keyname) to release the lock
+;;
+(define (db:no-sync-get-lock db keyname)
+ (sqlite3:with-transaction
+ db
+ (lambda ()
+ (condition-case
+ (let* ((curr-val (db:no-sync-get/default db keyname #f)))
+ (if curr-val
+ `(#f . ,curr-val) ;; (sqlite3:first-result db "SELECT val FROM no_sync_metadat WHERE var=?;" keyname))
+ (let ((lock-time (current-seconds)))
+ (sqlite3:execute db "INSERT OR REPLACE INTO no_sync_metadat (var,val) VALUES(?,?);" keyname lock-time)
+ `(#t . ,lock-time))))
+ (exn (io-error) (dbfile:print-err "ERROR: i/o error with no-sync db. Check permissions, disk space etc. and try again."))
+ (exn (corrupt) (dbfile:print-err "ERROR: database no-sync db is corrupt. Repair it to proceed."))
+ (exn (busy) (dbfile:print-err "ERROR: database no-sync db is locked. Try copying to another location, remove original and copy back."))
+ (exn (permission)(dbfile:print-err "ERROR: database no-sync db has some permissions problem."))
+ (exn () ;; (status done) ;; I don't know how to detect status done but no data!
+ (dbfile:print-err "ERROR: Unknown error with database no-sync db message: exn="(condition->list exn)", \n"
+ ((condition-property-accessor 'exn 'message) exn))
+ `(#f . ,(current-seconds)))))))
+
+(define (db:no-sync-get-lock-timeout db keyname timeout)
+ (let* ((lockdat (db:no-sync-get-lock db keyname)))
+ (match lockdat
+ ((#f . lock-time)
+ (if (> (- (current-seconds) (if (string? lock-time)(string->number lock-time)lock-time)) timeout)
+ (let ((lock-time (current-seconds)))
+ ;; (debug:print-info 2 *default-log-port* "db:no-sync-get-lock keyname=" keyname ", lock-time=" lock-time ", exn=" exn)
+ (sqlite3:execute db "INSERT OR REPLACE INTO no_sync_metadat (var,val) VALUES(?,?);" keyname lock-time)
+ `(#t . ,lock-time))
+ lockdat))
+ (else lockdat))))
+
+;; NOTE: This will steal the lock after timeout of waiting.
+;;
+(define (db:with-no-sync-lock db keyname timeout proc)
+ (let* ((lockdat (db:no-sync-get-lock-timeout db keyname))
+ (gotlock (car lockdat))
+ (locktime (cdr lockdat)))
+ (if gotlock
+ (let ((res (proc)))
+ (db:no-sync-del! db keyname)
+ res))))
+
+;;======================================================================
+;; sync back functions pulled from db.scm
+;;======================================================================
+
+;; Get a lock from the no-sync-db for the from-db, then delta sync the from-db to the to-db, otherwise return #f
+;;
+(define (db:lock-and-delta-sync no-sync-db dbstruct from-db-file runid keys dbinit)
+ (assert (not *db-sync-in-progress*) "FATAL: db:lock-and-sync called while a sync is in progress.")
+ ;; (dbfile:print-err *default-log-port* "db:lock-and-delta-sync")
+ (let* ((lock-file (conc from-db-file ".lock")))
+ (if (common:simple-file-lock lock-file)
+ (begin
+ (dbfile:print-err "INFO: db:lock-and-delta-sync copying db "runid" at "(current-seconds))
+ (set! *db-sync-in-progress* #t)
+ (db:sync-touched dbstruct runid keys dbinit)
+ (set! *db-sync-in-progress* #f)
+ (delete-file* lock-file)
+ #t)
+ (begin
+ (if (common:low-noise-print 120 (conc "no lock "from-db-file))
+ (dbfile:print-err "INFO: could not get lock for " from-db-file ", sync likely in progress."))
+ #f
+ ))))
+
+;; ;; Get a lock from the no-sync-db for the from-db, then delta sync the from-db to the to-db, otherwise return #f
+;; ;;
+;; (define (db:lock-and-delta-sync-orig no-sync-db dbstruct from-db-file runid keys dbinit)
+;; (assert (not *db-sync-in-progress*) "FATAL: db:lock-and-sync called while a sync is in progress.")
+;; ;; (dbfile:print-err *default-log-port* "db:lock-and-delta-sync")
+;; (let* ((lockdat (db:no-sync-get-lock-timeout no-sync-db from-db-file 60))
+;; (gotlock (car lockdat))
+;; (locktime (cdr lockdat)))
+;; ;; (debug:print-info 3 *default-log-port* "db:lock-and-delta-sync: got lock?")
+;;
+;; (if gotlock
+;; (begin
+;; (dbfile:print-err "INFO: db:lock-and-delta-sync copying db "runid" at "(current-seconds))
+;; (set! *db-sync-in-progress* #t)
+;; (db:sync-touched dbstruct runid keys dbinit)
+;; (set! *db-sync-in-progress* #f)
+;; (db:no-sync-del! no-sync-db from-db-file)
+;; #t)
+;; (begin
+;; (dbfile:print-err "ERROR: could not get lock for " from-db-file " from no-sync-db")
+;; #f
+;; ))))
+
+;; sync run from tmp disk to nfs disk if touched
+;;
+;; call with dbinit=db:initialize-main-db
+;;
+(define (db:sync-touched dbstruct run-id keys #!key dbinit (force-sync #f))
+ (dbfile:print-err "db:sync-touched Syncing: " (conc (if run-id run-id "main") ".db"))
+ (let* (;; the subdb is needed to access the mtdbdat
+ (subdb (or (dbfile:get-subdb dbstruct run-id)
+ (dbfile:init-subdb dbstruct run-id dbinit)))
+ (tmpdbfile (dbr:subdb-tmpdbfile subdb))
+ (mtdb (dbr:subdb-mtdbdat subdb))
+ (tmpdb (db:open-db dbstruct run-id dbinit)) ;; sqlite3-db tmpdbfile #f))
+ (start-t (current-seconds)))
+ (mutex-lock! *db-multi-sync-mutex*)
+ (let ((update_info (cons "last_update" (if force-sync 0 *db-last-sync*) )))
+ (mutex-unlock! *db-multi-sync-mutex*)
+ (db:sync-tables (db:sync-all-tables-list dbstruct keys) update_info tmpdb mtdb))
+ (mutex-lock! *db-multi-sync-mutex*)
+ (set! *db-last-sync* start-t)
+ (set! *db-last-access* start-t)
+ (mutex-unlock! *db-multi-sync-mutex*)
+ (dbfile:add-dbdat dbstruct run-id tmpdb)
+ #t))
+
+;; just tests, test_steps and test_data tables
+(define db:sync-tests-only
+ (list
+ ;; (list "strs"
+ ;; '("id" #f)
+ ;; '("str" #f))
+ (list "tests"
+ '("id" #f)
+ '("run_id" #f)
+ '("testname" #f)
+ '("host" #f)
+ '("cpuload" #f)
+ '("diskfree" #f)
+ '("uname" #f)
+ '("rundir" #f)
+ '("shortdir" #f)
+ '("item_path" #f)
+ '("state" #f)
+ '("status" #f)
+ '("attemptnum" #f)
+ '("final_logf" #f)
+ '("logdat" #f)
+ '("run_duration" #f)
+ '("comment" #f)
+ '("event_time" #f)
+ '("fail_count" #f)
+ '("pass_count" #f)
+ '("archived" #f)
+ '("last_update" #f))
+ (list "test_steps"
+ '("id" #f)
+ '("test_id" #f)
+ '("stepname" #f)
+ '("state" #f)
+ '("status" #f)
+ '("event_time" #f)
+ '("comment" #f)
+ '("logfile" #f)
+ '("last_update" #f))
+ (list "test_data"
+ '("id" #f)
+ '("test_id" #f)
+ '("category" #f)
+ '("variable" #f)
+ '("value" #f)
+ '("expected" #f)
+ '("tol" #f)
+ '("units" #f)
+ '("comment" #f)
+ '("status" #f)
+ '("type" #f)
+ '("last_update" #f))))
+
+;; needs db to get keys, this is for syncing all tables
+;;
+(define (db:sync-main-list dbstruct keys)
+ (let ((keys keys)) ;; (db:get-keys dbstruct)))
+ (list
+ (list "keys"
+ '("id" #f)
+ '("fieldname" #f)
+ '("fieldtype" #f))
+ (list "metadat" '("var" #f) '("val" #f))
+ (append (list "runs"
+ '("id" #f))
+ (map (lambda (k)(list k #f))
+ (append keys
+ (list "runname" "state" "status" "owner" "event_time" "comment" "fail_count" "pass_count" "contour" "last_update"))))
+ (list "archive_disks"
+ '("id" #f)
+ '("archive_area_name" #f)
+ '("disk_path" #f)
+ '("last_df" #f)
+ '("last_df_time" #f)
+ '("creation_time" #f))
+
+ (list "archive_blocks"
+ '("id" #f)
+ '("archive_disk_id" #f)
+ '("disk_path" #f)
+ '("last_du" #f)
+ '("last_du_time" #f)
+ '("creation_time" #f))
+
+ (list "test_meta"
+ '("id" #f)
+ '("testname" #f)
+ '("owner" #f)
+ '("description" #f)
+ '("reviewed" #f)
+ '("iterated" #f)
+ '("avg_runtime" #f)
+ '("avg_disk" #f)
+ '("tags" #f)
+ '("jobgroup" #f))
+
+
+ (list "tasks_queue"
+ '("id" #f)
+ '("action" #f)
+ '("owner" #f)
+ '("state" #f)
+ '("target" #f)
+ '("name" #f)
+ '("testpatt" #f)
+ '("keylock" #f)
+ '("params" #f)
+ '("creation_time" #f)
+ '("execution_time" #f))
+ )))
+
+(define (db:sync-all-tables-list dbstruct keys)
+ (append (db:sync-main-list dbstruct keys)
+ db:sync-tests-only))
+
+;; tbls is ( ("tablename" ( "field1" [#f|proc1] ) ( "field2" [#f|proc2] ) .... ) )
+;; db's are dbdat's
+;;
+;; if last-update specified ("field-name" . time-in-seconds)
+;; then sync only records where field-name >= time-in-seconds
+;; IFF field-name exists
+;;
+(define (db:sync-tables tbls last-update fromdb todb . slave-dbs)
+ (handle-exceptions
+ exn
+ (begin
+ (dbfile:print-err "EXCEPTION: database probably overloaded or unreadable in db:sync-tables.")
+ (print-call-chain (current-error-port))
+ (dbfile:print-err " message: " ((condition-property-accessor 'exn 'message) exn))
+ (dbfile:print-err "exn=" (condition->list exn))
+ (dbfile:print-err " status: " ((condition-property-accessor 'sqlite3 'status) exn))
+ (dbfile:print-err " src db: " (dbr:dbdat-dbfile fromdb))
+ (for-each (lambda (dbdat)
+ (let ((dbpath (dbr:dbdat-dbfile dbdat)))
+ (dbfile:print-err " dbpath: " dbpath)
+ (if #t ;; (not (db:repair-db dbdat))
+ (begin
+ (dbfile:print-err "Failed to rebuild (repair is turned off) " dbpath ", exiting now.")
+ (exit)))))
+ (cons todb slave-dbs))
+
+ 0)
+
+ ;; this is the work to be done")
+ (cond
+ ((not fromdb) (dbfile:print-err "WARNING: db:sync-tables called with fromdb missing")
+ -1)
+ ((not todb) (dbfile:print-err "WARNING: db:sync-tables called with todb missing")
+ -2)
+ ((not (sqlite3:database? (dbr:dbdat-dbh fromdb)))
+ (dbfile:print-err "db:sync-tables called with fromdb not a database " fromdb)
+ -3)
+ ((not (sqlite3:database? (dbr:dbdat-dbh todb)))
+ (dbfile:print-err "db:sync-tables called with todb not a database " todb)
+ -4)
+
+ ((not (file-write-access? (dbr:dbdat-dbfile todb)))
+ (dbfile:print-err "db:sync-tables called with todb not a read-only database " todb)
+ -5)
+ ((not (null? (let ((readonly-slave-dbs
+ (filter
+ (lambda (dbdat)
+ (not (file-write-access? (dbr:dbdat-dbfile todb))))
+ slave-dbs)))
+ (for-each
+ (lambda (bad-dbdat)
+ (dbfile:print-err "db:sync-tables called with todb not a read-only database " bad-dbdat))
+ readonly-slave-dbs)
+ readonly-slave-dbs))) -6)
+ (else
+ ;; (dbfile:print-err "db:sync-tables: args are good")
+
+ (let ((stmts (make-hash-table)) ;; table-field => stmt
+ (all-stmts '()) ;; ( ( stmt1 value1 ) ( stml2 value2 ))
+ (numrecs (make-hash-table))
+ (start-time (current-milliseconds))
+ (tot-count 0))
+ (for-each ;; table
+ (lambda (tabledat)
+ (let* ((tablename (car tabledat))
+ (fields (cdr tabledat))
+ (has-last-update (member "last_update" fields))
+ (use-last-update (cond
+ ((and has-last-update
+ (member "last_update" fields))
+ #t) ;; if given a number, just use it for all fields
+ ((number? last-update) #f) ;; if not matched first entry then ignore last-update for this table
+ ((and (pair? last-update)
+ (member (car last-update) ;; last-update field name
+ (map car fields)))
+ #t)
+ ((and last-update (not (pair? last-update)) (not (number? last-update)))
+ (dbfile:print-err "ERROR: parameter last-update for db:sync-tables must be a pair or a number, received: " last-update);; found in fields
+ #f)
+ (else
+ #f)))
+ (last-update-value (if use-last-update ;; no need to check for has-last-update - it is already accounted for
+ (if (number? last-update)
+ last-update
+ (cdr last-update))
+ #f))
+ (last-update-field (if use-last-update
+ (if (number? last-update)
+ "last_update"
+ (car last-update))
+ #f))
+ (num-fields (length fields))
+ (field->num (make-hash-table))
+ (num->field (apply vector (map car fields))) ;; BBHERE
+ (full-sel (conc "SELECT " (string-intersperse (map car fields) ",")
+ " FROM " tablename (if use-last-update ;; apply last-update criteria
+ (conc " WHERE " last-update-field " >= " last-update-value)
+ "")
+ ";"))
+ (full-ins (conc "INSERT OR REPLACE INTO " tablename " ( " (string-intersperse (map car fields) ",") " ) "
+ " VALUES ( " (string-intersperse (make-list num-fields "?") ",") " );"))
+ (fromdat '())
+ (fromdats '())
+ (totrecords 0)
+ (batch-len 100) ;; (string->number (or (configf:lookup *configdat* "sync" "batchsize") "100")))
+ (todat (make-hash-table))
+ (count 0)
+ (field-names (map car fields))
+ (delay-handicap 0) ;; (string->number (or (configf:lookup *configdat* "sync" "delay-handicap") "0")))
+ )
+
+ ;; set up the field->num table
+ (for-each
+ (lambda (field)
+ (hash-table-set! field->num field count)
+ (set! count (+ count 1)))
+ fields)
+
+ ;; read the source table
+ ;; store a list of all rows in the table in fromdat, up to batch-len.
+ ;; Then add fromdat to the fromdats list, clear fromdat and repeat.
+ (sqlite3:for-each-row
+ (lambda (a . b)
+ (set! fromdat (cons (apply vector a b) fromdat))
+ (if (> (length fromdat) batch-len)
+ (begin
+ (set! fromdats (cons fromdat fromdats))
+ (set! fromdat '())
+ (set! totrecords (+ totrecords 1)))
+ )
+ )
+ (dbr:dbdat-dbh fromdb)
+ full-sel)
+
+ ;; Count less than batch-len as a record
+ (if (> (length fromdat) 0)
+ (set! totrecords (+ totrecords 1)))
+
+ ;; tack on remaining records in fromdat
+ (if (not (null? fromdat))
+ (set! fromdats (cons fromdat fromdats)))
+
+ (sqlite3:for-each-row
+ (lambda (a . b)
+ (hash-table-set! todat a (apply vector a b)))
+ (dbr:dbdat-dbh todb)
+ full-sel)
+
+ (when (and delay-handicap (> delay-handicap 0))
+ (dbfile:print-err "imposing synthetic sync delay of "delay-handicap" seconds since sync/delay-handicap is configured")
+ (thread-sleep! delay-handicap)
+ (dbfile:print-err "synthetic sync delay of "delay-handicap" seconds completed")
+ )
+
+ ;; first pass implementation, just insert all changed rows
+
+ (for-each
+ (lambda (targdb)
+ (let* ((db (dbr:dbdat-dbh targdb))
+ (drp-trigger (if (member "last_update" field-names)
+ (db:drop-trigger db tablename)
+ #f))
+ (has-last-update (member "last_update" field-names))
+ (is-trigger-dropped (if has-last-update
+ (db:is-trigger-dropped db tablename)
+ #f))
+ (stmth (sqlite3:prepare db full-ins))
+ (changed-rows 0))
+ (for-each
+ (lambda (fromdat-lst)
+ (sqlite3:with-transaction
+ db
+ (lambda ()
+ (for-each ;;
+ (lambda (fromrow)
+ (let* ((a (vector-ref fromrow 0))
+ (curr (hash-table-ref/default todat a #f))
+ (same #t))
+ (let loop ((i 0))
+ (if (or (not curr)
+ (not (equal? (vector-ref fromrow i)(vector-ref curr i))))
+ (set! same #f))
+ (if (and same
+ (< i (- num-fields 1)))
+ (loop (+ i 1))))
+ (if (not same)
+ (begin
+ (apply sqlite3:execute stmth (vector->list fromrow))
+ (hash-table-set! numrecs tablename (+ 1 (hash-table-ref/default numrecs tablename 0)))
+ (set! changed-rows (+ changed-rows 1))
+ )
+ )
+ ))
+ fromdat-lst))))
+ fromdats)
+
+ (sqlite3:finalize! stmth)
+ (if (member "last_update" field-names)
+ (db:create-trigger db tablename))))
+ (append (list todb) slave-dbs)
+ )
+ )
+ )
+ tbls)
+ (let* ((runtime (- (current-milliseconds) start-time))
+ (should-print (or ;; (debug:debug-mode 12)
+ (common:low-noise-print 120 "db sync")
+ (> runtime 500)))) ;; low and high sync times treated as separate.
+ (for-each
+ (lambda (dat)
+ (let ((tblname (car dat))
+ (count (cdr dat)))
+ (set! tot-count (+ tot-count count))
+ ))
+ (sort (hash-table->alist numrecs)(lambda (a b)(> (cdr a)(cdr b))))))
+ tot-count)))))
+
+;;======================================================================
+;; trigger setup/takedown
+;;======================================================================
+
+(define db:trigger-list
+ (list (list "update_runs_trigger" "CREATE TRIGGER IF NOT EXISTS update_runs_trigger AFTER UPDATE ON runs
+ FOR EACH ROW
+ BEGIN
+ UPDATE runs SET last_update=(strftime('%s','now'))
+ WHERE id=old.id;
+ END;" )
+ (list "update_run_stats_trigger" "CREATE TRIGGER IF NOT EXISTS update_run_stats_trigger AFTER UPDATE ON run_stats
+ FOR EACH ROW
+ BEGIN
+ UPDATE run_stats SET last_update=(strftime('%s','now'))
+ WHERE id=old.id;
+ END;" )
+ (list "update_tests_trigger" "CREATE TRIGGER IF NOT EXISTS update_tests_trigger AFTER UPDATE ON tests
+ FOR EACH ROW
+ BEGIN
+ UPDATE tests SET last_update=(strftime('%s','now'))
+ WHERE id=old.id;
+ END;" )
+ (list "update_teststeps_trigger" "CREATE TRIGGER IF NOT EXISTS update_teststeps_trigger AFTER UPDATE ON test_steps
+ FOR EACH ROW
+ BEGIN
+ UPDATE test_steps SET last_update=(strftime('%s','now'))
+ WHERE id=old.id;
+ END;" )
+ (list "update_test_data_trigger" "CREATE TRIGGER IF NOT EXISTS update_test_data_trigger AFTER UPDATE ON test_data
+ FOR EACH ROW
+ BEGIN
+ UPDATE test_data SET last_update=(strftime('%s','now'))
+ WHERE id=old.id;
+ END;" )))
+;;
+;; ADD run-id SUPPORT
+;;
+(define (db:create-all-triggers dbstruct)
+ (db:with-db
+ dbstruct #f #f
+ (lambda (dbdat db)
+ (db:create-triggers db))))
+
+(define (db:create-triggers db)
+ (for-each (lambda (key)
+ (sqlite3:execute db (cadr key)))
+ db:trigger-list))
+
+(define (db:drop-all-triggers dbstruct)
+ (db:with-db
+ dbstruct #f #f
+ (lambda (dbdat db)
+ (db:drop-triggers db))))
+
+(define (db:is-trigger-dropped db tbl-name)
+ (let* ((trigger-name (if (equal? tbl-name "test_steps")
+ "update_teststeps_trigger"
+ (conc "update_" tbl-name "_trigger")))
+ (res #f))
+ (sqlite3:for-each-row
+ (lambda (name)
+ (if (equal? name trigger-name)
+ (set! res #t)))
+ db
+ "SELECT name FROM sqlite_master WHERE type = 'trigger' ;")
+ res))
+
+(define (db:drop-triggers db)
+ (for-each
+ (lambda (key)
+ (sqlite3:execute db (conc "drop trigger if exists " (car key))))
+ db:trigger-list))
+
+(define (db:drop-trigger db tbl-name)
+ (let* ((trigger-name (if (equal? tbl-name "test_steps")
+ "update_teststeps_trigger"
+ (conc "update_" tbl-name "_trigger"))))
+ (for-each
+ (lambda (key)
+ (if (equal? (car key) trigger-name)
+ (sqlite3:execute db (conc "drop trigger if exists " trigger-name))))
+ db:trigger-list)))
+
+(define (db:create-trigger db tbl-name)
+ (let* ((trigger-name (if (equal? tbl-name "test_steps")
+ "update_teststeps_trigger"
+ (conc "update_" tbl-name "_trigger"))))
+ (for-each (lambda (key)
+ (if (equal? (car key) trigger-name)
+ (sqlite3:execute db (cadr key))))
+ db:trigger-list)))
+
+;;======================================================================
+;; db access stuff
+;;======================================================================
+
+;; call with dbinit=db:initialize-main-db
+;;
+(define (db:open-db dbstruct run-id dbinit)
+ ;; (mutex-lock! *db-open-mutex*)
+ (let* ((dbdat (dbfile:open-db dbstruct run-id dbinit)))
+ (set! *db-write-access* (not (dbr:dbdat-read-only dbdat)))
+ ;; (mutex-unlock! *db-open-mutex*)
+ dbdat))
+
+(define dbfile:db-init-proc (make-parameter #f))
+
+;; in xmaxima this gives a curve close to what I want:
+;; plot2d ((exp(x/1.2)-1)/300, [x, 0, 10])$
+;; plot2d ((exp(x/1.5)-1)/40, [x, 0, 10])$
+;; plot2d ((exp(x/5)-1)/40, [x, 0, 20])$
+(define (dbfile:droop x)
+ (/ (- (exp (/ x 5)) 1) 40))
+ ;; (* numqrys (/ 1 (qif-slope))))
+
+;; create a dropping near the db file in a qif dir
+;; use count of such files to gate queries (queries in flight)
+;;
+(define (dbfile:wait-for-qif fname run-id params)
+ (let* ((thedir (pathname-directory fname))
+ (dbnum (dbfile:run-id->dbnum run-id))
+ (destdir (conc thedir"/qif-"dbnum))
+ (uniqn (get-area-path-signature (conc dbnum params)))
+ (crumbn (conc destdir"/"(current-seconds)"-"uniqn"."(current-process-id))))
+ (if (not (file-exists? destdir))(create-directory (conc destdir"/attic") #t))
+ (let loop ((count 0))
+ (let* ((currlks (glob (conc destdir"/*")))
+ (numqrys (length currlks))
+ (delayval (cond ;; do a droopish curve
+ ((> numqrys 25)
+ (for-each
+ (lambda (f)
+ (if (> (- (current-seconds)
+ (handle-exceptions
+ exn
+ (current-seconds) ;; file is likely gone, just fake out
+ (file-modification-time f)))
+ (keep-age-param))
+ (let* ((basedir (pathname-directory f))
+ (filen (pathname-file f))
+ (destf (conc basedir"/attic/"filen)))
+ (dbfile:print-err "Moving qif file "f" older than 10 seconds to "destf)
+ ;; (delete-file* f)
+ (handle-exceptions
+ exn
+ #t
+ (file-move f destf #t)))))
+ currlks)
+ 4)
+ ((> numqrys 0) (dbfile:droop numqrys)) ;; slope of 1/100
+ (else #f))))
+ (if (and delayval
+ (< count 5))
+ (begin
+ (thread-sleep! delayval)
+ (loop (+ count 1))))))
+ (with-output-to-file crumbn
+ (lambda ()
+ (print fname" run-id="run-id" params="params)
+ ))
+ crumbn))
+
+(define no-condition-db-with-db (make-parameter #t))
+
+;; (db:with-db dbstruct run-id sqlite3:exec "select blah fgrom blaz;")
+;; r/w is a flag to indicate if the db is modified by this query #t = yes, #f = no
+;;
+(define (db:with-db dbstruct run-id r/w proc . params)
+ (assert dbstruct "FATAL: db:with-db called with dbstruct "#f)
+ (assert (dbr:dbstruct? dbstruct) "FATAL: dbstruct is "dbstruct)
+ (let* ((use-mutex (> *api-process-request-count* 25)) ;; risk of db corruption
+ (have-struct (dbr:dbstruct? dbstruct))
+ (dbdat (if have-struct ;; this stuff just allows us to call with a db handle directly
+ (db:open-db dbstruct run-id (dbfile:db-init-proc)) ;; (dbfile:get-subdb dbstruct run-id)
+ #f))
+ (db (if have-struct ;; this stuff just allows us to call with a db handle directly
+ (dbr:dbdat-dbh dbdat)
+ dbstruct))
+ (fname (if dbdat
+ (dbr:dbdat-dbfile dbdat)
+ "nofilenameavailable"))
+ (jfile (conc fname"-journal"))
+ (qryproc (lambda ()
+ (if use-mutex (mutex-lock! *db-with-db-mutex*))
+ (let ((res (apply proc dbdat db params))) ;; the actual call is here.
+ (if use-mutex (mutex-unlock! *db-with-db-mutex*))
+ ;; (if (vector? dbstruct)(db:done-with dbstruct run-id r/w))
+ (if dbdat
+ (dbfile:add-dbdat dbstruct run-id dbdat))
+ ;; (delete-file* crumbfile)
+ res))))
+
+ (assert (sqlite3:database? db) "FATAL: db:with-db, db is not a database, db="db", fname="fname)
+ (if (file-exists? jfile)
+ (begin
+ (dbfile:print-err "INFO: "jfile" exists, delaying to reduce database load")
+ (thread-sleep! 0.2)))
+ (if (and use-mutex
+ (common:low-noise-print 120 "over-50-parallel-api-requests"))
+ (dbfile:print-err *api-process-request-count* " parallel api requests being processed in process "
+ (current-process-id))) ;; ", throttling access"))
+ (if (no-condition-db-with-db)
+ (qryproc)
+ (condition-case
+ (qryproc)
+ (exn (io-error)
+ (db:generic-error-printout exn "ERROR: i/o error with " fname ". Check permissions, disk space etc. and try again."))
+ (exn (corrupt)
+ (db:generic-error-printout exn "ERROR: database " fname " is corrupt. Repair it to proceed."))
+ (exn (busy)
+ (db:generic-error-printout exn "ERROR: database " fname
+ " is locked. Try copying to another location, remove original and copy back."))
+ (exn (permission)(db:generic-error-printout exn "ERROR: database " fname " has some permissions problem."))
+ (exn ()
+ (db:generic-error-printout exn "ERROR: Unknown error with database " fname " message: "
+ ((condition-property-accessor 'exn 'message) exn)))))))
+
+;;======================================================================
+;; another attempt at a transactionized queue
+;;======================================================================
+
+;; ;; ;; (define *transaction-queues* (make-hash-table))
+;; ;; ;;
+;; ;; ;; (define (db:get-queue run-id)
+;; ;; ;; (let* ((res (hash-table-ref/default *transaction-queues* run-id #f)))
+;; ;; ;; (if res
+;; ;; ;; res
+;; ;; ;; (let* ((newq (make-queue)))
+;; ;; ;; (hash-table-set! *transaction-queues* run-id newq)
+;; ;; ;; newq))))
+;; ;; ;;
+;; ;; ;; (define (db:add-to-transaction-queue dbstruct proc params)
+;; ;; ;; (let* ((mbox (make-mailbox))
+;; ;; ;; (q (db:get-queue run-id)))
+;; ;; ;; (queue-add! *transaction-queue* (list dbstruct proc mbox))
+;; ;; ;; (mailbox-receive mbox)))
+;; ;; ;;
+;; ;; ;; (define (db:process-transaction-queue *dbstruct-dbs*)
+;; ;; ;; (for-each
+;; ;; ;; (lambda (run-id)
+;; ;; ;; (let* ((q (hash-table-ref *transaction-queue* run-id)))
+;; ;; ;; ;; with-transaction
+;; ;; ;; ;; dbstruct
+;; ;; ;; ;; pop items from queue and execute them, return results via mailbox
+;; ;; ;; q
+;; ;; ;; ;; pop
+;; ;; ;; ))
+;; ;; ;; (hash-table-keys *transaction-queues*)))
+
+;;======================================================================
+;; file utils
+;;======================================================================
+
+;;======================================================================
+;; lazy-safe get file mod time. on any error (file not existing etc.) return 0
+;;
+(define (dbfile:lazy-modification-time fpath)
+ (handle-exceptions
+ exn
+ (begin
+ (dbfile:print-err "Failed to get modification time for " fpath ", treating it as zero. exn=" exn)
+ 0)
+ (if (file-exists? fpath)
+ (file-modification-time fpath)
+ 0)))
+
+;;======================================================================
+;; find timestamp of newest file associated with a sqlite db file
+(define (dbfile:lazy-sqlite-db-modification-time fpath)
+ (let* ((glob-list (handle-exceptions
+ exn
+ (begin
+ (dbfile:print-err "Failed to glob " fpath "*, exn=" exn)
+ `(,(conc "/no/such/file, message: " ((condition-property-accessor 'exn 'message) exn))))
+ (glob (conc fpath "*"))))
+ (file-list (if (eq? 0 (length glob-list))
+ '("/no/such/file")
+ glob-list)))
+ (apply max
+ (map
+ dbfile:lazy-modification-time
+ file-list))))
+
+;; dot-locking egg seems not to work, using this for now
+;; if lock is older than expire-time then remove it and try again
+;; to get the lock
+;;
+(define (dbfile:simple-file-lock fname #!key (expire-time 300))
+ (let ((fmod-time (handle-exceptions
+ ext
+ (current-seconds)
+ (file-modification-time fname))))
+ (if (file-exists? fname)
+ (if (> (- (current-seconds) fmod-time) expire-time)
+ (begin
+ (handle-exceptions exn #f (delete-file* fname))
+ (dbfile:simple-file-lock fname expire-time: expire-time))
+ #f)
+ (let ((key-string (conc (get-host-name) "-" (current-process-id)))
+ (oup (open-output-file fname)))
+ (with-output-to-port
+ oup
+ (lambda ()
+ (print key-string)))
+ (close-output-port oup)
+ #;(with-output-to-file fname ;; bizarre. with-output-to-file does not seem to be cleaning up after itself.
+ (lambda ()
+ (print key-string)))
+ (thread-sleep! 0.25)
+ (if (file-exists? fname)
+ (handle-exceptions exn
+ #f
+ (with-input-from-file fname
+ (lambda ()
+ (equal? key-string (read-line)))))
+ #f)
+ )
+ )
+ )
+)
+
+(define (dbfile:simple-file-lock-and-wait fname #!key (expire-time 300))
+ (let ((end-time (+ expire-time (current-seconds))))
+ (let loop ((got-lock (dbfile:simple-file-lock fname expire-time: expire-time)))
+ (if got-lock
+ #t
+ (if (> end-time (current-seconds))
+ (begin
+ (thread-sleep! 3)
+ (loop (dbfile:simple-file-lock fname expire-time: expire-time)))
+ #f)))))
+
+(define (dbfile:simple-file-release-lock fname)
+ (handle-exceptions
+ exn
+ #f ;; I don't really care why this failed (at least for now)
+ (delete-file* fname)))
+
+(define (dbfile:with-simple-file-lock fname proc #!key (expire-time 300))
+ (let ((gotlock (dbfile:simple-file-lock-and-wait fname expire-time: expire-time)))
+ (if gotlock
+ (let ((res (proc)))
+ (dbfile:simple-file-release-lock fname)
+ res)
+ (assert #t "FATAL: simple file lock never got a lock."))))
+
+(define (db:get-cache-stmth dbdat db stmt)
+ (let* (;; (dbdat (dbfile:get-dbdat dbstruct run-id))
+ (stmt-cache (dbr:dbdat-stmt-cache dbdat))
+ ;; (stmth (db:hoh-get stmt-cache db stmt))
+ (stmth (hash-table-ref/default stmt-cache stmt #f)))
+ (or stmth
+ (let* ((newstmth (sqlite3:prepare db stmt)))
+ ;; (db:hoh-set! stmt-cache db stmt newstmth)
+ (hash-table-set! stmt-cache stmt newstmth)
+ newstmth))))
+
+(define (db:have-incompletes? dbstruct run-id ovr-deadtime)
+ (let* ((incompleted '())
+ (oldlaunched '())
+ (toplevels '())
+ ;; (deadtime-str (configf:lookup *configdat* "setup" "deadtime"))
+ (deadtime (or ovr-deadtime 72000))) ;; twenty hours
+ (db:with-db
+ dbstruct run-id #f
+ (lambda (dbdat db)
+
+ ;; in RUNNING or REMOTEHOSTSTART for more than 10 minutes
+ ;;
+ ;; HOWEVER: this code in run:test seems to work fine
+ ;; (> (- (current-seconds)(+ (db:test-get-event_time testdat)
+ ;; (db:test-get-run_duration testdat)))
+ ;; 600)
+ ;; (db:delay-if-busy dbdat)
+ (sqlite3:for-each-row
+ (lambda (test-id run-dir uname testname item-path)
+ (if (and (equal? uname "n/a")
+ (equal? item-path "")) ;; this is a toplevel test
+ ;; what to do with toplevel? call rollup?
+ (begin
+ (set! toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels)))
+ ;; (print-info "Found old toplevel test in RUNNING state, test-id=" test-id))
+ (set! incompleted (cons (list test-id run-dir uname testname item-path run-id) incompleted))))
+ (db:get-cache-stmth dbdat db
+ "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > (run_duration + ?) AND state IN ('RUNNING','REMOTEHOSTSTART');")
+ run-id deadtime)
+
+ ;; in LAUNCHED for more than one day. Could be long due to job queues TODO/BUG: Need override for this in config
+ ;;
+ ;; (db:delay-if-busy dbdat)
+ (sqlite3:for-each-row
+ (lambda (test-id run-dir uname testname item-path)
+ (if (and (equal? uname "n/a")
+ (equal? item-path "")) ;; this is a toplevel test
+ ;; what to do with toplevel? call rollup?
+ (set! toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels))
+ (set! oldlaunched (cons (list test-id run-dir uname testname item-path run-id) oldlaunched))))
+ (db:get-cache-stmth dbdat db
+ "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > 86400 AND state IN ('LAUNCHED');")
+ run-id)
+
+ ;; (print-info "Found " (length oldlaunched) " old LAUNCHED items, " (length toplevels) " old LAUNCHED toplevel tests and " (length incompleted) " tests marked RUNNING but apparently dead.")
+ (if (and (null? incompleted)
+ (null? oldlaunched)
+ (null? toplevels))
+ #f
+ #t)))))
+
+
+)
Index: mtargs/mtargs.scm
==================================================================
--- mtargs/mtargs.scm
+++ mtargs/mtargs.scm
@@ -23,11 +23,11 @@
get-arg-from
get-args
usage
print-args
any-defined?
- )
+ )
(import scheme) ;; gives us cond-expand in chicken-4
(cond-expand
(chicken-5
Index: rmt.scm
==================================================================
--- rmt.scm
+++ rmt.scm
@@ -22,15 +22,20 @@
(declare (unit rmt))
(declare (uses api))
(declare (uses http-transport))
(declare (uses dbfile))
+(declare (uses dbmemmod))
(include "common_records.scm")
;; (declare (uses rmtmod))
+;; used by http-transport
(import dbfile) ;; rmtmod)
+(import dbmemmod)
+
+(define rmt:transport-mode (make-parameter 'http))
;;
;; THESE ARE ALL CALLED ON THE CLIENT SIDE!!!
;;
;; generate entries for ~/.megatestrc with the following
@@ -119,11 +124,13 @@
(begin
(remote-server-url-set! *runremote* (server:record->url server-info))
(remote-server-id-set! *runremote* (server:record->id server-info)))))
(set! runremote *runremote*))) ;; new runremote will come from this on next iteration
- (http-transport-handler runremote cmd rid params attemptnum area-dat areapath readonly-mode)))
+ (case (rmt:transport-mode)
+ ((http)(http-transport-handler runremote cmd rid params attemptnum area-dat areapath readonly-mode))
+ ((tcp) (tcp-transport-handler runremote cmd rid params attemptnum area-dat areapath readonly-mode)))))
(define (http-transport-handler runremote cmd rid params attemptnum area-dat areapath readonly-mode)
;; DOT SET_HOMEHOST; // leaving off - doesn't really add to the clarity
;; DOT MUTEXLOCK -> SET_HOMEHOST [label="no homehost?"];
;; DOT SET_HOMEHOST -> MUTEXLOCK;
ADDED tcp-transportmod.scm
Index: tcp-transportmod.scm
==================================================================
--- /dev/null
+++ tcp-transportmod.scm
@@ -0,0 +1,69 @@
+;;======================================================================
+;; Copyright 2017, Matthew Welland.
+;;
+;; This file is part of Megatest.
+;;
+;; Megatest is free software: you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation, either version 3 of the License, or
+;; (at your option) any later version.
+;;
+;; Megatest is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with Megatest. If not, see .
+
+;;======================================================================
+
+(declare (unit tcp-transportmod))
+(declare (uses debugprint))
+(declare (uses commonmod))
+
+(module tcp-transportmod
+ *
+
+ (import scheme
+ chicken
+ data-structures
+ extras
+ matchable)
+
+(import (prefix sqlite3 sqlite3:)
+ posix typed-records srfi-18 srfi-1
+ srfi-69
+ stack
+ files
+ ports
+
+ commonmod
+ ;; debugprint
+ )
+
+;; (define keep-age-param (make-parameter 10)) ;; qif file age, if over move to attic
+
+(defstruct tt
+ (area #f)
+
+ )
+
+(define (tt:bid-for-servership run-id)
+ #f)
+
+(define (tt:get-current-server run-id)
+ #f)
+
+(define (tt:send-receive ttdat run-id cmd params)
+ #f)
+
+(define (tt:sync-dbs ttdat)
+ #f)
+
+(define (tt:shutdown-server ttdat)
+ #f)
+
+
+
+)