Index: Makefile
==================================================================
--- Makefile
+++ Makefile
@@ -28,11 +28,11 @@
            ezsteps.scm lock-queue.scm rmt.scm api.scm		\
            subrun.scm portlogger.scm archive.scm env.scm		\
            diff-report.scm cgisetup/models/pgdb.scm
 
 # module source files
-MSRCFILES = dbmod.scm dbfile.scm debugprint.scm mtargs.scm
+MSRCFILES = dbmod.scm dbfile.scm debugprint.scm mtargs.scm commonmod.scm
 
 mofiles/dbfile.o     : mofiles/debugprint.o
 mofiles/debugprint.o : mofiles/mtargs.o
 
 # ftail.scm rmtmod.scm commonmod.scm removed
@@ -154,11 +154,12 @@
 	$(INSTALL) mt-pg.sql $(PREFIX)/share/db/mt-pg.sql
 
 # Special dependencies for the includes
 $(MOFILE) $(MOIMPFILES) : megatest-fossil-hash.scm
 
-# common.o : mofiles/commonmod.o megatest-fossil-hash.scm
+common.o : mofiles/commonmod.o megatest-fossil-hash.scm
+
 # mofiles/dbmod.o : mofiles/configfmod.o
 
 # commonmod.o dashboard.o megatest.o tcmt.o apimod.o : megatest-fossil-hash.scm
 
 tests.o db.o launch.o runs.o dashboard-tests.o				\
@@ -166,11 +167,11 @@
 monitor.o dashboard.o archive.o megatest.o : db_records.scm megatest-fossil-hash.scm
 
 tests.o runs.o dashboard.o dashboard-tests.o dashboard-context-menu.o dashboard-main.o  : run_records.scm
 
 db.o ezsteps.o keys.o launch.o megatest.o monitor.o runs-for-ref.o runs.o tests.o : key_records.scm
-db.o api.o : mofiles/dbmod.o mofiles/dbfile.o mofiles/debugprint.o
+db.o api.o : mofiles/dbmod.o mofiles/dbfile.o mofiles/debugprint.o mofiles/commonmod.o
 
 tests.o tasks.o dashboard-tasks.o : task_records.scm
 
 runs.o : test_records.scm
 
@@ -177,10 +178,12 @@
 megatest.o : megatest-fossil-hash.scm megatest-version.scm
 
 rmt.scm client.scm common.scm configf.scm dashboard-guimonitor.scm dashboard-tests.scm dashboard.scm db.scm dcommon.scm ezsteps.scm http-transport.scm index-tree.scm items.scm keys.scm launch.scm megatest.scm monitor.scm mt.scm newdashboard.scm runconfig.scm runs.scm server.scm tdb.scm tests.scm tree.scm : common_records.scm megatest-version.scm
 
 common_records.scm : altdb.scm
+
+mofiles/dbfile.o : mofiles/commonmod.o
 
 # mofiles/stml2.o : mofiles/cookie.o
 # configf.o : mofiles/commonmod.o
 
 vg.o dashboard.o : vg_records.scm megatest-version.scm

Index: api.scm
==================================================================
--- api.scm
+++ api.scm
@@ -147,17 +147,17 @@
 ;;
 ;;    - returns #( flag result )
 ;;
 (define (api:execute-requests dbstruct dat)
   (db:open-no-sync-db) ;; sets *no-sync-db*
-  (handle-exceptions
-   exn
-   (let ((call-chain (get-call-chain)))
-     (debug:print 0 *default-log-port* "WARNING: api:execute-requests received an exception from peer, dat=" dat ", exn=" exn)
-     (print-call-chain (current-error-port))
-     (debug:print 0 *default-log-port* " message: "  ((condition-property-accessor 'exn 'message) exn))       
-     (vector #f (vector exn call-chain dat))) ;; return some stuff for debug if an exception happens
+;;   (handle-exceptions
+;;    exn
+;;    (let ((call-chain (get-call-chain)))
+;;      (debug:print 0 *default-log-port* "WARNING: api:execute-requests received an exception from peer, dat=" dat ", exn=" exn)
+;;      (print-call-chain (current-error-port))
+;;      (debug:print 0 *default-log-port* " message: "  ((condition-property-accessor 'exn 'message) exn))       
+;;      (vector #f (vector exn call-chain dat))) ;; return some stuff for debug if an exception happens
    (cond
     ((not (vector? dat))                    ;; it is an error to not receive a vector
      (vector #f (vector #f "remote must be called with a vector")))
     ((> *api-process-request-count* 200) ;; 20)
      (debug:print 0 *default-log-port* "WARNING: api:execute-requests received an overloaded message.")
@@ -239,11 +239,11 @@
                    ((test-data-rollup)             (apply db:test-data-rollup dbstruct params))
                    ((csv->test-data)               (apply db:csv->test-data dbstruct params))
 
                    ;; MISC
                    ((sync-inmem->db)               (let ((run-id (car params)))
-                                                     (db:sync-touched dbstruct run-id force-sync: #t)))
+                                                     (db:sync-touched dbstruct run-id db:initialize-main-db force-sync: #t)))
                    ((mark-incomplete)              (apply db:find-and-mark-incomplete dbstruct params))
                    ((create-all-triggers)          (db:create-all-triggers dbstruct))
                    ((drop-all-triggers)            (db:drop-all-triggers dbstruct)) 
 
                    ;; TESTMETA
@@ -375,11 +375,11 @@
 	     (vector #f res))
            (begin
              #;(common:telemetry-log (conc "api-out:"(->string cmd))
                                    payload: `((params . ,params)
                                               (ok-res . #f)))
-             (vector #t res))))))))
+             (vector #t res))))))) ;; )
 
 ;; http-server  send-response
 ;;                 api:process-request
 ;;                    db:*
 ;;

Index: client.scm
==================================================================
--- client.scm
+++ client.scm
@@ -71,12 +71,18 @@
 ;;
 ;; client:setup
 ;;
 ;; lookup_server, need to remove *runremote* stuff
 ;;
-
+ 
 (define (client:setup-http areapath #!key (remaining-tries 100) (failed-connects 0)(area-dat #f))
+  (mutex-lock! *rmt-mutex*)
+  (let ((res (client:setup-http-baby areapath remaining-tries: remaining-tries failed-connects: failed-connects area-dat: area-dat)))
+    (mutex-unlock! *rmt-mutex*)
+    res))
+
+(define (client:setup-http-baby areapath #!key (remaining-tries 100) (failed-connects 0)(area-dat #f))
   (debug:print-info 2 *default-log-port* "client:setup remaining-tries=" remaining-tries)
   (server:start-and-wait areapath)
   (if (<= remaining-tries 0)
       (begin
 	(debug:print-error 0 *default-log-port* "failed to start or connect to server")
@@ -86,11 +92,11 @@
       ;; through them searching for a good one.
       ;;
       (let* ((server-dat (server:get-rand-best areapath)) ;; (server:get-first-best areapath))
 	     (runremote  (or area-dat *runremote*)))
 	(if (not server-dat) ;; no server found
-	    (client:setup-http areapath remaining-tries: (- remaining-tries 1))
+	    (client:setup-http-baby areapath remaining-tries: (- remaining-tries 1))
 	    (let ((host  (cadr  server-dat))
 		  (port  (caddr server-dat))
                   (server-id (caddr (cddr server-dat))))
 	      (debug:print-info 4 *default-log-port* "client:setup server-dat=" server-dat ", remaining-tries=" remaining-tries)
 	      (if (and (not area-dat)
@@ -108,23 +114,26 @@
 			 (ping-res  (case *transport-type* 
 				      ((http)(rmt:login-no-auto-client-setup start-res)))))
 		    (if (and start-res
 			     ping-res)
 			(let ((runremote (or area-dat *runremote*))) ;; it might have been generated only a few statements ago
-			  (remote-conndat-set! runremote start-res) ;; (hash-table-set! runremote run-id start-res)
-			  (debug:print-info 2 *default-log-port* "connected to " (http-transport:server-dat-make-url start-res))
-			  start-res)
+			  (if runremote
+			      (begin
+				(remote-conndat-set! runremote start-res) ;; (hash-table-set! runremote run-id start-res)
+				(debug:print-info 2 *default-log-port* "connected to " (http-transport:server-dat-make-url start-res))
+				start-res)
+			      (client:setup-http-baby areapath remaining-tries: (- remaining-tries 1))))
 			(begin    ;; login failed but have a server record, clean out the record and try again
 			  (debug:print-info 0 *default-log-port* "client:setup, login unsuccessful, will attempt to start server ... start-res=" start-res ", server-dat=" server-dat) ;; had runid.  Fixes part of Randy;s ticket 1405717332
 			  (case *transport-type* 
 			    ((http)(http-transport:close-connections)))
 			  (remote-conndat-set! runremote #f)  ;; (hash-table-delete! runremote run-id)
 			  (thread-sleep! 1)
-			  (client:setup-http areapath remaining-tries: (- remaining-tries 1))
+			  (client:setup-http-baby areapath remaining-tries: (- remaining-tries 1))
 			  )))
 		  (begin    ;; no server registered
 		    ;; (server:kind-run areapath)
 		    (server:start-and-wait areapath)
 		    (debug:print-info 0 *default-log-port* "client:setup, no server registered, remaining-tries=" remaining-tries)
 		    (thread-sleep! 1) ;; (+ 5 (random (- 20 remaining-tries))))  ;; give server a little time to start up, randomize a little to avoid start storms.
-		    (client:setup-http areapath remaining-tries: (- remaining-tries 1)))))))))
+		    (client:setup-http-baby areapath remaining-tries: (- remaining-tries 1)))))))))
 

Index: common.scm
==================================================================
--- common.scm
+++ common.scm
@@ -26,12 +26,12 @@
      (prefix sqlite3 sqlite3:)
      pkts (prefix dbi dbi:)
      )
 
 (declare (unit common))
-;; (declare (uses commonmod))
-;; (import commonmod)
+(declare (uses commonmod))
+(import commonmod)
 
 (include "common_records.scm")
 
 
 ;; (require-library margs)
@@ -129,11 +129,11 @@
 
 (define *test-meta-updated* (make-hash-table))
 (define *globalexitstatus*  0) ;; attempt to work around possible thread issues
 (define *passnum*           0) ;; when running track calls to run-tests or similar
 ;; (define *alt-log-file* #f)  ;; used by -log
-(define *common:denoise*    (make-hash-table)) ;; for low noise printing
+;; (define *common:denoise*    (make-hash-table)) ;; for low noise printing
 (define *default-log-port*  (current-error-port))
 (define *time-zero* (current-seconds)) ;; for the watchdog
 (define *default-area-tag* "local")
 
 ;; DATABASE
@@ -141,22 +141,22 @@
 ;; db stats
 (define *db-stats*            (make-hash-table)) ;; hash of vectors < count duration-total >
 (define *db-stats-mutex*      (make-mutex))
 ;; db access
 (define *db-last-access*      (current-seconds)) ;; last db access, used in server
-(define *db-write-access*     #t)
+;; (define *db-write-access*     #t)
 ;; db sync
-(define *db-last-sync*        0)                 ;; last time the sync to megatest.db happened
+;; (define *db-last-sync*        0)                 ;; last time the sync to megatest.db happened
 (define *db-sync-in-progress* #f)                ;; if there is a sync in progress do not try to start another
-(define *db-multi-sync-mutex* (make-mutex))      ;; protect access to *db-sync-in-progress*, *db-last-sync*
+;; (define *db-multi-sync-mutex* (make-mutex))      ;; protect access to *db-sync-in-progress*, *db-last-sync*
 ;; task db
 (define *task-db*             #f) ;; (vector db path-to-db)
 (define *db-access-allowed*   #t) ;; flag to allow access
 ;; (define *db-access-mutex*     (make-mutex)) ;; moved to dbfile
 (define *db-transaction-mutex* (make-mutex))
 (define *db-cache-path*       #f)
-(define *db-with-db-mutex*    (make-mutex))
+;; (define *db-with-db-mutex*    (make-mutex))
 (define *db-api-call-time*    (make-hash-table)) ;; hash of command => (list of times)
 ;; no sync db
 ;; (define *no-sync-db*          #f)  ;; moved to dbfile
 
 ;; SERVER
@@ -172,12 +172,12 @@
 (define *run-id*            #f)
 (define *server-kind-run*   (make-hash-table))
 (define *home-host*         #f)
 ;; (define *total-non-write-delay* 0)
 (define *heartbeat-mutex*   (make-mutex))
-(define *api-process-request-count* 0)
-(define *max-api-process-requests* 0)
+;; (define *api-process-request-count* 0)
+;; (define *max-api-process-requests* 0)
 (define *server-overloaded*  #f)
 
 ;; client
 (define *rmt-mutex*         (make-mutex))     ;; remote access calls mutex 
 
@@ -310,10 +310,11 @@
   (hh-dat            (common:get-homehost)) ;; homehost record ( addr . hhflag )
   (server-url        #f) ;; (server:check-if-running *toppath*) #f))
   (server-id         #f)
   (server-info       (if *toppath* (server:check-if-running *toppath*) #f))
   (last-server-check 0)  ;; last time we checked to see if the server was alive
+  (connect-time      (current-seconds))
   (conndat           #f)
   (transport         *transport-type*)
   (server-timeout    (server:expiration-timeout))
   (force-server      #f)
   (ro-mode           #f)  
@@ -725,54 +726,10 @@
       (print-call-chain (current-error-port))
       #f)
     (read (open-input-string (base64:base64-decode instr))))
    (read (open-input-string (z3:decode-buffer (base64:base64-decode instr))))))
 
-;; dot-locking egg seems not to work, using this for now
-;; if lock is older than expire-time then remove it and try again
-;; to get the lock
-;;
-(define (common:simple-file-lock fname #!key (expire-time 300))
-  (let ((fmod-time (handle-exceptions
-		       ext
-		     (current-seconds)
-		     (file-modification-time fname))))
-    (if (common:file-exists? fname)
-	(if (> (- (current-seconds) fmod-time) expire-time)
-	    (begin
-	      (handle-exceptions exn #f (delete-file* fname))	
-	      (common:simple-file-lock fname expire-time: expire-time))
-	    #f)
-	(let ((key-string (conc (get-host-name) "-" (current-process-id))))
-	  (with-output-to-file fname
-	    (lambda ()
-	      (print key-string)))
-	  (thread-sleep! 0.25)
-	  (if (common:file-exists? fname)
-	      (handle-exceptions exn
-                #f 
-                (with-input-from-file fname
-	  	  (lambda ()
-		    (equal? key-string (read-line)))))
-	      #f)))))
-
-(define (common:simple-file-lock-and-wait fname #!key (expire-time 300))
-  (let ((end-time (+ expire-time (current-seconds))))
-    (let loop ((got-lock (common:simple-file-lock fname expire-time: expire-time)))
-      (if got-lock
-	  #t
-	  (if (> end-time (current-seconds))
-	      (begin
-		(thread-sleep! 3)
-		(loop (common:simple-file-lock fname expire-time: expire-time)))
-	      #f)))))
-
-(define (common:simple-file-release-lock fname)
-  (handle-exceptions
-      exn
-      #f ;; I don't really care why this failed (at least for now)
-    (delete-file* fname)))
 
 ;;======================================================================
 ;; S T A T E S   A N D   S T A T U S E S
 ;;======================================================================
 

Index: commonmod.scm
==================================================================
--- commonmod.scm
+++ commonmod.scm
@@ -17,16 +17,19 @@
 ;;     along with Megatest.  If not, see <http://www.gnu.org/licenses/>.
 
 ;;======================================================================
 
 (declare (unit commonmod))
+
+(use srfi-69)
 
 (module commonmod
 	*
-	
+
 (import scheme chicken data-structures extras files)
-(import (prefix sqlite3 sqlite3:) posix typed-records srfi-18 srfi-69
+(import (prefix sqlite3 sqlite3:)
+	posix typed-records srfi-18 srfi-69
 	md5 message-digest
 	regex srfi-1)
 
 ;;======================================================================
 ;; CONTENTS
@@ -44,10 +47,21 @@
   (conc megatest-version "-" megatest-fossil-hash))
 
 (define (version-signature)
   (conc megatest-version "-" (substring megatest-fossil-hash 0 4)))
 
+(define *common:denoise*    (make-hash-table)) ;; for low noise printing
+
+(define (common:low-noise-print waitval . keys)
+  ;; Returns #t, storing the current time under the "-"-joined keys, when more than waitval
+  ;; seconds have passed since the time stored for those keys (0 if none); otherwise #f.
+  (let* ((tag      (string-intersperse (map conc keys) "-"))
+         (previous (hash-table-ref/default *common:denoise* tag 0))
+         (now      (current-seconds)))
+    (and (> (- now previous) waitval)
+         (begin (hash-table-set! *common:denoise* tag now)
+                #t))))
 
 ;;======================================================================
 ;; config file utils
 ;;======================================================================
 
@@ -72,10 +86,55 @@
 	'()))) ;; should it return empty list or #f to indicate not set?
 
 
 (define (get-section cfgdat section)
   (hash-table-ref/default cfgdat section '()))
+
+;; Simple lock file; the dot-locking egg seems not to work, so this is used for now.
+;; Returns #t when this process wrote fname and read its own "host-pid" key back,
+;; otherwise #f. A lock older than expire-time seconds is deleted and the attempt is
+;; retried, but only if fname is really gone, so an undeletable lock cannot spin forever.
+(define (common:simple-file-lock fname #!key (expire-time 300))
+  (let ((fmod-time (handle-exceptions
+                       exn
+                     (current-seconds) ;; mtime unavailable: age computes as zero
+                     (file-modification-time fname))))
+    (if (file-exists? fname) ;; (common:file-exists? fname)
+        (if (> (- (current-seconds) fmod-time) expire-time)
+            (begin
+              (handle-exceptions exn #f (delete-file* fname))
+              (and (not (file-exists? fname)) (common:simple-file-lock fname expire-time: expire-time)))
+            #f)
+        (let ((key-string (conc (get-host-name) "-" (current-process-id))))
+          (with-output-to-file fname
+            (lambda ()
+              (print key-string)))
+          (thread-sleep! 0.25) ;; presumably gives a competing writer time to overwrite the file
+          (if (file-exists? fname) ;; (common:file-exists? fname)
+              (handle-exceptions exn
+                #f
+                (with-input-from-file fname
+                  (lambda ()
+                    (equal? key-string (read-line)))))
+              #f)))))
+
+(define (common:simple-file-lock-and-wait fname #!key (expire-time 300))
+  ;; try for the lock, sleeping 3 seconds between attempts; #t once obtained, #f after expire-time seconds
+  (let ((deadline (+ expire-time (current-seconds))))
+    (let retry ()
+      (cond
+       ((common:simple-file-lock fname expire-time: expire-time) #t)
+       ((> deadline (current-seconds))
+        (thread-sleep! 3)
+        (retry))
+       (else #f)))))
+
+(define (common:simple-file-release-lock fname)
+  ;; Best-effort removal of the lock file: whatever delete-file* returns is passed
+  ;; through, and any condition raised while deleting is swallowed, giving #f.
+  (condition-case (delete-file* fname)
+    (() #f)))
 
 ;;======================================================================
 ;; misc conversion, data manipulation functions
 ;;======================================================================
 

Index: db.scm
==================================================================
--- db.scm
+++ db.scm
@@ -71,10 +71,11 @@
 ;;
 (defstruct dbr:counts
   (state #f)
   (status #f)
   (count  0)) 
+
 
 ;;======================================================================
 ;; hash of hashs
 ;;======================================================================
 
@@ -125,23 +126,16 @@
 	   (debug:print-error 0 *default-log-port* " query " stmt " failed, params: " params ", error: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn)
 	   (print-call-chain (current-error-port))
 	   default)))
    (apply sqlite3:first-result db stmt params)))
 
-(define (db:generic-error-printout exn . message)
-  (print-call-chain (current-error-port))
-  (apply debug:print-error 0 *default-log-port* message)
-  (debug:print-error 0 *default-log-port* ;; "   params: " params
-		     ", error: "     ((condition-property-accessor 'exn 'message)   exn)
-		     ", arguments: " ((condition-property-accessor 'exn 'arguments) exn)
-		     ", location: "  ((condition-property-accessor 'exn 'location)  exn)
-		     ))
-
 (define (db:setup do-sync)
   (assert *toppath* "FATAL: db:setup called before launch:setup has been run.")
   (let* ((tmpdir (common:get-db-tmp-area)))
-    (dbfile:setup do-sync *toppath* tmpdir)))
+    ;; hand back *dbstruct-dbs* when it is not #f; otherwise run
+    ;; dbfile:setup and return whatever it returns
+    (or *dbstruct-dbs* (dbfile:setup do-sync *toppath* tmpdir))))
 
 ;; looks up subdb and returns it, if not found then set up
 ;; and then return it.
 ;;
 #;(define (db:get-db dbstruct run-id)
@@ -194,59 +188,10 @@
 		     ", error: "     ((condition-property-accessor 'exn 'message)   exn)
 		     ", arguments: " ((condition-property-accessor 'exn 'arguments) exn)
 		     ", location: "  ((condition-property-accessor 'exn 'location)  exn)
 		     ))
 
-(define (db:open-db dbstruct run-id)
-  (let* ((dbdat (dbfile:open-db dbstruct run-id db:initialize-main-db)))
-    (set! *db-write-access* (not (dbr:dbdat-read-only dbdat)))
-    dbdat))
-
-;; (db:with-db dbstruct run-id sqlite3:exec "select blah fgrom blaz;")
-;; r/w is a flag to indicate if the db is modified by this query #t = yes, #f = no
-;;
-(define (db:with-db dbstruct run-id r/w proc . params)
-  (let* ((have-struct (dbr:dbstruct? dbstruct))
-         (dbdat     (if have-struct                ;; this stuff just allows us to call with a db handle directly
-			(db:open-db dbstruct run-id) ;; (dbfile:get-subdb dbstruct run-id)
-			#f))
-	 (db        (if have-struct                ;; this stuff just allows us to call with a db handle directly
-			(dbr:dbdat-dbh dbdat)
-			dbstruct))
-	 (fname     (if dbdat
-			(dbr:dbdat-dbfile dbdat)
-			"nofilenameavailable"))
-	 #;(subdb     (if have-struct
-			(dbfile:get-subdb dbstruct run-id)
-			#f))
-	 (use-mutex (> *api-process-request-count* 25))) ;; was 25
-    (if (and use-mutex
-	     (common:low-noise-print 120 "over-50-parallel-api-requests"))
-	(debug:print-info 0 *default-log-port* *api-process-request-count* " parallel api requests being processed in process " (current-process-id) ", throttling access"))
-    (if (common:low-noise-print 600 (conc "parallel-api-requests" *max-api-process-requests*))
-	(debug:print-info 1 *default-log-port* "Parallel api request count: " *api-process-request-count* " max parallel requests: " *max-api-process-requests*))
-    (condition-case
-	(begin
-	  (if use-mutex (mutex-lock! *db-with-db-mutex*))
-	  (let ((res (apply proc dbdat db params)))
-	    (if use-mutex (mutex-unlock! *db-with-db-mutex*))
-	    ;; (if (vector? dbstruct)(db:done-with dbstruct run-id r/w))
-	    (if dbdat
-		(dbfile:add-dbdat dbstruct run-id dbdat))
-	    res))
-      (exn (io-error)
-	   (db:generic-error-printout exn "ERROR: i/o error with " fname ". Check permissions, disk space etc. and try again."))
-      (exn (corrupt)
-	   (db:generic-error-printout exn "ERROR: database " fname " is corrupt. Repair it to proceed."))
-      (exn (busy)
-	   (db:generic-error-printout exn "ERROR: database " fname
-				      " is locked. Try copying to another location, remove original and copy back."))
-      (exn (permission)(db:generic-error-printout exn "ERROR: database " fname " has some permissions problem."))
-      (exn ()
-	   (db:generic-error-printout exn "ERROR: Unknown error with database " fname " message: "
-				      ((condition-property-accessor 'exn 'message) exn))))))
-      
 
 ;; NB// #f => return dbdir only
 ;;      (was planned to be;  zeroth db with name=main.db)
 ;; 
 ;; If run-id is #f return to create and retrieve the path where the db will live.
@@ -430,31 +375,10 @@
     (if (and dbexists (not write-access))
 	(set! *db-write-access* #f))
     ;; (cons db dbpath)))
     (make-dbr:dbdat dbfile: dbpath dbh: db read-only: (not write-access))))
 
-;; sync run from tmp disk to nfs disk if touched
-;;
-(define (db:sync-touched dbstruct run-id #!key (force-sync #f))
-  (debug:print-info 0 *default-log-port* "db:sync-touched Syncing: " (conc (if run-id run-id "main") ".db"))
-  (let* (;; the subdb is needed to access the mtdbdat
-	 (subdb     (or (dbfile:get-subdb dbstruct run-id)
-			(dbfile:init-subdb dbstruct run-id db:initialize-main-db)))
-         (tmpdbfile (dbr:subdb-tmpdbfile subdb))
-	 (mtdb      (dbr:subdb-mtdbdat subdb))
-         (tmpdb     (dbfile:open-db dbstruct run-id db:initialize-main-db)) ;; sqlite3-db tmpdbfile #f))
-	 (start-t   (current-seconds)))
-    (mutex-lock! *db-multi-sync-mutex*)
-    (let ((update_info (cons "last_update" (if force-sync 0 *db-last-sync*) )))
-      (mutex-unlock! *db-multi-sync-mutex*)
-      (db:sync-tables (db:sync-all-tables-list dbstruct) update_info tmpdb mtdb))
-    (mutex-lock! *db-multi-sync-mutex*)
-    (set! *db-last-sync* start-t)
-    (set! *db-last-access* start-t)
-    (mutex-unlock! *db-multi-sync-mutex*)
-    (dbfile:add-dbdat dbstruct run-id tmpdb)
-  #t))
 
 
 ;; db:safely-close-sqlite3-db and db:close-all were here, moved to dbfile
 
 ;;   (let ((locdbs (dbr:dbstruct-locdbs dbstruct)))
@@ -468,110 +392,10 @@
 ;; 	 (handler (make-busy-timeout 3600)))
 ;;     (sqlite3:set-busy-handler! db handler)
 ;;     (db:initialize-run-id-db db)
 ;;     (cons db #f)))
 
-;; just tests, test_steps and test_data tables
-(define db:sync-tests-only
-  (list
-   ;; (list "strs"
-   ;;       '("id"             #f)
-   ;;       '("str"            #f))
-   (list "tests" 
-	 '("id"             #f)
-	 '("run_id"         #f)
-	 '("testname"       #f)
-	 '("host"           #f)
-	 '("cpuload"        #f)
-	 '("diskfree"       #f)
-	 '("uname"          #f)
-	 '("rundir"         #f)
-	 '("shortdir"       #f)
-	 '("item_path"      #f)
-	 '("state"          #f)
-	 '("status"         #f)
-	 '("attemptnum"     #f)
-	 '("final_logf"     #f)
-	 '("logdat"         #f)
-	 '("run_duration"   #f)
-	 '("comment"        #f)
-	 '("event_time"     #f)
-	 '("fail_count"     #f)
-	 '("pass_count"     #f)
-	 '("archived"       #f)
-         '("last_update"    #f))
-  (list "test_steps"
-	 '("id"             #f)
-	 '("test_id"        #f)
-	 '("stepname"       #f)
-	 '("state"          #f)
-	 '("status"         #f)
-	 '("event_time"     #f)
-	 '("comment"        #f)
-	 '("logfile"        #f)
-         '("last_update"    #f))
-   (list "test_data"
-	 '("id"             #f)
-	 '("test_id"        #f)
-	 '("category"       #f)
-	 '("variable"       #f)
-	 '("value"          #f)
-	 '("expected"       #f)
-	 '("tol"            #f)
-	 '("units"          #f)
-	 '("comment"        #f)
-	 '("status"         #f)
-	 '("type"           #f)
-         '("last_update"    #f))))
-
-;; needs db to get keys, this is for syncing all tables
-;;
-(define (db:sync-main-list dbstruct)
-  (let ((keys  (db:get-keys dbstruct)))
-    (list
-     (list "keys"
-	   '("id"        #f)
-	   '("fieldname" #f)
-	   '("fieldtype" #f))
-     (list "metadat" '("var" #f) '("val" #f))
-     (append (list "runs" 
-		   '("id"  #f))
-	     (map (lambda (k)(list k #f))
-		  (append keys
-			  (list "runname" "state" "status" "owner" "event_time" "comment" "fail_count" "pass_count" "contour" "last_update"))))
-     (list "archive_disks"
-           '("id" #f)
-           '("archive_area_name" #f) 
-           '("disk_path" #f)
-           '("last_df" #f)
-           '("last_df_time" #f)
-           '("creation_time" #f)) 
-
-     (list "archive_blocks"
-           '("id" #f)
-           '("archive_disk_id" #f) 
-           '("disk_path" #f)
-           '("last_du" #f)
-           '("last_du_time" #f)
-           '("creation_time" #f)) 
-
-     (list "test_meta"
-	   '("id"             #f)
-	   '("testname"       #f)
-	   '("owner"          #f)
-	   '("description"    #f)
-	   '("reviewed"       #f)
-	   '("iterated"       #f)
-	   '("avg_runtime"    #f)
-	   '("avg_disk"       #f)
-	   '("tags"           #f)
-	   '("jobgroup"       #f)))))
-
-(define (db:sync-all-tables-list dbstruct)
-  (append (db:sync-main-list dbstruct)
-	  db:sync-tests-only))
-
 ;; use bunch of Unix commands to try to break the lock and recreate the db
 ;;
 (define (db:move-and-recreate-db dbdat)
   (let* ((dbpath   (dbr:dbdat-dbfile        dbdat))
 	 (dbdir    (pathname-directory       dbpath))
@@ -645,240 +469,10 @@
 	   (sqlite3:execute db "vacuum;")))
 	 
 	 (sqlite3:finalize! db)
 	 #t))))))
     
-;; tbls is ( ("tablename" ( "field1" [#f|proc1] ) ( "field2" [#f|proc2] ) .... ) )
-;; db's are dbdat's
-;;
-;; if last-update specified ("field-name" . time-in-seconds)
-;;    then sync only records where field-name >= time-in-seconds
-;;    IFF field-name exists
-;;
-(define (db:sync-tables tbls last-update fromdb todb . slave-dbs)
-  (handle-exceptions
-   exn
-   (begin
-     (debug:print 0 *default-log-port* "EXCEPTION: database probably overloaded or unreadable in db:sync-tables.")
-     (print-call-chain (current-error-port))
-     (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
-     (debug:print 0 *default-log-port* "exn=" (condition->list exn))
-     (debug:print 0 *default-log-port* " status:  " ((condition-property-accessor 'sqlite3 'status) exn))
-     (debug:print 0 *default-log-port* " src db:  " (dbr:dbdat-dbfile fromdb))
-     (for-each (lambda (dbdat)
-		 (let ((dbpath (dbr:dbdat-dbfile dbdat)))
-		   (debug:print 0 *default-log-port* " dbpath:  " dbpath)
-		   (if (not (db:repair-db dbdat))
-		       (begin
-			 (debug:print-error 0 *default-log-port* "Failed to rebuild " dbpath ", exiting now.")
-			 (exit)))))
-	       (cons todb slave-dbs))
-     
-     0)
-
-   ;; this is the work to be done")
-   (cond
-    ((not fromdb) (debug:print 0 *default-log-port* "WARNING: db:sync-tables called with fromdb missing")
-     -1)
-    ((not todb)   (debug:print 0 *default-log-port* "WARNING: db:sync-tables called with todb missing")
-     -2)
-    ((not (sqlite3:database? (dbr:dbdat-dbh fromdb)))
-     (debug:print-error 0 *default-log-port* "db:sync-tables called with fromdb not a database " fromdb)
-   -3)
-    ((not (sqlite3:database? (dbr:dbdat-dbh todb)))
-     (debug:print-error 0 *default-log-port* "db:sync-tables called with todb not a database " todb)
-   -4)
-
-    ((not (file-write-access? (dbr:dbdat-dbfile todb)))
-     (debug:print-error 0 *default-log-port* "db:sync-tables called with todb not a read-only database " todb)
-     -5)
-    ((not (null? (let ((readonly-slave-dbs
-                        (filter
-                         (lambda (dbdat)
-                           (not (file-write-access? (dbr:dbdat-dbfile todb))))
-                         slave-dbs)))
-                   (for-each
-                    (lambda (bad-dbdat)
-                      (debug:print-error
-                       0 *default-log-port* "db:sync-tables called with todb not a read-only database " bad-dbdat))
-                    readonly-slave-dbs)
-                   readonly-slave-dbs))) -6)
-    (else
-     (debug:print 3 *default-log-port* "db:sync-tables: args are good")
-
-     (let ((stmts       (make-hash-table)) ;; table-field => stmt
-	   (all-stmts   '())              ;; ( ( stmt1 value1 ) ( stml2 value2 ))
-	   (numrecs     (make-hash-table))
-	   (start-time  (current-milliseconds))
-	   (tot-count   0))
-       (for-each ;; table
-	(lambda (tabledat)
-	  (let* ((tablename        (car tabledat))
-		 (fields           (cdr tabledat))
-		 (has-last-update  (member "last_update" fields))
-		 (use-last-update  (cond
-				    ((and has-last-update
-					  (member "last_update" fields))
-				     #t) ;; if given a number, just use it for all fields
-				    ((number? last-update) #f) ;; if not matched first entry then ignore last-update for this table
-				    ((and (pair? last-update)
-					  (member (car last-update)    ;; last-update field name
-						  (map car fields)))
-                                        #t)
-				    ((and last-update (not (pair? last-update)) (not (number? last-update)))
-				     (debug:print 0 *default-log-port* "ERROR: parameter last-update for db:sync-tables must be a pair or a number, received: " last-update);; found in fields
-				     #f)
-				    (else
-				     #f)))
-		 (last-update-value (if use-last-update ;; no need to check for has-last-update - it is already accounted for
-					(if (number? last-update)
-					    last-update
-					    (cdr last-update))
-					#f))
-		 (last-update-field (if use-last-update
-					(if (number? last-update)
-					    "last_update"
-					    (car last-update))
-					#f))
-		 (num-fields (length fields))
-		 (field->num (make-hash-table))
-		 (num->field (apply vector (map car fields))) ;; BBHERE
-		 (full-sel   (conc "SELECT " (string-intersperse (map car fields) ",") 
-				   " FROM " tablename (if use-last-update ;; apply last-update criteria
-							  (conc " WHERE " last-update-field " >= " last-update-value)
-							  "")
-				   ";"))
-		 (full-ins   (conc "INSERT OR REPLACE INTO " tablename " ( " (string-intersperse (map car fields) ",") " ) "
-				   " VALUES ( " (string-intersperse (make-list num-fields "?") ",") " );"))
-		 (fromdat    '())
-		 (fromdats   '())
-		 (totrecords 0)
-		 (batch-len  (string->number (or (configf:lookup *configdat* "sync" "batchsize") "100")))
-		 (todat      (make-hash-table))
-		 (count      0)
-                 (field-names (map car fields))
-                 (delay-handicap  (string->number (or (configf:lookup *configdat* "sync" "delay-handicap") "0")))
-                 )
-
-	    ;; set up the field->num table
-	    (for-each
-	     (lambda (field)
-	       (hash-table-set! field->num field count)
-	       (set! count (+ count 1)))
-	     fields)
-
-	    ;; read the source table
-            ;; store a list of all rows in the table in fromdat, up to batch-len.
-            ;; Then add fromdat to the fromdats list, clear fromdat and repeat.
-	    (sqlite3:for-each-row
-	     (lambda (a . b)
-	       (set! fromdat (cons (apply vector a b) fromdat))
-	       (if (> (length fromdat) batch-len)
-		   (begin
-		     (set! fromdats (cons fromdat fromdats))
-		     (set! fromdat  '())
-		     (set! totrecords (+ totrecords 1)))
-               )
-             )
-	     (dbr:dbdat-dbh fromdb)
-	     full-sel)
-
-             ;; Count less than batch-len as a record
-             (if (> (length fromdat) 0)
-                 (set! totrecords (+ totrecords 1)))
-
-	    ;; tack on remaining records in fromdat
-	    (if (not (null? fromdat))
-		(set! fromdats (cons fromdat fromdats)))
-
-	    (if (common:low-noise-print 120 "sync-records")
-		(debug:print 0 *default-log-port* "found " totrecords " records to sync"))
-
-	    (sqlite3:for-each-row
-	     (lambda (a . b)
-	       (hash-table-set! todat a (apply vector a b)))
-	     (dbr:dbdat-dbh todb)
-	     full-sel)
-
-            (when (and delay-handicap (> delay-handicap 0))
-              (debug:print-info 0 *default-log-port* "imposing synthetic sync delay of "delay-handicap" seconds since sync/delay-handicap is configured")
-              (thread-sleep! delay-handicap)
-              (debug:print-info 0 *default-log-port* "synthetic sync delay of "delay-handicap" seconds completed")
-              )
-            
-	    ;; first pass implementation, just insert all changed rows
-
-	    (for-each 
-	     (lambda (targdb)
-	       (let* ((db                 (dbr:dbdat-dbh targdb))
-                      (drp-trigger        (if (member "last_update" field-names)
-					      (db:drop-trigger db tablename) 
-					      #f))
-                      (is-trigger-dropped (if (member "last_update" field-names)
-                                              (db:is-trigger-dropped db tablename)
-					      #f)) 
-		      (stmth  (sqlite3:prepare db full-ins))
-                      (changed-rows 0))
-		 ;; (db:delay-if-busy targdb) ;; NO WAITING
-                 (if (member "last_update" field-names)
-                     (debug:print-info 3 *default-log-port* "is-trigger-dropped: " is-trigger-dropped)) 
-
-		 (for-each
-		  (lambda (fromdat-lst)
-		    (sqlite3:with-transaction
-		     db
-		     (lambda ()
-		       (for-each ;; 
-			(lambda (fromrow)
-			  (let* ((a    (vector-ref fromrow 0))
-				 (curr (hash-table-ref/default todat a #f))
-				 (same #t))
-			    (let loop ((i 0))
-			      (if (or (not curr)
-				      (not (equal? (vector-ref fromrow i)(vector-ref curr i))))
-				  (set! same #f))
-			      (if (and same
-				       (< i (- num-fields 1)))
-				  (loop (+ i 1))))
-			    (if (not same)
-				(begin
-				  (apply sqlite3:execute stmth (vector->list fromrow))
-				  (hash-table-set! numrecs tablename (+ 1 (hash-table-ref/default numrecs tablename 0)))
-                                  (set! changed-rows (+ changed-rows 1))
-                                )
-                            )
-                            ))
-			fromdat-lst))))
-		  fromdats)
-
-
-                 (if (> changed-rows 0)
-	           (debug:print 0 *default-log-port* "table " tablename " changed rows: " changed-rows)
-                 )
-
-
-		 (sqlite3:finalize! stmth)
-                 (if (member "last_update" field-names)
-                    (db:create-trigger db tablename))))
-	     (append (list todb) slave-dbs)
-           )
-          )
-        )
-	tbls)
-       (let* ((runtime      (- (current-milliseconds) start-time))
-	      (should-print (or (debug:debug-mode 12)
-				(common:low-noise-print 120 "db sync" (> runtime 500))))) ;; low and high sync times treated as separate.
-	 (if should-print (debug:print 0 *default-log-port* "INFO: db sync, total run time " runtime " ms"))
-	 (for-each 
-	  (lambda (dat)
-	    (let ((tblname (car dat))
-		  (count   (cdr dat)))
-	      (set! tot-count (+ tot-count count))
-	      (if (> count 0)
-		  (if should-print (debug:print 0 *default-log-port* (format #f "    ~10a ~5a" tblname count))))))
-	  (sort (hash-table->alist numrecs)(lambda (a b)(> (cdr a)(cdr b))))))
-       tot-count)))))
 
 (define (db:patch-schema-rundb frundb)
   ;;
   ;; remove this some time after September 2016 (added in version v1.6031
   ;;
@@ -1039,11 +633,11 @@
 ;; 			  (lambda ()
 ;; 			    (if (and (common:file-exists? megatest-db)
 ;; 				     (file-write-access? megatest-db))
 ;; 				(begin
 ;; 				  (db:sync-to-megatest.db dbstruct 'timestamps) ;; internally mutexes on *db-local-sync*
-;; 				  (debug:print-info 2 *default-log-port* "Done syncing to megatest.db"))))
+;; 				  (debug:print-info 2 *default-log-port* "Done syncing to megatest.db")))) 
 ;; 			  "call-with-cached-db sync-to-megatest.db"))
 ;; 	       (cache-db (db:cache-for-read-only
 ;; 			  megatest-db
 ;; 			  (conc cache-dir "/" fname)
 ;; 			  use-last-update: #t)))
@@ -1051,37 +645,13 @@
 ;; 	  (apply proc cache-db params)
 ;; 	  ))))
 
 
 
-
-;; Get a lock from the no-sync-db for the from-db, then delta sync the from-db to the to-db, otherwise return #f
-
-(define (db:lock-and-delta-sync no-sync-db dbstruct from-db-file runid)
-  (assert (not *db-sync-in-progress*) "FATAL: db:lock-and-sync called while a sync is in progress.")
-  (debug:print-info 3 *default-log-port* "db:lock-and-delta-sync")
-  (let* ((lockdat  (db:no-sync-get-lock no-sync-db from-db-file))
-	 (gotlock  (car lockdat))
-	 (locktime (cdr lockdat)))
-
-    (debug:print-info 3 *default-log-port* "db:lock-and-delta-sync: got lock?")
-    (if gotlock
-	(begin
-          (debug:print 0 *default-log-port* "db:lock-and-delta-sync copying db "runid" at "(current-seconds))
-          (db:sync-touched dbstruct runid)
-	  (db:no-sync-del! no-sync-db from-db-file)
-	  #t)
-        (begin
-          (debug:print 0 *default-log-port* "could not get lock for " from-db-file " from no-sync-db")
-	  #f
-        ))))
-
-
-
 
 (define (db:all-db-sync dbstruct)
-  (let* ((dbdat (db:open-db dbstruct #f))
+  (let* ((dbdat (db:open-db dbstruct #f db:initialize-main-db))
 	 (data-synced       0) ;; count of changed records
     (tmp-area       (common:get-db-tmp-area))
     (dbfiles        (glob (conc tmp-area"/.db/*.db")))
     (sync-durations (make-hash-table))
     (no-sync-db        (db:open-no-sync-db)))
@@ -1117,11 +687,11 @@
               (fname (pathname-file file))
               (runid (if (string= fname "main") #f (string->number fname)))
               )
 	       (debug:print-info 3 *default-log-port* "db:all-db-sync: fname: "
 				 fname", delta: " (- time1 time2) " seconds")
-	       (db:lock-and-delta-sync no-sync-db dbstruct fname runid)
+	       (db:lock-and-delta-sync no-sync-db dbstruct fname runid (db:get-keys dbstruct) db:initialize-main-db)
 	       (hash-table-set! sync-durations (conc fname".db")
 				(- (current-milliseconds) start-time)))
 	     (debug:print-info 3 *default-log-port* "skipping sync. " file " is up to date")
          )))
      dbfiles
@@ -1357,99 +927,10 @@
 #;(define open-run-close open-run-close-exception-handling)
 		;;	   open-run-close-no-exception-handling
 ;;			   open-run-close-exception-handling)
 ;;)
 
-(define db:trigger-list 
-     (list (list "update_runs_trigger"  "CREATE TRIGGER IF NOT EXISTS update_runs_trigger AFTER UPDATE ON runs
-                             FOR EACH ROW
-                               BEGIN 
-                                 UPDATE runs SET last_update=(strftime('%s','now'))
-                                   WHERE id=old.id;
-                               END;" ) 
-	   (list "update_run_stats_trigger"  "CREATE TRIGGER  IF NOT EXISTS update_run_stats_trigger AFTER UPDATE ON run_stats
-                             FOR EACH ROW
-                               BEGIN 
-                                 UPDATE run_stats SET last_update=(strftime('%s','now'))
-                                   WHERE id=old.id;
-                               END;" )
-	   (list "update_tests_trigger"  "CREATE TRIGGER  IF NOT EXISTS update_tests_trigger AFTER UPDATE ON tests
-                             FOR EACH ROW
-                               BEGIN 
-                                 UPDATE tests SET last_update=(strftime('%s','now'))
-                                   WHERE id=old.id;
-                               END;" )
-	   (list "update_teststeps_trigger"  "CREATE TRIGGER  IF NOT EXISTS update_teststeps_trigger AFTER UPDATE ON test_steps
-                             FOR EACH ROW
-                               BEGIN 
-                                 UPDATE test_steps SET last_update=(strftime('%s','now'))
-                                   WHERE id=old.id;
-                               END;" )
-	   (list "update_test_data_trigger"  "CREATE TRIGGER  IF NOT EXISTS update_test_data_trigger AFTER UPDATE ON test_data
-                             FOR EACH ROW
-                               BEGIN 
-                                 UPDATE test_data SET last_update=(strftime('%s','now'))
-                                   WHERE id=old.id;
-                               END;" )))
-;;
-;; ADD run-id SUPPORT
-;;
-(define (db:create-all-triggers dbstruct)
-(db:with-db
-   dbstruct #f #f
-   (lambda (dbdat db)
-(db:create-triggers db))))
-
-(define (db:create-triggers db)
-    (for-each (lambda (key)
-              (sqlite3:execute db (cadr key)))
-          db:trigger-list))
-
-(define (db:drop-all-triggers dbstruct)
-  (db:with-db
-   dbstruct #f #f
-   (lambda (dbdat db)
-     (db:drop-triggers db))))
-
-(define (db:is-trigger-dropped db tbl-name)
-  (let* ((trigger-name (if (equal? tbl-name "test_steps")
-			   "update_teststeps_trigger" 
-                           (conc "update_" tbl-name "_trigger")))
-	 (res          #f))
-    (sqlite3:for-each-row
-     (lambda (name)
-       (if (equal? name trigger-name)
-	   (set! res #t)))
-     db 
-     "SELECT name FROM sqlite_master WHERE type = 'trigger' ;" 
-     )))
-
-(define (db:drop-triggers db)
-  (for-each
-   (lambda (key) 
-     (sqlite3:execute db (conc "drop trigger if exists " (car key))))
-   db:trigger-list))
-
-(define  (db:drop-trigger db tbl-name)
-  (let* ((trigger-name (if (equal? tbl-name "test_steps")
-			   "update_teststeps_trigger" 
-                           (conc "update_" tbl-name "_trigger"))))
-    (for-each
-     (lambda (key) 
-       (if (equal? (car key) trigger-name)
-           (sqlite3:execute db (conc "drop trigger if exists " trigger-name))))
-     db:trigger-list)))
-
-(define  (db:create-trigger db tbl-name)
-      (let* ((trigger-name (if (equal? tbl-name "test_steps")
-                              "update_teststeps_trigger" 
-                              (conc "update_" tbl-name "_trigger"))))
-       (for-each (lambda (key) 
-             (if (equal? (car key) trigger-name)
-             (sqlite3:execute db (cadr key))))
-      db:trigger-list))) 
-
 
 (define (db:initialize-main-db db)
   (when (not *configinfo*)
            (launch:setup)) ;; added because Elena was getting stack dump because *configinfo* below was #f.
   (let* ((configdat (car *configinfo*))  ;; tut tut, global warning...
@@ -2308,10 +1789,11 @@
 ;; using keys:config-get-fields?
 
 (define (db:get-keys dbstruct)
   (keys:config-get-fields *configdat*)
 )
+
 ;;  (if *db-keys* *db-keys* 
 ;;      (let ((res '()))
 ;;	(db:with-db dbstruct #f #f
 ;;		    (lambda (dbdat db)
 ;;		      (sqlite3:for-each-row 
@@ -2438,16 +1920,18 @@
 	 (andstr    (if (> (length keys) 0) " AND " ""))
 	 (valslots  (keys->valslots keys)) ;; ?,?,? ...
 	 (allvals   (append (list runname state status user contour) (map cadr keyvals)))
 	 (qryvals   (append (list runname) (map cadr keyvals)))
 	 (key=?str  (string-intersperse (map (lambda (k)(conc k "=?")) keys) " AND ")))
+  (debug:print 0 *default-log-port* "Got here 0.")
     (debug:print 3 *default-log-port* "keys: " keys " allvals: " allvals " keyvals: " keyvals " key=?str is " key=?str)
     (debug:print 2 *default-log-port* "NOTE: using target " (string-intersperse (map cadr keyvals) "/") " for this run")
     (if (and runname (null? (filter (lambda (x)(not x)) keyvals))) ;; there must be a better way to "apply and"
 	(db:with-db
 	 dbstruct #f #f
 	 (lambda (dbdat db)
+  (debug:print 0 *default-log-port* "Got here 1.")
 	   (let ((res #f))
 	     (apply sqlite3:execute db (conc "INSERT OR IGNORE INTO runs (runname,state,status,owner,event_time,contour" comma keystr ") VALUES (?,?,?,?,strftime('%s','now'),?" comma valslots ");")
 		    allvals)
 	     (apply sqlite3:for-each-row 
 		    (lambda (id)

Index: dbfile.scm
==================================================================
--- dbfile.scm
+++ dbfile.scm
@@ -18,28 +18,29 @@
 
 ;;======================================================================
 
 (declare (unit dbfile))
 ;; (declare (uses debugprint))
-;; (declare (uses commonmod))
+(declare (uses commonmod))
 
 (module dbfile
 	*
 	
   (import scheme
 	  chicken
 	  data-structures
-	  extras)
+	  extras
+	  matchable)
   
 (import (prefix sqlite3 sqlite3:)
-	posix typed-records srfi-18
+	posix typed-records srfi-18 srfi-1
 	srfi-69
 	stack
 	files
 	ports
 
-	;; commonmod
+	commonmod
 	)
 
 ;; (import debugprint)
 
 ;;======================================================================
@@ -77,15 +78,32 @@
 ;; need to keep dbhandles and cached statements together
 (defstruct dbr:dbdat
   (dbfile      #f)
   (dbh         #f)    
   (stmt-cache  (make-hash-table))
-  (read-only   #f))
+  (read-only   #f)
+  (birth-sec   (current-seconds)))
 
 (define *dbstruct-dbs* #f)
 (define *db-access-mutex* (make-mutex))
 (define *no-sync-db*   #f)
+(define *db-sync-in-progress* #f)                ;; set to #t by db:lock-and-delta-sync while db:sync-touched runs
+(define *db-with-db-mutex*    (make-mutex))      ;; NOTE(review): no user visible in this part of the change - confirm
+(define *max-api-process-requests* 0)            ;; NOTE(review): presumably a high-water mark of the counter below - confirm
+(define *api-process-request-count* 0)
+(define *db-write-access*     #t)
+(define *db-last-sync*        0)                 ;; last time the sync to megatest.db happened (set by db:sync-touched)
+(define *db-multi-sync-mutex* (make-mutex))      ;; guards *db-last-sync*; NOTE(review): db:lock-and-delta-sync sets *db-sync-in-progress* without it
+
+(define (db:generic-error-printout exn . message)
+  ;; print the call chain, then the caller's message, then the 'exn details of the condition
+  (let ((exn-prop (lambda (prop) ((condition-property-accessor 'exn prop) exn))))
+    (print-call-chain (current-error-port))
+    (apply dbfile:print-err message)
+    (dbfile:print-err ", error: "     (exn-prop 'message)
+		      ", arguments: " (exn-prop 'arguments)
+		      ", location: "  (exn-prop 'location))))
 
 (define (dbfile:run-id->key run-id)
   (or run-id 'main))
 
 (define (db:safely-close-sqlite3-db db stmt-cache #!key (try-num 3))
@@ -222,13 +240,20 @@
 	  (set! *dbfile:num-handles-in-use* (+ *dbfile:num-handles-in-use* 1))
 	  (stack-pop! (dbr:subdb-dbstack subdb))))))
 
 ;; return a previously opened db handle to the stack of available handles
 (define (dbfile:add-dbdat dbstruct run-id dbdat)
-  (let* ((subdb (dbfile:get-subdb dbstruct run-id)))
-    (set! *dbfile:num-handles-in-use* (- *dbfile:num-handles-in-use* 1))
-    (stack-push! (dbr:subdb-dbstack subdb) dbdat)))
+  (let* ((subdb (dbfile:get-subdb dbstruct run-id))
+	 (age   (- (current-seconds)(dbr:dbdat-birth-sec dbdat))))
+    ;; the handle stops being "in use" whether it is recycled or discarded
+    (set! *dbfile:num-handles-in-use* (- *dbfile:num-handles-in-use* 1))
+    (if (> age 300) ;; just testing - discard handles more than 300 sec old
+	;; NOTE(review): the discarded handle is not closed here; the finalize
+	;; calls below were left disabled by the author - confirm before enabling.
+	;; (map sqlite3:finalize! (hash-table-values (dbr:dbdat-stmt-cache dbdat)))
+	(dbfile:print-err "INFO: Discarded dbdat over 300 sec old ("age"s)")
+	(stack-push! (dbr:subdb-dbstack subdb) dbdat))))
 
 ;; set up a subdb
 ;;
 (define (dbfile:init-subdb dbstruct run-id init-proc)
   (let* ((dbname    (dbfile:run-id->dbname run-id))
@@ -476,34 +501,37 @@
 	  (if (eq? tries-left 2)
 	      (begin
 		(dbfile:print-err "INFO: forcing journal rollup "busy-file)
 		(dbfile:brute-force-salvage-db fname)))
 	  (dbfile:cautious-open-database fname init-proc (- tries-left 1)))
-	(let* ((db-exists (file-exists? fname))
-	       (result (condition-case
-			   (let* ((db (sqlite3:open-database fname)))
-			     (if (and init-proc (not db-exists))
-				 (init-proc db))
-			     db)
-			(exn (io-error)
-			     (dbfile:print-err exn "ERROR: i/o error with " fname ". Check permissions, disk space etc. and try again.")
-			     (retry))
-			(exn (corrupt)
-			     (dbfile:print-err exn "ERROR: database " fname " is corrupt. Repair it to proceed.")
-			     (retry))
-			(exn (busy)
-			     (dbfile:print-err exn "ERROR: database " fname
-					       " is locked. Try copying to another location, remove original and copy back.")
-			     (retry))
-			(exn (permission)(dbfile:print-err exn "ERROR: database " fname " has some permissions problem.")
-			     (retry))
-			(exn ()
-			     (dbfile:print-err exn "ERROR: Unknown error with database " fname " message: "
-					       ((condition-property-accessor 'exn 'message) exn))
-			     (retry)))))
+	(let* ((result (condition-case
+			   (dbfile:with-simple-file-lock
+			    (conc fname ".lock")
+			    (lambda ()
+			      (let* ((db-exists (file-exists? fname))
+				     (db        (sqlite3:open-database fname)))
+				(if (and init-proc (not db-exists))
+				    (init-proc db))
+				db)))
+			 (exn (io-error)
+			      (dbfile:print-err exn "ERROR: i/o error with " fname ". Check permissions, disk space etc. and try again.")
+			      (retry))
+			 (exn (corrupt)
+			      (dbfile:print-err exn "ERROR: database " fname " is corrupt. Repair it to proceed.")
+			      (retry))
+			 (exn (busy)
+			      (dbfile:print-err exn "ERROR: database " fname
+						" is locked. Try copying to another location, remove original and copy back.")
+			      (retry))
+			 (exn (permission)(dbfile:print-err exn "ERROR: database " fname " has some permissions problem.")
+			      (retry))
+			 (exn ()
+			      (dbfile:print-err exn "ERROR: Unknown error with database " fname " message: "
+						((condition-property-accessor 'exn 'message) exn))
+			      (retry)))))
           #;(if (file-write-access? fname)
-	    (dbfile:simple-file-release-lock lock-file))
+	  (dbfile:simple-file-release-lock lock-file))
 	  result))))
 
 (define (dbfile:brute-force-salvage-db fname)
   (let* ((backupfname (conc fname"-"(current-process-id)".bak"))
 	 (cmd (conc "cp "fname" "backupfname";mv "fname" "(conc fname ".delme;")
@@ -510,11 +538,11 @@
 		    "cp "backupfname" "fname)))
     (dbfile:print-err "WARNING: attempting recovery of file "fname" by running commands:\n"
 		      "  "cmd)
     (system cmd)))
 
-(define (dbfile:cautious-open-database-orig fname init-proc #!optional (tries-left 50))
+#;(define (dbfile:cautious-open-database-orig fname init-proc #!optional (tries-left 50))
   (let* ((lock-file  (conc fname".lock"))
 	 (delay-time (* (- 51 tries-left) 1.1))
 	 (retry      (lambda ()
 		       (thread-sleep! delay-time)
 		       (if (> tries-left 0)
@@ -608,18 +636,601 @@
 ;;
 (define (db:no-sync-get-lock db keyname)
   (sqlite3:with-transaction
    db
    (lambda ()
-     (handle-exceptions
-	 exn
-	 (let ((lock-time (current-seconds)))
-	   ;; (debug:print-info 2 *default-log-port* "db:no-sync-get-lock keyname=" keyname ", lock-time=" lock-time ", exn=" exn)
-	   (sqlite3:execute db "INSERT INTO no_sync_metadat (var,val) VALUES(?,?);" keyname lock-time)
-	   `(#t . ,lock-time))
-       `(#f . ,(sqlite3:first-result db "SELECT val FROM no_sync_metadat WHERE var=?;" keyname))))))
-
+     (condition-case
+	 (let* ((curr-val (db:no-sync-get/default db keyname #f)))
+	   (if curr-val
+	       `(#f . ,curr-val)   ;; an entry already exists for keyname: not acquired, report the stored value
+	       (let ((lock-time (current-seconds)))
+		 (sqlite3:execute db "INSERT OR REPLACE INTO no_sync_metadat (var,val) VALUES(?,?);" keyname lock-time)
+		 `(#t . ,lock-time))))
+       ;; result is always a (gotlock . time) pair, also on errors, since callers take car/cdr of it
+       (exn (io-error)  (dbfile:print-err "ERROR: i/o error with no-sync db. Check permissions, disk space etc. and try again.") `(#f . ,(current-seconds)))
+       (exn (corrupt)   (dbfile:print-err "ERROR: database no-sync db is corrupt. Repair it to proceed.") `(#f . ,(current-seconds)))
+       (exn (busy)      (dbfile:print-err "ERROR: database no-sync db is locked. Try copying to another location, remove original and copy back.") `(#f . ,(current-seconds)))
+       (exn (permission)(dbfile:print-err "ERROR: database no-sync db has some permissions problem.") `(#f . ,(current-seconds)))
+       (exn () ;; (status done) ;; I don't know how to detect status done but no data!
+	    (dbfile:print-err "ERROR: Unknown error with database no-sync db message: exn="(condition->list exn)", \n" ((condition-property-accessor 'exn 'message) exn))
+	    `(#f . ,(current-seconds)))))))
+
+(define (db:no-sync-get-lock-timeout db keyname timeout) ;; take over a lock whose recorded time is more than timeout seconds old
+  (let ((lockdat (db:no-sync-get-lock db keyname)))
+    (if (not (and (pair? lockdat) (not (car lockdat))))
+	lockdat ;; lock obtained, or a result that is not a (#f . time) pair: hand it back untouched
+	(let* ((stored  (cdr lockdat))
+	       (held-at (if (string? stored) (string->number stored) stored)))
+	  (if (<= (- (current-seconds) held-at) timeout)
+	      lockdat ;; the current holder is still within its timeout
+	      (let ((now (current-seconds)))
+		(sqlite3:execute db "INSERT OR REPLACE INTO no_sync_metadat (var,val) VALUES(?,?);" keyname now)
+		(cons #t now)))))))
+
+;; NOTE: This will steal the lock after timeout of waiting.
+;;
+(define (db:with-no-sync-lock db keyname timeout proc)
+  ;; call proc while holding lock keyname and return its result; the result is unspecified when the lock is not obtained
+  (let* ((lockdat  (db:no-sync-get-lock-timeout db keyname timeout)) ;; timeout is a required third argument
+	 (gotlock  (car lockdat)))
+    (if gotlock
+	(let ((res (proc)))
+	  (db:no-sync-del! db keyname) ;; release the lock before handing back the result
+	  res))))
+  
+;;======================================================================
+;; sync back functions pulled from db.scm
+;;======================================================================
+
+;; Get a lock from the no-sync-db for the from-db, then delta sync the from-db to the to-db, otherwise return #f
+;;
+(define (db:lock-and-delta-sync no-sync-db dbstruct from-db-file runid keys dbinit)
+  (assert (not *db-sync-in-progress*) "FATAL: db:lock-and-sync called while a sync is in progress.")
+  ;; returns #t when the sync ran, #f when the lock file could not be taken; no-sync-db is not used here
+  (let* ((lock-file (conc from-db-file ".lock")))
+    (if (common:simple-file-lock lock-file)
+	(dynamic-wind
+	    (lambda () (set! *db-sync-in-progress* #t))
+	    (lambda ()
+	      (dbfile:print-err "INFO: db:lock-and-delta-sync copying db "runid" at "(current-seconds))
+	      (db:sync-touched dbstruct runid keys dbinit: dbinit) ;; dbinit is a #!key parameter of db:sync-touched
+	      #t)
+	    ;; always clear the flag and remove the lock file, even when the sync raises
+	    (lambda () (set! *db-sync-in-progress* #f) (delete-file* lock-file)))
+        (begin
+          (dbfile:print-err "INFO: could not get lock for " from-db-file ", sync likely in progress.")
+	  #f))))
+
+;; ;; Get a lock from the no-sync-db for the from-db, then delta sync the from-db to the to-db, otherwise return #f
+;; ;;
+;; (define (db:lock-and-delta-sync-orig no-sync-db dbstruct from-db-file runid keys dbinit)
+;;   (assert (not *db-sync-in-progress*) "FATAL: db:lock-and-sync called while a sync is in progress.")
+;;   ;; (dbfile:print-err *default-log-port* "db:lock-and-delta-sync")
+;;   (let* ((lockdat  (db:no-sync-get-lock-timeout no-sync-db from-db-file 60))
+;; 	 (gotlock  (car lockdat))
+;; 	 (locktime (cdr lockdat)))
+;;     ;; (debug:print-info 3 *default-log-port* "db:lock-and-delta-sync: got lock?")
+;;     
+;;     (if gotlock
+;; 	(begin
+;;           (dbfile:print-err "INFO: db:lock-and-delta-sync copying db "runid" at "(current-seconds))
+;; 	  (set! *db-sync-in-progress* #t)
+;;           (db:sync-touched dbstruct runid keys dbinit)
+;; 	  (set! *db-sync-in-progress* #f)
+;; 	  (db:no-sync-del! no-sync-db from-db-file)
+;; 	  #t)
+;;         (begin
+;;           (dbfile:print-err "ERROR: could not get lock for " from-db-file " from no-sync-db")
+;; 	  #f
+;;         ))))
+
+;; sync run from tmp disk to nfs disk if touched: db:sync-tables is run from the
+;; handle given by dbfile:open-db to the subdb's mtdbdat. Always returns #t.
+;; dbinit is a keyword argument, call with dbinit: db:initialize-main-db
+;; with force-sync the last_update cutoff is 0 instead of *db-last-sync*
+(define (db:sync-touched dbstruct run-id keys #!key dbinit (force-sync #f))
+  (dbfile:print-err "db:sync-touched Syncing: " (conc (if run-id run-id "main") ".db"))
+  (let* (;; the subdb is needed to access the mtdbdat
+	 (subdb     (or (dbfile:get-subdb dbstruct run-id)
+			(dbfile:init-subdb dbstruct run-id dbinit)))
+         (tmpdbfile (dbr:subdb-tmpdbfile subdb)) ;; bound but not used below
+	 (mtdb      (dbr:subdb-mtdbdat subdb))
+         (tmpdb     (dbfile:open-db dbstruct run-id dbinit)) ;; source handle; handed back with dbfile:add-dbdat at the end
+	 (start-t   (current-seconds)))
+    (mutex-lock! *db-multi-sync-mutex*) ;; held only while *db-last-sync* is read
+    (let ((update_info (cons "last_update" (if force-sync 0 *db-last-sync*) )))
+      (mutex-unlock! *db-multi-sync-mutex*) ;; released before the sync itself runs
+      (db:sync-tables (db:sync-all-tables-list dbstruct keys) update_info tmpdb mtdb))
+    (mutex-lock! *db-multi-sync-mutex*)
+    (set! *db-last-sync* start-t) ;; the time taken before the sync started, not the finish time
+    (set! *db-last-access* start-t) ;; NOTE(review): *db-last-access* is not defined in the visible part of this module - confirm
+    (mutex-unlock! *db-multi-sync-mutex*)
+    (dbfile:add-dbdat dbstruct run-id tmpdb)
+  #t))
+
+;; just the tests, test_steps and test_data tables; each entry is ("tablename" ("field" #f) ...)
+(define db:sync-tests-only
+  (list
+   ;; (list "strs"
+   ;;       '("id"             #f)
+   ;;       '("str"            #f))
+   (list "tests" 
+	 '("id"             #f)
+	 '("run_id"         #f)
+	 '("testname"       #f)
+	 '("host"           #f)
+	 '("cpuload"        #f)
+	 '("diskfree"       #f)
+	 '("uname"          #f)
+	 '("rundir"         #f)
+	 '("shortdir"       #f)
+	 '("item_path"      #f)
+	 '("state"          #f)
+	 '("status"         #f)
+	 '("attemptnum"     #f)
+	 '("final_logf"     #f)
+	 '("logdat"         #f)
+	 '("run_duration"   #f)
+	 '("comment"        #f)
+	 '("event_time"     #f)
+	 '("fail_count"     #f)
+	 '("pass_count"     #f)
+	 '("archived"       #f)
+         '("last_update"    #f))
+  (list "test_steps"
+	 '("id"             #f)
+	 '("test_id"        #f)
+	 '("stepname"       #f)
+	 '("state"          #f)
+	 '("status"         #f)
+	 '("event_time"     #f)
+	 '("comment"        #f)
+	 '("logfile"        #f)
+         '("last_update"    #f))
+   (list "test_data"
+	 '("id"             #f)
+	 '("test_id"        #f)
+	 '("category"       #f)
+	 '("variable"       #f)
+	 '("value"          #f)
+	 '("expected"       #f)
+	 '("tol"            #f)
+	 '("units"          #f)
+	 '("comment"        #f)
+	 '("status"         #f)
+	 '("type"           #f)
+         '("last_update"    #f))))
+
+;; table/field list for syncing the main tables; keys (the run key field names)
+;; are supplied by the caller and dbstruct is not consulted
+(define (db:sync-main-list dbstruct keys)
+  (let ((keys  keys)) ;; redundant rebinding, formerly (db:get-keys dbstruct)
+    (list
+     (list "keys"
+	   '("id"        #f)
+	   '("fieldname" #f)
+	   '("fieldtype" #f))
+     (list "metadat" '("var" #f) '("val" #f))
+     (append (list "runs" 
+		   '("id"  #f))
+	     (map (lambda (k)(list k #f)) ;; one ("field" #f) entry per key, then per fixed column
+		  (append keys
+			  (list "runname" "state" "status" "owner" "event_time" "comment" "fail_count" "pass_count" "contour" "last_update"))))
+     (list "archive_disks"
+           '("id" #f)
+           '("archive_area_name" #f) 
+           '("disk_path" #f)
+           '("last_df" #f)
+           '("last_df_time" #f)
+           '("creation_time" #f)) 
+
+     (list "archive_blocks"
+           '("id" #f)
+           '("archive_disk_id" #f) 
+           '("disk_path" #f)
+           '("last_du" #f)
+           '("last_du_time" #f)
+           '("creation_time" #f)) 
+
+     (list "test_meta"
+	   '("id"             #f)
+	   '("testname"       #f)
+	   '("owner"          #f)
+	   '("description"    #f)
+	   '("reviewed"       #f)
+	   '("iterated"       #f)
+	   '("avg_runtime"    #f)
+	   '("avg_disk"       #f)
+	   '("tags"           #f)
+	   '("jobgroup"       #f)))))
+
+(define (db:sync-all-tables-list dbstruct keys) ;; main tables first, then the per-test tables
+  (let ((main-tables (db:sync-main-list dbstruct keys)))
+    (append main-tables db:sync-tests-only)))
+
+;; tbls is ( ("tablename" ( "field1" [#f|proc1] ) ( "field2" [#f|proc2] ) .... ) )
+;; fromdb, todb and slave-dbs are dbdat's; rows that differ in fromdb are written to todb and every slave db.
+;; Returns the count of rows written, or -1 .. -6 for a bad argument; on an exception it logs and exits.
+;; last-update may be a pair ("field-name" . time-in-seconds) or a bare number of seconds
+;;    (field "last_update" is then used); only records where field >= seconds are synced,
+;;    IFF the table has that field, otherwise the whole table is compared.
+;;
+(define (db:sync-tables tbls last-update fromdb todb . slave-dbs)
+  (handle-exceptions
+   exn
+   (begin
+     (dbfile:print-err  "EXCEPTION: database probably overloaded or unreadable in db:sync-tables.")
+     (print-call-chain (current-error-port))
+     (dbfile:print-err  " message: " ((condition-property-accessor 'exn 'message #f) exn)) ;; #f default: never raise inside the handler
+     (dbfile:print-err  "exn=" (condition->list exn))
+     (dbfile:print-err  " status:  " ((condition-property-accessor 'sqlite3 'status #f) exn)) ;; non-sqlite3 conditions have no status
+     (dbfile:print-err  " src db:  " (dbr:dbdat-dbfile fromdb))
+     (for-each (lambda (dbdat)
+		 (let ((dbpath (dbr:dbdat-dbfile dbdat)))
+		   (dbfile:print-err  " dbpath:  " dbpath)
+		   (if #t ;; (not (db:repair-db dbdat))
+		       (begin
+			 (dbfile:print-err "Failed to rebuild (repair is turned off) " dbpath ", exiting now.")
+			 (exit)))))
+	       (cons todb slave-dbs))
+     
+     0)
+
+   ;; this is the work to be done
+   (cond
+    ((not fromdb) (dbfile:print-err  "WARNING: db:sync-tables called with fromdb missing")
+     -1)
+    ((not todb)   (dbfile:print-err  "WARNING: db:sync-tables called with todb missing")
+     -2)
+    ((not (sqlite3:database? (dbr:dbdat-dbh fromdb)))
+     (dbfile:print-err "db:sync-tables called with fromdb not a database " fromdb)
+   -3)
+    ((not (sqlite3:database? (dbr:dbdat-dbh todb)))
+     (dbfile:print-err "db:sync-tables called with todb not a database " todb)
+   -4)
+
+    ((not (file-write-access? (dbr:dbdat-dbfile todb)))
+     (dbfile:print-err "db:sync-tables called with read-only todb database " todb)
+     -5)
+    ((not (null? (let ((readonly-slave-dbs
+                        (filter
+                         (lambda (dbdat)
+                           (not (file-write-access? (dbr:dbdat-dbfile dbdat)))) ;; test each slave db, not todb
+                         slave-dbs)))
+                   (for-each
+                    (lambda (bad-dbdat)
+                      (dbfile:print-err "db:sync-tables called with read-only slave database " bad-dbdat))
+                    readonly-slave-dbs)
+                   readonly-slave-dbs))) -6)
+    (else
+     (dbfile:print-err "db:sync-tables: args are good")
+
+     (let ((stmts       (make-hash-table)) ;; table-field => stmt
+	   (all-stmts   '())              ;; ( ( stmt1 value1 ) ( stml2 value2 ))
+	   (numrecs     (make-hash-table))
+	   (start-time  (current-milliseconds))
+	   (tot-count   0))
+       (for-each ;; table
+	(lambda (tabledat)
+	  (let* ((tablename        (car tabledat))
+		 (fields           (cdr tabledat))
+		 (has-last-update  (member "last_update" (map car fields))) ;; fields are (name proc) lists, so compare names
+		 (use-last-update  (cond
+				    ((and has-last-update
+					  (number? last-update))
+				     #t) ;; if given a number, use it for every table that has last_update
+				    ((number? last-update) #f) ;; if not matched first entry then ignore last-update for this table
+				    ((and (pair? last-update)
+					  (member (car last-update)    ;; last-update field name
+						  (map car fields)))
+                                        #t)
+				    ((and last-update (not (pair? last-update)) (not (number? last-update)))
+				     (dbfile:print-err  "ERROR: parameter last-update for db:sync-tables must be a pair or a number, received: " last-update);; found in fields
+				     #f)
+				    (else
+				     #f)))
+		 (last-update-value (if use-last-update ;; no need to check for has-last-update - it is already accounted for
+					(if (number? last-update)
+					    last-update
+					    (cdr last-update))
+					#f))
+		 (last-update-field (if use-last-update
+					(if (number? last-update)
+					    "last_update"
+					    (car last-update))
+					#f))
+		 (num-fields (length fields))
+		 (field->num (make-hash-table))
+		 (num->field (apply vector (map car fields))) ;; BBHERE
+		 (full-sel   (conc "SELECT " (string-intersperse (map car fields) ",") 
+				   " FROM " tablename (if use-last-update ;; apply last-update criteria
+							  (conc " WHERE " last-update-field " >= " last-update-value)
+							  "")
+				   ";"))
+		 (full-ins   (conc "INSERT OR REPLACE INTO " tablename " ( " (string-intersperse (map car fields) ",") " ) "
+				   " VALUES ( " (string-intersperse (make-list num-fields "?") ",") " );"))
+		 (fromdat    '())
+		 (fromdats   '())
+		 (totrecords 0)
+		 (batch-len  100) ;; (string->number (or (configf:lookup *configdat* "sync" "batchsize") "100")))
+		 (todat      (make-hash-table))
+		 (count      0)
+                 (field-names (map car fields))
+                 (delay-handicap  0) ;; (string->number (or (configf:lookup *configdat* "sync" "delay-handicap") "0")))
+                 )
+
+	    ;; set up the field->num table
+	    (for-each
+	     (lambda (field)
+	       (hash-table-set! field->num field count)
+	       (set! count (+ count 1)))
+	     fields)
+
+	    ;; read the source table
+            ;; store a list of all rows in the table in fromdat, up to batch-len.
+            ;; Then add fromdat to the fromdats list, clear fromdat and repeat.
+	    (sqlite3:for-each-row
+	     (lambda (a . b)
+	       (set! fromdat (cons (apply vector a b) fromdat))
+	       (if (> (length fromdat) batch-len)
+		   (begin
+		     (set! fromdats (cons fromdat fromdats))
+		     (set! fromdat  '())
+		     (set! totrecords (+ totrecords 1)))
+               )
+             )
+	     (dbr:dbdat-dbh fromdb)
+	     full-sel)
+
+             ;; Count less than batch-len as a record
+             (if (> (length fromdat) 0)
+                 (set! totrecords (+ totrecords 1)))
+
+	    ;; tack on remaining records in fromdat
+	    (if (not (null? fromdat))
+		(set! fromdats (cons fromdat fromdats)))
+
+	    (if (common:low-noise-print 120 "sync-records")
+		(dbfile:print-err  "found " totrecords " records to sync"))
+
+	    (sqlite3:for-each-row
+	     (lambda (a . b)
+	       (hash-table-set! todat a (apply vector a b)))
+	     (dbr:dbdat-dbh todb)
+	     full-sel)
+
+            (when (and delay-handicap (> delay-handicap 0))
+              (dbfile:print-err "imposing synthetic sync delay of "delay-handicap" seconds since sync/delay-handicap is configured")
+              (thread-sleep! delay-handicap)
+              (dbfile:print-err "synthetic sync delay of "delay-handicap" seconds completed")
+              )
+            
+	    ;; first pass implementation, just insert all changed rows
+
+	    (for-each 
+	     (lambda (targdb)
+	       (let* ((db                 (dbr:dbdat-dbh targdb))
+                      (drp-trigger        (if (member "last_update" field-names)
+					      (db:drop-trigger db tablename) 
+					      #f))
+		      (has-last-update    (member "last_update" field-names))
+                      (is-trigger-dropped (if has-last-update
+                                              (db:is-trigger-dropped db tablename)
+					      #f)) 
+		      (stmth  (sqlite3:prepare db full-ins))
+                      (changed-rows 0))
+		 ;; (db:delay-if-busy targdb) ;; NO WAITING
+                 (if (and
+		      has-last-update
+		      (common:low-noise-print 120 "is-trigger-dropped"))
+                     (dbfile:print-err "is-trigger-dropped: " is-trigger-dropped))
+		 (for-each
+		  (lambda (fromdat-lst)
+		    (sqlite3:with-transaction
+		     db
+		     (lambda ()
+		       (for-each ;; 
+			(lambda (fromrow)
+			  (let* ((a    (vector-ref fromrow 0))
+				 (curr (hash-table-ref/default todat a #f))
+				 (same #t))
+			    (let loop ((i 0))
+			      (if (or (not curr)
+				      (not (equal? (vector-ref fromrow i)(vector-ref curr i))))
+				  (set! same #f))
+			      (if (and same
+				       (< i (- num-fields 1)))
+				  (loop (+ i 1))))
+			    (if (not same)
+				(begin
+				  (apply sqlite3:execute stmth (vector->list fromrow))
+				  (hash-table-set! numrecs tablename (+ 1 (hash-table-ref/default numrecs tablename 0)))
+                                  (set! changed-rows (+ changed-rows 1))
+                                )
+                            )
+                            ))
+			fromdat-lst))))
+		  fromdats)
+
+
+                 (if (> changed-rows 0)
+	           (dbfile:print-err  "table " tablename " changed rows: " changed-rows)
+                 )
+
+
+		 (sqlite3:finalize! stmth)
+                 (if (member "last_update" field-names)
+                    (db:create-trigger db tablename))))
+	     (append (list todb) slave-dbs)
+           )
+          )
+        )
+	tbls)
+       (let* ((runtime      (- (current-milliseconds) start-time))
+	      (should-print (or ;; (debug:debug-mode 12)
+				(common:low-noise-print 120 "db sync" (> runtime 500))))) ;; low and high sync times treated as separate.
+	 (if should-print (dbfile:print-err  "INFO: db sync, total run time " runtime " ms"))
+	 (for-each 
+	  (lambda (dat)
+	    (let ((tblname (car dat))
+		  (count   (cdr dat)))
+	      (set! tot-count (+ tot-count count))
+	      (if (> count 0)
+		  (if should-print (dbfile:print-err "FIXME: tblname: " tblname", count: "count" "))))) ;; (format #f "    ~10a ~5a" tblname count))))))
+	  (sort (hash-table->alist numrecs)(lambda (a b)(> (cdr a)(cdr b))))))
+       tot-count)))))
+
+;;======================================================================
+;; trigger setup/takedown
+;;======================================================================
+
+(define db:trigger-list ;; list of (trigger-name create-trigger-sql); each trigger stamps last_update on UPDATE
+     (list (list "update_runs_trigger"  "CREATE TRIGGER IF NOT EXISTS update_runs_trigger AFTER UPDATE ON runs
+                             FOR EACH ROW
+                               BEGIN 
+                                 UPDATE runs SET last_update=(strftime('%s','now'))
+                                   WHERE id=old.id;
+                               END;" ) 
+	   (list "update_run_stats_trigger"  "CREATE TRIGGER  IF NOT EXISTS update_run_stats_trigger AFTER UPDATE ON run_stats
+                             FOR EACH ROW
+                               BEGIN 
+                                 UPDATE run_stats SET last_update=(strftime('%s','now'))
+                                   WHERE id=old.id;
+                               END;" )
+	   (list "update_tests_trigger"  "CREATE TRIGGER  IF NOT EXISTS update_tests_trigger AFTER UPDATE ON tests
+                             FOR EACH ROW
+                               BEGIN 
+                                 UPDATE tests SET last_update=(strftime('%s','now'))
+                                   WHERE id=old.id;
+                               END;" )
+	   (list "update_teststeps_trigger"  "CREATE TRIGGER  IF NOT EXISTS update_teststeps_trigger AFTER UPDATE ON test_steps
+                             FOR EACH ROW
+                               BEGIN 
+                                 UPDATE test_steps SET last_update=(strftime('%s','now'))
+                                   WHERE id=old.id;
+                               END;" )
+	   (list "update_test_data_trigger"  "CREATE TRIGGER  IF NOT EXISTS update_test_data_trigger AFTER UPDATE ON test_data
+                             FOR EACH ROW
+                               BEGIN 
+                                 UPDATE test_data SET last_update=(strftime('%s','now'))
+                                   WHERE id=old.id;
+                               END;" )))
+;; Create every trigger in db:trigger-list on the db that db:with-db opens for
+;; dbstruct; run-id and r/w are both passed as #f. TODO: ADD run-id SUPPORT
+;;
+(define (db:create-all-triggers dbstruct)
+  (db:with-db
+   dbstruct #f #f
+   (lambda (dbdat db)
+     (db:create-triggers db))))
+
+(define (db:create-triggers db)
+  (for-each
+   (lambda (trigger-spec) (sqlite3:execute db (cadr trigger-spec))) ;; cadr = the CREATE TRIGGER sql
+   db:trigger-list))
+
+(define (db:drop-all-triggers dbstruct)
+  ;; run-id and r/w are both #f; only the raw handle is handed on to db:drop-triggers
+  (db:with-db dbstruct #f #f
+	      (lambda (unused-dbdat dbh)
+		(db:drop-triggers dbh))))
+
+;; Returns #t when the last_update trigger for tbl-name is absent from
+;; sqlite_master (i.e. it has been dropped), #f when it is still defined.
+(define (db:is-trigger-dropped db tbl-name)
+  (let ((trigger-name (if (equal? tbl-name "test_steps")
+			  "update_teststeps_trigger" ;; irregular name, see db:trigger-list
+			  (conc "update_" tbl-name "_trigger")))
+	(found        #f))
+    (sqlite3:for-each-row
+     (lambda (name) (if (equal? name trigger-name) (set! found #t)))
+     db
+     "SELECT name FROM sqlite_master WHERE type = 'trigger' ;")
+    (not found))) ;; the old code returned found itself, the inverse of what the name promises
+
+(define (db:drop-triggers db)
+  ;; issue one "drop trigger if exists" per db:trigger-list entry (car = trigger name)
+  (let ((drop-one (lambda (trigger-spec)
+		    (sqlite3:execute db (conc "drop trigger if exists " (car trigger-spec))))))
+    (for-each drop-one db:trigger-list)))
+
+(define (db:drop-trigger db tbl-name) ;; drop tbl-name's trigger, only if its name is in db:trigger-list
+  (let ((wanted (if (equal? tbl-name "test_steps")
+		    "update_teststeps_trigger" ;; the one name that does not follow the pattern
+		    (conc "update_" tbl-name "_trigger"))))
+    (for-each
+     (lambda (trigger-spec)
+       (when (equal? (car trigger-spec) wanted)
+	 (sqlite3:execute db (conc "drop trigger if exists " wanted))))
+     db:trigger-list)))
+
+(define (db:create-trigger db tbl-name) ;; run the db:trigger-list sql registered for tbl-name's trigger
+  (let ((wanted (if (equal? tbl-name "test_steps")
+		    "update_teststeps_trigger"
+		    (conc "update_" tbl-name "_trigger"))))
+    (for-each
+     (lambda (trigger-spec)
+       (when (equal? (car trigger-spec) wanted) (sqlite3:execute db (cadr trigger-spec))))
+     db:trigger-list)))
+
+;;======================================================================
+;; db access stuff
+;;======================================================================
+
+;; Open the db for run-id through dbfile:open-db and note in *db-write-access*
+;; whether the result is writable. Call with dbinit=db:initialize-main-db
+(define (db:open-db dbstruct run-id dbinit)
+  (let ((opened (dbfile:open-db dbstruct run-id dbinit)))
+    (set! *db-write-access* (not (dbr:dbdat-read-only opened)))
+    opened))
+
+(define dbfile:db-init-proc (make-parameter #f)) ;; parameter (default #f); db:with-db passes its value to db:open-db as dbinit
+
+;; Usage: (db:with-db dbstruct run-id r/w proc param ...) applies proc to (dbdat db param ...).
+;; dbstruct may also be a raw db handle, in which case dbdat is #f. r/w is a flag to indicate
+;; if the db is modified by this query #t = yes, #f = no (it is not consulted in this body).
+(define (db:with-db dbstruct run-id r/w proc . params)
+  (let* ((have-struct (dbr:dbstruct? dbstruct))
+         (dbdat     (if have-struct                ;; this stuff just allows us to call with a db handle directly
+			(db:open-db dbstruct run-id (dbfile:db-init-proc)) ;; (dbfile:get-subdb dbstruct run-id)
+			#f))
+	 (db        (if have-struct                ;; this stuff just allows us to call with a db handle directly
+			(dbr:dbdat-dbh dbdat)
+			dbstruct))
+	 (fname     (if dbdat
+			(dbr:dbdat-dbfile dbdat)
+			"nofilenameavailable"))
+	 #;(subdb     (if have-struct
+			(dbfile:get-subdb dbstruct run-id)
+			#f))
+	 (use-mutex (> *api-process-request-count* 25))) ;; was 25
+    (if (and use-mutex
+	     (common:low-noise-print 120 "over-50-parallel-api-requests"))
+	(dbfile:print-err *api-process-request-count* " parallel api requests being processed in process " (current-process-id) ", throttling access"))
+    (if (common:low-noise-print 600 (conc "parallel-api-requests" *max-api-process-requests*))
+	(dbfile:print-err "Parallel api request count: " *api-process-request-count* " max parallel requests: " *max-api-process-requests*))
+    (condition-case
+	(let ((res (dynamic-wind ;; the after thunk releases the mutex even when proc raises
+		       (lambda () (if use-mutex (mutex-lock! *db-with-db-mutex*)))
+		       (lambda () (apply proc dbdat db params))
+		       (lambda () (if use-mutex (mutex-unlock! *db-with-db-mutex*))))))
+	  ;; as before, the dbdat is handed back to dbstruct only after a successful call
+	  (if dbdat
+	      (dbfile:add-dbdat dbstruct run-id dbdat))
+	  res)
+      (exn (io-error)
+	   (db:generic-error-printout exn "ERROR: i/o error with " fname ". Check permissions, disk space etc. and try again."))
+      (exn (corrupt)
+	   (db:generic-error-printout exn "ERROR: database " fname " is corrupt. Repair it to proceed."))
+      (exn (busy)
+	   (db:generic-error-printout exn "ERROR: database " fname
+				      " is locked. Try copying to another location, remove original and copy back."))
+      (exn (permission)(db:generic-error-printout exn "ERROR: database " fname " has some permissions problem."))
+      (exn ()
+	   (db:generic-error-printout exn "ERROR: Unknown error with database " fname " message: "
+				      ((condition-property-accessor 'exn 'message) exn))))))
+      
 
 ;;======================================================================
 ;; file utils
 ;;======================================================================
 
@@ -706,7 +1317,14 @@
   (handle-exceptions
       exn
       #f ;; I don't really care why this failed (at least for now)
     (delete-file* fname)))
 
-
+(define (dbfile:with-simple-file-lock fname proc #!key (expire-time 300))
+  ;; Call thunk proc while holding the simple file lock on fname and return its result.
+  (if (dbfile:simple-file-lock-and-wait fname expire-time: expire-time)
+      (let ((res (proc)))
+	(dbfile:simple-file-release-lock fname)
+	res)
+      (assert #f "FATAL: simple file lock never got a lock."))) ;; was (assert #t ...), which can never fire
+  
 )

Index: http-transport.scm
==================================================================
--- http-transport.scm
+++ http-transport.scm
@@ -36,16 +36,17 @@
 (declare (uses server))
 ;; (declare (uses daemon))
 (declare (uses portlogger))
 (declare (uses rmt))
 (declare (uses dbfile))
+(declare (uses commonmod))
 
 (include "common_records.scm")
 (include "db_records.scm")
 (include "js-path.scm")
 
-(import dbfile)
+(import dbfile commonmod)
 
 (require-library stml)
 (define (http-transport:make-server-url hostport)
   (if (not hostport)
       #f
@@ -284,29 +285,31 @@
                                                     (begin
                                                       (debug:print 0 *default-log-port* "WARNING: failure in with-input-from-request to " fullurl ".")
                                                       (debug:print 0 *default-log-port* " message: " msg ", exn=" exn)
                                                       (debug:print 0 *default-log-port* " cmd: " cmd " params: " params " key:" (or server-id "thekey"))
                                                       (debug:print 0 *default-log-port* " call-chain: " call-chain)))
-                                                (if runremote
-						    (remote-conndat-set! runremote #f))
+						(set! *runremote* #f)
+						(set! runremote #f)
+						;; (if runremote
+						;;    (remote-conndat-set! runremote #f))
 						;; Killing associated server to allow clean retry.")
 						;; (tasks:kill-server-run-id run-id)  ;; better to kill the server in the logic that called this routine?
 						(mutex-unlock! *http-mutex*)
-					     ;;; (signal (make-composite-condition
-					     ;;;          (make-property-condition 'commfail 'message "failed to connect to server")))
-					     ;;; "communications failed"
+						;; (signal (make-composite-condition
+						;;          (make-property-condition 'commfail 'message "failed to connect to server")))
+						;; "communications failed"
 						(db:obj->string #f))
-					    (with-input-from-request ;; was dat
-					     fullurl 
-					     (list (cons 'key (or server-id   "thekey"))
-						   (cons 'cmd cmd)
-						   (cons 'params sparams))
-					     read-string))
+					      (with-input-from-request ;; was dat
+					       fullurl 
+					       (list (cons 'key (or server-id   "thekey"))
+						     (cons 'cmd cmd)
+						     (cons 'params sparams))
+					       read-string))
 					  transport: 'http)
-                                         0)) ;; added this speculatively
+					 0)) ;; added this speculatively
 			      ;; Shouldn't this be a call to the managed call-all-connections stuff above?
-			      (close-all-connections!)
+			      (close-all-connections!) ;; BUG? WHY IS THIS HERE? Are we failing to reuse connections?
 			      (mutex-unlock! *http-mutex*)
 			      ))
 	      (time-out     (lambda ()
 			      (thread-sleep! 45)
 			      (debug:print 0 *default-log-port* "WARNING: send-receive took more than 45 seconds!!")
@@ -552,12 +555,16 @@
               (debug:print-info 0 *default-log-port* "Server continuing, seconds since last db access: " (- (current-seconds) last-access))
 	      (let ((curr-time (current-seconds)))
 		(handle-exceptions
 		    exn
 		    (debug:print 0 *default-log-port* "ERROR: Failed to change timestamp on log file " server-log-file ". Are you out of space on that disk? exn=" exn)
-		  (if (not *server-overloaded*)
-		      (change-file-times server-log-file curr-time curr-time)))))
+		    (if (and (< (- (current-seconds) server-start-time) 600) ;; run for ten minutes for experiment, 3600 thereafter
+			     (not *server-overloaded*))
+			(change-file-times server-log-file curr-time curr-time)
+			(if (common:low-noise-print 120 "start new server")
+			    (server:kind-run *toppath*) ;; server:kind-run uses [servers] numservers
+			)))))
           (loop 0 server-state bad-sync-count (current-milliseconds)))
          (else
           (debug:print-info 0 *default-log-port* "Server timed out. seconds since last db access: " (- (current-seconds) last-access))
           (http-transport:server-shutdown port)))))))
 

Index: megatest.scm
==================================================================
--- megatest.scm
+++ megatest.scm
@@ -40,12 +40,15 @@
 (declare (uses mt))
 (declare (uses api))
 (declare (uses tasks)) ;; only used for debugging.
 (declare (uses env))
 (declare (uses diff-report))
+(declare (uses db))
 (declare (uses dbmod))
 (declare (uses dbmod.import))
+(declare (uses commonmod))
+(declare (uses commonmod.import))
 (declare (uses dbfile))
 (declare (uses dbfile.import))
 ;; (declare (uses debugprint))
 ;; (declare (uses debugprint.import))
 ;; (declare (uses mtargs))
@@ -53,10 +56,11 @@
 
 ;; (declare (uses ftail))
 ;; (import ftail)
 
 (import dbmod
+	commonmod
 	dbfile)
 
 (define *db* #f) ;; this is only for the repl, do not use in general!!!!
 
 (include "common_records.scm")
@@ -75,10 +79,12 @@
 
 (require-library mutils)
 
 (define *usage-log-file* #f)    ;; put path to file for logging usage in this var in the ~/.megatestrc file
 (define *usage-use-seconds* #t) ;; for Epoc seconds in usage logging change this to #t in ~/.megatestrc file
+
+(dbfile:db-init-proc db:initialize-main-db)
 
 ;; load the ~/.megatestrc file, put (use trace)(trace-call-sites #t)(trace function-you-want-to-trace) in this file
 ;;
 (let ((debugcontrolf (conc (get-environment-variable "HOME") "/.megatestrc")))
   (if (common:file-exists? debugcontrolf)

Index: rmt.scm
==================================================================
--- rmt.scm
+++ rmt.scm
@@ -21,14 +21,15 @@
 (use format typed-records) ;; RADT => purpose of json format??
 
 (declare (unit rmt))
 (declare (uses api))
 (declare (uses http-transport))
+(declare (uses dbfile))
 (include "common_records.scm")
 ;; (declare (uses rmtmod))
 
-;; (import rmtmod)
+(import dbfile) ;; rmtmod)
 
 ;;
 ;; THESE ARE ALL CALLED ON THE CLIENT SIDE!!!
 ;;
 
@@ -63,11 +64,11 @@
 (define (rmt:send-receive cmd rid params #!key (attemptnum 1)(area-dat #f)) ;; start attemptnum at 1 so the modulo below works as expected
 
   #;(common:telemetry-log (conc "rmt:"(->string cmd))
                         payload: `((rid . ,rid)
                                    (params . ,params)))
-                          
+
   (if (> attemptnum 2)
       (debug:print 0 *default-log-port* "INFO: attemptnum in rmt:send-receive is " attemptnum))
     
   (cond
    ((> attemptnum 2) (thread-sleep! 0.05))
@@ -119,10 +120,17 @@
 	(thread-sleep! 0.1) ;; since we shouldn't get here, delay a little
 	(remote-hh-dat-set! runremote (common:get-homehost)))
     
     ;;(print "BB> readonly-mode is "readonly-mode" dbfile is "dbfile)
     (cond
+     ((> (- (current-seconds)(remote-connect-time runremote)) 180) ;; reconnect to server every 180 seconds
+      (debug:print 0 *default-log-port* "Forcing reconnect to server(s) due to 180 second timeout.")
+      (set! *runremote* #f)
+      ;; BUG: close-connections should go here?
+      (mutex-unlock! *rmt-mutex*)
+      (rmt:send-receive cmd rid params attemptnum: 1 area-dat: area-dat))
+     
      ;;DOT EXIT;
      ;;DOT MUTEXLOCK -> EXIT [label="> 15 attempts"]; {rank=same "case 1" "EXIT" }
      ;; give up if more than 150 attempts
      ((> attemptnum 150)
       (debug:print 0 *default-log-port* "ERROR: 150 tries to start/connect to server. Giving up.")
@@ -375,20 +383,20 @@
 	 (dbstructs-local (db:setup #t))  ;; make-dbr:dbstruct path:  dbdir local: #t)))
 	 (read-only       (not (file-write-access? db-file-path)))
 	 (start           (current-milliseconds))
 	 (resdat          (if (not (and read-only qry-is-write))
 			      (let ((v (api:execute-requests dbstructs-local (vector (symbol->string cmd) params))))
-				(handle-exceptions ;; there has been a long history of receiving strange errors from values returned by the client when things go wrong..
-				 exn               ;;  This is an attempt to detect that situation and recover gracefully
-				 (begin
-				   (debug:print 0 *default-log-port* "ERROR: bad data from server " v " message: "  ((condition-property-accessor 'exn 'message) exn) ", exn=" exn)
-				   (vector #t '())) ;; should always get a vector but if something goes wrong return a dummy
+			;;	(handle-exceptions ;; there has been a long history of receiving strange errors from values returned by the client when things go wrong..
+			;;	 exn               ;;  This is an attempt to detect that situation and recover gracefully
+			;;	 (begin
+			;;	   (debug:print 0 *default-log-port* "ERROR: bad data from server " v " message: "  ((condition-property-accessor 'exn 'message) exn) ", exn=" exn)
+			;;	   (vector #t '())) ;; should always get a vector but if something goes wrong return a dummy
 				 (if (and (vector? v)
 					  (> (vector-length v) 1))
 				     (let ((newvec (vector (vector-ref v 0)(vector-ref v 1))))
 				       newvec)           ;; by copying the vector while inside the error handler we should force the detection of a corrupted record
-				     (vector #t '()))))  ;; we could also check that the returned types are valid
+				     (vector #t '()))) ;; )  ;; we could also check that the returned types are valid
 			      (vector #t '())))
 	 (success        (vector-ref resdat 0))
 	 (res            (vector-ref resdat 1))
 	 (duration       (- (current-milliseconds) start)))
     (if (and read-only qry-is-write)
@@ -412,16 +420,16 @@
                 (mutex-unlock! *db-multi-sync-mutex*)))))
     res))
 
 (define (rmt:send-receive-no-auto-client-setup connection-info cmd run-id params)
   (let* ((run-id   (if run-id run-id 0))
-	 (res  	   (handle-exceptions
-		       exn
-		     (begin
-		       (print "transport failed. exn=" exn)
-		       #f)
-		     (http-transport:client-api-send-receive run-id connection-info cmd params))))
+	 (res  	   ;; (handle-exceptions
+		   ;;     exn
+		   ;;   (begin
+		   ;;     (print "transport failed. exn=" exn)
+		   ;;     #f)
+		     (http-transport:client-api-send-receive run-id connection-info cmd params))) ;; )
     (if (and res (vector-ref res 0))
 	(vector-ref res 1) ;;; YES!! THIS IS CORRECT!! CHANGE IT HERE, THEN CHANGE rmt:send-receive ALSO!!!
 	#f)))
 
 ;;======================================================================

Index: server.scm
==================================================================
--- server.scm
+++ server.scm
@@ -23,18 +23,22 @@
 
 (use spiffy uri-common intarweb http-client spiffy-request-vars)
 
 (declare (unit server))
 
+(declare (uses commonmod))
+
 (declare (uses common))
 (declare (uses db))
 (declare (uses tasks)) ;; tasks are where stuff is maintained about what is running.
 ;; (declare (uses synchash))
 (declare (uses http-transport))
 ;;(declare (uses rpc-transport))
 (declare (uses launch))
 ;; (declare (uses daemon))
+
+(import commonmod)
 
 (include "common_records.scm")
 (include "db_records.scm")
 
 (define (server:make-server-url hostport)

Index: tasks.scm
==================================================================
--- tasks.scm
+++ tasks.scm
@@ -20,15 +20,17 @@
 
 (use sqlite3 srfi-1 posix regex regex-case srfi-69 dot-locking format)
 (import (prefix sqlite3 sqlite3:))
 
 (declare (unit tasks))
+(declare (uses dbfile))
 (declare (uses db))
 (declare (uses rmt))
 (declare (uses common))
 (declare (uses pgdb))
 
+(import dbfile)
 ;; (import pgdb) ;; pgdb is a module
 
 (include "task_records.scm")
 (include "db_records.scm")