Index: Makefile
--- Makefile
+++ Makefile
@@ -22,17 +22,27 @@
 SRCFILES = common.scm items.scm launch.scm ods.scm runconfig.scm	\
            server.scm configf.scm db.scm keys.scm margs.scm		\
            process.scm runs.scm tasks.scm tests.scm genexample.scm	\
-           http-transport.scm filedb.scm tdb.scm client.scm mt.scm	\
-           ezsteps.scm lock-queue.scm sdb.scm rmt.scm api.scm		\
+           http-transport.scm tdb.scm client.scm mt.scm	\
+           ezsteps.scm lock-queue.scm rmt.scm api.scm		\
            subrun.scm portlogger.scm archive.scm env.scm		\
            diff-report.scm cgisetup/models/pgdb.scm
 # module source files
-MSRCFILES = dbmod.scm
+MSRCFILES = dbmod.scm dbfile.scm debugprint.scm mtargs.scm commonmod.scm
+# Keep "all" as the default goal even though the module rules below come first.
+# The "all" rule itself stays after the ARCHSTR assignment further down, because
+# prerequisite lists are expanded as they are read and $(ARCHSTR) is not set yet here.
+.DEFAULT_GOAL := all
+# dbmod.import.o is just a hack here
+mofiles/dbfile.o     : mofiles/debugprint.o dbmod.import.o
+mofiles/debugprint.o : mofiles/mtargs.o
 # ftail.scm rmtmod.scm commonmod.scm removed
 # MSRCFILES = ducttape-lib.scm pkts.scm stml2.scm cookie.scm mutils.scm	\
 #             mtargs.scm commonmod.scm dbmod.scm adjutant.scm ulex.scm	\
 #             rmtmod.scm apimod.scm
@@ -78,12 +88,11 @@
 ARCHSTR=$(shell if [[ -e /usr/bin/sw_vers ]]; then /usr/bin/sw_vers -productVersion; else lsb_release -sr; fi)
 # ARCHSTR=$(shell bash -c "echo \$$MACHTYPE")
 PNGFILES = $(shell cd docs/manual;ls *png)
-# all : $(PREFIX)/bin/.$(ARCHSTR) mtest dboard mtut ndboard
 all : $(PREFIX)/bin/.$(ARCHSTR) mtest dboard mtut tcmt
 mtest: $(OFILES) readline-fix.scm megatest.o $(MOFILES) $(MOIMPFILES)  megatest-version.scm
 	csc $(CSCOPTS) $(OFILES) $(MOFILES) $(MOIMPFILES) megatest.o -o mtest
@@ -150,11 +155,13 @@
 	$(INSTALL) mt-pg.sql $(PREFIX)/share/db/mt-pg.sql
 # Special dependencies for the includes
 $(MOFILE) $(MOIMPFILES) : megatest-fossil-hash.scm
-# common.o : mofiles/commonmod.o megatest-fossil-hash.scm
+mofiles/commonmod.o : megatest-fossil-hash.scm
+common.o : mofiles/commonmod.o
 # mofiles/dbmod.o : mofiles/configfmod.o
 # commonmod.o dashboard.o megatest.o tcmt.o apimod.o : megatest-fossil-hash.scm
 tests.o db.o launch.o runs.o dashboard-tests.o				\
@@ -162,21 +169,23 @@
 monitor.o dashboard.o archive.o megatest.o : db_records.scm megatest-fossil-hash.scm
 tests.o runs.o dashboard.o dashboard-tests.o dashboard-context-menu.o dashboard-main.o  : run_records.scm
 db.o ezsteps.o keys.o launch.o megatest.o monitor.o runs-for-ref.o runs.o tests.o : key_records.scm
-db.o api.o : mofiles/dbmod.o
+db.o api.o : mofiles/dbmod.o mofiles/dbfile.o mofiles/debugprint.o mofiles/commonmod.o
 tests.o tasks.o dashboard-tasks.o : task_records.scm
 runs.o : test_records.scm
 megatest.o : megatest-fossil-hash.scm megatest-version.scm
-rmt.scm client.scm common.scm configf.scm dashboard-guimonitor.scm dashboard-tests.scm dashboard.scm db.scm dcommon.scm ezsteps.scm fs-transport.scm http-transport.scm index-tree.scm items.scm keys.scm launch.scm megatest.scm monitor.scm mt.scm newdashboard.scm runconfig.scm runs.scm server.scm tdb.scm tests.scm tree.scm : common_records.scm megatest-version.scm
+rmt.scm client.scm common.scm configf.scm dashboard-guimonitor.scm dashboard-tests.scm dashboard.scm db.scm dcommon.scm ezsteps.scm http-transport.scm index-tree.scm items.scm keys.scm launch.scm megatest.scm monitor.scm mt.scm newdashboard.scm runconfig.scm runs.scm server.scm tdb.scm tests.scm tree.scm : common_records.scm megatest-version.scm
 common_records.scm : altdb.scm
+mofiles/dbfile.o : mofiles/commonmod.o
 # mofiles/stml2.o : mofiles/cookie.o
 # configf.o : mofiles/commonmod.o
 vg.o dashboard.o : vg_records.scm megatest-version.scm
@@ -293,10 +302,20 @@
 	chmod a+x $@
 $(PREFIX)/bin/mtrunner : utils/mtrunner
 	$(INSTALL) $< $@
 	chmod a+x $@
+# NOTE(review): the script names in the next two rules and in "install" were missing from this patch and have been restored - confirm against utils/
+$(PREFIX)/bin/mt-old-to-new.sh : utils/mt-old-to-new.sh
+	$(INSTALL) $< $@
+	chmod a+x $@
+$(PREFIX)/bin/mt-new-to-old.sh : utils/mt-new-to-old.sh
+	$(INSTALL) $< $@
+	chmod a+x $@
 deploytarg/nbfake : utils/nbfake
 	$(INSTALL) $< $@
 	chmod a+x $@
@@ -339,10 +358,11 @@
 install : $(PREFIX)/bin/.$(ARCHSTR) $(PREFIX)/bin/.$(ARCHSTR)/mtest $(PREFIX)/bin/megatest \
           $(PREFIX)/bin/.$(ARCHSTR)/dboard $(PREFIX)/bin/dashboard $(HELPERS) $(PREFIX)/bin/nbfake \
           $(PREFIX)/bin/.$(ARCHSTR)/mtexec $(PREFIX)/bin/mtexec $(PREFIX)/bin/serialize-env \
 	  $(PREFIX)/bin/nbfind $(PREFIX)/bin/mtrunner $(PREFIX)/bin/viewscreen $(PREFIX)/bin/mt_xterm \
+          $(PREFIX)/bin/mt-old-to-new.sh $(PREFIX)/bin/mt-new-to-old.sh \
 	  $(PREFIX)/share/docs/megatest_manual.html $(PREFIX)/bin/remrun \
 	  $(PREFIX)/share/docs/megatest_manual.html $(PREFIX)/bin/remrun $(PREFIX)/bin/mtutil \
 	  $(PREFIX)/bin/tcmt $(PREFIX)/share/db/mt-pg.sql \
           $(PREFIX)/share/js/jquery-3.1.0.slim.min.js \
           $(PREFIX)/bin/.$(ARCHSTR)/lib/ \
@@ -455,12 +474,12 @@
 	if csi -ne '(use postgresql)';then \
 	   echo "(use postgresql)(hash-table-set! *available-db* 'postgresql #t)" >> altdb.scm;\
-portlogger-example : portlogger-example.scm api.o archive.o client.o common.o configf.o dashboard-tests.o dashboard-context-menu.o db.o dcommon.o ezsteps.o filedb.o genexample.o gutils.o http-transport.o items.o keys.o launch.o lock-queue.o margs.o mt.o ods.o portlogger.o process.o rmt.o runconfig.o runs.o sdb.o server.o synchash.o tasks.o tdb.o tests.o tree.o
-	csc $(CSCOPTS) portlogger-example.scm api.o archive.o client.o common.o configf.o dashboard-tests.o dashboard-context-menu.o db.o dcommon.o ezsteps.o filedb.o genexample.o gutils.o http-transport.o items.o keys.o launch.o lock-queue.o margs.o mt.o ods.o portlogger.o process.o rmt.o runconfig.o runs.o sdb.o server.o synchash.o tasks.o tdb.o tests.o tree.o
+portlogger-example : portlogger-example.scm api.o archive.o client.o common.o configf.o dashboard-tests.o dashboard-context-menu.o db.o dcommon.o ezsteps.o genexample.o gutils.o http-transport.o items.o keys.o launch.o lock-queue.o margs.o mt.o ods.o portlogger.o process.o rmt.o runconfig.o runs.o server.o synchash.o tasks.o tdb.o tests.o tree.o
+	csc $(CSCOPTS) portlogger-example.scm api.o archive.o client.o common.o configf.o dashboard-tests.o dashboard-context-menu.o db.o dcommon.o ezsteps.o genexample.o gutils.o http-transport.o items.o keys.o launch.o lock-queue.o margs.o mt.o ods.o portlogger.o process.o rmt.o runconfig.o runs.o server.o synchash.o tasks.o tdb.o tests.o tree.o
 # create a pdf dot graphviz diagram from notations in rmt.scm
 rmt.pdf : rmt.scm
 	grep ';;DOT' rmt.scm | sed -e 's/.*;;DOT //' > rmt.dot;dot -Tpdf -o rmt.pdf rmt.dot

Index: api.scm
--- api.scm
+++ api.scm
@@ -24,13 +24,15 @@
 (declare (unit api))
 (declare (uses rmt))
 (declare (uses db))
 (declare (uses dbmod))
+(declare (uses dbfile))
 (declare (uses tasks))
 (import dbmod)
+(import dbfile)
 ;; allow these queries through without starting a server
 (define api:read-only-queries
@@ -94,12 +96,12 @@
 (define api:write-queries
     get-keys-write ;; dummy "write" query to force server start
     ;; SERVERS
-    start-server
-    kill-server
+    ;; start-server
+    ;; kill-server
     ;; TESTS
@@ -137,246 +139,264 @@
     ;; TASKS
+(define *db-write-mutexes* (make-hash-table))
 ;; These are called by the server on recipt of /api calls
 ;;    - keep it simple, only return the actual result of the call, i.e. no meta info here
 ;;    - returns #( flag result )
 (define (api:execute-requests dbstruct dat)
-  (handle-exceptions
-   exn
-   (let ((call-chain (get-call-chain)))
-     (debug:print 0 *default-log-port* "WARNING: api:execute-requests received an exception from peer, dat=" dat ", exn=" exn)
-     (print-call-chain (current-error-port))
-     (debug:print 0 *default-log-port* " message: "  ((condition-property-accessor 'exn 'message) exn))       
-     (vector #f (vector exn call-chain dat))) ;; return some stuff for debug if an exception happens
+  (db:open-no-sync-db) ;; sets *no-sync-db*
+;;   (handle-exceptions
+;;    exn
+;;    (let ((call-chain (get-call-chain)))
+;;      (debug:print 0 *default-log-port* "WARNING: api:execute-requests received an exception from peer, dat=" dat ", exn=" exn)
+;;      (print-call-chain (current-error-port))
+;;      (debug:print 0 *default-log-port* " message: "  ((condition-property-accessor 'exn 'message) exn))       
+  ;;      (vector #f (vector exn call-chain dat))) ;; return some stuff for debug if an exception happens
+  (if (> *api-process-request-count* 200)
+      (begin
+	(if (common:low-noise-print 30 "too many threads")
+	    (debug:print 0 *default-log-port* "WARNING: "*api-process-request-count*" threads, potential overload, adding 0.5 sec delay."))
+	(thread-sleep! 0.5) ;; take a nap
+	))
     ((not (vector? dat))                    ;; it is an error to not receive a vector
      (vector #f (vector #f "remote must be called with a vector")))
-    ((> *api-process-request-count* 20) ;; 20)
+    #;((> *api-process-request-count* 200) ;; 20)
      (debug:print 0 *default-log-port* "WARNING: api:execute-requests received an overloaded message.")
      (set! *server-overloaded* #t)
      (vector #f (vector #f 'overloaded))) ;; the inner vector is what gets returned. nope, don't know why. please refactor!
      (let* ((cmd-in            (vector-ref dat 0))
             (cmd               (if (symbol? cmd-in)
 				   (string->symbol cmd-in)))
             (params            (vector-ref dat 1))
+	    (run-id            (if (null? params)
+				   0
+				   (car params)))
+	    (write-mutex       (if (hash-table-exists? *db-write-mutexes* run-id)
+				   (hash-table-ref *db-write-mutexes* run-id)
+				   (let* ((newmutex (make-mutex)))
+				     (hash-table-set! *db-write-mutexes* run-id newmutex)
+				     newmutex)))
             (start-t           (current-milliseconds))
             (readonly-mode     (dbr:dbstruct-read-only dbstruct))
             (readonly-command  (member cmd api:read-only-queries))
-            (writecmd-in-readonly-mode (and readonly-mode (not readonly-command)))
-            #;(foo               (begin
-                                 (common:telemetry-log (conc "api-in:"(->string cmd))
-                                                       payload: `((params . ,params)))
-                                 #t))
-            (res    
-             (if writecmd-in-readonly-mode
-                 (conc "attempt to run write command "cmd" on a read-only database")
-                 (case cmd
-                   ;;===============================================
-                   ;; READ/WRITE QUERIES
-                   ;;===============================================
-                   ((get-keys-write)                        (db:get-keys dbstruct)) ;; force a dummy "write" query to force server; for debug in -repl
-                   ;; SERVERS
-                   ((start-server)                    (apply server:kind-run params))
-                   ((kill-server)                     (set! *server-run* #f))
-                   ;; TESTS
-                   ;;((test-set-state-status-by-id)     (apply mt:test-set-state-status-by-id dbstruct params))
-                   ;;BB - commented out above because it was calling below, eventually, incorrectly (dbstruct passed to mt:test-set-state-status-by-id, which previosly did more, but now only passes thru to db:set-state-status-and-roll-up-items.
-                   ((test-set-state-status-by-id)
-                    ;; (define (db:set-state-status-and-roll-up-items dbstruct run-id test-name item-path state status comment)
-                    (db:set-state-status-and-roll-up-items
-                     dbstruct
-                     (list-ref params 0) ; run-id
-                     (list-ref params 1) ; test-name
-                     #f                  ; item-path
-                     (list-ref params 2) ; state
-                     (list-ref params 3) ; status
-                     (list-ref params 4) ; comment
-                     ))
-                   ((delete-test-records)             (apply db:delete-test-records dbstruct params))
-                   ((delete-old-deleted-test-records) (apply db:delete-old-deleted-test-records dbstruct params))
-                   ((test-set-state-status)           (apply db:test-set-state-status dbstruct params))
-                   ((test-set-top-process-pid)        (apply db:test-set-top-process-pid dbstruct params))
-                   ((set-state-status-and-roll-up-items) (apply db:set-state-status-and-roll-up-items dbstruct params))
-                   ((set-state-status-and-roll-up-run) (apply db:set-state-status-and-roll-up-run dbstruct params)) 
-                   ((top-test-set-per-pf-counts)      (apply db:top-test-set-per-pf-counts dbstruct params))
-                   ((test-set-archive-block-id)       (apply db:test-set-archive-block-id dbstruct params))
-                   ;; RUNS
-                   ((register-run)                 (apply db:register-run dbstruct params))
-                   ((set-tests-state-status)       (apply db:set-tests-state-status dbstruct params))
-                   ((delete-run)                   (apply db:delete-run dbstruct params))
-                   ((lock/unlock-run)              (apply db:lock/unlock-run dbstruct params))
-                   ((update-run-event_time)        (apply db:update-run-event_time dbstruct params))
-                   ((update-run-stats)             (apply db:update-run-stats dbstruct params))
-                   ((set-var)                      (apply db:set-var dbstruct params))
-                   ((inc-var)                      (apply db:inc-var dbstruct params))
-		   ((dec-var)                      (apply db:dec-var dbstruct params))
-                   ((del-var)                      (apply db:del-var dbstruct params))
-		   ((add-var)                      (apply db:add-var dbstruct params))
-                   ;; STEPS
-                   ((teststep-set-status!)         (apply db:teststep-set-status! dbstruct params))
-                   ((delete-steps-for-test!)        (apply db:delete-steps-for-test! dbstruct params))
-                   ;; TEST DATA
-                   ((test-data-rollup)             (apply db:test-data-rollup dbstruct params))
-                   ((csv->test-data)               (apply db:csv->test-data dbstruct params))
-                   ;; MISC
-                   ((sync-inmem->db)               (let ((run-id (car params)))
-                                                     (db:sync-touched dbstruct run-id force-sync: #t)))
-                   ((mark-incomplete)              (apply db:find-and-mark-incomplete dbstruct params))
-                   ((create-all-triggers)          (db:create-all-triggers dbstruct))
-                   ((drop-all-triggers)            (db:drop-all-triggers dbstruct)) 
-                   ;; TESTMETA
-                   ((testmeta-add-record)       (apply db:testmeta-add-record dbstruct params))
-                   ((testmeta-update-field)     (apply db:testmeta-update-field dbstruct params))
-                   ((get-tests-tags)            (db:get-tests-tags dbstruct))
-                   ;; TASKS
-                   ((tasks-add)                 (apply tasks:add dbstruct params))   
-                   ((tasks-set-state-given-param-key) (apply tasks:set-state-given-param-key dbstruct params))
-                   ((tasks-get-last)            (apply tasks:get-last dbstruct params))
-		   ;; NO SYNC DB
-		   ((no-sync-set)               (apply db:no-sync-set         *no-sync-db* params))
-		   ((no-sync-get/default)       (apply db:no-sync-get/default *no-sync-db* params))
-		   ((no-sync-del!)              (apply db:no-sync-del!        *no-sync-db* params))
-		   ((no-sync-get-lock)          (apply db:no-sync-get-lock    *no-sync-db* params))
-                   ;; ARCHIVES
-                   ;; ((archive-get-allocations)   
-                   ((archive-register-disk)     (apply db:archive-register-disk dbstruct params))
-                   ((archive-register-block-name)(apply db:archive-register-block-name dbstruct params))
-                   ;; ((archive-allocate-testsuite/area-to-block)(apply db:archive-allocate-testsuite/area-to-block dbstruct block-id testsuite-name areakey))
-                   ;;======================================================================
-                   ;; READ ONLY QUERIES
-                   ;;======================================================================
-                   ;; KEYS
-                   ((get-key-val-pairs)               (apply db:get-key-val-pairs dbstruct params))
-                   ((get-keys)                        (db:get-keys dbstruct))
-                   ((get-key-vals)                    (apply db:get-key-vals dbstruct params))
-                   ((get-target)                      (apply db:get-target dbstruct params))
-                   ((get-targets)                     (db:get-targets dbstruct))
-                   ;; ARCHIVES
-                   ((test-get-archive-block-info)     (apply db:test-get-archive-block-info dbstruct params))
-                   ;; TESTS
-                   ((test-toplevel-num-items)         (apply db:test-toplevel-num-items dbstruct params))
-                   ((get-test-info-by-id)	       (apply db:get-test-info-by-id dbstruct params))
-                   ((test-get-rundir-from-test-id)    (apply db:test-get-rundir-from-test-id dbstruct params))
-                   ((get-count-tests-running-for-testname) (apply db:get-count-tests-running-for-testname dbstruct params))
-                   ((get-count-tests-running)         (apply db:get-count-tests-running dbstruct params))
-                   ((get-count-tests-running-in-jobgroup) (apply db:get-count-tests-running-in-jobgroup dbstruct params))
-                   ;; ((delete-test-step-records)        (apply db:delete-test-step-records dbstruct params))
-                   ;; ((get-previous-test-run-record)    (apply db:get-previous-test-run-record dbstruct params))
-                   ((get-matching-previous-test-run-records)(apply db:get-matching-previous-test-run-records dbstruct params))
-                   ((test-get-logfile-info)           (apply db:test-get-logfile-info dbstruct params))
-                   ((test-get-records-for-index-file)  (apply db:test-get-records-for-index-file dbstruct params))
-                   ((get-testinfo-state-status)       (apply db:get-testinfo-state-status dbstruct params))
-                   ((test-get-top-process-pid)        (apply db:test-get-top-process-pid dbstruct params))
-                   ((test-get-paths-matching-keynames-target-new) (apply db:test-get-paths-matching-keynames-target-new dbstruct params))
-                   ((get-prereqs-not-met)             (apply db:get-prereqs-not-met dbstruct params))
-                   ((get-count-tests-running-for-run-id) (apply db:get-count-tests-running-for-run-id dbstruct params))
-                   ((get-not-completed-cnt)           (apply db:get-not-completed-cnt  dbstruct params)) 
-                   ;; ((synchash-get)                    (apply synchash:server-get dbstruct params))
-                   ((get-raw-run-stats)               (apply db:get-raw-run-stats dbstruct params))
-		   ((get-test-times)                  (apply db:get-test-times dbstruct params))
-                   ;; RUNS
-                   ((get-run-info)                 (apply db:get-run-info dbstruct params))
-                   ((get-run-status)               (apply db:get-run-status dbstruct params))
-                   ((get-run-state)                (apply db:get-run-state dbstruct params))
-                   ((set-run-status)               (apply db:set-run-status dbstruct params))
-                   ((set-run-state-status)  			 (apply db:set-run-state-status dbstruct params))
-                   ((update-tesdata-on-repilcate-db) (apply db:update-tesdata-on-repilcate-db  dbstruct params)) 
-                   ((get-tests-for-run)            (apply db:get-tests-for-run dbstruct params))
-                   ((get-tests-for-run-state-status) (apply db:get-tests-for-run-state-status dbstruct params))
-                   ((get-test-id)                  (apply db:get-test-id dbstruct params))
-                   ((get-tests-for-run-mindata)    (apply db:get-tests-for-run-mindata dbstruct params))
-                   ;; ((get-tests-for-runs-mindata)   (apply db:get-tests-for-runs-mindata dbstruct params))
-                   ((get-runs)                     (apply db:get-runs dbstruct params))
-                   ((simple-get-runs)              (apply db:simple-get-runs dbstruct params))
-                   ((get-num-runs)                 (apply db:get-num-runs dbstruct params))
-                   ((get-runs-cnt-by-patt)         (apply db:get-runs-cnt-by-patt dbstruct params))
-                   ((get-all-run-ids)              (db:get-all-run-ids dbstruct))
-                   ((get-prev-run-ids)             (apply db:get-prev-run-ids dbstruct params))
-                   ((get-run-ids-matching-target)  (apply db:get-run-ids-matching-target dbstruct params))
-                   ((get-runs-by-patt)             (apply db:get-runs-by-patt dbstruct params))
-                   ((get-run-name-from-id)         (apply db:get-run-name-from-id dbstruct params))
-                   ((get-main-run-stats)           (apply db:get-main-run-stats dbstruct params))
-                   ((get-var)                      (apply db:get-var dbstruct params))
-                   ((get-run-stats)                (apply db:get-run-stats dbstruct params))
-                   ((get-run-times)                (apply db:get-run-times dbstruct params)) 
-                   ;; STEPS
-                   ((get-steps-data)               (apply db:get-steps-data dbstruct params))
-                   ((get-steps-for-test)           (apply db:get-steps-for-test dbstruct params))
-		   ((get-steps-info-by-id)         (apply db:get-steps-info-by-id dbstruct params))
-                   ;; TEST DATA
-                   ((read-test-data)               (apply db:read-test-data dbstruct params))
-                   ((read-test-data-varpatt)       (apply db:read-test-data-varpatt dbstruct params))
-                   ((get-data-info-by-id)          (apply db:get-data-info-by-id dbstruct params)) 
-                   ;; MISC
-                   ((get-latest-host-load)         (apply db:get-latest-host-load dbstruct params))
-                   ((have-incompletes?)            (apply db:have-incompletes? dbstruct params))
-                   ((login)                        (apply db:login dbstruct params))
-                   ((general-call)                 (let ((stmtname   (car params))
-                                                         (run-id     (cadr params))
-                                                         (realparams (cddr params)))
-                                                     (db:general-call dbstruct stmtname realparams)))
-                   ((sdb-qry)                      (apply sdb:qry params))
-                   ((ping)                         (current-process-id))
-		   ((get-changed-record-ids)       (apply db:get-changed-record-ids dbstruct params))
-		   ((get-run-record-ids) 	   (apply db:get-run-record-ids dbstruct params))	
-                   ;; TESTMETA
-                   ((testmeta-get-record)       (apply db:testmeta-get-record dbstruct params))
-                   ;; TASKS 
-                   ((find-task-queue-records)   (apply tasks:find-task-queue-records dbstruct params))
-		   (else
-		    (debug:print 0 *default-log-port* "ERROR: bad api call " cmd)
-		    (conc "ERROR: BAD api call " cmd))))))
-       ;; save all stats
-       (let ((delta-t (- (current-milliseconds)
-			 start-t)))
-	 (hash-table-set! *db-api-call-time* cmd
-			  (cons delta-t (hash-table-ref/default *db-api-call-time* cmd '()))))
-       (if writecmd-in-readonly-mode
-           (begin
-             #;(common:telemetry-log (conc "api-out:"(->string cmd))
-                                   payload: `((params . ,params)
-                                              (ok-res . #t)))
-	     (vector #f res))
-           (begin
-             #;(common:telemetry-log (conc "api-out:"(->string cmd))
-                                   payload: `((params . ,params)
-                                              (ok-res . #f)))
-             (vector #t res))))))))
+            (writecmd-in-readonly-mode (and readonly-mode (not readonly-command))))
+       (if (not readonly-command)
+	   (mutex-lock! write-mutex))
+       (let* ((res    
+               (if writecmd-in-readonly-mode
+                   (conc "attempt to run write command "cmd" on a read-only database")
+                   (case cmd
+                     ;;===============================================
+                     ;; READ/WRITE QUERIES
+                     ;;===============================================
+                     ((get-keys-write)                        (db:get-keys dbstruct)) ;; force a dummy "write" query to force server; for debug in -repl
+                     ;; SERVERS
+                     ((start-server)                    (apply server:kind-run params))
+                     ((kill-server)                     (set! *server-run* #f))
+                     ;; TESTS
+                     ;;((test-set-state-status-by-id)     (apply mt:test-set-state-status-by-id dbstruct params))
+                     ;;BB - commented out above because it was calling below, eventually, incorrectly (dbstruct passed to mt:test-set-state-status-by-id, which previosly did more, but now only passes thru to db:set-state-status-and-roll-up-items.
+                     ((test-set-state-status-by-id)
+                      ;; (define (db:set-state-status-and-roll-up-items dbstruct run-id test-name item-path state status comment)
+                      (db:set-state-status-and-roll-up-items
+                       dbstruct
+                       (list-ref params 0) ; run-id
+                       (list-ref params 1) ; test-name
+                       #f                  ; item-path
+                       (list-ref params 2) ; state
+                       (list-ref params 3) ; status
+                       (list-ref params 4) ; comment
+                       ))
+                     ((delete-test-records)             (apply db:delete-test-records dbstruct params))
+                     ((delete-old-deleted-test-records) (apply db:delete-old-deleted-test-records dbstruct params))
+                     ((test-set-state-status)           (apply db:test-set-state-status dbstruct params))
+                     ((test-set-top-process-pid)        (apply db:test-set-top-process-pid dbstruct params))
+                     ((set-state-status-and-roll-up-items) (apply db:set-state-status-and-roll-up-items dbstruct params))
+                     ((set-state-status-and-roll-up-run) (apply db:set-state-status-and-roll-up-run dbstruct params)) 
+                     ((top-test-set-per-pf-counts)      (apply db:top-test-set-per-pf-counts dbstruct params))
+                     ((test-set-archive-block-id)       (apply db:test-set-archive-block-id dbstruct params))
+                     ;; RUNS
+                     ((register-run)                 (apply db:register-run dbstruct params))
+                     ((set-tests-state-status)       (apply db:set-tests-state-status dbstruct params))
+                     ((delete-run)                   (apply db:delete-run dbstruct params))
+                     ((lock/unlock-run)              (apply db:lock/unlock-run dbstruct params))
+                     ((update-run-event_time)        (apply db:update-run-event_time dbstruct params))
+                     ((update-run-stats)             (apply db:update-run-stats dbstruct params))
+                     ((set-var)                      (apply db:set-var dbstruct params))
+                     ((inc-var)                      (apply db:inc-var dbstruct params))
+		     ((dec-var)                      (apply db:dec-var dbstruct params))
+                     ((del-var)                      (apply db:del-var dbstruct params))
+		     ((add-var)                      (apply db:add-var dbstruct params))
+                     ;; STEPS
+                     ((teststep-set-status!)         (apply db:teststep-set-status! dbstruct params))
+                     ((delete-steps-for-test!)       (apply db:delete-steps-for-test! dbstruct params))
+                     ;; TEST DATA
+                     ((test-data-rollup)             (apply db:test-data-rollup dbstruct params))
+                     ((csv->test-data)               (apply db:csv->test-data dbstruct params))
+                     ;; MISC
+                     ((sync-inmem->db)               (let ((run-id (car params)))
+                                                       (db:sync-touched dbstruct run-id db:initialize-main-db force-sync: #t)))
+                     ((mark-incomplete)              (apply db:find-and-mark-incomplete dbstruct params))
+                     ((create-all-triggers)          (db:create-all-triggers dbstruct))
+                     ((drop-all-triggers)            (db:drop-all-triggers dbstruct)) 
+                     ;; TESTMETA
+                     ((testmeta-add-record)       (apply db:testmeta-add-record dbstruct params))
+                     ((testmeta-update-field)     (apply db:testmeta-update-field dbstruct params))
+                     ((get-tests-tags)            (db:get-tests-tags dbstruct))
+                     ;; TASKS
+                     ((tasks-add)                 (apply tasks:add dbstruct params))   
+                     ((tasks-set-state-given-param-key) (apply tasks:set-state-given-param-key dbstruct params))
+                     ((tasks-get-last)            (apply tasks:get-last dbstruct params))
+		     ;; NO SYNC DB
+		     ((no-sync-set)               (apply db:no-sync-set         *no-sync-db* params))
+		     ((no-sync-get/default)       (apply db:no-sync-get/default *no-sync-db* params))
+		     ((no-sync-del!)              (apply db:no-sync-del!        *no-sync-db* params))
+		     ((no-sync-get-lock)          (apply db:no-sync-get-lock    *no-sync-db* params))
+                     ;; ARCHIVES
+                     ;; ((archive-get-allocations)   
+                     ((archive-register-disk)     (apply db:archive-register-disk dbstruct params))
+                     ((archive-register-block-name)(apply db:archive-register-block-name dbstruct params))
+                     ;; ((archive-allocate-testsuite/area-to-block)(apply db:archive-allocate-testsuite/area-to-block dbstruct block-id testsuite-name areakey))
+                     ;;======================================================================
+                     ;; READ ONLY QUERIES
+                     ;;======================================================================
+                     ;; KEYS
+                     ((get-key-val-pairs)               (apply db:get-key-val-pairs dbstruct params))
+                     ((get-keys)                        (db:get-keys dbstruct))
+                     ((get-key-vals)                    (apply db:get-key-vals dbstruct params))
+                     ((get-target)                      (apply db:get-target dbstruct params))
+                     ((get-targets)                     (db:get-targets dbstruct))
+                     ;; ARCHIVES
+                     ((test-get-archive-block-info)     (apply db:test-get-archive-block-info dbstruct params))
+                     ;; TESTS
+                     ((test-toplevel-num-items)         (apply db:test-toplevel-num-items dbstruct params))
+                     ((get-test-info-by-id)	       (apply db:get-test-info-by-id dbstruct params))
+                     ((test-get-rundir-from-test-id)    (apply db:test-get-rundir-from-test-id dbstruct params))
+                     ((get-count-tests-running-for-testname) (apply db:get-count-tests-running-for-testname dbstruct params))
+                     ((get-count-tests-running)         (apply db:get-count-tests-running dbstruct params))
+                     ((get-count-tests-running-in-jobgroup) (apply db:get-count-tests-running-in-jobgroup dbstruct params))
+                     ;; ((delete-test-step-records)        (apply db:delete-test-step-records dbstruct params))
+                     ;; ((get-previous-test-run-record)    (apply db:get-previous-test-run-record dbstruct params))
+                     ((get-matching-previous-test-run-records)(apply db:get-matching-previous-test-run-records dbstruct params))
+                     ((test-get-logfile-info)           (apply db:test-get-logfile-info dbstruct params))
+                     ((test-get-records-for-index-file)  (apply db:test-get-records-for-index-file dbstruct params))
+                     ((get-testinfo-state-status)       (apply db:get-testinfo-state-status dbstruct params))
+                     ((test-get-top-process-pid)        (apply db:test-get-top-process-pid dbstruct params))
+                     ((test-get-paths-matching-keynames-target-new) (apply db:test-get-paths-matching-keynames-target-new dbstruct params))
+                     ((get-prereqs-not-met)             (apply db:get-prereqs-not-met dbstruct params))
+                     ((get-count-tests-running-for-run-id) (apply db:get-count-tests-running-for-run-id dbstruct params))
+                     ((get-not-completed-cnt)           (apply db:get-not-completed-cnt  dbstruct params)) 
+                     ;; ((synchash-get)                    (apply synchash:server-get dbstruct params))
+                     ((get-raw-run-stats)               (apply db:get-raw-run-stats dbstruct params))
+		     ((get-test-times)                  (apply db:get-test-times dbstruct params))
+                     ;; RUNS
+                     ((get-run-info)                 (apply db:get-run-info dbstruct params))
+                     ((get-run-status)               (apply db:get-run-status dbstruct params))
+                     ((get-run-state)                (apply db:get-run-state dbstruct params))
+                     ((set-run-status)               (apply db:set-run-status dbstruct params))
+                     ((set-run-state-status)  			 (apply db:set-run-state-status dbstruct params))
+                     ((update-tesdata-on-repilcate-db) (apply db:update-tesdata-on-repilcate-db  dbstruct params)) 
+                     ((get-tests-for-run)            (apply db:get-tests-for-run dbstruct params))
+                     ((get-tests-for-run-state-status) (apply db:get-tests-for-run-state-status dbstruct params))
+                     ((get-test-id)                  (apply db:get-test-id dbstruct params))
+                     ((get-tests-for-run-mindata)    (apply db:get-tests-for-run-mindata dbstruct params))
+                     ;; ((get-tests-for-runs-mindata)   (apply db:get-tests-for-runs-mindata dbstruct params))
+                     ((get-runs)                     (apply db:get-runs dbstruct params))
+                     ((simple-get-runs)              (apply db:simple-get-runs dbstruct params))
+                     ((get-num-runs)                 (apply db:get-num-runs dbstruct params))
+                     ((get-runs-cnt-by-patt)         (apply db:get-runs-cnt-by-patt dbstruct params))
+                     ((get-all-run-ids)              (db:get-all-run-ids dbstruct))
+                     ((get-prev-run-ids)             (apply db:get-prev-run-ids dbstruct params))
+                     ((get-run-ids-matching-target)  (apply db:get-run-ids-matching-target dbstruct params))
+                     ((get-runs-by-patt)             (apply db:get-runs-by-patt dbstruct params))
+                     ((get-run-name-from-id)         (apply db:get-run-name-from-id dbstruct params))
+                     ((get-main-run-stats)           (apply db:get-main-run-stats dbstruct params))
+                     ((get-var)                      (apply db:get-var dbstruct params))
+                     ((get-run-stats)                (apply db:get-run-stats dbstruct params))
+                     ((get-run-times)                (apply db:get-run-times dbstruct params)) 
+                     ;; STEPS
+                     ((get-steps-data)               (apply db:get-steps-data dbstruct params))
+                     ((get-steps-for-test)           (apply db:get-steps-for-test dbstruct params))
+		     ((get-steps-info-by-id)         (apply db:get-steps-info-by-id dbstruct params))
+                     ;; TEST DATA
+                     ((read-test-data)               (apply db:read-test-data dbstruct params))
+                     ((read-test-data-varpatt)       (apply db:read-test-data-varpatt dbstruct params))
+                     ((get-data-info-by-id)          (apply db:get-data-info-by-id dbstruct params)) 
+                     ;; MISC
+                     ((get-latest-host-load)         (apply db:get-latest-host-load dbstruct params))
+                     ((have-incompletes?)            (apply db:have-incompletes? dbstruct params))
+                     ((login)                        (apply db:login dbstruct params))
+                     ((general-call)                 (let ((stmtname   (car params))
+                                                           (run-id     (cadr params))
+                                                           (realparams (cddr params)))
+                                                       (db:general-call dbstruct run-id stmtname realparams)))
+                     ((sdb-qry)                      (apply sdb:qry params))
+                     ((ping)                         (current-process-id))
+		     ((get-changed-record-ids)       (apply db:get-changed-record-ids dbstruct params))
+		     ((get-run-record-ids) 	   (apply db:get-run-record-ids dbstruct params))	
+                     ;; TESTMETA
+                     ((testmeta-get-record)       (apply db:testmeta-get-record dbstruct params))
+                     ;; TASKS 
+                     ((find-task-queue-records)   (apply tasks:find-task-queue-records dbstruct params))
+		     (else
+		      (debug:print 0 *default-log-port* "ERROR: bad api call " cmd)
+		      (conc "ERROR: BAD api call " cmd))))))
+	 (if (not readonly-command)
+	     (mutex-unlock! write-mutex))
+	 ;; save all stats
+	 (let ((delta-t (- (current-milliseconds)
+			   start-t))
+	       (modified-cmd (if (eq? cmd 'general-call)
+				 (string->symbol (conc "general-call-" (car params)))
+				 cmd)))
+	   (hash-table-set! *db-api-call-time* modified-cmd
+			    (cons delta-t (hash-table-ref/default *db-api-call-time* modified-cmd '()))))
+	 (if writecmd-in-readonly-mode
+             (begin
+               #;(common:telemetry-log (conc "api-out:"(->string cmd))
+               payload: `((params . ,params)
+               (ok-res . #t)))
+	       (vector #f res))
+             (begin
+               #;(common:telemetry-log (conc "api-out:"(->string cmd))
+               payload: `((params . ,params)
+               (ok-res . #f)))
+               (vector #t res))))))))
 ;; http-server  send-response
 ;;                 api:process-request
 ;;                    db:*

Index: archive.scm
--- archive.scm
+++ archive.scm
@@ -397,11 +397,11 @@
          (bup-restore-params  (list "-d" archive-path "restore" "-C" *toppath* archive-internal-path)))
 		 (debug:print-info 2 *default-log-port* "Restoring archived data to " *toppath* " from archive in " archive-path " ... " archive-internal-path)
 		 (run-n-wait bup-exe params: bup-restore-params print-cmd: "Running:"))
       (sleep 2)
-       (db:setup #f)
+       (db:setup #t) ;; (db:setup-db *dbstruct-dbs* *toppath* #f)

ADDED   attic/filedb.scm
Index: attic/filedb.scm
--- /dev/null
+++ attic/filedb.scm
@@ -0,0 +1,255 @@
+;; Copyright 2006-2011, Matthew Welland.
+;; This file is part of Megatest.
+;;     Megatest is free software: you can redistribute it and/or modify
+;;     it under the terms of the GNU General Public License as published by
+;;     the Free Software Foundation, either version 3 of the License, or
+;;     (at your option) any later version.
+;;     Megatest is distributed in the hope that it will be useful,
+;;     but WITHOUT ANY WARRANTY; without even the implied warranty of
+;;     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;;     GNU General Public License for more details.
+;;     You should have received a copy of the GNU General Public License
+;;     along with Megatest.  If not, see <http://www.gnu.org/licenses/>.
+;; (require-extension synch sqlite3 posix srfi-13 srfi-1 utils regex)
+(use sqlite3 srfi-1 posix regex srfi-69 srfi-13 posix-extras)
+(import (prefix sqlite3 sqlite3:))
+(declare (unit filedb))
+(include "fdb_records.scm")
+;; (include "settings.scm")
+;; Open the sqlite3 file database at dbpath and return a filedb:fdb record
+;; describing it.
+;;
+;; The record is filled with the db path and three empty hash tables
+;; (pathcache, idcache, partcache). When dbpath did not exist beforehand the
+;; schema (names, paths, bases plus indexes) is created.
+;;
+;; The sqlite3 handle is finalized before returning and the record's db slot
+;; is set to #f, so callers must reopen (filedb:reopen-db) before querying.
+(define (filedb:open-db dbpath)
+  (let* ((fdb      (make-filedb:fdb))
+         (dbexists (common:file-exists? dbpath))
+         (db       (sqlite3:open-database dbpath)))
+    (filedb:fdb-set-db!        fdb db)
+    (filedb:fdb-set-dbpath!    fdb dbpath)
+    (filedb:fdb-set-pathcache! fdb (make-hash-table))
+    (filedb:fdb-set-idcache!   fdb (make-hash-table))
+    (filedb:fdb-set-partcache! fdb (make-hash-table))
+    (sqlite3:set-busy-handler!  db (make-busy-timeout 136000))
+    (if (not dbexists)
+        (begin
+          (sqlite3:execute db "PRAGMA synchronous = OFF;")
+          ;; names is for future use - change path in paths table to path_id.
+          ;; The column type must be TEXT: an unrecognised type name would give
+          ;; the column NUMERIC affinity and numeric-looking names would be
+          ;; stored as numbers.
+          (sqlite3:execute db "CREATE TABLE names (id INTEGER PRIMARY KEY,name TEXT);")
+          (sqlite3:execute db "CREATE INDEX name_index ON names (name);")
+          ;; NB// We store a useful subset of file attributes but do not attempt to store all
+          (sqlite3:execute db "CREATE TABLE paths (id        INTEGER PRIMARY KEY,
+                                                   path      TEXT,
+                                                   parent_id INTEGER,
+                                                   mode      INTEGER DEFAULT -1,
+                                                   uid       INTEGER DEFAULT -1,
+                                                   gid       INTEGER DEFAULT -1,
+                                                   size      INTEGER DEFAULT -1,
+                                                   mtime     INTEGER DEFAULT -1);")
+          (sqlite3:execute db "CREATE INDEX path_index ON paths (path,parent_id);")
+          (sqlite3:execute db "CREATE TABLE bases (id INTEGER PRIMARY KEY,base TEXT,                  updated TIMESTAMP);")))
+    ;; close the sqlite3 db and open it as needed
+    (filedb:finalize-db! fdb)
+    (filedb:fdb-set-db! fdb #f)
+    fdb))
+;; Open a new sqlite3 handle on the path stored in fdb, store it in the
+;; record's db slot and install the busy handler on it.
+(define (filedb:reopen-db fdb)
+  (let* ((dbpath (filedb:fdb-get-dbpath fdb))
+         (handle (sqlite3:open-database dbpath)))
+    (filedb:fdb-set-db! fdb handle)
+    (sqlite3:set-busy-handler! handle (make-busy-timeout 136000))))
+;; Finalize the sqlite3 handle currently stored in fdb. The db slot itself is
+;; not cleared here.
+(define (filedb:finalize-db! fdb)
+  (let ((handle (filedb:fdb-get-db fdb)))
+    (sqlite3:finalize! handle)))
+;; Return the current local time as a string, with the trailing newline
+;; produced by time->string chomped off.
+(define (filedb:get-current-time-string)
+  (let* ((now   (seconds->local-time (current-seconds)))
+         (stamp (time->string now)))
+    (string-chomp stamp)))
+;; Look up the id of the bases row whose base equals path.
+;; Returns the id of the last matching row, or #f when there is none.
+(define (filedb:get-base-id db path)
+  (let* ((found     #f)
+         (query     (sqlite3:prepare db "SELECT id FROM bases WHERE base=?;"))
+         (remember! (lambda (row-id) (set! found row-id))))
+    (sqlite3:for-each-row remember! query path)
+    (sqlite3:finalize! query)
+    found))
+;; Look up the id of the paths row with the given path component and
+;; parent id. Returns the id of the last matching row, or #f when none.
+(define (filedb:get-path-id db path parent)
+  (let* ((found     #f)
+         (query     (sqlite3:prepare db "SELECT id FROM paths WHERE path=? AND parent_id=?;"))
+         (remember! (lambda (row-id) (set! found row-id))))
+    (sqlite3:for-each-row remember! query path parent)
+    (sqlite3:finalize! query)
+    found))
+;; Insert path into the bases table, stamped with the current time string,
+;; unless a row for it already exists. Returns #f when the base was already
+;; present, otherwise whatever the insert returns.
+(define (filedb:add-base db path)
+  (and (not (filedb:get-base-id db path))
+       (sqlite3:execute db "INSERT INTO bases (base,updated) VALUES (?,?);"
+                        path
+                        (filedb:get-current-time-string))))
+;; index 	value 	field 	notes
+;; 0 	inode number 	st_ino 	
+;; 1 	mode 	st_mode 	bitfield combining file permissions and file type
+;; 2 	number of hard links 	st_nlink 	
+;; 3 	UID of owner 	st_uid 	as with file-owner
+;; 4 	GID of owner 	st_gid 	
+;; 5 	size 	st_size 	as with file-size
+;; 6 	access time 	st_atime 	as with file-access-time
+;; 7 	change time 	st_ctime 	as with file-change-time
+;; 8 	modification time 	st_mtime 	as with file-modification-time
+;; 9 	parent device ID 	st_dev 	ID of device on which this file resides
+;; 10 	device ID 	st_rdev 	device ID for special files (i.e. the raw major/minor number)
+;; 11 	block size 	st_blksize 	
+;; 12 	number of blocks allocated 	st_blocks 	
+;; Insert one paths row for path under parent, taking mode, uid, gid, size
+;; and mtime from slots 1, 3, 4, 5 and 8 of the statinfo vector.
+(define (filedb:add-path-stat db path parent statinfo)
+  (let ((field  (lambda (idx) (vector-ref statinfo idx)))
+        (insert (sqlite3:prepare db "INSERT INTO paths (path,parent_id,mode,uid,gid,size,mtime) VALUES (?,?,?,?,?,?,?);")))
+    (sqlite3:execute insert
+                     path
+                     parent
+                     (field 1)   ;; mode
+                     (field 3)   ;; uid
+                     (field 4)   ;; gid
+                     (field 5)   ;; size
+                     (field 8))  ;; mtime
+    (sqlite3:finalize! insert)))
+;; Insert one paths row for path under parent; the remaining columns keep
+;; their table defaults.
+(define (filedb:add-path db path parent)
+  (let ((insert (sqlite3:prepare db "INSERT INTO paths (path,parent_id) VALUES (?,?);")))
+    (sqlite3:execute insert path parent)
+    (sqlite3:finalize! insert)))
+;; Register path in the paths table, one row per "/"-separated component,
+;; and return the id of the row for the final component.
+;;
+;;   fdb       - filedb:fdb record; its db is reopened if currently #f
+;;   path      - path string to register
+;;   save-stat - when true, newly added rows also record stat fields
+;;
+;; Results are memoized in the record's pathcache keyed by the full path.
+;; Returns #f when path has no components (e.g. "" or "/").
+;;
+;; NOTE: the stat vector is taken once from the full path and is stored on
+;; every component row that has to be created, not only the last one.
+(define (filedb:register-path fdb path #!key (save-stat #f))
+  ;; Reopen before binding db below; binding first would leave the local
+  ;; holding #f even after the reopen.
+  (if (not (filedb:fdb-get-db fdb)) (filedb:reopen-db fdb))
+  (let* ((db        (filedb:fdb-get-db        fdb))
+         (pathcache (filedb:fdb-get-pathcache fdb))
+         (stat      (if save-stat (file-stat path #t)))
+         (id        (hash-table-ref/default pathcache path #f))
+         (plist     (string-split path "/")))
+    (cond
+     (id id)
+     ;; nothing to walk; taking the car of an empty list would fail
+     ((null? plist) #f)
+     (else
+      (let loop ((head   (car plist))
+                 (tail   (cdr plist))
+                 (parent 0))
+        (let ((id   (filedb:get-path-id db head parent))
+              (done (null? tail)))
+          (if id          ;; we'll have a id if the path is already registered
+              (if done
+                  (begin
+                    (hash-table-set! pathcache path id)
+                    id) ;; return the last path id for a result
+                  (loop (car tail) (cdr tail) id))
+              (begin      ;; add the path and then repeat the loop with the same data
+                (if save-stat
+                    (filedb:add-path-stat db head parent stat)
+                    (filedb:add-path db head parent))
+                (loop head tail parent)))))))))
+;; Run `find -L <path>` and register every line it prints via
+;; filedb:register-path, printing a progress line every 100 entries and a
+;; final count.
+;;
+;;   fdb       - filedb:fdb record passed through to filedb:register-path
+;;   path      - directory handed to find
+;;   save-stat - forwarded to filedb:register-path
+;;
+;; NOTE(review): path is concatenated into the shell command unquoted;
+;; paths containing spaces or shell metacharacters will misbehave.
+(define (filedb:update-recursively fdb path #!key (save-stat #f))
+  (let ((p (open-input-pipe (string-append "find -L " path))))
+    (print "processed 0 files...")
+    (let loop ((l  (read-line p))
+               (lc 0)) ;; line count
+      (if (eof-object? l)
+          (begin
+            (print "          " lc " files")
+            ;; a port from open-input-pipe is closed with close-input-pipe,
+            ;; which also waits for the find subprocess to finish
+            (close-input-pipe p))
+          (begin
+            (filedb:register-path fdb l save-stat: save-stat) ;; don't like losing the original path info
+            (if (= (modulo lc 100) 0)
+                (print "          " lc " files"))
+            (loop (read-line p) (+ lc 1)))))))
+;; Record path in the bases table and then register everything below it.
+;;
+;;   fdb       - filedb:fdb record
+;;   path      - base directory to scan
+;;   save-stat - forwarded to filedb:update-recursively
+(define (filedb:update fdb path #!key (save-stat #f))
+  ;; filedb:open-db hands back a record whose db slot is #f, so make sure
+  ;; there is a live handle before passing it to filedb:add-base.
+  (if (not (filedb:fdb-get-db fdb)) (filedb:reopen-db fdb))
+  ;; first get the realpath and add it to the bases table
+  (let ((real-path path) ;; (filedb:get-real-path path))
+        (db        (filedb:fdb-get-db fdb)))
+    (filedb:add-base db real-path)
+    (filedb:update-recursively fdb path save-stat: save-stat)))
+;; not used and broken
+;;
+;; Runs an external command on path and returns the first line it prints,
+;; or path itself when the command prints nothing.
+;;
+;; NOTE(review): `real-path` is not defined in the visible source -
+;; presumably it once named the realpath executable; confirm before use.
+;; NOTE(review): regexp-escape quotes for regular expressions, not for the
+;; shell, so unusual paths are not safely passed to the command.
+(define (filedb:get-real-path path)
+  (let* ((p   (open-input-pipe (string-append real-path " " (regexp-escape path))))
+         (pth (read-line p)))
+    ;; close the pipe on every path, including when nothing was read
+    (close-input-pipe p)
+    (if (eof-object? pth)
+        path
+        pth)))
+;; Placeholder for removing a base: not implemented. Both arguments are
+;; ignored and only a notice is printed.
+(define (filedb:drop-base fdb path)
+  (print "Sorry, I don't do anything yet"))
+;; Find every paths row whose path component is LIKE pattern, call action on
+;; each id as it is found, and return the ids as a list (most recently
+;; visited first).
+(define (filedb:find-all fdb pattern action)
+  (let* ((handle  (filedb:fdb-get-db fdb))
+         (query   (sqlite3:prepare handle "SELECT id FROM paths WHERE path like ?;"))
+         (matches '())
+         (visit   (lambda (row-id)
+                    (action row-id)
+                    (set! matches (cons row-id matches)))))
+    (sqlite3:for-each-row visit query pattern)
+    (sqlite3:finalize! query)
+    matches))
+;; Return (path parent_id) for the paths row with the given id, or #f when
+;; no such row exists. Lookups go through the record's partcache first and
+;; the query result (including #f) is stored back into it.
+(define (filedb:get-path-record fdb id)
+  (let ((handle (filedb:fdb-get-db        fdb))
+        (cache  (filedb:fdb-get-partcache fdb)))
+    (or (hash-table-ref/default cache id #f)
+        (let ((query  (sqlite3:prepare handle "SELECT path,parent_id FROM paths WHERE id=?;"))
+              (record #f))
+          (sqlite3:for-each-row
+           (lambda (path parent_id)
+             (set! record (list path parent_id)))
+           query id)
+          (hash-table-set! cache id record)
+          (sqlite3:finalize! query)
+          record))))
+;; Return a list of #(id path parent_id) vectors, one per paths row whose
+;; parent_id equals parent-id (most recently visited row first).
+(define (filedb:get-children fdb parent-id)
+  (let ((handle (filedb:fdb-get-db fdb))
+        (kids   '()))
+    (sqlite3:for-each-row
+     (lambda (id path pid)
+       (set! kids (cons (vector id path pid) kids)))
+     handle
+     "SELECT id,path,parent_id FROM paths WHERE parent_id=?;"
+     parent-id)
+    kids))
+;; Return the children of parent-id as #(id path parent_id) vectors, keeping
+;; a child when it is itself the parent of some row or when its path
+;; component is LIKE search-patt.
+(define (filedb:get-children-patt fdb parent-id search-patt)
+  (let ((handle (filedb:fdb-get-db fdb))
+        (kids   '()))
+    (sqlite3:for-each-row
+     (lambda (id path pid)
+       (set! kids (cons (vector id path pid) kids)))
+     handle "SELECT id,path,parent_id FROM paths WHERE parent_id=? AND 
+            (id IN (SELECT parent_id FROM paths) OR path LIKE ?);"
+     parent-id search-patt)
+    kids))
+;; Rebuild the full "/"-joined path for the paths row id by walking
+;; parent_id links until a row with parent 0 is reached. The finished path
+;; is memoized in the record's idcache. Returns #f when some id on the way
+;; has no row.
+(define (filedb:get-path fdb id)
+  (let ((handle (filedb:fdb-get-db      fdb))
+        (cache  (filedb:fdb-get-idcache fdb)))
+    (if (not handle) (filedb:reopen-db fdb))
+    (or (hash-table-ref/default cache id #f)
+        (let climb ((node   id)
+                    (suffix ""))
+          (let ((rec (filedb:get-path-record fdb node)))
+            (and rec ;; no record: this id has no path
+                 (let* ((name   (list-ref rec 0))
+                        (parent (list-ref rec 1))
+                        (sofar  (string-append "/" name suffix)))
+                   (cond
+                    ((= parent 0) ;; root parent=0
+                     (hash-table-set! cache id sofar)
+                     sofar)
+                    (else
+                     (climb parent sofar))))))))))
+;; Print the full path of every paths row matching pattern and return the
+;; list of matching ids. The first argument is handed to filedb:find-all and
+;; filedb:get-path, both of which treat it as a filedb:fdb record.
+(define (filedb:search db pattern)
+  (filedb:find-all db
+                   pattern
+                   (lambda (id)
+                     (print (filedb:get-path db id)))))

ADDED   attic/ftail.scm
Index: attic/ftail.scm
--- /dev/null
+++ attic/ftail.scm
@@ -0,0 +1,108 @@
+;; Copyright 2017, Matthew Welland.
+;; This file is part of Megatest.
+;;     Megatest is free software: you can redistribute it and/or modify
+;;     it under the terms of the GNU General Public License as published by
+;;     the Free Software Foundation, either version 3 of the License, or
+;;     (at your option) any later version.
+;;     Megatest is distributed in the hope that it will be useful,
+;;     but WITHOUT ANY WARRANTY; without even the implied warranty of
+;;     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;;     GNU General Public License for more details.
+;;     You should have received a copy of the GNU General Public License
+;;     along with Megatest.  If not, see <http://www.gnu.org/licenses/>.
+(declare (unit ftail))
+(module ftail
+    (
+     open-tail-db
+     tail-write
+     tail-get-fid
+     file-tail
+     )
+(import scheme chicken data-structures extras)
+(use (prefix sqlite3 sqlite3:) posix typed-records)
+;; Open /tmp/<user>/megatest_logs.db (creating the directory), install the
+;; busy handler, turn synchronous off and, when the file did not exist
+;; beforehand, create the log_files and log_data tables. Returns the handle.
+(define (open-tail-db )
+  (let* ((userdir (create-directory (conc "/tmp/" (current-user-name))))
+         (dbfile  (conc userdir "/megatest_logs.db"))
+         (fresh?  (not (file-exists? dbfile)))
+         (db      (sqlite3:open-database dbfile)))
+    (sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 136000))
+    (sqlite3:execute db "PRAGMA synchronous = 0;")
+    (when fresh?
+      (for-each
+       (lambda (ddl) (sqlite3:execute db ddl))
+       (list
+        "CREATE TABLE IF NOT EXISTS log_files (id INTEGER PRIMARY KEY,filename TEXT,event_time TIMESTAMP DEFAULT (strftime('%s','now')));"
+        "CREATE TABLE IF NOT EXISTS log_data  (id INTEGER PRIMARY KEY,fid INTEGER,line TEXT,event_time TIMESTAMP DEFAULT (strftime('%s','now')));")))
+    db))
+;; Insert every element of lines into log_data under file id fid, all
+;; inside a single sqlite3 transaction.
+(define (tail-write db fid lines)
+  (let ((store-line
+         (lambda (line)
+           (sqlite3:execute db "INSERT INTO log_data (fid,line) VALUES (?,?);" fid line))))
+    (sqlite3:with-transaction
+     db
+     (lambda ()
+       (for-each store-line lines)))))
+;; Return the log_files id for fname. When the lookup yields nothing (any
+;; exception from the query is treated as "not found") a row is inserted
+;; and the lookup is retried.
+(define (tail-get-fid db fname)
+  (or (handle-exceptions
+          exn
+          #f
+        (sqlite3:first-result db "SELECT id FROM log_files WHERE filename=?;" fname))
+      (begin
+        (sqlite3:execute db "INSERT INTO log_files (filename) VALUES (?);" fname)
+        (tail-get-fid db fname))))
+;; Follow fname forever, copying its lines into the log_data table.
+;;
+;;   fname - file to read
+;;   db-in - optional already-open log db; open-tail-db is used otherwise
+;;
+;; Lines are buffered and written in batches: a batch is flushed once 20
+;; lines are pending, or, while sitting at end of file, once more than 60
+;; seconds have passed since the last flush. At end of file the loop sleeps
+;; one second and tries again, so this procedure does not return.
+(define (file-tail fname #!key (db-in #f))
+  (let* ((inp (open-input-file fname))
+         (db  (or db-in (open-tail-db)))
+         (fid (tail-get-fid db fname)))
+    (let loop ((inl    (read-line inp))
+               (lines  '())
+               (lastwr (current-seconds)))
+      (if (eof-object? inl)
+          (let ((timed-out (> (- (current-seconds) lastwr) 60)))
+            (if timed-out (tail-write db fid (reverse lines)))
+            (sleep 1)
+            (if timed-out
+                (loop (read-line inp) '() (current-seconds))
+                (loop (read-line inp) lines lastwr)))
+          (let ((savelines (> (length lines) 19)))
+            (if savelines (tail-write db fid (reverse lines)))
+            (loop (read-line inp)
+                  ;; the line just read is not part of the flushed batch, so
+                  ;; it must start the next buffer rather than be dropped
+                  (if savelines
+                      (list inl)
+                      (cons inl lines))
+                  (if savelines
+                      (current-seconds)
+                      lastwr)))))))
+;; Fetch stored lines for file id fid as a list of #(id line) vectors.
+;;
+;;   offset > 0  - skip offset rows, then return up to count rows, by id
+;;   offset <= 0 - return the last (abs offset) rows in ascending id order;
+;;                 count is not used. offset -20 means get last 20 lines
+(define (tail-get-lines db fid offset count)
+  (if (> offset 0)
+      ;; SQLite requires LIMIT before OFFSET; parameters are bound in that
+      ;; order. ORDER BY id makes the window well defined.
+      (sqlite3:map-row (lambda (id line)
+                         (vector id line))
+                       db
+                       "SELECT id,line FROM log_data WHERE fid=? ORDER BY id LIMIT ? OFFSET ?;"
+                       fid count offset)
+      (reverse ;; get N from the end
+       (sqlite3:map-row (lambda (id line)
+                          (vector id line))
+                        db
+                        "SELECT id,line FROM log_data WHERE fid=? ORDER BY id DESC LIMIT ?;"
+                        fid (abs offset)))))

ADDED   attic/records-vs-vectors-vs-coops.scm
Index: attic/records-vs-vectors-vs-coops.scm
--- /dev/null
+++ attic/records-vs-vectors-vs-coops.scm
@@ -0,0 +1,110 @@
+;;  Copyright 2006-2017, Matthew Welland.
+;; This file is part of Megatest.
+;;     Megatest is free software: you can redistribute it and/or modify
+;;     it under the terms of the GNU General Public License as published by
+;;     the Free Software Foundation, either version 3 of the License, or
+;;     (at your option) any later version.
+;;     Megatest is distributed in the hope that it will be useful,
+;;     but WITHOUT ANY WARRANTY; without even the implied warranty of
+;;     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;;     GNU General Public License for more details.
+;;     You should have received a copy of the GNU General Public License
+;;     along with Megatest.  If not, see <http://www.gnu.org/licenses/>.
+;; (include "vg.scm")
+;; (declare (uses vg))
+(use foof-loop defstruct coops)
+;; Variant 1: a defstruct record named obj with fields type, fill-color
+;; and angle.
+(defstruct obj     type fill-color angle)
+;; Variant 2: a bare 3-slot vector with inlined, unchecked accessors.
+;; Slot 0 = type, slot 1 = fill-color, slot 2 = angle.
+(define (make-vg:obj)(make-vector 3))
+(define-inline (vg:obj-get-type         vec)    (vector-ref  vec 0))
+(define-inline (vg:obj-get-fill-color   vec)    (vector-ref  vec 1))
+(define-inline (vg:obj-get-angle        vec)    (vector-ref  vec 2))
+(define-inline (vg:obj-set-type!        vec val)(vector-set! vec 0 val))
+(define-inline (vg:obj-set-fill-color!  vec val)(vector-set! vec 1 val))
+(define-inline (vg:obj-set-angle!       vec val)(vector-set! vec 2 val))
+(use simple-exceptions)
+;; Variant 3: a tagged 4-slot vector. Slot 0 holds the symbol vgs:obj and
+;; every accessor checks that tag before touching slots 1-3 (type,
+;; fill-color, angle), raising vgs:obj-exn when the tag does not match.
+;; NOTE(review): the getters and setters call vgs:obj-exn with different
+;; argument shapes; confirm which form make-exception expects.
+(define vgs:obj-exn (make-exception "wrong record type, expected vgs:obj." 'assert))
+(define (make-vgs:obj)(let ((v (make-vector 4)))(vector-set! v 0 'vgs:obj) v))
+(define-inline (vgs:obj-type             vec)(if (eq? (vector-ref vec 0) 'vgs:obj)(vector-ref  vec 1)(raise (vgs:obj-exn 'vgs:obj-type 'xpr))))
+(define-inline (vgs:obj-fill-color       vec)(if (eq? (vector-ref vec 0) 'vgs:obj)(vector-ref  vec 2)(raise (vgs:obj-exn 'vgs:obj-fill-color 'xpr))))
+(define-inline (vgs:obj-angle            vec)(if (eq? (vector-ref vec 0) 'vgs:obj)(vector-ref  vec 3)(raise (vgs:obj-exn 'vgs:obj-angle 'xpr))))
+(define-inline (vgs:obj-type-set!        vec val)(if (eq? (vector-ref vec 0) 'vgs:obj)(vector-set! vec 1 val)(raise (vgs:obj-exn 'type))))
+(define-inline (vgs:obj-fill-color-set!  vec val)(if (eq? (vector-ref vec 0) 'vgs:obj)(vector-set! vec 2 val)(raise (vgs:obj-exn 'fill-color))))
+(define-inline (vgs:obj-angle-set!       vec val)(if (eq? (vector-ref vec 0) 'vgs:obj)(vector-set! vec 3 val)(raise (vgs:obj-exn 'angle))))
+;; Variant 4: a coops class with no superclasses and three slots (type,
+;; fill-color, angle), accessed below through slot-value.
+(define-class <vgc> ()
+  ((type)
+   (fill-color)
+   (angle)))
+;; Benchmark 1: raw vectors via the unchecked vg:obj accessors.
+;; Three nested foof-loops over r, g and b; each innermost iteration builds
+;; one object, sets its three fields and reads them back.
+;; NOTE(review): as shown here the form closes one more paren than it opens;
+;; an enclosing form may be missing - confirm against the complete file.
+(print "Using vectors")
+ (loop ((for r (up-from 0 (to 255))))
+       (loop ((for g (up-from 0 (to 255))))
+	     (loop ((for b (up-from 0 (to 255))))
+		   (let ((obj (make-vg:obj)))
+		     (vg:obj-set-type! obj 'abc)
+		     (vg:obj-set-fill-color! obj "green")
+		     (vg:obj-set-angle! obj 135)
+		     (let ((a (vg:obj-get-type obj))
+			   (b (vg:obj-get-fill-color obj))
+			   (c (vg:obj-get-angle obj)))
+		       obj))))))
+;; Benchmark 2: tagged vectors via the tag-checking vgs:obj accessors.
+;; Same loop shape and per-iteration work as the raw-vector benchmark.
+;; NOTE(review): as shown here the form closes one more paren than it opens;
+;; an enclosing form may be missing - confirm against the complete file.
+(print "Using vectors (safe mode)")
+ (loop ((for r (up-from 0 (to 255))))
+       (loop ((for g (up-from 0 (to 255))))
+	     (loop ((for b (up-from 0 (to 255))))
+		   (let ((obj (make-vgs:obj)))
+		     ;; (badobj (make-vector 20)))
+		     (vgs:obj-type-set! obj 'abc)
+		     (vgs:obj-fill-color-set! obj "green")
+		     (vgs:obj-angle-set! obj 135)
+		     (let ((a (vgs:obj-type obj))
+			   (b (vgs:obj-fill-color obj))
+			   (c (vgs:obj-angle obj)))
+		       obj))))))
+;; Benchmark 3: the defstruct record obj and its generated accessors.
+;; Same loop shape and per-iteration work as the vector benchmarks.
+;; NOTE(review): as shown here the form closes one more paren than it opens;
+;; an enclosing form may be missing - confirm against the complete file.
+(print "Using defstruct")
+ (loop ((for r (up-from 0 (to 255))))
+       (loop ((for g (up-from 0 (to 255))))
+	     (loop ((for b (up-from 0 (to 255))))
+		   (let ((obj (make-obj)))
+		     (obj-type-set! obj 'abc)
+		     (obj-fill-color-set! obj "green")
+		     (obj-angle-set! obj 135)
+		     (let ((a (obj-type obj))
+			   (b (obj-fill-color obj))
+			   (c (obj-angle obj)))
+		       obj))))))
+;; Benchmark 4: coops instances of <vgc>, fields set and read through
+;; slot-value. Same loop shape and per-iteration work as the others.
+;; NOTE(review): as shown here the form closes one more paren than it opens;
+;; an enclosing form may be missing - confirm against the complete file.
+(print "Using coops")
+ (loop ((for r (up-from 0 (to 255))))
+       (loop ((for g (up-from 0 (to 255))))
+	     (loop ((for b (up-from 0 (to 255))))
+		   (let ((obj (make <vgc>)))
+		     (set! (slot-value obj 'type) 'abc)
+		     (set! (slot-value obj 'fill-color) "green")
+		     (set! (slot-value obj 'angle) 135)
+		     (let ((a (slot-value obj 'type))
+			   (b (slot-value obj 'fill-color))
+			   (c (slot-value obj 'angle)))
+		       obj))))))

ADDED   attic/runs-launch-loop-test.scm
Index: attic/runs-launch-loop-test.scm
--- /dev/null
+++ attic/runs-launch-loop-test.scm
@@ -0,0 +1,76 @@
+;;  Copyright 2006-2017, Matthew Welland.
+;; This file is part of Megatest.
+;;     Megatest is free software: you can redistribute it and/or modify
+;;     it under the terms of the GNU General Public License as published by
+;;     the Free Software Foundation, either version 3 of the License, or
+;;     (at your option) any later version.
+;;     Megatest is distributed in the hope that it will be useful,
+;;     but WITHOUT ANY WARRANTY; without even the implied warranty of
+;;     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;;     GNU General Public License for more details.
+;;     You should have received a copy of the GNU General Public License
+;;     along with Megatest.  If not, see <http://www.gnu.org/licenses/>.
+(use srfi-69)
+;; Pick the next head item: the first registered item when the registry is
+;; full, otherwise the first item of the tail. n is accepted but unused.
+(define (runs:queue-next-hed tal reg n regful)
+  (car (if regful reg tal)))
+;; Compute the next tail: unchanged when the registry is full; otherwise the
+;; rest of the tail, falling back to the registry once the tail runs out.
+;; n is accepted but unused.
+(define (runs:queue-next-tal tal reg n regful)
+  (cond
+   (regful            tal)
+   ((null? (cdr tal)) reg)
+   (else              (cdr tal))))
+;; Compute the next registry: drop its first item when it is full; empty it
+;; when the tail holds exactly one item (the registry becomes the tail in
+;; that case); otherwise leave it alone. n is accepted but unused.
+(define (runs:queue-next-reg tal reg n regful)
+  (cond
+   (regful                (cdr reg))
+   ((eq? (length tal) 1)  '())
+   (else                  reg)))
+(use trace)
+(trace runs:queue-next-hed
+       runs:queue-next-tal
+       runs:queue-next-reg)
+;; Driver: simulate the launch queue over twenty numbered tests.
+;; Each test is "registered" the first time it comes up as head and "run"
+;; on a later visit; n is the registry size threshold.
+(define tests '(1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20))
+(define test-registry (make-hash-table))
+(define n 3)
+(let loop ((hed (car tests))
+           (tal (cdr tests))
+           (reg '()))
+  (let* ((reglen (length reg))
+         (regful (> reglen n))
+         (newtal (append tal (list hed)))) ;; used if we are not done with this test
+    (print "hed=" hed ", length reg=" reglen ", (> lenreg n)=" regful)
+    (if (not (hash-table-ref/default test-registry hed #f))
+        (begin
+          (hash-table-set! test-registry hed #t)
+          (print "Registering #" hed)
+          (if (not (null? tal))
+              (loop (runs:queue-next-hed tal reg n regful)
+                    (runs:queue-next-tal tal reg n regful)
+                    ;; append the head to the registry, trimming the oldest
+                    ;; entry when the registry was already full
+                    (let ((grown (append reg (list hed))))
+                      (if regful (cdr grown) grown)))))
+        (begin
+          (print "Running #" hed)
+          (if (not (null? tal))
+              (loop (runs:queue-next-hed tal reg n regful)
+                    (runs:queue-next-tal tal reg n regful)
+                    (runs:queue-next-reg tal reg n regful)))))))

ADDED   attic/sdb.scm
Index: attic/sdb.scm
--- /dev/null
+++ attic/sdb.scm
@@ -0,0 +1,116 @@
+;; Copyright 2006-2013, Matthew Welland.
+;; This file is part of Megatest.
+;;     Megatest is free software: you can redistribute it and/or modify
+;;     it under the terms of the GNU General Public License as published by
+;;     the Free Software Foundation, either version 3 of the License, or
+;;     (at your option) any later version.
+;;     Megatest is distributed in the hope that it will be useful,
+;;     but WITHOUT ANY WARRANTY; without even the implied warranty of
+;;     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;;     GNU General Public License for more details.
+;;     You should have received a copy of the GNU General Public License
+;;     along with Megatest.  If not, see <http://www.gnu.org/licenses/>.
+;; Simple persistent string lookup table. Kept out of the main db
+;; so that its writes/reads don't slow down central access.
+(require-extension (srfi 18) extras)
+(use sqlite3 srfi-1 posix regex regex-case srfi-69 csv-xml s11n md5 message-digest base64)
+(import (prefix sqlite3 sqlite3:))
+(import (prefix base64 base64:))
+(declare (unit sdb))
+;; Open the strings db at fname and return the sqlite3 handle.
+;; When fname does not exist yet its directory is created first and the
+;; schema is initialized after opening. The busy handler is installed and
+;; synchronous is set to 1 in every case.
+(define (sdb:open fname)
+  (let* ((dbdir   (pathname-directory fname))
+         (existed (or (common:file-exists? fname)
+                      (begin
+                        (create-directory dbdir #t)
+                        #f)))
+         (handle  (sqlite3:open-database fname)))
+    (sqlite3:set-busy-handler! handle (make-busy-timeout 136000))
+    (if (not existed)
+        (sdb:initialize handle))
+    (sqlite3:execute handle "PRAGMA synchronous = 1;")
+    handle))
+;; Create the strs table (integer id, unique text str) and an index on str,
+;; if they do not exist yet, on the open handle sdb.
+(define (sdb:initialize sdb)
+  (sqlite3:execute sdb "CREATE TABLE IF NOT EXISTS strs
+                           (id  INTEGER PRIMARY KEY,
+                            str TEXT,
+                        CONSTRAINT str UNIQUE (str));")
+  (sqlite3:execute sdb "CREATE INDEX IF NOT EXISTS strindx ON strs (str);"))
+;; (define sumup (let ((a 0))(lambda (x)(set! a (+ x a)) a)))
+;; Insert str into the strs table; INSERT OR IGNORE makes this a no-op when
+;; the string is already present.
+(define (sdb:register-string sdb str)
+  (sqlite3:execute sdb "INSERT OR IGNORE INTO strs (str) VALUES (?);" str))
+;; Map str to its id in the strs table, consulting the str-cache hash table
+;; first and filling it on a database hit. Returns #f when str is unknown.
+(define (sdb:string->id sdb str-cache str)
+  (or (hash-table-ref/default str-cache str #f)
+      (let ((found #f))
+        (sqlite3:for-each-row
+         (lambda (sid)
+           (set! found sid)
+           (hash-table-set! str-cache str sid))
+         sdb
+         "SELECT id FROM strs WHERE str=?;" str)
+        found)))
+;; Map id back to its string in the strs table, consulting the id-cache
+;; hash table first and filling it on a database hit. Returns #f when the
+;; id is unknown.
+(define (sdb:id->string sdb id-cache id)
+  (or (hash-table-ref/default id-cache id #f)
+      (let ((found #f))
+        (sqlite3:for-each-row
+         (lambda (istr)
+           (set! found istr)
+           (hash-table-set! id-cache id istr))
+         sdb
+         "SELECT str FROM strs WHERE id=?;" id)
+        found)))
+;; Numbers get passed through in both directions
+;;
+;; Build a query closure (lambda (cmd var) ...) that owns one strings-db
+;; handle plus a string->id cache and an id->string cache.
+;;
+;; Commands:
+;;   setup    - open the db (var if true, else fname) unless one is open
+;;   setdb    - use var as the db handle
+;;   getdb    - return the current handle (or #f)
+;;   finalize - finalize and forget the handle, if any
+;;   getid    - numbers and numeric strings are returned as-is; any other
+;;              string is mapped to its id, registering it when new
+;;   getstr   - numbers and numeric strings are looked up as ids; anything
+;;              else is returned as-is
+;;   passid / passstr - return var untouched
+;; Any other command returns #f.
+(define (make-sdb:qry fname)
+  (let ((sdb    #f)
+        (scache (make-hash-table))
+        (icache (make-hash-table)))
+    (lambda (cmd var)
+      (case cmd
+        ;; Guard the assignment itself: assigning the value of a one-armed
+        ;; `if` would replace an already-open handle with an unspecified
+        ;; value on a second setup call.
+        ((setup)    (if (not sdb)
+                        (set! sdb (sdb:open (if var var fname)))))
+        ((setdb)    (set! sdb var))
+        ((getdb)    sdb)
+        ((finalize) (if sdb
+                        (begin
+                          (sqlite3:finalize! sdb)
+                          (set! sdb #f))))
+        ((getid)    (let ((id (if (or (number? var)
+                                      (string->number var))
+                                  var
+                                  (sdb:string->id sdb scache var))))
+                      (if id
+                          id
+                          (begin
+                            (sdb:register-string sdb var)
+                            (sdb:string->id sdb scache var)))))
+        ((getstr)   (if (or (number? var)
+                            (string->number var))
+                        (sdb:id->string sdb icache var)
+                        var))
+        ((passid)   var)
+        ((passstr)  var)
+        (else #f)))))

ADDED   attic/vg-test.scm
Index: attic/vg-test.scm
--- /dev/null
+++ attic/vg-test.scm
@@ -0,0 +1,119 @@
+;;  Copyright 2006-2017, Matthew Welland.
+;; This file is part of Megatest.
+;;     Megatest is free software: you can redistribute it and/or modify
+;;     it under the terms of the GNU General Public License as published by
+;;     the Free Software Foundation, either version 3 of the License, or
+;;     (at your option) any later version.
+;;     Megatest is distributed in the hope that it will be useful,
+;;     but WITHOUT ANY WARRANTY; without even the implied warranty of
+;;     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;;     GNU General Public License for more details.
+;;     You should have received a copy of the GNU General Public License
+;;     along with Megatest.  If not, see <http://www.gnu.org/licenses/>.
+(use canvas-draw iup foof-loop)
+(import canvas-draw-iup)
+(load "vg.scm")
+;; upper bound for the counter of the timed rectangle-adding loop further down
+(define numtorun 1000)
+;; (if (> (length (argv)) 1)
+;; 		     (string->number (cadr (argv)))
+;; 		     1000))
+ (use trace)
+ ;; (trace 
+ ;;  ;; vg:draw-rect
+ ;;  ;; vg:grow-rect
+ ;;  vg:get-extents-for-objs
+ ;;  vg:components-get-extents
+ ;;  vg:instances-get-extents
+ ;;  vg:get-extents-for-two-rects
+ ;;  canvas-line!)
+;; one drawing, one library and two components to populate
+(define d1 (vg:drawing-new))
+(define l1 (vg:lib-new))
+(define c1 (vg:comp-new))
+(define c2 (vg:comp-new))
+;; bt1 is kept at top level because its extents are queried again later
+(define bt1 (vg:make-rect-obj 10 40 20 50 text: "A long piece of text" font: "Helvetica, -10"))
+;; two labelled rectangles and a text object, added to c1 together with bt1
+(let ((r1 (vg:make-rect-obj 20 20 30 30 text: "r1" font: "Helvetica, -20"))
+      (r2 (vg:make-rect-obj 30 30 60 60 text: "r2" font: "Helvetica, -10"))
+      (t1 (vg:make-text-obj 60 60 "The middle" font: "Helvetica, -10")))
+  (vg:add-objs-to-comp c1 r1 r2 t1 bt1))
+;; nested foof-loop iteration: add one 5x5 rectangle to c1 for every x,y pair
+(loop ((for x (up-from 0 (to 20))))
+       (loop ((for y (up-from 0 (to 20))))
+	     (vg:add-objs-to-comp c1 (vg:make-rect-obj x y (+ x 5)(+ y 5)))))
+;; timing run: the body executes for i = 0 .. numtorun inclusive, then the
+;; elapsed wall-clock seconds are printed
+;; NOTE(review): this uses vg:add-obj-to-comp (singular) while the rest of the
+;; script uses vg:add-objs-to-comp - confirm both are defined in vg.scm
+(let ((start (current-seconds)))
+  (let loop ((i 0))
+    (vg:add-obj-to-comp c1 (vg:make-rect-obj 0 0 100 100))
+    (if (< i numtorun)(loop (+ i 1))))
+  (print "Run time: " (- (current-seconds) start)))
+;; a single line object from 0,0 to 100,100 in c1
+(vg:add-obj-to-comp c1 (vg:make-line-obj 0 0 100 100))
+;; add the c1 component to lib l1 with name firstcomp
+(vg:add-comp-to-lib l1 "firstcomp" c1)
+;; add the (still empty) c2 component to lib l1 with name secondcomp
+(vg:add-comp-to-lib l1 "secondcomp" c2)
+;; add the l1 lib to drawing with name firstlib
+(vg:add-lib d1 "firstlib" l1)
+;; instantiate firstlib/firstcomp as inst1 in drawing d1 at 0,0
+(vg:instantiate d1 "firstlib" "firstcomp" "inst1" 0 0)
+;; and a second instance of the same component, inst2, at 200,200
+(vg:instantiate d1 "firstlib" "firstcomp" "inst2" 200 200)
+;; (vg:drawing-scalex-set! d1 1.1)
+;; (vg:drawing-scaley-set! d1 0.5)
+;; (define xtnts (vg:scale-offset-xy 
+;; 	       (vg:component-get-extents c1)
+;; 	       1.1 1.1 -2 -2))
+;; get extents of c1, grow them via vg:grow-rect with 10 10, and put a
+;; rectangle around the result
+(define xtnts (apply vg:grow-rect 10 10 (vg:components-get-extents d1 c1)))
+(vg:add-objs-to-comp c1 (apply vg:make-rect-obj xtnts))
+;; extents of the text-carrying rectangle bt1, printed and outlined as well
+(define bt1xt (vg:obj-get-extents d1 bt1))
+(print "bt1xt: " bt1xt)
+(vg:add-objs-to-comp c1 (apply vg:make-rect-obj bt1xt))
+;; get extents of all objects and put rectangle around it
+(define big-xtnts (vg:instances-get-extents d1))
+;; the enclosing rectangle goes into c2, which is then instantiated as inst3 at 0,0
+(vg:add-objs-to-comp c2 (apply vg:make-rect-obj big-xtnts))
+(vg:instantiate d1 "firstlib" "secondcomp" "inst3" 0 0)
+;; scale the drawing by 1.5 in both directions
+(vg:drawing-scalex-set! d1 1.5)
+(vg:drawing-scaley-set! d1 1.5)
+;; cnv receives the canvas object handed to the action callback below
+(define cnv #f)
+;; the canvas widget; its action callback only records the canvas in cnv
+(define the-cnv (canvas 
+		 #:size "500x400"
+		 #:expand "YES"
+		 #:scrollbar "YES"
+		 #:posx "0.5"
+		 #:posy "0.5"
+		 #:action (make-canvas-action
+			   (lambda (c xadj yadj)
+			     (set! cnv c)))))
+;; NOTE(review): the form below closes one more paren than it opens, so the
+;; line opening an enclosing form (presumably a call that shows the dialog)
+;; appears to be missing from this hunk - confirm against the real file
+ (dialog
+  (vbox
+   the-cnv)))
+;; NOTE(review): cnv is still #f here unless the action callback has already
+;; run - confirm the intended ordering
+(vg:drawing-cnv-set! d1 cnv)
+(vg:draw d1 #t)
+;; (canvas-rectangle! cnv  10 100 10 80)

Index: client.scm
--- client.scm
+++ client.scm
@@ -71,12 +71,18 @@
 ;; client:setup
 ;; lookup_server, need to remove *runremote* stuff
 (define (client:setup-http areapath #!key (remaining-tries 100) (failed-connects 0)(area-dat #f))
+  ;; Serialized wrapper: runs client:setup-http-baby with the same arguments
+  ;; while holding *rmt-mutex* and returns its result.
+  ;;
+  ;; dynamic-wind is used so the mutex is released even when the setup code
+  ;; raises; otherwise every later caller would block on *rmt-mutex* forever.
+  (dynamic-wind
+      (lambda () (mutex-lock! *rmt-mutex*))
+      (lambda ()
+	(client:setup-http-baby areapath remaining-tries: remaining-tries failed-connects: failed-connects area-dat: area-dat))
+      (lambda () (mutex-unlock! *rmt-mutex*))))
+(define (client:setup-http-baby areapath #!key (remaining-tries 100) (failed-connects 0)(area-dat #f))
   (debug:print-info 2 *default-log-port* "client:setup remaining-tries=" remaining-tries)
   (server:start-and-wait areapath)
   (if (<= remaining-tries 0)
 	(debug:print-error 0 *default-log-port* "failed to start or connect to server")
@@ -86,11 +92,11 @@
       ;; through them searching for a good one.
       (let* ((server-dat (server:get-rand-best areapath)) ;; (server:get-first-best areapath))
 	     (runremote  (or area-dat *runremote*)))
 	(if (not server-dat) ;; no server found
-	    (client:setup-http areapath remaining-tries: (- remaining-tries 1))
+	    (client:setup-http-baby areapath remaining-tries: (- remaining-tries 1))
 	    (let ((host  (cadr  server-dat))
 		  (port  (caddr server-dat))
                   (server-id (caddr (cddr server-dat))))
 	      (debug:print-info 4 *default-log-port* "client:setup server-dat=" server-dat ", remaining-tries=" remaining-tries)
 	      (if (and (not area-dat)
@@ -108,23 +114,28 @@
 			 (ping-res  (case *transport-type* 
 				      ((http)(rmt:login-no-auto-client-setup start-res)))))
 		    (if (and start-res
 			(let ((runremote (or area-dat *runremote*))) ;; it might have been generated only a few statements ago
-			  (remote-conndat-set! runremote start-res) ;; (hash-table-set! runremote run-id start-res)
-			  (debug:print-info 2 *default-log-port* "connected to " (http-transport:server-dat-make-url start-res))
-			  start-res)
+			  (if runremote
+			      (begin
+				(remote-conndat-set! runremote start-res) ;; (hash-table-set! runremote run-id start-res)
+				(debug:print-info 2 *default-log-port* "connected to " (http-transport:server-dat-make-url start-res))
+				start-res)
+			      (client:setup-http-baby areapath remaining-tries: (- remaining-tries 1))))
 			(begin    ;; login failed but have a server record, clean out the record and try again
 			  (debug:print-info 0 *default-log-port* "client:setup, login unsuccessful, will attempt to start server ... start-res=" start-res ", server-dat=" server-dat) ;; had runid.  Fixes part of Randy;s ticket 1405717332
 			  (case *transport-type* 
-			  (remote-conndat-set! runremote #f)  ;; (hash-table-delete! runremote run-id)
+                          (if *runremote* 
+			    (remote-conndat-set! runremote #f)  ;; (hash-table-delete! runremote run-id)
+                          )
 			  (thread-sleep! 1)
-			  (client:setup-http areapath remaining-tries: (- remaining-tries 1))
+			  (client:setup-http-baby areapath remaining-tries: (- remaining-tries 1))
 		  (begin    ;; no server registered
 		    ;; (server:kind-run areapath)
 		    (server:start-and-wait areapath)
 		    (debug:print-info 0 *default-log-port* "client:setup, no server registered, remaining-tries=" remaining-tries)
 		    (thread-sleep! 1) ;; (+ 5 (random (- 20 remaining-tries))))  ;; give server a little time to start up, randomize a little to avoid start storms.
-		    (client:setup-http areapath remaining-tries: (- remaining-tries 1)))))))))
+		    (client:setup-http-baby areapath remaining-tries: (- remaining-tries 1)))))))))

Index: common.scm
--- common.scm
+++ common.scm
@@ -26,26 +26,30 @@
      (prefix sqlite3 sqlite3:)
      pkts (prefix dbi dbi:)
 (declare (unit common))
-;; (declare (uses commonmod))
-;; (import commonmod)
+(declare (uses commonmod))
+(import commonmod)
 (include "common_records.scm")
-;; (require-library margs)
-;; (include "margs.scm")
-;; (define old-exit exit)
-;; (define (exit . code)
-;;   (if (null? code)
-;;       (old-exit)
-;;       (old-exit code)))
+;; Best-effort removal of every entry in directory-path whose name starts
+;; with "server".
+;;
+;; Failures are swallowed on purpose: stop-the-train calls this immediately
+;; before (exit 1), and several processes may be cleaning the same directory
+;; at once, so a file that has already vanished (or a directory that cannot
+;; be listed) must not raise and prevent the exit.
+(define (remove-server-files directory-path)
+  (let ((files (handle-exceptions
+		   exn
+		 '() ;; directory missing or unreadable, nothing to remove
+		 (glob (string-append directory-path "/server*")))))
+    (for-each
+     (lambda (fname)
+       (handle-exceptions exn #f (delete-file fname)))
+     files)))
+;; Start (and return) a thread that checks every five seconds whether the
+;; file stop-the-train exists directly under *toppath*. When it does, the
+;; thread logs an error, removes the server* files from *toppath*/logs and
+;; exits the process with status 1. Nothing is checked while *toppath* is #f.
+(define (stop-the-train)
+  (define (watch-for-stop-file)
+    (if (and *toppath*
+             (file-exists? (conc *toppath*"/stop-the-train")))
+        (begin
+          (debug:print 0 *default-log-port* "ERROR: found file "*toppath*"/stop-the-train, exiting immediately")
+          (remove-server-files (conc *toppath* "/logs"))
+          (exit 1)))
+    (thread-sleep! 5)
+    (watch-for-stop-file))
+  (thread-start! (make-thread watch-for-stop-file)))
 ;; execute thunk, return value.  If exception thrown, trap exception, return #f, and emit nonfatal condition note to *default-log-port* .
 ;; arguments - thunk, message
 (define (common:fail-safe thunk warning-message-on-exception)
@@ -129,37 +133,37 @@
 (define *test-meta-updated* (make-hash-table))
 (define *globalexitstatus*  0) ;; attempt to work around possible thread issues
 (define *passnum*           0) ;; when running track calls to run-tests or similar
 ;; (define *alt-log-file* #f)  ;; used by -log
-(define *common:denoise*    (make-hash-table)) ;; for low noise printing
+;; (define *common:denoise*    (make-hash-table)) ;; for low noise printing
 (define *default-log-port*  (current-error-port))
 (define *time-zero* (current-seconds)) ;; for the watchdog
 (define *default-area-tag* "local")
-(define *dbstruct-db*         #f) ;; used to cache the dbstruct in db:setup. Goal is to remove this.
+;; (define *dbstruct-dbs*         #f) ;; used to cache the dbstruct in db:setup. Goal is to remove this.
 ;; db stats
 (define *db-stats*            (make-hash-table)) ;; hash of vectors < count duration-total >
 (define *db-stats-mutex*      (make-mutex))
 ;; db access
 (define *db-last-access*      (current-seconds)) ;; last db access, used in server
-(define *db-write-access*     #t)
+;; (define *db-write-access*     #t)
 ;; db sync
-(define *db-last-sync*        0)                 ;; last time the sync to megatest.db happened
+;; (define *db-last-sync*        0)                 ;; last time the sync to megatest.db happened
 (define *db-sync-in-progress* #f)                ;; if there is a sync in progress do not try to start another
-(define *db-multi-sync-mutex* (make-mutex))      ;; protect access to *db-sync-in-progress*, *db-last-sync*
+;; (define *db-multi-sync-mutex* (make-mutex))      ;; protect access to *db-sync-in-progress*, *db-last-sync*
 ;; task db
 (define *task-db*             #f) ;; (vector db path-to-db)
 (define *db-access-allowed*   #t) ;; flag to allow access
-(define *db-access-mutex*     (make-mutex))
+;; (define *db-access-mutex*     (make-mutex)) ;; moved to dbfile
 (define *db-transaction-mutex* (make-mutex))
 (define *db-cache-path*       #f)
-(define *db-with-db-mutex*    (make-mutex))
+;; (define *db-with-db-mutex*    (make-mutex))
 (define *db-api-call-time*    (make-hash-table)) ;; hash of command => (list of times)
 ;; no sync db
-(define *no-sync-db*          #f)
+;; (define *no-sync-db*          #f)  ;; moved to dbfile
 (define *my-client-signature* #f)
 (define *transport-type*    'http)             ;; override with [server] transport http|rpc|nmsg
 (define *runremote*         #f)                ;; if set up for server communication this will hold <host port>
@@ -172,12 +176,12 @@
 (define *run-id*            #f)
 (define *server-kind-run*   (make-hash-table))
 (define *home-host*         #f)
 ;; (define *total-non-write-delay* 0)
 (define *heartbeat-mutex*   (make-mutex))
-(define *api-process-request-count* 0)
-(define *max-api-process-requests* 0)
+;; (define *api-process-request-count* 0)
+;; (define *max-api-process-requests* 0)
 (define *server-overloaded*  #f)
 ;; client
 (define *rmt-mutex*         (make-mutex))     ;; remote access calls mutex 
@@ -310,10 +314,11 @@
   (hh-dat            (common:get-homehost)) ;; homehost record ( addr . hhflag )
   (server-url        #f) ;; (server:check-if-running *toppath*) #f))
   (server-id         #f)
   (server-info       (if *toppath* (server:check-if-running *toppath*) #f))
   (last-server-check 0)  ;; last time we checked to see if the server was alive
+  (connect-time      (current-seconds))
   (conndat           #f)
   (transport         *transport-type*)
   (server-timeout    (server:expiration-timeout))
   (force-server      #f)
   (ro-mode           #f)  
@@ -405,19 +410,15 @@
 (define (common:cleanup-db dbstruct #!key (full #f))
   (apply db:multi-db-sync 
-   ;; 'new2old
-   ;; 'old2new
-   ;; (if full
-       '(dejunk)
-       ;; '())
-       )
+   '(dejunk)
+  )
   (if (common:api-changed?)
 (define (common:snapshot-file filepath #!key (subdir  ".") )
   (if (file-exists? filepath)
@@ -591,13 +592,13 @@
 (define (common:exit-on-version-changed)
   (if (common:on-homehost?)
       (if (common:api-changed?)
 	  (let* ((mtconf (conc (get-environment-variable "MT_RUN_AREA_HOME") "/megatest.config"))
-                (dbfile  (conc (get-environment-variable "MT_RUN_AREA_HOME") "/megatest.db"))
-                (read-only (not (file-write-access? dbfile)))
-                (dbstruct (db:setup #t)))
+                 (dbfile  (conc (get-environment-variable "MT_RUN_AREA_HOME") "/megatest.db"))
+                 (read-only (not (file-write-access? dbfile)))
+                 (dbstruct (db:setup #t))) ;; (db:setup-db *dbstruct-dbs* *toppath* #f))) ;;  #t)))
 	    (debug:print 0 *default-log-port*
 			 "WARNING: Version mismatch!\n"
 			 "   expected: " (common:version-signature) "\n"
 			 "   got:      " (common:get-last-run-version))
@@ -724,54 +725,10 @@
       (print-call-chain (current-error-port))
     (read (open-input-string (base64:base64-decode instr))))
    (read (open-input-string (z3:decode-buffer (base64:base64-decode instr))))))
-;; dot-locking egg seems not to work, using this for now
-;; if lock is older than expire-time then remove it and try again
-;; to get the lock
-(define (common:simple-file-lock fname #!key (expire-time 300))
-  (let ((fmod-time (handle-exceptions
-		       ext
-		     (current-seconds)
-		     (file-modification-time fname))))
-    (if (common:file-exists? fname)
-	(if (> (- (current-seconds) fmod-time) expire-time)
-	    (begin
-	      (handle-exceptions exn #f (delete-file* fname))	
-	      (common:simple-file-lock fname expire-time: expire-time))
-	    #f)
-	(let ((key-string (conc (get-host-name) "-" (current-process-id))))
-	  (with-output-to-file fname
-	    (lambda ()
-	      (print key-string)))
-	  (thread-sleep! 0.25)
-	  (if (common:file-exists? fname)
-	      (handle-exceptions exn
-                #f 
-                (with-input-from-file fname
-	  	  (lambda ()
-		    (equal? key-string (read-line)))))
-	      #f)))))
-(define (common:simple-file-lock-and-wait fname #!key (expire-time 300))
-  (let ((end-time (+ expire-time (current-seconds))))
-    (let loop ((got-lock (common:simple-file-lock fname expire-time: expire-time)))
-      (if got-lock
-	  #t
-	  (if (> end-time (current-seconds))
-	      (begin
-		(thread-sleep! 3)
-		(loop (common:simple-file-lock fname expire-time: expire-time)))
-	      #f)))))
-(define (common:simple-file-release-lock fname)
-  (handle-exceptions
-      exn
-      #f ;; I don't really care why this failed (at least for now)
-    (delete-file* fname)))
 ;; S T A T E S   A N D   S T A T U S E S
@@ -952,10 +909,22 @@
 		  (debug:print-error 0 *default-log-port* "Unable to find megatest home directory.")
 		(loop (pathname-directory thepath)))))
+(define (common:db-tmp-area-path)
+  (conc "/tmp/" 
+         (current-user-name)
+	 "/megatest_localdb/"
+	 (common:get-testsuite-name)
+         "/"
+	 (string-translate *toppath* "/" ".")
+  )
 ;; redefine for future cleanup (converge on area-name, the more generic
 (define common:get-area-name common:get-testsuite-name)
@@ -979,10 +948,18 @@
 					  (string-translate *toppath* "/" "."))
 		(set! *db-cache-path* dbpath)
+		;; ensure megatest area has .megatest
+		(let ((dbarea (conc *toppath* "/.megatest")))
+		  (if (not (file-exists? dbarea))
+		      (create-directory dbarea)))
+		;; ensure tmp area has .megatest
+		(let ((dbarea (conc dbpath "/.megatest")))
+		  (if (not (file-exists? dbarea))
+		      (create-directory dbarea)))
 (define (common:get-area-path-signature)
   (message-digest-string (md5-primitive) *toppath*))
@@ -999,118 +976,10 @@
 	 (args:get-arg "-server")))
 (define (common:human-time)
   (time->string (seconds->local-time (current-seconds)) "%Y-%m-%d %H:%M:%S"))
-;; currently the primary job of the watchdog is to run the sync back to megatest.db from the db in /tmp
-;; if we are on the homehost and we are a server (by definition we are on the homehost if we are a server)
-(define (common:readonly-watchdog dbstruct)
-  (thread-sleep! 0.05) ;; delay for startup
-  (debug:print-info 13 *default-log-port* "common:readonly-watchdog entered.")
-  ;; sync megatest.db to /tmp/.../megatst.db
-  (let* ((sync-cool-off-duration   3)
-        (golden-mtdb     (dbr:dbstruct-mtdb dbstruct))
-        (golden-mtpath   (db:dbdat-get-path golden-mtdb))
-        (tmp-mtdb        (dbr:dbstruct-tmpdb dbstruct))
-        (tmp-mtpath      (db:dbdat-get-path tmp-mtdb)))
-    (debug:print-info 0 *default-log-port* "Read-only periodic sync thread started.")
-    (let loop ((last-sync-time 0))
-      (debug:print-info 13 *default-log-port* "loop top tmp-mtpath="tmp-mtpath" golden-mtpath="golden-mtpath)
-      (let* ((duration-since-last-sync (- (current-seconds) last-sync-time)))
-        (debug:print-info 13 *default-log-port* "duration-since-last-sync="duration-since-last-sync)
-        (if (and (not *time-to-exit*)
-                 (< duration-since-last-sync sync-cool-off-duration))
-            (thread-sleep! (- sync-cool-off-duration duration-since-last-sync)))
-        (if (not *time-to-exit*)
-            (let ((golden-mtdb-mtime (file-modification-time golden-mtpath))
-                  (tmp-mtdb-mtime    (file-modification-time tmp-mtpath)))
-	      (if (> golden-mtdb-mtime tmp-mtdb-mtime)
-		  (if (< golden-mtdb-mtime (- (current-seconds) 3)) ;; file has NOT been touched in past three seconds, this way multiple servers won't fight to sync back
-		      (let ((res (db:multi-db-sync dbstruct 'old2new)))
-			(debug:print-info 13 *default-log-port* "rosync called, " res " records transferred."))))
-              (loop (current-seconds)))
-            #t)))
-    (debug:print-info 0 *default-log-port* "Exiting readonly-watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id)" mtpath="golden-mtpath)))
-;; TODO: for multiple areas, we will have multiple watchdogs; and multiple threads to manage
-(define (common:watchdog)
-  (debug:print-info 13 *default-log-port* "common:watchdog entered.")
-  (if (launch:setup)
-      (if (common:on-homehost?)
-	  (let ((dbstruct (db:setup #t)))
-	    (debug:print-info 13 *default-log-port* "after db:setup with dbstruct=" dbstruct)
-	    (cond
-	     ((dbr:dbstruct-read-only dbstruct)
-	      (debug:print-info 13 *default-log-port* "loading read-only watchdog")
-	      (common:readonly-watchdog dbstruct))
-	     (else
-	      (debug:print-info 13 *default-log-port* "loading writable-watchdog.")
-              (let* ((syncer (or (configf:lookup *configdat* "server" "sync-method") "brute-force-sync")))
-                (cond
-                 ((equal? syncer "brute-force-sync")
-                  (server:writable-watchdog-bruteforce dbstruct))
-                 ((equal? syncer "delta-sync")
-                  (server:writable-watchdog-deltasync dbstruct))
-                 (else
-                  (debug:print-error 0 *default-log-port* "Unknown server/sync-method specified ("syncer") - valid values are brute-force-sync and delta-sync.")
-                  (exit 1)))
-                ;;(debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] Syncer started (method="syncer")")
-                )))
-	    (debug:print-info 13 *default-log-port* "watchdog done."))
-	  (debug:print-info 13 *default-log-port* "no need for watchdog on non-homehost"))))
-(define (std-exit-procedure)
-  ;;(common:telemetry-log-close)
-  (on-exit (lambda () 0))
-  ;;(debug:print-info 13 *default-log-port* "std-exit-procedure called; *time-to-exit*="*time-to-exit*)
-  (let ((no-hurry  (if *time-to-exit* ;; hurry up
-		       #f
-		       (begin
-			 (set! *time-to-exit* #t)
-			 #t))))
-    (debug:print-info 4 *default-log-port* "starting exit process, finalizing databases.")
-    (if (and no-hurry (debug:debug-mode 18))
-	(rmt:print-db-stats))
-    (let ((th1 (make-thread (lambda () ;; thread for cleaning up, give it five seconds
-                              (if *dbstruct-db* (db:close-all *dbstruct-db*)) ;; one second allocated
-			      (if *task-db*    
-				  (let ((db (cdr *task-db*)))
-				    (if (sqlite3:database? db)
-					(begin
-					  (sqlite3:interrupt! db)
-					  (sqlite3:finalize! db #t)
-					  ;; (vector-set! *task-db* 0 #f)
-					  (set! *task-db* #f)))))
-                              (http-client#close-all-connections!)
-                              ;; (if (and *runremote*
-                              ;;          (remote-conndat *runremote*))
-                              ;;     (begin
-                              ;;       (http-client#close-all-connections!))) ;; for http-client
-                              (if (not (eq? *default-log-port* (current-error-port)))
-                                  (close-output-port *default-log-port*))
-			      (set! *default-log-port* (current-error-port))) "Cleanup db exit thread"))
-	  (th2 (make-thread (lambda ()
-			      (debug:print 4 *default-log-port* "Attempting clean exit. Please be patient and wait a few seconds...")
-			      (if no-hurry
-                                  (begin
-                                    (thread-sleep! 5)) ;; give the clean up few seconds to do it's stuff
-                                  (begin
-      				  (thread-sleep! 2)))
-      			      (debug:print 4 *default-log-port* " ... done")
-      			      )
-			    "clean exit")))
-      (thread-start! th1)
-      (thread-start! th2)
-      (thread-join! th1)
-      )
-    )
-  0)
 (define (std-signal-handler signum)
   ;; (signal-mask! signum)
   (set! *time-to-exit* #t) 
   ;;(debug:print-info 13 *default-log-port* "got signal "signum)
@@ -1822,15 +1691,17 @@
 	   (min (max (/ (expt r1 (* r2 s2 ratio)) s1) 0) 30))
 	   (debug:print 0 *default-log-port* "BAD exp-params, should be \"r1 r2 s1 s2\" but got " paramstr)
-(define (common:print-delay-table)
-  (let loop ((x 0))
-    (print x "," (common:get-delay x 1))
-    (if (< x 2)
-	(loop (+ x 0.1)))))
+;; -mrw- this appears to not be used
+;; (define (common:print-delay-table)
+;;   (let loop ((x 0))
+;;     (print x "," (common:get-delay x 1))
+;;     (if (< x 2)
+;; 	(loop (+ x 0.1)))))
 (define (get-cpu-load #!key (remote-host #f))
   (car (common:get-cpu-load remote-host)))
@@ -1903,19 +1774,32 @@
 	    (debug:print 0 *default-log-port* "failed to write file " fullpath ", exn=" exn)
 	  (with-output-to-file fullpath (lambda ()(pp dat)))))
-(define (common:raw-get-remote-host-load remote-host)
+(define (common:raw-get-remote-host-load-orig remote-host)
       (debug:print 0 *default-log-port* "failed to ssh to " remote-host " and get loadavg. exn=" exn)
       #f) ;; more specific handling of errors needed
      (conc "ssh " remote-host " cat /proc/loadavg")
      (lambda ()(list (read)(read)(read))))))
+;; Read the three load averages of remote-host by running
+;; "ssh <remote-host> cat /proc/loadavg" through a pipe.
+;;
+;; Returns a list of the first three datums read from the command output,
+;; or #f (after logging) when anything raises.
+(define (common:raw-get-remote-host-load remote-host)
+  (let* ((inp #f))
+    (handle-exceptions
+	exn
+      (begin
+	;; inp is still #f when open-input-pipe itself failed; closing #f would
+	;; raise again from inside this handler and hide the original problem
+	(if inp
+	    (handle-exceptions exn2 #f (close-input-pipe inp)))
+	(debug:print 0 *default-log-port* "failed to ssh to " remote-host " and get loadavg. exn=" exn)
+	#f) ;; more specific handling of errors needed
+      (set! inp (open-input-pipe (conc "ssh " remote-host " cat /proc/loadavg")))
+      (let ((res (list (read inp)(read inp)(read inp))))
+	(close-input-pipe inp)
+	res))))
 ;; get cpu load by reading from /proc/loadavg, return all three values
 (define (common:get-cpu-load remote-host)
@@ -1927,13 +1811,11 @@
     (let* ((actual-hostname (or remote-host (get-host-name) "localhost")))
       (or (common:get-cached-info actual-hostname "cpu-load")
 	  (let ((result (if remote-host
 			    (map (lambda (res)
 				   (if (eof-object? res) 9e99 res))
-			         (with-input-from-pipe 
-				  (conc "ssh " remote-host " cat /proc/loadavg")
-				  (lambda ()(list (read)(read)(read)))))
+			         (common:raw-get-remote-host-load remote-host))
 			    (with-input-from-file "/proc/loadavg" 
 			      (lambda ()(list (read)(read)(read)))))))
 	      ((l1 l2 l3)
@@ -1972,13 +1854,20 @@
 (define (common:get-normalized-cpu-load-raw remote-host)
   (let* ((actual-host (or remote-host (get-host-name)))) ;; #f is localhost
     (or (common:get-cached-info actual-host "normalized-load")
 	(let ((data (if remote-host
-			(with-input-from-pipe 
-			    (conc "ssh " remote-host " \"cat /proc/loadavg;cat /proc/cpuinfo;echo end\"")
-			  read-lines)
+			;; remote case: collect the output lines of the ssh command,
+			;; or '() when anything raises
+			(let ((inp #f))
+			  (handle-exceptions
+			      exn
+			    (begin
+			      ;; inp stays #f when the pipe could not be opened
+			      (if inp
+				  (handle-exceptions exn2 #f (close-input-pipe inp)))
+			      '())
+			    ;; a command is run through open-input-pipe; there is no
+			    ;; open-input-port procedure
+			    (set! inp (open-input-pipe (conc "ssh " remote-host " \"cat /proc/loadavg;cat /proc/cpuinfo;echo end\"")))
+			    (let* ((res (read-lines inp)))
+			      ;; best-effort close so a close failure cannot discard res
+			      (handle-exceptions exn2 #f (close-input-pipe inp))
+			      res)))
 			 (with-input-from-file "/proc/loadavg" 
 			 (with-input-from-file "/proc/cpuinfo"
@@ -2175,20 +2064,37 @@
 						   (loop (if (string-match "^processor\\s+:\\s+\\d+$" inl)
 							     (+ numcpu 1)
 				   (result (if remote-host
-					       (with-input-from-pipe 
+					       (common:generic-ssh
 						(conc "ssh " remote-host " cat /proc/cpuinfo")
-						proc)
+						proc -1)
 					       (with-input-from-file "/proc/cpuinfo" proc))))
 			      (if (and (number? result)
 				       (> result 0))
 				  (common:write-cached-info actual-host "num-cpus" result))
 	  (hash-table-set! *numcpus-cache* actual-host numcpus)
+;; Run ssh-command through a pipe and call proc (a thunk) with the pipe as
+;; the current input port.
+;;
+;;   ssh-command - complete command line handed to open-input-pipe
+;;   proc        - thunk that reads from the current input port; its value is returned
+;;   default     - value returned when opening the pipe or proc raises
+;;   msg-proc    - optional thunk called on failure instead of the standard log message
+(define (common:generic-ssh ssh-command proc default #!optional (msg-proc #f))
+  (let ((inp #f))
+    (handle-exceptions
+	exn
+      (begin
+	;; inp is still #f when open-input-pipe itself failed; closing #f would
+	;; raise again from inside this handler
+	(if inp
+	    (handle-exceptions exn2 #f (close-input-pipe inp)))
+	(if msg-proc
+	    (msg-proc)
+	    (debug:print 0 *default-log-port* "Command: \""ssh-command"\" failed. exn="exn))
+	default)
+      (set! inp (open-input-pipe ssh-command))
+      (let ((res (with-input-from-port inp proc)))
+	;; ports from open-input-pipe are closed with close-input-pipe;
+	;; best-effort so that a close failure cannot discard res
+	(handle-exceptions exn2 #f (close-input-pipe inp))
+	res))))
 ;; wait for normalized cpu load to drop below maxload
 (define (common:wait-for-normalized-load maxnormload msg remote-host #!optional (rem-tries 5))
@@ -2600,38 +2506,31 @@
 		  ((directory? p)(hash-table-set! directories p #t))
 		   (case (vector-ref rule 1)
 		     ((keep)(hash-table-set! keepers p rule))
-		      (print "Removing file " p)
+		      (debug:print 0 *default-log-port* "Removing file " p)
 		      (delete-file p))
-		      (print "Compressing file " p)
+		      (debug:print 0 *default-log-port* "Compressing file " p)
 		      (system (conc compress " " p)))
-		      (print "No match for file " p))))))))
+		      (debug:print 0 *default-log-port* "No match for file " p))))))))
     (if remove-empty
 	 (lambda (d)
 	   (if (null? (glob (conc d "/.*")(conc d "/*")))
-		 (print "Removing empty directory " d)
+		 (debug:print 0 *default-log-port* "Removing empty directory " d)
 		 (delete-directory d))))
 	 (sort (hash-table-keys directories) (lambda (a b)(> (string-length a)(string-length b))))))
 ;; E N V I R O N M E N T   V A R S
-(define (bb-check-path #!key (msg "check-path: "))
-  (let ((path (or (get-environment-variable "PATH") "none")))
-    (debug:print-info 0 *default-log-port* (conc msg" : $PATH="path))
-    (if (string-match "^.*/isoenv-core/.*" path)
-        (debug:print-error 0 *default-log-port* (conc msg" : !!ISOENV PRESENT!!")) ;; remove for prod
-        (debug:print-info 1 *default-log-port* (conc msg" : **no isoenv present**")))))
 (define (save-environment-as-files fname #!key (ignorevars (list "USER" "HOME" "DISPLAY" "LS_COLORS" "XKEYSYMDB" "EDITOR" "MAKEFLAGS" "MAKEF" "MAKEOVERRIDES" "HOSTNAME")))
   ;;(bb-check-path msg: "save-environment-as-files entry")
   (let ((envvars (get-environment-variables))
         (whitesp (regexp "[^a-zA-Z0-9_\\-:,\\.\\/%$]"))
 	(mungeval (lambda (val)
@@ -2642,13 +2541,16 @@
     (with-output-to-file (conc fname ".csh")
        (lambda ()
           (for-each (lambda (keyval)
 		      (let* ((key   (car keyval))
 			     (val   (cdr keyval))
-			     (delim (if (string-search whitesp val) 
+			     (delim (if (and (string-search whitesp val)
+					     (not (string-search "^\".*\"$" val))
+					     (not (string-search "^'.*'$" val)))
 			(print (if (or (member key ignorevars)
 				       (string-search whitesp key))
 				   "# setenv "
 				   "setenv ")
 			       key " " delim (mungeval val) delim)))
@@ -2656,11 +2558,13 @@
      (with-output-to-file (conc fname ".sh")
        (lambda ()
           (for-each (lambda (keyval)
 		      (let* ((key (car keyval))
 			     (val (cdr keyval))
-			     (delim (if (string-search whitesp val) 
+			     (delim (if (and (string-search whitesp val)
+					     (not (string-search "^\".*\"$" val))
+					     (not (string-search "^'.*'$" val)))
 			(print (if (or (member key ignorevars)
 				       (string-search whitesp key)
 				       (string-search ":" key)) ;; internal only values to be skipped.
@@ -3494,11 +3398,17 @@
 	  (debug:print 0 *default-log-port* "ERROR: packets directory path " pktsdir " is not a directory."))
 	 ((not (file-read-access? pktsdir))
 	  (debug:print 0 *default-log-port* "ERROR: packets directory path " pktsdir " is not readable."))
 	  (debug:print-info 0 *default-log-port* "Loading packets found in " pktsdir)
-	  (let ((pkts (glob (conc pktsdir "/*.pkt"))))
+	  (let ((pkts (glob (conc pktsdir "/*.pkt")))
+                (sqdb (dbi:db-conn pdb))
+                )
+           ;; Put this in a transaction to avoid issues overloading the db
+           (sqlite3:with-transaction
+            sqdb
+            (lambda ()
 	     (lambda (pkt)
 	       (let* ((uuid    (cadr (string-match ".*/([0-9a-f]+).pkt" pkt)))
 		      (exists  (lookup-by-uuid pdb uuid #f)))
 		 (if (not exists)
@@ -3509,11 +3419,11 @@
 			    (ptype  (alist-ref 'T apkt)))
 		       (add-to-queue pdb pktdat uuid (or ptype 'cmd) #f 0)
 		       (debug:print 4 *default-log-port* "Added " uuid " of type " ptype " to queue"))
 		     (debug:print 4 *default-log-port* "pkt: " uuid " exists, skipping...")
-	     pkts)))))
+	     pkts)))))))
    use-lt: use-lt))
 (define (common:get-pkt-alists pkts)
   (map (lambda (x)

Index: commonmod.scm
--- commonmod.scm
+++ commonmod.scm
@@ -17,16 +17,19 @@
 ;;     along with Megatest.  If not, see <>.
 (declare (unit commonmod))
+(use srfi-69)
 (module commonmod
 (import scheme chicken data-structures extras files)
-(import (prefix sqlite3 sqlite3:) posix typed-records srfi-18 srfi-69
+(import (prefix sqlite3 sqlite3:)
+	posix typed-records srfi-18 srfi-69
 	md5 message-digest
 	regex srfi-1)
@@ -44,10 +47,21 @@
   (conc megatest-version "-" megatest-fossil-hash))
 (define (version-signature)
   (conc megatest-version "-" (substring megatest-fossil-hash 0 4)))
+;; key => time (seconds) of the last permitted print, for low noise printing
+(define *common:denoise*    (make-hash-table))
+;; Rate limiter for repetitive messages.
+;;   waitval - number of seconds that must have passed since the last #t for this key
+;;   keys    - any values; they are converted with conc and joined with "-" to form the key
+;; Returns #t (and records the current time for the key) when more than
+;; waitval seconds have passed since the recorded time, #f otherwise.
+(define (common:low-noise-print waitval . keys)
+  (let* ((key     (string-intersperse (map conc keys) "-" ))
+         (now     (current-seconds))
+         (elapsed (- now (hash-table-ref/default *common:denoise* key 0))))
+    (and (> elapsed waitval)
+         (begin
+           (hash-table-set! *common:denoise* key now)
+           #t))))
 ;; config file utils
@@ -72,10 +86,55 @@
 	'()))) ;; should it return empty list or #f to indicate not set?
 (define (get-section cfgdat section)
   (hash-table-ref/default cfgdat section '()))
+;; Simple advisory lock implemented with a lock file.
+;; (dot-locking egg seems not to work, using this for now)
+;;
+;; fname       : path of the lock file
+;; expire-time : age in seconds beyond which an existing lock file is
+;;               considered stale and is removed (default 300)
+;;
+;; Returns #t when the lock file was written and, when read back after a
+;; short pause, its first line equals this process's "host-pid" key.
+;; Returns #f when a non-expired lock file already exists, when the file
+;; is missing or unreadable at read-back, when its content differs, or
+;; when a stale lock file could not be removed.
+(define (common:simple-file-lock fname #!key (expire-time 300))
+  (let ((fmod-time (handle-exceptions
+		       exn
+		     (current-seconds) ;; could not read the mod time, fall back to "now"
+		     (file-modification-time fname))))
+    (if (file-exists? fname) ;; (common:file-exists? fname)
+	(if (> (- (current-seconds) fmod-time) expire-time)
+	    (begin
+	      ;; lock is older than expire-time: remove it and try again
+	      (handle-exceptions exn #f (delete-file* fname))
+	      ;; Only retry when the stale file is really gone. If the delete
+	      ;; failed, retrying unconditionally would find the same stale
+	      ;; file again and spin forever.
+	      (if (file-exists? fname)
+		  #f
+		  (common:simple-file-lock fname expire-time: expire-time)))
+	    #f)
+	(let ((key-string (conc (get-host-name) "-" (current-process-id))))
+	  (with-output-to-file fname
+	    (lambda ()
+	      (print key-string)))
+	  ;; NOTE(review): presumably this pause lets a competing writer
+	  ;; overwrite the file before we read it back - confirm
+	  (thread-sleep! 0.25)
+	  (if (file-exists? fname) ;; (common:file-exists? fname)
+	      (handle-exceptions exn
+                #f 
+                (with-input-from-file fname
+	  	  (lambda ()
+		    (equal? key-string (read-line)))))
+	      #f)))))
+;; Keep trying to take the lock file fname with common:simple-file-lock.
+;;
+;; fname       : path of the lock file
+;; expire-time : passed through to common:simple-file-lock, and also the
+;;               number of seconds after which this function stops
+;;               retrying (default 300)
+;;
+;; An attempt is made immediately; after each failed attempt the function
+;; sleeps 3 seconds and tries again, as long as the deadline has not yet
+;; passed. Returns #t once an attempt succeeds, #f when out of time.
+(define (common:simple-file-lock-and-wait fname #!key (expire-time 300))
+  (let ((deadline (+ expire-time (current-seconds))))
+    (let retry ()
+      (cond
+       ((common:simple-file-lock fname expire-time: expire-time) #t)
+       ((> deadline (current-seconds))
+        (thread-sleep! 3)
+        (retry))
+       (else #f)))))
+;; Release a lock file by deleting fname.
+;; Returns whatever delete-file* returns; if deleting raises an exception
+;; it is swallowed and #f is returned (the reason for the failure is
+;; deliberately ignored, at least for now).
+(define (common:simple-file-release-lock fname)
+  (handle-exceptions delete-err #f (delete-file* fname)))
 ;; misc conversion, data manipulation functions
@@ -149,14 +208,18 @@
   (let ((adat (get-section cfgdat "areas")))
     (map (lambda (entry)
 	   `(,(car entry) . 
 	     ,(val->alist (cadr entry))))
+;; misc stuff
 ;; (define (debug:print . params) #f)
 ;; (define (debug:print-info . params) #f)
 ;; (define (set-functions dbgp dbgpinfo)
 ;;   (set! debug:print dbgp)
 ;;   (set! debug:print-info dbgpinfo))

Index: configf.scm
--- configf.scm
+++ configf.scm
@@ -165,11 +165,11 @@
 	(let ((outres (string-intersperse 
 	  (debug:print-info 4 *default-log-port* "shell result:\n" outres)
-	(begin
+	(begin ;; why is this printing to error-port and not using debug:print? -mrw-
 	  (with-output-to-port (current-error-port)
 	    (lambda ()
 	      (print "ERROR: " cmd " returned bad exit code " status)))

ADDED   configfmod.scm
Index: configfmod.scm
--- /dev/null
+++ configfmod.scm
@@ -0,0 +1,75 @@
+;; Copyright 2017, Matthew Welland.
+;; This file is part of Megatest.
+;;     Megatest is free software: you can redistribute it and/or modify
+;;     it under the terms of the GNU General Public License as published by
+;;     the Free Software Foundation, either version 3 of the License, or
+;;     (at your option) any later version.
+;;     Megatest is distributed in the hope that it will be useful,
+;;     but WITHOUT ANY WARRANTY; without even the implied warranty of
+;;     GNU General Public License for more details.
+;;     You should have received a copy of the GNU General Public License
+;;     along with Megatest.  If not, see <>.
+(declare (unit configfmod))
+;; (declare (uses mtargs))
+;; (declare (uses debugprint))
+;; (declare (uses keysmod))
+(module configfmod
+(import srfi-1
+;;        scheme
+;;	big-chicken        ;; more of a reminder than anything ...
+;;	chicken.base
+;;	chicken.condition
+;;	chicken.file
+;;	chicken.pathname
+;;	chicken.port
+;;	chicken.pretty-print
+;;	chicken.process
+;;	chicken.process-context
+;;	chicken.process-context.posix
+;;	chicken.sort
+;;	chicken.string
+;;	chicken.time
+;;	chicken.eval
+;;	debugprint
+;;	(prefix mtargs args:)
+;;	pkts
+;;	keysmod
+;;	(prefix base64 base64:)
+;;	(prefix dbi dbi:)
+;;	(prefix sqlite3 sqlite3:)
+;;	(srfi 18)
+;;	directory-utils
+;;	format
+;;	matchable
+;;	md5
+;;	message-digest
+;;	regex
+;;	regex-case
+;;	sparse-vectors
+;;	srfi-1
+;;	srfi-13
+;;	srfi-69
+;;	stack
+;;	typed-records
+;;	z3
+	)

Index: dashboard-context-menu.scm
--- dashboard-context-menu.scm
+++ dashboard-context-menu.scm
@@ -319,11 +319,11 @@
                      (lambda ()
                        (if scheme-match
-                              (print "error with custom menu scheme, exn=" exn)
+                              (debug:print 0 *default-log-port* "error with custom menu scheme, exn=" exn)
                                 ;;(BB> "gonna eval it!")
                                 (eval (with-input-from-string (cadr scheme-match) read)))))
                            (common:run-a-command command-line with-vars: #t))))))))

Index: dashboard-guimonitor.scm
--- dashboard-guimonitor.scm
+++ dashboard-guimonitor.scm
@@ -74,21 +74,22 @@
 			 (iup:button "Start"  
 				     #:expand "HORIZONTAL"
 				     #:action (lambda (obj)
 						(tasks:add-from-params tdb "run" keys key-params var-params)
-						(print "Launch Run")))
+						;; (print "Launch Run")
+						))
 			 (iup:button "Remove" 
 				     #:expand "HORIZONTAL"
 				     #:action (lambda (obj)
-						(print "Remove Run")
+						;; (print "Remove Run")
 						(tasks:add-from-params tdb "remove" keys key-params var-params)
 			 (iup:button "Rollup" 
 				     #:expand "HORIZONTAL"
 				     #:action (lambda (obj)
-						(print "Rollup Run")
+						;; (print "Rollup Run")
 						(tasks:add-from-params tdb "rollup" keys key-params var-params)))))
 			#:title "Misc"
 			 (iup:button "Quit" 

Index: dashboard-tests.scm
--- dashboard-tests.scm
+++ dashboard-tests.scm
@@ -707,12 +707,12 @@
 				       " -archive save-remove -testpatt " (conc testname "/" (if (equal? item-path "")
-	   ((not testdat)(begin (print "ERROR: bad test info for " test-id)(exit 1)))
-	   ((not rundat)(begin (print "ERROR: found test info but there is a problem with the run info for " run-id)(exit 1)))
+	   ((not testdat)(begin (debug:print 0 *default-log-port* "ERROR: bad test info for " test-id)(exit 1)))
+	   ((not rundat)(begin (debug:print 0 *default-log-port* "ERROR: found test info but there is a problem with the run info for " run-id)(exit 1)))
 	    ;;  (test-set-status! db run-id test-name state status itemdat)
 	    (set! self ; 
 		  (iup:dialog #:close_cb (lambda (a)(exit)) ; #:expand "YES"
 			      #:title testfullname
@@ -757,11 +757,11 @@
 									 (let* ((mtrx-rc  (conc lin ":" 6))
 										(fname    (iup:attribute obj mtrx-rc))
                                                                                 (stepname (iup:attribute obj (conc lin ":" 1)))                                                                                            (comment  (iup:attribute obj (conc lin ":" 7))))
                                                                            (case col
-                                                                             ((7) (print "Comment from step "stepname": "comment))
+                                                                             ((7) (debug:print 0 *default-log-port* "Comment from step "stepname": "comment))
                                                                              ((8) (ezsteps:spawn-run-from testdat stepname #t))
                                                                              ((9) (ezsteps:spawn-run-from testdat stepname #f))
                                                                              (else (view-a-log fname))))))))
 					 ;; (let loop ((count 0))
 					 ;;   (iup:attribute-set! steps-matrix "FITTOTEXT" (conc "L" count))

Index: dashboard.scm
--- dashboard.scm
+++ dashboard.scm
@@ -26,10 +26,11 @@
 (use canvas-draw)
 (import canvas-draw-iup)
 (use ducttape-lib)
 (use sqlite3 srfi-1 posix regex regex-case srfi-69 typed-records sparse-vectors) ;; defstruct
 (import (prefix sqlite3 sqlite3:))
+(import dbfile)
 (declare (uses common))
 (declare (uses margs))
 (declare (uses keys))
 (declare (uses items))
@@ -37,133 +38,74 @@
 (declare (uses configf))
 (declare (uses process))
 (declare (uses launch))
 (declare (uses runs))
 (declare (uses dashboard-tests))
-(declare (uses dashboard-guimonitor))
 (declare (uses tree))
 (declare (uses dcommon))
 (declare (uses dashboard-context-menu))
 (declare (uses vg))
 (declare (uses subrun))
-;; (declare (uses dashboard-main))
 (declare (uses mt))
+(declare (uses dbfile))        
 (include "common_records.scm")
 (include "db_records.scm")
 (include "run_records.scm")
 (include "task_records.scm")
 (include "megatest-version.scm")
 (include "megatest-fossil-hash.scm")
 (include "vg_records.scm")
+(dbfile:db-init-proc db:initialize-main-db)
 (define help (conc 
-	      "Megatest Dashboard, documentation at
-  version " megatest-version "
-  license GPL, Copyright (C) Matt Welland 2012-2017
+	      "Megatest Dashboard, documentation at version " megatest-version 
+              " license GPL, Copyright (C) Matt Welland 2012-2017
 Usage: dashboard [options]
   -h                    : this help
-  -test run-id,test-id  : control test identified by testid
+  -test run-id test-id  : open a test control panel on this test
   -skip-version-check   : skip the version check
-  -use-db-cache         : access database via cache 
   -rows R         : set number of rows
   -cols C         : set number of columns
+  -start-dir dir  : start dashboard in the given directory
+  -target target  : filter runs tab to given target.
+  -debug  n[,n]   : set debug level(s) e.g. -debug 4 or -debug 0,9
+  -repl           : Start a chicken scheme interpreter
-;;   -server host:port     : connect to host:port instead of db access
-;;   -xterm run-id,test-id : Start a new xterm with specified run-id and test-id
-;;   -guimonitor           : control panel for runs
 ;; process args
 (define remargs (args:get-args 
+                 ;; parameters (need arguments)
 		 (list  "-rows"
-			"-run"
-			"-test"
-                        "-xterm"
-			"-debug"
-			"-host" 
-			"-transport"
-                        "-start-dir"
-			) 
-		 (list  "-h"
-			"-use-server"
-			"-guimonitor"
-			"-main"
-			"-v"
-			"-q"
-			"-use-db-cache"
+			"-test" ;; given a run id and test id, open only a test control panel on that test..
+			"-debug"
+                        "-start-dir"
+                        "-target"
+			) 
+                 ;; switches (don't take arguments)
+		 (list  "-h"
-                        "-rh5.11" ;; fix to allow running on rh5.11
 			"-:p"     ;; ignore the built in chicken profiling switch
-;; check for MT_* environment variables and exit if found
-(if (not (args:get-arg "-test"))
-    (begin
-      (display "Checking for MT_ vars: ")
-      (for-each (lambda (var)
-		  (display " ")(display var)
-		  (if (get-environment-variable var)
-		      (begin
-			(print "ERROR: environment variable " var " is set in this terminal, this will cause you problems. Exiting now.")
-			(exit 1))))
-      (print ". Done. All ok.")))
-(if (not (null? remargs))
-    (begin
-      (print "Unrecognised arguments: " (string-intersperse remargs " "))
-      (exit)))
-(if (args:get-arg "-h")
-    (begin
-      (print help)
-      (exit)))
-(if (args:get-arg "-start-dir")
-    (if (directory-exists? (args:get-arg "-start-dir"))
-        (let ((fullpath (common:real-path (args:get-arg "-start-dir"))))
-          (setenv "PWD" fullpath)
-          (change-directory fullpath))
-	(begin
-	  (debug:print-error 0 *default-log-port* "non-existant start dir " (args:get-arg "-start-dir") " specified, exiting.")
- 	  (exit 1))))
-;; TODO: Move this inside (main)
-(if (not (launch:setup))
-    (begin
-      (print "Failed to find megatest.config, exiting") 
-      (exit 1)))
-;; deal with RH 5.11 gtk lib or iup lib missing detachbox feature
-;; first check for the switch
-(if (or (args:get-arg "-rh5.11")
-	(configf:lookup *configdat* "dashboard" "no-detachbox")
-        (not (file-exists? "/etc/os-release")))
-    (set! iup:detachbox iup:vbox))
-(if (not (common:on-homehost?))
-    (begin
-      (debug:print 0 *default-log-port* "WARNING: Current policy requires running dashboard on homehost: " (common:get-homehost))))
 ;; RA => Might require revert for filters 
 ;; create a watch dog to move changes from lt/.db/*.db to megatest.db
 ;;;(if (file-write-access? (conc *toppath* "/megatest.db"))
 ;;(debug:print-info 13 *default-log-port* "Before common:watchdog spawn")
-(thread-start! (make-thread common:watchdog "Watchdog thread"))
+;; (thread-start! (make-thread common:watchdog "Watchdog thread"))
 ;;(debug:print-info 13 *default-log-port* "After common:watchdog spawn")
 ;; (if (not (args:get-arg "-use-db-cache"))
 ;;     (begin
 ;;       (debug:print-info 0 *default-log-port* "Forcing db-cache mode due to read-only access to megatest.db")
 ;;       (hash-table-set! args:arg-hash "-use-db-cache" #t)));;;)
@@ -178,10 +120,11 @@
   uidat ;; needs to move to tabdat at some time
+  target
 (define (dboard:commondat-make)
    curr-tab-num:         0
@@ -189,10 +132,11 @@
    please-update:        #t
    update-mutex:         (make-mutex)
    updaters:             (make-hash-table)
    updating:             #f
    hide-not-hide-tabs:   #f
+   target:               ""
 ;; buttons color using image
@@ -260,19 +204,20 @@
 ;; gets and calls updater list based on curr-tab-num
 (define (dboard:common-run-curr-updaters commondat #!key (tab-num #f))
+  (sync-db-to-tmp (dboard:common-get-tabdat commondat tab-num: tab-num))
   (if (dboard:common-get-tabdat commondat tab-num: tab-num) ;; only update if there is a tabdat
       (let* ((tnum     (or tab-num (dboard:commondat-curr-tab-num commondat)))
 	     (updaters (hash-table-ref/default (dboard:commondat-updaters commondat)
 	(debug:print 4 *default-log-port* "Found these updaters: " updaters " for tab-num: " tnum)
 	(for-each ;; perform the function calls for the complete updaters list
 	 (lambda (updater)
-	   ;; (debug:print 3 *default-log-port* "Running " updater)
+          ;; (debug:print 3 *default-log-port* "Running " updater)
 ;; if tab-num passed in then use it, otherwise look in commondat at curr-tab-num
 ;; adds the updater passed in the updaters list at that hashkey
@@ -428,14 +373,14 @@
 (define (dboard:setup-tabdat tabdat)
   (dboard:tabdat-dbdir-set! tabdat (db:dbfile-path)) ;; (conc (configf:lookup *configdat* "setup" "linktree") "/.db"))
   (dboard:tabdat-dbfpath-set! tabdat (db:dbfile-path))
   (dboard:tabdat-monitor-db-path-set! tabdat (conc (dboard:tabdat-dbdir tabdat) "/monitor.db"))
   ;; HACK ALERT: this is a hack, please fix.
   (dboard:tabdat-ro-set! tabdat (not (file-read-access? (dboard:tabdat-dbfpath tabdat))))
   (dboard:tabdat-keys-set! tabdat (rmt:get-keys))
   (dboard:tabdat-dbkeys-set! tabdat (append (dboard:tabdat-keys tabdat) (list "runname")))
   (dboard:tabdat-tot-runs-set! tabdat (rmt:get-num-runs "%"))
@@ -473,11 +418,11 @@
   hierdat        ;; put hierarchial sorted list here
   tests          ;; hash of id => testdat
   ((tests-by-name (make-hash-table)) : hash-table) ;; hash of testfullname => testdat
   ((last-update   0)                 : number)    ;; last query to db got records from before last-update
-  ((last-db-time  0)                 : number)    ;; last timestamp on megatest.db
+  ((last-db-time  0)                 : number)    ;; last timestamp on main.db
   ((data-changed  #f)                : boolean)   
   ((run-data-offset  0)              : number)      ;; get only 100 items per call, set back to zero when received less than 100 items
   (db-path #f))
 ;; for the new runs view lets build up a few new record types and then consolidate later
@@ -719,11 +664,11 @@
 	 (last-db-time (if do-not-use-db-file-timestamps
 			   (dboard:rundat-last-db-time run-dat)))
 	 (db-path      (or (dboard:rundat-db-path run-dat)
 			   (let* ((db-dir (common:get-db-tmp-area))
-				  (db-pth (conc db-dir "/megatest.db")))
+				  (db-pth (conc db-dir "/.megatest/main.db")))
 			     (dboard:rundat-db-path-set! run-dat db-pth)
 	 (db-mod-time  (common:lazy-sqlite-db-modification-time db-path))
 	 (db-modified  (>= db-mod-time last-db-time))
 	 (multi-get    (> (dboard:rundat-run-data-offset run-dat) 0))  ;; multi-get in progress
@@ -734,11 +679,11 @@
 					      (dboard:rundat-run-data-offset run-dat) ;; query offset
 					      (dboard:tabdat-hide-not-hide tabdat) ;; no-in
 					      sort-by                              ;; sort-by
 					      sort-order                           ;; sort-order
-					      #f ;; 'shortlist                     ;; qrytype
+					      'shortlist                           ;; qrytype (was #f)
 					      last-update                          ;; last-update
 					      *dashboard-mode*)                    ;; use dashboard mode
 	 (use-new    (dboard:tabdat-hide-not-hide tabdat))
 	 (tests-ht   (if (dboard:tabdat-filters-changed tabdat)
@@ -746,15 +691,15 @@
 			   (dboard:rundat-tests-set! run-dat ht)
 			 (dboard:rundat-tests run-dat)))
 	 (got-all      (< (length tmptests) num-to-get))               ;; got all for this round  
+    ;; (debug:print-info 0 *default-log-port* "got-all="got-all", (hash-table-size tests-ht)="(hash-table-size tests-ht))
     ;; if we saw the db modified, reset it (the signal has already been used)
     (if (and got-all ;; (not multi-get)
-	(dboard:rundat-last-db-time-set!    run-dat (- start-time 2)))
+       (dboard:rundat-last-db-time-set!    run-dat (- start-time 2)))
     ;; to limit the amount of data transferred each cycle use limit of num-to-get and offset
     ;; DO NOT bump time indexes last-update and last-db-time until all the first pass of the
     ;; data has been read
     ;; set last-update to 0 if still getting data incrementally ;; NO NEED, handled above
@@ -865,11 +810,11 @@
 		  (hash-table-delete! (dboard:tabdat-allruns-by-id tabdat) run-id)
 		  (hash-table-set!    (dboard:tabdat-allruns-by-id tabdat) run-id run-struct))
 	      (if (or (null? tal)
 		      (> elapsed-time 2)) ;; stop loading data after 5 seconds, on the next call more data *should* be loaded since get-tests-for-run uses last update
-		    (if (> elapsed-time 2)(print "WARNING: timed out in update-testdat " elapsed-time "s"))
+		    (if (> elapsed-time 2)(debug:print 0 *default-log-port*  "WARNING: timed out in update-testdat " elapsed-time "s"))
 		    (dboard:tabdat-allruns-set! tabdat new-res)
 		  (if (> (dboard:rundat-run-data-offset run-struct) 0)
 		      (loop run tal new-res newmaxtests) ;; not done getting data for this run
 		      (loop (car tal)(cdr tal) new-res newmaxtests)))))))
@@ -1045,65 +990,86 @@
 	      (iup:attribute-set! lbl "TITLE" munged-val)))
 	(iup:attribute-set! lbl "FGCOLOR" (if (hash-table-ref/default *collapsed* newval #f) "0 112 112" "0 0 0"))
 	(if (< i maxn)
 	    (loop (+ i 1)))))))
 (define (get-itemized-tests test-dats)
   (let ((tnames '()))
     (for-each (lambda (tdat)
 		(let ((tname (vector-ref tdat 0))  ;; (db:test-get-testname tdat))
 		      (ipath (vector-ref tdat 1))) ;; (db:test-get-item-path tdat)))
 		  (if (not (equal? ipath ""))
 		      (if (and (list? tnames)
 			       (string? tname)
 			       (not (member tname tnames)))
-			  (set! tnames (append tnames (list tname)))))))
+			  (set! tnames (cons tname tnames))))))
-    tnames))
+    (reverse tnames)))
 ;; Bubble up the top tests to above the items, collect the items underneath
 ;; all while preserving the sort order from the SQL query as best as possible.
 (define (bubble-up tabdat test-dats #!key (priority 'itempath))
   (if (null? test-dats)
 	(let* ((tnames   '())                ;; list of names used to reserve order
-	       (tests    (make-hash-table))  ;; hash of lists, used to build as we go
+	       (tests-ht  (make-hash-table))  ;; hash of lists, used to build as we go
 	       (itemized (get-itemized-tests test-dats)))
-	  (for-each 
+	  #;(for-each 
 	   (lambda (testdat)
 	     (let* ((tname (vector-ref testdat 0))  ;; db:test-get-testname testdat))
 		    (ipath (vector-ref testdat 1))) ;; db:test-get-item-path testdat)))
-	       ;;   (seen  (hash-table-ref/default tests tname #f)))
+	       ;;   (seen  (hash-table-ref/default tests-ht tname #f)))
 	       (if (not (member tname tnames))
 		   (if (or (and (eq? priority 'itempath)
 				(not (equal? ipath "")))
 			   (and (eq? priority 'testname)
 				(equal? ipath ""))
 			   (not (member tname itemized)))
 		       (set! tnames (append tnames (list tname)))))
 	       (if (equal? ipath "")
 		   ;; This a top level, prepend it
-		   (hash-table-set! tests tname (cons testdat (hash-table-ref/default tests tname '())))
+		   (hash-table-set! tests-ht tname (cons testdat (hash-table-ref/default tests-ht tname '())))
 		   ;; This is item, append it
-		   (hash-table-set! tests tname (append (hash-table-ref/default tests tname '())(list testdat))))))
+		   (hash-table-set! tests-ht tname (append (hash-table-ref/default tests-ht tname '())(list testdat))))))
+	  test-dats)
+	  ;; 1. put all test/items into lists in tests-ht
+	  (for-each
+	   (lambda (testdat)
+	     (let* ((tname (vector-ref testdat 0))  ;; db:test-get-testname testdat))
+		    (ipath (vector-ref testdat 1))) ;; db:test-get-item-path testdat)))
+	       ;;   (seen  (hash-table-ref/default tests-ht tname #f)))
+	       (if (not (member tname tnames))
+		   (if (or (and (eq? priority 'itempath)
+				(not (equal? ipath "")))
+			   (and (eq? priority 'testname)
+				(equal? ipath ""))
+			   (not (member tname itemized)))
+		       (set! tnames (append tnames (list tname)))))
+	       (hash-table-set! tests-ht tname (cons testdat (hash-table-ref/default tests-ht tname '())))))
+	  ;; now bubble up the non-item test in itemized tests
+	  (hash-table-for-each
+	   tests-ht
+	   (lambda (k v)
+	     (if (> (length v) 1) ;; must be itemized, push the no-item to the front
+		 (hash-table-set! tests-ht k (sort v (lambda (a b)(not (equal? (vector-ref b 1) ""))))))))
 	  ;; Set all tests with items 
 	  (dboard:tabdat-item-test-names-set! tabdat (append (if (null? tnames)
 							      (filter (lambda (tname)
-									(let ((tlst (hash-table-ref tests tname)))
+									(let ((tlst (hash-table-ref tests-ht tname)))
 									  (and (list tlst)
 									       (> (length tlst) 1))))
 							  (dboard:tabdat-item-test-names tabdat)))
 	  (let loop ((hed (car tnames))
 		     (tal (cdr tnames))
 		     (res '()))
-	    (let ((newres (append res (hash-table-ref tests hed))))
+	    (let ((newres (append res (hash-table-ref tests-ht hed))))
 	      (if (null? tal)
 		  (loop (car tal)(cdr tal) newres))))))))
 ;; optimized to get runs constrained by what is visible on the screen
@@ -1671,11 +1637,11 @@
 			      ;;  NAMEid from IupTree to avoid
 			      ;;  conflict with the common attribute
 			      ;;  NAME. Use the TITLEid attribute."
            #:expand "YES"
            #:addexpanded "YES"
-           #:size "10x"
+           ;; #:size "10x"
            (lambda (obj id state)
               (lambda ()
                 (let* ((run-path (tree:node->path obj id))
@@ -1757,11 +1723,11 @@
 	#:expand "HORIZONTAL"
 	#:value 1
 	#:action (lambda (obj tstate)
 		    (lambda ()
-		      (print "tstate: " tstate)
+		      ;; (print "tstate: " tstate)
 		      (if (eq? tstate 0)
 			  (dboard:tabdat-compact-layout-set! tabdat #f)
 			  (dboard:tabdat-compact-layout-set! tabdat #t))
 		      (dboard:tabdat-last-filter-str-set! tabdat "")
@@ -1996,11 +1962,11 @@
          (dashboard:run-id->tests-mindat dest-run-id tabdat runs-hash)
          hide-clean: hide-clean)
-(define (dashboard:get-runs-hash tabdat)
+(define (dashboard:get-runs-hash tabdat) 
   (let* ((access-mode       (dboard:tabdat-access-mode tabdat))
          (last-runs-update  0);;(dboard:tabdat-last-runs-update tabdat))
 	 (runs-dat     (rmt:get-runs-by-patt (dboard:tabdat-keys tabdat) "%" #f #f #f #f last-runs-update))
 	 (runs-header  (vector-ref runs-dat 0)) ;; 0 is header, 1 is list of records
          (runs         (vector-ref runs-dat 1))
@@ -2194,11 +2160,11 @@
 	   (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn)
 	   (debug:print 0 *default-log-port* "ERROR: failed call procedure " viewgen
 			", with; tab-num=" tab-num ", view-name=" view-name
 			", and views-cfgdat and megatest configdat as parameters. To debug try loading in the repl: megatest -repl")
 	   (set! success #f))
-	 (print "Adding tab " view-name " with proc " viewgen)
+	 (debug:print 0 *default-log-port* "Adding tab " view-name " with proc " viewgen)
 	 ;; (iup:child-add! tabs
 	 (set! result-child 
 	       ((eval (string->symbol viewgen)) commondat tabs tab-num view-name views-cfgdat *configdat*))))
     ;; and finally set the updater
     (if success
@@ -2316,11 +2282,11 @@
 (define (dashboard:runs-summary commondat tabdat #!key (tab-num #f))
   (let* ((update-mutex (dboard:commondat-update-mutex commondat))
 	 (tb      (iup:treebox
 		   #:value 0
 		   ;;#:name "Runs"
-                   #:title "Runs" ;;  was #:name -- iup 3.19 changed this... "Changed: [DEPRECATED REMOVED] removed the old attribute NAMEid from IupTree to avoid conflict with the common attribute NAME. Use the TITLEid attribute."
+                   #:title "Runs"
 		   #:expand "YES"
 		   #:addexpanded "YES"
 		   (lambda (obj id state)
@@ -2783,11 +2749,12 @@
     (dboard:runs-tree-new-browser commondat rdat)
     (dboard:runs-new-matrix commondat rdat)
 (define (make-dashboard-buttons commondat) ;;  runs-sum-dat new-view-dat)
-  (let* ((stats-dat       (dboard:tabdat-make-data))
+  (let* (
+         (stats-dat       (dboard:tabdat-make-data))
 	 (runs-dat        (dboard:tabdat-make-data))
 	 (runs2-dat       (make-dboard:rdat)) ;; (dboard:tabdat-make-data))
 	 (onerun-dat      (dboard:tabdat-make-data)) ;; name for run-summary structure 
 	 (runcontrols-dat (dboard:tabdat-make-data))
 	 (runtimes-dat    (dboard:tabdat-make-data))
@@ -2809,11 +2776,13 @@
 	 (btn-fontsz      (dboard:tabdat-runs-btn-fontsz runs-dat))
 	 (cell-width      (dboard:tabdat-runs-cell-width runs-dat))
 	 (use-bgcolor     (equal? (configf:lookup *configdat* "dashboard" "use-bgcolor") "yes")))
     ;; controls (along bottom)
     ;; (set! controls (dboard:make-controls commondat runs-dat))
     ;; create the left most column for the run key names and the test names 
     (set! lftlst
 	  (list (iup:hbox
 		 (iup:label) ;; (iup:valuator)
 		 (apply iup:vbox 
@@ -2969,14 +2938,14 @@
       #:title (conc "Megatest dashboard " (current-user-name) ":" *toppath*)
       #:menu (dcommon:main-menu)
       (let* ((runs-view (iup:vbox
 			  #:orientation "VERTICAL" ;; "HORIZONTAL"
-			  #:value 100
+			  #:value 250
 			  (dboard:runs-tree-browser commondat runs-dat)
-			   #:value 100
+			   #:value 200
 			   ;; left most block, including row names
 			   (apply iup:vbox lftlst)
 			   ;; right hand block, including cells
 			    #:expand "YES"
@@ -3027,20 +2996,22 @@
 						 (let* ((tab-num (dboard:commondat-curr-tab-num commondat))
 							(tabdat  (dboard:common-get-tabdat commondat tab-num: tab-num)))
 						   (dboard:commondat-please-update-set! commondat #t)
 						   (dboard:tabdat-layout-update-ok-set! tabdat #t)))
-			  (dashboard:summary commondat stats-dat tab-num: 0)
+			  (dashboard:summary commondat stats-dat tab-num: 1)
 			  ;; (make-runs-view commondat runs2-dat 2)
 			  (dashboard:runs-summary commondat onerun-dat tab-num: 2)
 			  (dashboard:run-controls commondat runcontrols-dat tab-num: 3)
 			  (dashboard:run-times commondat runtimes-dat tab-num: 4)
-			  additional-views)))
+			  additional-views))
+             (target-run (dboard:commondat-target commondat))
+             )
 	;; (set! (iup:callback tabs tabchange-cb:) (lambda (a b c)(print "SWITCHED TO TAB: " a " " b " " c)))
-	(iup:attribute-set! tabs "TABTITLE0" "Summary")
-	(iup:attribute-set! tabs "TABTITLE1" "Runs")
+	(iup:attribute-set! tabs "TABTITLE0" "Runs")
+	(iup:attribute-set! tabs "TABTITLE1" "Summary")
 	;; (iup:attribute-set! tabs "TABTITLE2" "Runs2")
 	(iup:attribute-set! tabs "TABTITLE2" "Run Summary")
 	(iup:attribute-set! tabs "TABTITLE3" "Run Control")
 	(iup:attribute-set! tabs "TABTITLE4" "Run Times")
 	;; (iup:attribute-set! tabs "TABTITLE3" "New View")
@@ -3054,12 +3025,18 @@
 	(iup:attribute-set! tabs "BGCOLOR" "190 190 190")
 	;; make the iup tabs object available (for changing color for example)
 	(dboard:commondat-hide-not-hide-tabs-set! commondat tabs)
 	;; now set up the tabdat lookup
-	(dboard:common-set-tabdat! commondat 0 stats-dat)
-	(dboard:common-set-tabdat! commondat 1 runs-dat)
+	;; (dboard:common-set-tabdat! commondat 0 stats-dat)
+        (if target-run
+          (begin
+            (dboard:tabdat-target-set! runs-dat (string-split target-run "/"))
+          )
+        )
+	(dboard:common-set-tabdat! commondat 0 runs-dat)
 	;;(dboard:common-set-tabdat! commondat 2 runs2-dat)
 	(dboard:common-set-tabdat! commondat 2 onerun-dat)
 	(dboard:common-set-tabdat! commondat 3 runcontrols-dat)
 	(dboard:common-set-tabdat! commondat 4 runtimes-dat)
@@ -3298,11 +3275,10 @@
 	       (filtrstr  (conc targpatt "/" runpatt "/" testpatt)))
 	  ;; (print "targpatt: " targpatt " runpatt: " runpatt " testpatt: " testpatt)
 	  (if (not (equal? (dboard:tabdat-last-filter-str tabdat) filtrstr))
 	      (let ((dwg (dboard:tabdat-drawing tabdat)))
-		(print "reseting drawing")
 		(dboard:tabdat-layout-update-ok-set! tabdat #f)
 		(vg:drawing-libs-set! dwg (make-hash-table))
 		(vg:drawing-insts-set! dwg (make-hash-table))
 		(vg:drawing-cache-set! dwg '())
 		(dboard:tabdat-allruns-by-id-set! tabdat (make-hash-table))
@@ -3360,11 +3336,11 @@
 			  (debug:print 0 *default-log-port* "ERROR: I only know sqlite3 databases for now: " dbstr)
     (if (and dbpth (file-read-access? dbpth))
 	(let ((db (sqlite3:open-database dbpth))) ;; (open-database dbpth)))
-	  (sqlite3:set-busy-handler! db (make-busy-timeout 10000))
+	  (sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 10000))
 ;; sqlite3:path tablename timefieldname varfieldname field1 field2 ...
@@ -3528,11 +3504,11 @@
                                               ;;(vg:make-rect-obj last-tval lly curr-tval last-yval ;; (- stval 2) lly (+ stval 2)(+ lly (* yval yscale))
                                               (vg:make-line-obj curr-tval last-yval curr-tval next-yval
                                                                 line-color: graph-color)))         
-                                           (print "ERROR: curr-tval is not > last-tval; curr-tval " curr-tval ", last-tval " last-tval))))
+                                           (debug:print 0 *default-log-port* "ERROR: curr-tval is not > last-tval; curr-tval " curr-tval ", last-tval " last-tval))))
                                #f ;; (vector tstart minval minval)
                               )))))) ;; for each data point in the series
 	      (hash-table-keys alldat)))))
@@ -3689,11 +3665,11 @@
 						(cons obj test-objs))))))
 				  ;; (print "event_time: " (db:test-get-event_time   testdat) " mapped event_time: " event-time)
 				  ;; (print "run-duration: "  (db:test-get-run_duration testdat) " mapped run_duration: " run-duration)
 				  (if (> item-num 50)
 				      (if (eq? 0 (modulo item-num 50))
-					  (print "processing " run-num " of " num-runs " runs " item-num " of " num-items " of test " test-name ", " test-num " of " num-tests " tests")))
+					  (debug:print 0 *default-log-port* "processing " run-num " of " num-runs " runs " item-num " of " num-items " of test " test-name ", " test-num " of " num-tests " tests")))
 				  ;; (print "test-name: " test-name " event-time: " event-time " run-duration: " run-duration)
 				  (let ((newdoneruns (cons rundat doneruns)))
 				    (if (null? tidstal)
 					(if iterated
 					    (let* ((xtents (vg:get-extents-for-objs drawing new-test-objs))
@@ -3714,11 +3690,11 @@
 					    (escapeloop #t) ;; (dboard:tabdat-layout-update-ok tabdat)
 			      ;; If it is an iterated test put box around it now.
 			      (if (not (null? tests-tal))
 				  (if #f ;; (> (- (current-seconds) update-start-time) 5)
-				      (print "drawing runs taking too long")
+				      (debug:print 0 *default-log-port* "drawing runs taking too long")
 				      (if (dboard:tabdat-layout-update-ok tabdat)
 					  (testsloop  (car tests-tal)(cdr tests-tal)(+ test-num 1))
 					  (escapeloop #t) ;; (dboard:tabdat-layout-update-ok tabdat)
 			  ;; placeholder box
@@ -3754,11 +3730,11 @@
 				    (dboard:rundat-data-changed-set! rundat #f) 
 				    (dboard:tabdat-not-done-runs-set! tabdat '())
 				    (dboard:tabdat-done-runs-set! tabdat allruns))
 				  (if #f ;; (> (- (current-seconds) update-start-time) 5)
-					(print "drawing runs taking too long....  have " (length runtal) " remaining")
+					(debug:print 0 *default-log-port* "drawing runs taking too long....  have " (length runtal) " remaining")
 					;; (dboard:tabdat-done-runs-set! tabdat newdoneruns) ;; taking too long? stop here!
 					;; (time (vg:draw (dboard:tabdat-drawing tabdat) #t))
 					(dboard:tabdat-not-done-runs-set! tabdat runtal))
 					(if (dboard:tabdat-layout-update-ok tabdat)
@@ -3811,17 +3787,45 @@
 ;; The heavy lifting starts here
 (define (main)
-  (let ((mtdb-path (conc *toppath* "/megatest.db"))) ;; 
+  ;; (print "Starting dashboard main")
+  (let* ((mtdb-path (conc *toppath* "/.megatest/main.db"))
+         (target (args:get-arg "-target"))
+         (commondat       (dboard:commondat-make)))
+    (if target
+        (begin
+          (args:remove-arg-from-ht "-target")
+          (dboard:commondat-target-set! commondat target)
+        )
+    )
+    (if (not (launch:setup))
+        (begin
+          (debug:print 0 *default-log-port* "Failed to find megatest.config, exiting") 
+          (exit 1)
+        )
+    )
+    (if (not (common:on-homehost?))
+    (begin
+      (debug:print 0 *default-log-port* "WARNING: You are starting the dashboard on a machine that is not the homehost:" (common:get-homehost))
+      (debug:print 0 *default-log-port* "It will be slower.")
+      ))
     (if (and (common:file-exists? mtdb-path)
 	     (file-write-access? mtdb-path))
 	(if (not (args:get-arg "-skip-version-check"))
-    (let* ((commondat       (dboard:commondat-make)))
+    (let* ()
       ;; Move this stuff to db.scm? I'm not sure that is the right thing to do...
        ((args:get-arg "-test") ;; run-id,test-id
 	(let* ((dat     (let ((d (map string->number (string-split (args:get-arg "-test") ",")))) 
 			  (if (> (length d) 1)
@@ -3834,26 +3838,24 @@
 		   (>= test-id 0))
 	      (dashboard-tests:examine-test run-id test-id)
 		(debug:print 3 *default-log-port* "INFO: tried to open test with invalid run-id,test-id. " (args:get-arg "-test"))
 		(exit 1)))))
-       ;; ((args:get-arg "-guimonitor")
-       ;;  (gui-monitor (dboard:tabdat-dblocal tabdat)))
 	(dboard:commondat-uidat-set! commondat (make-dashboard-buttons commondat))
 	(dboard:commondat-curr-tab-num-set! commondat 0)
 	 (lambda ()
-	   (dashboard:runs-tab-updater commondat 1))
-	 tab-num: 1)
+	   (dashboard:runs-tab-updater commondat 0))
+	 tab-num: 0)
         ;; may not want this alive (manually merged it from v1.66)
-	(dboard:commondat-add-updater 
-	 commondat 
-	 (lambda ()
-	   (dashboard:runs-tab-updater commondat 1))
-	 tab-num: 2)
+	;; (dboard:commondat-add-updater 
+	;;  commondat 
+	;;  (lambda ()
+	   ;;  (dashboard:runs-tab-updater commondat 1))
+	;; tab-num: 2)
 	(iup:callback-set! *tim*
 			   (lambda (time-obj)
 			     (let ((update-is-running #f))
 			     (mutex-lock! (dboard:commondat-update-mutex commondat))
@@ -3867,23 +3869,102 @@
 			     (mutex-lock! (dboard:commondat-update-mutex commondat))
 			     (dboard:commondat-updating-set! commondat #f)
 			     (mutex-unlock! (dboard:commondat-update-mutex commondat)))
+      ;; (debug:print 0 *default-log-port* "Starting updaters")
       (let ((th1 (make-thread (lambda ()
 				(thread-sleep! 1)
 				(dboard:common-run-curr-updaters commondat 0) ;; force update of summary tab 
 				) "update buttons once"))
 	    (th2 (make-thread iup:main-loop "Main loop")))
+        ;; (print "Starting main loop")
 	(thread-start! th2)
-	(thread-join! th2)))))
+	(thread-join! th2)
+      )
+    )
+  )
+(define last-copy-time 0)
+;; Sync to tmp only if ./.megatest/main.db is not writable (read-only mode), and at most once every 5 seconds.
+(define (sync-db-to-tmp tabdat)
+  (let* ((db-file "./.megatest/main.db"))
+    (if (and (not (file-write-access? db-file)) ( > (current-seconds) (+ last-copy-time 5)))
+      (begin
+        (db:multi-db-sync (db:setup #f) 'old2new)
+        (set! last-copy-time (current-seconds))
+      )
+    )
+  )
+;; ########################### top level code ########################
+;; check for MT_* environment variables and exit if found
+(if (not (args:get-arg "-test"))
+    (begin
+      (for-each (lambda (var)
+		  ;; (display " ")(display var)
+		  (if (get-environment-variable var)
+		      (begin
+			(debug:print 0 *default-log-port* "ERROR: environment variable " var " is set in this terminal, this will cause you problems. Exiting now.")
+			(exit 1))))
+    )
+(setenv "MT_RUN_AREA_HOME" (get-environment-variable "PWD"))
+(if (not (null? remargs))
+  (if remargs
+    (begin
+      (debug:print 0 *default-log-port* "Unrecognised arguments: " (string-intersperse remargs " "))
+      ;; unrecognised arguments are a usage error, so exit non-zero like
+      ;; the other failure paths in this file instead of reporting success
+      (exit 1)
+    )
+    (begin
+      (print help)
+      (exit)
+    )
+  )
+;; -h given: print the help text, then leave the program
+(when (args:get-arg "-h")
+  (print help)
+  (exit))
+;; -start-dir given: resolve it with common:real-path, export the result as
+;; PWD and change into it. A directory that does not exist is reported and
+;; the program exits with status 1.
+(let ((start-dir (args:get-arg "-start-dir")))
+  (when start-dir
+    (cond
+     ((directory-exists? start-dir)
+      (let ((resolved (common:real-path start-dir)))
+        (setenv "PWD" resolved)
+        (change-directory resolved)))
+     (else
+      (debug:print-error 0 *default-log-port* "non-existant start dir " start-dir " specified, exiting.")
+      (exit 1)))))
+;; Work around a gtk or iup lib (RH 5.11) that lacks the detachbox feature:
+;; replace iup:detachbox with iup:vbox when the "dashboard" / "no-detachbox"
+;; lookup in *configdat* is true, or when /etc/os-release does not exist.
+(when (or (configf:lookup *configdat* "dashboard" "no-detachbox")
+          (not (file-exists? "/etc/os-release")))
+  (set! iup:detachbox iup:vbox))
 ;; ease debugging by loading ~/.dashboardrc
 (let ((debugcontrolf (conc (get-environment-variable "HOME") "/.dashboardrc")))
   (if (common:file-exists? debugcontrolf)
       (load debugcontrolf)))
 (if (args:get-arg "-repl")

Index: db.scm
--- db.scm
+++ db.scm
@@ -22,17 +22,37 @@
 ;; Database access
 ;; dbstruct vector containing all the relevant dbs like main.db, megatest.db, run.db etc
-(use (srfi 18) extras tcp stack)
-(use sqlite3 srfi-1 posix regex regex-case srfi-69 csv-xml s11n md5 message-digest base64 format dot-locking z3 typed-records matchable)
-(import (prefix sqlite3 sqlite3:))
-(import (prefix base64 base64:))
+(use (srfi 18)
+     extras
+     tcp
+     stack
+     (prefix sqlite3 sqlite3:)
+     srfi-1
+     posix
+     regex
+     regex-case
+     srfi-69
+     csv-xml
+     s11n
+     md5
+     message-digest
+     (prefix base64 base64:)
+     format
+     dot-locking
+     z3
+     typed-records
+     matchable
+     files)
 (declare (unit db))
 (declare (uses common))
+(declare (uses dbmod))
+;; (declare (uses debugprint))
+(declare (uses dbfile))
 (declare (uses keys))
 (declare (uses ods))
 (declare (uses client))
 (declare (uses mt))
@@ -42,43 +62,21 @@
 (include "run_records.scm")
 (define *number-of-writes* 0)
 (define *number-non-write-queries* 0)
-;;  R E C O R D S
-;; each db entry is a pair ( db . dbfilepath )
-;; I propose this record evolves into the area record
-(defstruct dbr:dbstruct 
-  (tmpdb       #f)
-  (dbstack     #f) ;; stack for tmp db handles, do not initialize with a stack
-  (mtdb        #f)
-  (refndb      #f)
-  (homehost    #f) ;; not used yet
-  (on-homehost #f) ;; not used yet
-  (read-only   #f)
-  (stmt-cache  (make-hash-table))
-  )                ;; goal is to converge on one struct for an area but for now it is too confusing
+(import dbmod)
+(import dbfile)
 ;; record for keeping state,status and count for doing roll-ups in
 ;; iterated tests
 (defstruct dbr:counts
   (state #f)
   (status #f)
   (count  0)) 
-;; alist-of-alists
-;; (define (db:aa-set! dat key1 key2 val)
-;;   (let loop ((
 ;; hash of hashs
@@ -94,13 +92,14 @@
 (define (db:hoh-get dat key1 key2)
   (let* ((subhash (hash-table-ref/default dat key1 #f)))
     (and subhash
 	 (hash-table-ref/default subhash key2 #f))))
-(define (db:get-cache-stmth dbstruct db stmt)
-  (let* ((stmt-cache        (dbr:dbstruct-stmt-cache dbstruct))
-	 (stmth             (db:hoh-get stmt-cache db stmt)))
+(define (db:get-cache-stmth dbdat run-id db stmt)
+  (let* (;; (dbdat       (dbfile:get-dbdat dbstruct run-id))
+	 (stmt-cache  (dbr:dbdat-stmt-cache dbdat))
+	 (stmth       (db:hoh-get stmt-cache db stmt)))
     (or stmth
 	(let* ((newstmth (sqlite3:prepare db stmt)))
 	  (db:hoh-set! stmt-cache db stmt newstmth)
@@ -127,37 +126,63 @@
 	   (debug:print-error 0 *default-log-port* " query " stmt " failed, params: " params ", error: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn)
 	   (print-call-chain (current-error-port))
    (apply sqlite3:first-result db stmt params)))
+;; Return *dbstruct-dbs* when it is already set; otherwise return the result
+;; of dbfile:setup for *toppath* and the db tmp area.
+;;   do-sync is passed through to dbfile:setup unchanged.
+;; Asserts that *toppath* is set, i.e. launch:setup must have run first.
+(define (db:setup do-sync)
+  (assert *toppath* "FATAL: db:setup called before launch:setup has been run.")
+  ;; the tmp area is looked up on every call, before *dbstruct-dbs* is consulted
+  (let ((tmp-area (common:get-db-tmp-area)))
+    (or *dbstruct-dbs*
+        (dbfile:setup do-sync *toppath* tmp-area))))
+;; looks up subdb and returns it, if not found then set up
+;; and then return it.
+#;(define (db:get-db dbstruct run-id)
+  (let* ((res (dbfile:get-subdb dbstruct run-id)))
+    (if res
+	res
+	(let* ((newsubdb (make-dbr:subdb)))
+	  (dbfile:set-subdb dbstruct run-id newsubdb)
+	  (db:open-db dbstruct run-id areapath: (dbr:dbstruct-areapath dbstruct) do-sync: #t)
+	  newsubdb))))
 ;; Get/open a database
 ;;    if run-id => get run specific db
 ;;    if #f     => get main db
+;;    if run-id is a string treat it as a filename
 ;;    if db already open - return inmem
 ;;    if db not open, open inmem, rundb and sync then return inmem
 ;;    inuse gets set automatically for rundb's
-(define (db:get-db dbstruct) ;;  run-id) 
-  (if (stack? (dbr:dbstruct-dbstack dbstruct))
-      (if (stack-empty? (dbr:dbstruct-dbstack dbstruct))
-          (let ((newdb (db:open-megatest-db path: (db:dbfile-path))))
-            ;; (stack-push! (dbr:dbstruct-dbstack dbstruct) newdb)
-            newdb)
-          (stack-pop! (dbr:dbstruct-dbstack dbstruct)))
-      (db:open-db dbstruct)))
-;; ;; legacy handling of structure for managing db's. Refactor this into dbr:?
-(define (db:dbdat-get-db dbdat)
-  (if (pair? dbdat)
-      (car dbdat)
-      dbdat))
-(define (db:dbdat-get-path dbdat)
-  (if (pair? dbdat)
-      (cdr dbdat)
-      #f))
+;; (define db:get-db db:get-subdb)
+;; (define (db:get-db subdb #;dbstruct run-id) ;; RENAME TO db:get-dbh
+;;   ;; (let* ((subdb (dbfile:get-subdb dbstruct run-id)))
+;;     (if (stack? (dbr:subdb-dbstack subdb))
+;; 	(if (stack-empty? (dbr:subdb-dbstack subdb))
+;; 	    (let* ((dbname (db:run-id->dbname run-id))
+;; 		   (newdb  (db:open-megatest-db path: (db:dbfile-path)
+;; 						name: dbname)))
+;; 	      ;; NOTE: pushing on the stack only happens AFTER the handle has been used
+;; 	      ;; (stack-push! (dbr:dbstruct-dbstack dbstruct) newdb)
+;; 	      newdb)
+;;           (stack-pop! (dbr:subdb-dbstack subdb)))
+;; 	(db:open-db subdb run-id))) ;; )
+#;(define (db:get-db dbstruct run-id) 
+   (let* ((subdb (dbfile:get-subdb dbstruct run-id))
+        (dbdat (dbfile:get-dbdat dbstruct run-id)))
+        (if (dbr:dbdat? dbdat)
+          dbdat
+          (dbfile:open-db *dbstruct-dbs* #f db:initialize-main-db)
+        )
+   )
 (define-inline (db:generic-error-printout exn . message)
   (print-call-chain (current-error-port))
   (apply debug:print-error 0 *default-log-port* message)
   (debug:print-error 0 *default-log-port* "   params: " params
@@ -164,71 +189,10 @@
 		     ", error: "     ((condition-property-accessor 'exn 'message)   exn)
 		     ", arguments: " ((condition-property-accessor 'exn 'arguments) exn)
 		     ", location: "  ((condition-property-accessor 'exn 'location)  exn)
-;; (db:with-db dbstruct run-id sqlite3:exec "select blah fgrom blaz;")
-;; r/w is a flag to indicate if the db is modified by this query #t = yes, #f = no
-(define (db:with-db dbstruct run-id r/w proc . params)
-  (let* ((have-struct (dbr:dbstruct? dbstruct))
-         (dbdat     (if have-struct 
-			(db:get-db dbstruct)
-			#f))
-	 (db        (if have-struct
-			(db:dbdat-get-db dbdat)
-			dbstruct))
-	 (fname     (db:dbdat-get-path dbdat))
-	 (use-mutex (> *api-process-request-count* 25))) ;; was 25
-    (if (and use-mutex
-	     (common:low-noise-print 120 "over-50-parallel-api-requests"))
-	(debug:print-info 0 *default-log-port* *api-process-request-count* " parallel api requests being processed in process " (current-process-id) ", throttling access"))
-    (if (common:low-noise-print 600 (conc "parallel-api-requests" *max-api-process-requests*))
-	(debug:print-info 2 *default-log-port* "Parallel api request count: " *api-process-request-count* " max parallel requests: " *max-api-process-requests*))
-    (condition-case
-     (begin
-       (if use-mutex (mutex-lock! *db-with-db-mutex*))
-       (let ((res (apply proc db params)))
-	 (if use-mutex (mutex-unlock! *db-with-db-mutex*))
-	 ;; (if (vector? dbstruct)(db:done-with dbstruct run-id r/w))
-	 (if dbdat (stack-push! (dbr:dbstruct-dbstack dbstruct) dbdat))
-	 res))
-     (exn (io-error)
-	  (db:generic-error-printout exn "ERROR: i/o error with " fname ". Check permissions, disk space etc. and try again."))
-     (exn (corrupt)
-	  (db:generic-error-printout exn "ERROR: database " fname " is corrupt. Repair it to proceed."))
-     (exn (busy)
-	  (db:generic-error-printout exn "ERROR: database " fname
-				     " is locked. Try copying to another location, remove original and copy back."))
-     (exn (permission)(db:generic-error-printout exn "ERROR: database " fname " has some permissions problem."))
-     (exn ()
-	  (db:generic-error-printout exn "ERROR: Unknown error with database " fname " message: "
-		       ((condition-property-accessor 'exn 'message) exn))))))
-;; K E E P   F I L E D B   I N   dbstruct
-;; (define (db:get-filedb dbstruct run-id)
-;;   (let ((db (vector-ref dbstruct 2)))
-;;     (if db
-;; 	db
-;; 	(let ((fdb (filedb:open-db (conc *toplevel* "/db/files.db"))))
-;; 	  (vector-set! dbstruct 2 fdb)
-;; 	  fdb))))
-;; ;; Can also be used to save arbitrary strings
-;; ;;
-;; (define (db:save-path dbstruct path)
-;;   (let ((fdb (db:get-filedb dbstruct)))b
-;;     (filedb:register-path fdb path)))
-;; ;; Use to get a path. To get an arbitrary string see next define
-;; ;;
-;; (define (db:get-path dbstruct id)
-;;   (let ((fdb (db:get-filedb dbstruct)))
-;;     (filedb:get-path db id)))
 ;; NB// #f => return dbdir only
 ;;      (was planned to be;  zeroth db with name=main.db)
 ;; If run-id is #f return to create and retrieve the path where the db will live.
@@ -237,375 +201,42 @@
 (define (db:set-sync db)
   (let ((syncprag (configf:lookup *configdat* "setup" "sychronous")))
     (sqlite3:execute db (conc "PRAGMA synchronous = " (or syncprag 0) ";")))) 
-;; open an sql database inside a file lock
-;; returns: db existed-prior-to-opening
-;; RA => Returns a db handler; sets the lock if opened in writable mode
-;; (define *db-open-mutex* (make-mutex))
-(define (db:lock-create-open fname initproc)
-  (let* ((parent-dir   (or (pathname-directory fname)(current-directory))) ;; no parent? go local
-         (raw-fname    (pathname-file fname))
-	 (dir-writable (file-write-access? parent-dir))
-	 (file-exists  (common:file-exists? fname))
-	 (file-write   (if file-exists
-			   (file-write-access? fname)
-			   dir-writable )))
-    ;; (mutex-lock! *db-open-mutex*) ;; tried this mutex, not clear it helped.
-    (if file-write ;; dir-writable
-	(condition-case
-         (let* ((lockfname   (conc fname ".lock"))
-                (readyfname  (conc parent-dir "/.ready-" raw-fname))
-                (readyexists (common:file-exists? readyfname)))
-           (if (not readyexists)
-               (common:simple-file-lock-and-wait lockfname))
-           (let ((db      (sqlite3:open-database fname)))
-             (sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 136000))
-             (sqlite3:execute db "PRAGMA synchronous = 0;")
-             (if (and (configf:lookup *configdat* "setup" "tmp_mode") (string-match "^/tmp/.*" fname))
-                 (begin
-                   ;;(print "DEBUG: Setting tmp_mode for " fname) 
-                   (sqlite3:execute db (configf:lookup *configdat* "setup" "tmp_mode"))
-                   )
-                 )  
-             (if (and (configf:lookup *configdat* "setup" "nfs_mode") (not (string-match "^/tmp/.*" fname)))
-                 (begin
-                   ;;(print "DEBUG: Setting nfs_mode for " fname)
-                   (sqlite3:execute db (configf:lookup *configdat* "setup" "nfs_mode"))
-                   )
-                 )  
-             (if (and (not (or (configf:lookup *configdat* "setup" "tmp_mode") (configf:lookup *configdat* "setup" "nfs_mode")))  
-                      (configf:lookup *configdat* "setup" "use-wal")
-                      (string-match "^/tmp/.*" fname)) ;; this is a file in /tmp
-                 (sqlite3:execute db "PRAGMA journal_mode=WAL;")
-                 (debug:print 2 *default-log-port* "Creating " fname " in NON-WAL mode."))
-             (if (not file-exists)
-                 (initproc db))
-             (if (not readyexists)
-                 (begin
-                   (common:simple-file-release-lock lockfname)
-                   (with-output-to-file
-                       readyfname
-                     (lambda ()
-                       (print "Ready at " 
-                              (seconds->year-work-week/day-time 
-                               (current-seconds)))))))
-             db))
-         (exn (io-error)  (debug:print 0 *default-log-port* "ERROR: i/o error with " fname ". Check permissions, disk space etc. and try again."))
-         (exn (corrupt)   (debug:print 0 *default-log-port* "ERROR: database " fname " is corrupt. Repair it to proceed."))
-         (exn (busy)      (debug:print 0 *default-log-port* "ERROR: database " fname " is locked. Try copying to another location, remove original and copy back."))
-         (exn (permission)(debug:print 0 *default-log-port* "ERROR: database " fname " has some permissions problem."))
-         (exn () (debug:print 0 *default-log-port* "ERROR: Unknown error with database " fname " message: " ((condition-property-accessor 'exn 'message) exn))))
-	(condition-case
-         (begin
-           (debug:print 2 *default-log-port* "WARNING: opening db in non-writable dir " fname)
-           (let ((db (sqlite3:open-database fname)))
-             (sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 136000))
-             (sqlite3:execute db "PRAGMA synchronous = 0;")
-	     ;; (mutex-unlock! *db-open-mutex*)
-             db))
-         (exn (io-error)  (debug:print 0 *default-log-port* "ERROR: i/o error with " fname ". Check permissions, disk space etc. and try again."))
-         (exn (corrupt)   (debug:print 0 *default-log-port* "ERROR: database " fname " is corrupt. Repair it to proceed."))
-         (exn (busy)      (debug:print 0 *default-log-port* "ERROR: database " fname " is locked. Try copying to another location, remove original and copy back."))
-         (exn (permission)(debug:print 0 *default-log-port* "ERROR: database " fname " has some permissions problem."))
-         (exn () (debug:print 0 *default-log-port* "ERROR: Unknown error with database " fname " message: " ((condition-property-accessor 'exn 'message) exn))))
-	)))
-;; This routine creates the db if not already present. It is only called if the db is not already opened
-(define (db:open-db dbstruct #!key (areapath #f)(do-sync #t)) ;; TODO: actually use areapath
-  (let ((tmpdb-stack (dbr:dbstruct-dbstack dbstruct))) ;; RA => Returns the first reference in dbstruct
-    (if (stack? tmpdb-stack)
-	(db:get-db tmpdb-stack) ;; get previously opened db (will create new db handle if all in the stack are already used
-        (let* ((max-stale-tmp (configf:lookup-number *configdat* "server" "filling-db-max-stale-seconds" default: 10))
-               (dbpath       (db:dbfile-path ))      ;; path to tmp db area
-               (dbexists     (common:file-exists? dbpath))
-	       (tmpdbfname   (conc dbpath "/megatest.db"))
-	       (dbfexists    (common:file-exists? tmpdbfname))  ;; (conc dbpath "/megatest.db")))
-               (mtdbexists   (common:file-exists? (conc *toppath* "/megatest.db")))
-               (mtdbmodtime  (if mtdbexists (common:lazy-sqlite-db-modification-time (conc *toppath* "/megatest.db"))   #f))
-	        		 (tmpdbmodtime (if dbfexists  (common:lazy-sqlite-db-modification-time tmpdbfname) #f)) 
-               (mtdb         (db:open-megatest-db))
-               (mtdbpath     (db:dbdat-get-path mtdb))
-               (tmpdb        (db:open-megatest-db path: dbpath)) ;; lock-create-open dbpath db:initialize-main-db))
-               (refndb       (db:open-megatest-db path: dbpath name: "megatest_ref.db"))
-               (write-access (file-write-access? mtdbpath))
-	       ;(mtdbmodtime  (if mtdbexists (common:lazy-sqlite-db-modification-time mtdbpath)   #f)) ; moving this before db:open-megatest-db is called. if wal mode is on -WAL and -shm file get created with causing the  tmpdbmodtime timestamp always greater than mtdbmodtime
-	       ;(tmpdbmodtime (if dbfexists  (common:lazy-sqlite-db-modification-time tmpdbfname) #f)) 
-					;if wal mode is on -WAL and -shm file get created when db:open-megatest-db is called. modtimedelta will always be < 10 so db in tmp not get synced
-          ;(tmpdbmodtime (if dbfexists (db:get-last-update-time (car tmpdb)) #f))    
-          ;(fmt (file-modification-time tmpdbfname))
-	       (modtimedelta (and mtdbmodtime tmpdbmodtime (- mtdbmodtime tmpdbmodtime))))
-          (when write-access
-            (sqlite3:execute (car mtdb) "drop trigger if exists update_tests_trigger")
-            (sqlite3:execute (car mtdb) "drop trigger if exists update_runs_trigger"))
-         ;(print "mtdbmodtime " mtdbmodtime " tmpdbmodtime " tmpdbmodtime " mtdbpath " mtdbpath " " (conc *toppath* "/megatest.db"))
-	        ;;(debug:print-info 13 *default-log-port* "db:open-db>> mtdbpath="mtdbpath" mtdbexists="mtdbexists" and write-access="write-access)
-          (if (and dbexists (not write-access))
-              (begin
-                (set! *db-write-access* #f)
-                (dbr:dbstruct-read-only-set! dbstruct #t)))
-          (dbr:dbstruct-mtdb-set!   dbstruct mtdb)
-          (dbr:dbstruct-tmpdb-set!  dbstruct tmpdb)
-          (dbr:dbstruct-dbstack-set! dbstruct (make-stack)) ;; BB: why a stack?  Why would the number of db's be indeterminate?  Is this a legacy of 1.db 2.db .. ?
-          (stack-push! (dbr:dbstruct-dbstack dbstruct) tmpdb) ;; olddb is already a (cons db path)
-          (dbr:dbstruct-refndb-set! dbstruct refndb)
-          (if (and  (or (not dbfexists)
-			(and modtimedelta
-			     (> modtimedelta max-stale-tmp))) ;; if db in tmp is over ten seconds older than the file in MTRA then do a sync back
-		    do-sync)
-	      (begin
-		(debug:print 1 *default-log-port* "filling db " (db:dbdat-get-path tmpdb) " with data \n    from " (db:dbdat-get-path mtdb) " mod time delta: " modtimedelta)
-		(db:sync-tables (db:sync-all-tables-list dbstruct) #f mtdb refndb tmpdb)
-    ;touch tmp db to avoid wal mode wierdness  
-     (set! (file-modification-time tmpdbfname) (current-seconds))  
-                (debug:print-info 13 *default-log-port* "db:sync-all-tables-list done.")
-                )
-	      (debug:print 4 *default-log-port* " db, " (db:dbdat-get-path tmpdb) " already exists or fresh enough, not propogating data from\n     " (db:dbdat-get-path mtdb) " mod time delta: " modtimedelta) )
-	  ;; (db:multi-db-sync dbstruct 'old2new))  ;; migrate data from megatest.db automatically
-          tmpdb))))
 (define (db:get-last-update-time db)
-;	(db:with-db
-;   dbstruct #f #f 
-;    (lambda (db)
-			(let ((last-update-time #f))
-      	(sqlite3:for-each-row 
-          (lambda (lup) 
-             (set! last-update-time lup))     
-          db    
-					"select max(lup) from ( select max(last_update) as lup  from tests union select max(last_update) as lup from runs);")
-        last-update-time))
-;; Make the dbstruct, setup up auxillary db's and call for main db at least once
-;; called in http-transport and replicated in rmt.scm for *local* access. 
-(define (db:setup do-sync #!key (areapath #f))
-  ;;
-  (cond
-   (*dbstruct-db* *dbstruct-db*);; TODO: when multiple areas are supported, this optimization will be a hazard
-   (else ;;(common:on-homehost?)
-    (debug:print-info 13 *default-log-port* "db:setup entered (first time, not cached.)")
-    (let* ((dbstruct (make-dbr:dbstruct)))
-      (when (not *toppath*)
-        (debug:print-info 13 *default-log-port* "in db:setup, *toppath* not set; calling launch:setup")
-        (launch:setup areapath: areapath))
-      (debug:print-info 13 *default-log-port* "Begin db:open-db")
-      (db:open-db dbstruct areapath: areapath do-sync: do-sync)
-      (debug:print-info 13 *default-log-port* "Done db:open-db")
-      (set! *dbstruct-db* dbstruct)
-      ;;(debug:print-info 13 *default-log-port* "new dbstruct = "(dbr:dbstruct->alist dbstruct))
-      dbstruct))))
-   ;; (else
-   ;;  (debug:print 0 *default-log-port* "ERROR: attempt to open database when not on homehost. Exiting. Homehost: " (common:get-homehost))
-   ;;  (exit 1))))
+  ;; Runs one query over db for the largest last_update value found in the
+  ;; tests and runs tables and returns what the row callback received, or
+  ;; #f if the callback is never invoked.
+  (let ((newest #f))
+    (sqlite3:for-each-row
+     (lambda (stamp) (set! newest stamp))
+     db
+     "select max(lup) from ( select max(last_update) as lup  from tests union select max(last_update) as lup from runs);")
+    newest))
 ;; Open the classic megatest.db file (defaults to open in toppath)
 ;;   NOTE: returns a dbdat not a dbstruct!
-;;(define (db:reopen-megatest-db
-(define (db:open-megatest-db #!key (path #f)(name #f))
-  (let* ((dbdir        (or path *toppath*))
-         (dbpath       (conc  dbdir "/" (or name "megatest.db")))
-	 (dbexists     (common:file-exists? dbpath))
+(define (db:open-megatest-db dbpath)
+  (let* ((dbexists     (file-exists? dbpath))
 	 (db           (db:lock-create-open dbpath
 					    (lambda (db)
-                                              (db:initialize-main-db db)
-					      ;;(db:initialize-run-id-db db)
-					      )))
+                                              (db:initialize-main-db db))))
 	 (write-access (file-write-access? dbpath)))
     (debug:print-info 13 *default-log-port* "db:open-megatest-db "dbpath)
     (if (and dbexists (not write-access))
 	(set! *db-write-access* #f))
-    (cons db dbpath)))
-;; sync run to disk if touched
-(define (db:sync-touched dbstruct run-id #!key (force-sync #f))
-  (let ((tmpdb   (db:get-db dbstruct))
-	(mtdb    (dbr:dbstruct-mtdb   dbstruct))
-        (refndb  (dbr:dbstruct-refndb dbstruct))
-	(start-t (current-seconds)))
-    (debug:print-info 4 *default-log-port* "Syncing for run-id: " run-id)
-    (mutex-lock! *db-multi-sync-mutex*)
-    (let ((update_info (cons (if force-sync 0 *db-last-sync*) "last_update")))
-      (mutex-unlock! *db-multi-sync-mutex*)
-      (db:sync-tables (db:sync-all-tables-list dbstruct) update_info tmpdb refndb mtdb))
-    (mutex-lock! *db-multi-sync-mutex*)
-    (set! *db-last-sync* start-t)
-    (set! *db-last-access* start-t)
-    (mutex-unlock! *db-multi-sync-mutex*)
-    (stack-push! (dbr:dbstruct-dbstack dbstruct) tmpdb)))
-(define (db:safely-close-sqlite3-db db stmt-cache #!key (try-num 3))
-  (if (<= try-num 0)
-      #f
-      (handle-exceptions
-	  exn
-	(begin
-	  (print "Attempt to safely close sqlite3 db failed. Trying again. exn=" exn)
-	  (thread-sleep! 3)
-	  (sqlite3:interrupt! db)
-	  (db:safely-close-sqlite3-db db stmt-cache try-num: (- try-num 1)))
-	(if (sqlite3:database? db)
-	    (let* ((stmts (and stmt-cache (hash-table-ref/default stmt-cache db #f))))
-	      (if stmts (map sqlite3:finalize! (hash-table-values stmts)))
-	      (sqlite3:finalize! db)
-	      #t)
-	    #f))))
-;; close all opened run-id dbs
-(define (db:close-all dbstruct)
-  (if (dbr:dbstruct? dbstruct)
-      (handle-exceptions
-	  exn
-	  (begin
-	    (debug:print 0 *default-log-port* "WARNING: Finalizing failed, "  ((condition-property-accessor 'exn 'message) exn) ", note - exn=" exn)
-	    (print-call-chain *default-log-port*))
-	;; (db:sync-touched dbstruct 0 force-sync: #t) ;; NO. Do not do this here. Instead we rely on a server to be started when there are writes, even if the server itself is not going to be used as a server.
-        (let ((tdbs       (map db:dbdat-get-db 
-			       (stack->list (dbr:dbstruct-dbstack dbstruct))))
-              (mdb        (db:dbdat-get-db (dbr:dbstruct-mtdb   dbstruct)))
-              (rdb        (db:dbdat-get-db (dbr:dbstruct-refndb dbstruct)))
-	      (stmt-cache (dbr:dbstruct-stmt-cache dbstruct)))
-          (map (lambda (db)
-		 (db:safely-close-sqlite3-db db stmt-cache))
-	       tdbs)
-          (db:safely-close-sqlite3-db mdb stmt-cache)     ;; (if (sqlite3:database? mdb) (sqlite3:finalize! mdb))
-          (db:safely-close-sqlite3-db rdb stmt-cache))))) ;; (if (sqlite3:database? rdb) (sqlite3:finalize! rdb))))))
-;;   (let ((locdbs (dbr:dbstruct-locdbs dbstruct)))
-;;     (if (hash-table? locdbs)
-;; 	(for-each (lambda (run-id)
-;; 		    (db:close-run-db dbstruct run-id))
-;; 		  (hash-table-keys locdbs)))))
-;; (define (db:open-inmem-db)
-;;   (let* ((db      (sqlite3:open-database ":memory:"))
-;; 	 (handler (make-busy-timeout 3600)))
-;;     (sqlite3:set-busy-handler! db handler)
-;;     (db:initialize-run-id-db db)
-;;     (cons db #f)))
-;; just tests, test_steps and test_data tables
-(define db:sync-tests-only
-  (list
-   ;; (list "strs"
-   ;;       '("id"             #f)
-   ;;       '("str"            #f))
-   (list "tests" 
-	 '("id"             #f)
-	 '("run_id"         #f)
-	 '("testname"       #f)
-	 '("host"           #f)
-	 '("cpuload"        #f)
-	 '("diskfree"       #f)
-	 '("uname"          #f)
-	 '("rundir"         #f)
-	 '("shortdir"       #f)
-	 '("item_path"      #f)
-	 '("state"          #f)
-	 '("status"         #f)
-	 '("attemptnum"     #f)
-	 '("final_logf"     #f)
-	 '("logdat"         #f)
-	 '("run_duration"   #f)
-	 '("comment"        #f)
-	 '("event_time"     #f)
-	 '("fail_count"     #f)
-	 '("pass_count"     #f)
-	 '("archived"       #f)
-         '("last_update"    #f))
-  (list "test_steps"
-	 '("id"             #f)
-	 '("test_id"        #f)
-	 '("stepname"       #f)
-	 '("state"          #f)
-	 '("status"         #f)
-	 '("event_time"     #f)
-	 '("comment"        #f)
-	 '("logfile"        #f)
-         '("last_update"    #f))
-   (list "test_data"
-	 '("id"             #f)
-	 '("test_id"        #f)
-	 '("category"       #f)
-	 '("variable"       #f)
-	 '("value"          #f)
-	 '("expected"       #f)
-	 '("tol"            #f)
-	 '("units"          #f)
-	 '("comment"        #f)
-	 '("status"         #f)
-	 '("type"           #f)
-         '("last_update"    #f))))
-;; needs db to get keys, this is for syncing all tables
-(define (db:sync-main-list dbstruct)
-  (let ((keys  (db:get-keys dbstruct)))
-    (list
-     (list "keys"
-	   '("id"        #f)
-	   '("fieldname" #f)
-	   '("fieldtype" #f))
-     (list "metadat" '("var" #f) '("val" #f))
-     (append (list "runs" 
-		   '("id"  #f))
-	     (map (lambda (k)(list k #f))
-		  (append keys
-			  (list "runname" "state" "status" "owner" "event_time" "comment" "fail_count" "pass_count" "contour" "last_update"))))
-     (list "archive_disks"
-           '("id" #f)
-           '("archive_area_name" #f) 
-           '("disk_path" #f)
-           '("last_df" #f)
-           '("last_df_time" #f)
-           '("creation_time" #f)) 
-     (list "archive_blocks"
-           '("id" #f)
-           '("archive_disk_id" #f) 
-           '("disk_path" #f)
-           '("last_du" #f)
-           '("last_du_time" #f)
-           '("creation_time" #f)) 
-     (list "test_meta"
-	   '("id"             #f)
-	   '("testname"       #f)
-	   '("owner"          #f)
-	   '("description"    #f)
-	   '("reviewed"       #f)
-	   '("iterated"       #f)
-	   '("avg_runtime"    #f)
-	   '("avg_disk"       #f)
-	   '("tags"           #f)
-	   '("jobgroup"       #f)))))
-(define (db:sync-all-tables-list dbstruct)
-  (append (db:sync-main-list dbstruct)
-	  db:sync-tests-only))
+    ;; (cons db dbpath)))
+    (make-dbr:dbdat dbfile: dbpath dbh: db read-only: (not write-access))))
 ;; use bunch of Unix commands to try to break the lock and recreate the db
 (define (db:move-and-recreate-db dbdat)
-  (let* ((dbpath   (db:dbdat-get-path        dbdat))
+  (let* ((dbpath   (dbr:dbdat-dbfile        dbdat))
 	 (dbdir    (pathname-directory       dbpath))
 	 (fname    (pathname-strip-directory dbpath))
 	 (fnamejnl (conc fname "-journal"))
 	 (tmpname  (conc fname "." (current-process-id)))
 	 (tmpjnl   (conc fnamejnl "." (current-process-id))))
@@ -622,11 +253,11 @@
 ;; return #f to indicate the dbdat should be closed/reopened
 ;; else return dbdat
 (define (db:repair-db dbdat #!key (numtries 1))
-  (let* ((dbpath   (db:dbdat-get-path        dbdat))
+  (let* ((dbpath   (dbr:dbdat-dbfile        dbdat))
 	 (dbdir    (pathname-directory       dbpath))
 	 (fname    (pathname-strip-directory dbpath)))
     (debug:print-info 0 *default-log-port* "Checking db " dbpath " for errors.")
      ((not (file-write-access? dbdir))
@@ -639,11 +270,11 @@
      (else ;; ((equal? fname "megatest.db") ;; this file can be regenerated if needed
-	 (print "Problems trying to repair the db, exn=" exn)
+	 ;; report on *default-log-port*, the port used by every other message in this procedure
+	 (debug:print 0 *default-log-port* "Problems trying to repair the db, exn=" exn)
 	 ;; (db:move-and-recreate-db dbdat)
 	 (if (> numtries 0)
 	     (db:repair-db dbdat numtries: (- numtries 1))
 	 (debug:print 0 *default-log-port* "FATAL: file " dbpath " was found corrupted, an attempt to fix has been made but you must start over.")
@@ -675,289 +306,11 @@
 	   (sqlite3:execute db "vacuum;")))
 	 (sqlite3:finalize! db)
-;; tbls is ( ("tablename" ( "field1" [#f|proc1] ) ( "field2" [#f|proc2] ) .... ) )
-;; db's are dbdat's
-;; if last-update specified ("field-name" . time-in-seconds)
-;;    then sync only records where field-name >= time-in-seconds
-;;    IFF field-name exists
-(define (db:sync-tables tbls last-update fromdb todb . slave-dbs)
-  (handle-exceptions
-   exn
-   (begin
-     (debug:print 0 *default-log-port* "EXCEPTION: database probably overloaded or unreadable in db:sync-tables.")
-     (print-call-chain (current-error-port))
-     (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
-     (debug:print 5 *default-log-port* "exn=" (condition->list exn))
-     (debug:print 0 *default-log-port* " status:  " ((condition-property-accessor 'sqlite3 'status) exn))
-     (debug:print 0 *default-log-port* " src db:  " (db:dbdat-get-path fromdb))
-     (for-each (lambda (dbdat)
-		 (let ((dbpath (db:dbdat-get-path dbdat)))
-		   (debug:print 0 *default-log-port* " dbpath:  " dbpath)
-		   (if (not (db:repair-db dbdat))
-		       (begin
-			 (debug:print-error 0 *default-log-port* "Failed to rebuild " dbpath ", exiting now.")
-			 (exit)))))
-	       (cons todb slave-dbs))
-     0)
-   ;; this is the work to be done
-   (cond
-    ((not fromdb) (debug:print 3 *default-log-port* "WARNING: db:sync-tables called with fromdb missing")
-     -1)
-    ((not todb)   (debug:print 3 *default-log-port* "WARNING: db:sync-tables called with todb missing")
-     -2)
-    ((not (sqlite3:database? (db:dbdat-get-db fromdb)))
-     (debug:print-error 0 *default-log-port* "db:sync-tables called with fromdb not a database " fromdb)
-     -3)
-    ((not (sqlite3:database? (db:dbdat-get-db todb)))
-     (debug:print-error 0 *default-log-port* "db:sync-tables called with todb not a database " todb)
-     -4)
-    ((not (file-write-access? (db:dbdat-get-path todb)))
-     (debug:print-error 0 *default-log-port* "db:sync-tables called with todb not a read-only database " todb)
-     -5)
-    ((not (null? (let ((readonly-slave-dbs
-                        (filter
-                         (lambda (dbdat)
-                           (not (file-write-access? (db:dbdat-get-path todb))))
-                         slave-dbs)))
-                   (for-each
-                    (lambda (bad-dbdat)
-                      (debug:print-error
-                       0 *default-log-port* "db:sync-tables called with todb not a read-only database " bad-dbdat))
-                    readonly-slave-dbs)
-                   readonly-slave-dbs))) -6)
-    (else
-     (let ((stmts       (make-hash-table)) ;; table-field => stmt
-	   (all-stmts   '())              ;; ( ( stmt1 value1 ) ( stml2 value2 ))
-	   (numrecs     (make-hash-table))
-	   (start-time  (current-milliseconds))
-	   (tot-count   0))
-       (for-each ;; table
-	(lambda (tabledat)
-	  (let* ((tablename        (car tabledat))
-		 (fields           (cdr tabledat))
-		 (has-last-update  (member "last_update" fields))
-		 (use-last-update  (cond
-				    ((and has-last-update
-					  (member "last_update" fields))
-				     #t) ;; if given a number, just use it for all fields
-				    ((number? last-update) #f) ;; if not matched first entry then ignore last-update for this table
-				    ((and (pair? last-update)
-					  (member (car last-update)    ;; last-update field name
-						  (map car fields)))
-                                        #t)
-				    (last-update
-				     (debug:print 0 *default-log-port* "ERROR: parameter last-update for db:sync-tables must be a pair or a number, received: " last-update);; found in fields
-				     #f)
-				    (else
-				     #f)))
-		 (last-update-value (if use-last-update ;; no need to check for has-last-update - it is already accounted for
-					(if (number? last-update)
-					    last-update
-					    (cdr last-update))
-					#f))
-		 (last-update-field (if use-last-update
-					(if (number? last-update)
-					    "last_update"
-					    (car last-update))
-					#f))
-		 (num-fields (length fields))
-		 (field->num (make-hash-table))
-		 (num->field (apply vector (map car fields))) ;; BBHERE
-		 (full-sel   (conc "SELECT " (string-intersperse (map car fields) ",") 
-				   " FROM " tablename (if use-last-update ;; apply last-update criteria
-							  (conc " WHERE " last-update-field " >= " last-update-value)
-							  "")
-				   ";"))
-		 (full-ins   (conc "INSERT OR REPLACE INTO " tablename " ( " (string-intersperse (map car fields) ",") " ) "
-				   " VALUES ( " (string-intersperse (make-list num-fields "?") ",") " );"))
-		 (fromdat    '())
-		 (fromdats   '())
-		 (totrecords 0)
-		 (batch-len  (string->number (or (configf:lookup *configdat* "sync" "batchsize") "100")))
-		 (todat      (make-hash-table))
-		 (count      0)
-                 (field-names (map car fields))
-                 (delay-handicap  (string->number (or (configf:lookup *configdat* "sync" "delay-handicap") "0")))
-                 )
-	    ;; set up the field->num table
-	    (for-each
-	     (lambda (field)
-	       (hash-table-set! field->num field count)
-	       (set! count (+ count 1)))
-	     fields)
-	    ;; read the source table
-	    (sqlite3:for-each-row
-	     (lambda (a . b)
-	       (set! fromdat (cons (apply vector a b) fromdat))
-	       (if (> (length fromdat) batch-len)
-		   (begin
-		     (set! fromdats (cons fromdat fromdats))
-		     (set! fromdat  '())
-		     (set! totrecords (+ totrecords 1)))))
-	     (db:dbdat-get-db fromdb)
-	     full-sel)
-	    ;; tack on remaining records in fromdat
-	    (if (not (null? fromdat))
-		(set! fromdats (cons fromdat fromdats)))
-	    (if (common:low-noise-print 120 "sync-records")
-		(debug:print-info 4 *default-log-port* "found " totrecords " records to sync"))
-	    ;; read the target table; BBHERE
-	    (sqlite3:for-each-row
-	     (lambda (a . b)
-	       (hash-table-set! todat a (apply vector a b)))
-	     (db:dbdat-get-db todb)
-	     full-sel)
-            (when (and delay-handicap (> delay-handicap 0))
-              (debug:print-info 0 *default-log-port* "imposing synthetic sync delay of "delay-handicap" seconds since sync/delay-handicap is configured")
-              (thread-sleep! delay-handicap)
-              (debug:print-info 0 *default-log-port* "synthetic sync delay of "delay-handicap" seconds completed")
-              )
-	    ;; first pass implementation, just insert all changed rows
-	    (for-each 
-	     (lambda (targdb)
-	       (let* ((db                 (db:dbdat-get-db targdb))
-                      (drp-trigger        (if (member "last_update" field-names)
-					      (db:drop-trigger db tablename) 
-					      #f))
-                      (is-trigger-dropped (if (member "last_update" field-names)
-                                              (db:is-trigger-dropped db tablename)
-					      #f)) 
-		      (stmth  (sqlite3:prepare db full-ins)))
-		 ;; (db:delay-if-busy targdb) ;; NO WAITING
-                 (if (member "last_update" field-names)
-                     (debug:print-info 3 *default-log-port* "is-trigger-dropped: " is-trigger-dropped)) 
-		 (for-each
-		  (lambda (fromdat-lst)
-		    (sqlite3:with-transaction
-		     db
-		     (lambda ()
-		       (for-each ;; 
-			(lambda (fromrow)
-			  (let* ((a    (vector-ref fromrow 0))
-				 (curr (hash-table-ref/default todat a #f))
-				 (same #t))
-			    (let loop ((i 0))
-			      (if (or (not curr)
-				      (not (equal? (vector-ref fromrow i)(vector-ref curr i))))
-				  (set! same #f))
-			      (if (and same
-				       (< i (- num-fields 1)))
-				  (loop (+ i 1))))
-			    (if (not same)
-				(begin
-				  (apply sqlite3:execute stmth (vector->list fromrow))
-				  (hash-table-set! numrecs tablename (+ 1 (hash-table-ref/default numrecs tablename 0)))))))
-			fromdat-lst))))
-		  fromdats)
-		 (sqlite3:finalize! stmth)
-                 (if (member "last_update" field-names)
-                    (db:create-trigger db tablename))))
-	     (append (list todb) slave-dbs))))
-	tbls)
-       (let* ((runtime      (- (current-milliseconds) start-time))
-	      (should-print (or (debug:debug-mode 12)
-				(common:low-noise-print 120 "db sync" (> runtime 500))))) ;; low and high sync times treated as separate.
-	 (if should-print (debug:print 3 *default-log-port* "INFO: db sync, total run time " runtime " ms"))
-	 (for-each 
-	  (lambda (dat)
-	    (let ((tblname (car dat))
-		  (count   (cdr dat)))
-	      (set! tot-count (+ tot-count count))
-	      (if (> count 0)
-		  (if should-print (debug:print 0 *default-log-port* (format #f "    ~10a ~5a" tblname count))))))
-	  (sort (hash-table->alist numrecs)(lambda (a b)(> (cdr a)(cdr b))))))
-       tot-count)))))
-(define (db:patch-schema-rundb frundb)
-  ;;
-  ;; remove this some time after September 2016 (added in version v1.6031
-  ;;
-  (for-each
-   (lambda (table-name)
-     (handle-exceptions
-      exn
-      (if (string-match ".*duplicate.*" ((condition-property-accessor 'exn 'message) exn))
-          (debug:print 0 *default-log-port* "Column last_update already added to " table-name " table")
-          (db:general-sqlite-error-dump exn "alter table " table-name " ..." #f "none"))
-      (sqlite3:execute
-       frundb
-       (conc "ALTER TABLE " table-name " ADD COLUMN last_update INTEGER DEFAULT 0")))
-     (sqlite3:execute
-      frundb
-      (conc "DROP TRIGGER IF EXISTS update_" table-name "_trigger;"))
-     (sqlite3:execute
-      frundb
-      (conc "CREATE TRIGGER IF NOT EXISTS update_" table-name "_trigger AFTER UPDATE ON " table-name "
-                             FOR EACH ROW
-                               BEGIN 
-                                 UPDATE " table-name " SET last_update=(strftime('%s','now'))
-                                   WHERE;
-                               END;"))
-     )
-   '("tests" "test_steps" "test_data")))
-(define (db:patch-schema-maindb maindb)
-  ;;
-  ;; remove all these some time after september 2016 (added in v1.6031
-  ;;
-  (for-each
-   (lambda (column type default)
-     (handle-exceptions
-	 exn
-	 (if (string-match ".*duplicate.*" ((condition-property-accessor 'exn 'message) exn))
-	     (debug:print 0 *default-log-port* "Column " column " already added to runs table")
-	     (db:general-sqlite-error-dump exn "alter table runs ..." #f "none"))
-       (sqlite3:execute
-	maindb
-	(conc "ALTER TABLE runs ADD COLUMN " column " " type " DEFAULT " default))))
-   (list "last_update" "contour")
-   (list "INTEGER"     "TEXT"   )
-   (list "0"           "''"   ))
-  ;; these schema changes don't need exception handling
-  (sqlite3:execute
-   maindb
-   "CREATE TRIGGER IF NOT EXISTS update_runs_trigger AFTER UPDATE ON runs
-                             FOR EACH ROW
-                               BEGIN 
-                                 UPDATE runs SET last_update=(strftime('%s','now'))
-                                   WHERE;
-                               END;")
-  (sqlite3:execute maindb "CREATE TABLE IF NOT EXISTS run_stats (
-                              id     INTEGER PRIMARY KEY,
-                              run_id INTEGER,
-                              state  TEXT,
-                              status TEXT,
-                              count  INTEGER,
-                              last_update INTEGER DEFAULT (strftime('%s','now')))")
-  (sqlite3:execute maindb "CREATE TRIGGER  IF NOT EXISTS update_run_stats_trigger AFTER UPDATE ON run_stats
-                             FOR EACH ROW
-                               BEGIN 
-                                 UPDATE run_stats SET last_update=(strftime('%s','now'))
-                                   WHERE;
-                               END;")
-  (sqlite3:execute maindb "CREATE TABLE IF NOT EXISTS test_rundat (
-                              id           INTEGER PRIMARY KEY,
-                              test_id      INTEGER,
-                              update_time  TIMESTAMP,
-                              cpuload      INTEGER DEFAULT -1,
-                              diskfree     INTEGER DEFAULT -1,
-                              diskusage    INTGER DEFAULT -1,
-                              run_duration INTEGER DEFAULT 0);"))
 (define (db:adj-target db)
   (let ((fields    (configf:get-section *configdat* "fields"))
 	(field-num 0))
     ;; because we will be refreshing the keys table it is best to clear it here
@@ -1042,19 +395,82 @@
 ;; 			  (lambda ()
 ;; 			    (if (and (common:file-exists? megatest-db)
 ;; 				     (file-write-access? megatest-db))
 ;; 				(begin
 ;; 				  (db:sync-to-megatest.db dbstruct 'timestamps) ;; internally mutexes on *db-local-sync*
-;; 				  (debug:print-info 2 *default-log-port* "Done syncing to megatest.db"))))
+;; 				  (debug:print-info 2 *default-log-port* "Done syncing to megatest.db")))) 
 ;; 			  "call-with-cached-db sync-to-megatest.db"))
 ;; 	       (cache-db (db:cache-for-read-only
 ;; 			  megatest-db
 ;; 			  (conc cache-dir "/" fname)
 ;; 			  use-last-update: #t)))
 ;; 	  (thread-start! th1)
 ;; 	  (apply proc cache-db params)
 ;; 	  ))))
+;; Delta-sync the databases kept in the tmp db area back to *toppath*/.megatest.
+;;
+;; Every *.db file found under <tmp-area>/.megatest is compared, by file
+;; modification time, with the same-named file under *toppath*/.megatest and
+;; handed to db:lock-and-delta-sync when one of the conditions below holds.
+;; Always returns #t.
+(define (db:all-db-sync dbstruct)
+  ;; modification time of path, or 0 when the file is absent
+  (define (mtime-or-zero path)
+    (if (file-exists? path)
+        (file-modification-time path)
+        0))
+  (let* ((dbdat          (db:open-db dbstruct #f db:initialize-main-db))
+         (tmp-area       (common:get-db-tmp-area))
+         (tmp-dbfiles    (glob (conc tmp-area "/.megatest/*.db")))
+         (sync-durations (make-hash-table)) ;; "<name>.db" => milliseconds spent in the sync call
+         (no-sync-db     (db:open-no-sync-db)))
+    (for-each
+     (lambda (tmpfile)
+       (debug:print-info 3 *default-log-port* "file: " tmpfile)
+       (let* ((dbname    (pathname-file tmpfile))                  ;; basename without extension
+              (dbfname   (conc dbname ".db"))
+              (nfsfile   (conc *toppath* "/.megatest/" dbfname))   ;; counterpart under *toppath*
+              (wal-time  (mtime-or-zero (conc tmpfile "-wal")))
+              (shm-time  (mtime-or-zero (conc tmpfile "-shm")))
+              ;; newest of the tmp db file and its -wal / -shm companions
+              (tmp-time  (cond
+                          ((file-exists? tmpfile)
+                           (max (file-modification-time tmpfile) wal-time shm-time))
+                          (else
+                           (debug:print-info 2 *default-log-port* "Sync - I do not see file " tmpfile)
+                           1)))
+              (nfs-time  (cond
+                          ((file-exists? nfsfile)
+                           (file-modification-time nfsfile))
+                          (else
+                           (debug:print-info 2 *default-log-port* "Sync - I do not see file " nfsfile)
+                           0)))
+              (delta     (- tmp-time nfs-time))
+              ;; threshold is 1..5 seconds, drawn fresh for every file
+              (changed   (> delta (+ (random 5) 1)))
+              (changed10 (> delta 10))
+              ;; a -journal file next to the tmp db is taken as "busy"
+              (busy      (file-exists? (conc tmpfile "-journal")))
+              ;; (sync? . reason)
+              (decision  (cond
+                          ((not (file-exists? nfsfile)) ;; shouldn't happen, but this might recover
+                           (cons #t (conc "File " nfsfile " not found! Copying " dbfname " to " nfsfile)))
+                          ((and changed (not busy))
+                           (cons #t "not busy, changed"))
+                          ((and busy changed10)
+                           (cons #t "busy but not synced in a while"))
+                          ((and changed *time-to-exit*)
+                           (cons #t "Time to exit, forced final sync"))
+                          (else
+                           (cons #f "No sync needed")))))
+         (cond
+          ((car decision)
+           (let ((start-time (current-milliseconds))
+                 ;; "main" maps to run id #f, any other name is parsed as a number
+                 (runid      (if (string= dbname "main") #f (string->number dbname))))
+             (debug:print-info 3 *default-log-port* "db:all-db-sync: fname: "
+                               dbname ", delta: " delta " seconds, reason: " (cdr decision))
+             (db:lock-and-delta-sync no-sync-db dbstruct dbname runid (db:get-keys dbstruct) db:initialize-main-db)
+             (hash-table-set! sync-durations dbfname
+                              (- (current-milliseconds) start-time))))
+          (else
+           (debug:print-info 3 *default-log-port* "skipping sync. " tmpfile " is up to date")))))
+     tmp-dbfiles)
+    ;; hand the handle opened at the top back to dbstruct
+    (when dbdat
+      (dbfile:add-dbdat dbstruct #f dbdat)))
+  #t)
 ;; options:
 ;;  'killservers  - kills all servers
 ;;  'dejunk       - removes junk records
@@ -1064,26 +480,27 @@
 ;;  'closeall     - close all opened dbs
 ;;  'schema       - attempt to apply schema changes
 ;;  run-ids: '(1 2 3 ...) or #f (for all)
 (define (db:multi-db-sync dbstruct . options)
-  ;; (if (not (launch:setup))
-  ;;    (debug:print 0 *default-log-port* "ERROR: not able to setup up for megatest.")
-  (let* ((mtdb     (dbr:dbstruct-mtdb dbstruct))
-	 (tmpdb    (db:get-db dbstruct))
-	 (refndb   (dbr:dbstruct-refndb dbstruct))
-	 (allow-cleanup #t) ;; (if run-ids #f #t))
-	 (servers (server:get-list *toppath*)) ;; (tasks:get-all-servers (db:delay-if-busy tdbdat)))
-	 (data-synced 0)) ;; count of changed records (I hope)
-    (for-each
-     (lambda (option)
-       (case option
-	 ;; kill servers
-	 ((killservers)
-	  (for-each
+  (let* (;; (dbdat       (db:open-db dbstruct #f dbfile:db-init-proc))
+	 (data-synced 0) ;; count of changed records
+    (tmp-area       (common:get-db-tmp-area))
+    (old2new (member 'old2new options))
+    (dejunk (member 'dejunk options))
+    (killservers (member 'killservers options))
+    (servers (server:get-list *toppath*))
+    (src-area (if old2new *toppath* tmp-area))
+    (dest-area (if old2new tmp-area *toppath*))
+    (dbfiles        (if old2new (glob (conc *toppath* "/.megatest/*.db")) (glob (conc tmp-area "/.megatest/*.db"))))
+    (keys (db:get-keys dbstruct))
+    (sync-durations (make-hash-table)))
+    (if killservers
+      (begin
+       	  (for-each
 	   (lambda (server)
                (debug:print-info 0 *default-log-port*  "Unable to get host and/or port from " server ", exn=" exn)     
@@ -1090,61 +507,96 @@
 	     (match-let (((mod-time host port start-time server-id pid) server))
 	       (if (and host pid)
 		   (tasks:kill-server host pid)))))
-          ;; /tmp/bjbarcla/megatest_localdb/fullrun/
           (delete-file* (common:get-sync-lock-filepath))
-          )
-	 ;; clear out junk records
-	 ;;
-	 ((dejunk)
-	  ;; (db:delay-if-busy mtdb) ;; ok to delay on mtdb
-	  (when (file-write-access? (db:dbdat-get-path mtdb)) (db:clean-up mtdb))
-	  (db:clean-up tmpdb)
-	  (db:clean-up refndb))
-	 ;; sync runs, test_meta etc.
-	 ;;
-	 ((old2new)
-	  (set! data-synced
-	    (+ (db:sync-tables (db:sync-all-tables-list dbstruct) #f mtdb tmpdb refndb)
-	       data-synced)))
-	 ;; now ensure all newdb data are synced to megatest.db
-	 ;; do not use the run-ids list passed in to the function
-	 ;;
-	 ((new2old)
-	  (set! data-synced
-	    (+ (db:sync-tables (db:sync-all-tables-list dbstruct) #f tmpdb refndb mtdb)
-	       data-synced)))
-	 ((adj-target)
-	  (db:adj-target (db:dbdat-get-db mtdb))
-	  (db:adj-target (db:dbdat-get-db tmpdb))
-	  (db:adj-target (db:dbdat-get-db refndb)))
-	 ((schema)
-	  (db:patch-schema-maindb (db:dbdat-get-db mtdb))
-	  (db:patch-schema-maindb (db:dbdat-get-db tmpdb))
-	  (db:patch-schema-maindb (db:dbdat-get-db refndb))
-	  (db:patch-schema-rundb  (db:dbdat-get-db mtdb))
-	  (db:patch-schema-rundb  (db:dbdat-get-db tmpdb))
-	  (db:patch-schema-rundb  (db:dbdat-get-db refndb))))
-       (stack-push! (dbr:dbstruct-dbstack dbstruct) tmpdb))
-     options)
-    data-synced))
-(define (db:tmp->megatest.db-sync dbstruct last-update)
-  (let* ((mtdb        (dbr:dbstruct-mtdb dbstruct))
-	 (tmpdb       (db:get-db dbstruct))
-	 (refndb      (dbr:dbstruct-refndb dbstruct))
-	 (res         (db:sync-tables (db:sync-all-tables-list dbstruct) last-update tmpdb refndb mtdb)))
-    (stack-push! (dbr:dbstruct-dbstack dbstruct) tmpdb)
+      )
+    )
+    ;; Walk every db file found in the source area and delta-sync it with the
+    ;; same-named file in the destination area. The positive counts returned
+    ;; by db:sync-tables are accumulated in data-synced, which is the value
+    ;; returned to the caller.
+    (for-each
+     (lambda (srcfile)
+       (debug:print-info 3 *default-log-port* "file: " srcfile)
+       (let* ((basename       (pathname-file srcfile))
+              (fname          (conc basename ".db"))
+              ;; "main" maps to run-id #f, any other basename is parsed as a number
+              (run-id         (if (string= basename "main") #f (string->number basename)))
+              (dest-directory (conc dest-area "/.megatest/"))
+              (destfile       (conc dest-directory fname))
+              (dummy          (debug:print-info 2 *default-log-port* "destfile = " destfile))
+              (dummy2         (debug:print-info 2 *default-log-port* "dejunk = " dejunk))
+              (dest-exists    (file-exists? destfile))
+              ;; TODO: time1 and time2 need to take into account -wal and -shm files
+              (time1          (file-modification-time srcfile))
+              (time2          (if dest-exists
+                                  (begin
+                                    (debug:print-info 2 *default-log-port* "destfile " destfile " exists")
+                                    (file-modification-time destfile))
+                                  (begin
+                                    (debug:print-info 0 *default-log-port* "Sync - I do not see file " destfile)
+                                    0)))
+              ;; true unless the destination is at least 6 seconds newer than the source
+              (changed        (< (- time2 time1) 6.0))
+              (do-cp          (cond
+                               ((not dest-exists) ;; shouldn't happen, but this might recover
+                                (debug:print-info 2 *default-log-port* "File " destfile " not found. Copying " srcfile " to " destfile)
+                                ;; NOTE(review): the paths are spliced into the shell commands unquoted - confirm they can never contain spaces or shell metacharacters
+                                (system (conc "/bin/mkdir -p " dest-directory))
+                                (system (conc "/bin/cp " srcfile " " destfile))
+                                #t)
+                               ;; no separate *time-to-exit* clause: it also required changed,
+                               ;; so it could never be reached after this test
+                               (else changed))))
+         (if (or dejunk do-cp)
+             (let* ((start-time (current-milliseconds))
+                    (subdb      (or (dbfile:get-subdb dbstruct run-id)
+                                    (dbfile:init-subdb dbstruct run-id dbfile:db-init-proc)))
+                    (mtdb       (dbr:subdb-mtdbdat subdb))
+                    (tmpdb      (dbfile:open-db dbstruct run-id dbfile:db-init-proc))
+                    ;; old2new syncs mtdb -> tmpdb, otherwise tmpdb -> mtdb
+                    (fromdb     (if old2new mtdb tmpdb))
+                    (todb       (if old2new tmpdb mtdb)))
+               (debug:print-info 2 *default-log-port* "delta syncing file: " srcfile ", time diff: " (- time1 time2) " seconds")
+               ;; with 'dejunk the sync source is cleaned up before it is copied
+               (if dejunk (db:clean-up run-id fromdb))
+               ;; keys was already fetched once in the enclosing let*, no need to query it per file
+               (let ((nsynced (db:sync-tables (db:sync-all-tables-list dbstruct keys) #f fromdb todb)))
+                 ;; NOTE(review): assumes db:sync-tables returns the number of records it
+                 ;; changed, with negative values signalling errors - confirm against its
+                 ;; current definition
+                 (if (and (number? nsynced) (> nsynced 0))
+                     (set! data-synced (+ data-synced nsynced))))
+               ;; keyed by "<basename>.db"
+               (hash-table-set! sync-durations fname (- (current-milliseconds) start-time)))
+             (debug:print-info 2 *default-log-port* "skipping delta sync. " srcfile " is up to date"))))
+     dbfiles)
+    data-synced
+  )
+;; Sync all changed db's
+(define (db:tmp->megatest.db-sync dbstruct run-id last-update)
+  (let* ((subdbs (hash-table-values (dbr:dbstruct-subdbs dbstruct)))
+	 (res    '()))
+    (for-each
+     (lambda (subdb)
+       (let* ((dbname (db:run-id->dbname run-id))
+	      (mtdb   (dbr:subdb-mtdb subdb))
+	      (tmpdb  (db:get-subdb dbstruct run-id))
+	      (refndb (dbr:subdb-refndb subdb))
+	      (newres (db:sync-tables (db:sync-all-tables-list dbstruct (db:get-keys dbstruct)) last-update tmpdb refndb mtdb)))
+	 ;; (stack-push! (dbr:subdb-dbstack subdb) tmpdb)
+	 (dbfile:add-dbdat dbstruct run-id tmpdb)
+	 (set! res (cons newres res))))
+     subdbs)
 ;;;; run-ids
 ;;    if #f use *db-local-sync* : or 'local-sync-flags
 ;;    if #t use timestamps      : or 'timestamps
@@ -1178,159 +630,27 @@
           (if sync-needed
               (debug:print-info 0 *default-log-port* "Sync of " res " records from newdb to olddb completed in " sync-time " seconds pid="(current-process-id))
               (debug:print-info 0 *default-log-port* "No sync needed, last updated " (- start-time last-update) " seconds ago")))
-;; keeping it around for debugging purposes only
-#;(define (open-run-close-no-exception-handling  proc idb . params)
-  (debug:print-info 11 *default-log-port* "open-run-close-no-exception-handling START given a db=" (if idb "yes " "no ") ", params=" params)
-  (print "I don't work anymore. open-run-close-no-exception-handling needs fixing or removing...")
-  (exit)
-  (if (or *db-write-access*
-	  (not #t)) ;; was: (member proc * db:all-write-procs *)))
-      (let* ((db (cond
-		  ((pair? idb)                 (db:dbdat-get-db idb))
-		  ((sqlite3:database? idb)     idb)
-		  ((not idb)                   (debug:print-error 0 *default-log-port* "cannot open-run-close with #f anymore"))
-		  ((procedure? idb)            (idb))
-		  (else   	               (debug:print-error 0 *default-log-port* "cannot open-run-close with #f anymore"))))
-	     (res #f))
-	(set! res (apply proc db params))
-	(if (not idb)(sqlite3:finalize! dbstruct))
-	(debug:print-info 11 *default-log-port* "open-run-close-no-exception-handling END" )
-	res)
-      #f))
-#;(define (open-run-close-exception-handling proc idb . params)
-  (handle-exceptions
-   exn
-   (let ((sleep-time (random 30))
-	 (err-status ((condition-property-accessor 'sqlite3 'status #f) exn)))
-     (case err-status
-       ((busy)
-	(thread-sleep! sleep-time))
-       (else
-	(debug:print 0 *default-log-port* "EXCEPTION: database probably overloaded or unreadable.")
-	(debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
-	(debug:print 5 *default-log-port* "exn=" (condition->list exn))
-	(debug:print 0 *default-log-port* " status:  " ((condition-property-accessor 'sqlite3 'status) exn))
-	(print-call-chain (current-error-port))
-	(thread-sleep! sleep-time)
-	(debug:print-info 0 *default-log-port* "trying db call one more time....this may never recover, if necessary kill process " (current-process-id) " on host " (get-host-name) " to clean up")))
-     (apply open-run-close-exception-handling proc idb params))
-   (apply open-run-close-no-exception-handling proc idb params)))
-;; (define open-run-close 
-#;(define open-run-close open-run-close-exception-handling)
-		;;	   open-run-close-no-exception-handling
-;;			   open-run-close-exception-handling)
-(define db:trigger-list 
-     (list (list "update_runs_trigger"  "CREATE TRIGGER IF NOT EXISTS update_runs_trigger AFTER UPDATE ON runs
-                             FOR EACH ROW
-                               BEGIN 
-                                 UPDATE runs SET last_update=(strftime('%s','now'))
-                                   WHERE;
-                               END;" ) 
-	   (list "update_run_stats_trigger"  "CREATE TRIGGER  IF NOT EXISTS update_run_stats_trigger AFTER UPDATE ON run_stats
-                             FOR EACH ROW
-                               BEGIN 
-                                 UPDATE run_stats SET last_update=(strftime('%s','now'))
-                                   WHERE;
-                               END;" )
-	   (list "update_tests_trigger"  "CREATE TRIGGER  IF NOT EXISTS update_tests_trigger AFTER UPDATE ON tests
-                             FOR EACH ROW
-                               BEGIN 
-                                 UPDATE tests SET last_update=(strftime('%s','now'))
-                                   WHERE;
-                               END;" )
-	   (list "update_teststeps_trigger"  "CREATE TRIGGER  IF NOT EXISTS update_teststeps_trigger AFTER UPDATE ON test_steps
-                             FOR EACH ROW
-                               BEGIN 
-                                 UPDATE test_steps SET last_update=(strftime('%s','now'))
-                                   WHERE;
-                               END;" )
-	   (list "update_test_data_trigger"  "CREATE TRIGGER  IF NOT EXISTS update_test_data_trigger AFTER UPDATE ON test_data
-                             FOR EACH ROW
-                               BEGIN 
-                                 UPDATE test_data SET last_update=(strftime('%s','now'))
-                                   WHERE;
-                               END;" )))
-(define (db:create-all-triggers dbstruct)
-   dbstruct #f #f
-   (lambda (db)
-(db:create-triggers db))))
-(define (db:create-triggers db)
-    (for-each (lambda (key)
-              (sqlite3:execute db (cadr key)))
-          db:trigger-list))
-(define (db:drop-all-triggers dbstruct)
-  (db:with-db
-   dbstruct #f #f
-   (lambda (db)
-     (db:drop-triggers db))))
-(define (db:is-trigger-dropped db tbl-name)
-  (let* ((trigger-name (if (equal? tbl-name "test_steps")
-			   "update_teststeps_trigger" 
-                           (conc "update_" tbl-name "_trigger")))
-	 (res          #f))
-    (sqlite3:for-each-row
-     (lambda (name)
-       (if (equal? name trigger-name)
-	   (set! res #t)))
-     db 
-     "SELECT name FROM sqlite_master WHERE type = 'trigger' ;" 
-     )))
-(define (db:drop-triggers db)
-  (for-each
-   (lambda (key) 
-     (sqlite3:execute db (conc "drop trigger if exists " (car key))))
-   db:trigger-list))
-(define  (db:drop-trigger db tbl-name)
-  (let* ((trigger-name (if (equal? tbl-name "test_steps")
-			   "update_teststeps_trigger" 
-                           (conc "update_" tbl-name "_trigger"))))
-    (for-each
-     (lambda (key) 
-       (if (equal? (car key) trigger-name)
-           (sqlite3:execute db (conc "drop trigger if exists " trigger-name))))
-     db:trigger-list)))
-(define  (db:create-trigger db tbl-name)
-      (let* ((trigger-name (if (equal? tbl-name "test_steps")
-                              "update_teststeps_trigger" 
-                              (conc "update_" tbl-name "_trigger"))))
-       (for-each (lambda (key) 
-             (if (equal? (car key) trigger-name)
-             (sqlite3:execute db (cadr key))))
-      db:trigger-list))) 
-(define (db:initialize-main-db dbdat)
+(define (db:initialize-main-db db)
   (when (not *configinfo*)
            (launch:setup)) ;; added because Elena was getting stack dump because *configinfo* below was #f.
   (let* ((configdat (car *configinfo*))  ;; tut tut, global warning...
 	 (keys     (keys:config-get-fields configdat))
 	 (havekeys (> (length keys) 0))
 	 (keystr   (keys->keystr keys))
 	 (fieldstr (keys:make-key/field-string configdat))
-	 (db       (db:dbdat-get-db dbdat)))
+	 #;(db       (dbr:dbdat-dbh dbdat)))
     (for-each (lambda (key)
 		(let ((keyn key))
 		  (if (member (string-downcase keyn)
 			      (list "runname" "state" "status" "owner" "event_time" "comment" "fail_count"
 				    "pass_count" "contour"))
-			(print "ERROR: your key cannot be named " keyn " as this conflicts with the same named field in the runs table, you must remove your megatest.db and <linktree>/.db before trying again.")
+			(debug:print 0 *default-log-port* "ERROR: your key cannot be named " keyn " as this conflicts with the same named field in the runs table, you must remove your megatest.db and <linktree>/.db before trying again.")
 			(exit 1)))))
      (lambda ()
@@ -1494,11 +814,11 @@
                                event_time TIMESTAMP,
                                comment TEXT DEFAULT '',
                                logfile TEXT DEFAULT '',
                                last_update  INTEGER DEFAULT (strftime('%s','now')),
                                CONSTRAINT test_steps_constraint UNIQUE (test_id,stepname,state));")
-	(sqlite3:execute db "CREATE INDEX IF NOT EXISTS teststeps_index ON tests (run_id, testname, item_path);")
+	(sqlite3:execute db "CREATE INDEX IF NOT EXISTS teststeps_index ON test_steps (test_id, stepname, state);")
         ;; All triggers created at once in end
 	;;(sqlite3:execute db "CREATE TRIGGER  IF NOT EXISTS update_teststeps_trigger AFTER UPDATE ON test_steps
         ;;                     FOR EACH ROW
         ;;                       BEGIN 
         ;;                         UPDATE test_steps SET last_update=(strftime('%s','now'))
@@ -1539,11 +859,10 @@
                               state        TEXT DEFAULT 'new',
                               status       TEXT DEFAULT 'n/a',
                               archive_type TEXT DEFAULT 'bup',
                               du           INTEGER,
                               archive_path TEXT);")))
-         (print "creating triggers from init") 
         (db:create-triggers db)    
      db)) ;; )
 ;; A R C H I V E S
@@ -1552,12 +871,12 @@
 ;; dneeded is minimum space needed, scan for existing archives that 
 ;; are on disks with adequate space and already have this test/itempath
 ;; archived
 (define (db:archive-get-allocations dbstruct testname itempath dneeded)
-  (let* ((dbdat        (db:get-db dbstruct)) ;; archive tables are in main.db
-	 (db           (db:dbdat-get-db dbdat))
+  (let* ((dbdat        (db:get-subdb dbstruct #f)) ;; archive tables are in main.db
+	 (db           (dbr:dbdat-dbh dbdat))
 	 (res          '())
 	 (blocks       '())) ;; a block is an archive chunck that can be added too if there is space
      (lambda (id archive-disk-id disk-path last-du last-du-time)
        (set! res (cons (vector id archive-disk-id disk-path last-du last-du-time) res)))
@@ -1577,19 +896,19 @@
 	  "SELECT,d.archive_area_name,disk_path,last_df,last_df_time FROM archive_disks AS d
              INNER JOIN archive_blocks AS b ON
              WHERE IN (" (string-intersperse (map conc res) ",") ") AND
          last_df > ?;")
-    (stack-push! (dbr:dbstruct-dbstack dbstruct) dbdat)
+    (dbfile:add-dbdat dbstruct #f dbdat)
 ;; returns id of the record, register a disk allocated to archiving and record it's last known
 ;; available space
 (define (db:archive-register-disk dbstruct bdisk-name bdisk-path df)
-  (let* ((dbdat        (db:get-db dbstruct)) ;; archive tables are in main.db
-	 (db           (db:dbdat-get-db dbdat))
+  (let* ((dbdat        (db:get-subdb dbstruct #f)) ;; archive tables are in main.db
+	 (db           (dbr:dbdat-dbh dbdat))
 	 (res          #f))
      (lambda (id)
        (set! res id))
@@ -1598,28 +917,28 @@
     (if res ;; record exists, update df and return id
 	  (sqlite3:execute db "UPDATE archive_disks SET last_df=?,last_df_time=(strftime('%s','now'))
                                   WHERE archive_area_name=? AND disk_path=?;"
 			   df bdisk-name bdisk-path)
-          (stack-push! (dbr:dbstruct-dbstack dbstruct) dbdat)
+          (dbfile:add-dbdat dbstruct #f dbdat)
 	   "INSERT OR REPLACE INTO archive_disks (archive_area_name,disk_path,last_df)
                 VALUES (?,?,?);"
 	   bdisk-name bdisk-path df)
-          (stack-push! (dbr:dbstruct-dbstack dbstruct) dbdat)
+          (dbfile:add-dbdat dbstruct #f dbdat)
 	  (db:archive-register-disk dbstruct bdisk-name bdisk-path df)))))
 ;; record an archive path created on a given archive disk (identified by it's bdisk-id)
 ;; if path starts with / then it is full, otherwise it is relative to the archive disk
 ;; preference is to store the relative path.
 (define (db:archive-register-block-name dbstruct bdisk-id archive-path #!key (du #f))
-  (let* ((dbdat        (db:get-db dbstruct)) ;; archive tables are in main.db
-	 (db           (db:dbdat-get-db dbdat))
+  (let* ((dbdat        (db:get-subdb dbstruct #f)) ;; archive tables are in main.db
+	 (db           (dbr:dbdat-dbh dbdat))
 	 (res          #f))
     ;; first look to see if this path is already registered
      (lambda (id)
        (set! res id))
@@ -1633,11 +952,11 @@
 	  (sqlite3:execute db "INSERT OR REPLACE INTO archive_blocks (archive_disk_id,disk_path,last_du)
                                                         VALUES (?,?,?);"
 			   bdisk-id archive-path (or du 0))
 	  (set! res (db:archive-register-block-name dbstruct bdisk-id archive-path du: du))))
-    (stack-push! (dbr:dbstruct-dbstack dbstruct) dbdat)
+    (dbfile:add-dbdat dbstruct #f dbdat)
 ;; The "archived" field in tests is overloaded; 0 = not archived, > 0 archived in block with given id
@@ -1644,11 +963,11 @@
 (define (db:test-set-archive-block-id dbstruct run-id test-id archive-block-id)
-   (lambda (db)
+   (lambda (dbdat db)
      (sqlite3:execute db "UPDATE tests SET archived=? WHERE id=?;"
 		      archive-block-id test-id))))
 ;; Look up the archive block info given a block-id
@@ -1655,11 +974,11 @@
 (define (db:test-get-archive-block-info dbstruct archive-block-id)
-   (lambda (db)
+   (lambda (dbdat db)
      (let ((res #f))
 	;;        0         1           2        3          4           5
 	(lambda (id archive-disk-id disk-path last-du last-du-time creation-time)
 	  (set! res (vector id archive-disk-id disk-path last-du last-du-time creation-time)))
@@ -1667,49 +986,16 @@
 	"SELECT id,archive_disk_id,disk_path,last_du,last_du_time,creation_time FROM archive_blocks WHERE id=?;"
 ;; (define (db:archive-allocate-testsuite/area-to-block block-id testsuite-name areakey)
-;;   (let* ((dbdat        (db:get-db dbstruct #f)) ;; archive tables are in main.db
-;; 	 (db           (db:dbdat-get-db dbdat))
+;;   (let* ((dbdat        (db:get-subdb dbstruct #f)) ;; archive tables are in main.db
+;; 	 (db           (dbr:dbdat-dbh dbdat))
 ;; 	 (res          '())
 ;; 	 (blocks       '())) ;; a block is an archive chunck that can be added too if there is space
 ;;     (sqlite3:for-each-row  #f)
-;; L O G G I N G    D B 
-(define (open-logging-db)
-  (let* ((dbpath    (conc (if *toppath* (conc *toppath* "/") "") "logging.db")) ;; fname)
-	 (dbexists  (common:file-exists? dbpath))
-	 (db        (sqlite3:open-database dbpath))
-	 (handler   (sqlite3:make-busy-timeout (if (args:get-arg "-override-timeout")
-					   (string->number (args:get-arg "-override-timeout"))
-					   136000)))) ;; 136000)))
-    (sqlite3:set-busy-handler! db handler)
-    (if (not dbexists)
-	(begin
-	  (sqlite3:execute db "CREATE TABLE IF NOT EXISTS log (id INTEGER PRIMARY KEY,event_time TIMESTAMP DEFAULT (strftime('%s','now')),logline TEXT,pwd TEXT,cmdline TEXT,pid INTEGER);")
-	  (db:set-sync db) ;; (sqlite3:execute db (conc "PRAGMA synchronous = 0;"))
-	  ))
-    db))
-(define (db:log-local-event . loglst)
-  (let ((logline (apply conc loglst)))
-    (db:log-event logline)))
-(define (db:log-event logline)
-  (let ((db (open-logging-db)))
-    (sqlite3:execute db "INSERT INTO log (logline,pwd,cmdline,pid) VALUES (?,?,?,?);"
-		     logline
-		     (current-directory)
-		     (string-intersperse (argv) " ")
-		     (current-process-id))
-    (sqlite3:finalize! db)
-    logline))
 ;; D B   U T I L S
@@ -1724,12 +1010,12 @@
 	 (deadtime     (if (and deadtime-str
 				(string->number deadtime-str))
 			   (string->number deadtime-str)
 			   72000))) ;; twenty hours
-     dbstruct #f #f
-     (lambda (db)
+     dbstruct run-id #f
+     (lambda (dbdat db)
        (if (number? ovr-deadtime)(set! deadtime ovr-deadtime))
        ;; in RUNNING or REMOTEHOSTSTART for more than 10 minutes
        ;; HOWEVER: this code in run:test seems to work fine
@@ -1774,12 +1060,12 @@
 (define (db:get-status-from-final-status-file run-dir)
   (let ((infile (conc run-dir "/.final-status")))
     ;; first verify we are able to write the output file
     (if (not (file-read-access? infile))
-	  (debug:print 0 *default-log-port* "ERROR: cannot read " infile)
-          (debug:print 0 *default-log-port* "ERROR: run-dir is " run-dir)
+	  (debug:print 2 *default-log-port* "ERROR: cannot read " infile)
+          (debug:print 2 *default-log-port* "ERROR: run-dir is " run-dir)
         (with-input-from-file infile read-lines)
@@ -1807,24 +1093,24 @@
     (debug:print-info 4  *default-log-port* "running-deadtime = " running-deadtime)
     (debug:print-info 4  *default-log-port* "deadtime-trim = " deadtime-trim)
-     dbstruct #f #f
-     (lambda (db)
+     dbstruct run-id #f
+     (lambda (dbdat db)
        (let* ((stmth1 (db:get-cache-stmth
-		       dbstruct db
+		       dbdat run-id db
 		       "SELECT id,rundir,uname,testname,item_path,event_time,run_duration FROM tests 
                            WHERE run_id=? AND (strftime('%s','now') - event_time) > (run_duration + ?)
                                           AND state IN ('RUNNING');"))
 	      (stmth2 (db:get-cache-stmth
-		       dbstruct db
+		       dbdat run-id db
 		       "SELECT id,rundir,uname,testname,item_path,event_time,run_duration FROM tests 
                            WHERE run_id=? AND (strftime('%s','now') - event_time) > (run_duration + ?)
                                           AND state IN ('REMOTEHOSTSTART');"))
 	      (stmth3 (db:get-cache-stmth
-		       dbstruct db
+		       dbdat run-id db
 		       "SELECT id,rundir,uname,testname,item_path FROM tests
                            WHERE run_id=? AND (strftime('%s','now') - event_time) > 86400
                                           AND state IN ('LAUNCHED');")))
 	 ;; in RUNNING or REMOTEHOSTSTART for more than 10 minutes
@@ -1928,11 +1214,11 @@
 ;; BUG: Probably broken - does not explicitly use run-id in the query
 (define (db:top-test-set-per-pf-counts dbstruct run-id test-name)
-  (db:general-call dbstruct 'top-test-set-per-pf-counts (list test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name)))
+  (db:general-call dbstruct run-id 'top-test-set-per-pf-counts (list test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name)))
 ;; Clean out old junk and vacuum the database
 ;; Ultimately do something like this:
@@ -1941,54 +1227,20 @@
 ;;    b. If test dir gone, delete the test record
 ;; 2. Look at run records
 ;;    a. If have tests that are not deleted, set state='unknown'
 ;;    b. ....
-(define (db:clean-up dbdat)
-  ;; (debug:print 0 *default-log-port* "WARNING: db clean up not fully ported to v1.60, cleanup action will be on megatest.db")
-  (let* ((keep-record-age ( - (current-seconds) (common:hms-string->seconds (or (configf:lookup *configdat* "setup" "delete-record-age") "30d"))))
-	 (db         (db:dbdat-get-db dbdat))
-	 (count-stmt (sqlite3:prepare db "SELECT (SELECT count(id) FROM tests)+(SELECT count(id) FROM runs);"))
-	(statements
-	 (map (lambda (stmt)
-		(sqlite3:prepare db stmt))
-	      (list
-	       ;; delete all tests that belong to runs that are 'deleted'
-	       (conc "DELETE FROM tests WHERE run_id in (SELECT id FROM runs WHERE state='deleted') and last_update < " keep-record-age ";")
-	       ;; delete all tests that are 'DELETED'
-	       (conc "DELETE FROM tests WHERE state='DELETED' and last_update < " keep-record-age " ;")
-	       ;; delete all tests that have no run
-	       (conc "DELETE FROM tests WHERE run_id NOT IN (SELECT DISTINCT id FROM runs) and last_update < " keep-record-age "; ")
-	       ;; delete all runs that are state='deleted'
-	       (conc "DELETE FROM runs WHERE state='deleted' and last_update < " keep-record-age ";")
-	       ;; delete empty runs
-	       (conc "DELETE FROM runs WHERE id NOT IN (SELECT DISTINCT FROM runs AS r INNER JOIN tests AS t ON and last_update < " keep-record-age ";")
-	       ;; remove orphaned test_rundat entries
-	       (conc "DELETE FROM test_rundat where test_id NOT IN (SELECT id FROM tests);")
-	       ;; remove orphaned test_steps entries
-	       (conc "DELETE FROM test_steps WHERE test_id NOT IN (SELECT id FROM tests);")
-               ;; remove orphaned test_dat entries
-	       (conc "DELETE FROM test_data WHERE test_id NOT IN (SELECT id FROM tests);")
-	       ))))
-    ;; (db:delay-if-busy dbdat)
-    ;(debug:print-info 0 *default-log-port*  statements) 
-    (sqlite3:with-transaction 
-     db
-     (lambda ()
-       (sqlite3:for-each-row (lambda (tot)
-			       (debug:print-info 0 *default-log-port* "Records count before clean: " tot))
-			     count-stmt)
-       (map sqlite3:execute statements)
-       (sqlite3:for-each-row (lambda (tot)
-			       (debug:print-info 0 *default-log-port* "Records count after  clean: " tot))
-			     count-stmt)))
-    (map sqlite3:finalize! statements)
-    (sqlite3:finalize! count-stmt)
-    ;; (db:find-and-mark-incomplete db)
-    ;; (db:delay-if-busy dbdat)
-    (sqlite3:execute db "VACUUM;")))
+(define (db:clean-up run-id dbdat)
+  (debug:print 2 *default-log-port* "db:clean-up")
+  (if run-id
+    (db:clean-up-rundb dbdat)
+    (db:clean-up-maindb dbdat)
+  )
 ;; Clean out old junk and vacuum the database
 ;; Ultimately do something like this:
@@ -1999,11 +1251,11 @@
 ;;    a. If have tests that are not deleted, set state='unknown'
 ;;    b. ....
 (define (db:clean-up-rundb dbdat)
   ;; (debug:print 0 *default-log-port* "WARNING: db clean up not fully ported to v1.60, cleanup action will be on megatest.db")
-  (let* ((db         (db:dbdat-get-db dbdat))
+  (let* ((db         (dbr:dbdat-dbh dbdat))
 	 (count-stmt (sqlite3:prepare db "SELECT (SELECT count(id) FROM tests);"))
 	 (map (lambda (stmt)
 		(sqlite3:prepare db stmt))
@@ -2040,11 +1292,11 @@
 ;;    a. If have tests that are not deleted, set state='unknown'
 ;;    b. ....
 (define (db:clean-up-maindb dbdat)
   ;; (debug:print 0 *default-log-port* "WARNING: db clean up not fully ported to v1.60, cleanup action will be on megatest.db")
-  (let* ((db         (db:dbdat-get-db dbdat))
+  (let* ((db         (dbr:dbdat-dbh dbdat))
 	 (count-stmt (sqlite3:prepare db "SELECT (SELECT count(id) FROM runs);"))
 	  (map (lambda (stmt)
 		 (sqlite3:prepare db stmt))
@@ -2085,12 +1337,12 @@
 ;; also updates *global-delta*
 (define (db:get-var dbstruct var)
   (let* ((res      #f))
-     dbstruct #f #f
-     (lambda (db)
+     dbstruct #f #f  ;; for the moment vars are only stored in main.db
+     (lambda (dbdat db)
         (lambda (val)
           (set! res val))
         "SELECT val FROM metadat WHERE var=?;" var)
@@ -2100,16 +1352,16 @@
              (if valnum (set! res valnum))))
 (define (db:inc-var dbstruct var)
   (db:with-db dbstruct #f #t 
-	      (lambda (db)
+	      (lambda (dbdat db)
 		(sqlite3:execute db "UPDATE metadat SET val=val+1 WHERE var=?;" var))))
 (define (db:dec-var dbstruct var)
   (db:with-db dbstruct #f #t 
-	      (lambda (db)
+	      (lambda (dbdat db)
 		(sqlite3:execute db "UPDATE metadat SET val=val-1 WHERE var=?;" var))))
 ;; This was part of db:get-var. It was used to estimate the load on
 ;; the database files.
@@ -2122,99 +1374,49 @@
 ;; 	  (debug:print-info 4 *default-log-port* "launch throttle factor=" *global-delta*)
 ;; 	  (set! *last-global-delta-printed* *global-delta*)))
 (define (db:set-var dbstruct var val)
   (db:with-db dbstruct #f #t 
-	      (lambda (db)
+	      (lambda (dbdat db)
 		(sqlite3:execute db "INSERT OR REPLACE INTO metadat (var,val) VALUES (?,?);" var val))))
 (define (db:add-var dbstruct var val)
   (db:with-db dbstruct #f #t 
-	      (lambda (db)
+	      (lambda (dbdat db)
 		(sqlite3:execute db "UPDATE metadat SET val=val+? WHERE var=?;" val var))))
 (define (db:del-var dbstruct var)
   (db:with-db dbstruct #f #t 
-	      (lambda (db)
+	      (lambda (dbdat db)
 		(sqlite3:execute db "DELETE FROM metadat WHERE var=?;" var))))
 ;; no-sync.db - small bits of data to be shared between servers
-(define (db:open-no-sync-db)
-  (let* ((dbpath (db:dbfile-path))
-	 (dbname (conc dbpath "/no-sync.db"))
-	 (db-exists (common:file-exists? dbname))
-	 (db     (sqlite3:open-database dbname)))
-    (sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 136000))
-    (if (not db-exists)
-	(begin
-	  (sqlite3:execute db "PRAGMA synchronous = 0;")
-	  (sqlite3:execute db "CREATE TABLE IF NOT EXISTS no_sync_metadat (var TEXT,val TEXT, CONSTRAINT no_sync_metadat_constraint UNIQUE (var));")
-	  (sqlite3:execute db "PRAGMA journal_mode=WAL;")))
-    db))
-;; if we are not a server create a db handle. this is not finalized
-;; so watch for problems. I'm still not clear if it is needed to manually
-;; finalize sqlite3 dbs with the sqlite3 egg.
 (define (db:no-sync-db db-in)
-  (mutex-lock! *db-access-mutex*)
-  (let ((res (if db-in
-                 db-in
-                 (let ((db (db:open-no-sync-db)))
-                   (set! *no-sync-db* db)
-                   db))))
-    (mutex-unlock! *db-access-mutex*)
-    res))
-(define (db:no-sync-set db var val)
-  (sqlite3:execute (db:no-sync-db db) "INSERT OR REPLACE INTO no_sync_metadat (var,val) VALUES (?,?);" var val))
-(define (db:no-sync-del! db var)
-  (sqlite3:execute (db:no-sync-db db) "DELETE FROM no_sync_metadat WHERE var=?;" var))
-(define (db:no-sync-get/default db var default)
-  (let ((res default))
-    (sqlite3:for-each-row
-     (lambda (val)
-       (set! res val))
-     (db:no-sync-db db)
-     "SELECT val FROM no_sync_metadat WHERE var=?;"
-     var)
-    (if res
-        (let ((newres (if (string? res)
-			  (string->number res)
-			  #f)))
-          (if newres
-              newres
-              res))
-        res)))
+  ;; Return a handle to the no-sync db:
+  ;;   - db-in, when the caller supplies one;
+  ;;   - otherwise the cached *no-sync-db*, when it is set;
+  ;;   - otherwise open the db under (common:get-db-tmp-area), cache it
+  ;;     in *no-sync-db* and return it.
+  (cond
+   (db-in        db-in)
+   (*no-sync-db* *no-sync-db*)
+   (else
+    (mutex-lock! *db-access-mutex*)
+    ;; let* (not let) is required here: the db binding refers to dbpath,
+    ;; which a plain let would not yet have in scope.
+    ;; The cache is re-checked now that the mutex is held so that two
+    ;; threads racing past the unlocked test above do not both open the db.
+    (let* ((dbpath (common:get-db-tmp-area))
+           (db     (or *no-sync-db*
+                       (dbfile:open-no-sync-db dbpath))))
+      (set! *no-sync-db* db)
+      (mutex-unlock! *db-access-mutex*)
+      db))))
+;; Call proc with the no-sync db handle obtained from db:no-sync-db
+;; (seeded with the current value of *no-sync-db*) and return whatever
+;; proc returns.
+(define (with-no-sync-db proc)
+  (proc (db:no-sync-db *no-sync-db*)))
+;; Thin wrapper: hands the directory reported by db:dbfile-path to
+;; dbfile:open-no-sync-db and returns its result.
+;; NOTE(review): db:no-sync-db passes (common:get-db-tmp-area) instead of
+;; (db:dbfile-path) - confirm the two are meant to differ.
+(define (db:open-no-sync-db)
+  (let ((dbdir (db:dbfile-path)))
+    (dbfile:open-no-sync-db dbdir)))
 (define (db:no-sync-close-db db stmt-cache)
   (db:safely-close-sqlite3-db db stmt-cache))
-;; transaction protected lock aquisition
-;; either:
-;;    fails    returns  (#f . lock-creation-time)
-;;    succeeds (returns (#t . lock-creation-time)
-;; use (db:no-sync-del! db keyname) to release the lock
-(define (db:no-sync-get-lock db-in keyname)
-  (let ((db (db:no-sync-db db-in)))
-    (sqlite3:with-transaction
-     db
-     (lambda ()
-       (handle-exceptions
-	   exn
-	 (let ((lock-time (current-seconds)))
-	   (debug:print-info 2 *default-log-port* "db:no-sync-get-lock keyname=" keyname ", lock-time=" lock-time ", exn=" exn)
-	   (sqlite3:execute db "INSERT INTO no_sync_metadat (var,val) VALUES(?,?);" keyname lock-time)
-	   `(#t . ,lock-time))
-	 `(#f . ,(sqlite3:first-result db "SELECT val FROM no_sync_metadat WHERE var=?;" keyname)))))))
 ;; use a global for some primitive caching, it is just silly to
 ;; re-read the db over and over again for the keys since they never
 ;; change
@@ -2221,21 +1423,24 @@
 ;; why get the keys from the db? why not get from the *configdat*
 ;; using keys:config-get-fields?
 (define (db:get-keys dbstruct)
-  (if *db-keys* *db-keys* 
-      (let ((res '()))
-	(db:with-db dbstruct #f #f
-		    (lambda (db)
-		      (sqlite3:for-each-row 
-		       (lambda (key)
-			 (set! res (cons key res)))
-		       db
-		       "SELECT fieldname FROM keys ORDER BY id DESC;")))
-	(set! *db-keys* res)
-	res)))
+  (keys:config-get-fields *configdat*)
+;;  (if *db-keys* *db-keys* 
+;;      (let ((res '()))
+;;	(db:with-db dbstruct #f #f
+;;		    (lambda (dbdat db)
+;;		      (sqlite3:for-each-row 
+;;		       (lambda (key)
+;;			 (set! res (cons key res)))
+;;		       db
+;;		       "SELECT fieldname FROM keys ORDER BY id DESC;")))
+;;	(set! *db-keys* res)
+;;	res)))
 ;; extract index number given a header/data structure
 (define (db:get-index-by-header header field)
   (list-index (lambda (x)(equal? x field)) header))
@@ -2275,11 +1480,11 @@
 ;(print qry)
    #f ;; this is for the main runs db
    #f ;; does not modify db
-   (lambda (db)
+   (lambda (dbdat db)
 	(lambda (runname runtime target )
 	  (set! res (cons (vector runname runtime target) res)))
@@ -2292,11 +1497,11 @@
 (define (db:get-run-name-from-id dbstruct run-id)
    #f ;; this is for the main runs db
    #f ;; does not modify db
-   (lambda (db)
+   (lambda (dbdat db)
      (let ((res #f))
 	(lambda (runname)
 	  (set! res runname))
@@ -2307,11 +1512,11 @@
 (define (db:get-run-key-val dbstruct run-id key)
-   (lambda (db)
+   (lambda (dbdat db)
      (let ((res #f))
 	(lambda (val)
 	  (set! res val))
@@ -2351,18 +1556,22 @@
 	 (andstr    (if (> (length keys) 0) " AND " ""))
 	 (valslots  (keys->valslots keys)) ;; ?,?,? ...
 	 (allvals   (append (list runname state status user contour) (map cadr keyvals)))
 	 (qryvals   (append (list runname) (map cadr keyvals)))
 	 (key=?str  (string-intersperse (map (lambda (k)(conc k "=?")) keys) " AND ")))
+    ;; (debug:print 0 *default-log-port* "Got here 0.")
     (debug:print 3 *default-log-port* "keys: " keys " allvals: " allvals " keyvals: " keyvals " key=?str is " key=?str)
     (debug:print 2 *default-log-port* "NOTE: using target " (string-intersperse (map cadr keyvals) "/") " for this run")
     (if (and runname (null? (filter (lambda (x)(not x)) keyvals))) ;; there must be a better way to "apply and"
 	 dbstruct #f #f
-	 (lambda (db)
+	 (lambda (dbdat db)
+	   ;; (debug:print 0 *default-log-port* "Got here 1.")
 	   (let ((res #f))
-	     (apply sqlite3:execute db (conc "INSERT OR IGNORE INTO runs (runname,state,status,owner,event_time,contour" comma keystr ") VALUES (?,?,?,?,strftime('%s','now'),?" comma valslots ");")
+	     (apply sqlite3:execute db
+		    (conc "INSERT OR IGNORE INTO runs (runname,state,status,owner,event_time,contour"
+			  comma keystr ") VALUES (?,?,?,?,strftime('%s','now'),?" comma valslots ");")
 	     (apply sqlite3:for-each-row 
 		    (lambda (id)
 		      (set! res id))
@@ -2406,11 +1615,11 @@
 			   (if (number? offset)
 			       (conc " OFFSET " offset)
     (debug:print-info 11 *default-log-port* "db:get-runs START qrystr: " qrystr " keypatts: " keypatts " offset: " offset " limit: " count)
     (db:with-db dbstruct #f #f
-		(lambda (db)		
+		(lambda (dbdat db)		
 		   (lambda (a . x)
 		     (set! res (cons (apply vector a x) res)))
@@ -2449,11 +1658,11 @@
 				 (conc " OFFSET " offset)
     (debug:print-info 11 *default-log-port* "db:get-runs START qrystr: " qrystr " target: " target " offset: " offset " limit: " count)
     (db:with-db dbstruct #f #f
-		(lambda (db)		
+		(lambda (dbdat db)		
 		   (lambda (target id runname state status owner event_time)
 		     (set! res (cons (make-simple-run target id runname state status owner event_time) res)))
@@ -2461,25 +1670,34 @@
     (debug:print-info 11 *default-log-port* "db:get-runs END qrystr: " qrystr " target: " target " offset: " offset " limit: " count)
 ;; TODO: Switch this to use max(update_time) from each run db? Then if using a server there is no disk traffic (using inmem db)
-(define (db:get-changed-run-ids since-time)
+;; NOTE: This DOESN'T (necessarily) get the real run ids, but the number of the <number>.db!!
+ (define (db:get-changed-run-ids since-time)
   (let* ((dbdir      (db:dbfile-path)) ;; (configf:lookup *configdat* "setup" "dbdir"))
-	 (alldbs     (glob (conc dbdir "/[0-9]*.db")))
-	 (changed    (filter (lambda (dbfile)
-			       (> (file-modification-time dbfile) since-time))
-			     alldbs)))
+        (alldbs     (glob (conc dbdir "/.megatest/[0-9]*.db*")))
+        (changed    (filter (lambda (dbfile)
+                              (> (file-modification-time dbfile) since-time))
+                            alldbs)))
      (map (lambda (dbfile)
-	    (let* ((res (string-match ".*\\/(\\d)*\\.db" dbfile)))
-	      (if res
-		  (string->number (cadr res))
-		  (begin
-		    (debug:print 2 *default-log-port* "WARNING: Failed to process " dbfile " for run-id")
-		    0))))
-	  changed))))
+           (let* ((res (string-match ".*\\/(\\d\\d)\\.db*" dbfile)))
+             (if res
+                 (string->number (cadr res))
+                 (begin
+                   (debug:print 2 *default-log-port* "WARNING: Failed to process " dbfile " for run-id")
+                   0))))
+         changed))))
 ;; Get all targets from the db
 (define (db:get-targets dbstruct)
   (let* ((res       '())
@@ -2490,11 +1708,11 @@
 	 (seen       (make-hash-table)))
-     (lambda (db)
+     (lambda (dbdat db)
 	(lambda (a . x)
 	  (let ((targ (cons a x)))
 	    (if (not (hash-table-ref/default seen targ #f))
@@ -2509,11 +1727,11 @@
 (define (db:get-num-runs dbstruct runpatt)
-   (lambda (db)
+   (lambda (dbdat db)
      (let ((numruns 0))
        (debug:print-info 11 *default-log-port* "db:get-num-runs START " runpatt)
 	(lambda (count)
 	  (set! numruns count))
@@ -2526,11 +1744,11 @@
 (define (db:get-runs-cnt-by-patt dbstruct runpatt targetpatt keys)
-   (lambda (db)
+   (lambda (dbdat db)
      (let ((numruns 0)
            (qry-str #f)
            (key-patt "")
 	     	   (keyvals  (if targetpatt (keys:target->keyval keys targetpatt) '())))
@@ -2564,11 +1782,11 @@
 (define (db:get-raw-run-stats dbstruct run-id)
-   (lambda (db)
+   (lambda (dbdat db)
 	(lambda (res state status count)
 	  (cons (list state status count) res))
@@ -2583,11 +1801,11 @@
-   (lambda (db)
+   (lambda (dbdat db)
      ;; remove previous data
      (let* ((stmt1 (sqlite3:prepare db "DELETE FROM run_stats WHERE run_id=? AND state=? AND status=?;"))
 	    (stmt2 (sqlite3:prepare db "INSERT INTO run_stats (run_id,state,status,count) VALUES (?,?,?,?);"))
@@ -2607,11 +1825,11 @@
 (define (db:get-main-run-stats dbstruct run-id)
    #f ;; this data comes from main
-   (lambda (db)
+   (lambda (dbdat db)
 	(lambda (res state status count)
 	  (cons (list state status count) res))
@@ -2640,11 +1858,11 @@
 (define (db:get-all-run-ids dbstruct)
-   (lambda (db)
+   (lambda (dbdat db)
      (let ((run-ids '()))
 	(lambda (run-id)
 	  (set! run-ids (cons run-id run-ids)))
@@ -2664,11 +1882,11 @@
 	 (res          '())
 	 (runs-info    '()))
     ;; First get all the runname/run-ids
      dbstruct #f #f
-     (lambda (db)
+     (lambda (dbdat db)
 	(lambda (run-id runname)
 	  (set! runs-info (cons (list run-id runname) runs-info)))
 	"SELECT id,runname FROM runs WHERE state != 'deleted' ORDER BY event_time DESC;"))) ;; If you change this to the more logical ASC please adjust calls to db:get-run-stats
@@ -2680,11 +1898,11 @@
 	      (run-name (cadr run-info)))
-	  (lambda (db)
+	  (lambda (dbdat db)
 	     (lambda (state status count)
 	       (let ((netstate (if (equal? state "COMPLETED") status state)))
 		 (if (string? netstate)
@@ -2742,11 +1960,11 @@
     ;(print "runs:get-runs-by-patt qry=" qry-str " " runnamepatt)
     (vector header 
              (db:with-db dbstruct #f #f ;; reads db, does not write to it.
-                         (lambda (db)
+                         (lambda (dbdat db)
                             (lambda (res . r)
                               (cons (list->vector r) res))
@@ -2768,11 +1986,11 @@
 			  (string-intersperse remfields ","))))
     (debug:print-info 11 *default-log-port* "db:get-run-info run-id: " run-id " header: " header " keystr: " keystr)
      dbstruct #f #f
-     (lambda (db)
+     (lambda (dbdat db)
 	(lambda (a . x)
 	  (set! res (apply vector a x)))
 	(conc "SELECT " keystr " FROM runs WHERE id=?;")
@@ -2783,19 +2001,19 @@
 (define (db:set-comment-for-run dbstruct run-id comment)
    dbstruct #f #f
-   (lambda (db)
+   (lambda (dbdat db)
      (sqlite3:execute db "UPDATE runs SET comment=? WHERE id=?;" comment ;; (sdb:qry 'getid comment)
 ;; does not (obviously!) removed dependent data. But why not!!?
 (define (db:delete-run dbstruct run-id)
    dbstruct #f #f
-   (lambda (db)
+   (lambda (dbdat db)
       (lambda ()
         (sqlite3:execute db "DELETE FROM test_steps WHERE test_id IN (SELECT id FROM tests WHERE run_id=?);" run-id)
         (sqlite3:execute db "DELETE FROM test_data WHERE test_id IN (SELECT id FROM tests WHERE run_id=?);"  run-id)
@@ -2804,17 +2022,17 @@
         (sqlite3:execute db "UPDATE runs SET state='deleted',comment='' WHERE id=?;" run-id))))))
 (define (db:update-run-event_time dbstruct run-id)
    dbstruct #f #t
-   (lambda (db)
+   (lambda (dbdat db)
      (sqlite3:execute db "UPDATE runs SET event_time=strftime('%s','now') WHERE id=?;" run-id))))
 (define (db:lock/unlock-run dbstruct run-id lock unlock user)
    dbstruct #f #t
-   (lambda (db)
+   (lambda (dbdat db)
      (let ((newlockval (if lock "locked"
 			   (if unlock
 			       "locked")))) ;; semi-failsafe
        (sqlite3:execute db "UPDATE runs SET state=? WHERE id=?;" newlockval run-id)
@@ -2823,28 +2041,28 @@
        (debug:print-info 1 *default-log-port* "" newlockval " run number " run-id)))))
 (define (db:set-run-status dbstruct run-id status msg)
    dbstruct #f #f
-   (lambda (db)
+   (lambda (dbdat db)
        (if msg
          (sqlite3:execute db "UPDATE runs SET status=?,comment=? WHERE id=?;" status msg run-id)
          (sqlite3:execute db "UPDATE runs SET status=? WHERE id=?;" status run-id)))))
 (define (db:set-run-state-status dbstruct run-id state status )
    dbstruct #f #f
-   (lambda (db)
+   (lambda (dbdat db)
           (sqlite3:execute db "UPDATE runs SET status=?,state=? WHERE id=?;" status state run-id))))
 (define (db:get-run-status dbstruct run-id)
   (let ((res "n/a"))
      dbstruct #f #f
-     (lambda (db)
+     (lambda (dbdat db)
 	(lambda (status)
 	  (set! res status))
 	"SELECT status FROM runs WHERE id=?;" 
@@ -2853,11 +2071,11 @@
 (define (db:get-run-state dbstruct run-id)
   (let ((res "n/a"))
      dbstruct #f #f
-     (lambda (db)
+     (lambda (dbdat db)
 	(lambda (status)
 	  (set! res status))
 	"SELECT state FROM runs WHERE id=?;" 
@@ -2874,11 +2092,11 @@
 (define (db:get-key-val-pairs dbstruct run-id)
   (let* ((keys (db:get-keys dbstruct))
 	 (res  '()))
      dbstruct #f #f
-     (lambda (db)
+     (lambda (dbdat db)
 	(lambda (key)
 	  (let ((qry (conc "SELECT " key " FROM runs WHERE id=?;")))
 	     (lambda (key-val)
@@ -2891,11 +2109,11 @@
 (define (db:get-key-vals dbstruct run-id)
   (let* ((keys (db:get-keys dbstruct))
 	 (res  '()))
      dbstruct #f #f
-     (lambda (db)
+     (lambda (dbdat db)
 	(lambda (key)
 	  (let ((qry (conc "SELECT " key " FROM runs WHERE id=?;")))
 	    ;; (db:delay-if-busy dbdat)
@@ -2923,11 +2141,11 @@
     (let ((prev-run-ids '()))
       (if (null? keyvals)
             (db:with-db dbstruct #f #f ;; #f means work with the zeroth db - i.e. the runs db
-                        (lambda (db)
+                        (lambda (dbdat db)
                           (apply sqlite3:for-each-row
                                  (lambda (id)
                                    (set! prev-run-ids (cons id prev-run-ids)))
                                  (conc "SELECT id FROM runs WHERE " qrystr " AND state != 'deleted' AND id != ?;")
@@ -3018,11 +2236,11 @@
 				(if offset (conc " OFFSET " offset) " ")
     (debug:print-info 8 *default-log-port* "db:get-tests-for-run run-id=" run-id ", qry=" qry)
     (let* ((res (db:with-db dbstruct run-id #f
-			    (lambda (db)
+			    (lambda (dbdat db)
 			      ;; (let* ((stmth (db:get-cache-stmth dbstruct db qry))) ;; due to use of last-update we can't efficiently cache this query
 				  (lambda (res . row)
 				    ;; id run-id testname state status event-time host cpuload
@@ -3055,11 +2273,11 @@
 ;; 3. convert for-each-row to fold
 ;; (define (db:get-tests-for-run-state-status dbstruct run-id testpatt)
 ;;   (db:with-db
 ;;    dbstruct run-id #f
-;;    (lambda (db)
+;;    (lambda (dbdat db)
 ;;      (let* ((res            '())
 ;; 	    (stmt-cache      (dbr:dbstruct-stmt-cache dbstruct))
 ;; 	    (stmth           (let* ((sh (db:hoh-get stmt-cache db testpatt)))
 ;; 			       (or sh
 ;; 				   (let* ((tests-match-qry (tests:match->sqlqry testpatt))
@@ -3085,11 +2303,11 @@
 				" AND last_update > ? "
 				(if tests-match-qry (conc " AND (" tests-match-qry ") ") "")
     (debug:print-info 8 *default-log-port* "db:get-tests-for-run qry=" qry)
     (db:with-db dbstruct run-id #f
-		(lambda (db)
+		(lambda (dbdat db)
 		   (lambda (res id testname item-path state status event-time run-duration)
 		     ;;            id,run_id,testname,state,status,event_time,host,cpuload,diskfree,uname,rundir,item_path,run_duration,final_logf,comment
 		     (cons (vector id run-id testname state status event-time  ""     -1      -1       ""    "-"  item-path run-duration  "-"         "-") res))
@@ -3099,11 +2317,11 @@
 		   (or last-update 0))))))
 (define (db:get-testinfo-state-status dbstruct run-id test-id)
   (let ((res            #f))
     (db:with-db dbstruct run-id #f
-		(lambda (db)
+		(lambda (dbdat db)
 		   (lambda (run-id testname item-path state status)
 		     ;; id,run_id,testname,state,status,event_time,host,cpuload,diskfree,uname,rundir,item_path,run_duration,final_logf,comment
 		     (set! res (vector test-id run-id testname state status -1 "" -1 -1 "" "-" item-path -1 "-" "-")))
@@ -3134,15 +2352,15 @@
 ;; Convert calling routines to get list of run-ids and loop, do not use the get-tests-for-runs
 (define (db:delete-test-records dbstruct run-id test-id)
-  (db:general-call dbstruct 'delete-test-step-records (list test-id))
-  (db:general-call dbstruct 'delete-test-data-records (list test-id))
+  (db:general-call dbstruct run-id 'delete-test-step-records (list test-id))
+  (db:general-call dbstruct run-id 'delete-test-data-records (list test-id))
-   dbstruct #f #f
-   (lambda (db)
+   dbstruct run-id #f
+   (lambda (dbdat db)
      (sqlite3:execute db "UPDATE tests SET state='DELETED',status='n/a',comment='' WHERE id=?;" test-id))))
 (define (db:delete-old-deleted-test-records dbstruct)
   (let ((targtime (- (current-seconds)
@@ -3150,11 +2368,11 @@
 			 (* 30 24 60 60))))) ;; one month in the past
-     (lambda (db)
+     (lambda (dbdat db)
 	(lambda ()
 	  (sqlite3:execute db "DELETE FROM test_steps WHERE test_id IN (SELECT id FROM tests WHERE state='DELETED' AND event_time<?);" targtime)
 	  (sqlite3:execute db "DELETE FROM test_data WHERE test_id IN (SELECT id FROM tests WHERE state='DELETED' AND event_time<?);" targtime)
@@ -3181,11 +2399,11 @@
 	     (test-id (db:get-test-id dbstruct run-id testname "")))
-	  (lambda (db)
+	  (lambda (dbdat db)
 	    (sqlite3:execute db qry
 			     (or newstate  currstate "NOT_STARTED")
 			     (or newstatus currstate "UNKNOWN")
 			     run-id testname)))
 	 (if test-id
@@ -3201,14 +2419,13 @@
-;;      NOTE: run-id is not used
+;;      NOTE: run-id is now passed along in place of the former #f
 ;; ;;
 (define (db:test-set-state-status dbstruct run-id test-id newstate newstatus newcomment)
-   ;; run-id
-   #f
+   run-id
-   (lambda (db)
+   (lambda (dbdat db)
       ((and newstate newstatus newcomment)
        (sqlite3:execute db "UPDATE tests SET state=?,status=?,comment=? WHERE id=?;" newstate newstatus newcomment ;; (sdb:qry 'getid newcomment)
       ((and newstate newstatus)
@@ -3228,22 +2445,22 @@
 		  "SELECT count(id) FROM tests WHERE state in ('RUNNING','LAUNCHED','REMOTEHOSTSTART') AND NOT (uname = 'n/a' AND item_path = '');")) ;; )
-   (lambda (db)
-     (let* ((stmth (db:get-cache-stmth dbstruct  db qry)))
+   (lambda (dbdat db)
+     (let* ((stmth (db:get-cache-stmth dbdat run-id db qry)))
        (sqlite3:first-result stmth))))))
 ;; NEW BEHAVIOR: Count tests running in only one run!
 (define (db:get-count-tests-actually-running dbstruct run-id)
-   (lambda (db)
+   (lambda (dbdat db)
       ;; WARNING BUG EDIT ME - merged from v1.55 - not sure what is right here ...
       ;; "SELECT count(id) FROM tests WHERE state in ('RUNNING','LAUNCHED','REMOTEHOSTSTART') AND run_id NOT IN (SELECT id FROM runs WHERE state='deleted') AND NOT (uname = 'n/a' AND item_path = '');")
       "SELECT count(id) FROM tests WHERE state in ('RUNNING','REMOTEHOSTSTART','LAUNCHED') AND run_id=?;" 
@@ -3258,12 +2475,12 @@
 		  "SELECT count(id) FROM tests WHERE state in ('RUNNING','LAUNCHED','REMOTEHOSTSTART') AND run_id=?;")) ;; )
-     (lambda (db)
-       (let* ((stmth (db:get-cache-stmth dbstruct db qry)))
+     (lambda (dbdat db)
+       (let* ((stmth (db:get-cache-stmth dbdat run-id db qry)))
 	 (sqlite3:first-result stmth run-id))))))
 ;; For a given testname how many items are running? Used to determine
 ;; probability for regenerating html
@@ -3270,22 +2487,22 @@
 (define (db:get-count-tests-running-for-testname dbstruct run-id testname)
-   (lambda (db)
+   (lambda (dbdat db)
      (let* ((stmt "SELECT count(id) FROM tests WHERE state in ('RUNNING','LAUNCHED','REMOTEHOSTSTART') AND run_id=? AND NOT (uname = 'n/a' AND item_path = '') AND testname=?;")
-	    (stmth (db:get-cache-stmth dbstruct db stmt)))
+	    (stmth (db:get-cache-stmth dbdat run-id db stmt)))
 	stmth run-id testname)))))
 (define (db:get-not-completed-cnt dbstruct run-id)
-   (lambda (db)
+   (lambda (dbdat db)
       ;(print "SELECT count(id) FROM tests WHERE state not in ('COMPLETED', 'DELETED') AND run_id=" run-id)  
       "SELECT count(id) FROM tests WHERE state not in ('COMPLETED', 'DELETED') AND run_id=?;" run-id))))
@@ -3294,11 +2511,11 @@
       0 ;; 
       (let ((testnames '()))
 	;; get the testnames
 	 dbstruct #f #f
-	 (lambda (db)
+	 (lambda (dbdat db)
 	    (lambda (testname)
 	      (set! testnames (cons testname testnames)))
 	    "SELECT testname FROM test_meta WHERE jobgroup=?"
@@ -3307,11 +2524,11 @@
 	(if (not (null? testnames))
-	     (lambda (db)
+	     (lambda (dbdat db)
 		(conc "SELECT count(id) FROM tests WHERE state in ('RUNNING','LAUNCHED','REMOTEHOSTSTART') AND testname in ('"
 		      (string-intersperse testnames "','")
 		      "') AND NOT (uname = 'n/a' AND item_path='');")) ;; should this include the (uname = 'n/a' ...) ???
@@ -3326,11 +2543,11 @@
 (define (db:estimated-tests-remaining dbstruct run-id)
-   (lambda (db)
+   (lambda (dbdat db)
       "SELECT count(id) FROM tests WHERE state in ('LAUNCHED','NOT_STARTED','REMOTEHOSTSTART','RUNNING','KILLREQ') AND run_id=?;")
@@ -3338,11 +2555,11 @@
 (define (db:get-test-id dbstruct run-id testname item-path)
-   (lambda (db)
+   (lambda (dbdat db)
       "SELECT id FROM tests WHERE testname=? AND item_path=? AND run_id=?;"
       #f ;; the default
       testname item-path run-id))))
@@ -3353,20 +2570,20 @@
 (define (db:test-set-top-process-pid dbstruct run-id test-id pid)
-   (lambda (db)
+   (lambda (dbdat db)
      (sqlite3:execute db "UPDATE tests SET attemptnum=? WHERE id=?;"
 		      pid test-id))))
 (define (db:test-get-top-process-pid dbstruct run-id test-id)
-   (lambda (db)
+   (lambda (dbdat db)
       "SELECT attemptnum FROM tests WHERE id=?;"
@@ -3392,21 +2609,21 @@
 (define db:test-record-qry-selector (string-intersperse db:test-record-fields ","))
 (define (db:update-tesdata-on-repilcate-db dbstruct old-lt new-lt)
    dbstruct   #f   #f
-   (lambda (db)
+   (lambda (dbdat db)
      (sqlite3:execute db "UPDATE tests SET rundir= replace(rundir,?,?), shortdir=replace(shortdir,?,?);"
 		      old-lt new-lt  old-lt new-lt))))
 ;; NOTE: Use db:test-get* to access records
 ;; NOTE: This needs rundir decoding? Decide, decode here or where used? For the moment decode where used.
 (define (db:get-all-tests-info-by-run-id dbstruct run-id)
   (let* ((res '()))
      dbstruct #f #f
-     (lambda (db)
+     (lambda (dbdat db)
 	(lambda (id run-id testname state status event-time host cpuload diskfree uname rundir item-path run-duration final-logf comment shortdir attemptnum archived)
 	  ;;                 0    1       2      3      4        5       6      7        8     9     10      11          12          13       14     15        16
 	  (set! res (cons (vector id run-id testname state status event-time host cpuload diskfree uname rundir item-path run-duration final-logf comment shortdir attemptnum archived)
@@ -3415,11 +2632,11 @@
 (define (db:replace-test-records dbstruct run-id testrecs)
   (db:with-db dbstruct run-id #t 
-	      (lambda (db)
+	      (lambda (dbdat db)
 		(let* ((qmarks (string-intersperse (make-list (length db:test-record-fields) "?") ","))
 		       (qrystr (conc "INSERT OR REPLACE INTO tests (" db:test-record-qry-selector ") VALUES (" qmarks ") WHERE run_id=?;"))
 		       (qry    (sqlite3:prepare db qrystr)))
 		  (debug:print 0 *default-log-port* "INFO: migrating test records for run with id " run-id)
@@ -3440,11 +2657,11 @@
       (let loop ((new-id min-test-id))
 	(let ((test-id-found #f))
 	   (lambda (id)
 	     (set! test-id-found id))
-	   (db:dbdat-get-db mtdb)
+	   (dbr:dbdat-dbh mtdb)
 	   "SELECT id FROM tests WHERE id=?;"
 	  ;; if test-id-found then need to try again
 	  (if test-id-found
 	      (loop (+ new-id 1))
@@ -3458,11 +2675,11 @@
   (debug:print-info 0 *default-log-port* "Adjusting test ids in megatest.db for run " run-id)
   (let ((min-test-id (* run-id 30000)))
      (lambda (testrec)
        (let* ((test-id (vector-ref testrec (db:field->number "id" db:test-record-fields))))
-	 (db:adj-test-id (db:dbdat-get-db mtdb) min-test-id test-id)))
+	 (db:adj-test-id (dbr:dbdat-dbh mtdb) min-test-id test-id)))
 ;; 1. move test ids into the 30k * run_id range
 ;; 2. move step ids into the 30k * run_id range
@@ -3469,21 +2686,21 @@
 (define (db:prep-megatest.db-for-migration mtdb)
   (let* ((run-ids (db:get-all-run-ids mtdb)))
      (lambda (run-id)
        (let ((testrecs (db:get-all-tests-info-by-run-id mtdb run-id)))
-	 (db:prep-megatest.db-adj-test-ids (db:dbdat-get-db mtdb) run-id testrecs)))
+	 (db:prep-megatest.db-adj-test-ids (dbr:dbdat-dbh mtdb) run-id testrecs)))
-;; Get test data using test_id, run-id is not used
+;; Get test data using test_id; run-id is now passed along in place of the former #f
 (define (db:get-test-info-by-id dbstruct run-id test-id)
-   #f ;; run-id
+   run-id
-   (lambda (db)
+   (lambda (dbdat db)
      (let ((res #f))
        (sqlite3:for-each-row ;; attemptnum added to hold pid of top process (not Megatest) controlling a test
 	(lambda (id run-id testname state status event-time host cpuload diskfree uname rundir-id item-path run_duration final-logf-id comment short-dir-id attemptnum archived last-update)
 	  ;;                0    1       2      3      4        5       6      7        8     9     10      11          12          13           14         15          16
 	  (set! res (vector id run-id testname state status event-time host cpuload diskfree uname rundir-id item-path run_duration final-logf-id comment short-dir-id attemptnum archived last-update)))
@@ -3498,11 +2715,11 @@
 (define (db:get-test-info-by-ids dbstruct run-id test-ids)
-   (lambda (db)
+   (lambda (dbdat db)
      (let ((res '()))
 	(lambda (a . b)
 	  ;;                 0    1       2      3      4        5       6      7        8     9     10      11          12          13       14
 	  (set! res (cons (apply vector a b) res)))
@@ -3514,11 +2731,11 @@
 (define (db:get-test-info dbstruct run-id test-name item-path)
-   (lambda (db)
+   (lambda (dbdat db)
      (let ((res #f))
 	(lambda (a . b)
 	  (set! res (apply vector a b)))
@@ -3529,11 +2746,11 @@
 (define (db:test-get-rundir-from-test-id dbstruct run-id test-id)
-   (lambda (db)
+   (lambda (dbdat db)
       "SELECT rundir FROM tests WHERE id=?;"
       #f ;; default result
@@ -3545,11 +2762,11 @@
 		      " as target from tests inner join runs on tests.run_id = where runs.runname = ? and target = ?  ;")))
     #f ;; this is for the main runs db
     #f ;; does not modify db
-    (lambda (db)
+    (lambda (dbdat db)
 	(lambda (test-name item-path test-time target )
 	  (set! res (cons (vector test-name item-path test-time) res)))
@@ -3563,11 +2780,11 @@
 (define (db:teststep-set-status! dbstruct run-id test-id teststep-name state-in status-in comment logfile)
-   (lambda (db)
+   (lambda (dbdat db)
       "INSERT OR REPLACE into test_steps (test_id,stepname,state,status,event_time,comment,logfile) VALUES(?,?,?,?,?,?,?);"
       test-id teststep-name state-in status-in (current-seconds)
       (if comment comment "")
@@ -3579,11 +2796,11 @@
   ;; TODO: figure out why status is the key field rather than state (note:  CONSTRAINT test_steps_constraint UNIQUE (test_id,stepname,state) )
-   (lambda (db)
+   (lambda (dbdat db)
       "UPDATE test_steps set status='DELETED' where test_id=?" ;;  and run_id=? !! - run_id not in table (bummer) TODO: get run_id into schema for test_steps
@@ -3592,26 +2809,26 @@
 (define (db:get-steps-for-test dbstruct run-id test-id)
-   (lambda (db)
+   (lambda (dbdat db)
      (let* ((res '()))
 	(lambda (id test-id stepname state status event-time logfile comment)
 	  (set! res (cons (vector id test-id stepname state status event-time (if (string? logfile) logfile "") comment) res)))
 	"SELECT id,test_id,stepname,state,status,event_time,logfile,comment FROM test_steps WHERE status != 'DELETED' AND test_id=? ORDER BY id ASC;" ;; event_time DESC,id ASC;
        (reverse res)))))
- (define (db:get-steps-info-by-id dbstruct  test-step-id)
+ (define (db:get-steps-info-by-id dbstruct run-id test-step-id)
-    #f 
+    run-id
-    (lambda (db)
+    (lambda (dbdat db)
       (let* ((res (vector #f #f #f #f #f #f #f #f #f)))
        (lambda (id test-id stepname state status event-time logfile comment last-update)
          (set! res (vector id test-id stepname state status event-time (if (string? logfile) logfile "") comment last-update)))
@@ -3622,11 +2839,11 @@
 (define (db:get-steps-data dbstruct run-id test-id)
-   (lambda (db)
+   (lambda (dbdat db)
      (let ((res '()))
 	(lambda (id test-id stepname state status event-time logfile)
 	  (set! res (cons (vector id test-id stepname state status event-time (if (string? logfile) logfile "")) res)))
@@ -3636,18 +2853,18 @@
 ;; T E S T  D A T A 
-(define (db:get-data-info-by-id dbstruct  test-data-id)
+(define (db:get-data-info-by-id dbstruct run-id test-data-id)
   (let* ((stmt        "SELECT id,test_id, category, variable, value, expected, tol, units, comment, status, type, last_update FROM test_data WHERE id=? ORDER BY id ASC;")) ;; event_time DESC,id ASC;
-     #f 
+     run-id
-     (lambda (db)
-       (let* ((stmth (db:get-cache-stmth dbstruct db stmt))
+     (lambda (dbdat db)
+       (let* ((stmth (db:get-cache-stmth dbdat #f db stmt))
 	      (res   (sqlite3:fold-row
 		      (lambda (res id test-id  category variable value expected tol units comment status type last-update)
 			(vector id test-id  category variable value expected tol units comment status type last-update))
 		      (vector #f #f #f #f #f #f #f #f #f #f #f #f)
@@ -3661,24 +2878,24 @@
 ;;    if one or more are fail (any case) then set test status to PASS, non "pass" or "fail" are ignored
 (define (db:test-data-rollup dbstruct run-id test-id status)
   (let* ((fail-count 0)
 	 (pass-count 0))
-     dbstruct #f #f
-     (lambda (db)
+     dbstruct run-id #f
+     (lambda (dbdat db)
 	(lambda (fcount pcount)
 	  (set! fail-count fcount)
 	  (set! pass-count pcount))
 	"SELECT (SELECT count(id) FROM test_data WHERE test_id=? AND status like 'fail') AS fail_count,
              (SELECT count(id) FROM test_data WHERE test_id=? AND status like 'pass') AS pass_count;"
 	test-id test-id)
        ;; Now rollup the counts to the central megatest.db
-       (db:general-call dbstruct 'pass-fail-counts (list pass-count fail-count test-id))
+       (db:general-call dbstruct run-id 'pass-fail-counts (list pass-count fail-count test-id))
        ;; if the test is not FAIL then set status based on the fail and pass counts.
-       (db:general-call dbstruct 'test_data-pf-rollup (list test-id test-id test-id test-id))))))
+       (db:general-call dbstruct run-id 'test_data-pf-rollup (list test-id test-id test-id test-id))))))
 ;; each section is a rule except "final" which is the final result
 ;; [rule-5]
 ;; operator in
@@ -3761,11 +2978,11 @@
 (define (db:csv->test-data dbstruct run-id test-id csvdata)
   (debug:print 4 *default-log-port* "test-id " test-id ", csvdata: " csvdata)
    dbstruct #f #f
-   (lambda (db)
+   (lambda (dbdat db)
      (let* ((csvlist (csv->list (make-csv-reader
 				 (open-input-string csvdata)
 				 '((strip-leading-whitespace? #t)
 				   (strip-trailing-whitespace? #t)))))) ;; (csv->list csvdata)))
@@ -3824,11 +3041,11 @@
 (define (db:read-test-data dbstruct run-id test-id categorypatt)
   (let* ((res '()))
      dbstruct #f #f
-     (lambda (db)
+     (lambda (dbdat db)
 	(lambda (id test_id category variable value expected tol units comment status type)
 	  (set! res (cons (vector id test_id category variable value expected tol units comment status type) res)))
 	"SELECT id,test_id,category,variable,value,expected,tol,units,comment,status,type FROM test_data WHERE test_id=? AND category LIKE ? ORDER BY category,variable;" test-id categorypatt)
@@ -3838,11 +3055,11 @@
 (define (db:read-test-data-varpatt dbstruct run-id test-id categorypatt varpatt)
   (let* ((res '()))
      dbstruct #f #f
-     (lambda (db)
+     (lambda (dbdat db)
 	(lambda (id test_id category variable value expected tol units comment status type)
 	  (set! res (cons (vector id test_id category variable value expected tol units comment status type) res)))
 	"SELECT id,test_id,category,variable,value,expected,tol,units,comment,status,type FROM test_data WHERE test_id=? AND category LIKE ? AND variable LIKE ? ORDER BY category,variable;" test-id categorypatt varpatt)
@@ -3854,11 +3071,11 @@
 (define (db:get-run-ids-matching-target dbstruct keynames target res runname testpatt statepatt statuspatt)
    dbstruct #f #f
-   (lambda (db)
+   (lambda (dbdat db)
      (let* ((row-ids '())
 	    (keystr (string-intersperse 
 		     (map (lambda (key val)
 			    (conc key " like '" val "'"))
@@ -3881,11 +3098,11 @@
 	 (tstsqry (conc "SELECT rundir FROM tests WHERE run_id=? AND " testqry " AND state LIKE '" statepatt "' AND status LIKE '" statuspatt "' ORDER BY event_time ASC;")))
-     (lambda (db)
+     (lambda (dbdat db)
 	(lambda (p)
 	  (set! res (cons p res)))
@@ -3895,11 +3112,11 @@
 (define (db:test-toplevel-num-items dbstruct run-id testname)
-   (lambda (db)
+   (lambda (dbdat db)
      (let ((res 0))
 	(lambda (num-items)
 	  (set! res num-items))
@@ -3946,11 +3163,11 @@
     (else msg))) ;; rpc
 ;; ; This is to be the big daddy call NOPE: Replaced by db:set-state-status-and-roll-up-items
 ;; ;
 ;; define (db:test-set-state-status dbstruct run-id test-id state status msg)
-;;  (let ((dbdat  (db:get-db dbstruct run-id)))
+;;  (let ((dbdat  (db:get-subdb dbstruct run-id)))
 ;;    (if (member state '("LAUNCHED" "REMOTEHOSTSTART"))
 ;; 	(db:general-call dbdat 'set-test-start-time (list test-id)))
 ;;    ;; (if msg
 ;;    ;; 	(db:general-call dbdat 'state-status-msg (list state status msg test-id))
 ;;    ;; 	(db:general-call dbdat 'state-status     (list state status test-id)))
@@ -3961,11 +3178,11 @@
 ;;    (mt:process-triggers dbstruct run-id test-id state status)))
 ;; state is the priority rollup of all states
 ;; status is the priority rollup of all completed statesfu
-;; if test-name is an integer work off that instead of test-name test-path
+;; if test-name is an integer work off that as test-id instead of test-name test-path
 (define (db:set-state-status-and-roll-up-items dbstruct run-id test-name item-path state status comment)
   ;; establish info on incoming test followed by info on top level test
   ;; BBnote - for mode itemwait, linkage between upstream test & matching item status is propagated to run queue in db:prereqs-not-met
   (let* ((testdat      (if (number? test-name)
@@ -3979,26 +3196,26 @@
          (tl-testdat   (db:get-test-info dbstruct run-id test-name ""))
          (tl-test-id   (if tl-testdat
 			   (db:test-get-id tl-testdat)
     (if (member state '("LAUNCHED" "REMOTEHOSTSTART")) 
-	(db:general-call dbstruct 'set-test-start-time (list test-id)))
+	(db:general-call dbstruct run-id 'set-test-start-time (list test-id)))
     (mutex-lock! *db-transaction-mutex*)
-     dbstruct #f #f
-     (lambda (db)
+     dbstruct run-id #f
+     (lambda (dbdat db)
        (let ((tr-res
                (lambda ()
                  ;; NB// Pass the db so it is part fo the transaction
                  (db:test-set-state-status db run-id test-id state status comment) ;; this call sets the item state/status
                  (if (not (equal? item-path "")) ;; only roll up IF incoming test is an item
-                     (let* ((state-status-counts  (db:get-all-state-status-counts-for-test dbstruct run-id test-name item-path state status)) ;; item-path is used to exclude current state/status of THIS test
-			      						  (state-stauses (db:roll-up-rules state-status-counts state status))
-                          (newstate (car state-stauses))
-                          (newstatus (cadr state-stauses)))
+                     (let* ((state-status-counts  (db:get-all-state-status-counts-for-test db run-id test-name item-path state status)) ;; item-path is used to exclude current state/status of THIS test
+			    (state-statuses        (db:roll-up-rules state-status-counts state status))
+                          (newstate (car state-statuses))
+                          (newstatus (cadr state-statuses)))
                        (debug:print 4 *default-log-port* "BB> tl-test-id="tl-test-id" ; "test-name":"item-path" newstate="newstate" newstatus="newstatus" len(sscs)="(length state-status-counts)  " state-status-counts: "
 							(apply conc
                   (map (lambda (x)
                      		(with-output-to-string (lambda () (pp (dbr:counts->alist x)))) " | "))
@@ -4011,94 +3228,93 @@
          (if (and test-id state status (equal? status "AUTO")) 
              (db:test-data-rollup dbstruct run-id test-id status))
 (define (db:roll-up-rules state-status-counts state status)
-		(let* ((running     (length (filter (lambda (x)
-                          (member (dbr:counts-state x) *common:running-states*))
-                                 state-status-counts)))
-           (bad-not-started      (length (filter (lambda (x)
-                                      (and (equal? (dbr:counts-state x) "NOT_STARTED") 
-                                        (not (member (dbr:counts-status x)  *common:not-started-ok-statuses*))))
-																	state-status-counts)))
-           (all-curr-states      (common:special-sort  ;; worst -> best (sort of)
-                                    (delete-duplicates
-                                      (if (and state (not (member state *common:dont-roll-up-states*)))
-                                          (cons state (map dbr:counts-state state-status-counts))
-                                          (map dbr:counts-state state-status-counts)))
-                                                  *common:std-states* >))
-           (all-curr-statuses    (common:special-sort  ;; worst -> best
-                                    (delete-duplicates
-                                      (if (and state status (not (member state *common:dont-roll-up-states*)))
-                                          (cons status (map dbr:counts-status state-status-counts))
-                                          (map dbr:counts-status state-status-counts)))
-                                                   *common:std-statuses* >))
-           (non-completes        (filter (lambda (x)
-							 										 (not (member x (cons "COMPLETED" *common:dont-roll-up-states*))))
-						       									all-curr-states))
-			     (preq-fails        (filter (lambda (x)
-							 								(equal? x "PREQ_FAIL"))
-						       							all-curr-statuses))
-           (num-non-completes (length non-completes))
- 					 (newstate          (cond
-															((> running 0)           "RUNNING")            ;; anything running, call the situation running
-                              ((> (length preq-fails) 0) "NOT_STARTED")
-															((> bad-not-started 0)   "COMPLETED")          ;; we have an ugly situation, it is completed in the sense we cannot do more.
-															((> num-non-completes 0) (car non-completes))  ;;  (remove (lambda (x)(equal? "COMPLETED" x)) all-curr-states))) ;; only rollup DELETED if all DELETED
-															(else                    (car all-curr-states))))
-           (newstatus         (cond
-                              ((> (length preq-fails) 0)  "PREQ_FAIL")
-                              ((or (> bad-not-started 0)
-                                   (and (equal? newstate "NOT_STARTED")
-                                      (> num-non-completes 0)))
-                                            "STARTED")
-                              (else (car all-curr-statuses)))))
- 					(debug:print-info 2 *default-log-port*
-                                         "\n--> probe db:set-state-status-and-roll-up-items: "
-                                         "\n--> state-status-counts: "(map dbr:counts->alist state-status-counts)
-                                         "\n--> running:             "running
-                                         "\n--> bad-not-started:     "bad-not-started
-                                         "\n--> non-non-completes:   "num-non-completes
-                                         "\n--> non-completes:       "non-completes
-                                         "\n--> all-curr-states:     "all-curr-states
-                                         "\n--> all-curr-statuses:     "all-curr-statuses
-                                         "\n--> newstate              "newstate
-                                         "\n--> newstatus            "newstatus
-                                         "\n\n")
-                        ;; NB// Pass the db so it is part of the transaction
-         (list newstate newstatus)))
+  ;; Compute the rolled-up state and status from state-status-counts, a list
+  ;; of dbr:counts records, optionally folding in the incoming state/status
+  ;; (db:set-state-status-and-roll-up-run passes #f #f for both).
+  ;; Returns a two element list: (newstate newstatus).
+  ;; NOTE(review): if state-status-counts is '() and no state is folded in,
+  ;; all-curr-states looks like it would be empty and the (car ...) fallbacks
+  ;; below would fail - confirm callers cannot reach that case.
+  ;;
+  ;; running: number of records (not a sum of their counts) whose state is
+  ;; a member of *common:running-states*
+  (let* ((running     (length (filter (lambda (x)
+					(member (dbr:counts-state x) *common:running-states*))
+				      state-status-counts)))
+	 ;; records in state NOT_STARTED whose status is not listed in
+	 ;; *common:not-started-ok-statuses*
+	 (bad-not-started      (length (filter (lambda (x)
+						 (and (equal? (dbr:counts-state x) "NOT_STARTED") 
+						      (not (member (dbr:counts-status x)  *common:not-started-ok-statuses*))))
+					       state-status-counts)))
+	 ;; distinct states seen; the incoming state is included only when it is
+	 ;; non-#f and not a member of *common:dont-roll-up-states*
+	 (all-curr-states      (common:special-sort  ;; worst -> best (sort of)
+				(delete-duplicates
+				 (if (and state (not (member state *common:dont-roll-up-states*)))
+				     (cons state (map dbr:counts-state state-status-counts))
+				     (map dbr:counts-state state-status-counts)))
+				*common:std-states* >))
+	 ;; distinct statuses seen; the incoming status is included only when both
+	 ;; state and status are non-#f and state is not in *common:dont-roll-up-states*
+	 (all-curr-statuses    (common:special-sort  ;; worst -> best
+				(delete-duplicates
+				 (if (and state status (not (member state *common:dont-roll-up-states*)))
+				     (cons status (map dbr:counts-status state-status-counts))
+				     (map dbr:counts-status state-status-counts)))
+				*common:std-statuses* >))
+	 ;; states that are neither "COMPLETED" nor in *common:dont-roll-up-states*,
+	 ;; in the order produced for all-curr-states
+	 (non-completes        (filter (lambda (x)
+					 (not (member x (cons "COMPLETED" *common:dont-roll-up-states*))))
+				       all-curr-states))
+	 ;; non-empty exactly when "PREQ_FAIL" is among all-curr-statuses
+	 (preq-fails        (filter (lambda (x)
+				      (equal? x "PREQ_FAIL"))
+				    all-curr-statuses))
+	 (num-non-completes (length non-completes))
+	 ;; the first matching clause wins, so clause order sets the priority
+	 (newstate          (cond
+			     ((> running 0)           "RUNNING")            ;; anything running, call the situation running
+			     ((> (length preq-fails) 0) "NOT_STARTED")
+			     ((> bad-not-started 0)   "COMPLETED")          ;; we have an ugly situation, it is completed in the sense we cannot do more.
+			     ((> num-non-completes 0) (car non-completes))  ;;  (remove (lambda (x)(equal? "COMPLETED" x)) all-curr-states))) ;; only rollup DELETED if all DELETED
+			     (else                    (car all-curr-states))))
+	 ;; newstatus depends on the newstate chosen just above (let* is sequential)
+	 (newstatus         (cond
+			     ((> (length preq-fails) 0)  "PREQ_FAIL")
+			     ((or (> bad-not-started 0)
+				  (and (equal? newstate "NOT_STARTED")
+				       (> num-non-completes 0)))
+			      "STARTED")
+			     (else (car all-curr-statuses)))))
+    ;; NOTE(review): the probe text names db:set-state-status-and-roll-up-items
+    ;; and one label reads non-non-completes although the value printed is
+    ;; num-non-completes; the strings are runtime output and are left untouched.
+    (debug:print-info 2 *default-log-port*
+		      "\n--> probe db:set-state-status-and-roll-up-items: "
+		      "\n--> state-status-counts: "(map dbr:counts->alist state-status-counts)
+		      "\n--> running:             "running
+		      "\n--> bad-not-started:     "bad-not-started
+		      "\n--> non-non-completes:   "num-non-completes
+		      "\n--> non-completes:       "non-completes
+		      "\n--> all-curr-states:     "all-curr-states
+		      "\n--> all-curr-statuses:     "all-curr-statuses
+		      "\n--> newstate              "newstate
+		      "\n--> newstatus            "newstatus
+		      "\n\n")
+    ;; NB// Pass the db so it is part of the transaction
+    ;; NOTE(review): no db is referenced in this procedure; the NB remark on the
+    ;; previous line looks left over from the calling code - confirm and drop.
+    (list newstate newstatus)))
 (define (db:set-state-status-and-roll-up-run dbstruct run-id curr-state curr-status)
     (mutex-lock! *db-transaction-mutex*)
-     dbstruct #f #f
-     (lambda (db)
+     dbstruct run-id #f
+     (lambda (dbdat db)
        (let ((tr-res
                (lambda ()
-                   (let* ((state-status-counts  (db:get-all-state-status-counts-for-run dbstruct run-id))
-													(state-stauses (db:roll-up-rules state-status-counts #f #f ))
-                          (newstate (car state-stauses))
-                          (newstatus (cadr state-stauses))) 
-                    (if (or (not (eq? newstate curr-state)) (not (eq?  newstatus curr-status)))
-                   (db:set-run-state-status dbstruct run-id newstate newstatus )))))))
+                   (let* ((state-status-counts (db:get-all-state-status-counts-for-run db run-id))
+			  (state-statuses      (db:roll-up-rules state-status-counts #f #f ))
+                          (newstate            (car state-statuses))
+                          (newstatus           (cadr state-statuses))) 
+		     (if (or (not (eq? newstate curr-state)) (not (eq?  newstatus curr-status)))
+			 (db:set-run-state-status db run-id newstate newstatus )))))))
          (mutex-unlock! *db-transaction-mutex*)
 (define (db:get-all-state-status-counts-for-run dbstruct run-id)
  (let* ((test-count-recs (db:with-db
-                                  dbstruct #f #f
-                                  (lambda (db)
-                                    (sqlite3:map-row
-                                     (lambda (state status count)
-                                        (make-dbr:counts state: state status: status count: count))
-                                     db
-                                     "SELECT state,status,count(id) FROM tests WHERE run_id=?  GROUP BY state,status;"
-                                     run-id )))))
+                          dbstruct #f #f
+                          (lambda (dbdat db)
+                            (sqlite3:map-row
+                             (lambda (state status count)
+                               (make-dbr:counts state: state status: status count: count))
+                             db
+                             "SELECT state,status,count(id) FROM tests WHERE run_id=?  GROUP BY state,status;"
+                             run-id )))))
 ;; BBnote: db:get-all-state-status-counts-for-test returns dbr:counts object aggregating state and status of items of a given test, *not including rollup state/status*
@@ -4107,12 +3323,12 @@
 (define (db:get-all-state-status-counts-for-test dbstruct run-id test-name item-path item-state-in item-status-in)
   (let* ((test-info   (db:get-test-info dbstruct run-id test-name item-path))
          (item-state  (or item-state-in (db:test-get-state test-info))) 
          (item-status (or item-status-in (db:test-get-status test-info)))
          (other-items-count-recs (db:with-db
-                                  dbstruct #f #f
-                                  (lambda (db)
+                                  dbstruct run-id #f
+                                  (lambda (dbdat db)
                                      (lambda (state status count)
                                        (make-dbr:counts state: state status: status count: count))
                                      ;; ignore current item because we have changed its value in the current transation so this select will see the old value.
@@ -4157,11 +3373,11 @@
 (define (db:test-get-logfile-info dbstruct run-id test-name)
-   (lambda (db)
+   (lambda (dbdat db)
      (let ((res #f))
 	(lambda (path final_logf)
 	  ;; (let ((path       (sdb:qry 'getstr path-id))
 	  ;;       (final_logf (sdb:qry 'getstr final_logf-id)))
@@ -4343,29 +3559,30 @@
     (hash-table-set! *logged-in-clients* client-signature (current-seconds))
     '(#t "successful login"))))
-(define (db:general-call dbstruct stmtname params)
+(define (db:general-call dbstruct run-id stmtname params)
+  ;; Why is db:lookup-query above not used here to get the query?
   (let ((query (let ((q (alist-ref (if (string? stmtname)
 				       (string->symbol stmtname)
  		 (if q (car q) #f))))
-     dbstruct #f #f
-     (lambda (db)
+     dbstruct run-id #f
+     (lambda (dbdat db)
        (apply sqlite3:execute db query params)
 ;; get a summary of state and status counts to calculate a rollup
 (define (db:get-state-status-summary dbstruct run-id testname)
   (let ((res   '()))
-     dbstruct #f #f
-     (lambda (db)
+     dbstruct run-id #f
+     (lambda (dbdat db)
 	(lambda (state status count)
 	  (set! res (cons (vector state status count) res)))
 	"SELECT state,status,count(state) FROM tests WHERE run_id=? AND testname=? AND item_path='' GROUP BY state,status;"
@@ -4375,11 +3592,11 @@
 (define (db:get-latest-host-load dbstruct raw-hostname)
   (let* ((hostname (string-substitute "\\..*$" "" raw-hostname))
          (res  (cons -1 0)))
      dbstruct #f #f
-     (lambda (db)
+     (lambda (dbdat db)
         (lambda (cpuload update-time)  (set! res (cons cpuload update-time)))
         "SELECT tr.cpuload, tr.update_time FROM test_rundat tr, tests t WHERE AND tr.cpuload != -1  AND ORDER BY tr.update_time DESC LIMIT 1;"
         hostname))) res ))
@@ -4420,11 +3637,11 @@
 	 (keyvals #f)
 	 (tests-hash (make-hash-table)))
     ;; first look up the key values from the run selected by run-id
      dbstruct #f #f
-     (lambda (db)
+     (lambda (dbdat db)
 	(lambda (a . b)
 	  (set! keyvals (cons a b)))
 	(conc "SELECT " selstr " FROM runs WHERE id=? ORDER BY event_time DESC;") run-id)))
@@ -4431,11 +3648,11 @@
     (if (not keyvals)
 	(let ((prev-run-ids '()))
 	   dbstruct #f #f
-	   (lambda (db)
+	   (lambda (dbdat db)
 	     (apply sqlite3:for-each-row
 		    (lambda (id)
 		      (set! prev-run-ids (cons id prev-run-ids)))
 		    (conc "SELECT id FROM runs WHERE " qrystr " AND id != ?;") (append keyvals (list run-id)))))
@@ -4467,14 +3684,14 @@
 ;; Function recursively checks if <db>.journal exists; if yes means db busy; call itself after delayed interval
 ;; return the sqlite3 db handle if possible
 (define (db:delay-if-busy dbdat #!key (count 6))
   (if (not (configf:lookup *configdat* "server" "delay-on-busy")) 
-      (and dbdat (db:dbdat-get-db dbdat))
+      (and dbdat (dbr:dbdat-dbh dbdat))
       (if dbdat
-	  (let* ((dbpath (db:dbdat-get-path dbdat))
-		 (db     (db:dbdat-get-db   dbdat)) ;; we'll return this so (db:delay--if-busy can be called inline
+	  (let* ((dbpath (dbr:dbdat-dbfile dbdat))
+		 (db     (dbr:dbdat-dbh   dbdat)) ;; we'll return this so (db:delay--if-busy can be called inline
 		 (dbfj   (conc dbpath "-journal")))
 	    (if (handle-exceptions
 		   (debug:print-info 0 *default-log-port* "WARNING: failed to test for existance of " dbfj ", exn=" exn)
@@ -4510,11 +3727,11 @@
   (let ((res '()))
-     (lambda (db)
+     (lambda (dbdat db)
 	(lambda (id itempath state status run_duration logf comment)
 	  (set! res (cons (vector id itempath state status run_duration logf comment) res)))
 	"SELECT id,item_path,state,status,run_duration,final_logf,comment FROM tests WHERE testname=? AND item_path != '' AND run_id=?;" ;; BUG! WHY NO run_id?
@@ -4529,11 +3746,11 @@
 ;; returns a hash table of tags to tests
 (define (db:get-tests-tags dbstruct)
    dbstruct #f #f
-   (lambda (db)
+   (lambda (dbdat db)
      (let* ((res     (make-hash-table)))
 	(lambda (testname tags-in)
 	  (let ((tags (string-split tags-in ",")))
@@ -4551,11 +3768,11 @@
   (let ((res   #f))
-     (lambda (db)
+     (lambda (dbdat db)
 	(lambda (id testname author owner description reviewed iterated avg_runtime avg_disk tags jobgroup)
 	  (set! res (vector id testname author owner description reviewed iterated avg_runtime avg_disk tags jobgroup)))
 	"SELECT id,testname,author,owner,description,reviewed,iterated,avg_runtime,avg_disk,tags,jobgroup FROM test_meta WHERE testname=?;"
@@ -4563,26 +3780,26 @@
 ;; create a new record for a given testname
 (define (db:testmeta-add-record dbstruct testname)
   (db:with-db dbstruct #f #f 
-	      (lambda (db)
+	      (lambda (dbdat db)
 		 "INSERT OR IGNORE INTO test_meta (testname,author,owner,description,reviewed,iterated,avg_runtime,avg_disk,tags) VALUES (?,'','','','','','','','');" testname))))
 ;; update one of the testmeta fields
 (define (db:testmeta-update-field dbstruct testname field value)
   (db:with-db dbstruct #f #f 
-	      (lambda (db)
+	      (lambda (dbdat db)
 		 (conc "UPDATE test_meta SET " field "=? WHERE testname=?;") value testname))))
 (define (db:testmeta-get-all dbstruct)
   (db:with-db dbstruct #f #f 
-	      (lambda (db)
+	      (lambda (dbdat db)
 		(let ((res '()))
 		   (lambda (a . b)
 		     (set! res (cons (apply vector a b) res)))
@@ -4830,50 +4047,145 @@
 ;; To sync individual run
 (define (db:get-run-record-ids dbstruct target run keynames test-patt)
-(let ((backcons (lambda (lst item)(cons item lst))))
-    (db:with-db
-     dbstruct #f #f 
-     (lambda (db)
-        (let* ((keystr (string-intersperse 
-		     (map (lambda (key val)
+   (let* ((backcons (lambda (lst item)(cons item lst)))
+         (all_tests '())
+         (all_test_steps '())
+         (all_test_data '())
+         (keystr (string-intersperse 
+	                  (map (lambda (key val)
 			    (conc key " like '" val "'"))
-			  keynames 
-			  (string-split target "/"))
-		     " AND "))
+			     keynames 
+			     (string-split target "/"))
+		              " AND ")
+         )
          (run-qry (conc "SELECT id FROM runs  WHERE " keystr  " and runname='" run"'"))
-         (test-qry (conc "SELECT id FROM tests WHERE run_id in (" run-qry ") and testname like '" test-patt "'")))
-         (print run-qry)
-         (print test-qry) 
-	 `((runs       . ,(sqlite3:fold-row backcons '() db run-qry))
-	   (tests      . ,(sqlite3:fold-row backcons '() db test-qry))
-	   (test_steps . ,(sqlite3:fold-row backcons '() db (conc "SELECT id FROM test_steps WHERE test_id in (" test-qry ")")))
-	   (test_data  . ,(sqlite3:fold-row backcons '() db (conc "SELECT id FROM test_data  WHERE test_id in (" test-qry ")" )))
-	   ))))))
+         (test-qry (conc "SELECT id FROM tests WHERE run_id in (" run-qry ") and testname like '" test-patt "'"))
+         (run_ids 
+           (db:with-db dbstruct #f #f 
+             (lambda (dbdat db)
+               (sqlite3:fold-row backcons '() db run-qry))
+           )
+         )
+        )
+        (for-each
+          (lambda (run_id)
+            (set! all_tests 
+             (append 
+               (map (lambda (x) (cons x run_id))                
+                (db:with-db dbstruct run_id #f 
+                  (lambda (dbdat db)
+                    (sqlite3:fold-row backcons '() db (conc "SELECT id FROM tests WHERE run_id in (" run_id ") and testname like '" test-patt "'"))
+                  )
+                )
+               ) all_tests
+              )
+            )
+            (set! all_test_steps 
+              (append 
+                (map (lambda (x) (cons x run_id))
+                  (db:with-db dbstruct run_id #f 
+                    (lambda (dbdat db)
+                      (sqlite3:fold-row backcons '() db (conc "SELECT id FROM test_steps  WHERE test_id in (" test-qry ")"))
+                    )
+                  )
+                ) all_test_steps
+              )
+            )
+            (set! all_test_data 
+              (append 
+                (map (lambda (x) (cons x run_id))
+                  (db:with-db dbstruct run_id #f 
+                    (lambda (dbdat db)
+                      (sqlite3:fold-row backcons '() db (conc "SELECT id FROM test_data  WHERE test_id in (" test-qry ")"))
+                    )
+                  )
+                ) all_test_data
+              )
+            )
+          )
+          run_ids
+        )
+      `((runs       . ,run_ids)
+        (tests      . ,all_tests)
+        (test_steps . ,all_test_steps)
+        (test_data  . ,all_test_data)
+       )
+   )
 ;; Just for sync, procedures to make sync easy
-;; get an alist of record ids changed since time since-time
-;;   '((runs . (1 2 3 ...))(steps . (5 6 7 ...) ...))
+;; get an alist of run ids and test/run, test_step/run pairs changed since time since-time
+;;   '((runs . (1 2 3 ...))(tests . ((5 . 1) (6 . 3) (6 . 2) (7 . 1)  ...
+;; Retrieves record IDs from the database based on the timestamp of their last update.
+;; The function takes two arguments: dbstruct, which represents the database structure, and since-time, a timestamp; records with last_update >= since-time are selected.
+;; It first defines the helper backcons, which takes a list and an item and adds the item to the front of the list (it is the fold function given to sqlite3:fold-row).
+;; It then initializes all_tests, all_test_steps and all_test_data to empty lists; as currently written only all_tests is filled in.
+;; The run db numbers changed since since-time come from db:get-changed-run-ids (changed_run_dbs), and all run ids are read from the runs table (all_run_ids).
+;; The full list of run ids is then filtered down to those whose (modulo run-id 100) is a member of changed_run_dbs (changed_run_ids).
+;; For each changed run id, the ids of tests with last_update >= since-time are fetched and appended to all_tests as (test-id . run-id) pairs.
+;; Test step ids and test data ids are not collected yet, and run_stats ids are not queried.
+;; run_ids comes from a separate query: the ids of runs whose own last_update >= since-time.
+;; Finally, the function returns an alist with two entries: runs (run_ids) and tests (all_tests).
 (define (db:get-changed-record-ids dbstruct since-time)
   ;; no transaction, allow the db to be accessed between the big queries
-  (let ((backcons (lambda (lst item)(cons item lst))))
-    (db:with-db
-     dbstruct #f #f 
-     (lambda (db)
-       `((runs       . ,(sqlite3:fold-row backcons '() db "SELECT id FROM runs  WHERE last_update>=?" since-time))
-	 (tests      . ,(sqlite3:fold-row backcons '() db "SELECT id FROM tests WHERE last_update>=?" since-time))
-	 (test_steps . ,(sqlite3:fold-row backcons '() db "SELECT id FROM test_steps WHERE last_update>=?" since-time))
-	 (test_data  . ,(sqlite3:fold-row backcons '() db "SELECT id FROM test_data  WHERE last_update>=?" since-time))
-	 ;; (test_meta  . ,(fold-row backcons '() db "SELECT id FROM test_meta  WHERE last_update>?" since-time))
-	 (run_stats  . ,(sqlite3:fold-row backcons '() db "SELECT id FROM run_stats  WHERE last_update>=?" since-time))
-	 )))))
+  (let* ((backcons (lambda (lst item)(cons item lst)))
+         (all_tests '())
+         (all_test_steps '())
+         (all_test_data '())
+         (changed_run_dbs (db:get-changed-run-ids since-time)) ;; gets the rundb numbers
+         (all_run_ids 
+          (db:with-db dbstruct #f #f 
+            (lambda (dbdat db)
+              (sqlite3:fold-row backcons '() db "SELECT id FROM runs"))
+          )
+         )
+         (changed_run_ids (filter (lambda (run) (member (modulo run 100) changed_run_dbs)) all_run_ids))
+         ;; TODO: couldn't we just use changed_run_ids for run_ids?
+         (run_ids 
+          (db:with-db dbstruct #f #f 
+            (lambda (dbdat db)
+              (sqlite3:fold-row backcons '() db "SELECT id FROM runs  WHERE last_update>=?" since-time))
+          )
+         )
+        )
+        (for-each
+          (lambda (run_id)
+           (set! all_tests 
+             (append 
+               (map (lambda (x) (cons x run_id))                
+                (db:with-db dbstruct run_id #f 
+                  (lambda (dbdat db)
+                    (sqlite3:fold-row backcons '() db "SELECT id FROM tests  WHERE run_id=? and last_update>=?" run_id since-time)
+                  )
+                )
+               ) all_tests
+              )
+            )
+          )
+          changed_run_ids
+        )
+        (debug:print 2 *default-log-port*  "run_ids = " run_ids)
+        (debug:print 2 *default-log-port*  "all_tests = " all_tests)
+      `((runs       . ,run_ids)
+        (tests      . ,all_tests)
+       )
+  )
 ;; Extract ods file from the db
@@ -4880,16 +4192,17 @@
 ;; runspatt is a comma delimited list of run patterns
 ;; keypatt-alist must contain *all* keys with an associated pattern: '( ("KEY1" "%") .. )
 (define (db:extract-ods-file dbstruct outputfile keypatt-alist runspatt pathmod)
+  (assert #f "FATAL: call to db:extract-ods-file which is not ported yet.")
   (let* ((keysstr  (string-intersperse (map car keypatt-alist) ","))
 	 (keyqry   (string-intersperse (map (lambda (p)(conc (car p) " LIKE ? ")) keypatt-alist) " AND "))
 	 (numkeys  (length keypatt-alist))
 	 (test-ids '())
-	 (dbdat    (db:get-db dbstruct))
-	 (db       (db:dbdat-get-db dbdat))
+	 (dbdat    (db:get-subdb dbstruct))
+	 (db       (dbr:dbdat-dbh dbdat))
 	 (windows  (and pathmod (substring-index "\\" pathmod)))
 	 (tempdir  (conc "/tmp/" (current-user-name) "/" runspatt "_" (random 10000) "_" (current-process-id)))
 	 (runsheader (append (list "Run Id" "Runname") ; 0 1
 			     (map car keypatt-alist)   ; + N = length keypatt-alist
 			     (list "Testname"          ; 2
@@ -4996,11 +4309,330 @@
 	   (debug:print 0 *default-log-port* "WARNING: path given, " outputfile " is relative, prefixing with current directory")
 	   (conc (current-directory) "/" outputfile)))
     ;; brutal clean up
-    (stack-push! (dbr:dbstruct-dbstack dbstruct) dbdat)
+    (dbfile:add-dbdat dbstruct #f dbdat)
     (system "rm -rf tempdir")))
 ;; (db:extract-ods-file db "outputfile.ods" '(("sysname" "%")("fsname" "%")("datapath" "%")) "%")
+;; moving watch dogs here due to dependencies
+;; currently the primary job of the watchdog is to run the sync back to megatest.db from the db in /tmp
+;; if we are on the homehost and we are a server (by definition we are on the homehost if we are a server)
+(define (common:readonly-watchdog dbstruct)
+  ;; Watchdog loop for read-only operation. Until *time-to-exit* is set it
+  ;; compares the modification times of the golden megatest.db and the tmp
+  ;; copy, and calls (db:multi-db-sync dbstruct 'old2new) when the golden
+  ;; file is the newer one and has been untouched for at least three seconds.
+  ;; Successive passes are spaced at least three seconds apart.
+  ;;
+  ;; NOTE(review): dbr:dbstruct-mtdb, dbr:dbstruct-tmpdb and db:dbdat-get-path
+  ;; are the older accessor names -- confirm they still exist after the
+  ;; dbfile.scm record changes.
+  (thread-sleep! 0.05) ;; short pause so startup can complete
+  (debug:print-info 13 *default-log-port* "common:readonly-watchdog entered.")
+  (let* ((cool-off    3) ;; minimum number of seconds between passes
+         (golden-path (db:dbdat-get-path (dbr:dbstruct-mtdb dbstruct)))
+         (tmp-path    (db:dbdat-get-path (dbr:dbstruct-tmpdb dbstruct))))
+    (debug:print-info 0 *default-log-port* "Read-only periodic sync thread started.")
+    (let loop ((last-sync-time 0))
+      (debug:print-info 13 *default-log-port* "loop top tmp-mtpath=" tmp-path " golden-mtpath=" golden-path)
+      (let ((elapsed (- (current-seconds) last-sync-time)))
+        (debug:print-info 13 *default-log-port* "duration-since-last-sync=" elapsed)
+        ;; sleep away whatever is left of the cool-off period
+        (if (and (not *time-to-exit*) (< elapsed cool-off))
+            (thread-sleep! (- cool-off elapsed)))
+        (cond
+         (*time-to-exit* #t)
+         (else
+          (let ((golden-mtime (file-modification-time golden-path))
+                (tmp-mtime    (file-modification-time tmp-path)))
+            ;; sync only when the golden db is newer than the tmp copy and has
+            ;; NOT been touched in the past three seconds, this way multiple
+            ;; servers won't fight to sync back
+            (if (and (> golden-mtime tmp-mtime)
+                     (< golden-mtime (- (current-seconds) 3)))
+                (let ((res (db:multi-db-sync dbstruct 'old2new)))
+                  (debug:print-info 13 *default-log-port* "rosync called, " res " records transferred.")))
+            (loop (current-seconds)))))))
+    (debug:print-info 0 *default-log-port* "Exiting readonly-watchdog timer, *time-to-exit* = " *time-to-exit* " pid=" (current-process-id) " mtpath=" golden-path)))
+;; Get a lock from the no-sync-db for the from-db, then copy the from-db to the to-db, otherwise return #f
+(define (db:lock-and-sync no-sync-db from-db to-db)
+  ;; Copy the file from-db over to-db while holding the no-sync-db lock that
+  ;; is keyed on from-db.
+  ;;
+  ;;   no-sync-db : handle given to db:no-sync-get-lock and db:no-sync-del!
+  ;;   from-db    : path of the file to copy, also used as the lock key
+  ;;   to-db      : destination path, overwritten if it exists
+  ;;
+  ;; Returns #t when the lock was obtained and the copy finished, #f when the
+  ;; lock could not be obtained. If the copy raises an error the lock is
+  ;; released first and the error is then re-raised to the caller.
+  (assert (not *db-sync-in-progress*) "FATAL: db:lock-and-sync called while a sync is in progress.")
+  (let ((gotlock (car (db:no-sync-get-lock no-sync-db from-db))))
+    (if gotlock
+	(begin
+	  (handle-exceptions
+	   exn
+	   (begin
+	     ;; do not leave a stale lock behind when the copy fails
+	     (db:no-sync-del! no-sync-db from-db)
+	     (abort exn))
+	   (file-copy from-db to-db #t))
+	  (db:no-sync-del! no-sync-db from-db)
+	  #t)
+	(begin
+	  (debug:print 0 *default-log-port* "could not get lock for " from-db " from no-sync-db")
+	  #f))))
+;; sync for filesystem local db writes
+(define (db:run-lock-and-sync no-sync-db)
+  ;; Copy each *.db file found in <tmp-area>/.megatest to *toppath*/.megatest
+  ;; when the tmp file is newer than the destination or the destination is
+  ;; missing. Every copy goes through db:lock-and-sync with no-sync-db.
+  ;;
+  ;; Returns an alist of (file-name . elapsed-milliseconds), one entry per file
+  ;; for which a copy was attempted. The key is the plain file name, e.g.
+  ;; "1.db".
+  (let* ((tmp-area       (common:get-db-tmp-area))
+	 (dbfiles        (glob (conc tmp-area "/.megatest/*.db")))
+	 (sync-durations (make-hash-table)))
+    ;; (debug:print-info 0 *default-log-port* "lock-and-sync, dbfiles: "dbfiles)
+    (for-each
+     (lambda (file)
+       (let* ((fname (conc (pathname-file file) ".db"))
+	      (fulln (conc *toppath* "/.megatest/" fname))
+	      ;; 1 and 0 stand in for the mtime of a missing source / destination
+	      (time1 (if (file-exists? file)
+			 (file-modification-time file)
+			 (begin
+			   (debug:print-info 0 *default-log-port* "Sync - I do not see file "file)
+			   1)))
+	      (time2 (if (file-exists? fulln)
+			 (file-modification-time fulln)
+			 (begin
+			   (debug:print-info 0 *default-log-port* "Sync - I do not see file "fulln)
+			   0)))
+	      (changed (> time1 time2))
+	      (do-cp (cond
+		      ((not (file-exists? fulln)) ;; shouldn't happen, but this might recover
+		       (debug:print-info 0 *default-log-port* "File "fulln" not found! Copying "fname" to "fulln)
+		       #t)
+		      ;; A changed file is copied right away. An extra wait such as
+		      ;; (> (- (current-seconds) time1) 3) was considered but is not
+		      ;; applied. The former "last copy" clause on *time-to-exit*
+		      ;; could never be reached after the changed test, so it is gone.
+		      (else changed))))
+	 (if do-cp
+	     (let ((start-time (current-milliseconds)))
+	       (debug:print-info 0 *default-log-port* "sync copy file: " fname", delta: " (- time1 time2) " seconds")
+	       (db:lock-and-sync no-sync-db file fulln)
+	       ;; fname already ends in ".db"; do not append the suffix again
+	       (hash-table-set! sync-durations fname (- (current-milliseconds) start-time))))))
+     dbfiles)
+    (hash-table->alist sync-durations)))
+;; straight forward copy based sync
+;;  1. for each .db file
+;;  2. next unless file changed since last sync cycle
+;;  3. next if time delta /tmp file to MTRA less than 3 seconds
+;;  4. get a lock for the file in nosyncdb
+;;  5. copy the file
+;;  6. when copy is done release the lock
+;;  DONE
+(define (server:writable-watchdog-copysync dbstruct) ;; copy-based sync watchdog: runs db:run-lock-and-sync repeatedly until *time-to-exit* is set; dbstruct is not referenced in this body
+  (thread-sleep! 0.05) ;; delay for startup
+  (let ((legacy-sync        (common:run-sync?)) ;; the sync loop below only runs when this is true
+	(sync-stale-seconds (configf:lookup-number *configdat* "server" "sync-stale-seconds" default: 300)) ;; bound but not referenced below
+ 	(debug-mode         (debug:debug-mode 1)) ;; bound but not referenced below
+ 	(last-time          (current-seconds))     ;; last time through the sync loop (bound but not referenced below)
+ 	(no-sync-db         (db:open-no-sync-db)) ;; passed to db:run-lock-and-sync and stored in *no-sync-db*
+ 	(sync-duration      0)  ;; run time of the sync in milliseconds (bound but not referenced below)
+	(tmp-area           (common:get-db-tmp-area))) ;; only used in the startup message
+    ;; Sync moved to http-transport keep-running loop. NOTE(review): the loop below still calls db:run-lock-and-sync - confirm this remark is current
+    (set! *no-sync-db* no-sync-db) ;; make the no sync db available to api calls
+    (debug:print-info 2 *default-log-port* "Periodic copy-based sync thread started. syncer is copy-sync, tmp-area is " tmp-area)
+    (debug:print-info 3 *default-log-port* "watchdog starting. syncer is copy-sync pid="(current-process-id));;  " this-wd-num="this-wd-num)
+    (if (and legacy-sync (not *time-to-exit*))
+ 	(begin
+ 	  (debug:print-info 0 *default-log-port* "Server running, periodic copy-based sync started.")
+	  (let loop ()
+	    ;; run the sync and print out durations (the value returned by db:run-lock-and-sync)
+	    (debug:print-info 0 *default-log-port* "Sync durations: "(db:run-lock-and-sync no-sync-db))
+	    ;; keep going unless time to exit
+	    ;; the delay loop sleeps one second per step while count is below 6 and *time-to-exit* is not set
+	    (if (not *time-to-exit*)
+		(let delay-loop ((count 0))
+		  ;;(debug:print-info 13 *default-log-port* "delay-loop top; count="count" pid="(current-process-id)" this-wd-num="this-wd-num" *time-to-exit*="*time-to-exit*)
+		  (if (and (not *time-to-exit*)
+			   (< count 6)) ;; was 11, changing to 4. NOTE(review): the limit in the code is 6
+		      (begin
+			(thread-sleep! 1)
+			(delay-loop (+ count 1))))
+		  (if (not *time-to-exit*) (loop)))) ;; starts the next pass; NOTE(review): this sits inside delay-loop and the call above is not a tail call, so it is evaluated again as each nested delay-loop call returns
+	    ;; ==> 	       ;; time to exit, close the no-sync db here
+	    ;; ==> 	       (db:no-sync-close-db no-sync-db stmt-cache)
+	    (if (common:low-noise-print 30)
+		(debug:print-info 0 *default-log-port* "Exiting watchdog timer, *time-to-exit* = "
+				  *time-to-exit*" pid="(current-process-id) )))))))
+(define (server:writable-watchdog-deltasync dbstruct) ;; delta-based sync watchdog: loops calling db:sync-to-megatest.db until *time-to-exit* is set
+  ;; This is awful complex and convoluted. Plan to redo?
+  ;; for now ... skip it. NOTE(review): as written this does not look runnable - see the notes on legacy-sync, tmp-area and mtpath below
+  (thread-sleep! 0.05) ;; delay for startup
+  (let ((legacy-sync  (common:run-sync?))) ;; NOTE(review): the third closing paren ends the binding list here, so the forms on the next lines are body expressions rather than bindings - confirm
+       (sync-stale-seconds (configf:lookup-number *configdat* "server" "sync-stale-seconds" default: 300))
+ 	(debug-mode   (debug:debug-mode 1))
+ 	(last-time    (current-seconds))
+ 	(no-sync-db   (db:open-no-sync-db))
+ 	(stmt-cache   #f) ;; (dbr:dbstruct-stmt-cache dbstruct))
+ 	(sync-duration 0) ;; run time of the sync in milliseconds
+       (subdbs       (hash-table-values (dbr:dbstruct-subdbs dbstruct)))) ;; NOTE(review): the last closing paren here ends the let opened above
+   (set! *no-sync-db* no-sync-db) ;; make the no sync db available to api calls
+   (debug:print-info 2 *default-log-port* "Periodic sync thread started.")
+   (debug:print-info 3 *default-log-port* "watchdog starting. legacy-sync is " legacy-sync" pid="(current-process-id)  );;  " this-wd-num="this-wd-num)
+   (if (and legacy-sync (not *time-to-exit*))
+ 	(begin
+ 	  (debug:print-info 0 *default-log-port* "Server running, periodic sync started.")
+	  (let loop ()
+ 	    ;; sync for filesystem local db writes
+ 	    ;; the mutex taken here is released after will-sync has been computed and *db-sync-in-progress* updated
+ 	    (mutex-lock! *db-multi-sync-mutex*)
+ 	       (let* ((start-file (conc tmp-area "/.start-sync")) ;; NOTE(review): tmp-area is only bound inside the for-each further down
+ 		      (end-file   (conc tmp-area "/.end-sync"))
+ 		      (need-sync        (>= *db-last-access* *db-last-sync*)) ;; no sync since last write
+ 		      (sync-in-progress *db-sync-in-progress*)
+ 		      (min-intersync-delay (configf:lookup-number *configdat* "server" "minimum-intersync-delay" default: 5))
+ 		      (should-sync      (and (not *time-to-exit*)
+ 					     (> (- (current-seconds) *db-last-sync*) min-intersync-delay))) ;; sync every five seconds minimum, deprecated logic, can probably be removed
+ 		      (start-time       (current-seconds))
+ 		      (cpu-load-adj     (alist-ref 'adj-proc-load (common:get-normalized-cpu-load #f)))
+ 		      (mt-mod-time      (file-modification-time mtpath)) ;; NOTE(review): mtpath is only bound inside the for-each further down
+ 		      (last-sync-start  (if (common:file-exists? start-file)
+ 					    (file-modification-time start-file)
+ 					    0))
+ 		      (last-sync-end    (if (common:file-exists? end-file)
+ 					    (file-modification-time end-file)
+ 					    10))
+ 		      (sync-period      (+ 3 (* cpu-load-adj 30))) ;; as adjusted load increases increase the sync period
+ 		      (recently-synced  (and (< (- start-time mt-mod-time) sync-period) ;; not useful if sync didn't modify megatest.db!
+ 					     (< mt-mod-time last-sync-start)))
+ 		      (sync-done        (<= last-sync-start last-sync-end))
+ 		      (sync-stale       (> start-time (+ last-sync-start sync-stale-seconds)))
+ 		      (will-sync        (and (not *time-to-exit*)       ;; do not start a sync if we are in the process of exiting
+ 					     (or need-sync should-sync)
+ 					     (or sync-done sync-stale)
+ 					     (not sync-in-progress)
+ 					     (not recently-synced))))
+ 		 (debug:print-info 13 *default-log-port* "WD writable-watchdog top of loop.  need-sync="need-sync" sync-in-progress=" sync-in-progress
+ 				   " should-sync="should-sync" start-time="start-time" mt-mod-time="mt-mod-time" recently-synced="recently-synced" will-sync="will-sync
+ 				   " sync-done=" sync-done " sync-period=" sync-period)
+ 		 (if (and (> sync-period 5)
+ 			  (common:low-noise-print 30 "sync-period"))
+ 		     (debug:print-info 0 *default-log-port* "Increased sync period due to long sync times, sync took: " sync-period " seconds."))
+ 		 ;; (if recently-synced (debug:print-info 0 *default-log-port* "Skipping sync due to recently-synced flag=" recently-synced))
+ 		 ;; (debug:print-info 0 *default-log-port* "need-sync: " need-sync " sync-in-progress: " sync-in-progress " should-sync: " should-sync " will-sync: " will-sync)
+ 		 (if will-sync (set! *db-sync-in-progress* #t))
+ 		 (mutex-unlock! *db-multi-sync-mutex*)
+ 		 (if will-sync
+ 		     (let (;; (max-sync-duration  (configf:lookup-number *configdat* "server" "max-sync-duration")) ;; KEEPING THIS AVAILABLE BUT SHOULD NOT USE, I'M PRETTY SURE IT DOES NOT WORK!
+ 			   (sync-start         (current-milliseconds)))
+ 		       (with-output-to-file start-file (lambda ()(print (current-process-id))))
+ 		       ;; put lock here
+ 		       ;; (if (or (not max-sync-duration)
+ 		       ;;        (< sync-duration max-sync-duration)) ;; NOTE: db:sync-to-megatest.db keeps track of time of last sync and syncs incrementally
+ 		      ;;
+ 		       (for-each
+ 			(lambda (subdb)
+ 			  (let* (;;(dbstruct (db:setup))
+ 				 (mtdb       (dbr:subdb-mtdb subdb))
+ 				 (mtpath     (db:dbdat-get-path mtdb))
+ 				 (tmp-area   (common:get-db-tmp-area))
+ 				 (res        (db:sync-to-megatest.db dbstruct no-sync-db: no-sync-db))) ;; did we sync any data? If so need to set the db touched flag to keep the server alive. NOTE(review): called with dbstruct, not subdb, on every iteration
+ 			    (set! sync-duration (- (current-milliseconds) sync-start))
+ 			    (if (> res 0) ;; some records were transferred, keep the db alive
+ 				(begin
+ 				  (mutex-lock! *heartbeat-mutex*)
+ 				  (set! *db-last-access* (current-seconds))
+ 				  (mutex-unlock! *heartbeat-mutex*)
+ 				  (debug:print-info 0 *default-log-port* "sync called, " res " records transferred."))
+ 				(debug:print-info 2 *default-log-port* "sync called but zero records transferred")))
+ 			  )
+ 			subdbs)))
+ 		 (if will-sync
+ 		     (begin
+ 		       (mutex-lock! *db-multi-sync-mutex*)
+ 		       (set! *db-sync-in-progress* #f)
+ 		       (set! *db-last-sync* start-time)
+ 		       (with-output-to-file end-file (lambda ()(print (current-process-id))))
+ 		       ;; release lock here
+ 		       (mutex-unlock! *db-multi-sync-mutex*)))
+ 		 (if (and debug-mode
+ 			  (> (- start-time last-time) 60))
+ 		     (begin
+ 		       (set! last-time start-time)
+ 		       (debug:print-info 4 *default-log-port* "timestamp -> " (seconds->time-string (current-seconds)) ", time since start -> " (seconds->hr-min-sec (- (current-seconds) *time-zero*))))))
+	       ;; keep going unless time to exit
+	       ;; the delay loop sleeps one second per step while count is below 6 and *time-to-exit* is not set
+	       (if (not *time-to-exit*)
+		   (let delay-loop ((count 0))
+		     ;;(debug:print-info 13 *default-log-port* "delay-loop top; count="count" pid="(current-process-id)" this-wd-num="this-wd-num" *time-to-exit*="*time-to-exit*)
+		     (if (and (not *time-to-exit*)
+			      (< count 6)) ;; was 11, changing to 4. NOTE(review): the limit in the code is 6
+			 (begin
+			   (thread-sleep! 1)
+			   (delay-loop (+ count 1))))
+		     (if (not *time-to-exit*) (loop))))
+;; 	       ;; time to exit, close the no-sync db here
+;; 	       (db:no-sync-close-db no-sync-db stmt-cache)
+	       (if (common:low-noise-print 30)
+		   (debug:print-info 0 *default-log-port* "Exiting watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id) )))) 
+(define (std-exit-procedure)
+  ;; Standard exit hook. Flags the process as exiting, then runs the cleanup
+  ;; (closing database handles, http connections and the log port) in its own
+  ;; thread and waits for that thread to finish. A second thread only prints
+  ;; progress messages around a sleep; it is started but not joined.
+  ;; Always returns 0.
+  ;;(common:telemetry-log-close)
+  (on-exit (lambda () 0))
+  ;;(debug:print-info 13 *default-log-port* "std-exit-procedure called; *time-to-exit*="*time-to-exit*)
+  (let ((no-hurry (not *time-to-exit*))) ;; false (hurry up) when an exit was already under way
+    (if no-hurry (set! *time-to-exit* #t))
+    (debug:print-info 4 *default-log-port* "starting exit process, finalizing databases.")
+    (if (and no-hurry (debug:debug-mode 18))
+	(rmt:print-db-stats))
+    (let* ((cleanup
+	    ;; thread body for cleaning up, give it five seconds
+	    (lambda ()
+	      (if *dbstruct-dbs* (db:close-all *dbstruct-dbs*)) ;; one second allocated
+	      (if (and *task-db* (sqlite3:database? (cdr *task-db*)))
+		  (let ((task-dbh (cdr *task-db*)))
+		    (sqlite3:interrupt! task-dbh)
+		    (sqlite3:finalize! task-dbh #t)
+		    ;; (vector-set! *task-db* 0 #f)
+		    (set! *task-db* #f)))
+	      (http-client#close-all-connections!)
+	      ;; (if (and *runremote*
+	      ;;          (remote-conndat *runremote*))
+	      ;;     (begin
+	      ;;       (http-client#close-all-connections!))) ;; for http-client
+	      ;; close the log port unless it already is the error port, then
+	      ;; send further logging to the error port
+	      (if (not (eq? *default-log-port* (current-error-port)))
+		  (close-output-port *default-log-port*))
+	      (set! *default-log-port* (current-error-port))))
+	   (patience
+	    ;; thread body that announces the wait; give the clean up a few
+	    ;; seconds to do its stuff (five normally, two when hurrying)
+	    (lambda ()
+	      (debug:print 4 *default-log-port* "Attempting clean exit. Please be patient and wait a few seconds...")
+	      (thread-sleep! (if no-hurry 5 2))
+	      (debug:print 4 *default-log-port* " ... done")))
+	   (th1 (make-thread cleanup "Cleanup db exit thread"))
+	   (th2 (make-thread patience "clean exit")))
+      (thread-start! th1)
+      (thread-start! th2)
+      (thread-join! th1)))
+  0)

ADDED   dbfile.scm
Index: dbfile.scm
--- /dev/null
+++ dbfile.scm
@@ -0,0 +1,1188 @@
+;; Copyright 2017, Matthew Welland.
+;; This file is part of Megatest.
+;;     Megatest is free software: you can redistribute it and/or modify
+;;     it under the terms of the GNU General Public License as published by
+;;     the Free Software Foundation, either version 3 of the License, or
+;;     (at your option) any later version.
+;;     Megatest is distributed in the hope that it will be useful,
+;;     but WITHOUT ANY WARRANTY; without even the implied warranty of
+;;     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;;     GNU General Public License for more details.
+;;     You should have received a copy of the GNU General Public License
+;;     along with Megatest.  If not, see <http://www.gnu.org/licenses/>.
+(declare (unit dbfile))
+;; (declare (uses debugprint))
+(declare (uses commonmod))
+(module dbfile
+	*
+  (import scheme
+	  chicken
+	  data-structures
+	  extras
+	  matchable)
+(import (prefix sqlite3 sqlite3:)
+	posix typed-records srfi-18 srfi-1
+	srfi-69
+        srfi-19
+	stack
+	files
+	ports
+	commonmod
+	)
+;; (import debugprint)
+;;  R E C O R D S
+;; A single Megatest area, with its multiple dbs, is managed in a dbstruct.
+;; Every field has the default shown; dbfile:setup fills in areapath and
+;; tmppath after creating the record.
+(defstruct dbr:dbstruct
+  (areapath  #f) ;; set by dbfile:setup from its areapath argument
+  (homehost  #f)
+  (tmppath   #f) ;; set by dbfile:setup from its tmppath argument
+  (read-only #f)
+  (subdbs (make-hash-table)) ;; db name (see dbfile:run-id->dbname) => dbr:subdb
+  )
+;; NOTE: Need one dbr:subdb per main.db, 1.db ...
+;; Holds what is known about one db file of an area: its names/paths, the
+;; handle on the db in the area (mtdbdat) and a stack of handles on the
+;; tmp copy of that db.  Created and registered by dbfile:init-subdb.
+(defstruct dbr:subdb
+  (dbname      #f) ;; .megatest/1.db
+  (mtdbfile    #f) ;; mtrah/.megatest/1.db
+  (mtdbdat     #f) ;; only need one of these for syncing
+  ;; (dbdats      (make-hash-table))  ;; id => dbdat 
+  (tmpdbfile   #f) ;; /tmp/.../.megatest/1.db
+  ;; (refndbfile  #f) ;; /tmp/.../.megatest/1.db_ref
+  (dbstack     (make-stack)) ;; stack for tmp dbr:dbdat,
+  (homehost    #f) ;; not used yet
+  (on-homehost #f) ;; not used yet
+  (read-only   #f)
+  (last-sync   0)
+  (last-write  (current-seconds)) ;; initialised to the creation time of the record
+  )                ;; goal is to converge on one struct for an area but for now it is too confusing
+;; Need to keep db handles and cached statements together.
+;; One dbr:dbdat wraps one opened sqlite3 db; dbfile:open-sqlite3-db builds
+;; these records.
+(defstruct dbr:dbdat
+  (dbfile      #f) ;; path of the db file the handle was opened on
+  (dbh         #f)     ;; the sqlite3 db handle
+  (stmt-cache  (make-hash-table)) ;; cache for prepared statements
+  (read-only   #f) ;; dbfile:open-sqlite3-db sets this to #t when the file is not writable
+  (birth-sec   (current-seconds))) ;; creation time of the record
+;; Module level state.
+(define *dbstruct-dbs* #f) ;; the dbr:dbstruct for the area; created once by dbfile:setup
+(define *db-open-mutex* (make-mutex))
+(define *db-access-mutex* (make-mutex)) ;; used in common.scm
+(define *no-sync-db*   #f) ;; cached handle on no-sync.db; set by dbfile:open-no-sync-db
+(define *db-sync-in-progress* #f) ;; #t while db:lock-and-delta-sync is running a sync
+(define *db-with-db-mutex*    (make-mutex))
+(define *max-api-process-requests* 0)
+(define *api-process-request-count* 0)
+(define *db-write-access*     #t)
+(define *db-last-sync*        0)                 ;; last time the sync to megatest.db happened
+(define *db-multi-sync-mutex* (make-mutex))      ;; protect access to *db-sync-in-progress*, *db-last-sync*
+;; Report a database exception on stderr: dump the call chain, print the
+;; caller supplied message parts and then the message, arguments and
+;; location properties of the exn condition.
+(define (db:generic-error-printout exn . message)
+  (let ((exn-prop (lambda (prop)
+                    ((condition-property-accessor 'exn prop) exn))))
+    (print-call-chain (current-error-port))
+    (apply dbfile:print-err message)
+    (dbfile:print-err
+     ", error: "     (exn-prop 'message)
+     ", arguments: " (exn-prop 'arguments)
+     ", location: "  (exn-prop 'location))))
+;; Map a run-id to a lookup key: the run-id itself, or the symbol main
+;; when run-id is #f.
+(define (dbfile:run-id->key run-id)
+  (if run-id
+      run-id
+      'main))
+;; Finalize the sqlite3 handle db, first finalizing any statements found
+;; for it in stmt-cache (stmt-cache may be #f).
+;;
+;; Returns #t when db was finalized, #f when db is not an sqlite3 database
+;; or when try-num has reached zero.  On an exception a message is printed,
+;; the thread sleeps three seconds, db is interrupted and the close is tried
+;; again with try-num reduced by one.
+(define (db:safely-close-sqlite3-db db stmt-cache #!key (try-num 3))
+  (cond
+   ((<= try-num 0) #f)
+   (else
+    (handle-exceptions
+        exn
+      (begin
+        (print "Attempt to safely close sqlite3 db failed. Trying again. exn=" exn)
+        (thread-sleep! 3)
+        (sqlite3:interrupt! db)
+        (db:safely-close-sqlite3-db db stmt-cache try-num: (- try-num 1)))
+      (cond
+       ((sqlite3:database? db)
+        (let ((cached (and stmt-cache (hash-table-ref/default stmt-cache db #f))))
+          (when cached
+            (for-each sqlite3:finalize! (hash-table-values cached)))
+          (sqlite3:finalize! db)
+          #t))
+       (else
+        (dbfile:print-err "db:safely-close-sqlite3-db: " db " is not an sqlite3 db")
+        #f))))))
+;; Close all opened run-id dbs held by dbstruct.
+;;
+;; For each subdb this closes every tmp db handle currently parked on the
+;; subdb's dbstack and then the mtdbdat handle, using
+;; db:safely-close-sqlite3-db.
+;;
+;; NOTE: no sync is done here (db:sync-touched is deliberately NOT called).
+;; Instead we rely on a server to be started when there are writes, even if
+;; the server itself is not going to be used as a server.
+;;
+;; Returns #t if dbstruct is a dbr:dbstruct, #f otherwise.
+(define (db:close-all dbstruct)
+  (if (dbr:dbstruct? dbstruct)
+      (begin
+        (for-each
+         (lambda (subdb)
+           (let ((mtdbdat (dbr:subdb-mtdbdat subdb)))
+             ;; the handles waiting on the stack
+             (for-each
+              (lambda (dbdat)
+                (db:safely-close-sqlite3-db (dbr:dbdat-dbh        dbdat)
+                                            (dbr:dbdat-stmt-cache dbdat)))
+              (stack->list (dbr:subdb-dbstack subdb)))
+             ;; mtdbdat defaults to #f in dbr:subdb, only close it if it was set
+             (if mtdbdat
+                 (db:safely-close-sqlite3-db (dbr:dbdat-dbh        mtdbdat)
+                                             (dbr:dbdat-stmt-cache mtdbdat)))))
+         (hash-table-values (dbr:dbstruct-subdbs dbstruct)))
+        #t)
+      #f))
+;; ;; set up a single db (e.g. main.db, 1.db ... etc.)
+;; ;;
+;; (define (db:setup-db dbstruct areapath run-id)
+;;   (let* ((dbname   (db:run-id->dbname run-id))
+;; 	 (dbstruct (hash-table-ref/default dbstructs dbname #f)))
+;;     (if dbstruct
+;; 	dbstruct
+;; 	(let* ((dbstruct-new (make-dbr:dbstruct)))
+;; 	  (db:open-db dbstruct-new run-id areapath: areapath do-sync: #t)
+;; 	  (hash-table-set! dbstructs dbname dbstruct-new)
+;; 	  dbstruct-new))))
+;; ; Returns the dbdat for a particular dbfile inside the area
+;; ;;
+;; (define (dbr:dbstruct-get-dbdat dbstruct dbfile)
+;;   (hash-table-ref/default (dbr:dbstruct-dbdats dbstruct) dbfile #f))
+;; (define (dbr:dbstruct-dbdat-put! dbstruct dbfile db)
+;;   (hash-table-set! (dbr:dbstruct-dbdats dbstruct) dbfile db))
+;; (define (db:run-id->first-num run-id)
+;;   (let* ((s (number->string run-id))
+;; 	 (l (string-length s)))
+;;     (substring s (- l 1) l)))
+;; 1234 => 4/1234.db
+;;   #f => 0/main.db
+;;   (abandoned the idea of num/db)
+;; Path of the db file for run-id below the directory apath.
+(define (dbfile:run-id->path apath run-id)
+  (let ((dbname (dbfile:run-id->dbname run-id)))
+    (conc apath "/" dbname)))
+;; Join the directory apath and the db name dbname with a "/".
+(define (db:dbname->path apath dbname)
+  (string-append (->string apath) "/" (->string dbname)))
+;; Name of the db file, relative to the area, for run-id:
+;;   a number  => ".megatest/<run-id modulo 100>.db"
+;;   #f        => ".megatest/main.db"
+;;   otherwise => run-id itself, unchanged
+;; POTENTIAL BUG: because of the last case a run-id that is neither #f nor a
+;; number ends up being used as a db file name.
+(define (dbfile:run-id->dbname run-id)
+  (if (number? run-id)
+      (conc ".megatest/" (modulo run-id 100) ".db")
+      (if run-id
+          run-id
+          (conc ".megatest/main.db"))))
+;; Make the dbstruct for the area, remember it in *dbstruct-dbs* and record
+;; areapath and tmppath in it.  If *dbstruct-dbs* is already set a warning is
+;; printed and the existing dbstruct is returned.  do-sync is not used here.
+;; (Original note: called in http-transport and replicated in rmt.scm for
+;; *local* access.)
+(define (dbfile:setup do-sync areapath tmppath)
+  (if *dbstruct-dbs*
+      (begin
+        (dbfile:print-err "WARNING: dbfile:setup called when *dbstruct-dbs* is already initialized")
+        ;; TODO: when multiple areas are supported, this optimization will be a hazard
+        *dbstruct-dbs*)
+      (let ((new-dbstruct (make-dbr:dbstruct)))
+        (set! *dbstruct-dbs* new-dbstruct)
+        (dbr:dbstruct-areapath-set! new-dbstruct areapath)
+        (dbr:dbstruct-tmppath-set!  new-dbstruct tmppath)
+        new-dbstruct)))
+;; Look up the dbr:subdb registered in dbstruct for run-id; #f if there is
+;; none yet.
+(define (dbfile:get-subdb dbstruct run-id)
+  (hash-table-ref/default (dbr:dbstruct-subdbs dbstruct)
+                          (dbfile:run-id->dbname run-id)
+                          #f))
+;; Register subdb in dbstruct under the db name derived from run-id.
+(define (dbfile:set-subdb dbstruct run-id subdb)
+  (let ((dbname (dbfile:run-id->dbname run-id))
+        (subdbs (dbr:dbstruct-subdbs dbstruct)))
+    (hash-table-set! subdbs dbname subdb)))
+;; (define *dbfile:num-handles-in-use* 0)
+;; Get/open a database
+;;    if run-id => get run specific db
+;;    if #f     => get main db
+;;    if run-id is a string treat it as a filename
+;;    if db already open - return inmem
+;;    if db not open, open inmem, rundb and sync then return inmem
+;;    inuse gets set automatically for rundb's
+;; Take a previously opened db handle (dbdat) for run-id off the stack of
+;; available handles.  Returns #f when the stack is empty.  The subdb for
+;; run-id must already exist (see dbfile:init-subdb).
+(define (dbfile:get-dbdat dbstruct run-id)
+  (let ((handles (dbr:subdb-dbstack (dbfile:get-subdb dbstruct run-id))))
+    (and (not (stack-empty? handles))
+         (stack-pop! handles))))
+;; Return a previously opened db handle to the stack of available handles
+;; of the subdb for run-id.  Returns dbdat.
+(define (dbfile:add-dbdat dbstruct run-id dbdat)
+  (stack-push! (dbr:subdb-dbstack (dbfile:get-subdb dbstruct run-id))
+               dbdat)
+  dbdat)
+;; Set up a subdb: open the db for run-id in the area (sync-mode 0, journal
+;; mode left alone), record the area and tmp db paths and register the new
+;; dbr:subdb in dbstruct.
+;; Returns the new subdb - but callers shouldn't really use it.
+(define (dbfile:init-subdb dbstruct run-id init-proc)
+  (let* ((mtdb-file  (dbfile:run-id->path (dbr:dbstruct-areapath dbstruct) run-id))
+         (tmpdb-file (dbfile:run-id->path (dbr:dbstruct-tmppath  dbstruct) run-id))
+         (subdb      (make-dbr:subdb
+                      dbname:    (dbfile:run-id->dbname run-id)
+                      mtdbfile:  mtdb-file
+                      tmpdbfile: tmpdb-file
+                      mtdbdat:   (dbfile:open-sqlite3-db mtdb-file init-proc
+                                                         sync-mode: 0
+                                                         journal-mode: #f)))) ;; "WAL"
+    (dbfile:set-subdb dbstruct run-id subdb)
+    subdb))
+;; Returns a dbdat with dbh and dbfile path for run-id.
+;; NOTE: the handle is on the /tmp db file!
+;;  1. if needed the subdb for the given run-id is set up first
+;;  2. a handle waiting on the subdb's stack is used if there is one
+;;  3. otherwise a new handle is opened on the tmp db (sync-mode 0, journal
+;;     mode WAL) and returned.  The new handle is NOT added to the stack,
+;;     doing so would short-circuit the "one db handle per thread" model.
+(define (dbfile:open-db dbstruct run-id init-proc)
+  (cond
+   ((dbfile:get-subdb dbstruct run-id)
+    (or (dbfile:get-dbdat dbstruct run-id)
+        (dbfile:open-sqlite3-db
+         (dbfile:run-id->path (dbr:dbstruct-tmppath dbstruct) run-id)
+         init-proc
+         sync-mode:    0
+         journal-mode: "WAL")))
+   (else ;; subdb not yet defined
+    (dbfile:init-subdb dbstruct run-id init-proc)
+    (dbfile:open-db dbstruct run-id init-proc))))
+;; COMBINE dbfile:open-sqlite-db and dbfile:lock-create-open
+;; this stuff is for initial debugging, please remove it when
+;; this code stabilizes
+;; db file => number of times it has been opened by this process
+(define *dbopens* (make-hash-table))
+;; Count one more open of dbfile and return the new count.  When the file has
+;; been opened more than once (this should NOT be happening) an INFO message
+;; is printed, rate limited through common:low-noise-print.
+(define (dbfile:inc-db-open dbfile)
+  (let ((opens (add1 (hash-table-ref/default *dbopens* dbfile 0))))
+    (when (and (> opens 1)
+               (common:low-noise-print 15 "db-opens"))
+      (dbfile:print-err "INFO: db " dbfile " has been opened " opens " times!"))
+    (hash-table-set! *dbopens* dbfile opens)
+    opens))
+;; Open the sqlite3 db file dbpath (through dbfile:cautious-open-database)
+;; and wrap the handle in a dbr:dbdat.
+;;   init-proc    - passed on; run on the db when the file is newly created
+;;   sync-mode    - value for PRAGMA synchronous (default 0)
+;;   journal-mode - value for PRAGMA journal_mode, #f to leave it alone
+;;   NOTE: returns a dbdat not a dbstruct!
+(define (dbfile:open-sqlite3-db dbpath init-proc #!key (sync-mode 0)(journal-mode #f))
+  (let* ((db           (dbfile:cautious-open-database dbpath init-proc sync-mode journal-mode))
+         ;; Test for write access only AFTER the open: the open creates the
+         ;; file if it is missing and testing a file that does not exist yet
+         ;; would flag every freshly created db as read-only.
+         (write-access (file-write-access? dbpath)))
+    (dbfile:inc-db-open dbpath)
+    (make-dbr:dbdat dbfile: dbpath dbh: db read-only: (not write-access))))
+;; Print params (as with print) on the current error port, then exit the
+;; process with status 1.
+(define (dbfile:print-and-exit . params)
+  (let ((emit (lambda () (apply print params))))
+    (with-output-to-port (current-error-port) emit))
+  (exit 1))
+;; Print params (as with print) on the current error port.
+(define (dbfile:print-err . params)
+  (let ((stderr (current-error-port))
+        (emit   (lambda () (apply print params))))
+    (with-output-to-port stderr emit)))
+;; Open the sqlite3 database file fname, retrying on errors, and return the
+;; raw sqlite3 db handle.
+;;
+;;   fname        - path to the db file
+;;   init-proc    - #f or a procedure of one argument (the db handle); it is
+;;                  called only when the db file did not exist before the open
+;;   sync-mode    - if not #f, the value used for "PRAGMA synchronous"
+;;   journal-mode - if not #f, the value used for "PRAGMA journal_mode"
+;;   tries-left   - remaining attempts (default 500)
+;;
+;; Behaviour:
+;;   - if fname is writable and the file "<fname>-journal" exists, the open is
+;;     tried again after a one second sleep; when two tries are left a
+;;     salvage is attempted with dbfile:brute-force-salvage-db
+;;   - if the directory of fname is writable the db is opened (sqlite3 creates
+;;     it if missing) under dbfile:with-simple-file-lock on "<fname>.lock"
+;;   - otherwise an existing db file is opened without the pragmas (it is used
+;;     read-only); if the file is missing a message is printed and #f returned
+;;   - any exception raised by the open is reported and the open is tried
+;;     again after delay-time seconds
+;;
+;; When the tries are used up the assert below fails with a FATAL message
+;; (previously the error path quietly returned an unspecified value as the
+;; "db handle").
+(define (dbfile:cautious-open-database fname init-proc sync-mode journal-mode #!optional (tries-left 500))
+  (let* ((busy-file  (conc fname "-journal"))
+         ;; the delay grows as the tries run out; clamped so that it is never
+         ;; negative while more than 51 tries are left
+         (delay-time (max 0 (* (- 51 tries-left) 1.1)))
+         (dir-access (file-write-access? (pathname-directory fname)))
+         ;; always recurse: the assert at the top of the next call is what
+         ;; ends the retries
+         (retry      (lambda ()
+                       (thread-sleep! delay-time)
+                       (dbfile:cautious-open-database fname init-proc
+                                                      sync-mode journal-mode
+                                                      (- tries-left 1)))))
+    (assert (>= tries-left 0) (conc "FATAL: too many attempts in dbfile:cautious-open-database of "fname", giving up."))
+    (if (and (file-write-access? fname)
+             (file-exists? busy-file))
+        (begin
+          (if (common:low-noise-print 120 busy-file)
+              (dbfile:print-err "INFO: dbfile:cautious-open-database: journal file "
+                                busy-file " exists, trying again in few seconds."))
+          (thread-sleep! 1)
+          (if (= tries-left 2)
+              (begin
+                (dbfile:print-err "INFO: forcing journal rollup " busy-file)
+                (dbfile:brute-force-salvage-db fname)))
+          (dbfile:cautious-open-database fname init-proc sync-mode journal-mode (- tries-left 1)))
+        (condition-case
+            (if dir-access
+                (dbfile:with-simple-file-lock
+                 (conc fname ".lock")
+                 (lambda ()
+                   (let* ((db-exists (file-exists? fname))
+                          (db        (sqlite3:open-database fname))) ;; creates an empty db if it did not already exist.
+                     (sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 30000))
+                     (if sync-mode
+                         (sqlite3:execute db (conc "PRAGMA synchronous = " sync-mode ";")))
+                     (if journal-mode
+                         (sqlite3:execute db (conc "PRAGMA journal_mode = " journal-mode ";")))
+                     (if (and init-proc (not db-exists))
+                         (init-proc db))
+                     db)))
+                (if (file-exists? fname)
+                    (let ((db (sqlite3:open-database fname)))
+                      ;; pragma synchronous not needed because this db is used read-only
+                      (sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 30000)) ;; read-only but still need timeout
+                      db)
+                    (begin
+                      ;; nothing to open and no way to create it
+                      (dbfile:print-err "file doesn't exist: " fname)
+                      #f)))
+          (exn (io-error)
+               (dbfile:print-err exn "ERROR: i/o error with " fname ". Check permissions, disk space etc. and try again.")
+               (retry))
+          (exn (corrupt)
+               (dbfile:print-err exn "ERROR: database " fname " is corrupt. Repair it to proceed.")
+               (retry))
+          (exn (busy)
+               (dbfile:print-err exn "ERROR: database " fname
+                                 " is locked. Try copying to another location, remove original and copy back.")
+               (retry))
+          (exn (permission)
+               (dbfile:print-err exn "ERROR: database " fname " has some permissions problem.")
+               (retry))
+          (exn ()
+               (dbfile:print-err exn "ERROR: Unknown error with database " fname " message: "
+                                 ((condition-property-accessor 'exn 'message) exn))
+               (retry))))))
+;; Attempt to recover the db file fname by replacing it with a fresh copy of
+;; itself: copy it to "<fname>-<pid>.bak", move the original to
+;; "<fname>.delme" and copy the backup into place.  The commands are printed
+;; as a warning and run through the shell with system, whose result is
+;; returned.
+(define (dbfile:brute-force-salvage-db fname)
+  (let* ((backupfname (conc fname "-" (current-process-id) ".bak"))
+         (salvage-cmd (conc "cp " fname " " backupfname
+                            ";mv " fname " " fname ".delme;"
+                            "cp " backupfname " " fname)))
+    (dbfile:print-err "WARNING: attempting recovery of file "fname" by running commands:\n"
+                      "  " salvage-cmd)
+    (system salvage-cmd)))
+;; Return the handle on "<dbpath>/no-sync.db", opening it on first use and
+;; caching it in *no-sync-db*.  The directory dbpath is created if missing
+;; and the no_sync_metadat table is created when the db file is new.
+;; The db is opened with sync-mode 0 and journal mode WAL; the busy handler
+;; is set in dbfile:cautious-open-database.
+(define (dbfile:open-no-sync-db dbpath)
+  (or *no-sync-db*
+      (begin
+        (unless (file-exists? dbpath)
+          (create-directory dbpath #t))
+        (let* ((dbname      (conc dbpath "/no-sync.db"))
+               (preexisting (file-exists? dbname))
+               (create-tbl  (lambda (db)
+                              (unless preexisting
+                                (sqlite3:execute db "CREATE TABLE IF NOT EXISTS no_sync_metadat (var TEXT,val TEXT, CONSTRAINT no_sync_metadat_constraint UNIQUE (var));"))))
+               (db          (dbfile:cautious-open-database dbname create-tbl 0 "WAL")))
+          (set! *no-sync-db* db)
+          db))))
+;; Store val under var in the no_sync_metadat table of db, replacing any
+;; existing entry for var.
+(define (db:no-sync-set db var val)
+  (let ((upsert-sql "INSERT OR REPLACE INTO no_sync_metadat (var,val) VALUES (?,?);"))
+    (sqlite3:execute db upsert-sql var val)))
+;; Remove the entry for var from the no_sync_metadat table of db.
+(define (db:no-sync-del! db var)
+  (let ((delete-sql "DELETE FROM no_sync_metadat WHERE var=?;"))
+    (sqlite3:execute db delete-sql var)))
+;; Look up var in the no_sync_metadat table of db.  Returns default when
+;; there is no row for var.  A string value that reads as a number is
+;; returned as that number, any other value is returned as is.
+(define (db:no-sync-get/default db var default)
+  (let ((found default))
+    (sqlite3:for-each-row
+     (lambda (val) (set! found val))
+     db
+     "SELECT val FROM no_sync_metadat WHERE var=?;"
+     var)
+    (or (and (string? found)
+             (string->number found))
+        found)))
+;; Transaction protected lock acquisition, using the entry keyname in the
+;; no_sync_metadat table of db as the lock.
+;; either:
+;;    fails,    returns (#f . lock-creation-time)
+;;    succeeds, returns (#t . lock-creation-time)
+;; If anything goes wrong while talking to the db a message is printed and
+;; (#f . current-seconds) is returned, so callers can always take the car
+;; and cdr of the result.
+;; use (db:no-sync-del! db keyname) to release the lock
+(define (db:no-sync-get-lock db keyname)
+  (let ((failed (lambda msg
+                  ;; report the problem, then answer "lock not obtained"
+                  (apply dbfile:print-err msg)
+                  `(#f . ,(current-seconds)))))
+    (sqlite3:with-transaction
+     db
+     (lambda ()
+       (condition-case
+           (let* ((curr-val (db:no-sync-get/default db keyname #f)))
+             (if curr-val
+                 `(#f . ,curr-val)   ;; somebody already holds the lock
+                 (let ((lock-time (current-seconds)))
+                   (sqlite3:execute db "INSERT OR REPLACE INTO no_sync_metadat (var,val) VALUES(?,?);" keyname lock-time)
+                   `(#t . ,lock-time))))
+         (exn (io-error)  (failed "ERROR: i/o error with no-sync db. Check permissions, disk space etc. and try again."))
+         (exn (corrupt)   (failed "ERROR: database no-sync db is corrupt. Repair it to proceed."))
+         (exn (busy)      (failed "ERROR: database no-sync db is locked. Try copying to another location, remove original and copy back."))
+         (exn (permission)(failed "ERROR: database no-sync db has some permissions problem."))
+         (exn () ;; (status done) ;; I don't know how to detect status done but no data!
+              (failed "ERROR: Unknown error with database no-sync db message: exn="(condition->list exn)", \n"
+                      ((condition-property-accessor 'exn 'message) exn))))))))
+;; Like db:no-sync-get-lock, but when the lock is held by someone else and
+;; is older than timeout seconds the lock is taken over: its entry is
+;; rewritten with the current time and (#t . time) is returned.
+(define (db:no-sync-get-lock-timeout db keyname timeout)
+  (let ((lockdat (db:no-sync-get-lock db keyname)))
+    (match lockdat
+      ((#f . held-since)
+       (let* ((held-secs (if (string? held-since)
+                             (string->number held-since)
+                             held-since))
+              (lock-age  (- (current-seconds) held-secs)))
+         (if (> lock-age timeout)
+             (let ((now (current-seconds)))
+               (sqlite3:execute db "INSERT OR REPLACE INTO no_sync_metadat (var,val) VALUES(?,?);" keyname now)
+               `(#t . ,now))
+             lockdat)))
+      (else lockdat))))
+;; Run the thunk proc while holding the no-sync lock keyname in db and
+;; release the lock afterwards.  Returns the result of proc; if the lock was
+;; not obtained proc is not run and the result is unspecified.
+;; NOTE: This will steal the lock when the current holder has had it for
+;;       more than timeout seconds.
+(define (db:with-no-sync-lock db keyname timeout proc)
+  ;; the timeout must be handed on, db:no-sync-get-lock-timeout takes three
+  ;; arguments
+  (let* ((lockdat (db:no-sync-get-lock-timeout db keyname timeout))
+         (gotlock (car lockdat)))
+    (if gotlock
+        (let ((res (proc)))
+          (db:no-sync-del! db keyname)
+          res))))
+;; sync back functions pulled from db.scm
+;; Take the simple file lock "<from-db-file>.lock" and, if it was obtained,
+;; sync the db for runid with db:sync-touched, then remove the lock file.
+;;
+;;   no-sync-db   - not used here
+;;   from-db-file - names the lock file and is used in the messages
+;;   dbinit       - handed on to db:sync-touched as its dbinit: keyword
+;;
+;; Returns #t if the sync was run, #f if the lock could not be obtained.
+;; Asserts that no sync is already in progress in this process.
+(define (db:lock-and-delta-sync no-sync-db dbstruct from-db-file runid keys dbinit)
+  (assert (not *db-sync-in-progress*) "FATAL: db:lock-and-sync called while a sync is in progress.")
+  (let* ((lock-file (conc from-db-file ".lock")))
+    (if (common:simple-file-lock lock-file)
+        (begin
+          (dbfile:print-err "INFO: db:lock-and-delta-sync copying db " from-db-file ".db at "(date->string (seconds->date (current-seconds))))
+          (set! *db-sync-in-progress* #t)
+          ;; dbinit is a #!key parameter of db:sync-touched, so it has to be
+          ;; passed by keyword; passed positionally it is dropped
+          (db:sync-touched dbstruct runid keys dbinit: dbinit)
+          (set! *db-sync-in-progress* #f)
+          (delete-file* lock-file)
+          #t)
+        (begin
+          (dbfile:print-err "INFO: could not get lock for " from-db-file ".db, sync likely in progress.")
+          #f))))
+;; ;; Get a lock from the no-sync-db for the from-db, then delta sync the from-db to the to-db, otherwise return #f
+;; ;;
+;; (define (db:lock-and-delta-sync-orig no-sync-db dbstruct from-db-file runid keys dbinit)
+;;   (assert (not *db-sync-in-progress*) "FATAL: db:lock-and-sync called while a sync is in progress.")
+;;   ;; (dbfile:print-err *default-log-port* "db:lock-and-delta-sync")
+;;   (let* ((lockdat  (db:no-sync-get-lock-timeout no-sync-db from-db-file 60))
+;; 	 (gotlock  (car lockdat))
+;; 	 (locktime (cdr lockdat)))
+;;     ;; (debug:print-info 3 *default-log-port* "db:lock-and-delta-sync: got lock?")
+;;     (if gotlock
+;; 	(begin
+;;           (dbfile:print-err "INFO: db:lock-and-delta-sync copying db "runid" at "(current-seconds))
+;; 	  (set! *db-sync-in-progress* #t)
+;;           (db:sync-touched dbstruct runid keys dbinit)
+;; 	  (set! *db-sync-in-progress* #f)
+;; 	  (db:no-sync-del! no-sync-db from-db-file)
+;; 	  #t)
+;;         (begin
+;;           (dbfile:print-err "ERROR: could not get lock for " from-db-file " from no-sync-db")
+;; 	  #f
+;;         ))))
+;; Sync the db for run-id from tmp disk to nfs disk: the records of the
+;; tables listed by db:sync-all-tables-list are copied from the tmp db to the
+;; db in the area with db:sync-tables, limited to records with last_update
+;; at or after the previous sync (all records when force-sync is true).
+;;
+;;   keys       - key field names, passed on to db:sync-all-tables-list
+;;   dbinit     - init procedure used if a db has to be created;
+;;                call with dbinit: db:initialize-main-db
+;;   force-sync - if true use 0 as the last_update limit
+;;
+;; Updates *db-last-sync* and *db-last-access* to the start time of the sync
+;; and puts the tmp db handle back on the stack of available handles.
+;; Returns #t.
+;; NOTE(review): db:open-db and *db-last-access* are not defined in the part
+;; of this file visible here - confirm that they are in scope.
+(define (db:sync-touched dbstruct run-id keys #!key dbinit (force-sync #f))
+  (dbfile:print-err "db:sync-touched Syncing: " (conc (if run-id run-id "main") ".db"))
+  (let* (;; the subdb is needed to access the mtdbdat
+         (subdb   (or (dbfile:get-subdb dbstruct run-id)
+                      (dbfile:init-subdb dbstruct run-id dbinit)))
+         (mtdb    (dbr:subdb-mtdbdat subdb))
+         (tmpdb   (db:open-db dbstruct run-id dbinit))
+         (start-t (current-seconds)))
+    (mutex-lock! *db-multi-sync-mutex*)
+    (let ((update_info (cons "last_update" (if force-sync 0 *db-last-sync*))))
+      (mutex-unlock! *db-multi-sync-mutex*)
+      ;; report the limit that is really used (0 when force-sync is set)
+      (dbfile:print-err "db:sync-touched: calling db:sync-tables with last_update = " (cdr update_info))
+      (db:sync-tables (db:sync-all-tables-list dbstruct keys) update_info tmpdb mtdb))
+    (mutex-lock! *db-multi-sync-mutex*)
+    (set! *db-last-sync* start-t)
+    (set! *db-last-access* start-t)
+    (mutex-unlock! *db-multi-sync-mutex*)
+    (dbfile:add-dbdat dbstruct run-id tmpdb)
+    #t))
+;; Sync specification for just the tests, test_steps and test_data tables.
+;; Each entry has the form ("tablename" ("field1" #f) ("field2" #f) ...).
+;; (A "strs" table with the fields id and str is not included.)
+(define db:sync-tests-only
+  (let ((table-spec (lambda (tablename . fieldnames)
+                      (cons tablename
+                            (map (lambda (fieldname) (list fieldname #f))
+                                 fieldnames)))))
+    (list
+     (table-spec "tests"
+                 "id" "run_id" "testname" "host" "cpuload" "diskfree" "uname"
+                 "rundir" "shortdir" "item_path" "state" "status" "attemptnum"
+                 "final_logf" "logdat" "run_duration" "comment" "event_time"
+                 "fail_count" "pass_count" "archived" "last_update")
+     (table-spec "test_steps"
+                 "id" "test_id" "stepname" "state" "status" "event_time"
+                 "comment" "logfile" "last_update")
+     (table-spec "test_data"
+                 "id" "test_id" "category" "variable" "value" "expected"
+                 "tol" "units" "comment" "status" "type" "last_update"))))
+;; Sync specification for the tables keys, metadat, runs, archive_disks,
+;; archive_blocks and test_meta.  Each entry has the form
+;; ("tablename" ("field1" #f) ("field2" #f) ...).  The fields of the runs
+;; table are id, then the given keys, then the fixed run fields.
+;; dbstruct is not used; the keys are supplied by the caller.
+(define (db:sync-main-list dbstruct keys)
+  (let ((table-spec (lambda (tablename fieldnames)
+                      (cons tablename
+                            (map (lambda (fieldname) (list fieldname #f))
+                                 fieldnames)))))
+    (list
+     (table-spec "keys"    '("id" "fieldname" "fieldtype"))
+     (table-spec "metadat" '("var" "val"))
+     (table-spec "runs"
+                 (append '("id")
+                         keys
+                         '("runname" "state" "status" "owner" "event_time" "comment"
+                           "fail_count" "pass_count" "contour" "last_update")))
+     (table-spec "archive_disks"
+                 '("id" "archive_area_name" "disk_path" "last_df" "last_df_time" "creation_time"))
+     (table-spec "archive_blocks"
+                 '("id" "archive_disk_id" "disk_path" "last_du" "last_du_time" "creation_time"))
+     (table-spec "test_meta"
+                 '("id" "testname" "owner" "description" "reviewed" "iterated"
+                   "avg_runtime" "avg_disk" "tags" "jobgroup")))))
+;; Sync specification for all tables: the entries of db:sync-main-list
+;; followed by those of db:sync-tests-only.
+(define (db:sync-all-tables-list dbstruct keys)
+  (let ((main-tables (db:sync-main-list dbstruct keys)))
+    (append main-tables db:sync-tests-only)))
+;; tbls is ( ("tablename" ( "field1" [#f|proc1] ) ( "field2" [#f|proc2] ) .... ) )
+;; db's are dbdat's
+;; if last-update specified ("field-name" . time-in-seconds)
+;;    then sync only records where field-name >= time-in-seconds
+;;    IFF field-name exists
+;;slave-dbs are an optional list of other dbs to sync to.
+;;I only see this used in one place, in db:tmp->megatest.db-sync, with refndb, which is now obsolete.
+;;TODO: resolve the above issue.
+(define (db:sync-tables tbls last-update fromdb todb . slave-dbs)
+  (handle-exceptions
+   exn
+   (begin
+     (dbfile:print-err  "EXCEPTION: database probably overloaded or unreadable in db:sync-tables.")
+     (print-call-chain (current-error-port))
+     (dbfile:print-err  " message: " ((condition-property-accessor 'exn 'message) exn))
+     (dbfile:print-err  "exn=" (condition->list exn))
+     (dbfile:print-err  " status:  " ((condition-property-accessor 'sqlite3 'status) exn))
+     (dbfile:print-err  " src db:  " (dbr:dbdat-dbfile fromdb))
+     (for-each (lambda (dbdat)
+		 (let ((dbpath (dbr:dbdat-dbfile dbdat)))
+		   (dbfile:print-err  " dbpath:  " dbpath)
+		   (if #t ;; (not (db:repair-db dbdat))
+		       (begin
+			 (dbfile:print-err "Failed to rebuild (repair is turned off) " dbpath ", exiting now.")
+			 (exit)))))
+	       (cons todb slave-dbs))
+     0)
+   ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+   ;;  Check fromdb, todb and the slave dbs. 
+   (cond
+    ((not fromdb) (dbfile:print-err  "WARNING: db:sync-tables called with fromdb missing")
+     -1)
+    ((not todb)   (dbfile:print-err  "WARNING: db:sync-tables called with todb missing")
+     -2)
+    ((not (sqlite3:database? (dbr:dbdat-dbh fromdb)))
+     (dbfile:print-err "db:sync-tables called with fromdb not a database " fromdb)
+   -3)
+    ((not (sqlite3:database? (dbr:dbdat-dbh todb)))
+     (dbfile:print-err "db:sync-tables called with todb not a database " todb)
+   -4)
+    ((not (file-write-access? (dbr:dbdat-dbfile todb)))
+     (dbfile:print-err "db:sync-tables called with todb not a read-only database " todb)
+     -5)
+    ;; Make sure the optional slave dbs are not readonly.
+    ((not (null? (let ((readonly-slave-dbs
+                        (filter
+                         (lambda (dbdat)
+                           (not (file-write-access? (dbr:dbdat-dbfile todb))))
+                         slave-dbs)))
+                   (for-each
+                    (lambda (bad-dbdat)
+                      (dbfile:print-err "db:sync-tables called with todb not a read-only database " bad-dbdat))
+                    readonly-slave-dbs)
+                   readonly-slave-dbs))) -6)
+    ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+    (else
+     ;; args are good")
+     (let ((stmts       (make-hash-table)) ;; table-field => stmt
+	   (all-stmts   '())              ;; ( ( stmt1 value1 ) ( stml2 value2 ))
+	   (numrecs     (make-hash-table))
+	   (start-time  (current-milliseconds))
+	   (tot-count   0))
+       (for-each ;; table
+	(lambda (tabledat)
+	  (let* ((tablename        (car tabledat))
+		 (fields           (cdr tabledat))
+		 (has-last-update  (member "last_update" fields))
+		 (use-last-update  (cond
+				    ((and has-last-update
+					  (member "last_update" fields))
+				     #t) ;; if given a number, just use it for all fields
+				    ((number? last-update) #f) ;; if not matched first entry then ignore last-update for this table
+				    ((and (pair? last-update)
+					  (member (car last-update)    ;; last-update field name
+						  (map car fields)))
+                                        #t)
+				    ((and last-update (not (pair? last-update)) (not (number? last-update)))
+				     (dbfile:print-err  "ERROR: parameter last-update for db:sync-tables must be a pair or a number, received: " last-update);; found in fields
+				     #f)
+				    (else
+				     #f)))
+		 (last-update-value (if use-last-update ;; no need to check for has-last-update - it is already accounted for
+					(if (number? last-update)
+					    last-update
+					    (cdr last-update))
+					#f))
+		 (last-update-field (if use-last-update
+					(if (number? last-update)
+					    "last_update"
+					    (car last-update))
+					#f))
+		 (num-fields (length fields))
+		 (field->num (make-hash-table))
+		 (num->field (apply vector (map car fields))) ;; BBHERE
+		 (full-sel   (conc "SELECT " (string-intersperse (map car fields) ",") 
+				   " FROM " tablename (if use-last-update ;; apply last-update criteria
+							  (conc " WHERE " last-update-field " >= " last-update-value)
+							  "")
+				   ";"))
+		 (full-ins   (conc "INSERT OR REPLACE INTO " tablename " ( " (string-intersperse (map car fields) ",") " ) "
+				   " VALUES ( " (string-intersperse (make-list num-fields "?") ",") " );"))
+		 (fromdat    '())
+		 (fromdats   '())
+		 (totrecords 0)
+		 (batch-len  100) ;; (string->number (or (configf:lookup *configdat* "sync" "batchsize") "100")))
+		 (todat      (make-hash-table))
+		 (count      0)
+                 (field-names (map car fields))
+                 (delay-handicap  0) ;; (string->number (or (configf:lookup *configdat* "sync" "delay-handicap") "0")))
+                 )
+	    ;; set up the field->num table
+	    (for-each
+	     (lambda (field)
+	       (hash-table-set! field->num field count)
+	       (set! count (+ count 1)))
+	     fields)
+	    ;; read the source table
+            ;; store a list of all rows in the table in fromdat, up to batch-len.
+            ;; Then add fromdat to the fromdats list, clear fromdat and repeat.
+            (sqlite3:with-transaction 
+              (dbr:dbdat-dbh fromdb)
+              (lambda ()
+	        (sqlite3:for-each-row
+	          (lambda (a . b)
+	            (set! fromdat (cons (apply vector a b) fromdat))
+	            (if (> (length fromdat) batch-len)
+		      (begin
+		        (set! fromdats (cons fromdat fromdats))
+		        (set! fromdat  '())
+		        (set! totrecords (+ totrecords 1)))
+                    )
+                 )
+	         (dbr:dbdat-dbh fromdb)
+	         full-sel)
+              )
+            )
+             ;; Count less than batch-len as a record
+             (if (> (length fromdat) 0)
+                 (set! totrecords (+ totrecords 1)))
+	    ;; tack on remaining records in fromdat
+	    (if (not (null? fromdat))
+		(set! fromdats (cons fromdat fromdats)))
+	    (sqlite3:for-each-row
+	     (lambda (a . b)
+	       (hash-table-set! todat a (apply vector a b)))
+	     (dbr:dbdat-dbh todb)
+	     full-sel)
+            (when (and delay-handicap (> delay-handicap 0))
+              (dbfile:print-err "imposing synthetic sync delay of "delay-handicap" seconds since sync/delay-handicap is configured")
+              (thread-sleep! delay-handicap)
+              (dbfile:print-err "synthetic sync delay of "delay-handicap" seconds completed")
+              )
+	    ;; first pass implementation, just insert all changed rows
+	    (for-each 
+	     (lambda (targdb)
+	       (let* ((db                 (dbr:dbdat-dbh targdb))
+                      (drp-trigger        (if (member "last_update" field-names)
+					      (db:drop-trigger db tablename) 
+					      #f))
+		      (has-last-update    (member "last_update" field-names))
+                      (is-trigger-dropped (if has-last-update
+                                              (db:is-trigger-dropped db tablename)
+					      #f)) 
+		      (stmth  (sqlite3:prepare db full-ins))
+                      (changed-rows 0))
+		 (for-each
+		  (lambda (fromdat-lst)
+		    (sqlite3:with-transaction
+		     db
+		     (lambda ()
+		       (for-each ;; 
+			(lambda (fromrow)
+			  (let* ((a    (vector-ref fromrow 0))
+				 (curr (hash-table-ref/default todat a #f))
+				 (same #t))
+			    (let loop ((i 0))
+			      (if (or (not curr)
+				      (not (equal? (vector-ref fromrow i)(vector-ref curr i))))
+				  (set! same #f))
+			      (if (and same
+				       (< i (- num-fields 1)))
+				  (loop (+ i 1))))
+			    (if (not same)
+				(begin
+				  (apply sqlite3:execute stmth (vector->list fromrow))
+				  (hash-table-set! numrecs tablename (+ 1 (hash-table-ref/default numrecs tablename 0)))
+                                  (set! changed-rows (+ changed-rows 1))
+                                )
+                            )
+                            ))
+			fromdat-lst))))
+		  fromdats)
+		 (sqlite3:finalize! stmth)
+                 (if (member "last_update" field-names)
+                    (db:create-trigger db tablename))))
+	     (append (list todb) slave-dbs)
+           )
+          )
+        )
+	tbls)
+       (let* ((runtime      (- (current-milliseconds) start-time))
+	      (should-print (or ;; (debug:debug-mode 12)
+				(common:low-noise-print 120 "db sync" (> runtime 500))))) ;; low and high sync times treated as separate.
+	 (for-each 
+	  (lambda (dat)
+	    (let ((tblname (car dat))
+		  (count   (cdr dat)))
+	      (set! tot-count (+ tot-count count))
+              )) 
+	  (sort (hash-table->alist numrecs)(lambda (a b)(> (cdr a)(cdr b))))))
+       tot-count)))))
+;; trigger setup/takedown
+;; List of (trigger-name create-statement) entries, one per table whose
+;; last_update column is maintained by an AFTER UPDATE trigger.
+;; The car of an entry is used to drop the trigger, the cadr to create it.
+;; Each trigger refreshes last_update only on the row that was just updated.
+;; NOTE(review): the row predicate assumes every one of these tables has an
+;; "id" column identifying the row - confirm against the schema.
+(define db:trigger-list 
+     (list (list "update_runs_trigger"  "CREATE TRIGGER IF NOT EXISTS update_runs_trigger AFTER UPDATE ON runs
+                             FOR EACH ROW
+                               BEGIN 
+                                 UPDATE runs SET last_update=(strftime('%s','now'))
+                                   WHERE id=old.id;
+                               END;" ) 
+	   (list "update_run_stats_trigger"  "CREATE TRIGGER  IF NOT EXISTS update_run_stats_trigger AFTER UPDATE ON run_stats
+                             FOR EACH ROW
+                               BEGIN 
+                                 UPDATE run_stats SET last_update=(strftime('%s','now'))
+                                   WHERE id=old.id;
+                               END;" )
+	   (list "update_tests_trigger"  "CREATE TRIGGER  IF NOT EXISTS update_tests_trigger AFTER UPDATE ON tests
+                             FOR EACH ROW
+                               BEGIN 
+                                 UPDATE tests SET last_update=(strftime('%s','now'))
+                                   WHERE id=old.id;
+                               END;" )
+	   (list "update_teststeps_trigger"  "CREATE TRIGGER  IF NOT EXISTS update_teststeps_trigger AFTER UPDATE ON test_steps
+                             FOR EACH ROW
+                               BEGIN 
+                                 UPDATE test_steps SET last_update=(strftime('%s','now'))
+                                   WHERE id=old.id;
+                               END;" )
+	   (list "update_test_data_trigger"  "CREATE TRIGGER  IF NOT EXISTS update_test_data_trigger AFTER UPDATE ON test_data
+                             FOR EACH ROW
+                               BEGIN 
+                                 UPDATE test_data SET last_update=(strftime('%s','now'))
+                                   WHERE id=old.id;
+                               END;" )))
+;; ADD run-id SUPPORT
+;; Create every trigger listed in db:trigger-list on the database reached
+;; through dbstruct. The work is done by db:create-triggers, called from
+;; inside db:with-db with run-id #f and r/w #f.
+(define (db:create-all-triggers dbstruct)
+  (let ((install (lambda (dbdat dbh)
+                   (db:create-triggers dbh))))
+    (db:with-db dbstruct #f #f install)))
+;; Execute, in list order, the create statement (second element) of each
+;; db:trigger-list entry against the database handle db.
+(define (db:create-triggers db)
+  (let loop ((remaining db:trigger-list))
+    (if (not (null? remaining))
+        (begin
+          (sqlite3:execute db (cadr (car remaining)))
+          (loop (cdr remaining))))))
+;; Drop every trigger listed in db:trigger-list on the database reached
+;; through dbstruct. The work is done by db:drop-triggers, called from
+;; inside db:with-db with run-id #f and r/w #f.
+(define (db:drop-all-triggers dbstruct)
+  (let ((remove-all (lambda (dbdat dbh)
+                      (db:drop-triggers dbh))))
+    (db:with-db dbstruct #f #f remove-all)))
+;; Look through sqlite_master for the update trigger that belongs to
+;; tbl-name ("update_<tbl-name>_trigger", except test_steps which uses
+;; "update_teststeps_trigger").
+;; Returns #t when a trigger of that name is listed, #f otherwise.
+;; NOTE(review): despite the procedure name, #t therefore means the trigger
+;; is currently present (not dropped) - confirm callers expect that.
+(define (db:is-trigger-dropped db tbl-name)
+  (let ((wanted (if (equal? tbl-name "test_steps")
+                    "update_teststeps_trigger"
+                    (conc "update_" tbl-name "_trigger")))
+        (found  #f))
+    (sqlite3:for-each-row
+     (lambda (trig)
+       (if (equal? trig wanted)
+           (set! found #t)))
+     db
+     "SELECT name FROM sqlite_master WHERE type = 'trigger' ;")
+    found))
+;; Issue "drop trigger if exists <name>" on db for the name (first element)
+;; of every db:trigger-list entry, in list order.
+(define (db:drop-triggers db)
+  (for-each
+   (lambda (trig-name)
+     (sqlite3:execute db (conc "drop trigger if exists " trig-name)))
+   (map car db:trigger-list)))
+;; Drop the update trigger belonging to tbl-name, but only when a trigger of
+;; that name is listed in db:trigger-list; otherwise nothing is executed.
+;; test_steps maps to "update_teststeps_trigger", every other table to
+;; "update_<tbl-name>_trigger".
+(define  (db:drop-trigger db tbl-name)
+  (let ((target (if (equal? tbl-name "test_steps")
+                    "update_teststeps_trigger"
+                    (conc "update_" tbl-name "_trigger"))))
+    (for-each
+     (lambda (entry)
+       (when (equal? (car entry) target)
+         (sqlite3:execute db (conc "drop trigger if exists " target))))
+     db:trigger-list)))
+;; Create the update trigger belonging to tbl-name by executing the create
+;; statement of the db:trigger-list entry with the matching name. Nothing is
+;; executed when no entry matches.
+;; test_steps maps to "update_teststeps_trigger", every other table to
+;; "update_<tbl-name>_trigger".
+(define  (db:create-trigger db tbl-name)
+  (let ((target (if (equal? tbl-name "test_steps")
+                    "update_teststeps_trigger"
+                    (conc "update_" tbl-name "_trigger"))))
+    (for-each
+     (lambda (entry)
+       (when (equal? (car entry) target)
+         (sqlite3:execute db (cadr entry))))
+     db:trigger-list)))
+;; db access stuff
+;; call with dbinit=db:initialize-main-db
+;; Obtain a dbdat for run-id through dbfile:open-db (dbinit is passed along
+;; unchanged) and return it. As a side effect the global *db-write-access*
+;; is set to #t when the dbdat is not flagged read-only and to #f when it is.
+;; Locking of *db-open-mutex* around the open is currently disabled.
+(define (db:open-db dbstruct run-id dbinit)
+  (let ((handle (dbfile:open-db dbstruct run-id dbinit)))
+    (set! *db-write-access*
+      (if (dbr:dbdat-read-only handle) #f #t))
+    handle))
+;; Parameter holding the database initialisation procedure; its value is #f
+;; until something sets it. db:with-db hands the current value to db:open-db
+;; as the dbinit argument.
+(define dbfile:db-init-proc (make-parameter #f))
+;; (db:with-db dbstruct run-id sqlite3:exec "select blah fgrom blaz;")
+;; r/w is a flag to indicate if the db is modified by this query #t = yes, #f = no
+;; Apply proc to (dbdat db . params) and return its result.
+;;
+;; dbstruct is either a dbr:dbstruct - a dbdat is then obtained with
+;; db:open-db and handed back with dbfile:add-dbdat once proc has returned -
+;; or a bare database handle, in which case proc receives #f as dbdat and
+;; the handle itself as db.
+;; r/w is accepted but not used inside this procedure.
+;;
+;; If "<dbfile>-journal" exists the call is delayed by 0.2 seconds first.
+;; When *api-process-request-count* exceeds 25 the call to proc is
+;; serialised through *db-with-db-mutex*. The mutex is released from a
+;; dynamic-wind exit thunk so that an exception raised by proc cannot leave
+;; it locked and block every later caller.
+;; Exceptions are reported through db:generic-error-printout.
+;; NOTE(review): when proc raises, dbdat is not handed back through
+;; dbfile:add-dbdat (same as before this change) - confirm that is intended.
+(define (db:with-db dbstruct run-id r/w proc . params)
+  (let* ((use-mutex   (> *api-process-request-count* 25)) ;; risk of db corruption
+         (have-struct (dbr:dbstruct? dbstruct))
+         (dbdat       (if have-struct                ;; this stuff just allows us to call with a db handle directly
+                          (db:open-db dbstruct run-id (dbfile:db-init-proc))
+                          #f))
+         (db          (if have-struct                ;; this stuff just allows us to call with a db handle directly
+                          (dbr:dbdat-dbh dbdat)
+                          dbstruct))
+         (fname       (if dbdat
+                          (dbr:dbdat-dbfile dbdat)
+                          "nofilenameavailable"))
+         (jfile       (conc fname"-journal")))
+    (if (file-exists? jfile)
+        (begin
+          (dbfile:print-err "INFO: "jfile" exists, delaying to reduce database load")
+          (thread-sleep! 0.2)))
+    (if (and use-mutex
+             (common:low-noise-print 120 "over-50-parallel-api-requests"))
+        (dbfile:print-err *api-process-request-count* " parallel api requests being processed in process "
+                          (current-process-id) ", throttling access"))
+    (condition-case
+        (let ((res (dynamic-wind
+                       (lambda ()
+                         (if use-mutex (mutex-lock! *db-with-db-mutex*)))
+                       (lambda ()
+                         (apply proc dbdat db params)) ;; the actual call is here.
+                       (lambda ()
+                         ;; runs on normal return and on exceptional exit alike
+                         (if use-mutex (mutex-unlock! *db-with-db-mutex*))))))
+          (if dbdat
+              (dbfile:add-dbdat dbstruct run-id dbdat))
+          res)
+      (exn (io-error)
+           (db:generic-error-printout exn "ERROR: i/o error with " fname ". Check permissions, disk space etc. and try again."))
+      (exn (corrupt)
+           (db:generic-error-printout exn "ERROR: database " fname " is corrupt. Repair it to proceed."))
+      (exn (busy)
+           (db:generic-error-printout exn "ERROR: database " fname
+                                      " is locked. Try copying to another location, remove original and copy back."))
+      (exn (permission)(db:generic-error-printout exn "ERROR: database " fname " has some permissions problem."))
+      (exn ()
+           (db:generic-error-printout exn "ERROR: Unknown error with database " fname " message: "
+                                      ((condition-property-accessor 'exn 'message) exn))))))
+;; another attempt at a transactionized queue
+;; ;; ;; (define *transaction-queues* (make-hash-table))
+;; ;; ;; 
+;; ;; ;; (define (db:get-queue run-id)
+;; ;; ;;   (let* ((res (hash-table-ref/default *transaction-queues* run-id #f)))
+;; ;; ;;     (if res
+;; ;; ;; 	res
+;; ;; ;; 	(let* ((newq (make-queue)))
+;; ;; ;; 	  (hash-table-set! *transaction-queues* run-id newq)
+;; ;; ;; 	  newq))))
+;; ;; ;; 
+;; ;; ;; (define (db:add-to-transaction-queue dbstruct proc params)
+;; ;; ;;   (let* ((mbox (make-mailbox))
+;; ;; ;; 	 (q    (db:get-queue run-id)))
+;; ;; ;;     (queue-add! *transaction-queue* (list dbstruct proc mbox))
+;; ;; ;;     (mailbox-receive mbox)))
+;; ;; ;; 
+;; ;; ;; (define (db:process-transaction-queue *dbstruct-dbs*)
+;; ;; ;;   (for-each
+;; ;; ;;    (lambda (run-id)
+;; ;; ;;      (let* ((q (hash-table-ref *transaction-queue* run-id)))
+;; ;; ;;        ;; with-transaction
+;; ;; ;;        ;;     dbstruct
+;; ;; ;;        ;; pop items from queue and execute them, return results via mailbox
+;; ;; ;;        q
+;; ;; ;;        ;; pop 
+;; ;; ;;        ))
+;; ;; ;;    (hash-table-keys *transaction-queues*)))
+;; file utils
+;; lazy-safe get file mod time. on any error (file not existing etc.) return 0
+;; Return the modification time of fpath, or 0 when the file does not exist
+;; or when looking it up raises an exception (the failure is reported with
+;; dbfile:print-err in that case).
+(define (dbfile:lazy-modification-time fpath)
+  (handle-exceptions
+      err
+    (begin
+      (dbfile:print-err "Failed to get modification time for " fpath ", treating it as zero. exn=" err)
+      0)
+    (cond
+     ((file-exists? fpath) (file-modification-time fpath))
+     (else 0))))
+;; find timestamp of newest file associated with a sqlite db file
+;; Return the newest modification time among all files whose name starts
+;; with fpath (glob of "<fpath>*"). When the glob matches nothing, or fails,
+;; a placeholder name is used instead of the matches, so the result comes
+;; from dbfile:lazy-modification-time on that placeholder.
+(define (dbfile:lazy-sqlite-db-modification-time fpath)
+  (let* ((matches    (handle-exceptions
+                         err
+                       (begin
+                         (dbfile:print-err "Failed to glob " fpath "*, exn=" err)
+                         (list (conc "/no/such/file, message: "
+                                     ((condition-property-accessor 'exn 'message) err))))
+                       (glob (conc fpath "*"))))
+         (candidates (if (null? matches)
+                         '("/no/such/file")
+                         matches)))
+    (apply max (map dbfile:lazy-modification-time candidates))))
+;; dot-locking egg seems not to work, using this for now.
+;;
+;; Make one attempt to take the lock file fname.
+;; Returns #t when this process now holds the lock, #f otherwise.
+;;  - fname exists and is no older than expire-time seconds: give up, #f.
+;;  - fname exists and is older than expire-time: delete it and try again.
+;;    If the file is still there after the delete attempt the result is #f
+;;    instead of retrying, so an undeletable stale lock cannot cause an
+;;    endless retry loop.
+;;  - fname does not exist: write "<host>-<pid>" into it, wait 0.25 s and
+;;    read the first line back; the lock is ours only if it still matches.
+(define (dbfile:simple-file-lock fname #!key (expire-time 300))
+  (let ((fmod-time (handle-exceptions
+                       exn
+                     (current-seconds) ;; mtime not readable: treat the file as brand new
+                     (file-modification-time fname))))
+    (if (file-exists? fname)
+        (if (> (- (current-seconds) fmod-time) expire-time)
+            (begin
+              (handle-exceptions exn #f (delete-file* fname))
+              (if (file-exists? fname)
+                  #f ;; could not remove it, or somebody else just re-created it
+                  (dbfile:simple-file-lock fname expire-time: expire-time)))
+            #f)
+        (let ((key-string (conc (get-host-name) "-" (current-process-id)))
+              (oup        (open-output-file fname)))
+          ;; an explicit port is used because with-output-to-file did not
+          ;; seem to be cleaning up after itself
+          (with-output-to-port
+              oup
+            (lambda ()
+              (print key-string)))
+          (close-output-port oup)
+          (thread-sleep! 0.25)
+          (if (file-exists? fname)
+              (handle-exceptions exn
+                #f
+                (with-input-from-file fname
+                  (lambda ()
+                    (equal? key-string (read-line)))))
+              #f)))))
+;; Keep trying dbfile:simple-file-lock on fname, sleeping 3 seconds between
+;; attempts, until the lock is obtained (returns #t) or expire-time seconds
+;; have passed since the first attempt (returns #f).
+(define (dbfile:simple-file-lock-and-wait fname #!key (expire-time 300))
+  (let ((deadline (+ expire-time (current-seconds))))
+    (let retry ((locked (dbfile:simple-file-lock fname expire-time: expire-time)))
+      (cond
+       (locked #t)
+       ((> deadline (current-seconds))
+        (thread-sleep! 3)
+        (retry (dbfile:simple-file-lock fname expire-time: expire-time)))
+       (else #f)))))
+;; Release the lock by deleting the lock file fname. Any exception raised
+;; while deleting is swallowed and #f is returned instead.
+(define (dbfile:simple-file-release-lock fname)
+  (handle-exceptions err #f (delete-file* fname)))
+;; Run the thunk proc while holding the lock file fname and return what
+;; proc returns; the lock is released after proc has returned.
+;; The lock is requested with dbfile:simple-file-lock-and-wait. If it
+;; cannot be obtained the assertion below fails, so proc is never run
+;; without the lock and the failure is no longer silently ignored.
+(define (dbfile:with-simple-file-lock fname proc #!key (expire-time 300))
+  (let ((gotlock (dbfile:simple-file-lock-and-wait fname expire-time: expire-time)))
+    (if gotlock
+        (let ((res (proc)))
+          (dbfile:simple-file-release-lock fname)
+          res)
+        ;; assert must be given a false value to signal; #t never fires
+        (assert #f "FATAL: simple file lock never got a lock."))))

Index: dbmod.scm
--- dbmod.scm
+++ dbmod.scm
@@ -31,7 +31,26 @@
 (define (db:run-id->dbname run-id)
    ((number? run-id)(conc run-id ".db"))
    ((not run-id)    "main.db")
    (else            run-id)))
+;; hash of hashes: a hash table whose values are themselves hash tables
+;; Store val under key2 in the inner hash table found under key1 in dat.
+;; When dat has no (true) entry for key1 a fresh inner table is created and
+;; stored under key1 first.
+(define (db:hoh-set! dat key1 key2 val)
+  (let ((inner (hash-table-ref/default dat key1 #f)))
+    (if (not inner)
+        (begin
+          (set! inner (make-hash-table))
+          (hash-table-set! dat key1 inner)))
+    (hash-table-set! inner key2 val)))
+;; Look up key2 in the inner hash table stored under key1 in dat.
+;; Returns #f when either level has no entry.
+(define (db:hoh-get dat key1 key2)
+  (let ((inner (hash-table-ref/default dat key1 #f)))
+    (if inner
+        (hash-table-ref/default inner key2 #f)
+        #f)))

Index: dcommon.scm
--- dcommon.scm
+++ dcommon.scm
@@ -522,17 +522,20 @@
                                (let* ((shell (if (get-environment-variable "SHELL") 
                                                  (conc "-e " (get-environment-variable "SHELL"))
                                       (command (conc "cd " rundir 
                                                      ";mt_xterm -T \"" (string-translate testfullname "()" "  ") "\" " shell "&")))
-                                 (print "Command =" command)
+                                 ;; (print "Command =" command)
                                (message-window  (conc "Directory " rundir " not found"))))))
-          (print "Adding xterm code")))))
+        )
+     )
+  )
 ;; D A T A   T A B L E S
@@ -547,11 +550,11 @@
                         #:numcol 1
                         #:numlin (length key-vals)
                         #:numcol-visible 1
                         #:numlin-visible (length key-vals)
                         #:click-cb (lambda (obj lin col status)
-                                     (print "obj: " obj " lin: " lin " col: " col " status: " status)))))
+                                     (debug:print 0 *default-log-port* "obj: " obj " lin: " lin " col: " col " status: " status)))))
     ;; (iup:attribute-set! keys-matrix "0:0" "Run Keys")
     (iup:attribute-set! keys-matrix "WIDTH0" 0)
     (iup:attribute-set! keys-matrix "0:1" "Key Name")
     ;; (iup:attribute-set! keys-matrix "WIDTH1" "100")
     ;; fill in keys
@@ -797,22 +800,23 @@
 										  (iup:attribute fd "VALUE"))
 							      (iup:destroy! fd))))
 			   ;; (lambda (obj)
 			   ;;  (iup:show (iup:file-dialog))
 			   ;;  (print "File->open " obj)))
-			   (iup:menu-item "Save"  #:action (lambda (obj)(print "File->save " obj)))
+			   ;; (iup:menu-item "Save"  #:action (lambda (obj)(print "File->save " obj)))
 			   (iup:menu-item "Exit"  #:action (lambda (obj)(exit)))))
-   (iup:menu-item "Tools" (iup:menu
-			   (iup:menu-item "Create new blah" #:action (lambda (obj)(print "Tools->new blah")))
-			   ;; (iup:menu-item "Show dialog"     #:action (lambda (obj)
-			   ;;  					   (show message-window
-			   ;;  					     #:modal? #t
-			   ;;  					     ;; set positon using coordinates or center, start, top, left, end, bottom, right, parent-center, current
-			   ;;  					     ;; #:x 'mouse
-			   ;;  					     ;; #:y 'mouse
-			   ;;  )					     
-			   ))))
+;; (iup:menu-item "Tools" (iup:menu
+;; 			   (iup:menu-item "Create new blah" #:action (lambda (obj)(print "Tools->new blah")))
+;; 			   ;; (iup:menu-item "Show dialog"     #:action (lambda (obj)
+;; 			   ;;  					   (show message-window
+;; 			   ;;  					     #:modal? #t
+;; 			   ;;  					     ;; set positon using coordinates or center, start, top, left, end, bottom, right, parent-center, current
+;; 			   ;;  					     ;; #:x 'mouse
+;; 			   ;;  					     ;; #:y 'mouse
+;; 			   ;;  )					     
+;; 			   ))
+   ))
@@ -1354,11 +1358,10 @@
 	  (let* ((status  (vector-ref hed 3))
                  (val     (vector-ref hed (- colnum 1)))
                  (bgcolor (cond
                            ((member (conc status) '("" "-" "#<unspecified>"))
                            ((member (conc status) '("0" 0))
                            (else test-status-color)))
                           ; (else failcolor)))
 		 (mtrx-rc (conc rownum ":" colnum)))

ADDED   debugprint.scm
Index: debugprint.scm
--- /dev/null
+++ debugprint.scm
@@ -0,0 +1,175 @@
+(declare (unit debugprint))
+(declare (uses mtargs))
+(module debugprint
+	*
+;;(import scheme chicken data-structures extras files ports)
+  (import
+    scheme
+    chicken
+    data-structures
+    posix
+    ports
+    extras
+    ;; scheme
+    ;; chicken.base
+    ;; chicken.string
+    ;; chicken.time
+    ;; chicken.time.posix
+    ;; chicken.port
+    ;; chicken.process-context
+    ;; chicken.process-context.posix
+    (prefix mtargs args:)
+    srfi-1
+    ;; system-information
+    )
+;; debug stuff
+;; Current verbosity. Starts as the empty list; debug:calc-verbosity stores a
+;; number or a list of numbers here and debug:debug-mode reads it.
+(define verbosity (make-parameter '()))
+;; Default port for log output: the error port that is current when this
+;; definition is evaluated.
+(define *default-log-port*  (current-error-port))
+(define debug:print-logger (make-parameter #f)) ;; set to a proc to call on every logging print
+;; Work out the verbosity from "-debug", "-debug-noprop" or the
+;; MT_DEBUG_MODE environment variable (the first one that is set wins) and
+;; store it in the verbosity parameter.
+;; Unless "-debug-noprop" was given, the resulting verbosity is exported
+;; through MT_DEBUG_MODE when "-debug" was given or the variable is not set
+;; yet; a list of levels is written comma separated.
+(define (debug:setup)
+  (let ((spec (or (args:get-arg "-debug")
+                  (args:get-arg "-debug-noprop")
+                  (get-environment-variable "MT_DEBUG_MODE"))))
+    (verbosity (debug:calc-verbosity spec 'q))
+    (debug:check-verbosity (verbosity) spec)
+    ;; if we were handed a bad verbosity rule then we will override it with 1 and continue
+    (if (not (verbosity)) (verbosity 1))
+    (unless (or (args:get-arg "-debug-noprop")
+                (and (not (args:get-arg "-debug"))
+                     (get-environment-variable "MT_DEBUG_MODE")))
+      (let ((level (verbosity)))
+        (setenv "MT_DEBUG_MODE"
+                (if (list? level)
+                    (string-intersperse (map conc level) ",")
+                    (conc level)))))))
+;; check verbosity, #t is ok
+;; Return #t when verbosity is a number or a list. Otherwise print an error
+;; naming vstr (the text the value was derived from) and return #f.
+(define (debug:check-verbosity verbosity vstr)
+  (cond
+   ((number? verbosity) #t)
+   ((list?   verbosity) #t)
+   (else
+    (print "ERROR: Invalid debug value \"" vstr "\"")
+    #f)))
+;; (define (debug:print . params) #f)
+;; (define (debug:print-info . params) #f)
+;; (define (set-functions dbgp dbgpinfo)
+;;   (set! debug:print dbgp)
+;;   (set! debug:print-info dbgpinfo))
+;; Translate a debug specification into a verbosity value, store it in the
+;; verbosity parameter and return it.
+;;   vstr is a number         -> that number
+;;   vstr is not a string     -> 1
+;;   vstr is a string         -> split on ",", keep the parts that
+;;                               string->number accepts; two or more give
+;;                               a list, exactly one gives that number,
+;;                               none gives 1
+;; arg ('v or 'q) is kept for interface compatibility but does not influence
+;; the result: every string is handled by the string clause, so the
+;; verbose/quiet clauses that used to follow it could never be selected and
+;; have been removed. No caching is done here.
+(define (debug:calc-verbosity vstr arg) ;; arg is 'v (verbose) or 'q (quiet)
+  (let ((res (cond
+              ((number? vstr) vstr)
+              ((not (string? vstr)) 1)
+              (else
+               (let ((debugvals (filter number? (map string->number (string-split vstr ",")))))
+                 (cond
+                  ((null? debugvals)       1)
+                  ((null? (cdr debugvals)) (car debugvals))
+                  (else                    debugvals)))))))
+    (verbosity res)
+    res))
+;; check verbosity, #t is ok
+#;(define (debug-check-verbosity verbosity vstr)
+  (if (not (or (number? verbosity)
+	       (list?   verbosity)))
+      (begin
+	(print "ERROR: Invalid debug value \"" vstr "\"")
+	#f)
+      #t))
+;; Decide whether output at level n is enabled for the current verbosity.
+;; Returns a true value when enabled, #f otherwise.
+;;   verbosity number, n number : enabled when n <= verbosity
+;;   verbosity list,   n number : enabled when n is a member of the list
+;;   verbosity list,   n list   : enabled when the two lists share a level
+;;   verbosity number, n list   : enabled when verbosity is a member of n
+;; The non-destructive lset-intersection is used on purpose: the "!" variant
+;; is allowed to modify its first list argument, which here is the list held
+;; by the verbosity parameter. Levels are compared with eqv?, the portable
+;; equality for numbers.
+(define (debug:debug-mode n)
+  (let ((vb (verbosity)))
+    (cond
+     ((and (number? vb)   ;; number number
+           (number? n))
+      (<= n vb))
+     ((and (list? vb)     ;; list   number
+           (number? n))
+      (member n vb))
+     ((and (list? vb)     ;; list   list
+           (list? n))
+      (not (null? (lset-intersection eqv? vb n))))
+     ((and (number? vb)   ;; number list
+           (list? n))
+      (member vb n))
+     (else #f))))
+;; When a logger procedure is installed in debug:print-logger, call it with
+;; a single string made of the host name, the process id, the params
+;; (each converted with conc, space separated) and the command line
+;; arguments. Does nothing when no logger is installed.
+(define (debug:handle-remote-logging params)
+  (let ((logger (debug:print-logger)))
+    (if logger ;; NOTE: turn params into string to keep things simple for now
+        (logger (conc "REMOTE ("(get-host-name)", pid="(current-process-id)") "
+                      (string-intersperse (map conc params) " ") "; "
+                      (string-intersperse (command-line-arguments) " "))))))
+(define debug:enable-timestamp (make-parameter #t))
+;; Return the current local time as "HH:MM:SS " (note the trailing space)
+;; when debug:enable-timestamp is true, and the empty string otherwise.
+(define (debug:timestamp)
+  (cond
+   ((debug:enable-timestamp)
+    (let ((now (seconds->local-time (current-seconds))))
+      (conc (time->string now "%H:%M:%S") " ")))
+   (else "")))
+  ;; Print params, preceded by the timestamp, when level n is enabled
+  ;; (see debug:debug-mode). Output goes to e when e is a port and to the
+  ;; current error port otherwise - the same rule the other debug:print-*
+  ;; procedures use, so a non-port value for e no longer reaches
+  ;; with-output-to-port. Always returns #t.
+  (define (debug:print n e . params)
+  (if (debug:debug-mode n)
+      (with-output-to-port (if (port? e) e (current-error-port))
+        (lambda ()
+          (apply print (debug:timestamp) params))))
+  #t ;; only here to make remote stuff happy. It'd be nice to fix that ...
+  )
+;; Print params prefixed with "ERROR: " and the timestamp.
+;;  1. When level n is enabled (see debug:debug-mode) the message goes to e
+;;     if e is a port, otherwise to the current error port.
+;;  2. Level 0 messages are additionally passed to the current error port
+;;     when e is not that port. This second copy is skipped when e is not a
+;;     port and step 1 already printed, because step 1 then wrote to the
+;;     current error port itself and the message would appear twice.
+(define (debug:print-error n e . params)
+  (let ((shown (debug:debug-mode n))
+        (emit  (lambda ()
+                 (apply print "ERROR: " (debug:timestamp) params))))
+    ;; normal print
+    (if shown
+        (with-output-to-port (if (port? e) e (current-error-port))
+          emit))
+    ;; pass important messages to stderr
+    (if (and (eq? n 0)
+             (not (eq? e (current-error-port)))
+             (or (port? e) (not shown)))
+        (with-output-to-port (current-error-port)
+          emit))))
+;; Print params prefixed with "INFO: (<n>) " and the timestamp when level n
+;; is enabled (see debug:debug-mode). Output goes to e when e is a port and
+;; to the current error port otherwise.
+(define (debug:print-info n e . params)
+  (if (debug:debug-mode n)
+      (let ((out (if (port? e) e (current-error-port))))
+        (with-output-to-port out
+          (lambda ()
+            (apply print "INFO: (" n ") "(debug:timestamp) params))))))
+;; Print params prefixed with "WARN: (<n>) " and the timestamp when level n
+;; is enabled (see debug:debug-mode). Output goes to e when e is a port and
+;; to the current error port otherwise.
+(define (debug:print-warn n e . params)
+  (if (debug:debug-mode n)
+      (let ((out  (if (port? e) e (current-error-port)))
+            (emit (lambda ()
+                    (apply print "WARN: (" n ") " (debug:timestamp) params))))
+        (with-output-to-port out emit))))

Index: diff-report.scm
--- diff-report.scm
+++ diff-report.scm
@@ -146,11 +146,11 @@
 (define (diff:target+run-name->run-id target run-name)
   (let* ((keys (rmt:get-keys))
          (target-parts (if target (string-split target "/") (map (lambda (x) "%") keys))))
     (if (not (eq? (length keys) (length keys)))
-          (print "Error: Target ("target") item count does not match fields count target tokens="target-parts" fields="keys)
+          (debug:print 0 *default-log-port* "Error: Target ("target") item count does not match fields count target tokens="target-parts" fields="keys)
         (let* ((target-map (zip keys target-parts))
                (qry-res (rmt:get-runs run-name 1 0 target-map)))
           (if (eq? 2 (vector-length qry-res))
@@ -384,22 +384,10 @@
     (if html-output-file
         (with-output-to-file html-output-file (lambda () (print html-body))))
     (when (and email-recipients-list (> (length email-recipients-list) 0))
       (sendmail (string-join email-recipients-list ",") email-subject html-body use_html: #t))
-;; (let* ((src-run-name "all57")
-;;        (dest-run-name "all60")
-;;        (src-run-id (diff:run-name->run-id src-run-name))
-;;        (dest-run-id (diff:run-name->run-id dest-run-name))
-;;        (to-list (list "bjbarcla")))
-;;   (diff:deliver-diff-report src-run-id dest-run-id email-recipients-list: to-list html-output-file: "/tmp/bjbarcla/zippy.html")
-;;   )
 (define (do-diff-report src-target src-runname dest-target dest-runname html-file to-list-raw)
   (let* (;;(src-target "nope%")
          ;;(src-runname "all57")
          ;;(dest-target "%")
@@ -410,16 +398,16 @@
          (to-list (if (string? to-list-raw) (string-split to-list-raw ",:") #f))
      ((not src-run-id)
-      (print "No match for source target/runname="src-target"/"src-runname)
-      (print "Cannot proceed.")
+      (debug:print 0 *default-log-port* "No match for source target/runname="src-target"/"src-runname)
+      (debug:print 0 *default-log-port* "Cannot proceed.")
      ((not dest-run-id)
-      (print "No match for source target/runname="dest-target"/"dest-runname)
-      (print "Cannot proceed.")
+      (debug:print 0 *default-log-port* "No match for source target/runname="dest-target"/"dest-runname)
+      (debug:print 0 *default-log-port* "Cannot proceed.")
       (diff:deliver-diff-report src-run-id dest-run-id email-recipients-list: to-list html-output-file: html-file)))))

DELETED filedb.scm
Index: filedb.scm
--- filedb.scm
+++ /dev/null
@@ -1,255 +0,0 @@
-;; Copyright 2006-2011, Matthew Welland.
-;; This file is part of Megatest.
-;;     Megatest is free software: you can redistribute it and/or modify
-;;     it under the terms of the GNU General Public License as published by
-;;     the Free Software Foundation, either version 3 of the License, or
-;;     (at your option) any later version.
-;;     Megatest is distributed in the hope that it will be useful,
-;;     but WITHOUT ANY WARRANTY; without even the implied warranty of
-;;     GNU General Public License for more details.
-;;     You should have received a copy of the GNU General Public License
-;;     along with Megatest.  If not, see <>.
-;; (require-extension synch sqlite3 posix srfi-13 srfi-1 utils regex)
-(use sqlite3 srfi-1 posix regex srfi-69 srfi-13 posix-extras)
-(import (prefix sqlite3 sqlite3:))
-(declare (unit filedb))
-(include "fdb_records.scm")
-;; (include "settings.scm")
-(define (filedb:open-db dbpath)
-  (let* ((fdb      (make-filedb:fdb))
-	 (dbexists (common:file-exists? dbpath))
-	 (db (sqlite3:open-database dbpath)))
-    (filedb:fdb-set-db!        fdb db)
-    (filedb:fdb-set-dbpath!    fdb dbpath)
-    (filedb:fdb-set-pathcache! fdb (make-hash-table))
-    (filedb:fdb-set-idcache!   fdb (make-hash-table))
-    (filedb:fdb-set-partcache! fdb (make-hash-table))
-    (sqlite3:set-busy-handler!  db (make-busy-timeout 136000))
-    (if (not dbexists)
-	(begin
-	  (sqlite3:execute db "PRAGMA synchronous = OFF;")
-	  (sqlite3:execute db "CREATE TABLE names (id INTEGER PRIMARY KEY,name TEST);") ;; for future use - change path in paths table to path_id
-	  (sqlite3:execute db "CREATE INDEX name_index ON names (name);")
-	  ;; NB// We store a useful subset of file attributes but do not attempt to store all
-	  (sqlite3:execute db "CREATE TABLE paths (id        INTEGER PRIMARY KEY,
-                                                   path      TEXT,
-                                                   parent_id INTEGER,
-                                                   mode      INTEGER DEFAULT -1,
-                                                   uid       INTEGER DEFAULT -1,
-                                                   gid       INTEGER DEFAULT -1,
-                                                   size      INTEGER DEFAULT -1,
-                                                   mtime     INTEGER DEFAULT -1);")
-	  (sqlite3:execute db "CREATE INDEX path_index ON paths (path,parent_id);")
-	  (sqlite3:execute db "CREATE TABLE bases (id INTEGER PRIMARY KEY,base TEXT,                  updated TIMESTAMP);")))
-    ;; close the sqlite3 db and open it as needed
-    (filedb:finalize-db! fdb)
-    (filedb:fdb-set-db! fdb #f)
-    fdb))
-(define (filedb:reopen-db fdb)
-  (let ((db (sqlite3:open-database (filedb:fdb-get-dbpath fdb))))
-    (filedb:fdb-set-db! fdb db)
-    (sqlite3:set-busy-handler!  db (make-busy-timeout 136000))))
-(define (filedb:finalize-db! fdb)
-  (sqlite3:finalize! (filedb:fdb-get-db fdb)))
-(define (filedb:get-current-time-string)
-  (string-chomp (time->string (seconds->local-time (current-seconds)))))
-(define (filedb:get-base-id db path)
-  (let ((stmt   (sqlite3:prepare db "SELECT id FROM bases WHERE base=?;"))
-        (id-num #f))
-    (sqlite3:for-each-row 
-     (lambda (num) (set! id-num num)) stmt path)
-    (sqlite3:finalize! stmt)
-    id-num))
-(define (filedb:get-path-id db path parent)
-  (let ((stmt   (sqlite3:prepare db "SELECT id FROM paths WHERE path=? AND parent_id=?;"))
-        (id-num #f))
-    (sqlite3:for-each-row 
-     (lambda (num) (set! id-num num)) stmt path parent)
-    (sqlite3:finalize! stmt)
-    id-num))
-(define (filedb:add-base db path)
-  (let ((existing (filedb:get-base-id db path)))
-    (if existing #f
-        (begin
-          (sqlite3:execute db "INSERT INTO bases (base,updated) VALUES (?,?);" path (filedb:get-current-time-string))))))
-;; index 	value 	field 	notes
-;; 0 	inode number 	st_ino 	
-;; 1 	mode 	st_mode 	bitfield combining file permissions and file type
-;; 2 	number of hard links 	st_nlink 	
-;; 3 	UID of owner 	st_uid 	as with file-owner
-;; 4 	GID of owner 	st_gid 	
-;; 5 	size 	st_size 	as with file-size
-;; 6 	access time 	st_atime 	as with file-access-time
-;; 7 	change time 	st_ctime 	as with file-change-time
-;; 8 	modification time 	st_mtime 	as with file-modification-time
-;; 9 	parent device ID 	st_dev 	ID of device on which this file resides
-;; 10 	device ID 	st_rdev 	device ID for special files (i.e. the raw major/minor number)
-;; 11 	block size 	st_blksize 	
-;; 12 	number of blocks allocated 	st_blocks 	
-(define (filedb:add-path-stat db path parent statinfo)
-  (let ((stmt (sqlite3:prepare db "INSERT INTO paths (path,parent_id,mode,uid,gid,size,mtime) VALUES (?,?,?,?,?,?,?);")))
-	(sqlite3:execute stmt
-			 path
-			 parent
-			 (vector-ref statinfo 1) ;; mode
-			 (vector-ref statinfo 3) ;; uid
-			 (vector-ref statinfo 4) ;; gid
-			 (vector-ref statinfo 5) ;; size
-			 (vector-ref statinfo 8) ;; mtime
-			 )
-	(sqlite3:finalize! stmt))) ;;  (filedb:get-current-time-string))))
-(define (filedb:add-path db path parent)
-  (let ((stmt (sqlite3:prepare db "INSERT INTO paths (path,parent_id) VALUES (?,?);")))
-    (sqlite3:execute stmt path parent)
-    (sqlite3:finalize! stmt)))
-(define (filedb:register-path fdb path #!key (save-stat #f))
-  (let* ((db        (filedb:fdb-get-db        fdb))
-	 (pathcache (filedb:fdb-get-pathcache fdb))
-	 (stat      (if save-stat (file-stat path #t)))
-	 (id        (hash-table-ref/default pathcache path #f)))
-    (if (not db)(filedb:reopen-db fdb))
-    (if id id 
-        (let ((plist (string-split path "/")))
-          (let loop ((head (car plist))
-                     (tail (cdr plist))
-                     (parent 0))
-            (let ((id (filedb:get-path-id db head parent))
-                  (done (null? tail)))
-              (if id          ;; we'll have a id if the path is already registered
-                  (if done 
-                      (begin
-                        (hash-table-set! pathcache path id)
-                        id) ;; return the last path id for a result
-                      (loop (car tail)(cdr tail) id))
-                  (begin      ;; add the path and then repeat the loop with the same data
-		    (if save-stat
-			(filedb:add-path-stat db head parent stat) 
-			(filedb:add-path db head parent))
-                    (loop head tail parent)))))))))
-(define (filedb:update-recursively fdb path #!key (save-stat #f))
-  (let ((p  (open-input-pipe (string-append "find -L " path)))) ;; (resolve-pathname path)))) ;; (string-append "find " path))))
-    (print "processed 0 files...")
-    (let loop ((l  (read-line p))
-               (lc 0)) ;; line count
-      (if (eof-object? l) 
-	  (begin
-	    (print "          " lc " files")
-	    (close-input-port p))
-          (begin
-            (filedb:register-path fdb l save-stat: save-stat) ;; (get-real-path l)) ;; don't like losing the original path info
-            (if (= (modulo lc 100) 0)
-                (print "          " lc " files"))
-            (loop (read-line p)(+ lc 1)))))))
-(define (filedb:update fdb path #!key (save-stat #f))
-  ;; first get the realpath and add it to the bases table
-  (let ((real-path path) ;; (filedb:get-real-path path))
-	(db        (filedb:fdb-get-db    fdb)))
-    (filedb:add-base db real-path)
-    (filedb:update-recursively fdb path save-stat: save-stat)))
-;; not used and broken
-(define (filedb:get-real-path path)
-  (let* ((p (open-input-pipe (string-append real-path " " (regexp-escape path))))
-         (pth (read-line p)))
-    (if (eof-object? pth) path
-	(begin
-	  (close-input-port p)
-	  pth))))
-(define (filedb:drop-base fdb path)
-  (print "Sorry, I don't do anything yet"))
-(define (filedb:find-all fdb pattern action)
-  (let* ((db     (filedb:fdb-get-db fdb))
-	 (stmt   (sqlite3:prepare db "SELECT id FROM paths WHERE path like ?;"))
-	 (result '()))
-    (sqlite3:for-each-row 
-     (lambda (num)
-       (action num)
-       (set! result (cons num result))) stmt pattern)
-    (sqlite3:finalize! stmt)
-    result))
-(define (filedb:get-path-record fdb id)
-  (let* ((db        (filedb:fdb-get-db        fdb))
-	 (partcache (filedb:fdb-get-partcache fdb))
-	 (dat (hash-table-ref/default partcache id #f)))
-    (if dat dat
-	(let ((stmt (sqlite3:prepare db "SELECT path,parent_id FROM paths WHERE id=?;"))
-	      (result #f))
-	  (sqlite3:for-each-row 
-	   (lambda (path parent_id)(set! result (list path parent_id))) stmt id)
-	  (hash-table-set! partcache id result)
-	  (sqlite3:finalize! stmt)
-	  result))))
-(define (filedb:get-children fdb parent-id)
-  (let* ((db        (filedb:fdb-get-db fdb))
-	 (res       '()))
-    (sqlite3:for-each-row
-     (lambda (id path parent-id)
-       (set! res (cons (vector id path parent-id) res)))
-     db "SELECT id,path,parent_id FROM paths WHERE parent_id=?;"
-     parent-id)
-    res))
-;; retrieve all that have children and those without
-;; children that match patt
-(define (filedb:get-children-patt fdb parent-id search-patt)
-  (let* ((db        (filedb:fdb-get-db fdb))
-	 (res       '()))
-    ;; first get the children that have no children
-    (sqlite3:for-each-row
-     (lambda (id path parent-id)
-       (set! res (cons (vector id path parent-id) res)))
-     db "SELECT id,path,parent_id FROM paths WHERE parent_id=? AND 
-            (id IN (SELECT parent_id FROM paths) OR path LIKE ?);"
-     parent-id search-patt)
-    res))
-(define (filedb:get-path fdb id)
-  (let* ((db      (filedb:fdb-get-db      fdb))
-	 (idcache (filedb:fdb-get-idcache fdb))
-	 (path    (hash-table-ref/default idcache id #f)))
-    (if (not db)(filedb:reopen-db fdb))
-    (if path path
-        (let loop ((curr-id id)
-                   (path    ""))
-          (let ((path-record (filedb:get-path-record fdb curr-id)))
-            (if (not path-record) #f ;; this id has no path
-                (let* ((parent-id (list-ref path-record 1))
-                       (pname     (list-ref path-record 0))
-                       (newpath   (string-append  "/" pname path)))
-                  (if (= parent-id 0) ;; fields 0=path, 1=parent. root parent=0
-                      (begin
-                        (hash-table-set! idcache id newpath)
-                        newpath)
-                      (loop parent-id newpath)))))))))
-(define (filedb:search db pattern)
-  (let ((action (lambda (id)(print (filedb:get-path db id)))))
-    (filedb:find-all db pattern action)))

DELETED fs-transport.scm
Index: fs-transport.scm
--- fs-transport.scm
+++ /dev/null
@@ -1,52 +0,0 @@
-;; Copyright 2006-2012, Matthew Welland.
-;; This file is part of Megatest.
-;;     Megatest is free software: you can redistribute it and/or modify
-;;     it under the terms of the GNU General Public License as published by
-;;     the Free Software Foundation, either version 3 of the License, or
-;;     (at your option) any later version.
-;;     Megatest is distributed in the hope that it will be useful,
-;;     but WITHOUT ANY WARRANTY; without even the implied warranty of
-;;     GNU General Public License for more details.
-;;     You should have received a copy of the GNU General Public License
-;;     along with Megatest.  If not, see <>.
-(require-extension (srfi 18) extras tcp s11n)
-(use sqlite3 srfi-1 posix regex regex-case srfi-69 hostinfo md5 message-digest)
-(import (prefix sqlite3 sqlite3:))
-(use spiffy uri-common intarweb http-client spiffy-request-vars)
-(tcp-buffer-size 2048)
-(declare (unit fs-transport))
-(declare (uses common))
-(declare (uses db))
-(declare (uses tests))
-(declare (uses tasks)) ;; tasks are where stuff is maintained about what is running.
-(include "common_records.scm")
-(include "db_records.scm")
-;; F S   T R A N S P O R T   S E R V E R
-;; There is no "server" per se but a convience routine to make it non
-;; necessary to be reopening the db over and over again.
-(define (fs:process-queue-item packet)
-  (if (not *dbstruct-db*) ;; we will require that (setup-for-run) has already been called
-      (set! *dbstruct-db* (db:setup-db)))
-  (debug:print-info 11 *default-log-port* "fs:process-queue-item called with packet=" packet)
-  (db:process-queue-item *dbstruct-db* packet))

DELETED ftail.scm
Index: ftail.scm
--- ftail.scm
+++ /dev/null
@@ -1,108 +0,0 @@
-;; Copyright 2017, Matthew Welland.
-;; This file is part of Megatest.
-;;     Megatest is free software: you can redistribute it and/or modify
-;;     it under the terms of the GNU General Public License as published by
-;;     the Free Software Foundation, either version 3 of the License, or
-;;     (at your option) any later version.
-;;     Megatest is distributed in the hope that it will be useful,
-;;     but WITHOUT ANY WARRANTY; without even the implied warranty of
-;;     GNU General Public License for more details.
-;;     You should have received a copy of the GNU General Public License
-;;     along with Megatest.  If not, see <>.
-(declare (unit ftail))
-(module ftail
-    (
-     open-tail-db
-     tail-write
-     tail-get-fid
-     file-tail
-     )
-(import scheme chicken data-structures extras)
-(use (prefix sqlite3 sqlite3:) posix typed-records)
-(define (open-tail-db )
-  (let* ((basedir   (create-directory (conc "/tmp/" (current-user-name))))
-	 (dbpath    (conc basedir "/megatest_logs.db"))
-	 (dbexists  (file-exists? dbpath))
-	 (db        (sqlite3:open-database dbpath))
-	 (handler   (sqlite3:make-busy-timeout 136000)))
-    (sqlite3:set-busy-handler! db handler)
-    (sqlite3:execute db "PRAGMA synchronous = 0;")
-    (if (not dbexists)
-	(begin
-	  (sqlite3:execute db "CREATE TABLE IF NOT EXISTS log_files (id INTEGER PRIMARY KEY,filename TEXT,event_time TIMESTAMP DEFAULT (strftime('%s','now')));")
-	  (sqlite3:execute db "CREATE TABLE IF NOT EXISTS log_data  (id INTEGER PRIMARY KEY,fid INTEGER,line TEXT,event_time TIMESTAMP DEFAULT (strftime('%s','now')));")
-	  ))
-    db))
-(define (tail-write db fid lines)
-  (sqlite3:with-transaction
-   db
-   (lambda ()
-     (for-each
-      (lambda (line)
-	(sqlite3:execute db "INSERT INTO log_data (fid,line) VALUES (?,?);" fid line))
-      lines))))
-(define (tail-get-fid db fname)
-  (let ((fid   (handle-exceptions
-		   exn
-		   #f
-		 (sqlite3:first-result db "SELECT id FROM log_files WHERE filename=?;" fname))))
-    (if fid
-	fid
-	(begin
-	  (sqlite3:execute db "INSERT INTO log_files (filename) VALUES (?);" fname)
-	  (tail-get-fid db fname)))))
-(define (file-tail fname #!key (db-in #f))
-  (let* ((inp (open-input-file fname))
-	 (db  (or db-in (open-tail-db)))
-	 (fid (tail-get-fid db fname)))
-    (let loop ((inl    (read-line inp))
-	       (lines '())
-	       (lastwr (current-seconds)))
-      (if (eof-object? inl)
-	  (let ((timed-out (> (- (current-seconds) lastwr) 60)))
-	    (if timed-out (tail-write db fid (reverse lines)))
-	    (sleep 1)
-	    (if timed-out
-		(loop (read-line inp) '() (current-seconds))
-		(loop (read-line inp) lines lastwr)))
-	  (let* ((savelines (> (length lines) 19)))
-	    ;; (print inl)
-	    (if savelines (tail-write db fid (reverse lines)))
-	    (loop (read-line inp)
-		  (if savelines
-		      '()
-		      (cons inl lines))
-		  (if savelines
-		      (current-seconds)
-		      lastwr)))))))
-;; offset -20 means get last 20 lines
-(define (tail-get-lines db fid offset count)
-  (if (> offset 0)
-      (sqlite3:map-row (lambda (id line)
-		 (vector id line))
-	       db
-	       "SELECT id,line FROM log_data WHERE fid=? OFFSET ? LIMIT ?;" fid offset count)
-      (reverse ;; get N from the end
-       (sqlite3:map-row (lambda (id line)
-		  (vector id line))
-		db
-		"SELECT id,line FROM log_data WHERE fid=? ORDER BY id DESC LIMIT ?;" fid (abs offset)))))

Index: http-transport.scm
--- http-transport.scm
+++ http-transport.scm
@@ -35,14 +35,18 @@
 (declare (uses tasks)) ;; tasks are where stuff is maintained about what is running.
 (declare (uses server))
 ;; (declare (uses daemon))
 (declare (uses portlogger))
 (declare (uses rmt))
+(declare (uses dbfile))
+(declare (uses commonmod))
 (include "common_records.scm")
 (include "db_records.scm")
 (include "js-path.scm")
+(import dbfile commonmod)
 (require-library stml)
 (define (http-transport:make-server-url hostport)
   (if (not hostport)
@@ -97,11 +101,11 @@
 				      (dat ($ 'dat))
 				      (res #f))
 				  ((equal? (uri-path (request-uri (current-request)))
 					   '(/ "api"))
-				   (send-response body:    (api:process-request *dbstruct-db* $) ;; the $ is the request vars proc
+				   (send-response body:    (api:process-request *dbstruct-dbs* $) ;; the $ is the request vars proc
 						  headers: '((content-type text/plain)))
 				   (mutex-lock! *heartbeat-mutex*)
 				   (set! *db-last-access* (current-seconds))
 				   (mutex-unlock! *heartbeat-mutex*))
 				  ((equal? (uri-path (request-uri (current-request))) 
@@ -163,21 +167,21 @@
 		;; get_next_port goes here
 		(http-transport:try-start-server ipaddrstr
 						 (portlogger:open-run-close portlogger:find-port)))
-		(print "ERROR: Tried and tried but could not start the server"))))
+		(debug:print 0 *default-log-port* "ERROR: Tried and tried but could not start the server"))))
       ;; any error in following steps will result in a retry
       (set! *server-info* (list ipaddrstr portnum))
       (debug:print 0 *default-log-port* "INFO: Trying to start server on " ipaddrstr ":" portnum)
       ;; This starts the spiffy server
       ;; (start-server bind-address: ipaddrstr port: portnum)
       (if config-hostname ;; this is a hint to bind directly
-	  (start-server port: portnum bind-address: (if (equal? config-hostname "-")
-							ipaddrstr
-							config-hostname))
+	  (start-server port: portnum) ;; bind-address: (if (equal? config-hostname "-")
+					;;		ipaddrstr
+					;;		config-hostname))
 	  (start-server port: portnum))
       (portlogger:open-run-close portlogger:set-port portnum "released")
       (debug:print 1 *default-log-port* "INFO: server has been stopped"))))
@@ -281,29 +285,32 @@
                                                       (debug:print 0 *default-log-port* "WARNING: failure in with-input-from-request to " fullurl ".")
                                                       (debug:print 0 *default-log-port* " message: " msg ", exn=" exn)
                                                       (debug:print 0 *default-log-port* " cmd: " cmd " params: " params " key:" (or server-id "thekey"))
                                                       (debug:print 0 *default-log-port* " call-chain: " call-chain)))
-                                                (if runremote
-						    (remote-conndat-set! runremote #f))
+						(set! *runremote* #f)
+						(set! runremote #f)
+						;; (if runremote
+						;;    (remote-conndat-set! runremote #f))
 						;; Killing associated server to allow clean retry.")
 						;; (tasks:kill-server-run-id run-id)  ;; better to kill the server in the logic that called this routine?
 						(mutex-unlock! *http-mutex*)
-					     ;;; (signal (make-composite-condition
-					     ;;;          (make-property-condition 'commfail 'message "failed to connect to server")))
-					     ;;; "communications failed"
+						;; (signal (make-composite-condition
+						;;          (make-property-condition 'commfail 'message "failed to connect to server")))
+						;; "communications failed"
+						(close-all-connections!)
 						(db:obj->string #f))
-					    (with-input-from-request ;; was dat
-					     fullurl 
-					     (list (cons 'key (or server-id   "thekey"))
-						   (cons 'cmd cmd)
-						   (cons 'params sparams))
-					     read-string))
+					      (with-input-from-request ;; was dat
+					       fullurl 
+					       (list (cons 'key (or server-id   "thekey"))
+						     (cons 'cmd cmd)
+						     (cons 'params sparams))
+					       read-string))
 					  transport: 'http)
-                                         0)) ;; added this speculatively
+					 0)) ;; added this speculatively
 			      ;; Shouldn't this be a call to the managed call-all-connections stuff above?
-			      (close-all-connections!)
+			      ;; (close-all-connections!) ;; BUG? WHY IS THIS HERE? Are we failing to reuse connections?
 			      (mutex-unlock! *http-mutex*)
 	      (time-out     (lambda ()
 			      (thread-sleep! 45)
 			      (debug:print 0 *default-log-port* "WARNING: send-receive took more than 45 seconds!!")
@@ -350,11 +357,11 @@
 	      (print-call-chain *default-log-port*)
 	      (debug:print-error 0 *default-log-port* " closing connection failed with error: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn))
 	    (close-connection! api-dat)
-            ;;(close-idle-connections!)
+            (close-idle-connections!)
 (define (make-http-transport:server-dat)(make-vector 6))
@@ -390,10 +397,13 @@
   (let* ((api-url      (conc "http://" iface ":" port "/api"))
 	 (api-uri      (uri-reference (conc "http://" iface ":" port "/api")))
 	 (api-req      (make-request method: 'POST uri: api-uri))
 	 (server-dat   (vector iface port api-uri api-url api-req (current-seconds) server-id)))
 ;; run http-transport:keep-running in a parallel thread to monitor that the db is being 
 ;; used and to shutdown after sometime if it is not.
 (define (http-transport:keep-running) 
@@ -400,10 +410,11 @@
   ;; if none running or if > 20 seconds since 
   ;; server last used then start shutdown
   ;; This thread waits for the server to come alive
   (debug:print-info 0 *default-log-port* "Starting the sync-back, keep alive thread in server")
   (let* ((sdat              #f)
+	 (no-sync-db        (db:open-no-sync-db))
 	 (tmp-area          (common:get-db-tmp-area))
 	 (started-file      (conc tmp-area "/.server-started"))
 	 (server-start-time (current-seconds))
 	 (server-info (let loop ((start-time (current-seconds))
 				 (changed    #t)
@@ -457,18 +468,36 @@
     (let loop ((count         0)
 	       (server-state 'available)
 	       (bad-sync-count 0)
 	       (start-time     (current-milliseconds)))
       ;; Use this opportunity to sync the tmp db to megatest.db
-      (if (not server-going) ;; *dbstruct-db* 
+      (if (not server-going) ;; *dbstruct-dbs* 
 	    (debug:print 0 *default-log-port* "SERVER: dbprep")
-	    (set! *dbstruct-db*  (db:setup #t)) ;;  run-id))
+	    (set! *dbstruct-dbs*  (db:setup #t)) ;;  run-id)) FIXME!!!
 	    (set! server-going #t)
 	    (debug:print 0 *default-log-port* "SERVER: running, megatest version: " (common:get-full-version)) ;; NOTE: the server is NOT yet marked as running in the log. We do that in the keep-running routine.
-	    (thread-start! *watchdog*)))
+	    ;; (thread-start! *watchdog*)
+          ) 
+	  (if (and no-sync-db
+		   (common:low-noise-print 5 "sync-all")) ;; cheesy way to reduce frequency of running sync :)
+              (begin
+                (debug:print 0 *default-log-port* "keep-running calling db:all-db-sync at " (time->string (seconds->local-time) "%H:%M:%S"))
+		;; This is tougher than it seems - have to deal with multiple dbs
+		;; (db:process-transaction-queue *dbstruct-dbs*)
+		(db:all-db-sync *dbstruct-dbs*)
+                ;; (db:do-sync no-sync-db)
+	        ;; (db:run-lock-and-sync *no-sync-db*)
+              )
+          )
+      )
       ;; when things go wrong we don't want to be doing the various queries too often
       ;; so we strive to run this stuff only every four seconds or so.
       (let* ((sync-time (- (current-milliseconds) start-time))
 	    (rem-time  (quotient (- 4000 sync-time) 1000)))
@@ -490,11 +519,10 @@
 	    (debug:print-info 0 *default-log-port* "WARNING: interface changed, refreshing iface and port info")
 	    (set! iface new-iface)
 	    (set! port  new-port)
              (if (not *server-id*)
               (set! *server-id* (server:mk-signature)))
-            (debug:print 0 *default-log-port* (current-seconds) (current-directory) (current-process-id) (argv))
 	    (debug:print 0 *default-log-port* "SERVER STARTED: " iface ":" port " AT " (current-seconds) " server-id: " *server-id*)
 	    (flush-output *default-log-port*)))
       ;; Transfer *db-last-access* to last-access to use in checking that we are still alive
       (mutex-lock! *heartbeat-mutex*)
@@ -502,31 +530,47 @@
       (mutex-unlock! *heartbeat-mutex*)
       (if (common:low-noise-print 120 (conc "server running on " iface ":" port))
              (if (not *server-id*)
-              (set! *server-id* (server:mk-signature)))
-            (debug:print 0 *default-log-port* (current-seconds) (current-directory) (current-process-id) (argv))   
-	    (debug:print 0 *default-log-port* "SERVER STARTED: " iface ":" port " AT " (current-seconds) " server-id: " *server-id*)
-	    (flush-output *default-log-port*)))
+		 (set! *server-id* (server:mk-signature)))
+             (debug:print 0 *default-log-port* (current-seconds) (current-directory) (current-process-id) (argv))   
+	     (debug:print 0 *default-log-port* "SERVER STARTED: " iface ":" port " AT " (current-seconds) " server-id: " *server-id*)
+	     (flush-output *default-log-port*)))
       (if (common:low-noise-print 60 "dbstats")
 	    (debug:print 0 *default-log-port* "Server stats:")
       (let* ((hrs-since-start  (/ (- (current-seconds) server-start-time) 3600)))
+	 #;((and *server-run*
+	       (> (- (current-seconds) server-start-time) 420)) ;; let's try server replacement
+	  ;; ((adj-proc-load . 0.056875) (adj-core-load . 0.11375) (1m-load . 0.91) (5m-load . 0.77) (15m-load . 1.0) (proc . 16) (core . 8) (phys . 1))
+	  (let* ((loaddat       (common:get-normalized-cpu-load #f))
+		 (adj-proc-load (alist-ref 'adj-proc-load loaddat))
+		 (adj-core-load (alist-ref 'adj-core-load loaddat))
+		 (adj-load      (max adj-proc-load adj-core-load)))
+	    (if (< adj-load 2) ;; reduce chance of runaway
+		(server:run *toppath*))
+	    (db:all-db-sync *dbstruct-dbs*)
+	    (thread-sleep! 30)
+	    (http-transport:server-shutdown port)))
          ((and *server-run*
 	       (> (+ last-access server-timeout)
           (if (common:low-noise-print 120 "server continuing")
               (debug:print-info 0 *default-log-port* "Server continuing, seconds since last db access: " (- (current-seconds) last-access))
 	      (let ((curr-time (current-seconds)))
 		    (debug:print 0 *default-log-port* "ERROR: Failed to change timestamp on log file " server-log-file ". Are you out of space on that disk? exn=" exn)
-		  (if (not *server-overloaded*)
-		      (change-file-times server-log-file curr-time curr-time)))))
+		    (if (and (< (- (current-seconds) server-start-time) 600) ;; run for ten minutes for experiment, 3600 thereafter
+			     (not *server-overloaded*))
+			(change-file-times server-log-file curr-time curr-time)
+			(if (common:low-noise-print 120 "start new server")
+			    (server:kind-run *toppath*) ;; server:kind-run uses [servers] numservers
+			)))))
           (loop 0 server-state bad-sync-count (current-milliseconds)))
           (debug:print-info 0 *default-log-port* "Server timed out. seconds since last db access: " (- (current-seconds) last-access))
           (http-transport:server-shutdown port)))))))

Index: launch.scm
--- launch.scm
+++ launch.scm
@@ -439,16 +439,16 @@
 	  (let ((sighand (lambda (signum)
 			   ;; (signal-mask! signum) ;; to mask or not? seems to cause issues in exiting
 			   (if (eq? signum signal/stop)
 			       (debug:print-error 0 *default-log-port* "attempt to STOP process. Exiting."))
 			   (set! *time-to-exit* #t)
-			   (print "Received signal " signum ", cleaning up before exit (set this test to COMPLETED/ABORT) . Please wait...")
+			   (debug:print 0 *default-log-port* "Received signal " signum ", cleaning up before exit (set this test to COMPLETED/ABORT) . Please wait...")
 			   (let ((th1 (make-thread (lambda ()
-                                                     (print "set test to COMPLETED/ABORT begin.")
+                                                     (debug:print 0 *default-log-port* "set test to COMPLETED/ABORT begin.")
 						     (rmt:test-set-state-status run-id test-id "COMPLETED" "ABORT" "received kill signal")
-                                                     (print "set test to COMPLETED/ABORT complete.")
-						     (print "Killed by signal " signum ". Exiting")
+                                                     (debug:print 0 *default-log-port* "set test to COMPLETED/ABORT complete.")
+						     (debug:print 0 *default-log-port* "Killed by signal " signum ". Exiting")
 						     (exit 1))))
 				 (th2 (make-thread (lambda ()
 						     (thread-sleep! 20)
 						     (debug:print 0 *default-log-port* "Done")
 						     (exit 4)))))
@@ -481,11 +481,11 @@
              ;; -mrw- I'm removing KILLREQ from this list so that a test in KILLREQ state is treated as a "do not run" flag.
 	     ((member (db:test-get-state test-info) '("INCOMPLETE" "KILLED" "UNKNOWN" "STUCK")) ;; prior run of this test didn't complete, go ahead and try to rerun
 	      (debug:print 0 *default-log-port* "INFO: test is INCOMPLETE or KILLED, treat this execute call as a rerun request")
 	      ;; (tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a")
-              (rmt:general-call 'set-test-start-time #f test-id)
+              (rmt:general-call 'set-test-start-time run-id test-id)
               (rmt:test-set-state-status run-id test-id "REMOTEHOSTSTART" "n/a" #f)
 	      ) ;; prime it for running
 	     ((member (db:test-get-state test-info) '("RUNNING" "REMOTEHOSTSTART"))
 	      (if (process:alive-on-host? test-host test-pid)
 		  (debug:print-error 0 *default-log-port* "test state is "  (db:test-get-state test-info) " and process " test-pid " is still running on host " test-host ", cannot proceed")
@@ -494,11 +494,11 @@
 	      (debug:print 0 *default-log-port* "test state is " (db:test-get-state test-info) ", cannot proceed")
 	      (debug:print 0 *default-log-port* "exiting with status 1")
 	      (exit 1))
 	     ((not (member (db:test-get-state test-info) '("REMOVING" "REMOTEHOSTSTART" "RUNNING" "KILLREQ")))
 	      ;; (tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a")
-              (rmt:general-call 'set-test-start-time #f test-id)
+              (rmt:general-call 'set-test-start-time run-id test-id)
 	      (rmt:test-set-state-status run-id test-id "REMOTEHOSTSTART" "n/a" #f))
 	     (else ;; (member (db:test-get-state test-info) '("REMOVING" "REMOTEHOSTSTART" "RUNNING" "KILLREQ"))
 	      (debug:print 0 *default-log-port* "test state is " (db:test-get-state test-info) ", cannot proceed")
 	      (debug:print 0 *default-log-port* "exiting with status 1")
 	      (exit 1))))
@@ -593,11 +593,14 @@
 	      (list  "MT_TARGET"    target)
 	      (list  "MT_LINKTREE"  (common:get-linktree)) ;; (configf:lookup *configdat* "setup" "linktree"))
 	      (list  "MT_TESTSUITENAME" (common:get-testsuite-name))))
           ;;(bb-check-path msg: "launch:execute post block 3")
-	  (if mt-bindir-path (setenv "PATH" (conc (getenv "PATH") ":" mt-bindir-path)))
+	  (let ((tmppath (getenv "PATH"))) ;; inherited PATH; may be #f when unset
+	    (if (and tmppath (string-search " " tmppath)) ;; string-search takes the pattern first, then the string to scan
+		(debug:print 0 *default-log-port* "WARNING: spaces in PATH are not supported."))
+	    (if mt-bindir-path (setenv "PATH" (conc tmppath":"mt-bindir-path))))
           ;;(bb-check-path msg: "launch:execute post block 4")
 	  ;; (change-directory top-path)
 	  ;; Can setup as client for server mode now
 	  ;; (client:setup)
@@ -817,19 +820,25 @@
 	(if (eq? (length output) 0)
+;; this is a close duplicate of:
+;;    process:alive-on-host?
+;;    process:alive
 (define (launch:is-test-alive host pid)
   (let* ((same-host (equal? host (get-host-name)))
 	 (cmd (conc 
 	       (if same-host "" (conc "ssh "host" "))
 	       "pstree -A "pid)))
     (if (and host pid
 	     (not (equal? host "n/a")))
-	(let* ((output (with-input-from-pipe cmd read-lines)))
+	(let* ((output (if same-host
+			   (with-input-from-pipe cmd read-lines)
+			   (common:generic-ssh cmd read-lines '())))) ;; (with-input-from-pipe cmd read-lines)))
 	  (debug:print 2 *default-log-port* "Running " cmd " received " output)
 	  (if (eq? (length output) 0)
 	#t))) ;; assuming bad query is about a live test is likely not the right thing to do?
@@ -910,10 +919,16 @@
 ;;           *runconfigdat* (runconfigs.config info)
 ;;           *configstatus* (status of the read data)
 (define (launch:setup #!key (force-reread #f) (areapath #f))
   (mutex-lock! *launch-setup-mutex*)
+  ;; Emergency stop: once *toppath* is known, a file named "stop-the-train" in it makes this process log an error and exit 1.
+  (if (and *toppath*
+	   (file-exists? (conc *toppath*"/stop-the-train")))
+      (begin
+	(debug:print 0 *default-log-port* "ERROR: found file "*toppath*"/stop-the-train, exiting immediately")
+	(exit 1))) ;; NOTE: exits while *launch-setup-mutex* (locked just above) is still held
   (if (and *toppath*
 	   (eq? *configstatus* 'fulldata) (not force-reread)) ;; got it all
 	(debug:print 2 *default-log-port* "NOTE: skipping launch:setup-body call since we have fulldata")
 	(mutex-unlock! *launch-setup-mutex*)
@@ -1450,12 +1465,12 @@
 	 (contour         #f)) ;; NOT READY FOR THIS (args:get-arg "-contour")))
     (let loop ((delta        (- (current-seconds) *last-launch*))
 	       (launch-delay (configf:lookup-number *configdat* "setup" "launch-delay" default: 0)))
       (if (> launch-delay delta)
-	    (if (common:low-noise-print 1200 "test launch delay") ;; every two hours or so remind the user about launch delay.
-		(debug:print-info 0 *default-log-port* "NOTE: test launches are delayed by " launch-delay " seconds. See megatest.config launch-delay setting to adjust.")) ;; launch of " test-name " for " (- launch-delay delta) " seconds"))
+	    ;; (if (common:low-noise-print 1200 "test launch delay") ;; every two hours or so remind the user about launch delay.
+	;;	(debug:print-info 0 *default-log-port* "NOTE: test launches are delayed by " launch-delay " seconds. See megatest.config launch-delay setting to adjust.")) ;; launch of " test-name " for " (- launch-delay delta) " seconds"))
 	    (thread-sleep! (- launch-delay delta))
 	    (loop (- (current-seconds) *last-launch*) launch-delay))))
     (change-directory *toppath*)
     (alist->env-vars ;; consolidate this code with the code in megatest.scm for "-execute", *maybe* - the longer they are set the longer each launch takes (must be non-overlapping with the vars)
@@ -1647,11 +1662,11 @@
 	(debug:print 2 *default-log-port* "Launching completed, updating db")
 	(debug:print 2 *default-log-port* "Launch results: " launch-results)
 	(if (not launch-results)
-	      (print "ERROR: Failed to run " (string-intersperse fullcmd " ") ", exiting now")
+	      (debug:print 0 *default-log-port* "ERROR: Failed to run " (string-intersperse fullcmd " ") ", exiting now")
 	      ;; (sqlite3:finalize! db)
 	      ;; good ole "exit" seems not to work
 	      ;; (_exit 9)
 	      ;; but this hack will work! Thanks go to Alan Post of the Chicken email list
 	      ;; NB// Is this still needed? Should be safe to go back to "exit" now?

Index: margs.scm
--- margs.scm
+++ margs.scm
@@ -33,10 +33,15 @@
 (define (args:get-arg-from ht arg . default)
   (if (null? default)
       (hash-table-ref/default ht arg #f)
       (hash-table-ref/default ht arg (car default))))
+(define (args:remove-arg-from-ht arg)
+      (hash-table-delete! args:arg-hash arg)
 (define (args:usage . args)
   (if (> (length args) 0)
       (apply print "ERROR: " args))
   (if (string? help)
       (print help)

Index: megatest-version.scm
--- megatest-version.scm
+++ megatest-version.scm
@@ -18,6 +18,6 @@
 ;; Always use two or four digit decimal
 ;; 1.01, 1.02...1.10,1.11,1.1101 ... 1.99,2.00..
 ;; (declare (unit megatest-version))
-(define megatest-version 1.6592)
+(define megatest-version 1.7015)

Index: megatest.scm
--- megatest.scm
+++ megatest.scm
@@ -30,21 +30,38 @@
 (declare (uses server))
 (declare (uses client))
 (declare (uses tests))
 (declare (uses genexample))
 ;; (declare (uses daemon))
 (declare (uses db))
 ;; (declare (uses dcommon))
 (declare (uses tdb))
 (declare (uses mt))
 (declare (uses api))
 (declare (uses tasks)) ;; only used for debugging.
 (declare (uses env))
 (declare (uses diff-report))
+(declare (uses db))
+(declare (uses dbmod))
+(declare (uses dbmod.import))
+(declare (uses commonmod))
+(declare (uses commonmod.import))
+(declare (uses dbfile))
+(declare (uses dbfile.import))
+;; (declare (uses debugprint))
+;; (declare (uses debugprint.import))
+;; (declare (uses mtargs))
+;; (declare (uses mtargs.import))
 ;; (declare (uses ftail))
 ;; (import ftail)
+(import dbmod
+	commonmod
+	dbfile)
 (define *db* #f) ;; this is only for the repl, do not use in general!!!!
 (include "common_records.scm")
 (include "key_records.scm")
@@ -62,10 +79,12 @@
 (require-library mutils)
 (define *usage-log-file* #f)    ;; put path to file for logging usage in this var in the ~/.megatestrc file
 (define *usage-use-seconds* #t) ;; for Epoc seconds in usage logging change this to #t in ~/.megatestrc file
+(dbfile:db-init-proc db:initialize-main-db)
 ;; load the ~/.megatestrc file, put (use trace)(trace-call-sites #t)(trace function-you-want-to-trace) in this file
 (let ((debugcontrolf (conc (get-environment-variable "HOME") "/.megatestrc")))
   (if (common:file-exists? debugcontrolf)
@@ -76,20 +95,19 @@
 (if (and *usage-log-file*
          (file-write-access? *usage-log-file*))
       (lambda ()
-        (print
-         (if *usage-use-seconds*
-             (current-seconds)
-             (time->string
-              (seconds->local-time (current-seconds))
-              "%Yww%V.%w %H:%M:%S"))
-         " "
-         (current-user-name) " "
-         (current-directory) " "
-         "\"" (string-intersperse (argv) " ") "\""))
+        (print (if *usage-use-seconds*
+		   (current-seconds)
+		   (time->string
+		    (seconds->local-time (current-seconds))
+		    "%Yww%V.%w %H:%M:%S"))
+               " "
+               (current-user-name) " "
+               (current-directory) " "
+               "\"" (string-intersperse (argv) " ") "\""))
 ;; Disabled help items
 ;;  -rollup                 : (currently disabled) fill run (set by :runname)  with latest test(s)
 ;;                            from prior runs with same keys
@@ -494,19 +512,19 @@
 ;; The watchdog is to keep an eye on things like db sync etc.
 ;; TODO: for multiple areas, we will have multiple watchdogs; and multiple threads to manage
-(define *watchdog* (make-thread
-		    (lambda ()
-		      (handle-exceptions
-			  exn
-			  (begin
-			    (print-call-chain)
-			    (print " message: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn))
-			(common:watchdog)))
-		    "Watchdog thread"))
+;;(define *watchdog* (make-thread
+;;		    (lambda ()
+;;		      (handle-exceptions
+;;			  exn
+;;			  (begin
+;;			    (print-call-chain)
+;;			    (print " message: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn))
+;;			(common:watchdog)))
+;;		    "Watchdog thread"))
 ;;(if (not (args:get-arg "-server"))
 ;;    (thread-start! *watchdog*)) ;; if starting a server; wait till we get to running state before kicking off watchdog
 (let* ((no-watchdog-args
@@ -534,13 +552,17 @@
                                                  (loop (car tail) (cdr tail))))))      
        (no-watchdog-args-vals (filter (lambda (x) x)
                                       (map args:get-arg no-watchdog-args)))
        (start-watchdog (and (null? no-watchdog-args-vals) start-watchdog-specail-arg-val)))
        ;(print  "no-watchdog-args="no-watchdog-args "no-watchdog-args-vals="no-watchdog-args-vals " start-watchdog-specail-arg-val:" start-watchdog-specail-arg-val " start-watchdog:" start-watchdog) 
-  (if start-watchdog
-      (thread-start! *watchdog*)))
+;;  (if start-watchdog
+;;      (thread-start! *watchdog*))
+    #t
+;; stop the train watchdog
 ;; bracket open-output-file with code to make leading directory if it does not exist and handle exceptions
 (define (open-logfile logpath-in)
    (let* ((log-dir (or (pathname-directory logpath-in) "."))
@@ -696,18 +718,17 @@
       (save-environment-as-files (args:get-arg "-env2file"))
       (set! *didsomething* #t)))
 (if (args:get-arg "-list-disks")
     (let ((toppath (launch:setup)))
-      (print 
-       (string-intersperse 
-	(map (lambda (x)
-	       (string-intersperse 
-		x
-		" => "))
-	     (common:get-disks *configdat*))
-	"\n"))
+      (print (string-intersperse 
+	      (map (lambda (x)
+		     (string-intersperse 
+		      x
+		      " => "))
+		   (common:get-disks *configdat*))
+	      "\n"))
       (set! *didsomething* #t)))
 ;; csv processing record
 (define (make-refdb:csv)
@@ -931,13 +952,13 @@
 (if (or (args:get-arg "-list-servers")
         (args:get-arg "-kill-servers"))
     (let ((tl (launch:setup)))
       (if tl ;; all roads from here exit
 	  (let* ((servers (server:get-list *toppath*))
-		 (fmtstr  "~8a~22a~20a~20a~8a\n"))
-	    (format #t fmtstr "pid" "Interface:port" "age (hms)" "Last mod" "State")
-	    (format #t fmtstr "===" "==============" "=========" "========" "=====")
+		 (fmtstr  "~33a~22a~20a~20a~8a\n"))
+	    (format #t fmtstr "ID" "host:port" "age (hms)" "Last mod" "State")
+	    (format #t fmtstr "==" "=========" "=========" "========" "=====")
 	    (for-each ;;  ( mod-time host port start-time pid )
 	     (lambda (server)
 	       (let* ((mtm (any->number (car server)))
 		      (mod (if mtm (- (current-seconds) mtm) "unk"))
 		      (age (- (current-seconds)(or (any->number (list-ref server 3)) (current-seconds))))
@@ -1835,11 +1856,11 @@
        (lambda (target runname keys keyvals)
 	 (if (or (string-search "%" target)
 		 (string-search "%" runname)) ;; we are being asked to re-run multiple runs
 	     (let* ((run-specs (rmt:simple-get-runs runname #f #f target #f))) ;; list of simple-run records
 	       (debug:print-info 0 *default-log-port* "Pattern supplied for target or runname with "
-				 (length run-specs) " matches round. Running each in turn.")
+				 (length run-specs) " matches found. Running each in turn.")
 	       (if (null? run-specs)
 		   (debug:print 0 *default-log-port* "WARNING: No runs match target " target " and runname " runname))
 	       (for-each (lambda (spec) 
 			   (let* ((precmd     (if (args:get-arg "-precmd")(conc (args:get-arg "-precmd") " ") ""))
 				  (newcmdline (conc
@@ -2290,22 +2311,22 @@
 	    (debug:print 0 *default-log-port* "Failed to setup, exiting") 
 	    (exit 1)))
       ;; keep this one local
       ;; (open-run-close patch-db #f)
-      (let ((dbstruct (db:setup #f areapath: *toppath*)))
-        (common:cleanup-db dbstruct full: #t))
+      (let ((dbstructs (db:setup #f)))
+        (common:cleanup-db dbstructs full: #t))
       (set! *didsomething* #t)))
 (if (args:get-arg "-cleanup-db")
       (if (not (launch:setup))
 	    (debug:print 0 *default-log-port* "Failed to setup, exiting") 
 	    (exit 1)))
-      (let ((dbstruct (db:setup #f areapath: *toppath*)))
-        (common:cleanup-db dbstruct))
+      (let ((dbstructs (db:setup #f)))
+        (common:cleanup-db dbstructs))
       (set! *didsomething* #t)))
 (if (args:get-arg "-mark-incompletes")
       (if (not (launch:setup))
@@ -2357,14 +2378,14 @@
 (if (or (getenv "MT_RUNSCRIPT")
 	(args:get-arg "-repl")
 	(args:get-arg "-load"))
     (let* ((toppath (launch:setup))
-	   (dbstruct (if (and toppath
-                              (common:on-homehost?))
-                         (db:setup #t)
-                         #f))) ;; make-dbr:dbstruct path: toppath local: (args:get-arg "-local")) #f)))
+	   (dbstructs (if (and toppath
+                               (common:on-homehost?))
+                          (db:setup #t)
+                          #f))) ;; make-dbr:dbstruct path: toppath local: (args:get-arg "-local")) #f)))
       (if *toppath*
 	   ((getenv "MT_RUNSCRIPT")
 	    ;; How to run megatest scripts
@@ -2377,15 +2398,16 @@
 	    ;; EOF
-	      (set! *db* dbstruct)
+	      (set! *db* dbstructs)
 	      (import extras) ;; might not be needed
 	      ;; (import csi)
 	      (import readline)
 	      (import apropos)
+	      (import dbfile)
 	      ;; (import (prefix sqlite3 sqlite3:)) ;; doesn't work ...
 	      (if *use-new-readline*
 		    (install-history-file (get-environment-variable "HOME") ".megatest_history") ;;  [homedir] [filename] [nlines])
@@ -2447,27 +2469,29 @@
 ;; ;; ;; redo me        (list "uname" "rundir" "final_logf" "comment"))
 ;; ;; ;; redo me       (set! *didsomething* #t)))
 (if (args:get-arg "-import-megatest.db")
+      (launch:setup)
        (db:setup #f)
-       ;; 'new2old
       (set! *didsomething* #t)))
 (when (args:get-arg "-sync-brute-force")
+  ;; Handle -sync-brute-force: set up the area, then build the brute force
+  ;; syncer via server:get-bruteforce-syncer and call it immediately.
+  ;; As in the other command handlers in this file, give up early when
+  ;; launch:setup reports failure instead of continuing into db:setup.
+  (if (not (launch:setup))
+      (begin
+	(debug:print 0 *default-log-port* "Failed to setup, exiting")
+	(exit 1)))
   ((server:get-bruteforce-syncer (db:setup #t) persist-until-sync: #t))
   (set! *didsomething* #t))
 (if (args:get-arg "-sync-to-megatest.db")
-    (let* ((dbstruct (db:setup #f))
-	   (tmpdbpth (cdr (dbr:dbstruct-tmpdb dbstruct)))
+    (let* ((duh      (launch:setup))
+	   (dbstruct (db:setup #t))
+	   (tmpdbpth (dbr:dbstruct-tmppath dbstruct))
 	   (lockfile (conc tmpdbpth ".lock"))
 	   (locked   (common:simple-file-lock lockfile)) 
 	   (res      (if locked
@@ -2474,12 +2498,12 @@
       (if res
 	    (common:simple-file-release-lock lockfile)
-	    (print "Synced " res " records to megatest.db"))
-	  (print "Skipping sync, there is a sync in progress."))
+	    (debug:print 0 *default-log-port* "Synced " res " records to megatest.db"))
+	  (debug:print 0 *default-log-port* "Skipping sync, there is a sync in progress."))
       (set! *didsomething* #t)))
 (if (args:get-arg "-sync-to")
     (let ((toppath (launch:setup)))
       (tasks:sync-to-postgres *configdat* (args:get-arg "-sync-to"))
@@ -2533,14 +2557,14 @@
 ;;(debug:print-info 13 *default-log-port* "thread-join! watchdog")
 ;; join the watchdog thread if it has been thread-start!ed  (it may not have been started in the case of a server that never enters running state)
 ;;   (symbols returned by thread-state: created ready running blocked suspended sleeping terminated dead)
 ;; TODO: for multiple areas, we will have multiple watchdogs; and multiple threads to manage
-(if (thread? *watchdog*)
-    (case (thread-state *watchdog*)
-      ((ready running blocked sleeping terminated dead)
-       (thread-join! *watchdog*))))
+;;(if (thread? *watchdog*)
+;;    (case (thread-state *watchdog*)
+;;      ((ready running blocked sleeping terminated dead)
+;;       (thread-join! *watchdog*))))
 (set! *time-to-exit* #t)
 (if (not (eq? *globalexitstatus* 0))
     (if (or (args:get-arg "-run")(args:get-arg "-runtests")(args:get-arg "-runall"))

Index: mtargs/mtargs.scm
--- mtargs/mtargs.scm
+++ mtargs/mtargs.scm
@@ -56,10 +56,20 @@
   (if (string? help)
       (print help)
       (print "Usage: " (car (argv)) " ... "))
   (exit 0))
+;; any-defined?: takes any number of argument names and answers #t when
+;; get-arg yields a true value for at least one of them, #f otherwise.
+;; Every name is looked up; the walk does not stop at the first hit.
+(define (any-defined? . param)
+  (let scan ((remaining param)
+             (seen      #f))
+    (if (null? remaining)
+        seen
+        (let ((hit (get-arg (car remaining))))
+          (scan (cdr remaining)
+                (if hit #t seen))))))
+;; args: 
 (define (get-args args params switches arg-hash num-needed)
   (let* ((numtargs (length args))
 	 (adj-num-needed (if num-needed (+ num-needed 2) #f)))
     (if (< numtargs (if adj-num-needed adj-num-needed 2))
 	(if (>= num-needed 1)

Index: newdashboard.scm
--- newdashboard.scm
+++ newdashboard.scm
@@ -416,11 +416,13 @@
 			    #:numcol 1
 			    #:numlin 4
 			    #:numcol-visible 1
 			    #:numlin-visible 4
 			    #:click-cb (lambda (obj lin col status)
-					 (print "obj: " obj " lin: " lin " col: " col " status: " status))))
+					 #f
+					 ;;(print "obj: " obj " lin: " lin " col: " col " status: " status)
+					 )))
 	 (test-info-matrix (iup:matrix
 		            #:expand "YES"
 		            #:numcol 1
 		            #:numlin 7
 		            #:numcol-visible 1
@@ -555,11 +557,12 @@
 		      (let* ((run-path (tree:node->path obj id))
 			     (test-id  (tree-path->test-id (cdr run-path))))
 			;; (if test-id
 			;;     (hash-table-set! (dboard:data-curr-test-ids *data*)
 			;; 		     window-id test-id))
-			(print "path: " (tree:node->path obj id) " test-id: " test-id))))))
+			;; (print "path: " (tree:node->path obj id) " test-id: " test-id)
+			)))))
      (iup:attribute-set! tb "VALUE" "0")
      (iup:attribute-set! tb "NAME" "Runs")
      ;;(iup:attribute-set! tb "ADDEXPANDED" "NO")
      ;; (dboard:data-tests-tree-set! *data* tb)
@@ -663,11 +666,13 @@
 			   #:numcol 100
 			   #:numlin 100
 			   #:numcol-visible 7
 			   #:numlin-visible 7
 			   #:click-cb (lambda (obj lin col status)
-					(print "obj: " obj " lin: " lin " col: " col " status: " status)))))
+					#f
+					;; (print "obj: " obj " lin: " lin " col: " col " status: " status)
+					))))
     (iup:attribute-set! runs-matrix "RESIZEMATRIX" "YES")
     (iup:attribute-set! runs-matrix "WIDTH0" "100")
     ;; (dboard:data-runs-matrix-set! *data* runs-matrix)

Index: process.scm
--- process.scm
+++ process.scm
@@ -85,11 +85,11 @@
 (define (process:cmd-run-proc-each-line cmd proc . params)
   ;; (print "Called with cmd=" cmd ", proc=" proc ", params=" params)
-     (print "ERROR:  Failed to run command: " cmd " " (string-intersperse params " "))
+     (debug:print 0 *default-log-port* "ERROR:  Failed to run command: " cmd " " (string-intersperse params " "))
      (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
      (debug:print 5 *default-log-port* "exn=" (condition->list exn))
    (let-values (((fh fho pid) (if (null? params)
 				  (process cmd)
@@ -195,25 +195,33 @@
        (and (number? rpid)
 	    (equal? rpid pid)))))
 (define (process:alive-on-host? host pid)
   (let ((cmd (conc "ssh " host " ps -o pid= -p " pid)))
-    (handle-exceptions
-	exn
-      (begin
-	(debug:print 0 *default-log-port* "failed to identify if process " pid ", on host " host " is alive. exn=" exn)
-	#f) ;; anything goes wrong - assume the process in NOT running.
-     (with-input-from-pipe 
-      cmd
-      (lambda ()
-	(let loop ((inl (read-line)))
-	  (if (eof-object? inl)
-	      #f
-	      (let* ((clean-str (string-substitute "^[^\\d]*([0-9]+)[^\\d]*$" "\\1" inl))
-		     (innum     (string->number clean-str)))
-		(and innum
-		     (eq? pid innum))))))))))
+    (common:generic-ssh
+     cmd
+   ;; 
+   ;; handle-exceptions
+   ;; 	exn
+   ;;  (begin
+   ;; 	(debug:print 0 *default-log-port* "failed to identify if process " pid ", on host " host " is alive. exn=" exn)
+   ;; 	#f) ;; anything goes wrong - assume the process in NOT running.
+   ;;  (with-input-from-pipe 
+   ;;   cmd
+     (lambda ()
+       (let loop ((inl (read-line)))
+	 (if (eof-object? inl)
+	     #f
+	     (let* ((clean-str (string-substitute "^[^\\d]*([0-9]+)[^\\d]*$" "\\1" inl))
+		    (innum     (string->number clean-str)))
+	       (and innum
+		    (eq? pid innum))))))
+     #f
+     (lambda ()
+       (debug:print 0 *default-log-port* "failed to identify if process "
+		    pid", on host "host" is alive. exn="exn)))))
 (define (process:get-sub-pids pid)
    (conc "pstree -A -p " pid) ;; | tr 'a-z\\-+`()\\.' ' ' " pid)
    (lambda ()

DELETED records-vs-vectors-vs-coops.scm
Index: records-vs-vectors-vs-coops.scm
--- records-vs-vectors-vs-coops.scm
+++ /dev/null
@@ -1,110 +0,0 @@
-;;  Copyright 2006-2017, Matthew Welland.
-;; This file is part of Megatest.
-;;     Megatest is free software: you can redistribute it and/or modify
-;;     it under the terms of the GNU General Public License as published by
-;;     the Free Software Foundation, either version 3 of the License, or
-;;     (at your option) any later version.
-;;     Megatest is distributed in the hope that it will be useful,
-;;     but WITHOUT ANY WARRANTY; without even the implied warranty of
-;;     GNU General Public License for more details.
-;;     You should have received a copy of the GNU General Public License
-;;     along with Megatest.  If not, see <>.
-;; (include "vg.scm")
-;; (declare (uses vg))
-(use foof-loop defstruct coops)
-(defstruct obj     type fill-color angle)
-(define (make-vg:obj)(make-vector 3))
-(define-inline (vg:obj-get-type         vec)    (vector-ref  vec 0))
-(define-inline (vg:obj-get-fill-color   vec)    (vector-ref  vec 1))
-(define-inline (vg:obj-get-angle        vec)    (vector-ref  vec 2))
-(define-inline (vg:obj-set-type!        vec val)(vector-set! vec 0 val))
-(define-inline (vg:obj-set-fill-color!  vec val)(vector-set! vec 1 val))
-(define-inline (vg:obj-set-angle!       vec val)(vector-set! vec 2 val))
-(use simple-exceptions)
-(define vgs:obj-exn (make-exception "wrong record type, expected vgs:obj." 'assert))
-(define (make-vgs:obj)(let ((v (make-vector 4)))(vector-set! v 0 'vgs:obj) v))
-(define-inline (vgs:obj-type             vec)(if (eq? (vector-ref vec 0) 'vgs:obj)(vector-ref  vec 1)(raise (vgs:obj-exn 'vgs:obj-type 'xpr))))
-(define-inline (vgs:obj-fill-color       vec)(if (eq? (vector-ref vec 0) 'vgs:obj)(vector-ref  vec 2)(raise (vgs:obj-exn 'vgs:obj-fill-color 'xpr))))
-(define-inline (vgs:obj-angle            vec)(if (eq? (vector-ref vec 0) 'vgs:obj)(vector-ref  vec 3)(raise (vgs:obj-exn 'vgs:obj-angle 'xpr))))
-(define-inline (vgs:obj-type-set!        vec val)(if (eq? (vector-ref vec 0) 'vgs:obj)(vector-set! vec 1 val)(raise (vgs:obj-exn 'type))))
-(define-inline (vgs:obj-fill-color-set!  vec val)(if (eq? (vector-ref vec 0) 'vgs:obj)(vector-set! vec 2 val)(raise (vgs:obj-exn 'fill-color))))
-(define-inline (vgs:obj-angle-set!       vec val)(if (eq? (vector-ref vec 0) 'vgs:obj)(vector-set! vec 3 val)(raise (vgs:obj-exn 'angle))))
-(define-class <vgc> ()
-  ((type)
-   (fill-color)
-   (angle)))
-;; first use raw vectors
-(print "Using vectors")
- (loop ((for r (up-from 0 (to 255))))
-       (loop ((for g (up-from 0 (to 255))))
-	     (loop ((for b (up-from 0 (to 255))))
-		   (let ((obj (make-vg:obj)))
-		     (vg:obj-set-type! obj 'abc)
-		     (vg:obj-set-fill-color! obj "green")
-		     (vg:obj-set-angle! obj 135)
-		     (let ((a (vg:obj-get-type obj))
-			   (b (vg:obj-get-fill-color obj))
-			   (c (vg:obj-get-angle obj)))
-		       obj))))))
-;; first use raw vectors with safe mode
-(print "Using vectors (safe mode)")
- (loop ((for r (up-from 0 (to 255))))
-       (loop ((for g (up-from 0 (to 255))))
-	     (loop ((for b (up-from 0 (to 255))))
-		   (let ((obj (make-vgs:obj)))
-		     ;; (badobj (make-vector 20)))
-		     (vgs:obj-type-set! obj 'abc)
-		     (vgs:obj-fill-color-set! obj "green")
-		     (vgs:obj-angle-set! obj 135)
-		     (let ((a (vgs:obj-type obj))
-			   (b (vgs:obj-fill-color obj))
-			   (c (vgs:obj-angle obj)))
-		       obj))))))
-;; first use defstruct
-(print "Using defstruct")
- (loop ((for r (up-from 0 (to 255))))
-       (loop ((for g (up-from 0 (to 255))))
-	     (loop ((for b (up-from 0 (to 255))))
-		   (let ((obj (make-obj)))
-		     (obj-type-set! obj 'abc)
-		     (obj-fill-color-set! obj "green")
-		     (obj-angle-set! obj 135)
-		     (let ((a (obj-type obj))
-			   (b (obj-fill-color obj))
-			   (c (obj-angle obj)))
-		       obj))))))
-;; first use defstruct
-(print "Using coops")
- (loop ((for r (up-from 0 (to 255))))
-       (loop ((for g (up-from 0 (to 255))))
-	     (loop ((for b (up-from 0 (to 255))))
-		   (let ((obj (make <vgc>)))
-		     (set! (slot-value obj 'type) 'abc)
-		     (set! (slot-value obj 'fill-color) "green")
-		     (set! (slot-value obj 'angle) 135)
-		     (let ((a (slot-value obj 'type))
-			   (b (slot-value obj 'fill-color))
-			   (c (slot-value obj 'angle)))
-		       obj))))))

Index: rmt.scm
--- rmt.scm
+++ rmt.scm
@@ -21,14 +21,15 @@
 (use format typed-records) ;; RADT => purpose of json format??
 (declare (unit rmt))
 (declare (uses api))
 (declare (uses http-transport))
+(declare (uses dbfile))
 (include "common_records.scm")
 ;; (declare (uses rmtmod))
-;; (import rmtmod)
+(import dbfile) ;; rmtmod)
@@ -63,11 +64,11 @@
 (define (rmt:send-receive cmd rid params #!key (attemptnum 1)(area-dat #f)) ;; start attemptnum at 1 so the modulo below works as expected
   #;(common:telemetry-log (conc "rmt:"(->string cmd))
                         payload: `((rid . ,rid)
                                    (params . ,params)))
   (if (> attemptnum 2)
       (debug:print 0 *default-log-port* "INFO: attemptnum in rmt:send-receive is " attemptnum))
    ((> attemptnum 2) (thread-sleep! 0.05))
@@ -119,10 +120,17 @@
 	(thread-sleep! 0.1) ;; since we shouldn't get here, delay a little
 	(remote-hh-dat-set! runremote (common:get-homehost)))
     ;;(print "BB> readonly-mode is "readonly-mode" dbfile is "dbfile)
+     #;((> (- (current-seconds)(remote-connect-time runremote)) 180) ;; reconnect to server every 180 seconds
+      (debug:print 0 *default-log-port* "Forcing reconnect to server(s) due to 180 second timeout.")
+      (set! *runremote* #f)
+      ;; BUG: close-connections should go here?
+      (mutex-unlock! *rmt-mutex*)
+      (rmt:send-receive cmd rid params attemptnum: 1 area-dat: area-dat))
      ;;DOT EXIT;
      ;;DOT MUTEXLOCK -> EXIT [label="> 15 attempts"]; {rank=same "case 1" "EXIT" }
      ;; give up if more than 150 attempts
      ((> attemptnum 150)
       (debug:print 0 *default-log-port* "ERROR: 150 tries to start/connect to server. Giving up.")
@@ -157,12 +165,12 @@
            (remote-conndat runremote)
 	   (> (current-seconds) ;; if it has been more than server-timeout seconds since last contact, close this connection and start a new on
 	      (+ (http-transport:server-dat-get-last-access (remote-conndat runremote))
 		 (remote-server-timeout runremote))))
       (debug:print-info 0 *default-log-port* "Connection to " (remote-server-url runremote) " expired due to no accesses, forcing new connection.")
-      (http-transport:close-connections area-dat: runremote)
       (remote-conndat-set! runremote #f) ;; invalidate the connection, thus forcing a new connection.
+      (http-transport:close-connections area-dat: runremote)
       (mutex-unlock! *rmt-mutex*)
       (rmt:send-receive cmd rid params attemptnum: attemptnum))
      ;;DOT CASE5 [label="local\nread"];
      ;;DOT MUTEXLOCK -> CASE5 [label="server not required,\non homehost,\nread-only query"]; {rank=same "case 5" CASE5};
@@ -182,10 +190,12 @@
      ;; on homehost and this is a write, we already have a server, but server has died
      ((and (cdr (remote-hh-dat runremote))           ;; on homehost
            (not (member cmd api:read-only-queries))  ;; this is a write
            (remote-server-url runremote)             ;; have a server
            (not (server:ping (remote-server-url runremote) (remote-server-id runremote))))  ;; server has died. NOTE: this is not a cheap call! Need better approach.
+      (debug:print 0 *default-log-port* "WARNING: server appears to have died, trying to reconnect, case 6")
+      (http-transport:close-connections area-dat: runremote) ;; make sure to clean up
       (set! *runremote* (make-remote))
       (let* ((server-info (remote-server-info *runremote*))) 
             (if server-info
 		  (remote-server-url-set! *runremote* (server:record->url server-info))
@@ -258,10 +268,32 @@
      ;;DOT CASE11 -> "rmt:send-receive" [label="call failed"];
      ;;DOT CASE11 -> "RESULT" [label="call succeeded"];
      ;; not on homehost, do server query
      (else (extras-case-11 *default-log-port* runremote cmd params attemptnum rid)))))
     ;;DOT }
+;; No Title 
+;; Error: (vector-ref) out of range
+;; #(#<condition: (exn type)> (#("db.scm:3740: regex#regexp" #f #f) #("db.scm:3739: regex#string-substitute" #f #f) #("db.scm:3738: base64#base64-decode" #f #f) #("db.scm:3737: z3#z3:decode-buffer" #f #f) #("db.scm:3736: with-input-from-string" #f #f) #("db.scm:3741: s11n#deserialize" #f #f) #("api.scm:374: api:execute-requests" #f #f) #("api.scm:139: call-with-current-continuation" #f #f) #("api.scm:139: with-exception-handler" #f #f) #("api.scm:139: ##sys#call-with-values" #f #f) #("api.scm:158: string->symbol" #f #f) #("api.scm:160: current-milliseconds" #f #f) #("api.scm:161: dbr:dbstruct-read-only" #f #f) #("api.scm:139: k15" #f #f) #("api.scm:139: g19" #f #f) #("api.scm:142: get-call-chain" #f #f)) #("get-test-info-by-id" (1102 507299)))
+;; 6
+;; 	Call history:
+;; 	http-transport.scm:306: thread-terminate!	  
+;; 	http-transport.scm:307: debug:print-info	  
+;; 	common_records.scm:235: debug:debug-mode	  
+;; 	rmt.scm:259: k587	  
+;; 	rmt.scm:259: g591	  
+;; 	rmt.scm:276: http-transport:server-dat-update-last-access	  
+;; 	http-transport.scm:364: current-seconds	  
+;; 	rmt.scm:282: debug:print-info	  
+;; 	common_records.scm:235: debug:debug-mode	  
+;; 	rmt.scm:283: mutex-unlock!	  
+;; 	rmt.scm:287: extras-transport-succeded	  	<--
+;; +-----------------------------------------------------------------------------+
+;; | Exit Status    : 70  
 ;; bunch of small functions factored out of send-receive to make debug easier
 (define (extras-case-11 *default-log-port* runremote cmd params attemptnum rid)
@@ -283,33 +315,10 @@
 			      ((commfail)(vector #f "communications fail"))
 			      ((exn)(vector #f "other fail" (print-call-chain)))))
 		      (debug:print 0 *default-log-port* "ERROR: transport " (remote-transport runremote) " not supported")
-;; No Title 
-;; Error: (vector-ref) out of range
-;; #(#<condition: (exn type)> (#("db.scm:3740: regex#regexp" #f #f) #("db.scm:3739: regex#string-substitute" #f #f) #("db.scm:3738: base64#base64-decode" #f #f) #("db.scm:3737: z3#z3:decode-buffer" #f #f) #("db.scm:3736: with-input-from-string" #f #f) #("db.scm:3741: s11n#deserialize" #f #f) #("api.scm:374: api:execute-requests" #f #f) #("api.scm:139: call-with-current-continuation" #f #f) #("api.scm:139: with-exception-handler" #f #f) #("api.scm:139: ##sys#call-with-values" #f #f) #("api.scm:158: string->symbol" #f #f) #("api.scm:160: current-milliseconds" #f #f) #("api.scm:161: dbr:dbstruct-read-only" #f #f) #("api.scm:139: k15" #f #f) #("api.scm:139: g19" #f #f) #("api.scm:142: get-call-chain" #f #f)) #("get-test-info-by-id" (1102 507299)))
-;; 6
-;; 	Call history:
-;; 	http-transport.scm:306: thread-terminate!	  
-;; 	http-transport.scm:307: debug:print-info	  
-;; 	common_records.scm:235: debug:debug-mode	  
-;; 	rmt.scm:259: k587	  
-;; 	rmt.scm:259: g591	  
-;; 	rmt.scm:276: http-transport:server-dat-update-last-access	  
-;; 	http-transport.scm:364: current-seconds	  
-;; 	rmt.scm:282: debug:print-info	  
-;; 	common_records.scm:235: debug:debug-mode	  
-;; 	rmt.scm:283: mutex-unlock!	  
-;; 	rmt.scm:287: extras-transport-succeded	  	<--
-;; +-----------------------------------------------------------------------------+
-;; | Exit Status    : 70  
 	 (dat      (if (and (vector? dat-in) ;; ... check it is a correct size
 			    (> (vector-length dat-in) 1))
 		       (vector #f (conc "communications fail (type 2), dat-in=" dat-in))))
 	 (success  (if (vector? dat) (vector-ref dat 0) #f))
@@ -326,11 +335,11 @@
     (if success ;; success only tells us that the transport was
 	;; successful, have to examine the data to see if
 	;; there was a detected issue at the other end
 	(extras-transport-succeded *default-log-port* *rmt-mutex* attemptnum runremote res params rid cmd)
-           (debug:print-error 0 *default-log-port* " dat=" dat) 
+           (debug:print-error 2 *default-log-port* " dat=" dat) 
            (extras-transport-failed *default-log-port* *rmt-mutex* attemptnum runremote cmd rid params))
 (define (rmt:print-db-stats)
   (let ((fmtstr "~40a~7-d~9-d~20,2-f")) ;; "~20,2-f"
@@ -368,28 +377,28 @@
 			     (loop (car tal)(cdr tal) newmax-cmd currmax)))))))
     (mutex-unlock! *db-stats-mutex*)
 (define (rmt:open-qry-close-locally cmd run-id params #!key (remretries 5))
-  (let* ((qry-is-write   (not (member cmd api:read-only-queries)))
-	 (db-file-path   (db:dbfile-path)) ;;  0))
-	 (dbstruct-local (db:setup #t))  ;; make-dbr:dbstruct path:  dbdir local: #t)))
-	 (read-only      (not (file-write-access? db-file-path)))
-	 (start          (current-milliseconds))
-	 (resdat         (if (not (and read-only qry-is-write))
-			     (let ((v (api:execute-requests dbstruct-local (vector (symbol->string cmd) params))))
-			       (handle-exceptions ;; there has been a long history of receiving strange errors from values returned by the client when things go wrong..
-				exn               ;;  This is an attempt to detect that situation and recover gracefully
-				(begin
-				  (debug:print 0 *default-log-port* "ERROR: bad data from server " v " message: "  ((condition-property-accessor 'exn 'message) exn) ", exn=" exn)
-				  (vector #t '())) ;; should always get a vector but if something goes wrong return a dummy
-				(if (and (vector? v)
-					 (> (vector-length v) 1))
-				    (let ((newvec (vector (vector-ref v 0)(vector-ref v 1))))
-				      newvec)           ;; by copying the vector while inside the error handler we should force the detection of a corrupted record
-				    (vector #t '()))))  ;; we could also check that the returned types are valid
-			     (vector #t '())))
+  (let* ((qry-is-write    (not (member cmd api:read-only-queries)))
+	 (db-file-path    (db:dbfile-path)) ;;  0))
+	 (dbstructs-local (db:setup #t))  ;; make-dbr:dbstruct path:  dbdir local: #t)))
+	 (read-only       (not (file-write-access? db-file-path)))
+	 (start           (current-milliseconds))
+	 (resdat          (if (not (and read-only qry-is-write))
+			      (let ((v (api:execute-requests dbstructs-local (vector (symbol->string cmd) params))))
+			;;	(handle-exceptions ;; there has been a long history of receiving strange errors from values returned by the client when things go wrong..
+			;;	 exn               ;;  This is an attempt to detect that situation and recover gracefully
+			;;	 (begin
+			;;	   (debug:print 0 *default-log-port* "ERROR: bad data from server " v " message: "  ((condition-property-accessor 'exn 'message) exn) ", exn=" exn)
+			;;	   (vector #t '())) ;; should always get a vector but if something goes wrong return a dummy
+				 (if (and (vector? v)
+					  (> (vector-length v) 1))
+				     (let ((newvec (vector (vector-ref v 0)(vector-ref v 1))))
+				       newvec)           ;; by copying the vector while inside the error handler we should force the detection of a corrupted record
+				     (vector #t '()))) ;; )  ;; we could also check that the returned types are valid
+			      (vector #t '())))
 	 (success        (vector-ref resdat 0))
 	 (res            (vector-ref resdat 1))
 	 (duration       (- (current-milliseconds) start)))
     (if (and read-only qry-is-write)
         (debug:print 0 *default-log-port* "ERROR: attempt to write to read-only database ignored. cmd=" cmd))
@@ -412,16 +421,16 @@
                 (mutex-unlock! *db-multi-sync-mutex*)))))
 (define (rmt:send-receive-no-auto-client-setup connection-info cmd run-id params)
   (let* ((run-id   (if run-id run-id 0))
-	 (res  	   (handle-exceptions
-		       exn
-		     (begin
-		       (print "transport failed. exn=" exn)
-		       #f)
-		     (http-transport:client-api-send-receive run-id connection-info cmd params))))
+	 (res  	   ;; (handle-exceptions
+		   ;;     exn
+		   ;;   (begin
+		   ;;     (print "transport failed. exn=" exn)
+		   ;;     #f)
+		     (http-transport:client-api-send-receive run-id connection-info cmd params))) ;; )
     (if (and res (vector-ref res 0))
 	(vector-ref res 1) ;;; YES!! THIS IS CORRECT!! CHANGE IT HERE, THEN CHANGE rmt:send-receive ALSO!!!
@@ -443,18 +452,18 @@
 ;;  M I S C
 (define (rmt:login run-id)
-  (rmt:send-receive 'login run-id (list *toppath* megatest-version *my-client-signature*)))
+  (rmt:send-receive 'login run-id (list *toppath* megatest-version (client:get-signature))))
 ;; This login does no retries under the hood - it acts a bit like a ping.
 ;; Deprecated for nmsg-transport.
 (define (rmt:login-no-auto-client-setup connection-info)
   (case *transport-type* ;; run-id of 0 is just a placeholder
-    ((http)(rmt:send-receive-no-auto-client-setup connection-info 'login 0 (list *toppath* megatest-version *my-client-signature*)))
+    ((http)(rmt:send-receive-no-auto-client-setup connection-info 'login 0 (list *toppath* megatest-version (client:get-signature))))
     ;;((nmsg)(nmsg-transport:client-api-send-receive run-id connection-info 'login (list *toppath* megatest-version run-id *my-client-signature*)))
 ;; hand off a call to one of the db:queries statements
 ;; added run-id to make looking up the correct db possible 
@@ -525,10 +534,11 @@
 (define (rmt:get-targets)
   (rmt:send-receive 'get-targets #f '()))
 (define (rmt:get-target run-id)
+  (assert (number? run-id) "FATAL: Run id required.")
   (rmt:send-receive 'get-target run-id (list run-id)))
 (define (rmt:get-run-times runpatt targetpatt)
   (rmt:send-receive 'get-run-times #f (list runpatt targetpatt ))) 
@@ -537,13 +547,15 @@
 ;;  T E S T S
 ;; Just some syntatic sugar
 (define (rmt:register-test run-id test-name item-path)
+  (assert (number? run-id) "FATAL: Run id required.")
   (rmt:general-call 'register-test run-id run-id test-name item-path))
 (define (rmt:get-test-id run-id testname item-path)
+  (assert (number? run-id) "FATAL: Run id required.")
   (rmt:send-receive 'get-test-id run-id (list run-id testname item-path)))
 ;; run-id is NOT used
 (define (rmt:get-test-info-by-id run-id test-id)
@@ -556,39 +568,46 @@
 (define (rmt:test-get-rundir-from-test-id run-id test-id)
   (rmt:send-receive 'test-get-rundir-from-test-id run-id (list run-id test-id)))
 (define (rmt:open-test-db-by-test-id run-id test-id #!key (work-area #f))
+  (assert (number? run-id) "FATAL: Run id required.")
   (let* ((test-path (if (string? work-area)
 			(rmt:test-get-rundir-from-test-id run-id test-id))))
     (debug:print 3 *default-log-port* "TEST PATH: " test-path)
     (open-test-db test-path)))
 ;; WARNING: This currently bypasses the transaction wrapped writes system
 (define (rmt:test-set-state-status-by-id run-id test-id newstate newstatus newcomment)
+  (assert (number? run-id) "FATAL: Run id required.")
   (rmt:send-receive 'test-set-state-status-by-id run-id (list run-id test-id newstate newstatus newcomment)))
-(define (rmt:set-tests-state-status run-id                      testnames currstate currstatus newstate newstatus)
+(define (rmt:set-tests-state-status run-id testnames currstate currstatus newstate newstatus)
+  (assert (number? run-id) "FATAL: Run id required.")
   (rmt:send-receive 'set-tests-state-status run-id (list run-id testnames currstate currstatus newstate newstatus)))
 (define (rmt:get-tests-for-run run-id testpatt states statuses offset limit not-in sort-by sort-order qryvals last-update mode)
+  (assert (number? run-id) "FATAL: Run id required.")
   ;; (if (number? run-id)
   (rmt:send-receive 'get-tests-for-run run-id (list run-id testpatt states statuses offset limit not-in sort-by sort-order qryvals last-update mode)))
   ;;    (begin
   ;;	(debug:print-error 0 *default-log-port* "rmt:get-tests-for-run called with bad run-id=" run-id)
   ;;	(print-call-chain (current-error-port))
   ;;	'())))
 (define (rmt:get-tests-for-run-state-status run-id testpatt last-update)
+  (assert (number? run-id) "FATAL: Run id required.")
   (rmt:send-receive 'get-tests-for-run-state-status run-id (list run-id testpatt last-update)))
 ;; get stuff via synchash 
 (define (rmt:synchash-get run-id proc synckey keynum params)
+  (assert (number? run-id) "FATAL: Run id required.")
   (rmt:send-receive 'synchash-get run-id (list run-id proc synckey keynum params)))
 (define (rmt:get-tests-for-run-mindata run-id testpatt states status not-in)
+  (assert (number? run-id) "FATAL: Run id required.")
   (rmt:send-receive 'get-tests-for-run-mindata run-id (list run-id testpatt states status not-in)))
 ;; IDEA: Threadify these - they spend a lot of time waiting ...
 (define (rmt:get-tests-for-runs-mindata run-ids testpatt states status not-in)
@@ -631,40 +650,50 @@
 ;;     (apply append (map (lambda (run-id)
 ;; 			 (rmt:send-receive 'get-tests-for-run-mindata run-id (list run-ids testpatt states status not-in)))
 ;; 		       run-id-list))))
 (define (rmt:delete-test-records run-id test-id)
+  (assert (number? run-id) "FATAL: Run id required.")
   (rmt:send-receive 'delete-test-records run-id (list run-id test-id)))
 (define (rmt:test-set-state-status run-id test-id state status msg)
+  (assert (number? run-id) "FATAL: Run id required.")
   (rmt:send-receive 'test-set-state-status run-id (list run-id test-id state status msg)))
 (define (rmt:test-toplevel-num-items run-id test-name)
+  (assert (number? run-id) "FATAL: Run id required.")
   (rmt:send-receive 'test-toplevel-num-items run-id (list run-id test-name)))
 ;; (define (rmt:get-previous-test-run-record run-id test-name item-path)
 ;;   (rmt:send-receive 'get-previous-test-run-record run-id (list run-id test-name item-path)))
 (define (rmt:get-matching-previous-test-run-records run-id test-name item-path)
+  (assert (number? run-id) "FATAL: Run id required.")
   (rmt:send-receive 'get-matching-previous-test-run-records run-id (list run-id test-name item-path)))
 (define (rmt:test-get-logfile-info run-id test-name)
+  (assert (number? run-id) "FATAL: Run id required.")
   (rmt:send-receive 'test-get-logfile-info run-id (list run-id test-name)))
 (define (rmt:test-get-records-for-index-file run-id test-name)
+  (assert (number? run-id) "FATAL: Run id required.")
   (rmt:send-receive 'test-get-records-for-index-file run-id (list run-id test-name)))
 (define (rmt:get-testinfo-state-status run-id test-id)
+  (assert (number? run-id) "FATAL: Run id required.")
   (rmt:send-receive 'get-testinfo-state-status run-id (list run-id test-id)))
 (define (rmt:test-set-log! run-id test-id logf)
+  (assert (number? run-id) "FATAL: Run id required.")
   (if (string? logf)(rmt:general-call 'test-set-log run-id logf test-id)))
 (define (rmt:test-set-top-process-pid run-id test-id pid)
+  (assert (number? run-id) "FATAL: Run id required.")
   (rmt:send-receive 'test-set-top-process-pid run-id (list run-id test-id pid)))
 (define (rmt:test-get-top-process-pid run-id test-id)
+  (assert (number? run-id) "FATAL: Run id required.")
   (rmt:send-receive 'test-get-top-process-pid run-id (list run-id test-id)))
 (define (rmt:get-run-ids-matching-target keynames target res runname testpatt statepatt statuspatt)
   (rmt:send-receive 'get-run-ids-matching-target #f (list keynames target res runname testpatt statepatt statuspatt)))
@@ -676,57 +705,71 @@
 	   (map (lambda (run-id)
 		  (rmt:send-receive 'test-get-paths-matching-keynames-target-new run-id (list run-id keynames target res testpatt statepatt statuspatt runname)))
 (define (rmt:get-prereqs-not-met run-id waitons ref-test-name ref-item-path #!key (mode '(normal))(itemmaps #f))
+  (assert (number? run-id) "FATAL: Run id required.")
   (rmt:send-receive 'get-prereqs-not-met run-id (list run-id waitons ref-test-name ref-item-path mode itemmaps)))
 (define (rmt:get-count-tests-running-for-run-id run-id)
+  (assert (number? run-id) "FATAL: Run id required.")
   (rmt:send-receive 'get-count-tests-running-for-run-id run-id (list run-id)))
 (define (rmt:get-not-completed-cnt run-id)
+  (assert (number? run-id) "FATAL: Run id required.")
   (rmt:send-receive 'get-not-completed-cnt run-id (list run-id)))
 ;; Statistical queries
 (define (rmt:get-count-tests-running run-id)
+  (assert (number? run-id) "FATAL: Run id required.")
   (rmt:send-receive 'get-count-tests-running run-id (list run-id)))
 (define (rmt:get-count-tests-running-for-testname run-id testname)
+  (assert (number? run-id) "FATAL: Run id required.")
   (rmt:send-receive 'get-count-tests-running-for-testname run-id (list run-id testname)))
 (define (rmt:get-count-tests-running-in-jobgroup run-id jobgroup)
+  (assert (number? run-id) "FATAL: Run id required.")
   (rmt:send-receive 'get-count-tests-running-in-jobgroup run-id (list run-id jobgroup)))
 ;; state and status are extra hints not usually used in the calculation
 (define (rmt:set-state-status-and-roll-up-items run-id test-name item-path state status comment)
+  (assert (number? run-id) "FATAL: Run id required.")
   (rmt:send-receive 'set-state-status-and-roll-up-items run-id (list run-id test-name item-path state status comment)))
 (define (rmt:set-state-status-and-roll-up-run run-id state status)
+  (assert (number? run-id) "FATAL: Run id required.")
   (rmt:send-receive 'set-state-status-and-roll-up-run run-id (list run-id state status)))
 (define (rmt:update-pass-fail-counts run-id test-name)
+  (assert (number? run-id) "FATAL: Run id required.")
   (rmt:general-call 'update-pass-fail-counts run-id test-name test-name test-name))
 (define (rmt:top-test-set-per-pf-counts run-id test-name)
+  (assert (number? run-id) "FATAL: Run id required.")
   (rmt:send-receive 'top-test-set-per-pf-counts run-id (list run-id test-name)))
 (define (rmt:get-raw-run-stats run-id)
+  (assert (number? run-id) "FATAL: Run id required.")
   (rmt:send-receive 'get-raw-run-stats run-id (list run-id)))
 (define (rmt:get-test-times runname target)
   (rmt:send-receive 'get-test-times #f (list runname target ))) 
 ;;  R U N S
 (define (rmt:get-run-info run-id)
-  (rmt:send-receive 'get-run-info run-id (list run-id)))
+  (assert (number? run-id) "FATAL: Run id required.")
+  (rmt:send-receive 'get-run-info #f (list run-id)))
 (define (rmt:get-num-runs runpatt)
   (rmt:send-receive 'get-num-runs #f (list runpatt)))
 (define (rmt:get-runs-cnt-by-patt runpatt targetpatt keys)
@@ -735,14 +778,15 @@
 ;; Use the special run-id == #f scenario here since there is no run yet
 (define (rmt:register-run keyvals runname state status user contour)
   (rmt:send-receive 'register-run #f (list keyvals runname state status user contour)))
 (define (rmt:get-run-name-from-id run-id)
-  (rmt:send-receive 'get-run-name-from-id run-id (list run-id)))
+  (assert (number? run-id) "FATAL: Run id required.")
+  (rmt:send-receive 'get-run-name-from-id #f (list run-id)))
 (define (rmt:delete-run run-id)
-  (rmt:send-receive 'delete-run run-id (list run-id)))
+  (rmt:send-receive 'delete-run #f (list run-id)))
 (define (rmt:update-run-stats run-id stats)
   (rmt:send-receive 'update-run-stats #f (list run-id stats)))
 (define (rmt:delete-old-deleted-test-records)
@@ -756,43 +800,52 @@
 (define (rmt:get-all-run-ids)
   (rmt:send-receive 'get-all-run-ids #f '()))
 (define (rmt:get-prev-run-ids run-id)
+  (assert (number? run-id) "FATAL: Run id required.")
   (rmt:send-receive 'get-prev-run-ids #f (list run-id)))
 (define (rmt:lock/unlock-run run-id lock unlock user)
+  (assert (number? run-id) "FATAL: Run id required.")
   (rmt:send-receive 'lock/unlock-run #f (list run-id lock unlock user)))
 ;; set/get status
 (define (rmt:get-run-status run-id)
+  (assert (number? run-id) "FATAL: Run id required.")
   (rmt:send-receive 'get-run-status #f (list run-id)))
 (define (rmt:get-run-state run-id)
+  (assert (number? run-id) "FATAL: Run id required.")
   (rmt:send-receive 'get-run-state #f (list run-id)))
 (define (rmt:set-run-status run-id run-status #!key (msg #f))
+  (assert (number? run-id) "FATAL: Run id required.")
   (rmt:send-receive 'set-run-status #f (list run-id run-status msg)))
 (define (rmt:set-run-state-status run-id state status )
+  (assert (number? run-id) "FATAL: Run id required.")
   (rmt:send-receive 'set-run-state-status #f (list run-id state status)))
 (define (rmt:update-tesdata-on-repilcate-db old-lt new-lt)
 (rmt:send-receive 'update-tesdata-on-repilcate-db #f (list old-lt new-lt)))
 (define (rmt:update-run-event_time run-id)
+  (assert (number? run-id) "FATAL: Run id required.")
   (rmt:send-receive 'update-run-event_time #f (list run-id)))
 (define (rmt:get-runs-by-patt  keys runnamepatt targpatt offset limit fields last-runs-update  #!key  (sort-order "asc")) ;; fields of #f uses default
   (rmt:send-receive 'get-runs-by-patt #f (list keys runnamepatt targpatt offset limit fields last-runs-update sort-order)))
 (define (rmt:find-and-mark-incomplete run-id ovr-deadtime)
+  (assert (number? run-id) "FATAL: Run id required.")
   ;; (if (rmt:send-receive 'have-incompletes? run-id (list run-id ovr-deadtime))
   (rmt:send-receive 'mark-incomplete run-id (list run-id ovr-deadtime))) ;; )
 (define (rmt:get-main-run-stats run-id)
+  (assert (number? run-id) "FATAL: Run id required.")
   (rmt:send-receive 'get-main-run-stats #f (list run-id)))
 (define (rmt:get-var varname)
   (rmt:send-receive 'get-var #f (list varname)))
@@ -869,39 +922,46 @@
 ;;(define (rmt:get-steps-for-test run-id test-id)
 ;;  (rmt:send-receive 'get-steps-data run-id (list test-id)))
 (define (rmt:teststep-set-status! run-id test-id teststep-name state-in status-in comment logfile)
+  (assert (number? run-id) "FATAL: Run id required.")
   (let* ((state     (items:check-valid-items "state" state-in))
 	 (status    (items:check-valid-items "status" status-in)))
     (if (or (not state)(not status))
 	(debug:print 3 *default-log-port* "WARNING: Invalid " (if status "status" "state")
 		     " value \"" (if status state-in status-in) "\", update your validvalues section in megatest.config"))
     (rmt:send-receive 'teststep-set-status! run-id (list run-id test-id teststep-name state-in status-in comment logfile))))
 (define (rmt:delete-steps-for-test! run-id test-id)
+  (assert (number? run-id) "FATAL: Run id required.")
   (rmt:send-receive 'delete-steps-for-test! run-id (list run-id test-id)))
 (define (rmt:get-steps-for-test run-id test-id)
+  (assert (number? run-id) "FATAL: Run id required.")
   (rmt:send-receive 'get-steps-for-test run-id (list run-id test-id)))
-(define (rmt:get-steps-info-by-id test-step-id)
-  (rmt:send-receive 'get-steps-info-by-id #f (list test-step-id)))
+(define (rmt:get-steps-info-by-id run-id test-step-id)
+  (assert (number? run-id) "FATAL: Run id required.")
+  (rmt:send-receive 'get-steps-info-by-id #f (list run-id test-step-id)))
 ;;  T E S T   D A T A 
 (define (rmt:read-test-data run-id test-id categorypatt #!key (work-area #f)) 
+  (assert (number? run-id) "FATAL: Run id required.")
   (rmt:send-receive 'read-test-data run-id (list run-id test-id categorypatt)))
 (define (rmt:read-test-data-varpatt run-id test-id categorypatt varpatt #!key (work-area #f)) 
+  (assert (number? run-id) "FATAL: Run id required.")
   (rmt:send-receive 'read-test-data-varpatt run-id (list run-id test-id categorypatt varpatt)))
-(define (rmt:get-data-info-by-id test-data-id)
-   (rmt:send-receive 'get-data-info-by-id #f (list test-data-id)))
+(define (rmt:get-data-info-by-id run-id test-data-id)
+  (assert (number? run-id) "FATAL: Run id required.")
+   (rmt:send-receive 'get-data-info-by-id #f (list run-id test-data-id)))
 (define (rmt:testmeta-add-record testname)
   (rmt:send-receive 'testmeta-add-record #f (list testname)))
 (define (rmt:testmeta-get-record testname)
@@ -909,13 +969,15 @@
 (define (rmt:testmeta-update-field test-name fld val)
   (rmt:send-receive 'testmeta-update-field #f (list test-name fld val)))
 (define (rmt:test-data-rollup run-id test-id status)
+  (assert (number? run-id) "FATAL: Run id required.")
   (rmt:send-receive 'test-data-rollup run-id (list run-id test-id status)))
 (define (rmt:csv->test-data run-id test-id csvdata)
+  (assert (number? run-id) "FATAL: Run id required.")
   (rmt:send-receive 'csv->test-data run-id (list run-id test-id csvdata)))
 ;;  T A S K S
@@ -963,10 +1025,11 @@
 (define (rmt:archive-register-disk bdisk-name bdisk-path df)
   (rmt:send-receive 'archive-register-disk #f (list bdisk-name bdisk-path df)))
 (define (rmt:test-set-archive-block-id run-id test-id archive-block-id)
+  (assert (number? run-id) "FATAL: Run id required.")
   (rmt:send-receive 'test-set-archive-block-id run-id (list run-id test-id archive-block-id)))
 (define (rmt:test-get-archive-block-info archive-block-id)
   (rmt:send-receive 'test-get-archive-block-info #f (list archive-block-id)))
@@ -973,12 +1036,12 @@
 (define (rmtmod:calc-ro-mode runremote *toppath*)
   (if (and runremote
 	   (remote-ro-mode-checked runremote))
       (remote-ro-mode runremote)
-      (let* ((dbfile  (conc *toppath* "/megatest.db"))
-	     (ro-mode (not (file-write-access? dbfile)))) ;; TODO: use dbstruct or runremote to figure this out in future
+      (let* ((mtcfgfile  (conc *toppath* "/megatest.config"))
+	     (ro-mode (not (file-write-access? mtcfgfile)))) ;; TODO: use dbstruct or runremote to figure this out in future
 	(if runremote
 	      (remote-ro-mode-set! runremote ro-mode)
 	      (remote-ro-mode-checked-set! runremote #t)

DELETED runs-launch-loop-test.scm
Index: runs-launch-loop-test.scm
--- runs-launch-loop-test.scm
+++ /dev/null
@@ -1,76 +0,0 @@
-;;  Copyright 2006-2017, Matthew Welland.
-;; This file is part of Megatest.
-;;     Megatest is free software: you can redistribute it and/or modify
-;;     it under the terms of the GNU General Public License as published by
-;;     the Free Software Foundation, either version 3 of the License, or
-;;     (at your option) any later version.
-;;     Megatest is distributed in the hope that it will be useful,
-;;     but WITHOUT ANY WARRANTY; without even the implied warranty of
-;;     GNU General Public License for more details.
-;;     You should have received a copy of the GNU General Public License
-;;     along with Megatest.  If not, see <>.
-(use srfi-69)
-(define (runs:queue-next-hed tal reg n regful)
-  (if regful
-      (car reg)
-      (car tal)))
-(define (runs:queue-next-tal tal reg n regful)
-  (if regful
-      tal
-      (let ((newtal (cdr tal)))
-	(if (null? newtal)
-	    reg
-	    newtal
-	    ))))
-(define (runs:queue-next-reg tal reg n regful)
-  (if regful
-      (cdr reg)
-      (if (eq? (length tal) 1)
-	  '()
-	  reg)))
-(use trace)
-(trace runs:queue-next-hed
-       runs:queue-next-tal
-       runs:queue-next-reg)
-(define tests '(1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20))
-(define test-registry (make-hash-table))
-(define n 3)
-(let loop ((hed   (car tests))
-           (tal   (cdr tests))
-           (reg   '()))
-  (let* ((reglen (length reg))
-	 (regful (> reglen n)))
-    (print "hed=" hed ", length reg=" (length reg) ", (> lenreg n)=" (> (length reg) n))
-    (let ((newtal (append tal (list hed)))) ;; used if we are not done with this test
-      (cond
-       ((not (hash-table-ref/default test-registry hed #f))
-	(hash-table-set! test-registry hed #t)
-	(print "Registering #" hed)
-	(if (not (null? tal))
-          (loop (runs:queue-next-hed tal reg n regful)
-                (runs:queue-next-tal tal reg n regful)
-		(let ((newl (append reg (list hed))))
-		  (if regful
-		      (cdr newl)
-		      newl)))))
-       (else
-	(print "Running #" hed)
-	(if (not (null? tal))
-	    (loop (runs:queue-next-hed tal reg n regful)
-		  (runs:queue-next-tal tal reg n regful)
-		  (runs:queue-next-reg tal reg n regful))))))))

Index: runs.scm
--- runs.scm
+++ runs.scm
@@ -100,16 +100,15 @@
 		(thread-sleep! 2))
 		(if (runs:lownoise "runners-softlock-nowait" 360)
 		    (debug:print-info 0 *default-log-port* "No runners in flight, updating softlock"))
 		(let* ((ouf (open-output-file my-lock-file)))
-		  (with-output-to-port ouf
-		    (lambda ()(print (current-seconds))))
+		  (with-output-to-port ouf (lambda ()(print (current-seconds))))
 		  (close-output-port ouf))))
 	  (runs:dat-last-fuel-check-set! rdat (current-seconds))))))
-;; Fourth try, do accounting through time
+;; Fourth try, do accounting through time....
 (define (runs:parallel-runners-mgmt rdat)
   (let ((time-to-check (configf:lookup-number *configdat* "runners" "time-to-check" default: 10)) ;; 28
 	(time-to-wait  (configf:lookup-number *configdat* "runners" "time-to-wait" default: 30))
 	(now-time      (current-seconds)))
@@ -228,13 +227,12 @@
 		  (debug:print 0 *default-log-port* "FATAL: *configdat* was inaccessible! This should never happen. Retried " count
 			       " times. Message: " msg)
 		  (debug:print 0 *default-log-port* "Call chain:")
 		  (with-output-to-port *default-log-port*
-                    (lambda ()
-                      (print "*configdat* is >>"*configdat*"<<")
+		    (lambda ()
+		      (print "*configdat* is >>"*configdat*"<<")
                       (pp *configdat*)
                       (pp call-chain)))
 		  (exit 1))))
           ;;(bb-check-path msg: "runs:set-megatest-env-vars block 1.5")
@@ -312,10 +310,25 @@
 	  (hash-table-set! *runs:denoise* key currtime)
+(define *last-test-launch* 0)
+(define *too-soon-delays* (make-hash-table))
+;; too-soon delay: if the previous event for key happened less than dseconds ago, sleep for wseconds
+(define (runs:too-soon-delay key dseconds wseconds)
+  (let* ((last-time (hash-table-ref/default *too-soon-delays* key #f)))
+    (if (and last-time
+	     (< (- (current-seconds) last-time) dseconds))
+	(begin
+	  (if (runs:lownoise (conc "too-soon-delay"key) 60)
+	      (debug:print-info 2 *default-log-port* "Polling throttle for "key))
+	  (thread-sleep! wseconds)))
+    (hash-table-set! *too-soon-delays* key (current-seconds))))
 (define (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs)
   ;; Take advantage of a good place to exit if running the one-pass methodology
   (if (and (> (runs:dat-can-run-more-tests-count runsdat) 20)
 	   (args:get-arg "-one-pass"))
@@ -323,11 +336,11 @@
   (if (runs:dat-load-mgmt-function runsdat)((runs:dat-load-mgmt-function runsdat)))
   (let* ((num-running             (rmt:get-count-tests-running run-id))
 	 (num-running-in-jobgroup (rmt:get-count-tests-running-in-jobgroup run-id jobgroup))
-	 (job-group-limit         (let ((jobg-count (configf:lookup *configdat* "jobgroups" jobgroup)))
+	 (job-group-limit         (let ((jobg-count (configf:lookup-number *configdat* "jobgroups" jobgroup)))
 				    (if (string? jobg-count)
 					(string->number jobg-count)
     (if (> (+ num-running num-running-in-jobgroup) 0)
 	(runs:inc-can-run-more-tests-count runsdat)) ;; (set! *runs:can-run-more-tests-count* (+ *runs:can-run-more-tests-count* 1)))
@@ -508,12 +521,12 @@
   (let* ((keys               (keys:config-get-fields *configdat*))
 	 (keyvals            (keys:target->keyval keys target))
 	 (run-id             (rmt:register-run keyvals runname "new" "n/a" user (args:get-arg "-contour")))  ;;  test-name)))
 	 ;; (deferred          '()) ;; delay running these since they have a waiton clause
 	 (runconfigf         (conc  *toppath* "/runconfigs.config"))
-         (dbfile             (conc  *toppath* "/megatest.db"))
-         (readonly-mode      (not (file-write-access? dbfile)))
+         (mtconfig           (conc  *toppath* "/megatest.config"))
+         (readonly-mode      (not (file-write-access? mtconfig)))
 	 (test-records       (make-hash-table))
 	 ;; need to process runconfigs before generating these lists
 	 (all-tests-registry #f)  ;; (tests:get-all)) ;; (tests:get-valid-tests (make-hash-table) test-search-path)) ;; all valid tests to check waiton names
 	 (all-test-names     #f)  ;; (hash-table-keys all-tests-registry))
 	 (test-names         #f)  ;; Generated by a call to (tests:filter-test-names all-test-names test-patts))
@@ -526,11 +539,11 @@
 	 (allowed-tests      #f)
 	 (runconf            #f))
     ;; check if readonly
     (when readonly-mode
-      (debug:print-error 0 *default-log-port* "megatest.db is readonly.  Cannot proceed.")
+      (debug:print-error 0 *default-log-port* "Megatest database is readonly.  Cannot proceed.")
       (exit 1))
     ;; per user request. If less than 100Meg space on dbdir partition, bail out with error
     ;; this will reduce issues in database corruption
@@ -544,15 +557,15 @@
     ;; (if (tasks:need-server run-id)(tasks:start-and-wait-for-server tdbdat run-id 10))
     (let ((sighand (lambda (signum)
 		     ;; (signal-mask! signum) ;; to mask or not? seems to cause issues in exiting
 		     (set! *time-to-exit* #t)
-		     (print "Received signal " signum ", cleaning up before exit. Please wait...")
+		     (debug:print 0 *default-log-port* "Received signal " signum ", cleaning up before exit. Please wait...")
 		     (let ((th1 (make-thread (lambda ()
 					       ;; (let ((tdbdat (tasks:open-db)))
 						 (rmt:tasks-set-state-given-param-key task-key "killed") ;; )
-					       (print "Killed by signal " signum ". Exiting")
+					       (debug:print 0 *default-log-port* "Killed by signal " signum ". Exiting")
 					       (thread-sleep! 3)
 			   (th2 (make-thread (lambda ()
 					       (thread-sleep! 5)
 					       (debug:print 0 *default-log-port* "Done")
@@ -788,19 +801,19 @@
     (debug:print-info 4 *default-log-port* "test-records=" (hash-table->alist test-records))
     (let ((reglen (configf:lookup *configdat* "setup" "runqueue")))
       (if (> (length (hash-table-keys test-records)) 0)
 	  (let* ((keep-going        #t)
 		 (run-queue-retries 5)
-		 #;(th1        (make-thread (lambda ()
-					    (handle-exceptions
-						exn
-						(begin
-						  (print-call-chain)
-						  (print " message: " ((condition-property-accessor 'exn 'message) exn)))
-					      (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests
-								    (any->number reglen) all-tests-registry)))
-					  "runs:run-tests-queue"))
+		;; (th1        (make-thread (lambda ()
+		;; 			    (handle-exceptions
+		;; 				exn
+		;; 				(begin
+		;; 				  (print-call-chain)
+		;; 				  (print " message: " ((condition-property-accessor 'exn 'message) exn)))
+		;; 			      (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests
+		;; 						    (any->number reglen) all-tests-registry)))
+		;; 			  "runs:run-tests-queue"))
 		 (th2        (make-thread (lambda ()			 ;; BBQ: why are we visiting ALL runs here?	    
 					    ;; (rmt:find-and-mark-incomplete-all-runs))))) CAN'T INTERRUPT IT ...
 					    (let ((run-ids (rmt:get-all-run-ids)))
 					      (for-each (lambda (run-id)
 							  (if keep-going
@@ -1287,10 +1300,11 @@
       ;; we are going to reset all the counters for test retries by setting a new hash table
       ;; this means they will increment only when nothing can be run
       (set! *max-tries-hash* (make-hash-table))
       (run:test run-id run-info keyvals runname test-record flags #f test-registry all-tests-registry runsdat testdat)
+      (set! *last-test-launch* (current-seconds))
       (runs:incremental-print-results run-id)
       (hash-table-set! test-registry (db:test-make-full-name test-name item-path) 'running)
       (runs:shrink-can-run-more-tests-count runsdat)  ;; DELAY TWEAKER (still needed?)
       ;; (thread-sleep! *global-delta*)
       (if (or (not (null? tal))(not (null? reg)))
@@ -1502,10 +1516,12 @@
 (define *max-tries-hash* (make-hash-table))
 (define (runs:pretty-long-list lst)
    (if (> (length lst) 8)(append (take lst 3)(list "...")) lst))
+(define *last-loop-time-ms* 0)
 ;; runs:run-tests-queue is called by runs:run-tests
@@ -1640,12 +1656,29 @@
 			   testmode:    testmode
 			   newtal:      newtal
 			   itemmaps:    itemmaps
 			   ;; prereqs-not-met: prereqs-not-met
+	;; too-tight loop detection and delay; this might hide issues
+	;; that occur in long run times. Consider commenting this out when debugging
+	;; 
+	(if (and (>= num-running max-concurrent-jobs)
+		 (< (- (current-milliseconds) *last-loop-time-ms*) 500))
+	    (begin
+	      (if (runs:lownoise "too-tight-loop" 5)
+		  (debug:print-info 2 *default-log-port* "Excessively fast loop, delaying 1/2 second"))
+	      (thread-sleep! 0.5)))
+	(set! *last-loop-time-ms* (current-milliseconds))
 	(runs:dat-regfull-set! runsdat regfull)
+	(if (> (- (current-seconds) *last-test-launch*) 5)        ;; be pretty aggressive for five seconds after
+	    (runs:too-soon-delay (conc "loop delay " hed) 1 0.6)    ;; starting a test then apply more delay
+	    (runs:too-soon-delay (conc "loop delay " hed) 1 0.1)) 
 	(if (> num-running 0)
             (set! last-time-some-running (current-seconds)))
         (if (> (current-seconds)(+ last-time-some-running (or (configf:lookup *configdat* "setup" "give-up-waiting") 36000)))
             (hash-table-set! *max-tries-hash* tfullname (+ (hash-table-ref/default *max-tries-hash* tfullname 0) 1)))
@@ -1890,11 +1923,11 @@
     (rmt:set-var (conc "lunch-complete-" run-id) "yes")  
     ;; now *if* -run-wait we wait for all tests to be done
     ;; Now wait for any RUNNING tests to complete (if in run-wait mode)
     ;; (if (runs:dat-load-mgmt-function runsdat)((runs:dat-load-mgmt-function runsdat)))
-    (thread-sleep! 10) ;; I think there is a race condition here. Let states/statuses settle
+    (thread-sleep! 0.1) ;; I think there is a race condition here. Let states/statuses settle
     (let wait-loop ((num-running      (rmt:get-count-tests-running-for-run-id run-id))
 		    (prev-num-running 0))
       ;; (debug:print-info 13 *default-log-port* "num-running=" num-running ", prev-num-running=" prev-num-running)
       (if (and (or (args:get-arg "-run-wait")
@@ -2179,11 +2212,11 @@
 		       (if (runs:dat-wait-for-jobs-function runsdat)
 			   ((runs:dat-wait-for-jobs-function runsdat) testdat-rec))
 		       (if (not (launch-test test-id run-id run-info keyvals runname test-conf test-name test-path itemdat flags))
-			     (print "ERROR: Failed to launch the test. Exiting as soon as possible")
+			     (debug:print 0 *default-log-port* "ERROR: Failed to launch the test. Exiting as soon as possible")
 			     (set! *globalexitstatus* 1) ;; 
 			     (process-signal (current-process-id) signal/kill))
 		       ;; wait again here?
@@ -2347,15 +2380,15 @@
 	 (bup-mutex    (make-mutex))
          (keep-records (args:get-arg "-keep-records")) ;; used in conjunction with -remove-runs to keep the records, TODO: consolidate this with "mode".
 	 (test-records '())) ;; for tasks that we wish to operate on all tests in one fell swoop
     (let* ((write-access-actions '(remove-runs set-state-status archive run-wait kill-runs))
-           (dbfile             (conc  *toppath* "/megatest.db"))
+           (dbfile             (conc  *toppath* "/.megatest/main.db"))
            (readonly-mode      (not (file-write-access? dbfile))))
       (when (and readonly-mode
                  (member action write-access-actions))
-        (debug:print-error 0 *default-log-port* "megatest.db is readonly.  Cannot proceed with action ["action"] in which write-access isrequired .")
+        (debug:print-error 0 *default-log-port* dbfile " is readonly.  Cannot proceed with action ["action"] in which write-access isrequired .")
         (exit 1)))
     (debug:print-info 4 *default-log-port* "runs:operate-on => Header: " header " action: " action " new-state-status: " new-state-status)
     (if (> 2 (length state-status))
@@ -2702,13 +2735,13 @@
     ;; special case - archive get
     (if (equal? (args:get-arg "-archive") "get")
 	(archive:bup-get-data "get" #f #f test-records rp-mutex bup-mutex))
     (if (or (equal? (args:get-arg "-archive") "save") (equal? (args:get-arg "-archive") "save-remove"))
-             (print "db archive started")  
+             (debug:print 0 *default-log-port* "db archive started")  
              (archive:megatest-db target runnamepatt)
-             (print "db archived")))
+             (debug:print 0 *default-log-port* "db archived")))
 (define (runs:remove-test-directory test mode) ;; remove-data-only)
@@ -2873,11 +2906,11 @@
 	      (fld (car  key))
 	      (val (configf:lookup test-conf "test_meta" fld)))
 	 ;; (debug:print 5 *default-log-port* "idx: " idx " fld: " fld " val: " val)
 	 (if (and val (not (equal? (vector-ref currrecord idx) val)))
-	       (print "Updating " test-name " " fld " to " val)
+	       (debug:print 0 *default-log-port* "Updating " test-name " " fld " to " val)
 	       (rmt:testmeta-update-field test-name fld val)))))
      '(("author" 2)("owner" 3)("description" 4)("reviewed" 5)("tags" 9)("jobgroup" 10)))))
 ;; find tests with matching tags, tagpatt is a string "tagpatt1,tagpatt2%, ..."

DELETED sdb.scm
Index: sdb.scm
--- sdb.scm
+++ /dev/null
@@ -1,116 +0,0 @@
-;; Copyright 2006-2013, Matthew Welland.
-;; This file is part of Megatest.
-;;     Megatest is free software: you can redistribute it and/or modify
-;;     it under the terms of the GNU General Public License as published by
-;;     the Free Software Foundation, either version 3 of the License, or
-;;     (at your option) any later version.
-;;     Megatest is distributed in the hope that it will be useful,
-;;     but WITHOUT ANY WARRANTY; without even the implied warranty of
-;;     GNU General Public License for more details.
-;;     You should have received a copy of the GNU General Public License
-;;     along with Megatest.  If not, see <>.
-;; Simple persistant strings lookup table. Keep out of the main db
-;; so writes/reads don't slow down central access.
-(require-extension (srfi 18) extras)
-(use sqlite3 srfi-1 posix regex regex-case srfi-69 csv-xml s11n md5 message-digest base64)
-(import (prefix sqlite3 sqlite3:))
-(import (prefix base64 base64:))
-(declare (unit sdb))
-(define (sdb:open fname)
-  (let* ((dbpath    (pathname-directory fname))
-	 (dbexists  (let ((fe (common:file-exists? fname)))
-		      (if fe 
-			  fe
-			  (begin
-			    (create-directory dbpath #t)
-			    #f))))
-	 (sdb        (sqlite3:open-database fname))
-	 (handler   (make-busy-timeout 136000)))
-    (sqlite3:set-busy-handler! sdb handler)
-    (if (not dbexists)
-	(sdb:initialize sdb))
-    (sqlite3:execute sdb "PRAGMA synchronous = 1;")
-    sdb))
-(define (sdb:initialize sdb)
-  (sqlite3:execute sdb "CREATE TABLE IF NOT EXISTS strs
-                           (id  INTEGER PRIMARY KEY,
-                            str TEXT,
-                        CONSTRAINT str UNIQUE (str));")
-  (sqlite3:execute sdb "CREATE INDEX IF NOT EXISTS strindx ON strs (str);"))
-;; (define sumup (let ((a 0))(lambda (x)(set! a (+ x a)) a)))
-(define (sdb:register-string sdb str)
-  (sqlite3:execute sdb "INSERT OR IGNORE INTO strs (str) VALUES (?);" str))
-(define (sdb:string->id sdb str-cache str)
-  (let ((id (hash-table-ref/default str-cache str #f)))
-    (if (not id)
-	(sqlite3:for-each-row
-	 (lambda (sid)
-	   (set! id sid)
-	   (hash-table-set! str-cache str id))
-	 sdb
-	 "SELECT id FROM strs WHERE str=?;" str))
-    id))
-(define (sdb:id->string sdb id-cache id)
-  (let ((str (hash-table-ref/default id-cache id #f)))
-    (if (not str)
-	(sqlite3:for-each-row
-	 (lambda (istr)
-	   (set! str istr)
-	   (hash-table-set! id-cache id str))
-	 sdb
-	 "SELECT str FROM strs WHERE id=?;" id))
-    str))
-;; Numbers get passed though in both directions
-(define (make-sdb:qry fname)
-  (let ((sdb    #f)
-	(scache (make-hash-table))
-	(icache (make-hash-table)))
-    (lambda (cmd var)
-      (case cmd
-	((setup)   (set! sdb (if (not sdb)
-				 (sdb:open (if var var fname)))))
-	((setdb)    (set! sdb var))
-	((getdb)    sdb)
-	((finalize) (if sdb
-			(begin
-			  (sqlite3:finalize! sdb)
-			  (set! sdb #f))))
-	((getid)     (let ((id (if (or (number? var)
-				       (string->number var))
-				   var
-				   (sdb:string->id sdb scache var))))
-		       (if id
-			   id
-			   (begin
-			     (sdb:register-string sdb var)
-			     (sdb:string->id sdb scache var)))))
-	((getstr)    (if (or (number? var)
-			     (string->number var))
-			 (sdb:id->string sdb icache var)
-			 var))
-	((passid)    var)
-	((passstr)   var)
-	(else #f)))))

Index: server.scm
--- server.scm
+++ server.scm
@@ -23,18 +23,22 @@
 (use spiffy uri-common intarweb http-client spiffy-request-vars)
 (declare (unit server))
+(declare (uses commonmod))
 (declare (uses common))
 (declare (uses db))
 (declare (uses tasks)) ;; tasks are where stuff is maintained about what is running.
 ;; (declare (uses synchash))
 (declare (uses http-transport))
 ;;(declare (uses rpc-transport))
 (declare (uses launch))
 ;; (declare (uses daemon))
+(import commonmod)
 (include "common_records.scm")
 (include "db_records.scm")
 (define (server:make-server-url hostport)
@@ -229,15 +233,28 @@
 		   (exn (i/o file)(debug:print 0 *default-log-port* "ERROR: Cannot create directory at " (conc areapath "/logs")))
 		   (exn ()(debug:print 0 *default-log-port* "ERROR: Unknown error attemtping to get server list. exn=" exn)))
 		  (directory-exists? (conc areapath "/logs")))
-        ;; Get the list of server logs. First remove logs for servers that have exited.
+        ;; Get the list of server logs.
 	(let* (
                ;; For some reason, when I uncomment the below line, ext-tests sometimes starts 1000's of servers.
                ;; (exiting-servers (system (conc "bash -c 'rm -f `grep -il exiting " areapath "/logs/server-*-*.log 2> /dev/null`'")))
-               (server-logs   (glob (conc areapath "/logs/server-*-*.log")))
+               (server-logs   
+                (handle-exceptions
+		   exn
+		   (begin
+		     (debug:print 0 *default-log-port* "server:get-list: glob failed , exn=" exn)
+                     (thread-sleep! 60)
+                     (system "lsof -c mtest > /tmp/$USER/glob-failed.$$.lsof")
+                     (debug:print 0 *default-log-port* "lsof output saved in /tmp/$USER/glob-failed.$$.lsof")
+                     (thread-sleep! 60)
+                     (glob (conc areapath "/logs/server-*-*.log"))
+		   )
+                   (glob (conc areapath "/logs/server-*-*.log"))
+                )
+               )
 	       (num-serv-logs (length server-logs)))
 	  (if (or (null? server-logs) (= num-serv-logs 0))
               (let ()
                  (debug:print 2  *default-log-port* "There are no servers running at " (common:human-time))
@@ -246,11 +263,11 @@
 			 (tal  (cdr server-logs))
 			 (res '()))
 		(let* ((mod-time  (handle-exceptions
-				     (debug:print 0 *default-log-port* "failed to get modification time on " hed ", exn=" exn)
+				     (debug:print 0 *default-log-port* "server:get-list: failed to get modification time on " hed ", exn=" exn)
 				     (current-seconds)) ;; 0
 				   (file-modification-time hed))) ;; default to *very* old so log gets ignored if deleted
 		       (down-time (- (current-seconds) mod-time))
 		       (serv-dat  (if (or (< num-serv-logs 10)
 				  	  (< down-time 900)) ;; day-seconds))
@@ -382,32 +399,28 @@
 	 (server-key (conc (get-host-name) "-" (current-process-id))))
     (if (file-exists? start-flag)
 	(let* ((fmodtime (file-modification-time start-flag))
 	       (delta    (- (current-seconds) fmodtime))
 	       (old-enough   (> delta idletime))
-               (new-server-key "")
-              )
+               (new-server-key ""))
           ;; write start-flag file, wait 0.25s, then if previously the start-flag file was older than <idletime> seconds, and the new file still has the same server key as you just wrote, return #t.
 	  ;; the intention is to make sure nfs can read the file we just wrote, and make sure it was written by us, and not another process.
            (if (and old-enough
-		   (begin
-                     (debug:print-info 2 *default-log-port* "Writing " start-flag)
-		     (with-output-to-file start-flag (lambda () (print server-key)))
-		     (thread-sleep! 0.25)
-		     (set! new-server-key (with-input-from-file start-flag (lambda () (read-line))))
-		     (equal? server-key new-server-key))
-                )
-	      #t
-           ;; If either of the above conditions is not true, print a "Gating server start" message, wait <idle-time> + 1, then call this function recursively. 
-	      (begin
-		(debug:print-info 0 *default-log-port* "Gating server start, last start: "
-				  (seconds->time-string fmodtime) ", time since last start: " delta ", required idletime: " idletime ", gating reason:" (if old-enough "another job started a server" "too soon to start another server"))
-		(thread-sleep! ( + 1 idletime))
-		(server:wait-for-server-start-last-flag areapath)))))))
+		    (begin
+                      (debug:print-info 2 *default-log-port* "Writing " start-flag)
+		      (with-output-to-file start-flag (lambda () (print server-key)))
+		      (thread-sleep! 0.25)
+		      (set! new-server-key (with-input-from-file start-flag (lambda () (read-line))))
+		      (equal? server-key new-server-key)))
+	       #t
+               ;; If either of the above conditions is not true, print a "Gating server start" message, wait <idle-time> + 1, then call this function recursively. 
+	       (begin
+		 (debug:print-info 0 *default-log-port* "Gating server start, last start: "
+				   (seconds->time-string fmodtime) ", time since last start: " delta ", required idletime: " idletime ", gating reason:" (if old-enough "another job started a server" "too soon to start another server"))
+		 (thread-sleep! ( + 1 idletime))
+		 (server:wait-for-server-start-last-flag areapath)))))))
 ;; kind start up of server, wait before allowing another server for a given
 ;; area to be launched
@@ -415,23 +428,19 @@
 (define (server:kind-run areapath)
   ;; look for $MT_RUN_AREA_HOME/logs/server-start-last
   ;; and wait for it to be at least <server idletime> seconds old
   (server:wait-for-server-start-last-flag areapath)
   (if (not (server:check-if-running areapath)) ;; why try if there is already a server running?
-      (let* (
-	     (lock-file    (conc areapath "/logs/server-start.lock")))
+      (let* ((lock-file    (conc areapath "/logs/server-start.lock")))
 	(let* ((start-flag (conc areapath "/logs/server-start-last")))
 	  (common:simple-file-lock-and-wait lock-file expire-time: 25)
 	  (debug:print-info  2 *default-log-port* "server:kind-run: touching " start-flag)
 	  (system (conc "touch " start-flag)) ;; lazy but safe
 	  (server:run areapath)
 	  (thread-sleep! 20) ;; don't release the lock for at least a few seconds. And allow time for the server startup to get to "SERVER STARTED".
 	  (common:simple-file-release-lock lock-file)))
-      (debug:print-info 0 *default-log-port* "Found server already running. NOT trying to start another.")
-   )
+      (debug:print-info 0 *default-log-port* "Found server already running. NOT trying to start another.")))
 ;; this one seems to be the general entry point
 (define (server:start-and-wait areapath #!key (timeout 60))
   (let ((give-up-time (+ (current-seconds) timeout)))
@@ -565,17 +574,19 @@
     (if (equal? *toppath* toppath)
 ;; timeout is hms string: 1h 5m 3s, default is 1 minute
+;; This is currently broken. Just use the number of hours with no unit.
+;; Default (when no usable [server] timeout is configured) is 1200 seconds, i.e. 20 minutes.
 (define (server:expiration-timeout)
   (let ((tmo (configf:lookup *configdat* "server" "timeout")))
     (if (and (string? tmo)
 	     (common:hms-string->seconds tmo)) ;; BUG: hms-string->seconds is broken, if given "10" returns 0. Also, it doesn't belong in this logic unless the string->number is changed below
         (* 3600 (string->number tmo))
-	60)))
+	1200)))
 (define (server:get-best-guess-address hostname)
   (let ((res #f))
      (lambda (adr)
@@ -606,11 +617,14 @@
 ;; moving this here as it needs access to db and cannot be in common.
 (define (server:get-bruteforce-syncer dbstruct #!key (fork-to-background #f) (persist-until-sync #f))
-  (let* ((sqlite-exe   (or (get-environment-variable "MT_SQLITE3_EXE"))) ;; defined in
+  (debug:print 0 *default-log-port* "WARNING: bruteforce-syncer is called but has been disabled!") ;; level and port come first, matching the (debug:print <level> <port> msg ...) form used elsewhere
+  (lambda () ;; zero-argument stub procedure standing in for the disabled syncer
+    (debug:print 0 *default-log-port* "WARNING: bruteforce-syncer is called but has been disabled!"))
+  #;(let* ((sqlite-exe   (or (get-environment-variable "MT_SQLITE3_EXE"))) ;; defined in
          (sync-log     (or (args:get-arg "-sync-log") (conc *toppath* "/logs/sync-" (current-process-id) "-" (get-host-name) ".log")))
 	 (tmp-area     (common:get-db-tmp-area))
 	 (tmp-db       (conc tmp-area "/megatest.db"))
 	 (staging-file (conc *toppath* "/.megatest.db"))
 	 (mtdbfile     (conc *toppath* "/megatest.db"))
@@ -703,155 +717,5 @@
             ) ;; end lambda
-(define (server:writable-watchdog-bruteforce dbstruct)
-  (thread-sleep! 1) ;; delay for startup
-  (let* ((do-a-sync  (server:get-bruteforce-syncer dbstruct))
-         (final-sync (server:get-bruteforce-syncer dbstruct fork-to-background: #t persist-until-sync: #t)))
-    (when (and (not (args:get-arg "-sync-to-megatest.db")) ;; conditions under which we do not run the sync
-	       (args:get-arg "-server"))
-      (let loop ()
-	(do-a-sync)
-        (if (not *time-to-exit*) (loop))) ;; keep going unless time to exit
-      ;; time to exit, close the no-sync db here
-      (final-sync)
-      (if (common:low-noise-print 30)
-	  (debug:print-info 0 *default-log-port* "Exiting watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id)
-			    )))))
-(define (server:writable-watchdog-deltasync dbstruct)
-  (thread-sleep! 0.05) ;; delay for startup
-  (let ((legacy-sync  (common:run-sync?))
-        (sync-stale-seconds (configf:lookup-number *configdat* "server" "sync-stale-seconds" default: 300))
-	(debug-mode   (debug:debug-mode 1))
-	(last-time    (current-seconds))
-	(no-sync-db   (db:open-no-sync-db))
-	(stmt-cache   (dbr:dbstruct-stmt-cache dbstruct))
-        (sync-duration 0) ;; run time of the sync in milliseconds
-        )
-    (set! *no-sync-db* no-sync-db) ;; make the no sync db available to api calls
-    (debug:print-info 2 *default-log-port* "Periodic sync thread started.")
-    (debug:print-info 3 *default-log-port* "watchdog starting. legacy-sync is " legacy-sync" pid="(current-process-id)  );;  " this-wd-num="this-wd-num)
-    (if (and legacy-sync (not *time-to-exit*))
-	(let* (;;(dbstruct (db:setup))
-	       (mtdb       (dbr:dbstruct-mtdb dbstruct))
-	       (mtpath     (db:dbdat-get-path mtdb))
-	       (tmp-area   (common:get-db-tmp-area))
-	       (start-file (conc tmp-area "/.start-sync"))
-	       (end-file   (conc tmp-area "/.end-sync")))
-	  (debug:print-info 0 *default-log-port* "Server running, periodic sync started.")
-	  (let loop ()
-	    ;; sync for filesystem local db writes
-	    ;;
-	    (mutex-lock! *db-multi-sync-mutex*)
-	    (let* ((need-sync        (>= *db-last-access* *db-last-sync*)) ;; no sync since last write
-		   (sync-in-progress *db-sync-in-progress*)
-                   (min-intersync-delay (configf:lookup-number *configdat* "server" "minimum-intersync-delay" default: 5))
-		   (should-sync      (and (not *time-to-exit*)
-                                          (> (- (current-seconds) *db-last-sync*) min-intersync-delay))) ;; sync every five seconds minimum, deprecated logic, can probably be removed
-		   (start-time       (current-seconds))
-                   (cpu-load-adj     (alist-ref 'adj-proc-load (common:get-normalized-cpu-load #f)))
-		   (mt-mod-time      (file-modification-time mtpath))
-		   (last-sync-start  (if (common:file-exists? start-file)
-					 (file-modification-time start-file)
-					 0))
-		   (last-sync-end    (if (common:file-exists? end-file)
-					 (file-modification-time end-file)
-					 10))
-                   (sync-period      (+ 3 (* cpu-load-adj 30))) ;; as adjusted load increases increase the sync period
-		   (recently-synced  (and (< (- start-time mt-mod-time) sync-period) ;; not useful if sync didn't modify megatest.db!
-					  (< mt-mod-time last-sync-start)))
-		   (sync-done        (<= last-sync-start last-sync-end))
-                   (sync-stale       (> start-time (+ last-sync-start sync-stale-seconds)))
-		   (will-sync        (and (not *time-to-exit*)       ;; do not start a sync if we are in the process of exiting
-                                          (or need-sync should-sync)
-					  (or sync-done sync-stale)
-					  (not sync-in-progress)
-					  (not recently-synced))))
-              (debug:print-info 13 *default-log-port* "WD writable-watchdog top of loop.  need-sync="need-sync" sync-in-progress=" sync-in-progress
-				" should-sync="should-sync" start-time="start-time" mt-mod-time="mt-mod-time" recently-synced="recently-synced" will-sync="will-sync
-				" sync-done=" sync-done " sync-period=" sync-period)
-              (if (and (> sync-period 5)
-                       (common:low-noise-print 30 "sync-period"))
-                  (debug:print-info 0 *default-log-port* "Increased sync period due to long sync times, sync took: " sync-period " seconds."))
-	      ;; (if recently-synced (debug:print-info 0 *default-log-port* "Skipping sync due to recently-synced flag=" recently-synced))
-	      ;; (debug:print-info 0 *default-log-port* "need-sync: " need-sync " sync-in-progress: " sync-in-progress " should-sync: " should-sync " will-sync: " will-sync)
-	      (if will-sync (set! *db-sync-in-progress* #t))
-	      (mutex-unlock! *db-multi-sync-mutex*)
-	      (if will-sync
-                  (let (;; (max-sync-duration  (configf:lookup-number *configdat* "server" "max-sync-duration")) ;; KEEPING THIS AVAILABLE BUT SHOULD NOT USE, I'M PRETTY SURE IT DOES NOT WORK!
-                        (sync-start         (current-milliseconds)))
-		    (with-output-to-file start-file (lambda ()(print (current-process-id))))
-		    ;; put lock here
-                    ;; (if (or (not max-sync-duration)
-                    ;;        (< sync-duration max-sync-duration)) ;; NOTE: db:sync-to-megatest.db keeps track of time of last sync and syncs incrementally
-                        (let ((res        (db:sync-to-megatest.db dbstruct no-sync-db: no-sync-db))) ;; did we sync any data? If so need to set the db touched flag to keep the server alive
-                          (set! sync-duration (- (current-milliseconds) sync-start))
-                          (if (> res 0) ;; some records were transferred, keep the db alive
-                              (begin
-                                (mutex-lock! *heartbeat-mutex*)
-                                (set! *db-last-access* (current-seconds))
-                                (mutex-unlock! *heartbeat-mutex*)
-                                (debug:print-info 0 *default-log-port* "sync called, " res " records transferred."))
-                              (debug:print-info 2 *default-log-port* "sync called but zero records transferred")))))
-;;                         ;; TODO: factor this next routine out into a function
-;;                         (with-input-from-pipe ;; this should not block other threads but need to verify this
-;;                          (conc "megatest -sync-to-megatest.db -m testsuite:" (common:get-area-name) ":" *toppath*)
-;;                          (lambda ()
-;;                            (let loop ((inl (read-line))
-;;                                       (res #f))
-;;                              (if (eof-object? inl)
-;;                                  (begin
-;;                                    (set! sync-duration (- (current-milliseconds) sync-start))
-;;                                    (cond
-;;                                     ((not res)
-;;                                      (debug:print 0 *default-log-port* "ERROR: sync from /tmp db to megatest.db appears to have failed. Recommended that you stop your runs and run \"megatest -cleanup-db\""))
-;;                                     ((> res 0)
-;;                                      (mutex-lock! *heartbeat-mutex*)
-;;                                      (set! *db-last-access* (current-seconds))
-;;                                      (mutex-unlock! *heartbeat-mutex*))))
-;;                                  (let ((num-synced (let ((matches (string-match "^Synced (\\d+).*$" inl)))
-;;                                                      (if matches
-;;                                                          (string->number (cadr matches))
-;;                                                          #f))))
-;;                                    (loop (read-line)
-;;                                          (or num-synced res))))))))))
-	      (if will-sync
-		  (begin
-		    (mutex-lock! *db-multi-sync-mutex*)
-		    (set! *db-sync-in-progress* #f)
-		    (set! *db-last-sync* start-time)
-		    (with-output-to-file end-file (lambda ()(print (current-process-id))))
-		    ;; release lock here
-		    (mutex-unlock! *db-multi-sync-mutex*)))
-	      (if (and debug-mode
-		       (> (- start-time last-time) 60))
-		  (begin
-		    (set! last-time start-time)
-		    (debug:print-info 4 *default-log-port* "timestamp -> " (seconds->time-string (current-seconds)) ", time since start -> " (seconds->hr-min-sec (- (current-seconds) *time-zero*))))))
-	    ;; keep going unless time to exit
-	    ;;
-	    (if (not *time-to-exit*)
-		(let delay-loop ((count 0))
-                  ;;(debug:print-info 13 *default-log-port* "delay-loop top; count="count" pid="(current-process-id)" this-wd-num="this-wd-num" *time-to-exit*="*time-to-exit*)
-		  (if (and (not *time-to-exit*)
-			   (< count 6)) ;; was 11, changing to 4. 
-		      (begin
-			(thread-sleep! 1)
-			(delay-loop (+ count 1))))
-		  (if (not *time-to-exit*) (loop))))
-	    ;; time to exit, close the no-sync db here
-	    (db:no-sync-close-db no-sync-db stmt-cache)
-	    (if (common:low-noise-print 30)
-		(debug:print-info 0 *default-log-port* "Exiting watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id) ))))))) ;;" this-wd-num="this-wd-num)))))))

Index: spublish.scm
--- spublish.scm
+++ spublish.scm
@@ -396,19 +396,19 @@
      (if (< (sauth-common:space-left-at-dest target-path) (sauth-common:src-size src-path))
              (sauth:print-error "Destination does not have enough disk space.")
              (exit 1)))    
      (if (is_directory src-path) 
-        (begin
-            (let* ((parent-dir src-path)
-                   (start-dir target-path))
-                 (run (pipe
-                   (begin (system (conc "cd " parent-dir " ;tar chf - ." )))
-                   (begin (change-directory start-dir)
-                          ;(print "123")
-                          (run-cmd "tar" (list "xf" "-")))))
-                          (print "Copied data to " start-dir))) 
+	 (begin
+	   (let* ((parent-dir src-path)
+		  (start-dir target-path))
+	     (run (pipe
+		   (begin (system (conc "cd " parent-dir " ;tar chf - ." )))
+		   (begin (change-directory start-dir)
+					;(print "123")
+			  (run-cmd "tar" (list "xf" "-")))))
+	     (print "Copied data to " start-dir))) 
            (let*((parent-dir (pathname-directory src-path))
                   (start-dir target-path)
                 (filename (if  (pathname-extension src-path)  
                                       (conc(pathname-file src-path) "." (pathname-extension src-path))

Index: subrun.scm
--- subrun.scm
+++ subrun.scm
@@ -47,11 +47,11 @@
 (define (subrun:launch-dashboard test-run-dir)
   (if (subrun:subrun-test-initialized? test-run-dir)
       (let* ((subarea (subrun:get-runarea test-run-dir)))
         (if (and subarea (common:file-exists? subarea))
-            (system (conc "cd " subarea ";env -i PATH=$PATH DISPLAY=$DISPLAY HOME=$HOME USER=$USER dashboard &"))))))
+            (system (conc "cd \"" subarea "\" && env -i PATH=\"$PATH\" DISPLAY=\"$DISPLAY\" HOME=\"$HOME\" USER=\"$USER\" dashboard &")))))) ;; quote the directory and every expansion, not just PATH; && skips the launch if the cd fails
 (define (subrun:subrun-removed? test-run-dir)
   (if (subrun:subrun-test-initialized? test-run-dir)
       (let ((flagfile (conc test-run-dir "/subrun.removed")))
         (if (common:file-exists? flagfile)

Index: tasks.scm
--- tasks.scm
+++ tasks.scm
@@ -20,15 +20,17 @@
 (use sqlite3 srfi-1 posix regex regex-case srfi-69 dot-locking format)
 (import (prefix sqlite3 sqlite3:))
 (declare (unit tasks))
+(declare (uses dbfile))
 (declare (uses db))
 (declare (uses rmt))
 (declare (uses common))
 (declare (uses pgdb))
+(import dbfile)
 ;; (import pgdb) ;; pgdb is a module
 (include "task_records.scm")
 (include "db_records.scm")
@@ -253,11 +255,11 @@
   (let ((deadlist '()))
      (lambda (id pid host last-update delta)
-       (print "Going to delete stale record for monitor with pid " pid " on host " host " last updated " delta " seconds ago")
+       (debug:print 0 *default-log-port* "Going to delete stale record for monitor with pid " pid " on host " host " last updated " delta " seconds ago")
        (set! deadlist (cons id deadlist)))
      "SELECT id,pid,hostname,last_update,strftime('%s','now')-last_update AS delta FROM monitors WHERE delta > 700;")
     (sqlite3:execute mdb (conc "DELETE FROM monitors WHERE id IN ('" (string-intersperse (map conc deadlist) "','") "');")))
@@ -264,11 +266,11 @@
 (define (tasks:register-monitor db port)
   (let* ((pid (current-process-id))
 	 (hostname (get-host-name))
 	 (userinfo (user-information (current-user-id)))
 	 (username (car userinfo)))
-    (print "Register monitor, pid: " pid ", hostname: " hostname ", port: " port ", username: " username)
+    (debug:print 0 *default-log-port* "Register monitor, pid: " pid ", hostname: " hostname ", port: " port ", username: " username)
     (sqlite3:execute db "INSERT INTO monitors (pid,start_time,last_update,hostname,username) VALUES (?,strftime('%s','now'),strftime('%s','now'),?,?);"
 		     pid hostname username)))
 (define (tasks:get-num-alive-monitors mdb)
   (let ((res 0))
@@ -327,11 +329,11 @@
 ;; register a task
 (define (tasks:add dbstruct action owner target runname testpatt params)
    dbstruct #f #t
-   (lambda (db)
+   (lambda (dbdat db)
      (sqlite3:execute db "INSERT INTO tasks_queue (action,owner,state,target,name,testpatt,params,creation_time,execution_time)
                              VALUES (?,?,'new',?,?,?,?,strftime('%s','now'),0);" 
@@ -362,11 +364,11 @@
 (define (tasks:snag-a-task dbstruct)
   (let ((res    #f)
 	(keytxt (conc (current-process-id) "-" (get-host-name) "-" (car (user-information (current-user-id))))))
      dbstruct #f #t
-     (lambda (db)
+     (lambda (dat db)
        ;; first randomly set a new to pid-hostname-hostname
 	"UPDATE tasks_queue SET keylock=? WHERE id IN
            (SELECT id FROM tasks_queue 
@@ -389,11 +391,11 @@
 (define (tasks:reset-stuck-tasks dbstruct)
   (let ((res '()))
      dbstruct #f #t
-     (lambda (db)
+     (lambda (dat db)
 	(lambda (id delta)
 	  (set! res (cons id res)))
 	"SELECT id,strftime('%s','now')-execution_time AS delta FROM tasks_queue WHERE state='inprogress' AND delta>700 ORDER BY delta DESC LIMIT 2;")
@@ -406,11 +408,11 @@
 (define (tasks:get-tasks dbstruct types states)
   (let ((res '()))
      dbstruct #f #f
-     (lambda (db)
+     (lambda (dbdat db)
 	(lambda (id . rem)
 	  (set! res (cons (apply vector id rem) res)))
 	(conc "SELECT id,action,owner,state,target,name,test,item,params,creation_time,execution_time 
@@ -423,11 +425,11 @@
 (define (tasks:get-last dbstruct target runname)
   (let ((res #f))
      dbstruct #f #f
-     (lambda (db)
+     (lambda (dbdat db)
 	(lambda (id . rem)
 	  (set! res (apply vector id rem)))
 	(conc "SELECT id,action,owner,state,target,name,testpatt,keylock,params,creation_time,execution_time 
@@ -440,26 +442,26 @@
 ;; remove tasks given by a string of numbers comma separated
 (define (tasks:remove-queue-entries dbstruct task-ids)
    dbstruct #f #t
-   (lambda (db)
+   (lambda (dbdat db)
      (sqlite3:execute db (conc "DELETE FROM tasks_queue WHERE id IN (" task-ids ");")))))
-#;(define (tasks:process-queue dbstruct)
-  (let* ((task   (tasks:snag-a-task dbstruct))
-	 (action (if task (tasks:task-get-action task) #f)))
-    (if action (print "tasks:process-queue task: " task))
-    (if action
-	(case (string->symbol action)
-	  ((run)       (tasks:start-run     dbstruct task))
-	  ((remove)    (tasks:remove-runs   dbstruct task))
-	  ((lock)      (tasks:lock-runs     dbstruct task))
-	  ;; ((monitor)   (tasks:start-monitor db task))
-	  #;((rollup)    (tasks:rollup-runs   dbstruct task))
-	  ((updatemeta)(tasks:update-meta   dbstruct task))
-	  #;((kill)      (tasks:kill-monitors dbstruct task))))))
+;; (define (tasks:process-queue dbstruct)
+;;   (let* ((task   (tasks:snag-a-task dbstruct))
+;; 	 (action (if task (tasks:task-get-action task) #f)))
+;;     (if action (print "tasks:process-queue task: " task))
+;;     (if action
+;; 	(case (string->symbol action)
+;; 	  ((run)       (tasks:start-run     dbstruct task))
+;; 	  ((remove)    (tasks:remove-runs   dbstruct task))
+;; 	  ((lock)      (tasks:lock-runs     dbstruct task))
+;; 	  ;; ((monitor)   (tasks:start-monitor db task))
+;; 	  #;((rollup)    (tasks:rollup-runs   dbstruct task))
+;; 	  ((updatemeta)(tasks:update-meta   dbstruct task))
+;; 	  #;((kill)      (tasks:kill-monitors dbstruct task))))))
 (define (tasks:tasks->text tasks)
   (let ((fmtstr "~10a~10a~10a~12a~20a~12a~12a~10a"))
     (conc (format #f fmtstr "id" "action" "owner" "state" "target" "runname" "testpatts" "params") "\n"
@@ -477,11 +479,11 @@
 		tasks) "\n"))))
 (define (tasks:set-state dbstruct task-id state)
    dbstruct #f #t
-   (lambda (db)
+   (lambda (dbdat db)
      (sqlite3:execute db "UPDATE tasks_queue SET state=? WHERE id=?;" 
@@ -489,49 +491,48 @@
 (define (tasks:param-key->id dbstruct task-params)
    dbstruct #f #f
-   (lambda (db)
+   (lambda (dbdat db)
       (sqlite3:first-result db "SELECT id FROM tasks_queue WHERE params LIKE ?;"
 (define (tasks:set-state-given-param-key dbstruct param-key new-state)
    dbstruct #f #t
-   (lambda (db)
+   (lambda (dbdat db)
      (sqlite3:execute db "UPDATE tasks_queue SET state=? WHERE params LIKE ?;" new-state param-key))))
 (define (tasks:get-records-given-param-key dbstruct param-key state-patt action-patt test-patt)
    dbstruct #f #f
-   (lambda (db)
+   (lambda (dbdat db)
       (sqlite3:first-row db "SELECT id,action,owner,state,target,name,testpatt,keylock,params WHERE
                                params LIKE ? AND state LIKE ? AND action LIKE ? AND testpatt LIKE ?;"
 			 param-key state-patt action-patt test-patt)))))
 (define (tasks:find-task-queue-records dbstruct target run-name test-patt state-patt action-patt)
-  ;; (handle-exceptions
-  ;;  exn
-  ;;  '()
-  ;;  (sqlite3:first-row
-  (let ((db (db:delay-if-busy (db:get-db dbstruct)))
-	(res '()))
-    (sqlite3:for-each-row 
-     (lambda (a . b)
-       (set! res (cons (cons a b) res)))
-     db "SELECT id,action,owner,state,target,name,testpatt,keylock,params FROM tasks_queue 
+  (db:with-db
+   dbstruct
+   #f #f
+   (lambda (dbdat db)
+     (let ((res '()))
+       (sqlite3:for-each-row 
+	(lambda (a . b)
+	  (set! res (cons (cons a b) res)))
+	db "SELECT id,action,owner,state,target,name,testpatt,keylock,params FROM tasks_queue 
               target = ? AND name = ? AND state LIKE ? AND action LIKE ? AND testpatt LIKE ?;"
-     target run-name state-patt action-patt test-patt)
-    res)) ;; )
+	target run-name state-patt action-patt test-patt)
+       res))))
 ;; kill any runner processes (i.e. processes handling -runtests) that match target/runname
 ;; do a remote call to get the task queue info but do the killing as self here.
@@ -663,11 +664,11 @@
            (run-times  (rmt:get-run-times  run-patt target-patt )))
    (if (eq? (length run-times) 0)
-       (print "Data not found!!")
+       (debug:print 0 *default-log-port* "Data not found!!")
    (if (equal? (args:get-arg "-dumpmode") "json")
        (task:print-runtime-as-json run-times)
          (if (equal? (args:get-arg "-dumpmode") "csv")
 	     (task:print-runtime run-times ",")
@@ -709,46 +710,72 @@
            (test-times  (rmt:get-test-times  runname target )))
    (if (not runname)
-      (print "Error: Missing argument -runname")
+      (debug:print 0 *default-log-port* "Error: Missing argument -runname")
     (if (string-contains runname "%")
-      (print "Error: Invalid runname, '%' not allowed  (" runname ") ")
+      (debug:print 0 *default-log-port* "Error: Invalid runname, '%' not allowed  (" runname ") ")
     (if (not target)
-      (print "Error: Missing argument -target")
+      (debug:print 0 *default-log-port* "Error: Missing argument -target")
      (if  (string-contains target "%")
-      (print "Error: Invalid target, '%' not allowed  (" target ") ")
+      (debug:print 0 *default-log-port* "Error: Invalid target, '%' not allowed  (" target ") ")
    (if (eq? (length test-times) 0)
-       (print "Data not found!!")
+       (debug:print 0 *default-log-port* "Data not found!!")
    (if (equal? (args:get-arg "-dumpmode") "json")
        (task:print-testtime-as-json test-times)
          (if (equal? (args:get-arg "-dumpmode") "csv")
 	     (task:print-testtime test-times ",")
 	     (task:print-testtime test-times "  ")))))
 ;; gets mtpg-run-id and syncs the record if different
+;; This function takes five parameters: a database handle (dbh), cached information (cached-info), a run ID (run-id), area information (area-info), and a hash table tracking the smallest last update time (smallest-last-update-time).
+;; The function first tries to retrieve information about the given run ID from the cached information. If it is already cached, the function returns the cached information. 
+;; Otherwise, the function retrieves information about the run from the megatest database using various functions such as "rmt:get-run-info" and "db:get-value-by-header".
+;; The function then uses this retrieved information to create a new run in the pgdb database, or update an existing one, using functions such as "pgdb:get-ttype", "pgdb:get-run-id", and "pgdb:insert-run". 
+;; It also sets various fields such as "state", "status", "owner", and "event_time".
+;; The smallest-last-update-time variable is a single item hash table that is used to keep track of the smallest (earliest) "last_update" time among all the runs that are processed by calls to this function.
+;; The "last_update" time is obtained from the megatest database for each run that needs to be processed. Then, if the "last_update" time for a particular run is greater (later) than the "last_update" time of the
+;; corresponding run in the PostgreSQL database, and is also smaller (earlier) than the current "smallest-time" (or no "smallest-time" has been recorded yet), it is stored under the "smallest-time" key of the "smallest-last-update-time" hash table.
+;; This is done to ensure that only the runs that have been updated since the last time the synchronization was performed are updated in the PostgreSQL database. This is because updating all the runs, even those 
+;; that have not been modified since the last synchronization, can be a time-consuming process.
+;; At the end of the function, if a new run is successfully inserted into the PostgreSQL database, the "last_update" time of that run is compared with the current value of "smallest-time" in the "smallest-last-update-time" 
+;; hash table. If the "last_update" time is smaller than the current value of "smallest-time" or if "smallest-time" does not exist, "last_update" time is stored as the new "smallest-time". This ensures that the smallest 
+;; "last_update" time among all the runs processed by the function is always stored in the "smallest-last-update-time" hash table.
+;; The smallest-last-update-time hash is referenced in calling functions and is used in the call to "pgdb:write-sync-time dbh area-info smallest-time" in tasks:sync-to-postgres.
+;; If a new entry was successfully created or updated, the function returns the ID of the new entry. If there was an error, the function returns false.
 (define (tasks:run-id->mtpg-run-id dbh cached-info run-id area-info smallest-last-update-time)
   (let* ((runs-ht (hash-table-ref cached-info 'runs))
 	 (runinf  (hash-table-ref/default runs-ht run-id #f))
          (area-id (vector-ref area-info 0)))
        (if runinf
-	runinf ;; already cached
-	(let* ((run-dat    (rmt:get-run-info run-id))               ;; NOTE: get-run-info returns a vector < row header >
+	 runinf ;; already cached
+	 (let* ((run-dat    (rmt:get-run-info run-id))               ;; NOTE: get-run-info returns a vector < row header >
 	       (run-name   (rmt:get-run-name-from-id run-id))
 	       (row        (db:get-rows run-dat))                   ;; yes, this returns a single row
 	       (header     (db:get-header run-dat))
 	       (state      (db:get-value-by-header row header "state"))
 	       (status     (db:get-value-by-header row header "status"))
@@ -776,45 +803,56 @@
 	       (spec-id    (pgdb:get-ttype dbh keytarg))
 	       (publish-time (if (args:get-arg "-cp-eventtime-to-publishtime")
 	       (new-run-id (if (and run-name base-target) (pgdb:get-run-id dbh spec-id target run-name area-id) #f)))
-         (if new-run-id
+             (if new-run-id
 	         (begin ;; let ((run-record (pgdb:get-run-info dbh new-run-id))
-		        (hash-table-set! runs-ht run-id new-run-id)
-		;; ensure key fields are up to date
-     ;; if last_update == pgdb_last_update do not update smallest-last-update-time  
-    (let* ((pgdb-last-update (pgdb:get-run-last-update dbh new-run-id))
-           (smallest-time (hash-table-ref/default smallest-last-update-time "smallest-time" #f)))
-     (if (and  (> last-update pgdb-last-update) (or (not smallest-time) (< last-update smallest-time)))
-        (hash-table-set! smallest-last-update-time "smallest-time" last-update)))
-		(pgdb:refresh-run-info
-		 dbh
-		 new-run-id
-		 state status owner event-time comment fail-count pass-count area-id last-update publish-time)
-     (debug:print-info 4 *default-log-port* "Working on run-id " run-id " pgdb-id "  new-run-id )
-     (if (not (equal? run-tag ""))
-      (task:add-run-tag dbh new-run-id run-tag))
-		new-run-id) 
+		   (hash-table-set! runs-ht run-id new-run-id)
+	           ;; ensure key fields are up to date
+                   ;; if last_update == pgdb_last_update do not update smallest-last-update-time  
+                   (let* ((pgdb-last-update (pgdb:get-run-last-update dbh new-run-id))
+                     (smallest-time (hash-table-ref/default smallest-last-update-time "smallest-time" #f)))
+                     (if (and  (> last-update pgdb-last-update) (or (not smallest-time) (< last-update smallest-time)))
+                       (hash-table-set! smallest-last-update-time "smallest-time" last-update))
+                   )
+		   (pgdb:refresh-run-info dbh new-run-id state status owner event-time comment fail-count pass-count area-id last-update publish-time)
+                   (debug:print-info 4 *default-log-port* "Working on run-id " run-id " pgdb-id "  new-run-id )
+                   (if (not (equal? run-tag ""))
+                     (task:add-run-tag dbh new-run-id run-tag)
+                   )
+		   new-run-id
+                ) 
+              ;; if no pgdb run id was found
 	      (if (or (not state) (equal? state "deleted"))
-          (begin 
-          (debug:print-info 1 *default-log-port*  "Warning: Run with id " run-id " was created after previous sync and deleted before the sync") #f)
-          (if (handle-exceptions
-		        exn
-		        (begin (print-call-chain)
-              (print ((condition-property-accessor 'exn 'message) exn))     
-			      #f)
-            (pgdb:insert-run
-		     dbh
-		     spec-id target run-name state status owner event-time comment fail-count pass-count  area-id last-update publish-time))
-		       (let* ((smallest-time (hash-table-ref/default smallest-last-update-time "smallest-time" #f)))
-             (if (or (not smallest-time) (< last-update smallest-time))
-        				(hash-table-set! smallest-last-update-time "smallest-time" last-update))
-             (tasks:run-id->mtpg-run-id dbh cached-info run-id area-info smallest-last-update-time))
-		  #f)))))))
+                (begin 
+                  (debug:print-info 1 *default-log-port*  "Warning: Run with id " run-id " was created after previous sync and deleted before the sync") #f)
+                   (if (handle-exceptions
+		      exn
+		      (begin (print-call-chain)
+                        (debug:print 0 *default-log-port* ((condition-property-accessor 'exn 'message) exn))     
+			#f
+                      )
+                      (pgdb:insert-run dbh spec-id target run-name state status owner event-time comment fail-count pass-count  area-id last-update publish-time)
+                      )
+		      (let* ((smallest-time (hash-table-ref/default smallest-last-update-time "smallest-time" #f)))
+                        (if (or (not smallest-time) (< last-update smallest-time))
+        		  (hash-table-set! smallest-last-update-time "smallest-time" last-update)
+                        )
+                        ;; call this function recursively to get the pgdb run id
+                        ;; TODO: Why not just call pgdb:get-run-id here to get the id?
+                        (tasks:run-id->mtpg-run-id dbh cached-info run-id area-info smallest-last-update-time)
+                      )
+		      #f
+                   )
+                )
+             )
+          )
+       )
+   )
 (define (task:add-run-tag dbh run-id tag) 
   (let* ((tag-info (pgdb:get-tag-info-by-name dbh tag)))
    (if (not tag-info)
@@ -837,14 +875,20 @@
 (define (tasks:sync-test-steps dbh cached-info test-step-ids smallest-last-update-time)
  ; (print "Sync Steps " test-step-ids )
   (let ((test-ht (hash-table-ref cached-info 'tests))
-        (step-ht (hash-table-ref cached-info 'steps)))
+        (step-ht (hash-table-ref cached-info 'steps))
+        (run-id-in #f)
+        )
      (lambda (test-step-id)
-        (let* ((test-step-info  (rmt:get-steps-info-by-id test-step-id))
+        (set! run-id-in (cdr test-step-id))
+        (set! test-step-id (car test-step-id))
+        (let* ((test-step-info  (rmt:get-steps-info-by-id run-id-in test-step-id))
                (step-id (tdb:step-get-id test-step-info))
                (test-id  (tdb:step-get-test_id    test-step-info))   
 	       (stepname (tdb:step-get-stepname  test-step-info))
 	       (state (tdb:step-get-state test-step-info))	
 	       (status (tdb:step-get-status test-step-info))	
@@ -879,14 +923,18 @@
       (debug:print-info 1 *default-log-port*  "Error: Could not get test step info for step id " test-step-id ))))	;; this is a wierd senario need to debug      	
 (define (tasks:sync-test-gen-data dbh cached-info test-data-ids smallest-last-update-time)
   (let ((test-ht (hash-table-ref cached-info 'tests))
-        (data-ht (hash-table-ref cached-info 'data)))
+        (data-ht (hash-table-ref cached-info 'data))
+        (run-id-in #f)
+        )
      (lambda (test-data-id)
-        (let* ((test-data-info  (rmt:get-data-info-by-id test-data-id))
+        (set! run-id-in (cdr test-data-id))
+        (set! test-data-id (car test-data-id))
+        (let* ((test-data-info  (rmt:get-data-info-by-id run-id-in test-data-id))
                (data-id (db:test-data-get-id  test-data-info))
                (test-id  (db:test-data-get-test_id   test-data-info))   
 	       (category  (db:test-data-get-category  test-data-info))
 	       (variable  (db:test-data-get-variable test-data-info))	
 	       (value (db:test-data-get-value  test-data-info))	
@@ -917,11 +965,11 @@
  		      (debug:print-info 4 *default-log-port*  "Inserting test-data with test-id: " test-id " and data-id " data-id " pgdb test id: " pgdb-test-id)
                        (if (handle-exceptions
 		      (begin (print-call-chain)
-                              (print ((condition-property-accessor 'exn 'message) exn))     
+                              (debug:print 0 *default-log-port* ((condition-property-accessor 'exn 'message) exn))     
                     (pgdb:insert-test-data dbh pgdb-test-id category variable value expected tol units comment status type last-update))
 		       ;(tasks:run-id->mtpg-run-id dbh cached-info run-id area-info)
@@ -935,18 +983,40 @@
                  (debug:print-info 1 *default-log-port*  "Error: Test not in pgdb"))))
       (debug:print-info 1 *default-log-port*  "Error: Could not get test data info for data id " test-data-id ))))	;; this is a wierd senario need to debug      	
+;; This function synchronizes test data between the megatest db and the pgdb. It takes several parameters, including a pgdb database handle (dbh), 
+;; a hash table of cached information (cached-info), a list of test IDs (test-ids), and other data related to the specific area being synced (area-info and smallest-last-update-time).
+;; The function first retrieves the cached test hash table (test-ht) from cached-info and sets the run-id-in variable to false. It then iterates over each test ID in test-ids using 
+;; a for-each loop. Within the loop, it sets run-id-in to the second element of the test ID pair (which is assumed to be a cons cell), and sets test-id to the first element of the pair.
+;; The function then uses rmt:get-test-info-by-id  to retrieve information about the test specified by run-id-in and test-id. It extracts various properties from this 
+;; information, such as the run-id, test-name, item-path, state, status, host, and so on.
+;; If item-path is null or empty, the function prints a message to the log. It then checks if pgdb-run-id is truthy, meaning that the run the test belongs to has a corresponding record in the 
+;; remote database; if it is not, the test is skipped with a warning. If pgdb-test-id is also truthy, it means that the test record exists in the remote database and needs to be updated. 
+;; If pgdb-test-id is falsey, it means that the test record needs to be inserted into the remote database.
+;; For an existing remote test record, if the local last-update timestamp is greater than the remote last-update timestamp and is less than the smallest-time value (or no smallest-time 
+;; is set yet), the function stores that last-update timestamp as the new smallest-time in smallest-last-update-time. The remote test record is then updated with the local data in either case. 
+;; If the remote test record does not exist, the function inserts a new test record into the remote database and applies the same smallest-time rule, without the comparison against the remote timestamp.
+;; After each test ID is processed, the function updates the test-ht hash table with the corresponding pgdb-test-id.
 (define (tasks:sync-tests-data dbh cached-info test-ids area-info smallest-last-update-time)
-  (let ((test-ht (hash-table-ref cached-info 'tests)))
+  (let ((test-ht (hash-table-ref cached-info 'tests))
+        (run-id-in #f))
      (lambda (test-id)
-      ; (print test-id)
-       (let* ((test-info    (rmt:get-test-info-by-id #f test-id))
+        (set! run-id-in  (cdr test-id))
+        (set! test-id (car test-id))
+        (debug:print 0 *default-log-port*  "test-id: " test-id " run-id: " run-id-in) 
+       (let* ((test-info    (rmt:get-test-info-by-id run-id-in test-id))
 	      (run-id       (db:test-get-run_id    test-info)) ;; look these up in db_records.scm
 	      (test-id      (db:test-get-id        test-info))
 	      (test-name    (db:test-get-testname  test-info))
 	      (item-path    (db:test-get-item-path test-info))
 	      (state        (db:test-get-state     test-info))
@@ -972,29 +1042,42 @@
 	 ;; "id"           "run_id"        "testname"  "state"      "status"      "event_time"
 	 ;; "host"         "cpuload"       "diskfree"  "uname"      "rundir"      "item_path"
 	 ;; "run_duration" "final_logf"    "comment"   "shortdir"   "attemptnum"  "archived"
          (if (or (not item-path) (string-null? item-path))
-             (debug:print-info 0 *default-log-port* "Working on Run id : " run-id "and test name : " test-name)) 
+             (debug:print-info 0 *default-log-port* "Working on Run id : " run-id " and test name : " test-name)) 
          (if pgdb-run-id
-	   (if pgdb-test-id ;; have a record
-	     (begin ;; let ((key-name (conc run-id "/" test-name "/" item-path)))
-	       (debug:print-info 4 *default-log-port*  "Updating existing test with run-id: " run-id " and test-id: " test-id " pgdb run id: " pgdb-run-id "  pgdb-test-id "  pgdb-test-id)
-         (let* ((pgdb-last-update (pgdb:get-test-last-update dbh pgdb-test-id)))
-         (if (and  (>  last-update pgdb-last-update) (or (not smallest-time) (< last-update smallest-time))) ;;if last-update is same as pgdb-last-update then it is safe to assume the records are identical and we can use a larger last update time.
-        (hash-table-set! smallest-last-update-time "smallest-time" last-update))) 
-	       (pgdb:update-test dbh pgdb-test-id pgdb-run-id test-name item-path state status host cpuload diskfree uname run-dir log-file run-duration comment event-time archived last-update pid))
-	     (begin 
-           (debug:print-info 4 *default-log-port*  "Inserting test with run-id: " run-id " and test-id: " test-id  " pgdb run id: " pgdb-run-id)
-           (pgdb:insert-test dbh pgdb-run-id test-name item-path state status host cpuload diskfree uname run-dir log-file run-duration comment event-time archived last-update pid)
-            (if (or (not smallest-time) (< last-update smallest-time))
-        				(hash-table-set! smallest-last-update-time "smallest-time" last-update))
-           (set! pgdb-test-id (pgdb:get-test-id dbh pgdb-run-id test-name item-path))))
-           (hash-table-set! test-ht test-id pgdb-test-id))
-           (debug:print-info 1 *default-log-port*  "WARNING: Skipping run with run-id:" run-id ". This run was created after privious sync and removed before this sync."))))
-     test-ids)))
+	     (if pgdb-test-id ;; have a record
+	       (begin ;; let ((key-name (conc run-id "/" test-name "/" item-path)))
+	         (debug:print-info 4 *default-log-port*  "Updating existing test with run-id: " run-id " and test-id: " test-id " pgdb run id: " pgdb-run-id "  pgdb-test-id "  pgdb-test-id)
+                 (let* ((pgdb-last-update (pgdb:get-test-last-update dbh pgdb-test-id)))
+                   (if (and  (>  last-update pgdb-last-update) (or (not smallest-time) (< last-update smallest-time))) ;;if last-update is same as pgdb-last-update then it is safe to assume the records are identical and we can use a larger last update time.
+                     (hash-table-set! smallest-last-update-time "smallest-time" last-update)
+                   )
+                 ) 
+	         (pgdb:update-test dbh pgdb-test-id pgdb-run-id test-name item-path state status host cpuload diskfree uname run-dir log-file run-duration comment event-time archived last-update pid)
+               )
+	       (begin 
+                 (debug:print-info 4 *default-log-port*  "Inserting test with run-id: " run-id " and test-id: " test-id  " pgdb run id: " pgdb-run-id)
+                 (pgdb:insert-test dbh pgdb-run-id test-name item-path state status host cpuload diskfree uname run-dir log-file run-duration comment event-time archived last-update pid)
+                 (if (or (not smallest-time) (< last-update smallest-time))
+        				(hash-table-set! smallest-last-update-time "smallest-time" last-update)
+                 )
+                 (set! pgdb-test-id (pgdb:get-test-id dbh pgdb-run-id test-name item-path))
+               )
+            )
+            (hash-table-set! test-ht test-id pgdb-test-id))
+            (debug:print-info 1 *default-log-port*  "WARNING: Skipping run with run-id:" run-id ". This run was created after previous sync and removed before this sync.")
+         )
+       )
+     )
+     test-ids
+    )
+  )
 (define (task:add-area-tag dbh area-info tag) 
   (let* ((tag-info (pgdb:get-tag-info-by-name dbh tag)))
    (if (not tag-info)
@@ -1021,80 +1104,114 @@
       (debug:print-info 4 *default-log-port*   "Check if run with " run-id " needs to be synced" )
        (tasks:run-id->mtpg-run-id dbh cached-info run-id area-info smallest-last-update-time))
-;; get runs changed since last sync
-;; (define (tasks:sync-test-data dbh cached-info area-info)
-;;   (let* ((
+;; This function takes two arguments "configdat" and "dest". The purpose of this function is to synchronize data from a source database to a PostgreSQL database.
+;; Here's what this function does:
+;; Opens a connection to the PostgreSQL database using the "pgdb:open" function.
+;; Retrieves information about the last sync of *toppath*, using pgdb:get-area-by-path, and stores it in the "area-info" vector variable. (id, area_name, area_path, last_sync)
+;; Creates a hash table named "cached-info" which will contain a start time, and megatest to pgdb ID mapping tables for runs, targets, tests, steps, and data. The targets table does not seem to be used. (TODO: remove it?)
+;; Checks if the user has provided EITHER both the "target" and "run-name" arguments or NEITHER. If not, it prints an error message and exits.
+;; Sets the "start" variable to the current time.
+;; Creates hash tables for five data types: runs, targets, tests, steps, and data, and enters them in the cached-info table.
+;; Sets the "start" time in the "cached-info" hash table.
+;; If area-info was found,
+;;  Retrieves the last synchronization time from the area-info vector or from the megatest "-since" argument.
+;;  Creates the smallest-last-update-time hash table.
+;;  Retrieves records that have been changed since the last synchronization time using the "rmt:get-changed-record-ids" or "rmt:get-run-record-ids" function, depending on whether the user has provided the "-target" and "-runname" arguments.
+;;  Extracts the IDs of the changed runs and tests from that result (the IDs of test steps, test data, and run stats are no longer extracted here).
+;;  Retrieves the area tag from the user-provided "-area-tag" or "-area" argument or sets it to the default area tag.
+;;  Adds the area tag to the area record in the PostgreSQL database.
+;;  Synchronizes the runs and tests using tasks:sync-run-data and tasks:sync-tests-data (test steps and test data are not synchronized by this function).
+;;  Writes the synchronization time to the PostgreSQL database from smallest-last-update-time (skipped when both "-target" and "-runname" were given).
+;; If the area info was not found, it sets the area using tasks:set-area.
+;;  If the "tasks:set-area" function returns true, the "tasks:sync-to-postgres" function is called again recursively. Otherwise, an error message is printed and the function returns false. 
+;;  TODO: just set the area-info when it is not found, instead of doing recursion here.
 (define (tasks:sync-to-postgres configdat dest)
-  (print "In sync")
   (let* ((dbh         (pgdb:open configdat dbname: dest))
 	 (area-info   (pgdb:get-area-by-path dbh *toppath*))
 	 (cached-info (make-hash-table))
 	 (start       (current-seconds))
-   (test-patt   (if (args:get-arg "-testpatt")
-											(args:get-arg "-testpatt")
+         (test-patt   (if (args:get-arg "-testpatt")
+		      (args:get-arg "-testpatt")
-   (target         (if (args:get-arg "-target")
-														 (args:get-arg "-target")
-													#f))
-    (run-name         (if (args:get-arg "-runname")
-														 (args:get-arg "-runname")
-													#f)))
+         (target      (if (args:get-arg "-target")
+		      (args:get-arg "-target")
+		      #f))
+         (run-name   (if (args:get-arg "-runname")
+		     (args:get-arg "-runname")
+		     #f)))
      (if (and target  (not run-name))
-					(print "Error: Provide runname")
+	  (debug:print 0 *default-log-port* "Error: Provide runname")
           (exit 1)))
      (if (and (not target)  run-name)
-					(print "Error: Provide target")
+	  (debug:print 0 *default-log-port* "Error: Provide target")
           (exit 1)))
-    ;(print "123")
-    ;(exit 1) 
     (for-each (lambda (dtype)
 		(hash-table-set! cached-info dtype (make-hash-table)))
 	      '(runs targets tests steps data))
     (hash-table-set! cached-info 'start start) ;; when done we'll set sync times to this
     (if area-info
-	(let* ((last-sync-time (vector-ref area-info 3))
+	(let* ((last-sync-time (if (args:get-arg "-since") (string->number (args:get-arg "-since")) (vector-ref area-info 3)))
 	       (smallest-last-update-time  (make-hash-table))
-         (changed      (if (and target run-name)
+               (changed      (if (and target run-name)
                             (rmt:get-run-record-ids target run-name (rmt:get-keys) test-patt)
                             (rmt:get-changed-record-ids last-sync-time)))
 	       (run-ids        (alist-ref 'runs       changed))
 	       (test-ids       (alist-ref 'tests      changed))
-	       (test-step-ids  (alist-ref 'test_steps changed))
-	       (test-data-ids  (alist-ref 'test_data  changed))
-	       (run-stat-ids   (alist-ref 'run_stats  changed))
-         (area-tag    (if (args:get-arg "-area-tag") 
+               (area-tag    (if (args:get-arg "-area-tag") 
                                  (args:get-arg "-area-tag")
                                  (if (args:get-arg "-area") 
                                    (args:get-arg "-area") 
+           (debug:print-info 0 *default-log-port* "changed records since "  (time->string (seconds->local-time last-sync-time) "%m/%d %H:%M") ": " changed)
+           (debug:print-info 0 *default-log-port* "last sync time: " last-sync-time)
            (if (and (equal? area-tag "") (not (pgdb:is-area-taged dbh (vector-ref area-info 0))))
             (set! area-tag *default-area-tag*)) 
            (if (not (equal? area-tag "")) 
              (task:add-area-tag dbh area-info area-tag)) 
-	  (if (or (not (null? test-ids)) (not (null? run-ids)))
-	      (begin
-                (debug:print-info 0 *default-log-port*  "syncing runs")   
-	              (tasks:sync-run-data dbh cached-info run-ids area-info smallest-last-update-time) 
-                (debug:print-info 0 *default-log-port*  "syncing tests")
-		            (tasks:sync-tests-data dbh cached-info test-ids area-info smallest-last-update-time)
-                (debug:print-info 0 *default-log-port*  "syncing test steps")
-                (tasks:sync-test-steps dbh cached-info test-step-ids smallest-last-update-time)
-								(debug:print-info 0 *default-log-port*  "syncing test data")
-                (tasks:sync-test-gen-data dbh cached-info test-data-ids smallest-last-update-time)
-                (print "----------done---------------")))
-     (let*  ((smallest-time (hash-table-ref/default smallest-last-update-time "smallest-time" (current-seconds))))
-     (debug:print-info 0 "smallest-time :" smallest-time  " last-sync-time " last-sync-time)
-    (if (not (and target run-name)) 
-	  (if (or (and smallest-time (> smallest-time last-sync-time)) (and smallest-time (eq? last-sync-time 0)))
-				(pgdb:write-sync-time dbh area-info smallest-time))))) ;;this needs to be changed
+          (if (not (null? run-ids))
+            (begin
+               (debug:print-info 0 *default-log-port*  "syncing runs: " run-ids)   
+	       (tasks:sync-run-data dbh cached-info run-ids area-info smallest-last-update-time) 
+            )
+          )
+          (if (not (null? test-ids))
+            (begin
+              (debug:print-info 0 *default-log-port*  "syncing tests: " test-ids)
+	      (tasks:sync-tests-data dbh cached-info test-ids area-info smallest-last-update-time)
+            )
+          )
+          ;; Record how far this sync got. smallest-time is the value stored under "smallest-time"
+          ;; in smallest-last-update-time by the sync functions called above; when nothing was stored it defaults to now.
+          (let*  ((smallest-time (hash-table-ref/default smallest-last-update-time "smallest-time" (current-seconds))))
+             ;; Pass the log port explicitly, as every other debug:print-info call in this file does;
+             ;; without it the message string is taken as the port argument.
+             (debug:print-info 0 *default-log-port* "smallest-time :" smallest-time  " last-sync-time " last-sync-time)
+             ;; A sync restricted by -target/-runname does not advance the area-wide sync time.
+             (if (not (and target run-name)) 
+               ;; Only move the stored sync time forward, or set it when it has never been written (0).
+               (if (or (and smallest-time (> smallest-time last-sync-time)) (and smallest-time (eq? last-sync-time 0)))
+                 (pgdb:write-sync-time dbh area-info smallest-time))
+             )
+          )
+        ) ;;this needs to be changed
+        ;; If the area info was not found in the areas table, set it and repeat tasks:sync-to-postgres.
+        ;; TODO: why not just set the area info when it is not found rather than do recursion?
 	(if (tasks:set-area dbh configdat)
 	    (tasks:sync-to-postgres configdat dest)
 	      (debug:print 0 *default-log-port* "ERROR: unable to create an area record")
-	      #f)))))
+	      #f)
+        )
+      )
+   )

ADDED   testlocking/justtest.scm
Index: testlocking/justtest.scm
--- /dev/null
+++ testlocking/justtest.scm
@@ -0,0 +1,9 @@
+(print "Starting at "(current-seconds))
+(dbfile:with-simple-file-lock "justtesting.lock" 
+			      (lambda ()
+				(print "got the lock at "(current-seconds)"!")))
+(print "Done at: " (current-seconds))

ADDED   testlocking/
Index: testlocking/
--- /dev/null
+++ testlocking/
@@ -0,0 +1,12 @@
+HRS=$(date +%H)
+MINS=$(($(date +%M) + 1))
+if [[ $MINS -lt 10 ]];then
+  MINS="0${MINS}"
+for x in a b c d e f g h i j;do
+   echo "megatest -load justtest.scm > $x.log" | at $THETIME

Index: tests.scm
--- tests.scm
+++ tests.scm
@@ -25,19 +25,21 @@
 (declare (unit tests))
 (declare (uses lock-queue))
 (declare (uses db))
 (declare (uses tdb))
 (declare (uses common))
+(declare (uses commonmod))
 ;; (declare (uses dcommon)) ;; needed for the steps processing
 (declare (uses items))
 (declare (uses runconfig))
 ;; (declare (uses sdb))
 (declare (uses server))
 ;;(declare (uses stml2))
 (use sqlite3 srfi-1 posix regex regex-case srfi-69 dot-locking tcp directory-utils)
 (import (prefix sqlite3 sqlite3:))
+(import commonmod)
 (require-library stml)
 (include "common_records.scm")
 (include "key_records.scm")
 (include "db_records.scm")
@@ -554,11 +556,11 @@
 	(let ((my-start-time (current-seconds))
 	      (lockf         (conc outputfilename ".lock")))
 	  (let loop ((have-lock  (common:simple-file-lock lockf)))
 	    (if have-lock
 		(let ((script (configf:lookup *configdat* "testrollup" test-name)))
-		  (print "Obtained lock for " outputfilename)
+		  (debug:print 0 *default-log-port* "Obtained lock for " outputfilename)
 		  (rmt:set-state-status-and-roll-up-items run-id test-name "" #f #f #f)
 		  (if script
 		      (system (conc script " > " outputfilename " & "))
 		      (tests:generate-html-summary-for-iterated-test run-id test-id test-name outputfilename))
 		  (common:simple-file-release-lock lockf)
@@ -568,11 +570,11 @@
 		;; didn't get the lock, check to see if current update started later than this 
 		;; update, if so we can exit without doing any work
 		(if (> my-start-time (handle-exceptions
-					 (print "failed to get mod time on " lockf ", exn=" exn)
+					 (debug:print 0 *default-log-port* "failed to get mod time on " lockf ", exn=" exn)
 				       (file-modification-time lockf)))
 		    ;; we started since current re-gen in flight, delay a little and try again
 		      (debug:print-info 1 *default-log-port* "Waiting to update " outputfilename ", another test currently updating it")
@@ -611,11 +613,11 @@
 					   comment) "</td>"
 	 (if (list? testdat)
-	       (print "ERROR: failed to get records with rmt:test-get-records-for-index-file run-id=" run-id "test-name=" test-name)
+	       (debug:print 0 *default-log-port* "ERROR: failed to get records with rmt:test-get-records-for-index-file run-id=" run-id "test-name=" test-name)
 	(print "<table><tr><td valign=\"top\">")
 	;; Print out stats for status
 	(set! tot 0)
@@ -1775,11 +1777,11 @@
 ;; (tests:run-dot (list "digraph tests {" "a -> b" "}") "plain")
 (define (tests:run-dot indat outtype) ;; outtype is plain, fig, dot, etc.
-  (let-values (((inp oup pid)(process "env -i PATH=$PATH dot" (list "-T" outtype))))
+  (let-values (((inp oup pid)(process "env -i PATH=\"$PATH\" dot" (list "-T" outtype))))
     (with-output-to-port oup
       (lambda ()
 	(map print indat)))
     (close-output-port oup)
     (let ((res (with-input-from-port inp
@@ -1797,14 +1799,14 @@
     (tests:write-dot-file testrecords dfile sizex sizey)
     (if (common:file-exists? fname)
 	(let ((res (with-input-from-file fname
 		     (lambda ()
-	  (system (conc "env -i PATH=$PATH dot -T " outtype " < " dfile " > " fname "&"))
+	  (system (conc "env -i PATH=\"$PATH\" dot -T " outtype " < " dfile " > " fname "&"))
-	  (system (conc "env -i PATH=$PATH dot -T " outtype " < " dfile " > " fname))
+	  (system (conc "env -i PATH=\"$PATH\" dot -T " outtype " < " dfile " > " fname))
 	  (with-input-from-file fname
 	    (lambda ()

ADDED   tests/simplerun/Makefile
Index: tests/simplerun/Makefile
--- /dev/null
+++ tests/simplerun/Makefile
@@ -0,0 +1,5 @@
+# cleanup: kill any running mtest/dboard processes and remove the logs, backups,
+# local db area and run areas listed in the rm command below.
+# Declared phony so that a file named "cleanup" can never make the target look up to date.
+.PHONY : cleanup
+cleanup :
+	killall mtest dboard -v -9 || true
+	rm -rf *.log *.bak NB* logs/* .meta .db /tmp/$(USER)/megatest_localdb/simplerun ../simpleruns/* lt

ADDED   tests/simplerun/debug.scm
Index: tests/simplerun/debug.scm
--- /dev/null
+++ tests/simplerun/debug.scm
@@ -0,0 +1,61 @@
+(module junk
+	*
+(import big-chicken
+	rmtmod
+	apimod
+	dbmod
+	srfi-18
+	trace)
+(trace-call-sites #t)
+  ;; db:get-tests-for-run
+  ;; rmt:general-open-connection
+  ;; rmt:open-main-connection
+  ;; rmt:drop-conn
+  ;; rmt:send-receive
+  ;; rmt:log-to-main
+  )
+(define (make-run-id)
+  (let* ((s (conc (current-process-id)))
+	 (l (string-length s)))
+    (string->number (substring s (- l 3) l))
+    ))
+(define (run)
+  (let* ((th1 (make-thread
+	       (lambda ()
+		 (let loop ((r 0)
+			    (i 1)
+			    (s 0)) ;; sum
+		   (let ((start-time (current-milliseconds))
+			 (run-id     (+ r (make-run-id))))
+		     (rmt:register-test run-id "test1" (conc "item_" i))
+		     (thread-sleep! 0.01)
+		     (let* ((qry-time (- (current-milliseconds) start-time))
+			    (tot-query-time (+ qry-time s))
+			    (avg-query-time (* 1.0 (/ tot-query-time (max i 1)))))
+		       (if (> qry-time 500)
+			   (print "WARNING: rmt:register-test took more than 500ms, "qry-time"ms, i="i", avg-query-time="avg-query-time))
+		       (if (eq? (modulo i 100) 0)
+			   (print "For run-id="run-id", "(rmt:get-keys-write)" num tests registered="i" avg-query-time="avg-query-time))
+		       (if (< i 500)
+			   (loop r (+ i 1) tot-query-time)
+			   (if (< r 100)
+			       (let* ((start-time (current-milliseconds)))
+				 (print "rmt:get-keys "(rmt:get-keys)" in "(- (current-milliseconds) start-time))
+				 ;;                                          run-id testpatt states statuses offset limit not-in sort-by sort-order qryvals last-update mode
+				 (print "Got "(length (rmt:get-tests-for-run run-id "%"     '()    '()       #f      #f    #f     #f      #f         #f      0           #f))" tests for run "run-id)
+				 (print "Average query time: "avg-query-time)
+				 (loop (+ r 1) 0 tot-query-time))))))))
+	       )))
+    (thread-start! th1)
+    (thread-join! th1)))

Index: tests/simplerun/megatest.config
--- tests/simplerun/megatest.config
+++ tests/simplerun/megatest.config
@@ -20,10 +20,14 @@
 # Adjust max_concurrent_jobs to limit how much you load your machines
 max_concurrent_jobs 50
+timeout 3
+# 3600
 # Uncomment this to make the in-mem db into a disk based db (slower but good for debug)
 # be aware that some unit tests will fail with this due to persistent data
 # tmpdb /tmp
@@ -35,15 +39,15 @@
 state start end completed
 # Job tools are more advanced ways to control how your jobs are launched
-useshell yes
-launcher nbfind
+# useshell yes
+launcher nbfake
 # You can override environment variables for all your tests here
 EXAMPLE_VAR example value
 # As you run more tests you may need to add additional disks, the names are arbitrary but must be unique
 disk0 #{getenv MT_RUN_AREA_HOME}/../simpleruns

Index: tests/simplerun/tests/test1/testconfig
--- tests/simplerun/tests/test1/testconfig
+++ tests/simplerun/tests/test1/testconfig
@@ -24,11 +24,11 @@
 # waiton setup
 priority 0
 # Iteration for your tests are controlled by the items section
+# [items]
 # PARTOFDAY morning noon afternoon evening night
 # test_meta is a section for storing additional data on your test
 author matt

ADDED   tests/simplerun/thebeginning.scm
Index: tests/simplerun/thebeginning.scm
--- /dev/null
+++ tests/simplerun/thebeginning.scm
@@ -0,0 +1,126 @@
+;; Load the trace and test extensions plus sqlite3 (bound with the sqlite3: prefix),
+;; then import the dbfile module exercised by the checks in this script.
+(use trace test (prefix sqlite3 sqlite3:))
+(import dbfile)
+(trace-call-sites #t)
+ ;; dbfile:setup
+ ;; dbfile:open-sqlite3-db
+ ;; dbfile:init-subdb
+ ;; dbfile:add-dbdat
+ ;; db:initialize-main-db
+ ;; dbfile:set-subdb
+ ;; db:with-db
+ ;; dbfile:get-subdb
+ )
+;; NOTE(review): this touches a hard-coded, user-specific file under /tmp/mmgraham;
+;; confirm it is meant to run outside that environment.
+(system "touch /tmp/mmgraham/megatest_localdb/simplerun/.nfs.pdx.disks.icf_gwa_001.mmgraham.fossil.megatest1.7.mod.tests.simplerun/.db/10.db")
+;; *************** dbfile.scm tests ****************
+;; (debug:print 0 *default-log-port* " tmp area: " (common:get-db-tmp-area))
+;; Temporary db area handed to dbfile:setup in the first check.
+(define tmpdir (common:get-db-tmp-area))
+;; Both setup entry points are expected to return a value satisfying dbr:dbstruct?.
+(test #f #t (dbr:dbstruct? (dbfile:setup #t *toppath* tmpdir)))
+(test #f #t (dbr:dbstruct? (db:setup #t)))
+;; Local alias for the global *dbstruct-dbs*.
+(define dbstruct *dbstruct-dbs*)
+;; (test #f #t (dbr:subdb? (dbfile:init-subdb dbstruct #f db:initialize-main-db))) ;; this opens the nfs main db
+;; (test #f #t (dbr:dbdat? (dbfile:open-db *dbstruct-dbs* #f db:initialize-main-db))) ;; this opens the tmp db.
+;; (define maindbdat (dbfile:open-db *dbstruct-dbs* #f db:initialize-main-db)) ;; this opens the tmp db.
+;; (dbfile:add-dbdat dbstruct #f maindbdat)
+;;(test #f #t (dbr:subdb? (dbfile:get-subdb dbstruct #f)))
+;; (test #f #t (dbr:dbdat? (dbfile:get-dbdat dbstruct #f)))
+;; (test #f #f (dbr:dbdat? (dbfile:get-dbdat dbstruct #f))) ;; stack empty so should fail.
+;; (test #f #t (hash-table? (dbr:dbstruct-subdbs dbstruct)))
+;; (test #f #t (stack? (dbr:subdb-dbstack (dbfile:get-subdb dbstruct #f))))
+;; (test #f '("SYSTEM" "RELEASE") (db:get-keys *dbstruct-dbs*))
+;; (test #f #t (dbr:dbdat? (dbfile:open-db dbstruct 1 db:initialize-main-db)))
+;; (test #f #t (dbr:dbdat? (dbfile:open-db dbstruct 2 db:initialize-main-db)))
+;; (define rundbdat (dbfile:open-db dbstruct 1 db:initialize-main-db))
+;; (define rundbdat2 (dbfile:open-db dbstruct 2 db:initialize-main-db))
+;; (define rundbdat3 (dbfile:open-db dbstruct 3 db:initialize-main-db))
+;; (dbfile:add-dbdat dbstruct 1 rundbdat)
+;; (dbfile:add-dbdat dbstruct 2 rundbdat2)
+;; (dbfile:add-dbdat dbstruct 3 rundbdat3)
+;; (test #f #t (dbr:subdb? (dbfile:get-subdb dbstruct 1)))
+;; (test #f #t (dbr:dbdat? (dbfile:get-dbdat dbstruct 1)))
+;; (test #f #t (dbr:subdb? (dbfile:get-subdb dbstruct 2)))
+;; (test #f #t (dbr:dbdat? (dbfile:get-dbdat dbstruct 2)))
+;; (test #f #t (> (dbfile:lazy-sqlite-db-modification-time ".db/main.db") 0))
+;; (test #f #t (> (dbfile:lazy-sqlite-db-modification-time ".db/1.db") 0))
+;; (test #f #t (> (dbfile:lazy-sqlite-db-modification-time ".db/2.db") 0))
+;; (test #f #t (common:simple-file-lock "./db.lock"))
+;; (test #f "./db.lock" (common:simple-file-release-lock "./db.lock"))
+;; *************** db.scm tests ****************
+;; (define thisdbdat (db:open-db dbstruct #f))
+;; (test #f #t (dbr:dbdat? thisdbdat))
+;; (test #f #t (dbr:dbdat? (db:get-db dbstruct #f)))
+;; (test #f #t (dbr:dbdat? (db:get-db dbstruct 1)))
+;; (test #f #t (dbr:dbdat? (db:get-db dbstruct 2)))
+;; (dbfile:add-dbdat dbstruct #f maindbdat)
+;; (define maindbdat (dbfile:get-dbdat dbstruct #f))
+;; (dbfile:add-dbdat dbstruct #f maindbdat)
+;; (define mtdbdat2 (dbr:subdb-mtdbdat (dbfile:get-subdb dbstruct #f)))
+;; (define areapath  (dbr:dbstruct-areapath dbstruct))
+;; (define mtdbpath  (dbfile:run-id->path areapath #f))
+;; (define init-proc db:initialize-main-db)
+;; (define mtdbdat   (dbfile:open-sqlite3-db mtdbpath init-proc))
+;; (define maindb-handle (dbr:dbdat-dbh mtdbdat))
+;; (define maindb-handle2 (dbr:dbdat-dbh mtdbdat2))
+;; (sqlite3:execute maindb-handle "vacuum")
+;; (sqlite3:execute maindb-handle2 "vacuum")
+;; (define full-sel   (conc "SELECT * from runs"))
+;; (sqlite3:for-each-row
+;; 	     (lambda (a . b)
+;;                (debug:print 0 *default-log-port* "a: " a " b: " b)
+;;              )
+;; 	     maindb-handle
+;; 	     full-sel)
+;; (test #f #t (db:sync-touched dbstruct #f))
+;; (test #f #t (db:sync-touched dbstruct 1))
+;; (test #f #t (db:sync-touched dbstruct 2))
+;; (test #f #t (dbr:subdb? (dbfile:get-subdb dbstruct #f)))
+;; (test #f #t (dbr:subdb? (dbfile:get-subdb dbstruct (string->number "1"))))
+;; (test #f #t (dbr:subdb? (dbfile:get-subdb dbstruct 2)))
+;; (test #f #t (db:sync-touched dbstruct #f))
+;; (test #f #t (db:sync-touched dbstruct 1))
+;; (test #f #t (db:sync-touched dbstruct 2))
+;; Sync all dbs reachable from dbstruct; the call is expected to return #t.
+(test #f #t (db:all-db-sync dbstruct))
+;; (test #f #t (db:close-all dbstruct))
+;; Close individual db handles together with their statement caches.
+;; NOTE(review): rundbdat, rundbdat2 and mtdbdat are only defined in
+;; commented-out lines of this script; unless they are bound elsewhere these
+;; three checks will fail with unbound-variable errors -- confirm.
+(test #f #t (db:safely-close-sqlite3-db (dbr:dbdat-dbh rundbdat) (dbr:dbdat-stmt-cache rundbdat)))
+(test #f #t (db:safely-close-sqlite3-db (dbr:dbdat-dbh rundbdat2) (dbr:dbdat-stmt-cache rundbdat2)))
+(test #f #t (db:safely-close-sqlite3-db (dbr:dbdat-dbh mtdbdat) (dbr:dbdat-stmt-cache mtdbdat)))

Index: tree.scm
--- tree.scm
+++ tree.scm
@@ -85,11 +85,11 @@
      ((not (equal? top (iup:attribute obj "TITLE0")))
-      (print "ERROR: top name " top " doesn't match " (iup:attribute obj "TITLE0")))
+      (debug:print 0 *default-log-port* "ERROR: top name " top " doesn't match " (iup:attribute obj "TITLE0")))
      ((null? nodelst))
       (let loop ((hed      (car nodelst))
 		 (tal      (cdr nodelst))
 		 (depth    1)
@@ -131,11 +131,11 @@
 	  (loop (+ currnode 1)
 (define (tree:delete-node obj top node-path) ;; node-path is a list of strings
   (let ((id  (tree:find-node obj (cons top node-path))))
-    (print "Found node to remove " id " for path " top " " node-path)
+    (debug:print 0 *default-log-port* "Found node to remove " id " for path " top " " node-path)
     (iup:attribute-set! obj (conc "DELNODE" id) "SELECTED")))
   (let* ((tb      (iup:treebox

ADDED   utils/mt-new-to-old.scm
Index: utils/mt-new-to-old.scm
--- /dev/null
+++ utils/mt-new-to-old.scm
@@ -0,0 +1,73 @@
+(module mt-new-to-old
+ *
+ scheme
+ chicken.file
+ chicken.base
+ chicken.string
+ chicken.pretty-print
+ sqlite3)
+;; Refuse to run unless the new-style main db exists under the current directory.
+(unless (file-exists? ".megatest/main.db")
+  (print "No .megatest/main.db found, exiting")
+  (exit 1))
+
+;; Seed megatest.db from main.db; the #t flag lets an existing megatest.db be overwritten.
+(copy-file ".megatest/main.db" "megatest.db" #t)
+
+;; Columns, in order, written for every row inserted into the tests table.
+(define tests_fields
+  "run_id,testname,host,cpuload,diskfree,uname,rundir,shortdir,item_path,state,status,attemptnum,final_logf,logdat,run_duration,comment,event_time,fail_count,pass_count,archived")
+
+;; Columns selected ahead of tests_fields so they can be read off the head of each row.
+(define extra_fields "testname,item_path")
+;; Copy every row of the tests table found in the db file `dbfile` into the
+;; already-open database `destdb`.
+;;
+;; All rows are read and the source db is closed before anything is written.
+;; For each row, any destination record with the same testname, item_path and
+;; run_id is deleted first, then the row is inserted using bound parameters.
+(define (import-one dbfile destdb)
+  (print "Importing "dbfile)
+  (let* ((db         (open-database dbfile))
+         ;; Each row is (testname item_path . <tests_fields values>).
+         (rows       (fold-row
+                      (lambda (res . row)
+                        (cons row res))
+                      '()
+                      db
+                      (conc "SELECT "extra_fields","tests_fields" FROM tests;")))
+         ;; Built once for all rows; one placeholder per column in tests_fields.
+         (insert-sql (conc "INSERT INTO tests ("tests_fields") VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?);")))
+    (finalize! db)
+    (print "Found "(length rows)" records to insert.")
+    (for-each
+     (lambda (row)
+       (let* ((testname (car row))
+              (itempath (cadr row))
+              (remrow   (cddr row))   ;; values matching tests_fields
+              (run-id   (car remrow)))
+         (print run-id","testname"/"itempath)
+         ;; Drop any existing record for this test so the insert cannot duplicate it.
+         (execute destdb "DELETE FROM tests WHERE testname=? AND item_path=? AND run_id=?;"
+                  (or testname "")
+                  (or itempath "")
+                  (or run-id ""))
+         (apply execute destdb insert-sql remrow)))
+     rows)))
+;; Import every numbered db file matching .megatest/[0-9]*.db into megatest.db.
+;;
+;; All imports run inside one transaction on megatest.db, and the destination
+;; db is closed afterwards.
+(define (process-all)
+  (let* ((outdb (open-database "megatest.db"))
+         (indbs (glob ".megatest/[0-9]*.db")))
+    (with-transaction
+     outdb
+     (lambda ()
+       (for-each
+        (lambda (dbfname)
+          (import-one dbfname outdb))
+        indbs)
+       ;; with-transaction commits only when the thunk returns a true value;
+       ;; for-each's result is unspecified, so return #t explicitly.
+       #t))
+    (finalize! outdb)))
+(import mt-new-to-old)

ADDED   utils/
Index: utils/
--- /dev/null
+++ utils/
@@ -0,0 +1,8 @@
+cp .megatest/main.db megatest.db
+for db in $(ls .megatest/?.db); do
+  echo $db
+  sqlite3 $db "SELECT * FROM tests" | sqlite3 megatest.db ".import /dev/stdin tests"

ADDED   utils/
Index: utils/
--- /dev/null
+++ utils/
@@ -0,0 +1,41 @@
+if [ -d ".megatest" ]
+  echo ".megatest directory present."
+  echo "You have already migrated. "
+  exit
+mkdir -p .megatest
+cp megatest.db .megatest/main.db
+sqlite3 .megatest/main.db << END_SQL
+delete from tests; 
+delete from test_steps;
+version_id=$(sqlite3 .megatest/main.db "select id from metadat where var = 'MEGATEST_VERSION'")
+current_version=$(megatest -version)
+sqlite3 .megatest/main.db "replace into metadat (id,var,val) values($version_id,'MEGATEST_VERSION','$current_version')"
+sqlite3 megatest.db 'select id from runs' > runs.txt
+# Split the legacy db into bucket dbs: run N is stored in .megatest/<N mod 100>.db.
+for run in $(cat runs.txt)
+   echo "working on run id $run"
+   dbnum=$(($run%100))
+   # Build each bucket db only once; later runs that map to the same bucket reuse it.
+   if [ ! -f ".megatest/$dbnum.db" ]
+   then
+     # Start from a full copy of the legacy db, then delete what does not belong
+     # to this bucket. Orphaned test_steps are matched against tests.id.
+     cp megatest.db ".megatest/$dbnum.db"
+     sqlite3 ".megatest/$dbnum.db"  << END_SQL
+     delete from tests where run_id in (select id from runs where id%100!=$dbnum);
+     delete from test_data;
+     delete from test_meta;
+     delete from test_rundat;
+     delete from test_steps where not exists ( select id from tests where tests.id = test_steps.test_id);
+     replace into metadat (id,var,val) values($version_id,'MEGATEST_VERSION','$current_version');
+     VACUUM;
+    fi

Index: utils/mt_ezstep
--- utils/mt_ezstep
+++ utils/mt_ezstep
@@ -31,11 +31,11 @@
 # Since the user may not have . on the path and since we are likely to want to 
 # run test scripts in the current directory add the current dir to the path
-export PATH=$PATH:$PWD
+export PATH="$PATH:$PWD"
 testrundir=$1; shift

Index: utils/mt_xterm
--- utils/mt_xterm
+++ utils/mt_xterm
@@ -16,14 +16,25 @@
 #     You should have received a copy of the GNU General Public License
 #     along with Megatest.  If not, see <>.
-if [ -e ];then
-  source
+grep -v "export USER=" | grep -v "export HOME=" > $tmpfile
+source $tmpfile
+rm $tmpfile
+# if [ -e ];then
+export USER=$USER
+export HOME=$MT_HOME
 if [ x"$MT_XTERM_CMD" == "x" ];then
   exec xterm "$@"
   exec $MT_XTERM_CMD

Index: utils/nbfake
--- utils/nbfake
+++ utils/nbfake
@@ -96,10 +96,10 @@
 if [[ -z "$MY_NBFAKE_HOST" ]]; then
   # Run locally
-  sh -c "cd $CURRWD;export DISPLAY=$DISPLAY; export PATH=$PATH; nohup $WASHCMD $* >> $MY_NBFAKE_LOG 2>&1 &"
+  sh -c "cd $CURRWD;export DISPLAY=$DISPLAY; export PATH=\"$PATH\"; nohup $WASHCMD $* >> $MY_NBFAKE_LOG 2>&1 &"
   # run remotely
-  ssh -X -n -f $MY_NBFAKE_HOST "sh -c \"cd $CURRWD;export DISPLAY=$DISPLAY; export PATH=$PATH; nohup $WASHCMD $* >> $MY_NBFAKE_LOG 2>&1 &\""
+  # PATH is wrapped in single quotes: a \" here would end the surrounding
+  # \"...\" argument of sh -c and leave PATH unquoted on the remote side.
+  ssh -X -n -f $MY_NBFAKE_HOST "sh -c \"cd $CURRWD;export DISPLAY=$DISPLAY; export PATH='$PATH'; nohup $WASHCMD $* >> $MY_NBFAKE_LOG 2>&1 &\""

ADDED   utils/
Index: utils/
--- /dev/null
+++ utils/
@@ -0,0 +1,3 @@
+# Report how many db files, log files and TCP connections are held open by
+# processes whose command name starts with "mtest".
+# lsof runs once so all three counts come from the same snapshot; grep -E
+# replaces the obsolescent egrep and grep -c replaces "| wc -l".
+open_files=$(lsof -c mtest)
+echo "Database opens:  $(printf '%s\n' "$open_files" | grep -cE '.*db$')"
+echo "Logfile opens:   $(printf '%s\n' "$open_files" | grep -cE '.*log$')"
+echo "TCP connections: $(printf '%s\n' "$open_files" | grep -c TCP)"

DELETED vg-test.scm
Index: vg-test.scm
--- vg-test.scm
+++ /dev/null
@@ -1,119 +0,0 @@
-;;  Copyright 2006-2017, Matthew Welland.
-;; This file is part of Megatest.
-;;     Megatest is free software: you can redistribute it and/or modify
-;;     it under the terms of the GNU General Public License as published by
-;;     the Free Software Foundation, either version 3 of the License, or
-;;     (at your option) any later version.
-;;     Megatest is distributed in the hope that it will be useful,
-;;     but WITHOUT ANY WARRANTY; without even the implied warranty of
-;;     GNU General Public License for more details.
-;;     You should have received a copy of the GNU General Public License
-;;     along with Megatest.  If not, see <>.
-(use canvas-draw iup foof-loop)
-(import canvas-draw-iup)
-(load "vg.scm")
-(define numtorun 1000)
-;; (if (> (length (argv)) 1)
-;; 		     (string->number (cadr (argv)))
-;; 		     1000))
- (use trace)
- ;; (trace 
- ;;  ;; vg:draw-rect
- ;;  ;; vg:grow-rect
- ;;  vg:get-extents-for-objs
- ;;  vg:components-get-extents
- ;;  vg:instances-get-extents
- ;;  vg:get-extents-for-two-rects
- ;;  canvas-line!)
-(define d1 (vg:drawing-new))
-(define l1 (vg:lib-new))
-(define c1 (vg:comp-new))
-(define c2 (vg:comp-new))
-(define bt1 (vg:make-rect-obj 10 40 20 50 text: "A long piece of text" font: "Helvetica, -10"))
-(let ((r1 (vg:make-rect-obj 20 20 30 30 text: "r1" font: "Helvetica, -20"))
-      (r2 (vg:make-rect-obj 30 30 60 60 text: "r2" font: "Helvetica, -10"))
-      (t1 (vg:make-text-obj 60 60 "The middle" font: "Helvetica, -10")))
-  (vg:add-objs-to-comp c1 r1 r2 t1 bt1))
-(loop ((for x (up-from 0 (to 20))))
-       (loop ((for y (up-from 0 (to 20))))
-	     (vg:add-objs-to-comp c1 (vg:make-rect-obj x y (+ x 5)(+ y 5)))))
-(let ((start (current-seconds)))
-  (let loop ((i 0))
-    (vg:add-obj-to-comp c1 (vg:make-rect-obj 0 0 100 100))
-    (if (< i numtorun)(loop (+ i 1))))
-  (print "Run time: " (- (current-seconds) start)))
-(vg:add-obj-to-comp c1 (vg:make-line-obj 0 0 100 100))
-;; add the c1 component to lib l1 with name firstcomp
-(vg:add-comp-to-lib l1 "firstcomp" c1)
-(vg:add-comp-to-lib l1 "secondcomp" c2)
-;; add the l1 lib to drawing with name firstlib
-(vg:add-lib d1 "firstlib" l1)
-;; instantiate firstlib/firstcomp as inst1 in drawing d1 at 0,0
-(vg:instantiate d1 "firstlib" "firstcomp" "inst1" 0 0)
-(vg:instantiate d1 "firstlib" "firstcomp" "inst2" 200 200)
-;; (vg:drawing-scalex-set! d1 1.1)
-;; (vg:drawing-scaley-set! d1 0.5)
-;; (define xtnts (vg:scale-offset-xy 
-;; 	       (vg:component-get-extents c1)
-;; 	       1.1 1.1 -2 -2))
-;; get extents of c1 and put a rectange around it
-(define xtnts (apply vg:grow-rect 10 10 (vg:components-get-extents d1 c1)))
-(vg:add-objs-to-comp c1 (apply vg:make-rect-obj xtnts))
-(define bt1xt (vg:obj-get-extents d1 bt1))
-(print "bt1xt: " bt1xt)
-(vg:add-objs-to-comp c1 (apply vg:make-rect-obj bt1xt))
-;; get extents of all objects and put rectangle around it
-(define big-xtnts (vg:instances-get-extents d1))
-(vg:add-objs-to-comp c2 (apply vg:make-rect-obj big-xtnts))
-(vg:instantiate d1 "firstlib" "secondcomp" "inst3" 0 0)
-(vg:drawing-scalex-set! d1 1.5)
-(vg:drawing-scaley-set! d1 1.5)
-(define cnv #f)
-(define the-cnv (canvas 
-		 #:size "500x400"
-		 #:expand "YES"
-		 #:scrollbar "YES"
-		 #:posx "0.5"
-		 #:posy "0.5"
-		 #:action (make-canvas-action
-			   (lambda (c xadj yadj)
-			     (set! cnv c)))))
- (dialog
-  (vbox
-   the-cnv)))
-(vg:drawing-cnv-set! d1 cnv)
-(vg:draw d1 #t)
-;; (canvas-rectangle! cnv  10 100 10 80)