Index: Makefile
==================================================================
--- Makefile
+++ Makefile
@@ -28,13 +28,19 @@
            ezsteps.scm lock-queue.scm rmt.scm api.scm		\
            subrun.scm portlogger.scm archive.scm env.scm		\
            diff-report.scm cgisetup/models/pgdb.scm
 
 # module source files
-MSRCFILES = dbfile.scm debugprint.scm mtargs.scm commonmod.scm dbmod.scm
+MSRCFILES = dbfile.scm debugprint.scm mtargs.scm commonmod.scm dbmod.scm \
+           dbmemmod.scm tcp-transportmod.scm
 
 all : $(PREFIX)/bin/.$(ARCHSTR) mtest dboard mtut tcmt
+
+transport-mode.scm : transport-mode.scm.template
+	cp transport-mode.scm.template transport-mode.scm
+
+megatest.scm : transport-mode.scm
 
 # dbmod.import.o is just a hack here
 mofiles/dbfile.o     : mofiles/debugprint.o mofiles/commonmod.o dbmod.import.o
 db.o : dbmod.import.o
 mofiles/debugprint.o : mofiles/mtargs.o

Index: client.scm
==================================================================
--- client.scm
+++ client.scm
@@ -104,11 +104,11 @@
 	       (debug:print-info 4 *default-log-port* "client:setup server-dat=" server-dat ", remaining-tries=" remaining-tries)
 	       (if (not runremote)
                    (begin
 		     ;; Here we are creating a runremote where there was none or it was clobbered with #f
 		     ;;
-		     (set! runremote (make-remote))
+		     (set! runremote (make-and-init-remote))
                      (let* ((server-info (server:check-if-running areapath)))
 		       (remote-server-info-set! runremote server-info)
                        (if server-info
                            (begin
                              (remote-server-url-set! runremote (server:record->url server-info))

Index: common.scm
==================================================================
--- common.scm
+++ common.scm
@@ -249,33 +249,10 @@
 (define (common:get-sync-lock-filepath)
   (let* ((tmp-area     (common:get-db-tmp-area))
          (lockfile     (conc tmp-area "/megatest.db.sync-lock")))
     lockfile))
 
-;;======================================================================
-;; when called from a wrapper I need sometimes to find the calling
-;; wrapper, this is for dashboard to find the correct megatest.
-;;
-(define (common:find-local-megatest #!optional (progname "megatest"))
-  (let ((res (filter file-exists?
-		     (map (lambda (updir)
-			    (let* ((lm  (car (argv)))
-				   (dir (pathname-directory lm))
-				   (exe (pathname-strip-directory lm)))
-			      (conc (if dir (conc dir "/") "")
-				    (case (string->symbol exe)
-				      ((dboard)    (conc updir progname))
-				      ((mtest)     (conc updir progname))
-				      ((dashboard) progname)
-				      (else exe)))))
-			  '("../../" "../")))))
-    (if (null? res)
-	(begin
-	  (debug:print 0 *default-log-port* "Failed to find this executable! Using what can be found on the path")
-	  progname)
-	(car res))))
-
 (define *common:logpro-exit-code->status-sym-alist*
   '( ( 0 . pass )
      ( 1 . fail )
      ( 2 . warn )
      ( 3 . check )
@@ -998,25 +975,19 @@
 	  #f)))
 
 (define (common:get-area-path-signature)
   (message-digest-string (md5-primitive) *toppath*))
 
-(define (common:get-signature str)
-  (message-digest-string (md5-primitive) str))
-
 ;;======================================================================
 ;; E X I T   H A N D L I N G
 ;;======================================================================
 
 (define (common:run-sync?)
   (and *toppath*               ;; gate if called before *toppath* is set
        (common:on-homehost?)
        (args:get-arg "-server")))
 
-(define (common:human-time)
-  (time->string (seconds->local-time (current-seconds)) "%Y-%m-%d %H:%M:%S"))
-
 
 (define (std-signal-handler signum)
   ;; (signal-mask! signum)
   (set! *time-to-exit* #t) 
   ;;(debug:print-info 13 *default-log-port* "got signal "signum)
@@ -2653,291 +2624,10 @@
     (cond
      (with-vars     (common:without-vars  fullcmd))
      (with-orig-env (common:with-orig-env fullcmd))
      (else          (common:without-vars  fullcmd "MT_.*")))))
 		  
-;;======================================================================
-;; T I M E   A N D   D A T E
-;;======================================================================
-
-;;======================================================================
-;; Convert strings like "5s 2h 3m" => 60x60x2 + 3x60 + 5
-(define (common:hms-string->seconds tstr)
-  (let ((parts     (string-split-fields "\\w+" tstr))
-	(time-secs 0)
-	;; s=seconds, m=minutes, h=hours, d=days, M=months, y=years, w=weeks
-	(trx       (regexp "(\\d+)([smhdMyw])")))
-    (for-each (lambda (part)
-		(let ((match  (string-match trx part)))
-		  (if match
-		      (let ((val (string->number (cadr match)))
-			    (unt (caddr match)))
-			(if val 
-			    (set! time-secs (+ time-secs (* val
-							    (case (string->symbol unt)
-							      ((s) 1)
-							      ((m) 60) ;; minutes
-							      ((h) 3600)
-							      ((d) 86400)
-							      ((w) 604800)
-							      ((M) 2628000) ;; aproximately one month
-							      ((y) 31536000)
-							      (else #f))))))))))
-	      parts)
-    time-secs))
-		       
-(define (seconds->hr-min-sec secs)
-  (let* ((hrs (quotient secs 3600))
-	 (min (quotient (- secs (* hrs 3600)) 60))
-	 (sec (- secs (* hrs 3600)(* min 60))))
-    (conc (if (> hrs 0)(conc hrs "hr ") "")
-	  (if (> min 0)(conc min "m ")  "")
-	  sec "s")))
-
-(define (seconds->time-string sec)
-  (time->string 
-   (seconds->local-time sec) "%H:%M:%S"))
-
-(define (seconds->work-week/day-time sec)
-  (time->string
-   (seconds->local-time sec) "ww%V.%u %H:%M"))
-
-(define (seconds->work-week/day sec)
-  (time->string
-   (seconds->local-time sec) "ww%V.%u"))
-
-(define (seconds->year-work-week/day sec)
-  (time->string
-   (seconds->local-time sec) "%yww%V.%w"))
-
-(define (seconds->year-work-week/day-time sec)
-  (time->string
-   (seconds->local-time sec) "%Yww%V.%w %H:%M"))
-
-(define (seconds->year-week/day-time sec)
-  (time->string
-   (seconds->local-time sec) "%Yw%V.%w %H:%M"))
-
-(define (seconds->quarter sec)
-  (case (string->number
-	 (time->string 
-	  (seconds->local-time sec)
-	  "%m"))
-    ((1 2 3) 1)
-    ((4 5 6) 2)
-    ((7 8 9) 3)
-    ((10 11 12) 4)
-    (else #f)))
-
-;;======================================================================
-;; basic ISO8601 format (e.g. "2017-02-28 06:02:54") date time => Unix epoch
-;;
-(define (common:date-time->seconds datetime)
-  (local-time->seconds (string->time datetime "%Y-%m-%d %H:%M:%S")))
-
-;;======================================================================
-;; given span of seconds tstart to tend
-;; find start time to mark and mark delta
-;;
-(define (common:find-start-mark-and-mark-delta tstart tend)
-  (let* ((deltat   (- (max tend (+ tend 10)) tstart)) ;; can't handle runs of less than 4 seconds. Pad it to 10 seconds ...
-	 (result   #f)
-	 (min      60)
-	 (hr       (* 60 60))
-	 (day      (* 24 hr))
-	 (yr       (* 365 day)) ;; year
-	 (mo       (/ yr 12))
-	 (wk       (* day 7)))
-    (for-each
-     (lambda (max-blks)
-       (for-each
-	(lambda (span) ;; 5 2 1
-	  (if (not result)
-	      (for-each 
-	       (lambda (timeunit timesym) ;; year month day hr min sec
-		 (if (not result)
-		     (let* ((time-blk (* span timeunit))
-			    (num-blks (quotient deltat time-blk)))
-		       (if (and (> num-blks 4)(< num-blks max-blks))
-			   (let ((first (* (quotient tstart time-blk) time-blk)))
-			     (set! result (list span timeunit time-blk first timesym))
-			     )))))
-	       (list yr mo wk day hr min 1)
-	       '(     y  mo w  d   h  m   s))))
-	(list 8 6 5 2 1)))
-     '(5 10 15 20 30 40 50 500))
-    (if values
-	(apply values result)
-	(values 0 day 1 0 'd))))
-
-;;======================================================================
-;; given x y lim return the cron expansion
-;;
-(define (common:expand-cron-slash x y lim)
-  (let loop ((curr x)
-	     (res  `()))
-    (if (< curr lim)
-	(loop (+ curr y) (cons curr res))
-	(reverse res))))
-
-;;======================================================================
-;; expand a complex cron string to a list of cron strings
-;;
-;;  x/y   => x, x+y, x+2y, x+3y while x+Ny<max_for_field
-;;  a,b,c => a, b ,c
-;;
-;;   NOTE: with flatten a lot of the crud below can be factored down.
-;;
-(define (common:cron-expand cron-str)
-  (if (list? cron-str)
-      (flatten
-       (fold (lambda (x res)
-	       (if (list? x)
-		   (let ((newres (map common:cron-expand x)))
-		     (append x newres))
-		   (cons x res)))
-	     '()
-	     cron-str)) ;; (map common:cron-expand cron-str))
-      (let ((cron-items (string-split cron-str))
-	    (slash-rx   (regexp "(\\d+)/(\\d+)"))
-	    (comma-rx   (regexp ".*,.*"))
-	    (max-vals   '((min        . 60)
-			  (hour       . 24)
-			  (dayofmonth . 28) ;;; BUG!!!! This will be a bug for some combinations
-			  (month      . 12)
-			  (dayofweek  . 7))))
-	(if (< (length cron-items) 5) ;; bad spec
-	    cron-str ;; `(,cron-str)              ;; just return the string, something downstream will fix it
-	    (let loop ((hed  (car cron-items))
-		       (tal  (cdr cron-items))
-		       (type 'min)
-		       (type-tal '(hour dayofmonth month dayofweek))
-		       (res  '()))
-	      (regex-case
-		  hed
-		(slash-rx ( _ base incr ) (let* ((basen          (string->number base))
-						 (incrn          (string->number incr))
-						 (expanded-vals  (common:expand-cron-slash basen incrn (alist-ref type max-vals)))
-						 (new-list-crons (fold (lambda (x myres)
-									 (cons (conc (if (null? res)
-											 ""
-											 (conc (string-intersperse res " ") " "))
-										     x " " (string-intersperse tal " "))
-									       myres))
-								       '() expanded-vals)))
-					    ;; (print "new-list-crons: " new-list-crons)
-					    ;; (fold (lambda (x res)
-					    ;; 	    (if (list? x)
-					    ;; 		(let ((newres (map common:cron-expand x)))
-					    ;; 		  (append x newres))
-					    ;; 		(cons x res)))
-					    ;; 	  '()
-					    (flatten (map common:cron-expand new-list-crons))))
-		;;					    (map common:cron-expand (map common:cron-expand new-list-crons))))
-		(else (if (null? tal)
-			  cron-str
-			  (loop (car tal)(cdr tal)(car type-tal)(cdr type-tal)(append res (list hed)))))))))))
-
-;;======================================================================
-;; given a cron string and the last time event was processed return #t to run or #f to not run
-;;
-;;  min    hour   dayofmonth month  dayofweek
-;; 0-59    0-23   1-31       1-12   0-6          ### NOTE: dayofweek does not include 7
-;;
-;;  #t => yes, run the job
-;;  #f => no, do not run the job
-;;
-(define (common:cron-event cron-str now-seconds-in last-done) ;; ref-seconds = #f is NOW.
-  (let* ((cron-items     (map string->number (string-split cron-str)))
-	 (now-seconds    (or now-seconds-in (current-seconds)))
-	 (now-time       (seconds->local-time now-seconds))
-	 (last-done-time (seconds->local-time last-done))
-	 (all-times      (make-hash-table)))
-    ;; (print "cron-items: " cron-items "(length cron-items): " (length cron-items))
-    (if (not (eq? (length cron-items) 5)) ;; don't even try to figure out junk strings
-	#f
-	(match-let (((     cmin chour cdayofmonth cmonth    cdayofweek)
-		     cron-items)
-		    ;; 0     1    2        3         4    5      6
-		    ((nsec nmin nhour ndayofmonth nmonth nyr ndayofweek n7 n8 n9)
-		     (vector->list now-time))
-		    ((lsec lmin lhour ldayofmonth lmonth lyr ldayofweek l7 l8 l9)
-		     (vector->list last-done-time)))
-	  ;; create all possible time slots
-	  ;; remove invalid slots due to (for example) day of week
-	  ;; get the start and end entries for the ref-seconds (current) time
-	  ;; if last-done > ref-seconds => this is an ERROR!
-	  ;; does the last-done time fall in the legit region?
-	  ;;    yes => #f  do not run again this command
-	  ;;    no  => #t  ok to run the command
-	  (for-each ;; month
-	   (lambda (month)
-	     (for-each ;; dayofmonth
-	      (lambda (dom)
-		(for-each
-		 (lambda (hr) ;; hour
-		   (for-each
-		    (lambda (minute) ;; minute
-		      (let ((copy-now (apply vector (vector->list now-time))))
-			(vector-set! copy-now 0 0) ;; force seconds to zero
-			(vector-set! copy-now 1 minute)
-			(vector-set! copy-now 2 hr)
-			(vector-set! copy-now 3 dom)  ;; dom is already corrected for zero referenced
-			(vector-set! copy-now 4 month)
-			(let* ((copy-now-secs (local-time->seconds copy-now))
-			       (new-copy      (seconds->local-time copy-now-secs))) ;; remake the time vector
-			  (if (or (not cdayofweek)
-				  (equal? (vector-ref new-copy 6)
-					  cdayofweek)) ;; if the day is specified and a match OR if the day is NOT specified
-			      (if (or (not cdayofmonth)
-				      (equal? (vector-ref new-copy 3)
-					      (+ 1 cdayofmonth))) ;; if the month is specified and a match OR if the month is NOT specified
-				  (hash-table-set! all-times copy-now-secs new-copy))))))
-		    (if cmin
-			`(,cmin)  ;; if given cmin, have to use it
-			(list (- nmin 1) nmin (+ nmin 1))))) ;; minute
-		 (if chour
-		     `(,chour)
-		     (list (- nhour 1) nhour (+ nhour 1))))) ;; hour
-	      (if cdayofmonth
-		  `(,cdayofmonth)
-		  (list (- ndayofmonth 1) ndayofmonth (+ ndayofmonth 1)))))
-	   (if cmonth
-	       `(,cmonth)
-	       (list (- nmonth 1) nmonth (+ nmonth 1))))
-	  (let ((before #f)
-		(is-in  #f))
-	    (for-each
-	     (lambda (moment)
-	       (if (and before
-			(<= before now-seconds)
-			(>= moment now-seconds))
-		   (begin
-		     ;; (print)
-		     ;; (print "Before: " (time->string (seconds->local-time before)))
-		     ;; (print "Now:    " (time->string (seconds->local-time now-seconds)))
-		     ;; (print "After:  " (time->string (seconds->local-time moment)))
-		     ;; (print "Last:   " (time->string (seconds->local-time last-done)))
-		     (if (<  last-done before)
-			 (set! is-in before))
-		     ))
-	       (set! before moment))
-	     (sort (hash-table-keys all-times) <))
-	    is-in)))))
-
-(define (common:extended-cron  cron-str now-seconds-in last-done)
-  (let ((expanded-cron (common:cron-expand cron-str)))
-    (if (string? expanded-cron)
-	(common:cron-event expanded-cron now-seconds-in last-done)
-	(let loop ((hed (car expanded-cron))
-		   (tal (cdr expanded-cron)))
-	  (if (common:cron-event hed now-seconds-in last-done)
-	      #t
-	      (if (null? tal)
-		  #f
-		  (loop (car tal)(cdr tal))))))))
-
 ;;======================================================================
 ;; C O L O R S
 ;;======================================================================
       
 (define (common:name->iup-color name)

Index: commonmod.scm
==================================================================
--- commonmod.scm
+++ commonmod.scm
@@ -17,21 +17,37 @@
 ;;     along with Megatest.  If not, see <http://www.gnu.org/licenses/>.
 
 ;;======================================================================
 
 (declare (unit commonmod))
+;; (declare (uses debugprint))
 
 (use srfi-69)
 
 (module commonmod
 	*
 
-(import scheme chicken data-structures extras files)
-(import (prefix sqlite3 sqlite3:)
-	posix typed-records srfi-18 srfi-69
-	md5 message-digest
-	regex srfi-1)
+(import scheme
+	chicken
+
+	(prefix sqlite3 sqlite3:)
+	data-structures
+	extras
+	files
+	matchable
+	md5
+	message-digest
+	posix
+	regex
+	regex-case
+	srfi-1
+	srfi-18
+	srfi-69
+	typed-records
+
+	;; debugprint
+	)
 
 ;;======================================================================
 ;; CONTENTS
 ;;
 ;;  config file utils
@@ -208,18 +224,344 @@
   (let ((adat (get-section cfgdat "areas")))
     (map (lambda (entry)
 	   `(,(car entry) . 
 	     ,(val->alist (cadr entry))))
 	 adat)))
+
+;;======================================================================
+;; time utils
+;;======================================================================
+
+(define (common:human-time)
+  (time->string (seconds->local-time (current-seconds)) "%Y-%m-%d %H:%M:%S"))
+
+;;======================================================================
+;; T I M E   A N D   D A T E
+;;======================================================================
+
+;;======================================================================
+;; Convert strings like "5s 2h 3m" => 60x60x2 + 3x60 + 5
+(define (common:hms-string->seconds tstr)
+  (let ((parts     (string-split-fields "\\w+" tstr))
+	(time-secs 0)
+	;; s=seconds, m=minutes, h=hours, d=days, M=months, y=years, w=weeks
+	(trx       (regexp "(\\d+)([smhdMyw])")))
+    (for-each (lambda (part)
+		(let ((match  (string-match trx part)))
+		  (if match
+		      (let ((val (string->number (cadr match)))
+			    (unt (caddr match)))
+			(if val 
+			    (set! time-secs (+ time-secs (* val
+							    (case (string->symbol unt)
+							      ((s) 1)
+							      ((m) 60) ;; minutes
+							      ((h) 3600)
+							      ((d) 86400)
+							      ((w) 604800)
+							      ((M) 2628000) ;; aproximately one month
+							      ((y) 31536000)
+							      (else #f))))))))))
+	      parts)
+    time-secs))
+		       
+(define (seconds->hr-min-sec secs)
+  (let* ((hrs (quotient secs 3600))
+	 (min (quotient (- secs (* hrs 3600)) 60))
+	 (sec (- secs (* hrs 3600)(* min 60))))
+    (conc (if (> hrs 0)(conc hrs "hr ") "")
+	  (if (> min 0)(conc min "m ")  "")
+	  sec "s")))
+
+(define (seconds->time-string sec)
+  (time->string 
+   (seconds->local-time sec) "%H:%M:%S"))
+
+(define (seconds->work-week/day-time sec)
+  (time->string
+   (seconds->local-time sec) "ww%V.%u %H:%M"))
+
+(define (seconds->work-week/day sec)
+  (time->string
+   (seconds->local-time sec) "ww%V.%u"))
+
+(define (seconds->year-work-week/day sec)
+  (time->string
+   (seconds->local-time sec) "%yww%V.%w"))
+
+(define (seconds->year-work-week/day-time sec)
+  (time->string
+   (seconds->local-time sec) "%Yww%V.%w %H:%M"))
+
+(define (seconds->year-week/day-time sec)
+  (time->string
+   (seconds->local-time sec) "%Yw%V.%w %H:%M"))
+
+(define (seconds->quarter sec)
+  (case (string->number
+	 (time->string 
+	  (seconds->local-time sec)
+	  "%m"))
+    ((1 2 3) 1)
+    ((4 5 6) 2)
+    ((7 8 9) 3)
+    ((10 11 12) 4)
+    (else #f)))
+
+;;======================================================================
+;; basic ISO8601 format (e.g. "2017-02-28 06:02:54") date time => Unix epoch
+;;
+(define (common:date-time->seconds datetime)
+  (local-time->seconds (string->time datetime "%Y-%m-%d %H:%M:%S")))
+
+;;======================================================================
+;; given span of seconds tstart to tend
+;; find start time to mark and mark delta
+;;
+(define (common:find-start-mark-and-mark-delta tstart tend)
+  (let* ((deltat   (- (max tend (+ tend 10)) tstart)) ;; can't handle runs of less than 4 seconds. Pad it to 10 seconds ...
+	 (result   #f)
+	 (min      60)
+	 (hr       (* 60 60))
+	 (day      (* 24 hr))
+	 (yr       (* 365 day)) ;; year
+	 (mo       (/ yr 12))
+	 (wk       (* day 7)))
+    (for-each
+     (lambda (max-blks)
+       (for-each
+	(lambda (span) ;; 5 2 1
+	  (if (not result)
+	      (for-each 
+	       (lambda (timeunit timesym) ;; year month day hr min sec
+		 (if (not result)
+		     (let* ((time-blk (* span timeunit))
+			    (num-blks (quotient deltat time-blk)))
+		       (if (and (> num-blks 4)(< num-blks max-blks))
+			   (let ((first (* (quotient tstart time-blk) time-blk)))
+			     (set! result (list span timeunit time-blk first timesym))
+			     )))))
+	       (list yr mo wk day hr min 1)
+	       '(     y  mo w  d   h  m   s))))
+	(list 8 6 5 2 1)))
+     '(5 10 15 20 30 40 50 500))
+    (if values
+	(apply values result)
+	(values 0 day 1 0 'd))))
+
+;;======================================================================
+;; given x y lim return the cron expansion
+;;
+(define (common:expand-cron-slash x y lim)
+  (let loop ((curr x)
+	     (res  `()))
+    (if (< curr lim)
+	(loop (+ curr y) (cons curr res))
+	(reverse res))))
+
+;;======================================================================
+;; expand a complex cron string to a list of cron strings
+;;
+;;  x/y   => x, x+y, x+2y, x+3y while x+Ny<max_for_field
+;;  a,b,c => a, b ,c
+;;
+;;   NOTE: with flatten a lot of the crud below can be factored down.
+;;
+(define (common:cron-expand cron-str)
+  (if (list? cron-str)
+      (flatten
+       (fold (lambda (x res)
+	       (if (list? x)
+		   (let ((newres (map common:cron-expand x)))
+		     (append x newres))
+		   (cons x res)))
+	     '()
+	     cron-str)) ;; (map common:cron-expand cron-str))
+      (let ((cron-items (string-split cron-str))
+	    (slash-rx   (regexp "(\\d+)/(\\d+)"))
+	    (comma-rx   (regexp ".*,.*"))
+	    (max-vals   '((min        . 60)
+			  (hour       . 24)
+			  (dayofmonth . 28) ;;; BUG!!!! This will be a bug for some combinations
+			  (month      . 12)
+			  (dayofweek  . 7))))
+	(if (< (length cron-items) 5) ;; bad spec
+	    cron-str ;; `(,cron-str)              ;; just return the string, something downstream will fix it
+	    (let loop ((hed  (car cron-items))
+		       (tal  (cdr cron-items))
+		       (type 'min)
+		       (type-tal '(hour dayofmonth month dayofweek))
+		       (res  '()))
+	      (regex-case
+		  hed
+		(slash-rx ( _ base incr ) (let* ((basen          (string->number base))
+						 (incrn          (string->number incr))
+						 (expanded-vals  (common:expand-cron-slash basen incrn (alist-ref type max-vals)))
+						 (new-list-crons (fold (lambda (x myres)
+									 (cons (conc (if (null? res)
+											 ""
+											 (conc (string-intersperse res " ") " "))
+										     x " " (string-intersperse tal " "))
+									       myres))
+								       '() expanded-vals)))
+					    ;; (print "new-list-crons: " new-list-crons)
+					    ;; (fold (lambda (x res)
+					    ;; 	    (if (list? x)
+					    ;; 		(let ((newres (map common:cron-expand x)))
+					    ;; 		  (append x newres))
+					    ;; 		(cons x res)))
+					    ;; 	  '()
+					    (flatten (map common:cron-expand new-list-crons))))
+		;;					    (map common:cron-expand (map common:cron-expand new-list-crons))))
+		(else (if (null? tal)
+			  cron-str
+			  (loop (car tal)(cdr tal)(car type-tal)(cdr type-tal)(append res (list hed)))))))))))
+
+;;======================================================================
+;; given a cron string and the last time event was processed return #t to run or #f to not run
+;;
+;;  min    hour   dayofmonth month  dayofweek
+;; 0-59    0-23   1-31       1-12   0-6          ### NOTE: dayofweek does not include 7
+;;
+;;  #t => yes, run the job
+;;  #f => no, do not run the job
+;;
+(define (common:cron-event cron-str now-seconds-in last-done) ;; ref-seconds = #f is NOW.
+  (let* ((cron-items     (map string->number (string-split cron-str)))
+	 (now-seconds    (or now-seconds-in (current-seconds)))
+	 (now-time       (seconds->local-time now-seconds))
+	 (last-done-time (seconds->local-time last-done))
+	 (all-times      (make-hash-table)))
+    ;; (print "cron-items: " cron-items "(length cron-items): " (length cron-items))
+    (if (not (eq? (length cron-items) 5)) ;; don't even try to figure out junk strings
+	#f
+	(match-let (((     cmin chour cdayofmonth cmonth    cdayofweek)
+		     cron-items)
+		    ;; 0     1    2        3         4    5      6
+		    ((nsec nmin nhour ndayofmonth nmonth nyr ndayofweek n7 n8 n9)
+		     (vector->list now-time))
+		    ((lsec lmin lhour ldayofmonth lmonth lyr ldayofweek l7 l8 l9)
+		     (vector->list last-done-time)))
+	  ;; create all possible time slots
+	  ;; remove invalid slots due to (for example) day of week
+	  ;; get the start and end entries for the ref-seconds (current) time
+	  ;; if last-done > ref-seconds => this is an ERROR!
+	  ;; does the last-done time fall in the legit region?
+	  ;;    yes => #f  do not run again this command
+	  ;;    no  => #t  ok to run the command
+	  (for-each ;; month
+	   (lambda (month)
+	     (for-each ;; dayofmonth
+	      (lambda (dom)
+		(for-each
+		 (lambda (hr) ;; hour
+		   (for-each
+		    (lambda (minute) ;; minute
+		      (let ((copy-now (apply vector (vector->list now-time))))
+			(vector-set! copy-now 0 0) ;; force seconds to zero
+			(vector-set! copy-now 1 minute)
+			(vector-set! copy-now 2 hr)
+			(vector-set! copy-now 3 dom)  ;; dom is already corrected for zero referenced
+			(vector-set! copy-now 4 month)
+			(let* ((copy-now-secs (local-time->seconds copy-now))
+			       (new-copy      (seconds->local-time copy-now-secs))) ;; remake the time vector
+			  (if (or (not cdayofweek)
+				  (equal? (vector-ref new-copy 6)
+					  cdayofweek)) ;; if the day is specified and a match OR if the day is NOT specified
+			      (if (or (not cdayofmonth)
+				      (equal? (vector-ref new-copy 3)
+					      (+ 1 cdayofmonth))) ;; if the month is specified and a match OR if the month is NOT specified
+				  (hash-table-set! all-times copy-now-secs new-copy))))))
+		    (if cmin
+			`(,cmin)  ;; if given cmin, have to use it
+			(list (- nmin 1) nmin (+ nmin 1))))) ;; minute
+		 (if chour
+		     `(,chour)
+		     (list (- nhour 1) nhour (+ nhour 1))))) ;; hour
+	      (if cdayofmonth
+		  `(,cdayofmonth)
+		  (list (- ndayofmonth 1) ndayofmonth (+ ndayofmonth 1)))))
+	   (if cmonth
+	       `(,cmonth)
+	       (list (- nmonth 1) nmonth (+ nmonth 1))))
+	  (let ((before #f)
+		(is-in  #f))
+	    (for-each
+	     (lambda (moment)
+	       (if (and before
+			(<= before now-seconds)
+			(>= moment now-seconds))
+		   (begin
+		     ;; (print)
+		     ;; (print "Before: " (time->string (seconds->local-time before)))
+		     ;; (print "Now:    " (time->string (seconds->local-time now-seconds)))
+		     ;; (print "After:  " (time->string (seconds->local-time moment)))
+		     ;; (print "Last:   " (time->string (seconds->local-time last-done)))
+		     (if (<  last-done before)
+			 (set! is-in before))
+		     ))
+	       (set! before moment))
+	     (sort (hash-table-keys all-times) <))
+	    is-in)))))
+
+(define (common:extended-cron  cron-str now-seconds-in last-done)
+  (let ((expanded-cron (common:cron-expand cron-str)))
+    (if (string? expanded-cron)
+	(common:cron-event expanded-cron now-seconds-in last-done)
+	(let loop ((hed (car expanded-cron))
+		   (tal (cdr expanded-cron)))
+	  (if (common:cron-event hed now-seconds-in last-done)
+	      #t
+	      (if (null? tal)
+		  #f
+		  (loop (car tal)(cdr tal))))))))
+
+
 
 ;;======================================================================
 ;; misc stuff
 ;;======================================================================
 
-;; (define (debug:print . params) #f)
-;; (define (debug:print-info . params) #f)
-;; 
-;; (define (set-functions dbgp dbgpinfo)
-;;   (set! debug:print dbgp)
-;;   (set! debug:print-info dbgpinfo))
+(define (common:get-signature str)
+  (message-digest-string (md5-primitive) str))
+
+;;======================================================================
+;; hash of hashs
+;;======================================================================
+
+(define (db:hoh-set! dat key1 key2 val)
+  (let* ((subhash (hash-table-ref/default dat key1 #f)))
+    (if subhash
+	(hash-table-set! subhash key2 val)
+	(begin
+	  (hash-table-set! dat key1 (make-hash-table))
+	  (db:hoh-set! dat key1 key2 val)))))
+
+(define (db:hoh-get dat key1 key2)
+  (let* ((subhash (hash-table-ref/default dat key1 #f)))
+    (and subhash
+	 (hash-table-ref/default subhash key2 #f))))
+
+;;======================================================================
+;; when called from a wrapper I need sometimes to find the calling
+;; wrapper, this is for dashboard to find the correct megatest.
+;;
+(define (common:find-local-megatest #!optional (progname "megatest"))
+  (let ((res (filter file-exists?
+		     (map (lambda (updir)
+			    (let* ((lm  (car (argv)))
+				   (dir (pathname-directory lm))
+				   (exe (pathname-strip-directory lm)))
+			      (conc (if dir (conc dir "/") "")
+				    (case (string->symbol exe)
+				      ((dboard)    (conc updir progname))
+				      ((mtest)     (conc updir progname))
+				      ((dashboard) progname)
+				      (else exe)))))
+			  '("../../" "../")))))
+    (if (null? res)
+	(begin
+	  ;; (debug:print 0 *default-log-port* "Failed to find this executable! Using what can be found on the path")
+	  progname)
+	(car res))))
 
 )

Index: db.scm
==================================================================
--- db.scm
+++ db.scm
@@ -22,10 +22,20 @@
 ;; Database access
 ;;======================================================================
 
 ;; dbstruct vector containing all the relevant dbs like main.db, megatest.db, run.db etc
 
+(declare (unit db))
+(declare (uses common))
+(declare (uses debugprint))
+(declare (uses dbmod))
+(declare (uses dbfile))
+(declare (uses keys))
+(declare (uses ods))
+(declare (uses client))
+(declare (uses mt))
+
 (use (srfi 18)
      extras
      tcp
      stack
      (prefix sqlite3 sqlite3:)
@@ -44,28 +54,19 @@
      z3
      typed-records
      matchable
      files)
 
-(declare (unit db))
-(declare (uses common))
-(declare (uses dbmod))
-;; (declare (uses debugprint))
-(declare (uses dbfile))
-(declare (uses keys))
-(declare (uses ods))
-(declare (uses client))
-(declare (uses mt))
-
 (include "common_records.scm")
 (include "db_records.scm")
 (include "key_records.scm")
 (include "run_records.scm")
 
 (define *number-of-writes* 0)
 (define *number-non-write-queries* 0)
 
+(import debugprint)
 (import dbmod)
 (import dbfile)
 
 ;; record for keeping state,status and count for doing roll-ups in
 ;; iterated tests
@@ -124,10 +125,88 @@
   (assert *toppath* "FATAL: db:setup called before launch:setup has been run.")
   (let* ((tmpdir (common:get-db-tmp-area)))
     (if (not *dbstruct-dbs*)
 	(dbfile:setup do-sync *toppath* tmpdir)
 	*dbstruct-dbs*)))
+
+(define (db:with-db dbstruct run-id r/w proc . params)
+  (case (rmt:transport-mode)
+    ((http)(dbfile:with-db dbstruct run-id r/w proc params))
+    ((tcp) (dbmod:with-db dbstruct run-id r/w proc params))))
+
+;; moved from dbfile
+;;
+;; ADD run-id SUPPORT
+;;
+(define (db:create-all-triggers dbstruct)
+  (db:with-db
+   dbstruct #f #f
+   (lambda (dbdat db)
+     (db:create-triggers db))))
+
+(define (db:create-triggers db)
+    (for-each (lambda (key)
+              (sqlite3:execute db (cadr key)))
+          db:trigger-list))
+
+(define (db:drop-all-triggers dbstruct)
+  (db:with-db
+   dbstruct #f #f
+   (lambda (dbdat db)
+     (db:drop-triggers db))))
+
+(define (db:have-incompletes? dbstruct run-id ovr-deadtime)
+  (let* ((incompleted '())
+	 (oldlaunched '())
+	 (toplevels   '())
+	 ;; (deadtime-str (configf:lookup *configdat* "setup" "deadtime"))
+	 (deadtime  (or ovr-deadtime 72000))) ;; twenty hours
+    (db:with-db
+     dbstruct run-id #f
+     (lambda (dbdat db)
+       
+       ;; in RUNNING or REMOTEHOSTSTART for more than 10 minutes
+       ;;
+       ;; HOWEVER: this code in run:test seems to work fine
+       ;;              (> (- (current-seconds)(+ (db:test-get-event_time testdat)
+       ;;                     (db:test-get-run_duration testdat)))
+       ;;                    600) 
+       ;; (db:delay-if-busy dbdat)
+       (sqlite3:for-each-row 
+        (lambda (test-id run-dir uname testname item-path)
+          (if (and (equal? uname "n/a")
+                   (equal? item-path "")) ;; this is a toplevel test
+              ;; what to do with toplevel? call rollup?
+              (begin
+                (set! toplevels   (cons (list test-id run-dir uname testname item-path run-id) toplevels)))
+                ;; (print-info "Found old toplevel test in RUNNING state, test-id=" test-id))
+              (set! incompleted (cons (list test-id run-dir uname testname item-path run-id) incompleted))))
+        (db:get-cache-stmth dbdat db
+        "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > (run_duration + ?) AND state IN ('RUNNING','REMOTEHOSTSTART');")
+        run-id deadtime)
+
+       ;; in LAUNCHED for more than one day. Could be long due to job queues TODO/BUG: Need override for this in config
+       ;;
+       ;; (db:delay-if-busy dbdat)
+       (sqlite3:for-each-row
+        (lambda (test-id run-dir uname testname item-path)
+          (if (and (equal? uname "n/a")
+                   (equal? item-path "")) ;; this is a toplevel test
+              ;; what to do with toplevel? call rollup?
+              (set! toplevels   (cons (list test-id run-dir uname testname item-path run-id) toplevels))
+              (set! oldlaunched (cons (list test-id run-dir uname testname item-path run-id) oldlaunched))))
+        (db:get-cache-stmth dbdat db
+        "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > 86400 AND state IN ('LAUNCHED');")
+        run-id)
+       
+       ;; (print-info "Found " (length oldlaunched) " old LAUNCHED items, " (length toplevels) " old LAUNCHED toplevel tests and " (length incompleted) " tests marked RUNNING but apparently dead.")
+       (if (and (null? incompleted)
+                (null? oldlaunched)
+                (null? toplevels))
+           #f
+           #t)))))
+
 
 ;; looks up subdb and returns it, if not found then set up
 ;; and then return it.
 ;;
 #;(define (db:get-db dbstruct run-id)
@@ -583,12 +662,11 @@
 (define (db:tmp->megatest.db-sync dbstruct run-id last-update)
   (let* ((subdbs (hash-table-values (dbr:dbstruct-subdbs dbstruct)))
 	 (res    '()))
     (for-each
      (lambda (subdb)
-       (let* ((dbname (db:run-id->dbname run-id))
-	      (mtdb   (dbr:subdb-mtdb subdb))
+       (let* ((mtdb   (dbr:subdb-mtdb subdb))
 	      (tmpdb  (db:get-subdb dbstruct run-id))
 	      (refndb (dbr:subdb-refndb subdb))
 	      (newres (db:sync-tables (db:sync-all-tables-list dbstruct (db:get-keys dbstruct)) last-update tmpdb refndb mtdb)))
 	 ;; (stack-push! (dbr:subdb-dbstack subdb) tmpdb)
 	 ;; BUG: verify this is really needed

Index: dbfile.scm
==================================================================
--- dbfile.scm
+++ dbfile.scm
@@ -41,11 +41,12 @@
 	commonmod
 	;; debugprint
 	)
 
 (define keep-age-param (make-parameter 10)) ;; qif file age, if over move to attic
-(define num-run-dbs (make-parameter 10))     ;; number of db's in .megatest
+(define num-run-dbs    (make-parameter 10))     ;; number of db's in .megatest
+(define dbfile:testsuite-name (make-parameter #f))
 
 ;;======================================================================
 ;;  R E C O R D S
 ;;======================================================================
 
@@ -56,10 +57,17 @@
   (areapath  #f)
   (homehost  #f)
   (tmppath   #f)
   (read-only #f)
   (subdbs (make-hash-table))
+  ;;
+  ;; for the inmem approach (see dbmod.scm)
+  ;; this is one db per server
+  (inmem     #f)  ;; handle for the in memory copy
+  (dbfile    #f)  ;; path to the db file on disk
+  (ondiskdb  #f)  ;; handle for the on-disk file
+  (dbdat     #f)  ;; create a dbdat for the downstream calls such as db:with-db
   )
 
 ;; NOTE: Need one dbr:subdb per main.db, 1.db ...
 ;;
 (defstruct dbr:subdb
@@ -945,30 +953,10 @@
                              FOR EACH ROW
                                BEGIN 
                                  UPDATE test_data SET last_update=(strftime('%s','now'))
                                    WHERE id=old.id;
                                END;" )))
-;;
-;; ADD run-id SUPPORT
-;;
-(define (db:create-all-triggers dbstruct)
-  (db:with-db
-   dbstruct #f #f
-   (lambda (dbdat db)
-     (db:create-triggers db))))
-
-(define (db:create-triggers db)
-    (for-each (lambda (key)
-              (sqlite3:execute db (cadr key)))
-          db:trigger-list))
-
-(define (db:drop-all-triggers dbstruct)
-  (db:with-db
-   dbstruct #f #f
-   (lambda (dbdat db)
-     (db:drop-triggers db))))
-
 (define (db:is-trigger-dropped db tbl-name)
   (let* ((trigger-name (if (equal? tbl-name "test_steps")
 			   "update_teststeps_trigger" 
                            (conc "update_" tbl-name "_trigger")))
 	 (res          #f))
@@ -1012,10 +1000,14 @@
 ;; call with dbinit=db:initialize-main-db
 ;;
 (define (db:open-db dbstruct run-id dbinit)
   ;; (mutex-lock! *db-open-mutex*)
   (let* ((dbdat (dbfile:open-db dbstruct run-id dbinit)))
+              #;(case (rmt:transport-mode)
+		  ((http) (dbfile:open-db dbstruct run-id dbinit))
+		  ((tcp)  (dbmod:open-db  dbstruct run-id dbinit))
+		  (else (assert #f "FATAL: rmt:transport-node not correct value"(rmt:transport-mode))))
     (set! *db-write-access* (not (dbr:dbdat-read-only dbdat)))
     ;; (mutex-unlock! *db-open-mutex*)
     dbdat))
 
 (define dbfile:db-init-proc (make-parameter #f))
@@ -1078,11 +1070,11 @@
 (define no-condition-db-with-db (make-parameter #t))
 
 ;; (db:with-db dbstruct run-id sqlite3:exec "select blah fgrom blaz;")
 ;; r/w is a flag to indicate if the db is modified by this query #t = yes, #f = no
 ;;
-(define (db:with-db dbstruct run-id r/w proc . params)
+(define (dbfile:with-db dbstruct run-id r/w proc params)
   (assert dbstruct "FATAL: db:with-db called with dbstruct "#f)
   (assert (dbr:dbstruct? dbstruct) "FATAL: dbstruct is "dbstruct)
   (let* ((use-mutex (> *api-process-request-count* 25)) ;; risk of db corruption
 	 (have-struct (dbr:dbstruct? dbstruct))
          (dbdat     (if have-struct                ;; this stuff just allows us to call with a db handle directly
@@ -1268,58 +1260,8 @@
 	(let* ((newstmth (sqlite3:prepare db stmt)))
 	  ;; (db:hoh-set! stmt-cache db stmt newstmth)
 	  (hash-table-set! stmt-cache stmt newstmth)
 	  newstmth))))
 
-(define (db:have-incompletes? dbstruct run-id ovr-deadtime)
-  (let* ((incompleted '())
-	 (oldlaunched '())
-	 (toplevels   '())
-	 ;; (deadtime-str (configf:lookup *configdat* "setup" "deadtime"))
-	 (deadtime  (or ovr-deadtime 72000))) ;; twenty hours
-    (db:with-db
-     dbstruct run-id #f
-     (lambda (dbdat db)
-       
-       ;; in RUNNING or REMOTEHOSTSTART for more than 10 minutes
-       ;;
-       ;; HOWEVER: this code in run:test seems to work fine
-       ;;              (> (- (current-seconds)(+ (db:test-get-event_time testdat)
-       ;;                     (db:test-get-run_duration testdat)))
-       ;;                    600) 
-       ;; (db:delay-if-busy dbdat)
-       (sqlite3:for-each-row 
-        (lambda (test-id run-dir uname testname item-path)
-          (if (and (equal? uname "n/a")
-                   (equal? item-path "")) ;; this is a toplevel test
-              ;; what to do with toplevel? call rollup?
-              (begin
-                (set! toplevels   (cons (list test-id run-dir uname testname item-path run-id) toplevels)))
-                ;; (print-info "Found old toplevel test in RUNNING state, test-id=" test-id))
-              (set! incompleted (cons (list test-id run-dir uname testname item-path run-id) incompleted))))
-        (db:get-cache-stmth dbdat db
-        "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > (run_duration + ?) AND state IN ('RUNNING','REMOTEHOSTSTART');")
-        run-id deadtime)
-
-       ;; in LAUNCHED for more than one day. Could be long due to job queues TODO/BUG: Need override for this in config
-       ;;
-       ;; (db:delay-if-busy dbdat)
-       (sqlite3:for-each-row
-        (lambda (test-id run-dir uname testname item-path)
-          (if (and (equal? uname "n/a")
-                   (equal? item-path "")) ;; this is a toplevel test
-              ;; what to do with toplevel? call rollup?
-              (set! toplevels   (cons (list test-id run-dir uname testname item-path run-id) toplevels))
-              (set! oldlaunched (cons (list test-id run-dir uname testname item-path run-id) oldlaunched))))
-        (db:get-cache-stmth dbdat db
-        "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > 86400 AND state IN ('LAUNCHED');")
-        run-id)
-       
-       ;; (print-info "Found " (length oldlaunched) " old LAUNCHED items, " (length toplevels) " old LAUNCHED toplevel tests and " (length incompleted) " tests marked RUNNING but apparently dead.")
-       (if (and (null? incompleted)
-                (null? oldlaunched)
-                (null? toplevels))
-           #f
-           #t)))))
 
 
 )

ADDED   dbmemmod.scm
Index: dbmemmod.scm
==================================================================
--- /dev/null
+++ dbmemmod.scm
@@ -0,0 +1,1322 @@
+;;======================================================================
+;; Copyright 2017, Matthew Welland.
+;; 
+;; This file is part of Megatest.
+;; 
+;;     Megatest is free software: you can redistribute it and/or modify
+;;     it under the terms of the GNU General Public License as published by
+;;     the Free Software Foundation, either version 3 of the License, or
+;;     (at your option) any later version.
+;; 
+;;     Megatest is distributed in the hope that it will be useful,
+;;     but WITHOUT ANY WARRANTY; without even the implied warranty of
+;;     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;;     GNU General Public License for more details.
+;; 
+;;     You should have received a copy of the GNU General Public License
+;;     along with Megatest.  If not, see <http://www.gnu.org/licenses/>.
+
+;;======================================================================
+
+(declare (unit dbmemmod))
+(declare (uses debugprint))
+(declare (uses commonmod))
+
+(module dbmemmod
+	*
+	
+  (import scheme
+	  chicken
+	  data-structures
+	  extras
+	  matchable)
+  
+(import (prefix sqlite3 sqlite3:)
+	posix typed-records srfi-18 srfi-1
+	srfi-69
+	stack
+	files
+	ports
+
+	debugprint
+	commonmod
+	)
+
+(define keep-age-param (make-parameter 10)) ;; qif file age, if over move to attic
+(define num-run-dbs (make-parameter 10))     ;; number of db's in .megatest
+
+;;======================================================================
+;;  R E C O R D S
+;;======================================================================
+
+;; a single Megatest area with it's multiple dbs is
+;; managed in a dbstruct
+;;
+(defstruct dbr:dbstruct
+  (areapath  #f)
+  (homehost  #f)
+  (tmppath   #f)
+  (read-only #f)
+  (subdbs (make-hash-table))
+  )
+
+;; NOTE: Need one dbr:subdb per main.db, 1.db ...
+;;
+(defstruct dbr:subdb
+  (dbname      #f) ;; .megatest/1.db
+  (mtdbfile    #f) ;; mtrah/.megatest/1.db
+  (mtdbdat     #f) ;; only need one of these for syncing
+  ;; (dbdats      (make-hash-table))  ;; id => dbdat 
+  (tmpdbfile   #f) ;; /tmp/.../.megatest/1.db
+  ;; (refndbfile  #f) ;; /tmp/.../.megatest/1.db_ref
+  (dbstack     (make-stack)) ;; stack for tmp dbr:dbdat,
+  (homehost    #f) ;; not used yet
+  (on-homehost #f) ;; not used yet
+  (read-only   #f)
+  (last-sync   0)
+  (last-write  (current-seconds))
+  )                ;; goal is to converge on one struct for an area but for now it is too confusing
+
+;; need to keep dbhandles and cached statements together
+(defstruct dbr:dbdat
+  (dbfile      #f)
+  (dbh         #f)    
+  (stmt-cache  (make-hash-table))
+  (read-only   #f)
+  (birth-sec   (current-seconds)))
+
+(define *dbstruct-dbs* #f)
+(define *db-open-mutex* (make-mutex))
+(define *db-access-mutex* (make-mutex)) ;; used in common.scm
+(define *no-sync-db*   #f)
+(define *db-sync-in-progress* #f)
+(define *db-with-db-mutex*    (make-mutex))
+(define *max-api-process-requests* 0)
+(define *api-process-request-count* 0)
+(define *db-write-access*     #t)
+(define *db-last-sync*        0)                 ;; last time the sync to megatest.db happened
+(define *db-multi-sync-mutex* (make-mutex))      ;; protect access to *db-sync-in-progress*, *db-last-sync*
+
+(define (db:generic-error-printout exn . message)
+  (print-call-chain (current-error-port))
+  (apply dbfile:print-err message)
+  (dbfile:print-err
+    ", error: "     ((condition-property-accessor 'exn 'message)   exn)
+    ", arguments: " ((condition-property-accessor 'exn 'arguments) exn)
+    ", location: "  ((condition-property-accessor 'exn 'location)  exn)
+    ))
+
+(define (dbfile:run-id->key run-id)
+  (or run-id 'main))
+
+(define (db:safely-close-sqlite3-db db stmt-cache #!key (try-num 3))
+  (if (<= try-num 0)
+      #f
+      (handle-exceptions
+	  exn
+	(begin
+	  (print "Attempt to safely close sqlite3 db failed. Trying again. exn=" exn)
+	  (thread-sleep! 3)
+	  (sqlite3:interrupt! db)
+	  (db:safely-close-sqlite3-db db stmt-cache try-num: (- try-num 1)))
+	(if (sqlite3:database? db)
+	    (let* ((stmts (and stmt-cache (hash-table-ref/default stmt-cache db #f))))
+	      (if stmts (map sqlite3:finalize! (hash-table-values stmts)))
+	      (sqlite3:finalize! db)
+	      #t)
+            (begin
+             (dbfile:print-err "db:safely-close-sqlite3-db: " db " is not an sqlite3 db")
+	     #f
+            )
+        ))))
+
+;; close all opened run-id dbs
+(define (db:close-all dbstruct)
+  (if (dbr:dbstruct? dbstruct)
+;; (handle-exceptions
+;; 	  exn
+;; 	  (begin
+;; 	    (debug:print 0 *default-log-port* "WARNING: Finalizing failed, "  ((condition-property-accessor 'exn 'message) exn) ", note - exn=" exn)
+;; 	    (print-call-chain *default-log-port*))
+	;; (db:sync-touched dbstruct 0 force-sync: #t) ;; NO. Do not do this here. Instead we rely on a server to be started when there are writes, even if the server itself is not going to be used as a server.
+        (let* ((subdbs     (hash-table-values (dbr:dbstruct-subdbs dbstruct))))
+	  (for-each
+	   (lambda (subdb)
+	     (let* ((tdbs       (stack->list (dbr:subdb-dbstack subdb)))
+		    (mtdbdat    (dbr:dbdat-dbh (dbr:subdb-mtdbdat subdb)))
+		    #;(rdb        (dbr:dbdat-dbh (dbr:subdb-refndb subdb))))
+		    
+	       (map (lambda (dbdat)
+		      (let* ((stmt-cache (dbr:dbdat-stmt-cache dbdat))
+			     (dbh        (dbr:dbdat-dbh        dbdat)))
+			(db:safely-close-sqlite3-db dbh stmt-cache)))
+		    tdbs)
+	       (db:safely-close-sqlite3-db mtdbdat (dbr:dbdat-stmt-cache  (dbr:subdb-mtdbdat subdb))) 
+               ;; (if (sqlite3:database? mdb) (sqlite3:finalize! mdb))
+	       #;(db:safely-close-sqlite3-db rdb #f))) ;; stmt-cache))))) ;; (if (sqlite3:database? rdb) (sqlite3:finalize! rdb))))))
+	   subdbs)
+           #t
+          )
+          #f
+  )
+)
+
+;; ;; set up a single db (e.g. main.db, 1.db ... etc.)
+;; ;;
+;; (define (db:setup-db dbstruct areapath run-id)
+;;   (let* ((dbname   (db:run-id->dbname run-id))
+;; 	 (dbstruct (hash-table-ref/default dbstructs dbname #f)))
+;;     (if dbstruct
+;; 	dbstruct
+;; 	(let* ((dbstruct-new (make-dbr:dbstruct)))
+;; 	  (db:open-db dbstruct-new run-id areapath: areapath do-sync: #t)
+;; 	  (hash-table-set! dbstructs dbname dbstruct-new)
+;; 	  dbstruct-new))))
+    
+;; ; Returns the dbdat for a particular dbfile inside the area
+;; ;;
+;; (define (dbr:dbstruct-get-dbdat dbstruct dbfile)
+;;   (hash-table-ref/default (dbr:dbstruct-dbdats dbstruct) dbfile #f))
+;; 
+;; (define (dbr:dbstruct-dbdat-put! dbstruct dbfile db)
+;;   (hash-table-set! (dbr:dbstruct-dbdats dbstruct) dbfile db))
+;; 
+;; (define (db:run-id->first-num run-id)
+;;   (let* ((s (number->string run-id))
+;; 	 (l (string-length s)))
+;;     (substring s (- l 1) l)))
+
+;; 1234 => 4/1234.db
+;;   #f => 0/main.db
+;;   (abandoned the idea of num/db)
+;; 
+(define (dbfile:run-id->path apath run-id)
+  (conc apath"/"(dbfile:run-id->dbname run-id)))
+
+(define (db:dbname->path apath dbname)
+  (conc apath"/"dbname))
+
+(define (dbfile:run-id->dbnum run-id)
+  (cond
+   ((number? run-id)
+    (modulo run-id (num-run-dbs)))
+   ((not run-id) "main")   ;; 0 or main?
+   (else run-id)))
+
+;; POTENTIAL BUG: this implementation could produce a db file if run-id is neither #f or a number
+(define (dbfile:run-id->dbname run-id)
+  (conc ".megatest/"(dbfile:run-id->dbnum run-id)".db"))
+
+;; Make the dbstruct, setup up auxillary db's and call for main db at least once
+;;
+;; called in http-transport and replicated in rmt.scm for *local* access. 
+;;
+(define (dbfile:setup do-sync areapath tmppath)
+  (cond
+   (*dbstruct-dbs*
+    (dbfile:print-err "WARNING: dbfile:setup called when *dbstruct-dbs* is already initialized")
+    *dbstruct-dbs*) ;; TODO: when multiple areas are supported, this optimization will be a hazard
+   (else
+    (let* ((dbstruct (make-dbr:dbstruct)))
+      (set! *dbstruct-dbs* dbstruct)
+      (dbr:dbstruct-areapath-set! dbstruct areapath)
+      (dbr:dbstruct-tmppath-set!  dbstruct tmppath)
+      dbstruct))))
+
+(define (dbfile:get-subdb dbstruct run-id)
+  (let* ((dbfname (dbfile:run-id->dbname run-id)))
+    (hash-table-ref/default (dbr:dbstruct-subdbs dbstruct) dbfname #f)))
+
+(define (dbfile:set-subdb dbstruct run-id subdb)
+  (hash-table-set! (dbr:dbstruct-subdbs dbstruct) (dbfile:run-id->dbname run-id) subdb))
+
+;; (define *dbfile:num-handles-in-use* 0)
+
+;; Get/open a database
+;;    if run-id => get run specific db
+;;    if #f     => get main db
+;;    if run-id is a string treat it as a filename
+;;    if db already open - return inmem
+;;    if db not open, open inmem, rundb and sync then return inmem
+;;    inuse gets set automatically for rundb's
+;;
+(define (dbfile:get-dbdat dbstruct run-id)
+  (let* ((subdb (dbfile:get-subdb dbstruct run-id)))
+    (if (stack-empty? (dbr:subdb-dbstack subdb))
+	#f
+	(begin
+	  (stack-pop! (dbr:subdb-dbstack subdb))))))
+
+;; return a previously opened db handle to the stack of available handles
+(define (dbfile:add-dbdat dbstruct run-id dbdat)
+  (let* ((subdb (dbfile:get-subdb dbstruct run-id))
+	 (dbstk (dbr:subdb-dbstack subdb))
+	 (count (stack-count dbstk)))
+    (if (> count 15)
+	(dbfile:print-err "WARNING: stack for "run-id".db is "count"."))
+    (stack-push! dbstk dbdat)
+    dbdat))
+
+;; set up a subdb
+;;
+(define (dbfile:init-subdb dbstruct run-id init-proc)
+  (let* ((dbname    (dbfile:run-id->dbname run-id))
+	 (areapath  (dbr:dbstruct-areapath dbstruct))
+	 (tmppath   (dbr:dbstruct-tmppath  dbstruct))
+	 (mtdbpath  (dbfile:run-id->path areapath run-id))
+	 (tmpdbpath (dbfile:run-id->path tmppath run-id))
+	 (mtdbdat   (dbfile:open-sqlite3-db mtdbpath init-proc sync-mode: 0 journal-mode: #f)) ;; "WAL"))
+	 (newsubdb  (make-dbr:subdb dbname:    dbname
+				    mtdbfile:  mtdbpath
+				    tmpdbfile: tmpdbpath
+				    mtdbdat:   mtdbdat)))
+    (dbfile:set-subdb dbstruct run-id newsubdb)
+    newsubdb)) ;; return the new subdb - but shouldn't really use it
+
+;; returns dbdat with dbh and dbfilepath
+;;
+;; NOTE: the handle is on /tmp db file!
+;;
+;;  1. if needed setup the subdb for the given run-id
+;;  2. if there is no existing db handle in the stack
+;;     create a new handle and return it (do NOT add
+;;     it to the stack).
+;;
+(define (dbfile:open-db dbstruct run-id init-proc)
+  (let* ((subdb (dbfile:get-subdb dbstruct run-id)))
+    (if (not subdb) ;; not yet defined
+	(begin
+	  (dbfile:init-subdb dbstruct run-id init-proc)
+	  (dbfile:open-db dbstruct run-id init-proc))
+	(let* ((dbdat (dbfile:get-dbdat dbstruct run-id)))
+	  (if dbdat
+	      dbdat
+	      (let* ((tmppath   (dbr:dbstruct-tmppath  dbstruct))
+		     (tmpdbpath (dbfile:run-id->path tmppath run-id))
+		     (dbdat     (dbfile:open-sqlite3-db tmpdbpath init-proc sync-mode: 0 journal-mode: "WAL")))
+		;; the following line short-circuits the "one db handle per thread" model
+		;; 
+		;; (dbfile:add-dbdat dbstruct run-id dbdat)
+		;;
+		dbdat))))))
+    
+;; COMBINE dbfile:open-sqlite-db and dbfile:lock-create-open
+;;
+
+;; this stuff is for initial debugging, please remove it when
+;; this code stabilizes
+(define *dbopens* (make-hash-table))
+(define (dbfile:inc-db-open dbfile)
+  (let* ((curr-opens-count (+ (hash-table-ref/default *dbopens* dbfile 0) 1)))
+    (if (and (> curr-opens-count 1) ;; this should NOT be happening
+	     (common:low-noise-print 15 "db-opens"))
+	(dbfile:print-err "INFO: db "dbfile" has been opened "curr-opens-count" times!"))
+    (hash-table-set! *dbopens* dbfile curr-opens-count)
+    curr-opens-count))
+
+;; Open the classic megatest.db file (defaults to open in toppath)
+;;
+;;   NOTE: returns a dbdat not a dbstruct!
+;;
+(define (dbfile:open-sqlite3-db dbpath init-proc #!key (sync-mode 0)(journal-mode #f))
+  (let* ((dbexists     (file-exists? dbpath))
+	 (write-access (file-write-access? dbpath))
+	 (db           (dbfile:cautious-open-database dbpath init-proc sync-mode journal-mode)))
+    (dbfile:inc-db-open dbpath)
+    ;; (init-proc db)
+    (make-dbr:dbdat dbfile: dbpath dbh: db read-only: (not write-access))))
+
+(define (dbfile:print-and-exit . params)
+  (with-output-to-port
+      (current-error-port)
+    (lambda ()
+      (apply print params)))
+  (exit 1))
+    
+(define (dbfile:print-err . params)
+  (with-output-to-port
+      (current-error-port)
+    (lambda ()
+      (apply print params))))
+
+(define (dbfile:cautious-open-database fname init-proc sync-mode journal-mode #!optional (tries-left 500))
+  (let* ((busy-file  (conc fname "-journal"))
+	 (delay-time (* (- 51 tries-left) 1.1))
+      	 (write-access (file-write-access? fname))
+         (dir-access (file-write-access? (pathname-directory fname)))
+         (retry      (lambda ()
+		       (thread-sleep! delay-time)
+		       (if (> tries-left 0)
+			   (dbfile:cautious-open-database fname init-proc
+							  sync-mode journal-mode
+							  (- tries-left 1))))))
+    (assert (>= tries-left 0) (conc "FATAL: too many attempts in dbfile:cautious-open-database of "fname", giving up."))
+    
+    (if (and (file-write-access? fname)
+	     (file-exists? busy-file))
+	(begin
+	  (if (common:low-noise-print 120 busy-file)
+	      (dbfile:print-err "INFO: dbfile:cautious-open-database: journal file "
+				busy-file" exists, trying again in few seconds."))
+	  (thread-sleep! 1)
+	  (if (eq? tries-left 2)
+	      (begin
+	  	(dbfile:print-err "INFO: forcing journal rollup "busy-file)
+	  	(dbfile:brute-force-salvage-db fname)))
+	  (dbfile:cautious-open-database fname init-proc sync-mode journal-mode (- tries-left 1)))
+	
+	(let* ((result (condition-case
+		        (if dir-access
+			    (dbfile:with-simple-file-lock
+			     (conc fname ".lock")
+			     (lambda ()
+			       (let* ((db-exists (file-exists? fname))
+				      (db        (sqlite3:open-database fname))) ;; creates an empty db if it did not already exist.
+                                 (sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 30000))
+				 (if sync-mode
+				     (sqlite3:execute db (conc "PRAGMA synchronous = "sync-mode";")))
+				 (if journal-mode
+				     (sqlite3:execute db (conc "PRAGMA journal_mode = "journal-mode";")))
+				 (if (and init-proc (not db-exists))
+				     (init-proc db))
+				 db)))
+                            (begin
+			      (if (file-exists? fname )
+                                  (let ((db (sqlite3:open-database fname)))
+				    ;; pragmas synchronous not needed because this db is used read-only
+				    ;; (sqlite3:execute db (conc "PRAGMA synchronous = "mode";")
+				    (sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 30000)) ;; read-only but still need timeout
+				    db )
+                                  (print "file doesn't exist: " fname))))
+			(exn (io-error)
+			     (dbfile:print-err exn "ERROR: i/o error with " fname ". Check permissions, disk space etc. and try again.")
+			     (retry))
+			(exn (corrupt)
+			     (dbfile:print-err exn "ERROR: database " fname " is corrupt. Repair it to proceed.")
+			     (retry))
+			(exn (busy)
+			     (dbfile:print-err exn "ERROR: database " fname
+					       " is locked. Try copying to another location, remove original and copy back.")
+			     (retry))
+			(exn (permission)(dbfile:print-err exn "ERROR: database " fname " has some permissions problem.")
+			     (retry))
+			(exn ()
+			     (dbfile:print-err exn "ERROR: Unknown error with database " fname " message: "
+					       ((condition-property-accessor 'exn 'message) exn))
+			     (retry)))))
+	  result))))
+
+(define (dbfile:brute-force-salvage-db fname)
+  (let* ((backupfname (conc fname"-"(current-process-id)".bak"))
+	 (cmd (conc "cp "fname" "backupfname";mv "fname" "(conc fname ".delme;")
+		    "cp "backupfname" "fname)))
+    (dbfile:print-err "WARNING: attempting recovery of file "fname" by running commands:\n"
+		      "  "cmd)
+    (system cmd)))
+
+
+(define (dbfile:open-no-sync-db dbpath)
+  (if *no-sync-db*
+      *no-sync-db*
+      (begin
+	(if (not (file-exists? dbpath))
+	    (create-directory dbpath #t))
+	(let* ((dbname    (conc dbpath "/no-sync.db"))
+	       (db-exists (file-exists? dbname))
+	       (init-proc (lambda (db)
+			    (if (not db-exists)
+				(begin
+				  (sqlite3:execute db "CREATE TABLE IF NOT EXISTS no_sync_metadat (var TEXT,val TEXT, CONSTRAINT no_sync_metadat_constraint UNIQUE (var));"))
+				)))
+	       (db        (dbfile:cautious-open-database dbname init-proc 0 "WAL"))) ;; (sqlite3:open-database dbname)))
+	  ;; (sqlite3:execute db "PRAGMA synchronous = 0;")
+	  ;; (sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 136000)) ;; done in cautious-open-database
+	  (set! *no-sync-db* db)
+	  db))))
+
+(define (db:no-sync-set db var val)
+  (sqlite3:execute db "INSERT OR REPLACE INTO no_sync_metadat (var,val) VALUES (?,?);" var val))
+
+(define (db:no-sync-del! db var)
+  (sqlite3:execute db "DELETE FROM no_sync_metadat WHERE var=?;" var))
+
+(define (db:no-sync-get/default db var default)
+  (let ((res default))
+    (sqlite3:for-each-row
+     (lambda (val)
+       (set! res val))
+     db
+     "SELECT val FROM no_sync_metadat WHERE var=?;"
+     var)
+    (if res
+        (let ((newres (if (string? res)
+			  (string->number res)
+			  #f)))
+          (if newres
+              newres
+              res))
+        res)))
+
+;; transaction protected lock aquisition
+;; either:
+;;    fails    returns  (#f . lock-creation-time)
+;;    succeeds (returns (#t . lock-creation-time)
+;; use (db:no-sync-del! db keyname) to release the lock
+;;
+(define (db:no-sync-get-lock db keyname)
+  (sqlite3:with-transaction
+   db
+   (lambda ()
+     (condition-case
+	 (let* ((curr-val (db:no-sync-get/default db keyname #f)))
+	   (if curr-val
+	       `(#f . ,curr-val)   ;; (sqlite3:first-result db "SELECT val FROM no_sync_metadat WHERE var=?;" keyname))
+	       (let ((lock-time (current-seconds)))
+		 (sqlite3:execute db "INSERT OR REPLACE INTO no_sync_metadat (var,val) VALUES(?,?);" keyname lock-time)
+		 `(#t . ,lock-time))))
+       (exn (io-error)  (dbfile:print-err "ERROR: i/o error with no-sync db. Check permissions, disk space etc. and try again."))
+       (exn (corrupt)   (dbfile:print-err "ERROR: database no-sync db is corrupt. Repair it to proceed."))
+       (exn (busy)      (dbfile:print-err "ERROR: database no-sync db is locked. Try copying to another location, remove original and copy back."))
+       (exn (permission)(dbfile:print-err "ERROR: database no-sync db has some permissions problem."))
+       (exn () ;; (status done) ;; I don't know how to detect status done but no data!
+	    (dbfile:print-err "ERROR: Unknown error with database no-sync db message: exn="(condition->list exn)", \n"
+			      ((condition-property-accessor 'exn 'message) exn))
+	    `(#f . ,(current-seconds)))))))
+
+(define (db:no-sync-get-lock-timeout db keyname timeout)
+  (let* ((lockdat (db:no-sync-get-lock db keyname)))
+    (match lockdat
+      ((#f . lock-time)
+       (if (> (- (current-seconds) (if (string? lock-time)(string->number lock-time)lock-time)) timeout)
+	   (let ((lock-time (current-seconds)))
+	     ;; (debug:print-info 2 *default-log-port* "db:no-sync-get-lock keyname=" keyname ", lock-time=" lock-time ", exn=" exn)
+	     (sqlite3:execute db "INSERT OR REPLACE INTO no_sync_metadat (var,val) VALUES(?,?);" keyname lock-time)
+	     `(#t . ,lock-time))
+	   lockdat))
+      (else lockdat))))
+
+;; NOTE: This will steal the lock after timeout of waiting.
+;;
+(define (db:with-no-sync-lock db keyname timeout proc)
+  (let* ((lockdat  (db:no-sync-get-lock-timeout db keyname))
+	 (gotlock  (car lockdat))
+	 (locktime (cdr lockdat)))
+    (if gotlock
+	(let ((res (proc)))
+	  (db:no-sync-del! db keyname)
+	  res))))
+  
+;;======================================================================
+;; sync back functions pulled from db.scm
+;;======================================================================
+
+;; Get a lock from the no-sync-db for the from-db, then delta sync the from-db to the to-db, otherwise return #f
+;;
+(define (db:lock-and-delta-sync no-sync-db dbstruct from-db-file runid keys dbinit)
+  (assert (not *db-sync-in-progress*) "FATAL: db:lock-and-sync called while a sync is in progress.")
+  ;; (dbfile:print-err *default-log-port* "db:lock-and-delta-sync")
+  (let* ((lock-file (conc from-db-file ".lock")))
+    (if (common:simple-file-lock lock-file)
+	(begin
+	  (dbfile:print-err "INFO: db:lock-and-delta-sync copying db "runid" at "(current-seconds))
+	  (set! *db-sync-in-progress* #t)
+	  (db:sync-touched dbstruct runid keys dbinit)
+	  (set! *db-sync-in-progress* #f)
+	  (delete-file* lock-file)
+	  #t)
+        (begin
+	  (if (common:low-noise-print 120 (conc "no lock "from-db-file))
+	      (dbfile:print-err "INFO: could not get lock for " from-db-file ", sync likely in progress."))
+	  #f
+	  ))))
+
+;; ;; Get a lock from the no-sync-db for the from-db, then delta sync the from-db to the to-db, otherwise return #f
+;; ;;
+;; (define (db:lock-and-delta-sync-orig no-sync-db dbstruct from-db-file runid keys dbinit)
+;;   (assert (not *db-sync-in-progress*) "FATAL: db:lock-and-sync called while a sync is in progress.")
+;;   ;; (dbfile:print-err *default-log-port* "db:lock-and-delta-sync")
+;;   (let* ((lockdat  (db:no-sync-get-lock-timeout no-sync-db from-db-file 60))
+;; 	 (gotlock  (car lockdat))
+;; 	 (locktime (cdr lockdat)))
+;;     ;; (debug:print-info 3 *default-log-port* "db:lock-and-delta-sync: got lock?")
+;;     
+;;     (if gotlock
+;; 	(begin
+;;           (dbfile:print-err "INFO: db:lock-and-delta-sync copying db "runid" at "(current-seconds))
+;; 	  (set! *db-sync-in-progress* #t)
+;;           (db:sync-touched dbstruct runid keys dbinit)
+;; 	  (set! *db-sync-in-progress* #f)
+;; 	  (db:no-sync-del! no-sync-db from-db-file)
+;; 	  #t)
+;;         (begin
+;;           (dbfile:print-err "ERROR: could not get lock for " from-db-file " from no-sync-db")
+;; 	  #f
+;;         ))))
+
+;; sync run from tmp disk to nfs disk if touched
+;;
+;; call with dbinit=db:initialize-main-db
+;;
+(define (db:sync-touched dbstruct run-id keys #!key dbinit (force-sync #f))
+  (dbfile:print-err "db:sync-touched Syncing: " (conc (if run-id run-id "main") ".db"))
+  (let* (;; the subdb is needed to access the mtdbdat
+	 (subdb     (or (dbfile:get-subdb dbstruct run-id)
+			(dbfile:init-subdb dbstruct run-id dbinit)))
+         (tmpdbfile (dbr:subdb-tmpdbfile subdb))
+	 (mtdb      (dbr:subdb-mtdbdat subdb))
+         (tmpdb     (db:open-db dbstruct run-id dbinit)) ;; sqlite3-db tmpdbfile #f))
+	 (start-t   (current-seconds)))
+    (mutex-lock! *db-multi-sync-mutex*)
+    (let ((update_info (cons "last_update" (if force-sync 0 *db-last-sync*) )))
+      (mutex-unlock! *db-multi-sync-mutex*)
+      (db:sync-tables (db:sync-all-tables-list dbstruct keys) update_info tmpdb mtdb))
+    (mutex-lock! *db-multi-sync-mutex*)
+    (set! *db-last-sync* start-t)
+    (set! *db-last-access* start-t)
+    (mutex-unlock! *db-multi-sync-mutex*)
+    (dbfile:add-dbdat dbstruct run-id tmpdb)
+  #t))
+
+;; just tests, test_steps and test_data tables
+(define db:sync-tests-only
+  (list
+   ;; (list "strs"
+   ;;       '("id"             #f)
+   ;;       '("str"            #f))
+   (list "tests" 
+	 '("id"             #f)
+	 '("run_id"         #f)
+	 '("testname"       #f)
+	 '("host"           #f)
+	 '("cpuload"        #f)
+	 '("diskfree"       #f)
+	 '("uname"          #f)
+	 '("rundir"         #f)
+	 '("shortdir"       #f)
+	 '("item_path"      #f)
+	 '("state"          #f)
+	 '("status"         #f)
+	 '("attemptnum"     #f)
+	 '("final_logf"     #f)
+	 '("logdat"         #f)
+	 '("run_duration"   #f)
+	 '("comment"        #f)
+	 '("event_time"     #f)
+	 '("fail_count"     #f)
+	 '("pass_count"     #f)
+	 '("archived"       #f)
+         '("last_update"    #f))
+  (list "test_steps"
+	 '("id"             #f)
+	 '("test_id"        #f)
+	 '("stepname"       #f)
+	 '("state"          #f)
+	 '("status"         #f)
+	 '("event_time"     #f)
+	 '("comment"        #f)
+	 '("logfile"        #f)
+         '("last_update"    #f))
+   (list "test_data"
+	 '("id"             #f)
+	 '("test_id"        #f)
+	 '("category"       #f)
+	 '("variable"       #f)
+	 '("value"          #f)
+	 '("expected"       #f)
+	 '("tol"            #f)
+	 '("units"          #f)
+	 '("comment"        #f)
+	 '("status"         #f)
+	 '("type"           #f)
+         '("last_update"    #f))))
+
+;; needs db to get keys, this is for syncing all tables
+;;
+(define (db:sync-main-list dbstruct keys)
+  (let ((keys  keys)) ;; (db:get-keys dbstruct)))
+    (list
+     (list "keys"
+	   '("id"        #f)
+	   '("fieldname" #f)
+	   '("fieldtype" #f))
+     (list "metadat" '("var" #f) '("val" #f))
+     (append (list "runs" 
+		   '("id"  #f))
+	     (map (lambda (k)(list k #f))
+		  (append keys
+			  (list "runname" "state" "status" "owner" "event_time" "comment" "fail_count" "pass_count" "contour" "last_update"))))
+     (list "archive_disks"
+           '("id" #f)
+           '("archive_area_name" #f) 
+           '("disk_path" #f)
+           '("last_df" #f)
+           '("last_df_time" #f)
+           '("creation_time" #f)) 
+
+     (list "archive_blocks"
+           '("id" #f)
+           '("archive_disk_id" #f) 
+           '("disk_path" #f)
+           '("last_du" #f)
+           '("last_du_time" #f)
+           '("creation_time" #f)) 
+
+     (list "test_meta"
+	   '("id"             #f)
+	   '("testname"       #f)
+	   '("owner"          #f)
+	   '("description"    #f)
+	   '("reviewed"       #f)
+	   '("iterated"       #f)
+	   '("avg_runtime"    #f)
+	   '("avg_disk"       #f)
+	   '("tags"           #f)
+	   '("jobgroup"       #f))
+
+
+     (list "tasks_queue"
+           '("id"            #f)
+           '("action"        #f)
+           '("owner"         #f) 
+           '("state"         #f)
+           '("target"        #f)
+           '("name"          #f)
+           '("testpatt"      #f)
+           '("keylock"       #f)
+           '("params"        #f)
+           '("creation_time" #f)
+           '("execution_time" #f))
+     )))
+
+(define (db:sync-all-tables-list dbstruct keys)
+  (append (db:sync-main-list dbstruct keys)
+	  db:sync-tests-only))
+
+;; tbls is ( ("tablename" ( "field1" [#f|proc1] ) ( "field2" [#f|proc2] ) .... ) )
+;; db's are dbdat's
+;;
+;; if last-update specified ("field-name" . time-in-seconds)
+;;    then sync only records where field-name >= time-in-seconds
+;;    IFF field-name exists
+;;
+(define (db:sync-tables tbls last-update fromdb todb . slave-dbs)
+  (handle-exceptions
+   exn
+   (begin
+     (dbfile:print-err  "EXCEPTION: database probably overloaded or unreadable in db:sync-tables.")
+     (print-call-chain (current-error-port))
+     (dbfile:print-err  " message: " ((condition-property-accessor 'exn 'message) exn))
+     (dbfile:print-err  "exn=" (condition->list exn))
+     (dbfile:print-err  " status:  " ((condition-property-accessor 'sqlite3 'status) exn))
+     (dbfile:print-err  " src db:  " (dbr:dbdat-dbfile fromdb))
+     (for-each (lambda (dbdat)
+		 (let ((dbpath (dbr:dbdat-dbfile dbdat)))
+		   (dbfile:print-err  " dbpath:  " dbpath)
+		   (if #t ;; (not (db:repair-db dbdat))
+		       (begin
+			 (dbfile:print-err "Failed to rebuild (repair is turned off) " dbpath ", exiting now.")
+			 (exit)))))
+	       (cons todb slave-dbs))
+     
+     0)
+
+   ;; this is the work to be done")
+   (cond
+    ((not fromdb) (dbfile:print-err  "WARNING: db:sync-tables called with fromdb missing")
+     -1)
+    ((not todb)   (dbfile:print-err  "WARNING: db:sync-tables called with todb missing")
+     -2)
+    ((not (sqlite3:database? (dbr:dbdat-dbh fromdb)))
+     (dbfile:print-err "db:sync-tables called with fromdb not a database " fromdb)
+   -3)
+    ((not (sqlite3:database? (dbr:dbdat-dbh todb)))
+     (dbfile:print-err "db:sync-tables called with todb not a database " todb)
+   -4)
+
+    ((not (file-write-access? (dbr:dbdat-dbfile todb)))
+     (dbfile:print-err "db:sync-tables called with todb not a read-only database " todb)
+     -5)
+    ((not (null? (let ((readonly-slave-dbs
+                        (filter
+                         (lambda (dbdat)
+                           (not (file-write-access? (dbr:dbdat-dbfile todb))))
+                         slave-dbs)))
+                   (for-each
+                    (lambda (bad-dbdat)
+                      (dbfile:print-err "db:sync-tables called with todb not a read-only database " bad-dbdat))
+                    readonly-slave-dbs)
+                   readonly-slave-dbs))) -6)
+    (else
+     ;; (dbfile:print-err "db:sync-tables: args are good")
+
+     (let ((stmts       (make-hash-table)) ;; table-field => stmt
+	   (all-stmts   '())              ;; ( ( stmt1 value1 ) ( stml2 value2 ))
+	   (numrecs     (make-hash-table))
+	   (start-time  (current-milliseconds))
+	   (tot-count   0))
+       (for-each ;; table
+	(lambda (tabledat)
+	  (let* ((tablename        (car tabledat))
+		 (fields           (cdr tabledat))
+		 (has-last-update  (member "last_update" fields))
+		 (use-last-update  (cond
+				    ((and has-last-update
+					  (member "last_update" fields))
+				     #t) ;; if given a number, just use it for all fields
+				    ((number? last-update) #f) ;; if not matched first entry then ignore last-update for this table
+				    ((and (pair? last-update)
+					  (member (car last-update)    ;; last-update field name
+						  (map car fields)))
+                                        #t)
+				    ((and last-update (not (pair? last-update)) (not (number? last-update)))
+				     (dbfile:print-err  "ERROR: parameter last-update for db:sync-tables must be a pair or a number, received: " last-update);; found in fields
+				     #f)
+				    (else
+				     #f)))
+		 (last-update-value (if use-last-update ;; no need to check for has-last-update - it is already accounted for
+					(if (number? last-update)
+					    last-update
+					    (cdr last-update))
+					#f))
+		 (last-update-field (if use-last-update
+					(if (number? last-update)
+					    "last_update"
+					    (car last-update))
+					#f))
+		 (num-fields (length fields))
+		 (field->num (make-hash-table))
+		 (num->field (apply vector (map car fields))) ;; BBHERE
+		 (full-sel   (conc "SELECT " (string-intersperse (map car fields) ",") 
+				   " FROM " tablename (if use-last-update ;; apply last-update criteria
+							  (conc " WHERE " last-update-field " >= " last-update-value)
+							  "")
+				   ";"))
+		 (full-ins   (conc "INSERT OR REPLACE INTO " tablename " ( " (string-intersperse (map car fields) ",") " ) "
+				   " VALUES ( " (string-intersperse (make-list num-fields "?") ",") " );"))
+		 (fromdat    '())
+		 (fromdats   '())
+		 (totrecords 0)
+		 (batch-len  100) ;; (string->number (or (configf:lookup *configdat* "sync" "batchsize") "100")))
+		 (todat      (make-hash-table))
+		 (count      0)
+                 (field-names (map car fields))
+                 (delay-handicap  0) ;; (string->number (or (configf:lookup *configdat* "sync" "delay-handicap") "0")))
+                 )
+
+	    ;; set up the field->num table
+	    (for-each
+	     (lambda (field)
+	       (hash-table-set! field->num field count)
+	       (set! count (+ count 1)))
+	     fields)
+
+	    ;; read the source table
+            ;; store a list of all rows in the table in fromdat, up to batch-len.
+            ;; Then add fromdat to the fromdats list, clear fromdat and repeat.
+	    (sqlite3:for-each-row
+	     (lambda (a . b)
+	       (set! fromdat (cons (apply vector a b) fromdat))
+	       (if (> (length fromdat) batch-len)
+		   (begin
+		     (set! fromdats (cons fromdat fromdats))
+		     (set! fromdat  '())
+		     (set! totrecords (+ totrecords 1)))
+               )
+             )
+	     (dbr:dbdat-dbh fromdb)
+	     full-sel)
+
+             ;; Count less than batch-len as a record
+             (if (> (length fromdat) 0)
+                 (set! totrecords (+ totrecords 1)))
+
+	    ;; tack on remaining records in fromdat
+	    (if (not (null? fromdat))
+		(set! fromdats (cons fromdat fromdats)))
+
+	    (sqlite3:for-each-row
+	     (lambda (a . b)
+	       (hash-table-set! todat a (apply vector a b)))
+	     (dbr:dbdat-dbh todb)
+	     full-sel)
+
+            (when (and delay-handicap (> delay-handicap 0))
+              (dbfile:print-err "imposing synthetic sync delay of "delay-handicap" seconds since sync/delay-handicap is configured")
+              (thread-sleep! delay-handicap)
+              (dbfile:print-err "synthetic sync delay of "delay-handicap" seconds completed")
+              )
+            
+	    ;; first pass implementation, just insert all changed rows
+
+	    (for-each 
+	     (lambda (targdb)
+	       (let* ((db                 (dbr:dbdat-dbh targdb))
+                      (drp-trigger        (if (member "last_update" field-names)
+					      (db:drop-trigger db tablename) 
+					      #f))
+		      (has-last-update    (member "last_update" field-names))
+                      (is-trigger-dropped (if has-last-update
+                                              (db:is-trigger-dropped db tablename)
+					      #f)) 
+		      (stmth  (sqlite3:prepare db full-ins))
+                      (changed-rows 0))
+		 (for-each
+		  (lambda (fromdat-lst)
+		    (sqlite3:with-transaction
+		     db
+		     (lambda ()
+		       (for-each ;; 
+			(lambda (fromrow)
+			  (let* ((a    (vector-ref fromrow 0))
+				 (curr (hash-table-ref/default todat a #f))
+				 (same #t))
+			    (let loop ((i 0))
+			      (if (or (not curr)
+				      (not (equal? (vector-ref fromrow i)(vector-ref curr i))))
+				  (set! same #f))
+			      (if (and same
+				       (< i (- num-fields 1)))
+				  (loop (+ i 1))))
+			    (if (not same)
+				(begin
+				  (apply sqlite3:execute stmth (vector->list fromrow))
+				  (hash-table-set! numrecs tablename (+ 1 (hash-table-ref/default numrecs tablename 0)))
+                                  (set! changed-rows (+ changed-rows 1))
+                                )
+                            )
+                            ))
+			fromdat-lst))))
+		  fromdats)
+
+		 (sqlite3:finalize! stmth)
+                 (if (member "last_update" field-names)
+                    (db:create-trigger db tablename))))
+	     (append (list todb) slave-dbs)
+           )
+          )
+        )
+	tbls)
+       (let* ((runtime      (- (current-milliseconds) start-time))
+	      (should-print (or ;; (debug:debug-mode 12)
+			     (common:low-noise-print 120 "db sync")
+			     (> runtime 500)))) ;; low and high sync times treated as separate.
+	 (for-each 
+	  (lambda (dat)
+	    (let ((tblname (car dat))
+		  (count   (cdr dat)))
+	      (set! tot-count (+ tot-count count))
+              )) 
+	  (sort (hash-table->alist numrecs)(lambda (a b)(> (cdr a)(cdr b))))))
+       tot-count)))))
+
+;;======================================================================
+;; trigger setup/takedown
+;;======================================================================
+
+(define db:trigger-list 
+     (list (list "update_runs_trigger"  "CREATE TRIGGER IF NOT EXISTS update_runs_trigger AFTER UPDATE ON runs
+                             FOR EACH ROW
+                               BEGIN 
+                                 UPDATE runs SET last_update=(strftime('%s','now'))
+                                   WHERE id=old.id;
+                               END;" ) 
+	   (list "update_run_stats_trigger"  "CREATE TRIGGER  IF NOT EXISTS update_run_stats_trigger AFTER UPDATE ON run_stats
+                             FOR EACH ROW
+                               BEGIN 
+                                 UPDATE run_stats SET last_update=(strftime('%s','now'))
+                                   WHERE id=old.id;
+                               END;" )
+	   (list "update_tests_trigger"  "CREATE TRIGGER  IF NOT EXISTS update_tests_trigger AFTER UPDATE ON tests
+                             FOR EACH ROW
+                               BEGIN 
+                                 UPDATE tests SET last_update=(strftime('%s','now'))
+                                   WHERE id=old.id;
+                               END;" )
+	   (list "update_teststeps_trigger"  "CREATE TRIGGER  IF NOT EXISTS update_teststeps_trigger AFTER UPDATE ON test_steps
+                             FOR EACH ROW
+                               BEGIN 
+                                 UPDATE test_steps SET last_update=(strftime('%s','now'))
+                                   WHERE id=old.id;
+                               END;" )
+	   (list "update_test_data_trigger"  "CREATE TRIGGER  IF NOT EXISTS update_test_data_trigger AFTER UPDATE ON test_data
+                             FOR EACH ROW
+                               BEGIN 
+                                 UPDATE test_data SET last_update=(strftime('%s','now'))
+                                   WHERE id=old.id;
+                               END;" )))
+;;
+;; ADD run-id SUPPORT
+;;
+(define (db:create-all-triggers dbstruct)
+  (db:with-db
+   dbstruct #f #f
+   (lambda (dbdat db)
+     (db:create-triggers db))))
+
+(define (db:create-triggers db)
+    (for-each (lambda (key)
+              (sqlite3:execute db (cadr key)))
+          db:trigger-list))
+
+(define (db:drop-all-triggers dbstruct)
+  (db:with-db
+   dbstruct #f #f
+   (lambda (dbdat db)
+     (db:drop-triggers db))))
+
+(define (db:is-trigger-dropped db tbl-name)
+  (let* ((trigger-name (if (equal? tbl-name "test_steps")
+			   "update_teststeps_trigger" 
+                           (conc "update_" tbl-name "_trigger")))
+	 (res          #f))
+    (sqlite3:for-each-row
+     (lambda (name)
+       (if (equal? name trigger-name)
+	   (set! res #t)))
+     db 
+     "SELECT name FROM sqlite_master WHERE type = 'trigger' ;")
+    res))
+
+(define (db:drop-triggers db)
+  (for-each
+   (lambda (key) 
+     (sqlite3:execute db (conc "drop trigger if exists " (car key))))
+   db:trigger-list))
+
+(define  (db:drop-trigger db tbl-name)
+  (let* ((trigger-name (if (equal? tbl-name "test_steps")
+			   "update_teststeps_trigger" 
+                           (conc "update_" tbl-name "_trigger"))))
+    (for-each
+     (lambda (key) 
+       (if (equal? (car key) trigger-name)
+           (sqlite3:execute db (conc "drop trigger if exists " trigger-name))))
+     db:trigger-list)))
+
+(define  (db:create-trigger db tbl-name)
+      (let* ((trigger-name (if (equal? tbl-name "test_steps")
+                              "update_teststeps_trigger" 
+                              (conc "update_" tbl-name "_trigger"))))
+       (for-each (lambda (key) 
+             (if (equal? (car key) trigger-name)
+             (sqlite3:execute db (cadr key))))
+      db:trigger-list))) 
+
+;;======================================================================
+;; db access stuff
+;;======================================================================
+
+;; call with dbinit=db:initialize-main-db
+;;
+(define (db:open-db dbstruct run-id dbinit)
+  ;; (mutex-lock! *db-open-mutex*)
+  (let* ((dbdat (dbfile:open-db dbstruct run-id dbinit)))
+    (set! *db-write-access* (not (dbr:dbdat-read-only dbdat)))
+    ;; (mutex-unlock! *db-open-mutex*)
+    dbdat))
+
+(define dbfile:db-init-proc (make-parameter #f))
+
+;; in xmaxima this gives a curve close to what I want:
+;;    plot2d ((exp(x/1.2)-1)/300, [x, 0, 10])$
+;;    plot2d ((exp(x/1.5)-1)/40, [x, 0, 10])$
+;;    plot2d ((exp(x/5)-1)/40, [x, 0, 20])$
+(define (dbfile:droop x)
+  (/ (- (exp (/ x 5)) 1) 40))
+  ;; (* numqrys (/ 1 (qif-slope))))
+
+;; create a dropping near the db file in a qif dir
+;; use count of such files to gate queries (queries in flight)
+;;
+(define (dbfile:wait-for-qif fname run-id params)
+  (let* ((thedir  (pathname-directory fname))
+	 (dbnum   (dbfile:run-id->dbnum run-id))
+	 (destdir (conc thedir"/qif-"dbnum))
+	 (uniqn   (get-area-path-signature (conc dbnum params)))
+	 (crumbn  (conc destdir"/"(current-seconds)"-"uniqn"."(current-process-id))))
+    (if (not (file-exists? destdir))(create-directory (conc destdir"/attic") #t))
+    (let loop ((count 0))
+      (let* ((currlks (glob (conc destdir"/*")))
+	     (numqrys (length currlks))
+	     (delayval (cond ;; do a droopish curve
+			((> numqrys 25)
+			 (for-each
+			  (lambda (f)
+			    (if (> (- (current-seconds)
+				      (handle-exceptions
+					  exn
+					(current-seconds) ;; file is likely gone, just fake out
+					(file-modification-time f)))
+				   (keep-age-param))
+				(let* ((basedir (pathname-directory f))
+				       (filen   (pathname-file f))
+				       (destf   (conc basedir"/attic/"filen)))
+				  (dbfile:print-err "Moving qif file "f" older than 10 seconds to "destf)
+				  ;; (delete-file* f)
+				  (handle-exceptions
+				      exn
+				    #t
+				    (file-move f destf #t)))))
+			  currlks)
+			 4)
+			((> numqrys 0)  (dbfile:droop numqrys)) ;; slope of 1/100
+			(else #f))))
+	(if (and delayval
+		 (< count 5))
+	    (begin
+	      (thread-sleep! delayval)
+	      (loop (+ count 1))))))
+    (with-output-to-file crumbn
+      (lambda ()
+	(print fname" run-id="run-id" params="params)
+	))
+    crumbn))
+
+(define no-condition-db-with-db (make-parameter #t))
+
+;; (db:with-db dbstruct run-id sqlite3:exec "select blah fgrom blaz;")
+;; r/w is a flag to indicate if the db is modified by this query #t = yes, #f = no
+;;
+(define (db:with-db dbstruct run-id r/w proc . params)
+  (assert dbstruct "FATAL: db:with-db called with dbstruct "#f)
+  (assert (dbr:dbstruct? dbstruct) "FATAL: dbstruct is "dbstruct)
+  (let* ((use-mutex (> *api-process-request-count* 25)) ;; risk of db corruption
+	 (have-struct (dbr:dbstruct? dbstruct))
+         (dbdat     (if have-struct                ;; this stuff just allows us to call with a db handle directly
+			(db:open-db dbstruct run-id (dbfile:db-init-proc)) ;; (dbfile:get-subdb dbstruct run-id)
+			#f))
+	 (db        (if have-struct                ;; this stuff just allows us to call with a db handle directly
+			(dbr:dbdat-dbh dbdat)
+			dbstruct))
+	 (fname     (if dbdat
+			(dbr:dbdat-dbfile dbdat)
+			"nofilenameavailable"))
+	 (jfile     (conc fname"-journal"))
+	 (qryproc   (lambda ()
+		      (if use-mutex (mutex-lock! *db-with-db-mutex*))
+		      (let ((res (apply proc dbdat db params))) ;; the actual call is here.
+			(if use-mutex (mutex-unlock! *db-with-db-mutex*))
+			;; (if (vector? dbstruct)(db:done-with dbstruct run-id r/w))
+			(if dbdat
+			    (dbfile:add-dbdat dbstruct run-id dbdat))
+			;; (delete-file* crumbfile)
+			res))))
+
+    (assert (sqlite3:database? db) "FATAL: db:with-db, db is not a database, db="db", fname="fname)
+    (if (file-exists? jfile)
+	(begin
+	  (dbfile:print-err "INFO: "jfile" exists, delaying to reduce database load")
+	  (thread-sleep! 0.2)))
+    (if (and use-mutex
+	     (common:low-noise-print 120 "over-50-parallel-api-requests"))
+	(dbfile:print-err *api-process-request-count* " parallel api requests being processed in process "
+			  (current-process-id))) ;;  ", throttling access"))
+    (if (no-condition-db-with-db)
+	(qryproc)
+	(condition-case
+	 (qryproc)
+	 (exn (io-error)
+	      (db:generic-error-printout exn "ERROR: i/o error with " fname ". Check permissions, disk space etc. and try again."))
+	 (exn (corrupt)
+	      (db:generic-error-printout exn "ERROR: database " fname " is corrupt. Repair it to proceed."))
+	 (exn (busy)
+	      (db:generic-error-printout exn "ERROR: database " fname
+					 " is locked. Try copying to another location, remove original and copy back."))
+	 (exn (permission)(db:generic-error-printout exn "ERROR: database " fname " has some permissions problem."))
+	 (exn ()
+	      (db:generic-error-printout exn "ERROR: Unknown error with database " fname " message: "
+					 ((condition-property-accessor 'exn 'message) exn)))))))
+
+;;======================================================================
+;; another attempt at a transactionized queue
+;;======================================================================
+
+;; ;; ;; (define *transaction-queues* (make-hash-table))
+;; ;; ;; 
+;; ;; ;; (define (db:get-queue run-id)
+;; ;; ;;   (let* ((res (hash-table-ref/default *transaction-queues* run-id #f)))
+;; ;; ;;     (if res
+;; ;; ;; 	res
+;; ;; ;; 	(let* ((newq (make-queue)))
+;; ;; ;; 	  (hash-table-set! *transaction-queues* run-id newq)
+;; ;; ;; 	  newq))))
+;; ;; ;; 
+;; ;; ;; (define (db:add-to-transaction-queue dbstruct proc params)
+;; ;; ;;   (let* ((mbox (make-mailbox))
+;; ;; ;; 	 (q    (db:get-queue run-id)))
+;; ;; ;;     (queue-add! *transaction-queue* (list dbstruct proc mbox))
+;; ;; ;;     (mailbox-receive mbox)))
+;; ;; ;; 
+;; ;; ;; (define (db:process-transaction-queue *dbstruct-dbs*)
+;; ;; ;;   (for-each
+;; ;; ;;    (lambda (run-id)
+;; ;; ;;      (let* ((q (hash-table-ref *transaction-queue* run-id)))
+;; ;; ;;        ;; with-transaction
+;; ;; ;;        ;;     dbstruct
+;; ;; ;;        ;; pop items from queue and execute them, return results via mailbox
+;; ;; ;;        q
+;; ;; ;;        ;; pop 
+;; ;; ;;        ))
+;; ;; ;;    (hash-table-keys *transaction-queues*)))
+
+;;======================================================================
+;; file utils
+;;======================================================================
+
+;;======================================================================
+;; lazy-safe get file mod time. on any error (file not existing etc.) return 0
+;;
+(define (dbfile:lazy-modification-time fpath)
+  (handle-exceptions
+      exn
+    (begin
+      (dbfile:print-err "Failed to get modification time for " fpath ", treating it as zero. exn=" exn)
+      0)
+    (if (file-exists? fpath)
+	(file-modification-time fpath)
+	0)))
+
+;;======================================================================
+;; find timestamp of newest file associated with a sqlite db file
+(define (dbfile:lazy-sqlite-db-modification-time fpath)
+  (let* ((glob-list (handle-exceptions
+			exn
+		      (begin
+			(dbfile:print-err "Failed to glob " fpath "*, exn=" exn)
+			`(,(conc "/no/such/file, message: " ((condition-property-accessor 'exn 'message) exn))))
+		      (glob (conc fpath "*"))))
+         (file-list (if (eq? 0 (length glob-list))
+			'("/no/such/file")
+			glob-list)))
+  (apply max
+	 (map
+	  dbfile:lazy-modification-time 
+	  file-list))))
+
+;; dot-locking egg seems not to work, using this for now
+;; if lock is older than expire-time then remove it and try again
+;; to get the lock
+;;
+(define (dbfile:simple-file-lock fname #!key (expire-time 300))
+  (let ((fmod-time (handle-exceptions
+		       ext
+		     (current-seconds)
+		     (file-modification-time fname))))
+    (if (file-exists? fname)
+	(if (> (- (current-seconds) fmod-time) expire-time)
+	    (begin
+	      (handle-exceptions exn #f (delete-file* fname))	
+	      (dbfile:simple-file-lock fname expire-time: expire-time))
+	    #f)
+	(let ((key-string (conc (get-host-name) "-" (current-process-id)))
+	      (oup        (open-output-file fname)))
+	  (with-output-to-port
+	      oup
+	    (lambda ()
+	      (print key-string)))
+	  (close-output-port oup)
+	  #;(with-output-to-file fname ;; bizarre. with-output-to-file does not seem to be cleaning up after itself.
+	    (lambda ()
+	  (print key-string)))
+	  (thread-sleep! 0.25)
+	  (if (file-exists? fname)
+	      (handle-exceptions exn
+                #f 
+                (with-input-from-file fname
+	  	  (lambda ()
+		    (equal? key-string (read-line)))))
+	      #f)
+       )
+    )
+  )
+)
+
+(define (dbfile:simple-file-lock-and-wait fname #!key (expire-time 300))
+  (let ((end-time (+ expire-time (current-seconds))))
+    (let loop ((got-lock (dbfile:simple-file-lock fname expire-time: expire-time)))
+      (if got-lock
+	  #t
+	  (if (> end-time (current-seconds))
+	      (begin
+		(thread-sleep! 3)
+		(loop (dbfile:simple-file-lock fname expire-time: expire-time)))
+	      #f)))))
+
+(define (dbfile:simple-file-release-lock fname)
+  (handle-exceptions
+      exn
+      #f ;; I don't really care why this failed (at least for now)
+    (delete-file* fname)))
+
+(define (dbfile:with-simple-file-lock fname proc #!key (expire-time 300))
+  (let ((gotlock (dbfile:simple-file-lock-and-wait fname expire-time: expire-time)))
+    (if gotlock
+	(let ((res (proc)))
+	  (dbfile:simple-file-release-lock fname)
+	  res)
+	(assert #t "FATAL: simple file lock never got a lock."))))
+  
+(define (db:get-cache-stmth dbdat db stmt)
+  (let* (;; (dbdat       (dbfile:get-dbdat dbstruct run-id))
+	 (stmt-cache  (dbr:dbdat-stmt-cache dbdat))
+	 ;; (stmth       (db:hoh-get stmt-cache db stmt))
+	 (stmth       (hash-table-ref/default stmt-cache stmt #f)))
+    (or stmth
+	(let* ((newstmth (sqlite3:prepare db stmt)))
+	  ;; (db:hoh-set! stmt-cache db stmt newstmth)
+	  (hash-table-set! stmt-cache stmt newstmth)
+	  newstmth))))
+
+(define (db:have-incompletes? dbstruct run-id ovr-deadtime)
+  (let* ((incompleted '())
+	 (oldlaunched '())
+	 (toplevels   '())
+	 ;; (deadtime-str (configf:lookup *configdat* "setup" "deadtime"))
+	 (deadtime  (or ovr-deadtime 72000))) ;; twenty hours
+    (db:with-db
+     dbstruct run-id #f
+     (lambda (dbdat db)
+       
+       ;; in RUNNING or REMOTEHOSTSTART for more than 10 minutes
+       ;;
+       ;; HOWEVER: this code in run:test seems to work fine
+       ;;              (> (- (current-seconds)(+ (db:test-get-event_time testdat)
+       ;;                     (db:test-get-run_duration testdat)))
+       ;;                    600) 
+       ;; (db:delay-if-busy dbdat)
+       (sqlite3:for-each-row 
+        (lambda (test-id run-dir uname testname item-path)
+          (if (and (equal? uname "n/a")
+                   (equal? item-path "")) ;; this is a toplevel test
+              ;; what to do with toplevel? call rollup?
+              (begin
+                (set! toplevels   (cons (list test-id run-dir uname testname item-path run-id) toplevels)))
+                ;; (print-info "Found old toplevel test in RUNNING state, test-id=" test-id))
+              (set! incompleted (cons (list test-id run-dir uname testname item-path run-id) incompleted))))
+        (db:get-cache-stmth dbdat db
+        "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > (run_duration + ?) AND state IN ('RUNNING','REMOTEHOSTSTART');")
+        run-id deadtime)
+
+       ;; in LAUNCHED for more than one day. Could be long due to job queues TODO/BUG: Need override for this in config
+       ;;
+       ;; (db:delay-if-busy dbdat)
+       (sqlite3:for-each-row
+        (lambda (test-id run-dir uname testname item-path)
+          (if (and (equal? uname "n/a")
+                   (equal? item-path "")) ;; this is a toplevel test
+              ;; what to do with toplevel? call rollup?
+              (set! toplevels   (cons (list test-id run-dir uname testname item-path run-id) toplevels))
+              (set! oldlaunched (cons (list test-id run-dir uname testname item-path run-id) oldlaunched))))
+        (db:get-cache-stmth dbdat db
+        "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > 86400 AND state IN ('LAUNCHED');")
+        run-id)
+       
+       ;; (print-info "Found " (length oldlaunched) " old LAUNCHED items, " (length toplevels) " old LAUNCHED toplevel tests and " (length incompleted) " tests marked RUNNING but apparently dead.")
+       (if (and (null? incompleted)
+                (null? oldlaunched)
+                (null? toplevels))
+           #f
+           #t)))))
+
+
+)

Index: dbmod.scm
==================================================================
--- dbmod.scm
+++ dbmod.scm
@@ -17,40 +17,108 @@
 ;;     along with Megatest.  If not, see <http://www.gnu.org/licenses/>.
 
 ;;======================================================================
 
 (declare (unit dbmod))
+(declare (uses dbfile))
+(declare (uses commonmod))
+(declare (uses debugprint))
 
 (module dbmod
 	*
 	
-(import scheme chicken data-structures extras)
-(import (prefix sqlite3 sqlite3:)
-	posix typed-records srfi-18
-	srfi-69)
-
-(define (db:run-id->dbname run-id)
-  (cond
-   ((number? run-id)(conc run-id ".db"))
-   ((not run-id)    "main.db")
-   (else            run-id)))
-
-
-;;======================================================================
-;; hash of hashs
-;;======================================================================
-
-
-(define (db:hoh-set! dat key1 key2 val)
-  (let* ((subhash (hash-table-ref/default dat key1 #f)))
-    (if subhash
-	(hash-table-set! subhash key2 val)
-	(begin
-	  (hash-table-set! dat key1 (make-hash-table))
-	  (db:hoh-set! dat key1 key2 val)))))
-
-(define (db:hoh-get dat key1 key2)
-  (let* ((subhash (hash-table-ref/default dat key1 #f)))
-    (and subhash
-	 (hash-table-ref/default subhash key2 #f))))
+(import scheme
+	chicken
+	data-structures
+	extras
+
+	(prefix sqlite3 sqlite3:)
+	posix
+	typed-records
+	srfi-18
+	srfi-69
+
+	commonmod
+	dbfile
+	debugprint
+	)
+
+;; NOTE: This returns only the name "1.db", "main.db", not the path
+;;
+(define (dbmod:run-id->dbfname run-id)
+  (conc (dbfile:run-id->dbnum run-id)".db"))
+
+(define (dbmod:get-dbdir dbstruct run-id)
+  (let* ((areapath (dbr:dbstruct-areapath dbstruct)))
+    (conc areapath"/.megatest")))
+
+(define (dbmod:run-id->full-dbfname dbstruct run-id)
+  (conc (dbmod:get-dbdir dbstruct run-id)"/"(dbmod:run-id->dbfname run-id)))
+
+;;======================================================================
+;; The inmem one-db file per server method goes in here
+;;======================================================================
+
+(define (dbmod:with-db dbstruct run-id r/w proc params)
+  (let* ((dbdat  (dbmod:open-db dbstruct run-id (dbfile:db-init-proc)))
+	 (dbh    (dbr:dbdat-dbh dbdat))
+	 (dbfile (dbr:dbdat-dbfile dbdat)))
+    (apply proc dbdat dbh params)))
+
+(define (dbmod:open-inmem-db initproc)
+  (let* ((db      (sqlite3:open-database ":memory:"))
+	 (handler (sqlite3:make-busy-timeout 3600)))
+    (sqlite3:set-busy-handler! db handler)
+    (initproc db)
+    db))
+
+(define (dbmod:open-db dbstruct run-id dbinit)
+  (or (dbr:dbstruct-dbdat dbstruct)
+      (let* ((dbdat (make-dbr:dbdat
+		     dbfile: (dbr:dbstruct-dbfile dbstruct)
+		     dbh:    (dbr:dbstruct-inmem  dbstruct)
+		     )))
+	(dbr:dbstruct-dbdat-set! dbstruct dbdat)
+	dbdat)))
+
+;; Open the inmem db and the on-disk db
+;; populate the inmem db with data
+;;
+;; Updates fields in dbstruct
+;; Returns dbstruct
+;;
+;; * This routine creates the db if not found
+;; * Probably can get rid of the dbstruct-in
+;; 
+(define (dbmod:open-dbmoddb areapath run-id init-proc #!key (dbstruct-in #f)) ;;  (conc *toppath* "/megatest.db") (car *configinfo*)))
+  (let* ((dbstruct     (or dbstruct-in (make-dbr:dbstruct areapath: areapath)))
+	 (dbfname      (dbmod:run-id->dbfname run-id))
+	 (dbpath       (dbmod:get-dbdir dbstruct run-id))             ;; directory where all the .db files are kept
+	 (dbfullname   (dbmod:run-id->full-dbfname dbstruct run-id))
+	 (dbexists     (file-exists? dbfullname))
+	 (inmem        (dbmod:open-inmem-db init-proc))
+	 (write-access (file-write-access? dbpath))
+	 (db           (dbfile:with-simple-file-lock
+			(conc dbfullname".lock")
+			(lambda ()
+			  (let* ((db      (sqlite3:open-database dbfullname))
+				 (handler (sqlite3:make-busy-timeout 136000)))
+			    (sqlite3:set-busy-handler! db handler)
+			    (if write-access
+				(init-proc db))
+			    db)))))
+    (dbr:dbstruct-inmem-set!    dbstruct inmem)
+    (dbr:dbstruct-ondiskdb-set! dbstruct db)
+    (dbr:dbstruct-dbfile-set!   dbstruct dbfullname)
+    dbstruct))
+
+(define (dbmod:close-db dbstruct)
+  ;; do final sync to disk file
+  ;; (do-sync ...)
+  (sqlite3:finalize! (dbr:dbstruct-ondiskdb dbstruct)))
+
+;;======================================================================
+;; Moved from dbfile
+;;======================================================================
+
 
 )

Index: launch.scm
==================================================================
--- launch.scm
+++ launch.scm
@@ -33,17 +33,19 @@
 (declare (uses common))
 (declare (uses commonmod))
 (declare (uses configf))
 (declare (uses db))
 (declare (uses ezsteps))
+(declare (uses dbfile))
 
 (include "common_records.scm")
 (include "key_records.scm")
 (include "db_records.scm")
 (include "megatest-fossil-hash.scm")
 
-(import commonmod)
+(import commonmod
+	dbfile)
 
 ;;======================================================================
 ;; ezsteps
 ;;======================================================================
 
@@ -1143,11 +1145,14 @@
 	      (setenv "MT_TESTSUITENAME" (common:get-testsuite-name)))
 	    (begin
 	      (debug:print-error 0 *default-log-port* "failed to find the top path to your Megatest area.")
 	      (set! *toppath* #f) ;; force it to be false so we return #f
 	      #f))
-	
+
+	;; needed by various transport and db modules
+	(dbfile:testsuite-name (get-testsuite-name *toppath* *configdat*))
+
         ;; one more attempt to cache the configs for future reading
         (let* ((cachefiles   (launch:get-cache-file-paths areapath toppath target mtconfig))
                (mtcachef     (car cachefiles))
                (rccachef     (cdr cachefiles)))
 

Index: margs.scm
==================================================================
--- margs.scm
+++ margs.scm
@@ -23,10 +23,20 @@
 
 (define (args:get-arg arg . default)
   (if (null? default)
       (hash-table-ref/default args:arg-hash arg #f)
       (hash-table-ref/default args:arg-hash arg (car default))))
+
+;; get an arg as a number
+(define (args:get-arg-number arg . default)
+  (let* ((val-str (args:get-arg arg))
+	 (val     (if val-str (string->number val-str) #f)))
+    (if val
+	val
+	(if (null? default)
+	    #f
+	    default))))
 
 (define (args:any? . args)
   (not (null? (filter (lambda (x) x)
 		      (map args:get-arg args)))))
 

Index: megatest.scm
==================================================================
--- megatest.scm
+++ megatest.scm
@@ -23,10 +23,16 @@
 (define (toplevel-command . a) #f)
 
 (declare (uses common))
 ;; (declare (uses megatest-version))
 (declare (uses margs))
+(declare (uses commonmod))
+(declare (uses commonmod.import))
+(declare (uses mtargs))
+(declare (uses mtargs.import))
+(declare (uses debugprint))
+(declare (uses debugprint.import))
 (declare (uses runs))
 (declare (uses launch))
 (declare (uses server))
 (declare (uses client))
 (declare (uses tests))
@@ -41,27 +47,27 @@
 (declare (uses api))
 (declare (uses tasks)) ;; only used for debugging.
 (declare (uses env))
 (declare (uses diff-report))
 (declare (uses db))
+(declare (uses dbfile))
+(declare (uses dbfile.import))
 (declare (uses dbmod))
 (declare (uses dbmod.import))
-(declare (uses commonmod))
-(declare (uses commonmod.import))
-(declare (uses dbfile))
-(declare (uses dbfile.import))
+(declare (uses tcp-transportmod))
+(declare (uses tcp-transportmod.import))
 ;; (declare (uses debugprint))
 ;; (declare (uses debugprint.import))
-;; (declare (uses mtargs))
-;; (declare (uses mtargs.import))
 
 ;; (declare (uses ftail))
 ;; (import ftail)
 
-(import dbmod
+(import debugprint
+	dbmod
 	commonmod
-	dbfile)
+	dbfile
+	tcp-transportmod)
 
 (define *db* #f) ;; this is only for the repl, do not use in general!!!!
 
 (include "common_records.scm")
 (include "key_records.scm")
@@ -80,10 +86,12 @@
 (require-library mutils)
 
 (define *usage-log-file* #f)    ;; put path to file for logging usage in this var in the ~/.megatestrc file
 (define *usage-use-seconds* #t) ;; for Epoc seconds in usage logging change this to #t in ~/.megatestrc file
 
+;; set some parameters here
+(include "transport-mode.scm")
 (dbfile:db-init-proc db:initialize-main-db)
 
 ;; load the ~/.megatestrc file, put (use trace)(trace-call-sites #t)(trace function-you-want-to-trace) in this file
 ;;
 (let ((debugcontrolf (conc (get-environment-variable "HOME") "/.megatestrc")))
@@ -585,13 +593,12 @@
 ;; where (launch:setup) returns #f?
 ;;
 (if (or (args:get-arg "-log")(args:get-arg "-server")) ;; redirect the log always when a server
     (handle-exceptions
 	exn
-	(begin
-	  (print "ERROR: Failed to switch to log output. " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn)
-	  )
+      (begin
+	(print "ERROR: Failed to switch to log output. " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn))
       (let* ((tl   (or (args:get-arg "-log")(launch:setup)))   ;; run launch:setup if -server, ensure we do NOT run launch:setup if -log specified
 	     (logf (or (args:get-arg "-log") ;; use -log unless we are a server, then craft a logfile name
 		       (conc tl "/logs/server-" (current-process-id) "-" (get-host-name) ".log")))
 	     (oup  (open-logfile logf)))
 	(if (not (args:get-arg "-log"))
@@ -921,13 +928,22 @@
 ;;======================================================================
 
 ;; Server? Start up here.
 ;;
 (if (args:get-arg "-server")
-    (let ((tl        (launch:setup)))
-      ;; (server:launch 0 'http)
-      (http-transport:launch)
+    (let* ((run-id (args:get-arg-number "-run-id"))
+	   (tl        (launch:setup)))
+      (case (rmt:transport-mode)
+	((http)(http-transport:launch))
+	((tcp)
+	 (debug:print 0 *default-log-port* "INFO: Running using tcp method.")
+	 (if run-id
+	     (tt:start-server tl run-id (dbmod:run-id->dbfname run-id) api:dispatch-request)
+	     (begin
+	       (debug:print 0 *default-log-port* "ERROR: transport mode is tcp - -run-id is required.")
+	       (exit 1))))
+	(else (debug:print 0 *default-log-port* "ERROR: rmt:transport-mode value not recognised "(rmt:transport-mode))))
       (set! *didsomething* #t)))
 
 ;; The adjutant is a bit different, it does NOT run (launch:setup) as it is not necessarily tied to
 ;; a specific Megatest area. Detail are being hashed out and this may change.
 ;;
@@ -2370,11 +2386,13 @@
 (if (or (getenv "MT_RUNSCRIPT")
 	(args:get-arg "-repl")
 	(args:get-arg "-load"))
     (let* ((toppath (launch:setup))
 	   (dbstructs (if (and toppath
-                               (server:choose-server toppath 'home?))
+			       ;; NOTE: server:choose-server is starting a server
+			       ;;   either add equivalent for tcp mode or ????
+                               #;(server:choose-server toppath 'home?))
                           (db:setup #t)
                           #f))) ;; make-dbr:dbstruct path: toppath local: (args:get-arg "-local")) #f)))
       (if *toppath*
 	  (cond
 	   ((getenv "MT_RUNSCRIPT")

Index: mtargs/mtargs.scm
==================================================================
--- mtargs/mtargs.scm
+++ mtargs/mtargs.scm
@@ -18,16 +18,17 @@
 
 (module mtargs
     (
      arg-hash
      get-arg
+     get-arg-number
      get-arg-from
      get-args
      usage
      print-args
      any-defined?
-     )
+     ) 
 
 (import scheme) ;; gives us cond-expand in chicken-4
 
 (cond-expand
  (chicken-5
@@ -42,10 +43,20 @@
 
 (define (get-arg arg . default)
   (if (null? default)
       (hash-table-ref/default arg-hash arg #f)
       (hash-table-ref/default arg-hash arg (car default))))
+
+;; get an arg as a number
+(define (get-arg-number arg . default)
+  (let* ((val-str (get-arg arg))
+	 (val     (if val-str (string->number val-str) #f)))
+    (if val
+	val
+	(if (null? default)
+	    #f
+	    default))))
 
 (define (any-defined? . args)
   (not (null? (filter (lambda (x) x)
 		      (map get-arg args)))))
 

Index: rmt.scm
==================================================================
--- rmt.scm
+++ rmt.scm
@@ -21,16 +21,25 @@
 (use format typed-records) ;; RADT => purpose of json format??
 
 (declare (unit rmt))
 (declare (uses api))
 (declare (uses http-transport))
+(declare (uses commonmod))
 (declare (uses dbfile))
+(declare (uses dbmemmod))
+(declare (uses tcp-transportmod))
 (include "common_records.scm")
 ;; (declare (uses rmtmod))
 
+;; used by http-transport
 (import dbfile) ;; rmtmod)
 
+(import commonmod
+	dbmemmod
+	tcp-transportmod)
+
+(define rmt:transport-mode (make-parameter 'http))
 ;;
 ;; THESE ARE ALL CALLED ON THE CLIENT SIDE!!!
 ;;
 
 ;; generate entries for ~/.megatestrc with the following
@@ -61,10 +70,15 @@
 	(cdr hh-dat)
 	(begin
 	  (debug:print-info 0 *default-log-port* "hh-dat="hh-dat)
 	  #f))))
 
+(define (make-and-init-remote areapath)
+   (case (rmt:transport-mode)
+     ((http)(make-remote))
+     ((tcp) (tt:make-remote areapath))
+     (else #f)))
 
 ;;======================================================================
 
 (define *send-receive-mutex* (make-mutex)) ;; should have separate mutex per run-id
 
@@ -73,109 +87,94 @@
 (define (rmt:send-receive cmd rid params #!key (attemptnum 1)(area-dat #f)) ;; start attemptnum at 1 so the modulo below works as expected
 
   #;(common:telemetry-log (conc "rmt:"(->string cmd))
                         payload: `((rid . ,rid)
                                    (params . ,params)))
-
+  
   (if (> attemptnum 2)
       (debug:print 0 *default-log-port* "INFO: attemptnum in rmt:send-receive is " attemptnum))
-    
+  
   (cond
    ((> attemptnum 2) (thread-sleep! 0.05))
    ((> attemptnum 10) (thread-sleep! 0.5))
    ((> attemptnum 20) (thread-sleep! 1)))
+
+  ;; I'm turning this off, it may make sense to move it
+  ;; into http-transport-handler
   (if (and (> attemptnum 5) (= 0 (modulo attemptnum 15)))  
-    (begin (server:run *toppath*) (thread-sleep! 3))) 
-  
-  
-  ;;DOT digraph megatest_state_status {
-  ;;DOT   ranksep=0;
-  ;;DOT   // rankdir=LR;
-  ;;DOT   node [shape="box"];
-  ;;DOT "rmt:send-receive" -> MUTEXLOCK;
-  ;;DOT { edge [style=invis];"case 1" -> "case 2" -> "case 3" -> "case 4" -> "case 5" -> "case 6" -> "case 7" -> "case 8" -> "case 9" -> "case 10" -> "case 11"; }
-  ;; do all the prep locked under the rmt-mutex
-  (mutex-lock! *rmt-mutex*)
-  
+      (begin
+	(server:run *toppath*)
+	(thread-sleep! 3)))
+
   ;; 1. check if server is started IFF cmd is a write OR if we are not on the homehost, store in runremote
   ;; 2. check the age of the connections. refresh the connection if it is older than timeout-20 seconds.
   ;; 3. do the query, if on homehost use local access
   ;;
   (let* ((start-time    (current-seconds)) ;; snapshot time so all use cases get same value
          (areapath      *toppath*);; TODO - resolve from dbstruct to be compatible with multiple areas
 	 (runremote     (or area-dat
 			    *runremote*))
          (attemptnum    (+ 1 attemptnum))
-	 (readonly-mode (rmtmod:calc-ro-mode runremote *toppath*)))
-
-    ;; DOT INIT_RUNREMOTE; // leaving off - doesn't really add to the clarity
-    ;; DOT MUTEXLOCK -> INIT_RUNREMOTE [label="no remote?"];
-    ;; DOT INIT_RUNREMOTE -> MUTEXLOCK;
-    ;; ensure we have a record for our connection for given area
-    (if (not runremote)                   ;; can remove this one. should never get here.         
-	(begin
-	  (set! *runremote* (make-remote))
-          (let* ((server-info (remote-server-info *runremote*))) 
-            (if server-info
-		(begin
-			(remote-server-url-set! *runremote* (server:record->url server-info))
-			(remote-server-id-set! *runremote* (server:record->id server-info)))))  
-	  (set! runremote   *runremote*))) ;; new runremote will come from this on next iteration
-
-    (http-transport-handler runremote cmd rid params attemptnum area-dat areapath readonly-mode)))
-
-(define (http-transport-handler runremote cmd rid params attemptnum area-dat areapath readonly-mode)
-  ;; DOT SET_HOMEHOST; // leaving off - doesn't really add to the clarity
-  ;; DOT MUTEXLOCK -> SET_HOMEHOST [label="no homehost?"];
-  ;; DOT SET_HOMEHOST -> MUTEXLOCK;
+	 (readonly-mode (rmtmod:calc-ro-mode runremote *toppath*))
+	 (testsuite     (common:get-testsuite-name))
+	 (mtexe         (common:find-local-megatest)))
+
+    (case (rmt:transport-mode)
+      ((http)(http-transport-handler runremote cmd rid params attemptnum area-dat areapath readonly-mode))
+      ((tcp) (tcp-transport-handler  runremote cmd rid params attemptnum area-dat areapath readonly-mode testsuite mtexe)))))
+
+(define (tcp-transport-handler runremote cmd rid params attemptnum area-dat areapath readonly-mode testsuite mtexe)
+  (if (not runremote)
+      (let* ((newremote  (make-and-init-remote areapath)))
+	(set! *runremote* newremote)
+	(set! runremote newremote)))
+  (let* ((dbfname (conc (dbfile:run-id->dbnum rid)".db"))) ;;(dbfile:run-id->path areapath run-id)))
+    (tt:handler runremote cmd rid params attemptnum area-dat areapath readonly-mode dbfname testsuite mtexe)))
+	
+(define (http-transport-handler runremote cmd rid params attemptnum area-dat areapath readonly-mode testsuite mtexe)
+  ;; do all the prep locked under the rmt-mutex
+  (mutex-lock! *rmt-mutex*)
+  
+  ;; ensure we have a record for our connection for given area
+  (if (not runremote)                   ;; can remove this one. should never get here.         
+      (begin
+	(set! *runremote* (make-and-init-remote areapath))
+        (let* ((server-info (remote-server-info *runremote*))) 
+          (if server-info
+	      (begin
+		(remote-server-url-set! *runremote* (server:record->url server-info))
+		(remote-server-id-set! *runremote* (server:record->id server-info)))))  
+	(set! runremote   *runremote*))) ;; new runremote will come from this on next iteration
+
   ;; ensure we have a homehost record
   (if (or (not (pair? (remote-hh-dat runremote)))  ;; not on homehost
 	  (not (cdr (remote-hh-dat runremote))))   ;; not on homehost
       (thread-sleep! 0.1) ;; since we shouldn't get here, delay a little
       (let ((hh-data (server:choose-server areapath 'homehost)))
 	(remote-hh-dat-set! runremote (or hh-data (cons #f #f)))))
   
-  ;;(print "BB> readonly-mode is "readonly-mode" dbfile is "dbfile)
   (cond
-   #;((> (- (current-seconds)(remote-connect-time runremote)) 180) ;; reconnect to server every 180 seconds
-   (debug:print 0 *default-log-port* "Forcing reconnect to server(s) due to 180 second timeout.")
-   (set! *runremote* #f)
-   ;; BUG: close-connections should go here?
-   (mutex-unlock! *rmt-mutex*)
-   (rmt:send-receive cmd rid params attemptnum: 1 area-dat: area-dat))
-   
-   ;;DOT EXIT;
-   ;;DOT MUTEXLOCK -> EXIT [label="> 15 attempts"]; {rank=same "case 1" "EXIT" }
    ;; give up if more than 150 attempts
    ((> attemptnum 150)
     (debug:print 0 *default-log-port* "ERROR: 150 tries to start/connect to server. Giving up.")
     (exit 1))
 
-   ;;DOT CASE2 [label="local\nreadonly\nquery"];
-   ;;DOT MUTEXLOCK -> CASE2; {rank=same "case 2" CASE2}
-   ;;DOT CASE2 -> "rmt:open-qry-close-locally";
    ;; readonly mode, read request-  handle it - case 2
    ((and readonly-mode
          (member cmd api:read-only-queries)) 
     (mutex-unlock! *rmt-mutex*)
     (debug:print-info 12 *default-log-port* "rmt:send-receive, case 2")
     (rmt:open-qry-close-locally cmd 0 params)
     )
 
-   ;;DOT CASE3 [label="write in\nread-only mode"];
-   ;;DOT MUTEXLOCK -> CASE3 [label="readonly\nmode?"]; {rank=same "case 3" CASE3}
-   ;;DOT CASE3 -> "#f";
    ;; readonly mode, write request.  Do nothing, return #f
    (readonly-mode (extras-readonly-mode *rmt-mutex* *default-log-port* cmd params))
 
    ;; This block was for pre-emptively resetting the connection if there had been no communication for some time.
    ;; I don't think it adds any value. If the server is not there, just fail and start a new connection.
    ;; also, the expire-time calculation might not be correct. We want, time-since-last-server-access > (server:get-timeout)
    ;;
-   ;;DOT CASE4 [label="reset\nconnection"];
-   ;;DOT MUTEXLOCK -> CASE4 [label="have connection,\nlast_access > expire_time"]; {rank=same "case 4" CASE4}
-   ;;DOT CASE4 -> "rmt:send-receive";
    ;; reset the connection if it has been unused too long
    ((and runremote
          (remote-api-url runremote)
 	 (> (current-seconds) ;; if it has been more than server-timeout seconds since last contact, close this connection and start a new on
 	    (+ (remote-last-access runremote)
@@ -185,61 +184,27 @@
     ;; moving this setting of runremote conndat to #f to inside the http-transport:close-connections
     ;; (remote-conndat-set! runremote #f) ;; invalidate the connection, thus forcing a new connection.
     (mutex-unlock! *rmt-mutex*)
     (rmt:send-receive cmd rid params attemptnum: attemptnum))
    
-   ;;DOT CASE5 [label="local\nread"];
-   ;;DOT MUTEXLOCK -> CASE5 [label="server not required,\non homehost,\nread-only query"]; {rank=same "case 5" CASE5};
-   ;;DOT CASE5 -> "rmt:open-qry-close-locally";
-
    ;; on homehost and this is a read
    ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required
 	 (rmt:on-homehost? runremote)
          (member cmd api:read-only-queries))   ;; this is a read
     (mutex-unlock! *rmt-mutex*)
     (debug:print-info 12 *default-log-port* "rmt:send-receive, case  5")
     (rmt:open-qry-close-locally cmd 0 params))
 
-   ;;DOT CASE6 [label="init\nremote"];
-   ;;DOT MUTEXLOCK -> CASE6 [label="on homehost,\nwrite query,\nhave server,\ncan't reach it"]; {rank=same "case 6" CASE6};
-   ;;DOT CASE6 -> "rmt:send-receive";
-   ;; on homehost and this is a write, we already have a server, but server has died
-
-   ;; reinstate this keep-alive section but inject a time condition into the (add ...
-   ;;
-   ;; ((and (cdr (remote-hh-dat runremote))           ;; on homehost
-   ;;       (not (member cmd api:read-only-queries))  ;; this is a write
-   ;;       (remote-server-url runremote)             ;; have a server
-   ;;       (not (server:ping (remote-server-url runremote) (remote-server-id runremote))))  ;; server has died. NOTE: this is not a cheap call! Need better approach.
-   ;;  (debug:print 0 *default-log-port* "WARNING: server appears to have died, trying to reconnect, case 6")
-   ;;  (http-transport:close-connections area-dat: runremote) ;; make sure to clean up
-   ;;  (set! *runremote* (make-remote))
-   ;;  (let* ((server-info (remote-server-info *runremote*))) 
-   ;;        (if server-info
-   ;; 		(begin
-   ;; 		  (remote-server-url-set! *runremote* (server:record->url server-info))
-   ;;              (remote-server-id-set! *runremote* (server:record->id server-info)))))
-   ;;  (remote-force-server-set! runremote (common:force-server?))
-   ;;  (mutex-unlock! *rmt-mutex*)
-   ;;  (debug:print-info 12 *default-log-port* "rmt:send-receive, case  6")
-   ;;  (rmt:send-receive cmd rid params attemptnum: attemptnum))
-
-   ;;DOT CASE7 [label="homehost\nwrite"];
-   ;;DOT MUTEXLOCK -> CASE7 [label="server not required,\non homehost,\na write,\nhave a server"]; {rank=same "case 7" CASE7};
-   ;;DOT CASE7 -> "rmt:open-qry-close-locally";
    ;; on homehost and this is a write, we already have a server
    ((and (not (remote-force-server runremote))     ;; honor forced use of server, i.e. server NOT required
 	 (cdr (remote-hh-dat runremote))           ;; on homehost
          (not (member cmd api:read-only-queries))  ;; this is a write
          (remote-server-url runremote))            ;; have a server (needed to sync written data back)
     (mutex-unlock! *rmt-mutex*)
     (debug:print-info 12 *default-log-port* "rmt:send-receive, case  4.1")
     (rmt:open-qry-close-locally cmd 0 params))
 
-   ;;DOT CASE8 [label="force\nserver"];
-   ;;DOT MUTEXLOCK -> CASE8 [label="server not required,\nhave homehost info,\nno connection yet,\nnot a read-only query"]; {rank=same "case 8" CASE8};
-   ;;DOT CASE8 -> "rmt:open-qry-close-locally";
    ;;  on homehost, no server contact made and this is a write, passively start a server 
    ((and (not (remote-force-server runremote))     ;; honor forced use of server, i.e. server NOT required
 	 (cdr (remote-hh-dat runremote))           ;; have homehost
          (not (remote-server-url runremote))       ;; no connection yet
 	 (not (member cmd api:read-only-queries))) ;; not a read-only query
@@ -286,11 +251,10 @@
    ;;DOT MUTEXLOCK -> CASE11 [label="else"]; {rank=same "case 11" CASE11};
    ;;DOT CASE11 -> "rmt:send-receive" [label="call failed"];
    ;;DOT CASE11 -> "RESULT" [label="call succeeded"];
    ;; not on homehost, do server query
    (else (extras-case-11 *default-log-port* runremote cmd params attemptnum rid))))
-;;DOT }
 
 ;; bunch of small functions factored out of send-receive to make debug easier
 ;;
 
 (define (extras-case-11 *default-log-port* runremote cmd params attemptnum rid)

Index: runs.scm
==================================================================
--- runs.scm
+++ runs.scm
@@ -22,10 +22,11 @@
      sxml-modifications matchable)
 
 (declare (unit runs))
 (declare (uses db))
 (declare (uses common))
+(declare (uses commonmod))
 (declare (uses items))
 (declare (uses runconfig))
 (declare (uses tests))
 (declare (uses server))
 (declare (uses mt))
@@ -37,10 +38,12 @@
 (include "db_records.scm")
 (include "run_records.scm")
 (include "test_records.scm")
 
 ;; (include "debugger.scm")
+
+(import commonmod)
 
 ;; use this struct to facilitate refactoring
 ;;
 
 (defstruct runs:dat

Index: server.scm
==================================================================
--- server.scm
+++ server.scm
@@ -101,24 +101,24 @@
   (if *server-id* *server-id*
       (let ((sig (server:mk-signature))) ;; clients re-use the server:mk-signature logic
         (set! *server-id* sig)
         *server-id*)))
 
-;; When using zmq this would send the message back (two step process)
-;; with spiffy or rpc this simply returns the return data to be returned
-;; 
-(define (server:reply return-addr query-sig success/fail result)
-  (debug:print-info 11 *default-log-port* "server:reply return-addr=" return-addr ", result=" result)
-  ;; (send-message pubsock target send-more: #t)
-  ;; (send-message pubsock 
-  (case (server:get-transport)
-    ((rpc)  (db:obj->string (vector success/fail query-sig result)))
-    ((http) (db:obj->string (vector success/fail query-sig result)))
-    ((fs)   result)
-    (else 
-     (debug:print-error 0 *default-log-port* "unrecognised transport type: " *transport-type*)
-     result)))
+;; ;; When using zmq this would send the message back (two step process)
+;; ;; with spiffy or rpc this simply returns the return data to be returned
+;; ;; 
+;; (define (server:reply return-addr query-sig success/fail result)
+;;   (debug:print-info 11 *default-log-port* "server:reply return-addr=" return-addr ", result=" result)
+;;   ;; (send-message pubsock target send-more: #t)
+;;   ;; (send-message pubsock 
+;;   (case (server:get-transport)
+;;     ((rpc)  (db:obj->string (vector success/fail query-sig result)))
+;;     ((http) (db:obj->string (vector success/fail query-sig result)))
+;;     ((fs)   result)
+;;     (else 
+;;      (debug:print-error 0 *default-log-port* "unrecognised transport type: " *transport-type*)
+;;      result)))
 
 ;; Given an area path,  start a server process    ### NOTE ### > file 2>&1 
 ;; if the target-host is set 
 ;; try running on that host
 ;;   incidental: rotate logs in logs/ dir.
@@ -676,11 +676,11 @@
 		     (else
 		      #f))))
     (cond
      ((and (list? host-port)
 	   (eq? (length host-port) 2))
-      (let* ((myrunremote (make-remote))
+      (let* ((myrunremote (make-and-init-remote *toppath*))
 	     (iface       (car host-port))
 	     (port        (cadr host-port))
 	     (server-dat  (client:connect iface port server-id myrunremote))
 	     (login-res   (rmt:login-no-auto-client-setup myrunremote)))
 	(http-transport:close-connections myrunremote)

ADDED   tcp-transportmod.scm
Index: tcp-transportmod.scm
==================================================================
--- /dev/null
+++ tcp-transportmod.scm
@@ -0,0 +1,472 @@
+;;======================================================================
+;; Copyright 2017, Matthew Welland.
+;; 
+;; This file is part of Megatest.
+;; 
+;;     Megatest is free software: you can redistribute it and/or modify
+;;     it under the terms of the GNU General Public License as published by
+;;     the Free Software Foundation, either version 3 of the License, or
+;;     (at your option) any later version.
+;; 
+;;     Megatest is distributed in the hope that it will be useful,
+;;     but WITHOUT ANY WARRANTY; without even the implied warranty of
+;;     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;;     GNU General Public License for more details.
+;; 
+;;     You should have received a copy of the GNU General Public License
+;;     along with Megatest.  If not, see <http://www.gnu.org/licenses/>.
+
+;;======================================================================
+
+(declare (unit tcp-transportmod))
+(declare (uses debugprint))
+(declare (uses commonmod))
+(declare (uses dbfile))
+(declare (uses dbmod))
+
+(module tcp-transportmod
+	*
+	
+  (import scheme
+	  (prefix sqlite3 sqlite3:)
+	  chicken
+	  data-structures
+
+	  address-info
+	  directory-utils
+	  extras
+	  files
+	  hostinfo
+	  matchable
+	  md5
+	  message-digest
+	  ports
+	  posix
+	  regex
+	  regex-case
+	  s11n
+	  srfi-1
+	  srfi-18
+	  srfi-4
+	  srfi-69
+	  stack
+	  typed-records
+	  tcp-server
+	  tcp
+	  
+	  debugprint
+	  commonmod
+	  dbfile
+	  dbmod
+	)
+
+;;======================================================================
+;; client
+;;======================================================================
+
+;; (define keep-age-param (make-parameter 10)) ;; qif file age, if over move to attic
+
+(defstruct tt-conn
+  host
+  port
+  dbfname
+  server-id
+  server-start
+  pid
+)
+
+(defstruct tt
+  ;; client related
+  (conns (make-hash-table)) ;; dbfname -> conn
+
+  ;; server related
+  (areapath     #f)
+  (host         #f)
+  (port         #f)
+  (conn         #f)
+  (cleanup-proc #f)
+  (handler      #f) ;; receives data and responds
+  (socket       #f)
+  (thread       #f)
+  (host-port    #f)
+  (cmd-thread   #f)
+  )
+
+(define (tt:make-remote areapath)
+  (make-tt area: areapath))
+
+;; do all the busy work of finding and setting up conn for
+;; connecting to a server
+;; 
+(define (tt:client-connect-to-server ttdat dbfname run-id)
+  (let* ((conn (hash-table-ref/default (tt-conns ttdat) dbfname #f)))
+    (if conn
+	conn ;; we are already connected to the server
+	(let* ((sdat (tt:get-current-server-info ttdat dbfname run-id)))
+	  (match sdat
+	    ((host port start-time server-id pid)
+	     (let ((conn (make-tt-conn
+			  host: host
+			  port: port
+			  dbfname: dbfname
+			  server-id: server-id
+			  server-start: start-time
+			  pid: pid)))
+	       (hash-table-set! (tt-conns ttdat) dbfname conn)
+	       conn))
+	    (else
+	     (tt:server-process-run
+	      (tt-areapath ttdat)
+	      (dbfile:testsuite-name)
+	      (common:find-local-megatest)
+	      run-id)
+	     (thread-sleep! 1)
+	     (tt:client-connect-to-server ttdat dbfname run-id)))))))
+
+;; client side handler
+;;
+(define (tt:handler ttdat cmd run-id params attemptnum area-dat areapath readonly-mode dbfname testsuite mtexe)
+  ;; NOTE: areapath is passed in and in tt struct. We'll use passed in value for now.
+  (let* ((conn (tt:client-connect-to-server ttdat dbfname run-id))) ;; (hash-table-ref/default (tt-conns ttdat) dbfname #f)))
+    (if conn
+	;; have connection, call the server
+	(let* ((res (tt:send-receive ttdat conn cmd run-id params)))
+	  (cond
+	   ((member res '(busy starting))
+	    (thread-sleep! 1)
+	    (tt:handler  ttdat cmd run-id params attemptnum area-dat areapath readonly-mode dbfname testsuite mtexe))
+	   (else
+	    res)))
+	(begin
+	  (thread-sleep! 1) ;; give it a rest and try again
+	  (tt:handler ttdat cmd run-id params attemptnum area-dat areapath readonly-mode dbfname testsuite mtexe)))))
+
+	;; no conn yet, find and or start and find a server
+;; 	(let* ((server (tt:find-server ttdat dbfname)))
+;; 	  (if server
+;; 	      (let* ((conn (tt:client-connect-to-server server)))
+;; 		(hash-table-set! (tt-conns ttdat) dbfname conn)
+;; 		(tt:handler  ttdat cmd run-id params attemptnum area-dat areapath readonly-mode
+;; 			     dbfname testsuite mtexe))
+;; 	      ;; no server, try to start a server process
+;; 	      (begin
+;; 		(tt:server-process-run areapath testsuite mtexe run-id) ;;  #!key (profile-mode "")) 
+;; 		(thread-sleep! 1)
+;; 		(tt:handler  ttdat cmd run-id params attemptnum area-dat areapath
+;; 			     readonly-mode dbfname testsuite mtexe)))))))
+
+(define (tt:bid-for-servership run-id)
+  #f)
+
+(define (tt:get-current-server-info ttdat dbfname run-id)
+  (let* ((sfiles (tt:find-server ttdat dbfname)))
+    (case (length sfiles)
+      ((0) #f) ;; no server around
+      ((1) (tt:server-get-info (car sfiles)))
+      (else #f) ;; we'll want to wait until extra servers have exited
+      )))
+
+(define (tt:send-receive ttdat conn cmd run-id params)
+  (let* ((host-port (conc (tt-conn-host conn)":"(tt-conn-port conn)))
+	 (dat       (list cmd run-id params)))
+    (let-values (((inp oup)(tcp-connect host-port)))
+      (let ((res (if (and inp oup)
+		     (begin
+		       (serialize dat oup)
+		       (close-output-port oup)
+		       (deserialize inp))
+		     (begin
+		       (debug:print 0 *default-log-port* "ERROR: send called but no receiver has been setup. Please call setup first!")
+		       #f))))
+	(close-input-port inp)
+	;; (mutex-unlock! *send-mutex*) ;; DOESN'T SEEM TO HELP
+	res))))
+
+;;======================================================================
+;; server
+;;======================================================================
+
+(define (tt:sync-dbs ttdat)
+  #f)
+
+;; start the listener and start responding to requests
+;;
+;; NOTE: organise by dbfname, not run-id so we don't need
+;;       to pull in more modules
+;;
+(define (tt:start-server areapath run-id dbfname handler)
+  ;; is there already a server for this dbfile? Then exit.
+  (let* ((ttdat  (make-tt areapath: areapath))
+	 (servers (tt:find-server ttdat dbfname)))
+    (tt-handler-set! ttdat handler)
+    (if (null? servers)
+	(let* ((dbstruct (dbmod:open-dbmoddb areapath run-id (dbfile:db-init-proc))))
+	  (tt:start-tcp-server ttdat) ;; start the tcp-server which applies handler to incoming data
+	  (tt:keep-running ttdat dbfname handler))
+	(begin
+	  (debug:print 0 *default-log-port* "INFO: found server(s) already running for db "dbfname", "(string-intersperse servers ",")" Exiting.")
+	  (exit)))))
+
+;; find a port and start tcp-server
+;;
+(define (tt:start-tcp-server ttdat)
+  (setup-listener ttdat)
+  (let* ((socket   (tt-socket ttdat))
+	 (handler  (tt-handler    ttdat)))
+    ((make-tcp-server socket handler)
+     #t ;; yes, send error messages to std-err
+     )))
+
+(define (tt:keep-running ttdat dbfile)
+  ;; verfiy conn for ready
+  ;; listener socket has been started by this stage
+  (debug:print 0 *default-log-port* "INFO: Got here!!!!"))
+
+;; ;; given an already set up uconn start the cmd-loop
+;; ;;
+;; (define (tt:cmd-loop ttdat)
+;;   (let* ((serv-listener (-socket uconn))
+;; 	 (listener      (lambda ()
+;; 			  (let loop ((state 'start))
+;; 			    (let-values (((inp oup)(tcp-accept serv-listener)))
+;; 			      ;; (mutex-lock! *send-mutex*) ;; DOESN'T SEEM TO HELP
+;; 			      (let* ((rdat  (deserialize inp)) ;; '(my-host-port qrykey cmd params)
+;; 				     (resp  (ulex-handler uconn rdat)))
+;; 				(serialize resp oup)
+;; 				(close-input-port inp)
+;; 				(close-output-port oup)
+;; 				;; (mutex-unlock! *send-mutex*) ;; DOESN'T SEEM TO HELP
+;; 				)
+;; 			      (loop state))))))
+;;     ;; start N of them
+;;     (let loop ((thnum   0)
+;; 	       (threads '()))
+;;       (if (< thnum 100)
+;; 	  (let* ((th (make-thread listener (conc "listener" thnum))))
+;; 	    (thread-start! th)
+;; 	    (loop (+ thnum 1)
+;; 		  (cons th threads)))
+;; 	  (map thread-join! threads)))))
+;; 
+;; 
+;; 
+;; (define (wait-and-close uconn)
+;;   (thread-join! (udat-cmd-thread uconn))
+;;   (tcp-close (udat-socket uconn)))
+;; 
+;; 
+
+(define (tt:shutdown-server ttdat)
+  (let* ((cleanproc (tt-cleanup-proc ttdat)))
+    (if cleanproc (cleanproc))
+    (tcp-close (tt-socket ttdat)) ;; close up ports here
+    ))
+
+;; (define (wait-and-close uconn)
+;;   (thread-join! (tt-cmd-thread uconn))
+;;   (tcp-close (tt-socket uconn)))
+
+;; return servid
+;; side-effects:
+;;   ttdat-cleanup-proc is populated with function to remove the serverinfo file
+(define (tt:create-server-registration-file ttdat dbfname)
+  (let* ((areapath (tt-areapath ttdat))
+	 (servdir  (tt:get-servinfo-dir areapath))
+	 (conn     (hash-table-ref/default (tt-conns ttdat) dbfname #f)))
+    (assert conn "FATAL: tt:create-server-registration-file called with no conn, dbfname="dbfname)
+    (let* ((host    (tt-conn-host conn))
+	   (port    (tt-conn-port conn))
+	   (servinf (conc servdir"/"host":"port"-"(current-process-id)":"dbfname))
+	   (serv-id (tt:mk-signature areapath))
+	   (clean-proc (lambda ()
+			 (delete-file* servinf))))
+      (tt-cleanup-proc-set! ttdat clean-proc)
+      (with-output-to-file servinf
+	(lambda ()
+	  (print "SERVER STARTED: "host":"port" AT "(current-seconds)" server-id: "serv-id" pid: "(current-process-id)" dbfname: "dbfname)))
+      serv-id)))
+
+;; find valid server
+;; get servers listed, last part of name must match :<dbfname>
+;; if more than one, wait one second and look again
+;; future: ping oldest, if alive remove other :<dbfname> files
+;;
+(define (tt:find-server ttdat dbfname)
+  (let* ((areapath (tt-areapath ttdat))
+	 (servdir  (tt:get-servinfo-dir areapath))
+	 (sfiles   (glob (conc servdir"/*:"dbfname))))
+    sfiles))
+
+;; given a path to a server info file return: host port startseconds server-id
+;; example of what it's looking for in the log file:
+;;     SERVER STARTED: 10.38.175.67:50216 AT 1616502350.0 server-id: 4907e90fc55c7a09694e3f658c639cf4 
+;;
+(define (tt:server-get-info logf)
+  (let ((server-rx    (regexp "^SERVER STARTED: (\\S+):(\\d+) AT ([\\d\\.]+) server-id: (\\S+) pid: (\\d+)")) ;; SERVER STARTED: host:port AT timesecs server id
+        (dbprep-rx    (regexp "^SERVER: dbprep"))
+        (dbprep-found 0)
+	(bad-dat      (list #f #f #f #f #f)))
+    (handle-exceptions
+     exn
+     (begin
+       ;; WARNING: this is potentially dangerous to blanket ignore the errors
+       (if (file-exists? logf)
+	   (debug:print-info 2 *default-log-port* "Unable to get server info from "logf", exn=" exn))
+       bad-dat) ;; no idea what went wrong, call it a bad server
+     (with-input-from-file
+	 logf
+       (lambda ()
+	 (let loop ((inl  (read-line))
+		    (lnum 0))
+	   (if (not (eof-object? inl))
+	       (let ((mlst (string-match server-rx inl))
+		     (dbprep (string-match dbprep-rx inl)))
+		 (if dbprep (set! dbprep-found 1))
+		 (if (not mlst)
+		     (if (< lnum 500) ;; give up if more than 500 lines of server log read
+			 (loop (read-line)(+ lnum 1))
+			 (begin 
+                           (debug:print-info 0 *default-log-port* "Unable to get server info from first 500 lines of " logf )
+                           bad-dat))
+		     (match mlst
+			    ((_ host port start server-id pid)
+			     (list host
+				   (string->number port)
+				   (string->number start)
+				   server-id
+				   (string->number pid)))
+			    (else
+			     (debug:print 0 *default-log-port* "ERROR: did not recognise SERVER line info "mlst)
+			     bad-dat))))
+	       (begin 
+		 (if dbprep-found
+		     (begin
+		       (debug:print-info 2 *default-log-port* "Server is in dbprep at " (common:human-time))
+		       (thread-sleep! 0.5)) ;; was 25 sec but that blocked things from starting?
+		     (debug:print-info 0 *default-log-port* "Unable to get server info from " logf " at " (seconds->time-string (current-seconds))))
+		 bad-dat))))))))
+
+;; Given an area path,  start a server process    ### NOTE ### > file 2>&1 
+;; if the target-host is set 
+;; try running on that host
+;;   incidental: rotate logs in logs/ dir.
+;;
+(define  (tt:server-process-run areapath testsuite mtexe run-id #!key (profile-mode "")) ;; areapath is *toppath* for a given testsuite area
+  (let* ((logfile   (conc areapath "/logs/server.log")) ;; -" curr-pid "-" target-host ".log"))
+	 (cmdln     (conc
+		     mtexe
+		     " -server - ";; (or target-host "-")
+		     " -m testsuite:" testsuite
+		     " -run-id " (or run-id "main")
+		     " -db "  (dbmod:run-id->dbfname run-id)
+		     " " profile-mode
+		     ))) ;; (conc " >> " logfile " 2>&1 &")))))
+    ;; we want the remote server to start in *toppath* so push there
+    (push-directory areapath)
+    (debug:print 0 *default-log-port* "INFO: Trying to start server in tcp mode (" cmdln ") ...")
+    (debug:print 0 *default-log-port* "INFO: starting server at " (common:human-time))
+    (system (conc "nbfake " cmdln))
+    (pop-directory)))
+
+;;======================================================================
+;; tcp connection stuff
+;;======================================================================
+
+;; create a tcp listener and return a populated udat struct with
+;; my port, address, hostname, pid etc.
+;; return #f if fail to find a port to allocate.
+;;
+;;  if udata-in is #f create the record
+;;  if there is already a serv-listener return the udata
+;;
+(define (setup-listener uconn #!optional (port 4242))
+  (assert (tt? uconn) "FATAL: setup-listener called with wrong struct "uconn)
+  (handle-exceptions
+   exn
+   (if (< port 65535)
+       (setup-listener uconn (+ port 1))
+       #f)
+   (connect-listener uconn port)))
+
+(define (connect-listener uconn port)
+  ;; (tcp-listener-socket LISTENER)(socket-name so)
+  ;; sockaddr-address, sockaddr-port, sockaddr->string
+  (let* ((tlsn (tcp-listen port 1000 #f)) ;; (tcp-listen TCPPORT [BACKLOG [HOST]])
+	 (addr (get-my-best-address))) ;; (hostinfo-addresses (host-information (current-hostname)))
+    (tt-port-set!      uconn port)
+    (tt-host-port-set! uconn (conc addr":"port))
+    (tt-socket-set!    uconn tlsn)
+    uconn))
+
+
+
+;;======================================================================
+;; utils
+;;======================================================================
+
+;; Generate a unique signature for this server
+(define (tt:mk-signature areapath)
+  (message-digest-string (md5-primitive) 
+			 (with-output-to-string
+			   (lambda ()
+			     (write (list areapath
+                                          (current-process-id)
+					  (argv)))))))
+
+
+(define (tt:get-best-guess-address hostname)
+  (let ((res #f))
+    (for-each 
+     (lambda (adr)
+       (if (not (eq? (u8vector-ref adr 0) 127))
+	   (set! res adr)))
+     ;; NOTE: This can fail when there is no mention of the host in /etc/hosts. FIXME
+     (vector->list (hostinfo-addresses (hostname->hostinfo hostname))))
+    (string-intersperse 
+     (map number->string
+	  (u8vector->list
+	   (if res res (hostname->ip hostname)))) ".")))
+
+(define (tt:get-servinfo-dir areapath)
+  (let* ((spath (conc areapath"/.servinfo")))
+    (if (not (file-exists? spath))
+	(create-directory spath #t))
+    spath))
+
+;;======================================================================
+;; network utilities
+;;======================================================================
+
+;; NOTE: Look at address-info egg as alternative to some of this
+
+(define (rate-ip ipaddr)
+  (regex-case ipaddr
+    ( "^127\\..*" _ 0 )
+    ( "^(10\\.0|192\\.168)\\..*" _ 1 )
+    ( else 2 ) ))
+
+;; Change this to bias for addresses with a reasonable broadcast value?
+;;
+(define (ip-pref-less? a b)
+  (> (rate-ip a) (rate-ip b)))
+
+(define (get-my-best-address)
+  (let ((all-my-addresses (get-all-ips)))
+    (cond
+     ((null? all-my-addresses)
+      (get-host-name))                                          ;; no interfaces?
+     ((eq? (length all-my-addresses) 1)
+      (car all-my-addresses))                      ;; only one to choose from, just go with it
+     (else
+      (car (sort all-my-addresses ip-pref-less?))))))
+
+(define (get-all-ips-sorted)
+  (sort (get-all-ips) ip-pref-less?))
+
+(define (get-all-ips)
+  (map address-info-host
+       (filter (lambda (x)
+		 (equal? (address-info-type x) "tcp"))
+	       (address-infos (get-host-name)))))
+
+)

ADDED   transport-mode.scm.template
Index: transport-mode.scm.template
==================================================================
--- /dev/null
+++ transport-mode.scm.template
@@ -0,0 +1,3 @@
+;; 'http or 'tcp
+;; (rmt:transport-mode 'tcp)
+(rmt:transport-mode 'http)