Index: Makefile
==================================================================
--- Makefile
+++ Makefile
@@ -24,10 +24,18 @@
 SRCFILES =
 
 # all : $(PREFIX)/bin/.$(ARCHSTR) mtest dboard mtut ndboard
 all : $(PREFIX)/bin/.$(ARCHSTR) mtest
 # add dboard mtut and tcmt back later
+
+# Configuration stuff
+transport-flavor :
+	@echo Creating transport-flavor with full as flavor. Options include: full, simple
+	echo full > transport-flavor
+
+ulex.scm dbmgrmod.scm : ulex.scm.template dbmgrmod.scm.template transport-flavor ulex-*/*scm
+	./configure
 
 # module source files
 MSRCFILES = autoload.scm dbi.scm ducttape-lib.scm pkts.scm stml2.scm	\
             cookie.scm mutils.scm mtargs.scm apimod.scm	ulex.scm	\
             configfmod.scm commonmod.scm dbmod.scm rmtmod.scm		\
@@ -36,23 +44,17 @@
             itemsmod.scm keysmod.scm mtmod.scm rmtmod.scm		\
             tasksmod.scm pgdb.scm launchmod.scm runsmod.scm		\
             portloggermod.scm archivemod.scm ezstepsmod.scm		\
             subrunmod.scm bigmod.scm testsmod.scm dbmgrmod.scm
 
-# GUISRCF = 
-
 GUIMODFILES = tree.scm dashboard-tests.scm  vgmod.scm \
               dashboard-context-menu.scm dcommon.scm 
 
-# dashboard-guimonitor.scm 
-
 mofiles/dashboard-context-menu.o : mofiles/dcommon.o
 mofiles/dashboard-tests.o : mofiles/dcommon.o
-# mofiles/dcommon.o mofiles/tree.o : mofiles/gutils.o
 
 OFILES   = $(SRCFILES:%.scm=%.o)
-# GOFILES  = $(GUISRCF:%.scm=%.o)
 
 MOFILES = $(addprefix mofiles/,$(MSRCFILES:%.scm=%.o))
 GMOFILES = $(addprefix mofiles/,$(GUIMODFILES:%.scm=%.o))
 
 # compiled import files

ADDED   attic/configure
Index: attic/configure
==================================================================
--- /dev/null
+++ attic/configure
@@ -0,0 +1,101 @@
+#!/bin/bash
+
+#  Copyright 2006-2017, Matthew Welland.
+# 
+# This file is part of Megatest.
+# 
+#     Megatest is free software: you can redistribute it and/or modify
+#     it under the terms of the GNU General Public License as published by
+#     the Free Software Foundation, either version 3 of the License, or
+#     (at your option) any later version.
+# 
+#     Megatest is distributed in the hope that it will be useful,
+#     but WITHOUT ANY WARRANTY; without even the implied warranty of
+#     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#     GNU General Public License for more details.
+# 
+#     You should have received a copy of the GNU General Public License
+#     along with Megatest.  If not, see <http://www.gnu.org/licenses/>.
+
+# Configure the build
+
+if [[ "$1"x == "x" ]];then
+    PREFIX=$PWD
+else
+    PREFIX=$1
+fi
+
+
+#======================================================================
+# Configure stuff needed for eggs
+#======================================================================
+
+function configure_dependencies () {
+
+    #======================================================================
+    # libnanomsg
+    #======================================================================
+
+    if [[ ! $(ls /usr/lib/*/libnanomsg*) ]];then
+	echo "libnanomsg build needed."
+	echo "BUILD_NANOMSG=yes" >> makefile.inc
+    fi
+
+    #======================================================================
+    # postgresql libraries
+    #======================================================================
+
+    if [[ ! $(ls /usr/lib/*/libpq.*) ]];then
+	echo "Postgresql build needed."
+	echo "BUILD_POSTGRES=yes" >> makefile.inc
+    fi
+
+    if [[ ! $(ls /usr/lib/*/libsqlite3.*) ]];then
+    echo "Sqlite3 build needed."
+	echo "BUILD_SQLITE3=yes" >> makefile.inc
+    fi
+
+}
+
+#======================================================================
+# Initialize makefile.inc
+#======================================================================
+
+echo "" > makefile.inc
+
+#======================================================================
+# Do we need Chicken?
+#======================================================================
+
+if [[ -e /usr/bin/sw_vers ]]; then
+    ARCHSTR=$(/usr/bin/sw_vers -productVersion)
+else
+    ARCHSTR=$(lsb_release -sr)
+fi
+
+echo "CHICKEN_PREFIX=$PREFIX/.$ARCHSTR" >> makefile.inc
+CHICKEN_PREFIX=$PREFIX/bin/.$ARCHSTR
+
+if [[ ! $(type csi) ]];then
+    echo "Chicken build needed."
+    echo "BUILD_CHICKEN=yes" >> makefile.inc
+    configure_dependencies
+    echo "include chicken.makefile" >> makefile.inc
+else
+    echo "CSIPATH=$(which csi)" >> makefile.inc
+    CSIPATH=$(which csi)
+    echo "CKPATH=$(dirname $(dirname $CSIPATH))" >> makefile.inc
+fi
+
+# Make setup scripts
+echo "#!/bin/bash" > setup.sh
+echo "export PATH=$CHICKEN_PREFIX/bin:\$PATH" >> setup.sh
+echo "export LD_LIBRARY_PATH=$CHICKEN_PREFIX/lib" >> setup.sh
+echo 'exec "$@"' >> setup.sh
+chmod a+x setup.sh
+
+echo "setenv PATH $CHICKEN_PREFIX/bin:\$PATH" > setup.csh
+echo "setenv LD_LIBRARY_PATH $CHICKEN_PREFIX/lib" >> setup.csh
+
+echo "All done creating makefile.inc, feel free to edit it!"
+echo "run \"setup.sh bash\" or source setup.csh to get PATH and LD_LIBRARY_PATH adjusted"

Index: commonmod.scm
==================================================================
--- commonmod.scm
+++ commonmod.scm
@@ -184,10 +184,11 @@
 make-and-init-bigdata
 call-with-environment-variables
 common:simple-file-lock
 common:simple-file-lock-and-wait
 common:simple-file-release-lock
+common:with-simple-file-lock
 common:fail-safe
 get-file-descriptor-count
 common:get-this-exe-fullpath
 common:get-sync-lock-filepath
 common:find-local-megatest
@@ -1242,10 +1243,17 @@
 (define (common:simple-file-release-lock fname)
   (handle-exceptions
       exn
       #f ;; I don't really care why this failed (at least for now)
     (delete-file* fname)))
+
+(define (common:with-simple-file-lock fname proc)
+  (let* ((lkfname (conc fname ".lock")))
+    (common:simple-file-lock-and-wait lkfname)
+    (let ((res (proc)))
+      (common:simple-file-release-lock lkfname)
+      res)))
 
 ;;======================================================================
 ;; PUlled below from common.scm
 ;;======================================================================
 

Index: configfmod.scm
==================================================================
--- configfmod.scm
+++ configfmod.scm
@@ -49,10 +49,11 @@
 	 configf:write-alist
 	 configf:write-config
 	 find-config
 	 getenv
 	 mytarget
+	 my-with-lock
 	 nice-path
 	 process:cmd-run->list
 	 runconfig:read
 	 runconfigs-get
 	 safe-setenv
@@ -114,14 +115,19 @@
 ;;======================================================================
 
 ;; while targets are Megatest specific they are a useful concept
 (define mytarget (make-parameter #f))
 
+;; fake locker
+(define (fake-locker fname proc)(proc))
+
 ;; locking is optional, many environments don't care (e.g. running on one machine)
 ;; NOTE: the locker must follow the same syntax as with-dot-lock*
+;;       with-dot-lock* has problems if /tmp and the file being
+;;       locked are not on the same filesystem
 ;;
-(define my-with-lock (make-parameter with-dot-lock*))
+(define my-with-lock (make-parameter fake-locker)) ;; with-dot-lock*))
 
 ;;======================================================================
 ;; move debug stuff to separate module then put these back where they belong
 ;;======================================================================
 ;;======================================================================
@@ -1190,11 +1196,11 @@
 
 ;;======================================================================
 ;; DO THE LOCKING AROUND THE CALL
 ;;======================================================================
 ;;
-(define (configf:write-alist cdat fname)
+(define (configf:write-alist cdat fname #!optional (check-written #f))
   ;; (if (not (common:faux-lock fname))
   ;; (debug:print 0 *default-log-port* "INFO: NEED LOCKING ADDED HERE " fname)
   ((my-with-lock)
    fname
    (lambda ()
@@ -1202,26 +1208,27 @@
             (res
              (begin
                (with-output-to-file fname ;; first write out the file
 		 (lambda ()
                    (pp dat)))
-               ;; I don't like this. It makes write-alist opaque and complicated. -mrw-
-               (if (file-exists? fname)   ;; now verify it is readable
-                   (if (configf:read-alist fname)
-                       #t ;; data is good.
-                       (begin
-			 (handle-exceptions
-			  exn
-			  (begin
-			    (debug:print 0 *default-log-port* "deleting " fname " failed, exn=" exn)
-			    #f)
-			  (debug:print 0 *default-log-port* "WARNING: content " dat " for cache " fname " is not readable. Deleting generated file.")
-			  (delete-file fname))
-			 #f))
-                   #f))))
+               ;; I don't like this. It makes write-alist complicated
+	       ;; move to something like write-and-verify-alist. -mrw-
+               (if check-written
+		   (if (file-exists? fname)   ;; now verify it is readable
+		       (if (configf:read-alist fname)
+			   'data-good ;; data is good.
+			   (handle-exceptions
+			       exn
+			       (begin
+				 (debug:print 0 *default-log-port* "deleting " fname " failed, exn=" exn)
+				 'data-bad)
+			     (debug:print 0 *default-log-port* "WARNING: content " dat " for cache " fname " is not readable. Deleting generated file.")
+			     (delete-file fname)))
+		       'data-not-there)
+		   'data-not-checked))))
        res))))
   
 (define (common:get-fields cfgdat)
   (let ((fields (hash-table-ref/default cfgdat "fields" '())))
     (map car fields)))
 
 )

Index: configure
==================================================================
--- configure
+++ configure
@@ -15,87 +15,18 @@
 #     GNU General Public License for more details.
 # 
 #     You should have received a copy of the GNU General Public License
 #     along with Megatest.  If not, see <http://www.gnu.org/licenses/>.
 
-# Configure the build
-
-if [[ "$1"x == "x" ]];then
-    PREFIX=$PWD
-else
-    PREFIX=$1
-fi
-
-
-#======================================================================
-# Configure stuff needed for eggs
-#======================================================================
-
-function configure_dependencies () {
-
-    #======================================================================
-    # libnanomsg
-    #======================================================================
-
-    if [[ ! $(ls /usr/lib/*/libnanomsg*) ]];then
-	echo "libnanomsg build needed."
-	echo "BUILD_NANOMSG=yes" >> makefile.inc
-    fi
-
-    #======================================================================
-    # postgresql libraries
-    #======================================================================
-
-    if [[ ! $(ls /usr/lib/*/libpq.*) ]];then
-	echo "Postgresql build needed."
-	echo "BUILD_POSTGRES=yes" >> makefile.inc
-    fi
-
-    if [[ ! $(ls /usr/lib/*/libsqlite3.*) ]];then
-    echo "Sqlite3 build needed."
-	echo "BUILD_SQLITE3=yes" >> makefile.inc
-    fi
-
-}
-
-#======================================================================
-# Initialize makefile.inc
-#======================================================================
-
-echo "" > makefile.inc
-
-#======================================================================
-# Do we need Chicken?
-#======================================================================
-
-if [[ -e /usr/bin/sw_vers ]]; then
-    ARCHSTR=$(/usr/bin/sw_vers -productVersion)
-else
-    ARCHSTR=$(lsb_release -sr)
-fi
-
-echo "CHICKEN_PREFIX=$PREFIX/.$ARCHSTR" >> makefile.inc
-CHICKEN_PREFIX=$PREFIX/bin/.$ARCHSTR
-
-if [[ ! $(type csi) ]];then
-    echo "Chicken build needed."
-    echo "BUILD_CHICKEN=yes" >> makefile.inc
-    configure_dependencies
-    echo "include chicken.makefile" >> makefile.inc
-else
-    echo "CSIPATH=$(which csi)" >> makefile.inc
-    CSIPATH=$(which csi)
-    echo "CKPATH=$(dirname $(dirname $CSIPATH))" >> makefile.inc
-fi
-
-# Make setup scripts
-echo "#!/bin/bash" > setup.sh
-echo "export PATH=$CHICKEN_PREFIX/bin:\$PATH" >> setup.sh
-echo "export LD_LIBRARY_PATH=$CHICKEN_PREFIX/lib" >> setup.sh
-echo 'exec "$@"' >> setup.sh
-chmod a+x setup.sh
-
-echo "setenv PATH $CHICKEN_PREFIX/bin:\$PATH" > setup.csh
-echo "setenv LD_LIBRARY_PATH $CHICKEN_PREFIX/lib" >> setup.csh
-
-echo "All done creating makefile.inc, feel free to edit it!"
-echo "run \"setup.sh bash\" or source setup.csh to get PATH and LD_LIBRARY_PATH adjusted"
+# Flavors include: simple, full and none
+
+# look at build.config (not a version controlled file and
+# create ulex.scm and dbmgr.scm
+
+if [[ -e transport-flavor ]];then
+    FLAVOR=$(cat transport-flavor)
+else
+    FLAVOR=full
+fi
+
+sed -e "s/FLAVOR/$FLAVOR/" ulex.scm.template > ulex.scm
+sed -e "s/FLAVOR/$FLAVOR/" dbmgrmod.scm.template > dbmgrmod.scm

ADDED   dbmgrmod.scm
Index: dbmgrmod.scm
==================================================================
--- /dev/null
+++ dbmgrmod.scm
@@ -0,0 +1,21 @@
+;;======================================================================
+;; Copyright 2022, Matthew Welland.
+;; 
+;; This file is part of Megatest.
+;; 
+;;     Megatest is free software: you can redistribute it and/or modify
+;;     it under the terms of the GNU General Public License as published by
+;;     the Free Software Foundation, either version 3 of the License, or
+;;     (at your option) any later version.
+;; 
+;;     Megatest is distributed in the hope that it will be useful,
+;;     but WITHOUT ANY WARRANTY; without even the implied warranty of
+;;     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;;     GNU General Public License for more details.
+;; 
+;;     You should have received a copy of the GNU General Public License
+;;     along with Megatest.  If not, see <http://www.gnu.org/licenses/>.
+
+;;======================================================================
+
+(include "ulex-simple/dbmgr.scm")

ADDED   dbmgrmod.scm.template
Index: dbmgrmod.scm.template
==================================================================
--- /dev/null
+++ dbmgrmod.scm.template
@@ -0,0 +1,21 @@
+;;======================================================================
+;; Copyright 2022, Matthew Welland.
+;; 
+;; This file is part of Megatest.
+;; 
+;;     Megatest is free software: you can redistribute it and/or modify
+;;     it under the terms of the GNU General Public License as published by
+;;     the Free Software Foundation, either version 3 of the License, or
+;;     (at your option) any later version.
+;; 
+;;     Megatest is distributed in the hope that it will be useful,
+;;     but WITHOUT ANY WARRANTY; without even the implied warranty of
+;;     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;;     GNU General Public License for more details.
+;; 
+;;     You should have received a copy of the GNU General Public License
+;;     along with Megatest.  If not, see <http://www.gnu.org/licenses/>.
+
+;;======================================================================
+
+(include "ulex-FLAVOR/dbmgr.scm")

Index: dbmod.scm
==================================================================
--- dbmod.scm
+++ dbmod.scm
@@ -3454,12 +3454,14 @@
     (db:with-db
      dbstruct
      run-id
      #f
      (lambda (db)
-       (let* ((stmth (db:get-cache-stmth dbstruct db qry)))
-	 (sqlite3:first-result stmth run-id))))))
+       (let* (#;(stmth (db:get-cache-stmth dbstruct db qry)))
+	 #;(sqlite3:first-result stmth run-id)
+	 (sqlite3:first-result db qry run-id)
+	 )))))
 
 ;; For a given testname how many items are running? Used to determine
 ;; probability for regenerating html
 ;;
 (define (db:get-count-tests-running-for-testname dbstruct run-id testname)

Index: dcommon.scm
==================================================================
--- dcommon.scm
+++ dcommon.scm
@@ -26,10 +26,11 @@
 (declare (uses commonmod))
 (declare (uses configfmod))
 (declare (uses rmtmod))
 (declare (uses mtargs))
 (declare (uses testsmod))
+(declare (uses dbmgrmod))
 
 (module dcommon
 	*
 
  (import scheme
@@ -62,10 +63,11 @@
 	 srfi-1
 	 )
 
 (import mtver
 	dbmod
+	dbmgrmod
 	commonmod
 	debugprint
 	configfmod
 	rmtmod
 	;; gutils

Index: launchmod.scm
==================================================================
--- launchmod.scm
+++ launchmod.scm
@@ -663,11 +663,12 @@
 	  (let* ((tconfig-fname   (conc work-area "/.testconfig"))
 		 (tconfig-tmpfile (conc tconfig-fname ".tmp"))
 		 (tconfig         (tests:get-testconfig test-name item-path tconfigreg #t force-create: #t)) ;; 'return-procs)))
 		 (scripts (configf:get-section tconfig "scripts")))
 	    ;; create .testconfig file
-	    (configf:write-alist tconfig tconfig-tmpfile)
+	    (configf:write-alist tconfig tconfig-tmpfile #t) ;; the #t forces a check of the written data
+	    (assert (file-exists? tconfig-tmpfile) "FATAL: We just wrote the dang file, how can it not exist?")
 	    (move-file tconfig-tmpfile tconfig-fname #t)
 	    (delete-file* ".final-status")
 
 	    ;; extract scripts from testconfig and write them to files in test run dir
 	    (for-each

Index: megatest.scm
==================================================================
--- megatest.scm
+++ megatest.scm
@@ -167,12 +167,14 @@
 ;;   ;; ulex parameters
 ;;   (work-method 'direct)
 ;;   (return-method 'direct)
   
   ;; ulex parameters
-  (work-method   'mailbox)
-  (return-method 'mailbox)
+  ;; (work-method   'mailbox)
+  ;; (return-method 'mailbox)
+
+(my-with-lock common:with-simple-file-lock)
   
 ;; fake out readline usage of toplevel-command
 (define (toplevel-command . a) #f)
 (define *didsomething* #f)  
 (define *db* #f) ;; this is only for the repl, do not use in general!!!!

Index: rmtmod.scm
==================================================================
--- rmtmod.scm
+++ rmtmod.scm
@@ -1191,11 +1191,11 @@
     (if (and no-hurry (debug:debug-mode 18))
 	(rmt:print-db-stats))
     (let ((th1 (make-thread
 		(lambda () ;; thread for cleaning up, give it five seconds
 		  (let* ((start-time (current-seconds)))
-		    (if *db-serv-info*
+		    #;(if *db-serv-info*
 			(let* ((host (servdat-host *db-serv-info*))
 			       (port 	       (servdat-port *db-serv-info*)))
 			  (debug:print-info 0 *default-log-port* "Shutting down server/responder.")
 			  ;;
 			  ;; TODO - add flushing/waiting on the work queue

Index: runsmod.scm
==================================================================
--- runsmod.scm
+++ runsmod.scm
@@ -244,10 +244,23 @@
 	(begin
 	  (hash-table-set! *runs:denoise* key currtime)
 	  #t)
 	#f)))
 
+(define *too-soon-delays* (make-hash-table))
+
+;; to-soon delay, when matching event happened in less than dseconds delay wseconds
+;;
+(define (runs:too-soon-delay key dseconds wseconds)
+  (let* ((last-time (hash-table-ref/default *too-soon-delays* key #f)))
+    (if (and last-time
+	     (< (- (current-seconds) last-time) dseconds))
+	(begin
+	  (debug:print-info 0 *default-log-port* "Whoa, slow down there ... "key" has been too recently seen.")
+	  (thread-sleep! wseconds)))
+    (hash-table-set! *too-soon-delays* key (current-seconds))))
+
 (define (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs)
 
   ;; Take advantage of a good place to exit if running the one-pass methodology
   (if (and (> (runs:dat-can-run-more-tests-count runsdat) 20)
 	   (args:get-arg "-one-pass"))
@@ -1467,11 +1480,13 @@
 			   newtal:      newtal
 			   itemmaps:    itemmaps
 			   ;; prereqs-not-met: prereqs-not-met
 			   )))
 	(runs:dat-regfull-set! runsdat regfull)
-    
+
+	(runs:too-soon-delay (conc "loop delay " hed) 1 1)
+	
 	(if (> num-running 0)
             (set! last-time-some-running (current-seconds)))
 
         (if (> (current-seconds)(+ last-time-some-running (or (configf:lookup *configdat* "setup" "give-up-waiting") 36000)))
             (hash-table-set! *max-tries-hash* tfullname (+ (hash-table-ref/default *max-tries-hash* tfullname 0) 1)))
@@ -1494,10 +1509,11 @@
 	      (if (or (not (null? tal))(not (null? reg)))
 		  (loop (runs:queue-next-hed tal reg reglen regfull)
 			(runs:queue-next-tal tal reg reglen regfull)
 			(runs:queue-next-reg tal reg reglen regfull)
 			reruns))))
+	
         ;; (loop (car tal)(cdr tal) reg reruns))))
 
 	(runs:incremental-print-results run-id)
 	(debug:print 4 *default-log-port* "TOP OF LOOP => "
 		     "test-name: " test-name
@@ -1725,10 +1741,11 @@
 		   (equal? (configf:lookup *configdat* "setup" "run-wait") "yes"))
 	       (> num-running 0))
 	  (begin
 	    ;; Here we mark any old defunct tests as incomplete. Do this every fifteen minutes
 	    ;; (debug:print 0 *default-log-port* "Got here eh! num-running=" num-running " (> num-running 0) " (> num-running 0))
+	    (thread-sleep! 5) ;; let's always sleep, prevents abutting calls to rum:get-count-tests-running-for-run-id - didn't help
 	    (if (> (current-seconds)(+ last-time-incomplete 900))
 		(let ((actual-num-running (rmt:get-count-tests-running-for-run-id run-id)))
 		  (debug:print-info 0 *default-log-port* "Marking stuck tests as INCOMPLETE while waiting for run " run-id
 				    ". Running as pid " (current-process-id) " on " (get-host-name))
 		  (set! last-time-incomplete (current-seconds)) ;; FIXME, this might be causing slow down - use of set!
@@ -1735,11 +1752,10 @@
 		  (runs:find-and-mark-incomplete-and-check-end-of-run run-id #f)
 		  (debug:print-info 0 *default-log-port* "run-wait specified, waiting on " actual-num-running
 				    " tests in RUNNING, REMOTEHOSTSTART or LAUNCHED state at "
 				    (time->string (seconds->local-time (current-seconds))))))
 	    ;; (if (runs:dat-load-mgmt-function runsdat)((runs:dat-load-mgmt-function runsdat)))
-	    (thread-sleep! 5) ;; (if (>= num-running max-concurrent-jobs) 5 1))
 	    (wait-loop (rmt:get-count-tests-running-for-run-id run-id)
 		       num-running))))
     ;; LET* ((test-record
     ;; we get here on "drop through". All done!
     ;; this is moved to runs:run-testes since this function is getting called twice to ensure everthing is completed. 

ADDED   tests/simplerun/simple.scm
Index: tests/simplerun/simple.scm
==================================================================
--- /dev/null
+++ tests/simplerun/simple.scm
@@ -0,0 +1,2 @@
+(print (rmt:get-keys))
+

Index: tests/simplerun/tests/test1/testconfig
==================================================================
--- tests/simplerun/tests/test1/testconfig
+++ tests/simplerun/tests/test1/testconfig
@@ -24,11 +24,11 @@
 [requirements]
 # waiton setup
 priority 0
 
 # Iteration for your tests are controlled by the items section
-[items]
+# [items]
 # PARTOFDAY morning noon afternoon evening night
 
 # test_meta is a section for storing additional data on your test
 [test_meta]
 author matt

ADDED   ulex-full/dbmgr.scm
Index: ulex-full/dbmgr.scm
==================================================================
--- /dev/null
+++ ulex-full/dbmgr.scm
@@ -0,0 +1,1131 @@
+;;======================================================================
+;; Copyright 2022, Matthew Welland.
+;; 
+;; This file is part of Megatest.
+;; 
+;;     Megatest is free software: you can redistribute it and/or modify
+;;     it under the terms of the GNU General Public License as published by
+;;     the Free Software Foundation, either version 3 of the License, or
+;;     (at your option) any later version.
+;; 
+;;     Megatest is distributed in the hope that it will be useful,
+;;     but WITHOUT ANY WARRANTY; without even the implied warranty of
+;;     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;;     GNU General Public License for more details.
+;; 
+;;     You should have received a copy of the GNU General Public License
+;;     along with Megatest.  If not, see <http://www.gnu.org/licenses/>.
+
+;;======================================================================
+
+(declare (unit dbmgrmod))
+
+(declare (uses ulex))
+(declare (uses apimod))
+(declare (uses pkts))
+(declare (uses commonmod))
+(declare (uses dbmod))
+(declare (uses mtargs))
+(declare (uses portloggermod))
+(declare (uses debugprint))
+
+(module dbmgrmod
+    *
+
+(import scheme
+	chicken.base
+	chicken.condition
+	chicken.file
+	chicken.format
+	chicken.port
+	chicken.process
+	chicken.process-context
+	chicken.process-context.posix
+	chicken.sort
+	chicken.string
+	chicken.time
+	
+	(prefix sqlite3 sqlite3:)
+	matchable
+	md5
+	message-digest
+	regex
+	s11n
+	srfi-1
+	srfi-18
+	srfi-69
+	system-information
+	typed-records
+	
+	pkts
+	ulex
+
+	commonmod
+	apimod
+	dbmod
+	debugprint
+	(prefix mtargs args:)
+	portloggermod
+	)
+
+;; Configurations for server
+;; (tcp-buffer-size 2048)
+;; (max-connections 2048) 
+
+;; info about me as a listener and my connections to db servers
+;; stored (for now) in *db-serv-info*
+;;
+(defstruct servdat
+  (host #f)
+  (port #f)
+  (uuid #f)
+  (dbfile #f)
+  (uconn   #f) ;; this is the listener *FOR THIS PROCESS*
+  (mode    #f)
+  (status 'starting)
+  (trynum 0) ;; count the number of ports we've tried
+  (conns  (make-hash-table)) ;; apath/dbname => conndat
+  ) 
+
+(define *db-serv-info* (make-servdat))
+
+(define (servdat->url sdat)
+  (conc (servdat-host sdat)":"(servdat-port sdat)))
+
+;; db servers contact info
+;;
+(defstruct conndat
+  (apath    #f)
+  (dbname   #f)
+  (fullname #f)
+  (hostport #f)
+  (ipaddr   #f)
+  (port     #f)
+  (srvpkt   #f)
+  (srvkey   #f)
+  (lastmsg  0)
+  (expires  0))
+
+(define *srvpktspec*
+  `((server (host    . h)
+	    (port    . p)
+	    (servkey . k)
+	    (pid     . i)
+	    (ipaddr  . a)
+	    (dbpath  . d))))
+
+;;======================================================================
+;;  S U P P O R T   F U N C T I O N S
+;;======================================================================
+
+;; set up the api proc, seems like there should be a better place for this?
+;;
+;; IS THIS NEEDED ANYMORE? TODO - REMOVE IF POSSIBLE
+;;
+;; (define api-proc (make-parameter conc))
+;; (api-proc api:execute-requests)
+
+;; do we have a connection to apath dbname and
+;; is it not expired? then return it
+;;
+;; else setup a connection
+;;
+;; if that fails, return '(#f "some reason") ;; NB// convert to raising an exception
+;;
+(define (rmt:get-conn remdat apath dbname)
+  (let* ((fullname (db:dbname->path apath dbname)))
+    (hash-table-ref/default (servdat-conns remdat) fullname #f)))
+
+(define (rmt:drop-conn remdat apath dbname)
+  (let* ((fullname (db:dbname->path apath dbname)))
+    (hash-table-delete! (servdat-conns remdat) fullname)))
+  
+(define (rmt:find-main-server uconn apath dbname)
+  (let* ((pktsdir     (get-pkts-dir apath))
+	 (all-srvpkts (get-all-server-pkts pktsdir *srvpktspec*))
+	 (viable-srvs (get-viable-servers all-srvpkts dbname)))
+    (get-the-server uconn apath viable-srvs)))
+
+
+(define *connstart-mutex* (make-mutex))
+(define *last-main-start* 0)
+
+;; looks for a connection to main, returns if have and not exired
+;; creates new otherwise
+;; 
+;; connections for other servers happens by requesting from main
+;;
+;; TODO: This is unnecessarily re-creating the record in the hash table
+;;
+(define (rmt:open-main-connection remdat apath)
+  (let* ((fullpath (db:dbname->path apath ".db/main.db"))
+	 (conns    (servdat-conns remdat))
+	 (conn     (rmt:get-conn remdat apath ".db/main.db")) ;; (hash-table-ref/default conns fullpath #f)) ;; TODO - create call for this
+	 (start-rmt:run (lambda ()
+			  (let* ((th1 (make-thread (lambda ()(rmt:run (get-host-name))) "non-db mode server")))
+			    (thread-start! th1)
+			    (thread-sleep! 1)
+			    (let loop ((count 0))
+			      (assert (< count 30) "FATAL: responder failed to initialize in rmt:open-main-connection")
+			      (if (or (not *db-serv-info*)
+				      (not (servdat-uconn *db-serv-info*)))
+				  (begin
+				    (thread-sleep! 1)
+				    (loop (+ count 1)))
+				  (begin
+				    (servdat-mode-set! *db-serv-info* 'non-db)
+				    (servdat-uconn *db-serv-info*)))))))
+	 (myconn    (servdat-uconn *db-serv-info*)))
+    (cond
+     ((not myconn)
+      (start-rmt:run)
+      (rmt:open-main-connection remdat apath))
+     ((and conn                                             ;; conn is NOT a socket, just saying ...
+	   (< (current-seconds) (conndat-expires conn)))
+      #t) ;; we are current and good to go - we'll deal elsewhere with a server that was killed or died 
+     ((and conn
+	   (>= (current-seconds)(conndat-expires conn)))
+      (debug:print-info 0 *default-log-port* "connection to "fullpath" server expired. Reconnecting.")
+      (rmt:drop-conn remdat apath ".db/main.db") ;;
+      (rmt:open-main-connection remdat apath))
+     (else
+      ;; Below we will find or create and connect to main
+      (debug:print-info 0 *default-log-port* "rmt:open-main-connection - starting from scratch")
+      (let* ((dbname         (db:run-id->dbname #f))
+	     (the-srv        (rmt:find-main-server myconn apath dbname))
+	     (start-main-srv (lambda () ;; call IF there is no the-srv found
+			       (mutex-lock! *connstart-mutex*)
+			       (if (> (- (current-seconds) *last-main-start*) 5) ;; at least four seconds since last attempt to start main server
+				   (begin
+				     (api:run-server-process apath dbname)
+				     (set! *last-main-start* (current-seconds))
+				     (thread-sleep! 1))
+				   (thread-sleep! 0.25))
+			       (mutex-unlock! *connstart-mutex*)
+			       (rmt:open-main-connection remdat apath) ;; TODO: Add limit to number of tries
+			       )))
+	(if (not the-srv) ;; have server, try connecting to it
+	    (start-main-srv)
+	    (let* ((srv-addr (server-address the-srv)) ;; need serv
+		   (ipaddr   (alist-ref 'ipaddr  the-srv))
+		   (port     (alist-ref 'port    the-srv))
+		   (srvkey   (alist-ref 'servkey the-srv))
+		   (fullpath (db:dbname->path apath dbname))
+		   
+		   (new-the-srv (make-conndat
+				 apath:   apath
+				 dbname:  dbname
+				 fullname: fullpath
+				 hostport: srv-addr
+				 ;; socket: (open-nn-connection srv-addr)  - TODO - open ulex connection?
+				 ipaddr: ipaddr
+				 port: port
+				 srvpkt: the-srv
+				 srvkey: srvkey ;; generated by rmt:get-signature on the server side
+				 lastmsg: (current-seconds)
+				 expires: (+ (current-seconds)
+					     (server:expiration-timeout)
+					     -2) ;; this needs to be gathered during the ping
+				 )))
+	      (hash-table-set! conns fullpath new-the-srv)))
+	#t)))))
+
+;; NB// sinfo is a servdat struct
+;;
+(define (rmt:general-open-connection sinfo apath dbname #!key (num-tries 5))
+  (assert (not (equal? dbname ".db/main.db")) "ERROR: general-open-connection should never be called with main as the db")
+  (let* ((mdbname  ".db/main.db") ;; (db:run-id->dbname #f)) TODO: put this back to the lookup when stable
+	 (fullname (db:dbname->path apath dbname))
+	 (conns    (servdat-conns sinfo))
+	 (mconn    (rmt:get-conn sinfo apath ".db/main.db"))
+	 (dconn    (rmt:get-conn sinfo apath dbname)))
+    #;(if (and mconn
+	     (not (debug:print-logger)))
+	(begin
+	  (debug:print-info 0 *default-log-port* "Turning on logging to main, look in logs dir for main log.")
+	  (debug:print-logger rmt:log-to-main)))
+    (cond
+     ((and mconn
+	   dconn
+	   (< (current-seconds)(conndat-expires dconn)))
+      #t) ;; good to go
+     ((not mconn) ;; no channel open to main? open it...
+      (rmt:open-main-connection sinfo apath)
+      (rmt:general-open-connection sinfo apath dbname num-tries: (- num-tries 1)))
+     ((not dconn)                 ;; no channel open to dbname?     
+      (let* ((res (rmt:send-receive-real sinfo apath mdbname 'get-server `(,apath ,dbname))))
+	(case res
+	  ((server-started)
+	   (if (> num-tries 0)
+	       (begin
+		 (thread-sleep! 2)
+		 (rmt:general-open-connection sinfo apath dbname num-tries: (- num-tries 1)))
+	       (begin
+		 (debug:print-error 0 *default-log-port* "Failed to start servers needed or open channel to "apath", "dbname)
+		 (exit 1))))
+	  (else
+	   (if (list? res) ;; server has been registered and the info was returned. pass it on.
+	       (begin ;;  ("192.168.0.9" 53817
+		      ;;  "5e34239f48e8973b3813221e54701a01" "24310"
+		      ;;  "192.168.0.9"
+		      ;;  "/home/matt/data/megatest/tests/simplerun"
+		 ;;  ".db/1.db")
+		 (match
+		  res
+		  ((host port servkey pid ipaddr apath dbname)
+		   (debug:print-info 0 *default-log-port* "got "res)
+		   (hash-table-set! conns
+				    fullname
+				    (make-conndat
+				     apath: apath
+				     dbname: dbname
+				     hostport: (conc host":"port)
+				     ;; socket: (open-nn-connection (conc host":"port)) ;; TODO - open ulex connection?
+				     ipaddr: ipaddr
+				     port: port
+				     srvkey: servkey
+				     lastmsg: (current-seconds)
+				     expires: (+ (current-seconds)
+						 (server:expiration-timeout)
+						 -2))))
+		  (else
+		   (debug:print-info 0 *default-log-port* "return data from starting server did not match host port servkey pid ipaddr apath dbname " res)))
+		 res)
+	       (begin
+		 (debug:print-info 0 *default-log-port* "Unexpected result: " res)
+		 res)))))))
+    #t))
+
+;;======================================================================
+
+;; FOR DEBUGGING SET TO #t
+;; (define *localmode* #t)
+(define *localmode* #f)
+(define *dbstruct* (make-dbr:dbstruct))
+
+;; Defaults to current area
+;;
+(define (rmt:send-receive cmd rid params #!key (attemptnum 1)(area-dat #f))
+  (let* ((apath      *toppath*)
+	 (sinfo      *db-serv-info*)
+	 (dbname     (db:run-id->dbname rid)))
+    (if *localmode*
+	(api:execute-requests *dbstruct* cmd params)
+	(begin
+	  (rmt:open-main-connection sinfo apath)
+	  (if rid (rmt:general-open-connection sinfo apath dbname))
+	  #;(if (not (member cmd '(log-to-main)))
+	      (debug:print-info 0 *default-log-port* "rmt:send-receive "cmd" params="params))
+	  (rmt:send-receive-real sinfo apath dbname cmd params)))))
+
+;; db is at apath/.db/dbname, rid is an intermediary solution and will be removed
+;; sometime in the future
+;;
+(define (rmt:send-receive-real sinfo apath dbname cmd params)
+  (assert (not (eq? 'primordial (thread-name (current-thread)))) "FATAL: Do not call rmt:send-receive-real in the primodial thread.")
+  (let* ((cdat (rmt:get-conn sinfo apath dbname)))
+    (assert cdat "FATAL: rmt:send-receive-real called without the needed channels opened")
+    (let* ((uconn    (servdat-uconn sinfo)) ;; get the interface to ulex
+	   ;; then send-receive using the ulex layer to host-port stored in cdat
+	   (res      (send-receive uconn (conndat-hostport cdat) cmd params))
+	   #;(th1      (make-thread (lambda ()
+				    (set! res (send-receive uconn (conndat-hostport cdat) cmd params)))
+				  "send-receive thread")))
+      ;; (thread-start! th1)
+      ;; (thread-join! th1)   ;; gratuitious thread stuff is so that mailbox is not used in primordial thead
+      ;; since we accessed the server we can bump the expires time up
+      (conndat-expires-set! cdat (+ (current-seconds)
+				    (server:expiration-timeout)
+				    -2)) ;; two second margin for network time misalignments etc.
+      res)))
+
+;; db is at apath/.db/dbname, rid is an intermediary solution and will be removed
+;; sometime in the future.
+;;
+;; Purpose - call the main.db server and request a server be started
+;; for the given area path and dbname
+;;
+
+(define (rmt:print-db-stats)
+  (let ((fmtstr "~40a~7-d~9-d~20,2-f")) ;; "~20,2-f"
+    (debug:print 18 *default-log-port* "DB Stats, "(seconds->year-week/day-time (current-seconds))"\n=====================")
+    (debug:print 18 *default-log-port* (format #f "~40a~8a~10a~10a" "Cmd" "Count" "TotTime" "Avg"))
+    (for-each (lambda (cmd)
+		(let ((cmd-dat (hash-table-ref *db-stats* cmd)))
+		  (debug:print 18 *default-log-port* (format #f fmtstr cmd (vector-ref cmd-dat 0) (vector-ref cmd-dat 1) (/ (vector-ref cmd-dat 1)(vector-ref cmd-dat 0))))))
+	      (sort (hash-table-keys *db-stats*)
+		    (lambda (a b)
+		      (> (vector-ref (hash-table-ref *db-stats* a) 0)
+			 (vector-ref (hash-table-ref *db-stats* b) 0)))))))
+
+(define (rmt:get-max-query-average run-id)
+  (mutex-lock! *db-stats-mutex*)
+  (let* ((runkey (conc "run-id=" run-id " "))
+	 (cmds   (filter (lambda (x)
+			   (substring-index runkey x))
+			 (hash-table-keys *db-stats*)))
+	 (res    (if (null? cmds)
+		     (cons 'none 0)
+		     (let loop ((cmd (car cmds))
+				(tal (cdr cmds))
+				(max-cmd (car cmds))
+				(res 0))
+		       (let* ((cmd-dat (hash-table-ref *db-stats* cmd))
+			      (tot     (vector-ref cmd-dat 0))
+			      (curravg (/ (vector-ref cmd-dat 1) (vector-ref cmd-dat 0))) ;; count is never zero by construction
+			      (currmax (max res curravg))
+			      (newmax-cmd (if (> curravg res) cmd max-cmd)))
+			 (if (null? tal)
+			     (if (> tot 10)
+				 (cons newmax-cmd currmax)
+				 (cons 'none 0))
+			     (loop (car tal)(cdr tal) newmax-cmd currmax)))))))
+    (mutex-unlock! *db-stats-mutex*)
+    res))
+
+;; host and port are used to ensure we are remove proper records
+(define (rmt:server-shutdown host port)
+  (let ((dbfile   (servdat-dbfile *db-serv-info*)))
+    (debug:print-info 0 *default-log-port* "dbfile is "dbfile)
+    (if dbfile
+	(let* ((am-server  (args:get-arg "-server"))
+	       (dbfile     (args:get-arg "-db"))
+	       (apath      *toppath*)
+	       #;(sinfo     *remotedat*)) ;; foundation for future fix
+	  (if *dbstruct-db*
+	      (let* ((dbdat      (db:get-dbdat *dbstruct-db* apath dbfile))
+		     (db         (dbr:dbdat-db dbdat))
+		     (inmem      (dbr:dbdat-db dbdat))   ;; WRONG
+		     )
+		;; do a final sync here
+		(debug:print-info 0 *default-log-port* "Doing final sync for "apath" "dbfile" at "(current-seconds))
+		(db:sync-inmem->disk *dbstruct-db* apath dbfile force-sync: #t)
+		;; let's finalize here
+		(debug:print-info 0 *default-log-port* "Finalizing db and inmem")
+		(if (sqlite3:database? db)
+		    (sqlite3:finalize! db)
+		    (debug:print-info 0 *default-log-port* "in rmt:server-shutdown, db is not a database, not finalizing..."))
+		(if (sqlite3:database? inmem)
+		    (sqlite3:finalize! inmem)
+		    (debug:print-info 0 *default-log-port* "in rmt:server-shutdown, inmem is not a database, not finalizing..."))
+		(debug:print-info 0 *default-log-port* "Finalizing db and inmem complete"))
+	      (debug:print-info 0 *default-log-port* "Db was never opened, no cleanup to do."))
+	  (if (not am-server)
+	      (debug:print-info 0 *default-log-port* "I am not a server, should NOT get here!")
+	      (if (string-match ".*/main.db$" dbfile)
+		  (let ((pkt-file (conc (get-pkts-dir *toppath*)
+					"/" (servdat-uuid *db-serv-info*)
+					".pkt")))
+		    (debug:print-info 0 *default-log-port* "removing pkt "pkt-file)
+		    (delete-file* pkt-file)
+		    (debug:print-info 0 *default-log-port* "Releasing lock (if any) for "dbfile ", host "host", port "port)
+		    (db:with-lock-db
+		     (servdat-dbfile *db-serv-info*)
+		     (lambda (dbh dbfile)
+		       (db:release-lock dbh dbfile host port)))) ;; I'm not the server - should not have a lock to remove
+		  (let* ((sdat *db-serv-info*) ;; we have a run-id server
+			 (host (servdat-host sdat))
+			 (port (servdat-port sdat))
+			 (uuid (servdat-uuid sdat))
+			 (res  (rmt:deregister-server *db-serv-info* *toppath* host port uuid dbfile)))
+		    (debug:print-info 0 *default-log-port* "deregistered-server, res="res)
+		    (debug:print-info 0 *default-log-port* "deregistering server "host":"port" with uuid "uuid)
+		    )))))))
+
+
+(define (common:run-sync?)
+    ;; (and (common:on-homehost?)
+  (args:get-arg "-server"))
+
+(define *rmt:run-mutex* (make-mutex))
+(define *rmt:run-flag* #f)
+
+;; Main entry point to start a server. was start-server
+(define (rmt:run hostn)
+  (mutex-lock! *rmt:run-mutex*)
+  (if *rmt:run-flag*
+      (begin
+	(debug:print-warn 0 *default-log-port* "rmt:run already running.")
+	(mutex-unlock! *rmt:run-mutex*))
+      (begin
+	(set! *rmt:run-flag* #t)
+	(mutex-unlock! *rmt:run-mutex*)
+	;;  ;; Configurations for server
+	;;  (tcp-buffer-size 2048)
+	;;  (max-connections 2048) 
+	(debug:print 2 *default-log-port* "PID: "(current-process-id)". Attempting to start the server ...")
+	(if (and *db-serv-info*
+		 (servdat-uconn *db-serv-info*))
+	    (let* ((uconn (servdat-uconn *db-serv-info*)))
+	      (wait-and-close uconn))
+	    (let* ((port            (portlogger:open-run-close portlogger:find-port))
+		   (handler-proc    (lambda (rem-host-port qrykey cmd params) ;;
+				      (set! *db-last-access* (current-seconds))
+				      (assert (list? params) "FATAL: handler called with non-list params")
+				      (assert (args:get-arg "-server") "FATAL: handler called on non-server side. cmd="cmd", params="params)
+				      (debug:print 0 *default-log-port* "handler call: "cmd", params="params)
+				      (api:execute-requests *dbstruct-db* cmd params))))
+	      ;; (api:process-request *dbstuct-db* 
+	      (if (not *db-serv-info*)
+		  (set! *db-serv-info* (make-servdat host: hostn port: port)))
+	      (let* ((uconn (run-listener handler-proc port))
+		     (rport (udat-port uconn))) ;; the real port
+		(servdat-host-set! *db-serv-info* hostn)
+		(servdat-port-set! *db-serv-info* rport)
+		(servdat-uconn-set! *db-serv-info* uconn)
+		(wait-and-close uconn)
+		(db:print-current-query-stats)
+		)))
+	(let* ((host (servdat-host *db-serv-info*))
+	       (port (servdat-port *db-serv-info*))
+	       (mode (or (servdat-mode *db-serv-info*)
+			 "non-db")))
+	  ;; server exit stuff here
+	  ;; (rmt:server-shutdown host port) - always do in on-exit
+	  ;; (portlogger:open-run-close portlogger:set-port port "released") ;; moved to on-exit 
+	  (debug:print-info 0 *default-log-port* "Server "host":"port" mode "mode"shutdown complete. Exiting")
+	  ))))
+
+;;======================================================================
+;; S E R V E R   U T I L I T I E S 
+;;======================================================================
+
+
+;;======================================================================
+;; NEW SERVER METHOD
+;;======================================================================
+
+;; only use for main.db - need to re-write some of this :(
+;;
+(define (get-lock-db sdat dbfile host port)
+  (assert host "FATAL: get-lock-db called with host not set.")
+  (assert port "FATAL: get-lock-db called with port not set.")
+  (let* ((dbh (db:open-run-db dbfile db:initialize-db)) ;; open-run-db creates a standard db with schema used by all situations
+	 (res (db:get-iam-server-lock dbh dbfile host port))
+	 (uconn (servdat-uconn sdat)))
+    ;; res => list then already locked, check server is responsive
+    ;;     => #t then sucessfully got the lock
+    ;;     => #f reserved for future use as to indicate something went wrong
+    (match res
+      ((owner_pid owner_host owner_port event_time)
+       (if (server-ready? uconn (conc owner_host":"owner_port) "abc")
+	   #f      ;; locked by someone else
+	   (begin  ;; locked by someone dead and gone
+	     (debug:print 0 *default-log-port* "WARNING: stale lock - have to steal it. This may fail.")
+	     (db:steal-lock-db dbh dbfile port))))
+      (#t  #t) ;; placeholder so that we don't touch res if it is #t
+      (else (set! res #f)))
+    (sqlite3:finalize! dbh)
+    res))
+
+
+(define (register-server pkts-dir pkt-spec host port servkey ipaddr dbpath)
+  (let* ((pkt-dat `((host    . ,host)
+		    (port    . ,port)
+		    (servkey . ,servkey)
+		    (pid     . ,(current-process-id))
+		    (ipaddr  . ,ipaddr)
+		    (dbpath  . ,dbpath)))
+	 (uuid    (write-alist->pkt
+		   pkts-dir
+		   pkt-dat
+		   pktspec: pkt-spec
+		   ptype: 'server)))
+    (debug:print 0 *default-log-port* "Server on "host":"port" registered in pkt "uuid)
+    uuid))
+
+(define (get-pkts-dir #!optional (apath #f))
+  (let* ((effective-toppath (or *toppath* apath)))
+    (assert effective-toppath
+	    "ERROR: get-pkts-dir called without *toppath* set. Exiting.")
+    (let* ((pdir (conc effective-toppath "/.meta/srvpkts")))
+      (if (file-exists? pdir)
+	  pdir
+	  (begin
+	    (handle-exceptions ;; this exception handler should NOT be needed but ...
+		exn
+		pdir
+	      (create-directory pdir #t))
+	    pdir)))))
+
+;; given a pkts dir read 
+;;
+(define (get-all-server-pkts pktsdir-in pktspec)
+  (let* ((pktsdir  (if (file-exists? pktsdir-in)
+		       pktsdir-in
+		       (begin
+			 (create-directory pktsdir-in #t)
+			 pktsdir-in)))
+	 (all-pkt-files (glob (conc pktsdir "/*.pkt"))))
+    (map (lambda (pkt-file)
+	   (read-pkt->alist pkt-file pktspec: pktspec))
+	 all-pkt-files)))
+
+(define (server-address srv-pkt)
+  (conc (alist-ref 'host srv-pkt) ":"
+	(alist-ref 'port srv-pkt)))
+	
+(define (server-ready? uconn host-port key) ;; server-address is host:port
+  (let* ((params `((cmd . ping)(key . ,key)))
+	 (data `((cmd . ping)
+		 (key . ,key)
+		 (params . ,params))) ;; I don't get it.
+	 (res  (send-receive uconn host-port 'ping data)))
+    (if (eq? res 'ack) ;; yep, likely it is who we want on the other end
+	res
+	#f)))
+;; (begin (debug:print-info 0 *default-log-port* "server-ready? => "res) #f))))
+
+; from the pkts return servers associated with dbpath
+;; NOTE: Only one can be alive - have to check on each
+;;       in the list of pkts returned
+;;
+(define (get-viable-servers serv-pkts dbpath)
+  (let loop ((tail serv-pkts)
+	     (res  '()))
+    (if (null? tail)
+	res ;; NOTE: sort by age so oldest is considered first
+	(let* ((spkt (car tail)))
+	  (loop (cdr tail)
+		(if (equal? dbpath (alist-ref 'dbpath spkt))
+		    (cons spkt res)
+		    res))))))
+
+(define (remove-pkts-if-not-alive uconn serv-pkts)
+  (filter (lambda (pkt)
+	    (let* ((host (alist-ref 'host pkt))
+		   (port (alist-ref 'port pkt))
+		   (host-port (conc host":"port))
+		   (key  (alist-ref 'servkey  pkt))
+		   (pktz (alist-ref 'Z        pkt))
+		   (res  (server-ready? uconn host-port key)))
+	      (if res
+		  res
+		  (let* ((pktsdir (get-pkts-dir *toppath*))
+			 (pktpath (conc pktsdir"/"pktz".pkt")))
+		    (debug:print 0 *default-log-port* "WARNING: pkt with no server "pktpath)
+		    (delete-file* pktpath)
+		    #f))))
+	  serv-pkts))
+
+;; from viable servers get one that is alive and ready
+;;
+(define (get-the-server uconn apath serv-pkts)
+  (let loop ((tail serv-pkts))
+    (if (null? tail)
+	#f
+	(let* ((spkt  (car tail))
+	       (host  (alist-ref 'ipaddr spkt))
+	       (port  (alist-ref 'port spkt))
+	       (host-port (conc host":"port))
+	       (dbpth (alist-ref 'dbpath spkt))
+	       (srvkey (alist-ref 'Z spkt)) ;; (alist-ref 'srvkey spkt))
+	       (addr  (server-address spkt)))
+	  (if (server-ready? uconn host-port srvkey)
+	      spkt
+	      (loop (cdr tail)))))))
+
+;; am I the "first" in line server? I.e. my D card is smallest
+;; use Z card as tie breaker
+;;
+(define (get-best-candidate serv-pkts dbpath)
+  (if (null? serv-pkts)
+      #f
+      (let loop ((tail serv-pkts)
+		 (best  (car serv-pkts)))
+	(if (null? tail)
+	    best
+	    (let* ((candidate (car tail))
+		   (candidate-bd (string->number (alist-ref 'D candidate)))
+		   (best-bd      (string->number (alist-ref 'D best)))
+		   ;; bigger number is younger
+		   (candidate-z  (alist-ref 'Z candidate))
+		   (best-z       (alist-ref 'Z best))
+		   (new-best     (cond
+				  ((> best-bd candidate-bd) ;; best is younger than candidate
+				   candidate)
+				  ((< best-bd candidate-bd) ;; candidate is younger than best
+				   best)
+				  (else
+				   (if (string>=? best-z candidate-z)
+				       best
+				       candidate))))) ;; use Z card as tie breaker
+	      (if (null? tail)
+		  new-best
+		  (loop (cdr tail) new-best)))))))
+	  
+
+;;======================================================================
+;; END NEW SERVER METHOD
+;;======================================================================
+
+;; if .db/main.db check the pkts
+;; 
+(define (rmt:wait-for-server pkts-dir db-file server-key)
+  (let* ((sdat *db-serv-info*))
+    (let loop ((start-time (current-seconds))
+	       (changed    #t)
+	       (last-sdat  "not this"))
+      (begin ;; let ((sdat #f))
+	(thread-sleep! 0.01)
+	(debug:print-info 0 *default-log-port* "Waiting for server alive signature")
+	(mutex-lock! *heartbeat-mutex*)
+	(set! sdat *db-serv-info*)
+	(mutex-unlock! *heartbeat-mutex*)
+	(if (and sdat
+		 (not changed)
+		 (> (- (current-seconds) start-time) 2))
+	    (let* ((uconn (servdat-uconn sdat)))
+	      (servdat-status-set! sdat 'iface-stable)
+	      (debug:print-info 0 *default-log-port* "Received server alive signature, now attempting to lock in server")
+	      ;; create a server pkt in *toppath*/.meta/srvpkts
+	      
+	      ;; TODO:
+	      ;;   1. change sdat to stuct
+	      ;;   2. add uuid to struct
+	      ;;   3. update uuid in sdat here
+	      ;;
+	      (servdat-uuid-set! sdat
+				 (register-server
+				  pkts-dir *srvpktspec*
+				  (get-host-name)
+				  (servdat-port sdat) server-key
+				  (servdat-host sdat) db-file))
+	      ;; (set! *my-signature* (servdat-uuid sdat)) ;; replace with Z, no, stick with proper key
+	      ;; now read pkts and see if we are a contender
+	      (let* ((all-pkts     (get-all-server-pkts pkts-dir *srvpktspec*))
+		     (viables      (get-viable-servers all-pkts db-file))
+		     (alive        (remove-pkts-if-not-alive uconn viables))
+		     (best-srv     (get-best-candidate alive db-file))
+		     (best-srv-key (if best-srv (alist-ref 'servkey best-srv) #f))
+		     (i-am-srv     (equal? best-srv-key server-key))
+		     (delete-pkt   (lambda ()
+				     (let* ((pktfile (conc (get-pkts-dir *toppath*)
+							 "/" (servdat-uuid *db-serv-info*)
+							 ".pkt")))
+				       (debug:print-info 0 *default-log-port* "Attempting to remove bogus pkt file "pktfile)
+				       (delete-file* pktfile))))) ;; remove immediately instead of waiting for on-exit
+		(debug:print 0 *default-log-port* "best-srv-key: "best-srv-key", server-key: "server-key", i-am-srv: "i-am-srv)
+		;; am I the best-srv, compare server-keys to know
+		(if i-am-srv
+		    (if (get-lock-db sdat db-file (servdat-host sdat)(servdat-port sdat)) ;; (db:get-iam-server-lock *dbstruct-db* *toppath* run-id)
+			(begin
+			  (debug:print-info 0 *default-log-port* "I'm the server!")
+			  (servdat-dbfile-set! sdat db-file)
+			  (servdat-status-set! sdat 'db-locked))
+			(begin
+			  (debug:print-info 0 *default-log-port* "I'm not the server, exiting.")
+			  (bdat-time-to-exit-set! *bdat* #t)
+			  (delete-pkt)
+			  (thread-sleep! 0.2)
+			  (exit)))
+		    (begin
+		      (debug:print-info 0 *default-log-port*
+				   "Keys do not match "best-srv-key", "server-key", exiting.")
+		      (bdat-time-to-exit-set! *bdat* #t)
+		      (delete-pkt)
+		      (thread-sleep! 0.2)
+		      (exit)))
+		sdat))
+	    (begin ;; sdat not yet contains server info
+	      (debug:print-info 0 *default-log-port* "Still waiting, last-sdat=" last-sdat)
+	      (sleep 4)
+	      (if (> (- (current-seconds) start-time) 120) ;; been waiting for two minutes
+		  (begin
+		    (debug:print-error 0 *default-log-port* "transport appears to have died, exiting server")
+		    (exit))
+		  (loop start-time
+			(equal? sdat last-sdat)
+			sdat))))))))
+
+(define (rmt:register-server sinfo apath iface port server-key dbname)
+  (servdat-conns sinfo) ;; just checking types
+  (rmt:open-main-connection sinfo apath) ;; we need a channel to main.db
+  (rmt:send-receive-real sinfo apath      ;; params: host port servkey pid ipaddr dbpath
+			 (db:run-id->dbname #f)
+			 'register-server `(,iface
+					    ,port
+					    ,server-key
+					    ,(current-process-id)
+					    ,iface
+					    ,apath
+					    ,dbname)))
+
+(define (rmt:get-count-servers sinfo apath)
+  (servdat-conns sinfo) ;; just checking types
+  (rmt:open-main-connection sinfo apath) ;; we need a channel to main.db
+  (rmt:send-receive-real sinfo apath      ;; params: host port servkey pid ipaddr dbpath
+			 (db:run-id->dbname #f)
+			 'get-count-servers `(,apath)))
+
+(define (rmt:get-servers-info apath)
+  (rmt:send-receive 'get-servers-info #f `(,apath)))
+
+(define (rmt:deregister-server db-serv-info apath iface port server-key dbname)
+  (rmt:open-main-connection db-serv-info apath) ;; we need a channel to main.db
+  (rmt:send-receive-real db-serv-info apath      ;; params: host port servkey pid ipaddr dbpath
+                         (db:run-id->dbname #f)
+                         'deregister-server `(,iface
+                                              ,port
+                                              ,server-key
+                                              ,(current-process-id)
+                                              ,iface
+                                              ,apath
+                                              ,dbname)))
+
+(define (rmt:wait-for-stable-interface #!optional (num-tries-allowed 100))
+  ;; wait until *db-serv-info* stops changing
+  (let* ((stime (current-seconds)))
+    (let loop ((last-host  #f)
+	       (last-port  #f)
+	       (tries 0))
+      (let* ((curr-host (and *db-serv-info* (servdat-host *db-serv-info*)))
+	     (curr-port (and *db-serv-info* (servdat-port *db-serv-info*))))
+	;; first we verify port and interface, update *db-serv-info* in need be.
+	(cond
+	 ((> tries num-tries-allowed)
+	  (debug:print 0 *default-log-port* "rmt:keep-running, giving up after trying for several minutes.")
+	  (exit 1))
+	 ((not *db-serv-info*)
+	  (thread-sleep! 0.25)
+	  (loop curr-host curr-port (+ tries 1)))
+	 ((or (not last-host)(not last-port))
+	  (debug:print 0 *default-log-port* "rmt:keep-running, still no interface, tries="tries)
+	  (thread-sleep! 0.25)
+	  (loop curr-host curr-port (+ tries 1)))
+	 ((or (not (equal? last-host curr-host))
+	      (not (equal? last-port curr-port)))
+	  (debug:print-info 0 *default-log-port* "WARNING: interface changed, refreshing iface and port info")
+	  (thread-sleep! 0.25)
+	  (loop curr-host curr-port (+ tries 1)))
+	 ((< (- (current-seconds) stime) 1) ;; keep up the looping until at least 3 seconds have passed
+	  (thread-sleep! 0.5)
+	  (loop curr-host curr-port (+ tries 1)))
+	 (else
+	  (rmt:get-signature) ;; sets *my-signature* as side effect
+	  (servdat-status-set! *db-serv-info* 'interface-stable)
+	  (debug:print 0 *default-log-port*
+		       "SERVER STARTED: " curr-host
+		       ":" curr-port
+		       " AT " (current-seconds) " server signature: " *my-signature*
+		       " with "(servdat-trynum *db-serv-info*)" port changes")
+	  (flush-output *default-log-port*)
+	  #t))))))
+
+;; run rmt:keep-running in a parallel thread to monitor that the db is being 
+;; used and to shutdown after sometime if it is not.
+;;
+(define (rmt:keep-running dbname) 
+  ;; if none running or if > 20 seconds since 
+  ;; server last used then start shutdown
+  ;; This thread waits for the server to come alive
+  (debug:print-info 0 *default-log-port* "Starting the sync-back, keep alive thread in server")
+
+  (let* ((sinfo             *db-serv-info*)
+	 (server-start-time (current-seconds))
+	 (pkts-dir          (get-pkts-dir))
+	 (server-key        (rmt:get-signature)) ;; This servers key
+	 (is-main           (equal? (args:get-arg "-db") ".db/main.db"))
+	 (last-access       0)
+	 (server-timeout    (server:expiration-timeout))
+	 (shutdown-server-sequence (lambda (host port)
+				     (set! *unclean-shutdown* #f) ;; Should not be needed anymore
+				     (debug:print-info 0 *default-log-port* "Starting to shutdown the server. pid="(current-process-id))
+				     ;; (rmt:server-shutdown host port) -- called in on-exit
+				     ;; (portlogger:open-run-close portlogger:set-port port "released") called in on-exit
+				     (exit)))
+	 (timed-out?        (lambda ()
+			      (<= (+ last-access server-timeout)
+				  (current-seconds)))))
+    (servdat-dbfile-set! *db-serv-info* (args:get-arg "-db"))
+    ;; main and run db servers have both got wait logic (could/should merge it)
+    (if is-main
+	(rmt:wait-for-server pkts-dir dbname server-key)
+	(rmt:wait-for-stable-interface))
+    ;; this is our forever loop
+    (let* ((iface (servdat-host *db-serv-info*))
+	   (port  (servdat-port *db-serv-info*))
+	   (uconn (servdat-uconn *db-serv-info*)))
+      (let loop ((count          0)
+		 (bad-sync-count 0)
+		 (start-time     (current-milliseconds)))
+	(if (and (not is-main)
+		 (common:low-noise-print 60 "servdat-status"))
+	    (debug:print-info 0 *default-log-port* "servdat-status is " (servdat-status *db-serv-info*)))
+
+	(mutex-lock! *heartbeat-mutex*)
+	;; set up the database handle
+	(if (not *dbstruct-db*) ;; no db opened yet, open the db and register with main if appropriate
+	      (let ((watchdog (bdat-watchdog *bdat*)))
+		(debug:print 0 *default-log-port* "SERVER: dbprep")
+		(db:setup dbname) ;; sets *dbstruct-db* as side effect
+		(servdat-status-set! *db-serv-info* 'db-opened)
+		;; IFF I'm not main, call into main and register self
+		(if (not is-main)
+		    (let ((res (rmt:register-server sinfo
+						    *toppath* iface port
+						    server-key dbname)))
+		      (if res ;; we are the server
+			  (servdat-status-set! *db-serv-info* 'have-interface-and-db)
+			  ;; now check that the db locker is alive, clear it out if not
+			  (let* ((serv-info (rmt:server-info *toppath* dbname)))
+			    (match serv-info
+				   ((host port servkey pid ipaddr apath dbpath)
+				    (if (not (server-ready? uconn (conc host":"port) servkey))
+					(begin
+					  (debug:print-info 0 *default-log-port* "Server registered but not alive. Removing and trying again.")
+					  (rmt:deregister-server sinfo apath host port servkey dbpath) ;; servkey pid ipaddr apath dbpath)
+					  (loop (+ count 1) bad-sync-count start-time))))
+				   (else
+				    (debug:print 0 *default-log-port* "We are not the server for "dbname", exiting. Server info is: "serv-info)
+				    (exit)))))))
+		(debug:print 0 *default-log-port*
+			     "SERVER: running, db "dbname" opened, megatest version: "
+			   (common:get-full-version))
+	      ;; start the watchdog
+
+	      ;; is this really needed?
+	      
+	      #;(if watchdog
+		  (if (not (member (thread-state watchdog)
+				   '(ready running blocked
+					   sleeping dead)))
+		      (begin
+			(debug:print-info 0 *default-log-port* "Starting watchdog thread (in state "(thread-state watchdog)")")
+			(thread-start! watchdog))
+		      (debug:print-info 0 *default-log-port* "Not starting watchdog thread (in state "(thread-state watchdog)")"))
+		  (debug:print 0 *default-log-port* "ERROR: *watchdog* not setup, cannot start it."))
+	      #;(loop (+ count 1) bad-sync-count start-time)
+	      ))
+	
+	(db:sync-inmem->disk *dbstruct-db* *toppath* dbname force-sync: #t)
+	
+	(mutex-unlock! *heartbeat-mutex*)
+	
+	;; when things go wrong we don't want to be doing the various
+	;; queries too often so we strive to run this stuff only every
+	;; four seconds or so.
+	(let* ((sync-time (- (current-milliseconds) start-time))
+	       (rem-time  (quotient (- 4000 sync-time) 1000)))
+	  (if (and (<= rem-time 4)
+		   (>  rem-time 0))
+	      (thread-sleep! rem-time)))
+    
+	;; Transfer *db-last-access* to last-access to use in checking that we are still alive
+	(set! last-access *db-last-access*)
+	
+	(if (< count 1) ;; 3x3 = 9 secs aprox
+	    (loop (+ count 1) bad-sync-count (current-milliseconds)))
+	
+	(if (common:low-noise-print 60 "dbstats")
+	    (begin
+	      (debug:print 0 *default-log-port* "Server stats:")
+	      (db:print-current-query-stats)))
+	(let* ((hrs-since-start  (/ (- (current-seconds) server-start-time) 3600)))
+	  (cond
+	   ((not *server-run*)
+	    (debug:print-info 0 *default-log-port* "*server-run* set to #f. Shutting down.")
+	    (shutdown-server-sequence (get-host-name) port))
+	   ((timed-out?)
+	    (debug:print-info 0 *default-log-port* "Server timed out. seconds since last db access: " (- (current-seconds) last-access))
+	    (shutdown-server-sequence (get-host-name) port))
+	   ((and *server-run*
+		 (or (not (timed-out?))
+		     (if is-main ;; do not exit if there are other servers (keep main open until all others gone)
+			 (> (rmt:get-count-servers sinfo *toppath*) 1)
+			 #f)))
+	    (if (common:low-noise-print 120 "server continuing")
+		(debug:print-info 0 *default-log-port* "Server continuing, seconds since last db access: " (- (current-seconds) last-access)))
+	    (loop 0 bad-sync-count (current-milliseconds)))
+	   (else
+	    (set! *unclean-shutdown* #f)
+	    (debug:print-info 0 *default-log-port* "Server timed out. seconds since last db access: " (- (current-seconds) last-access))
+	    (shutdown-server-sequence (get-host-name) port)
+	    #;(debug:print-info 0 *default-log-port* "Sending 'quit to server, received: "
+			      (open-send-receive-nn (conc iface":"port)      ;; do this here and not in server-shutdown
+						    (sexpr->string 'quit))))))))))
+
+(define (rmt:get-reasonable-hostname)
+  (let* ((inhost (or (args:get-arg "-server") "-")))
+    (if (equal? inhost "-")
+	(get-host-name)
+	inhost)))
+
+;; Call this to start the actual server
+;;
+;; all routes though here end in exit ...
+;;
+;; This is the point at which servers are started
+;;
+(define (rmt:server-launch dbname)
+  (debug:print-info 0 *default-log-port* "Entered rmt:server-launch")
+  (let* ((th2 (make-thread (lambda ()
+			     (debug:print-info 0 *default-log-port* "Server run thread started")
+			     (rmt:run (rmt:get-reasonable-hostname)))
+			   "Server run"))
+	 (th3 (make-thread (lambda ()
+			     (debug:print-info 0 *default-log-port* "Server monitor thread started")
+			     (if (args:get-arg "-server")
+				 (rmt:keep-running dbname)))
+			     "Keep running")))
+    (thread-start! th2)
+    (thread-sleep! 0.252) ;; give the server time to settle before starting the keep-running monitor.
+    (thread-start! th3)
+    (set! *didsomething* #t)
+    (thread-join! th2)
+    (thread-join! th3))
+  #f)
+	    
+;;======================================================================
+;;  S E R V E R   -  D I R E C T   C A L L S
+;;======================================================================
+
+(define (rmt:kill-server run-id)
+  (rmt:send-receive 'kill-server #f (list run-id)))
+
+(define (rmt:start-server run-id)
+  (rmt:send-receive 'start-server #f (list run-id)))
+
+(define (rmt:server-info apath dbname)
+  (rmt:send-receive 'get-server-info #f (list apath dbname)))
+
+;;======================================================================
+;; Nanomsg transport
+;;======================================================================
+
+#;(define (is-port-in-use port-num)
+  (let* ((ret #f))
+    (let-values (((inp oup pid)
+		  (process "netstat" (list  "-tulpn" ))))
+      (let loop ((inl (read-line inp)))
+        (if (not (eof-object? inl))
+            (begin 
+	      (if (string-search (regexp (conc ":" port-num)) inl)
+		  (begin
+					;(print "Output: "  inl)
+		    (set! ret  #t))
+		  (loop (read-line inp)))))))
+    ret))
+
+#;(define (open-nn-connection host-port)
+  (let ((req  (make-req-socket))
+        (uri  (conc "tcp://" host-port)))
+    (nng-dial req uri)
+    (socket-set! req 'nng/recvtimeo 2000)
+    req))
+
+#;(define (send-receive-nn req msg)
+  (nng-send req msg)
+  (nng-recv req))
+
+#;(define (close-nn-connection req)
+  (nng-close! req))
+  
+;; ;; open connection to server, send message, close connection
+;; ;;
+;; (define (open-send-close-nn host-port msg #!key (timeout 3) ) ;; default timeout is 3 seconds
+;;   (let ((req  (make-req-socket 'req))
+;;         (uri  (conc "tcp://" host-port))
+;;         (res  #f)
+;;         ;; (contacts (alist-ref 'contact attrib))
+;;         ;; (mode (alist-ref 'mode attrib))
+;; 	)
+;;     (socket-set! req 'nng/recvtimeo 2000)
+;;     (handle-exceptions
+;;      exn
+;;      (let ((emsg ((condition-property-accessor 'exn 'message) exn)))
+;;        ;; Send notification       
+;;        (debug:print 0 *default-log-port* "ERROR: Failed to connect / send to " uri " message was \"" emsg "\"" )
+;;        #f)
+;;      (nng-dial req uri)
+;;      ;; (print "Connected to the server " )
+;;      (nng-send req msg)
+;;      ;; (print "Request Sent")  
+;;      (let* ((th1  (make-thread (lambda ()
+;;                                  (let ((resp (nng-recv req)))
+;;                                    (nng-close! req)
+;;                                    (set! res (if (equal? resp "ok")
+;;                                                  #t
+;;                                                  #f))))
+;;                                "recv thread"))
+;;             (th2 (make-thread (lambda ()
+;;                                 (thread-sleep! timeout)
+;;                                 (thread-terminate! th1))
+;; 			      "timer thread")))
+;;        (thread-start! th1)
+;;        (thread-start! th2)
+;;        (thread-join! th1)
+;;        res))))
+;; 
+#;(define (open-send-receive-nn host-port msg #!key (timeout 3) ) ;; default timeout is 3 seconds
+  (let ((req  (make-req-socket))
+        (uri  (conc "tcp://" host-port))
+        (res  #f)) 
+    (handle-exceptions
+     exn
+     (let ((emsg ((condition-property-accessor 'exn 'message) exn)))
+       ;; Send notification      
+       (debug:print 0 *default-log-port* "ERROR: Failed to connect / send to " uri " message was \"" emsg "\", exn=" exn)
+       #f)
+     (nng-dial req uri)
+     (nng-send req msg)
+     (let* ((th1  (make-thread (lambda ()
+                                 (let ((resp (nng-recv req)))
+                                   (nng-close! req)
+                                   ;; (print resp)
+                                   (set! res resp)))
+                               "recv thread"))
+            (th2 (make-thread (lambda ()
+                                (thread-sleep! timeout)
+                                (thread-terminate! th1))
+                             "timer thread")))
+       (thread-start! th1)
+       (thread-start! th2)
+       (thread-join! th1)
+       res))))
+
+;;======================================================================
+;; S E R V E R   U T I L I T I E S 
+;;======================================================================
+
+;; run ping in separate process, safest way in some cases
+;;
+#;(define (server:ping-server ifaceport)
+  (with-input-from-pipe 
+   (conc (common:get-megatest-exe) " -ping " ifaceport)
+   (lambda ()
+     (let loop ((inl (read-line))
+		(res "NOREPLY"))
+       (if (eof-object? inl)
+	   (case (string->symbol res)
+	     ((NOREPLY)  #f)
+	     ((LOGIN_OK) #t)
+	     (else       #f))
+	   (loop (read-line) inl))))))
+
+;; NOT USED (well, ok, reference in rpc-transport but otherwise not used).
+;;
+#;(define (server:login toppath)
+  (lambda (toppath)
+    (set! *db-last-access* (current-seconds)) ;; might not be needed.
+    (if (equal? *toppath* toppath)
+	#t
+	#f)))
+
+;; (define server:sync-lock-token "SERVER_SYNC_LOCK")
+;; (define (server:release-sync-lock)
+;;   (db:no-sync-del! *no-sync-db* server:sync-lock-token))
+;; (define (server:have-sync-lock?)
+;;   (let* ((have-lock-pair (db:no-sync-get-lock *no-sync-db* server:sync-lock-token))
+;;          (have-lock?     (car have-lock-pair))
+;;          (lock-time      (cdr have-lock-pair))
+;;          (lock-age       (- (current-seconds) lock-time)))
+;;     (cond
+;;      (have-lock? #t)
+;;      ((>lock-age
+;;        (* 3 (configf:lookup-number *configdat* "server" "minimum-intersync-delay" default: 180)))
+;;       (server:release-sync-lock)
+;;       (server:have-sync-lock?))
+;;      (else #f))))
+
+)

ADDED   ulex-full/ulex.scm
Index: ulex-full/ulex.scm
==================================================================
--- /dev/null
+++ ulex-full/ulex.scm
@@ -0,0 +1,569 @@
+;; ulex: Distributed sqlite3 db
+;;;
+;; Copyright (C) 2018-2021 Matt Welland
+;; Redistribution and use in source and binary forms, with or without
+;; modification, is permitted.
+;;
+;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
+;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+;; ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE
+;; LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+;; CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
+;; OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+;; BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+;; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+;; USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+;; DAMAGE.
+
+;;======================================================================
+;; ABOUT:
+;;   See README in the distribution at https://www.kiatoa.com/fossils/ulex
+;; NOTES:
+;;   Why sql-de-lite and not say, dbi?  - performance mostly, then simplicity.
+;;
+;;======================================================================
+
+(module ulex
+	*
+	#;(
+     
+     ;; NOTE: looking for the handler proc - find the run-listener :)
+     
+     run-listener     ;; (run-listener handler-proc [port]) => uconn
+
+     ;; NOTE: handler-proc params;
+     ;;       (handler-proc rem-host-port qrykey cmd params)
+
+     send-receive     ;; (send-receive uconn host-port cmd data)
+
+     ;; NOTE: cmd can be any plain text symbol except for these;
+     ;;         'ping 'ack 'goodbye 'response
+     
+     set-work-handler ;; (set-work-handler proc)
+
+     wait-and-close   ;; (wait-and-close uconn)
+
+     ulex-listener?
+     
+     ;; needed to get the interface:port that was automatically found
+     udat-port
+     udat-host-port
+     
+     ;; for testing only
+     ;; pp-uconn
+     
+     ;; parameters
+     work-method   ;; parameter; 'threads, 'mailbox, 'limited, 'direct
+     return-method ;; parameter; 'mailbox, 'polling, 'direct
+     )
+
+(import scheme
+	chicken.base
+	chicken.file
+	chicken.io
+	chicken.time
+	chicken.condition
+	chicken.string
+	chicken.sort
+	chicken.pretty-print
+	
+	address-info
+	mailbox
+	matchable
+	;; queues
+	regex
+	regex-case
+	simple-exceptions
+	s11n
+	srfi-1
+	srfi-18
+	srfi-4
+	srfi-69
+	system-information
+	tcp6
+	typed-records
+	)
+
+;; udat struct, used by both caller and callee
+;; instantiated as uconn by convention
+;;
+(defstruct udat
+  ;; the listener side
+  (port #f)
+  (host-port #f)
+  (socket #f)
+  ;; the peers
+  (peers  (make-hash-table)) ;; host:port->peer
+  ;; work handling
+  (work-queue (make-mailbox))
+  (work-proc  #f)                ;; set by user
+  (cnum       0)                 ;; cookie number
+  (mboxes     (make-hash-table)) ;; for the replies
+  (avail-cmboxes '())            ;; list of (<cookie> . <mbox>) for re-use
+  ;; threads
+  (numthreads 10)
+  (cmd-thread #f)
+  (work-queue-thread #f)
+  (num-threads-running 0)
+  ) 
+
+;; Parameters
+
+;; work-method:
+(define work-method (make-parameter 'mailbox))
+;;    mailbox - all rdat goes through mailbox
+;;    threads - all rdat immediately executed in new thread
+;;    direct  - no queuing
+;;
+
+;; return-method, return the result to waiting send-receive:
+(define return-method (make-parameter 'mailbox))
+;;    mailbox - create a mailbox and use it for passing returning results to send-receive
+;;    polling - put the result in a hash table keyed by qrykey and send-receive can poll it for result
+;;    direct  - no queuing, result is passed back in single tcp connection
+;;
+
+;; ;; struct for keeping track of others we are talking to
+;; ;;
+;; (defstruct pdat
+;;   (host-port  #f)
+;;   (conns      '()) ;; list of pcon structs, pop one off when calling the peer
+;;   )
+;; 
+;; ;; struct for peer connections, keep track of expiration etc.
+;; ;;
+;; (defstruct pcon
+;;   (inp #f)
+;;   (oup #f)
+;;   (exp (+ (current-seconds) 59)) ;; expires at this time, set to (+ (current-seconds) 59)
+;;   (lifetime (+ (current-seconds) 600)) ;; throw away and create new after five minutes
+;;   )
+
+;;======================================================================
+;; listener
+;;======================================================================
+
+;; is uconn a ulex connector (listener)
+;;
+(define (ulex-listener? uconn)
+  (udat? uconn))
+
+;; create a tcp listener and return a populated udat struct with
+;; my port, address, hostname, pid etc.
+;; return #f if fail to find a port to allocate.
+;;
+;;  if udata-in is #f create the record
+;;  if there is already a serv-listener return the udata
+;;
+(define (setup-listener uconn #!optional (port 4242))
+  (handle-exceptions
+   exn
+   (if (< port 65535)
+       (setup-listener uconn (+ port 1))
+       #f)
+   (connect-listener uconn port)))
+
+(define (connect-listener uconn port)
+  ;; (tcp-listener-socket LISTENER)(socket-name so)
+  ;; sockaddr-address, sockaddr-port, sockaddr->string
+  (let* ((tlsn (tcp-listen port 1000 #f)) ;; (tcp-listen TCPPORT [BACKLOG [HOST]])
+	 (addr (get-my-best-address))) ;; (hostinfo-addresses (host-information (current-hostname)))
+    (udat-port-set!      uconn port)
+    (udat-host-port-set! uconn (conc addr":"port))
+    (udat-socket-set!    uconn tlsn)
+    uconn))
+
+;; run-listener does all the work of starting a listener in a thread
+;; it then returns control
+;;
+(define (run-listener handler-proc #!optional (port-suggestion 4242))
+  (let* ((uconn (make-udat)))
+    (udat-work-proc-set! uconn handler-proc)
+    (if (setup-listener uconn port-suggestion)
+	(let* ((th1 (make-thread (lambda ()(ulex-cmd-loop uconn)) "Ulex command loop"))
+	       (th2 (make-thread (lambda ()
+				   (case (work-method)
+				     ((mailbox limited)
+				      (process-work-queue uconn))))
+				 "Ulex work queue processor")))
+	  ;; (tcp-buffer-size 2048)
+	  (thread-start! th1)
+	  (thread-start! th2)
+	  (udat-cmd-thread-set! uconn th1)
+	  (udat-work-queue-thread-set! uconn th2)
+	  (print "cmd loop and process workers started, listening on "(udat-host-port uconn)".")
+	  uconn)
+	(assert #f "ERROR: run-listener called without proper setup."))))
+
+(define (wait-and-close uconn)
+  (thread-join! (udat-cmd-thread uconn))
+  (tcp-close (udat-socket uconn)))
+
+;;======================================================================
+;; peers and connections
+;;======================================================================
+
+(define *send-mutex* (make-mutex))
+
+;; send structured data to recipient
+;;
+;;  NOTE: qrykey is what was called the "cookie" previously
+;;
+;;     retval tells send to expect and wait for return data (one line) and return it or time out
+;;       this is for ping where we don't want to necessarily have set up our own server yet.
+;;
+;; NOTE: see below for beginnings of code to allow re-use of tcp connections
+;;        - I believe (without substantial evidence) that re-using connections will
+;;          be beneficial ...
+;;
+(define (send udata host-port qrykey cmd params)
+  (let* ((my-host-port (udat-host-port udata))          ;; remote will return to this
+	 (isme         #f #;(equal? host-port my-host-port)) ;; calling myself?
+	 ;; dat is a self-contained work block that can be sent or handled locally
+	 (dat          (list my-host-port qrykey cmd params #;(cons (current-seconds)(current-milliseconds)))))
+    (cond
+     (isme (ulex-handler udata dat)) ;; no transmission needed
+     (else
+      (handle-exceptions ;; TODO - MAKE THIS EXCEPTION CMD SPECIFIC?
+	  exn
+	  (message exn)
+	(begin
+	  ;; (mutex-lock! *send-mutex*) ;; DOESN'T SEEM TO HELP
+	  (let-values (((inp oup)(tcp-connect host-port)))
+	    (let ((res (if (and inp oup)
+			   (begin
+			     (serialize dat oup)
+			     (close-output-port oup)
+			     (deserialize inp)
+			     )
+			   (begin
+			     (print "ERROR: send called but no receiver has been setup. Please call setup first!")
+			     #f))))
+	      (close-input-port inp)
+	      ;; (mutex-unlock! *send-mutex*) ;; DOESN'T SEEM TO HELP
+	      res)))))))) ;; res will always be 'ack unless return-method is direct
+
+(define (send-via-polling uconn host-port cmd data)
+  (let* ((qrykey (make-cookie uconn))
+	 (sres   (send uconn host-port qrykey cmd data)))
+    (case sres
+      ((ack)
+       (let loop ((start-time (current-milliseconds)))
+	 (if (> (current-milliseconds)(+ start-time 10000)) ;; ten seconds timeout
+	     (begin
+	       (print "ULEX ERROR: timed out waiting for response from "host-port", "cmd" "data)
+	       #f)
+	     (let* ((result (hash-table-ref/default (udat-mboxes uconn) qrykey #f))) ;; NOTE: we are re-using mboxes hash
+	       (if result ;; result is '(status . result-data) or #f for nothing yet
+		   (begin
+		     (hash-table-delete! (udat-mboxes uconn) qrykey)
+		     (cdr result))
+		   (begin
+		     (thread-sleep! 0.01)
+		     (loop start-time)))))))
+      (else
+       (print "ULEX ERROR: Communication failed? sres="sres)
+       #f))))
+
+(define (send-via-mailbox uconn host-port cmd data)
+  (let* ((cmbox     (get-cmbox uconn)) ;; would it be better to keep a stack of mboxes to reuse?
+	 (qrykey    (car cmbox))
+	 (mbox      (cdr cmbox))
+	 (mbox-time (current-milliseconds))
+	 (sres      (send uconn host-port qrykey cmd data))) ;; short res
+    (if (eq? sres 'ack)
+	(let* ((mbox-timeout-secs    120 #;(if (eq? 'primordial (thread-name (current-thread)))
+				     #f
+				     120)) ;; timeout)
+	       (mbox-timeout-result 'MBOX_TIMEOUT)
+	       (res                  (mailbox-receive! mbox mbox-timeout-secs mbox-timeout-result))
+	       (mbox-receive-time    (current-milliseconds)))
+	  ;; (put-cmbox uconn cmbox) ;; reuse mbox and cookie. is it worth it?
+	  (hash-table-delete! (udat-mboxes uconn) qrykey)
+	  (if (eq? res 'MBOX_TIMEOUT)
+	      (begin
+		(print "WARNING: mbox timed out for query "cmd", with data "data
+		       ", waiting for response from "host-port".")
+
+		;; here it might make sense to clean up connection records and force clean start?
+		;; NO. The progam using ulex needs to do the reset. Right thing here is exception
+		
+		#f)  ;; convert to raising exception?
+	      res))
+	(begin
+	  (print "ERROR: Communication failed? Got "sres)
+	  #f))))
+  
+;; send a request to the given host-port and register a mailbox in udata
+;; wait for the mailbox data and return it
+;;
+(define (send-receive uconn host-port cmd data)
+  (let* ((start-time (current-milliseconds))
+	 (result     (cond
+		      ((member cmd '(ping goodbye)) ;; these are immediate
+		       (send uconn host-port 'ping cmd data))
+		      ((eq? (work-method) 'direct)
+		       ;; the result from send will be the actual result, not an 'ack
+		       (send uconn host-port 'direct cmd data))
+		      (else
+		       (case (return-method)
+			 ((polling)
+			  (send-via-polling uconn host-port cmd data))
+			 ((mailbox) 
+			  (send-via-mailbox uconn host-port cmd data))
+			 (else
+			  (print "ULEX ERROR: unrecognised return-method "(return-method)".")
+			  #f)))))
+	 (duration    (- (current-milliseconds) start-time)))
+    ;; this is ONLY for development and debugging. It will be removed once Ulex is stable.
+    (if (< 5000 duration)
+	(print "ULEX WARNING: round-trip took "(inexact->exact (round (/ duration 1000)))
+	       " seconds; "cmd", host-port="host-port", data="data))
+    result))
+    
+
+;;======================================================================
+;; responder side
+;;======================================================================
+
+;; take a request, rdat, and if not immediate put it in the work queue
+;;
+;; Reserved cmds; ack ping goodbye response
+;;
+(define (ulex-handler uconn rdat)
+  (assert (list? rdat) "FATAL: ulex-handler give rdat as not list")
+  (match rdat ;;  (string-split controldat)
+    ((rem-host-port qrykey cmd params);; timedata)
+     ;; (print "ulex-handler got: "rem-host-port" qrykey: "qrykey" cmd: "cmd" params: "params)
+     (case cmd
+       ;; ((ack )(print "Got ack! But why? Should NOT get here.") 'ack)
+       ((ping)
+	;; (print "Got Ping!")
+	;; (add-to-work-queue uconn rdat)
+	'ack)
+       ((goodbye)
+	;; just clear out references to the caller. NOT COMPLETE
+	(add-to-work-queue uconn rdat)
+	'ack)
+       ((response) ;; this is a result from remote processing, send it as mail ...
+	(case (return-method)
+	  ((polling)
+	   (hash-table-set! (udat-mboxes uconn) qrykey (cons 'ok params))
+	   'ack)
+	  ((mailbox)
+	   (let ((mbox (hash-table-ref/default (udat-mboxes uconn) qrykey #f)))
+	     (if mbox
+		 (begin
+		   (mailbox-send! mbox params) ;; params here is our result
+		   'ack)
+		 (begin
+		   (print "ERROR: received result but no associated mbox for cookie "qrykey)
+		   'no-mbox-found))))
+	  (else (print "ULEX ERROR: unrecognised return-method "(return-method))
+		'bad-return-method)))
+       (else ;; generic request - hand it to the work queue
+	(add-to-work-queue uconn rdat)
+	'ack)))
+    (else
+     (print "ULEX ERROR: bad rdat "rdat)
+     'bad-rdat)))
+
+;; given an already set up uconn start the cmd-loop
+;;
+(define (ulex-cmd-loop uconn)
+  (let* ((serv-listener (udat-socket uconn))
+	 (listener      (lambda ()
+			  (let loop ((state 'start))
+			    (let-values (((inp oup)(tcp-accept serv-listener)))
+			      ;; (mutex-lock! *send-mutex*) ;; DOESN'T SEEM TO HELP
+			      (let* ((rdat  (deserialize inp)) ;; '(my-host-port qrykey cmd params)
+				     (resp  (ulex-handler uconn rdat)))
+				(serialize resp oup)
+				(close-input-port inp)
+				(close-output-port oup)
+				;; (mutex-unlock! *send-mutex*) ;; DOESN'T SEEM TO HELP
+				)
+			      (loop state))))))
+    ;; start N of them
+    (let loop ((thnum   0)
+	       (threads '()))
+      (if (< thnum 100)
+	  (let* ((th (make-thread listener (conc "listener" thnum))))
+	    (thread-start! th)
+	    (loop (+ thnum 1)
+		  (cons th threads)))
+	  (map thread-join! threads)))))
+
+;; add a proc to the cmd list, these are done symetrically (i.e. in all instances)
+;; so that the proc can be dereferenced remotely
+;;
+(define (set-work-handler uconn proc)
+  (udat-work-proc-set! uconn proc))
+
+;;======================================================================
+;; work queues - this is all happening on the listener side
+;;======================================================================
+
+;; rdat is (rem-host-port qrykey cmd params)
+					     
+(define (add-to-work-queue uconn rdat)
+  #;(queue-add! (udat-work-queue uconn) rdat)
+  (case (work-method)
+    ((threads)
+     (thread-start! (make-thread (lambda ()
+				   (do-work uconn rdat))
+				 "worker thread")))
+    ((mailbox)
+     (mailbox-send! (udat-work-queue uconn) rdat))
+    ((direct)
+     (do-work uconn rdat))
+    (else
+     (print "ULEX ERROR: work-method "(work-method)" not recognised, using mailbox.")
+     (mailbox-send! (udat-work-queue uconn) rdat))))
+     
+;; move the logic to return the result somewhere else?
+;;
+(define (do-work uconn rdat)
+  (let* ((proc (udat-work-proc uconn))) ;; get it each time - conceivebly it could change
+    ;; put this following into a do-work procedure
+    (match rdat
+      ((rem-host-port qrykey cmd params)
+       (let* ((start-time (current-milliseconds))
+	      (result (proc rem-host-port qrykey cmd params))
+	      (end-time (current-milliseconds))
+	      (run-time (- end-time start-time)))
+	 (case (work-method)
+	   ((direct) result)
+	   (else
+	    (print "ULEX: work "cmd", "params" done in "run-time" ms")
+	    ;; send 'response as cmd and result as params
+	    (send uconn rem-host-port qrykey 'response result) ;; could check for ack
+	    (print "ULEX: response sent back to "rem-host-port" in "(- (current-milliseconds) end-time))))))
+      (MBOX_TIMEOUT 'do-work-timeout)
+      (else
+       (print "ERROR: rdat "rdat", did not match rem-host-port qrykey cmd params")))))
+
+;; NEW APPROACH:
+;;   
+(define (process-work-queue uconn) 
+  (let ((wqueue (udat-work-queue uconn))
+	(proc   (udat-work-proc  uconn))
+	(numthr (udat-numthreads uconn)))
+    (let loop ((thnum    1)
+	       (threads '()))
+      (let ((thlst (cons (make-thread (lambda ()
+					(let work-loop ()
+					  (let ((rdat (mailbox-receive! wqueue 24000 'MBOX_TIMEOUT)))
+					    (do-work uconn rdat))
+					  (work-loop)))
+				      (conc "work thread " thnum))
+			 threads)))
+	(if (< thnum numthr)
+	    (loop (+ thnum 1)
+		  thlst)
+	    (begin
+	      (print "ULEX: Starting "(length thlst)" worker threads.")
+	      (map thread-start! thlst)
+	      (print "ULEX: Threads started. Joining all.")
+	      (map thread-join! thlst)))))))
+
+;; below was to enable re-use of connections. This seems non-trivial so for
+;; now lets open on each call
+;;
+;; ;; given host-port get or create peer struct
+;; ;;
+;; (define (udat-get-peer uconn host-port)
+;;   (or (hash-table-ref/default (udat-peers uconn) host-port #f)
+;;       ;; no peer, so create pdat and init it
+;;       
+;;       ;; NEED stack of connections, pop and use; inp, oup,
+;;       ;; creation_time (remove and create new if over 24hrs old
+;;       ;; 
+;;       (let ((pdat (make-pdat host-port: host-port)))
+;; 	(hash-table-set! (udat-peers uconn) host-port pdat)
+;; 	pdat)))
+;; 
+;; ;; is pcon alive
+;; 
+;; ;; given host-port and pdat get a pcon
+;; ;;
+;; (define (pdat-get-pcon pdat host-port)
+;;   (let loop ((conns (pdat-conns pdat)))
+;;     (if (null? conns) ;; none? make and return - do NOT add - it will be pushed back on list later
+;; 	(init-pcon (make-pcon))
+;; 	(let* ((conn (pop conns)))
+;; 	  
+;; ;; given host-port get a pcon struct
+;; ;;
+;; (define (udat-get-pcon 
+      
+;;======================================================================
+;; misc utils
+;;======================================================================
+
+(define (make-cookie uconn)
+  (let ((newcnum (+ (udat-cnum uconn) 1)))
+    (udat-cnum-set! uconn newcnum)
+    (conc (udat-host-port uconn) ":"
+	  newcnum)))
+
+;; cookie/mboxes
+
+;; we store each mbox with a cookie (<cookie> . <mbox>)
+;;
+(define (get-cmbox uconn)
+  (if (null? (udat-avail-cmboxes uconn))
+      (let ((cookie (make-cookie uconn))
+	    (mbox   (make-mailbox)))
+	(hash-table-set! (udat-mboxes uconn) cookie mbox)
+	`(,cookie . ,mbox))
+      (let ((cmbox (car (udat-avail-cmboxes uconn))))
+	(udat-avail-cmboxes-set! uconn (cdr (udat-avail-cmboxes uconn)))
+	cmbox)))
+
+(define (put-cmbox uconn cmbox)
+  (udat-avail-cmboxes-set! uconn (cons cmbox (udat-avail-cmboxes uconn))))
+
+(define (pp-uconn uconn)
+  (pp (udat->alist uconn)))
+
+  
+;;======================================================================
+;; network utilities
+;;======================================================================
+
+;; NOTE: Look at address-info egg as alternative to some of this
+
+(define (rate-ip ipaddr)
+  (regex-case ipaddr
+    ( "^127\\..*" _ 0 )
+    ( "^(10\\.0|192\\.168)\\..*" _ 1 )
+    ( else 2 ) ))
+
+;; Change this to bias for addresses with a reasonable broadcast value?
+;;
+(define (ip-pref-less? a b)
+  (> (rate-ip a) (rate-ip b)))
+
+(define (get-my-best-address)
+  (let ((all-my-addresses (get-all-ips)))
+    (cond
+     ((null? all-my-addresses)
+      (get-host-name))                                          ;; no interfaces?
+     ((eq? (length all-my-addresses) 1)
+      (car all-my-addresses))                      ;; only one to choose from, just go with it
+     (else
+      (car (sort all-my-addresses ip-pref-less?))))))
+
+(define (get-all-ips-sorted)
+  (sort (get-all-ips) ip-pref-less?))
+
+(define (get-all-ips)
+  (map address-info-host
+       (filter (lambda (x)
+		 (equal? (address-info-type x) "tcp"))
+	       (address-infos (get-host-name)))))
+
+)

ADDED   ulex-none/dbmgr.scm
Index: ulex-none/dbmgr.scm
==================================================================
--- /dev/null
+++ ulex-none/dbmgr.scm
@@ -0,0 +1,1123 @@
+;;======================================================================
+;; Copyright 2022, Matthew Welland.
+;; 
+;; This file is part of Megatest.
+;; 
+;;     Megatest is free software: you can redistribute it and/or modify
+;;     it under the terms of the GNU General Public License as published by
+;;     the Free Software Foundation, either version 3 of the License, or
+;;     (at your option) any later version.
+;; 
+;;     Megatest is distributed in the hope that it will be useful,
+;;     but WITHOUT ANY WARRANTY; without even the implied warranty of
+;;     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;;     GNU General Public License for more details.
+;; 
+;;     You should have received a copy of the GNU General Public License
+;;     along with Megatest.  If not, see <http://www.gnu.org/licenses/>.
+
+;;======================================================================
+
+(declare (unit dbmgrmod))
+
+(declare (uses ulex))
+(declare (uses apimod))
+(declare (uses pkts))
+(declare (uses commonmod))
+(declare (uses dbmod))
+(declare (uses mtargs))
+(declare (uses portloggermod))
+(declare (uses debugprint))
+
+(module dbmgrmod
+    *
+
+(import scheme
+	chicken.base
+	chicken.condition
+	chicken.file
+	chicken.format
+	chicken.port
+	chicken.process
+	chicken.process-context
+	chicken.process-context.posix
+	chicken.sort
+	chicken.string
+	chicken.time
+	
+	(prefix sqlite3 sqlite3:)
+	matchable
+	md5
+	message-digest
+	regex
+	s11n
+	srfi-1
+	srfi-18
+	srfi-69
+	system-information
+	typed-records
+	
+	pkts
+	ulex
+
+	commonmod
+	apimod
+	dbmod
+	debugprint
+	(prefix mtargs args:)
+	portloggermod
+	)
+
+;; ;; Configurations for server
+;; ;; (tcp-buffer-size 2048)
+;; ;; (max-connections 2048) 
+;; 
+;; ;; info about me as a listener and my connections to db servers
+;; ;; stored (for now) in *db-serv-info*
+;; ;;
+;; (defstruct servdat
+;;   (host #f)
+;;   (port #f)
+;;   (uuid #f)
+;;   (dbfile #f)
+;;   (uconn   #f) ;; this is the listener *FOR THIS PROCESS*
+;;   (mode    #f)
+;;   (status 'starting)
+;;   (trynum 0) ;; count the number of ports we've tried
+;;   (conns  (make-hash-table)) ;; apath/dbname => conndat
+;;   ) 
+;; 
+;; (define *db-serv-info* (make-servdat))
+;; 
+;; (define (servdat->url sdat)
+;;   (conc (servdat-host sdat)":"(servdat-port sdat)))
+;; 
+;; ;; db servers contact info
+;; ;;
+;; (defstruct conndat
+;;   (apath    #f)
+;;   (dbname   #f)
+;;   (fullname #f)
+;;   (hostport #f)
+;;   (ipaddr   #f)
+;;   (port     #f)
+;;   (srvpkt   #f)
+;;   (srvkey   #f)
+;;   (lastmsg  0)
+;;   (expires  0))
+;; 
+;; (define *srvpktspec*
+;;   `((server (host    . h)
+;; 	    (port    . p)
+;; 	    (servkey . k)
+;; 	    (pid     . i)
+;; 	    (ipaddr  . a)
+;; 	    (dbpath  . d))))
+;; 
+;; ;;======================================================================
+;; ;;  S U P P O R T   F U N C T I O N S
+;; ;;======================================================================
+;; 
+;; ;; set up the api proc, seems like there should be a better place for this?
+;; ;;
+;; ;; IS THIS NEEDED ANYMORE? TODO - REMOVE IF POSSIBLE
+;; ;;
+;; ;; (define api-proc (make-parameter conc))
+;; ;; (api-proc api:execute-requests)
+;; 
+;; ;; do we have a connection to apath dbname and
+;; ;; is it not expired? then return it
+;; ;;
+;; ;; else setup a connection
+;; ;;
+;; ;; if that fails, return '(#f "some reason") ;; NB// convert to raising an exception
+;; ;;
+;; (define (rmt:get-conn remdat apath dbname)
+;;   (let* ((fullname (db:dbname->path apath dbname)))
+;;     (hash-table-ref/default (servdat-conns remdat) fullname #f)))
+;; 
+;; (define (rmt:drop-conn remdat apath dbname)
+;;   (let* ((fullname (db:dbname->path apath dbname)))
+;;     (hash-table-delete! (servdat-conns remdat) fullname)))
+;;   
+;; (define (rmt:find-main-server uconn apath dbname)
+;;   (let* ((pktsdir     (get-pkts-dir apath))
+;; 	 (all-srvpkts (get-all-server-pkts pktsdir *srvpktspec*))
+;; 	 (viable-srvs (get-viable-servers all-srvpkts dbname)))
+;;     (get-the-server uconn apath viable-srvs)))
+;; 
+;; 
+;; (define *connstart-mutex* (make-mutex))
+;; (define *last-main-start* 0)
+;; 
+;; ;; looks for a connection to main, returns if have and not exired
+;; ;; creates new otherwise
+;; ;; 
+;; ;; connections for other servers happens by requesting from main
+;; ;;
+;; ;; TODO: This is unnecessarily re-creating the record in the hash table
+;; ;;
+;; (define (rmt:open-main-connection remdat apath)
+;;   (let* ((fullpath (db:dbname->path apath ".db/main.db"))
+;; 	 (conns    (servdat-conns remdat))
+;; 	 (conn     (rmt:get-conn remdat apath ".db/main.db")) ;; (hash-table-ref/default conns fullpath #f)) ;; TODO - create call for this
+;; 	 (start-rmt:run (lambda ()
+;; 			  (let* ((th1 (make-thread (lambda ()(rmt:run (get-host-name))) "non-db mode server")))
+;; 			    (thread-start! th1)
+;; 			    (thread-sleep! 1)
+;; 			    (let loop ((count 0))
+;; 			      (assert (< count 30) "FATAL: responder failed to initialize in rmt:open-main-connection")
+;; 			      (if (or (not *db-serv-info*)
+;; 				      (not (servdat-uconn *db-serv-info*)))
+;; 				  (begin
+;; 				    (thread-sleep! 1)
+;; 				    (loop (+ count 1)))
+;; 				  (begin
+;; 				    (servdat-mode-set! *db-serv-info* 'non-db)
+;; 				    (servdat-uconn *db-serv-info*)))))))
+;; 	 (myconn    (servdat-uconn *db-serv-info*)))
+;;     (cond
+;;      ((not myconn)
+;;       (start-rmt:run)
+;;       (rmt:open-main-connection remdat apath))
+;;      ((and conn                                             ;; conn is NOT a socket, just saying ...
+;; 	   (< (current-seconds) (conndat-expires conn)))
+;;       #t) ;; we are current and good to go - we'll deal elsewhere with a server that was killed or died 
+;;      ((and conn
+;; 	   (>= (current-seconds)(conndat-expires conn)))
+;;       (debug:print-info 0 *default-log-port* "connection to "fullpath" server expired. Reconnecting.")
+;;       (rmt:drop-conn remdat apath ".db/main.db") ;;
+;;       (rmt:open-main-connection remdat apath))
+;;      (else
+;;       ;; Below we will find or create and connect to main
+;;       (debug:print-info 0 *default-log-port* "rmt:open-main-connection - starting from scratch")
+;;       (let* ((dbname         (db:run-id->dbname #f))
+;; 	     (the-srv        (rmt:find-main-server myconn apath dbname))
+;; 	     (start-main-srv (lambda () ;; call IF there is no the-srv found
+;; 			       (mutex-lock! *connstart-mutex*)
+;; 			       (if (> (- (current-seconds) *last-main-start*) 5) ;; at least four seconds since last attempt to start main server
+;; 				   (begin
+;; 				     (api:run-server-process apath dbname)
+;; 				     (set! *last-main-start* (current-seconds))
+;; 				     (thread-sleep! 1))
+;; 				   (thread-sleep! 0.25))
+;; 			       (mutex-unlock! *connstart-mutex*)
+;; 			       (rmt:open-main-connection remdat apath) ;; TODO: Add limit to number of tries
+;; 			       )))
+;; 	(if (not the-srv) ;; have server, try connecting to it
+;; 	    (start-main-srv)
+;; 	    (let* ((srv-addr (server-address the-srv)) ;; need serv
+;; 		   (ipaddr   (alist-ref 'ipaddr  the-srv))
+;; 		   (port     (alist-ref 'port    the-srv))
+;; 		   (srvkey   (alist-ref 'servkey the-srv))
+;; 		   (fullpath (db:dbname->path apath dbname))
+;; 		   
+;; 		   (new-the-srv (make-conndat
+;; 				 apath:   apath
+;; 				 dbname:  dbname
+;; 				 fullname: fullpath
+;; 				 hostport: srv-addr
+;; 				 ;; socket: (open-nn-connection srv-addr)  - TODO - open ulex connection?
+;; 				 ipaddr: ipaddr
+;; 				 port: port
+;; 				 srvpkt: the-srv
+;; 				 srvkey: srvkey ;; generated by rmt:get-signature on the server side
+;; 				 lastmsg: (current-seconds)
+;; 				 expires: (+ (current-seconds)
+;; 					     (server:expiration-timeout)
+;; 					     -2) ;; this needs to be gathered during the ping
+;; 				 )))
+;; 	      (hash-table-set! conns fullpath new-the-srv)))
+;; 	#t)))))
+;; 
+;; ;; NB// sinfo is a servdat struct
+;; ;;
+;; (define (rmt:general-open-connection sinfo apath dbname #!key (num-tries 5))
+;;   (assert (not (equal? dbname ".db/main.db")) "ERROR: general-open-connection should never be called with main as the db")
+;;   (let* ((mdbname  ".db/main.db") ;; (db:run-id->dbname #f)) TODO: put this back to the lookup when stable
+;; 	 (fullname (db:dbname->path apath dbname))
+;; 	 (conns    (servdat-conns sinfo))
+;; 	 (mconn    (rmt:get-conn sinfo apath ".db/main.db"))
+;; 	 (dconn    (rmt:get-conn sinfo apath dbname)))
+;;     #;(if (and mconn
+;; 	     (not (debug:print-logger)))
+;; 	(begin
+;; 	  (debug:print-info 0 *default-log-port* "Turning on logging to main, look in logs dir for main log.")
+;; 	  (debug:print-logger rmt:log-to-main)))
+;;     (cond
+;;      ((and mconn
+;; 	   dconn
+;; 	   (< (current-seconds)(conndat-expires dconn)))
+;;       #t) ;; good to go
+;;      ((not mconn) ;; no channel open to main? open it...
+;;       (rmt:open-main-connection sinfo apath)
+;;       (rmt:general-open-connection sinfo apath dbname num-tries: (- num-tries 1)))
+;;      ((not dconn)                 ;; no channel open to dbname?     
+;;       (let* ((res (rmt:send-receive-real sinfo apath mdbname 'get-server `(,apath ,dbname))))
+;; 	(case res
+;; 	  ((server-started)
+;; 	   (if (> num-tries 0)
+;; 	       (begin
+;; 		 (thread-sleep! 2)
+;; 		 (rmt:general-open-connection sinfo apath dbname num-tries: (- num-tries 1)))
+;; 	       (begin
+;; 		 (debug:print-error 0 *default-log-port* "Failed to start servers needed or open channel to "apath", "dbname)
+;; 		 (exit 1))))
+;; 	  (else
+;; 	   (if (list? res) ;; server has been registered and the info was returned. pass it on.
+;; 	       (begin ;;  ("192.168.0.9" 53817
+;; 		      ;;  "5e34239f48e8973b3813221e54701a01" "24310"
+;; 		      ;;  "192.168.0.9"
+;; 		      ;;  "/home/matt/data/megatest/tests/simplerun"
+;; 		 ;;  ".db/1.db")
+;; 		 (match
+;; 		  res
+;; 		  ((host port servkey pid ipaddr apath dbname)
+;; 		   (debug:print-info 0 *default-log-port* "got "res)
+;; 		   (hash-table-set! conns
+;; 				    fullname
+;; 				    (make-conndat
+;; 				     apath: apath
+;; 				     dbname: dbname
+;; 				     hostport: (conc host":"port)
+;; 				     ;; socket: (open-nn-connection (conc host":"port)) ;; TODO - open ulex connection?
+;; 				     ipaddr: ipaddr
+;; 				     port: port
+;; 				     srvkey: servkey
+;; 				     lastmsg: (current-seconds)
+;; 				     expires: (+ (current-seconds)
+;; 						 (server:expiration-timeout)
+;; 						 -2))))
+;; 		  (else
+;; 		   (debug:print-info 0 *default-log-port* "return data from starting server did not match host port servkey pid ipaddr apath dbname " res)))
+;; 		 res)
+;; 	       (begin
+;; 		 (debug:print-info 0 *default-log-port* "Unexpected result: " res)
+;; 		 res)))))))
+;;     #t))
+;; 
+;; ;;======================================================================
+;; 
+;; ;; FOR DEBUGGING SET TO #t
+;; ;; (define *localmode* #t)
+;; (define *localmode* #f)
+(define *dbstruct* (make-dbr:dbstruct))
+ 
+;; Defaults to current area
+;;
+(define (rmt:send-receive cmd rid params #!key (attemptnum 1)(area-dat #f))
+  (let* ((apath      *toppath*)
+	 (dbname     (db:run-id->dbname rid)))
+	(api:execute-requests *dbstruct* cmd params)))
+
+;; ;; db is at apath/.db/dbname, rid is an intermediary solution and will be removed
+;; ;; sometime in the future
+;; ;;
+;; (define (rmt:send-receive-real sinfo apath dbname cmd params)
+;;   (assert (not (eq? 'primordial (thread-name (current-thread)))) "FATAL: Do not call rmt:send-receive-real in the primodial thread.")
+;;   (let* ((cdat (rmt:get-conn sinfo apath dbname)))
+;;     (assert cdat "FATAL: rmt:send-receive-real called without the needed channels opened")
+;;     (let* ((uconn    (servdat-uconn sinfo)) ;; get the interface to ulex
+;; 	   ;; then send-receive using the ulex layer to host-port stored in cdat
+;; 	   (res      (send-receive uconn (conndat-hostport cdat) cmd params))
+;; 	   #;(th1      (make-thread (lambda ()
+;; 				    (set! res (send-receive uconn (conndat-hostport cdat) cmd params)))
+;; 				  "send-receive thread")))
+;;       ;; (thread-start! th1)
+;;       ;; (thread-join! th1)   ;; gratuitious thread stuff is so that mailbox is not used in primordial thead
+;;       ;; since we accessed the server we can bump the expires time up
+;;       (conndat-expires-set! cdat (+ (current-seconds)
+;; 				    (server:expiration-timeout)
+;; 				    -2)) ;; two second margin for network time misalignments etc.
+;;       res)))
+;; 
+;; db is at apath/.db/dbname, rid is an intermediary solution and will be removed
+;; sometime in the future.
+;;
+;; Purpose - call the main.db server and request a server be started
+;; for the given area path and dbname
+;;
+
+(define (rmt:print-db-stats)
+  (let ((fmtstr "~40a~7-d~9-d~20,2-f")) ;; "~20,2-f"
+    (debug:print 18 *default-log-port* "DB Stats, "(seconds->year-week/day-time (current-seconds))"\n=====================")
+    (debug:print 18 *default-log-port* (format #f "~40a~8a~10a~10a" "Cmd" "Count" "TotTime" "Avg"))
+    (for-each (lambda (cmd)
+		(let ((cmd-dat (hash-table-ref *db-stats* cmd)))
+		  (debug:print 18 *default-log-port* (format #f fmtstr cmd (vector-ref cmd-dat 0) (vector-ref cmd-dat 1) (/ (vector-ref cmd-dat 1)(vector-ref cmd-dat 0))))))
+	      (sort (hash-table-keys *db-stats*)
+		    (lambda (a b)
+		      (> (vector-ref (hash-table-ref *db-stats* a) 0)
+			 (vector-ref (hash-table-ref *db-stats* b) 0)))))))
+
+(define (rmt:get-max-query-average run-id)
+  (mutex-lock! *db-stats-mutex*)
+  (let* ((runkey (conc "run-id=" run-id " "))
+	 (cmds   (filter (lambda (x)
+			   (substring-index runkey x))
+			 (hash-table-keys *db-stats*)))
+	 (res    (if (null? cmds)
+		     (cons 'none 0)
+		     (let loop ((cmd (car cmds))
+				(tal (cdr cmds))
+				(max-cmd (car cmds))
+				(res 0))
+		       (let* ((cmd-dat (hash-table-ref *db-stats* cmd))
+			      (tot     (vector-ref cmd-dat 0))
+			      (curravg (/ (vector-ref cmd-dat 1) (vector-ref cmd-dat 0))) ;; count is never zero by construction
+			      (currmax (max res curravg))
+			      (newmax-cmd (if (> curravg res) cmd max-cmd)))
+			 (if (null? tal)
+			     (if (> tot 10)
+				 (cons newmax-cmd currmax)
+				 (cons 'none 0))
+			     (loop (car tal)(cdr tal) newmax-cmd currmax)))))))
+    (mutex-unlock! *db-stats-mutex*)
+    res))
+
+;; ;; host and port are used to ensure we are remove proper records
+;; (define (rmt:server-shutdown host port)
+;;   (let ((dbfile   (servdat-dbfile *db-serv-info*)))
+;;     (debug:print-info 0 *default-log-port* "dbfile is "dbfile)
+;;     (if dbfile
+;; 	(let* ((am-server  (args:get-arg "-server"))
+;; 	       (dbfile     (args:get-arg "-db"))
+;; 	       (apath      *toppath*)
+;; 	       #;(sinfo     *remotedat*)) ;; foundation for future fix
+;; 	  (if *dbstruct-db*
+;; 	      (let* ((dbdat      (db:get-dbdat *dbstruct-db* apath dbfile))
+;; 		     (db         (dbr:dbdat-db dbdat))
+;; 		     (inmem      (dbr:dbdat-db dbdat))   ;; WRONG
+;; 		     )
+;; 		;; do a final sync here
+;; 		(debug:print-info 0 *default-log-port* "Doing final sync for "apath" "dbfile" at "(current-seconds))
+;; 		(db:sync-inmem->disk *dbstruct-db* apath dbfile force-sync: #t)
+;; 		;; let's finalize here
+;; 		(debug:print-info 0 *default-log-port* "Finalizing db and inmem")
+;; 		(if (sqlite3:database? db)
+;; 		    (sqlite3:finalize! db)
+;; 		    (debug:print-info 0 *default-log-port* "in rmt:server-shutdown, db is not a database, not finalizing..."))
+;; 		(if (sqlite3:database? inmem)
+;; 		    (sqlite3:finalize! inmem)
+;; 		    (debug:print-info 0 *default-log-port* "in rmt:server-shutdown, inmem is not a database, not finalizing..."))
+;; 		(debug:print-info 0 *default-log-port* "Finalizing db and inmem complete"))
+;; 	      (debug:print-info 0 *default-log-port* "Db was never opened, no cleanup to do."))
+;; 	  (if (not am-server)
+;; 	      (debug:print-info 0 *default-log-port* "I am not a server, should NOT get here!")
+;; 	      (if (string-match ".*/main.db$" dbfile)
+;; 		  (let ((pkt-file (conc (get-pkts-dir *toppath*)
+;; 					"/" (servdat-uuid *db-serv-info*)
+;; 					".pkt")))
+;; 		    (debug:print-info 0 *default-log-port* "removing pkt "pkt-file)
+;; 		    (delete-file* pkt-file)
+;; 		    (debug:print-info 0 *default-log-port* "Releasing lock (if any) for "dbfile ", host "host", port "port)
+;; 		    (db:with-lock-db
+;; 		     (servdat-dbfile *db-serv-info*)
+;; 		     (lambda (dbh dbfile)
+;; 		       (db:release-lock dbh dbfile host port)))) ;; I'm not the server - should not have a lock to remove
+;; 		  (let* ((sdat *db-serv-info*) ;; we have a run-id server
+;; 			 (host (servdat-host sdat))
+;; 			 (port (servdat-port sdat))
+;; 			 (uuid (servdat-uuid sdat))
+;; 			 (res  (rmt:deregister-server *db-serv-info* *toppath* host port uuid dbfile)))
+;; 		    (debug:print-info 0 *default-log-port* "deregistered-server, res="res)
+;; 		    (debug:print-info 0 *default-log-port* "deregistering server "host":"port" with uuid "uuid)
+;; 		    )))))))
+;; 
+;; 
+;; (define (common:run-sync?)
+;;     ;; (and (common:on-homehost?)
+;;   (args:get-arg "-server"))
+;; 
+;; (define *rmt:run-mutex* (make-mutex))
+;; (define *rmt:run-flag* #f)
+;; 
+;; ;; Main entry point to start a server. was start-server
+;; (define (rmt:run hostn)
+;;   (mutex-lock! *rmt:run-mutex*)
+;;   (if *rmt:run-flag*
+;;       (begin
+;; 	(debug:print-warn 0 *default-log-port* "rmt:run already running.")
+;; 	(mutex-unlock! *rmt:run-mutex*))
+;;       (begin
+;; 	(set! *rmt:run-flag* #t)
+;; 	(mutex-unlock! *rmt:run-mutex*)
+;; 	;;  ;; Configurations for server
+;; 	;;  (tcp-buffer-size 2048)
+;; 	;;  (max-connections 2048) 
+;; 	(debug:print 2 *default-log-port* "PID: "(current-process-id)". Attempting to start the server ...")
+;; 	(if (and *db-serv-info*
+;; 		 (servdat-uconn *db-serv-info*))
+;; 	    (let* ((uconn (servdat-uconn *db-serv-info*)))
+;; 	      (wait-and-close uconn))
+;; 	    (let* ((port            (portlogger:open-run-close portlogger:find-port))
+;; 		   (handler-proc    (lambda (rem-host-port qrykey cmd params) ;;
+;; 				      (set! *db-last-access* (current-seconds))
+;; 				      (assert (list? params) "FATAL: handler called with non-list params")
+;; 				      (assert (args:get-arg "-server") "FATAL: handler called on non-server side. cmd="cmd", params="params)
+;; 				      (debug:print 0 *default-log-port* "handler call: "cmd", params="params)
+;; 				      (api:execute-requests *dbstruct-db* cmd params))))
+;; 	      ;; (api:process-request *dbstuct-db* 
+;; 	      (if (not *db-serv-info*)
+;; 		  (set! *db-serv-info* (make-servdat host: hostn port: port)))
+;; 	      (let* ((uconn (run-listener handler-proc port))
+;; 		     (rport (udat-port uconn))) ;; the real port
+;; 		(servdat-host-set! *db-serv-info* hostn)
+;; 		(servdat-port-set! *db-serv-info* rport)
+;; 		(servdat-uconn-set! *db-serv-info* uconn)
+;; 		(wait-and-close uconn)
+;; 		(db:print-current-query-stats)
+;; 		)))
+;; 	(let* ((host (servdat-host *db-serv-info*))
+;; 	       (port (servdat-port *db-serv-info*))
+;; 	       (mode (or (servdat-mode *db-serv-info*)
+;; 			 "non-db")))
+;; 	  ;; server exit stuff here
+;; 	  ;; (rmt:server-shutdown host port) - always do in on-exit
+;; 	  ;; (portlogger:open-run-close portlogger:set-port port "released") ;; moved to on-exit 
+;; 	  (debug:print-info 0 *default-log-port* "Server "host":"port" mode "mode"shutdown complete. Exiting")
+;; 	  ))))
+;; 
+;; ;;======================================================================
+;; ;; S E R V E R   U T I L I T I E S 
+;; ;;======================================================================
+;; 
+;; 
+;; ;;======================================================================
+;; ;; NEW SERVER METHOD
+;; ;;======================================================================
+;; 
+;; ;; only use for main.db - need to re-write some of this :(
+;; ;;
+;; (define (get-lock-db sdat dbfile host port)
+;;   (assert host "FATAL: get-lock-db called with host not set.")
+;;   (assert port "FATAL: get-lock-db called with port not set.")
+;;   (let* ((dbh (db:open-run-db dbfile db:initialize-db)) ;; open-run-db creates a standard db with schema used by all situations
+;; 	 (res (db:get-iam-server-lock dbh dbfile host port))
+;; 	 (uconn (servdat-uconn sdat)))
+;;     ;; res => list then already locked, check server is responsive
+;;     ;;     => #t then sucessfully got the lock
+;;     ;;     => #f reserved for future use as to indicate something went wrong
+;;     (match res
+;;       ((owner_pid owner_host owner_port event_time)
+;;        (if (server-ready? uconn (conc owner_host":"owner_port) "abc")
+;; 	   #f      ;; locked by someone else
+;; 	   (begin  ;; locked by someone dead and gone
+;; 	     (debug:print 0 *default-log-port* "WARNING: stale lock - have to steal it. This may fail.")
+;; 	     (db:steal-lock-db dbh dbfile port))))
+;;       (#t  #t) ;; placeholder so that we don't touch res if it is #t
+;;       (else (set! res #f)))
+;;     (sqlite3:finalize! dbh)
+;;     res))
+;; 
+;; 
+;; (define (register-server pkts-dir pkt-spec host port servkey ipaddr dbpath)
+;;   (let* ((pkt-dat `((host    . ,host)
+;; 		    (port    . ,port)
+;; 		    (servkey . ,servkey)
+;; 		    (pid     . ,(current-process-id))
+;; 		    (ipaddr  . ,ipaddr)
+;; 		    (dbpath  . ,dbpath)))
+;; 	 (uuid    (write-alist->pkt
+;; 		   pkts-dir
+;; 		   pkt-dat
+;; 		   pktspec: pkt-spec
+;; 		   ptype: 'server)))
+;;     (debug:print 0 *default-log-port* "Server on "host":"port" registered in pkt "uuid)
+;;     uuid))
+;; 
+;; (define (get-pkts-dir #!optional (apath #f))
+;;   (let* ((effective-toppath (or *toppath* apath)))
+;;     (assert effective-toppath
+;; 	    "ERROR: get-pkts-dir called without *toppath* set. Exiting.")
+;;     (let* ((pdir (conc effective-toppath "/.meta/srvpkts")))
+;;       (if (file-exists? pdir)
+;; 	  pdir
+;; 	  (begin
+;; 	    (handle-exceptions ;; this exception handler should NOT be needed but ...
+;; 		exn
+;; 		pdir
+;; 	      (create-directory pdir #t))
+;; 	    pdir)))))
+;; 
+;; ;; given a pkts dir read 
+;; ;;
+;; (define (get-all-server-pkts pktsdir-in pktspec)
+;;   (let* ((pktsdir  (if (file-exists? pktsdir-in)
+;; 		       pktsdir-in
+;; 		       (begin
+;; 			 (create-directory pktsdir-in #t)
+;; 			 pktsdir-in)))
+;; 	 (all-pkt-files (glob (conc pktsdir "/*.pkt"))))
+;;     (map (lambda (pkt-file)
+;; 	   (read-pkt->alist pkt-file pktspec: pktspec))
+;; 	 all-pkt-files)))
+;; 
+;; (define (server-address srv-pkt)
+;;   (conc (alist-ref 'host srv-pkt) ":"
+;; 	(alist-ref 'port srv-pkt)))
+	
+(define (server-ready? uconn host-port key) ;; server-address is host:port
+  #;(let* ((params `((cmd . ping)(key . ,key)))
+	 (data `((cmd . ping)
+		 (key . ,key)
+		 (params . ,params))) ;; I don't get it.
+	 (res  (send-receive uconn host-port 'ping data)))
+    (if (eq? res 'ack) ;; yep, likely it is who we want on the other end
+	res
+  #f))
+  #t)
+
+;; ; from the pkts return servers associated with dbpath
+;; ;; NOTE: Only one can be alive - have to check on each
+;; ;;       in the list of pkts returned
+;; ;;
+;; (define (get-viable-servers serv-pkts dbpath)
+;;   (let loop ((tail serv-pkts)
+;; 	     (res  '()))
+;;     (if (null? tail)
+;; 	res ;; NOTE: sort by age so oldest is considered first
+;; 	(let* ((spkt (car tail)))
+;; 	  (loop (cdr tail)
+;; 		(if (equal? dbpath (alist-ref 'dbpath spkt))
+;; 		    (cons spkt res)
+;; 		    res))))))
+;; 
+;; (define (remove-pkts-if-not-alive uconn serv-pkts)
+;;   (filter (lambda (pkt)
+;; 	    (let* ((host (alist-ref 'host pkt))
+;; 		   (port (alist-ref 'port pkt))
+;; 		   (host-port (conc host":"port))
+;; 		   (key  (alist-ref 'servkey  pkt))
+;; 		   (pktz (alist-ref 'Z        pkt))
+;; 		   (res  (server-ready? uconn host-port key)))
+;; 	      (if res
+;; 		  res
+;; 		  (let* ((pktsdir (get-pkts-dir *toppath*))
+;; 			 (pktpath (conc pktsdir"/"pktz".pkt")))
+;; 		    (debug:print 0 *default-log-port* "WARNING: pkt with no server "pktpath)
+;; 		    (delete-file* pktpath)
+;; 		    #f))))
+;; 	  serv-pkts))
+;; 
+;; ;; from viable servers get one that is alive and ready
+;; ;;
+;; (define (get-the-server uconn apath serv-pkts)
+;;   (let loop ((tail serv-pkts))
+;;     (if (null? tail)
+;; 	#f
+;; 	(let* ((spkt  (car tail))
+;; 	       (host  (alist-ref 'ipaddr spkt))
+;; 	       (port  (alist-ref 'port spkt))
+;; 	       (host-port (conc host":"port))
+;; 	       (dbpth (alist-ref 'dbpath spkt))
+;; 	       (srvkey (alist-ref 'Z spkt)) ;; (alist-ref 'srvkey spkt))
+;; 	       (addr  (server-address spkt)))
+;; 	  (if (server-ready? uconn host-port srvkey)
+;; 	      spkt
+;; 	      (loop (cdr tail)))))))
+;; 
+;; ;; am I the "first" in line server? I.e. my D card is smallest
+;; ;; use Z card as tie breaker
+;; ;;
+;; (define (get-best-candidate serv-pkts dbpath)
+;;   (if (null? serv-pkts)
+;;       #f
+;;       (let loop ((tail serv-pkts)
+;; 		 (best  (car serv-pkts)))
+;; 	(if (null? tail)
+;; 	    best
+;; 	    (let* ((candidate (car tail))
+;; 		   (candidate-bd (string->number (alist-ref 'D candidate)))
+;; 		   (best-bd      (string->number (alist-ref 'D best)))
+;; 		   ;; bigger number is younger
+;; 		   (candidate-z  (alist-ref 'Z candidate))
+;; 		   (best-z       (alist-ref 'Z best))
+;; 		   (new-best     (cond
+;; 				  ((> best-bd candidate-bd) ;; best is younger than candidate
+;; 				   candidate)
+;; 				  ((< best-bd candidate-bd) ;; candidate is younger than best
+;; 				   best)
+;; 				  (else
+;; 				   (if (string>=? best-z candidate-z)
+;; 				       best
+;; 				       candidate))))) ;; use Z card as tie breaker
+;; 	      (if (null? tail)
+;; 		  new-best
+;; 		  (loop (cdr tail) new-best)))))))
+;; 	  
+;; 
+;; ;;======================================================================
+;; ;; END NEW SERVER METHOD
+;; ;;======================================================================
+;; 
+;; ;; if .db/main.db check the pkts
+;; ;; 
+;; (define (rmt:wait-for-server pkts-dir db-file server-key)
+;;   (let* ((sdat *db-serv-info*))
+;;     (let loop ((start-time (current-seconds))
+;; 	       (changed    #t)
+;; 	       (last-sdat  "not this"))
+;;       (begin ;; let ((sdat #f))
+;; 	(thread-sleep! 0.01)
+;; 	(debug:print-info 0 *default-log-port* "Waiting for server alive signature")
+;; 	(mutex-lock! *heartbeat-mutex*)
+;; 	(set! sdat *db-serv-info*)
+;; 	(mutex-unlock! *heartbeat-mutex*)
+;; 	(if (and sdat
+;; 		 (not changed)
+;; 		 (> (- (current-seconds) start-time) 2))
+;; 	    (let* ((uconn (servdat-uconn sdat)))
+;; 	      (servdat-status-set! sdat 'iface-stable)
+;; 	      (debug:print-info 0 *default-log-port* "Received server alive signature, now attempting to lock in server")
+;; 	      ;; create a server pkt in *toppath*/.meta/srvpkts
+;; 	      
+;; 	      ;; TODO:
+;; 	      ;;   1. change sdat to stuct
+;; 	      ;;   2. add uuid to struct
+;; 	      ;;   3. update uuid in sdat here
+;; 	      ;;
+;; 	      (servdat-uuid-set! sdat
+;; 				 (register-server
+;; 				  pkts-dir *srvpktspec*
+;; 				  (get-host-name)
+;; 				  (servdat-port sdat) server-key
+;; 				  (servdat-host sdat) db-file))
+;; 	      ;; (set! *my-signature* (servdat-uuid sdat)) ;; replace with Z, no, stick with proper key
+;; 	      ;; now read pkts and see if we are a contender
+;; 	      (let* ((all-pkts     (get-all-server-pkts pkts-dir *srvpktspec*))
+;; 		     (viables      (get-viable-servers all-pkts db-file))
+;; 		     (alive        (remove-pkts-if-not-alive uconn viables))
+;; 		     (best-srv     (get-best-candidate alive db-file))
+;; 		     (best-srv-key (if best-srv (alist-ref 'servkey best-srv) #f))
+;; 		     (i-am-srv     (equal? best-srv-key server-key))
+;; 		     (delete-pkt   (lambda ()
+;; 				     (let* ((pktfile (conc (get-pkts-dir *toppath*)
+;; 							 "/" (servdat-uuid *db-serv-info*)
+;; 							 ".pkt")))
+;; 				       (debug:print-info 0 *default-log-port* "Attempting to remove bogus pkt file "pktfile)
+;; 				       (delete-file* pktfile))))) ;; remove immediately instead of waiting for on-exit
+;; 		(debug:print 0 *default-log-port* "best-srv-key: "best-srv-key", server-key: "server-key", i-am-srv: "i-am-srv)
+;; 		;; am I the best-srv, compare server-keys to know
+;; 		(if i-am-srv
+;; 		    (if (get-lock-db sdat db-file (servdat-host sdat)(servdat-port sdat)) ;; (db:get-iam-server-lock *dbstruct-db* *toppath* run-id)
+;; 			(begin
+;; 			  (debug:print-info 0 *default-log-port* "I'm the server!")
+;; 			  (servdat-dbfile-set! sdat db-file)
+;; 			  (servdat-status-set! sdat 'db-locked))
+;; 			(begin
+;; 			  (debug:print-info 0 *default-log-port* "I'm not the server, exiting.")
+;; 			  (bdat-time-to-exit-set! *bdat* #t)
+;; 			  (delete-pkt)
+;; 			  (thread-sleep! 0.2)
+;; 			  (exit)))
+;; 		    (begin
+;; 		      (debug:print-info 0 *default-log-port*
+;; 				   "Keys do not match "best-srv-key", "server-key", exiting.")
+;; 		      (bdat-time-to-exit-set! *bdat* #t)
+;; 		      (delete-pkt)
+;; 		      (thread-sleep! 0.2)
+;; 		      (exit)))
+;; 		sdat))
+;; 	    (begin ;; sdat not yet contains server info
+;; 	      (debug:print-info 0 *default-log-port* "Still waiting, last-sdat=" last-sdat)
+;; 	      (sleep 4)
+;; 	      (if (> (- (current-seconds) start-time) 120) ;; been waiting for two minutes
+;; 		  (begin
+;; 		    (debug:print-error 0 *default-log-port* "transport appears to have died, exiting server")
+;; 		    (exit))
+;; 		  (loop start-time
+;; 			(equal? sdat last-sdat)
+;; 			sdat))))))))
+;; 
+;; (define (rmt:register-server sinfo apath iface port server-key dbname)
+;;   (servdat-conns sinfo) ;; just checking types
+;;   (rmt:open-main-connection sinfo apath) ;; we need a channel to main.db
+;;   (rmt:send-receive-real sinfo apath      ;; params: host port servkey pid ipaddr dbpath
+;; 			 (db:run-id->dbname #f)
+;; 			 'register-server `(,iface
+;; 					    ,port
+;; 					    ,server-key
+;; 					    ,(current-process-id)
+;; 					    ,iface
+;; 					    ,apath
+;; 					    ,dbname)))
+;; 
+;; (define (rmt:get-count-servers sinfo apath)
+;;   (servdat-conns sinfo) ;; just checking types
+;;   (rmt:open-main-connection sinfo apath) ;; we need a channel to main.db
+;;   (rmt:send-receive-real sinfo apath      ;; params: host port servkey pid ipaddr dbpath
+;; 			 (db:run-id->dbname #f)
+;; 			 'get-count-servers `(,apath)))
+
+(define (rmt:get-servers-info apath)
+  (rmt:send-receive 'get-servers-info #f `(,apath)))
+
+;; (define (rmt:deregister-server db-serv-info apath iface port server-key dbname)
+;;   (rmt:open-main-connection db-serv-info apath) ;; we need a channel to main.db
+;;   (rmt:send-receive-real db-serv-info apath      ;; params: host port servkey pid ipaddr dbpath
+;;                          (db:run-id->dbname #f)
+;;                          'deregister-server `(,iface
+;;                                               ,port
+;;                                               ,server-key
+;;                                               ,(current-process-id)
+;;                                               ,iface
+;;                                               ,apath
+;;                                               ,dbname)))
+;; 
+;; (define (rmt:wait-for-stable-interface #!optional (num-tries-allowed 100))
+;;   ;; wait until *db-serv-info* stops changing
+;;   (let* ((stime (current-seconds)))
+;;     (let loop ((last-host  #f)
+;; 	       (last-port  #f)
+;; 	       (tries 0))
+;;       (let* ((curr-host (and *db-serv-info* (servdat-host *db-serv-info*)))
+;; 	     (curr-port (and *db-serv-info* (servdat-port *db-serv-info*))))
+;; 	;; first we verify port and interface, update *db-serv-info* in need be.
+;; 	(cond
+;; 	 ((> tries num-tries-allowed)
+;; 	  (debug:print 0 *default-log-port* "rmt:keep-running, giving up after trying for several minutes.")
+;; 	  (exit 1))
+;; 	 ((not *db-serv-info*)
+;; 	  (thread-sleep! 0.25)
+;; 	  (loop curr-host curr-port (+ tries 1)))
+;; 	 ((or (not last-host)(not last-port))
+;; 	  (debug:print 0 *default-log-port* "rmt:keep-running, still no interface, tries="tries)
+;; 	  (thread-sleep! 0.25)
+;; 	  (loop curr-host curr-port (+ tries 1)))
+;; 	 ((or (not (equal? last-host curr-host))
+;; 	      (not (equal? last-port curr-port)))
+;; 	  (debug:print-info 0 *default-log-port* "WARNING: interface changed, refreshing iface and port info")
+;; 	  (thread-sleep! 0.25)
+;; 	  (loop curr-host curr-port (+ tries 1)))
+;; 	 ((< (- (current-seconds) stime) 1) ;; keep up the looping until at least 3 seconds have passed
+;; 	  (thread-sleep! 0.5)
+;; 	  (loop curr-host curr-port (+ tries 1)))
+;; 	 (else
+;; 	  (rmt:get-signature) ;; sets *my-signature* as side effect
+;; 	  (servdat-status-set! *db-serv-info* 'interface-stable)
+;; 	  (debug:print 0 *default-log-port*
+;; 		       "SERVER STARTED: " curr-host
+;; 		       ":" curr-port
+;; 		       " AT " (current-seconds) " server signature: " *my-signature*
+;; 		       " with "(servdat-trynum *db-serv-info*)" port changes")
+;; 	  (flush-output *default-log-port*)
+;; 	  #t))))))
+;; 
+;; ;; run rmt:keep-running in a parallel thread to monitor that the db is being 
+;; ;; used and to shutdown after sometime if it is not.
+;; ;;
+;; (define (rmt:keep-running dbname) 
+;;   ;; if none running or if > 20 seconds since 
+;;   ;; server last used then start shutdown
+;;   ;; This thread waits for the server to come alive
+;;   (debug:print-info 0 *default-log-port* "Starting the sync-back, keep alive thread in server")
+;; 
+;;   (let* ((sinfo             *db-serv-info*)
+;; 	 (server-start-time (current-seconds))
+;; 	 (pkts-dir          (get-pkts-dir))
+;; 	 (server-key        (rmt:get-signature)) ;; This servers key
+;; 	 (is-main           (equal? (args:get-arg "-db") ".db/main.db"))
+;; 	 (last-access       0)
+;; 	 (server-timeout    (server:expiration-timeout))
+;; 	 (shutdown-server-sequence (lambda (host port)
+;; 				     (set! *unclean-shutdown* #f) ;; Should not be needed anymore
+;; 				     (debug:print-info 0 *default-log-port* "Starting to shutdown the server. pid="(current-process-id))
+;; 				     ;; (rmt:server-shutdown host port) -- called in on-exit
+;; 				     ;; (portlogger:open-run-close portlogger:set-port port "released") called in on-exit
+;; 				     (exit)))
+;; 	 (timed-out?        (lambda ()
+;; 			      (<= (+ last-access server-timeout)
+;; 				  (current-seconds)))))
+;;     (servdat-dbfile-set! *db-serv-info* (args:get-arg "-db"))
+;;     ;; main and run db servers have both got wait logic (could/should merge it)
+;;     (if is-main
+;; 	(rmt:wait-for-server pkts-dir dbname server-key)
+;; 	(rmt:wait-for-stable-interface))
+;;     ;; this is our forever loop
+;;     (let* ((iface (servdat-host *db-serv-info*))
+;; 	   (port  (servdat-port *db-serv-info*))
+;; 	   (uconn (servdat-uconn *db-serv-info*)))
+;;       (let loop ((count          0)
+;; 		 (bad-sync-count 0)
+;; 		 (start-time     (current-milliseconds)))
+;; 	(if (and (not is-main)
+;; 		 (common:low-noise-print 60 "servdat-status"))
+;; 	    (debug:print-info 0 *default-log-port* "servdat-status is " (servdat-status *db-serv-info*)))
+;; 
+;; 	(mutex-lock! *heartbeat-mutex*)
+;; 	;; set up the database handle
+;; 	(if (not *dbstruct-db*) ;; no db opened yet, open the db and register with main if appropriate
+;; 	      (let ((watchdog (bdat-watchdog *bdat*)))
+;; 		(debug:print 0 *default-log-port* "SERVER: dbprep")
+;; 		(db:setup dbname) ;; sets *dbstruct-db* as side effect
+;; 		(servdat-status-set! *db-serv-info* 'db-opened)
+;; 		;; IFF I'm not main, call into main and register self
+;; 		(if (not is-main)
+;; 		    (let ((res (rmt:register-server sinfo
+;; 						    *toppath* iface port
+;; 						    server-key dbname)))
+;; 		      (if res ;; we are the server
+;; 			  (servdat-status-set! *db-serv-info* 'have-interface-and-db)
+;; 			  ;; now check that the db locker is alive, clear it out if not
+;; 			  (let* ((serv-info (rmt:server-info *toppath* dbname)))
+;; 			    (match serv-info
+;; 				   ((host port servkey pid ipaddr apath dbpath)
+;; 				    (if (not (server-ready? uconn (conc host":"port) servkey))
+;; 					(begin
+;; 					  (debug:print-info 0 *default-log-port* "Server registered but not alive. Removing and trying again.")
+;; 					  (rmt:deregister-server sinfo apath host port servkey dbpath) ;; servkey pid ipaddr apath dbpath)
+;; 					  (loop (+ count 1) bad-sync-count start-time))))
+;; 				   (else
+;; 				    (debug:print 0 *default-log-port* "We are not the server for "dbname", exiting. Server info is: "serv-info)
+;; 				    (exit)))))))
+;; 		(debug:print 0 *default-log-port*
+;; 			     "SERVER: running, db "dbname" opened, megatest version: "
+;; 			   (common:get-full-version))
+;; 	      ;; start the watchdog
+;; 
+;; 	      ;; is this really needed?
+;; 	      
+;; 	      #;(if watchdog
+;; 		  (if (not (member (thread-state watchdog)
+;; 				   '(ready running blocked
+;; 					   sleeping dead)))
+;; 		      (begin
+;; 			(debug:print-info 0 *default-log-port* "Starting watchdog thread (in state "(thread-state watchdog)")")
+;; 			(thread-start! watchdog))
+;; 		      (debug:print-info 0 *default-log-port* "Not starting watchdog thread (in state "(thread-state watchdog)")"))
+;; 		  (debug:print 0 *default-log-port* "ERROR: *watchdog* not setup, cannot start it."))
+;; 	      #;(loop (+ count 1) bad-sync-count start-time)
+;; 	      ))
+;; 	
+;; 	(db:sync-inmem->disk *dbstruct-db* *toppath* dbname force-sync: #t)
+;; 	
+;; 	(mutex-unlock! *heartbeat-mutex*)
+;; 	
+;; 	;; when things go wrong we don't want to be doing the various
+;; 	;; queries too often so we strive to run this stuff only every
+;; 	;; four seconds or so.
+;; 	(let* ((sync-time (- (current-milliseconds) start-time))
+;; 	       (rem-time  (quotient (- 4000 sync-time) 1000)))
+;; 	  (if (and (<= rem-time 4)
+;; 		   (>  rem-time 0))
+;; 	      (thread-sleep! rem-time)))
+;;     
+;; 	;; Transfer *db-last-access* to last-access to use in checking that we are still alive
+;; 	(set! last-access *db-last-access*)
+;; 	
+;; 	(if (< count 1) ;; 3x3 = 9 secs aprox
+;; 	    (loop (+ count 1) bad-sync-count (current-milliseconds)))
+;; 	
+;; 	(if (common:low-noise-print 60 "dbstats")
+;; 	    (begin
+;; 	      (debug:print 0 *default-log-port* "Server stats:")
+;; 	      (db:print-current-query-stats)))
+;; 	(let* ((hrs-since-start  (/ (- (current-seconds) server-start-time) 3600)))
+;; 	  (cond
+;; 	   ((not *server-run*)
+;; 	    (debug:print-info 0 *default-log-port* "*server-run* set to #f. Shutting down.")
+;; 	    (shutdown-server-sequence (get-host-name) port))
+;; 	   ((timed-out?)
+;; 	    (debug:print-info 0 *default-log-port* "Server timed out. seconds since last db access: " (- (current-seconds) last-access))
+;; 	    (shutdown-server-sequence (get-host-name) port))
+;; 	   ((and *server-run*
+;; 		 (or (not (timed-out?))
+;; 		     (if is-main ;; do not exit if there are other servers (keep main open until all others gone)
+;; 			 (> (rmt:get-count-servers sinfo *toppath*) 1)
+;; 			 #f)))
+;; 	    (if (common:low-noise-print 120 "server continuing")
+;; 		(debug:print-info 0 *default-log-port* "Server continuing, seconds since last db access: " (- (current-seconds) last-access)))
+;; 	    (loop 0 bad-sync-count (current-milliseconds)))
+;; 	   (else
+;; 	    (set! *unclean-shutdown* #f)
+;; 	    (debug:print-info 0 *default-log-port* "Server timed out. seconds since last db access: " (- (current-seconds) last-access))
+;; 	    (shutdown-server-sequence (get-host-name) port)
+;; 	    #;(debug:print-info 0 *default-log-port* "Sending 'quit to server, received: "
+;; 			      (open-send-receive-nn (conc iface":"port)      ;; do this here and not in server-shutdown
+;; 						    (sexpr->string 'quit))))))))))
+;; 
+;; (define (rmt:get-reasonable-hostname)
+;;   (let* ((inhost (or (args:get-arg "-server") "-")))
+;;     (if (equal? inhost "-")
+;; 	(get-host-name)
+;; 	inhost)))
+;; 
+;; Call this to start the actual server
+;;
+;; all routes though here end in exit ...
+;;
+;; This is the point at which servers are started
+;;
+(define (rmt:server-launch dbname)
+  (debug:print-info 0 *default-log-port* "Entered rmt:server-launch")
+  #;(let* ((th2 (make-thread (lambda ()
+			     (debug:print-info 0 *default-log-port* "Server run thread started")
+			     (rmt:run (rmt:get-reasonable-hostname)))
+			   "Server run"))
+	 (th3 (make-thread (lambda ()
+			     (debug:print-info 0 *default-log-port* "Server monitor thread started")
+			     (if (args:get-arg "-server")
+				 (rmt:keep-running dbname)))
+			     "Keep running")))
+    (thread-start! th2)
+    (thread-sleep! 0.252) ;; give the server time to settle before starting the keep-running monitor.
+    (thread-start! th3)
+    (set! *didsomething* #t)
+    (thread-join! th2)
+    (thread-join! th3))
+  #f)
+;; 	    
+;; ;;======================================================================
+;; ;;  S E R V E R   -  D I R E C T   C A L L S
+;; ;;======================================================================
+;; 
+;; (define (rmt:kill-server run-id)
+;;   (rmt:send-receive 'kill-server #f (list run-id)))
+;; 
+;; (define (rmt:start-server run-id)
+;;   (rmt:send-receive 'start-server #f (list run-id)))
+;; 
+;; (define (rmt:server-info apath dbname)
+;;   (rmt:send-receive 'get-server-info #f (list apath dbname)))
+;; 
+;; ;;======================================================================
+;; ;; Nanomsg transport
+;; ;;======================================================================
+;; 
+;; #;(define (is-port-in-use port-num)
+;;   (let* ((ret #f))
+;;     (let-values (((inp oup pid)
+;; 		  (process "netstat" (list  "-tulpn" ))))
+;;       (let loop ((inl (read-line inp)))
+;;         (if (not (eof-object? inl))
+;;             (begin 
+;; 	      (if (string-search (regexp (conc ":" port-num)) inl)
+;; 		  (begin
+;; 					;(print "Output: "  inl)
+;; 		    (set! ret  #t))
+;; 		  (loop (read-line inp)))))))
+;;     ret))
+;; 
+;; #;(define (open-nn-connection host-port)
+;;   (let ((req  (make-req-socket))
+;;         (uri  (conc "tcp://" host-port)))
+;;     (nng-dial req uri)
+;;     (socket-set! req 'nng/recvtimeo 2000)
+;;     req))
+;; 
+;; #;(define (send-receive-nn req msg)
+;;   (nng-send req msg)
+;;   (nng-recv req))
+;; 
+;; #;(define (close-nn-connection req)
+;;   (nng-close! req))
+;;   
+;; ;; ;; open connection to server, send message, close connection
+;; ;; ;;
+;; ;; (define (open-send-close-nn host-port msg #!key (timeout 3) ) ;; default timeout is 3 seconds
+;; ;;   (let ((req  (make-req-socket 'req))
+;; ;;         (uri  (conc "tcp://" host-port))
+;; ;;         (res  #f)
+;; ;;         ;; (contacts (alist-ref 'contact attrib))
+;; ;;         ;; (mode (alist-ref 'mode attrib))
+;; ;; 	)
+;; ;;     (socket-set! req 'nng/recvtimeo 2000)
+;; ;;     (handle-exceptions
+;; ;;      exn
+;; ;;      (let ((emsg ((condition-property-accessor 'exn 'message) exn)))
+;; ;;        ;; Send notification       
+;; ;;        (debug:print 0 *default-log-port* "ERROR: Failed to connect / send to " uri " message was \"" emsg "\"" )
+;; ;;        #f)
+;; ;;      (nng-dial req uri)
+;; ;;      ;; (print "Connected to the server " )
+;; ;;      (nng-send req msg)
+;; ;;      ;; (print "Request Sent")  
+;; ;;      (let* ((th1  (make-thread (lambda ()
+;; ;;                                  (let ((resp (nng-recv req)))
+;; ;;                                    (nng-close! req)
+;; ;;                                    (set! res (if (equal? resp "ok")
+;; ;;                                                  #t
+;; ;;                                                  #f))))
+;; ;;                                "recv thread"))
+;; ;;             (th2 (make-thread (lambda ()
+;; ;;                                 (thread-sleep! timeout)
+;; ;;                                 (thread-terminate! th1))
+;; ;; 			      "timer thread")))
+;; ;;        (thread-start! th1)
+;; ;;        (thread-start! th2)
+;; ;;        (thread-join! th1)
+;; ;;        res))))
+;; ;; 
+;; #;(define (open-send-receive-nn host-port msg #!key (timeout 3) ) ;; default timeout is 3 seconds
+;;   (let ((req  (make-req-socket))
+;;         (uri  (conc "tcp://" host-port))
+;;         (res  #f)) 
+;;     (handle-exceptions
+;;      exn
+;;      (let ((emsg ((condition-property-accessor 'exn 'message) exn)))
+;;        ;; Send notification      
+;;        (debug:print 0 *default-log-port* "ERROR: Failed to connect / send to " uri " message was \"" emsg "\", exn=" exn)
+;;        #f)
+;;      (nng-dial req uri)
+;;      (nng-send req msg)
+;;      (let* ((th1  (make-thread (lambda ()
+;;                                  (let ((resp (nng-recv req)))
+;;                                    (nng-close! req)
+;;                                    ;; (print resp)
+;;                                    (set! res resp)))
+;;                                "recv thread"))
+;;             (th2 (make-thread (lambda ()
+;;                                 (thread-sleep! timeout)
+;;                                 (thread-terminate! th1))
+;;                              "timer thread")))
+;;        (thread-start! th1)
+;;        (thread-start! th2)
+;;        (thread-join! th1)
+;;        res))))
+;; 
+;; ;;======================================================================
+;; ;; S E R V E R   U T I L I T I E S 
+;; ;;======================================================================
+;; 
+;; ;; run ping in separate process, safest way in some cases
+;; ;;
+;; #;(define (server:ping-server ifaceport)
+;;   (with-input-from-pipe 
+;;    (conc (common:get-megatest-exe) " -ping " ifaceport)
+;;    (lambda ()
+;;      (let loop ((inl (read-line))
+;; 		(res "NOREPLY"))
+;;        (if (eof-object? inl)
+;; 	   (case (string->symbol res)
+;; 	     ((NOREPLY)  #f)
+;; 	     ((LOGIN_OK) #t)
+;; 	     (else       #f))
+;; 	   (loop (read-line) inl))))))
+;; 
+;; ;; NOT USED (well, ok, reference in rpc-transport but otherwise not used).
+;; ;;
+;; #;(define (server:login toppath)
+;;   (lambda (toppath)
+;;     (set! *db-last-access* (current-seconds)) ;; might not be needed.
+;;     (if (equal? *toppath* toppath)
+;; 	#t
+;; 	#f)))
+;; 
+;; ;; (define server:sync-lock-token "SERVER_SYNC_LOCK")
+;; ;; (define (server:release-sync-lock)
+;; ;;   (db:no-sync-del! *no-sync-db* server:sync-lock-token))
+;; ;; (define (server:have-sync-lock?)
+;; ;;   (let* ((have-lock-pair (db:no-sync-get-lock *no-sync-db* server:sync-lock-token))
+;; ;;          (have-lock?     (car have-lock-pair))
+;; ;;          (lock-time      (cdr have-lock-pair))
+;; ;;          (lock-age       (- (current-seconds) lock-time)))
+;; ;;     (cond
+;; ;;      (have-lock? #t)
+;; ;;      ((>lock-age
+;; ;;        (* 3 (configf:lookup-number *configdat* "server" "minimum-intersync-delay" default: 180)))
+;; ;;       (server:release-sync-lock)
+;; ;;       (server:have-sync-lock?))
+;; ;;      (else #f))))
+
+)

ADDED   ulex-none/ulex.scm
Index: ulex-none/ulex.scm
==================================================================
--- /dev/null
+++ ulex-none/ulex.scm
@@ -0,0 +1,569 @@
+;; ulex: Distributed sqlite3 db
+;;;
+;; Copyright (C) 2018-2021 Matt Welland
+;; Redistribution and use in source and binary forms, with or without
+;; modification, is permitted.
+;;
+;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
+;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+;; ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE
+;; LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+;; CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
+;; OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+;; BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+;; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+;; USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+;; DAMAGE.
+
+;;======================================================================
+;; ABOUT:
+;;   See README in the distribution at https://www.kiatoa.com/fossils/ulex
+;; NOTES:
+;;   Why sql-de-lite and not say, dbi?  - performance mostly, then simplicity.
+;;
+;;======================================================================
+
+(module ulex
+	*
+	#;(
+     
+     ;; NOTE: looking for the handler proc - find the run-listener :)
+     
+     run-listener     ;; (run-listener handler-proc [port]) => uconn
+
+     ;; NOTE: handler-proc params;
+     ;;       (handler-proc rem-host-port qrykey cmd params)
+
+     send-receive     ;; (send-receive uconn host-port cmd data)
+
+     ;; NOTE: cmd can be any plain text symbol except for these;
+     ;;         'ping 'ack 'goodbye 'response
+     
+     set-work-handler ;; (set-work-handler proc)
+
+     wait-and-close   ;; (wait-and-close uconn)
+
+     ulex-listener?
+     
+     ;; needed to get the interface:port that was automatically found
+     udat-port
+     udat-host-port
+     
+     ;; for testing only
+     ;; pp-uconn
+     
+     ;; parameters
+     work-method   ;; parameter; 'threads, 'mailbox, 'limited, 'direct
+     return-method ;; parameter; 'mailbox, 'polling, 'direct
+     )
+
+(import scheme
+	chicken.base
+	chicken.file
+	chicken.io
+	chicken.time
+	chicken.condition
+	chicken.string
+	chicken.sort
+	chicken.pretty-print
+	
+	address-info
+	mailbox
+	matchable
+	;; queues
+	regex
+	regex-case
+	simple-exceptions
+	s11n
+	srfi-1
+	srfi-18
+	srfi-4
+	srfi-69
+	system-information
+	tcp6
+	typed-records
+	)
+
+;; ;; udat struct, used by both caller and callee
+;; ;; instantiated as uconn by convention
+;; ;;
+;; (defstruct udat
+;;   ;; the listener side
+;;   (port #f)
+;;   (host-port #f)
+;;   (socket #f)
+;;   ;; the peers
+;;   (peers  (make-hash-table)) ;; host:port->peer
+;;   ;; work handling
+;;   (work-queue (make-mailbox))
+;;   (work-proc  #f)                ;; set by user
+;;   (cnum       0)                 ;; cookie number
+;;   (mboxes     (make-hash-table)) ;; for the replies
+;;   (avail-cmboxes '())            ;; list of (<cookie> . <mbox>) for re-use
+;;   ;; threads
+;;   (numthreads 10)
+;;   (cmd-thread #f)
+;;   (work-queue-thread #f)
+;;   (num-threads-running 0)
+;;   ) 
+;; 
+;; ;; Parameters
+;; 
+;; ;; work-method:
+;; (define work-method (make-parameter 'mailbox))
+;; ;;    mailbox - all rdat goes through mailbox
+;; ;;    threads - all rdat immediately executed in new thread
+;; ;;    direct  - no queuing
+;; ;;
+;; 
+;; ;; return-method, return the result to waiting send-receive:
+;; (define return-method (make-parameter 'mailbox))
+;; ;;    mailbox - create a mailbox and use it for passing returning results to send-receive
+;; ;;    polling - put the result in a hash table keyed by qrykey and send-receive can poll it for result
+;; ;;    direct  - no queuing, result is passed back in single tcp connection
+;; ;;
+;; 
+;; ;; ;; struct for keeping track of others we are talking to
+;; ;; ;;
+;; ;; (defstruct pdat
+;; ;;   (host-port  #f)
+;; ;;   (conns      '()) ;; list of pcon structs, pop one off when calling the peer
+;; ;;   )
+;; ;; 
+;; ;; ;; struct for peer connections, keep track of expiration etc.
+;; ;; ;;
+;; ;; (defstruct pcon
+;; ;;   (inp #f)
+;; ;;   (oup #f)
+;; ;;   (exp (+ (current-seconds) 59)) ;; expires at this time, set to (+ (current-seconds) 59)
+;; ;;   (lifetime (+ (current-seconds) 600)) ;; throw away and create new after five minutes
+;; ;;   )
+;; 
+;; ;;======================================================================
+;; ;; listener
+;; ;;======================================================================
+;; 
+;; ;; is uconn a ulex connector (listener)
+;; ;;
+;; (define (ulex-listener? uconn)
+;;   (udat? uconn))
+;; 
+;; ;; create a tcp listener and return a populated udat struct with
+;; ;; my port, address, hostname, pid etc.
+;; ;; return #f if fail to find a port to allocate.
+;; ;;
+;; ;;  if udata-in is #f create the record
+;; ;;  if there is already a serv-listener return the udata
+;; ;;
+;; (define (setup-listener uconn #!optional (port 4242))
+;;   (handle-exceptions
+;;    exn
+;;    (if (< port 65535)
+;;        (setup-listener uconn (+ port 1))
+;;        #f)
+;;    (connect-listener uconn port)))
+;; 
+;; (define (connect-listener uconn port)
+;;   ;; (tcp-listener-socket LISTENER)(socket-name so)
+;;   ;; sockaddr-address, sockaddr-port, sockaddr->string
+;;   (let* ((tlsn (tcp-listen port 1000 #f)) ;; (tcp-listen TCPPORT [BACKLOG [HOST]])
+;; 	 (addr (get-my-best-address))) ;; (hostinfo-addresses (host-information (current-hostname)))
+;;     (udat-port-set!      uconn port)
+;;     (udat-host-port-set! uconn (conc addr":"port))
+;;     (udat-socket-set!    uconn tlsn)
+;;     uconn))
+;; 
+;; ;; run-listener does all the work of starting a listener in a thread
+;; ;; it then returns control
+;; ;;
+;; (define (run-listener handler-proc #!optional (port-suggestion 4242))
+;;   (let* ((uconn (make-udat)))
+;;     (udat-work-proc-set! uconn handler-proc)
+;;     (if (setup-listener uconn port-suggestion)
+;; 	(let* ((th1 (make-thread (lambda ()(ulex-cmd-loop uconn)) "Ulex command loop"))
+;; 	       (th2 (make-thread (lambda ()
+;; 				   (case (work-method)
+;; 				     ((mailbox limited)
+;; 				      (process-work-queue uconn))))
+;; 				 "Ulex work queue processor")))
+;; 	  ;; (tcp-buffer-size 2048)
+;; 	  (thread-start! th1)
+;; 	  (thread-start! th2)
+;; 	  (udat-cmd-thread-set! uconn th1)
+;; 	  (udat-work-queue-thread-set! uconn th2)
+;; 	  (print "cmd loop and process workers started, listening on "(udat-host-port uconn)".")
+;; 	  uconn)
+;; 	(assert #f "ERROR: run-listener called without proper setup."))))
+;; 
+;; (define (wait-and-close uconn)
+;;   (thread-join! (udat-cmd-thread uconn))
+;;   (tcp-close (udat-socket uconn)))
+;; 
+;; ;;======================================================================
+;; ;; peers and connections
+;; ;;======================================================================
+;; 
+;; (define *send-mutex* (make-mutex))
+;; 
+;; ;; send structured data to recipient
+;; ;;
+;; ;;  NOTE: qrykey is what was called the "cookie" previously
+;; ;;
+;; ;;     retval tells send to expect and wait for return data (one line) and return it or time out
+;; ;;       this is for ping where we don't want to necessarily have set up our own server yet.
+;; ;;
+;; ;; NOTE: see below for beginnings of code to allow re-use of tcp connections
+;; ;;        - I believe (without substantial evidence) that re-using connections will
+;; ;;          be beneficial ...
+;; ;;
+;; (define (send udata host-port qrykey cmd params)
+;;   (let* ((my-host-port (udat-host-port udata))          ;; remote will return to this
+;; 	 (isme         #f #;(equal? host-port my-host-port)) ;; calling myself?
+;; 	 ;; dat is a self-contained work block that can be sent or handled locally
+;; 	 (dat          (list my-host-port qrykey cmd params #;(cons (current-seconds)(current-milliseconds)))))
+;;     (cond
+;;      (isme (ulex-handler udata dat)) ;; no transmission needed
+;;      (else
+;;       (handle-exceptions ;; TODO - MAKE THIS EXCEPTION CMD SPECIFIC?
+;; 	  exn
+;; 	  (message exn)
+;; 	(begin
+;; 	  ;; (mutex-lock! *send-mutex*) ;; DOESN'T SEEM TO HELP
+;; 	  (let-values (((inp oup)(tcp-connect host-port)))
+;; 	    (let ((res (if (and inp oup)
+;; 			   (begin
+;; 			     (serialize dat oup)
+;; 			     (close-output-port oup)
+;; 			     (deserialize inp)
+;; 			     )
+;; 			   (begin
+;; 			     (print "ERROR: send called but no receiver has been setup. Please call setup first!")
+;; 			     #f))))
+;; 	      (close-input-port inp)
+;; 	      ;; (mutex-unlock! *send-mutex*) ;; DOESN'T SEEM TO HELP
+;; 	      res)))))))) ;; res will always be 'ack unless return-method is direct
+;; 
+;; (define (send-via-polling uconn host-port cmd data)
+;;   (let* ((qrykey (make-cookie uconn))
+;; 	 (sres   (send uconn host-port qrykey cmd data)))
+;;     (case sres
+;;       ((ack)
+;;        (let loop ((start-time (current-milliseconds)))
+;; 	 (if (> (current-milliseconds)(+ start-time 10000)) ;; ten seconds timeout
+;; 	     (begin
+;; 	       (print "ULEX ERROR: timed out waiting for response from "host-port", "cmd" "data)
+;; 	       #f)
+;; 	     (let* ((result (hash-table-ref/default (udat-mboxes uconn) qrykey #f))) ;; NOTE: we are re-using mboxes hash
+;; 	       (if result ;; result is '(status . result-data) or #f for nothing yet
+;; 		   (begin
+;; 		     (hash-table-delete! (udat-mboxes uconn) qrykey)
+;; 		     (cdr result))
+;; 		   (begin
+;; 		     (thread-sleep! 0.01)
+;; 		     (loop start-time)))))))
+;;       (else
+;;        (print "ULEX ERROR: Communication failed? sres="sres)
+;;        #f))))
+;; 
+;; (define (send-via-mailbox uconn host-port cmd data)
+;;   (let* ((cmbox     (get-cmbox uconn)) ;; would it be better to keep a stack of mboxes to reuse?
+;; 	 (qrykey    (car cmbox))
+;; 	 (mbox      (cdr cmbox))
+;; 	 (mbox-time (current-milliseconds))
+;; 	 (sres      (send uconn host-port qrykey cmd data))) ;; short res
+;;     (if (eq? sres 'ack)
+;; 	(let* ((mbox-timeout-secs    120 #;(if (eq? 'primordial (thread-name (current-thread)))
+;; 				     #f
+;; 				     120)) ;; timeout)
+;; 	       (mbox-timeout-result 'MBOX_TIMEOUT)
+;; 	       (res                  (mailbox-receive! mbox mbox-timeout-secs mbox-timeout-result))
+;; 	       (mbox-receive-time    (current-milliseconds)))
+;; 	  ;; (put-cmbox uconn cmbox) ;; reuse mbox and cookie. is it worth it?
+;; 	  (hash-table-delete! (udat-mboxes uconn) qrykey)
+;; 	  (if (eq? res 'MBOX_TIMEOUT)
+;; 	      (begin
+;; 		(print "WARNING: mbox timed out for query "cmd", with data "data
+;; 		       ", waiting for response from "host-port".")
+;; 
+;; 		;; here it might make sense to clean up connection records and force clean start?
+;; 		;; NO. The progam using ulex needs to do the reset. Right thing here is exception
+;; 		
+;; 		#f)  ;; convert to raising exception?
+;; 	      res))
+;; 	(begin
+;; 	  (print "ERROR: Communication failed? Got "sres)
+;; 	  #f))))
+;;   
+;; ;; send a request to the given host-port and register a mailbox in udata
+;; ;; wait for the mailbox data and return it
+;; ;;
+;; (define (send-receive uconn host-port cmd data)
+;;   (let* ((start-time (current-milliseconds))
+;; 	 (result     (cond
+;; 		      ((member cmd '(ping goodbye)) ;; these are immediate
+;; 		       (send uconn host-port 'ping cmd data))
+;; 		      ((eq? (work-method) 'direct)
+;; 		       ;; the result from send will be the actual result, not an 'ack
+;; 		       (send uconn host-port 'direct cmd data))
+;; 		      (else
+;; 		       (case (return-method)
+;; 			 ((polling)
+;; 			  (send-via-polling uconn host-port cmd data))
+;; 			 ((mailbox) 
+;; 			  (send-via-mailbox uconn host-port cmd data))
+;; 			 (else
+;; 			  (print "ULEX ERROR: unrecognised return-method "(return-method)".")
+;; 			  #f)))))
+;; 	 (duration    (- (current-milliseconds) start-time)))
+;;     ;; this is ONLY for development and debugging. It will be removed once Ulex is stable.
+;;     (if (< 5000 duration)
+;; 	(print "ULEX WARNING: round-trip took "(inexact->exact (round (/ duration 1000)))
+;; 	       " seconds; "cmd", host-port="host-port", data="data))
+;;     result))
+;;     
+;; 
+;; ;;======================================================================
+;; ;; responder side
+;; ;;======================================================================
+;; 
+;; ;; take a request, rdat, and if not immediate put it in the work queue
+;; ;;
+;; ;; Reserved cmds; ack ping goodbye response
+;; ;;
+;; (define (ulex-handler uconn rdat)
+;;   (assert (list? rdat) "FATAL: ulex-handler give rdat as not list")
+;;   (match rdat ;;  (string-split controldat)
+;;     ((rem-host-port qrykey cmd params);; timedata)
+;;      ;; (print "ulex-handler got: "rem-host-port" qrykey: "qrykey" cmd: "cmd" params: "params)
+;;      (case cmd
+;;        ;; ((ack )(print "Got ack! But why? Should NOT get here.") 'ack)
+;;        ((ping)
+;; 	;; (print "Got Ping!")
+;; 	;; (add-to-work-queue uconn rdat)
+;; 	'ack)
+;;        ((goodbye)
+;; 	;; just clear out references to the caller. NOT COMPLETE
+;; 	(add-to-work-queue uconn rdat)
+;; 	'ack)
+;;        ((response) ;; this is a result from remote processing, send it as mail ...
+;; 	(case (return-method)
+;; 	  ((polling)
+;; 	   (hash-table-set! (udat-mboxes uconn) qrykey (cons 'ok params))
+;; 	   'ack)
+;; 	  ((mailbox)
+;; 	   (let ((mbox (hash-table-ref/default (udat-mboxes uconn) qrykey #f)))
+;; 	     (if mbox
+;; 		 (begin
+;; 		   (mailbox-send! mbox params) ;; params here is our result
+;; 		   'ack)
+;; 		 (begin
+;; 		   (print "ERROR: received result but no associated mbox for cookie "qrykey)
+;; 		   'no-mbox-found))))
+;; 	  (else (print "ULEX ERROR: unrecognised return-method "(return-method))
+;; 		'bad-return-method)))
+;;        (else ;; generic request - hand it to the work queue
+;; 	(add-to-work-queue uconn rdat)
+;; 	'ack)))
+;;     (else
+;;      (print "ULEX ERROR: bad rdat "rdat)
+;;      'bad-rdat)))
+;; 
+;; ;; given an already set up uconn start the cmd-loop
+;; ;;
+;; (define (ulex-cmd-loop uconn)
+;;   (let* ((serv-listener (udat-socket uconn))
+;; 	 (listener      (lambda ()
+;; 			  (let loop ((state 'start))
+;; 			    (let-values (((inp oup)(tcp-accept serv-listener)))
+;; 			      ;; (mutex-lock! *send-mutex*) ;; DOESN'T SEEM TO HELP
+;; 			      (let* ((rdat  (deserialize inp)) ;; '(my-host-port qrykey cmd params)
+;; 				     (resp  (ulex-handler uconn rdat)))
+;; 				(serialize resp oup)
+;; 				(close-input-port inp)
+;; 				(close-output-port oup)
+;; 				;; (mutex-unlock! *send-mutex*) ;; DOESN'T SEEM TO HELP
+;; 				)
+;; 			      (loop state))))))
+;;     ;; start N of them
+;;     (let loop ((thnum   0)
+;; 	       (threads '()))
+;;       (if (< thnum 100)
+;; 	  (let* ((th (make-thread listener (conc "listener" thnum))))
+;; 	    (thread-start! th)
+;; 	    (loop (+ thnum 1)
+;; 		  (cons th threads)))
+;; 	  (map thread-join! threads)))))
+;; 
+;; ;; add a proc to the cmd list, these are done symetrically (i.e. in all instances)
+;; ;; so that the proc can be dereferenced remotely
+;; ;;
+;; (define (set-work-handler uconn proc)
+;;   (udat-work-proc-set! uconn proc))
+;; 
+;; ;;======================================================================
+;; ;; work queues - this is all happening on the listener side
+;; ;;======================================================================
+;; 
+;; ;; rdat is (rem-host-port qrykey cmd params)
+;; 					     
+;; (define (add-to-work-queue uconn rdat)
+;;   #;(queue-add! (udat-work-queue uconn) rdat)
+;;   (case (work-method)
+;;     ((threads)
+;;      (thread-start! (make-thread (lambda ()
+;; 				   (do-work uconn rdat))
+;; 				 "worker thread")))
+;;     ((mailbox)
+;;      (mailbox-send! (udat-work-queue uconn) rdat))
+;;     ((direct)
+;;      (do-work uconn rdat))
+;;     (else
+;;      (print "ULEX ERROR: work-method "(work-method)" not recognised, using mailbox.")
+;;      (mailbox-send! (udat-work-queue uconn) rdat))))
+;;      
+;; ;; move the logic to return the result somewhere else?
+;; ;;
+;; (define (do-work uconn rdat)
+;;   (let* ((proc (udat-work-proc uconn))) ;; get it each time - conceivebly it could change
+;;     ;; put this following into a do-work procedure
+;;     (match rdat
+;;       ((rem-host-port qrykey cmd params)
+;;        (let* ((start-time (current-milliseconds))
+;; 	      (result (proc rem-host-port qrykey cmd params))
+;; 	      (end-time (current-milliseconds))
+;; 	      (run-time (- end-time start-time)))
+;; 	 (case (work-method)
+;; 	   ((direct) result)
+;; 	   (else
+;; 	    (print "ULEX: work "cmd", "params" done in "run-time" ms")
+;; 	    ;; send 'response as cmd and result as params
+;; 	    (send uconn rem-host-port qrykey 'response result) ;; could check for ack
+;; 	    (print "ULEX: response sent back to "rem-host-port" in "(- (current-milliseconds) end-time))))))
+;;       (MBOX_TIMEOUT 'do-work-timeout)
+;;       (else
+;;        (print "ERROR: rdat "rdat", did not match rem-host-port qrykey cmd params")))))
+;; 
+;; ;; NEW APPROACH:
+;; ;;   
+;; (define (process-work-queue uconn) 
+;;   (let ((wqueue (udat-work-queue uconn))
+;; 	(proc   (udat-work-proc  uconn))
+;; 	(numthr (udat-numthreads uconn)))
+;;     (let loop ((thnum    1)
+;; 	       (threads '()))
+;;       (let ((thlst (cons (make-thread (lambda ()
+;; 					(let work-loop ()
+;; 					  (let ((rdat (mailbox-receive! wqueue 24000 'MBOX_TIMEOUT)))
+;; 					    (do-work uconn rdat))
+;; 					  (work-loop)))
+;; 				      (conc "work thread " thnum))
+;; 			 threads)))
+;; 	(if (< thnum numthr)
+;; 	    (loop (+ thnum 1)
+;; 		  thlst)
+;; 	    (begin
+;; 	      (print "ULEX: Starting "(length thlst)" worker threads.")
+;; 	      (map thread-start! thlst)
+;; 	      (print "ULEX: Threads started. Joining all.")
+;; 	      (map thread-join! thlst)))))))
+;; 
+;; ;; below was to enable re-use of connections. This seems non-trivial so for
+;; ;; now lets open on each call
+;; ;;
+;; ;; ;; given host-port get or create peer struct
+;; ;; ;;
+;; ;; (define (udat-get-peer uconn host-port)
+;; ;;   (or (hash-table-ref/default (udat-peers uconn) host-port #f)
+;; ;;       ;; no peer, so create pdat and init it
+;; ;;       
+;; ;;       ;; NEED stack of connections, pop and use; inp, oup,
+;; ;;       ;; creation_time (remove and create new if over 24hrs old
+;; ;;       ;; 
+;; ;;       (let ((pdat (make-pdat host-port: host-port)))
+;; ;; 	(hash-table-set! (udat-peers uconn) host-port pdat)
+;; ;; 	pdat)))
+;; ;; 
+;; ;; ;; is pcon alive
+;; ;; 
+;; ;; ;; given host-port and pdat get a pcon
+;; ;; ;;
+;; ;; (define (pdat-get-pcon pdat host-port)
+;; ;;   (let loop ((conns (pdat-conns pdat)))
+;; ;;     (if (null? conns) ;; none? make and return - do NOT add - it will be pushed back on list later
+;; ;; 	(init-pcon (make-pcon))
+;; ;; 	(let* ((conn (pop conns)))
+;; ;; 	  
+;; ;; ;; given host-port get a pcon struct
+;; ;; ;;
+;; ;; (define (udat-get-pcon 
+;;       
+;; ;;======================================================================
+;; ;; misc utils
+;; ;;======================================================================
+;; 
+;; (define (make-cookie uconn)
+;;   (let ((newcnum (+ (udat-cnum uconn) 1)))
+;;     (udat-cnum-set! uconn newcnum)
+;;     (conc (udat-host-port uconn) ":"
+;; 	  newcnum)))
+;; 
+;; ;; cookie/mboxes
+;; 
+;; ;; we store each mbox with a cookie (<cookie> . <mbox>)
+;; ;;
+;; (define (get-cmbox uconn)
+;;   (if (null? (udat-avail-cmboxes uconn))
+;;       (let ((cookie (make-cookie uconn))
+;; 	    (mbox   (make-mailbox)))
+;; 	(hash-table-set! (udat-mboxes uconn) cookie mbox)
+;; 	`(,cookie . ,mbox))
+;;       (let ((cmbox (car (udat-avail-cmboxes uconn))))
+;; 	(udat-avail-cmboxes-set! uconn (cdr (udat-avail-cmboxes uconn)))
+;; 	cmbox)))
+;; 
+;; (define (put-cmbox uconn cmbox)
+;;   (udat-avail-cmboxes-set! uconn (cons cmbox (udat-avail-cmboxes uconn))))
+;; 
+;; (define (pp-uconn uconn)
+;;   (pp (udat->alist uconn)))
+;; 
+;;   
+;; ;;======================================================================
+;; ;; network utilities
+;; ;;======================================================================
+;; 
+;; ;; NOTE: Look at address-info egg as alternative to some of this
+;; 
+;; (define (rate-ip ipaddr)
+;;   (regex-case ipaddr
+;;     ( "^127\\..*" _ 0 )
+;;     ( "^(10\\.0|192\\.168)\\..*" _ 1 )
+;;     ( else 2 ) ))
+;; 
+;; ;; Change this to bias for addresses with a reasonable broadcast value?
+;; ;;
+;; (define (ip-pref-less? a b)
+;;   (> (rate-ip a) (rate-ip b)))
+;; 
+;; (define (get-my-best-address)
+;;   (let ((all-my-addresses (get-all-ips)))
+;;     (cond
+;;      ((null? all-my-addresses)
+;;       (get-host-name))                                          ;; no interfaces?
+;;      ((eq? (length all-my-addresses) 1)
+;;       (car all-my-addresses))                      ;; only one to choose from, just go with it
+;;      (else
+;;       (car (sort all-my-addresses ip-pref-less?))))))
+;; 
+;; (define (get-all-ips-sorted)
+;;   (sort (get-all-ips) ip-pref-less?))
+;; 
+;; (define (get-all-ips)
+;;   (map address-info-host
+;;        (filter (lambda (x)
+;; 		 (equal? (address-info-type x) "tcp"))
+;; 	       (address-infos (get-host-name)))))
+;; 
+)

ADDED   ulex-simple/dbmgr.scm
Index: ulex-simple/dbmgr.scm
==================================================================
--- /dev/null
+++ ulex-simple/dbmgr.scm
@@ -0,0 +1,1003 @@
+;;======================================================================
+;; Copyright 2022, Matthew Welland.
+;; 
+;; This file is part of Megatest.
+;; 
+;;     Megatest is free software: you can redistribute it and/or modify
+;;     it under the terms of the GNU General Public License as published by
+;;     the Free Software Foundation, either version 3 of the License, or
+;;     (at your option) any later version.
+;; 
+;;     Megatest is distributed in the hope that it will be useful,
+;;     but WITHOUT ANY WARRANTY; without even the implied warranty of
+;;     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;;     GNU General Public License for more details.
+;; 
+;;     You should have received a copy of the GNU General Public License
+;;     along with Megatest.  If not, see <http://www.gnu.org/licenses/>.
+
+;;======================================================================
+
+(declare (unit dbmgrmod))
+
+(declare (uses ulex))
+(declare (uses apimod))
+(declare (uses pkts))
+(declare (uses commonmod))
+(declare (uses dbmod))
+(declare (uses mtargs))
+(declare (uses portloggermod))
+(declare (uses debugprint))
+
+(module dbmgrmod
+    *
+
+(import scheme
+	chicken.base
+	chicken.condition
+	chicken.file
+	chicken.format
+	chicken.port
+	chicken.process
+	chicken.process-context
+	chicken.process-context.posix
+	chicken.sort
+	chicken.string
+	chicken.time
+	
+	(prefix sqlite3 sqlite3:)
+	matchable
+	md5
+	message-digest
+	regex
+	s11n
+	srfi-1
+	srfi-18
+	srfi-69
+	system-information
+	typed-records
+	
+	pkts
+	ulex
+
+	commonmod
+	apimod
+	dbmod
+	debugprint
+	(prefix mtargs args:)
+	portloggermod
+	)
+
+;; Configurations for server
+;; (tcp-buffer-size 2048)
+;; (max-connections 2048) 
+
+;; info about me as a listener and my connections to db servers
+;; stored (for now) in *db-serv-info*
+;;
+(defstruct servdat
+  (host #f)
+  (port #f)
+  (uuid #f)
+  (dbfile #f)
+  (uconn   #f) ;; this is the listener for this process
+  (mode    #f)
+  (status 'starting)
+  (trynum 0) ;; count the number of ports we've tried
+  (conns  (make-hash-table)) ;; apath/dbname => conndat
+  ) 
+
+(define *db-serv-info* #f)
+
+(define (servdat->url sdat)
+  (conc (servdat-host sdat)":"(servdat-port sdat)))
+
+;; db servers contact info
+;;
+(defstruct conndat
+  (apath    #f)
+  (dbname   #f)
+  (fullname #f)
+  (hostport #f)
+  (ipaddr   #f)
+  (port     #f)
+  (srvpkt   #f)
+  (srvkey   #f)
+  (lastmsg  0)
+  (expires  0))
+
+(define *srvpktspec*
+  `((server (host    . h)
+	    (port    . p)
+	    (servkey . k)
+	    (pid     . i)
+	    (ipaddr  . a)
+	    (dbpath  . d))))
+
+;;======================================================================
+;;  S U P P O R T   F U N C T I O N S
+;;======================================================================
+
+;; set up the api proc, seems like there should be a better place for this?
+;;
+;; IS THIS NEEDED ANYMORE? TODO - REMOVE IF POSSIBLE
+;;
+;; (define api-proc (make-parameter conc))
+;; (api-proc api:execute-requests)
+
+;; do we have a connection to apath dbname and
+;; is it not expired? then return it
+;;
+;; else setup a connection
+;;
+;; if that fails, return '(#f "some reason") ;; NB// convert to raising an exception
+;;
+(define (rmt:get-conn remdat apath dbname)
+  (let* ((fullname (db:dbname->path apath dbname)))
+    (hash-table-ref/default (servdat-conns remdat) fullname #f)))
+
+(define (rmt:drop-conn remdat apath dbname)
+  (let* ((fullname (db:dbname->path apath dbname)))
+    (hash-table-delete! (servdat-conns remdat) fullname)))
+  
+(define (rmt:find-main-server uconn apath dbname)
+  (let* ((pktsdir     (get-pkts-dir apath))
+	 (all-srvpkts (get-all-server-pkts pktsdir *srvpktspec*))
+	 (viable-srvs (get-viable-servers all-srvpkts dbname)))
+    (get-the-server uconn apath viable-srvs)))
+
+
+(define *connstart-mutex* (make-mutex))
+(define *last-main-start* 0)
+
+;; looks for a connection to main, returns if have and not exired
+;; creates new otherwise
+;; 
+;; connections for other servers happens by requesting from main
+;;
+;; TODO: This is unnecessarily re-creating the record in the hash table
+;;
+(define (rmt:open-main-connection remdat apath)
+  (let* ((fullpath (db:dbname->path apath ".db/main.db"))
+	 (conns    (servdat-conns remdat))
+	 (conn     (rmt:get-conn remdat apath ".db/main.db")) ;; (hash-table-ref/default conns fullpath #f)) ;; TODO - create call for this
+	 (myconn   (servdat-uconn remdat)))
+    (cond
+     ((not myconn)
+      (servdat-uconn-set! remdat (make-udat))
+      (rmt:open-main-connection remdat apath))
+     ((and conn                                             ;; conn is NOT a socket, just saying ...
+	   (< (current-seconds) (conndat-expires conn)))
+      #t) ;; we are current and good to go - we'll deal elsewhere with a server that was killed or died 
+     ((and conn
+	   (>= (current-seconds)(conndat-expires conn)))
+      (debug:print-info 0 *default-log-port* "connection to "fullpath" server expired. Reconnecting.")
+      (rmt:drop-conn remdat apath ".db/main.db") ;;
+      (rmt:open-main-connection remdat apath))
+     (else
+      ;; Below we will find or create and connect to main
+      (debug:print-info 0 *default-log-port* "rmt:open-main-connection - starting from scratch")
+      (let* ((dbname         (db:run-id->dbname #f))
+	     (the-srv        (rmt:find-main-server myconn apath dbname))
+	     (start-main-srv (lambda () ;; call IF there is no the-srv found
+			       (mutex-lock! *connstart-mutex*)
+			       (if (> (- (current-seconds) *last-main-start*) 5) ;; at least four seconds since last attempt to start main server
+				   (begin
+				     (api:run-server-process apath dbname)
+				     (set! *last-main-start* (current-seconds))
+				     (thread-sleep! 1))
+				   (thread-sleep! 0.25))
+			       (mutex-unlock! *connstart-mutex*)
+			       (rmt:open-main-connection remdat apath) ;; TODO: Add limit to number of tries
+			       )))
+	(if (not the-srv) ;; have server, try connecting to it
+	    (start-main-srv)
+	    (let* ((srv-addr (server-address the-srv)) ;; need serv
+		   (ipaddr   (alist-ref 'ipaddr  the-srv))
+		   (port     (alist-ref 'port    the-srv))
+		   (srvkey   (alist-ref 'servkey the-srv))
+		   (fullpath (db:dbname->path apath dbname))
+		   
+		   (new-the-srv (make-conndat
+				 apath:   apath
+				 dbname:  dbname
+				 fullname: fullpath
+				 hostport: srv-addr
+				 ;; socket: (open-nn-connection srv-addr)  - TODO - open ulex connection?
+				 ipaddr: ipaddr
+				 port: port
+				 srvpkt: the-srv
+				 srvkey: srvkey ;; generated by rmt:get-signature on the server side
+				 lastmsg: (current-seconds)
+				 expires: (+ (current-seconds)
+					     (server:expiration-timeout)
+					     -2) ;; this needs to be gathered during the ping
+				 )))
+	      (hash-table-set! conns fullpath new-the-srv)))
+	#t)))))
+
+;; NB// sinfo is a servdat struct
+;;
+(define (rmt:general-open-connection sinfo apath dbname #!key (num-tries 5))
+  (assert (not (equal? dbname ".db/main.db")) "ERROR: general-open-connection should never be called with main as the db")
+  (let* ((mdbname  ".db/main.db") ;; (db:run-id->dbname #f)) TODO: put this back to the lookup when stable
+	 (fullname (db:dbname->path apath dbname))
+	 (conns    (servdat-conns sinfo))
+	 (mconn    (rmt:get-conn sinfo apath ".db/main.db"))
+	 (dconn    (rmt:get-conn sinfo apath dbname)))
+    #;(if (and mconn
+	     (not (debug:print-logger)))
+	(begin
+	  (debug:print-info 0 *default-log-port* "Turning on logging to main, look in logs dir for main log.")
+	  (debug:print-logger rmt:log-to-main)))
+    (cond
+     ((and mconn
+	   dconn
+	   (< (current-seconds)(conndat-expires dconn)))
+      #t) ;; good to go
+     ((not mconn) ;; no channel open to main? open it...
+      (rmt:open-main-connection sinfo apath)
+      (rmt:general-open-connection sinfo apath dbname num-tries: (- num-tries 1)))
+     ((not dconn)                 ;; no channel open to dbname?     
+      (let* ((res (rmt:send-receive-real sinfo apath mdbname 'get-server `(,apath ,dbname))))
+	(case res
+	  ((server-started)
+	   (if (> num-tries 0)
+	       (begin
+		 (thread-sleep! 2)
+		 (rmt:general-open-connection sinfo apath dbname num-tries: (- num-tries 1)))
+	       (begin
+		 (debug:print-error 0 *default-log-port* "Failed to start servers needed or open channel to "apath", "dbname)
+		 (exit 1))))
+	  (else
+	   (if (list? res) ;; server has been registered and the info was returned. pass it on.
+	       (begin ;;  ("192.168.0.9" 53817
+		      ;;  "5e34239f48e8973b3813221e54701a01" "24310"
+		      ;;  "192.168.0.9"
+		      ;;  "/home/matt/data/megatest/tests/simplerun"
+		 ;;  ".db/1.db")
+		 (match
+		  res
+		  ((host port servkey pid ipaddr apath dbname)
+		   (debug:print-info 0 *default-log-port* "got "res)
+		   (hash-table-set! conns
+				    fullname
+				    (make-conndat
+				     apath: apath
+				     dbname: dbname
+				     hostport: (conc host":"port)
+				     ;; socket: (open-nn-connection (conc host":"port)) ;; TODO - open ulex connection?
+				     ipaddr: ipaddr
+				     port: port
+				     srvkey: servkey
+				     lastmsg: (current-seconds)
+				     expires: (+ (current-seconds)
+						 (server:expiration-timeout)
+						 -2))))
+		  (else
+		   (debug:print-info 0 *default-log-port* "return data from starting server did not match host port servkey pid ipaddr apath dbname " res)))
+		 res)
+	       (begin
+		 (debug:print-info 0 *default-log-port* "Unexpected result: " res)
+		 res)))))))
+    #t))
+
+;;======================================================================
+
+;; FOR DEBUGGING SET TO #t
+;; (define *localmode* #t)
+(define *localmode* #f)
+(define *dbstruct* (make-dbr:dbstruct))
+
+;; Defaults to current area
+;;
+(define (rmt:send-receive-attempted-consolidation cmd rid params #!key (attemptnum 1)(area-dat #f))
+  (let* ((apath      *toppath*)
+	 (sinfo      *db-serv-info*)
+	 (dbname     (db:run-id->dbname rid)))
+    (if (not *db-serv-info*)
+	(begin
+	  (set! *db-serv-info* (make-servdat))
+	  (set! sinfo *db-serv-info*)))
+    (rmt:open-main-connection sinfo apath)
+    (if rid (rmt:general-open-connection sinfo apath dbname))
+    ;; (if (not (member cmd '(log-to-main)))
+    ;;     (debug:print-info 0 *default-log-port* "rmt:send-receive "cmd" params="params))
+    (let* ((cdat (rmt:get-conn sinfo apath dbname)))
+      (assert cdat "FATAL: rmt:send-receive-real called without the needed channels opened")
+      (let* ((uconn    (servdat-uconn sinfo)) ;; get the interface to ulex
+	     ;; then send-receive using the ulex layer to host-port stored in cdat
+	     (res      (send-receive uconn (conndat-hostport cdat) cmd params)))
+	(conndat-expires-set! cdat (+ (current-seconds)
+				      (server:expiration-timeout)
+				      -2)) ;; two second margin for network time misalignments etc.
+	res))))
+
+; Defaults to current area
+;;
+(define (rmt:send-receive cmd rid params #!key (attemptnum 1)(area-dat #f))
+  (let* ((apath      *toppath*)
+         (sinfo      *db-serv-info*)
+         (dbname     (db:run-id->dbname rid)))
+    (if (not *db-serv-info*) ;; confirm this is really needed
+	(begin
+	  (set! *db-serv-info* (make-servdat))
+	  (set! sinfo *db-serv-info*)))
+    (rmt:open-main-connection sinfo apath)
+    (if rid (rmt:general-open-connection sinfo apath dbname))
+    #;(if (not (member cmd '(log-to-main)))
+    (debug:print-info 0 *default-log-port* "rmt:send-receive "cmd" params="params))
+    (rmt:send-receive-real sinfo apath dbname cmd params)))
+
+;; db is at apath/.db/dbname, rid is an intermediary solution and will be removed
+;; sometime in the future
+;;
+(define (rmt:send-receive-real sinfo apath dbname cmd params)
+  (let* ((cdat (rmt:get-conn sinfo apath dbname)))
+    (assert cdat "FATAL: rmt:send-receive-real called without the needed channels opened")
+    (let* ((uconn    (servdat-uconn sinfo)) ;; get the interface to ulex
+           ;; then send-receive using the ulex layer to host-port stored in cdat
+           (res      (send-receive uconn (conndat-hostport cdat) cmd params)))
+      ;; since we accessed the server we can bump the expires time up
+      (conndat-expires-set! cdat (+ (current-seconds)
+                                    (server:expiration-timeout)
+                                    -2)) ;; two second margin for network time misalignments etc.
+      res)))
+
+;
+;; db is at apath/.db/dbname, rid is an intermediary solution and will be removed
+;; sometime in the future.
+;;
+;; Purpose - call the main.db server and request a server be started
+;; for the given area path and dbname
+;;
+
+(define (rmt:print-db-stats)
+  (let ((fmtstr "~40a~7-d~9-d~20,2-f")) ;; "~20,2-f"
+    (debug:print 18 *default-log-port* "DB Stats, "(seconds->year-week/day-time (current-seconds))"\n=====================")
+    (debug:print 18 *default-log-port* (format #f "~40a~8a~10a~10a" "Cmd" "Count" "TotTime" "Avg"))
+    (for-each (lambda (cmd)
+		(let ((cmd-dat (hash-table-ref *db-stats* cmd)))
+		  (debug:print 18 *default-log-port* (format #f fmtstr cmd (vector-ref cmd-dat 0) (vector-ref cmd-dat 1) (/ (vector-ref cmd-dat 1)(vector-ref cmd-dat 0))))))
+	      (sort (hash-table-keys *db-stats*)
+		    (lambda (a b)
+		      (> (vector-ref (hash-table-ref *db-stats* a) 0)
+			 (vector-ref (hash-table-ref *db-stats* b) 0)))))))
+
+(define (rmt:get-max-query-average run-id)
+  (mutex-lock! *db-stats-mutex*)
+  (let* ((runkey (conc "run-id=" run-id " "))
+	 (cmds   (filter (lambda (x)
+			   (substring-index runkey x))
+			 (hash-table-keys *db-stats*)))
+	 (res    (if (null? cmds)
+		     (cons 'none 0)
+		     (let loop ((cmd (car cmds))
+				(tal (cdr cmds))
+				(max-cmd (car cmds))
+				(res 0))
+		       (let* ((cmd-dat (hash-table-ref *db-stats* cmd))
+			      (tot     (vector-ref cmd-dat 0))
+			      (curravg (/ (vector-ref cmd-dat 1) (vector-ref cmd-dat 0))) ;; count is never zero by construction
+			      (currmax (max res curravg))
+			      (newmax-cmd (if (> curravg res) cmd max-cmd)))
+			 (if (null? tal)
+			     (if (> tot 10)
+				 (cons newmax-cmd currmax)
+				 (cons 'none 0))
+			     (loop (car tal)(cdr tal) newmax-cmd currmax)))))))
+    (mutex-unlock! *db-stats-mutex*)
+    res))
+
+;; host and port are used to ensure we are remove proper records
+(define (rmt:server-shutdown host port)
+  (let ((dbfile   (servdat-dbfile *db-serv-info*)))
+    (debug:print-info 0 *default-log-port* "dbfile is "dbfile)
+    (if dbfile
+	(let* ((am-server  (args:get-arg "-server"))
+	       (dbfile     (args:get-arg "-db"))
+	       (apath      *toppath*)
+	       #;(sinfo     *remotedat*)) ;; foundation for future fix
+	  (if *dbstruct-db*
+	      (let* ((dbdat      (db:get-dbdat *dbstruct-db* apath dbfile))
+		     (db         (dbr:dbdat-db dbdat))
+		     (inmem      (dbr:dbdat-db dbdat))   ;; WRONG
+		     )
+		;; do a final sync here
+		(debug:print-info 0 *default-log-port* "Doing final sync for "apath" "dbfile" at "(current-seconds))
+		(db:sync-inmem->disk *dbstruct-db* apath dbfile force-sync: #t)
+		;; let's finalize here
+		(debug:print-info 0 *default-log-port* "Finalizing db and inmem")
+		(if (sqlite3:database? db)
+		    (sqlite3:finalize! db)
+		    (debug:print-info 0 *default-log-port* "in rmt:server-shutdown, db is not a database, not finalizing..."))
+		(if (sqlite3:database? inmem)
+		    (sqlite3:finalize! inmem)
+		    (debug:print-info 0 *default-log-port* "in rmt:server-shutdown, inmem is not a database, not finalizing..."))
+		(debug:print-info 0 *default-log-port* "Finalizing db and inmem complete"))
+	      (debug:print-info 0 *default-log-port* "Db was never opened, no cleanup to do."))
+	  (if (not am-server)
+	      (debug:print-info 0 *default-log-port* "I am not a server, should NOT get here!")
+	      (if (string-match ".*/main.db$" dbfile)
+		  (let ((pkt-file (conc (get-pkts-dir *toppath*)
+					"/" (servdat-uuid *db-serv-info*)
+					".pkt")))
+		    (debug:print-info 0 *default-log-port* "removing pkt "pkt-file)
+		    (delete-file* pkt-file)
+		    (debug:print-info 0 *default-log-port* "Releasing lock (if any) for "dbfile ", host "host", port "port)
+		    (db:with-lock-db
+		     (servdat-dbfile *db-serv-info*)
+		     (lambda (dbh dbfile)
+		       (db:release-lock dbh dbfile host port)))) ;; I'm not the server - should not have a lock to remove
+		  (let* ((sdat *db-serv-info*) ;; we have a run-id server
+			 (host (servdat-host sdat))
+			 (port (servdat-port sdat))
+			 (uuid (servdat-uuid sdat))
+			 (res  (rmt:deregister-server *db-serv-info* *toppath* host port uuid dbfile)))
+		    (debug:print-info 0 *default-log-port* "deregistered-server, res="res)
+		    (debug:print-info 0 *default-log-port* "deregistering server "host":"port" with uuid "uuid)
+		    )))))))
+
+
+(define (common:run-sync?)
+    ;; (and (common:on-homehost?)
+  (args:get-arg "-server"))
+
+(define *rmt:run-mutex* (make-mutex))
+(define *rmt:run-flag* #f)
+
+(define (listener-running?)
+  (and *db-serv-info*
+       (servdat-uconn *db-serv-info*)))
+
+;; Main entry point to start a server. was start-server
+(define (rmt:run hostn)
+  (mutex-lock! *rmt:run-mutex*)
+  (if *rmt:run-flag*
+      (begin
+	(debug:print-warn 0 *default-log-port* "rmt:run already running.")
+	(mutex-unlock! *rmt:run-mutex*))
+      (begin
+	(set! *rmt:run-flag* #t)
+	(mutex-unlock! *rmt:run-mutex*)
+	;;  ;; Configurations for server
+	;;  (tcp-buffer-size 2048)
+	;;  (max-connections 2048) 
+	(debug:print 2 *default-log-port* "PID: "(current-process-id)". Attempting to start the server ...")
+	(if (listener-running?)
+	    (let* ((uconn (servdat-uconn *db-serv-info*)))
+	      (wait-and-close uconn))
+	    (let* ((port            (portlogger:open-run-close portlogger:find-port))
+		   (handler-proc    (lambda (rem-host-port qrykey cmd params) ;;
+				      (set! *db-last-access* (current-seconds))
+				      (assert (list? params) "FATAL: handler called with non-list params")
+				      (assert (args:get-arg "-server") "FATAL: handler called on non-server side. cmd="cmd", params="params)
+				      (debug:print 0 *default-log-port* "handler call: "cmd", params="params)
+				      (api:execute-requests *dbstruct-db* cmd params))))
+	      ;; (api:process-request *dbstuct-db* 
+	      (if (not *db-serv-info*)
+		  (set! *db-serv-info* (make-servdat host: hostn port: port)))
+	      (let* ((uconn (run-listener handler-proc port))
+		     (rport (udat-port uconn))) ;; the real port
+		(servdat-host-set! *db-serv-info* hostn)
+		(servdat-port-set! *db-serv-info* rport)
+		(servdat-uconn-set! *db-serv-info* uconn)
+		(wait-and-close uconn)
+		(db:print-current-query-stats)
+		)))
+	(let* ((host (servdat-host *db-serv-info*))
+	       (port (servdat-port *db-serv-info*))
+	       (mode (or (servdat-mode *db-serv-info*)
+			 "non-db")))
+	  ;; server exit stuff here
+	  ;; (rmt:server-shutdown host port) - always do in on-exit
+	  ;; (portlogger:open-run-close portlogger:set-port port "released") ;; moved to on-exit 
+	  (debug:print-info 0 *default-log-port* "Server "host":"port" mode "mode"shutdown complete. Exiting")
+	  ))))
+
+;;======================================================================
+;; S E R V E R   U T I L I T I E S 
+;;======================================================================
+
+
+;;======================================================================
+;; NEW SERVER METHOD
+;;======================================================================
+
+;; only use for main.db - need to re-write some of this :(
+;;
+(define (get-lock-db sdat dbfile host port)
+  (assert host "FATAL: get-lock-db called with host not set.")
+  (assert port "FATAL: get-lock-db called with port not set.")
+  (let* ((dbh (db:open-run-db dbfile db:initialize-db)) ;; open-run-db creates a standard db with schema used by all situations
+	 (res (db:get-iam-server-lock dbh dbfile host port))
+	 (uconn (servdat-uconn sdat)))
+    ;; res => list then already locked, check server is responsive
+    ;;     => #t then sucessfully got the lock
+    ;;     => #f reserved for future use as to indicate something went wrong
+    (match res
+      ((owner_pid owner_host owner_port event_time)
+       (if (server-ready? uconn (conc owner_host":"owner_port) "abc")
+	   #f      ;; locked by someone else
+	   (begin  ;; locked by someone dead and gone
+	     (debug:print 0 *default-log-port* "WARNING: stale lock - have to steal it. This may fail.")
+	     (db:steal-lock-db dbh dbfile port))))
+      (#t  #t) ;; placeholder so that we don't touch res if it is #t
+      (else (set! res #f)))
+    (sqlite3:finalize! dbh)
+    res))
+
+
+(define (register-server pkts-dir pkt-spec host port servkey ipaddr dbpath)
+  (let* ((pkt-dat `((host    . ,host)
+		    (port    . ,port)
+		    (servkey . ,servkey)
+		    (pid     . ,(current-process-id))
+		    (ipaddr  . ,ipaddr)
+		    (dbpath  . ,dbpath)))
+	 (uuid    (write-alist->pkt
+		   pkts-dir
+		   pkt-dat
+		   pktspec: pkt-spec
+		   ptype: 'server)))
+    (debug:print 0 *default-log-port* "Server on "host":"port" registered in pkt "uuid)
+    uuid))
+
+(define (get-pkts-dir #!optional (apath #f))
+  (let* ((effective-toppath (or *toppath* apath)))
+    (assert effective-toppath
+	    "ERROR: get-pkts-dir called without *toppath* set. Exiting.")
+    (let* ((pdir (conc effective-toppath "/.meta/srvpkts")))
+      (if (file-exists? pdir)
+	  pdir
+	  (begin
+	    (handle-exceptions ;; this exception handler should NOT be needed but ...
+		exn
+		pdir
+	      (create-directory pdir #t))
+	    pdir)))))
+
+;; given a pkts dir read 
+;;
+(define (get-all-server-pkts pktsdir-in pktspec)
+  (let* ((pktsdir  (if (file-exists? pktsdir-in)
+		       pktsdir-in
+		       (begin
+			 (create-directory pktsdir-in #t)
+			 pktsdir-in)))
+	 (all-pkt-files (glob (conc pktsdir "/*.pkt"))))
+    (map (lambda (pkt-file)
+	   (read-pkt->alist pkt-file pktspec: pktspec))
+	 all-pkt-files)))
+
+(define (server-address srv-pkt)
+  (conc (alist-ref 'host srv-pkt) ":"
+	(alist-ref 'port srv-pkt)))
+	
+(define (server-ready? uconn host-port key) ;; server-address is host:port
+  (let* ((params `((cmd . ping)(key . ,key)))
+	 (data `((cmd . ping)
+		 (key . ,key)
+		 (params . ,params))) ;; I don't get it.
+	 (res  (send-receive uconn host-port 'ping data)))
+    (if (eq? res 'ping-ack) ;; yep, likely it is who we want on the other end
+	res
+	#f)))
+;; (begin (debug:print-info 0 *default-log-port* "server-ready? => "res) #f))))
+
+; from the pkts return servers associated with dbpath
+;; NOTE: Only one can be alive - have to check on each
+;;       in the list of pkts returned
+;;
+(define (get-viable-servers serv-pkts dbpath)
+  (let loop ((tail serv-pkts)
+	     (res  '()))
+    (if (null? tail)
+	res ;; NOTE: sort by age so oldest is considered first
+	(let* ((spkt (car tail)))
+	  (loop (cdr tail)
+		(if (equal? dbpath (alist-ref 'dbpath spkt))
+		    (cons spkt res)
+		    res))))))
+
+(define (remove-pkts-if-not-alive uconn serv-pkts)
+  (filter (lambda (pkt)
+	    (let* ((host (alist-ref 'host pkt))
+		   (port (alist-ref 'port pkt))
+		   (host-port (conc host":"port))
+		   (key  (alist-ref 'servkey  pkt))
+		   (pktz (alist-ref 'Z        pkt))
+		   (res  (server-ready? uconn host-port key)))
+	      (if res
+		  res
+		  (let* ((pktsdir (get-pkts-dir *toppath*))
+			 (pktpath (conc pktsdir"/"pktz".pkt")))
+		    (debug:print 0 *default-log-port* "WARNING: pkt with no server "pktpath)
+		    (delete-file* pktpath)
+		    #f))))
+	  serv-pkts))
+
+;; from viable servers get one that is alive and ready
+;;
+(define (get-the-server uconn apath serv-pkts)
+  (let loop ((tail serv-pkts))
+    (if (null? tail)
+	#f
+	(let* ((spkt  (car tail))
+	       (host  (alist-ref 'ipaddr spkt))
+	       (port  (alist-ref 'port spkt))
+	       (host-port (conc host":"port))
+	       (dbpth (alist-ref 'dbpath spkt))
+	       (srvkey (alist-ref 'Z spkt)) ;; (alist-ref 'srvkey spkt))
+	       (addr  (server-address spkt)))
+	  (if (server-ready? uconn host-port srvkey)
+	      spkt
+	      (loop (cdr tail)))))))
+
+;; am I the "first" in line server? I.e. my D card is smallest
+;; use Z card as tie breaker
+;;
+(define (get-best-candidate serv-pkts dbpath)
+  (if (null? serv-pkts)
+      #f
+      (let loop ((tail serv-pkts)
+		 (best  (car serv-pkts)))
+	(if (null? tail)
+	    best
+	    (let* ((candidate (car tail))
+		   (candidate-bd (string->number (alist-ref 'D candidate)))
+		   (best-bd      (string->number (alist-ref 'D best)))
+		   ;; bigger number is younger
+		   (candidate-z  (alist-ref 'Z candidate))
+		   (best-z       (alist-ref 'Z best))
+		   (new-best     (cond
+				  ((> best-bd candidate-bd) ;; best is younger than candidate
+				   candidate)
+				  ((< best-bd candidate-bd) ;; candidate is younger than best
+				   best)
+				  (else
+				   (if (string>=? best-z candidate-z)
+				       best
+				       candidate))))) ;; use Z card as tie breaker
+	      (if (null? tail)
+		  new-best
+		  (loop (cdr tail) new-best)))))))
+	  
+;;======================================================================
+;; END NEW SERVER METHOD
+;;======================================================================
+
+;; sdat must be defined and the host and port set and the same as previous
+;;
+(define (host-port-is-stable? sdat old-host old-port)
+  (and sdat
+       (let ((new-host (servdat-host sdat))
+	     (new-port (servdat-port sdat)))
+	 (and new-host
+	      new-port
+	      (equal? new-host old-host)
+	      (equal? new-port old-port)))))
+
+;; if .db/main.db check the pkts
+;; 
+(define (rmt:wait-for-server pkts-dir db-file server-key)
+  (let* ((sdat *db-serv-info*))
+    (let loop ((start-time (current-milliseconds))
+	       (changed    #t)
+	       (last-sdat  "not this")
+	       (last-host  #f)
+	       (last-port  #f))
+      (begin ;; let ((sdat #f))
+	(thread-sleep! 0.01)
+	(debug:print-info 0 *default-log-port* "Waiting for server alive signature")
+	(mutex-lock! *heartbeat-mutex*)
+	(set! sdat *db-serv-info*)
+	(mutex-unlock! *heartbeat-mutex*)
+	(if (and sdat
+		 (not changed)
+		 (>= (- (current-milliseconds) start-time) 100))
+	    (let* ((uconn (servdat-uconn sdat)))
+	      (servdat-status-set! sdat 'iface-stable)
+	      (debug:print-info 0 *default-log-port* "Received server alive signature, now attempting to lock in server")
+	      ;; create a server pkt in *toppath*/.meta/srvpkts
+	      
+	      ;; TODO:
+	      ;;   1. change sdat to stuct
+	      ;;   2. add uuid to struct
+	      ;;   3. update uuid in sdat here
+	      ;;
+	      (servdat-uuid-set! sdat
+				 (register-server
+				  pkts-dir *srvpktspec*
+				  (get-host-name)
+				  (servdat-port sdat) server-key
+				  (servdat-host sdat) db-file))
+	      ;; (set! *my-signature* (servdat-uuid sdat)) ;; replace with Z, no, stick with proper key
+	      ;; now read pkts and see if we are a contender
+	      (let* ((all-pkts     (get-all-server-pkts pkts-dir *srvpktspec*))
+		     (viables      (get-viable-servers all-pkts db-file))
+		     (alive        (remove-pkts-if-not-alive uconn viables))
+		     (best-srv     (get-best-candidate alive db-file))
+		     (best-srv-key (if best-srv (alist-ref 'servkey best-srv) #f))
+		     (i-am-srv     (equal? best-srv-key server-key))
+		     (delete-pkt   (lambda ()
+				     (let* ((pktfile (conc (get-pkts-dir *toppath*)
+							 "/" (servdat-uuid *db-serv-info*)
+							 ".pkt")))
+				       (debug:print-info 0 *default-log-port* "Attempting to remove bogus pkt file "pktfile)
+				       (delete-file* pktfile))))) ;; remove immediately instead of waiting for on-exit
+		(debug:print 0 *default-log-port* "best-srv-key: "best-srv-key", server-key: "server-key", i-am-srv: "i-am-srv)
+		;; am I the best-srv, compare server-keys to know
+		(if i-am-srv
+		    (if (get-lock-db sdat db-file (servdat-host sdat)(servdat-port sdat)) ;; (db:get-iam-server-lock *dbstruct-db* *toppath* run-id)
+			(begin
+			  (debug:print-info 0 *default-log-port* "I'm the server!")
+			  (servdat-dbfile-set! sdat db-file)
+			  (servdat-status-set! sdat 'db-locked))
+			(begin
+			  (debug:print-info 0 *default-log-port* "I'm not the server, exiting.")
+			  (bdat-time-to-exit-set! *bdat* #t)
+			  (delete-pkt)
+			  (thread-sleep! 0.2)
+			  (exit)))
+		    (begin
+		      (debug:print-info 0 *default-log-port*
+				   "Keys do not match "best-srv-key", "server-key", exiting.")
+		      (bdat-time-to-exit-set! *bdat* #t)
+		      (delete-pkt)
+		      (thread-sleep! 0.2)
+		      (exit)))
+		sdat))
+	    (begin ;; sdat not yet contains server info
+	      (debug:print-info 0 *default-log-port* "Still waiting, last-sdat=" last-sdat)
+	      (thread-sleep! 0.1)
+	      (if (> (- (current-milliseconds) start-time) 120000) ;; been waiting for two minutes
+		  (begin
+		    (debug:print-error 0 *default-log-port* "transport appears to have died, exiting server")
+		    (exit))
+		  (loop start-time
+			(not (host-port-is-stable? sdat last-host last-port))
+			sdat
+			(servdat-host sdat)
+			(servdat-port sdat)))))))))
+
+(define (rmt:register-server sinfo apath iface port server-key dbname)
+  (servdat-conns sinfo) ;; just checking types
+  (rmt:open-main-connection sinfo apath) ;; we need a channel to main.db
+  (rmt:send-receive-real sinfo apath      ;; params: host port servkey pid ipaddr dbpath
+			 (db:run-id->dbname #f)
+			 'register-server `(,iface
+					    ,port
+					    ,server-key
+					    ,(current-process-id)
+					    ,iface
+					    ,apath
+					    ,dbname)))
+
+(define (rmt:get-count-servers sinfo apath)
+  (servdat-conns sinfo) ;; just checking types
+  (rmt:open-main-connection sinfo apath) ;; we need a channel to main.db
+  (rmt:send-receive-real sinfo apath      ;; params: host port servkey pid ipaddr dbpath
+			 (db:run-id->dbname #f)
+			 'get-count-servers `(,apath)))
+
+(define (rmt:get-servers-info apath)
+  (rmt:send-receive 'get-servers-info #f `(,apath)))
+
+(define (rmt:deregister-server db-serv-info apath iface port server-key dbname)
+  (rmt:open-main-connection db-serv-info apath) ;; we need a channel to main.db
+  (rmt:send-receive db-serv-info apath      ;; params: host port servkey pid ipaddr dbpath
+                         (db:run-id->dbname #f)
+                         'deregister-server `(,iface
+                                              ,port
+                                              ,server-key
+                                              ,(current-process-id)
+                                              ,iface
+                                              ,apath
+                                              ,dbname)))
+
+(define (rmt:wait-for-stable-interface #!optional (num-tries-allowed 100))
+  ;; wait until *db-serv-info* stops changing
+  (let* ((stime (current-seconds)))
+    (let loop ((last-host  #f)
+	       (last-port  #f)
+	       (tries 0))
+      (let* ((curr-host (and *db-serv-info* (servdat-host *db-serv-info*)))
+	     (curr-port (and *db-serv-info* (servdat-port *db-serv-info*))))
+	;; first we verify port and interface, update *db-serv-info* in need be.
+	(cond
+	 ((> tries num-tries-allowed)
+	  (debug:print 0 *default-log-port* "rmt:keep-running, giving up after trying for several minutes.")
+	  (exit 1))
+	 ((not *db-serv-info*)
+	  (thread-sleep! 0.25)
+	  (loop curr-host curr-port (+ tries 1)))
+	 ((or (not last-host)(not last-port))
+	  (debug:print 0 *default-log-port* "rmt:keep-running, still no interface, tries="tries)
+	  (thread-sleep! 0.25)
+	  (loop curr-host curr-port (+ tries 1)))
+	 ((or (not (equal? last-host curr-host))
+	      (not (equal? last-port curr-port)))
+	  (debug:print-info 0 *default-log-port* "WARNING: interface changed, refreshing iface and port info")
+	  (thread-sleep! 0.25)
+	  (loop curr-host curr-port (+ tries 1)))
+	 ((< (- (current-seconds) stime) 1) ;; keep up the looping until at least 3 seconds have passed
+	  (thread-sleep! 0.5)
+	  (loop curr-host curr-port (+ tries 1)))
+	 (else
+	  (rmt:get-signature) ;; sets *my-signature* as side effect
+	  (servdat-status-set! *db-serv-info* 'interface-stable)
+	  (debug:print 0 *default-log-port*
+		       "SERVER STARTED: " curr-host
+		       ":" curr-port
+		       " AT " (current-seconds) " server signature: " *my-signature*
+		       " with "(servdat-trynum *db-serv-info*)" port changes")
+	  (flush-output *default-log-port*)
+	  #t))))))
+
+;; run rmt:keep-running in a parallel thread to monitor that the db is being 
+;; used and to shutdown after sometime if it is not.
+;;
+(define (rmt:keep-running dbname) 
+  ;; if none running or if > 20 seconds since 
+  ;; server last used then start shutdown
+  ;; This thread waits for the server to come alive
+  (debug:print-info 0 *default-log-port* "Starting the sync-back, keep alive thread in server")
+
+  (let* ((sinfo             *db-serv-info*)
+	 (server-start-time (current-seconds))
+	 (pkts-dir          (get-pkts-dir))
+	 (server-key        (rmt:get-signature)) ;; This servers key
+	 (is-main           (equal? (args:get-arg "-db") ".db/main.db"))
+	 (last-access       0)
+	 (server-timeout    (server:expiration-timeout))
+	 (shutdown-server-sequence (lambda (host port)
+				     (set! *unclean-shutdown* #f) ;; Should not be needed anymore
+				     (debug:print-info 0 *default-log-port* "Starting to shutdown the server. pid="(current-process-id))
+				     ;; (rmt:server-shutdown host port) -- called in on-exit
+				     ;; (portlogger:open-run-close portlogger:set-port port "released") called in on-exit
+				     (exit)))
+	 (timed-out?        (lambda ()
+			      (<= (+ last-access server-timeout)
+				  (current-seconds)))))
+    (servdat-dbfile-set! *db-serv-info* (args:get-arg "-db"))
+    ;; main and run db servers have both got wait logic (could/should merge it)
+    (if is-main
+	(rmt:wait-for-server pkts-dir dbname server-key)
+	(rmt:wait-for-stable-interface))
+    ;; this is our forever loop
+    (let* ((iface (servdat-host *db-serv-info*))
+	   (port  (servdat-port *db-serv-info*))
+	   (uconn (servdat-uconn *db-serv-info*)))
+      (let loop ((count          0)
+		 (bad-sync-count 0)
+		 (start-time     (current-milliseconds)))
+	(if (and (not is-main)
+		 (common:low-noise-print 60 "servdat-status"))
+	    (debug:print-info 0 *default-log-port* "servdat-status is " (servdat-status *db-serv-info*)))
+
+	(mutex-lock! *heartbeat-mutex*)
+	;; set up the database handle
+	(if (not *dbstruct-db*) ;; no db opened yet, open the db and register with main if appropriate
+	      (let ((watchdog (bdat-watchdog *bdat*)))
+		(debug:print 0 *default-log-port* "SERVER: dbprep")
+		(db:setup dbname) ;; sets *dbstruct-db* as side effect
+		(servdat-status-set! *db-serv-info* 'db-opened)
+		;; IFF I'm not main, call into main and register self
+		(if (not is-main)
+		    (let ((res (rmt:register-server sinfo
+						    *toppath* iface port
+						    server-key dbname)))
+		      (if res ;; we are the server
+			  (servdat-status-set! *db-serv-info* 'have-interface-and-db)
+			  ;; now check that the db locker is alive, clear it out if not
+			  (let* ((serv-info (rmt:server-info *toppath* dbname)))
+			    (match serv-info
+				   ((host port servkey pid ipaddr apath dbpath)
+				    (if (not (server-ready? uconn (conc host":"port) servkey))
+					(begin
+					  (debug:print-info 0 *default-log-port* "Server registered but not alive. Removing and trying again.")
+					  (rmt:deregister-server sinfo apath host port servkey dbpath) ;; servkey pid ipaddr apath dbpath)
+					  (loop (+ count 1) bad-sync-count start-time))))
+				   (else
+				    (debug:print 0 *default-log-port* "We are not the server for "dbname", exiting. Server info is: "serv-info)
+				    (exit)))))))
+		(debug:print 0 *default-log-port*
+			     "SERVER: running, db "dbname" opened, megatest version: "
+			   (common:get-full-version))
+	      ))
+	
+	(db:sync-inmem->disk *dbstruct-db* *toppath* dbname force-sync: #t)
+	
+	(mutex-unlock! *heartbeat-mutex*)
+	
+	;; when things go wrong we don't want to be doing the various
+	;; queries too often so we strive to run this stuff only every
+	;; four seconds or so.
+	(let* ((sync-time (- (current-milliseconds) start-time))
+	       (rem-time  (quotient (- 4000 sync-time) 1000)))
+	  (if (and (<= rem-time 4)
+		   (>  rem-time 0))
+	      (thread-sleep! rem-time)))
+    
+	;; Transfer *db-last-access* to last-access to use in checking that we are still alive
+	(set! last-access *db-last-access*)
+	
+	(if (< count 1) ;; 3x3 = 9 secs aprox
+	    (loop (+ count 1) bad-sync-count (current-milliseconds)))
+	
+	(if (common:low-noise-print 60 "dbstats")
+	    (begin
+	      (debug:print 0 *default-log-port* "Server stats:")
+	      (db:print-current-query-stats)))
+	(let* ((hrs-since-start  (/ (- (current-seconds) server-start-time) 3600)))
+	  (cond
+	   ((not *server-run*)
+	    (debug:print-info 0 *default-log-port* "*server-run* set to #f. Shutting down.")
+	    (shutdown-server-sequence (get-host-name) port))
+	   ((timed-out?)
+	    (debug:print-info 0 *default-log-port* "Server timed out. seconds since last db access: " (- (current-seconds) last-access))
+	    (shutdown-server-sequence (get-host-name) port))
+	   ((and *server-run*
+		 (or (not (timed-out?))
+		     (if is-main ;; do not exit if there are other servers (keep main open until all others gone)
+			 (> (rmt:get-count-servers sinfo *toppath*) 1)
+			 #f)))
+	    (if (common:low-noise-print 120 "server continuing")
+		(debug:print-info 0 *default-log-port* "Server continuing, seconds since last db access: " (- (current-seconds) last-access)))
+	    (loop 0 bad-sync-count (current-milliseconds)))
+	   (else
+	    (set! *unclean-shutdown* #f)
+	    (debug:print-info 0 *default-log-port* "Server timed out. seconds since last db access: " (- (current-seconds) last-access))
+	    (shutdown-server-sequence (get-host-name) port)
+	    #;(debug:print-info 0 *default-log-port* "Sending 'quit to server, received: "
+			      (open-send-receive-nn (conc iface":"port)      ;; do this here and not in server-shutdown
+						    (sexpr->string 'quit))))))))))
+
+(define (rmt:get-reasonable-hostname)
+  (let* ((inhost (or (args:get-arg "-server") "-")))
+    (if (equal? inhost "-")
+	(get-host-name)
+	inhost)))
+
+;; Call this to start the actual server
+;;
+;; all routes though here end in exit ...
+;;
+;; This is the point at which servers are started
+;;
+(define (rmt:server-launch dbname)
+  (debug:print-info 0 *default-log-port* "Entered rmt:server-launch")
+  (let* ((th2 (make-thread (lambda ()
+			     (debug:print-info 0 *default-log-port* "Server run thread started")
+			     (rmt:run (rmt:get-reasonable-hostname)))
+			   "Server run"))
+	 (th3 (make-thread (lambda ()
+			     (debug:print-info 0 *default-log-port* "Server monitor thread started")
+			     (if (args:get-arg "-server")
+				 (rmt:keep-running dbname)
+				 #;(rmt:wait-for-stable-interface)))
+			     "Keep running")))
+    (thread-start! th2)
+    (thread-sleep! 0.252) ;; give the server time to settle before starting the keep-running monitor.
+    (thread-start! th3)
+    (set! *didsomething* #t)
+    (thread-join! th2)
+    (thread-join! th3))
+  #f)
+	    
+;;======================================================================
+;;  S E R V E R   -  D I R E C T   C A L L S
+;;======================================================================
+
+(define (rmt:kill-server run-id)
+  (rmt:send-receive 'kill-server #f (list run-id)))
+
+(define (rmt:start-server run-id)
+  (rmt:send-receive 'start-server #f (list run-id)))
+
+(define (rmt:server-info apath dbname)
+  (rmt:send-receive 'get-server-info #f (list apath dbname)))
+
+
+)

Index: ulex-simple/ulex.scm
==================================================================
--- ulex-simple/ulex.scm
+++ ulex-simple/ulex.scm
@@ -24,11 +24,12 @@
 ;;   Why sql-de-lite and not say, dbi?  - performance mostly, then simplicity.
 ;;
 ;;======================================================================
 
 (module ulex
-    (
+	*
+	#;(
      
      ;; NOTE: looking for the handler proc - find the run-listener :)
      
      run-listener     ;; (run-listener handler-proc [port]) => uconn
 
@@ -50,44 +51,52 @@
      udat-port
      udat-host-port
      
      ;; for testing only
      ;; pp-uconn
+     
+     ;; parameters
+     work-method   ;; parameter; 'threads, 'mailbox, 'limited, 'direct
+     return-method ;; parameter; 'mailbox, 'polling, 'direct
      )
 
 (import scheme
 	chicken.base
 	chicken.file
+	chicken.io
 	chicken.time
 	chicken.condition
 	chicken.string
 	chicken.sort
 	chicken.pretty-print
+	chicken.tcp
 	
 	address-info
 	mailbox
 	matchable
 	;; queues
 	regex
 	regex-case
+	simple-exceptions
 	s11n
 	srfi-1
 	srfi-18
 	srfi-4
 	srfi-69
 	system-information
-	tcp6
+	;; tcp6
+	tcp-server
 	typed-records
 	)
 
 ;; udat struct, used by both caller and callee
 ;; instantiated as uconn by convention
 ;;
 (defstruct udat
   ;; the listener side
   (port #f)
-  (host-port #f)
+  (host-port #f) ;; my host:port
   (socket #f)
   ;; the peers
   (peers  (make-hash-table)) ;; host:port->peer
   ;; work handling
   (work-queue (make-mailbox))
@@ -94,30 +103,15 @@
   (work-proc  #f)                ;; set by user
   (cnum       0)                 ;; cookie number
   (mboxes     (make-hash-table)) ;; for the replies
   (avail-cmboxes '())            ;; list of (<cookie> . <mbox>) for re-use
   ;; threads
-  (numthreads 50)
+  (numthreads 10)
   (cmd-thread #f)
   (work-queue-thread #f)
-  ) 
-
-;; ;; struct for keeping track of others we are talking to
-;; ;;
-;; (defstruct pdat
-;;   (host-port  #f)
-;;   (conns      '()) ;; list of pcon structs, pop one off when calling the peer
-;;   )
-;; 
-;; ;; struct for peer connections, keep track of expiration etc.
-;; ;;
-;; (defstruct pcon
-;;   (inp #f)
-;;   (oup #f)
-;;   (exp (+ (current-seconds) 59)) ;; expires at this time, set to (+ (current-seconds) 59)
-;;   (lifetime (+ (current-seconds) 600)) ;; throw away and create new after five minutes
-;;   )
+  (num-threads-running 0)
+  ) 
 
 ;;======================================================================
 ;; listener
 ;;======================================================================
 
@@ -156,29 +150,25 @@
 ;;
 (define (run-listener handler-proc #!optional (port-suggestion 4242))
   (let* ((uconn (make-udat)))
     (udat-work-proc-set! uconn handler-proc)
     (if (setup-listener uconn port-suggestion)
-	(let* ((th1 (make-thread (lambda ()(ulex-cmd-loop uconn)) "Ulex command loop"))
-	       #;(th2 (make-thread (lambda ()(process-work-queue uconn)) "Ulex work queue processor")))
-	  (tcp-buffer-size 2048)
-	  ;; (max-connections 2048) 
-	  (thread-start! th1)
-	  #;(thread-start! th2)
-	  (udat-cmd-thread-set! uconn th1)
-	  #;(udat-work-queue-thread-set! uconn th2)
-	  (print "cmd loop and process workers started")
-	  uconn)
+	((make-tcp-server
+	  (udat-socket uconn)
+	  (lambda ()
+	    (let* ((rdat  (deserialize)) ;; '(my-host-port qrykey cmd params)
+		   (resp  (do-work uconn rdat)))
+	      (serialize resp)))))
 	(assert #f "ERROR: run-listener called without proper setup."))))
 
 (define (wait-and-close uconn)
   (thread-join! (udat-cmd-thread uconn))
   (tcp-close (udat-socket uconn)))
 
 ;;======================================================================
-;; peers and connections
-;;======================================================================
+;; == << ;; peers and connections
+;; == << ;;======================================================================
 
 (define *send-mutex* (make-mutex))
 
 ;; send structured data to recipient
 ;;
@@ -185,224 +175,69 @@
 ;;  NOTE: qrykey is what was called the "cookie" previously
 ;;
 ;;     retval tells send to expect and wait for return data (one line) and return it or time out
 ;;       this is for ping where we don't want to necessarily have set up our own server yet.
 ;;
-;; NOTE: see below for beginnings of code to allow re-use of tcp connections
-;;        - I believe (without substantial evidence) that re-using connections will
-;;          be beneficial ...
-;;
-(define (send udata host-port qrykey cmd params)
-  (mutex-lock! *send-mutex*)
-  (let* ((my-host-port (udat-host-port udata))          ;; remote will return to this
-	 (isme         #f #;(equal? host-port my-host-port)) ;; calling myself?
+(define (send-receive udata host-port cmd params)
+  (let* ((host-port-lst (string-split host-port ":"))
+	 (host          (car host-port-lst))
+	 (port          (string->number (cadr host-port-lst)))
+	 (my-host-port (and udata (udat-host-port udata)))          ;; remote will return to this
+	 (isme         (equal? host-port my-host-port)) ;; calling myself?
 	 ;; dat is a self-contained work block that can be sent or handled locally
-	 (dat          (list my-host-port qrykey cmd params)))
-    (if isme
-	(ulex-handler udata dat) ;; no transmission needed
-	(handle-exceptions ;; TODO - MAKE THIS EXCEPTION CMD SPECIFIC?
-	    exn
-	    #f
-	  (let-values (((inp oup)(tcp-connect host-port)))
+	 (dat          (list my-host-port 'qrykey cmd params #;(cons (current-seconds)(current-milliseconds)))))
+    (cond
+     (isme (do-work udata dat)) ;; no transmission needed
+     (else
+      (handle-exceptions ;; TODO - MAKE THIS EXCEPTION CMD SPECIFIC?
+	  exn
+	  (message exn)
+	(begin
+	  ;; (mutex-lock! *send-mutex*) ;; DOESN'T SEEM TO HELP
+	  (let-values (((inp oup)(tcp-connect host port)))
 	    (let ((res (if (and inp oup)
 			   (begin
 			     (serialize dat oup)
-			     (deserialize inp)) ;; yes, we always want an ack
+			     (close-output-port oup)
+			     (deserialize inp))
 			   (begin
 			     (print "ERROR: send called but no receiver has been setup. Please call setup first!")
 			     #f))))
 	      (close-input-port inp)
-	      (close-output-port oup)
-	      (mutex-unlock! *send-mutex*)
-	      res)))))) ;; res will always be 'ack
-
-;; send a request to the given host-port and register a mailbox in udata
-;; wait for the mailbox data and return it
-;;
-(define (send-receive uconn host-port cmd data)
-  (cond
-   ((member cmd '(ping goodbye)) ;; these are immediate
-    (send uconn host-port 'ping cmd data))
-   (else
-    (let* ((cmbox     (get-cmbox uconn)) ;; would it be better to keep a stack of mboxes to reuse?
-	   (qrykey    (car cmbox))
-	   (mbox      (cdr cmbox))
-	   (mbox-time (current-milliseconds))
-	   (sres      (send uconn host-port qrykey cmd data))) ;; short res
-      sres))))
-
-;;======================================================================
-;; responder side
-;;======================================================================
-
-;; take a request, rdat, and if not immediate put it in the work queue
-;;
-;; Reserved cmds; ack ping goodbye response
-;;
-(define (ulex-handler uconn rdat)
-  (assert (list? rdat) "FATAL: ulex-handler give rdat as not list")
-  (match rdat ;;  (string-split controldat)
-    ((rem-host-port qrykey cmd params)
-     ;; (print "ulex-handler got: "rem-host-port" qrykey: "qrykey" cmd: "cmd" params: "params)
-     (let ((mbox (hash-table-ref/default (udat-mboxes uconn) qrykey #f)))
-       (case cmd
-	 ;; ((ack )(print "Got ack! But why? Should NOT get here.") 'ack)
-	 ((ping)
-	  ;; (print "Got Ping!")
-	  ;; (add-to-work-queue uconn rdat)
-	 'ack)
-	 (else
-	  (do-work uconn rdat)))))
-    (else
-     (print "BAD DATA? controldat=" rdat)
-     'ack) ;; send ack anyway?
-    ))
-
-;; given an already set up uconn start the cmd-loop
-;;
-(define (ulex-cmd-loop uconn)
-  (let* ((serv-listener (udat-socket uconn)))
-    (let loop ((state 'start))
-      (let-values (((inp oup)(tcp-accept serv-listener)))
-	(let* ((rdat  (deserialize inp)) ;; '(my-host-port qrykey cmd params)
-	       (resp  (ulex-handler uconn rdat)))
-	  (if resp (serialize resp oup))
-	  (close-input-port inp)
-	  (close-output-port oup))
-	(loop state)))))
-;;(define (ulex-cmd-loop uconn)
-;;  (let* ((serv-listener (udat-socket uconn))
-;;	 ;; (old-listener      (lambda ()
-;;	 ;; 		      (let loop ((state 'start))
-;;	 ;; 			(let-values (((inp oup)(tcp-accept serv-listener)))
-;;	 ;; 			  (let* ((rdat  (deserialize inp)) ;; '(my-host-port qrykey cmd params)
-;;	 ;; 				 (resp  (ulex-handler uconn rdat)))
-;;	 ;; 			    (if resp (serialize resp oup))
-;;	 ;; 			    (close-input-port inp)
-;;	 ;; 			    (close-output-port oup))
-;;	 ;; 			  (loop state)))))
-;;	 (server       (make-tcp-server
-;;			serv-listener
-;;			(lambda ()
-;;			  (let* ((rdat  (deserialize )) ;; '(my-host-port qrykey cmd params)
-;;				 (resp  (ulex-handler uconn rdat)))
-;;			    (if resp (serialize resp) resp))))))
-;;    (server)))
-
-;; add a proc to the cmd list, these are done symetrically (i.e. in all instances)
-;; so that the proc can be dereferenced remotely
-;;
-(define (set-work-handler uconn proc)
-  (udat-work-proc-set! uconn proc))
+	      ;; (mutex-unlock! *send-mutex*) ;; DOESN'T SEEM TO HELP
+	      res)))))))) ;; res will always be 'ack unless return-method is direct
 
 ;;======================================================================
 ;; work queues - this is all happening on the listener side
 ;;======================================================================
 
-;; rdat is (rem-host-port qrykey cmd params)
-					     
-(define (add-to-work-queue uconn rdat)
-  #;(queue-add! (udat-work-queue uconn) rdat)
-  (mailbox-send! (udat-work-queue uconn) rdat))
-
+;; move the logic to return the result somewhere else?
+;;
 (define (do-work uconn rdat)
-  (let* ((proc (udat-work-proc uconn))) ;; get it each time - conceivebly it could change
+  (let* () ;; get it each time - conceivebly it could change
     ;; put this following into a do-work procedure
     (match rdat
       ((rem-host-port qrykey cmd params)
-       (let* ((start-time (current-milliseconds))
-	      (result (proc rem-host-port qrykey cmd params))
-	      (end-time (current-milliseconds))
-	      (run-time (- end-time start-time)))
-	 result))
+       (case cmd
+	 ((ping) 'ping-ack) ;; bypass calling the proc
+	 (else
+	  (let* ((proc       (udat-work-proc uconn))
+		 (start-time (current-milliseconds))
+		 (result     (proc rem-host-port qrykey cmd params))
+		 (end-time   (current-milliseconds))
+		 (run-time   (- end-time start-time)))
+	    result))))
       (else
-       (print "ERROR: rdat "rdat", did not match rem-host-port qrykey cmd params")
-       #f))))
-     
-(define (process-work-queue uconn) 
-  (let ((wqueue (udat-work-queue uconn))
-	(proc   (udat-work-proc  uconn))
-	(numthr (udat-numthreads uconn)))
-    (let loop ((thnum    1)
-	       (threads '()))
-      (let ((thlst (cons (make-thread (lambda ()
-					(let work-loop ()
-					  (let ((rdat (mailbox-receive! wqueue 24000 'MBOX_TIMEOUT)))
-					    (do-work uconn rdat))
-					  (work-loop)))
-				      (conc "work thread " thnum))
-			 threads)))
-	(if (< thnum numthr)
-	    (loop (+ thnum 1)
-		  thlst)
-	    (begin
-	      (print "ULEX: Starting "(length thlst)" worker threads.")
-	      (map thread-start! thlst)
-	      (print "ULEX: Threads started. Joining all.")
-	      (map thread-join! thlst)))))))
-
-;; below was to enable re-use of connections. This seems non-trivial so for
-;; now lets open on each call
-;;
-;; ;; given host-port get or create peer struct
-;; ;;
-;; (define (udat-get-peer uconn host-port)
-;;   (or (hash-table-ref/default (udat-peers uconn) host-port #f)
-;;       ;; no peer, so create pdat and init it
-;;       
-;;       ;; NEED stack of connections, pop and use; inp, oup,
-;;       ;; creation_time (remove and create new if over 24hrs old
-;;       ;; 
-;;       (let ((pdat (make-pdat host-port: host-port)))
-;; 	(hash-table-set! (udat-peers uconn) host-port pdat)
-;; 	pdat)))
-;; 
-;; ;; is pcon alive
-;; 
-;; ;; given host-port and pdat get a pcon
-;; ;;
-;; (define (pdat-get-pcon pdat host-port)
-;;   (let loop ((conns (pdat-conns pdat)))
-;;     (if (null? conns) ;; none? make and return - do NOT add - it will be pushed back on list later
-;; 	(init-pcon (make-pcon))
-;; 	(let* ((conn (pop conns)))
-;; 	  
-;; ;; given host-port get a pcon struct
-;; ;;
-;; (define (udat-get-pcon 
-      
+       (print "ERROR: rdat "rdat", did not match rem-host-port qrykey cmd params")))))
+
 ;;======================================================================
 ;; misc utils
 ;;======================================================================
 
-(define (make-cookie uconn)
-  (let ((newcnum (+ (udat-cnum uconn) 1)))
-    (udat-cnum-set! uconn newcnum)
-    (conc (udat-host-port uconn) ":"
-	  newcnum)))
-
-;; cookie/mboxes
-
-;; we store each mbox with a cookie (<cookie> . <mbox>)
-;;
-(define (get-cmbox uconn)
-  (if (null? (udat-avail-cmboxes uconn))
-      (let ((cookie (make-cookie uconn))
-	    (mbox   (make-mailbox)))
-	(hash-table-set! (udat-mboxes uconn) cookie mbox)
-	`(,cookie . ,mbox))
-      (let ((cmbox (car (udat-avail-cmboxes uconn))))
-	(udat-avail-cmboxes-set! uconn (cdr (udat-avail-cmboxes uconn)))
-	cmbox)))
-
-(define (put-cmbox uconn cmbox)
-  (udat-avail-cmboxes-set! uconn (cons cmbox (udat-avail-cmboxes uconn))))
-
 (define (pp-uconn uconn)
   (pp (udat->alist uconn)))
 
-  
 ;;======================================================================
 ;; network utilities
 ;;======================================================================
 
 ;; NOTE: Look at address-info egg as alternative to some of this

DELETED ulex.scm
Index: ulex.scm
==================================================================
--- ulex.scm
+++ /dev/null
@@ -1,24 +0,0 @@
-;;======================================================================
-;; Copyright 2019, Matthew Welland.
-;; 
-;; This file is part of Megatest.
-;; 
-;;     Megatest is free software: you can redistribute it and/or modify
-;;     it under the terms of the GNU General Public License as published by
-;;     the Free Software Foundation, either version 3 of the License, or
-;;     (at your option) any later version.
-;; 
-;;     Megatest is distributed in the hope that it will be useful,
-;;     but WITHOUT ANY WARRANTY; without even the implied warranty of
-;;     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-;;     GNU General Public License for more details.
-;; 
-;;     You should have received a copy of the GNU General Public License
-;;     along with Megatest.  If not, see <http://www.gnu.org/licenses/>.
-
-;;======================================================================
-
-(declare (unit ulex))
-
-(include "ulex/ulex.scm")
-;; (include "ulex-simple/ulex.scm")

ADDED   ulex.scm.template
Index: ulex.scm.template
==================================================================
--- /dev/null
+++ ulex.scm.template
@@ -0,0 +1,23 @@
+;;======================================================================
+;; Copyright 2019, Matthew Welland.
+;; 
+;; This file is part of Megatest.
+;; 
+;;     Megatest is free software: you can redistribute it and/or modify
+;;     it under the terms of the GNU General Public License as published by
+;;     the Free Software Foundation, either version 3 of the License, or
+;;     (at your option) any later version.
+;; 
+;;     Megatest is distributed in the hope that it will be useful,
+;;     but WITHOUT ANY WARRANTY; without even the implied warranty of
+;;     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;;     GNU General Public License for more details.
+;; 
+;;     You should have received a copy of the GNU General Public License
+;;     along with Megatest.  If not, see <http://www.gnu.org/licenses/>.
+
+;;======================================================================
+
+(declare (unit ulex))
+
+(include "ulex-FLAVOR/ulex.scm")

ADDED   ulex/dbmgr.scm
Index: ulex/dbmgr.scm
==================================================================
--- /dev/null
+++ ulex/dbmgr.scm
@@ -0,0 +1,1131 @@
+;;======================================================================
+;; Copyright 2022, Matthew Welland.
+;; 
+;; This file is part of Megatest.
+;; 
+;;     Megatest is free software: you can redistribute it and/or modify
+;;     it under the terms of the GNU General Public License as published by
+;;     the Free Software Foundation, either version 3 of the License, or
+;;     (at your option) any later version.
+;; 
+;;     Megatest is distributed in the hope that it will be useful,
+;;     but WITHOUT ANY WARRANTY; without even the implied warranty of
+;;     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;;     GNU General Public License for more details.
+;; 
+;;     You should have received a copy of the GNU General Public License
+;;     along with Megatest.  If not, see <http://www.gnu.org/licenses/>.
+
+;;======================================================================
+
+(declare (unit dbmgrmod))
+
+(declare (uses ulex))
+(declare (uses apimod))
+(declare (uses pkts))
+(declare (uses commonmod))
+(declare (uses dbmod))
+(declare (uses mtargs))
+(declare (uses portloggermod))
+(declare (uses debugprint))
+
+(module dbmgrmod
+    *
+
+(import scheme
+	chicken.base
+	chicken.condition
+	chicken.file
+	chicken.format
+	chicken.port
+	chicken.process
+	chicken.process-context
+	chicken.process-context.posix
+	chicken.sort
+	chicken.string
+	chicken.time
+	
+	(prefix sqlite3 sqlite3:)
+	matchable
+	md5
+	message-digest
+	regex
+	s11n
+	srfi-1
+	srfi-18
+	srfi-69
+	system-information
+	typed-records
+	
+	pkts
+	ulex
+
+	commonmod
+	apimod
+	dbmod
+	debugprint
+	(prefix mtargs args:)
+	portloggermod
+	)
+
+;; Configurations for server
+;; (tcp-buffer-size 2048)
+;; (max-connections 2048) 
+
+;; info about me as a listener and my connections to db servers
+;; stored (for now) in *db-serv-info*
+;;
+(defstruct servdat
+  (host #f)
+  (port #f)
+  (uuid #f)
+  (dbfile #f)
+  (uconn   #f) ;; this is the listener *FOR THIS PROCESS*
+  (mode    #f)
+  (status 'starting)
+  (trynum 0) ;; count the number of ports we've tried
+  (conns  (make-hash-table)) ;; apath/dbname => conndat
+  ) 
+
+(define *db-serv-info* (make-servdat))
+
+(define (servdat->url sdat)
+  (conc (servdat-host sdat)":"(servdat-port sdat)))
+
+;; db servers contact info
+;;
+(defstruct conndat
+  (apath    #f)
+  (dbname   #f)
+  (fullname #f)
+  (hostport #f)
+  (ipaddr   #f)
+  (port     #f)
+  (srvpkt   #f)
+  (srvkey   #f)
+  (lastmsg  0)
+  (expires  0))
+
+(define *srvpktspec*
+  `((server (host    . h)
+	    (port    . p)
+	    (servkey . k)
+	    (pid     . i)
+	    (ipaddr  . a)
+	    (dbpath  . d))))
+
+;;======================================================================
+;;  S U P P O R T   F U N C T I O N S
+;;======================================================================
+
+;; set up the api proc, seems like there should be a better place for this?
+;;
+;; IS THIS NEEDED ANYMORE? TODO - REMOVE IF POSSIBLE
+;;
+;; (define api-proc (make-parameter conc))
+;; (api-proc api:execute-requests)
+
+;; do we have a connection to apath dbname and
+;; is it not expired? then return it
+;;
+;; else setup a connection
+;;
+;; if that fails, return '(#f "some reason") ;; NB// convert to raising an exception
+;;
+(define (rmt:get-conn remdat apath dbname)
+  (let* ((fullname (db:dbname->path apath dbname)))
+    (hash-table-ref/default (servdat-conns remdat) fullname #f)))
+
+(define (rmt:drop-conn remdat apath dbname)
+  (let* ((fullname (db:dbname->path apath dbname)))
+    (hash-table-delete! (servdat-conns remdat) fullname)))
+  
+(define (rmt:find-main-server uconn apath dbname)
+  (let* ((pktsdir     (get-pkts-dir apath))
+	 (all-srvpkts (get-all-server-pkts pktsdir *srvpktspec*))
+	 (viable-srvs (get-viable-servers all-srvpkts dbname)))
+    (get-the-server uconn apath viable-srvs)))
+
+
+(define *connstart-mutex* (make-mutex))
+(define *last-main-start* 0)
+
+;; looks for a connection to main, returns if have and not exired
+;; creates new otherwise
+;; 
+;; connections for other servers happens by requesting from main
+;;
+;; TODO: This is unnecessarily re-creating the record in the hash table
+;;
+(define (rmt:open-main-connection remdat apath)
+  (let* ((fullpath (db:dbname->path apath ".db/main.db"))
+	 (conns    (servdat-conns remdat))
+	 (conn     (rmt:get-conn remdat apath ".db/main.db")) ;; (hash-table-ref/default conns fullpath #f)) ;; TODO - create call for this
+	 (start-rmt:run (lambda ()
+			  (let* ((th1 (make-thread (lambda ()(rmt:run (get-host-name))) "non-db mode server")))
+			    (thread-start! th1)
+			    (thread-sleep! 1)
+			    (let loop ((count 0))
+			      (assert (< count 30) "FATAL: responder failed to initialize in rmt:open-main-connection")
+			      (if (or (not *db-serv-info*)
+				      (not (servdat-uconn *db-serv-info*)))
+				  (begin
+				    (thread-sleep! 1)
+				    (loop (+ count 1)))
+				  (begin
+				    (servdat-mode-set! *db-serv-info* 'non-db)
+				    (servdat-uconn *db-serv-info*)))))))
+	 (myconn    (servdat-uconn *db-serv-info*)))
+    (cond
+     ((not myconn)
+      (start-rmt:run)
+      (rmt:open-main-connection remdat apath))
+     ((and conn                                             ;; conn is NOT a socket, just saying ...
+	   (< (current-seconds) (conndat-expires conn)))
+      #t) ;; we are current and good to go - we'll deal elsewhere with a server that was killed or died 
+     ((and conn
+	   (>= (current-seconds)(conndat-expires conn)))
+      (debug:print-info 0 *default-log-port* "connection to "fullpath" server expired. Reconnecting.")
+      (rmt:drop-conn remdat apath ".db/main.db") ;;
+      (rmt:open-main-connection remdat apath))
+     (else
+      ;; Below we will find or create and connect to main
+      (debug:print-info 0 *default-log-port* "rmt:open-main-connection - starting from scratch")
+      (let* ((dbname         (db:run-id->dbname #f))
+	     (the-srv        (rmt:find-main-server myconn apath dbname))
+	     (start-main-srv (lambda () ;; call IF there is no the-srv found
+			       (mutex-lock! *connstart-mutex*)
+			       (if (> (- (current-seconds) *last-main-start*) 5) ;; at least four seconds since last attempt to start main server
+				   (begin
+				     (api:run-server-process apath dbname)
+				     (set! *last-main-start* (current-seconds))
+				     (thread-sleep! 1))
+				   (thread-sleep! 0.25))
+			       (mutex-unlock! *connstart-mutex*)
+			       (rmt:open-main-connection remdat apath) ;; TODO: Add limit to number of tries
+			       )))
+	(if (not the-srv) ;; have server, try connecting to it
+	    (start-main-srv)
+	    (let* ((srv-addr (server-address the-srv)) ;; need serv
+		   (ipaddr   (alist-ref 'ipaddr  the-srv))
+		   (port     (alist-ref 'port    the-srv))
+		   (srvkey   (alist-ref 'servkey the-srv))
+		   (fullpath (db:dbname->path apath dbname))
+		   
+		   (new-the-srv (make-conndat
+				 apath:   apath
+				 dbname:  dbname
+				 fullname: fullpath
+				 hostport: srv-addr
+				 ;; socket: (open-nn-connection srv-addr)  - TODO - open ulex connection?
+				 ipaddr: ipaddr
+				 port: port
+				 srvpkt: the-srv
+				 srvkey: srvkey ;; generated by rmt:get-signature on the server side
+				 lastmsg: (current-seconds)
+				 expires: (+ (current-seconds)
+					     (server:expiration-timeout)
+					     -2) ;; this needs to be gathered during the ping
+				 )))
+	      (hash-table-set! conns fullpath new-the-srv)))
+	#t)))))
+
+;; NB// sinfo is a servdat struct
+;;
+(define (rmt:general-open-connection sinfo apath dbname #!key (num-tries 5))
+  (assert (not (equal? dbname ".db/main.db")) "ERROR: general-open-connection should never be called with main as the db")
+  (let* ((mdbname  ".db/main.db") ;; (db:run-id->dbname #f)) TODO: put this back to the lookup when stable
+	 (fullname (db:dbname->path apath dbname))
+	 (conns    (servdat-conns sinfo))
+	 (mconn    (rmt:get-conn sinfo apath ".db/main.db"))
+	 (dconn    (rmt:get-conn sinfo apath dbname)))
+    #;(if (and mconn
+	     (not (debug:print-logger)))
+	(begin
+	  (debug:print-info 0 *default-log-port* "Turning on logging to main, look in logs dir for main log.")
+	  (debug:print-logger rmt:log-to-main)))
+    (cond
+     ((and mconn
+	   dconn
+	   (< (current-seconds)(conndat-expires dconn)))
+      #t) ;; good to go
+     ((not mconn) ;; no channel open to main? open it...
+      (rmt:open-main-connection sinfo apath)
+      (rmt:general-open-connection sinfo apath dbname num-tries: (- num-tries 1)))
+     ((not dconn)                 ;; no channel open to dbname?     
+      (let* ((res (rmt:send-receive-real sinfo apath mdbname 'get-server `(,apath ,dbname))))
+	(case res
+	  ((server-started)
+	   (if (> num-tries 0)
+	       (begin
+		 (thread-sleep! 2)
+		 (rmt:general-open-connection sinfo apath dbname num-tries: (- num-tries 1)))
+	       (begin
+		 (debug:print-error 0 *default-log-port* "Failed to start servers needed or open channel to "apath", "dbname)
+		 (exit 1))))
+	  (else
+	   (if (list? res) ;; server has been registered and the info was returned. pass it on.
+	       (begin ;;  ("192.168.0.9" 53817
+		      ;;  "5e34239f48e8973b3813221e54701a01" "24310"
+		      ;;  "192.168.0.9"
+		      ;;  "/home/matt/data/megatest/tests/simplerun"
+		 ;;  ".db/1.db")
+		 (match
+		  res
+		  ((host port servkey pid ipaddr apath dbname)
+		   (debug:print-info 0 *default-log-port* "got "res)
+		   (hash-table-set! conns
+				    fullname
+				    (make-conndat
+				     apath: apath
+				     dbname: dbname
+				     hostport: (conc host":"port)
+				     ;; socket: (open-nn-connection (conc host":"port)) ;; TODO - open ulex connection?
+				     ipaddr: ipaddr
+				     port: port
+				     srvkey: servkey
+				     lastmsg: (current-seconds)
+				     expires: (+ (current-seconds)
+						 (server:expiration-timeout)
+						 -2))))
+		  (else
+		   (debug:print-info 0 *default-log-port* "return data from starting server did not match host port servkey pid ipaddr apath dbname " res)))
+		 res)
+	       (begin
+		 (debug:print-info 0 *default-log-port* "Unexpected result: " res)
+		 res)))))))
+    #t))
+
+;;======================================================================
+
+;; FOR DEBUGGING SET TO #t
+;; (define *localmode* #t)
+(define *localmode* #f)
+(define *dbstruct* (make-dbr:dbstruct))
+
+;; Defaults to current area
+;;
+(define (rmt:send-receive cmd rid params #!key (attemptnum 1)(area-dat #f))
+  (let* ((apath      *toppath*)
+	 (sinfo      *db-serv-info*)
+	 (dbname     (db:run-id->dbname rid)))
+    (if *localmode*
+	(api:execute-requests *dbstruct* cmd params)
+	(begin
+	  (rmt:open-main-connection sinfo apath)
+	  (if rid (rmt:general-open-connection sinfo apath dbname))
+	  #;(if (not (member cmd '(log-to-main)))
+	      (debug:print-info 0 *default-log-port* "rmt:send-receive "cmd" params="params))
+	  (rmt:send-receive-real sinfo apath dbname cmd params)))))
+
+;; db is at apath/.db/dbname, rid is an intermediary solution and will be removed
+;; sometime in the future
+;;
+(define (rmt:send-receive-real sinfo apath dbname cmd params)
+  (assert (not (eq? 'primordial (thread-name (current-thread)))) "FATAL: Do not call rmt:send-receive-real in the primodial thread.")
+  (let* ((cdat (rmt:get-conn sinfo apath dbname)))
+    (assert cdat "FATAL: rmt:send-receive-real called without the needed channels opened")
+    (let* ((uconn    (servdat-uconn sinfo)) ;; get the interface to ulex
+	   ;; then send-receive using the ulex layer to host-port stored in cdat
+	   (res      (send-receive uconn (conndat-hostport cdat) cmd params))
+	   #;(th1      (make-thread (lambda ()
+				    (set! res (send-receive uconn (conndat-hostport cdat) cmd params)))
+				  "send-receive thread")))
+      ;; (thread-start! th1)
+      ;; (thread-join! th1)   ;; gratuitious thread stuff is so that mailbox is not used in primordial thead
+      ;; since we accessed the server we can bump the expires time up
+      (conndat-expires-set! cdat (+ (current-seconds)
+				    (server:expiration-timeout)
+				    -2)) ;; two second margin for network time misalignments etc.
+      res)))
+
+;; db is at apath/.db/dbname, rid is an intermediary solution and will be removed
+;; sometime in the future.
+;;
+;; Purpose - call the main.db server and request a server be started
+;; for the given area path and dbname
+;;
+
+(define (rmt:print-db-stats)
+  (let ((fmtstr "~40a~7-d~9-d~20,2-f")) ;; "~20,2-f"
+    (debug:print 18 *default-log-port* "DB Stats, "(seconds->year-week/day-time (current-seconds))"\n=====================")
+    (debug:print 18 *default-log-port* (format #f "~40a~8a~10a~10a" "Cmd" "Count" "TotTime" "Avg"))
+    (for-each (lambda (cmd)
+		(let ((cmd-dat (hash-table-ref *db-stats* cmd)))
+		  (debug:print 18 *default-log-port* (format #f fmtstr cmd (vector-ref cmd-dat 0) (vector-ref cmd-dat 1) (/ (vector-ref cmd-dat 1)(vector-ref cmd-dat 0))))))
+	      (sort (hash-table-keys *db-stats*)
+		    (lambda (a b)
+		      (> (vector-ref (hash-table-ref *db-stats* a) 0)
+			 (vector-ref (hash-table-ref *db-stats* b) 0)))))))
+
+(define (rmt:get-max-query-average run-id)
+  (mutex-lock! *db-stats-mutex*)
+  (let* ((runkey (conc "run-id=" run-id " "))
+	 (cmds   (filter (lambda (x)
+			   (substring-index runkey x))
+			 (hash-table-keys *db-stats*)))
+	 (res    (if (null? cmds)
+		     (cons 'none 0)
+		     (let loop ((cmd (car cmds))
+				(tal (cdr cmds))
+				(max-cmd (car cmds))
+				(res 0))
+		       (let* ((cmd-dat (hash-table-ref *db-stats* cmd))
+			      (tot     (vector-ref cmd-dat 0))
+			      (curravg (/ (vector-ref cmd-dat 1) (vector-ref cmd-dat 0))) ;; count is never zero by construction
+			      (currmax (max res curravg))
+			      (newmax-cmd (if (> curravg res) cmd max-cmd)))
+			 (if (null? tal)
+			     (if (> tot 10)
+				 (cons newmax-cmd currmax)
+				 (cons 'none 0))
+			     (loop (car tal)(cdr tal) newmax-cmd currmax)))))))
+    (mutex-unlock! *db-stats-mutex*)
+    res))
+
+;; host and port are used to ensure we are remove proper records
+(define (rmt:server-shutdown host port)
+  (let ((dbfile   (servdat-dbfile *db-serv-info*)))
+    (debug:print-info 0 *default-log-port* "dbfile is "dbfile)
+    (if dbfile
+	(let* ((am-server  (args:get-arg "-server"))
+	       (dbfile     (args:get-arg "-db"))
+	       (apath      *toppath*)
+	       #;(sinfo     *remotedat*)) ;; foundation for future fix
+	  (if *dbstruct-db*
+	      (let* ((dbdat      (db:get-dbdat *dbstruct-db* apath dbfile))
+		     (db         (dbr:dbdat-db dbdat))
+		     (inmem      (dbr:dbdat-db dbdat))   ;; WRONG
+		     )
+		;; do a final sync here
+		(debug:print-info 0 *default-log-port* "Doing final sync for "apath" "dbfile" at "(current-seconds))
+		(db:sync-inmem->disk *dbstruct-db* apath dbfile force-sync: #t)
+		;; let's finalize here
+		(debug:print-info 0 *default-log-port* "Finalizing db and inmem")
+		(if (sqlite3:database? db)
+		    (sqlite3:finalize! db)
+		    (debug:print-info 0 *default-log-port* "in rmt:server-shutdown, db is not a database, not finalizing..."))
+		(if (sqlite3:database? inmem)
+		    (sqlite3:finalize! inmem)
+		    (debug:print-info 0 *default-log-port* "in rmt:server-shutdown, inmem is not a database, not finalizing..."))
+		(debug:print-info 0 *default-log-port* "Finalizing db and inmem complete"))
+	      (debug:print-info 0 *default-log-port* "Db was never opened, no cleanup to do."))
+	  (if (not am-server)
+	      (debug:print-info 0 *default-log-port* "I am not a server, should NOT get here!")
+	      (if (string-match ".*/main.db$" dbfile)
+		  (let ((pkt-file (conc (get-pkts-dir *toppath*)
+					"/" (servdat-uuid *db-serv-info*)
+					".pkt")))
+		    (debug:print-info 0 *default-log-port* "removing pkt "pkt-file)
+		    (delete-file* pkt-file)
+		    (debug:print-info 0 *default-log-port* "Releasing lock (if any) for "dbfile ", host "host", port "port)
+		    (db:with-lock-db
+		     (servdat-dbfile *db-serv-info*)
+		     (lambda (dbh dbfile)
+		       (db:release-lock dbh dbfile host port)))) ;; I'm not the server - should not have a lock to remove
+		  (let* ((sdat *db-serv-info*) ;; we have a run-id server
+			 (host (servdat-host sdat))
+			 (port (servdat-port sdat))
+			 (uuid (servdat-uuid sdat))
+			 (res  (rmt:deregister-server *db-serv-info* *toppath* host port uuid dbfile)))
+		    (debug:print-info 0 *default-log-port* "deregistered-server, res="res)
+		    (debug:print-info 0 *default-log-port* "deregistering server "host":"port" with uuid "uuid)
+		    )))))))
+
+
+(define (common:run-sync?)
+    ;; (and (common:on-homehost?)
+  (args:get-arg "-server"))
+
+(define *rmt:run-mutex* (make-mutex))
+(define *rmt:run-flag* #f)
+
+;; Main entry point to start a server. was start-server
+(define (rmt:run hostn)
+  (mutex-lock! *rmt:run-mutex*)
+  (if *rmt:run-flag*
+      (begin
+	(debug:print-warn 0 *default-log-port* "rmt:run already running.")
+	(mutex-unlock! *rmt:run-mutex*))
+      (begin
+	(set! *rmt:run-flag* #t)
+	(mutex-unlock! *rmt:run-mutex*)
+	;;  ;; Configurations for server
+	;;  (tcp-buffer-size 2048)
+	;;  (max-connections 2048) 
+	(debug:print 2 *default-log-port* "PID: "(current-process-id)". Attempting to start the server ...")
+	(if (and *db-serv-info*
+		 (servdat-uconn *db-serv-info*))
+	    (let* ((uconn (servdat-uconn *db-serv-info*)))
+	      (wait-and-close uconn))
+	    (let* ((port            (portlogger:open-run-close portlogger:find-port))
+		   (handler-proc    (lambda (rem-host-port qrykey cmd params) ;;
+				      (set! *db-last-access* (current-seconds))
+				      (assert (list? params) "FATAL: handler called with non-list params")
+				      (assert (args:get-arg "-server") "FATAL: handler called on non-server side. cmd="cmd", params="params)
+				      (debug:print 0 *default-log-port* "handler call: "cmd", params="params)
+				      (api:execute-requests *dbstruct-db* cmd params))))
+	      ;; (api:process-request *dbstuct-db* 
+	      (if (not *db-serv-info*)
+		  (set! *db-serv-info* (make-servdat host: hostn port: port)))
+	      (let* ((uconn (run-listener handler-proc port))
+		     (rport (udat-port uconn))) ;; the real port
+		(servdat-host-set! *db-serv-info* hostn)
+		(servdat-port-set! *db-serv-info* rport)
+		(servdat-uconn-set! *db-serv-info* uconn)
+		(wait-and-close uconn)
+		(db:print-current-query-stats)
+		)))
+	(let* ((host (servdat-host *db-serv-info*))
+	       (port (servdat-port *db-serv-info*))
+	       (mode (or (servdat-mode *db-serv-info*)
+			 "non-db")))
+	  ;; server exit stuff here
+	  ;; (rmt:server-shutdown host port) - always do in on-exit
+	  ;; (portlogger:open-run-close portlogger:set-port port "released") ;; moved to on-exit 
+	  (debug:print-info 0 *default-log-port* "Server "host":"port" mode "mode"shutdown complete. Exiting")
+	  ))))
+
+;;======================================================================
+;; S E R V E R   U T I L I T I E S 
+;;======================================================================
+
+
+;;======================================================================
+;; NEW SERVER METHOD
+;;======================================================================
+
+;; only use for main.db - need to re-write some of this :(
+;;
+(define (get-lock-db sdat dbfile host port)
+  (assert host "FATAL: get-lock-db called with host not set.")
+  (assert port "FATAL: get-lock-db called with port not set.")
+  (let* ((dbh (db:open-run-db dbfile db:initialize-db)) ;; open-run-db creates a standard db with schema used by all situations
+	 (res (db:get-iam-server-lock dbh dbfile host port))
+	 (uconn (servdat-uconn sdat)))
+    ;; res => list then already locked, check server is responsive
+    ;;     => #t then sucessfully got the lock
+    ;;     => #f reserved for future use as to indicate something went wrong
+    (match res
+      ((owner_pid owner_host owner_port event_time)
+       (if (server-ready? uconn (conc owner_host":"owner_port) "abc")
+	   #f      ;; locked by someone else
+	   (begin  ;; locked by someone dead and gone
+	     (debug:print 0 *default-log-port* "WARNING: stale lock - have to steal it. This may fail.")
+	     (db:steal-lock-db dbh dbfile port))))
+      (#t  #t) ;; placeholder so that we don't touch res if it is #t
+      (else (set! res #f)))
+    (sqlite3:finalize! dbh)
+    res))
+
+
+(define (register-server pkts-dir pkt-spec host port servkey ipaddr dbpath)
+  (let* ((pkt-dat `((host    . ,host)
+		    (port    . ,port)
+		    (servkey . ,servkey)
+		    (pid     . ,(current-process-id))
+		    (ipaddr  . ,ipaddr)
+		    (dbpath  . ,dbpath)))
+	 (uuid    (write-alist->pkt
+		   pkts-dir
+		   pkt-dat
+		   pktspec: pkt-spec
+		   ptype: 'server)))
+    (debug:print 0 *default-log-port* "Server on "host":"port" registered in pkt "uuid)
+    uuid))
+
+(define (get-pkts-dir #!optional (apath #f))
+  (let* ((effective-toppath (or *toppath* apath)))
+    (assert effective-toppath
+	    "ERROR: get-pkts-dir called without *toppath* set. Exiting.")
+    (let* ((pdir (conc effective-toppath "/.meta/srvpkts")))
+      (if (file-exists? pdir)
+	  pdir
+	  (begin
+	    (handle-exceptions ;; this exception handler should NOT be needed but ...
+		exn
+		pdir
+	      (create-directory pdir #t))
+	    pdir)))))
+
+;; given a pkts dir read 
+;;
+(define (get-all-server-pkts pktsdir-in pktspec)
+  (let* ((pktsdir  (if (file-exists? pktsdir-in)
+		       pktsdir-in
+		       (begin
+			 (create-directory pktsdir-in #t)
+			 pktsdir-in)))
+	 (all-pkt-files (glob (conc pktsdir "/*.pkt"))))
+    (map (lambda (pkt-file)
+	   (read-pkt->alist pkt-file pktspec: pktspec))
+	 all-pkt-files)))
+
+(define (server-address srv-pkt)
+  (conc (alist-ref 'host srv-pkt) ":"
+	(alist-ref 'port srv-pkt)))
+	
+(define (server-ready? uconn host-port key) ;; server-address is host:port
+  (let* ((params `((cmd . ping)(key . ,key)))
+	 (data `((cmd . ping)
+		 (key . ,key)
+		 (params . ,params))) ;; I don't get it.
+	 (res  (send-receive uconn host-port 'ping data)))
+    (if (eq? res 'ack) ;; yep, likely it is who we want on the other end
+	res
+	#f)))
+;; (begin (debug:print-info 0 *default-log-port* "server-ready? => "res) #f))))
+
+; from the pkts return servers associated with dbpath
+;; NOTE: Only one can be alive - have to check on each
+;;       in the list of pkts returned
+;;
+(define (get-viable-servers serv-pkts dbpath)
+  (let loop ((tail serv-pkts)
+	     (res  '()))
+    (if (null? tail)
+	res ;; NOTE: sort by age so oldest is considered first
+	(let* ((spkt (car tail)))
+	  (loop (cdr tail)
+		(if (equal? dbpath (alist-ref 'dbpath spkt))
+		    (cons spkt res)
+		    res))))))
+
+(define (remove-pkts-if-not-alive uconn serv-pkts)
+  (filter (lambda (pkt)
+	    (let* ((host (alist-ref 'host pkt))
+		   (port (alist-ref 'port pkt))
+		   (host-port (conc host":"port))
+		   (key  (alist-ref 'servkey  pkt))
+		   (pktz (alist-ref 'Z        pkt))
+		   (res  (server-ready? uconn host-port key)))
+	      (if res
+		  res
+		  (let* ((pktsdir (get-pkts-dir *toppath*))
+			 (pktpath (conc pktsdir"/"pktz".pkt")))
+		    (debug:print 0 *default-log-port* "WARNING: pkt with no server "pktpath)
+		    (delete-file* pktpath)
+		    #f))))
+	  serv-pkts))
+
+;; from viable servers get one that is alive and ready
+;;
+(define (get-the-server uconn apath serv-pkts)
+  (let loop ((tail serv-pkts))
+    (if (null? tail)
+	#f
+	(let* ((spkt  (car tail))
+	       (host  (alist-ref 'ipaddr spkt))
+	       (port  (alist-ref 'port spkt))
+	       (host-port (conc host":"port))
+	       (dbpth (alist-ref 'dbpath spkt))
+	       (srvkey (alist-ref 'Z spkt)) ;; (alist-ref 'srvkey spkt))
+	       (addr  (server-address spkt)))
+	  (if (server-ready? uconn host-port srvkey)
+	      spkt
+	      (loop (cdr tail)))))))
+
+;; am I the "first" in line server? I.e. my D card is smallest
+;; use Z card as tie breaker
+;;
+(define (get-best-candidate serv-pkts dbpath)
+  (if (null? serv-pkts)
+      #f
+      (let loop ((tail serv-pkts)
+		 (best  (car serv-pkts)))
+	(if (null? tail)
+	    best
+	    (let* ((candidate (car tail))
+		   (candidate-bd (string->number (alist-ref 'D candidate)))
+		   (best-bd      (string->number (alist-ref 'D best)))
+		   ;; bigger number is younger
+		   (candidate-z  (alist-ref 'Z candidate))
+		   (best-z       (alist-ref 'Z best))
+		   (new-best     (cond
+				  ((> best-bd candidate-bd) ;; best is younger than candidate
+				   candidate)
+				  ((< best-bd candidate-bd) ;; candidate is younger than best
+				   best)
+				  (else
+				   (if (string>=? best-z candidate-z)
+				       best
+				       candidate))))) ;; use Z card as tie breaker
+	      (if (null? tail)
+		  new-best
+		  (loop (cdr tail) new-best)))))))
+	  
+
+;;======================================================================
+;; END NEW SERVER METHOD
+;;======================================================================
+
+;; if .db/main.db check the pkts
+;; 
+(define (rmt:wait-for-server pkts-dir db-file server-key)
+  (let* ((sdat *db-serv-info*))
+    (let loop ((start-time (current-seconds))
+	       (changed    #t)
+	       (last-sdat  "not this"))
+      (begin ;; let ((sdat #f))
+	(thread-sleep! 0.01)
+	(debug:print-info 0 *default-log-port* "Waiting for server alive signature")
+	(mutex-lock! *heartbeat-mutex*)
+	(set! sdat *db-serv-info*)
+	(mutex-unlock! *heartbeat-mutex*)
+	(if (and sdat
+		 (not changed)
+		 (> (- (current-seconds) start-time) 2))
+	    (let* ((uconn (servdat-uconn sdat)))
+	      (servdat-status-set! sdat 'iface-stable)
+	      (debug:print-info 0 *default-log-port* "Received server alive signature, now attempting to lock in server")
+	      ;; create a server pkt in *toppath*/.meta/srvpkts
+	      
+	      ;; TODO:
+	      ;;   1. change sdat to stuct
+	      ;;   2. add uuid to struct
+	      ;;   3. update uuid in sdat here
+	      ;;
+	      (servdat-uuid-set! sdat
+				 (register-server
+				  pkts-dir *srvpktspec*
+				  (get-host-name)
+				  (servdat-port sdat) server-key
+				  (servdat-host sdat) db-file))
+	      ;; (set! *my-signature* (servdat-uuid sdat)) ;; replace with Z, no, stick with proper key
+	      ;; now read pkts and see if we are a contender
+	      (let* ((all-pkts     (get-all-server-pkts pkts-dir *srvpktspec*))
+		     (viables      (get-viable-servers all-pkts db-file))
+		     (alive        (remove-pkts-if-not-alive uconn viables))
+		     (best-srv     (get-best-candidate alive db-file))
+		     (best-srv-key (if best-srv (alist-ref 'servkey best-srv) #f))
+		     (i-am-srv     (equal? best-srv-key server-key))
+		     (delete-pkt   (lambda ()
+				     (let* ((pktfile (conc (get-pkts-dir *toppath*)
+							 "/" (servdat-uuid *db-serv-info*)
+							 ".pkt")))
+				       (debug:print-info 0 *default-log-port* "Attempting to remove bogus pkt file "pktfile)
+				       (delete-file* pktfile))))) ;; remove immediately instead of waiting for on-exit
+		(debug:print 0 *default-log-port* "best-srv-key: "best-srv-key", server-key: "server-key", i-am-srv: "i-am-srv)
+		;; am I the best-srv, compare server-keys to know
+		(if i-am-srv
+		    (if (get-lock-db sdat db-file (servdat-host sdat)(servdat-port sdat)) ;; (db:get-iam-server-lock *dbstruct-db* *toppath* run-id)
+			(begin
+			  (debug:print-info 0 *default-log-port* "I'm the server!")
+			  (servdat-dbfile-set! sdat db-file)
+			  (servdat-status-set! sdat 'db-locked))
+			(begin
+			  (debug:print-info 0 *default-log-port* "I'm not the server, exiting.")
+			  (bdat-time-to-exit-set! *bdat* #t)
+			  (delete-pkt)
+			  (thread-sleep! 0.2)
+			  (exit)))
+		    (begin
+		      (debug:print-info 0 *default-log-port*
+				   "Keys do not match "best-srv-key", "server-key", exiting.")
+		      (bdat-time-to-exit-set! *bdat* #t)
+		      (delete-pkt)
+		      (thread-sleep! 0.2)
+		      (exit)))
+		sdat))
+	    (begin ;; sdat not yet contains server info
+	      (debug:print-info 0 *default-log-port* "Still waiting, last-sdat=" last-sdat)
+	      (sleep 4)
+	      (if (> (- (current-seconds) start-time) 120) ;; been waiting for two minutes
+		  (begin
+		    (debug:print-error 0 *default-log-port* "transport appears to have died, exiting server")
+		    (exit))
+		  (loop start-time
+			(equal? sdat last-sdat)
+			sdat))))))))
+
+(define (rmt:register-server sinfo apath iface port server-key dbname)
+  (servdat-conns sinfo) ;; just checking types
+  (rmt:open-main-connection sinfo apath) ;; we need a channel to main.db
+  (rmt:send-receive-real sinfo apath      ;; params: host port servkey pid ipaddr dbpath
+			 (db:run-id->dbname #f)
+			 'register-server `(,iface
+					    ,port
+					    ,server-key
+					    ,(current-process-id)
+					    ,iface
+					    ,apath
+					    ,dbname)))
+
+(define (rmt:get-count-servers sinfo apath)
+  (servdat-conns sinfo) ;; just checking types
+  (rmt:open-main-connection sinfo apath) ;; we need a channel to main.db
+  (rmt:send-receive-real sinfo apath      ;; params: host port servkey pid ipaddr dbpath
+			 (db:run-id->dbname #f)
+			 'get-count-servers `(,apath)))
+
+(define (rmt:get-servers-info apath)
+  (rmt:send-receive 'get-servers-info #f `(,apath)))
+
+(define (rmt:deregister-server db-serv-info apath iface port server-key dbname)
+  (rmt:open-main-connection db-serv-info apath) ;; we need a channel to main.db
+  (rmt:send-receive-real db-serv-info apath      ;; params: host port servkey pid ipaddr dbpath
+                         (db:run-id->dbname #f)
+                         'deregister-server `(,iface
+                                              ,port
+                                              ,server-key
+                                              ,(current-process-id)
+                                              ,iface
+                                              ,apath
+                                              ,dbname)))
+
+(define (rmt:wait-for-stable-interface #!optional (num-tries-allowed 100))
+  ;; wait until *db-serv-info* stops changing
+  (let* ((stime (current-seconds)))
+    (let loop ((last-host  #f)
+	       (last-port  #f)
+	       (tries 0))
+      (let* ((curr-host (and *db-serv-info* (servdat-host *db-serv-info*)))
+	     (curr-port (and *db-serv-info* (servdat-port *db-serv-info*))))
+	;; first we verify port and interface, update *db-serv-info* in need be.
+	(cond
+	 ((> tries num-tries-allowed)
+	  (debug:print 0 *default-log-port* "rmt:keep-running, giving up after trying for several minutes.")
+	  (exit 1))
+	 ((not *db-serv-info*)
+	  (thread-sleep! 0.25)
+	  (loop curr-host curr-port (+ tries 1)))
+	 ((or (not last-host)(not last-port))
+	  (debug:print 0 *default-log-port* "rmt:keep-running, still no interface, tries="tries)
+	  (thread-sleep! 0.25)
+	  (loop curr-host curr-port (+ tries 1)))
+	 ((or (not (equal? last-host curr-host))
+	      (not (equal? last-port curr-port)))
+	  (debug:print-info 0 *default-log-port* "WARNING: interface changed, refreshing iface and port info")
+	  (thread-sleep! 0.25)
+	  (loop curr-host curr-port (+ tries 1)))
+	 ((< (- (current-seconds) stime) 1) ;; keep up the looping until at least 3 seconds have passed
+	  (thread-sleep! 0.5)
+	  (loop curr-host curr-port (+ tries 1)))
+	 (else
+	  (rmt:get-signature) ;; sets *my-signature* as side effect
+	  (servdat-status-set! *db-serv-info* 'interface-stable)
+	  (debug:print 0 *default-log-port*
+		       "SERVER STARTED: " curr-host
+		       ":" curr-port
+		       " AT " (current-seconds) " server signature: " *my-signature*
+		       " with "(servdat-trynum *db-serv-info*)" port changes")
+	  (flush-output *default-log-port*)
+	  #t))))))
+
+;; run rmt:keep-running in a parallel thread to monitor that the db is being 
+;; used and to shutdown after sometime if it is not.
+;;
+(define (rmt:keep-running dbname) 
+  ;; if none running or if > 20 seconds since 
+  ;; server last used then start shutdown
+  ;; This thread waits for the server to come alive
+  (debug:print-info 0 *default-log-port* "Starting the sync-back, keep alive thread in server")
+
+  (let* ((sinfo             *db-serv-info*)
+	 (server-start-time (current-seconds))
+	 (pkts-dir          (get-pkts-dir))
+	 (server-key        (rmt:get-signature)) ;; This servers key
+	 (is-main           (equal? (args:get-arg "-db") ".db/main.db"))
+	 (last-access       0)
+	 (server-timeout    (server:expiration-timeout))
+	 (shutdown-server-sequence (lambda (host port)
+				     (set! *unclean-shutdown* #f) ;; Should not be needed anymore
+				     (debug:print-info 0 *default-log-port* "Starting to shutdown the server. pid="(current-process-id))
+				     ;; (rmt:server-shutdown host port) -- called in on-exit
+				     ;; (portlogger:open-run-close portlogger:set-port port "released") called in on-exit
+				     (exit)))
+	 (timed-out?        (lambda ()
+			      (<= (+ last-access server-timeout)
+				  (current-seconds)))))
+    (servdat-dbfile-set! *db-serv-info* (args:get-arg "-db"))
+    ;; main and run db servers have both got wait logic (could/should merge it)
+    (if is-main
+	(rmt:wait-for-server pkts-dir dbname server-key)
+	(rmt:wait-for-stable-interface))
+    ;; this is our forever loop
+    (let* ((iface (servdat-host *db-serv-info*))
+	   (port  (servdat-port *db-serv-info*))
+	   (uconn (servdat-uconn *db-serv-info*)))
+      (let loop ((count          0)
+		 (bad-sync-count 0)
+		 (start-time     (current-milliseconds)))
+	(if (and (not is-main)
+		 (common:low-noise-print 60 "servdat-status"))
+	    (debug:print-info 0 *default-log-port* "servdat-status is " (servdat-status *db-serv-info*)))
+
+	(mutex-lock! *heartbeat-mutex*)
+	;; set up the database handle
+	(if (not *dbstruct-db*) ;; no db opened yet, open the db and register with main if appropriate
+	      (let ((watchdog (bdat-watchdog *bdat*)))
+		(debug:print 0 *default-log-port* "SERVER: dbprep")
+		(db:setup dbname) ;; sets *dbstruct-db* as side effect
+		(servdat-status-set! *db-serv-info* 'db-opened)
+		;; IFF I'm not main, call into main and register self
+		(if (not is-main)
+		    (let ((res (rmt:register-server sinfo
+						    *toppath* iface port
+						    server-key dbname)))
+		      (if res ;; we are the server
+			  (servdat-status-set! *db-serv-info* 'have-interface-and-db)
+			  ;; now check that the db locker is alive, clear it out if not
+			  (let* ((serv-info (rmt:server-info *toppath* dbname)))
+			    (match serv-info
+				   ((host port servkey pid ipaddr apath dbpath)
+				    (if (not (server-ready? uconn (conc host":"port) servkey))
+					(begin
+					  (debug:print-info 0 *default-log-port* "Server registered but not alive. Removing and trying again.")
+					  (rmt:deregister-server sinfo apath host port servkey dbpath) ;; servkey pid ipaddr apath dbpath)
+					  (loop (+ count 1) bad-sync-count start-time))))
+				   (else
+				    (debug:print 0 *default-log-port* "We are not the server for "dbname", exiting. Server info is: "serv-info)
+				    (exit)))))))
+		(debug:print 0 *default-log-port*
+			     "SERVER: running, db "dbname" opened, megatest version: "
+			   (common:get-full-version))
+	      ;; start the watchdog
+
+	      ;; is this really needed?
+	      
+	      #;(if watchdog
+		  (if (not (member (thread-state watchdog)
+				   '(ready running blocked
+					   sleeping dead)))
+		      (begin
+			(debug:print-info 0 *default-log-port* "Starting watchdog thread (in state "(thread-state watchdog)")")
+			(thread-start! watchdog))
+		      (debug:print-info 0 *default-log-port* "Not starting watchdog thread (in state "(thread-state watchdog)")"))
+		  (debug:print 0 *default-log-port* "ERROR: *watchdog* not setup, cannot start it."))
+	      #;(loop (+ count 1) bad-sync-count start-time)
+	      ))
+	
+	(db:sync-inmem->disk *dbstruct-db* *toppath* dbname force-sync: #t)
+	
+	(mutex-unlock! *heartbeat-mutex*)
+	
+	;; when things go wrong we don't want to be doing the various
+	;; queries too often so we strive to run this stuff only every
+	;; four seconds or so.
+	(let* ((sync-time (- (current-milliseconds) start-time))
+	       (rem-time  (quotient (- 4000 sync-time) 1000)))
+	  (if (and (<= rem-time 4)
+		   (>  rem-time 0))
+	      (thread-sleep! rem-time)))
+    
+	;; Transfer *db-last-access* to last-access to use in checking that we are still alive
+	(set! last-access *db-last-access*)
+	
+	(if (< count 1) ;; 3x3 = 9 secs aprox
+	    (loop (+ count 1) bad-sync-count (current-milliseconds)))
+	
+	(if (common:low-noise-print 60 "dbstats")
+	    (begin
+	      (debug:print 0 *default-log-port* "Server stats:")
+	      (db:print-current-query-stats)))
+	(let* ((hrs-since-start  (/ (- (current-seconds) server-start-time) 3600)))
+	  (cond
+	   ((not *server-run*)
+	    (debug:print-info 0 *default-log-port* "*server-run* set to #f. Shutting down.")
+	    (shutdown-server-sequence (get-host-name) port))
+	   ((timed-out?)
+	    (debug:print-info 0 *default-log-port* "Server timed out. seconds since last db access: " (- (current-seconds) last-access))
+	    (shutdown-server-sequence (get-host-name) port))
+	   ((and *server-run*
+		 (or (not (timed-out?))
+		     (if is-main ;; do not exit if there are other servers (keep main open until all others gone)
+			 (> (rmt:get-count-servers sinfo *toppath*) 1)
+			 #f)))
+	    (if (common:low-noise-print 120 "server continuing")
+		(debug:print-info 0 *default-log-port* "Server continuing, seconds since last db access: " (- (current-seconds) last-access)))
+	    (loop 0 bad-sync-count (current-milliseconds)))
+	   (else
+	    (set! *unclean-shutdown* #f)
+	    (debug:print-info 0 *default-log-port* "Server timed out. seconds since last db access: " (- (current-seconds) last-access))
+	    (shutdown-server-sequence (get-host-name) port)
+	    #;(debug:print-info 0 *default-log-port* "Sending 'quit to server, received: "
+			      (open-send-receive-nn (conc iface":"port)      ;; do this here and not in server-shutdown
+						    (sexpr->string 'quit))))))))))
+
+(define (rmt:get-reasonable-hostname)
+  (let* ((inhost (or (args:get-arg "-server") "-")))
+    (if (equal? inhost "-")
+	(get-host-name)
+	inhost)))
+
+;; Call this to start the actual server
+;;
+;; all routes though here end in exit ...
+;;
+;; This is the point at which servers are started
+;;
+(define (rmt:server-launch dbname)
+  (debug:print-info 0 *default-log-port* "Entered rmt:server-launch")
+  (let* ((th2 (make-thread (lambda ()
+			     (debug:print-info 0 *default-log-port* "Server run thread started")
+			     (rmt:run (rmt:get-reasonable-hostname)))
+			   "Server run"))
+	 (th3 (make-thread (lambda ()
+			     (debug:print-info 0 *default-log-port* "Server monitor thread started")
+			     (if (args:get-arg "-server")
+				 (rmt:keep-running dbname)))
+			     "Keep running")))
+    (thread-start! th2)
+    (thread-sleep! 0.252) ;; give the server time to settle before starting the keep-running monitor.
+    (thread-start! th3)
+    (set! *didsomething* #t)
+    (thread-join! th2)
+    (thread-join! th3))
+  #f)
+	    
+;;======================================================================
+;;  S E R V E R   -  D I R E C T   C A L L S
+;;======================================================================
+
+(define (rmt:kill-server run-id)
+  (rmt:send-receive 'kill-server #f (list run-id)))
+
+(define (rmt:start-server run-id)
+  (rmt:send-receive 'start-server #f (list run-id)))
+
+(define (rmt:server-info apath dbname)
+  (rmt:send-receive 'get-server-info #f (list apath dbname)))
+
+;;======================================================================
+;; Nanomsg transport
+;;======================================================================
+
+#;(define (is-port-in-use port-num)
+  (let* ((ret #f))
+    (let-values (((inp oup pid)
+		  (process "netstat" (list  "-tulpn" ))))
+      (let loop ((inl (read-line inp)))
+        (if (not (eof-object? inl))
+            (begin 
+	      (if (string-search (regexp (conc ":" port-num)) inl)
+		  (begin
+					;(print "Output: "  inl)
+		    (set! ret  #t))
+		  (loop (read-line inp)))))))
+    ret))
+
+#;(define (open-nn-connection host-port)
+  (let ((req  (make-req-socket))
+        (uri  (conc "tcp://" host-port)))
+    (nng-dial req uri)
+    (socket-set! req 'nng/recvtimeo 2000)
+    req))
+
+#;(define (send-receive-nn req msg)
+  (nng-send req msg)
+  (nng-recv req))
+
+#;(define (close-nn-connection req)
+  (nng-close! req))
+  
+;; ;; open connection to server, send message, close connection
+;; ;;
+;; (define (open-send-close-nn host-port msg #!key (timeout 3) ) ;; default timeout is 3 seconds
+;;   (let ((req  (make-req-socket 'req))
+;;         (uri  (conc "tcp://" host-port))
+;;         (res  #f)
+;;         ;; (contacts (alist-ref 'contact attrib))
+;;         ;; (mode (alist-ref 'mode attrib))
+;; 	)
+;;     (socket-set! req 'nng/recvtimeo 2000)
+;;     (handle-exceptions
+;;      exn
+;;      (let ((emsg ((condition-property-accessor 'exn 'message) exn)))
+;;        ;; Send notification       
+;;        (debug:print 0 *default-log-port* "ERROR: Failed to connect / send to " uri " message was \"" emsg "\"" )
+;;        #f)
+;;      (nng-dial req uri)
+;;      ;; (print "Connected to the server " )
+;;      (nng-send req msg)
+;;      ;; (print "Request Sent")  
+;;      (let* ((th1  (make-thread (lambda ()
+;;                                  (let ((resp (nng-recv req)))
+;;                                    (nng-close! req)
+;;                                    (set! res (if (equal? resp "ok")
+;;                                                  #t
+;;                                                  #f))))
+;;                                "recv thread"))
+;;             (th2 (make-thread (lambda ()
+;;                                 (thread-sleep! timeout)
+;;                                 (thread-terminate! th1))
+;; 			      "timer thread")))
+;;        (thread-start! th1)
+;;        (thread-start! th2)
+;;        (thread-join! th1)
+;;        res))))
+;; 
+#;(define (open-send-receive-nn host-port msg #!key (timeout 3) ) ;; default timeout is 3 seconds
+  (let ((req  (make-req-socket))
+        (uri  (conc "tcp://" host-port))
+        (res  #f)) 
+    (handle-exceptions
+     exn
+     (let ((emsg ((condition-property-accessor 'exn 'message) exn)))
+       ;; Send notification      
+       (debug:print 0 *default-log-port* "ERROR: Failed to connect / send to " uri " message was \"" emsg "\", exn=" exn)
+       #f)
+     (nng-dial req uri)
+     (nng-send req msg)
+     (let* ((th1  (make-thread (lambda ()
+                                 (let ((resp (nng-recv req)))
+                                   (nng-close! req)
+                                   ;; (print resp)
+                                   (set! res resp)))
+                               "recv thread"))
+            (th2 (make-thread (lambda ()
+                                (thread-sleep! timeout)
+                                (thread-terminate! th1))
+                             "timer thread")))
+       (thread-start! th1)
+       (thread-start! th2)
+       (thread-join! th1)
+       res))))
+
+;;======================================================================
+;; S E R V E R   U T I L I T I E S 
+;;======================================================================
+
+;; run ping in separate process, safest way in some cases
+;;
+#;(define (server:ping-server ifaceport)
+  (with-input-from-pipe 
+   (conc (common:get-megatest-exe) " -ping " ifaceport)
+   (lambda ()
+     (let loop ((inl (read-line))
+		(res "NOREPLY"))
+       (if (eof-object? inl)
+	   (case (string->symbol res)
+	     ((NOREPLY)  #f)
+	     ((LOGIN_OK) #t)
+	     (else       #f))
+	   (loop (read-line) inl))))))
+
+;; NOT USED (well, ok, reference in rpc-transport but otherwise not used).
+;;
+#;(define (server:login toppath)
+  (lambda (toppath)
+    (set! *db-last-access* (current-seconds)) ;; might not be needed.
+    (if (equal? *toppath* toppath)
+	#t
+	#f)))
+
+;; (define server:sync-lock-token "SERVER_SYNC_LOCK")
+;; (define (server:release-sync-lock)
+;;   (db:no-sync-del! *no-sync-db* server:sync-lock-token))
+;; (define (server:have-sync-lock?)
+;;   (let* ((have-lock-pair (db:no-sync-get-lock *no-sync-db* server:sync-lock-token))
+;;          (have-lock?     (car have-lock-pair))
+;;          (lock-time      (cdr have-lock-pair))
+;;          (lock-age       (- (current-seconds) lock-time)))
+;;     (cond
+;;      (have-lock? #t)
+;;      ((>lock-age
+;;        (* 3 (configf:lookup-number *configdat* "server" "minimum-intersync-delay" default: 180)))
+;;       (server:release-sync-lock)
+;;       (server:have-sync-lock?))
+;;      (else #f))))
+
+)