Index: Makefile
==================================================================
--- Makefile
+++ Makefile
@@ -105,10 +105,14 @@
 	chmod a+x $@
 
 $(PREFIX)/bin/nbfind : utils/nbfind
 	$(INSTALL) $< $@
 	chmod a+x $@
+
+$(PREFIX)/bin/nbload : utils/nbload
+	$(INSTALL) $< $@
+	chmod a+x $@
 
 $(PREFIX)/bin/refdb : refdb
 	$(INSTALL) $< $@
 	chmod a+x $@
 
@@ -126,11 +130,11 @@
 	$(INSTALL) dboard $(PREFIX)/bin/dboard
 	utils/mk_wrapper $(PREFIX) dboard > $(PREFIX)/bin/dashboard
 	chmod a+x $(PREFIX)/bin/dashboard
 
 install : bin $(PREFIX)/bin/mtest $(PREFIX)/bin/megatest $(PREFIX)/bin/dboard $(PREFIX)/bin/dashboard $(HELPERS) $(PREFIX)/bin/nbfake \
-          $(PREFIX)/bin/nbfind $(PREFIX)/bin/newdboard $(PREFIX)/bin/refdb $(PREFIX)/bin/mt_xterm $(PREFIX)/bin/revtagfsl
+          $(PREFIX)/bin/nbfind $(PREFIX)/bin/nbload $(PREFIX)/bin/newdboard $(PREFIX)/bin/refdb $(PREFIX)/bin/mt_xterm $(PREFIX)/bin/revtagfsl
 
 deploytarg/apropos.so : Makefile
 	for i in apropos base64 canvas-draw csv-xml directory-utils dot-locking extras fmt format hostinfo http-client intarweb json md5 message-digest posix posix-extras readline regex regex-case s11n spiffy spiffy-request-vars sqlite3 srfi-1 srfi-18 srfi-69 tcp test uri-common check-errors synch matchable sql-null tcp-server rpc blob-utils string-utils variable-item defstruct uri-generic sendfile opensll openssl lookup-table list-utils stack; do \
 	chicken-install -prefix deploytarg -deploy $$i;done
 

Index: TODO
==================================================================
--- TODO
+++ TODO
@@ -1,4 +1,12 @@
 
-1. Confirm that branch transaction-for-sequential-writes content was added to trunk/development
-2. Add a host chooser for ssh to launch-tests
-3. Try making static executable
+TODO
+====
+
+Migration to inmem db plus per run db
+-------------------------------------
+
+. Re-work the dbstruct data structure?
+.. Move main.db to global?
+.. [ run-id.db inmemdb last-mod last-read last-sync inuse ]
+. Re-work all queries to use run-id to dereference server
+. Open main.db directly in calls to -runtests etc. No need to talk remote?

Index: api.scm
==================================================================
--- api.scm
+++ api.scm
@@ -71,27 +71,27 @@
 					  (realparams (cddr params)))
 				      (db:with-db dbstruct run-id #t ;; these are all for modifying the db
 						  (lambda (db)
 						    (db:general-call db stmtname realparams)))))
     ((sync-inmem->db)               (db:sync-touched dbstruct force-sync: #t))
-    ((kill-server)
-     (db:sync-tables (db:tbls *inmemdb*) *inmemdb* *db*)  ;; (db:sync-to *inmemdb* *db*)
-     (let ((hostname (car  *runremote*))
-	   (port     (cadr *runremote*))
-	   (pid      (if (null? params) #f (car params)))
-	   (th1      (make-thread (lambda ()(thread-sleep! 3)(debug:print 0 "Server exiting!")(exit 0)) "Server exit thread")))
-       (debug:print 0 "WARNING: Server on " hostname ":" port " going down by user request!")
-       (debug:print-info 1 "current pid=" (current-process-id))
-       (open-run-close tasks:server-deregister tasks:open-db 
-		       hostname
-		       port: port)
-       (set! *server-run* #f)
-       (thread-sleep! 3)
-       (if pid 
-	   (process-signal pid signal/kill)
-	   (thread-start! th1))
-       '(#t "exit process started")))
+    ;; ((kill-server)
+    ;;  (db:sync-tables (db:tbls *inmemdb*) *inmemdb* *db*)  ;; (db:sync-to *inmemdb* *db*)
+    ;;  (let ((hostname (car  *runremote*))
+    ;;        (port     (cadr *runremote*))
+    ;;        (pid      (if (null? params) #f (car params)))
+    ;;        (th1      (make-thread (lambda ()(thread-sleep! 3)(debug:print 0 "Server exiting!")(exit 0)) "Server exit thread")))
+    ;;    (debug:print 0 "WARNING: Server on " hostname ":" port " going down by user request!")
+    ;;    (debug:print-info 1 "current pid=" (current-process-id))
+    ;;    (open-run-close tasks:server-deregister tasks:open-db 
+    ;;     	       hostname
+    ;;     	       port: port)
+    ;;    (set! *server-run* #f)
+    ;;    (thread-sleep! 3)
+    ;;    (if pid 
+    ;;        (process-signal pid signal/kill)
+    ;;        (thread-start! th1))
+    ;;    '(#t "exit process started")))
     ((sdb-qry)                      (apply sdb:qry params))
 
     ;; TESTMETA
     ((testmeta-get-record)       (apply db:testmeta-get-record dbstruct params))
     ((testmeta-add-record)       (apply db:testmeta-add-record dbstruct params))

Index: client.scm
==================================================================
--- client.scm
+++ client.scm
@@ -50,43 +50,52 @@
 ;;   1. We are a test manager and we received *transport-type* and *runremote* via cmdline
 ;;   2. We are a run tests, list runs or other interactive process and we must figure out
 ;;      *transport-type* and *runremote* from the monitor.db
 ;;
 ;; client:setup
-(define (client:setup #!key (numtries 3))
+(define (client:setup run-id #!key (numtries 3))
   (if (not *toppath*)
       (if (not (setup-for-run))
 	  (begin
 	    (debug:print 0 "ERROR: failed to find megatest.config, exiting")
 	    (exit))))
-  (push-directory *toppath*) ;; This is probably NOT needed 
+  ;; (push-directory *toppath*) ;; This is probably NOT needed 
   ;; clients get the sdb:qry proc created here
   ;; (if (not sdb:qry)
   ;;     (begin
   ;;       (set! sdb:qry (make-sdb:qry (conc *toppath* "/db/strings.db"))) ;; we open the normalization helpers here
   ;;       (sdb:qry 'setup #f)))
-
-  (debug:print-info 11 "*transport-type* is " *transport-type* ", *runremote* is " *runremote*)
-  (let* ((hostinfo  (open-run-close tasks:get-best-server tasks:open-db)))
-    (debug:print-info 11 "CLIENT SETUP, hostinfo=" hostinfo)
-    (set! *transport-type* (if hostinfo 
-    			       (string->symbol (tasks:hostinfo-get-transport hostinfo))
-			       'fs))
-    (debug:print-info 11 "Using transport type of " *transport-type* (if hostinfo (conc " to connect to " hostinfo) ""))
-    (case *transport-type* 
-      ;; ((fs)(if (not *megatest-db*)(set! *megatest-db* (open-db))))
-      ((http)
-       (http-transport:client-connect (tasks:hostinfo-get-interface hostinfo)
-				      (tasks:hostinfo-get-port hostinfo)))
-      ((zmq)
-       (zmq-transport:client-connect (tasks:hostinfo-get-interface hostinfo)
-				     (tasks:hostinfo-get-port      hostinfo)
-				     (tasks:hostinfo-get-pubport   hostinfo)))
-      (else  ;; default to fs
-       (debug:print 0 "ERROR: unrecognised transport type " *transport-type* " exiting now.")
-       (exit)))
-    (pop-directory)))
+  (let ((hostinfo (and run-id (hash-table-ref/default *runremote* run-id #f))))
+    (debug:print-info 11 "for run-id=" run-id ", *transport-type* is " *transport-type*)
+    (if hostinfo
+	hostinfo ;; have hostinfo - just return it
+	(let* ((hostinfo  (open-run-close tasks:get-server tasks:open-db run-id))
+	       (transport (if hostinfo 
+			      (string->symbol (tasks:hostinfo-get-transport hostinfo))
+			      'http)))
+	  (if (not hostinfo)
+	      (begin
+		(debug:print 0 "ERROR: Expected to be able to connect to a server by now. No server available for run-id = " run-id)
+		(exit 1)))
+	  (hash-table-set! *runremote* run-id hostinfo)
+	  (debug:print-info 11 "CLIENT SETUP, hostinfo=" hostinfo)
+	  (debug:print-info 11 "Using transport type of " transport (if hostinfo (conc " to connect to " hostinfo) ""))
+	  (case *transport-type* 
+	    ;; ((fs)(if (not *megatest-db*)(set! *megatest-db* (open-db))))
+	    ((http)
+	     ;; this saves the hostinfo in the *runremote* hash and returns it
+	     (http-transport:client-connect run-id 
+					    (tasks:hostinfo-get-interface hostinfo)
+					    (tasks:hostinfo-get-port hostinfo)))
+	    ((zmq)
+	     (zmq-transport:client-connect (tasks:hostinfo-get-interface hostinfo)
+					   (tasks:hostinfo-get-port      hostinfo)
+					   (tasks:hostinfo-get-pubport   hostinfo)))
+	    (else  ;; default to fs
+	     (debug:print 0 "ERROR: unrecognised transport type " *transport-type* " exiting now.")
+	     (exit)))))))
+;;	  (pop-directory)))
 
 ;; client:signal-handler
 (define (client:signal-handler signum)
   (handle-exceptions
    exn
@@ -103,13 +112,16 @@
      (thread-start! th2)
      (thread-start! th1)
      (thread-join! th2))))
 
 ;; client:launch
-(define (client:launch)
+;; Need to set the signal handler somewhere other than here as this
+;; routine will go away.
+;;
+(define (client:launch run-id)
   (set-signal-handler! signal/int client:signal-handler)
-   (if (client:setup)
-       (debug:print-info 2 "connected as client")
-       (begin
-	 (debug:print 0 "ERROR: Failed to connect as client")
-	 (exit))))
+  (if (client:setup run-id)
+      (debug:print-info 2 "connected as client")
+      (begin
+	(debug:print 0 "ERROR: Failed to connect as client")
+	(exit))))
 

Index: common.scm
==================================================================
--- common.scm
+++ common.scm
@@ -43,14 +43,14 @@
 ;; DATABASE
 (define *open-dbs* (vector #f (make-hash-table))) ;; megatestdb run-id-dbs
 
 ;; SERVER
 (define *my-client-signature* #f)
-(define *transport-type*    'fs)
+(define *transport-type*    'http)
 (define *megatest-db*       #f)
 (define *rpc:listener*      #f) ;; if set up for server communication this will hold the tcp port
-(define *runremote*         #f) ;; if set up for server communication this will hold <host port>
+(define *runremote*         (make-hash-table)) ;; hash of run-id => hostinfo, populated when a client connects to the server for that run
 (define *last-db-access*    (current-seconds))  ;; update when db is accessed via server
 (define *max-cache-size*    0)
 (define *logged-in-clients* (make-hash-table))
 (define *client-non-blocking-mode* #f)
 (define *server-id*         #f)
@@ -59,10 +59,11 @@
 (define *received-response* #f)
 (define *default-numtries*  10)
 (define *server-run*        #t)
 (define *db-write-access*   #t)
 (define *inmemdb*           #f)
+(define *run-id*            #f)
 
 (define *target*            (make-hash-table)) ;; cache the target here; target is keyval1/keyval2/.../keyvalN
 (define *keys*              (make-hash-table)) ;; cache the keys here
 (define *keyvals*           (make-hash-table))
 (define *toptest-paths*     (make-hash-table)) ;; cache toptest path settings here
@@ -346,11 +347,11 @@
 	"unknown"
 	(caar uname-res))))
 	      
 (define (save-environment-as-files fname #!key (ignorevars (list "DISPLAY" "LS_COLORS" "XKEYSYMDB" "EDITOR")))
   (let ((envvars (get-environment-variables))
-        (whitesp (regexp "[^a-zA-Z0-9_\\-:;,.\\/%$]")))
+        (whitesp (regexp "[^a-zA-Z0-9_\\-:,.\\/%$]")))
      (with-output-to-file (conc fname ".csh")
        (lambda ()
           (for-each (lambda (key)
 		      (if (not (member key ignorevars))
 			  (let* ((val (cdr key))

Index: dashboard-tests.scm
==================================================================
--- dashboard-tests.scm
+++ dashboard-tests.scm
@@ -76,12 +76,18 @@
 	      lbl)
 	    (store-label "testcomment"
 			 (iup:label "TestComment                             "
 				    #:expand "HORIZONTAL")
 			 (lambda (testdat)
-			   ;; (sdb:qry 'getstr 
-			   (db:test-get-comment testdat))) ;; )
+			   ;; Return the comment from testdat. If a shared comment textbox has been registered,
+			   ;; push the comment into it whenever the two differ so the widget stays current.
+			   (let ((newcomment (db:test-get-comment testdat)))
+			     (if (and *dashboard-comment-share-slot*
+				      (not (equal? (iup:attribute *dashboard-comment-share-slot* "VALUE")
+						   newcomment)))
+				 (iup:attribute-set! *dashboard-comment-share-slot* "VALUE" newcomment))
+			     newcomment)))
 	    (store-label "testid"
 			 (iup:label "TestId                             "
 				    #:expand "HORIZONTAL")
 			 (lambda (testdat)
 			   (db:test-get-id testdat)))
@@ -140,11 +146,11 @@
 ;;======================================================================
 (define (run-info-panel db keydat testdat runname)
   (let* ((run-id     (db:test-get-run_id testdat))
 	 (rundat     (db:get-run-info db run-id))
 	 (header     (db:get-header rundat))
-	 (event_time (db:get-value-by-header (db:get-row rundat)
+	 (event_time (db:get-value-by-header (db:get-rows rundat)
 					     (db:get-header rundat)
 					     "event_time")))
     (iup:frame 
      #:title "Megatest Run Info" ; #:expand "YES"
      (iup:hbox ; #:expand "YES"
@@ -216,28 +222,34 @@
 	 (color  (car (gutils:get-color-for-state-status state status))))
     ((vector-ref *state-status* 0) state color)
     ((vector-ref *state-status* 1) status color)))
 
 (define *dashboard-test-db* #t)
+(define *dashboard-comment-share-slot* #f)
 
 ;;======================================================================
 ;; Set fields 
 ;;======================================================================
 (define (set-fields-panel dbstruct run-id test-id testdat #!key (db #f))
   (let ((newcomment #f)
 	(newstatus  #f)
-	(newstate   #f))
+	(newstate   #f)
+	(wtxtbox    #f))
     (iup:frame
      #:title "Set fields"
      (iup:vbox
       (iup:hbox (iup:label "Comment:")
-		(iup:textbox #:action (lambda (val a b)
+		(let ((txtbox (iup:textbox #:action (lambda (val a b)
 					(rmt:test-set-state-status-by-id run-id test-id #f #f b)
-					;; IDEA: Just set a variable with the proc to call?
-					(set! newcomment b))
-			     #:value (db:test-get-comment testdat)
-			     #:expand "HORIZONTAL"))
+						      ;; IDEA: Just set a variable with the proc to call?
+						      (open-run-close db:test-set-state-status-by-id db test-id #f #f b)
+						      (set! newcomment b))
+					   #:value (db:test-get-comment testdat)
+					   #:expand "HORIZONTAL")))
+		  (set! wtxtbox txtbox)
+		  txtbox))
+		  
       (apply iup:hbox
 	     (iup:label "STATE:" #:size "30x")
 	     (let* ((btns  (map (lambda (state)
 				  (let ((btn (iup:button state
 							 #:expand "HORIZONTAL" #:size "50x" #:font "Courier New, -10"
@@ -262,12 +274,20 @@
 				  (let ((btn (iup:button status
 							 #:expand "HORIZONTAL" #:size "50x" #:font "Courier New, -10"
 							 #:action (lambda (x)
 								    (let ((t (iup:attribute x "TITLE")))
 								      (if (equal? t "WAIVED")
-									  (iup:show (dashboard-tests:waiver testdat (lambda (c)
-														      (set! newcomment c))))
+									  (iup:show (dashboard-tests:waiver testdat 
+													    (if wtxtbox (iup:attribute wtxtbox "VALUE") #f)
+													    (lambda (c)
+													      (set! newcomment c)
+													      (if wtxtbox 
+														  (begin
+														    (iup:attribute-set! wtxtbox "VALUE" c)
+														    (if (not *dashboard-comment-share-slot*)
+															(set! *dashboard-comment-share-slot* wtxtbox)))
+														  ))))
 									  (begin
 									    (open-run-close db:test-set-state-status-by-id db test-id #f status #f)
 									    (db:test-set-status! testdat status))))))))
 				    btn))
 				(map cadr *common:std-statuses*)))) ;; (list  "PASS" "WARN" "FAIL" "CHECK" "n/a" "WAIVED" "SKIP"))))
@@ -314,21 +334,21 @@
     ;;     	#:expand "HORIZONTAL"
     ;;     	#:action (lambda (obj)
     ;;     		   (print "Refresh test data " stepname))
     )))
 
-(define (dashboard-tests:waiver testdat cmtcmd)
+(define (dashboard-tests:waiver testdat ovrdval cmtcmd)
   (let* ((wpatt (configf:lookup *configdat* "setup" "waivercommentpatt"))
 	 (wregx (if (string? wpatt)(regexp wpatt) #f))
 	 (wmesg (iup:label (if wpatt (conc "Comment must match pattern " wpatt) "")))
 	 (comnt (iup:textbox #:action (lambda (val a b)
 					(if wpatt
 					    (if (string-match wregx b)
 						(iup:attribute-set! wmesg "TITLE" (conc "Comment matches " wpatt))
 						(iup:attribute-set! wmesg "TITLE" (conc "Comment does not match " wpatt))
 						)))
-			     #:value (db:test-get-comment testdat)
+			     #:value (if ovrdval ovrdval (db:test-get-comment testdat))
 			     #:expand "HORIZONTAL"))
 	 (dlog  #f))
     (set! dlog (iup:dialog ;; #:close_cb (lambda (a)(exit)) ; #:expand "YES"
 		#:title "SET WAIVER"
 		(iup:vbox ; #:expand "YES"
@@ -462,11 +482,11 @@
 	  (debug:print 2 "ERROR: No test data found for test " test-id ", exiting")
 	  (exit 1))
 	(let* (;; (run-id        (if testdat (db:test-get-run_id testdat) #f))
 	       (keydat        (if testdat (db:get-key-val-pairs dbstruct run-id) #f))
 	       (rundat        (if testdat (db:get-run-info dbstruct run-id) #f))
-	       (runname       (if testdat (db:get-value-by-header (db:get-row rundat)
+	       (runname       (if testdat (db:get-value-by-header (db:get-rows rundat)
 								  (db:get-header rundat)
 								  "runname") #f))
 	       (tdb           (tdb:open-test-db-by-test-id-local dbstruct run-id test-id))
 	       ;; These next two are intentional bad values to ensure errors if they should not
 	       ;; get filled in properly.

Index: db.scm
==================================================================
--- db.scm
+++ db.scm
@@ -117,10 +117,11 @@
 	(let* ((local        (dbr:dbstruct-get-local dbstruct))
 	       (toppath      (dbr:dbstruct-get-path dbstruct))
 	       (dbpath       (conc toppath "/db/" run-id ".db"))
 	       (dbexists     (file-exists? dbpath))
 	       (inmem        (if local #f (db:open-inmem-db)))
+	       (refdb        (if local #f (db:open-inmem-db)))
 	       (db           (sqlite3:open-database dbpath))
 	       (write-access (file-write-access? dbpath))
 	       (handler      (make-busy-timeout 136000)))
 	  (if (and dbexists (not write-access))
 	      (set! *db-write-access* #f)) ;; only unset so other db's also can use this control
@@ -140,10 +141,12 @@
 		(dbr:dbstruct-set-runvec-val! dbstruct run-id 'inmem db) ;; direct access ...
 		db)
 	      (begin
 		(dbr:dbstruct-set-runvec-val! dbstruct run-id 'inmem inmem)
 		(db:sync-tables db:sync-tests-only db inmem)
+		(dbr:dbstruct-set-runvec-val! dbstruct run-id 'refdb refdb)
+		(db:sync-tables db:sync-tests-only db refdb)
 		inmem))))))
 
 ;; This routine creates the db. It is only called if the db is not already opened
 ;;
 (define (db:open-main dbstruct) ;;  (conc *toppath* "/megatest.db") (car *configinfo*)))
@@ -170,23 +173,13 @@
 	  (dbr:dbstruct-set-main! dbstruct db)
 	  db))))
 
 ;; Make the dbstruct, setup up auxillary db's and call for main db at least once
 ;;
-(define (db:setup #!key (local #f))
+(define (db:setup run-id #!key (local #f))
   (let ((dbstruct (make-dbr:dbstruct path: *toppath* local: local)))
     (db:get-db dbstruct #f) ;; force one call to main
-    ;; (if (not sdb:qry)
-    ;;     (begin
-    ;;       (set! sdb:qry (make-sdb:qry (conc *toppath* "/db/strings.db"))) ;; we open the normalization helpers here
-    ;;       (sdb:qry 'setup #f)
-    ;;       ;; Initialize with some known needed strings, NOTE: set this up to only execute on first db initialization
-    ;;       (for-each
-    ;;        (lambda (str)
-    ;;          (sdb:qry 'get-id str))
-    ;;        (list "" "logs/final.log"))))
-    ;; (set! *fdb*   (filedb:open-db (conc *toppath* "/db/paths.db")))
     dbstruct))
 
 ;; Open the classic megatest.db file in toppath
 ;;
 (define (db:open-megatest-db)
@@ -772,21 +765,25 @@
 		       (db:get-db dbstruct #f)
 		       "SELECT fieldname FROM keys ORDER BY id DESC;")))
 	(set! *db-keys* res)
 	res)))
 
-;; 
+;; look up values in a header/data structure
 (define (db:get-value-by-header row header field)
-  (debug:print-info 4 "db:get-value-by-header row: " row " header: " header " field: " field)
   (if (null? header) #f
       (let loop ((hed (car header))
 		 (tal (cdr header))
 		 (n   0))
 	(if (equal? hed field)
 	    (vector-ref row n)
 	    (if (null? tal) #f (loop (car tal)(cdr tal)(+ n 1)))))))
 
+;; Accessors for the header/data structure
+;; get the header (slot 0) and the rows (slot 1) from a (vector header rows) result
+(define (db:get-header vec)(vector-ref vec 0))
+(define (db:get-rows   vec)(vector-ref vec 1))
+
 ;;======================================================================
 ;;  R U N S
 ;;======================================================================
 
 (define (db:get-run-name-from-id dbstruct run-id)
@@ -828,13 +825,14 @@
 				 '("")
 				 patts))
 			comparator)))
 
 
-;; register a test run with the db
+;; register a test run with the db, this accesses the main.db and does NOT
+;; use server api
+;;
 (define (db:register-run dbstruct keyvals runname state status user)
-  (debug:print 3 "runs:register-run runname: " runname " state: " state " status: " status " user: " user)
   (let* ((db        (db:get-db dbstruct #f))
 	 (keys      (map car keyvals))
 	 (keystr    (keys->keystr keys))	 
 	 (comma     (if (> (length keys) 0) "," ""))
 	 (andstr    (if (> (length keys) 0) " AND " ""))
@@ -975,11 +973,11 @@
 
 ;; db:get-runs-by-patt
 ;; get runs by list of criteria
 ;; register a test run with the db
 ;;
-;; Use: (db-get-value-by-header (db:get-header runinfo)(db:get-row runinfo))
+;; Use: (db:get-value-by-header row (db:get-header runinfo) fieldname), with row taken from (db:get-rows runinfo),
 ;;  to extract info from the structure returned
 ;;
 (define (db:get-runs-by-patt dbstruct keys runnamepatt targpatt offset limit) ;; test-name)
   (let* ((tmp      (runs:get-std-run-fields keys '("id" "runname" "state" "status" "owner" "event_time")))
 	 (keystr   (car tmp))
@@ -1013,11 +1011,11 @@
 		   (db:get-db dbstruct #f)
 		   qry-str
 		   runnamepatt)))
     (vector header res)))
 
-;; use (get-value-by-header (db:get-header runinfo)(db:get-row runinfo))
+;; use (db:get-value-by-header (db:get-rows runinfo)(db:get-header runinfo) "fieldname")
 (define (db:get-run-info dbstruct run-id)
   ;;(if (hash-table-ref/default *run-info-cache* run-id #f)
   ;;    (hash-table-ref *run-info-cache* run-id)
   (let* ((res       (vector #f #f #f #f))
 	 (keys      (db:get-keys dbstruct))
@@ -1331,11 +1329,11 @@
   (let ((res 0))
     (sqlite3:for-each-row
      (lambda (count)
        (set! res count))
      (db:get-db dbstruct run-id)
-     "SELECT count(id) FROM tests WHERE state in ('RUNNING','LAUNCHED','REMOTEHOSTSTART');")
+     "SELECT count(id) FROM tests WHERE state in ('RUNNING','LAUNCHED','REMOTEHOSTSTART') AND run_id NOT IN (SELECT id FROM runs WHERE state='deleted');")
     res))
 
 ;; NEW BEHAVIOR: Look only at single run with run-id
 ;; 
 ;; (define (db:get-running-stats dbstruct run-id)
@@ -1354,11 +1352,11 @@
       (let ((res 0))
 	(sqlite3:for-each-row
 	 (lambda (count)
 	   (set! res count))
 	 (db:get-db dbstruct run-id)
-	 "SELECT count(id) FROM tests WHERE state = 'RUNNING' OR state = 'LAUNCHED' OR state = 'REMOTEHOSTSTART'
+	 "SELECT count(id) FROM tests WHERE state in ('RUNNING','LAUNCHED','REMOTEHOSTSTART')
              AND testname in (SELECT testname FROM test_meta WHERE jobgroup=?);"
 	 jobgroup)
 	res)))
 
 ;; done with run when:
@@ -1897,14 +1895,14 @@
 
 ;; read the record given a testname
 (define (db:testmeta-get-record dbstruct testname)
   (let ((res #f))
     (sqlite3:for-each-row
-     (lambda (id testname author owner description reviewed iterated avg_runtime avg_disk tags)
-       (set! res (vector id testname author owner description reviewed iterated avg_runtime avg_disk tags)))
+     (lambda (id testname author owner description reviewed iterated avg_runtime avg_disk tags jobgroup)
+       (set! res (vector id testname author owner description reviewed iterated avg_runtime avg_disk tags jobgroup)))
      (db:get-db dbstruct #f)
-     "SELECT id,testname,author,owner,description,reviewed,iterated,avg_runtime,avg_disk,tags FROM test_meta WHERE testname=?;"
+     "SELECT id,testname,author,owner,description,reviewed,iterated,avg_runtime,avg_disk,tags,jobgroup FROM test_meta WHERE testname=?;"
      testname)
     res))
 
 ;; create a new record for a given testname
 (define (db:testmeta-add-record dbstruct testname)

Index: db_records.scm
==================================================================
--- db_records.scm
+++ db_records.scm
@@ -8,14 +8,15 @@
 ;;            |-monitor.db
 ;;            |-sdb.db
 ;;            |-fdb.db
 ;;            |-1.db
 ;;            |-<N>.db
+;;
 (define (make-dbr:dbstruct #!key (path #f)(local #f))
   (vector
    #f                  ;; the main db (contains runs, test_meta etc.) NOT CACHED IN MEM
-   (make-hash-table)   ;; run-id => [ rundb inmemdb last-mod last-read last-sync ]
+   (make-hash-table)   ;; run-id => [ rundb inmemdb last-mod last-read last-sync in-use refdb ]
    #f                  ;; the global string db (use for state, status etc.)
    path                ;; path to database files/megatest area
    local))             ;; read-only local access
 
 ;;
@@ -39,12 +40,12 @@
 ;; get a rundb vector, create it if not already existing
 (define (dbr:dbstruct-get-rundb-rec vec run-id)
   (let* ((dbhash (dbr:dbstruct-get-dbhash vec))              ;; get the runs hash
 	 (runvec (hash-table-ref/default dbhash run-id #f))) ;; get the vector for run-id
     (if (vector? runvec)
-	runvec ;;           rundb inmemdb last-mod last-read last-sync in-use
-	(let ((nvec  (vector #f      #f       -1       -1       -1       #f)))
+	runvec ;;           rundb inmemdb last-mod last-read last-sync in-use refdb
+	(let ((nvec  (vector #f      #f       -1       -1       -1       #f     #f)))
 	  (hash-table-set! dbhash run-id nvec)
 	  nvec))))
 
 ;;  [ rundb inmemdb last-mod last-read last-sync ]
 (define-inline (dbr:dbstruct-field-name->num field-name)
@@ -53,10 +54,11 @@
     ((inmem) 1) ;; the in-memory db
     ((mtime) 2) ;; last modification time
     ((rtime) 3) ;; last read time
     ((stime) 4) ;; last sync time
     ((inuse) 5) ;; is the db currently in use, #t yes, #f no.
+    ((refdb) 6) ;; the db used for reference (can be on disk or inmem)
     (else -1)))
 
 ;; get/set rundb fields
 (define (dbr:dbstruct-get-runvec-val vec run-id field-name)
   (let ((runvec   (dbr:dbstruct-get-rundb-rec vec run-id))
@@ -109,14 +111,10 @@
 (define-inline (db:test-set-state!    vec val)(vector-set! vec 3 val))
 (define-inline (db:test-set-status!   vec val)(vector-set! vec 4 val))
 (define-inline (db:test-set-run_duration! vec val)(vector-set! vec 12 val))
 (define-inline (db:test-set-final_logf! vec val)(vector-set! vec 13 val))
 
-;; get rows and header from 
-(define-inline (db:get-header vec)(vector-ref vec 0))
-(define-inline (db:get-rows   vec)(vector-ref vec 1))
-
 ;; make-vector-record "" db mintest id run_id testname state status event_time item_path
 ;;
 (define (make-db:mintest)(make-vector 7))
 (define-inline (db:mintest-get-id           vec)    (vector-ref  vec 0))
 (define-inline (db:mintest-get-run_id       vec)    (vector-ref  vec 1))
@@ -209,13 +207,10 @@
 (define-inline (tdb:step-stable-set-start!     vec val)(vector-set! vec 1 val))
 (define-inline (tdb:step-stable-set-end!       vec val)(vector-set! vec 2 val))
 (define-inline (tdb:step-stable-set-status!    vec val)(vector-set! vec 3 val))
 (define-inline (tdb:step-stable-set-runtime!   vec val)(vector-set! vec 4 val))
 
-;; use this one for db-get-run-info
-(define-inline (db:get-row    vec)(vector-ref vec 1))
-
 ;; The data structure for handing off requests via wire
 (define (make-cdb:packet)(make-vector 6))
 (define-inline (cdb:packet-get-client-sig   vec)    (vector-ref  vec 0))
 (define-inline (cdb:packet-get-qtype        vec)    (vector-ref  vec 1))
 (define-inline (cdb:packet-get-immediate    vec)    (vector-ref  vec 2))

Index: docs/html/megatest.html
==================================================================
--- docs/html/megatest.html
+++ docs/html/megatest.html
@@ -2,11 +2,11 @@
 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
 <head>
 <meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
 <meta name="generator" content="http://www.nongnu.org/elyxer/"/>
-<meta name="create-date" content="2012-01-29"/>
+<meta name="create-date" content="2014-01-25"/>
 <link rel="stylesheet" href="http://elyxer.nongnu.org/lyx.css" type="text/css" media="all"/>
 <title>Megatest User Manual</title>
 </head>
 <body>
 <div id="globalWrapper">
@@ -782,11 +782,11 @@
 </h2>
 <div class="Standard">
 Note: The monitor is usable but incomplete as of Megatest v1.31. Click on the &ldquo;Monitor&rdquo; button on the dashboard to start the monitor and give it a try.
 </div>
 <div class="Standard">
-<img class="embedded" src="monitor-state-diagram.png" alt="figure monitor-state-diagram.png" style="max-width: 383px; max-height: 335px;"/>
+<img class="embedded" src="monitor-state-diagram.png" alt="figure monitor-state-diagram.png" style="max-width: 531px; max-height: 465px;"/>
 
 </div>
 <h1 class="Section">
 <a class="toc" name="toc-Section-14">14</a> Reference
 </h1>
@@ -1708,10 +1708,10 @@
 <a class="toc" name="toc-Appendix-B">B</a> References
 </h1>
 
 <hr class="footer"/>
 <div class="footer" id="generated-by">
-Document generated by <a href="http://elyxer.nongnu.org/">eLyXer 1.2.2 (2011-06-12)</a> on <span class="create-date">2012-01-29T19:15:57.445316</span>
+Document generated by <a href="http://elyxer.nongnu.org/">eLyXer 1.2.3 (2011-08-31)</a> on <span class="create-date">2014-01-25T22:27:14.268137</span>
 </div>
 </div>
 </body>
 </html>

Index: docs/html/monitor-state-diagram.png
==================================================================
--- docs/html/monitor-state-diagram.png
+++ docs/html/monitor-state-diagram.png
cannot compute difference between binary files

Index: docs/manual/howto.txt
==================================================================
--- docs/manual/howto.txt
+++ docs/manual/howto.txt
@@ -17,11 +17,11 @@
 During Config File Processing
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 Organising Your Tests and Tasks
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-/nfs/ch/disks/ch_unienv_disk005/qa_mrwellan/interim/src/megatest/tests/fdktestqa/testqa
+
 ----------------------------
 [tests-paths]
 1 #{get misc parent}/simplerun/tests
 ----------------------------
 
@@ -34,15 +34,5 @@
 
 -------------------
 runscript main.csh
 -------------------
 
-
-ww30.2
-cellname/LVS/cellname.LAYOUT_ERRORS
-
-Error: text open
-
-ww31.3
-cellname/LVS/cellname.LAYOUT_ERRORS
-
-Error: text open

Index: docs/manual/megatest_manual.html
==================================================================
--- docs/manual/megatest_manual.html
+++ docs/manual/megatest_manual.html
@@ -781,322 +781,36 @@
 sqlite3 database.</p></div>
 </div>
 </div>
 </div>
 <h1 id="_road_map">Road Map</h1>
-<div class="paragraph"><p>Note: This road-map is tentative and subject to change without notice.</p></div>
-<div class="sect2">
-<h3 id="_ww32">ww32</h3>
-<div class="olist arabic"><ol class="arabic">
-<li>
-<p>
-Rerun step and or subsequent steps from gui
-</p>
-</li>
-<li>
-<p>
-Refresh test area files from gui
-</p>
-</li>
-<li>
-<p>
-Clean and re-run button
-</p>
-</li>
-<li>
-<p>
-Clean up STATE and STATUS handling.
-</p>
-<div class="olist loweralpha"><ol class="loweralpha">
-<li>
-<p>
-Dashboard and Test control panel are reverse order - choose and fix
-</p>
-</li>
-<li>
-<p>
-Move seldom used states and status to drop down selector
-</p>
-</li>
-</ol></div>
-</li>
-<li>
-<p>
-Access test control panel when clicking on Run Summary tests
-</p>
-</li>
-<li>
-<p>
-Feature: -generate-index-tree
-</p>
-</li>
-<li>
-<p>
-Change specifing of state and status to use STATE1/STATUS1,STATE2/STATUS2
-</p>
-</li>
-</ol></div>
-</div>
-<div class="sect2">
-<h3 id="_ww33">ww33</h3>
-<div class="olist arabic"><ol class="arabic">
-<li>
-<p>
-http api available for use with Perl, Ruby etc. scripts
-</p>
-</li>
-<li>
-<p>
-megatest.config setup entries for:
-</p>
-<div class="olist loweralpha"><ol class="loweralpha">
-<li>
-<p>
-run launching (e.g. /bin/sh %CMD% &gt; /dev/null)
-</p>
-</li>
-<li>
-<p>
-browser "konqueror %FNAME%
-</p>
-</li>
-</ol></div>
-</li>
-</ol></div>
-</div>
-<div class="sect2">
-<h3 id="_ww34">ww34</h3>
-<div class="olist arabic"><ol class="arabic">
-<li>
-<p>
-Mark dependent tests for clean/rerun -rerun-downstream
-</p>
-</li>
-<li>
-<p>
-On run start check for defunct tests in RUNNING, LAUNCHED or REMOTEHOSTSTART and correct or notify
-</p>
-</li>
-<li>
-<p>
-Fix: refresh of gui sometimes fails on last item (race condition?)
-</p>
-</li>
-</ol></div>
-</div>
-<div class="sect2">
-<h3 id="_ww35">ww35</h3>
-<div class="olist arabic"><ol class="arabic">
-<li>
-<p>
-refdb: Add export of csv, json and sexp
-</p>
-</li>
-<li>
-<p>
-Convert to using call-with-environment-variables where possible. Should allow handling of parallel runs in same process.
-</p>
-</li>
-<li>
-<p>
-Re-work text interface wizards. Several bugs on record. Possibly convert to gui based.
-</p>
-</li>
-<li>
-<p>
-Add to testconfig requirements section; launchlimiter scriptname, calls scriptname to check if ok to launch test
-</p>
-</li>
-<li>
-<p>
-Refactor Run Summary view, currently very clumsy
-</p>
-</li>
-<li>
-<p>
-Add option to show steps in Run Summary view
-</p>
-</li>
-</ol></div>
-</div>
-<div class="sect2">
-<h3 id="_ww36">ww36</h3>
-<div class="olist arabic"><ol class="arabic">
-<li>
-<p>
-Refactor guis for resizeablity
-</p>
-</li>
-<li>
-<p>
-Add filters to Run Summary view and Run Control view
-</p>
-</li>
-<li>
-<p>
-Add to megatest.config or testconfig; rerunok STATE/STATUS,STATE/STATUS&#8230;
-</p>
-</li>
-<li>
-<p>
-Launch gates for diskspace; /path/one&gt;1G,/path/two&gt;200M,/tmp&gt;5G,#{scheme <strong>toppath</strong>}&gt;1G
-</p>
-</li>
-</ol></div>
-</div>
-<div class="sect2">
-<h3 id="_bin_list">Bin List</h3>
-<div class="olist arabic"><ol class="arabic">
-<li>
-<p>
-Quality improvements
-</p>
-<div class="olist loweralpha"><ol class="loweralpha">
-<li>
-<p>
-Server stutters occasionally
-</p>
-</li>
-<li>
-<p>
-Large number of items or tests still has some issues.
-</p>
-</li>
-<li>
-<p>
-Code refactoring
-</p>
-</li>
-<li>
-<p>
-Replace remote process with true API using json (supports Web app also)
-</p>
-</li>
-</ol></div>
-</li>
-<li>
-<p>
-Streamline the gui
-</p>
-<div class="olist loweralpha"><ol class="loweralpha">
-<li>
-<p>
-Everything resizable
-</p>
-</li>
-<li>
-<p>
-Less clutter
-</p>
-</li>
-<li>
-<p>
-Tool tips
-</p>
-</li>
-<li>
-<p>
-Filters on Run Summary, Summary and Run Control panel
-</p>
-</li>
-<li>
-<p>
-Built in log viewer (partially implemented)
-</p>
-</li>
-<li>
-<p>
-Refactor the test control panel
-</p>
-</li>
-</ol></div>
-</li>
-<li>
-<p>
-Help and documentation
-</p>
-<div class="olist loweralpha"><ol class="loweralpha">
-<li>
-<p>
-Complete the user manual (I’ve been working on this lately).
-</p>
-</li>
-<li>
-<p>
-Online help in the gui
-</p>
-</li>
-</ol></div>
-</li>
-<li>
-<p>
-Streamlined install
-</p>
-<div class="olist loweralpha"><ol class="loweralpha">
-<li>
-<p>
-Deployed version (download a location independent ready to run binary bundle)
-</p>
-</li>
-<li>
-<p>
-Install Makefile (in progress, needed for Mike to install on VMs)
-</p>
-</li>
-<li>
-<p>
-Added option to compile IUP (needed for VMs)
-</p>
-</li>
-</ol></div>
-</li>
-<li>
-<p>
-Server side run launching
-</p>
-</li>
-<li>
-<p>
-Support for re-running, cleaning etc. of individual steps (ezsteps makes this very easy to implement).
-</p>
-</li>
-<li>
-<p>
-Launch process needs built in daemonizing (easy to do, just need to test it thoroughly).
-</p>
-</li>
-<li>
-<p>
-Wizards for creating tests, regression areas (current ones are text only and limited).
-</p>
-</li>
-<li>
-<p>
-Fully functional built in web service (currently you can browse runs but it is very simplistic).
-</p>
-</li>
-<li>
-<p>
-Wildcards in runconfigs: e.g. [p1271/9/%/%]
-</p>
-</li>
-<li>
-<p>
-Gui panels for editing megatest.config and runconfigs.config
-</p>
-</li>
-<li>
-<p>
-Fully isolated tests (no use of NFS to see regression area files)
-</p>
-</li>
-<li>
-<p>
-Windows version
-</p>
-</li>
-</ol></div>
+<div class="paragraph"><p>Note 1: This road-map is tentative and subject to change without notice.</p></div>
+<div class="paragraph"><p>Note 2: Starting over. Old plan is commented out.</p></div>
+<div class="sect1">
+<h2 id="_current_items">Current Items</h2>
+<div class="sectionbody">
+<div class="sect2">
+<h3 id="_ww05">ww05</h3>
+<div class="olist arabic"><ol class="arabic">
+<li>
+<p>
+Runs register in tasks table in monitor.db
+</p>
+</li>
+<li>
+<p>
+Server polls tasks table for next action
+</p>
+</li>
+<li>
+<p>
+Change run loop to execute in server, triggered by call to polling of tasks table
+</p>
+</li>
+</ol></div>
+</div>
+</div>
 </div>
 <h1 id="_getting_started">Getting Started</h1>
 <div class="openblock">
 <div class="title">Getting started with Megatest</div>
 <div class="content">
@@ -1190,10 +904,47 @@
 <div class="sect1">
 <h2 id="_the_first_chapter_of_the_second_part">The First Chapter of the Second Part</h2>
 <div class="sectionbody">
 <div class="paragraph"><p>Chapters grouped into book parts are at level 1 and can contain
 sub-sections.</p></div>
+</div>
+</div>
+<h1 id="_how_to_do_things">How To Do Things</h1>
+<div class="sect1">
+<h2 id="_tricks">Tricks</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>This section is a compendium of various useful tricks for debugging,
+configuring and generally getting the most out of Megatest.</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_debugging_tricks">Debugging Tricks</h2>
+<div class="sectionbody">
+<div class="sect2">
+<h3 id="_examining_the_environment">Examining The Environment</h3>
+<div class="sect3">
+<h4 id="_during_config_file_processing">During Config File Processing</h4>
+</div>
+<div class="sect3">
+<h4 id="_organising_your_tests_and_tasks">Organising Your Tests and Tasks</h4>
+<div class="listingblock">
+<div class="content">
+<pre><tt>[tests-paths]
+1 #{get misc parent}/simplerun/tests</tt></pre>
+</div></div>
+<div class="listingblock">
+<div class="content">
+<pre><tt>[setup]</tt></pre>
+</div></div>
+<div class="paragraph"><p>The runscript method is a brute force way to run scripts where the
+user is responsible for setting STATE and STATUS</p></div>
+<div class="listingblock">
+<div class="content">
+<pre><tt>runscript main.csh</tt></pre>
+</div></div>
+</div>
+</div>
 </div>
 </div>
 <h1 id="_reference">Reference</h1>
 <div class="sect1">
 <h2 id="_the_first_chapter_of_the_second_part_2">The First Chapter of the Second Part</h2>
@@ -1415,10 +1166,10 @@
 </div>
 <div id="footnotes"><hr /></div>
 <div id="footer">
 <div id="footer-text">
 Version 1.0<br />
-Last updated 2013-08-01 22:59:08 MST
+Last updated 2014-01-25 21:45:40 MST
 </div>
 </div>
 </body>
 </html>

Index: docs/manual/reference.txt
==================================================================
--- docs/manual/reference.txt
+++ docs/manual/reference.txt
@@ -2,10 +2,11 @@
 Reference
 =========
 
 The First Chapter of the Second Part
 ------------------------------------
+
 Chapters grouped into book parts are at level 1 and can contain
 sub-sections.
 
 The testconfig File
 -------------------

Index: docs/plan.txt
==================================================================
--- docs/plan.txt
+++ docs/plan.txt
@@ -1,83 +1,107 @@
 Road Map
 ========
 
-Note: This road-map is tentative and subject to change without notice.
-
-ww32
-~~~~
-
-. Rerun step and or subsequent steps from gui
-. Refresh test area files from gui
-. Clean and re-run button
-. Clean up STATE and STATUS handling.
-.. Dashboard and Test control panel are reverse order - choose and fix
-.. Move seldom used states and status to drop down selector
-. Access test control panel when clicking on Run Summary tests
-. Feature: -generate-index-tree
-. Change specifing of state and status to use STATE1/STATUS1,STATE2/STATUS2
-
-ww33
-~~~~
-
-. http api available for use with Perl, Ruby etc. scripts
-. megatest.config setup entries for:
-.. run launching (e.g. /bin/sh %CMD% > /dev/null)
-.. browser "konqueror %FNAME%
-
-ww34
-~~~~
-
-. Mark dependent tests for clean/rerun -rerun-downstream
-. On run start check for defunct tests in RUNNING, LAUNCHED or REMOTEHOSTSTART and correct or notify
-. Fix: refresh of gui sometimes fails on last item (race condition?)
-
-ww35
-~~~~
-
-. refdb: Add export of csv, json and sexp
-. Convert to using call-with-environment-variables where possible. Should allow handling of parallel runs in same process.
-. Re-work text interface wizards. Several bugs on record. Possibly convert to gui based.
-. Add to testconfig requirements section; launchlimiter scriptname, calls scriptname to check if ok to launch test
-. Refactor Run Summary view, currently very clumsy
-. Add option to show steps in Run Summary view
-
-ww36
-~~~~
-
-. Refactor guis for resizeablity
-. Add filters to Run Summary view and Run Control view
-. Add to megatest.config or testconfig; rerunok STATE/STATUS,STATE/STATUS...
-. Launch gates for diskspace; /path/one>1G,/path/two>200M,/tmp>5G,#{scheme *toppath*}>1G
-
-Bin List
-~~~~~~~~
-
-.	Quality improvements
-..	Server stutters occasionally
-..	Large number of items or tests still has some issues.
-..	Code refactoring
-..	Replace remote process with true API using json (supports Web app also)
-.	Streamline the gui
-..	Everything resizable
-..	Less clutter
-..	Tool tips
-..	Filters on Run Summary, Summary and Run Control panel
-..	Built in log viewer (partially implemented)
-..	Refactor the test control panel
-.	Help and documentation
-..	Complete the user manual (I’ve been working on this lately).
-..	Online help in the gui
-.	Streamlined install
-..	Deployed version (download a location independent ready to run binary bundle)
-..	Install Makefile (in progress, needed for Mike to install on VMs)
-..	Added option to compile IUP (needed for VMs)
-.	Server side run launching
-.	Support for re-running, cleaning etc. of individual steps (ezsteps makes this very easy to implement).
-.	Launch process needs built in daemonizing (easy to do, just need to test it thoroughly).
-.	Wizards for creating tests, regression areas (current ones are text only and limited).
-.	Fully functional built in web service (currently you can browse runs but it is very simplistic).
-.	Wildcards in runconfigs: e.g. [p1271/9/%/%]
-.	Gui panels for editing megatest.config and runconfigs.config
-.	Fully isolated tests (no use of NFS to see regression area files)
-.	Windows version
+Note 1: This road-map is tentative and subject to change without notice.
+
+Note 2: Starting over. Old plan is commented out.
+
+Current Items
+-------------
+
+ww05 - migrate to inmem-db
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Keep as much the same as possible. Add an internal reference db to
+almost eliminate contention on the db(s).
+
+. Add internal reference db
+. Verify that actions are accessing correct db
+.. -runtests  - inmem
+.. -list-runs - local (but not megatest.db)
+.. dashboard  - local (but not megatest.db)
+. Mirror db to /var/tmp...
+. Dashboard read db from per-run db.
+. Dashboard read db from /var/tmp
+. Runs register in tasks table in monitor.db
+. Server polls tasks table for next action (in addition?)
+. Change run loop to execute in server, triggered by call to polling of tasks table
+
+
+// ww32
+// ~~~~
+// 
+// . Rerun step and or subsequent steps from gui
+// . Refresh test area files from gui
+// . Clean and re-run button
+// . Clean up STATE and STATUS handling.
+// .. Dashboard and Test control panel are reverse order - choose and fix
+// .. Move seldom used states and status to drop down selector
+// . Access test control panel when clicking on Run Summary tests
+// . Feature: -generate-index-tree
+// . Change specifying of state and status to use STATE1/STATUS1,STATE2/STATUS2
+// 
+// ww33
+// ~~~~
+// 
+// . http api available for use with Perl, Ruby etc. scripts
+// . megatest.config setup entries for:
+// .. run launching (e.g. /bin/sh %CMD% > /dev/null)
+// .. browser "konqueror %FNAME%
+// 
+// ww34
+// ~~~~
+// 
+// . Mark dependent tests for clean/rerun -rerun-downstream
+// . On run start check for defunct tests in RUNNING, LAUNCHED or REMOTEHOSTSTART and correct or notify
+// . Fix: refresh of gui sometimes fails on last item (race condition?)
+// 
+// ww35
+// ~~~~
+// 
+// . refdb: Add export of csv, json and sexp
+// . Convert to using call-with-environment-variables where possible. Should allow handling of parallel runs in same process.
+// . Re-work text interface wizards. Several bugs on record. Possibly convert to gui based.
+// . Add to testconfig requirements section; launchlimiter scriptname, calls scriptname to check if ok to launch test
+// . Refactor Run Summary view, currently very clumsy
+// . Add option to show steps in Run Summary view
+// 
+// ww36
+// ~~~~
+// 
+// . Refactor guis for resizability
+// . Add filters to Run Summary view and Run Control view
+// . Add to megatest.config or testconfig; rerunok STATE/STATUS,STATE/STATUS...
+// . Launch gates for diskspace; /path/one>1G,/path/two>200M,/tmp>5G,#{scheme *toppath*}>1G
+// 
+// Bin List
+// ~~~~~~~~
+// 
+// .	Quality improvements
+// ..	Server stutters occasionally
+// ..	Large number of items or tests still has some issues.
+// ..	Code refactoring
+// ..	Replace remote process with true API using json (supports Web app also)
+// .	Streamline the gui
+// ..	Everything resizable
+// ..	Less clutter
+// ..	Tool tips
+// ..	Filters on Run Summary, Summary and Run Control panel
+// ..	Built in log viewer (partially implemented)
+// ..	Refactor the test control panel
+// .	Help and documentation
+// ..	Complete the user manual (I’ve been working on this lately).
+// ..	Online help in the gui
+// .	Streamlined install
+// ..	Deployed version (download a location independent ready to run binary bundle)
+// ..	Install Makefile (in progress, needed for Mike to install on VMs)
+// ..	Added option to compile IUP (needed for VMs)
+// .	Server side run launching
+// .	Support for re-running, cleaning etc. of individual steps (ezsteps makes this very easy to implement).
+// .	Launch process needs built in daemonizing (easy to do, just need to test it thoroughly).
+// .	Wizards for creating tests, regression areas (current ones are text only and limited).
+// .	Fully functional built in web service (currently you can browse runs but it is very simplistic).
+// .	Wildcards in runconfigs: e.g. [p1271/9/%/%]
+// .	Gui panels for editing megatest.config and runconfigs.config
+// .	Fully isolated tests (no use of NFS to see regression area files)
+// .	Windows version
 

Index: http-transport.scm
==================================================================
--- http-transport.scm
+++ http-transport.scm
@@ -58,11 +58,11 @@
     (string-intersperse 
      (map number->string
 	  (u8vector->list
 	   (if res res (hostname->ip hostname)))) ".")))
 
-(define (http-transport:run hostn)
+(define (http-transport:run hostn run-id server-id)
   (debug:print 2 "Attempting to start the server ...")
   (if (not *toppath*)
       (if (not (setup-for-run))
 	  (begin
 	    (debug:print 0 "ERROR: cannot find megatest.config, cannot start server, exiting")
@@ -141,34 +141,31 @@
 				  ((equal? (uri-path (request-uri (current-request))) 
 					   '(/ "hey"))
 				   (send-response body: "hey there!\n"
 						  headers: '((content-type text/plain))))
 				  (else (continue))))))))
-    (http-transport:try-start-server ipaddrstr start-port)))
+    (http-transport:try-start-server ipaddrstr start-port server-id)))
 
 ;; This is recursively run by http-transport:run until sucessful
 ;;
-(define (http-transport:try-start-server ipaddrstr portnum)
+(define (http-transport:try-start-server ipaddrstr portnum server-id)
   (handle-exceptions
    exn
    (begin
      (print-error-message exn)
      (if (< portnum 9000)
 	 (begin 
 	   (debug:print 0 "WARNING: failed to start on portnum: " portnum ", trying next port")
 	   (thread-sleep! 0.1)
-	   ;; (open-run-close tasks:remove-server-records tasks:open-db)
-	   (open-run-close tasks:server-delete tasks:open-db ipaddrstr portnum)
-	   (http-transport:try-start-server ipaddrstr (+ portnum 1)))
+	   (http-transport:try-start-server ipaddrstr (+ portnum 1) server-id))
 	 (print "ERROR: Tried and tried but could not start the server")))
    ;; any error in following steps will result in a retry
    (set! *runremote* (list ipaddrstr portnum))
-   ;; (open-run-close tasks:remove-server-records tasks:open-db)
-   (open-run-close tasks:server-register 
+   (open-run-close tasks:server-set-interface-port 
 		   tasks:open-db 
-		   (current-process-id)
-		   ipaddrstr portnum 0 'startup 'http)
+		   server-id 
+		   ipaddrstr portnum)
    (debug:print 1 "INFO: Trying to start server on " ipaddrstr ":" portnum)
    ;; This starts the spiffy server
    ;; NEED WAY TO SET IP TO #f TO BIND ALL
    (start-server bind-address: ipaddrstr port: portnum)
    (open-run-close tasks:server-delete tasks:open-db ipaddrstr portnum)
@@ -373,61 +370,61 @@
 	 (thread-join! th1)
 	 (thread-terminate! th2)
 	 (debug:print-info 11 "got res=" res)
 	 res)))))
 
-(define (http-transport:client-connect iface port)
+(define (http-transport:client-connect run-id iface port)
   (let* ((login-res   #f)
 	 (uri-dat     (make-request method: 'POST uri: (uri-reference (conc "http://" iface ":" port "/ctrl"))))
 	 (uri-api-dat (make-request method: 'POST uri: (uri-reference (conc "http://" iface ":" port "/api"))))
 	 (serverdat   (list iface port uri-dat uri-api-dat)))
-    (set! *runremote* serverdat) ;; may or may not be good ...
-    (set! login-res (rmt:login))
+    (hash-table-set! *runremote* run-id serverdat) ;; may or may not be good ...
+    (set! login-res (rmt:login run-id))
     (if (and (list? login-res)
 	     (car login-res))
 	(begin
 	  (debug:print-info 2 "Logged in and connected to " iface ":" port)
-	  (set! *runremote* serverdat)
+	  (hash-table-set! *runremote* run-id serverdat)
 	  serverdat)
 	(begin
 	  (debug:print-info 0 "ERROR: Failed to login or connect to " iface ":" port)
 	  (exit 1)))))
-;; 	  (set! *runremote* #f)
-;; 	  (set! *transport-type* 'fs)
-;; 	  #f))))
-
 
 ;; run http-transport:keep-running in a parallel thread to monitor that the db is being 
 ;; used and to shutdown after sometime if it is not.
 ;;
-(define (http-transport:keep-running)
+(define (http-transport:keep-running server-id)
   ;; if none running or if > 20 seconds since 
   ;; server last used then start shutdown
   ;; This thread waits for the server to come alive
-  (let* ((server-info (let loop ()
+  (let* ((server-info (let loop ((start-time (current-seconds))
+				 (changed    #t)
+				 (last-sdat  "not this"))
                         (let ((sdat #f))
                           (mutex-lock! *heartbeat-mutex*)
                           (set! sdat *runremote*)
                           (mutex-unlock! *heartbeat-mutex*)
-                          (if sdat
+                          (if (and sdat
+				   (not changed)
+				   (> (- (current-seconds) start-time) 2))
 			      sdat
                               (begin
                                 (sleep 4)
-                                (loop))))))
+                                (loop start-time
+				      (equal? sdat last-sdat)
+				      sdat))))))
          (iface       (car server-info))
          (port        (cadr server-info))
          (last-access 0)
 	 (tdb         (tasks:open-db))
-	 (spid        ;;(open-run-close tasks:server-get-server-id tasks:open-db #f iface port #f))
-	   (tasks:server-get-server-id tdb #f iface port #f))
 	 (server-timeout (let ((tmo (config-lookup  *configdat* "server" "timeout")))
 			   (if (and (string? tmo)
 				    (string->number tmo))
 			       (* 60 60 (string->number tmo))
 			       ;; default to three days
 			       (* 3 24 60 60)))))
-    (debug:print-info 2 "server-timeout: " server-timeout ", server pid: " spid " on " iface ":" port)
+    (tasks:server-set-state! tdb server-id "running")
     (let loop ((count 0))
       ;; Use this opportunity to sync the inmemdb to db
       (let ((start-time (current-milliseconds))
 	    (sync-time  #f)
 	    (rem-time   #f))
@@ -449,20 +446,19 @@
       (mutex-lock! *heartbeat-mutex*)
       (set! sdat *runremote*)
       (mutex-unlock! *heartbeat-mutex*)
       
       (if (or (not (equal? sdat (list iface port)))
-	      (not spid))
+	      (not server-id))
 	  (begin 
 	    (debug:print-info 0 "interface changed, refreshing iface and port info")
 	    (set! iface (car sdat))
-	    (set! port  (cadr sdat))
-	    (set! spid  (tasks:server-get-server-id tdb #f iface port #f))))
+	    (set! port  (cadr sdat))))
       
       ;; NOTE: Get rid of this mechanism! It really is not needed...
       ;; (open-run-close tasks:server-update-heartbeat tasks:open-db spid)
-      (tasks:server-update-heartbeat tdb spid)
+      (tasks:server-update-heartbeat tdb server-id)
       
       ;; (if ;; (or (> numrunning 0) ;; stay alive for two days after last access
 
       ;; Transfer *last-db-access* to last-access to use in checking that we are still alive
       (mutex-lock! *heartbeat-mutex*)
@@ -479,12 +475,12 @@
 	  (begin
 	    (debug:print-info 0 "Starting to shutdown the server.")
 	    ;; need to delete only *my* server entry (future use)
 	    (set! *time-to-exit* #t)
 	    (if *inmemdb* (db:sync-touched *inmemdb* force-sync: #t))
-	    (open-run-close tasks:server-deregister-self tasks:open-db (get-host-name))
-	    (thread-sleep! 1)
+	    ( tasks:server-set-state! tdb server-id "shutting-down")
+	    (thread-sleep! 5)
 	    (debug:print-info 0 "Max cached queries was    " *max-cache-size*)
 	    (debug:print-info 0 "Number of cached writes   " *number-of-writes*)
 	    (debug:print-info 0 "Average cached write time "
 			      (if (eq? *number-of-writes* 0)
 				  "n/a (no writes)"
@@ -497,36 +493,42 @@
 				  "n/a (no queries)"
 				  (/ *total-non-write-delay* 
 				     *number-non-write-queries*))
 			      " ms")
 	    (debug:print-info 0 "Server shutdown complete. Exiting")
+	    (tasks:server-delete-record! tdb server-id)
 	    (exit))))))
 
 ;; all routes though here end in exit ...
-(define (http-transport:launch)
+(define (http-transport:launch run-id)
+  (set! *run-id*   run-id)
   (if (not *toppath*)
       (if (not (setup-for-run))
 	  (begin
 	    (debug:print 0 "ERROR: cannot find megatest.config, exiting")
 	    (exit))))
   (debug:print-info 2 "Starting the standalone server")
   (if (args:get-arg "-daemonize")
       (daemon:ize))
-  (let ((hostinfo (open-run-close tasks:get-best-server tasks:open-db)))
-    (debug:print 11 "http-transport:launch hostinfo=" hostinfo)
-    ;; #(1 "143.182.207.24" 5736 -1 "http" 22771 "hostname")
-    (if hostinfo
-	(debug:print-info 2 "NOT starting new server, one is already running on " (vector-ref hostinfo 1) ":" (vector-ref hostinfo 2))
+  (let ((server-id (open-run-close tasks:server-lock-slot tasks:open-db run-id)))
+    (if (not server-id)
+	(begin
+	  (debug:print-info 2 "INFO: server pid=" (current-process-id) ", hostname=" (get-host-name) " not starting due to other candidates ahead in start queue")
+	  (open-run-close tasks:server-delete-records-for-this-pid tasks:open-db))
 	(if *toppath* 
 	    (let* ((th2 (make-thread (lambda ()
 				       (http-transport:run 
 					(if (args:get-arg "-server")
 					    (args:get-arg "-server")
-					    "-"))) "Server run"))
-		   (th3 (make-thread http-transport:keep-running "Keep running")))
+					    "-")
+					run-id
+					server-id)) "Server run"))
+		   (th3 (make-thread (lambda ()
+				       (http-transport:keep-running server-id))
+				     "Keep running")))
 	      ;; Database connection
-	      (set! *inmemdb*  (db:setup))
+	      (set! *inmemdb*  (db:setup run-id))
 	      (thread-start! th2)
 	      (thread-start! th3)
 	      (set! *didsomething* #t)
 	      (thread-join! th2))
 	    (debug:print 0 "ERROR: Failed to setup for megatest")))

Index: launch.scm
==================================================================
--- launch.scm
+++ launch.scm
@@ -475,11 +475,11 @@
 ;;  
 ;; <target> - <testname> [ - <itempath> ] 
 ;;
 (define (create-work-area run-id run-info keyvals test-id test-src-path disk-path testname itemdat)
   (let* ((item-path (item-list->path itemdat))
-	 (runname  (db:get-value-by-header (db:get-row run-info)
+	 (runname  (db:get-value-by-header (db:get-rows run-info)
 					   (db:get-header run-info)
 					   "runname"))
 	 ;; convert back to db: from rdb: - this is always run at server end
 	 (target   (string-intersperse (map cadr keyvals) "/"))
 

Index: megatest.scm
==================================================================
--- megatest.scm
+++ megatest.scm
@@ -30,10 +30,11 @@
 ;; (declare (uses sdb))
 ;; (declare (uses filedb))
 (declare (uses tdb))
 (declare (uses mt))
 (declare (uses api))
+(declare (uses tasks)) ;; only used for debugging.
 
 (define *db* #f) ;; this is only for the repl, do not use in general!!!!
 
 (include "common_records.scm")
 (include "key_records.scm")
@@ -201,10 +202,11 @@
 			"-gen-megatest-test"
 			"-override-timeout"
 			"-test-files"  ;; -test-paths is for listing all
 			"-load"        ;; load and exectute a scheme file
 			"-dumpmode"
+			"-run-id"
 			) 
 		 (list  "-h"
 			"-version"
 		        "-force"
 		        "-xterm"
@@ -343,13 +345,17 @@
 
     ;; Server? Start up here.
     ;;
     (let ((tl        (setup-for-run))
 	  (transport (or (configf:lookup *configdat* "setup" "transport")
-			 (args:get-arg "-transport" "http"))))
-      (debug:print 2 "Launching server using transport " transport)
-      (server:launch (string->symbol transport)))
+			 (args:get-arg "-transport" "http")))
+	  (run-id    (and (args:get-arg "-run-id")
+			  (string->number (args:get-arg "-run-id")))))
+      (debug:print 2 "Launching server using transport " transport " for run-id=" run-id)
+      (if run-id
+	  (server:launch (string->symbol transport) run-id)
+	  (debug:print 0 "ERROR: server requires run-id be specified with -run-id")))
 
     ;; Not a server? This section will decide how to communicate
     ;;
     ;;  Setup client for all expect listed here
     (if (null? (lset-intersection 
@@ -358,11 +364,12 @@
 		     '("-list-servers"
 		       "-stop-server"
 		       "-show-cmdinfo"
 		       "-list-runs")))
 	(if (setup-for-run)
-	    (begin
+	    (let ((run-id    (and (args:get-arg "-run-id")
+				  (string->number (args:get-arg "-run-id")))))
 	      ;; (set! *fdb*   (filedb:open-db (conc *toppath* "/db/paths.db")))
 	      ;; if not list or kill then start a client (if appropriate)
 	      (if (or (args-defined? "-h" "-version" "-gen-megatest-area" "-gen-megatest-test")
 		      (eq? (length (hash-table-keys args:arg-hash)) 0))
 		  (debug:print-info 1 "Server connection not needed")
@@ -383,14 +390,20 @@
 								      "fs"))))
 		    (debug:print 2 "chosen-transport: " chosen-transport " have; config=" transport-from-config ", cmdln=" transport-from-cmdln ", cmdinfo=" transport-from-cmdinfo)
 		    (case chosen-transport
 		      ((http)
 		       (set! *transport-type 'http)
-		       (server:ensure-running)
-		       ;; Get rid of this
-
-		       (client:launch))
+		       ;; if we have a run-id (why would we?) start the server for that run.
+		       ;; otherwise it is up to other calls to start the server(s) dynamically
+		       (if run-id 
+			   (begin
+			     (server:ensure-running run-id)
+			     (client:launch run-id))
+			   (begin
+			     ;; without run-id we'll start a server for "0"
+			     (server:ensure-running 0)
+			     (client:launch 0))))
 		      (else ;; (fs)
 		       (debug:print 0 "ERROR: Should NOT be getting here! fs transport is no longer supported")
 		       (set! *transport-type* 'fs)
 		       (set! *megatest-db* (make-dbr:dbstruct path: *toppath* local: #t))))))))))
 
@@ -690,10 +703,21 @@
 (if (args:get-arg "-runtests")
   (general-run-call 
    "-runtests" 
    "run a test" 
    (lambda (target runname keys keyvals)
+     ;;
+     ;; May or may not implement it this way ...
+     ;;
+     ;; Insert this run into the tasks queue
+     ;; (open-run-close tasks:add tasks:open-db 
+     ;;    	     "runtests" 
+     ;;    	     user
+     ;;    	     target
+     ;;    	     runname
+     ;;    	     (args:get-arg "-runtests")
+     ;;    	     #f))))
      (runs:run-tests target
 		     runname
 		     (args:get-arg "-runtests")
 		     user
 		     args:arg-hash))))
@@ -998,11 +1022,11 @@
 			  (change-directory startingdir)
 			  (set! exitstat (system cmd))
 			  (set! *globalexitstatus* exitstat) ;; no necessary
 			  (change-directory testpath)
 			  ;; (cdb:test-set-log! *runremote* test-id (sdb:qry 'getid htmllogfile))))
-			  (rmt:test-set-log! test-id htmllogfile)))
+			  (rmt:test-set-log! run-id test-id htmllogfile)))
 		    (let ((msg (args:get-arg "-m")))
 		      (rmt:teststep-set-status! run-id test-id stepname "end" exitstat msg logfile))
 		    )))
 	  (if (or (args:get-arg "-test-status")
 		  (args:get-arg "-set-values"))

Index: mt.scm
==================================================================
--- mt.scm
+++ mt.scm
@@ -37,11 +37,11 @@
 
 ;; runs:get-runs-by-patt
 ;; get runs by list of criteria
 ;; register a test run with the db
 ;;
-;; Use: (db-get-value-by-header (db:get-header runinfo)(db:get-row runinfo))
+;; Use: (db-get-value-by-header (db:get-header runinfo)(db:get-rows runinfo))
 ;;  to extract info from the structure returned
 ;;
 (define (mt:get-runs-by-patt keys runnamepatt targpatt)
   (let loop ((runsdat  (rmt:get-runs-by-patt keys runnamepatt targpatt 0 500))
 	     (res      '())

Index: rmt.scm
==================================================================
--- rmt.scm
+++ rmt.scm
@@ -35,24 +35,26 @@
 ;;======================================================================
 
 ;; cmd is a symbol
 ;; vars is a json string encoding the parameters for the call
 ;;
-(define (rmt:send-receive cmd params)
+(define (rmt:send-receive cmd run-id params)
   (case *transport-type* 
     ((fs-aint-here)
      (debug:print 0 "ERROR: Not yet (re)supported")
      (exit 1))
     ((fs http)
-     (let* ((jparams (db:obj->string params)) ;; (rmt:dat->json-str params))
-	    (res (http-transport:client-api-send-receive *runremote* cmd jparams)))
+     ;; if run-id is #f send the request to run-id = 0 server. This will be for main.db
+     ;;
+     (let* ((connection-info (client:setup (if run-id run-id 0)))
+	    (jparams         (db:obj->string params)) ;; (rmt:dat->json-str params))
+	    (res (http-transport:client-api-send-receive connection-info cmd jparams)))
        (if res
 	   (db:string->obj res) ;; (rmt:json-str->dat res)
 	   (begin
 	     (debug:print 0 "ERROR: Bad value from http-transport:client-api-send-receive " res)
-	     #f))
-     ))
+	     #f))))
     (else
      (debug:print 0 "ERROR: Transport " *transport-type* " not yet (re)supported")
      (exit 1))))
 
 ;; Wrap json library for strings (why the ports crap in the first place?)
@@ -74,56 +76,62 @@
 
 ;;======================================================================
 ;;  M I S C
 ;;======================================================================
 
-(define (rmt:login)
-  (rmt:send-receive 'login (list *toppath* megatest-version *my-client-signature*)))
+(define (rmt:login run-id)
+  (rmt:send-receive 'login run-id (list *toppath* megatest-version *my-client-signature*)))
 
-(define (rmt:kill-server)
-  (rmt:send-receive 'kill-server '()))
+(define (rmt:kill-server run-id)
+  (rmt:send-receive 'kill-server run-id (list run-id)))
 
 ;; hand off a call to one of the db:queries statements
 ;; added run-id to make looking up the correct db possible 
 ;;
 (define (rmt:general-call stmtname run-id . params)
-  (rmt:send-receive 'general-call (append (list stmtname run-id) params)))
+  (rmt:send-receive 'general-call run-id (append (list stmtname run-id) params)))
 
-(define (rmt:sync-inmem->db)
-  (rmt:send-receive 'sync-inmem->db '()))
+(define (rmt:sync-inmem->db run-id)
+  (rmt:send-receive 'sync-inmem->db run-id '()))
 
-(define (rmt:sdb-qry qry val)
+(define (rmt:sdb-qry qry val run-id)
   ;; add caching if qry is 'getid or 'getstr
-  (rmt:send-receive 'sdb-qry (list qry val)))
+  (rmt:send-receive 'sdb-qry run-id (list qry val)))
+
+;; NOT COMPLETED
+(define (rmt:runtests user run-id testpatt params)
+  (rmt:send-receive 'runtests run-id testpatt))
 
 ;;======================================================================
 ;;  K E Y S 
 ;;======================================================================
 
+;; These should not require run-id but it is more consistent to have it. 
+;; run-id can theoretically be #f but how to handle that is not yet done.
 (define (rmt:get-key-val-pairs run-id)
-  (rmt:send-receive 'get-key-val-pairs (list run-id)))
+  (rmt:send-receive 'get-key-val-pairs run-id (list run-id)))
 
-(define (rmt:get-keys)
-  (rmt:send-receive 'get-keys '()))
+(define (rmt:get-keys run-id)
+  (rmt:send-receive 'get-keys run-id '()))
 
 ;;======================================================================
 ;;  T E S T S
 ;;======================================================================
 
 (define (rmt:get-test-id run-id testname item-path)
-  (rmt:send-receive 'get-test-id (list run-id testname item-path)))
+  (rmt:send-receive 'get-test-id run-id (list run-id testname item-path)))
 
 (define (rmt:get-test-info-by-id run-id test-id)
   (if (and (number? run-id)(number? test-id))
-      (rmt:send-receive 'get-test-info-by-id (list run-id test-id))
+      (rmt:send-receive 'get-test-info-by-id run-id (list run-id test-id))
       (begin
 	(debug:print 0 "ERROR: Bad data handed to rmt:get-test-info-by-id run-id=" run-id ", test-id=" test-id)
 	(print-call-chain)
 	#f)))
 
 (define (rmt:test-get-rundir-from-test-id run-id test-id)
-  (rmt:send-receive 'test-get-rundir-from-test-id (list run-id test-id)))
+  (rmt:send-receive 'test-get-rundir-from-test-id run-id (list run-id test-id)))
 
 (define (rmt:open-test-db-by-test-id run-id test-id #!key (work-area #f))
   (let* ((test-path (if (string? work-area)
 			work-area
 			(rmt:test-get-rundir-from-test-id run-id test-id))))
@@ -130,88 +138,94 @@
     (debug:print 3 "TEST PATH: " test-path)
     (open-test-db test-path)))
 
 ;; WARNING: This currently bypasses the transaction wrapped writes system
 (define (rmt:test-set-state-status-by-id run-id test-id newstate newstatus newcomment)
-  (rmt:send-receive 'test-set-state-status-by-id (list run-id test-id newstate newstatus newcomment)))
+  (rmt:send-receive 'test-set-state-status-by-id run-id (list run-id test-id newstate newstatus newcomment)))
 
 (define (rmt:set-tests-state-status run-id testnames currstate currstatus newstate newstatus)
-  (rmt:send-receive 'set-tests-state-status (list run-id testnames currstate currstatus newstate newstatus)))
+  (rmt:send-receive 'set-tests-state-status run-id (list run-id testnames currstate currstatus newstate newstatus)))
 
 (define (rmt:get-tests-for-run run-id testpatt states statuses offset limit not-in sort-by sort-order qryvals)
   (if (number? run-id)
-      (rmt:send-receive 'get-tests-for-run (list run-id testpatt states statuses offset limit not-in sort-by sort-order qryvals))
+      (rmt:send-receive 'get-tests-for-run run-id (list run-id testpatt states statuses offset limit not-in sort-by sort-order qryvals))
       (begin
 	(debug:print "ERROR: rmt:get-tests-for-run called with bad run-id=" run-id)
 	(print-call-chain)
 	'())))
 
 (define (rmt:get-tests-for-runs-mindata run-ids testpatt states status not-in)
-  (rmt:send-receive 'get-tests-for-runs-mindata (list run-ids testpatt states status not-in)))
+  (apply append (map (lambda (run-id) ;; one request per run-id; the per-run result lists are concatenated
+		       (rmt:send-receive 'get-tests-for-run-mindata run-id (list run-id testpatt states status not-in))) ;; NOTE(review): sends this run-id, not the whole run-ids list - confirm against the api handler
+		     run-ids)))
 
 (define (rmt:delete-test-records run-id test-id)
-  (rmt:send-receive 'delete-test-records (list run-id test-id)))
+  (rmt:send-receive 'delete-test-records run-id (list run-id test-id)))
 
 (define (rmt:test-set-status-state run-id test-id status state msg)
-  (rmt:send-receive 'test-set-status-state (list run-id test-id status state msg)))
+  (rmt:send-receive 'test-set-status-state run-id (list run-id test-id status state msg)))
 
 (define (rmt:get-previous-test-run-record run-id test-name item-path)
-  (rmt:send-receive 'get-previous-test-run-record (list run-id test-name item-path)))
+  (rmt:send-receive 'get-previous-test-run-record run-id (list run-id test-name item-path)))
 
 (define (rmt:get-matching-previous-test-run-records run-id test-name item-path)
-  (rmt:send-receive 'get-matching-previous-test-run-records (list run-id test-name item-path)))
+  (rmt:send-receive 'get-matching-previous-test-run-records run-id (list run-id test-name item-path)))
 
 (define (rmt:test-get-logfile-info run-id test-name)
-  (rmt:send-receive 'test-get-logfile-info (list run-id test-name)))
+  (rmt:send-receive 'test-get-logfile-info run-id (list run-id test-name)))
 
 (define (rmt:test-get-records-for-index-file run-id test-name)
-  (rmt:send-receive 'test-get-records-for-index-file (list  run-id test-name)))
+  (rmt:send-receive 'test-get-records-for-index-file run-id (list run-id test-name)))
 
 (define (rmt:get-testinfo-state-status run-id test-id)
-  (rmt:send-receive 'get-testinfo-state-status (list run-id test-id)))
+  (rmt:send-receive 'get-testinfo-state-status run-id (list run-id test-id)))
 
 (define (rmt:test-set-log! run-id test-id logf)
   (if (string? logf)(rmt:general-call 'test-set-log run-id logf test-id)))
 
 (define (rmt:test-get-paths-matching-keynames-target-new keynames target res testpatt statepatt statuspatt runname)
-  (rmt:send-receive 'test-get-paths-matching-keynames-target-new (list keynames target res testpatt statepatt statuspatt runname)))
+  (let ((run-ids (rmt:get-run-ids-matching keynames target res)))
+    (apply append (map (lambda (run-id) ;; one request per matching run-id; the per-run result lists are concatenated
+			 (rmt:send-receive 'test-get-paths-matching-keynames-target-new run-id (list keynames target res testpatt statepatt statuspatt runname)))
+		       run-ids))))
 
 (define (rmt:get-prereqs-not-met run-id waitons ref-item-path #!key (mode 'normal))
-  (rmt:send-receive 'get-prereqs-not-met (list run-id waitons ref-item-path mode)))
+  (rmt:send-receive 'get-prereqs-not-met run-id (list run-id waitons ref-item-path mode)))
 
 (define (rmt:get-count-tests-running-for-run-id run-id)
-  (rmt:send-receive 'get-count-tests-running-for-run-id (list run-id)))
+  (rmt:send-receive 'get-count-tests-running-for-run-id run-id (list run-id)))
 
 ;; Statistical queries
 
 (define (rmt:get-count-tests-running run-id)
-  (rmt:send-receive 'get-count-tests-running (list run-id)))
+  (rmt:send-receive 'get-count-tests-running run-id (list run-id)))
 
 (define (rmt:get-count-tests-running-in-jobgroup run-id jobgroup)
-  (rmt:send-receive 'get-count-tests-running-in-jobgroup (list run-id jobgroup)))
+  (rmt:send-receive 'get-count-tests-running-in-jobgroup run-id (list run-id jobgroup)))
 
 (define (rmt:roll-up-pass-fail-counts run-id test-name item-path status)
-  (rmt:send-receive 'roll-up-pass-fail-counts (list run-id test-name item-path status)))
+  (rmt:send-receive 'roll-up-pass-fail-counts run-id (list run-id test-name item-path status)))
 
 (define (rmt:update-pass-fail-counts run-id test-name)
-  (rmt:general-call 'update-fail-pass-counts (list run-id test-name run-id test-name run-id test-name)))
+  (rmt:general-call 'update-fail-pass-counts run-id run-id test-name run-id test-name run-id test-name)) ;; rmt:general-call collects statement params as rest args, so they are passed spread rather than wrapped in a list
 
 ;;======================================================================
 ;;  R U N S
 ;;======================================================================
 
 (define (rmt:get-run-info run-id)
-  (rmt:send-receive 'get-run-info (list run-id)))
+  (rmt:send-receive 'get-run-info run-id (list run-id)))
 
+;; Use the special run-id == #f scenario here since there is no run yet
 (define (rmt:register-run keyvals runname state status user)
-  (rmt:send-receive 'register-run (list keyvals runname state status user)))
+  (rmt:send-receive 'register-run #f (list keyvals runname state status user)))
     
 (define (rmt:get-run-name-from-id run-id)
-  (rmt:send-receive 'get-run-name-from-id (list run-id)))
+  (rmt:send-receive 'get-run-name-from-id run-id (list run-id)))
 
 (define (rmt:delete-run run-id)
-  (rmt:send-receive 'delete-run (list run-id)))
+  (rmt:send-receive 'delete-run run-id (list run-id)))
 
 (define (rmt:delete-old-deleted-test-records)
   (rmt:send-receive 'delete-old-deleted-test-records '()))
 
 (define (rmt:get-runs runpatt count offset keypatts)
@@ -237,30 +251,30 @@
 ;;  2. Open the testdat.db file and do the query
 ;; If not given the work area
 ;;  1. Do a remote call to get the test path
 ;;  2. Continue as above
 ;; 
-(define (rmt:get-steps-for-test test-id)
-  (rmt:send-receive 'get-steps-data (list test-id)))
+(define (rmt:get-steps-for-test run-id test-id)
+  (rmt:send-receive 'get-steps-data run-id (list test-id)))
 
 (define (rmt:teststep-set-status! run-id test-id teststep-name state-in status-in comment logfile)
   (let* ((state     (items:check-valid-items "state" state-in))
 	 (status    (items:check-valid-items "status" status-in)))
     (if (or (not state)(not status))
 	(debug:print 3 "WARNING: Invalid " (if status "status" "state")
 		     " value \"" (if status state-in status-in) "\", update your validvalues section in megatest.config"))
-    (rmt:send-receive 'teststep-set-status! (list run-id test-id teststep-name state-in status-in comment logfile))))
+    (rmt:send-receive 'teststep-set-status! run-id (list run-id test-id teststep-name state-in status-in comment logfile))))
 
-(define (rmt:get-steps-for-test test-id)
-  (rmt:send-receive 'get-steps-for-test (list test-id)))
+(define (rmt:get-steps-for-test run-id test-id)
+  (rmt:send-receive 'get-steps-for-test run-id (list test-id)))
 
 ;;======================================================================
 ;;  T E S T   D A T A 
 ;;======================================================================
 
-(define (rmt:read-test-data test-id categorypatt #!key (work-area #f)) 
-  (let ((tdb  (rmt:open-test-db-by-test-id test-id work-area: work-area)))
+(define (rmt:read-test-data run-id test-id categorypatt #!key (work-area #f)) 
+  (let ((tdb  (rmt:open-test-db-by-test-id run-id test-id work-area: work-area)))
     (if tdb
 	(tdb:read-test-data tdb test-id categorypatt)
 	'())))
 
 (define (rmt:testmeta-add-record testname)
@@ -271,9 +285,9 @@
 
 (define (rmt:testmeta-update-field test-name fld val)
   (rmt:send-receive 'testmeta-update-field (list test-name fld val)))
 
 (define (rmt:test-data-rollup run-id test-id status)
-  (rmt:send-receive 'test-data-rollup (list run-id test-id status)))
+  (rmt:send-receive 'test-data-rollup run-id (list run-id test-id status)))
 
 (define (rmt:csv->test-data run-id test-id csvdata)
-  (rmt:send-receive 'csv->test-data (list run-id test-id csvdata)))
+  (rmt:send-receive 'csv->test-data run-id (list run-id test-id csvdata)))

Index: runs.scm
==================================================================
--- runs.scm
+++ runs.scm
@@ -165,11 +165,14 @@
   (thread-sleep! (cond
 		  ((> *runs:can-run-more-tests-count* 20) 2);; obviously haven't had any work to do for a while
 		  (else 0)))
   (let* ((num-running             (rmt:get-count-tests-running run-id))
 	 (num-running-in-jobgroup (rmt:get-count-tests-running-in-jobgroup run-id jobgroup))
-	 (job-group-limit         (config-lookup *configdat* "jobgroups" jobgroup)))
+	 (job-group-limit         (let ((jobg-count (config-lookup *configdat* "jobgroups" jobgroup)))
+				    (if (string? jobg-count)
+					(string->number jobg-count)
+					jobg-count))))
     (if (> (+ num-running num-running-in-jobgroup) 0)
 	(set! *runs:can-run-more-tests-count* (+ *runs:can-run-more-tests-count* 1)))
     (if (not (eq? *last-num-running-tests* num-running))
 	(begin
 	  (debug:print 2 "max-concurrent-jobs: " max-concurrent-jobs ", num-running: " num-running)
@@ -186,22 +189,22 @@
 				  #t)
 				 ;; if job-group-limit is set and number of jobs in the group is greater
 				 ;; than the limit then cannot run more jobs of this kind
 				 ((and job-group-limit
 				       (>= num-running-in-jobgroup job-group-limit))
-				  (debug:print 1 "WARNING: number of jobs " num-running-in-jobgroup 
-					       " in " jobgroup " exceeded, will not run " (tests:testqueue-get-testname test-record))
+				  (if (runs:lownoise (conc "maxjobgroup " jobgroup) 60)
+				      (debug:print 1 "WARNING: number of jobs " num-running-in-jobgroup 
+						   " in jobgroup \"" jobgroup "\" exceeds limit of " job-group-limit))
 				  #t)
 				 (else #f))))
 	  (list (not can-not-run-more) num-running num-running-in-jobgroup max-concurrent-jobs job-group-limit)))))
 
 ;;  test-names: Comma separated patterns same as test-patts but used in selection 
 ;;              of tests to run. The item portions are not respected.
 ;;              FIXME: error out if /patt specified
 ;;            
 (define (runs:run-tests target runname test-patts user flags) ;; test-names
-  (common:clear-caches) ;; clear all caches
   (let* ((keys               (keys:config-get-fields *configdat*))
 	 (keyvals            (keys:target->keyval keys target))
 	 (run-id             (rmt:register-run keyvals runname "new" "n/a" user))  ;;  test-name)))
 	 (deferred          '()) ;; delay running these since they have a waiton clause
 	 (runconfigf         (conc  *toppath* "/runconfigs.config"))
@@ -790,11 +793,11 @@
 
       ;; (print "Top of loop, hed=" hed ", tal=" tal " ,reruns=" reruns)
       (let* ((test-record (hash-table-ref test-records hed))
 	     (test-name   (tests:testqueue-get-testname test-record))
 	     (tconfig     (tests:testqueue-get-testconfig test-record))
-	     (jobgroup    (config-lookup tconfig "requirements" "jobgroup"))
+	     (jobgroup    (config-lookup tconfig "test_meta" "jobgroup"))
 	     (testmode    (let ((m (config-lookup tconfig "requirements" "mode")))
 			    (if m (string->symbol m) 'normal)))
 	     (waitons     (tests:testqueue-get-waitons    test-record))
 	     (priority    (tests:testqueue-get-priority   test-record))
 	     (itemdat     (tests:testqueue-get-itemdat    test-record)) ;; itemdat can be a string, list or #f
@@ -1457,11 +1460,11 @@
 ;; Update the test_meta table for this test
 (define (runs:update-test_meta test-name test-conf)
   (let ((currrecord (rmt:testmeta-get-record test-name)))
     (if (not currrecord)
 	(begin
-	  (set! currrecord (make-vector 10 #f))
+	  (set! currrecord (make-vector 11 #f))
 	  (rmt:testmeta-add-record test-name)))
     (for-each 
      (lambda (key)
        (let* ((idx (cadr key))
 	      (fld (car  key))
@@ -1469,11 +1472,11 @@
 	 ;; (debug:print 5 "idx: " idx " fld: " fld " val: " val)
 	 (if (and val (not (equal? (vector-ref currrecord idx) val)))
 	     (begin
 	       (print "Updating " test-name " " fld " to " val)
 	       (rmt:testmeta-update-field test-name fld val)))))
-     '(("author" 2)("owner" 3)("description" 4)("reviewed" 5)("tags" 9)))))
+     '(("author" 2)("owner" 3)("description" 4)("reviewed" 5)("tags" 9)("jobgroup" 10)))))
 
 ;; Update test_meta for all tests
 (define (runs:update-all-test_meta db)
   (let ((test-names (tests:get-all))) ;; (tests:get-valid-tests)))
     (for-each 
@@ -1486,10 +1489,11 @@
 ;; NOT PORTED - DO NOT USE YET
 ;;
 (define (runs:rollup-run keys runname user keyvals)
   (debug:print 4 "runs:rollup-run, keys: " keys " :runname " runname " user: " user)
   (let* ((db              #f)
+	 ;; register run operates on the main db
 	 (new-run-id      (rmt:register-run keyvals runname "new" "n/a" user))
 	 (prev-tests      (rmt:get-matching-previous-test-run-records new-run-id "%" "%"))
 	 (curr-tests      (mt:get-tests-for-run new-run-id "%/%" '() '()))
 	 (curr-tests-hash (make-hash-table)))
     (rmt:update-run-event_time new-run-id)

Index: server.scm
==================================================================
--- server.scm
+++ server.scm
@@ -42,22 +42,22 @@
 
 ;; Call this to start the actual server
 ;;
 
 ;; all routes though here end in exit ...
-(define (server:launch transport)
+(define (server:launch transport run-id)
   (if (not *toppath*)
       (if (not (setup-for-run))
 	  (begin
 	    (debug:print 0 "ERROR: cannot find megatest.config, exiting")
 	    (exit))))
   (debug:print-info 2 "Starting server using " transport " transport")
   (set! *transport-type* transport)
   (case transport
     ;; ((fs)   (exit)) ;; there is no "fs" server transport
-    ((fs http) (http-transport:launch))
-    ((zmq)     (zmq-transport:launch))
+    ((fs http) (http-transport:launch run-id))
+    ((zmq)     (zmq-transport:launch run-id))
     (else
      (debug:print "WARNING: unrecognised transport " transport)
      (exit))))
 
 ;;======================================================================
@@ -117,19 +117,19 @@
        (send-message pub-socket (db:obj->string (vector success/fail query-sig result)))))
     (else 
      (debug:print 0 "ERROR: unrecognised transport type: " *transport-type*)
      result)))
 
-(define (server:ensure-running)
-  (let loop ((servers  (open-run-close tasks:get-best-server tasks:open-db))
+(define (server:ensure-running run-id)
+  (let loop ((servers  (open-run-close tasks:get-server tasks:open-db run-id))
 	     (trycount 0))
     (if (or (not servers)
 	    (null? servers))
 	(begin
 	  (if (even? trycount) ;; just do the server start every other time through this loop (every 8 seconds)
 	      (let ((cmdln (conc (if (getenv "MT_MEGATEST") (getenv "MT_MEGATEST") "megatest")
-				 " -server - -daemonize")))
+				 " -server - -daemonize -run-id " run-id)))
 		(debug:print 0 "INFO: Starting server (" cmdln ") as none running ...")
 		;; (server:launch (string->symbol (args:get-arg "-transport" "http"))))
 		;; no need to use fork, no need to do the list-servers trick. Just start the damn server, it will exit on it's own
 		;; if there is an existing server
 		(system cmdln)
@@ -138,10 +138,10 @@
 		)
 	      (begin
 		(debug:print-info 0 "Waiting for server to start")
 		(thread-sleep! 4)))
 	  (if (< trycount 10)
-	      (loop (open-run-close tasks:get-best-server tasks:open-db) 
+	      (loop (open-run-close tasks:get-server tasks:open-db run-id) 
 		    (+ trycount 1))
 	      (debug:print 0 "WARNING: Couldn't start or find a server.")))
 	(debug:print 2 "INFO: Server(s) running " servers)
 	)))

Index: tasks.scm
==================================================================
--- tasks.scm
+++ tasks.scm
@@ -39,12 +39,11 @@
                                 action TEXT DEFAULT '',
                                 owner TEXT,
                                 state TEXT DEFAULT 'new',
                                 target TEXT DEFAULT '',
                                 name TEXT DEFAULT '',
-                                test TEXT DEFAULT '',
-                                item TEXT DEFAULT '',
+                                testpatt TEXT DEFAULT '',
                                 keylock TEXT,
                                 params TEXT,
                                 creation_time TIMESTAMP,
                                 execution_time TIMESTAMP);")
 	  (sqlite3:execute mdb "CREATE TABLE IF NOT EXISTS monitors (id INTEGER PRIMARY KEY,
@@ -64,12 +63,12 @@
                                   priority INTEGER,
                                   state TEXT,
                                   mt_version TEXT,
                                   heartbeat TIMESTAMP,
                                   transport TEXT,
-                                  run_id INTEGER,
-                               CONSTRAINT servers_constraint UNIQUE (pid,hostname,port));")
+                                  run_id INTEGER);")
+;;                               CONSTRAINT servers_constraint UNIQUE (pid,hostname,port));")
 	  (sqlite3:execute mdb "CREATE TABLE IF NOT EXISTS clients (id INTEGER PRIMARY KEY,
                                   server_id INTEGER,
                                   pid INTEGER,
                                   hostname TEXT,
                                   cmdline TEXT,
@@ -91,67 +90,86 @@
 (define (tasks:hostinfo-get-pubport     vec)    (vector-ref  vec 3))
 (define (tasks:hostinfo-get-transport   vec)    (vector-ref  vec 4))
 (define (tasks:hostinfo-get-pid         vec)    (vector-ref  vec 5))
 (define (tasks:hostinfo-get-hostname    vec)    (vector-ref  vec 6))
 
-;; state: 'live, 'shutting-down, 'dead
-(define (tasks:server-register mdb pid interface port priority state transport #!key (pubport -1))
-  (debug:print-info 11 "tasks:server-register " pid " " interface " " port " " priority " " state)
+(define (tasks:server-lock-slot mdb run-id)
+  ;; Purge stale server records for run-id, register this process as a candidate,
+  ;; then return whatever tasks:server-am-i-the-server? reports for this run-id.
+  (tasks:server-clean-out-old-records-for-run-id mdb run-id)
+  (tasks:server-set-available mdb run-id)
+  (thread-sleep! 2) ;; Try removing this. It may not be needed.
+  (tasks:server-am-i-the-server? mdb run-id))
+	
+;; register that this server may come online (first to register goes through with the process)
+(define (tasks:server-set-available mdb run-id)
   (sqlite3:execute 
    mdb 
-   "INSERT OR REPLACE INTO servers (pid,hostname,port,pubport,start_time,priority,state,mt_version,heartbeat,interface,transport)
-                             VALUES(?,  ?,       ?,   ?,  strftime('%s','now'), ?, ?, ?, strftime('%s','now'),?,?);"
-   pid (get-host-name) port pubport priority (conc state) 
-   (common:version-signature)
-   interface 
-   (conc transport))
-  (vector 
-   (tasks:server-get-server-id mdb (get-host-name) interface port pid)
-   interface
-   port
-   pubport
-   transport
+   "INSERT INTO servers (pid,hostname,port,pubport,start_time,      priority,state,mt_version,heartbeat,   interface,transport,run_id)
+                   VALUES(?, ?,       ?,   ?, strftime('%s','now'), ?,       ?,    ?, strftime('%s','now'),?,        ?,        ?);"
+   (current-process-id)       ;; pid
+   (get-host-name)            ;; hostname
+   -1                         ;; port
+   -1                         ;; pubport
+   (random 1000)              ;; priority (used as a tiebreaker on get-available)
+   "available"                ;; state
+   (common:version-signature) ;; mt_version
+   -1                         ;; interface
+   "http"                     ;; transport
+   run-id
    ))
 
-;; NB// two servers with same pid on different hosts will be removed from the list if pid: is used!
-(define (tasks:server-deregister mdb hostname #!key (port #f)(pid #f)(action 'delete))
-  (debug:print-info 11 "server-deregister " hostname ", port " port ", pid " pid)
-  (if *db-write-access*
-      (if pid
-	  (case action
-	    ((delete)(sqlite3:execute mdb "DELETE FROM servers WHERE pid=?;" pid))
-	    (else    (sqlite3:execute mdb "UPDATE servers SET state='dead' WHERE pid=?;" pid)))
-	  (if port
-	      (case action
-	    ((delete)(sqlite3:execute mdb "DELETE FROM servers WHERE (interface=? or hostname=?) AND port=?;" hostname hostname port))
-	    (else    (sqlite3:execute mdb "UPDATE servers SET state='dead' WHERE (interface=? or hostname=?) AND port=?;" hostname hostname port)))
-	      (debug:print 0 "ERROR: tasks:server-deregister called with neither pid nor port specified")))))
-
-(define (tasks:server-deregister-self mdb hostname)
-  (tasks:server-deregister mdb hostname pid: (current-process-id)))
-
-;; need a simple call for robustly removing records given host and port
-(define (tasks:server-delete mdb hostname port)
-  (tasks:server-deregister mdb hostname port: port action: 'delete))
-
-(define (tasks:server-get-server-id mdb hostname iface port pid)
-  (debug:print-info 12 "tasks:server-get-server-id " mdb " " hostname " " iface " " port " " pid)
-  (let ((res #f))
+(define (tasks:server-clean-out-old-records-for-run-id mdb run-id)
+  (sqlite3:execute mdb "DELETE FROM servers WHERE state='available' AND (strftime('%s','now') - start_time) > 30 AND run_id=?;" run-id)
+  (sqlite3:execute mdb "DELETE FROM servers WHERE state='running'   AND (strftime('%s','now') - heartbeat)  > 10 AND run_id=?;" run-id))
+  
+
+(define (tasks:server-set-state! mdb server-id state)
+  (sqlite3:execute mdb "UPDATE servers SET state=? WHERE id=?;" state server-id))
+
+(define (tasks:server-delete-record! mdb server-id)
+  (sqlite3:execute mdb "DELETE FROM servers WHERE id=?;" server-id))
+
+(define (tasks:server-delete-records-for-this-pid mdb)
+  (sqlite3:execute mdb "DELETE FROM servers WHERE hostname=? AND pid=?;" (get-host-name) (current-process-id)))
+
+(define (tasks:server-set-interface-port mdb server-id interface port)
+  (sqlite3:execute mdb "UPDATE servers SET interface=?,port=? WHERE id=?;" interface port server-id))
+
+;; Returns the servers.id of the first listed candidate for run-id if that record is this process (same hostname and pid), else #f.
+(define (tasks:server-am-i-the-server? mdb run-id)
+  (let* ((all    (tasks:server-get-servers-vying-for-run-id mdb run-id)) ;; vector of header and rows
+	 (first  (if (null? (db:get-rows all)) ;; all is always a vector, so the emptiness test must look at its row list
+		     (begin (debug:print 0 "ERROR: no servers listed, should be at least one by now.")
+			    (sqlite3:finalize! mdb)
+			    (exit 1))
+		     (car (db:get-rows all))))
+	 (header   (db:get-header all))
+	 (id       (db:get-value-by-header first header "id"))
+	 (hostname (db:get-value-by-header first header "hostname"))
+	 (pid      (db:get-value-by-header first header "pid"))
+	 (priority (db:get-value-by-header first header "priority")))
+    (debug:print 0 "INFO: am-i-the-server got record " first)
+    ;; for now a basic check. add tiebreaking by priority later
+    (and (equal? hostname (get-host-name))
+	 (equal? pid      (current-process-id))
+	 id)))
+	     
+;; Use: (db:get-value-by-header (car (db:get-rows dat)) (db:get-header dat) "fieldname")
+;;  to extract info from the structure returned
+;;
+(define (tasks:server-get-servers-vying-for-run-id mdb run-id)
+   (let* ((header (list "id" "hostname" "pid" "interface" "port" "pubport" "state" "run_id" "priority" "start_time"))
+	  (selstr (string-intersperse header ","))
+	  (res    '()))
     (sqlite3:for-each-row
-     (lambda (id)
-       (set! res id))
+     (lambda (a . b)
+       (set! res (cons (apply vector a b) res)))
      mdb
-     (cond
-      ((and hostname  pid)  "SELECT id FROM servers WHERE hostname=?  AND pid=?;")
-      ((and iface     port) "SELECT id FROM servers WHERE interface=? AND port=?;")
-      ((and hostname  port) "SELECT id FROM servers WHERE hostname=?  AND port=?;")
-      (else
-       (begin
-	 (debug:print 0 "ERROR: tasks:server-get-server-id needs (hostname and pid) OR (iface and port) OR (hostname and port)")
-	 "SELECT id FROM servers WHERE pid=-999;")))
-     (if hostname hostname iface)(if pid pid port))
-    res))
+     (conc "SELECT " selstr " FROM servers WHERE run_id=? ORDER BY start_time DESC;")
+     run-id)
+    (vector header res)))
 
 (define (tasks:server-update-heartbeat mdb server-id)
   (debug:print-info 1 "Heart beat update of server id=" server-id)
   (handle-exceptions
    exn
@@ -171,90 +189,22 @@
      (lambda (delta)
        (set! heartbeat-delta delta))
      mdb "SELECT strftime('%s','now')-heartbeat FROM servers WHERE id=?;" server-id)
     (< heartbeat-delta 10)))
 
-(define (tasks:client-register mdb pid hostname cmdline)
-  (sqlite3:execute
-   mdb
-   "INSERT OR REPLACE INTO clients (server_id,pid,hostname,cmdline,login_time) VALUES(?,?,?,?,strftime('%s','now'));")
-  (tasks:server-get-server-id mdb hostname #f #f pid)
-  pid hostname cmdline)
-
-(define (tasks:client-logout mdb pid hostname cmdline)
-  (sqlite3:execute
-   mdb
-   "UPDATE clients SET logout_time=strftime('%s','now') WHERE pid=? AND hostname=? AND cmdline=?;"
-   pid hostname cmdline))
-
-(define (tasks:get-logged-in-clients mdb server-id)
-  (let ((res '()))
-    (sqlite3:for-each-row 
-     (lambda (id server-id pid hostname cmdline login-time logout-time)
-       (set! res (cons (vector id server-id pid hostname cmdline login-time lougout-time) res)))
-     mdb
-     "SELECT id,server_id,pid,hostname,cmdline,login_time,logout_time FROM clients WHERE server_id=?;"
-     server-id)))
-
-(define (tasks:have-clients? mdb server-id)
-  (null? (tasks:get-logged-in-clients mdb server-id)))
-
-;; ping each server in the db and return first found that responds. 
-;; remove any others. will not necessarily remove all!
-(define (tasks:get-best-server mdb)
-  (let ((res '())
-	(best #f)
-	(transport (if (and *transport-type*
-			    (not (eq? *transport-type* 'fs)))
-		       (conc *transport-type*)
-		       "%")))
+(define (tasks:get-server mdb run-id)
+  (let ((res  #f)
+	(best #f))
     (sqlite3:for-each-row
      (lambda (id interface port pubport transport pid hostname)
-       (set! res (cons (vector id interface port pubport transport pid hostname) res))
-       ;;(debug:print-info 2 "Found existing server " hostname ":" port " registered in db"))
-       )
+       (set! res (vector id interface port pubport transport pid hostname)))
      mdb
-     
      "SELECT id,interface,port,pubport,transport,pid,hostname FROM servers
           WHERE strftime('%s','now')-heartbeat < 10 
-          AND mt_version=? AND transport LIKE ? 
-          ORDER BY start_time DESC LIMIT 1;" (common:version-signature) transport)
-    ;; for now we are keeping only one server registered in the db, return #f or first server found
-    (if (null? res) #f (car res))))
-
-;; BUG: This logic is probably needed unless methodology changes completely...
-;;
-;;     (if (null? res) #f
-;; 	(let loop ((hed (car res))
-;; 		   (tal (cdr res)))
-;; 	  ;; (print "hed=" hed ", tal=" tal)
-;; 	  (let* ((host     (list-ref hed 0))
-;; 		 (iface    (list-ref hed 1))
-;; 		 (port     (list-ref hed 2))
-;; 		 (pid      (list-ref hed 4))
-;; 		 (alive    (open-run-close tasks:server-alive? tasks:open-db #f hostname: host port: port)))
-;; 	    (if alive
-;; 		(begin
-;; 		  (debug:print-info 2 "Found an existing, alive, server " host ", " port ".")
-;; 		  (list host iface port))
-;; 		(begin
-;; 		  (debug:print-info 1 "Marking " host ":" port " as dead in server registry.")
-;; 		  (if port
-;; 		      (open-run-close tasks:server-deregister tasks:open-db host port: port)
-;; 		      (open-run-close tasks:server-deregister tasks:open-db host pid:  pid))
-;; 		  (if (null? tal)
-;; 		      #f
-;; 		      (loop (car tal)(cdr tal))))))))))
-
-(define (tasks:remove-server-records mdb)
-  (sqlite3:execute mdb "DELETE FROM servers;"))
-
-(define (tasks:mark-server hostname port pid state transport)
-  (if port
-      (open-run-close tasks:server-deregister tasks:open-db hostname port: port)
-      (open-run-close tasks:server-deregister tasks:open-db hostname pid:  pid)))
-
+          AND mt_version=? AND run_id=? AND state='running'
+          ORDER BY start_time DESC LIMIT 1;" (common:version-signature) run-id)
+    res))
 
 (define (tasks:kill-server status hostname port pid transport)
   (debug:print-info 1 "Removing defunct server record for " hostname ":" port)
   (if port
       (open-run-close tasks:server-deregister tasks:open-db hostname port: port)
@@ -287,22 +237,10 @@
 		  (process-signal pid signal/term)  ;; local machine, send sig term
 		  (thread-sleep! 5)                 ;; give it five seconds to die peacefully then do a brutal kill
 		  (process-signal pid signal/kill)) 
 		(debug:print 0 "WARNING: Can't kill frozen server on remote host " hostname))))))
 
-
-
-(define (tasks:get-all-servers mdb)
-  (let ((res '()))
-    (sqlite3:for-each-row
-     (lambda (id pid hostname interface port pubport start-time priority state mt-version last-update transport)
-       (set! res (cons (vector id pid hostname interface port pubport start-time priority state mt-version last-update transport) res)))
-     mdb
-     "SELECT id,pid,hostname,interface,port,pubport,start_time,priority,state,mt_version,strftime('%s','now')-heartbeat AS last_update,transport FROM servers ORDER BY start_time DESC;")
-    res))
-       
-
 ;;======================================================================
 ;; Tasks and Task monitors
 ;;======================================================================
 
 
@@ -334,12 +272,12 @@
      "SELECT count(id) FROM monitors WHERE last_update < (strftime('%s','now') - 300) AND username=?;"
      (car (user-information (current-user-id))))
     res))
 
 ;; register a task
-(define (tasks:add mdb action owner target runname test item params)
-  (sqlite3:execute mdb "INSERT INTO tasks_queue (action,owner,state,target,name,test,item,params,creation_time,execution_time)
+(define (tasks:add mdb action owner target runname testpatt params)
+  (sqlite3:execute mdb "INSERT INTO tasks_queue (action,owner,state,target,name,testpatt,params,creation_time,execution_time)
                        VALUES (?,?,'new',?,?,?,?,?,strftime('%s','now'),0);" 
 		   action
 		   owner
 		   target
 		   runname

Index: tests/Makefile
==================================================================
--- tests/Makefile
+++ tests/Makefile
@@ -1,18 +1,18 @@
 #
 # run some tests
 
-BINPATH=$(shell readlink -m $(PWD)/../bin)
-MEGATEST=$(BINPATH)/megatest
-DASHBOARD=$(BINPATH)/dashboard
-PATH := $(BINPATH):$(PATH)
-RUNNAME := $(shell date +w%V.%u.%H.%M)
-IPADDR := "-"
-# Set SERVER to "-server -"
-SERVER  = 
-DEBUG   = 1
-LOGGING = 
+BINPATH  := $(shell readlink -m $(PWD)/../bin)
+MEGATEST  = $(BINPATH)/megatest
+DASHBOARD = $(BINPATH)/dashboard
+PATH     := $(BINPATH):$(PATH)
+RUNNAME  := $(shell date +w%V.%u.%H.%M)
+IPADDR   := "-"
+RUNID    := 1
+SERVER    = 
+DEBUG     = 1
+LOGGING   = 
 
 OS  = $(shell grep ID /etc/*-release|cut -d= -f2)
 FS  = $(shell df -T .|tail -1|awk '{print $$2}')
 VER = $(shell fsl info|grep checkout|awk '{print $$2}'|cut -c 1-5)
 
@@ -22,11 +22,11 @@
 
 all : test1 test2 test3 test4 test5 test6 test7 test8 test9
 
 server :
 	cd ..;make;make install
-	cd fullrun;../../bin/megatest -server - -debug 22
+	cd fullrun;../../bin/megatest -server - -debug 22 -run-id $(RUNID)
 
 stopserver :
 	cd ..;make && make install
 	cd fullrun;$(MEGATEST) -stop-server 0
 

Index: tests/fullrun/megatest.config
==================================================================
--- tests/fullrun/megatest.config
+++ tests/fullrun/megatest.config
@@ -121,5 +121,9 @@
 [disks]
 disk0 /foobarbazz
 [include config/mt_include_2.config]
 
 [include #{getenv USER}_testing.config]
+
+[jobgroups]
+sqlite3 3
+blockz  5

Index: tests/fullrun/tests/blocktestxz/testconfig
==================================================================
--- tests/fullrun/tests/blocktestxz/testconfig
+++ tests/fullrun/tests/blocktestxz/testconfig
@@ -16,5 +16,7 @@
  that a test will never be run and thus remove it from\
  the queue of tests to be run.
 
 tags first,single
 reviewed 1/1/1965
+
+jobgroup blockz

Index: tests/fullrun/tests/priority_1/testconfig
==================================================================
--- tests/fullrun/tests/priority_1/testconfig
+++ tests/fullrun/tests/priority_1/testconfig
@@ -3,10 +3,11 @@
 
 [requirements]
 priority 1
 
 [test_meta]
+jobgroup sqlite3
 author matt
 owner  bob
 description This test checks that a multi-lineitem test with mix of pass and non-fail rolls up a PASS
 
 tags first,single

Index: tests/fullrun/tests/priority_2/testconfig
==================================================================
--- tests/fullrun/tests/priority_2/testconfig
+++ tests/fullrun/tests/priority_2/testconfig
@@ -5,10 +5,11 @@
 priority 2
 # runtimelim 1d 1h 1m 10s
 runtimelim 20s
 
 [test_meta]
+jobgroup sqlite3
 author matt
 owner  bob
 description This test checks that a multi-lineitem test with mix of pass and non-fail rolls up a PASS
 
 tags first,single

Index: tests/fullrun/tests/priority_3/testconfig
==================================================================
--- tests/fullrun/tests/priority_3/testconfig
+++ tests/fullrun/tests/priority_3/testconfig
@@ -4,10 +4,11 @@
 [requirements]
 priority 3
 
 
 [test_meta]
+jobgroup sqlite3
 author matt
 owner  bob
 description This test checks that a multi-lineitem test with mix of pass and non-fail rolls up a PASS
 
 tags first,single

Index: tests/fullrun/tests/priority_4/testconfig
==================================================================
--- tests/fullrun/tests/priority_4/testconfig
+++ tests/fullrun/tests/priority_4/testconfig
@@ -3,10 +3,11 @@
 
 [requirements]
 priority 4
 
 [test_meta]
+jobgroup sqlite3
 author matt
 owner  bob
 description This test checks that a multi-lineitem test with mix of pass and non-fail rolls up a PASS
 
 tags first,single

Index: tests/fullrun/tests/sqlitespeed/testconfig
==================================================================
--- tests/fullrun/tests/sqlitespeed/testconfig
+++ tests/fullrun/tests/sqlitespeed/testconfig
@@ -7,5 +7,7 @@
 
 [items]
 MANYITEMS [system (env > envfile.txt;echo aa ab ac ad ae af ag ah ai)]
 # BORKED  
 
+[test_meta]
+jobgroup sqlite3

ADDED   tests/watch-monitor.sh
Index: tests/watch-monitor.sh
==================================================================
--- /dev/null
+++ tests/watch-monitor.sh
@@ -0,0 +1,8 @@
+#!/bin/bash
+# Print every row of the servers table in fullrun/db/monitor.db (path is relative to the current directory).
+sqlite3 fullrun/db/monitor.db << EOF
+.header on
+.mode column
+select * from servers;
+.q
+EOF

Index: utils/installall.sh
==================================================================
--- utils/installall.sh
+++ utils/installall.sh
@@ -302,10 +302,16 @@
 # disabled zmq #     # CSC_OPTIONS="-I$PREFIX/include -L$CSCLIBS" $CHICKEN_INSTALL $PROX -deploy -prefix $DEPLOYTARG zmq
 # disabled zmq # fi
 # disabled zmq # fi # if zmq is in /usr/lib
 # disabled zmq # 
 cd $BUILDHOME  
+# zmq egg: clone the 3.2 branch from the repository URL (not the web commits page) and install it
+git clone -b 3.2 https://bitbucket.org/DerGuteMoritz/zmq.git zmq-3.2
+cd zmq-3.2
+chicken-install
+
+cd $BUILDHOME
 
 ## WEBKIT=WebKit-r131972
 ## if  ! [[ -e ${WEBKIT}.tar.bz2 ]] ; then
 ##    #    http://builds.nightly.webkit.org/files/trunk/src/WebKit-r131972.tar.bz2
 ##    wget http://builds.nightly.webkit.org/files/trunk/src/${WEBKIT}.tar.bz2

Index: utils/mtgetfile
==================================================================
--- utils/mtgetfile
+++ utils/mtgetfile
@@ -18,11 +18,11 @@
 		  ((not key-vals) "missing -target")
 		  ((not target)   "missing -target")
 		  ((not scriptn)  "missing file name to find")
 		  (else #f))))
   (if errmsg
-      (begin
+      (begin 
 	(print "THEPATH: Missing required switch: " errmsg)
 	(print "THEPATH: Usage: mtgetfile -target target scriptname [searchpath]")
 	(exit)))
   (print "THEPATH: key-vals=" key-vals " path=" path " scriptn=" scriptn))
 EOF

Index: utils/nbload
==================================================================
--- utils/nbload
+++ utils/nbload
@@ -2,23 +2,28 @@
 
 # load=`uptime|awk '{print $10}'|cut -d, -f1`
 load=`uptime|perl -pe 's/.*: (\d+.\d+),.*/$1/'`
 if which cpucheck > /dev/null;then
     numcpu=`cpucheck|tail -1|awk '{print $6}'`
-else
+elif which lscpu > /dev/null;then
     numcpu=`lscpu|grep "CPU.s.:"|awk '{print $2}'`
+else
+    numcpu=2
 fi
 
+# NB// max_load and lperc are percentages: lperc = 100 * load / numcpu.
+# Set MAX_ALLOWED_LOAD to override the default max_load.
 lperc=`echo "100 * $load / $numcpu"|bc`
 if [[ "x$MAX_ALLOWED_LOAD" == "x" ]]; then
-  max_load=50
+  max_load=100
 else
   max_load=$MAX_ALLOWED_LOAD
-fi 
-if [[ $lperc -lt $max_load ]];then
-  echo "$@" | at now + 0 minutes
-elif [[ "x$NBLAUNCHER" == "x" ]];then
-  echo "nbfind $@" | at now + 2 minutes
-else
-  $NBLAUNCHER "$@"
 fi
 
+if [[  $lperc -lt $max_load ]];then
+  echo "Load acceptable: lperc=$lperc %, max_load=$max_load %, load=$load, numcpu=$numcpu, MAX_ALLOWED_LOAD=$MAX_ALLOWED_LOAD %"
+  echo "Starting command: \"$@\""
+  nbfake "$@"
+else
+  # echo "Load too high: lperc=$lperc, max_load=$max_load, waiting two minutes before trying to run command: \"$@\""
+  echo "nbload $@" | at now + 2 minutes 2> /dev/null
+fi