Index: Makefile
==================================================================
--- Makefile
+++ Makefile
@@ -4,11 +4,11 @@
 INSTALL=install
 SRCFILES = common.scm items.scm launch.scm \
            ods.scm runconfig.scm server.scm configf.scm \
            db.scm keys.scm margs.scm megatest-version.scm \
            process.scm runs.scm tasks.scm tests.scm genexample.scm \
-	   fs-transport.scm zmq-transport.scm http-transport.scm \
+	   fs-transport.scm http-transport.scm \
            client.scm gutils.scm synchash.scm daemon.scm
 
 GUISRCF  = dashboard-tests.scm dashboard-guimonitor.scm 
 
 OFILES   = $(SRCFILES:%.scm=%.o)
@@ -55,10 +55,13 @@
 db.o ezsteps.o keys.o launch.o megatest.o monitor.o runs-for-ref.o runs.o tests.o : key_records.scm
 tests.o tasks.o dashboard-tasks.o : task_records.scm
 runs.o : test_records.scm
 megatest.o : megatest-fossil-hash.scm
 
+# Temporary while transitioning to new routine
+runs.o : run-tests-queue-classic.scm  run-tests-queue-new.scm
+
 megatest-fossil-hash.scm : $(SRCFILES) megatest.scm *_records.scm
 	echo "(define megatest-fossil-hash \"$(MTESTHASH)\")" > megatest-fossil-hash.new
 	if ! diff -q megatest-fossil-hash.new megatest-fossil-hash.scm ; then echo copying .new to .scm;cp -f megatest-fossil-hash.new megatest-fossil-hash.scm;fi
 
 $(OFILES) $(GOFILES) : common_records.scm 
@@ -109,11 +112,11 @@
 	chmod a+x $(PREFIX)/bin/dashboard
 
 install : bin $(PREFIX)/bin/mtest $(PREFIX)/bin/megatest $(PREFIX)/bin/dboard $(PREFIX)/bin/dashboard $(HELPERS) $(PREFIX)/bin/nbfake $(PREFIX)/bin/nbfind $(PREFIX)/bin/newdboard
 
 deploytarg/apropos.so : Makefile
-	for i in apropos base64 canvas-draw csv-xml directory-utils dot-locking extras fmt format hostinfo http-client intarweb json md5 message-digest posix posix-extras readline regex regex-case s11n spiffy spiffy-request-vars sqlite3 srfi-1 srfi-18 srfi-69 tcp test uri-common zmq check-errors synch matchable sql-null tcp-server rpc blob-utils string-utils variable-item defstruct uri-generic sendfile opensll openssl lookup-table list-utils stack; do \
+	for i in apropos base64 canvas-draw csv-xml directory-utils dot-locking extras fmt format hostinfo http-client intarweb json md5 message-digest posix posix-extras readline regex regex-case s11n spiffy spiffy-request-vars sqlite3 srfi-1 srfi-18 srfi-69 tcp test uri-common check-errors synch matchable sql-null tcp-server rpc blob-utils string-utils variable-item defstruct uri-generic sendfile openssl lookup-table list-utils stack; do \
 	chicken-install -prefix deploytarg -deploy $$i;done
 
 deploytarg/libsqlite3.so : 
 	CSC_OPTIONS="-Ideploytarg -Ldeploytarg" $CHICKEN_INSTALL -prefix deploytarg -deploy sqlite3
 

Index: NOTES
==================================================================
--- NOTES
+++ NOTES
@@ -3,5 +3,19 @@
 3. Tests may or may not have file system access to the originating
    run area. rsync is used to pull the test area to the home host
    if and only if the originating area can not be seen via file 
    system. NO LONGER TRUE. Rsync is used but file system must be visible.
 4. All db access is done via the home host. NOT IMPLEMENTED YET.
+
+
+fdktestqa on Apr 29, 2013: 1812 tests
+
+INFO: (0) Max cached queries was    10
+INFO: (0) Number of cached writes   41335
+INFO: (0) Average cached write time 206.081553163179 ms
+INFO: (0) Number non-cached queries 74289
+INFO: (0) Average non-cached time   1055.09826488444 ms
+INFO: (0) Server shutdown complete. Exiting
+
+Start: 0 at Sun Apr 28 22:18:25 MST 2013
+Max:  52 at Sun Apr 28 23:06:59 MST 2013
+End:   6 at Sun Apr 28 23:47:51 MST 2013

Index: client.scm
==================================================================
--- client.scm
+++ client.scm
@@ -12,14 +12,16 @@
 ;; C L I E N T S
 ;;======================================================================
 
 (require-extension (srfi 18) extras tcp s11n)
 
-(use sqlite3 srfi-1 posix regex regex-case srfi-69 hostinfo md5 message-digest zmq)
+(use sqlite3 srfi-1 posix regex regex-case srfi-69 hostinfo md5 message-digest)
+;; (use zmq)
+
 (import (prefix sqlite3 sqlite3:))
 
-(use spiffy uri-common intarweb http-client spiffy-request-vars)
+(use spiffy uri-common intarweb http-client spiffy-request-vars uri-common intarweb)
 
 (declare (unit client))
 
 (declare (uses common))
 (declare (uses db))
@@ -73,11 +75,20 @@
     ;; ;; DEBUG STUFF
     ;; (if (eq? *transport-type* 'fs)(begin (print "ERROR!!!!!!! refusing to run with transport " *transport-type*)(exit 99)))
     
     (debug:print-info 11 "Using transport type of " *transport-type* (if hostinfo (conc " to connect to " hostinfo) ""))
     (case *transport-type* 
-      ((fs)(if (not *megatest-db*)(set! *megatest-db* (open-db))))
+      ((fs) ;; (if (not *megatest-db*)(set! *megatest-db* (open-db))))
+       ;; we are not doing fs any longer. let's cheat and start up a server
+       ;; if we are falling back on fs (not 100% supported) do an about face and start a server
+       (if (not (equal? (args:get-arg "-transport") "fs"))
+	   (begin
+	     (set! *transport-type* #f)
+	     (system "megatest -list-servers | grep alive || megatest -server - -daemonize && sleep 3")
+	     (thread-sleep! 1)
+	     (if (> numtries 0)
+		 (client:setup numtries: (- numtries 1))))))
       ((http)
        (http-transport:client-connect (tasks:hostinfo-get-interface hostinfo)
 				      (tasks:hostinfo-get-port hostinfo)))
       ((zmq)
        (zmq-transport:client-connect (tasks:hostinfo-get-interface hostinfo)

Index: common.scm
==================================================================
--- common.scm
+++ common.scm
@@ -53,10 +53,11 @@
 (define *server-id*         #f)
 (define *server-info*       #f)
 (define *time-to-exit*      #f)
 (define *received-response* #f)
 (define *default-numtries*  10)
+(define *db-write-access*   #t)
 
 (define *target*            (make-hash-table)) ;; cache the target here; target is keyval1/keyval2/.../keyvalN
 (define *keys*              (make-hash-table)) ;; cache the keys here
 (define *keyvals*           (make-hash-table))
 (define *toptest-paths*     (make-hash-table)) ;; cache toptest path settings here

Index: configf.scm
==================================================================
--- configf.scm
+++ configf.scm
@@ -59,11 +59,11 @@
 (define configf:comment-rx (regexp "^\\s*#.*"))
 (define configf:cont-ln-rx (regexp "^(\\s+)(\\S+.*)$"))
 
 ;; read a line and process any #{ ... } constructs
 
-(define configf:var-expand-regex (regexp "^(.*)#\\{(scheme|system|shell|getenv|get|runconfigs-get)\\s+([^\\}\\{]*)\\}(.*)"))
+(define configf:var-expand-regex (regexp "^(.*)#\\{(scheme|system|shell|getenv|get|runconfigs-get|rget)\\s+([^\\}\\{]*)\\}(.*)"))
 (define (configf:process-line l ht)
   (let loop ((res l))
     (if (string? res)
 	(let ((matchdat (string-search configf:var-expand-regex res)))
 	  (if matchdat
@@ -81,10 +81,11 @@
 				 (let* ((parts (string-split cmd))
 					(sect  (car parts))
 					(var   (cadr parts)))
 				   (conc "(lambda (ht)(config-lookup ht \"" sect "\" \"" var "\"))")))
 				((runconfigs-get) (conc "(lambda (ht)(runconfigs-get ht \"" cmd "\"))"))
+				((rget)           (conc "(lambda (ht)(runconfigs-get ht \"" cmd "\"))"))
 				(else "(lambda (ht)(print \"ERROR\") \"ERROR\")"))))
 		;; (print "fullcmd=" fullcmd)
 		(with-input-from-string fullcmd
 		  (lambda ()
 		    (set! result ((eval (read)) ht))))
@@ -110,18 +111,26 @@
 
 ;; Lookup a value in runconfigs based on -reqtarg or -target
 (define (runconfigs-get config var)
   (let ((targ (or (args:get-arg "-reqtarg")(args:get-arg "-target"))))
     (if targ
-	(config-lookup config targ var)
-	#f)))
+	(or (configf:lookup config targ var)          ;; first try the section named after the target
+	    (configf:lookup config "default" var))    ;; then fall back to the "default" section
+	(configf:lookup config "default" var))))      ;; no -reqtarg/-target given: only "default" is consulted
 
 (define-inline (configf:read-line p ht allow-processing)
-  (if (and allow-processing 
-	   (not (eq? allow-processing 'return-string)))
-      (configf:process-line (read-line p) ht)
-      (read-line p)))
+  ;; a line ending in \ is joined with the line after it (the \ itself is dropped)
+  (let loop ((inl (read-line p)))
+    (if (and (string? inl)
+	     (not (string-null? inl))
+	     (equal? "\\" (string-take-right inl 1))) ;; last character is \ 
+	(let ((nextl (read-line p))
+	      (head  (string-drop-right inl 1)))      ;; text without the continuation \
+	  (loop (if (eof-object? nextl) head (string-append head nextl)))) ;; at EOF keep what we have
+	(if (and allow-processing (not (eq? allow-processing 'return-string)))
+	    (configf:process-line inl ht) ;; expand #{...} constructs
+	    inl))))
 
 ;; read a config file, returns hash table of alists
 
 ;; read a config file, returns hash table of alists
 ;; adds to ht if given (must be #f otherwise)

Index: dashboard-tests.scm
==================================================================
--- dashboard-tests.scm
+++ dashboard-tests.scm
@@ -270,13 +270,13 @@
 	       (keydat        (if testdat (open-run-close db:get-key-val-pairs #f run-id) #f))
 	       (rundat        (if testdat (open-run-close db:get-run-info #f run-id) #f))
 	       (runname       (if testdat (db:get-value-by-header (db:get-row rundat)
 								  (db:get-header rundat)
 								  "runname") #f))
-	       (teststeps     (if testdat (db:get-compressed-steps test-id) '()))
 	       (logfile       "/this/dir/better/not/exist")
 	       (rundir        logfile)
+	       (teststeps     (if testdat (db:get-compressed-steps test-id work-area: rundir) '()))
 	       (testfullname  (if testdat (db:test-get-fullname testdat) "Gathering data ..."))
 	       (testname      (if testdat (db:test-get-testname testdat) "n/a"))
 	       (testmeta      (if testdat 
 				  (let ((tm (open-run-close db:testmeta-get-record #f testname)))
 				    (if tm tm (make-db:testmeta)))
@@ -305,15 +305,19 @@
 	       (refreshdat (lambda ()
 			     (let* ((curr-mod-time (file-modification-time db-path))
 				    (need-update   (or (and (> curr-mod-time db-mod-time)
 							    (> (current-seconds) (+ last-update 2))) ;; every two seconds if db touched
 						       request-update))
-				    (newtestdat (if need-update (open-run-close db:get-test-info-by-id #f test-id))))
+				    (newtestdat (if need-update 
+						    (handle-exceptions
+						     exn 
+						     (debug:print-info 2 "test db access issue: " ((condition-property-accessor 'exn 'message) exn))
+						     (open-run-close db:get-test-info-by-id #f test-id )))))
 			       (cond
 				((and need-update newtestdat)
 				 (set! testdat newtestdat)
-				 (set! teststeps    (db:get-compressed-steps test-id))
+				 (set! teststeps    (db:get-compressed-steps test-id work-area: rundir))
 				 (set! logfile      (conc (db:test-get-rundir testdat) "/" (db:test-get-final_logf testdat)))
 				 (set! rundir       (db:test-get-rundir testdat))
 				 (set! testfullname (db:test-get-fullname testdat))
 				 ;; (debug:print 0 "INFO: teststeps=" (intersperse teststeps "\n    "))
 				 )

Index: db.scm
==================================================================
--- db.scm
+++ db.scm
@@ -19,11 +19,11 @@
 (use sqlite3 srfi-1 posix regex regex-case srfi-69 csv-xml s11n md5 message-digest base64)
 (import (prefix sqlite3 sqlite3:))
 (import (prefix base64 base64:))
 
 ;; Note, try to remove this dependency 
-(use zmq)
+;; (use zmq)
 
 (declare (unit db))
 (declare (uses common))
 (declare (uses keys))
 (declare (uses ods))
@@ -68,14 +68,18 @@
 	  (begin
 	    (debug:print 0 "ERROR: Attempted to open db when not in megatest area. Exiting.")
 	    (exit))))
   (let* ((dbpath    (conc *toppath* "/megatest.db")) ;; fname)
 	 (dbexists  (file-exists? dbpath))
+	 (write-access (file-write-access? dbpath))
 	 (db        (sqlite3:open-database dbpath)) ;; (never-give-up-open-db dbpath))
 	 (handler   (make-busy-timeout (if (args:get-arg "-override-timeout")
 					   (string->number (args:get-arg "-override-timeout"))
 					   136000)))) ;; 136000))) ;; 136000 = 2.2 minutes
+    (if (and dbexists
+	     (not write-access))
+	(set! *db-write-access* write-access)) ;; only unset so other db's also can use this control
     (debug:print-info 11 "open-db, dbpath=" dbpath " argv=" (argv))
     (sqlite3:set-busy-handler! db handler)
     (if (not dbexists)
 	(db:initialize db))
     (db:set-sync db)
@@ -241,24 +245,24 @@
 ;;======================================================================
 ;; T E S T   S P E C I F I C   D B 
 ;;======================================================================
 
 ;; Create the sqlite db for the individual test(s)
-(define (open-test-db testpath) 
-  (debug:print-info 11 "open-test-db " testpath)
-  (if (and testpath 
-	   (directory? testpath)
-	   (file-read-access? testpath))
-      (let* ((dbpath    (conc testpath "/testdat.db"))
+(define (open-test-db work-area) 
+  (debug:print-info 11 "open-test-db " work-area)
+  (if (and work-area 
+	   (directory? work-area)
+	   (file-read-access? work-area))
+      (let* ((dbpath    (conc work-area "/testdat.db"))
 	     (dbexists  (file-exists? dbpath))
 	     (handler   (make-busy-timeout (if (args:get-arg "-override-timeout")
 					       (string->number (args:get-arg "-override-timeout"))
 					       136000))))
 	(handle-exceptions
 	 exn
 	 (begin
-	   (debug:print 0 "ERROR: problem accessing test db " testpath ", you probably should clean and re-run this test"
+	   (debug:print 0 "ERROR: problem accessing test db " work-area ", you probably should clean and re-run this test"
 			((condition-property-accessor 'exn 'message) exn))
 	   #f)
 	 (set! db (sqlite3:open-database dbpath)))
 	(sqlite3:set-busy-handler! db handler)
 	(if (not dbexists)
@@ -265,29 +269,31 @@
 	    (begin
 	      (sqlite3:execute db "PRAGMA synchronous = FULL;")
 	      (debug:print-info 11 "Initialized test database " dbpath)
 	      (db:testdb-initialize db)))
 	;; (sqlite3:execute db "PRAGMA synchronous = 0;")
-	(debug:print-info 11 "open-test-db END (sucessful)" testpath)
+	(debug:print-info 11 "open-test-db END (sucessful)" work-area)
 	;; now let's test that everything is correct
 	(handle-exceptions
 	 exn
 	 (begin
-	   (debug:print 0 "ERROR: problem accessing test db " testpath ", you probably should clean and re-run this test"
+	   (debug:print 0 "ERROR: problem accessing test db " work-area ", you probably should clean and re-run this test"
 			((condition-property-accessor 'exn 'message) exn))
 	   #f)
 	 ;; Is there a cheaper single line operation that will check for existance of a table
 	 ;; and raise an exception ?
 	 (sqlite3:execute db "SELECT id FROM test_data LIMIT 1;"))
 	db)
       (begin
-	(debug:print-info 11 "open-test-db END (unsucessful)" testpath)
+	(debug:print-info 11 "open-test-db END (unsucessful)" work-area)
 	#f)))
 
 ;; find and open the testdat.db file for an existing test
-(define (db:open-test-db-by-test-id db test-id)
-  (let* ((test-path (cdb:remote-run db:test-get-rundir-from-test-id db test-id)))
+;; an explicit work-area is used as-is; otherwise the rundir is fetched via cdb:remote-run
+(define (db:open-test-db-by-test-id db test-id #!key (work-area #f))
+  (let* ((test-path (or work-area
+			(cdb:remote-run db:test-get-rundir-from-test-id db test-id))))
     (debug:print 3 "TEST PATH: " test-path)
     (open-test-db test-path)))
 
 (define (db:testdb-initialize db)
   (debug:print 11 "db:testdb-initialize START")
@@ -519,10 +525,30 @@
 	    (if (null? tal) #f (loop (car tal)(cdr tal)(+ n 1)))))))
 
 ;;======================================================================
 ;;  R U N S
 ;;======================================================================
+
+;; Look up the runname column of the row in runs whose id is run-id.
+;; Returns #f when no row has that id.
+(define (db:get-run-name-from-id db run-id)
+  (let ((found #f))
+    (sqlite3:for-each-row
+     (lambda (name) (set! found name))
+     db
+     "SELECT runname FROM runs WHERE id=?;" run-id)
+    found))
+
+;; Fetch the column (key:get-fieldname key) of the row in runs whose id is
+;; run-id. Returns #f when no row has that id.
+(define (db:get-run-key-val db run-id key)
+  (let ((field (key:get-fieldname key))
+	(value #f))
+    (sqlite3:for-each-row (lambda (v) (set! value v))
+			  db
+			  (conc "SELECT " field " FROM runs WHERE id=?;") run-id)
+    value))
 
 ;; keys list to key1,key2,key3 ...
 (define (runs:get-std-run-fields keys remfields)
   (let* ((header    (append (map key:get-fieldname keys)
 			    remfields))
@@ -540,10 +566,43 @@
 				 (conc fieldname " " wildtype " '" patt "'")))
 			     (if (null? patts)
 				 '("")
 				 patts))
 			comparator)))
+
+
+;; Register a run in the runs table (INSERT OR IGNORE), set its state/status
+;; and return the run id. Returns #f if runname or any key value is missing.
+(define (db:register-run db keys keyvallst runname state status user)
+  (debug:print 3 "runs:register-run, keys: " keys " keyvallst: " keyvallst " runname: " runname " state: " state " status: " status " user: " user)
+  (let* ((keystr    (keys->keystr keys))
+	 (comma     (if (> (length keys) 0) "," ""))
+	 (andstr    (if (> (length keys) 0) " AND " ""))
+	 (valslots  (keys->valslots keys)) ;; ?,?,? ...
+	 (keyvals   (map cadr keyvallst))
+	 (allvals   (append (list runname state status user) keyvals))
+	 (qryvals   (cons runname keyvals))
+	 (key=?str  (string-intersperse (map (lambda (k)(conc (key:get-fieldname k) "=?")) keys) " AND ")))
+    (debug:print 3 "keys: " keys " allvals: " allvals " keyvals: " keyvals)
+    (if (and runname (not (memq #f keyvals))) ;; every key must have a value
+	(let ((res #f))
+	  ;; build the target string only after the check above; string-intersperse
+	  ;; expects a list of strings and a #f key value would make it fail
+	  (debug:print 2 "NOTE: using target " (string-intersperse keyvals "/") " for this run")
+	  (apply sqlite3:execute db (conc "INSERT OR IGNORE INTO runs (runname,state,status,owner,event_time" comma keystr ") VALUES (?,?,?,?,strftime('%s','now')" comma valslots ");")
+		 allvals)
+	  (apply sqlite3:for-each-row
+		 (lambda (id) (set! res id))
+		 db
+		 (conc "SELECT id FROM runs WHERE (runname=? " andstr key=?str ");")
+		 qryvals)
+	  (if res (sqlite3:execute db "UPDATE runs SET state=?,status=? WHERE id=?;" state status res)) ;; skip when no row was found
+	  res)
+	(begin
+	  (debug:print 0 "ERROR: Called without all necessary keys")
+	  #f))))
+
 
 ;; replace header and keystr with a call to runs:get-std-run-fields
 ;;
 ;; keypatts: ( (KEY1 "abc%def")(KEY2 "%") )
 ;; runpatts: patt1,patt2 ...
@@ -701,25 +760,10 @@
 
 ;;======================================================================
 ;;  T E S T S
 ;;======================================================================
 
-(define (db:tests-register-test db run-id test-name item-path)
-  (debug:print-info 11 "db:tests-register-test START db=" db ", run-id=" run-id ", test-name=" test-name ", item-path=\"" item-path "\"")
-  (let ((item-paths (if (equal? item-path "")
-			(list item-path)
-			(list item-path ""))))
-    (for-each 
-     (lambda (pth)
-       (sqlite3:execute db "INSERT OR IGNORE INTO tests (run_id,testname,event_time,item_path,state,status) VALUES (?,?,strftime('%s','now'),?,'NOT_STARTED','n/a');" 
-			run-id 
-			test-name
-			pth))
-     item-paths)
-  (debug:print-info 11 "db:tests-register-test END db=" db ", run-id=" run-id ", test-name=" test-name ", item-path=\"" item-path "\"")
-    #f))
-
 ;; states and statuses are lists, turn them into ("PASS","FAIL"...) and use NOT IN
 ;; i.e. these lists define what to NOT show.
 ;; states and statuses are required to be lists, empty is ok
 ;; not-in #t = above behaviour, #f = must match
 (define (db:get-tests-for-run db run-id testpatt states statuses 
@@ -824,13 +868,13 @@
      )
     (debug:print-info 11 "db:get-tests-for-run START run-ids=" run-ids ", testpatt=" testpatt ", states=" states ", statuses=" statuses ", not-in=" not-in ", sort-by=" sort-by)
     res))
 
 ;; this one is a bit broken BUG FIXME
-(define (db:delete-test-step-records db test-id)
+(define (db:delete-test-step-records db test-id #!key (work-area #f))
   ;; Breaking it into two queries for better file access interleaving
-  (let* ((tdb (db:open-test-db-by-test-id db test-id)))
+  (let* ((tdb (db:open-test-db-by-test-id db test-id work-area: work-area)))
     ;; test db's can go away - must check every time
     (if tdb
 	(begin
 	  (sqlite3:execute tdb "DELETE FROM test_steps;")
 	  (sqlite3:execute tdb "DELETE FROM test_data;")
@@ -860,11 +904,12 @@
   (let ((targtime (- (current-seconds)(* 30 24 60 60)))) ;; one month in the past
     (sqlite3:execute db "DELETE FROM tests WHERE state='DELETED' AND event_time<?;" targtime)))
 
 ;; set tests with state currstate and status currstatus to newstate and newstatus
 ;; use currstate = #f and or currstatus = #f to apply to any state or status respectively
-;; WARNING: SQL injection risk
+;; WARNING: SQL injection risk. NB// See new but not yet used "faster" version below
+;;
 (define (db:set-tests-state-status db run-id testnames currstate currstatus newstate newstatus)
   (for-each (lambda (testname)
 	      (let ((qry (conc "UPDATE tests SET state=?,status=? WHERE "
 			       (if currstate  (conc "state='" currstate "' AND ") "")
 			       (if currstatus (conc "status='" currstatus "' AND ") "")
@@ -871,13 +916,43 @@
 			       " run_id=? AND testname=? AND NOT (item_path='' AND testname in (SELECT DISTINCT testname FROM tests WHERE testname=? AND item_path != ''));")))
 		;;(debug:print 0 "QRY: " qry)
 		(sqlite3:execute db qry run-id newstate newstatus testname testname)))
 	    testnames))
 
+
+;; Variant of db:set-tests-state-status that goes through cdb:client-call, one thread per update.
+;; #f currstate/currstatus becomes "%". NOTE(review): the update-test-state query compares
+;; with "state=?" (not LIKE), so "%" matches nothing there - confirm the intent.
+(define (cdb:set-tests-state-status-faster serverdat run-id testnames currstate currstatus newstate newstatus)
+  (let ((currstate  (if currstate currstate "%"))
+	(currstatus (if currstatus currstatus "%"))
+	(start      (lambda (qtype newval currval testname) ;; start one update thread and return it
+		      (let ((th (make-thread
+				 (lambda ()
+				   (cdb:client-call serverdat qtype #t *default-numtries* newval currval run-id testname testname)))))
+			(thread-start! th)
+			th))))
+    (for-each thread-join! ;; wait for every update that was started
+	      (append-map
+	       (lambda (testname) ;; no thread is created for a #f newstate/newstatus
+		 (append (if newstate  (list (start 'update-test-state  newstate  currstate  testname)) '())
+			 (if newstatus (list (start 'update-test-status newstatus currstatus testname)) '())))
+	       testnames))
+    #t))
+
 (define (cdb:delete-tests-in-state serverdat run-id state)
   (cdb:client-call serverdat 'delete-tests-in-state #t *default-numtries* run-id state))
 
+(define (cdb:tests-update-cpuload-diskfree serverdat test-id cpuload diskfree) ;; sends an 'update-cpuload-diskfree request for test-id
+  (cdb:client-call serverdat 'update-cpuload-diskfree #t *default-numtries* cpuload diskfree test-id)) ;; args in the ?-order of that query: cpuload, diskfree, id
+
+(define (cdb:tests-update-run-duration serverdat test-id minutes) ;; sends an 'update-run-duration request for test-id
+  (cdb:client-call serverdat 'update-run-duration #t *default-numtries* minutes test-id)) ;; args in the ?-order of that query: run_duration, id
+
+(define (cdb:tests-update-uname-host serverdat test-id uname hostname) ;; sends an 'update-uname-host request for test-id
+  (cdb:client-call serverdat 'update-uname-host #t *default-numtries* uname hostname test-id)) ;; ?-order of that query is uname, host, id (assumes params bind positionally, as in the sibling calls)
+
 ;; speed up for common cases with a little logic
 (define (db:test-set-state-status-by-id db test-id newstate newstatus newcomment)
   (cond
    ((and newstate newstatus newcomment)
     (sqlite3:exectute db "UPDATE tests SET state=?,status=?,comment=? WHERE id=?;" newstate newstatus test-id))
@@ -954,12 +1029,15 @@
 
 (define db:get-test-id db:get-test-id-not-cached)
 
 ;; given a test-info record, patch in the latest data from the testdat.db file
 ;; found in the test run directory
-(define (db:patch-tdb-data-into-test-info db test-id res)
-  (let ((tdb (db:open-test-db-by-test-id db test-id)))
+;;
+;; NOT USED
+;;
+(define (db:patch-tdb-data-into-test-info db test-id res #!key (work-area #f))
+  (let ((tdb (db:open-test-db-by-test-id db test-id work-area: work-area)))
     ;; get state and status from megatest.db in real time
     ;; other fields that perhaps should be updated:
     ;;   fail_count
     ;;   pass_count
     ;;   final_logf
@@ -1139,15 +1217,15 @@
 
 ;; NOTE: Can remove the regex and base64 encoding for zmq
 (define (db:obj->string obj)
   (case *transport-type*
     ((fs) obj)
-	((http)
+    ((http) ;; http: serialize obj, base64-encode the text, then substitute "_" for "="
     (string-substitute
-       (regexp "=") "_"
-         (base64:base64-encode (with-output-to-string (lambda ()(serialize obj))))
-        #t))
+      (regexp "=") "_"
+      (base64:base64-encode (with-output-to-string (lambda ()(serialize obj))))
+      #t)) ;; NOTE(review): #t presumably selects replace-all mode - confirm against the regex egg
     ((zmq)(with-output-to-string (lambda ()(serialize obj))))
     (else obj)))
 
 (define (db:string->obj msg)
   (case *transport-type*
@@ -1190,16 +1268,22 @@
        (debug:print-info 11 "zdat=" zdat)
        (let* ((res  #f)
 	      (rawdat      (http-transport:client-send-receive serverdat zdat))
 	      (tmp         #f))
 	 (debug:print-info 11 "Sent " zdat ", received " rawdat)
-	 (set! tmp (db:string->obj rawdat))
-	 (vector-ref tmp 2))))
+	 (if rawdat
+	     (begin
+	       (set! tmp (db:string->obj rawdat))
+	       (vector-ref tmp 2))
+	     (begin
+	       (debug:print 0 "ERROR: Communication with the server failed. Exiting if possible")
+	       (exit 1))))))
     ((zmq)
      (handle-exceptions
       exn
       (begin
+	(debug:print-info 0 "cdb:client-call timeout or error. Trying again in 5 seconds")
 	(thread-sleep! 5) 
 	(if (> numretries 0)(apply cdb:client-call serverdat qtype immediate (- numretries 1) params)))
       (let* ((push-socket (vector-ref serverdat 0))
 	     (sub-socket  (vector-ref serverdat 1))
 	     (client-sig  (client:get-signature))
@@ -1217,31 +1301,32 @@
 			       (receive-message* sub-socket)
 			       ;; now get the actual message
 			       (let ((myres (db:string->obj (receive-message* sub-socket))))
 				 (if (equal? query-sig (vector-ref myres 1))
 				     (set! res (vector-ref myres 2))
-				     (loop))))))
-	     (timeout (lambda ()
-			(let loop ((n numretries))
-			  (thread-sleep! 15)
-			  (if (not res)
-			      (if (> numretries 0)
-				  (begin
-				    (debug:print 2 "WARNING: no reply to query " params ", trying resend")
-				    (debug:print-info 11 "re-sending message")
-				    (send-message push-socket zdat)
-				    (debug:print-info 11 "message re-sent")
-				    (loop (- n 1)))
-				  ;; (apply cdb:client-call *runremote* qtype immediate (- numretries 1) params))
-				  (begin
-				    (debug:print 0 "ERROR: cdb:client-call timed out " params ", exiting.")
-				    (exit 5))))))))
+				     (loop)))))))
+	    ;; (timeout (lambda ()
+	    ;;     	(let loop ((n numretries))
+	    ;;     	  (thread-sleep! 15)
+	    ;;     	  (if (not res)
+	    ;;     	      (if (> numretries 0)
+	    ;;     		  (begin
+	    ;;     		    (debug:print 2 "WARNING: no reply to query " params ", trying resend")
+	    ;;     		    (debug:print-info 11 "re-sending message")
+	    ;;     		    (send-message push-socket zdat)
+	    ;;     		    (debug:print-info 11 "message re-sent")
+	    ;;     		    (loop (- n 1)))
+	    ;;     		  ;; (apply cdb:client-call *runremote* qtype immediate (- numretries 1) params))
+	    ;;     		  (begin
+	    ;;     		    (debug:print 0 "ERROR: cdb:client-call timed out " params ", exiting.")
+	    ;;     		    (exit 5))))))))
 	(debug:print-info 11 "Starting threads")
 	(let ((th1 (make-thread send-receive "send receive"))
-	      (th2 (make-thread timeout      "timeout")))
+	      ;; (th2 (make-thread timeout      "timeout"))
+	      )
 	  (thread-start! th1)
-	  (thread-start! th2)
+	  ;; (thread-start! th2)
 	  (thread-join!  th1)
 	  (debug:print-info 11 "cdb:client-call returning res=" res)
 	  res))))))
   
 (define (cdb:set-verbosity serverdat val)
@@ -1266,14 +1351,11 @@
 
 (define (cdb:pass-fail-counts serverdat test-id fail-count pass-count)
   (cdb:client-call serverdat 'pass-fail-counts #t *default-numtries* fail-count pass-count test-id))
 
 (define (cdb:tests-register-test serverdat run-id test-name item-path)
-  (let ((item-paths (if (equal? item-path "")
-			(list item-path)
-			(list item-path ""))))
-    (cdb:client-call serverdat 'register-test #t *default-numtries* run-id test-name item-path)))
+  (cdb:client-call serverdat 'register-test #t *default-numtries* run-id test-name item-path)) ;; args in the ?-order of the register-test query: run_id, testname, item_path
 
 (define (cdb:flush-queue serverdat)
   (cdb:client-call serverdat 'flush #f *default-numtries*))
 
 (define (cdb:kill-server serverdat)
@@ -1304,10 +1386,14 @@
      db
      "SELECT rundir,final_logf FROM tests WHERE run_id=? AND testname=? AND item_path='';"
      run-id test-name)
     res))
 
+;;======================================================================
+;; A G G R E G A T E D   T R A N S A C T I O N   D B   W R I T E S 
+;;======================================================================
+
 (define db:queries 
   (list '(register-test          "INSERT OR IGNORE INTO tests (run_id,testname,event_time,item_path,state,status) VALUES (?,?,strftime('%s','now'),?,'NOT_STARTED','n/a');")
 	'(state-status           "UPDATE tests SET state=?,status=? WHERE id=?;")
 	'(state-status-msg       "UPDATE tests SET state=?,status=?,comment=? WHERE id=?;")
 	'(pass-fail-counts       "UPDATE tests SET fail_count=?,pass_count=? WHERE id=?;")
@@ -1323,10 +1409,15 @@
 	'(test-set-log            "UPDATE tests SET final_logf=? WHERE id=?;")
 	'(test-set-rundir-by-test-id "UPDATE tests SET rundir=? WHERE id=?")
 	'(test-set-rundir         "UPDATE tests SET rundir=? WHERE run_id=? AND testname=? AND item_path=?;")
 	'(delete-tests-in-state   "DELETE FROM tests WHERE state=? AND run_id=?;")
 	'(tests:test-set-toplog   "UPDATE tests SET final_logf=? WHERE run_id=? AND testname=? AND item_path='';")
+	'(update-cpuload-diskfree "UPDATE tests SET cpuload=?,diskfree=? WHERE id=?;")
+	'(update-run-duration     "UPDATE tests SET run_duration=? WHERE id=?;")
+	'(update-uname-host       "UPDATE tests SET uname=?,host=? WHERE id=?;")
+	'(update-test-state       "UPDATE tests SET state=? WHERE state=? AND run_id=? AND testname=? AND NOT (item_path='' AND testname IN (SELECT DISTINCT testname FROM tests WHERE testname=? AND item_path != ''));")
+	'(update-test-status      "UPDATE tests SET status=? WHERE status like ? AND run_id=? AND testname=? AND NOT (item_path='' AND testname IN (SELECT DISTINCT testname FROM tests WHERE testname=? AND item_path != ''));")
     ))
 
 ;; do not run these as part of the transaction
 (define db:special-queries   '(rollup-tests-pass-fail
 			       db:roll-up-pass-fail-counts
@@ -1442,11 +1533,11 @@
 	       (< (current-seconds) timeout))
 	  (begin
 	    (thread-sleep! 0.01)
 	    (loop))))
     (set! *number-of-writes*   (+ *number-of-writes*   1))
-    (set! *writes-total-delay* (+ *writes-total-delay* 1))
+    (set! *writes-total-delay* (+ *writes-total-delay* (- (current-milliseconds) start-time)))
     got-it))
 	  
 (define (db:process-queue-item db item)
   (let* ((stmt-key       (cdb:packet-get-qtype item))
 	 (qry-sig        (cdb:packet-get-query-sig item))
@@ -1529,19 +1620,20 @@
      "SELECT id,item_path,state,status,run_duration,final_logf,comment FROM tests WHERE run_id=? AND testname=? AND item_path != '';"
      run-id test-name)
     res))
 
 ;; Rollup the pass/fail counts from itemized tests into fail_count and pass_count
+;; NOTE: Is this duplicating (db:test-data-rollup db test-id status) ????
 (define (db:roll-up-pass-fail-counts db run-id test-name item-path status)
   ;; (cdb:flush-queue *runremote*)
   (if (and (not (equal? item-path ""))
 	   (member status '("PASS" "WARN" "FAIL" "WAIVED" "RUNNING" "CHECK" "SKIP")))
       (begin
 	(sqlite3:execute 
 	 db
 	 "UPDATE tests 
-             SET fail_count=(SELECT count(id) FROM tests WHERE run_id=? AND testname=? AND item_path != '' AND status='FAIL'),
+             SET fail_count=(SELECT count(id) FROM tests WHERE run_id=? AND testname=? AND item_path != '' AND status IN ('FAIL','CHECK')),
                  pass_count=(SELECT count(id) FROM tests WHERE run_id=? AND testname=? AND item_path != '' AND status IN ('PASS','WARN','WAIVED'))
              WHERE run_id=? AND testname=? AND item_path='';"
 	 run-id test-name run-id test-name run-id test-name)
         ;; (thread-sleep! 0.1) ;; give other processes a chance here, no, better to be done ASAP?
 	(if (equal? status "RUNNING") ;; running takes priority over all other states, force the test state to RUNNING
@@ -1553,16 +1645,20 @@
                                    WHEN (SELECT count(id) FROM tests 
                                                 WHERE run_id=? AND testname=?
                                                      AND item_path != '' 
                                                      AND state in ('RUNNING','NOT_STARTED')) > 0 THEN 'RUNNING'
                                    ELSE 'COMPLETED' END,
-                                      status=CASE 
-                                            WHEN fail_count > 0 THEN 'FAIL' 
-                                            WHEN pass_count > 0 AND fail_count=0 THEN 'PASS' 
-                                            ELSE 'UNKNOWN' END
+                            status=CASE 
+                                  WHEN fail_count > 0 THEN 'FAIL' 
+                                  WHEN pass_count > 0 AND fail_count=0 THEN 'PASS' 
+                                  WHEN (SELECT count(id) FROM tests
+                                         WHERE run_id=? AND testname=?
+                                              AND item_path != ''
+                                              AND status = 'SKIP') > 0 THEN 'SKIP'
+                                  ELSE 'UNKNOWN' END
                        WHERE run_id=? AND testname=? AND item_path='';"
-	     run-id test-name run-id test-name))
+	     run-id test-name run-id test-name run-id test-name))
 	#f)
       #f))
 
 ;;======================================================================
 ;; Tests meta data
@@ -1588,13 +1684,13 @@
 
 ;;======================================================================
 ;; T E S T   D A T A 
 ;;======================================================================
 
-(define (db:csv->test-data db test-id csvdata)
+(define (db:csv->test-data db test-id csvdata #!key (work-area #f))
   (debug:print 4 "test-id " test-id ", csvdata: " csvdata)
-  (let ((tdb     (db:open-test-db-by-test-id db test-id)))
+  (let ((tdb     (db:open-test-db-by-test-id db test-id work-area: work-area)))
     (if tdb
 	(let ((csvlist (csv->list (make-csv-reader
 				   (open-input-string csvdata)
 				   '((strip-leading-whitespace? #t)
 				     (strip-trailing-whitespace? #t)) )))) ;; (csv->list csvdata)))
@@ -1644,17 +1740,17 @@
 			       ((<=) (if (<= value expected) "pass" "fail"))
 			       (else (conc "ERROR: bad tol comparator " tol))))))
 	       (debug:print 4 "AFTER2: category: " category " variable: " variable " value: " value 
 			    ", expected: " expected " tol: " tol " units: " units " status: " status " comment: " comment)
 	       (sqlite3:execute tdb "INSERT OR REPLACE INTO test_data (test_id,category,variable,value,expected,tol,units,comment,status,type) VALUES (?,?,?,?,?,?,?,?,?,?);"
-				test-id category variable value expected tol units (if comment comment "") status type)
-	       (sqlite3:finalize! tdb)))
-	   csvlist)))))
+				test-id category variable value expected tol units (if comment comment "") status type)))
+	   csvlist)
+	  (sqlite3:finalize! tdb)))))
 
 ;; get a list of test_data records matching categorypatt
-(define (db:read-test-data db test-id categorypatt)
-  (let ((tdb  (db:open-test-db-by-test-id db test-id)))
+(define (db:read-test-data db test-id categorypatt #!key (work-area #f))
+  (let ((tdb  (db:open-test-db-by-test-id db test-id work-area: work-area)))
     (if tdb
 	(let ((res '()))
 	  (sqlite3:for-each-row 
 	   (lambda (id test_id category variable value expected tol units comment status type)
 	     (set! res (cons (vector id test_id category variable value expected tol units comment status type) res)))
@@ -1663,28 +1759,28 @@
 	  (sqlite3:finalize! tdb)
 	  (reverse res))
 	'())))
 
 ;; NOTE: Run this local with #f for db !!!
-(define (db:load-test-data db test-id)
+(define (db:load-test-data db test-id #!key (work-area #f))
   (let loop ((lin (read-line)))
     (if (not (eof-object? lin))
 	(begin
 	  (debug:print 4 lin)
-	  (db:csv->test-data db test-id lin)
+	  (db:csv->test-data db test-id lin work-area: work-area)
 	  (loop (read-line)))))
   ;; roll up the current results.
   ;; FIXME: Add the status to 
-  (db:test-data-rollup db test-id #f))
+  (db:test-data-rollup db test-id #f work-area: work-area))
 
 ;; WARNING: Do NOT call this for the parent test on an iterated test
 ;; Roll up test_data pass/fail results
 ;; look at the test_data status field, 
 ;;    if all are pass (any case) and the test status is PASS or NULL or '' then set test status to PASS.
 ;;    if one or more are fail (any case) then set test status to PASS, non "pass" or "fail" are ignored
-(define (db:test-data-rollup db test-id status)
-  (let ((tdb (db:open-test-db-by-test-id db test-id))
+(define (db:test-data-rollup db test-id status #!key (work-area #f))
+  (let ((tdb (db:open-test-db-by-test-id db test-id work-area: work-area))
 	(fail-count 0)
 	(pass-count 0))
     (if tdb
 	(begin
 	  (sqlite3:for-each-row
@@ -1699,11 +1795,15 @@
 
 	  ;; Now rollup the counts to the central megatest.db
 	  (cdb:pass-fail-counts *runremote* test-id fail-count pass-count)
 	  ;; (sqlite3:execute db "UPDATE tests SET fail_count=?,pass_count=? WHERE id=?;" 
 	  ;;                     fail-count pass-count test-id)
-	  (cdb:flush-queue *runremote*)
+
+	  ;; The flush is not needed with the transaction based write aggregation enabled. Remove these commented lines
+	  ;; next time you read this!
+	  ;;
+	  ;; (cdb:flush-queue *runremote*)
 	  ;; (thread-sleep! 1) ;; play nice with the queue by ensuring the rollup is at least 10ms later than the set
 	  
 	  ;; if the test is not FAIL then set status based on the fail and pass counts.
 	  (cdb:test-rollup-test_data-pass-fail *runremote* test-id)
 	  ;; (sqlite3:execute
@@ -1729,12 +1829,12 @@
 
 (define (db:step-get-time-as-string vec)
   (seconds->time-string (db:step-get-event_time vec)))
 
 ;; db-get-test-steps-for-run
-(define (db:get-steps-for-test db test-id)
-  (let* ((tdb (db:open-test-db-by-test-id db test-id))
+(define (db:get-steps-for-test db test-id #!key (work-area #f))
+  (let* ((tdb (db:open-test-db-by-test-id db test-id work-area: work-area))
 	 (res '()))
     (if tdb
 	(begin
 	  (sqlite3:for-each-row 
 	   (lambda (id test-id stepname state status event-time logfile)
@@ -1746,12 +1846,12 @@
 	  (reverse res))
 	'())))
 
 ;; get a pretty table to summarize steps
 ;;
-(define (db:get-steps-table db test-id)
-  (let ((steps   (db:get-steps-for-test db test-id)))
+(define (db:get-steps-table db test-id #!key (work-area #f))
+  (let ((steps   (db:get-steps-for-test db test-id work-area: work-area)))
     ;; organise the steps for better readability
     (let ((res (make-hash-table)))
       (for-each 
        (lambda (step)
 	 (debug:print 6 "step=" step)
@@ -1806,12 +1906,12 @@
 		      (else #f)))))
       res)))
 
 ;; get a pretty table to summarize steps
 ;;
-(define (db:get-steps-table-list db test-id)
-  (let ((steps   (db:get-steps-for-test db test-id)))
+(define (db:get-steps-table-list db test-id #!key (work-area #f))
+  (let ((steps   (db:get-steps-for-test db test-id work-area: work-area)))
     ;; organise the steps for better readability
     (let ((res (make-hash-table)))
       (for-each 
        (lambda (step)
 	 (debug:print 6 "step=" step)
@@ -1864,35 +1964,38 @@
 		      ((eq? (db:step-get-event_time a)(db:step-get-event_time b)) 
 		       (<   (db:step-get-id a)        (db:step-get-id b)))
 		      (else #f)))))
       res)))
 
-(define (db:get-compressed-steps test-id)
-  (let* ((comprsteps (open-run-close db:get-steps-table #f test-id)))
-    (map (lambda (x)
-	   ;; take advantage of the \n on time->string
-	   (vector
-	    (vector-ref x 0)
-	    (let ((s (vector-ref x 1)))
-	      (if (number? s)(seconds->time-string s) s))
-	    (let ((s (vector-ref x 2)))
-	      (if (number? s)(seconds->time-string s) s))
-	    (vector-ref x 3)    ;; status
-	    (vector-ref x 4)
-	    (vector-ref x 5)))  ;; time delta
-	 (sort (hash-table-values comprsteps)
-	       (lambda (a b)
-		 (let ((time-a (vector-ref a 1))
-		       (time-b (vector-ref b 1)))
-		   (if (and (number? time-a)(number? time-b))
-		       (if (< time-a time-b)
-			   #t
-			   (if (eq? time-a time-b)
-			       (string<? (conc (vector-ref a 2))
-					 (conc (vector-ref b 2)))
-			       #f))
-		       (string<? (conc time-a)(conc time-b)))))))))
+(define (db:get-compressed-steps test-id #!key (work-area #f))
+  (if (or (not work-area)
+	  (file-exists? (conc work-area "/testdat.db")))
+      (let* ((comprsteps (open-run-close db:get-steps-table #f test-id work-area: work-area)))
+	(map (lambda (x)
+	       ;; take advantage of the \n on time->string
+	       (vector
+		(vector-ref x 0)
+		(let ((s (vector-ref x 1)))
+		  (if (number? s)(seconds->time-string s) s))
+		(let ((s (vector-ref x 2)))
+		  (if (number? s)(seconds->time-string s) s))
+		(vector-ref x 3)    ;; status
+		(vector-ref x 4)
+		(vector-ref x 5)))  ;; time delta
+	     (sort (hash-table-values comprsteps)
+		   (lambda (a b)
+		     (let ((time-a (vector-ref a 1))
+			   (time-b (vector-ref b 1)))
+		       (if (and (number? time-a)(number? time-b))
+			   (if (< time-a time-b)
+			       #t
+			       (if (eq? time-a time-b)
+				   (string<? (conc (vector-ref a 2))
+					     (conc (vector-ref b 2)))
+				   #f))
+			   (string<? (conc time-a)(conc time-b))))))))
+      '()))
 
 ;;======================================================================
 ;; M I S C   M A N A G E M E N T   I T E M S 
 ;;======================================================================
 
@@ -1899,26 +2002,25 @@
 ;; the new prereqs calculation, looks also at itempath if specified
 ;; all prereqs must be met:
 ;;    if prereq test with itempath='' is COMPLETED and PASS, WARN, CHECK, or WAIVED then prereq is met
 ;;    if prereq test with itempath=ref-item-path and COMPLETED with PASS, WARN, CHECK, or WAIVED then prereq is met
 ;;
-;; Note: do not convert to remote as it calls remote under the hood
 ;; Note: mode 'normal means that tests must be COMPLETED and ok (i.e. PASS, WARN, CHECK, SKIP or WAIVED)
 ;;       mode 'toplevel means that tests must be COMPLETED only
 ;;       mode 'itemmatch means that tests items must be COMPLETED and (PASS|WARN|WAIVED|CHECK) [[ NB// NOT IMPLEMENTED YET ]]
 ;; 
-(define (db:get-prereqs-not-met db run-id waitons ref-item-path #!key (mode 'normal))
+(define (db:get-prereqs-not-met run-id waitons ref-item-path #!key (mode 'normal))
   (if (or (not waitons)
 	  (null? waitons))
       '()
       (let* ((unmet-pre-reqs '())
 	     (result         '()))
 	(for-each 
 	 (lambda (waitontest-name)
 	   ;; by getting the tests with matching name we are looking only at the matching test 
 	   ;; and related sub items
-	   (let ((tests             (db:get-tests-for-run db run-id waitontest-name '() '()))
+	   (let ((tests             (cdb:remote-run db:get-tests-for-run #f run-id waitontest-name '() '()))
 		 (ever-seen         #f)
 		 (parent-waiton-met #f)
 		 (item-waiton-met   #f))
 	     (for-each 
 	      (lambda (test)
@@ -1948,14 +2050,14 @@
 	     (if (not ever-seen)
 		 (set! result (append (if (null? tests)(list waitontest-name) tests) result)))))
 	 waitons)
 	(delete-duplicates result))))
 
-(define (db:teststep-set-status! db test-id teststep-name state-in status-in comment logfile)
+(define (db:teststep-set-status! db test-id teststep-name state-in status-in comment logfile #!key (work-area #f))
   (debug:print 4 "test-id: " test-id " teststep-name: " teststep-name)
   ;;                 db:open-test-db-by-test-id does cdb:remote-run
-  (let* ((tdb       (db:open-test-db-by-test-id db test-id))
+  (let* ((tdb       (db:open-test-db-by-test-id db test-id work-area: work-area))
 	 (state     (items:check-valid-items "state" state-in))
 	 (status    (items:check-valid-items "status" status-in)))
     (if (or (not state)(not status))
 	(debug:print 3 "WARNING: Invalid " (if status "status" "state")
 		     " value \"" (if status state-in status-in) "\", update your validvalues section in megatest.config"))

Index: http-transport.scm
==================================================================
--- http-transport.scm
+++ http-transport.scm
@@ -11,21 +11,24 @@
 (require-extension (srfi 18) extras tcp s11n)
 
 (use sqlite3 srfi-1 posix regex regex-case srfi-69 hostinfo md5 message-digest)
 (import (prefix sqlite3 sqlite3:))
 
-(use spiffy uri-common intarweb http-client spiffy-request-vars)
+(use spiffy uri-common intarweb http-client spiffy-request-vars  uri-common intarweb)
 
+;; Configurations for server
 (tcp-buffer-size 2048)
+(max-connections 2048) 
 
 (declare (unit http-transport))
 
 (declare (uses common))
 (declare (uses db))
 (declare (uses tests))
 (declare (uses tasks)) ;; tasks are where stuff is maintained about what is running.
 (declare (uses server))
+(declare (uses daemon))
 
 (include "common_records.scm")
 (include "db_records.scm")
 
 (define (http-transport:make-server-url hostport)
@@ -43,10 +46,22 @@
 ;; Call this to start the actual server
 ;;
 
 (define *db:process-queue-mutex* (make-mutex))
 
+(define (server:get-best-guess-address hostname)
+  (let ((res #f))
+    (for-each 
+     (lambda (adr)
+       (if (not (eq? (u8vector-ref adr 0) 127))
+	   (set! res adr)))
+     (vector->list (hostinfo-addresses (hostname->hostinfo hostname))))
+    (string-intersperse 
+     (map number->string
+	  (u8vector->list
+	   (if res res (hostname->ip hostname)))) ".")))
+
 (define (http-transport:run hostn)
   (debug:print 2 "Attempting to start the server ...")
   (if (not *toppath*)
       (if (not (setup-for-run))
 	  (begin
@@ -56,11 +71,12 @@
 	 ;;        	      #f ;; (get-host-name) 
 	 ;;        	      hostn))
 	 (db              #f) ;;        (open-db)) ;; we don't want the server to be opening and closing the db unnecesarily
 	 (hostname        (get-host-name))
 	 (ipaddrstr       (let ((ipstr (if (string=? "-" hostn)
-					   (string-intersperse (map number->string (u8vector->list (hostname->ip hostname))) ".")
+					   ;; (string-intersperse (map number->string (u8vector->list (hostname->ip hostname))) ".")
+					   (server:get-best-guess-address hostname)
 					   #f)))
 			    (if ipstr ipstr hostn))) ;; hostname)))
 	 (start-port    (if (and (args:get-arg "-port")
 				 (string->number (args:get-arg "-port")))
 			    (string->number (args:get-arg "-port"))
@@ -118,11 +134,11 @@
    exn
    (begin
      (print-error-message exn)
      (if (< portnum 9000)
 	 (begin 
-	   (print "WARNING: failed to start on portnum: " portnum ", trying next port")
+	   (debug:print 0 "WARNING: failed to start on portnum: " portnum ", trying next port")
 	   (thread-sleep! 0.1)
 	   ;; (open-run-close tasks:remove-server-records tasks:open-db)
 	   (open-run-close tasks:server-delete tasks:open-db ipaddrstr portnum)
 	   (http-transport:try-start-server ipaddrstr (+ portnum 1)))
 	 (print "ERROR: Tried and tried but could not start the server")))
@@ -130,74 +146,106 @@
    (set! *runremote* (list ipaddrstr portnum))
    ;; (open-run-close tasks:remove-server-records tasks:open-db)
    (open-run-close tasks:server-register 
 		   tasks:open-db 
 		   (current-process-id)
-		   ipaddrstr portnum 0 'live 'http)
-   (print "INFO: Trying to start server on " ipaddrstr ":" portnum)
+		   ipaddrstr portnum 0 'startup 'http)
+   (debug:print 1 "INFO: Trying to start server on " ipaddrstr ":" portnum)
    ;; This starts the spiffy server
-   (start-server port: portnum)
+   ;; NEED WAY TO SET IP TO #f TO BIND ALL
+   (start-server bind-address: ipaddrstr port: portnum)
    (open-run-close tasks:server-delete tasks:open-db ipaddrstr portnum)
-   (print "INFO: server has been stopped")))
+   (debug:print 1 "INFO: server has been stopped")))
 
 ;;======================================================================
 ;; S E R V E R   U T I L I T I E S 
 ;;======================================================================
 
 ;;======================================================================
 ;; C L I E N T S
 ;;======================================================================
 
+(define *http-mutex* (make-mutex))
+
+;; (system "megatest -list-servers | grep alive || megatest -server - -daemonize && sleep 4")
+
 ;; <html>
 ;; <head></head>
 ;; <body>1 Hello, world! Goodbye Dolly</body></html>
 ;; Send msg to serverdat and receive result
-(define (http-transport:client-send-receive serverdat msg)
-  (let* ((url        (http-transport:make-server-url serverdat))
-	 (fullurl    (conc url "/ctrl")) ;; (conc url "/?dat=" msg)))
-	 (numretries 0))     
+(define (http-transport:client-send-receive serverdat msg #!key (numretries 30))
+  (let* (;; (url        (http-transport:make-server-url serverdat))
+	 (fullurl    (caddr serverdat)) ;; (conc url "/ctrl")) ;; (conc url "/?dat=" msg)))
+	 (res        #f))
     (handle-exceptions
      exn
-     (if (< numretries 200)
-	 (http-transport:client-send-receive serverdat msg))
+     (begin
+       (print "ERROR IN http-transport:client-send-receive " ((condition-property-accessor 'exn 'message) exn))
+       (thread-sleep! 2)
+       (if (> numretries 0)
+	   (http-transport:client-send-receive serverdat msg numretries: (- numretries 1))))
      (begin
        (debug:print-info 11 "fullurl=" fullurl "\n")
        ;; set up the http-client here
-       (max-retry-attempts 100)
+       (max-retry-attempts 5)
+       ;; consider all requests idempotent
        (retry-request? (lambda (request)
-			 (thread-sleep! (/ (if (> numretries 100) 100 numretries) 10))
-			 (set! numretries (+ numretries 1))
-			 #t))
+			 #t))   ;;  		 (thread-sleep! (/ (if (> numretries 100) 100 numretries) 10))
+       ;; (set! numretries (- numretries 1))
+       ;;  		 #t))
        ;; send the data and get the response
        ;; extract the needed info from the http data and 
        ;; process and return it.
-       (let* ((res   (with-input-from-request fullurl 
-					      ;; #f
-					      ;; msg 
-					      (list (cons 'dat msg)) 
-					      read-string)))
+       (let* ((send-recieve (lambda ()
+			      (mutex-lock! *http-mutex*)
+			      (set! res (with-input-from-request 
+					 fullurl 
+					 (list (cons 'dat msg)) 
+					 read-string))
+			      (close-all-connections!) 
+			      (mutex-unlock! *http-mutex*)))
+	      (time-out     (lambda ()
+			      (thread-sleep! 5)
+			      (if (not res)
+				  (begin
+				    (debug:print 0 "WARNING: communication with the server timed out.")
+				    (mutex-unlock! *http-mutex*)
+				    (http-transport:client-send-receive serverdat msg numretries: (- numretries 1))
+				    (if (< numretries 3) ;; on last try just exit
+					(begin
+					  (debug:print 0 "ERROR: communication with the server timed out. Giving up.")
+					  (exit 1)))))))
+	      (th1 (make-thread send-recieve "with-input-from-request"))
+	      (th2 (make-thread time-out     "time out")))
+	 (thread-start! th1)
+	 (thread-start! th2)
+	 (thread-join! th1)
+	 (thread-terminate! th2)
 	 (debug:print-info 11 "got res=" res)
 	 (let ((match (string-search (regexp "<body>(.*)<.body>") res)))
 	   (debug:print-info 11 "match=" match)
 	   (let ((final (cadr match)))
 	     (debug:print-info 11 "final=" final)
 	     final)))))))
 
 (define (http-transport:client-connect iface port)
   (let* ((login-res   #f)
-	 (serverdat   (list iface port)))
+	 (uri-dat     (make-request method: 'POST uri: (uri-reference (conc "http://" iface ":" port "/ctrl"))))
+	 (serverdat   (list iface port uri-dat)))
     (set! login-res (client:login serverdat))
     (if (and (not (null? login-res))
 	     (car login-res))
 	(begin
 	  (debug:print-info 2 "Logged in and connected to " iface ":" port)
 	  (set! *runremote* serverdat)
 	  serverdat)
 	(begin
-	  (debug:print-info 2 "Failed to login or connect to " iface ":" port)
-	  (set! *runremote* #f)
-	  #f))))
+	  (debug:print-info 0 "ERROR: Failed to login or connect to " iface ":" port)
+	  (exit 1)))))
+;; 	  (set! *runremote* #f)
+;; 	  (set! *transport-type* 'fs)
+;; 	  #f))))
 
 
 ;; run http-transport:keep-running in a parallel thread to monitor that the db is being 
 ;; used and to shutdown after sometime if it is not.
 ;;
@@ -208,25 +256,27 @@
   (let* ((server-info (let loop ()
                         (let ((sdat #f))
                           (mutex-lock! *heartbeat-mutex*)
                           (set! sdat *runremote*)
                           (mutex-unlock! *heartbeat-mutex*)
-                          (if sdat sdat
+                          (if sdat
+			      sdat
                               (begin
                                 (sleep 4)
                                 (loop))))))
          (iface       (car server-info))
          (port        (cadr server-info))
          (last-access 0)
 	 (tdb         (tasks:open-db))
-	 (spid        (tasks:server-get-server-id tdb #f iface port #f))
+	 (spid        ;;(open-run-close tasks:server-get-server-id tasks:open-db #f iface port #f))
+	   (tasks:server-get-server-id tdb #f iface port #f))
 	 (server-timeout (let ((tmo (config-lookup  *configdat* "server" "timeout")))
 			   (if (and (string? tmo)
 				    (string->number tmo))
 			       (* 60 60 (string->number tmo))
 			       ;; default to three days
-			       (* 3 24 60)))))
+			       (* 3 24 60 60)))))
     (debug:print-info 2 "server-timeout: " server-timeout ", server pid: " spid " on " iface ":" port)
     (let loop ((count 0))
       (thread-sleep! 4) ;; no need to do this very often
       ;; NB// sync currently does NOT return queue-length
       (let () ;; (queue-len (cdb:client-call server-info 'sync #t 1)))
@@ -237,18 +287,20 @@
 	;; Check that iface and port have not changed (can happen if server port collides)
 	(mutex-lock! *heartbeat-mutex*)
 	(set! sdat *runremote*)
 	(mutex-unlock! *heartbeat-mutex*)
 
-	(if (not (equal? sdat (list iface port)))
+	(if (or (not (equal? sdat (list iface port)))
+		(not spid))
 	    (begin 
-	      (debug:print-info 1 "interface changed, refreshing iface and port info")
+	      (debug:print-info 0 "interface changed, refreshing iface and port info")
 	      (set! iface (car sdat))
 	      (set! port  (cadr sdat))
 	      (set! spid  (tasks:server-get-server-id tdb #f iface port #f))))
 
         ;; NOTE: Get rid of this mechanism! It really is not needed...
+        ;; (open-run-close tasks:server-update-heartbeat tasks:open-db spid)
         (tasks:server-update-heartbeat tdb spid)
       
         ;; (if ;; (or (> numrunning 0) ;; stay alive for two days after last access
         (mutex-lock! *heartbeat-mutex*)
         (set! last-access *last-db-access*)
@@ -261,11 +313,11 @@
               (loop 0))
             (begin
               (debug:print-info 0 "Starting to shutdown the server.")
               ;; need to delete only *my* server entry (future use)
               (set! *time-to-exit* #t)
-              (tasks:server-deregister-self tdb (get-host-name))
+              (open-run-close tasks:server-deregister-self tasks:open-db (get-host-name))
               (thread-sleep! 1)
               (debug:print-info 0 "Max cached queries was    " *max-cache-size*)
 	      (debug:print-info 0 "Number of cached writes   " *number-of-writes*)
 	      (debug:print-info 0 "Average cached write time "
 				(if (eq? *number-of-writes* 0)
@@ -312,10 +364,27 @@
 	      (set! *didsomething* #t)
 	      (thread-join! th2))
 	    (debug:print 0 "ERROR: Failed to setup for megatest")))
     (exit)))
 
+;; (use trace)
+;; (trace http-transport:keep-running 
+;;        tasks:server-update-heartbeat
+;;        tasks:server-get-server-id)
+;;        tasks:get-best-server
+;;        http-transport:run
+;;        http-transport:launch
+;;        http-transport:try-start-server
+;;        http-transport:client-send-receive
+;;        http-transport:make-server-url
+;;        tasks:server-register
+;;        tasks:server-delete
+;;        start-server
+;;        hostname->ip
+;;        with-input-from-request
+;;        tasks:server-deregister-self)
+
 (define (http-transport:server-signal-handler signum)
   (handle-exceptions
    exn
    (debug:print " ... exiting ...")
    (let ((th1 (make-thread (lambda ()

Index: launch.scm
==================================================================
--- launch.scm
+++ launch.scm
@@ -53,13 +53,13 @@
 (define (launch:execute encoded-cmd)
   (let* ((cmdinfo   (read (open-input-string (base64:base64-decode encoded-cmd)))))
     (setenv "MT_CMDINFO" encoded-cmd)
     (if (list? cmdinfo) ;; ((testpath /tmp/mrwellan/jazzmind/src/example_run/tests/sqlitespeed)
 	;; (test-name sqlitespeed) (runscript runscript.rb) (db-host localhost) (run-id 1))
-	(let* ((testpath  (assoc/default 'testpath  cmdinfo))  ;; How is testpath different from work-area ??
+	(let* ((testpath  (assoc/default 'testpath  cmdinfo))  ;; testpath is the test spec area
 	       (top-path  (assoc/default 'toppath   cmdinfo))
-	       (work-area (assoc/default 'work-area cmdinfo))
+	       (work-area (assoc/default 'work-area cmdinfo))  ;; work-area is the test run area
 	       (test-name (assoc/default 'test-name cmdinfo))
 	       (runscript (assoc/default 'runscript cmdinfo))
 	       (ezsteps   (assoc/default 'ezsteps   cmdinfo))
 	       ;; (runremote (assoc/default 'runremote cmdinfo))
 	       (transport (assoc/default 'transport cmdinfo))
@@ -126,18 +126,18 @@
 
 	  (change-directory *toppath*) 
 	  (set-megatest-env-vars run-id) ;; these may be needed by the launching process
 	  (change-directory work-area) 
 
-	  (open-run-close set-run-config-vars #f run-id keys keyvals)
+	  (set-run-config-vars run-id keys keyvals target) ;; (db:get-target db run-id))
 	  ;; environment overrides are done *before* the remaining critical envars.
 	  (alist->env-vars env-ovrd)
 	  (set-megatest-env-vars run-id)
 	  (set-item-env-vars itemdat)
 	  (save-environment-as-files "megatest")
 	  ;; open-run-close not needed for test-set-meta-info
-	  (test-set-meta-info #f test-id run-id test-name itemdat 0)
+	  (tests:set-meta-info #f test-id run-id test-name itemdat 0 work-area)
 	  (tests:test-set-status! test-id "REMOTEHOSTSTART" "n/a" (args:get-arg "-m") #f)
 	  (if (args:get-arg "-xterm")
 	      (set! fullrunscript "xterm")
 	      (if (and fullrunscript (not (file-execute-access? fullrunscript)))
 		  (system (conc "chmod ug+x " fullrunscript))))
@@ -208,11 +208,11 @@
 						   ;; call the command using mt_ezstep
 						   (set! script (conc "mt_ezstep " stepname " " (if prevstep prevstep "-") " " stepcmd))
 
 						   (debug:print 4 "script: " script)
 						   ;; DO NOT remote
-						   (db:teststep-set-status! #f test-id stepname "start" "-" #f #f)
+						   (db:teststep-set-status! #f test-id stepname "start" "-" #f #f work-area: work-area)
 						   ;; now launch
 						   (let ((pid (process-run script)))
 						     (let processloop ((i 0))
 						       (let-values (((pid-val exit-status exit-code)(process-wait pid #t)))
 								   (mutex-lock! m)
@@ -226,11 +226,11 @@
 									 (processloop (+ i 1))))
 								   ))
                                                      (let ((exinfo (vector-ref exit-info 2))
                                                            (logfna (if logpro-used (conc stepname ".html") "")))
 						       ;; testing if procedures called in a remote call cause problems (ans: no or so I suspect)
-						       (db:teststep-set-status! #f test-id stepname "end" exinfo #f logfna))
+						       (db:teststep-set-status! #f test-id stepname "end" exinfo #f logfna work-area: work-area))
 						     (if logpro-used
 							 (cdb:test-set-log! *runremote*  test-id (conc stepname ".html")))
 						     ;; set the test final status
 						     (let* ((this-step-status (cond
 									       ((and (eq? (vector-ref exit-info 2) 2) logpro-used) 'warn)
@@ -276,11 +276,11 @@
 					(kill-tries 0))
 				   (let loop ((minutes   (calc-minutes)))
 				     (begin
 				       (set! kill-job? (test-get-kill-request test-id)) ;; run-id test-name itemdat))
 				       ;; open-run-close not needed for test-set-meta-info
-				       (test-set-meta-info #f test-id run-id test-name itemdat minutes)
+				       (tests:set-meta-info #f test-id run-id test-name itemdat minutes work-area)
 				       (if kill-job? 
 					   (begin
 					     (mutex-lock! m)
 					     (let* ((pid (vector-ref exit-info 0)))
 					       (if (number? pid)
@@ -336,11 +336,11 @@
 				      (if (equal? (db:test-get-status testinfo) "AUTO") "AUTO-WARN" "WARN"))
 				     (else "FAIL"))
 				    (args:get-arg "-m") #f)))
 	      ;; for automated creation of the rollup html file this is a good place...
 	      (if (not (equal? item-path ""))
-		  (open-run-close tests:summarize-items #f run-id test-name #f)) ;; don't force - just update if no
+		  (tests:summarize-items #f run-id test-name #f)) ;; don't force - just update if no
 	      )
 	    (mutex-unlock! m)
 	    ;; (exec-results (cmd-run->list fullrunscript)) ;;  (list ">" (conc test-name "-run.log"))))
 	    ;; (success      exec-results)) ;; (eq? (cadr exec-results) 0)))
 	    (debug:print 2 "Output from running " fullrunscript ", pid " (vector-ref exit-info 0) " in work area " 
@@ -406,18 +406,16 @@
 ;; 
 ;; All log file links should be stored relative to the top of link path
 ;;  
 ;; <target> - <testname> [ - <itempath> ] 
 ;;
-(define (create-work-area db run-id test-id test-src-path disk-path testname itemdat)
-  (let* ((run-info (cdb:remote-run db:get-run-info #f run-id))
-	 (item-path (item-list->path itemdat))
+(define (create-work-area run-id run-info key-vals test-id test-src-path disk-path testname itemdat)
+  (let* ((item-path (item-list->path itemdat))
 	 (runname  (db:get-value-by-header (db:get-row run-info)
 					   (db:get-header run-info)
 					   "runname"))
 	 ;; convert back to db: from rdb: - this is always run at server end
-	 (key-vals (cdb:remote-run db:get-key-vals #f run-id))
 	 (target   (string-intersperse key-vals "/"))
 
 	 (not-iterated  (equal? "" item-path))
 
 	 ;; all tests are found at <rundir>/test-base or <linkdir>/test-base
@@ -537,15 +535,16 @@
 	(begin
 	  (let* ((ovrcmd (let ((cmd (config-lookup *configdat* "setup" "testcopycmd")))
 			   (if cmd
 			       ;; substitute the TEST_SRC_PATH and TEST_TARG_PATH
 			       (string-substitute "TEST_TARG_PATH" test-path
-						  (string-substitute "TEST_SRC_PATH" test-src-path cmd))
+						  (string-substitute "TEST_SRC_PATH" test-src-path cmd #t) #t)
 			       #f)))
 		 (cmd    (if ovrcmd 
 			     ovrcmd
-			     (conc "rsync -av" (if (debug:debug-mode 1) "" "q") " " test-src-path "/ " test-path "/")))
+			     (conc "rsync -av" (if (debug:debug-mode 1) "" "q") " " test-src-path "/ " test-path "/"
+				   " >> " test-path "/mt_launch.log 2>> " test-path "/mt_launch.log")))
 		 (status (system cmd)))
 	    (if (not (eq? status 0))
 		(debug:print 2 "ERROR: problem with running \"" cmd "\"")))
 	  (list lnkpathf lnkpath ))
 	(list #f #f))))
@@ -555,11 +554,11 @@
 ;; 3. create link from run dir to megatest runs area 
 ;; 4. remotely run the test on allocated host
 ;;    - could be ssh to host from hosts table (update regularly with load)
 ;;    - could be netbatch
 ;;      (launch-test db (cadr status) test-conf))
-(define (launch-test db run-id runname test-conf keyvallst test-name test-path itemdat params)
+(define (launch-test test-id run-id run-info key-vals runname test-conf keyvallst test-name test-path itemdat params)
   (change-directory *toppath*)
   (alist->env-vars ;; consolidate this code with the code in megatest.scm for "-execute"
    (list ;; (list "MT_TEST_RUN_DIR" work-area)
     (list "MT_RUN_AREA_HOME" *toppath*)
     (list "MT_TEST_NAME" test-name)
@@ -594,11 +593,11 @@
 	 (diskpath   #f)
 	 (cmdparms   #f)
 	 (fullcmd    #f) ;; (define a (with-output-to-string (lambda ()(write x))))
 	 (mt-bindir-path #f)
 	 (item-path (item-list->path itemdat))
-	 (test-id    (cdb:remote-run db:get-test-id #f run-id test-name item-path))
+	 ;; (test-id    (cdb:remote-run db:get-test-id #f run-id test-name item-path))
 	 (testinfo   (cdb:get-test-info-by-id *runremote* test-id))
 	 (mt_target  (string-intersperse (map cadr keyvallst) "/"))
 	 (debug-param (append (if (args:get-arg "-debug")  (list "-debug" (args:get-arg "-debug")) '())
 			      (if (args:get-arg "-logging")(list "-logging") '()))))
     (if hosts (set! hosts (string-split hosts)))
@@ -607,11 +606,11 @@
     (set! mt-bindir-path (pathname-directory remote-megatest))
     (if launcher (set! launcher (string-split launcher)))
     ;; set up the run work area for this test
     (set! diskpath (get-best-disk *configdat*))
     (if diskpath
-	(let ((dat  (open-run-close create-work-area db run-id test-id test-path diskpath test-name itemdat)))
+	(let ((dat  (create-work-area run-id run-info key-vals test-id test-path diskpath test-name itemdat)))
 	  (set! work-area (car dat))
 	  (set! toptest-work-area (cadr dat))
 	  (debug:print-info 2 "Using work area " work-area))
 	(begin
 	  (set! work-area (conc test-path "/tmp_run"))
@@ -668,34 +667,37 @@
 					  (list "MT_ITEM_INFO" (conc itemdat)) 
 					  (list "MT_RUNNAME"   runname)
 					  (list "MT_TARGET"    mt_target)
 					  )
 				    itemdat)))
-	   (launch-results (apply cmd-run-with-stderr->list ;; cmd-run-proc-each-line
+	   (launch-results (apply (if (equal? (configf:lookup *configdat* "setup" "launchwait") "yes")
+				      cmd-run-with-stderr->list
+				      process-run)
 				  (if useshell
 				      (string-intersperse fullcmd " ")
 				      (car fullcmd))
-				  ;; conc
 				  (if useshell
 				      '()
-				      (cdr fullcmd))))) ;;  launcher fullcmd)));; (apply cmd-run-proc-each-line launcher print fullcmd))) ;; (cmd-run->list fullcmd))
-      (with-output-to-file "mt_launch.log"
-	(lambda ()
-	  (apply print launch-results)))
+				      (cdr fullcmd)))))
+      (if (list? launch-results)
+	  (with-output-to-file "mt_launch.log"
+	    (lambda ()
+	      (apply print launch-results))
+	    #:append))
       (debug:print 2 "Launching completed, updating db")
       (debug:print 2 "Launch results: " launch-results)
       (if (not launch-results)
-	  (begin
-	    (print "ERROR: Failed to run " (string-intersperse fullcmd " ") ", exiting now")
-	    ;; (sqlite3:finalize! db)
-	    ;; good ole "exit" seems not to work
-	    ;; (_exit 9)
-	    ;; but this hack will work! Thanks go to Alan Post of the Chicken email list
-	    ;; NB// Is this still needed? Should be safe to go back to "exit" now?
-	    (process-signal (current-process-id) signal/kill)
-	    ))
+          (begin
+            (print "ERROR: Failed to run " (string-intersperse fullcmd " ") ", exiting now")
+            ;; (sqlite3:finalize! db)
+            ;; good ole "exit" seems not to work
+            ;; (_exit 9)
+            ;; but this hack will work! Thanks go to Alan Post of the Chicken email list
+            ;; NB// Is this still needed? Should be safe to go back to "exit" now?
+            (process-signal (current-process-id) signal/kill)
+            ))
       (alist->env-vars miscprevvals)
       (alist->env-vars testprevvals)
       (alist->env-vars commonprevvals)
       launch-results))
   (change-directory *toppath*))
 

Index: megatest-version.scm
==================================================================
--- megatest-version.scm
+++ megatest-version.scm
@@ -1,7 +1,7 @@
 ;; Always use two digit decimal
 ;; 1.01, 1.02...1.10,1.11 ... 1.99,2.00..
 
 (declare (unit megatest-version))
 
-(define megatest-version 1.5415)
+(define megatest-version 1.5426)
 

Index: megatest.scm
==================================================================
--- megatest.scm
+++ megatest.scm
@@ -8,15 +8,15 @@
 ;;  PURPOSE.
 
 ;; (include "common.scm")
 ;; (include "megatest-version.scm")
 
-(use sqlite3 srfi-1 posix regex regex-case srfi-69 base64 format readline apropos json) ;; (srfi 18) extras)
+(use sqlite3 srfi-1 posix regex regex-case srfi-69 base64 format readline apropos json http-client) ;; (srfi 18) extras)
 (import (prefix sqlite3 sqlite3:))
 (import (prefix base64 base64:))
 
-(use zmq)
+;; (use zmq)
 
 (declare (uses common))
 (declare (uses megatest-version))
 (declare (uses margs))
 (declare (uses runs))
@@ -23,40 +23,23 @@
 (declare (uses launch))
 (declare (uses server))
 (declare (uses client))
 (declare (uses tests))
 (declare (uses genexample))
+(declare (uses daemon))
 
 (define *db* #f) ;; this is only for the repl, do not use in general!!!!
 
 (include "common_records.scm")
 (include "key_records.scm")
 (include "db_records.scm")
 (include "megatest-fossil-hash.scm")
 
-;; (use trace dot-locking)
-;; (trace
-;;  cdb:client-call
-;;  cdb:remote-run
-;;  cdb:test-set-status-state
-;;  change-directory
-;;  db:process-queue-item
-;;  db:test-get-logfile-info
-;;  db:teststep-set-status!
-;;  nice-path
-;;  obtain-dot-lock
-;;  open-run-close
-;;  read-config
-;;  runs:can-run-more-tests
-;;  sqlite3:execute
-;;  sqlite3:for-each-row
-;;  tests:check-waiver-eligibility
-;;  tests:summarize-items
-;;  tests:test-set-status!
-;;  thread-sleep!
-;;)
-       
+(let ((rcfile (conc (get-environment-variable "HOME") "/.megatestrc"))) ;; per-user startup file in $HOME
+  (when (file-exists? rcfile) ;; the file is optional, nothing happens when it is absent
+    (load rcfile)))           ;; evaluated as Scheme code via load
+
 
 (define help (conc "
 Megatest, documentation at http://www.kiatoa.com/fossils/megatest
   version " megatest-version "
   license GPL, Copyright Matt Welland 2006-2012
@@ -118,20 +101,21 @@
   -list-targets           : list the targets in runconfigs.config
   -list-db-targets        : list the target combinations used in the db
   -show-config            : dump the internal representation of the megatest.config file
   -show-runconfig         : dump the internal representation of the runconfigs.config file
   -dumpmode json          : dump in json format instead of sexpr
+  -show-cmdinfo           : dump the command info for a test (run in test environment)
 
 Misc 
   -rebuild-db             : bring the database schema up to date
   -update-meta            : update the tests metadata for all tests
   -env2file fname         : write the environment to fname.csh and fname.sh
   -setvars VAR1=val1,VAR2=val2 : Add environment variables to a run NB// these are
                                  overwritten by values set in config files.
   -server -|hostname      : start the server (reduces contention on megatest.db), use
                             - to automatically figure out hostname
-  -transport http|zmq     : use http or zmq for transport (default is http) 
+  -transport http|fs      : use http or direct access for transport (default is http) 
   -daemonize              : fork into background and disconnect from stdin/out
   -list-servers           : list the servers 
   -stop-server id         : stop server specified by id (see output of -list-servers)
   -repl                   : start a repl (useful for extending megatest)
   -load file.scm          : load and run file.scm
@@ -230,10 +214,11 @@
 			"-list-disks"
 			"-list-targets"
 			"-list-db-targets"
 			"-show-runconfig"
 			"-show-config"
+			"-show-cmdinfo"
 			;; queries
 			"-test-paths" ;; get path(s) to a test, ordered by youngest first
 
 			"-runall"    ;; run all tests
 			"-remove-runs"
@@ -259,10 +244,15 @@
       (print megatest-version)
       (exit)))
 
 (define *didsomething* #f)
 
+;; -list-targets / -list-db-targets default to the "fs" transport unless -transport was given
+(if (not (args:get-arg "-transport"))
+    (if (or (args:get-arg "-list-targets") (args:get-arg "-list-db-targets"))
+	(hash-table-set! args:arg-hash "-transport" "fs")))
+
 ;;======================================================================
 ;; Misc setup stuff
 ;;======================================================================
 
 (debug:setup)
@@ -313,21 +303,38 @@
 		     (hash-table-keys args:arg-hash)
 		     '("-runtests"    "-list-runs"   "-rollup"
 		       "-remove-runs" "-lock"        "-unlock"
 		       "-update-meta" "-extract-ods"))))
 	(if (setup-for-run)
-	    (let ((servers (open-run-close tasks:get-best-server tasks:open-db)))
+	    (let loop ((servers  (open-run-close tasks:get-best-server tasks:open-db))
+		       (trycount 0))
 	      (if (or (not servers)
 		      (null? servers))
 		  (begin
-		    (debug:print 0 "INFO: Starting server as none running ...")
-		    ;; (server:launch (string->symbol (args:get-arg "-transport" "http"))))
-		    (system (conc (car (argv)) " -server - -daemonize -transport " (args:get-arg "-transport" "http")))
-		    (thread-sleep! 3)) ;; give the server a few seconds to start
-		  (debug:print 0 "INFO: Servers already running " servers)
+		    (if (even? trycount) ;; just do the server start every other time through this loop (every 8 seconds)
+			(begin
+			  (debug:print 0 "INFO: Starting server as none running ...")
+			  ;; (server:launch (string->symbol (args:get-arg "-transport" "http"))))
+			  ;; no need to use fork, no need to do the list-servers trick. Just start the damn server, it will exit on its own
+			  ;; if there is an existing server
+			  (system "megatest -server - -daemonize")
+			  (thread-sleep! 3)
+			  ;; (process-run (car (argv)) (list "-server" "-" "-daemonize" "-transport" (args:get-arg "-transport" "http")))
+			  ;; (system (conc "megatest -list-servers | egrep '" megatest-version ".*alive' || megatest -server - -daemonize && sleep 3"))
+			  ;; (process-fork (lambda ()
+			  ;;       	  (daemon:ize)
+			  ;;       	  (server:launch (string->symbol (args:get-arg "-transport" "http")))))
+			  )
+			(begin
+			  (debug:print-info 0 "Waiting for server to start")
+			  (thread-sleep! 4)))
+		    (if (< trycount 10)
+			(loop (open-run-close tasks:get-best-server tasks:open-db) 
+			      (+ trycount 1))
+			(debug:print 0 "WARNING: Couldn't start or find a server.")))
+		  (debug:print 0 "INFO: Server(s) running " servers)
 		  )))))
-	
 
 (if (or (args:get-arg "-list-servers")
 	(args:get-arg "-stop-server"))
     (let ((tl (setup-for-run)))
       (if tl 
@@ -370,12 +377,11 @@
 		       (debug:print-info 0 "Attempting to stop server with pid " pid)
 		       (tasks:kill-server status hostname pullport pid transport)))))
 	     servers)
 	    (debug:print-info 1 "Done with listservers")
 	    (set! *didsomething* #t)
-	    (exit) ;; must do, would have to add checks to many/all calls below
-	    )
+	    (exit)) ;; must do, would have to add checks to many/all calls below
 	  (exit)))
     ;; if not list or kill then start a client (if appropriate)
     (if (or (args-defined? "-h" "-version" "-gen-megatest-area" "-gen-megatest-test")
 	    (eq? (length (hash-table-keys args:arg-hash)) 0))
 	(debug:print-info 1 "Server connection not needed")
@@ -392,20 +398,32 @@
       (for-each (lambda (x)
 		  ;; (print "[" x "]"))
 		  (print x))
 		targets)
       (set! *didsomething* #t)))
+
+(define (full-runconfigs-read)
+  ;; Read runconfigs.config for the target named by -reqtarg (checked first) or
+  ;; -target. With a target, read-config is given the sections ("default" <target>);
+  ;; without one, sections is #f. Returns the value produced by read-config.
+  (let* ((keys     (cdb:remote-run get-keys #f))
+	 (target   (or (args:get-arg "-reqtarg")
+		       (args:get-arg "-target")))
+	 (key-vals (and target (keys:target->keyval keys target)))
+	 (sections (and target (list "default" target))))
+    ;; export the run area and every key/value pair of the target to the environment
+    ;; before the config is read (presumably so the config can refer to them)
+    (setenv "MT_RUN_AREA_HOME" *toppath*)
+    (if key-vals
+	(for-each (lambda (kv)
+		    (setenv (car kv) (cadr kv)))
+		  key-vals))
+    (read-config "runconfigs.config" #f #t sections: sections)))
+
 
 (if (args:get-arg "-show-runconfig")
-    (let* ((target (if (args:get-arg "-reqtarg")
-		       (args:get-arg "-reqtarg")
-		       (if (args:get-arg "-target")
-			   (args:get-arg "-target")
-			   #f)))
-	   (sections (if target (list "default" target) #f))
-	   (data     (read-config "runconfigs.config" #f #t sections: sections)))
-
+    (let ((data (full-runconfigs-read)))
       ;; keep this one local
       (cond
        ((not (args:get-arg "-dumpmode"))
 	(pp (hash-table->alist data)))
        ((string=? (args:get-arg "-dumpmode") "json")
@@ -423,10 +441,17 @@
        ((string=? (args:get-arg "-dumpmode") "json")
 	(json-write data))
        (else
 	(debug:print 0 "ERROR: -dumpmode of " (args:get-arg "-dumpmode") " not recognised")))
       (set! *didsomething* #t)))
+
+(if (args:get-arg "-show-cmdinfo") ;; dump the decoded MT_CMDINFO data (json with -dumpmode json, pretty-printed otherwise)
+    (let* ((raw (getenv "MT_CMDINFO")) (data (and raw (read (open-input-string (base64:base64-decode raw)))))) ;; raw is #f when the variable is unset
+      (cond ((not raw) (debug:print 0 "ERROR: MT_CMDINFO is not set, -show-cmdinfo must be run in a test environment")) ;; report instead of handing #f to base64-decode
+	    ((equal? (args:get-arg "-dumpmode") "json") (json-write data))
+	    (else (pp data)))
+      (set! *didsomething* #t)))
 
 ;;======================================================================
 ;; Remove old run(s)
 ;;======================================================================
 
@@ -500,12 +525,13 @@
 			 ;; (print "[" targetstr "]"))))
 			 (print targetstr))))
 	       (if (not db-targets)
 		   (let* ((run-id (db:get-value-by-header run header "id"))
 			  (tests  (cdb:remote-run db:get-tests-for-run #f run-id testpatt '() '())))
-		     (debug:print 1 "Run: " targetstr " status: " (db:get-value-by-header run header "state")
-				  " run-id: " run-id ", number tests: " (length tests))
+		     (print "Run: " targetstr "/" (db:get-value-by-header run header "runname") 
+			    " status: " (db:get-value-by-header run header "state")
+			    " run-id: " run-id ", number tests: " (length tests))
 		     (for-each 
 		      (lambda (test)
 			(format #t
 				"  Test: ~25a State: ~15a Status: ~15a Runtime: ~5@as Time: ~22a Host: ~10a\n"
 				(conc (db:test-get-testname test)
@@ -596,11 +622,11 @@
    "run a test" 
    (lambda (target runname keys keynames keyvallst)
      (runs:run-tests target
 		     runname
 		     (args:get-arg "-runtests")
-		     (args:get-arg "-testpatt")
+		     (args:get-arg "-runtests")
 		     user
 		     args:arg-hash))))
 
 ;;======================================================================
 ;; Rollup into a run
@@ -788,21 +814,22 @@
 	     (runscript (assoc/default 'runscript cmdinfo))
 	     (db-host   (assoc/default 'db-host   cmdinfo))
 	     (run-id    (assoc/default 'run-id    cmdinfo))
 	     (test-id   (assoc/default 'test-id   cmdinfo))
 	     (itemdat   (assoc/default 'itemdat   cmdinfo))
+	     (work-area (assoc/default 'work-area cmdinfo))
 	     (db        #f))
 	(change-directory testpath)
 	;; (set! *runremote* runremote)
 	(set! *transport-type* (string->symbol transport))
 	(if (not (setup-for-run))
 	    (begin
 	      (debug:print 0 "Failed to setup, exiting")
 	      (exit 1)))
 	(if (and state status)
-	    ;; DO NOT remote run
-	    (db:teststep-set-status! db test-id step state status msg logfile)
+	    ;; DO NOT remote run, makes calls to the testdat.db test db.
+	    (db:teststep-set-status! db test-id step state status msg logfile work-area: work-area)
 	    (begin
 	      (debug:print 0 "ERROR: You must specify :state and :status with every call to -step")
 	      (exit 6))))))
 
 (if (args:get-arg "-step")
@@ -814,11 +841,12 @@
        (args:get-arg "-setlog")
        (args:get-arg "-m"))
       ;; (if db (sqlite3:finalize! db))
       (set! *didsomething* #t)))
     
-(if (or (args:get-arg "-setlog")       ;; since setting up is so costly lets piggyback on -test-status
+(if (or (and (args:get-arg "-setlog")       ;; since setting up is so costly lets piggyback on -test-status
+	     (not (args:get-arg "-step")))  ;; -setlog may have been processed already in the "-step" previous
 	(args:get-arg "-set-toplog")
 	(args:get-arg "-test-status")
 	(args:get-arg "-set-values")
 	(args:get-arg "-load-test-data")
 	(args:get-arg "-runstep")
@@ -836,10 +864,11 @@
 	       (runscript (assoc/default 'runscript cmdinfo))
 	       (db-host   (assoc/default 'db-host   cmdinfo))
 	       (run-id    (assoc/default 'run-id    cmdinfo))
 	       (test-id   (assoc/default 'test-id   cmdinfo))
 	       (itemdat   (assoc/default 'itemdat   cmdinfo))
+	       (work-area (assoc/default 'work-area cmdinfo))
 	       (db        #f) ;; (open-db))
 	       (state     (args:get-arg ":state"))
 	       (status    (args:get-arg ":status")))
 	  (change-directory testpath)
 	  ;; (set! *runremote* runremote)
@@ -853,11 +882,11 @@
 	  ;; (client:setup)
 
 	  (if (args:get-arg "-load-test-data")
 	      ;; has sub commands that are rdb:
 	      ;; DO NOT put this one into either cdb:remote-run or open-run-close
-	      (db:load-test-data db test-id))
+	      (db:load-test-data db test-id work-area: work-area))
 	  (if (args:get-arg "-setlog")
 	      (let ((logfname (args:get-arg "-setlog")))
 		(cdb:test-set-log! *runremote* test-id logfname)))
 	  (if (args:get-arg "-set-toplog")
 	      ;; DO NOT run remote
@@ -885,11 +914,11 @@
 			 (fullcmd    (conc "(" (string-intersperse 
 						(cons cmd params) " ")
 					   ") " redir " " logfile)))
 		    ;; mark the start of the test
 		    ;; DO NOT run remote
-		    (db:teststep-set-status! db test-id stepname "start" "n/a" (args:get-arg "-m") logfile)
+		    (db:teststep-set-status! db test-id stepname "start" "n/a" (args:get-arg "-m") logfile work-area: work-area)
 		    ;; run the test step
 		    (debug:print-info 2 "Running \"" fullcmd "\"")
 		    (change-directory startingdir)
 		    (set! exitstat (system fullcmd)) ;; cmd params))
 		    (set! *globalexitstatus* exitstat)
@@ -905,11 +934,11 @@
 			  (set! *globalexitstatus* exitstat) ;; no necessary
 			  (change-directory testpath)
 			  (cdb:test-set-log! *runremote* test-id htmllogfile)))
 		    (let ((msg (args:get-arg "-m")))
 		      ;; DO NOT run remote
-		      (db:teststep-set-status! db test-id stepname "end" exitstat msg logfile))
+		      (db:teststep-set-status! db test-id stepname "end" exitstat msg logfile work-area: work-area))
 		    )))
 	  (if (or (args:get-arg "-test-status")
 		  (args:get-arg "-set-values"))
 	      (let ((newstatus (cond
 				((number? status)       (if (equal? status 0) "PASS" "FAIL"))
@@ -932,11 +961,11 @@
 		      ;; (sqlite3:finalize! db)
 		      (exit 6)))
 		(let* ((msg    (args:get-arg "-m"))
 		       (numoth (length (hash-table-keys otherdata))))
 		  ;; Convert to rpc inside the tests:test-set-status! call, not here
-		  (tests:test-set-status! test-id state newstatus msg otherdata))))
+		  (tests:test-set-status! test-id state newstatus msg otherdata work-area: work-area))))
 	  (if db (sqlite3:finalize! db))
 	  (set! *didsomething* #t))))
 
 ;;======================================================================
 ;; Various helper commands can go below here
@@ -1026,10 +1055,12 @@
       (set! *didsomething* #t)))
 
 ;;======================================================================
 ;; Exit and clean up
 ;;======================================================================
+
+(if *runremote* (close-all-connections!))
 
 ;; this is the socket if we are a client
 ;; (if (and *runremote*
 ;; 	 (socket? *runremote*))
 ;;     (close-socket *runremote*))

ADDED   run-tests-queue-classic.scm
Index: run-tests-queue-classic.scm
==================================================================
--- /dev/null
+++ run-tests-queue-classic.scm
@@ -0,0 +1,301 @@
+
+;; test-records is a hash table testname:item_path => vector < testname testconfig waitons priority items-info ... >
+(define (runs:run-tests-queue-classic run-id runname test-records keyvallst flags test-patts required-tests)
+    ;; At this point the list of parent tests is expanded 
+    ;; NB// Should expand items here and then insert into the run queue.
+  (debug:print 5 "test-records: " test-records ", keyvallst: " keyvallst " flags: " (hash-table->alist flags))
+  (let ((run-info              (cdb:remote-run db:get-run-info #f run-id))
+	(key-vals              (cdb:remote-run db:get-key-vals #f run-id))
+	(sorted-test-names     (tests:sort-by-priority-and-waiton test-records))
+	(test-registry         (make-hash-table))
+	(registry-mutex        (make-mutex))
+	(num-retries           0)
+	(max-retries           (config-lookup *configdat* "setup" "maxretries"))
+	(max-concurrent-jobs   (let ((mcj (config-lookup *configdat* "setup"     "max_concurrent_jobs")))
+				 (if (and mcj (string->number mcj))
+				     (string->number mcj)
+				     1))))
+    (set! max-retries (if (and max-retries (string->number max-retries))(string->number max-retries) 100))
+    (if (not (null? sorted-test-names))
+	(let loop ((hed         (car sorted-test-names))
+		   (tal         (cdr sorted-test-names))
+		   (reruns      '()))
+	  (if (not (null? reruns))(debug:print-info 4 "reruns=" reruns))
+	  ;; (print "Top of loop, hed=" hed ", tal=" tal " ,reruns=" reruns)
+	  (let* ((test-record (hash-table-ref test-records hed))
+		 (test-name   (tests:testqueue-get-testname test-record))
+		 (tconfig     (tests:testqueue-get-testconfig test-record))
+		 (testmode    (let ((m (config-lookup tconfig "requirements" "mode")))
+				(if m (string->symbol m) 'normal)))
+		 (waitons     (tests:testqueue-get-waitons    test-record))
+		 (priority    (tests:testqueue-get-priority   test-record))
+		 (itemdat     (tests:testqueue-get-itemdat    test-record)) ;; itemdat can be a string, list or #f
+		 (items       (tests:testqueue-get-items      test-record))
+		 (item-path   (item-list->path itemdat))
+		 (newtal      (append tal (list hed))))
+	    
+	    (debug:print 6
+			 "test-name: " test-name
+			 "\n  hed:         " hed
+			 "\n  itemdat:     " itemdat
+			 "\n  items:       " items
+			 "\n  item-path:   " item-path
+			 "\n  waitons:     " waitons
+			 "\n  num-retries: " num-retries
+			 "\n  tal:         " tal
+			 "\n  reruns:      " reruns)
+
+	    ;; check for hed in waitons => this would be circular, remove it and issue an
+	    ;; error
+	    (if (member test-name waitons)
+		(begin
+		  (debug:print 0 "ERROR: test " test-name " has listed itself as a waiton, please correct this!")
+		  (set! waiton (filter (lambda (x)(not (equal? x hed))) waitons))))
+
+	    (cond ;; OUTER COND
+	     ((not items) ;; when false the test is ok to be handed off to launch (but not before)
+	      (if (and (not (tests:match test-patts (tests:testqueue-get-testname test-record) item-path required: required-tests))
+	               (not (null? tal)))
+	          (loop (car newtal)(cdr newtal) reruns))
+	      (let* ((run-limits-info         (runs:can-run-more-tests test-record max-concurrent-jobs)) ;; look at the test jobgroup and tot jobs running
+		     (have-resources          (car run-limits-info))
+		     (num-running             (list-ref run-limits-info 1))
+		     (num-running-in-jobgroup (list-ref run-limits-info 2))
+		     (max-concurrent-jobs     (list-ref run-limits-info 3))
+		     (job-group-limit         (list-ref run-limits-info 4))
+		     (prereqs-not-met         (db:get-prereqs-not-met run-id waitons item-path mode: testmode))
+		     (fails                   (runs:calc-fails prereqs-not-met))
+		     (non-completed           (runs:calc-not-completed prereqs-not-met)))
+		(debug:print-info 8 "have-resources: " have-resources " prereqs-not-met: " 
+			     (string-intersperse 
+			      (map (lambda (t)
+				     (if (vector? t)
+					 (conc (db:test-get-state t) "/" (db:test-get-status t))
+					 (conc " WARNING: t is not a vector=" t )))
+				   prereqs-not-met) ", ") " fails: " fails)
+		(debug:print-info 4 "hed=" hed "\n  test-record=" test-record "\n  test-name: " test-name "\n  item-path: " item-path "\n  test-patts: " test-patts)
+
+		;; Don't know at this time if the test has been launched at some time in the past
+		;; i.e. is this a re-launch?
+		(debug:print-info 4 "run-limits-info = " run-limits-info)
+		(cond ;; INNER COND #1 for a launchable test
+		 ;; Check item path against item-patts
+		 ((not (tests:match test-patts (tests:testqueue-get-testname test-record) item-path required: required-tests)) ;; This test/itempath is not to be run
+		  ;; else the run is stuck, temporarily or permanently
+		  ;; but should check if it is due to lack of resources vs. prerequisites
+		  (debug:print-info 1 "Skipping " (tests:testqueue-get-testname test-record) " " item-path " as it doesn't match " test-patts)
+		  ;; (thread-sleep! *global-delta*)
+		  (if (not (null? tal))
+		      (loop (car tal)(cdr tal) reruns)))
+		 ;; Registry has been started for this test but has not yet completed
+		 ;; this should be rare, the case where there are only a couple of tests and the db is slow
+		 ;; delay a short while and continue
+		 ;; ((eq? (hash-table-ref/default test-registry (runs:make-full-test-name test-name item-path) #f) 'start)
+		 ;;  (thread-sleep! 0.01)
+		 ;;  (loop (car newtal)(cdr newtal) reruns))
+		 ;; count number of 'done, if more than 100 then skip on through.
+		 (;; (and (< (length (filter (lambda (x)(eq? x 'done))(hash-table-values test-registry))) 100) ;; why get more than 200 ahead?
+		  (not (hash-table-ref/default test-registry (runs:make-full-test-name test-name item-path) #f)) ;; ) ;; too many changes required. Implement later.
+		  (debug:print-info 4 "Pre-registering test " test-name "/" item-path " to create placeholder" )
+		  ;; NEED TO THREADIFY THIS
+		  (let ((th (make-thread (lambda ()
+		        		   (mutex-lock! registry-mutex)
+		        		   (hash-table-set! test-registry (runs:make-full-test-name test-name item-path) 'start)
+		        		   (mutex-unlock! registry-mutex)
+					   ;; If haven't done it before register a top level test if this is an itemized test
+					   (if (not (eq? (hash-table-ref/default test-registry (runs:make-full-test-name test-name "") #f) 'done))
+					       (cdb:tests-register-test *runremote* run-id test-name ""))
+					   (cdb:tests-register-test *runremote* run-id test-name item-path)
+		        		   (mutex-lock! registry-mutex)
+					   (hash-table-set! test-registry (runs:make-full-test-name test-name item-path) 'done)
+		        		   (mutex-unlock! registry-mutex))
+		        		 (conc test-name "/" item-path))))
+		    (thread-start! th))
+		  ;; TRY (thread-sleep! *global-delta*)
+		  (runs:shrink-can-run-more-tests-count)   ;; DELAY TWEAKER (still needed?)
+		  (loop (car newtal)(cdr newtal) reruns))
+		 ;; At this point *all* test registrations must be completed.
+		 ((not (null? (filter (lambda (x)(eq? 'start x))(hash-table-values test-registry))))
+		  (debug:print-info 0 "Waiting on test registrations: " (string-intersperse 
+									 (filter (lambda (x)
+										   (eq? (hash-table-ref/default test-registry x #f) 'start))
+										 (hash-table-keys test-registry))
+									 ", "))
+		  (thread-sleep! 0.1)
+		  (loop hed tal reruns))
+		 ((not have-resources) ;; simply try again after waiting a second
+		  (debug:print-info 1 "no resources to run new tests, waiting ...")
+		  ;; Have gone back and forth on this but db starvation is an issue.
+		  ;; wait one second before looking again to run jobs.
+		  (thread-sleep! 1) ;; (+ 2 *global-delta*))
+		  ;; could have done hed tal here but doing car/cdr of newtal to rotate tests
+		  (loop (car newtal)(cdr newtal) reruns))
+		 ((and have-resources
+		       (or (null? prereqs-not-met)
+			   (and (eq? testmode 'toplevel)
+				(null? non-completed))))
+		  (run:test run-id run-info key-vals runname keyvallst test-record flags #f)
+		  (hash-table-set! test-registry (runs:make-full-test-name test-name item-path) 'running)
+		  (runs:shrink-can-run-more-tests-count)  ;; DELAY TWEAKER (still needed?)
+		  ;; (thread-sleep! *global-delta*)
+		  (if (not (null? tal))
+		      (loop (car tal)(cdr tal) reruns)))
+		 (else ;; must be we have unmet prerequisites
+		    (debug:print 4 "FAILS: " fails)
+		    ;; If one or more of the prereqs-not-met are FAIL then we can issue
+		    ;; a message and drop hed from the items to be processed.
+		    (if (null? fails)
+			(begin
+			  ;; couldn't run, take a breather
+			  (debug:print-info 4 "Shouldn't really get here, race condition? Unable to launch more tests at this moment, killing time ...")
+			  ;; (thread-sleep! (+ 0.01 *global-delta*)) ;; long sleep here - no resources, may as well be patient
+			  ;; we made new tal by sticking hed at the back of the list
+			  (loop (car newtal)(cdr newtal) reruns))
+			;; the waiton is FAIL so no point in trying to run hed ever again
+			(if (not (null? tal))
+			    (if (vector? hed)
+				(begin 
+				  (debug:print 1 "WARN: Dropping test " (db:test-get-testname hed) "/" (db:test-get-item-path hed)
+					       " from the launch list as it has prerequistes that are FAIL")
+				  (runs:shrink-can-run-more-tests-count) ;; DELAY TWEAKER (still needed?)
+				  ;; (thread-sleep! *global-delta*)
+				  (hash-table-set! test-registry (runs:make-full-test-name test-name item-path) 'removed)
+				  (loop (car tal)(cdr tal) (cons hed reruns)))
+				(begin
+				  (debug:print 1 "WARN: Test not processed correctly. Could be a race condition in your test implementation? " hed) ;;  " as it has prerequistes that are FAIL. (NOTE: hed is not a vector)")
+				  (runs:shrink-can-run-more-tests-count) ;; DELAY TWEAKER (still needed?)
+				  ;; (thread-sleep! (+ 0.01 *global-delta*))
+				  (loop hed tal reruns))))))))) ;; END OF INNER COND
+	     
+	     ;; case where the items came in as a list that has not yet been expanded
+	     ((and (list? items)     ;; thus we know our items are already calculated
+		   (not   itemdat)) ;; and not yet expanded into the list of things to be done
+	      (if (and (debug:debug-mode 1) ;; (>= *verbosity* 1)
+		       (> (length items) 0)
+		       (> (length (car items)) 0))
+		  (pp items))
+	      (for-each
+	       (lambda (my-itemdat)
+		 (let* ((new-test-record (let ((newrec (make-tests:testqueue)))
+					   (vector-copy! test-record newrec)
+					   newrec))
+			(my-item-path (item-list->path my-itemdat)))
+		   (if (tests:match test-patts hed my-item-path required: required-tests) ;; (patt-list-match my-item-path item-patts)           ;; yes, we want to process this item, NOTE: Should not need this check here!
+		       (let ((newtestname (runs:make-full-test-name hed my-item-path)))    ;; test names are unique on testname/item-path
+			 (tests:testqueue-set-items!     new-test-record #f)
+			 (tests:testqueue-set-itemdat!   new-test-record my-itemdat)
+			 (tests:testqueue-set-item_path! new-test-record my-item-path)
+			 (hash-table-set! test-records newtestname new-test-record)
+			 (set! tal (cons newtestname tal)))))) ;; since these are itemized create new test names testname/itempath
+	       items)
+	      (if (not (null? tal))
+		  (begin
+		    (debug:print-info 4 "End of items list, looping with next after short delay")
+                    ;; (thread-sleep! (+ 0.01 *global-delta*))
+		    (loop (car tal)(cdr tal) reruns))))
+
+	     ;; if items is a proc then need to run items:get-items-from-config, get the list and loop 
+	     ;;    - but only do that if resources exist to kick off the job
+	     ((or (procedure? items)(eq? items 'have-procedure))
+	      (let ((can-run-more    (runs:can-run-more-tests test-record max-concurrent-jobs)))
+		(if (and (list? can-run-more)
+			 (car can-run-more))
+		    (let* ((prereqs-not-met (db:get-prereqs-not-met run-id waitons item-path mode: testmode))
+			   (fails           (runs:calc-fails prereqs-not-met))
+			   (non-completed   (runs:calc-not-completed prereqs-not-met)))
+		      (debug:print-info 8 "can-run-more: " can-run-more
+				   "\n testname:        " hed
+				   "\n prereqs-not-met: " (runs:pretty-string prereqs-not-met)
+				   "\n non-completed:   " (runs:pretty-string non-completed) 
+				   "\n fails:           " (runs:pretty-string fails)
+				   "\n testmode:        " testmode
+				   "\n num-retries:     " num-retries
+				   "\n (eq? testmode 'toplevel): " (eq? testmode 'toplevel)
+				   "\n (null? non-completed):    " (null? non-completed)
+				   "\n reruns:          " reruns
+				   "\n items:           " items
+				   "\n can-run-more:    " can-run-more)
+		      ;; (thread-sleep! (+ 0.01 *global-delta*))
+		      (cond ;; INNER COND #2
+		       ((or (null? prereqs-not-met) ;; all prereqs met, fire off the test
+			    ;; or, if it is a 'toplevel test and all prereqs not met are COMPLETED then launch
+			    (and (eq? testmode 'toplevel)
+				 (null? non-completed)))
+			(let ((test-name (tests:testqueue-get-testname test-record)))
+			  (setenv "MT_TEST_NAME" test-name) ;; 
+			  (setenv "MT_RUNNAME"   runname)
+			  (set-megatest-env-vars run-id inrunname: runname) ;; these may be needed by the launching process
+			  (let ((items-list (items:get-items-from-config tconfig)))
+			    (if (list? items-list)
+				(begin
+				  (tests:testqueue-set-items! test-record items-list)
+				  ;; (thread-sleep! *global-delta*)
+				  (loop hed tal reruns))
+				(begin
+				  (debug:print 0 "ERROR: The proc from reading the setup did not yield a list - please report this")
+				  (exit 1))))))
+		       ((null? fails)
+			(debug:print-info 4 "fails is null, moving on in the queue but keeping " hed " for now")
+			;; only increment num-retries when there are no tests runing
+			(if (eq? 0 (list-ref can-run-more 1))
+			    (begin
+			      ;; TRY (if (> num-retries 100) ;; first 100 retries are low time cost
+			      ;; TRY     (thread-sleep! (+ 2 *global-delta*))
+			      ;; TRY     (thread-sleep! (+ 0.01 *global-delta*)))
+			      (set! num-retries (+ num-retries 1))))
+			(if (> num-retries  max-retries)
+			    (if (not (null? tal))
+				(loop (car tal)(cdr tal) reruns))
+			    (loop (car newtal)(cdr newtal) reruns))) ;; an issue with prereqs not yet met?
+		       ((and (not (null? fails))(eq? testmode 'normal))
+			(debug:print-info 1 "test "  hed " (mode=" testmode ") has failed prerequisite(s); "
+				     (string-intersperse (map (lambda (t)(conc (db:test-get-testname t) ":" (db:test-get-state t)"/"(db:test-get-status t))) fails) ", ")
+				     ", removing it from to-do list")
+			(if (not (null? tal))
+			    (begin
+                              ;; (thread-sleep! *global-delta*)
+			      (loop (car tal)(cdr tal)(cons hed reruns)))))
+		       (else
+			(debug:print 8 "ERROR: No handler for this condition.")
+			;; TRY (thread-sleep! (+ 1 *global-delta*))
+			(loop (car newtal)(cdr newtal) reruns)))) ;; END OF IF CAN RUN MORE
+
+		    ;; if can't run more just loop with next possible test
+		    (begin
+		      (debug:print-info 4 "processing the case with a lambda for items or 'have-procedure. Moving through the queue without dropping " hed)
+		      ;; (thread-sleep! (+ 2 *global-delta*))
+		      (loop (car newtal)(cdr newtal) reruns))))) ;; END OF (or (procedure? items)(eq? items 'have-procedure))
+	     
+	     ;; this case should not happen, added to help catch any bugs
+	     ((and (list? items) itemdat)
+	      (debug:print 0 "ERROR: Should not have a list of items in a test and the itemspath set - please report this")
+	      (exit 1))
+	     ((not (null? reruns))
+	      (let* ((newlst (tests:filter-non-runnable run-id tal test-records)) ;; i.e. not FAIL, WAIVED, INCOMPLETE, PASS, KILLED,
+		     (junked (lset-difference equal? tal newlst)))
+		(debug:print-info 4 "full drop through, if reruns is less than 100 we will force retry them, reruns=" reruns ", tal=" tal)
+		(if (< num-retries max-retries)
+		    (set! newlst (append reruns newlst)))
+		(set! num-retries (+ num-retries 1))
+		;; (thread-sleep! (+ 1 *global-delta*))
+		(if (not (null? newlst))
+		    ;; since reruns have been tacked on to newlst create new reruns from junked
+		    (loop (car newlst)(cdr newlst)(delete-duplicates junked)))))
+	     ((not (null? tal))
+	      (debug:print-info 4 "I'm pretty sure I shouldn't get here."))
+	     (else
+	      (debug:print-info 4 "Exiting loop with...\n  hed=" hed "\n  tal=" tal "\n  reruns=" reruns))
+	     )))) ;; LET* ((test-record
+
+    ;; we get here on "drop through" - loop for next test in queue
+    ;; FIXME!!!! THIS SHOULD NOT REQUIRE AN EXIT!!!!!!!
+    
+    (debug:print-info 1 "All tests launched")
+    (thread-sleep! 0.5)
+    ;; FIXME! This harsh exit should not be necessary....
+    ;; (if (not *runremote*)(exit)) ;; 
+    #f)) ;; return a #f as a hint that we are done
+  ;; Here we need to check that all the tests remaining to be run are eligible to run
+  ;; and are not blocked by failed
+  
+

ADDED   run-tests-queue-new.scm
Index: run-tests-queue-new.scm
==================================================================
--- /dev/null
+++ run-tests-queue-new.scm
@@ -0,0 +1,334 @@
+;; New launch loop: pre-registers, expands items for and launches (run:test) the tests in test-records; reglen bounds the list (reg) of pre-registered tests. Returns #f.
+;; test-records is a hash table testname:item_path => vector < testname testconfig waitons priority items-info ... >
+(define (runs:run-tests-queue-new run-id runname test-records keyvallst flags test-patts required-tests reglen)
+  ;; At this point the list of parent tests is expanded 
+  ;; NB// Should expand items here and then insert into the run queue.
+  (debug:print 5 "test-records: " test-records ", keyvallst: " keyvallst " flags: " (hash-table->alist flags))
+  (let ((run-info              (cdb:remote-run db:get-run-info #f run-id))
+	(key-vals              (cdb:remote-run db:get-key-vals #f run-id))
+	(sorted-test-names     (tests:sort-by-priority-and-waiton test-records))
+	(test-registry         (make-hash-table))
+	(registry-mutex        (make-mutex))
+	(num-retries           0)
+	(max-retries           (config-lookup *configdat* "setup" "maxretries"))
+	(max-concurrent-jobs   (let ((mcj (config-lookup *configdat* "setup"     "max_concurrent_jobs")))
+				 (if (and mcj (string->number mcj))
+				     (string->number mcj)
+				     1)))) ;; length of the register queue ahead
+    (set! max-retries (if (and max-retries (string->number max-retries))(string->number max-retries) 100))
+    (if (not (null? sorted-test-names))
+	(let loop ((hed         (car sorted-test-names))
+		   (tal         (cdr sorted-test-names))
+		   (reg         '()) ;; registered, put these at the head of tal 
+		   (reruns      '()))
+	  (if (not (null? reruns))(debug:print-info 4 "reruns=" reruns))
+	  ;; (print "Top of loop, hed=" hed ", tal=" tal " ,reruns=" reruns)
+	  (let* ((test-record (hash-table-ref test-records hed))
+		 (test-name   (tests:testqueue-get-testname test-record))
+		 (tconfig     (tests:testqueue-get-testconfig test-record))
+		 (testmode    (let ((m (config-lookup tconfig "requirements" "mode")))
+				(if m (string->symbol m) 'normal)))
+		 (waitons     (tests:testqueue-get-waitons    test-record))
+		 (priority    (tests:testqueue-get-priority   test-record))
+		 (itemdat     (tests:testqueue-get-itemdat    test-record)) ;; itemdat can be a string, list or #f
+		 (items       (tests:testqueue-get-items      test-record))
+		 (item-path   (item-list->path itemdat))
+		 (newtal      (append tal (list hed)))
+		 (regfull     (> (length reg) reglen)))
+	    ;; (if (> (length reg) 10)
+	    ;;     (begin
+	    ;;       (set! tal (cons hed tal))
+	    ;;       (set! hed (car reg))
+	    ;;       (set! reg (cdr reg))
+	    ;;       (set! newtal tal)))
+	    (debug:print 6
+			 "test-name: " test-name
+			 "\n  hed:         " hed
+			 "\n  itemdat:     " itemdat
+			 "\n  items:       " items
+			 "\n  item-path:   " item-path
+			 "\n  waitons:     " waitons
+			 "\n  num-retries: " num-retries
+			 "\n  tal:         " tal
+			 "\n  reruns:      " reruns)
+
+	    ;; check for test-name in waitons => this would be circular, report the error and
+	    ;; drop the self reference from the local waitons so the prereq checks below ignore it
+	    (if (member test-name waitons)
+		(begin
+		  (debug:print 0 "ERROR: test " test-name " has listed itself as a waiton, please correct this!")
+		  (set! waitons (filter (lambda (x)(not (equal? x test-name))) waitons))))
+
+	    (cond ;; OUTER COND
+	     ((not items) ;; when false the test is ok to be handed off to launch (but not before)
+	      (if (and (not (tests:match test-patts (tests:testqueue-get-testname test-record) item-path))
+	               (not (null? tal)))
+	          (loop (car tal)(cdr tal) reg reruns)) ;; NOTE(review): not a tail call - when it returns, the let* below still runs for this hed
+	      (let* ((run-limits-info         (runs:can-run-more-tests test-record max-concurrent-jobs)) ;; look at the test jobgroup and tot jobs running
+		     (have-resources          (car run-limits-info))
+		     (num-running             (list-ref run-limits-info 1))
+		     (num-running-in-jobgroup (list-ref run-limits-info 2))
+		     (max-concurrent-jobs     (list-ref run-limits-info 3))
+		     (job-group-limit         (list-ref run-limits-info 4))
+		     (prereqs-not-met         (db:get-prereqs-not-met run-id waitons item-path mode: testmode))
+		     (fails                   (runs:calc-fails prereqs-not-met))
+		     (non-completed           (runs:calc-not-completed prereqs-not-met)))
+		(debug:print-info 8 "have-resources: " have-resources " prereqs-not-met: " 
+				  (string-intersperse 
+				   (map (lambda (t)
+					  (if (vector? t)
+					      (conc (db:test-get-state t) "/" (db:test-get-status t))
+					      (conc " WARNING: t is not a vector=" t )))
+					prereqs-not-met) ", ") " fails: " fails)
+		(debug:print-info 4 "hed=" hed "\n  test-record=" test-record "\n  test-name: " test-name "\n  item-path: " item-path "\n  test-patts: " test-patts)
+
+		;; Don't know at this time if the test have been launched at some time in the past
+		;; i.e. is this a re-launch?
+		(debug:print-info 4 "run-limits-info = " run-limits-info)
+		(cond ;; INNER COND #1 for a launchable test
+		 ;; Check item path against item-patts
+		 ((not (tests:match test-patts (tests:testqueue-get-testname test-record) item-path)) ;; This test/itempath is not to be run
+		  ;; else the run is stuck, temporarily or permanently
+		  ;; but should check if it is due to lack of resources vs. prerequisites
+		  (debug:print-info 1 "Skipping " (tests:testqueue-get-testname test-record) " " item-path " as it doesn't match " test-patts)
+		  ;; (thread-sleep! *global-delta*)
+		  (if (not (null? tal))
+		      (loop (runs:queue-next-hed tal reg reglen regfull)
+			    (runs:queue-next-tal tal reg reglen regfull)
+			    (runs:queue-next-reg tal reg reglen regfull)
+			    reruns)))
+		 ;; Registry has been started for this test but has not yet completed
+		 ;; this should be rare, the case where there are only a couple of tests and the db is slow
+		 ;; delay a short while and continue
+		 ;; ((eq? (hash-table-ref/default test-registry (runs:make-full-test-name test-name item-path) #f) 'start)
+		 ;;  (thread-sleep! 0.01)
+		 ;;  (loop (car newtal)(cdr newtal) reruns))
+		 ;; count number of 'done, if more than 100 then skip on through.
+		 ((not (hash-table-ref/default test-registry (runs:make-full-test-name test-name item-path) #f)) ;; ) ;; too many changes required. Implement later.
+		  (debug:print-info 4 "Pre-registering test " test-name "/" item-path " to create placeholder" )
+		  (let ((th (make-thread (lambda ()
+					   (mutex-lock! registry-mutex)
+					   (hash-table-set! test-registry (runs:make-full-test-name test-name item-path) 'start)
+					   (mutex-unlock! registry-mutex)
+					   ;; If haven't done it before register a top level test if this is an itemized test
+					   (if (not (eq? (hash-table-ref/default test-registry (runs:make-full-test-name test-name "") #f) 'done))
+					       (cdb:tests-register-test *runremote* run-id test-name ""))
+					   (cdb:tests-register-test *runremote* run-id test-name item-path)
+					   (mutex-lock! registry-mutex)
+					   (hash-table-set! test-registry (runs:make-full-test-name test-name item-path) 'done)
+					   (mutex-unlock! registry-mutex))
+					 (conc test-name "/" item-path))))
+		    (thread-start! th))
+		  (runs:shrink-can-run-more-tests-count)   ;; DELAY TWEAKER (still needed?)
+		  (if (and (null? tal)(null? reg))
+		      (loop hed tal reg reruns)
+		      (loop (runs:queue-next-hed tal reg reglen regfull)
+			    (runs:queue-next-tal tal reg reglen regfull)
+			    (let ((newl (append reg (list hed))))
+			      (if regfull 
+				  (cdr newl)
+				  newl))
+			    reruns)))
+		 ;; At this point hed test registration must be completed.
+		 ((eq? (hash-table-ref/default test-registry (runs:make-full-test-name test-name item-path) #f)
+		       'start)
+		  (debug:print-info 0 "Waiting on test registration(s): " (string-intersperse 
+									   (filter (lambda (x)
+										     (eq? (hash-table-ref/default test-registry x #f) 'start))
+										   (hash-table-keys test-registry))
+									   ", "))
+		  (thread-sleep! 0.1)
+		  (loop hed tal reg reruns))
+		 ((not have-resources) ;; simply try again after waiting a second
+		  (debug:print-info 1 "no resources to run new tests, waiting ...")
+		  ;; Have gone back and forth on this but db starvation is an issue.
+		  ;; wait one second before looking again to run jobs.
+		  (thread-sleep! 1) ;; (+ 2 *global-delta*))
+		  ;; could have done hed tal here but doing car/cdr of newtal to rotate tests
+		  (loop (car newtal)(cdr newtal) reg reruns))
+		 ((and have-resources
+		       (or (null? prereqs-not-met)
+			   (and (eq? testmode 'toplevel)
+				(null? non-completed))))
+		  (run:test run-id run-info key-vals runname keyvallst test-record flags #f)
+		  (hash-table-set! test-registry (runs:make-full-test-name test-name item-path) 'running)
+		  (runs:shrink-can-run-more-tests-count)  ;; DELAY TWEAKER (still needed?)
+		  ;; (thread-sleep! *global-delta*)
+		  (if (not (null? tal))
+		      (loop (runs:queue-next-hed tal reg reglen regfull)
+			    (runs:queue-next-tal tal reg reglen regfull)
+			    (runs:queue-next-reg tal reg reglen regfull)
+			    reruns)))
+		 (else ;; must be we have unmet prerequisites
+		  (debug:print 4 "FAILS: " fails)
+		  ;; If one or more of the prereqs-not-met are FAIL then we can issue
+		  ;; a message and drop hed from the items to be processed.
+		  (if (null? fails)
+		      (begin
+			;; couldn't run, take a breather
+			(debug:print-info 4 "Shouldn't really get here, race condition? Unable to launch more tests at this moment, killing time ...")
+			;; (thread-sleep! (+ 0.01 *global-delta*)) ;; long sleep here - no resources, may as well be patient
+			;; we made new tal by sticking hed at the back of the list
+			(loop (car newtal)(cdr newtal) reg reruns))
+		      ;; the waiton is FAIL so no point in trying to run hed ever again
+		      (if (not (null? tal))
+			  (if (vector? hed)
+			      (begin 
+				(debug:print 1 "WARN: Dropping test " (db:test-get-testname hed) "/" (db:test-get-item-path hed)
+					     " from the launch list as it has prerequistes that are FAIL")
+				(runs:shrink-can-run-more-tests-count) ;; DELAY TWEAKER (still needed?)
+				;; (thread-sleep! *global-delta*)
+				(hash-table-set! test-registry (runs:make-full-test-name test-name item-path) 'removed)
+				(loop (runs:queue-next-hed tal reg reglen regfull)
+				      (runs:queue-next-tal tal reg reglen regfull)
+				      (runs:queue-next-reg tal reg reglen regfull)
+				      (cons hed reruns)))
+			      (begin
+				(debug:print 1 "WARN: Test not processed correctly. Could be a race condition in your test implementation? " hed) ;;  " as it has prerequistes that are FAIL. (NOTE: hed is not a vector)")
+				(runs:shrink-can-run-more-tests-count) ;; DELAY TWEAKER (still needed?)
+				;; (thread-sleep! (+ 0.01 *global-delta*))
+				(loop hed tal reg reruns))))))))) ;; END OF INNER COND
+	     
+	     ;; case where an items came in as a list been processed
+	     ((and (list? items)     ;; thus we know our items are already calculated
+		   (not   itemdat)) ;; and not yet expanded into the list of things to be done
+	      (if (and (debug:debug-mode 1) ;; (>= *verbosity* 1)
+		       (> (length items) 0)
+		       (> (length (car items)) 0))
+		  (pp items))
+	      (for-each
+	       (lambda (my-itemdat)
+		 (let* ((new-test-record (let ((newrec (make-tests:testqueue)))
+					   (vector-copy! test-record newrec)
+					   newrec))
+			(my-item-path (item-list->path my-itemdat)))
+		   (if (tests:match test-patts hed my-item-path) ;; (patt-list-match my-item-path item-patts)           ;; yes, we want to process this item, NOTE: Should not need this check here!
+		       (let ((newtestname (runs:make-full-test-name hed my-item-path)))    ;; test names are unique on testname/item-path
+			 (tests:testqueue-set-items!     new-test-record #f)
+			 (tests:testqueue-set-itemdat!   new-test-record my-itemdat)
+			 (tests:testqueue-set-item_path! new-test-record my-item-path)
+			 (hash-table-set! test-records newtestname new-test-record)
+			 (set! tal (cons newtestname tal)))))) ;; since these are itemized create new test names testname/itempath
+	       items)
+	      (if (not (null? tal))
+		  (begin
+		    (debug:print-info 4 "End of items list, looping with next after short delay")
+		    ;; (thread-sleep! (+ 0.01 *global-delta*))
+		    (loop (runs:queue-next-hed tal reg reglen regfull)
+			  (runs:queue-next-tal tal reg reglen regfull)
+			  (runs:queue-next-reg tal reg reglen regfull)
+			  reruns))))
+
+	     ;; if items is a proc then need to run items:get-items-from-config, get the list and loop 
+	     ;;    - but only do that if resources exist to kick off the job
+	     ((or (procedure? items)(eq? items 'have-procedure))
+	      (let ((can-run-more    (runs:can-run-more-tests test-record max-concurrent-jobs)))
+		(if (and (list? can-run-more)
+			 (car can-run-more))
+		    (let* ((prereqs-not-met (db:get-prereqs-not-met run-id waitons item-path mode: testmode))
+			   (fails           (runs:calc-fails prereqs-not-met))
+			   (non-completed   (runs:calc-not-completed prereqs-not-met)))
+		      (debug:print-info 8 "can-run-more: " can-run-more
+					"\n testname:        " hed
+					"\n prereqs-not-met: " (runs:pretty-string prereqs-not-met)
+					"\n non-completed:   " (runs:pretty-string non-completed) 
+					"\n fails:           " (runs:pretty-string fails)
+					"\n testmode:        " testmode
+					"\n num-retries:     " num-retries
+					"\n (eq? testmode 'toplevel): " (eq? testmode 'toplevel)
+					"\n (null? non-completed):    " (null? non-completed)
+					"\n reruns:          " reruns
+					"\n items:           " items
+					"\n can-run-more:    " can-run-more)
+		      ;; (thread-sleep! (+ 0.01 *global-delta*))
+		      (cond ;; INNER COND #2
+		       ((or (null? prereqs-not-met) ;; all prereqs met, fire off the test
+			    ;; or, if it is a 'toplevel test and all prereqs not met are COMPLETED then launch
+			    (and (eq? testmode 'toplevel)
+				 (null? non-completed)))
+			(let ((test-name (tests:testqueue-get-testname test-record)))
+			  (setenv "MT_TEST_NAME" test-name) ;; 
+			  (setenv "MT_RUNNAME"   runname)
+			  (set-megatest-env-vars run-id) ;; these may be needed by the launching process
+			  (let ((items-list (items:get-items-from-config tconfig)))
+			    (if (list? items-list)
+				(begin
+				  (tests:testqueue-set-items! test-record items-list)
+				  ;; (thread-sleep! *global-delta*)
+				  (loop hed tal reg reruns))
+				(begin
+				  (debug:print 0 "ERROR: The proc from reading the setup did not yield a list - please report this")
+				  (exit 1))))))
+		       ((null? fails)
+			(debug:print-info 4 "fails is null, moving on in the queue but keeping " hed " for now")
+			;; only increment num-retries when there are no tests runing
+			(if (eq? 0 (list-ref can-run-more 1))
+			    (begin
+			      ;; TRY (if (> num-retries 100) ;; first 100 retries are low time cost
+			      ;; TRY     (thread-sleep! (+ 2 *global-delta*))
+			      ;; TRY     (thread-sleep! (+ 0.01 *global-delta*)))
+			      (set! num-retries (+ num-retries 1))))
+			(if (> num-retries  max-retries)
+			    (if (not (null? tal))
+				(loop (runs:queue-next-hed tal reg reglen regfull)
+				      (runs:queue-next-tal tal reg reglen regfull)
+				      (runs:queue-next-reg tal reg reglen regfull)
+				      reruns))
+			    (loop (car newtal)(cdr newtal) reg reruns))) ;; an issue with prereqs not yet met?
+		       ((and (not (null? fails))(eq? testmode 'normal))
+			(debug:print-info 1 "test "  hed " (mode=" testmode ") has failed prerequisite(s); "
+					  (string-intersperse (map (lambda (t)(conc (db:test-get-testname t) ":" (db:test-get-state t)"/"(db:test-get-status t))) fails) ", ")
+					  ", removing it from to-do list")
+			(if (not (null? tal))
+			    (begin
+			      ;; (thread-sleep! *global-delta*)
+			      (loop (runs:queue-next-hed tal reg reglen regfull)
+				    (runs:queue-next-tal tal reg reglen regfull)
+				    (runs:queue-next-reg tal reg reglen regfull)
+				    (cons hed reruns)))))
+		       (else
+			(debug:print 8 "ERROR: No handler for this condition.")
+			;; TRY (thread-sleep! (+ 1 *global-delta*))
+			(loop (car newtal)(cdr newtal) reg reruns)))) ;; END OF IF CAN RUN MORE
+
+		    ;; if can't run more just loop with next possible test
+		    (begin
+		      (debug:print-info 4 "processing the case with a lambda for items or 'have-procedure. Moving through the queue without dropping " hed)
+		      ;; (thread-sleep! (+ 2 *global-delta*))
+		      (loop (car newtal)(cdr newtal) reg reruns))))) ;; END OF (or (procedure? items)(eq? items 'have-procedure))
+	     
+	     ;; this case should not happen, added to help catch any bugs
+	     ((and (list? items) itemdat)
+	      (debug:print 0 "ERROR: Should not have a list of items in a test and the itemspath set - please report this")
+	      (exit 1))
+	     ((not (null? reruns))
+	      (let* ((newlst (tests:filter-non-runnable run-id tal test-records)) ;; i.e. not FAIL, WAIVED, INCOMPLETE, PASS, KILLED,
+		     (junked (lset-difference equal? tal newlst)))
+		(debug:print-info 4 "full drop through, if reruns is less than 100 we will force retry them, reruns=" reruns ", tal=" tal)
+		(if (< num-retries max-retries)
+		    (set! newlst (append reruns newlst)))
+		(set! num-retries (+ num-retries 1))
+		;; (thread-sleep! (+ 1 *global-delta*))
+		(if (not (null? newlst))
+		    ;; since reruns have been tacked on to newlst create new reruns from junked
+		    (loop (car newlst)(cdr newlst) reg (delete-duplicates junked)))))
+	     ((not (null? tal))
+	      (debug:print-info 4 "I'm pretty sure I shouldn't get here."))
+	     ((not (null? reg)) ;; could we get here with leftovers?
+	      (debug:print-info 0 "Have leftovers!")
+	      (loop (car reg)(cdr reg) '() reruns))
+	     (else
+	      (debug:print-info 4 "Exiting loop with...\n  hed=" hed "\n  tal=" tal "\n  reruns=" reruns))
+	     )))) ;; LET* ((test-record
+
+    ;; we get here on "drop through" - loop for next test in queue
+    ;; FIXME!!!! THIS SHOULD NOT REQUIRE AN EXIT!!!!!!!
+    
+    (debug:print-info 1 "All tests launched")
+    (thread-sleep! 0.5)
+    ;; FIXME! This harsh exit should not be necessary....
+    ;; (if (not *runremote*)(exit)) ;; 
+    #f)) ;; return a #f as a hint that we are done
+;; Here we need to check that all the tests remaining to be run are eligible to run
+;; and are not blocked by failed
+

Index: runconfig.scm
==================================================================
--- runconfig.scm
+++ runconfig.scm
@@ -59,16 +59,16 @@
 		    sections)
 	  (debug:print 2 "---")
 	  (set! *already-seen-runconfig-info* #t)))
     finaldat))
 
-(define (set-run-config-vars db run-id keys keyvals)
+(define (set-run-config-vars run-id keys keyvals targ-from-db)
   (push-directory *toppath*)
   (let ((runconfigf (conc  *toppath* "/runconfigs.config"))
 	(targ       (or (args:get-arg "-target")
 			(args:get-arg "-reqtarg")
-			(db:get-target db run-id))))
+			targ-from-db)))
     (pop-directory)
     (if (file-exists? runconfigf)
 	(setup-env-defaults runconfigf run-id #t keys keyvals
 			    environ-patt: (conc "(default"
 						(if targ

ADDED   runs-launch-loop-test.scm
Index: runs-launch-loop-test.scm
==================================================================
--- /dev/null
+++ runs-launch-loop-test.scm
@@ -0,0 +1,59 @@
+(use srfi-69)
+
+;; Next head of the queue: the front of reg when regful is true,
+;; otherwise the front of tal. n is accepted but not used.
+(define (runs:queue-next-hed tal reg n regful)
+  (car (if regful reg tal)))
+
+;; Queue tail for the next iteration. When regful is true tal is returned
+;; untouched; otherwise tal is advanced by one, and if that leaves nothing
+;; the reg list is returned in its place. n is accepted but not used.
+(define (runs:queue-next-tal tal reg n regful)
+  (cond
+   (regful            tal)
+   ((null? (cdr tal)) reg)
+   (else              (cdr tal))))
+
+;; Registered list for the next iteration: (cdr reg) when regful is true, '() when tal
+;; holds exactly one element, otherwise reg unchanged. n is accepted but not used.
+(define (runs:queue-next-reg tal reg n regful)
+  (cond (regful (cdr reg))
+        ((and (pair? tal) (null? (cdr tal))) '()) ;; one-element test without eq? on numbers or a full length walk
+        (else reg)))
+
+(use trace)
+(trace runs:queue-next-hed
+       runs:queue-next-tal
+       runs:queue-next-reg)
+
+
+(define tests '(1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20)) ;; simulated queue of test ids
+
+(define test-registry (make-hash-table)) ;; hed => #t once that test has been "registered"
+(define n 3) ;; reg counts as full once it holds more than n entries
+
+;; The first visit of a test registers it and parks it on reg; a later visit "runs" it.
+;; Both branches only recurse while tal is non-empty, so the loop ends when tal runs out.
+(let loop ((hed   (car tests))
+           (tal   (cdr tests))
+           (reg   '()))
+  (let* ((reglen (length reg))
+         (regful (> reglen n)))
+    (print "hed=" hed ", length reg=" reglen ", (> lenreg n)=" regful)
+    (cond
+     ((not (hash-table-ref/default test-registry hed #f))
+      (hash-table-set! test-registry hed #t)
+      (print "Registering #" hed)
+      (if (not (null? tal))
+          (loop (runs:queue-next-hed tal reg n regful)
+                (runs:queue-next-tal tal reg n regful)
+                ;; park hed at the back of reg; when reg was already full its
+                ;; front entry has just been handed out as the next hed, so drop it
+                (let ((newl (append reg (list hed))))
+                  (if regful (cdr newl) newl)))))
+     (else
+      (print "Running #" hed)
+      (if (not (null? tal))
+          (loop (runs:queue-next-hed tal reg n regful)
+                (runs:queue-next-tal tal reg n regful)
+                (runs:queue-next-reg tal reg n regful)))))))

Index: runs.scm
==================================================================
--- runs.scm
+++ runs.scm
@@ -67,32 +67,12 @@
 (define (runs:test-get-full-path test)
   (let* ((testname (db:test-get-testname   test))
 	 (itempath (db:test-get-item-path test)))
     (conc testname (if (equal? itempath "") "" (conc "(" itempath ")")))))
 
-(define (db:get-run-key-val db run-id key)
-  (let ((res #f))
-    (sqlite3:for-each-row
-     (lambda (val)
-       (set! res val))
-     db 
-     (conc "SELECT " (key:get-fieldname key) " FROM runs WHERE id=?;")
-     run-id)
-    res))
-
-(define (db:get-run-name-from-id db run-id)
-  (let ((res #f))
-    (sqlite3:for-each-row
-     (lambda (runname)
-       (set! res runname))
-     db
-     "SELECT runname FROM runs WHERE id=?;"
-     run-id)
-    res))
-
-(define (set-megatest-env-vars run-id)
-  (let ((keys (cdb:remote-run db:get-keys #f))
+(define (set-megatest-env-vars run-id #!key (inkeys #f)(inrunname #f))
+  (let ((keys (if inkeys inkeys (cdb:remote-run db:get-keys #f)))
 	(vals (hash-table-ref/default *env-vars-by-run-id* run-id #f)))
     ;; get the info from the db and put it in the cache
     (if (not vals)
 	(let ((ht (make-hash-table)))
 	  (hash-table-set! *env-vars-by-run-id* run-id ht)
@@ -107,11 +87,11 @@
      (lambda (key val)
        (debug:print 2 "setenv " (key:get-fieldname key) " " val)
        (setenv (key:get-fieldname key) val)))
     (alist->env-vars (hash-table-ref/default *configdat* "env-override" '()))
     ;; Lets use this as an opportunity to put MT_RUNNAME in the environment
-    (setenv "MT_RUNNAME" (cdb:remote-run db:get-run-name-from-id #f run-id))
+    (setenv "MT_RUNNAME" (if inrunname inrunname (cdb:remote-run db:get-run-name-from-id #f run-id)))
     (setenv "MT_RUN_AREA_HOME" *toppath*)
     ))
 
 (define (set-item-env-vars itemdat)
   (for-each (lambda (item)
@@ -118,34 +98,28 @@
 	      (debug:print 2 "setenv " (car item) " " (cadr item))
 	      (setenv (car item) (cadr item)))
 	    itemdat))
 
 (define *last-num-running-tests* 0)
-(define *runs:can-run-more-tests-delay* 0)
-(define (runs:shrink-can-run-more-tests-delay)
-  (set! *runs:can-run-more-tests-delay* 0)) ;; (/ *runs:can-run-more-tests-delay* 2)))
+
+;; Every call to runs:can-run-more-tests made while tests are running increments this count;
+;; once the count exceeds 20 each call sleeps for 2 seconds before checking again.
+(define *runs:can-run-more-tests-count* 0)
+(define runs:shrink-can-run-more-tests-count ;; sets the count back to zero (the halving variant, (/ count 2), is not used)
+  (lambda () (set! *runs:can-run-more-tests-count* 0)))
 
-(define (runs:can-run-more-tests test-record)
-  (thread-sleep! *runs:can-run-more-tests-delay*)
+(define (runs:can-run-more-tests test-record max-concurrent-jobs)
+  (thread-sleep! (cond
+		  ((> *runs:can-run-more-tests-count* 20) 2);; obviously haven't had any work to do for a while
+		  (else 0)))
   (let* ((tconfig                 (tests:testqueue-get-testconfig test-record))
 	 (jobgroup                (config-lookup tconfig "requirements" "jobgroup"))
-	 ;; Heuristic fix. These are getting called too rapidly when jobs are running or stuck
-	 ;; so we are going to increment a global delay by 0.1 seconds up to 10 seconds
-	 ;; every time runs:can-run-more-tests is called.
-	 ;; when a test is launched or other activity occurs divide the delay by 2
 	 (num-running             (cdb:remote-run db:get-count-tests-running #f))
 	 (num-running-in-jobgroup (cdb:remote-run db:get-count-tests-running-in-jobgroup #f jobgroup))
-	 (max-concurrent-jobs     (let ((mcj (config-lookup *configdat* "setup"     "max_concurrent_jobs")))
-				    (if (and mcj (string->number mcj))
-					(string->number mcj)
-					1)))
 	 (job-group-limit         (config-lookup *configdat* "jobgroups" jobgroup)))
-    (if (and (> (+ num-running num-running-in-jobgroup) 0)
-	     (< *runs:can-run-more-tests-delay* 1))
-	(begin
-	  (set! *runs:can-run-more-tests-delay* (+ *runs:can-run-more-tests-delay* 0.009))
-	  (debug:print-info 14 "can-run-more-tests-delay: " *runs:can-run-more-tests-delay*)))
+    (if (> (+ num-running num-running-in-jobgroup) 0)
+	(set! *runs:can-run-more-tests-count* (+ *runs:can-run-more-tests-count* 1)))
     (if (not (eq? *last-num-running-tests* num-running))
 	(begin
 	  (debug:print 2 "max-concurrent-jobs: " max-concurrent-jobs ", num-running: " num-running)
 	  (set! *last-num-running-tests* num-running)))
     (if (not (eq? 0 *globalexitstatus*))
@@ -171,40 +145,10 @@
 ;; New methodology. These routines will replace the above in time. For
 ;; now the code is duplicated. This stuff is initially used in the monitor
 ;; based code.
 ;;======================================================================
 
-;; register a test run with the db
-(define (runs:register-run db keys keyvallst runname state status user)
-  (debug:print 3 "runs:register-run, keys: " keys " keyvallst: " keyvallst " runname: " runname " state: " state " status: " status " user: " user)
-  (let* ((keystr    (keys->keystr keys))
-	 (comma     (if (> (length keys) 0) "," ""))
-	 (andstr    (if (> (length keys) 0) " AND " ""))
-	 (valslots  (keys->valslots keys)) ;; ?,?,? ...
-	 (keyvals   (map cadr keyvallst))
-	 (allvals   (append (list runname state status user) keyvals))
-	 (qryvals   (append (list runname) keyvals))
-	 (key=?str  (string-intersperse (map (lambda (k)(conc (key:get-fieldname k) "=?")) keys) " AND ")))
-    (debug:print 3 "keys: " keys " allvals: " allvals " keyvals: " keyvals)
-    (debug:print 2 "NOTE: using target " (string-intersperse keyvals "/") " for this run")
-    (if (and runname (null? (filter (lambda (x)(not x)) keyvals))) ;; there must be a better way to "apply and"
-	(let ((res #f))
-	  (apply sqlite3:execute db (conc "INSERT OR IGNORE INTO runs (runname,state,status,owner,event_time" comma keystr ") VALUES (?,?,?,?,strftime('%s','now')" comma valslots ");")
-		 allvals)
-	  (apply sqlite3:for-each-row 
-	   (lambda (id)
-	     (set! res id))
-	   db
-	   (let ((qry (conc "SELECT id FROM runs WHERE (runname=? " andstr key=?str ");")))
-	     ;(debug:print 4 "qry: " qry) 
-	     qry)
-	   qryvals)
-	  (sqlite3:execute db "UPDATE runs SET state=?,status=? WHERE id=?;" state status res)
-	  res) 
-	(begin
-	  (debug:print 0 "ERROR: Called without all necessary keys")
-	  #f))))
 
 ;; This is a duplicate of run-tests (which has been deprecated). Use this one instead of run tests.
 ;; keyvals.
 ;;
 ;;  test-names: Comma separated patterns same as test-patts but used in selection 
@@ -214,20 +158,21 @@
 (define (runs:run-tests target runname test-names test-patts user flags)
   (common:clear-caches) ;; clear all caches
   (let* ((db          #f)
 	 (keys        (cdb:remote-run db:get-keys #f))
 	 (keyvallst   (keys:target->keyval keys target))
-	 (run-id      (cdb:remote-run runs:register-run #f keys keyvallst runname "new" "n/a" user))  ;;  test-name)))
+	 (run-id      (cdb:remote-run db:register-run #f keys keyvallst runname "new" "n/a" user))  ;;  test-name)))
 	 (keyvals     (if run-id (cdb:remote-run db:get-key-vals #f run-id) #f))
 	 (deferred    '()) ;; delay running these since they have a waiton clause
 	 ;; keepgoing is the defacto modality now, will add hit-n-run a bit later
 	 ;; (keepgoing   (hash-table-ref/default flags "-keepgoing" #f))
 	 (runconfigf   (conc  *toppath* "/runconfigs.config"))
 	 (required-tests '())
-	 (test-records (make-hash-table)))
+	 (test-records (make-hash-table))
+     (all-test-names (tests:get-valid-tests *toppath* "%"))) ;; we need a list of all valid tests to check waiton names
 
-    (set-megatest-env-vars run-id) ;; these may be needed by the launching process
+    (set-megatest-env-vars run-id inkeys: keys) ;; these may be needed by the launching process
 
     (if (file-exists? runconfigf)
 	(setup-env-defaults runconfigf run-id *already-seen-runconfig-info* keys keyvals "pre-launch-env-vars")
 	(debug:print 0 "WARNING: You do not have a run config file: " runconfigf))
     
@@ -263,19 +208,27 @@
 					   (begin ;; No config means this is a non-existant test
 					     (debug:print 0 "ERROR: non-existent required test \"" hed "\"")
 					     (if db (sqlite3:finalize! db))
 					     (exit 1)))))
 			    (debug:print-info 8 "waitons string is " instr)
-			    (string-split (cond
-					   ((procedure? instr)
-					    (let ((res (instr)))
-					      (debug:print-info 8 "waiton procedure results in string " res " for test " hed)
-					      res))
-					   ((string? instr)     instr)
-					   (else 
-					    ;; NOTE: This is actually the case of *no* waitons! ;; (debug:print 0 "ERROR: something went wrong in processing waitons for test " hed)
-					    ""))))))
+			    (let ((newwaitons
+				   (string-split (cond
+						  ((procedure? instr)
+						   (let ((res (instr)))
+						     (debug:print-info 8 "waiton procedure results in string " res " for test " hed)
+						     res))
+						  ((string? instr)     instr)
+						  (else 
+						   ;; NOTE: This is actually the case of *no* waitons! ;; (debug:print 0 "ERROR: something went wrong in processing waitons for test " hed)
+						   "")))))
+			      (filter (lambda (x)
+					(if (member x all-test-names)
+					    #t
+					    (begin
+					      (debug:print 0 "ERROR: test " hed " has unrecognised waiton testname " x)
+					      #f)))
+				      newwaitons)))))
 	    (debug:print-info 8 "waitons: " waitons)
 	    ;; check for hed in waitons => this would be circular, remove it and issue an
 	    ;; error
 	    (if (member hed waitons)
 		(begin
@@ -329,11 +282,14 @@
 
     (if (not (null? required-tests))
 	(debug:print-info 1 "Adding " required-tests " to the run queue"))
     ;; NOTE: these are all parent tests, items are not expanded yet.
     (debug:print-info 4 "test-records=" (hash-table->alist test-records))
-    (runs:run-tests-queue run-id runname test-records keyvallst flags test-patts)
+    (let ((reglen (any->number  (configf:lookup *configdat* "setup" "runqueue"))))
+      (if reglen
+	  (runs:run-tests-queue-new     run-id runname test-records keyvallst flags test-patts required-tests reglen)
+	  (runs:run-tests-queue-classic run-id runname test-records keyvallst flags test-patts required-tests)))
     (debug:print-info 4 "All done by here")))
 
 (define (runs:calc-fails prereqs-not-met)
   (filter (lambda (test)
 	    (and (vector? test) ;; not (string? test))
@@ -357,270 +313,38 @@
        lst))
 
 (define (runs:make-full-test-name testname itempath)
   (if (equal? itempath "") testname (conc testname "/" itempath)))
 
-;; test-records is a hash table testname:item_path => vector < testname testconfig waitons priority items-info ... >
-(define (runs:run-tests-queue run-id runname test-records keyvallst flags test-patts)
-    ;; At this point the list of parent tests is expanded 
-    ;; NB// Should expand items here and then insert into the run queue.
-  (debug:print 5 "test-records: " test-records ", keyvallst: " keyvallst " flags: " (hash-table->alist flags))
-  (let ((sorted-test-names (tests:sort-by-priority-and-waiton test-records))
-	(test-registery    (make-hash-table))
-	(num-retries        0)
-	(max-retries       (config-lookup *configdat* "setup" "maxretries")))
-    (set! max-retries (if (and max-retries (string->number max-retries))(string->number max-retries) 100))
-    (if (not (null? sorted-test-names))
-	(let loop ((hed         (car sorted-test-names))
-		   (tal         (cdr sorted-test-names))
-		   (reruns      '()))
-	  (if (not (null? reruns))(debug:print-info 4 "reruns=" reruns))
-	  ;; (print "Top of loop, hed=" hed ", tal=" tal " ,reruns=" reruns)
-	  (let* ((test-record (hash-table-ref test-records hed))
-		 (test-name   (tests:testqueue-get-testname test-record))
-		 (tconfig     (tests:testqueue-get-testconfig test-record))
-		 (testmode    (let ((m (config-lookup tconfig "requirements" "mode")))
-				(if m (string->symbol m) 'normal)))
-		 (waitons     (tests:testqueue-get-waitons    test-record))
-		 (priority    (tests:testqueue-get-priority   test-record))
-		 (itemdat     (tests:testqueue-get-itemdat    test-record)) ;; itemdat can be a string, list or #f
-		 (items       (tests:testqueue-get-items      test-record))
-		 (item-path   (item-list->path itemdat))
-		 (newtal      (append tal (list hed))))
-	    
-	    (debug:print 6
-			 "test-name: " test-name
-			 "\n  hed:         " hed
-			 "\n  itemdat:     " itemdat
-			 "\n  items:       " items
-			 "\n  item-path:   " item-path
-			 "\n  waitons:     " waitons
-			 "\n  num-retries: " num-retries
-			 "\n  tal:         " tal
-			 "\n  reruns:      " reruns)
-
-	    ;; check for hed in waitons => this would be circular, remove it and issue an
-	    ;; error
-	    (if (member test-name waitons)
-		(begin
-		  (debug:print 0 "ERROR: test " test-name " has listed itself as a waiton, please correct this!")
-		  (set! waiton (filter (lambda (x)(not (equal? x hed))) waitons))))
-
-	    (cond ;; OUTER COND
-	     ((not items) ;; when false the test is ok to be handed off to launch (but not before)
-	      (let* ((run-limits-info         (open-run-close runs:can-run-more-tests test-record)) ;; look at the test jobgroup and tot jobs running
-		     (have-resources          (car run-limits-info))
-		     (num-running             (list-ref run-limits-info 1))
-		     (num-running-in-jobgroup (list-ref run-limits-info 2))
-		     (max-concurrent-jobs     (list-ref run-limits-info 3))
-		     (job-group-limit         (list-ref run-limits-info 4))
-		     (prereqs-not-met         (open-run-close db:get-prereqs-not-met #f run-id waitons item-path mode: testmode))
-		     (fails                   (runs:calc-fails prereqs-not-met))
-		     (non-completed           (runs:calc-not-completed prereqs-not-met)))
-		(debug:print-info 8 "have-resources: " have-resources " prereqs-not-met: " 
-			     (string-intersperse 
-			      (map (lambda (t)
-				     (if (vector? t)
-					 (conc (db:test-get-state t) "/" (db:test-get-status t))
-					 (conc " WARNING: t is not a vector=" t )))
-				   prereqs-not-met) ", ") " fails: " fails)
-		(debug:print-info 4 "hed=" hed "\n  test-record=" test-record "\n  test-name: " test-name "\n  item-path: " item-path "\n  test-patts: " test-patts)
-
-		;; Don't know at this time if the test have been launched at some time in the past
-		;; i.e. is this a re-launch?
-		(debug:print-info 4 "run-limits-info = " run-limits-info)
-		(cond ;; INNER COND #1 for a launchable test
-		 ;; Check item path against item-patts
-		 ((not (tests:match test-patts (tests:testqueue-get-testname test-record) item-path)) ;; This test/itempath is not to be run
-		  ;; else the run is stuck, temporarily or permanently
-		  ;; but should check if it is due to lack of resources vs. prerequisites
-		  (debug:print-info 1 "Skipping " (tests:testqueue-get-testname test-record) " " item-path " as it doesn't match " test-patts)
-		  ;; (thread-sleep! *global-delta*)
-		  (if (not (null? tal))
-		      (loop (car tal)(cdr tal) reruns)))
-		 ( ;; (and
-		  (not (hash-table-ref/default test-registery (runs:make-full-test-name test-name item-path) #f))
-		      ;; (and max-concurrent-jobs (> (- max-concurrent-jobs num-running) 5)))
-		  (debug:print-info 4 "Pre-registering test " test-name "/" item-path " to create placeholder" )
-		  (open-run-close db:tests-register-test #f run-id test-name item-path)
-		  (hash-table-set! test-registery (runs:make-full-test-name test-name item-path) #t)
-		  ;; (thread-sleep! *global-delta*)
-(runs:shrink-can-run-more-tests-delay)
-		  (loop (car newtal)(cdr newtal) reruns))
-		 ((not have-resources) ;; simply try again after waiting a second
-		  (debug:print-info 1 "no resources to run new tests, waiting ...")
-		  ;; (thread-sleep! (+ 2 *global-delta*))
-		  ;; could have done hed tal here but doing car/cdr of newtal to rotate tests
-		  (loop (car newtal)(cdr newtal) reruns))
-		 ((and have-resources
-		       (or (null? prereqs-not-met)
-			   (and (eq? testmode 'toplevel)
-				(null? non-completed))))
-		  (run:test run-id runname keyvallst test-record flags #f)
-(runs:shrink-can-run-more-tests-delay)
-		  ;; (thread-sleep! *global-delta*)
-		  (if (not (null? tal))
-		      (loop (car tal)(cdr tal) reruns)))
-		 (else ;; must be we have unmet prerequisites
-		    (debug:print 4 "FAILS: " fails)
-		    ;; If one or more of the prereqs-not-met are FAIL then we can issue
-		    ;; a message and drop hed from the items to be processed.
-		    (if (null? fails)
-			(begin
-			  ;; couldn't run, take a breather
-			  (debug:print-info 4 "Shouldn't really get here, race condition? Unable to launch more tests at this moment, killing time ...")
-			  ;; (thread-sleep! (+ 0.01 *global-delta*)) ;; long sleep here - no resources, may as well be patient
-			  ;; we made new tal by sticking hed at the back of the list
-			  (loop (car newtal)(cdr newtal) reruns))
-			;; the waiton is FAIL so no point in trying to run hed ever again
-			(if (not (null? tal))
-			    (if (vector? hed)
-				(begin (debug:print 1 "WARN: Dropping test " (db:test-get-testname hed) "/" (db:test-get-item-path hed)
-						    " from the launch list as it has prerequistes that are FAIL")
-(runs:shrink-can-run-more-tests-delay)
-				       ;; (thread-sleep! *global-delta*)
-				       (loop (car tal)(cdr tal) (cons hed reruns)))
-				(begin
-				  (debug:print 1 "WARN: Test not processed correctly. Could be a race condition in your test implementation? " hed) ;;  " as it has prerequistes that are FAIL. (NOTE: hed is not a vector)")
-(runs:shrink-can-run-more-tests-delay)
-				  ;; (thread-sleep! (+ 0.01 *global-delta*))
-				  (loop hed tal reruns))))))))) ;; END OF INNER COND
-	     
-	     ;; case where an items came in as a list been processed
-	     ((and (list? items)     ;; thus we know our items are already calculated
-		   (not   itemdat)) ;; and not yet expanded into the list of things to be done
-	      (if (and (debug:debug-mode 1) ;; (>= *verbosity* 1)
-		       (> (length items) 0)
-		       (> (length (car items)) 0))
-		  (pp items))
-	      (for-each
-	       (lambda (my-itemdat)
-		 (let* ((new-test-record (let ((newrec (make-tests:testqueue)))
-					   (vector-copy! test-record newrec)
-					   newrec))
-			(my-item-path (item-list->path my-itemdat)))
-		   (if (tests:match test-patts hed my-item-path) ;; (patt-list-match my-item-path item-patts)           ;; yes, we want to process this item, NOTE: Should not need this check here!
-		       (let ((newtestname (runs:make-full-test-name hed my-item-path)))    ;; test names are unique on testname/item-path
-			 (tests:testqueue-set-items!     new-test-record #f)
-			 (tests:testqueue-set-itemdat!   new-test-record my-itemdat)
-			 (tests:testqueue-set-item_path! new-test-record my-item-path)
-			 (hash-table-set! test-records newtestname new-test-record)
-			 (set! tal (cons newtestname tal)))))) ;; since these are itemized create new test names testname/itempath
-	       items)
-	      (if (not (null? tal))
-		  (begin
-		    (debug:print-info 4 "End of items list, looping with next after short delay")
-                    ;; (thread-sleep! (+ 0.01 *global-delta*))
-		    (loop (car tal)(cdr tal) reruns))))
-
-	     ;; if items is a proc then need to run items:get-items-from-config, get the list and loop 
-	     ;;    - but only do that if resources exist to kick off the job
-	     ((or (procedure? items)(eq? items 'have-procedure))
-	      (let ((can-run-more    (runs:can-run-more-tests test-record)))
-		(if (and (list? can-run-more)
-			 (car can-run-more))
-		    (let* ((prereqs-not-met (open-run-close db:get-prereqs-not-met #f run-id waitons item-path mode: testmode))
-			   (fails           (runs:calc-fails prereqs-not-met))
-			   (non-completed   (runs:calc-not-completed prereqs-not-met)))
-		      (debug:print-info 8 "can-run-more: " can-run-more
-				   "\n testname:        " hed
-				   "\n prereqs-not-met: " (runs:pretty-string prereqs-not-met)
-				   "\n non-completed:   " (runs:pretty-string non-completed) 
-				   "\n fails:           " (runs:pretty-string fails)
-				   "\n testmode:        " testmode
-				   "\n num-retries:     " num-retries
-				   "\n (eq? testmode 'toplevel): " (eq? testmode 'toplevel)
-				   "\n (null? non-completed):    " (null? non-completed)
-				   "\n reruns:          " reruns
-				   "\n items:           " items
-				   "\n can-run-more:    " can-run-more)
-		      ;; (thread-sleep! (+ 0.01 *global-delta*))
-		      (cond ;; INNER COND #2
-		       ((or (null? prereqs-not-met) ;; all prereqs met, fire off the test
-			    ;; or, if it is a 'toplevel test and all prereqs not met are COMPLETED then launch
-			    (and (eq? testmode 'toplevel)
-				 (null? non-completed)))
-			(let ((test-name (tests:testqueue-get-testname test-record)))
-			  (setenv "MT_TEST_NAME" test-name) ;; 
-			  (setenv "MT_RUNNAME"   runname)
-			  (set-megatest-env-vars run-id) ;; these may be needed by the launching process
-			  (let ((items-list (items:get-items-from-config tconfig)))
-			    (if (list? items-list)
-				(begin
-				  (tests:testqueue-set-items! test-record items-list)
-				  ;; (thread-sleep! *global-delta*)
-				  (loop hed tal reruns))
-				(begin
-				  (debug:print 0 "ERROR: The proc from reading the setup did not yield a list - please report this")
-				  (exit 1))))))
-		       ((null? fails)
-			(debug:print-info 4 "fails is null, moving on in the queue but keeping " hed " for now")
-			;; only increment num-retries when there are no tests runing
-			(if (eq? 0 (list-ref can-run-more 1))
-			    (begin
-			      (if (> num-retries 100) ;; first 100 retries are low time cost
-				  (thread-sleep! (+ 2 *global-delta*))
-				  (thread-sleep! (+ 0.01 *global-delta*)))
-			      (set! num-retries (+ num-retries 1))))
-			(if (> num-retries  max-retries)
-			    (if (not (null? tal))
-				(loop (car tal)(cdr tal) reruns))
-			    (loop (car newtal)(cdr newtal) reruns))) ;; an issue with prereqs not yet met?
-		       ((and (not (null? fails))(eq? testmode 'normal))
-			(debug:print-info 1 "test "  hed " (mode=" testmode ") has failed prerequisite(s); "
-				     (string-intersperse (map (lambda (t)(conc (db:test-get-testname t) ":" (db:test-get-state t)"/"(db:test-get-status t))) fails) ", ")
-				     ", removing it from to-do list")
-			(if (not (null? tal))
-			    (begin
-                              ;; (thread-sleep! *global-delta*)
-			      (loop (car tal)(cdr tal)(cons hed reruns)))))
-		       (else
-			(debug:print 8 "ERROR: No handler for this condition.")
-			(thread-sleep! (+ 1 *global-delta*))
-			(loop (car newtal)(cdr newtal) reruns)))) ;; END OF IF CAN RUN MORE
-
-		    ;; if can't run more just loop with next possible test
-		    (begin
-		      (debug:print-info 4 "processing the case with a lambda for items or 'have-procedure. Moving through the queue without dropping " hed)
-		      ;; (thread-sleep! (+ 2 *global-delta*))
-		      (loop (car newtal)(cdr newtal) reruns))))) ;; END OF (or (procedure? items)(eq? items 'have-procedure))
-	     
-	     ;; this case should not happen, added to help catch any bugs
-	     ((and (list? items) itemdat)
-	      (debug:print 0 "ERROR: Should not have a list of items in a test and the itemspath set - please report this")
-	      (exit 1))
-	     ((not (null? reruns))
-	      (let* ((newlst (tests:filter-non-runnable run-id tal test-records)) ;; i.e. not FAIL, WAIVED, INCOMPLETE, PASS, KILLED,
-		     (junked (lset-difference equal? tal newlst)))
-		(debug:print-info 4 "full drop through, if reruns is less than 100 we will force retry them, reruns=" reruns ", tal=" tal)
-		(if (< num-retries max-retries)
-		    (set! newlst (append reruns newlst)))
-		(set! num-retries (+ num-retries 1))
-		;; (thread-sleep! (+ 1 *global-delta*))
-		(if (not (null? newlst))
-		    ;; since reruns have been tacked on to newlst create new reruns from junked
-		    (loop (car newlst)(cdr newlst)(delete-duplicates junked)))))
-	     ((not (null? tal))
-	      (debug:print-info 4 "I'm pretty sure I shouldn't get here."))
-	     (else
-	      (debug:print-info 4 "Exiting loop with...\n  hed=" hed "\n  tal=" tal "\n  reruns=" reruns))
-	     )))) ;; LET* ((test-record
-
-    ;; we get here on "drop through" - loop for next test in queue
-    ;; FIXME!!!! THIS SHOULD NOT REQUIRE AN EXIT!!!!!!!
-    
-    (debug:print-info 1 "All tests launched")
-    (thread-sleep! 0.5)
-    ;; FIXME! This harsh exit should not be necessary....
-    ;; (if (not *runremote*)(exit)) ;; 
-    #f)) ;; return a #f as a hint that we are done
-  ;; Here we need to check that all the tests remaining to be run are eligible to run
-  ;; and are not blocked by failed
-  
+(define (runs:queue-next-hed tal reg n regful) ;; choose the next head item, taken from reg or tal; n is not used in this body
+  (if regful
+      (if (null? reg) ;; regful with an empty reg is not expected (author did not believe this car was the source of a problem); fall back to tal
+	  (car tal)
+	  (car reg)) ;; regful and reg is non-empty: the head comes from reg
+      (car tal))) ;; not regful: the head comes from tal. NOTE(review): assumes tal is non-empty - confirm against callers
+
+(define (runs:queue-next-tal tal reg n regful) ;; compute the next tail list; n is not used in this body
+  (if regful
+      tal ;; regful: tal is returned unchanged
+      (let ((newtal (cdr tal))) ;; not regful: drop the first item of tal. NOTE(review): assumes tal is non-empty - confirm against callers
+	(if (null? newtal)
+	    reg ;; tal is exhausted, so reg becomes the new tail
+	    newtal
+	    ))))
+
+(define (runs:queue-next-reg tal reg n regful) ;; compute the next reg list; n is not used in this body
+  (if regful
+      (cdr reg) ;; regful: drop the first item of reg. NOTE(review): unguarded for an empty reg, unlike runs:queue-next-hed - confirm callers never pass regful with reg empty
+      (if (eq? (length tal) 1) ;; exactly one item left in tal, i.e. the case where runs:queue-next-tal returns reg
+	  '() ;; so reg is emptied here
+	  reg))) ;; otherwise reg is returned unchanged
+
+(include "run-tests-queue-classic.scm")
+(include "run-tests-queue-new.scm")
 
 ;; parent-test is there as a placeholder for when parent-tests can be run as a setup step
-(define (run:test run-id runname keyvallst test-record flags parent-test)
+(define (run:test run-id run-info key-vals runname keyvallst test-record flags parent-test)
   ;; All these vars might be referenced by the testconfig file reader
   (let* ((test-name    (tests:testqueue-get-testname   test-record))
 	 (test-waitons (tests:testqueue-get-waitons    test-record))
 	 (test-conf    (tests:testqueue-get-testconfig test-record))
 	 (itemdat      (tests:testqueue-get-itemdat    test-record))
@@ -638,19 +362,19 @@
     (if (not itemdat)(set! itemdat '()))
     (set! item-path (item-list->path itemdat))
     (debug:print 2 "Attempting to launch test " test-name (if (equal? item-path "/") "/" item-path))
     (setenv "MT_TEST_NAME" test-name) ;; 
     (setenv "MT_RUNNAME"   runname)
-    (set-megatest-env-vars run-id) ;; these may be needed by the launching process
+    (set-megatest-env-vars run-id inrunname: runname) ;; these may be needed by the launching process
     (change-directory *toppath*)
 
     ;; Here is where the test_meta table is best updated
     ;; Yes, another use of a global for caching. Need a better way?
     (if (not (hash-table-ref/default *test-meta-updated* test-name #f))
         (begin
 	   (hash-table-set! *test-meta-updated* test-name #t)
-           (open-run-close runs:update-test_meta db test-name test-conf)))
+           (runs:update-test_meta test-name test-conf)))
     
     ;; (lambda (itemdat) ;;; ((ripeness "overripe") (temperature "cool") (season "summer"))
     (let* ((new-test-path (string-intersperse (cons test-path (map cadr itemdat)) "/"))
 	   (new-test-name (if (equal? item-path "") test-name (conc test-name "/" item-path))) ;; just need it to be unique
 	   (test-id       (cdb:remote-run db:get-test-id #f  run-id test-name item-path))
@@ -667,11 +391,11 @@
 	    ;;
 	    (set! test-id (open-run-close db:get-test-id db run-id test-name item-path))
 	    (if (not test-id)
 		(begin
 		  (debug:print 2 "WARN: Test not pre-created? test-name=" test-name ", item-path=" item-path ", run-id=" run-id)
-		  (open-run-close db:tests-register-test #f run-id test-name item-path)
+		  (cdb:tests-register-test *runremote* run-id test-name item-path)
 		  (set! test-id (open-run-close db:get-test-id db run-id test-name item-path))))
 	    (debug:print-info 4 "test-id=" test-id ", run-id=" run-id ", test-name=" test-name ", item-path=\"" item-path "\"")
 	    (set! testdat (cdb:get-test-info-by-id *runremote* test-id))))
       (set! test-id (db:test-get-id testdat))
       (change-directory test-path)
@@ -719,11 +443,12 @@
 		   (debug:print 1 "NOTE: Not starting test " new-test-name " as it is state \"" (test:get-state testdat) 
 				"\" and status \"" (test:get-status testdat) "\", use -rerun \"" (test:get-status testdat)
                                 "\" or -force to override"))
 	       ;; NOTE: No longer be checking prerequisites here! Will never get here unless prereqs are
 	       ;;       already met.
-	       (if (not (launch-test #f run-id runname test-conf keyvallst test-name test-path itemdat flags))
+	       ;; This would be a great place to do the process-fork
+	       (if (not (launch-test test-id run-id run-info key-vals runname test-conf keyvallst test-name test-path itemdat flags))
 		   (begin
 		     (print "ERROR: Failed to launch the test. Exiting as soon as possible")
 		     (set! *globalexitstatus* 1) ;; 
 		     (process-signal (current-process-id) signal/kill))))))
 	((KILLED) 
@@ -911,11 +636,11 @@
 	    ;;     ))
 	(set! keys (open-run-close db:get-keys db))
 	;; have enough to process -target or -reqtarg here
 	(if (args:get-arg "-reqtarg")
 	    (let* ((runconfigf (conc  *toppath* "/runconfigs.config")) ;; DO NOT EVALUATE ALL 
-		   (runconfig  (read-config runconfigf #f #t environ-patt: #f))) 
+		   (runconfig  (read-config runconfigf #f #t environ-patt: #f)))
 	      (if (hash-table-ref/default runconfig (args:get-arg "-reqtarg") #f)
 		  (keys:target-set-args keys (args:get-arg "-reqtarg") args:arg-hash)
 		  (begin
 		    (debug:print 0 "ERROR: [" (args:get-arg "-reqtarg") "] not found in " runconfigf)
 		    (if db (sqlite3:finalize! db))
@@ -957,26 +682,26 @@
 ;;======================================================================
 ;; Rollup runs
 ;;======================================================================
 
 ;; Update the test_meta table for this test
-(define (runs:update-test_meta db test-name test-conf)
-  (let ((currrecord (open-run-close db:testmeta-get-record db test-name)))
+(define (runs:update-test_meta test-name test-conf)
+  (let ((currrecord (cdb:remote-run db:testmeta-get-record #f test-name)))
     (if (not currrecord)
 	(begin
 	  (set! currrecord (make-vector 10 #f))
-	  (open-run-close db:testmeta-add-record db test-name)))
+	  (cdb:remote-run db:testmeta-add-record #f test-name)))
     (for-each 
      (lambda (key)
        (let* ((idx (cadr key))
 	      (fld (car  key))
 	      (val (config-lookup test-conf "test_meta" fld)))
 	 ;; (debug:print 5 "idx: " idx " fld: " fld " val: " val)
 	 (if (and val (not (equal? (vector-ref currrecord idx) val)))
 	     (begin
 	       (print "Updating " test-name " " fld " to " val)
-	       (open-run-close db:testmeta-update-field db test-name fld val)))))
+	       (cdb:remote-run db:testmeta-update-field #f test-name fld val)))))
      '(("author" 2)("owner" 3)("description" 4)("reviewed" 5)("tags" 9)))))
 
 ;; Update test_meta for all tests
 (define (runs:update-all-test_meta db)
   (let ((test-names (get-all-legal-tests)))
@@ -986,18 +711,18 @@
 	      (test-configf (conc test-path "/testconfig"))
 	      (testexists   (and (file-exists? test-configf)(file-read-access? test-configf)))
 	      ;; read configs with tricks turned off (i.e. no system)
 	      (test-conf    (if testexists (read-config test-configf #f #f)(make-hash-table))))
 	 ;; use the open-run-close instead of passing in db
-	 (runs:update-test_meta #f test-name test-conf)))
+	 (runs:update-test_meta test-name test-conf)))
      test-names)))
 
 ;; This could probably be refactored into one complex query ...
 (define (runs:rollup-run keys keyvallst runname user) ;; was target, now keyvallst
   (debug:print 4 "runs:rollup-run, keys: " keys " keyvallst: " keyvallst " :runname " runname " user: " user)
   (let* ((db              #f) ;; (keyvalllst      (keys:target->keyval keys target))
-	 (new-run-id      (open-run-close runs:register-run db keys keyvallst runname "new" "n/a" user))
+	 (new-run-id      (cdb:remote-run db:register-run #f keys keyvallst runname "new" "n/a" user))
 	 (prev-tests      (open-run-close test:get-matching-previous-test-run-records db new-run-id "%" "%"))
 	 (curr-tests      (open-run-close db:get-tests-for-run db new-run-id "%/%" '() '()))
 	 (curr-tests-hash (make-hash-table)))
     (open-run-close db:update-run-event_time db new-run-id)
     ;; index the already saved tests by testname and itemdat in curr-tests-hash

Index: server.scm
==================================================================
--- server.scm
+++ server.scm
@@ -8,11 +8,12 @@
 ;;  implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
 ;;  PURPOSE.
 
 (require-extension (srfi 18) extras tcp s11n)
 
-(use srfi-1 posix regex regex-case srfi-69 hostinfo md5 message-digest zmq)
+(use srfi-1 posix regex regex-case srfi-69 hostinfo md5 message-digest)
+;; (use zmq)
 
 (use spiffy uri-common intarweb http-client spiffy-request-vars)
 
 (declare (unit server))
 
@@ -19,11 +20,11 @@
 (declare (uses common))
 (declare (uses db))
 (declare (uses tasks)) ;; tasks are where stuff is maintained about what is running.
 (declare (uses synchash))
 (declare (uses http-transport))
-(declare (uses zmq-transport))
+;; (declare (uses zmq-transport))
 (declare (uses daemon))
 
 (include "common_records.scm")
 (include "db_records.scm")
 

Index: tasks.scm
==================================================================
--- tasks.scm
+++ tasks.scm
@@ -21,14 +21,18 @@
 ;;======================================================================
 ;; Tasks db
 ;;======================================================================
 
 (define (tasks:open-db)
-  (let* ((dbpath  (conc *toppath* "/monitor.db"))
-	 (exists  (file-exists? dbpath))
-	 (mdb     (sqlite3:open-database dbpath)) ;; (never-give-up-open-db dbpath))
-	 (handler (make-busy-timeout 36000)))
+  (let* ((dbpath       (conc *toppath* "/monitor.db"))
+	 (exists       (file-exists? dbpath))
+	 (write-access (file-write-access? dbpath))
+	 (mdb          (sqlite3:open-database dbpath)) ;; (never-give-up-open-db dbpath))
+	 (handler      (make-busy-timeout 36000)))
+    (if (and exists
+	     (not write-access))
+	(set! *db-write-access* write-access)) ;; only ever clear the flag here (never set it) so other dbs can also use this control
     (sqlite3:set-busy-handler! mdb handler)
     (sqlite3:execute mdb (conc "PRAGMA synchronous = 0;"))
     (if (not exists)
 	(begin
 	  (sqlite3:execute mdb "CREATE TABLE IF NOT EXISTS tasks_queue (id INTEGER PRIMARY KEY,
@@ -105,19 +109,20 @@
    ))
 
 ;; NB// two servers with same pid on different hosts will be removed from the list if pid: is used!
 (define (tasks:server-deregister mdb hostname #!key (port #f)(pid #f)(action 'markdead))
   (debug:print-info 11 "server-deregister " hostname ", port " port ", pid " pid)
-  (if pid
-      (case action
-	((delete)(sqlite3:execute mdb "DELETE FROM servers WHERE pid=?;" pid))
-	(else    (sqlite3:execute mdb "UPDATE servers SET state='dead' WHERE pid=?;" pid)))
-      (if port
-	  (case action
-	    ((delete)(sqlite3:execute mdb "DELETE FROM servers WHERE  hostname=? AND port=?;" hostname port))
-	    (else    (sqlite3:execute mdb "UPDATE servers SET state='dead' WHERE hostname=? AND port=?;" hostname port)))
-	  (debug:print 0 "ERROR: tasks:server-deregister called with neither pid nor port specified"))))
+  (if *db-write-access*
+      (if pid
+	  (case action
+	    ((delete)(sqlite3:execute mdb "DELETE FROM servers WHERE pid=?;" pid))
+	    (else    (sqlite3:execute mdb "UPDATE servers SET state='dead' WHERE pid=?;" pid)))
+	  (if port
+	      (case action
+		((delete)(sqlite3:execute mdb "DELETE FROM servers WHERE  hostname=? AND port=?;" hostname port))
+		(else    (sqlite3:execute mdb "UPDATE servers SET state='dead' WHERE hostname=? AND port=?;" hostname port)))
+	      (debug:print 0 "ERROR: tasks:server-deregister called with neither pid nor port specified")))))
 
 (define (tasks:server-deregister-self mdb hostname)
   (tasks:server-deregister mdb hostname pid: (current-process-id)))
 
 ;; need a simple call for robustly removing records given host and port
@@ -141,11 +146,18 @@
 	 "SELECT id FROM servers WHERE pid=-999;")))
      (if hostname hostname iface)(if pid pid port))
     res))
 
 (define (tasks:server-update-heartbeat mdb server-id)
-  (sqlite3:execute mdb "UPDATE servers SET heartbeat=strftime('%s','now') WHERE id=?;" server-id))
+  (debug:print-info 0 "Heart beat update of server id=" server-id)
+  (handle-exceptions
+   exn
+   (begin
+     (debug:print 0 "WARNING: probable timeout on monitor.db access")
+     (thread-sleep! 1)
+     (tasks:server-update-heartbeat mdb server-id))
+   (sqlite3:execute mdb "UPDATE servers SET heartbeat=strftime('%s','now') WHERE id=?;" server-id)))
 
 ;; alive servers keep the heartbeat field upto date with seconds every 6 or so seconds
 (define (tasks:server-alive? mdb server-id #!key (iface #f)(hostname #f)(port #f)(pid #f))
   (let* ((server-id  (if server-id 
 			 server-id

Index: tests.scm
==================================================================
--- tests.scm
+++ tests.scm
@@ -51,13 +51,13 @@
       (set! res (string-match (regexp finpatt (if like #t #f)) str))
       (if notpatt (not res) res))))
 
 ;; if itempath is #f then look only at the testname part
 ;;
-(define (tests:match patterns testname itempath)
+(define (tests:match patterns testname itempath #!key (required '()))
   (if (string? patterns)
-      (let ((patts (string-split patterns ",")))
+      (let ((patts (append (string-split patterns ",") required)))
 	(if (null? patts) ;;; no pattern(s) means no match
 	    #f
 	    (let loop ((patt (car patts))
 		       (tal  (cdr patts)))
 	      ;; (print "loop: patt: " patt ", tal " tal)
@@ -244,11 +244,11 @@
       (pop-directory)
       result)))
 
 
 ;; Do not rpc this one, do the underlying calls!!!
-(define (tests:test-set-status! test-id state status comment dat)
+(define (tests:test-set-status! test-id state status comment dat #!key (work-area #f))
   (debug:print-info 4 "tests:test-set-status! test-id=" test-id ", state=" state ", status=" status ", dat=" dat)
   (let* ((db          #f)
 	 (real-status status)
 	 (otherdat    (if dat dat (make-hash-table)))
 	 (testdat     (cdb:get-test-info-by-id *runremote* test-id))
@@ -290,11 +290,11 @@
 	(cdb:test-set-status-state *runremote* test-id real-status state (if waived waived comment)))
     
     ;; if status is "AUTO" then call rollup (note, this one modifies data in test
     ;; run area, it does remote calls under the hood.
     (if (and test-id state status (equal? status "AUTO")) 
-	(db:test-data-rollup #f test-id status))
+	(db:test-data-rollup #f test-id status work-area: work-area))
 
     ;; add metadata (need to do this way to avoid SQL injection issues)
 
     ;; :first_err
     ;; (let ((val (hash-table-ref/default otherdat ":first_err" #f)))
@@ -324,11 +324,12 @@
 			   expected ","
 			   tol      ","
 			   units    ","
 			   dcomment ",," ;; extra comma for status
 			   type     )))
-	    (cdb:remote-run db:csv->test-data #f test-id
+	    ;; This was run remote, don't think that makes sense.
+	    (db:csv->test-data #f test-id
 				dat))))
       
     ;; need to update the top test record if PASS or FAIL and this is a subtest
     (if (not (equal? item-path ""))
 	(cdb:roll-up-pass-fail-counts *runremote* run-id test-name item-path status))
@@ -553,32 +554,41 @@
 	 tdb
 	 "SELECT count(id) FROM test_rundat;")
 	res))
   0)
 
-(define (db:update-central-meta-info db test-id cpuload diskfree minutes num-records uname hostname)
-  (sqlite3:execute db "UPDATE tests SET cpuload=?,diskfree=? WHERE id=?;"
-		   cpuload
-		   diskfree
-		   test-id)
-  (if minutes (sqlite3:execute db "UPDATE tests SET run_duration=? WHERE id=?;" minutes test-id))
-  (if (eq? num-records 0)
-      (sqlite3:execute db "UPDATE tests SET uname=?,host=? WHERE id=?;"
-		       uname hostname test-id)))
-
-(define (test-set-meta-info db test-id run-id testname itemdat minutes)
+(define (tests:update-central-meta-info test-id cpuload diskfree minutes num-records uname hostname)
+  ;; Send cpuload/diskfree for test-id through *runremote*; run duration and uname/host
+  ;; are sent conditionally. Candidate for batching into one transactionized server write.
+  (cdb:tests-update-cpuload-diskfree *runremote* test-id cpuload diskfree)
+  ;; Former direct-SQL version, kept for reference:
+    ;; (let ((db (open-db)))
+    ;;   (sqlite3:execute db "UPDATE tests SET cpuload=?,diskfree=? WHERE id=?;"
+    ;;                    cpuload diskfree test-id)
+    ;; run duration is only sent when minutes is not #f
+    (if minutes 
+	(cdb:tests-update-run-duration *runremote* test-id minutes))
+	;; (sqlite3:execute db "UPDATE tests SET run_duration=? WHERE id=?;" minutes test-id))
+    (if (eq? num-records 0)
+	(cdb:tests-update-uname-host *runremote* test-id uname hostname))
+	;;(sqlite3:execute db "UPDATE tests SET uname=?,host=? WHERE id=?;" uname hostname test-id))
+    ;;(sqlite3:finalize! db))
+    )
+  
+(define (tests:set-meta-info db test-id run-id testname itemdat minutes work-area) ;; record one cpuload/diskfree/run_duration sample in the test db
   ;; DOES cdb:remote-run under the hood!
-  (let* ((tdb         (db:open-test-db-by-test-id db test-id))
+  (let* ((tdb         (db:open-test-db-by-test-id db test-id work-area: work-area)) ;; work-area is forwarded to the test-db opener
 	 (num-records (test:tdb-get-rundat-count tdb))
 	 (cpuload  (get-cpu-load))
 	 (diskfree (get-df (current-directory))))
     (if (eq? (modulo num-records 10) 0) ;; every ten records update central
 	(let ((uname    (get-uname "-srvpio"))
 	      (hostname (get-host-name)))
-	  (cdb:remote-run db:update-central-meta-info db test-id cpuload diskfree minutes num-records uname hostname)))
+	  (tests:update-central-meta-info test-id cpuload diskfree minutes num-records uname hostname))) ;; called directly, no longer wrapped in cdb:remote-run
     (sqlite3:execute tdb "INSERT INTO test_rundat (update_time,cpuload,diskfree,run_duration) VALUES (strftime('%s','now'),?,?,?);"
-		     cpuload diskfree minutes)))
+		     cpuload diskfree minutes)
+    (sqlite3:finalize! tdb))) ;; finalize the tdb handle obtained at the top of the let*
 	  
 ;;======================================================================
 ;; A R C H I V I N G
 ;;======================================================================
 

Index: tests/Makefile
==================================================================
--- tests/Makefile
+++ tests/Makefile
@@ -44,30 +44,32 @@
 
 
 test3 : fullprep
 	cd fullrun;$(MEGATEST) -runtests runfirst -reqtarg ubuntu/nfs/none :runname $(RUNNAME)_b -debug 10
 
-test4 : fullprep
-	cd fullrun;$(MEGATEST) -debug $(DEBUG) -runall -reqtarg ubuntu/nfs/none :runname $(RUNNAME)_b -m "This is a comment specific to a run" -v $(LOGGING)
+test4 : cleanprep  # timed run of every test (-runtests %) for target ubuntu/nfs/none
+	@echo "WARNING: No longer running fullprep, test coverage may be lessened"
+	cd fullrun && time $(MEGATEST) -debug $(DEBUG) -runtests % -reqtarg ubuntu/nfs/none :runname $(RUNNAME)_b -m "This is a comment specific to a run" -v $(LOGGING)
 
 # NOTE: Only one instance can be a server
-test5 : fullprep
-	cd fullrun;sleep  0;$(MEGATEST) -runtests % -target $(TARGET) :runname $(RUNNAME)_aa -debug $(DEBUG) $(LOGGING) > aa.log 2> aa.log &
-	cd fullrun;sleep 10;$(MEGATEST) -runtests % -target $(TARGET) :runname $(RUNNAME)_ab -debug $(DEBUG) $(LOGGING) > ab.log 2> ab.log &
-	cd fullrun;sleep 10;$(MEGATEST) -runtests % -target $(TARGET) :runname $(RUNNAME)_ac -debug $(DEBUG) $(LOGGING) > ac.log 2> ac.log &
-	cd fullrun;sleep 10;$(MEGATEST) -runtests % -target $(TARGET) :runname $(RUNNAME)_ad -debug $(DEBUG) $(LOGGING) > ad.log 2> ad.log &	
-#	cd fullrun;sleep 10;$(MEGATEST) -runtests % -target $(TARGET) :runname $(RUNNAME)_ae -debug $(DEBUG) $(LOGGING) > ae.log 2> ae.log &	
-#	cd fullrun;sleep 10;$(MEGATEST) -runtests % -target $(TARGET) :runname $(RUNNAME)_af -debug $(DEBUG) $(LOGGING) > af.log 2> af.log &	
+test5 : cleanprep  # start four background runs (_aa.._ad); stdout and stderr of each share one log file
+	@echo "WARNING: No longer running fullprep, test coverage may be lessened"
+	cd fullrun && sleep 0 && $(MEGATEST) -runtests % -target $(TARGET) :runname $(RUNNAME)_aa -debug $(DEBUG) $(LOGGING) > aa.log 2>&1 &
+	cd fullrun && sleep 0 && $(MEGATEST) -runtests % -target $(TARGET) :runname $(RUNNAME)_ab -debug $(DEBUG) $(LOGGING) > ab.log 2>&1 &
+	cd fullrun && sleep 0 && $(MEGATEST) -runtests % -target $(TARGET) :runname $(RUNNAME)_ac -debug $(DEBUG) $(LOGGING) > ac.log 2>&1 &
+	cd fullrun && sleep 0 && $(MEGATEST) -runtests % -target $(TARGET) :runname $(RUNNAME)_ad -debug $(DEBUG) $(LOGGING) > ad.log 2>&1 &
+#	cd fullrun && sleep 0 && $(MEGATEST) -runtests % -target $(TARGET) :runname $(RUNNAME)_ae -debug $(DEBUG) $(LOGGING) > ae.log 2>&1 &
+#	cd fullrun && sleep 0 && $(MEGATEST) -runtests % -target $(TARGET) :runname $(RUNNAME)_af -debug $(DEBUG) $(LOGGING) > af.log 2>&1 &
 
 test6: fullprep
 	cd fullrun;$(MEGATEST) -runtests runfirst -testpatt %/1 -reqtarg ubuntu/nfs/none :runname $(RUNNAME)_itempatt -v
 	cd fullrun;$(MEGATEST) -runtests runfirst -testpatt %blahha% -reqtarg ubuntu/nfs/none :runname $(RUNNAME)_itempatt -debug 10
 	cd fullrun;$(MEGATEST) -rollup :runname newrun -target ubuntu/nfs/none -debug 10
 
 
 cleanprep : ../*.scm Makefile */*.config
-	mkdir -p /tmp/mt_runs /tmp/mt_links
+	mkdir -p fullrun/tmp/mt_runs fullrun/tmp/mt_links
 	cd ..;make;make install
 	rm -f */logging.db
 	touch cleanprep
 
 fullprep : cleanprep

ADDED   tests/fdktestqa/testqa/Makefile
Index: tests/fdktestqa/testqa/Makefile
==================================================================
--- /dev/null
+++ tests/fdktestqa/testqa/Makefile
@@ -0,0 +1,25 @@
+BINDIR=$(CURDIR)/../../../bin
+MEGATEST=$(BINDIR)/megatest
+DASHBOARD=$(BINDIR)/dashboard
+all :  # run every test for target a/b under runname c
+	$(MEGATEST) -runtests % -target a/b :runname c
+.PHONY : all bigbig bigrun bigrun2 dashboard compile clean
+bigbig :  # four concurrent runs, runnames a..d ($$tn: the shell, not make, must expand tn)
+	for tn in a b c d;do \
+	   ($(MEGATEST) -runtests % -target a/b :runname $$tn & ) ; \
+	done
+
+bigrun :  # run only the bigrun test
+	$(MEGATEST) -runtests bigrun -target a/bigrun :runname a
+
+bigrun2 :  # run only the bigrun2 test
+	$(MEGATEST) -runtests bigrun2 -target a/bigrun2 :runname a
+
+dashboard :  # start the dashboard in the background
+	$(DASHBOARD) -rows 20 &
+
+compile :  # rebuild and install from three directories up; $(MAKE) passes -j/-n down
+	(cd ../../.. && $(MAKE) && $(MAKE) install)
+
+clean :  # remove run output under ../simple*/ and the local megatest.db
+	rm -rf ../simple*/*/* megatest.db

Index: tests/fdktestqa/testqa/megatest.config
==================================================================
--- tests/fdktestqa/testqa/megatest.config
+++ tests/fdktestqa/testqa/megatest.config
@@ -1,5 +1,8 @@
 [setup]
-testcopycmd cp --remove-destination -rlv TEST_SRC_PATH/. TEST_TARG_PATH/.
+testcopycmd cp --remove-destination -rlv TEST_SRC_PATH/. TEST_TARG_PATH/. >> TEST_TARG_PATH/mt_launch.log 2>> TEST_TARG_PATH/mt_launch.log
+runqueue 2
 
 [include ../fdk.config]
 
+[server]
+timeout 0.01

ADDED   tests/fdktestqa/testqa/runsuite.sh
Index: tests/fdktestqa/testqa/runsuite.sh
==================================================================
--- /dev/null
+++ tests/fdktestqa/testqa/runsuite.sh
@@ -0,0 +1,18 @@
+#!/bin/bash
+# Build and install from ../../.., start six background runs (a..f), then log the RUNNING count in an endless loop.
+(cd ../../..;make && make install) || exit 1
+export PATH=$PWD/../../../bin:$PATH  # prefer the binaries in ../../../bin
+
+for i in a b c d e f;do
+  # further runnames, currently disabled: g h i j k l m n o p q r s t u v w x y z;do
+  megatest -runtests % -target a/b :runname $i &
+done
+
+echo "" > num-running.log
+while true; do
+  foo=`megatest -list-runs % | grep RUNNING | wc -l`
+  echo "Num running at `date` $foo"
+  echo "$foo at `date`" >> num-running.log
+  # to make the test go at a reasonable clip only gather this info every minute
+  sleep 1m
+done

Index: tests/fdktestqa/testqa/tests/bigrun/step1.sh
==================================================================
--- tests/fdktestqa/testqa/tests/bigrun/step1.sh
+++ tests/fdktestqa/testqa/tests/bigrun/step1.sh
@@ -1,3 +1,8 @@
 #!/bin/sh
-sleep 10
+if [ $NUMBER -lt 200 ];then 
+   sleep $NUMBER
+else
+   sleep 200
+fi
+
 exit 0

Index: tests/fdktestqa/testqa/tests/bigrun/testconfig
==================================================================
--- tests/fdktestqa/testqa/tests/bigrun/testconfig
+++ tests/fdktestqa/testqa/tests/bigrun/testconfig
@@ -7,11 +7,11 @@
 # waiton setup
 priority 0
 
 # Iteration for your tests are controlled by the items section
 [items]
-NUMBER #{scheme (string-intersperse (map number->string (sort (let loop ((a 0)(res '()))(if (< a 120)(loop (+ a 1)(cons a res)) res)) >)) " ")}
+NUMBER #{scheme (string-intersperse (map number->string (sort (let loop ((a 0)(res '()))(if (< a (or (any->number (get-environment-variable "NUMTESTS")) 1100))(loop (+ a 1)(cons a res)) res)) >)) " ")}
 
 # test_meta is a section for storing additional data on your test
 [test_meta]
 author matt
 owner  matt

ADDED   tests/fdktestqa/testqa/tests/bigrun2/step1.sh
Index: tests/fdktestqa/testqa/tests/bigrun2/step1.sh
==================================================================
--- /dev/null
+++ tests/fdktestqa/testqa/tests/bigrun2/step1.sh
@@ -0,0 +1,9 @@
+#!/bin/sh
+# prev_test=`$MT_MEGATEST -test-paths -target $MT_TARGET :runname $MT_RUNNAME -testpatt bigrun/$NUMBER`
+# if [ -e $prev_test/testconfig ]; then
+#   exit 0
+# else
+#   exit 1
+# fi
+
+exit 0

ADDED   tests/fdktestqa/testqa/tests/bigrun2/testconfig
Index: tests/fdktestqa/testqa/tests/bigrun2/testconfig
==================================================================
--- /dev/null
+++ tests/fdktestqa/testqa/tests/bigrun2/testconfig
@@ -0,0 +1,22 @@
+# Add additional steps here. Format is "stepname script"
+[ezsteps]
+step1 step1.sh
+
+# Test requirements are specified here
+[requirements]
+# waiton bigrun
+priority 0
+mode itemmatch
+
+
+# Iteration for your tests are controlled by the items section
+[items]
+NUMBER #{scheme (string-intersperse (map number->string (sort (let loop ((a 0)(res '()))(if (< a 1500)(loop (+ a 1)(cons a res)) res)) >)) " ")}
+
+# test_meta is a section for storing additional data on your test
+[test_meta]
+author matt
+owner  matt
+description An example test
+tags tagone,tagtwo
+reviewed never

ADDED   tests/fslsync/megatest.config
Index: tests/fslsync/megatest.config
==================================================================
--- /dev/null
+++ tests/fslsync/megatest.config
@@ -0,0 +1,20 @@
+[fields]
+YEAR    TEXT
+WEEKNUM TEXT
+DAY     TEXT
+
+[setup]
+# Adjust max_concurrent_jobs to limit how much you load your machines
+max_concurrent_jobs 50
+
+# This is your link path, you can move it but it is generally better to keep it stable
+linktree #{shell readlink -f #{getenv MT_RUN_AREA_HOME}/fslsynclinks}
+
+# Job tools are more advanced ways to control how your jobs are launched
+[jobtools]
+useshell yes
+launcher nbfind
+
+# As you run more tests you may need to add additional disks, the names are arbitrary but must be unique
+[disks]
+disk0 #{shell readlink -f #{getenv MT_RUN_AREA_HOME}/fslsyncruns}

ADDED   tests/fslsync/runconfigs.config
Index: tests/fslsync/runconfigs.config
==================================================================
--- /dev/null
+++ tests/fslsync/runconfigs.config
@@ -0,0 +1,5 @@
+[default]
+WORKAREA /tmp/#{getenv USER}/fslsync
+FSLSAREA /tmp/#{getenv USER}/fsls
+AREANAMES code data
+SITENAMES #{shell cat $MT_RUN_AREA_HOME/sites.dat}

ADDED   tests/fslsync/sites.dat.template
Index: tests/fslsync/sites.dat.template
==================================================================
--- /dev/null
+++ tests/fslsync/sites.dat.template
@@ -0,0 +1,1 @@
+site1

ADDED   tests/fslsync/tests/setup/mkdirs.logpro
Index: tests/fslsync/tests/setup/mkdirs.logpro
==================================================================
--- /dev/null
+++ tests/fslsync/tests/setup/mkdirs.logpro
@@ -0,0 +1,8 @@
+;; You should have at least one expect:required. This ensures that your process ran
+(expect:required in "LogFileBody" > 0 "done" #/done/)
+
+;; You may need ignores to suppress false error or warning hits from the later expects
+;; NOTE: Order is important here!
+(expect:ignore   in "LogFileBody"  < 99 "Ignore the word error in comments" #/^\/\/.*error/)
+(expect:warning  in "LogFileBody"  = 0 "Any warning" #/warn/)
+(expect:error    in "LogFileBody"  = 0 "Any error"  (list #/ERROR/ #/error/)) ;; but disallow any other errors

ADDED   tests/fslsync/tests/setup/mkdirs.sh
Index: tests/fslsync/tests/setup/mkdirs.sh
==================================================================
--- /dev/null
+++ tests/fslsync/tests/setup/mkdirs.sh
@@ -0,0 +1,11 @@
+#!/usr/bin/env bash
+
+# Create needed directories both local and remote
+
+# Remote
+ssh $SITENAME mkdir -vp $WORKAREA/$SITENAME/$AREANAME
+
+# Local
+mkdir -vp $WORKAREA/$SITENAME/$AREANAME
+
+echo done

ADDED   tests/fslsync/tests/setup/seedcache.logpro
Index: tests/fslsync/tests/setup/seedcache.logpro
==================================================================
--- /dev/null
+++ tests/fslsync/tests/setup/seedcache.logpro
@@ -0,0 +1,8 @@
+;; You should have at least one expect:required. This ensures that your process ran
+(expect:required in "LogFileBody" > 0 "done" #/done/)
+
+;; You may need ignores to suppress false error or warning hits from the later expects
+;; NOTE: Order is important here!
+(expect:ignore   in "LogFileBody"  < 99 "Ignore the word error in comments" #/^\/\/.*error/)
+(expect:warning  in "LogFileBody"  = 0 "Any warning" #/warn/)
+(expect:error    in "LogFileBody"  = 0 "Any error"  (list #/ERROR/ #/error/)) ;; but disallow any other errors

ADDED   tests/fslsync/tests/setup/seedcache.sh
Index: tests/fslsync/tests/setup/seedcache.sh
==================================================================
--- /dev/null
+++ tests/fslsync/tests/setup/seedcache.sh
@@ -0,0 +1,6 @@
+#!/usr/bin/env bash
+
+# Copy any non-existent files to the cache before doing the rsync
+# in the hopes of saving some time.
+
+echo done

ADDED   tests/fslsync/tests/setup/testconfig
Index: tests/fslsync/tests/setup/testconfig
==================================================================
--- /dev/null
+++ tests/fslsync/tests/setup/testconfig
@@ -0,0 +1,21 @@
+# Add additional steps here. Format is "stepname script"
+[ezsteps]
+mkdirs     mkdirs.sh
+seedcache  seedcache.sh
+
+# Test requirements are specified here
+[requirements]
+priority 0
+
+# Iteration for your tests are controlled by the items section
+[items]
+AREANAME #{getenv AREANAMES}
+SITENAME #{getenv SITENAMES}
+
+# test_meta is a section for storing additional data on your test
+[test_meta]
+author matt
+owner  matt
+description Setup needed directories and seed the caches
+tags tagone,tagtwo
+reviewed never

ADDED   tests/fslsync/tests/sync/fsync.logpro
Index: tests/fslsync/tests/sync/fsync.logpro
==================================================================
--- /dev/null
+++ tests/fslsync/tests/sync/fsync.logpro
@@ -0,0 +1,8 @@
+;; You should have at least one expect:required. This ensures that your process ran
+(expect:required in "LogFileBody" > 0 "done" #/done/)
+
+;; You may need ignores to suppress false error or warning hits from the later expects
+;; NOTE: Order is important here!
+(expect:ignore   in "LogFileBody"  < 99 "Ignore the word error in comments" #/^\/\/.*error/)
+(expect:warning  in "LogFileBody"  = 0 "Any warning" #/warn/)
+(expect:error    in "LogFileBody"  = 0 "Any error"  (list #/ERROR/ #/error/)) ;; but disallow any other errors

ADDED   tests/fslsync/tests/sync/fsync.sh
Index: tests/fslsync/tests/sync/fsync.sh
==================================================================
--- /dev/null
+++ tests/fslsync/tests/sync/fsync.sh
@@ -0,0 +1,33 @@
+#!/usr/bin/env bash
+
+# Get the list of fossils from the cache
+
+FILES=$(ls $FSLSAREA/$AREANAME|grep fossil)
+
+# Do the remote sync from CACHE to FOSSILS
+ssh $SITENAME /bin/bash <<EOF
+for f in $FILES;do
+    FOSSLF=$FSLSAREA/$AREANAME/\$f
+    CACHEF=$WORKAREA/$SITENAME/
+    if [ ! -e \$FOSSLF ];then
+	cp \$CACHEF \$FOSSLF
+	chmod ug+rw \$FOSSLF
+    elif [ \$CACHEF -nt \$FOSSLF ];then
+	fossil pull -R \$FOSSLF \$CACHEF
+    fi
+done
+EOF 
+
+# Do the local sync 
+for f in $FILES;do
+    FOSSLF=$FSLSAREA/$AREANAME/\$f
+    CACHEF=$WORKAREA/$SITENAME/
+    if [ ! -e \$FOSSLF ];then
+	cp \$CACHEF \$FOSSLF
+	chmod ug+rw \$FOSSLF
+    elif [ \$CACHEF -nt \$FOSSLF ];then
+	fossil pull -R \$FOSSLF \$CACHEF
+    fi
+done
+
+echo done

ADDED   tests/fslsync/tests/sync/rsync.logpro
Index: tests/fslsync/tests/sync/rsync.logpro
==================================================================
--- /dev/null
+++ tests/fslsync/tests/sync/rsync.logpro
@@ -0,0 +1,8 @@
+;; You should have at least one expect:required. This ensures that your process ran
+(expect:required in "LogFileBody" > 0 "done" #/done/)
+
+;; You may need ignores to suppress false error or warning hits from the later expects
+;; NOTE: Order is important here!
+(expect:ignore   in "LogFileBody"  < 99 "Ignore the word error in comments" #/^\/\/.*error/)
+(expect:warning  in "LogFileBody"  = 0 "Any warning" #/warn/)
+(expect:error    in "LogFileBody"  = 0 "Any error"  (list #/ERROR/ #/error/)) ;; but disallow any other errors

ADDED   tests/fslsync/tests/sync/rsync.sh
Index: tests/fslsync/tests/sync/rsync.sh
==================================================================
--- /dev/null
+++ tests/fslsync/tests/sync/rsync.sh
@@ -0,0 +1,11 @@
+#!/usr/bin/env bash
+
+# Sync to remote cache
+rsync -avz $FSLSAREA/$AREANAME/ $SITENAME:$WORKAREA/$SITENAME/$AREANAME/ &
+# Sync to local cache
+rsync -avz $SITENAME:$FSLSAREA/$AREANAME/ $WORKAREA/$SITENAME/$AREANAME/ &
+
+# Wait until rsyncs complete
+wait
+
+echo done

ADDED   tests/fslsync/tests/sync/testconfig
Index: tests/fslsync/tests/sync/testconfig
==================================================================
--- /dev/null
+++ tests/fslsync/tests/sync/testconfig
@@ -0,0 +1,22 @@
+# Add additional steps here. Format is "stepname script"
+[ezsteps]
+rsync rsync.sh
+fsync fsync.sh
+
+# Test requirements are specified here
+[requirements]
+waiton setup
+priority 0
+
+# Iteration for your tests are controlled by the items section
+[items]
+AREANAME #{getenv AREANAMES}
+SITENAME #{getenv SITENAMES}
+
+# test_meta is a section for storing additional data on your test
+[test_meta]
+author matt
+owner  matt
+description Sync fossils to remote
+tags tagone,tagtwo
+reviewed never

Index: tests/fullrun/config/mt_include_1.config
==================================================================
--- tests/fullrun/config/mt_include_1.config
+++ tests/fullrun/config/mt_include_1.config
@@ -1,8 +1,8 @@
 [setup]
 # exectutable /path/to/megatest
-max_concurrent_jobs 200
+max_concurrent_jobs 150
 
 linktree #{getenv MT_RUN_AREA_HOME}/tmp/mt_links
 
 [jobtools]
 useshell yes

Index: tests/fullrun/megatest.config
==================================================================
--- tests/fullrun/megatest.config
+++ tests/fullrun/megatest.config
@@ -9,16 +9,24 @@
 area1 /tmp/oldarea/megatest
 
 [include config/mt_include_1.config]
 
 [setup]
+# Set launchwait to yes to use the old launch run code that waits for the launch process to return before 
+# proceeding.
+# launchwait yes
+
+# If defined the runs:run-tests-queue-new queue code is used with the register test depth
+# given. Otherwise the old code is used. The old code will be removed in the future and 
+# a default of 10 used.
+# runqueue 2
 
 # It is possible (but not recommended) to override the rsync command used
 # to populate the test directories. For test development the following 
 # example can be useful
 #
-testcopycmd cp --remove-destination -rsv TEST_SRC_PATH/. TEST_TARG_PATH/.
+# testcopycmd cp --remove-destination -rsv TEST_SRC_PATH/. TEST_TARG_PATH/. >> TEST_TARG_PATH/mt_launch.log 2>> TEST_TARG_PATH/mt_launch.log
 
 # or for hard links
 
 # testcopycmd cp --remove-destination -rlv TEST_SRC_PATH/. TEST_TARG_PATH/.
 
@@ -49,10 +57,14 @@
 WACKYVAR6 #{scheme (args:get-arg "-target")}
 PREDICTABLE the_ans
 MRAH MT_RUN_AREA_HOME=#{getenv MT_RUN_AREA_HOME}
 # The empty var should have a definition with null string
 EMPTY_VAR
+
+WRAPPEDVAR This var should have the work blah thrice: \
+blah \
+blah
 
 # XTERM   [system xterm]
 # RUNDEAD [system exit 56]
 
 [server]
@@ -61,11 +73,11 @@
 # it succeeds
 port 8080
 
 # This server will keep running this number of hours after last access. 
 # Three minutes is 0.05 hours
-timeout 0.05
+timeout 0.025
 
 ## disks are:
 ## name host:/path/to/area
 ## -or-
 ## name /path/to/area

Index: tests/fullrun/runconfigs.config
==================================================================
--- tests/fullrun/runconfigs.config
+++ tests/fullrun/runconfigs.config
@@ -1,5 +1,8 @@
+[default]
+SOMEVAR This should show up in SOMEVAR3
+
 [include #{getenv MT_RUN_AREA_HOME}/common_runconfigs.config]
 
 # #{system echo 'VACKYVAR #{shell pwd}' > $MT_RUN_AREA_HOME/config/$USER.config}
 [include ./config/#{getenv USER}.config]
 
@@ -9,5 +12,11 @@
 [default/ubuntu/nfs]
 WACKYVAR2 #{runconfigs-get CURRENT}
 
 [ubuntu/nfs/none]
 WACKYVAR2 #{runconfigs-get CURRENT}
+SOMEVAR2  This should show up in SOMEVAR4 if the target is ubuntu/nfs/none
+
+[default]
+SOMEVAR3 #{rget SOMEVAR}
+SOMEVAR4 #{rget SOMEVAR2}
+SOMEVAR5 #{runconfigs-get SOMEVAR2}

ADDED   utils/example-launch-dispatcher.scm
Index: utils/example-launch-dispatcher.scm
==================================================================
--- /dev/null
+++ utils/example-launch-dispatcher.scm
@@ -0,0 +1,12 @@
+;; Example launch dispatcher: choose a launcher from one item value found in MT_ITEM_INFO.
+(let ((target (assoc 
+	       ;; Put the variable name here, note: only *one* '
+	       ;; 'TARGET_OS
+	       'MANYITEMS
+	       (read (open-input-string (or (get-environment-variable "MT_ITEM_INFO") "()")))))) ;; unset var reads as an empty list
+  (case (if target (let ((v (cdr target))) (if (pair? v) (car v) v)) 'var-undef) ;; dispatch on the entry's value; accepts (key value) or (key . value)
+    ((suse)      (system "echo suse-launcher.pl"))
+    ((redhat)    (system "echo red-hat-launcher.pl"))
+    ((af)        (system "echo Got af"))
+    ((var-undef) (system "echo Variable not in MT_ITEM_INFO list"))
+    (else        (system "echo normal-launcher.pl"))))

Index: utils/installall.sh
==================================================================
--- utils/installall.sh
+++ utils/installall.sh
@@ -164,137 +164,137 @@
 # CSC_OPTIONS="-I$PREFIX/include -L$CSCLIBS" $CHICKEN_INSTALL $PROX -D no-library-checks -feature disable-iup-web -deploy -prefix $DEPLOYTARG iup
 # iup:1.0.2 
 CSC_OPTIONS="-I$PREFIX/include -L$CSCLIBS" $CHICKEN_INSTALL $PROX -D no-library-checks canvas-draw
 # CSC_OPTIONS="-I$PREFIX/include -L$CSCLIBS" $CHICKEN_INSTALL $PROX -D no-library-checks -deploy -prefix $DEPLOYTARG canvas-draw
 
-#======================================================================
-# Note uuid needed only for zmq 2.x series
-#======================================================================
-
-# http://download.zeromq.org/zeromq-3.2.1-rc2.tar.gz
-# zpatchlev=-rc2
-# http://download.zeromq.org/zeromq-2.2.0.tar.gz
-
-if [[ -e /usr/lib/libzmq.so ]]; then
-  echo "Using system installed zmq library"
-  $CHICKEN_INSTALL zmq
-else
-ZEROMQ=zeromq-2.2.0
-# ZEROMQ=zeromq-3.2.2
-
-# wget http://www.kernel.org/pub/linux/utils/util-linux/v2.22/util-linux-2.22.tar.gz
-UTIL_LINUX=2.21
-# UTIL_LINUX=2.20.1
-if ! [[ -e util-linux-${UTIL_LINUX}.tar.gz ]] ; then
-    # wget http://www.kiatoa.com/matt/util-linux-2.20.1.tar.gz
-    wget http://www.kernel.org/pub/linux/utils/util-linux/v${UTIL_LINUX}/util-linux-${UTIL_LINUX}.tar.gz
-fi
-
-if [[ -e util-linux-${UTIL_LINUX}.tar.gz ]] ; then
-    tar xfz util-linux-${UTIL_LINUX}.tar.gz
-    cd util-linux-${UTIL_LINUX}
-    mkdir -p build
-    cd build
-    if [[ $UTIL_LINUX = "2.22" ]] ; then
-    ../configure --prefix=$PREFIX \
---enable-shared                   \
---disable-use-tty-group		  \
---disable-makeinstall-chown       \
---disable-makeinstall-setuid      \
---disable-libtool-lock		  \
---disable-login			  \
---disable-sulogin		  \
---disable-su			  \
---disable-schedutils		  \
---disable-libmount		  \
---disable-mount			  \
---disable-losetup		  \
---disable-fsck			  \
---disable-partx			  \
---disable-mountpoint		  \
---disable-fallocate		  \
---disable-unshare		  \
---disable-eject			  \
---disable-agetty		  \
---disable-cramfs		  \
---disable-switch_root		  \
---disable-pivot_root		  \
---disable-kill			  \
---disable-libblkid		  \
---disable-utmpdump		  \
---disable-rename		  \
---disable-chsh-only-listed	  \
---disable-wall			  \
---disable-pg-bell		  \
---disable-require-password	  \
---disable-libtool-lock		  \
---disable-nls			  \
---disable-dmesg                   \
---without-ncurses                 
-    else
-      ../configure --prefix=$PREFIX \
-  --enable-shared         \
-  --disable-mount         \
-  --disable-fsck          \
-  --disable-partx         \
-  --disable-largefile     \
-  --disable-tls           \
-  --disable-libmount      \
-  --disable-mountpoint    \
-  --disable-nls           \
-  --disable-rpath         \
-  --disable-agetty        \
-  --disable-cramfs        \
-  --disable-switch_root   \
-  --disable-pivot_root    \
-  --disable-fallocate     \
-  --disable-unshare       \
-  --disable-rename        \
-  --disable-schedutils    \
-  --disable-libblkid      \
-  --disable-wall CFLAGS='-fPIC'
-
-#  --disable-makeinstall-chown \
-#  --disable-makeinstall-setuid \
-
-#   --disable-chsh-only-listed
-#   --disable-pg-bell       let pg not ring the bell on invalid keys
-#   --disable-require-password
-#   --disable-use-tty-group do not install wall and write setgid tty
-#   --disable-makeinstall-chown
-#   --disable-makeinstall-setuid
-    fi
-    
-    (cd libuuid;make install)
-    # make
-    # make install
-    cp $PREFIX/include/uuid/uuid.h $PREFIX/include/uuid.h
-fi
-
-
-cd $BUILDHOME
-
-if ! [[ -e ${ZEROMQ}${zpatchlev}.tar.gz ]] ; then
-    wget http://download.zeromq.org/${ZEROMQ}${zpatchlev}.tar.gz
-fi
-
-if [[ -e ${ZEROMQ}${zpatchlev}.tar.gz ]] ; then
-    tar xfz ${ZEROMQ}.tar.gz
-    cd ${ZEROMQ}
-    ln -s $PREFIX/include/uuid src
-    # LDFLAGS=-L$PREFIX/lib ./configure --prefix=$PREFIX 
-    
-    ./configure --enable-static --prefix=$PREFIX --with-uuid=$PREFIX LDFLAGS="-L$PREFIX/lib" CPPFLAGS="-fPIC -I$PREFIX/include" LIBS="-lgcc"
-    # --disable-shared CPPFLAGS="-fPIC 
-    # LDFLAGS="-L/usr/lib64 -L$PREFIX/lib" ./configure --enable-static --prefix=$PREFIX 
-    make
-    make install
-    CSC_OPTIONS="-I$PREFIX/include -L$CSCLIBS" $CHICKEN_INSTALL $PROX zmq
-    # CSC_OPTIONS="-I$PREFIX/include -L$CSCLIBS" $CHICKEN_INSTALL $PROX -deploy -prefix $DEPLOYTARG zmq
-fi
-fi # if zmq is in /usr/lib
-
+# disabled zmq # #======================================================================
+# disabled zmq # # Note uuid needed only for zmq 2.x series
+# disabled zmq # #======================================================================
+# disabled zmq # 
+# disabled zmq # # http://download.zeromq.org/zeromq-3.2.1-rc2.tar.gz
+# disabled zmq # # zpatchlev=-rc2
+# disabled zmq # # http://download.zeromq.org/zeromq-2.2.0.tar.gz
+# disabled zmq # 
+# disabled zmq # if [[ -e /usr/lib/libzmq.so ]]; then
+# disabled zmq #   echo "Using system installed zmq library"
+# disabled zmq #   $CHICKEN_INSTALL zmq
+# disabled zmq # else
+# disabled zmq # ZEROMQ=zeromq-2.2.0
+# disabled zmq # # ZEROMQ=zeromq-3.2.2
+# disabled zmq # 
+# disabled zmq # # wget http://www.kernel.org/pub/linux/utils/util-linux/v2.22/util-linux-2.22.tar.gz
+# disabled zmq # UTIL_LINUX=2.21
+# disabled zmq # # UTIL_LINUX=2.20.1
+# disabled zmq # if ! [[ -e util-linux-${UTIL_LINUX}.tar.gz ]] ; then
+# disabled zmq #     # wget http://www.kiatoa.com/matt/util-linux-2.20.1.tar.gz
+# disabled zmq #     wget http://www.kernel.org/pub/linux/utils/util-linux/v${UTIL_LINUX}/util-linux-${UTIL_LINUX}.tar.gz
+# disabled zmq # fi
+# disabled zmq # 
+# disabled zmq # if [[ -e util-linux-${UTIL_LINUX}.tar.gz ]] ; then
+# disabled zmq #     tar xfz util-linux-${UTIL_LINUX}.tar.gz
+# disabled zmq #     cd util-linux-${UTIL_LINUX}
+# disabled zmq #     mkdir -p build
+# disabled zmq #     cd build
+# disabled zmq #     if [[ $UTIL_LINUX = "2.22" ]] ; then
+# disabled zmq #     ../configure --prefix=$PREFIX \
+# disabled zmq # --enable-shared                   \
+# disabled zmq # --disable-use-tty-group		  \
+# disabled zmq # --disable-makeinstall-chown       \
+# disabled zmq # --disable-makeinstall-setuid      \
+# disabled zmq # --disable-libtool-lock		  \
+# disabled zmq # --disable-login			  \
+# disabled zmq # --disable-sulogin		  \
+# disabled zmq # --disable-su			  \
+# disabled zmq # --disable-schedutils		  \
+# disabled zmq # --disable-libmount		  \
+# disabled zmq # --disable-mount			  \
+# disabled zmq # --disable-losetup		  \
+# disabled zmq # --disable-fsck			  \
+# disabled zmq # --disable-partx			  \
+# disabled zmq # --disable-mountpoint		  \
+# disabled zmq # --disable-fallocate		  \
+# disabled zmq # --disable-unshare		  \
+# disabled zmq # --disable-eject			  \
+# disabled zmq # --disable-agetty		  \
+# disabled zmq # --disable-cramfs		  \
+# disabled zmq # --disable-switch_root		  \
+# disabled zmq # --disable-pivot_root		  \
+# disabled zmq # --disable-kill			  \
+# disabled zmq # --disable-libblkid		  \
+# disabled zmq # --disable-utmpdump		  \
+# disabled zmq # --disable-rename		  \
+# disabled zmq # --disable-chsh-only-listed	  \
+# disabled zmq # --disable-wall			  \
+# disabled zmq # --disable-pg-bell		  \
+# disabled zmq # --disable-require-password	  \
+# disabled zmq # --disable-libtool-lock		  \
+# disabled zmq # --disable-nls			  \
+# disabled zmq # --disable-dmesg                   \
+# disabled zmq # --without-ncurses                 
+# disabled zmq #     else
+# disabled zmq #       ../configure --prefix=$PREFIX \
+# disabled zmq #   --enable-shared         \
+# disabled zmq #   --disable-mount         \
+# disabled zmq #   --disable-fsck          \
+# disabled zmq #   --disable-partx         \
+# disabled zmq #   --disable-largefile     \
+# disabled zmq #   --disable-tls           \
+# disabled zmq #   --disable-libmount      \
+# disabled zmq #   --disable-mountpoint    \
+# disabled zmq #   --disable-nls           \
+# disabled zmq #   --disable-rpath         \
+# disabled zmq #   --disable-agetty        \
+# disabled zmq #   --disable-cramfs        \
+# disabled zmq #   --disable-switch_root   \
+# disabled zmq #   --disable-pivot_root    \
+# disabled zmq #   --disable-fallocate     \
+# disabled zmq #   --disable-unshare       \
+# disabled zmq #   --disable-rename        \
+# disabled zmq #   --disable-schedutils    \
+# disabled zmq #   --disable-libblkid      \
+# disabled zmq #   --disable-wall CFLAGS='-fPIC'
+# disabled zmq # 
+# disabled zmq # #  --disable-makeinstall-chown \
+# disabled zmq # #  --disable-makeinstall-setuid \
+# disabled zmq # 
+# disabled zmq # #   --disable-chsh-only-listed
+# disabled zmq # #   --disable-pg-bell       let pg not ring the bell on invalid keys
+# disabled zmq # #   --disable-require-password
+# disabled zmq # #   --disable-use-tty-group do not install wall and write setgid tty
+# disabled zmq # #   --disable-makeinstall-chown
+# disabled zmq # #   --disable-makeinstall-setuid
+# disabled zmq #     fi
+# disabled zmq #     
+# disabled zmq #     (cd libuuid;make install)
+# disabled zmq #     # make
+# disabled zmq #     # make install
+# disabled zmq #     cp $PREFIX/include/uuid/uuid.h $PREFIX/include/uuid.h
+# disabled zmq # fi
+# disabled zmq # 
+# disabled zmq # 
+# disabled zmq # cd $BUILDHOME
+# disabled zmq # 
+# disabled zmq # if ! [[ -e ${ZEROMQ}${zpatchlev}.tar.gz ]] ; then
+# disabled zmq #     wget http://download.zeromq.org/${ZEROMQ}${zpatchlev}.tar.gz
+# disabled zmq # fi
+# disabled zmq # 
+# disabled zmq # if [[ -e ${ZEROMQ}${zpatchlev}.tar.gz ]] ; then
+# disabled zmq #     tar xfz ${ZEROMQ}.tar.gz
+# disabled zmq #     cd ${ZEROMQ}
+# disabled zmq #     ln -s $PREFIX/include/uuid src
+# disabled zmq #     # LDFLAGS=-L$PREFIX/lib ./configure --prefix=$PREFIX 
+# disabled zmq #     
+# disabled zmq #     ./configure --enable-static --prefix=$PREFIX --with-uuid=$PREFIX LDFLAGS="-L$PREFIX/lib" CPPFLAGS="-fPIC -I$PREFIX/include" LIBS="-lgcc"
+# disabled zmq #     # --disable-shared CPPFLAGS="-fPIC 
+# disabled zmq #     # LDFLAGS="-L/usr/lib64 -L$PREFIX/lib" ./configure --enable-static --prefix=$PREFIX 
+# disabled zmq #     make
+# disabled zmq #     make install
+# disabled zmq #     CSC_OPTIONS="-I$PREFIX/include -L$CSCLIBS" $CHICKEN_INSTALL $PROX zmq
+# disabled zmq #     # CSC_OPTIONS="-I$PREFIX/include -L$CSCLIBS" $CHICKEN_INSTALL $PROX -deploy -prefix $DEPLOYTARG zmq
+# disabled zmq # fi
+# disabled zmq # fi # if zmq is in /usr/lib
+# disabled zmq # 
 cd $BUILDHOME  
 
 ## WEBKIT=WebKit-r131972
 ## if  ! [[ -e ${WEBKIT}.tar.bz2 ]] ; then
 ##    #    http://builds.nightly.webkit.org/files/trunk/src/WebKit-r131972.tar.bz2

Index: utils/mk_wrapper
==================================================================
--- utils/mk_wrapper
+++ utils/mk_wrapper
@@ -3,11 +3,14 @@
 prefix=$1
 cmd=$2
 
 echo "#!/bin/bash"
 if [ "$LD_LIBRARY_PATH" != "" ];then
+  echo "INFO: Using LD_LIBRARY_PATH=$LD_LIBRARY_PATH" >&2
   echo "export LD_LIBRARY_PATH=$LD_LIBRARY_PATH"
+else
+  echo "INFO: LD_LIBRARY_PATH not set" >&2
 fi
 
 fullcmd="$prefix/bin/$cmd"
 
 echo "$fullcmd \"\$@\""