Index: Makefile
==================================================================
--- Makefile
+++ Makefile
@@ -179,18 +179,21 @@
 mofiles/commonmod.o : megatest-fossil-hash.scm mofiles/stml2.o \
                       mofiles/mtargs.o mofiles/pkts.o mofiles/mtconfigf.o \
                       mofiles/processmod.o
 mofiles/pgdbmod.o   : mofiles/commonmod.o
 mofiles/dbmod.o     : mofiles/commonmod.o mofiles/keysmod.o \
-                      mofiles/tasksmod.o mofiles/odsmod.o mofiles/testsmod.o
+                      mofiles/tasksmod.o mofiles/odsmod.o
 mofiles/tasksmod.o  : mofiles/commonmod.o mofiles/pgdbmod.o
 mofiles/rmtmod.o    : mofiles/commonmod.o \
                       mofiles/apimod.o mofiles/ulex.o mofiles/itemsmod.o
 mofiles/apimod.o    : mofiles/dbmod.o mofiles/commonmod.o mofiles/servermod.o
-mofiles/runsmod.o   : mofiles/testsmod.o
-mofiles/mtmod.o     : mofiles/mtconfigf.o mofiles/rmtmod.o
+mofiles/runsmod.o   : mofiles/testsmod.o mofiles/mtmod.o
+mofiles/mtmod.o     : mofiles/mtconfigf.o mofiles/rmtmod.o mofiles/tasksmod.o \
+                      mofiles/dbmod.o mofiles/pgdbmod.o mofiles/launchmod.o
 mofiles/servermod.o : mofiles/commonmod.o mofiles/dbmod.o
+mofiles/testsmod.o  : mofiles/servermod.o mofiles/dbmod.o
+mofiles/launchmod.o : mofiles/subrunmod.o
 
 # Removed from megamod.o dep:   mofiles/ftail.o
 mofiles/megamod.o   : \
    mofiles/rmtmod.o \
    mofiles/commonmod.o \

Index: apimod.scm
==================================================================
--- apimod.scm
+++ apimod.scm
@@ -80,12 +80,12 @@
                    ;;===============================================
 
                    ((get-keys-write)                        (db:get-keys dbstruct)) ;; force a dummy "write" query to force server; for debug in -repl
                    
                    ;; SERVERS
-                   ((start-server)                    (apply server:kind-run params))
-                   ((kill-server)                     (set! *server-run* #f))
+		   ;; ((start-server)                    (apply server:kind-run params))
+                   ;; ((kill-server)                     (set! *server-run* #f))
 
                    ;; TESTS
 
                    ;;((test-set-state-status-by-id)     (apply mt:test-set-state-status-by-id dbstruct params))
                    ;;BB - commented out above because it was calling below, eventually, incorrectly (dbstruct passed to mt:test-set-state-status-by-id, which previosly did more, but now only passes thru to db:set-state-status-and-roll-up-items.

Index: commonmod.scm
==================================================================
--- commonmod.scm
+++ commonmod.scm
@@ -69,10 +69,14 @@
   (reachable    #f)
   (last-update  0)
   (last-used    0)
   (last-cpuload 1))
 
+(define (common:run-sync?)
+  ;; on the homehost: the result of (args:get-arg "-server"); anywhere else: #f
+  (if (common:on-homehost?) (args:get-arg "-server") #f))
+
 
 
 ;; GLOBALS
 
 ;; CONTEXTS

Index: dbmod.scm
==================================================================
--- dbmod.scm
+++ dbmod.scm
@@ -21,11 +21,11 @@
 (declare (unit dbmod))
 (declare (uses commonmod))
 (declare (uses keysmod))
 (declare (uses tasksmod))
 (declare (uses odsmod))
-(declare (uses testsmod))
+;; (declare (uses testsmod))
 (declare (uses mtargs))
 (declare (uses mtconfigf))
 ;; (declare (uses servermod))
 
 (module dbmod
@@ -44,11 +44,11 @@
 (import commonmod)
 (import keysmod)
 (import files)
 (import tasksmod)
 (import odsmod)
-(import testsmod)
+;; (import testsmod)
 (import (prefix mtargs args:))
 (import (prefix mtconfigf configf:))
 ;; (import servermod)
 
 ;; (use (prefix ulex ulex:))
@@ -90,10 +90,245 @@
 ;;
 (defstruct dbr:counts
   (state #f)
   (status #f)
   (count  0)) 
+
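+;; Helper routines (tests:, mt:, runs:, keys: prefixes) defined here, apparently so that dbmod no longer needs to import testsmod.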
+
+
+
+
+(define (tests:get-tests-search-path cfgdat)
+  ;; Candidates: the cadr of every [tests-paths] entry (when cfgdat has that
+  ;; section), followed by <*toppath*>/tests. Only existing directories are kept.
+  (let* ((section    (and cfgdat (configf:get-section cfgdat "tests-paths")))
+         (configured (if section (map cadr section) '()))
+         (candidates (append configured (list (conc *toppath* "/tests")))))
+    (filter (lambda (dir)
+              (cond
+               ((directory-exists? dir) dir)
+               (else
+                ;; the warning is gated by common:low-noise-print
+                (if (common:low-noise-print 60 "tests:get-tests-search-path" dir)
+                    (debug:print 0 *default-log-port* "WARNING: problem with directory " dir ", dropping it from tests path"))
+                #f)))
+            candidates)))
+
+(define (mt:lazy-read-test-config test-name)
+  (let ((tconf (hash-table-ref/default *testconfigs* test-name #f)))
+    (if tconf
+        tconf ;; already cached in *testconfigs*
+        ;; Walk the search path. Testing for the empty list first means an empty
+        ;; search path reports the error below instead of failing in (car '()).
+        (let loop ((dirs (tests:get-tests-search-path *configdat*)))
+          (if (null? dirs)
+              (begin
+                (debug:print-error 0 *default-log-port* "No readable testconfig found for " test-name)
+                #f)
+              (let ((tconfig-file (conc (car dirs) "/" test-name "/testconfig")))
+                (if (and (common:file-exists? tconfig-file)
+                         (file-read-access? tconfig-file))
+                    ;; MT_LINKTREE is set while the config is read and restored afterwards.
+                    (let ((link-tree-path (common:get-linktree))
+                          (old-link-tree  (get-environment-variable "MT_LINKTREE")))
+                      (if link-tree-path (setenv "MT_LINKTREE" link-tree-path))
+                      (let ((newtcfg (configf:read-config tconfig-file #f #f))) ;; NOTE: Does NOT run [system ...]
+                        (hash-table-set! *testconfigs* test-name newtcfg)
+                        (if old-link-tree
+                            (setenv "MT_LINKTREE" old-link-tree)
+                            (unsetenv "MT_LINKTREE"))
+                        newtcfg))
+                    (loop (cdr dirs)))))))))
+
+;; Build an SQL condition from a comma separated pattern list; each pattern is
+;; "testpatt" or "testpatt/itempatt". Returns #f for non-string or empty input.
+;;
+(define (tests:match->sqlqry patterns)
+  ;; one pattern => "(<testname condition> AND <item_path condition>)"
+  (define (patt->qry patt)
+    ;; match group 1 is the text before the first "/", group 3 the text after it
+    (let* ((patt-parts (string-match (regexp "^([^\\/]*)(\\/(.*)|)$") patt))
+           (test-qry   (db:patt->like "testname" (cadr patt-parts)))
+           (item-qry   (db:patt->like "item_path" (cadddr patt-parts))))
+      ;; (print "tests:match => patt-parts: " patt-parts)
+      (conc "(" test-qry " AND " item-qry ")")))
+  (cond
+   ((not (string? patterns)) #f)
+   (else
+    (let ((patts (string-split patterns ",")))
+      ;; (print "patts: " patts)
+      (if (null? patts)
+          #f ;;; no pattern(s) means no match, we will do no query
+          ;; the per-pattern conditions are joined with " OR ",
+          ;; in the same order as the patterns were given
+          (string-intersperse (map patt->qry patts) " OR "))))))
+
+;; keys list to key1,key2,key3 ...
+(define (runs:get-std-run-fields keys remfields)
+  ;; => (list "key1,key2,...,rem1,rem2,..." (key1 key2 ... rem1 rem2 ...))
+  (let ((header (append keys remfields)))
+    (list (conc (keys->keystr keys) "," (string-intersperse remfields ","))
+          header)))
+
+;; A routine to map itempaths using a itemmap
+;; path-a and path-b must be strings or this will fail
+;;
+;; path-b is waiting on path-a
+;;
+(define (db:compare-itempaths test-b-name path-a path-b itemmaps )
+  (debug:print-info 6 *default-log-port* "ITEMMAPS: " itemmaps)
+  (let ((itemmap (tests:lookup-itemmap itemmaps test-b-name)))
+    (if (not itemmap)
+        (equal? path-b path-a) ;; no itemmap for test-b-name: compare the paths as given
+        (let ((mapped (db:multi-pattern-apply path-b itemmap)))
+          (debug:print-info 6 *default-log-port* "ITEMMAP is " itemmap ", path: " path-b ", mapped path: " mapped)
+          (equal? path-a mapped)))))
+
+;;======================================================================
+;; Run keys, these are used to hierarchically organise tests and run areas
+;;======================================================================
+
+(define (keys->keystr keys) ;; => "key1,key2,key3,..." (the keys joined with commas)
+  (string-intersperse keys ","))
+
+;;======================================================================
+;; config file related routines
+;;======================================================================
+
+(define keys:config-get-fields common:get-fields) ;; alias: common:get-fields under the keys: prefix
+(define (keys:make-key/field-string confdat)
+  ;; every [fields] entry becomes "<car> <cadr>"; the pieces are joined with ","
+  (let ((entry->str (lambda (field) (conc (car field) " " (cadr field)))))
+    (string-join
+     (map entry->str (configf:get-section confdat "fields"))
+     ",")))
+
+;; given the keys (a list of vectors <key field> or a list of keys) and a target return a keyval list
+;; keyval list ( (key1 val1) (key2 val2) ...)
+(define (keys:target->keyval keys target)
+  ;; Split target on "/" and pair the pieces with keys positionally. When there
+  ;; are fewer pieces than keys the missing values are filled in with "".
+  (let* ((targlist  (string-split target "/"))
+         (shortfall (- (length keys) (length targlist)))
+         (padded    (if (> shortfall 0)
+                        (append targlist (make-list shortfall ""))
+                        targlist)))
+    ;; each element of the result is a two element list (key value)
+    (map list keys padded)))
+
+;; patterns are:
+;;    "rx1"  "replacement1"\n
+;;    "rx2"  "replacement2"
+;; etc.
+;;
+(define (db:multi-pattern-apply item-path itemmap)
+  (let ((all-patts (string-split itemmap "\n")))
+    (if (null? all-patts)
+        item-path
+        (let loop ((hed (car all-patts))
+                   (tal (cdr all-patts))
+                   (res item-path))
+          (let* ((parts (string-split hed))
+                 (patt  (if (null? parts) #f (car parts))) ;; whitespace-only line => no regex
+                 ;; with no replacement on the line, matched text is replaced by ""
+                 (repl  (if (> (length parts) 1)(cadr parts) ""))
+                 ;; newr: res after this line was applied, or res unchanged on any problem
+                 (newr  (if (and patt repl)
+                            (begin
+                              (handle-exceptions
+                               exn
+                               (begin
+                                  (debug:print 0 *default-log-port*
+                                  "WARNING: itemmap has problem \"" itemmap "\", patt: " patt ", repl: " repl)
+                                 res)
+                              (string-substitute patt repl res))
+                              ;; any exception raised by string-substitute is logged by the
+                              ;; handler above, which then yields res unchanged
+                              )
+                            (begin
+                              (debug:print 0 *default-log-port*
+                               "WARNING: itemmap has problem \"" itemmap "\", patt: " patt ", repl: " repl)
+                              res))))
+            (if (null? tal)
+                newr
+                (loop (car tal)(cdr tal) newr)))))))
+
+;; given a list of itemmaps (testname . map), return the first match
+;;
+(define (tests:lookup-itemmap itemmaps testname)
+  (let ((best-matches (filter (lambda (itemmap) ;; keep entries whose car pattern matches testname
+                                (tests:match (car itemmap) testname #f))
+                              itemmaps)))
+    (if (null? best-matches)
+        #f
+        (let ((res (car best-matches)))
+          ;; (debug:print 0 *default-log-port* "res=" res)
+          (cond
+           ((string? res) res) ;;; FIX THE ROOT CAUSE HERE ....
+           ((null? res)   #f)
+           ((string? (cdr res)) (cdr res))  ;; it is a pair
+           ((string? (cadr res))(cadr res)) ;; it is a list
+           (else (cadr res))))))) ;; was "(else cadr res)", which returned the whole entry
+
+;; make a query: fieldname LIKE|GLOB 'patt1' <comparator> fieldname LIKE|GLOB 'patt2' ...
+(define (db:patt->like fieldname pattstr #!key (comparator " OR "))
+  (let ((patts (if (string? pattstr)
+                   (string-split pattstr ",")
+                   '("%")))) ;; a non-string pattstr means "match anything"
+    (string-intersperse (map (lambda (patt)
+                               (let ((wildtype (if (substring-index "%" patt) "LIKE" "GLOB")) ;; "%" in patt => LIKE
+                                     (safepatt (string-substitute "'" "''" patt #t)))         ;; double every ' so it stays inside the SQL literal
+                                 (conc fieldname " " wildtype " '" safepatt "'")))
+                             (if (null? patts)
+                                 '("") patts))
+                        comparator)))
+
+;; if itempath is #f then look only at the testname part
+;;
+(define (tests:match patterns testname itempath #!key (required '()))
+  (if (string? patterns) ;; NOTE(review): no else branch - a non-string patterns gives an unspecified result
+      (let ((patts (append (string-split patterns ",") required)))
+	(if (null? patts) ;;; no pattern(s) means no match
+	    #f
+	    (let loop ((patt (car patts))
+		       (tal  (cdr patts)))
+	      ;; try the patterns in order; the first one that matches gives #t
+	      (if (string=? patt "")
+		  #f ;; nothing ever matches empty string - policy (the remaining patterns are not tried)
+		  (let* ((patt-parts (string-match (regexp "^([^\\/]*)(\\/(.*)|)$") patt))
+			 (test-patt  (cadr patt-parts))
+			 (item-patt  (cadddr patt-parts)))
+		    ;; special case: test vs. test/
+		    ;;   test  => "test" "%"
+		    ;;   test/ => "test" ""
+		    (if (and (not (substring-index "/" patt)) ;; no slash in the original
+			     (or (not item-patt)
+				 (equal? item-patt "")))      ;; should always be true that item-patt is ""
+			(set! item-patt "%"))
+		    ;; the test name must match; the item path is only checked when itempath is not #f
+		    (if (and (tests:glob-like-match test-patt testname)
+			     (or (not itempath)
+				 (tests:glob-like-match (if item-patt item-patt "") itempath)))
+			#t
+			(if (null? tal)
+			    #f
+			    (loop (car tal)(cdr tal)))))))))))
+
+;; tests:glob-like-match: regex-match str against patt ("%" or "*" wildcard); a leading "~" inverts the result
+(define (tests:glob-like-match patt str) 
+  (let* ((like    (substring-index "%" patt))
+         (negate  (equal? (substring-index "~" patt) 0))
+         (bare    (if negate (substring patt 1) patt))
+         ;; the wildcard ("%" when patt contains one, otherwise "*") is rewritten to ".*"
+         (rx-src  (if like
+                      (string-substitute (regexp "%") ".*" bare #f)
+                      (string-substitute (regexp "\\*") ".*" bare #f)))
+         (hit     (string-match (regexp rx-src (if like #t #f)) str)))
+    ;; (print "tests:glob-like-match => negate: " negate ", bare: " bare ", rx-src: " rx-src)
+    (if negate (not hit) hit)))
+
 
 ;;======================================================================
 ;; SQLITE3 HELPERS
 ;;======================================================================
 

Index: docs/code/module-hierarchy.dot
==================================================================
--- docs/code/module-hierarchy.dot
+++ docs/code/module-hierarchy.dot
@@ -23,10 +23,11 @@
 
   subgraph cluster_megatest {
     label="megatest";
 
     rmtmod [label="rmt mod"];
+    // httpmod [label="http-transportmod"];
     // commonmod
     mtargs     -> commonmod;
     pkts       -> commonmod;
     mtconfigf  -> commonmod;
     processmod -> commonmod;
@@ -44,11 +45,10 @@
     // dbmod
     commonmod -> dbmod;
     keysmod   -> dbmod;
     tasksmod  -> dbmod;
     odsmod    -> dbmod;
-    testsmod  -> dbmod;
     mtargs    -> dbmod;
     mtconfigf -> dbmod;
     // tasksmod
     commonmod -> tasksmod;
     pgdbmod   -> tasksmod;
@@ -56,14 +56,22 @@
     // odsmod
     commonmod -> odsmod;
     // testsmod
     commonmod -> testsmod;
     mtargs    -> testsmod;
+    servermod -> testsmod;
+    mtconfigf -> testsmod;
+    itemsmod  -> testsmod;
+    dbmod     -> testsmod;
     // mtmod
     mtconfigf -> mtmod;
     commonmod -> mtmod;
     rmtmod    -> mtmod;
+    servermod -> mtmod;
+    tasksmod  -> mtmod;
+    mtargs    -> mtmod;
+    launchmod -> mtmod;
     // runsmod
     mtmod     -> runsmod;
     commonmod -> runsmod;
     testsmod  -> runsmod;
     // pgdbmod
@@ -70,12 +78,21 @@
     commonmod -> pgdbmod;
     mtconfigf -> pgdbmod;
     //servermod
     commonmod -> servermod;
     dbmod     -> servermod;
-    // processmod
-
-
+    mtconfigf -> servermod;
+    mtargs    -> servermod;
+    tasksmod  -> servermod;
+    // launchmod
+    commonmod -> launchmod;
+    rmtmod    -> launchmod;
+    testsmod  -> launchmod;
+    mtconfigf -> launchmod;
+    mtargs    -> launchmod;
+    subrunmod -> launchmod;
+    // subrun
+    
 }
   
 }
 

DELETED http-transport-inc.scm
Index: http-transport-inc.scm
==================================================================
--- http-transport-inc.scm
+++ /dev/null
@@ -1,680 +0,0 @@
-
-;; Copyright 2006-2012, Matthew Welland.
-;; 
-;; This file is part of Megatest.
-;; 
-;;     Megatest is free software: you can redistribute it and/or modify
-;;     it under the terms of the GNU General Public License as published by
-;;     the Free Software Foundation, either version 3 of the License, or
-;;     (at your option) any later version.
-;; 
-;;     Megatest is distributed in the hope that it will be useful,
-;;     but WITHOUT ANY WARRANTY; without even the implied warranty of
-;;     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-;;     GNU General Public License for more details.
-;; 
-;;     You should have received a copy of the GNU General Public License
-;;     along with Megatest.  If not, see <http://www.gnu.org/licenses/>.
-
-(define (http-transport:make-server-url hostport)
-  (if (not hostport)
-      #f
-      (conc "http://" (car hostport) ":" (cadr hostport))))
-
-(define *server-loop-heart-beat* (current-seconds))
-
-;;======================================================================
-;; S E R V E R
-;; ======================================================================
-
-;; Call this to start the actual server
-;;
-
-(define *db:process-queue-mutex* (make-mutex))
-
-(define (http-transport:run hostn)
-  ;; Configurations for server
-  (tcp-buffer-size 2048)
-  (max-connections 2048) 
-  (debug:print 2 *default-log-port* "Attempting to start the server ...")
-  (let* ((db              #f) ;;        (open-db)) ;; we don't want the server to be opening and closing the db unnecesarily
-	 (hostname        (get-host-name))
-	 (ipaddrstr       (let ((ipstr (if (string=? "-" hostn)
-					   ;; (string-intersperse (map number->string (u8vector->list (hostname->ip hostname))) ".")
-					   (server:get-best-guess-address hostname)
-					   #f)))
-			    (if ipstr ipstr hostn))) ;; hostname))) 
-	 (start-port      (portlogger:open-run-close portlogger:find-port))
-	 (link-tree-path  (common:get-linktree))
-	 (tmp-area        (common:get-db-tmp-area))
-	 (start-file      (conc tmp-area "/.server-start")))
-    (debug:print-info 0 *default-log-port* "portlogger recommended port: " start-port)
-    ;; set some parameters for the server
-    (root-path     (if link-tree-path 
-		       link-tree-path
-		       (current-directory))) ;; WARNING: SECURITY HOLE. FIX ASAP!
-    (handle-directory spiffy-directory-listing)
-    (handle-exception (lambda (exn chain)
-			(signal (make-composite-condition
-				 (make-property-condition 
-				  'server
-				  'message "server error")))))
-
-    ;; http-transport:handle-directory) ;; simple-directory-handler)
-    ;; Setup the web server and a /ctrl interface
-    ;;
-    (vhost-map `(((* any) . ,(lambda (continue)
-			       ;; open the db on the first call 
-				 ;; This is were we set up the database connections
-			       (let* (($   (request-vars source: 'both))
-				      (dat ($ 'dat))
-				      (res #f))
-				 (cond
-				  ((equal? (uri-path (request-uri (current-request)))
-					   '(/ "api"))
-				   (send-response body:    (api:process-request *dbstruct-db* $) ;; the $ is the request vars proc
-						  headers: '((content-type text/plain)))
-				   (mutex-lock! *heartbeat-mutex*)
-				   (set! *db-last-access* (current-seconds))
-				   (mutex-unlock! *heartbeat-mutex*))
-				  ((equal? (uri-path (request-uri (current-request))) 
-					   '(/ ""))
-				   (send-response body: (http-transport:main-page)))
-				  ((equal? (uri-path (request-uri (current-request))) 
-					   '(/ "json_api"))
-				   (send-response body: (http-transport:main-page)))
-				  ((equal? (uri-path (request-uri (current-request))) 
-					   '(/ "runs"))
-				   (send-response body: (http-transport:main-page)))
-				  ((equal? (uri-path (request-uri (current-request))) 
-					   '(/ any))
-				   (send-response body: "hey there!\n"
-						  headers: '((content-type text/plain))))
-				  ((equal? (uri-path (request-uri (current-request))) 
-					   '(/ "hey"))
-				   (send-response body: "hey there!\n" 
-						  headers: '((content-type text/plain))))
-                                  ((equal? (uri-path (request-uri (current-request))) 
-					   '(/ "jquery3.1.0.js"))
-				   (send-response body: (http-transport:show-jquery) 
-						  headers: '((content-type application/javascript))))
-                                  ((equal? (uri-path (request-uri (current-request))) 
-					   '(/ "test_log"))
-				   (send-response body: (http-transport:html-test-log $) 
-						  headers: '((content-type text/HTML))))    
-                                  ((equal? (uri-path (request-uri (current-request))) 
-					   '(/ "dashboard"))
-				   (send-response body: (http-transport:html-dboard $) 
-						  headers: '((content-type text/HTML)))) 
-				  (else (continue))))))))
-    (with-output-to-file start-file (lambda ()(print (current-process-id))))
-    (http-transport:try-start-server ipaddrstr start-port)))
-
-;; This is recursively run by http-transport:run until sucessful
-;;
-(define (http-transport:try-start-server ipaddrstr portnum)
-  (let ((config-hostname (configf:lookup *configdat* "server" "hostname"))
-	(config-use-proxy (equal? (configf:lookup *configdat* "client" "use-http_proxy") "yes")))
-    (if (not config-use-proxy)
-	(determine-proxy (constantly #f)))
-    (debug:print-info 0 *default-log-port* "http-transport:try-start-server time=" (seconds->time-string (current-seconds)) " ipaddrsstr=" ipaddrstr " portnum=" portnum " config-hostname=" config-hostname)
-    (handle-exceptions
-	exn
-	(begin
-	  (print-error-message exn)
-	  (if (< portnum 64000)
-	      (begin 
-		(debug:print 0 *default-log-port* "WARNING: attempt to start server failed. Trying again ...")
-		(debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
-		(debug:print 5 *default-log-port* "exn=" (condition->list exn))
-		(portlogger:open-run-close portlogger:set-failed portnum)
-		(debug:print 0 *default-log-port* "WARNING: failed to start on portnum: " portnum ", trying next port")
-		(thread-sleep! 0.1)
-		
-		;; get_next_port goes here
-		(http-transport:try-start-server ipaddrstr
-						 (portlogger:open-run-close portlogger:find-port)))
-	      (begin
-		(print "ERROR: Tried and tried but could not start the server"))))
-      ;; any error in following steps will result in a retry
-      (set! *server-info* (list ipaddrstr portnum))
-      (debug:print 0 *default-log-port* "INFO: Trying to start server on " ipaddrstr ":" portnum)
-      ;; This starts the spiffy server
-      ;; NEED WAY TO SET IP TO #f TO BIND ALL
-      ;; (start-server bind-address: ipaddrstr port: portnum)
-      (if config-hostname ;; this is a hint to bind directly
-	  (start-server port: portnum bind-address: (if (equal? config-hostname "-")
-							ipaddrstr
-							config-hostname))
-	  (start-server port: portnum))
-      (portlogger:open-run-close portlogger:set-port portnum "released")
-      (debug:print 1 *default-log-port* "INFO: server has been stopped"))))
-
-;;======================================================================
-;; S E R V E R   U T I L I T I E S 
-;;======================================================================
-
-;;======================================================================
-;; C L I E N T S
-;;======================================================================
-
-(define *http-mutex* (make-mutex))
-
-;; NOTE: Large block of code from 32436b426188080f72fceb6894af541fbad9921e removed here
-;;       I'm pretty sure it is defunct.
-
-;; This next block all imported en-mass from the api branch
-(define *http-requests-in-progress* 0)
-(define *http-connections-next-cleanup* (current-seconds))
-
-(define (http-transport:get-time-to-cleanup)
-  (let ((res #f))
-    (mutex-lock! *http-mutex*)
-    (set! res (> (current-seconds) *http-connections-next-cleanup*))
-    (mutex-unlock! *http-mutex*)
-    res))
-
-(define (http-transport:inc-requests-count)
-  (mutex-lock! *http-mutex*)
-  (set! *http-requests-in-progress* (+ 1 *http-requests-in-progress*))
-  ;; Use this opportunity to slow things down iff there are too many requests in flight
-  (if (> *http-requests-in-progress* 5)
-      (begin
-	(debug:print-info 0 *default-log-port* "Whoa there buddy, ease up...")
-	(thread-sleep! 1)))
-  (mutex-unlock! *http-mutex*))
-
-(define (http-transport:dec-requests-count proc) 
-  (mutex-lock! *http-mutex*)
-  (proc)
-  (set! *http-requests-in-progress* (- *http-requests-in-progress* 1))
-  (mutex-unlock! *http-mutex*))
-
-(define (http-transport:dec-requests-count-and-close-all-connections)
-  (set! *http-requests-in-progress* (- *http-requests-in-progress* 1))
-  (let loop ((etime (+ (current-seconds) 5))) ;; give up in five seconds
-    (if (> *http-requests-in-progress* 0)
-	(if (> etime (current-seconds))
-	    (begin
-	      (thread-sleep! 0.05)
-	      (loop etime))
-	    (debug:print-error 0 *default-log-port* "requests still in progress after 5 seconds of waiting. I'm going to pass on cleaning up http connections"))
-	(close-all-connections!)))
-  (set! *http-connections-next-cleanup* (+ (current-seconds) 10))
-  (mutex-unlock! *http-mutex*))
-
-(define (http-transport:inc-requests-and-prep-to-close-all-connections)
-  (mutex-lock! *http-mutex*)
-  (set! *http-requests-in-progress* (+ 1 *http-requests-in-progress*)))
-
-;; Send "cmd" with json payload "params" to serverdat and receive result
-;;
-(define (http-transport:client-api-send-receive run-id serverdat cmd params #!key (numretries 3)(area-dat #f))
-  (let* ((fullurl    (if (vector? serverdat)
-			 (http-transport:server-dat-get-api-req serverdat)
-			 (begin
-			   (debug:print 0 *default-log-port* "FATAL ERROR: http-transport:client-api-send-receive called with no server info")
-			   (exit 1))))
-	 (res        (vector #f "uninitialized"))
-	 (success    #t)
-	 (sparams    (db:obj->string params transport: 'http))
-	 (runremote  (or area-dat *runremote*)))
-       (debug:print-info 11 *default-log-port* "fullurl=" fullurl ", cmd=" cmd ", params=" params ", run-id=" run-id "\n")
-       ;; set up the http-client here
-       (max-retry-attempts 1)
-       ;; consider all requests indempotent
-       (retry-request? (lambda (request)
-			 #f))
-       ;; send the data and get the response
-       ;; extract the needed info from the http data and 
-       ;; process and return it.
-       (let* ((send-recieve (lambda ()
-			      (mutex-lock! *http-mutex*)
-			      ;; (condition-case (with-input-from-request "http://localhost"; #f read-lines)
-			      ;;					       ((exn http client-error) e (print e)))
-			      (set! res (vector                ;;; DON'T FORGET - THIS IS THE CLIENT SIDE! NOTE: consider moving this to client.scm since we are only supporting http transport at this time.
-					 success
-					 (db:string->obj 
-					  (handle-exceptions
-					      exn
-					      (let ((call-chain (get-call-chain))
-						    (msg        ((condition-property-accessor 'exn 'message) exn)))
-						(set! success #f)
-                                                (if (debug:debug-mode 1)
-                                                    (debug:print-info 0 *default-log-port* "couldn't talk to server, trying again ...")
-                                                    (begin
-                                                      (debug:print 0 *default-log-port* "WARNING: failure in with-input-from-request to " fullurl ".")
-                                                      (debug:print 0 *default-log-port* " message: " msg)
-                                                      (debug:print 0 *default-log-port* " cmd: " cmd " params: " params)
-                                                      (debug:print 0 *default-log-port* " call-chain: " call-chain)))
-                                                (if runremote
-						    (remote-conndat-set! runremote #f))
-						;; Killing associated server to allow clean retry.")
-						;; (tasks:kill-server-run-id run-id)  ;; better to kill the server in the logic that called this routine?
-						(mutex-unlock! *http-mutex*)
-					     ;;; (signal (make-composite-condition
-					     ;;;          (make-property-condition 'commfail 'message "failed to connect to server")))
-					     ;;; "communications failed"
-						(db:obj->string #f))
-					    (with-input-from-request ;; was dat
-					     fullurl 
-					     (list (cons 'key (or *server-id* "thekey"))
-						   (cons 'cmd cmd)
-						   (cons 'params sparams))
-					     read-string))
-					  transport: 'http)
-                                         0)) ;; added this speculatively
-			      ;; Shouldn't this be a call to the managed call-all-connections stuff above?
-			      (close-all-connections!)
-			      (mutex-unlock! *http-mutex*)
-			      ))
-	      (time-out     (lambda ()
-			      (thread-sleep! 45)
-			      #f))
-	      (th1 (make-thread send-recieve "with-input-from-request"))
-	      (th2 (make-thread time-out     "time out")))
-	 (thread-start! th1)
-	 (thread-start! th2)
-	 (thread-join! th1)
-	 (thread-terminate! th2)
-	 (debug:print-info 11 *default-log-port* "got res=" res)
-	 (if (vector? res)
-	     (if (vector-ref res 0) ;; this is the first flag or the second flag?
-		 res ;; this is the *inner* vector? seriously? why?
-                 (if (debug:debug-mode 11)
-                     (let ((call-chain (get-call-chain))) ;; note: this code also called in nmsg-transport - consider consolidating it
-                       (print-call-chain (current-error-port))
-                       (debug:print-error 11 *default-log-port* "error above occured at server, res=" res) ;; " message: " ((condition-property-accessor 'exn 'message) exn))
-                       (debug:print 11 *default-log-port* " server call chain:")
-                       (pp (vector-ref res 1) (current-error-port))
-                       (signal (vector-ref res 0)))
-                     res))
-	     (signal (make-composite-condition
-		      (make-property-condition 
-		       'timeout
-		       'message "nmsg-transport:client-api-send-receive-raw timed out talking to server")))))))
-
-;; careful closing of connections stored in *runremote*
-;;
-(define (http-transport:close-connections #!key (area-dat #f))
-  (let* ((runremote  (or area-dat *runremote*))
-	 (server-dat (if runremote
-                         (remote-conndat runremote)
-                         #f))) ;; (hash-table-ref/default *runremote* run-id #f)))
-    (if (vector? server-dat)
-	(let ((api-dat (http-transport:server-dat-get-api-uri server-dat)))
-	  (handle-exceptions
-	    exn
-	    (begin
-	      (print-call-chain *default-log-port*)
-	      (debug:print-error 0 *default-log-port* " closing connection failed with error: " ((condition-property-accessor 'exn 'message) exn)))
-	    (close-connection! api-dat)
-            ;;(close-idle-connections!)
-	    #t))
-	#f)))
-
-;; Moved to commonmod
-;;
-;; (define (make-http-transport:server-dat)(make-vector 6))
-;; (define (http-transport:server-dat-get-iface         vec)    (vector-ref  vec 0))
-;; (define (http-transport:server-dat-get-port          vec)    (vector-ref  vec 1))
-;; (define (http-transport:server-dat-get-api-uri       vec)    (vector-ref  vec 2))
-;; (define (http-transport:server-dat-get-api-url       vec)    (vector-ref  vec 3))
-;; (define (http-transport:server-dat-get-api-req       vec)    (vector-ref  vec 4))
-;; (define (http-transport:server-dat-get-last-access   vec)    (vector-ref  vec 5))
-;; (define (http-transport:server-dat-get-socket        vec)    (vector-ref  vec 6))
-;; 
-;; (define (http-transport:server-dat-make-url vec)
-;;   (if (and (http-transport:server-dat-get-iface vec)
-;; 	   (http-transport:server-dat-get-port  vec))
-;;       (conc "http://" 
-;; 	    (http-transport:server-dat-get-iface vec)
-;; 	    ":"
-;; 	    (http-transport:server-dat-get-port  vec))
-;;       #f))
-;; 
-;; (define (http-transport:server-dat-update-last-access vec)
-;;   (if (vector? vec)
-;;       (vector-set! vec 5 (current-seconds))
-;;       (begin
-;; 	(print-call-chain (current-error-port))
-;; 	(debug:print-error 0 *default-log-port* "call to http-transport:server-dat-update-last-access with non-vector!!"))))
-
-;;
-;; connect
-;;
-(define (http-transport:client-connect iface port)
-  (let* ((api-url      (conc "http://" iface ":" port "/api"))
-	 (api-uri      (uri-reference (conc "http://" iface ":" port "/api")))
-	 (api-req      (make-request method: 'POST uri: api-uri))
-	 (server-dat   (vector iface port api-uri api-url api-req (current-seconds))))
-    server-dat))
-
-;; run http-transport:keep-running in a parallel thread to monitor that the db is being 
-;; used and to shutdown after sometime if it is not.
-;;
-(define (http-transport:keep-running) 
-  ;; if none running or if > 20 seconds since 
-  ;; server last used then start shutdown
-  ;; This thread waits for the server to come alive
-  (debug:print-info 0 *default-log-port* "Starting the sync-back, keep alive thread in server")
-  (let* ((sdat              #f)
-	 (tmp-area          (common:get-db-tmp-area))
-	 (started-file      (conc tmp-area "/.server-started"))
-	 (server-start-time (current-seconds))
-	 (server-info (let loop ((start-time (current-seconds))
-				 (changed    #t)
-				 (last-sdat  "not this"))
-                        (begin ;; let ((sdat #f))
-			  (thread-sleep! 0.01)
-			  (debug:print-info 0 *default-log-port* "Waiting for server alive signature")
-                          (mutex-lock! *heartbeat-mutex*)
-                          (set! sdat *server-info*)
-                          (mutex-unlock! *heartbeat-mutex*)
-                          (if (and sdat
-				   (not changed)
-				   (> (- (current-seconds) start-time) 2))
-			      (begin
-				(debug:print-info 0 *default-log-port* "Received server alive signature")
-                                (common:save-pkt `((action . alive)
-                                                   (T      . server)
-                                                   (pid    . ,(current-process-id))
-                                                   (ipaddr . ,(car sdat))
-                                                   (port   . ,(cadr sdat)))
-                                                 *configdat* #t)
-				sdat)
-                              (begin
-				(debug:print-info 0 *default-log-port* "Still waiting, last-sdat=" last-sdat)
-                                (sleep 4)
-				(if (> (- (current-seconds) start-time) 120) ;; been waiting for two minutes
-				    (begin
-				      (debug:print-error 0 *default-log-port* "transport appears to have died, exiting server")
-                                      (common:save-pkt `((action . died)
-                                                         (T      . server)
-                                                         (pid    . ,(current-process-id))
-                                                         (ipaddr . ,(car sdat))
-                                                         (port   . ,(cadr sdat))
-                                                         (msg    . "Transport died?"))
-						       *configdat* #t)
-				      (exit))
-				    (loop start-time
-					  (equal? sdat last-sdat)
-					  sdat)))))))
-	 (iface       (car server-info))
-         (port        (cadr server-info))
-         (last-access 0)
-	 (server-timeout (server:expiration-timeout))
-	 (server-going  #f)
-	 (server-log-file (args:get-arg "-log"))) ;; always set when we are a server
-
-    (with-output-to-file started-file (lambda ()(print (current-process-id))))
-
-    (let loop ((count         0)
-	       (server-state 'available)
-	       (bad-sync-count 0)
-	       (start-time     (current-milliseconds)))
-      ;; Use this opportunity to sync the tmp db to megatest.db
-      (if (not server-going) ;; *dbstruct-db* 
-	  (begin
-	    (debug:print 0 *default-log-port* "SERVER: dbprep")
-	    (set! *dbstruct-db*  (db:setup #t)) ;;  run-id))
-	    (set! server-going #t)
-	    (debug:print 0 *default-log-port* "SERVER: running, megatest version: " (common:get-full-version)) ;; NOTE: the server is NOT yet marked as running in the log. We do that in the keep-running routine.
-	    (thread-start! *watchdog*)))
-      
-      ;; when things go wrong we don't want to be doing the various queries too often
-      ;; so we strive to run this stuff only every four seconds or so.
-      (let* ((sync-time (- (current-milliseconds) start-time))
-	    (rem-time  (quotient (- 4000 sync-time) 1000)))
-	(if (and (<= rem-time 4)
-		 (>  rem-time 0))
-	    (thread-sleep! rem-time)))
-      
-      (if (< count 1) ;; 3x3 = 9 secs aprox
-	  (loop (+ count 1) 'running bad-sync-count (current-milliseconds)))
-      
-      ;; Check that iface and port have not changed (can happen if server port collides)
-      (mutex-lock! *heartbeat-mutex*)
-      (set! sdat *server-info*)
-      (mutex-unlock! *heartbeat-mutex*)
-      
-      (if (not (equal? sdat (list iface port)))
-	  (let ((new-iface (car sdat))
-		(new-port  (cadr sdat)))
-	    (debug:print-info 0 *default-log-port* "WARNING: interface changed, refreshing iface and port info")
-	    (set! iface new-iface)
-	    (set! port  new-port)
-	    (debug:print 0 *default-log-port* "SERVER STARTED: " iface ":" port " AT " (current-seconds))
-	    (flush-output *default-log-port*)))
-      
-      ;; Transfer *db-last-access* to last-access to use in checking that we are still alive
-      (mutex-lock! *heartbeat-mutex*)
-      (set! last-access *db-last-access*)
-      (mutex-unlock! *heartbeat-mutex*)
-      
-      (if (common:low-noise-print 120 (conc "server running on " iface ":" port))
-	  (begin
-	    (debug:print 0 *default-log-port* "SERVER STARTED: " iface ":" port " AT " (current-seconds))
-	    (flush-output *default-log-port*)))
-      (if (common:low-noise-print 60 "dbstats")
-	  (begin
-	    (debug:print 0 *default-log-port* "Server stats:")
-	    (db:print-current-query-stats)))
-      (let* ((hrs-since-start  (/ (- (current-seconds) server-start-time) 3600)))
-	(cond
-         ((and *server-run*
-	       (> (+ last-access server-timeout)
-		  (current-seconds)))
-          (if (common:low-noise-print 120 "server continuing")
-              (debug:print-info 0 *default-log-port* "Server continuing, seconds since last db access: " (- (current-seconds) last-access))
-	      (let ((curr-time (current-seconds)))
-		(handle-exceptions
-		    exn
-		    (debug:print 0 *default-log-port* "ERROR: Failed to change timestamp on log file " server-log-file ". Are you out of space on that disk?")
-		  (if (not *server-overloaded*)
-		      (change-file-times server-log-file curr-time curr-time)))))
-          (loop 0 server-state bad-sync-count (current-milliseconds)))
-         (else
-          (debug:print-info 0 *default-log-port* "Server timed out. seconds since last db access: " (- (current-seconds) last-access))
-          (http-transport:server-shutdown port)))))))
-
-(define (http-transport:server-shutdown port)
-  (begin
-    ;;(BB> "http-transport:server-shutdown called")
-    (debug:print-info 0 *default-log-port* "Starting to shutdown the server. pid="(current-process-id))
-    ;;
-    ;; start_shutdown
-    ;;
-    (set! *time-to-exit* #t) ;; tell on-exit to be fast as we've already cleaned up
-    (portlogger:open-run-close portlogger:set-port port "released")
-    (thread-sleep! 1)
-
-    ;; (debug:print-info 0 *default-log-port* "Max cached queries was    " *max-cache-size*)
-    ;; (debug:print-info 0 *default-log-port* "Number of cached writes   " *number-of-writes*)
-    ;; (debug:print-info 0 *default-log-port* "Average cached write time "
-    ;; 		      (if (eq? *number-of-writes* 0)
-    ;; 			  "n/a (no writes)"
-    ;; 			  (/ *writes-total-delay*
-    ;; 			     *number-of-writes*))
-    ;; 		      " ms")
-    ;; (debug:print-info 0 *default-log-port* "Number non-cached queries "  *number-non-write-queries*)
-    ;; (debug:print-info 0 *default-log-port* "Average non-cached time   "
-    ;; 		      (if (eq? *number-non-write-queries* 0)
-    ;; 			  "n/a (no queries)"
-    ;; 			  (/ *total-non-write-delay* 
-    ;; 			     *number-non-write-queries*))
-    ;; 		      " ms")
-    
-    (db:print-current-query-stats)
-    (common:save-pkt `((action . exit)
-                       (T      . server)
-                       (pid    . ,(current-process-id)))
-                     *configdat* #t)
-    (debug:print-info 0 *default-log-port* "Server shutdown complete. Exiting")
-    (exit)))
-
-;; all routes though here end in exit ...
-;;
-;; start_server? 
-;;
-(define (http-transport:launch)
-  ;; check that a server start is in progress, pause or exit if so
-  (let* ((tmp-area            (common:get-db-tmp-area))
-	 (server-start        (conc tmp-area "/.server-start"))
-	 (server-started      (conc tmp-area "/.server-started"))
-	 (start-time          (common:lazy-modification-time server-start))
-	 (started-time        (common:lazy-modification-time server-started))
-	 (server-starting     (< start-time started-time)) ;; if start-time is less than started-time then a server is still starting
-	 (start-time-old      (> (- (current-seconds) start-time) 5))
-         (cleanup-proc        (lambda (msg)
-                                (let* ((serv-fname      (conc "server-" (current-process-id) "-" (get-host-name) ".log"))
-                                       (full-serv-fname (conc *toppath* "/logs/" serv-fname))
-                                       (new-serv-fname  (conc *toppath* "/logs/" "defunct-" serv-fname)))
-                                  (debug:print 0 *default-log-port* msg)
-                                  (if (common:file-exists? full-serv-fname)
-                                      (system (conc "sleep 1;mv -f " full-serv-fname " " new-serv-fname))
-                                      (debug:print 0 *default-log-port* "INFO: cannot move " full-serv-fname " to " new-serv-fname))
-                                  (exit)))))
-    (if (and (not start-time-old) ;; last server start try was less than five seconds ago
-	     (not server-starting))
-	(begin
-	  (cleanup-proc "NOT starting server, there is either a recently started server or a server in process of starting")
-	  (exit)))
-    ;; lets not even bother to start if there are already three or more server files ready to go
-    (let* ((num-alive   (server:get-num-alive (server:get-list *toppath*))))
-      (if (> num-alive 3)
-          (begin
-            (cleanup-proc (conc "ERROR: Aborting server start because there are already " num-alive " possible servers either running or starting up"))
-            (exit))))
-  (common:save-pkt `((action . start)
-		     (T      . server)
-		     (pid    . ,(current-process-id)))
-		   *configdat* #t)
-    (let* ((th2 (make-thread (lambda ()
-                               (debug:print-info 0 *default-log-port* "Server run thread started")
-                               (http-transport:run 
-                                (if (args:get-arg "-server")
-                                    (args:get-arg "-server")
-                                    "-")
-                                )) "Server run"))
-           (th3 (make-thread (lambda ()
-                               (debug:print-info 0 *default-log-port* "Server monitor thread started")
-                               (http-transport:keep-running)
-                               "Keep running"))))
-      (thread-start! th2)
-      (thread-sleep! 0.25) ;; give the server time to settle before starting the keep-running monitor.
-      (thread-start! th3)
-      (set! *didsomething* #t)
-      (thread-join! th2)
-      (exit))))
-
-;; (define (http-transport:server-signal-handler signum)
-;;   (signal-mask! signum)
-;;   (handle-exceptions
-;;    exn
-;;    (debug:print 0 *default-log-port* " ... exiting ...")
-;;    (let ((th1 (make-thread (lambda ()
-;; 			     (thread-sleep! 1))
-;; 			   "eat response"))
-;; 	 (th2 (make-thread (lambda ()
-;; 			     (debug:print-error 0 *default-log-port* "Received ^C, attempting clean exit. Please be patient and wait a few seconds before hitting ^C again.")
-;; 			     (thread-sleep! 3) ;; give the flush three seconds to do it's stuff
-;; 			     (debug:print 0 *default-log-port* "       Done.")
-;; 			     (exit 4))
-;; 			   "exit on ^C timer")))
-;;      (thread-start! th2)
-;;      (thread-start! th1)
-;;      (thread-join! th2))))
-
-;;===============================================
-;; Java script
-;;===============================================
-(define (http-transport:show-jquery)
-  (let* ((data  (tests:readlines *java-script-lib*)))
-(string-join data "\n")))
-
-
-
-;;======================================================================
-;; web pages
-;;======================================================================
-
-(define (http-transport:html-test-log $)
-   (let* ((run-id ($ 'runid))
-         (test-item ($ 'testname))
-         (parts (string-split test-item ":"))
-         (test-name (car parts))
-             
-         (item-name (if (equal? (length parts) 1)
-             ""
-             (cadr parts))))
-  ;(print $) 
-(tests:get-test-log run-id test-name item-name)))
-
-
-(define (http-transport:html-dboard $)
-  (let* ((page ($ 'page))
-         (oup       (open-output-string)) 
-         (bdy "--------------------------")
-
-         (ret  (tests:dynamic-dboard page)))
-    (s:output-new  oup  ret)
-   (close-output-port oup)
-
-  (set! bdy   (get-output-string oup))
-     (conc "<h1>Dashboard</h1>" bdy "<br/> <br/> "  )))
-
-(define (http-transport:main-page)
-  (let ((linkpath (root-path)))
-    (conc "<head><h1>" (pathname-strip-directory *toppath*) "</h1></head>"
-	  "<body>"
-	  "Run area: " *toppath*
-	  "<h2>Server Stats</h2>"
-	  (http-transport:stats-table) 
-	  "<hr>"
-	  (http-transport:runs linkpath)
-	  "<hr>"
-	  ;; (http-transport:run-stats)
-	  "</body>"
-	  )))
-
-(define (http-transport:stats-table)
-  (mutex-lock! *heartbeat-mutex*)
-  (let ((res 
-	 (conc "<table>"
-	       ;; "<tr><td>Max cached queries</td>        <td>" *max-cache-size* "</td></tr>"
-	       "<tr><td>Number of cached writes</td>   <td>" *number-of-writes* "</td></tr>"
-	       "<tr><td>Average cached write time</td> <td>" (if (eq? *number-of-writes* 0)
-								 "n/a (no writes)"
-								 (/ *writes-total-delay*
-								    *number-of-writes*))
-	       " ms</td></tr>"
-	       "<tr><td>Number non-cached queries</td> <td>"  *number-non-write-queries* "</td></tr>"
-	       ;; "<tr><td>Average non-cached time</td>   <td>" (if (eq? *number-non-write-queries* 0)
-	       ;; 							 "n/a (no queries)"
-	       ;; 							 (/ *total-non-write-delay* 
-	       ;; 							    *number-non-write-queries*))
-	       " ms</td></tr>"
-	       "<tr><td>Last access</td><td>"              (seconds->time-string *db-last-access*) "</td></tr>"
-	       "</table>")))
-    (mutex-unlock! *heartbeat-mutex*)
-    res))
-
-(define (http-transport:runs linkpath)
-  (conc "<h3>Runs</h3>"
-	(string-intersperse
-	 (let ((files (map pathname-strip-directory (glob (conc linkpath "/*")))))
-	   (map (lambda (p)
-		  (conc "<a href=\"" p "\">" p "</a><br>"))
-		files))
-	 " ")))
-
-#;(define (http-transport:run-stats)
-  (let ((stats (open-run-close db:get-running-stats #f)))
-    (conc "<table>"
-	  (string-intersperse
-	   (map (lambda (stat)
-		  (conc "<tr><td>" (car stat) "</td><td>" (cadr stat) "</td></tr>"))
-		stats)
-	   " ")
-	  "</table>")))

ADDED   http-transportmod.scm
Index: http-transportmod.scm
==================================================================
--- /dev/null
+++ http-transportmod.scm
@@ -0,0 +1,719 @@
+
+;; Copyright 2006-2012, Matthew Welland.
+;; 
+;; This file is part of Megatest.
+;; 
+;;     Megatest is free software: you can redistribute it and/or modify
+;;     it under the terms of the GNU General Public License as published by
+;;     the Free Software Foundation, either version 3 of the License, or
+;;     (at your option) any later version.
+;; 
+;;     Megatest is distributed in the hope that it will be useful,
+;;     but WITHOUT ANY WARRANTY; without even the implied warranty of
+;;     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;;     GNU General Public License for more details.
+;; 
+;;     You should have received a copy of the GNU General Public License
+;;     along with Megatest.  If not, see <http://www.gnu.org/licenses/>.
+
+(declare (unit http-transportmod)) ;; unit name must be unique per file; "apimod" belongs to apimod.scm
+(declare (uses commonmod))
+(declare (uses stml2))
+(declare (uses apimod))
+(declare (uses dbmod))
+(declare (uses mtargs))
+(declare (uses mtconfigf))
+(declare (uses testsmod))
+
+(module http-transportmod ;; was "apimod", which collided with the real apimod module imported below
+	*
+	
+(import scheme chicken data-structures extras posix files
+	srfi-13 srfi-18 spiffy http-client
+	spiffy-directory-listing
+	spiffy-request-vars tcp
+	)
+(import commonmod)
+(import stml2)
+(import apimod)
+(import dbmod)
+(import testsmod)
+(import (prefix mtargs args:))
+(import (prefix mtconfigf configf:))
+
+;; Build "http://<host>:<port>" from a (host port ...) list; yields #f when hostport is #f.
+(define (http-transport:make-server-url hostport)
+  (and hostport
+       (conc "http://" (car hostport) ":" (cadr hostport))))
+
+(define *server-loop-heart-beat* (current-seconds)) ;; initialised to module load time; no reader/writer is visible in this part of the file
+(define *writes-total-delay* 0) ;; starts at 0; presumably accumulated write delay used for server stats -- TODO confirm against the stats code
+
+;;======================================================================
+;; S E R V E R
+;; ======================================================================
+
+;; Entry point for starting the server; it simply delegates to http-transport:launch.
+;; run-id and transport-type are accepted but not used by the current body.
+;; Original note: all routes through here end in exit ...
+;;
+;; start_server
+;;
+(define (server:launch run-id transport-type)
+  ;; Dispatch on transport-type is disabled (commented out); only the http branch is live:
+  ;;   (case transport-type ((http)
+  (http-transport:launch))
+    ;;((nmsg)(nmsg-transport:launch run-id))
+    ;;((rpc)  (rpc-transport:launch run-id))
+    ;; (else (debug:print-error 0 *default-log-port* "unknown server type " transport-type))))
+
+(define *db:process-queue-mutex* (make-mutex)) ;; srfi-18 mutex; not locked anywhere in the visible part of this file -- presumably used by queue-processing code elsewhere
+
+(define (http-transport:run hostn)
+  ;; Configure spiffy (limits, root path, handlers, vhost routes), write the .server-start marker, then start listening via http-transport:try-start-server
+  (tcp-buffer-size 2048)
+  (max-connections 2048) 
+  (debug:print 2 *default-log-port* "Attempting to start the server ...")
+  (let* ((db              #f) ;; never opened here (was (open-db)); we don't want the server to be opening and closing the db unnecessarily
+	 (hostname        (get-host-name))
+	 (ipaddrstr       (let ((ipstr (if (string=? "-" hostn)
+					   ;; (string-intersperse (map number->string (u8vector->list (hostname->ip hostname))) ".")
+					   (server:get-best-guess-address hostname)
+					   #f)))
+			    (if ipstr ipstr hostn))) ;; hostn "-" means guess an address; otherwise (or if nothing was guessed) use hostn as given
+	 (start-port      (portlogger:open-run-close portlogger:find-port))
+	 (link-tree-path  (common:get-linktree))
+	 (tmp-area        (common:get-db-tmp-area))
+	 (start-file      (conc tmp-area "/.server-start")))
+    (debug:print-info 0 *default-log-port* "portlogger recommended port: " start-port)
+    ;; set some parameters for the server
+    (root-path     (if link-tree-path 
+		       link-tree-path
+		       (current-directory))) ;; WARNING: SECURITY HOLE. FIX ASAP!
+    (handle-directory spiffy-directory-listing)
+    (handle-exception (lambda (exn chain) ;; exn and chain are discarded; a fresh 'server condition is signalled instead
+			(signal (make-composite-condition
+				 (make-property-condition 
+				  'server
+				  'message "server error")))))
+
+    ;; http-transport:handle-directory) ;; simple-directory-handler)
+    ;; Setup the web server and a /ctrl interface
+    ;;
+    (vhost-map `(((* any) . ,(lambda (continue)
+			       ;; Single handler for every vhost; routes on the request's uri-path below.
+				 ;; Original note: this is where we set up the database connections (no db is opened in this lambda as written)
+			       (let* (($   (request-vars source: 'both))
+				      (dat ($ 'dat))
+				      (res #f))
+				 (cond
+				  ((equal? (uri-path (request-uri (current-request)))
+					   '(/ "api"))
+				   (send-response body:    (api:process-request *dbstruct-db* $) ;; the $ is the request vars proc
+						  headers: '((content-type text/plain)))
+				   (mutex-lock! *heartbeat-mutex*) ;; stamp the last db access time under the heartbeat mutex
+				   (set! *db-last-access* (current-seconds))
+				   (mutex-unlock! *heartbeat-mutex*))
+				  ((equal? (uri-path (request-uri (current-request))) 
+					   '(/ ""))
+				   (send-response body: (http-transport:main-page)))
+				  ((equal? (uri-path (request-uri (current-request))) 
+					   '(/ "json_api"))
+				   (send-response body: (http-transport:main-page)))
+				  ((equal? (uri-path (request-uri (current-request))) 
+					   '(/ "runs"))
+				   (send-response body: (http-transport:main-page)))
+				  ((equal? (uri-path (request-uri (current-request))) 
+					   '(/ any)) ;; NOTE(review): literal list holding the symbol any, not a wildcard -- looks like it only matches such a path; confirm intent
+				   (send-response body: "hey there!\n"
+						  headers: '((content-type text/plain))))
+				  ((equal? (uri-path (request-uri (current-request))) 
+					   '(/ "hey"))
+				   (send-response body: "hey there!\n" 
+						  headers: '((content-type text/plain))))
+                                  ((equal? (uri-path (request-uri (current-request))) 
+					   '(/ "jquery3.1.0.js"))
+				   (send-response body: (http-transport:show-jquery) 
+						  headers: '((content-type application/javascript))))
+                                  #;((equal? (uri-path (request-uri (current-request))) 
+					   '(/ "test_log"))
+				   (send-response body: (http-transport:html-test-log $) 
+						  headers: '((content-type text/HTML))))
+				  ;; removing dashboard due to dependency challenges
+                                  #;((equal? (uri-path (request-uri (current-request))) 
+					   '(/ "dashboard"))
+				   (send-response body: (http-transport:html-dboard $) 
+						  headers: '((content-type text/HTML)))) 
+				  (else (continue))))))))
+    (with-output-to-file start-file (lambda ()(print (current-process-id)))) ;; marker file <tmp-area>/.server-start containing our pid
+    (http-transport:try-start-server ipaddrstr start-port)))
+
+;; Called by http-transport:run. On any exception it marks the port failed and calls itself again
+;; with a fresh port from portlogger, as long as the failing portnum is below 64000.
+(define (http-transport:try-start-server ipaddrstr portnum)
+  (let ((config-hostname (configf:lookup *configdat* "server" "hostname"))
+	(config-use-proxy (equal? (configf:lookup *configdat* "client" "use-http_proxy") "yes")))
+    (if (not config-use-proxy)
+	(determine-proxy (constantly #f))) ;; install a proxy chooser that always answers #f
+    (debug:print-info 0 *default-log-port* "http-transport:try-start-server time=" (seconds->time-string (current-seconds)) " ipaddrsstr=" ipaddrstr " portnum=" portnum " config-hostname=" config-hostname)
+    (handle-exceptions
+	exn
+	(begin
+	  (print-error-message exn)
+	  (if (< portnum 64000) ;; retry only while the port that just failed is below 64000
+	      (begin 
+		(debug:print 0 *default-log-port* "WARNING: attempt to start server failed. Trying again ...")
+		(debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
+		(debug:print 5 *default-log-port* "exn=" (condition->list exn))
+		(portlogger:open-run-close portlogger:set-failed portnum)
+		(debug:print 0 *default-log-port* "WARNING: failed to start on portnum: " portnum ", trying next port")
+		(thread-sleep! 0.1)
+		
+		;; get_next_port goes here
+		(http-transport:try-start-server ipaddrstr
+						 (portlogger:open-run-close portlogger:find-port)))
+	      (begin
+		(print "ERROR: Tried and tried but could not start the server"))))
+      ;; any error in following steps will result in a retry
+      (set! *server-info* (list ipaddrstr portnum)) ;; NOTE(review): written without *heartbeat-mutex*, while http-transport:keep-running reads it under that mutex -- confirm this is intended
+      (debug:print 0 *default-log-port* "INFO: Trying to start server on " ipaddrstr ":" portnum)
+      ;; This starts the spiffy server
+      ;; NEED WAY TO SET IP TO #f TO BIND ALL
+      ;; (start-server bind-address: ipaddrstr port: portnum)
+      (if config-hostname ;; this is a hint to bind directly
+	  (start-server port: portnum bind-address: (if (equal? config-hostname "-")
+							ipaddrstr
+							config-hostname))
+	  (start-server port: portnum))
+      (portlogger:open-run-close portlogger:set-port portnum "released") ;; reached only after start-server returns; presumably that means the server has stopped
+      (debug:print 1 *default-log-port* "INFO: server has been stopped"))))
+
+;;======================================================================
+;; S E R V E R   U T I L I T I E S 
+;;======================================================================
+
+;;======================================================================
+;; C L I E N T S
+;;======================================================================
+
+(define *http-mutex* (make-mutex)) ;; locked by the request-count helpers below and around each client request in http-transport:client-api-send-receive
+
+;; NOTE: Large block of code from 32436b426188080f72fceb6894af541fbad9921e removed here
+;;       I'm pretty sure it is defunct.
+
+;; This next block was all imported en masse from the api branch
+(define *http-requests-in-progress* 0) ;; counter incremented/decremented by the inc-/dec-requests helpers below
+(define *http-connections-next-cleanup* (current-seconds)) ;; epoch seconds; http-transport:get-time-to-cleanup reports #t once the clock passes this
+
+;; #t when the clock has passed *http-connections-next-cleanup*; the comparison is made while holding *http-mutex*
+(define (http-transport:get-time-to-cleanup)
+  (mutex-lock! *http-mutex*)
+  (let ((cleanup-due? (> (current-seconds) *http-connections-next-cleanup*)))
+    (mutex-unlock! *http-mutex*)
+    cleanup-due?))
+
+;; Bump the in-flight request counter while holding *http-mutex*.
+(define (http-transport:inc-requests-count)
+  (mutex-lock! *http-mutex*)
+  (set! *http-requests-in-progress* (+ *http-requests-in-progress* 1))
+  ;; Throttle: with more than 5 requests in flight, log and sleep one second (the mutex stays held meanwhile)
+  (when (> *http-requests-in-progress* 5)
+    (debug:print-info 0 *default-log-port* "Whoa there buddy, ease up...")
+    (thread-sleep! 1))
+  (mutex-unlock! *http-mutex*))
+
+(define (http-transport:dec-requests-count proc) ;; call the thunk proc, then decrement the in-flight counter, all while holding *http-mutex*
+  (mutex-lock! *http-mutex*)
+  (proc) ;; NOTE(review): if proc raises, the lines below are skipped, so the mutex stays locked and the counter is not decremented
+  (set! *http-requests-in-progress* (- *http-requests-in-progress* 1))
+  (mutex-unlock! *http-mutex*))
+
+(define (http-transport:dec-requests-count-and-close-all-connections) ;; decrement the counter, wait up to ~5s for it to reach 0, close all connections, schedule next cleanup, unlock
+  (set! *http-requests-in-progress* (- *http-requests-in-progress* 1)) ;; NOTE(review): no mutex-lock! here; presumably the caller already holds *http-mutex* via http-transport:inc-requests-and-prep-to-close-all-connections -- confirm
+  (let loop ((etime (+ (current-seconds) 5))) ;; give up in five seconds
+    (if (> *http-requests-in-progress* 0)
+	(if (> etime (current-seconds))
+	    (begin
+	      (thread-sleep! 0.05) ;; poll every 50ms until the deadline
+	      (loop etime))
+	    (debug:print-error 0 *default-log-port* "requests still in progress after 5 seconds of waiting. I'm going to pass on cleaning up http connections"))
+	(close-all-connections!)))
+  (set! *http-connections-next-cleanup* (+ (current-seconds) 10)) ;; next cleanup becomes due 10 seconds from now
+  (mutex-unlock! *http-mutex*))
+
+(define (http-transport:inc-requests-and-prep-to-close-all-connections) ;; lock *http-mutex* and bump the in-flight counter
+  (mutex-lock! *http-mutex*) ;; not unlocked in this function; http-transport:dec-requests-count-and-close-all-connections performs the matching unlock
+  (set! *http-requests-in-progress* (+ 1 *http-requests-in-progress*)))
+
+;; Send "cmd" with serialized "params" to the server described by serverdat and return the vector
+;; #(success payload 0). serverdat must be a vector (see http-transport:client-connect); otherwise the process exits with status 1.
+(define (http-transport:client-api-send-receive run-id serverdat cmd params #!key (numretries 3)(area-dat #f))
+  (let* ((fullurl    (if (vector? serverdat)
+			 (http-transport:server-dat-get-api-req serverdat)
+			 (begin
+			   (debug:print 0 *default-log-port* "FATAL ERROR: http-transport:client-api-send-receive called with no server info")
+			   (exit 1))))
+	 (res        (vector #f "uninitialized"))
+	 (success    #t)
+	 (sparams    (db:obj->string params transport: 'http))
+	 (runremote  (or area-dat *runremote*)))
+       (debug:print-info 11 *default-log-port* "fullurl=" fullurl ", cmd=" cmd ", params=" params ", run-id=" run-id "\n")
+       ;; set up the http-client here
+       (max-retry-attempts 1)
+       ;; retry-request? always answers #f, so http-client never retries a request on its own
+       (retry-request? (lambda (request)
+			 #f))
+       ;; send the data and get the response
+       ;; extract the needed info from the http data and 
+       ;; process and return it.
+       (let* ((send-recieve (lambda ()
+			      (mutex-lock! *http-mutex*)
+			      ;; (condition-case (with-input-from-request "http://localhost"; #f read-lines)
+			      ;;					       ((exn http client-error) e (print e)))
+			      (set! res (vector                ;;; DON'T FORGET - THIS IS THE CLIENT SIDE! NOTE: consider moving this to client.scm since we are only supporting http transport at this time.
+					 success ;; NOTE(review): the handler below may set success to #f; whether that is seen here depends on argument evaluation order
+					 (db:string->obj 
+					  (handle-exceptions
+					      exn
+					      (let ((call-chain (get-call-chain))
+						    (msg        ((condition-property-accessor 'exn 'message) exn)))
+						(set! success #f)
+                                                (if (debug:debug-mode 1)
+                                                    (debug:print-info 0 *default-log-port* "couldn't talk to server, trying again ...")
+                                                    (begin
+                                                      (debug:print 0 *default-log-port* "WARNING: failure in with-input-from-request to " fullurl ".")
+                                                      (debug:print 0 *default-log-port* " message: " msg)
+                                                      (debug:print 0 *default-log-port* " cmd: " cmd " params: " params)
+                                                      (debug:print 0 *default-log-port* " call-chain: " call-chain)))
+                                                (if runremote
+						    (remote-conndat-set! runremote #f))
+						;; Killing associated server to allow clean retry.")
+						;; (tasks:kill-server-run-id run-id)  ;; better to kill the server in the logic that called this routine?
+						;; *http-mutex* is deliberately NOT released here: this handler returns normally, so the unlock after close-all-connections! below releases it exactly once
+					     ;;; (signal (make-composite-condition
+					     ;;;          (make-property-condition 'commfail 'message "failed to connect to server")))
+					     ;;; "communications failed"
+						(db:obj->string #f))
+					    (with-input-from-request ;; was dat
+					     fullurl 
+					     (list (cons 'key (or *server-id* "thekey"))
+						   (cons 'cmd cmd)
+						   (cons 'params sparams))
+					     read-string))
+					  transport: 'http)
+                                         0)) ;; added this speculatively
+			      ;; Shouldn't this be a call to the managed call-all-connections stuff above?
+			      (close-all-connections!)
+			      (mutex-unlock! *http-mutex*) ;; single release point for both the success and the failure path
+			      ))
+	      (time-out     (lambda () ;; NOTE(review): only sleeps and returns #f; thread-join! on th1 below has no timeout, so this thread does not bound the wait
+			      (thread-sleep! 45)
+			      #f))
+	      (th1 (make-thread send-recieve "with-input-from-request"))
+	      (th2 (make-thread time-out     "time out")))
+	 (thread-start! th1)
+	 (thread-start! th2)
+	 (thread-join! th1)
+	 (thread-terminate! th2)
+	 (debug:print-info 11 *default-log-port* "got res=" res)
+	 (if (vector? res) ;; res is a vector on every path visible here, so the else branch looks unreachable
+	     (if (vector-ref res 0) ;; this is the first flag or the second flag?
+		 res ;; this is the *inner* vector? seriously? why?
+                 (if (debug:debug-mode 11)
+                     (let ((call-chain (get-call-chain))) ;; note: this code also called in nmsg-transport - consider consolidating it
+                       (print-call-chain (current-error-port))
+                       (debug:print-error 11 *default-log-port* "error above occured at server, res=" res) ;; " message: " ((condition-property-accessor 'exn 'message) exn))
+                       (debug:print 11 *default-log-port* " server call chain:")
+                       (pp (vector-ref res 1) (current-error-port))
+                       (signal (vector-ref res 0))) ;; NOTE(review): slot 0 is false in this branch, so this signals #f
+                     res))
+	     (signal (make-composite-condition
+		      (make-property-condition 
+		       'timeout
+		       'message "http-transport:client-api-send-receive timed out talking to server")))))))
+
+;; Close the http connection recorded in the remote struct (area-dat when given, else *runremote*).
+;; Yields #t after a successful close and #f when no vector conndat is available; on an exception
+;; the failure is logged and the handler's value is returned.
+(define (http-transport:close-connections #!key (area-dat #f))
+  (let* ((remote  (or area-dat *runremote*))
+         (conndat (and remote
+                       (remote-conndat remote)))) ;; (hash-table-ref/default *runremote* run-id #f)))
+    (and (vector? conndat)
+         (let ((api-uri (http-transport:server-dat-get-api-uri conndat)))
+           (handle-exceptions
+               exn
+               (begin
+                 (print-call-chain *default-log-port*)
+                 (debug:print-error 0 *default-log-port* " closing connection failed with error: " ((condition-property-accessor 'exn 'message) exn)))
+             (close-connection! api-uri)
+             ;;(close-idle-connections!)
+             #t)))))
+
+
+;; Moved to commonmod
+;;
+;; (define (make-http-transport:server-dat)(make-vector 6))
+;; (define (http-transport:server-dat-get-iface         vec)    (vector-ref  vec 0))
+;; (define (http-transport:server-dat-get-port          vec)    (vector-ref  vec 1))
+;; (define (http-transport:server-dat-get-api-uri       vec)    (vector-ref  vec 2))
+;; (define (http-transport:server-dat-get-api-url       vec)    (vector-ref  vec 3))
+;; (define (http-transport:server-dat-get-api-req       vec)    (vector-ref  vec 4))
+;; (define (http-transport:server-dat-get-last-access   vec)    (vector-ref  vec 5))
+;; (define (http-transport:server-dat-get-socket        vec)    (vector-ref  vec 6))
+;; 
+;; (define (http-transport:server-dat-make-url vec)
+;;   (if (and (http-transport:server-dat-get-iface vec)
+;; 	   (http-transport:server-dat-get-port  vec))
+;;       (conc "http://" 
+;; 	    (http-transport:server-dat-get-iface vec)
+;; 	    ":"
+;; 	    (http-transport:server-dat-get-port  vec))
+;;       #f))
+;; 
+;; (define (http-transport:server-dat-update-last-access vec)
+;;   (if (vector? vec)
+;;       (vector-set! vec 5 (current-seconds))
+;;       (begin
+;; 	(print-call-chain (current-error-port))
+;; 	(debug:print-error 0 *default-log-port* "call to http-transport:server-dat-update-last-access with non-vector!!"))))
+
+;;
+;; connect
+;;
+;; Build the connection record for the server listening at iface:port.
+;;
+;; The result is a six-slot vector:
+;;   0 iface, 1 port, 2 api uri object, 3 api url string,
+;;   4 prepared POST request, 5 creation time in seconds
+(define (http-transport:client-connect iface port)
+  (let* ((url (conc "http://" iface ":" port "/api"))
+         (uri (uri-reference url))
+         (req (make-request method: 'POST uri: uri)))
+    (vector iface port uri url req (current-seconds))))
+
+;; Run http-transport:keep-running in a parallel thread to monitor that the db is being
+;; used and to shut down after some time if it is not.
+;;
+(define (http-transport:keep-running)
+  ;; Monitor loop for a running server.  Every path out of this procedure
+  ;; ends in (exit), either directly or via http-transport:server-shutdown.
+  ;;
+  ;; Phase 1 - wait for the transport to publish *server-info* (read under
+  ;;           *heartbeat-mutex*).  On success an "alive" pkt is saved; after
+  ;;           120 seconds without success a "died" pkt is saved and the
+  ;;           process exits.
+  ;; Phase 2 - record our pid in <tmp-area>/.server-started, then loop:
+  ;;           set up the db on the first pass, pace each pass to roughly
+  ;;           four seconds, pick up iface/port changes, and shut the server
+  ;;           down once *server-run* is false or the last db access is older
+  ;;           than (server:expiration-timeout).
+  (debug:print-info 0 *default-log-port* "Starting the sync-back, keep alive thread in server")
+  (let* ((sdat              #f)
+         (tmp-area          (common:get-db-tmp-area))
+         (started-file      (conc tmp-area "/.server-started"))
+         (server-start-time (current-seconds))
+         (server-info (let loop ((start-time (current-seconds))
+                                 (changed    #t)
+                                 (last-sdat  "not this"))
+                        (begin ;; let ((sdat #f))
+                          (thread-sleep! 0.01)
+                          (debug:print-info 0 *default-log-port* "Waiting for server alive signature")
+                          (mutex-lock! *heartbeat-mutex*)
+                          (set! sdat *server-info*)
+                          (mutex-unlock! *heartbeat-mutex*)
+                          ;; NOTE(review): `changed` is fed (equal? sdat last-sdat) below, so it is
+                          ;; #t when the info did NOT change between polls - confirm that
+                          ;; this is the intended sense before relying on the name.
+                          (if (and sdat
+                                   (not changed)
+                                   (> (- (current-seconds) start-time) 2))
+                              (begin
+                                (debug:print-info 0 *default-log-port* "Received server alive signature")
+                                (common:save-pkt `((action . alive)
+                                                   (T      . server)
+                                                   (pid    . ,(current-process-id))
+                                                   (ipaddr . ,(car sdat))
+                                                   (port   . ,(cadr sdat)))
+                                                 *configdat* #t)
+                                sdat)
+                              (begin
+                                (debug:print-info 0 *default-log-port* "Still waiting, last-sdat=" last-sdat)
+                                (sleep 4)
+                                (if (> (- (current-seconds) start-time) 120) ;; been waiting for two minutes
+                                    (begin
+                                      (debug:print-error 0 *default-log-port* "transport appears to have died, exiting server")
+                                      ;; sdat is normally still #f when we give up, so it must not
+                                      ;; be taken apart unconditionally: an error here would kill
+                                      ;; this thread before the pkt is saved and (exit) is reached.
+                                      (common:save-pkt `((action . died)
+                                                         (T      . server)
+                                                         (pid    . ,(current-process-id))
+                                                         (ipaddr . ,(if (pair? sdat) (car sdat) "n/a"))
+                                                         (port   . ,(if (and (pair? sdat) (pair? (cdr sdat)))
+                                                                        (cadr sdat)
+                                                                        "n/a"))
+                                                         (msg    . "Transport died?"))
+                                                       *configdat* #t)
+                                      (exit))
+                                    (loop start-time
+                                          (equal? sdat last-sdat)
+                                          sdat)))))))
+         (iface       (car server-info))
+         (port        (cadr server-info))
+         (last-access 0)
+         (server-timeout (server:expiration-timeout))
+         (server-going  #f)
+         (server-log-file (args:get-arg "-log"))) ;; always set when we are a server
+
+    ;; leave our pid in the "started" marker file
+    (with-output-to-file started-file (lambda ()(print (current-process-id))))
+
+    (let loop ((count         0)
+               (server-state 'available)
+               (bad-sync-count 0)
+               (start-time     (current-milliseconds)))
+      ;; Use this opportunity to sync the tmp db to megatest.db
+      (if (not server-going) ;; *dbstruct-db* 
+          (begin
+            (debug:print 0 *default-log-port* "SERVER: dbprep")
+            (set! *dbstruct-db*  (db:setup #t)) ;;  run-id))
+            (set! server-going #t)
+            (debug:print 0 *default-log-port* "SERVER: running, megatest version: " (common:get-full-version)) ;; NOTE: the server is NOT yet marked as running in the log. We do that in the keep-running routine.
+            (thread-start! *watchdog*)))
+      
+      ;; when things go wrong we don't want to be doing the various queries too often
+      ;; so we strive to run this stuff only every four seconds or so.
+      (let* ((sync-time (- (current-milliseconds) start-time))
+             (rem-time  (quotient (- 4000 sync-time) 1000)))
+        (if (and (<= rem-time 4)
+                 (>  rem-time 0))
+            (thread-sleep! rem-time)))
+      
+      ;; NOTE(review): this call to loop is not in tail position.  The remaining
+      ;; code of the pass only runs if it returns, and every branch at the end of
+      ;; a pass either loops again or exits.
+      (if (< count 1) ;; 3x3 = 9 secs aprox
+          (loop (+ count 1) 'running bad-sync-count (current-milliseconds)))
+      
+      ;; Check that iface and port have not changed (can happen if server port collides)
+      (mutex-lock! *heartbeat-mutex*)
+      (set! sdat *server-info*)
+      (mutex-unlock! *heartbeat-mutex*)
+      
+      (if (not (equal? sdat (list iface port)))
+          (let ((new-iface (car sdat))
+                (new-port  (cadr sdat)))
+            (debug:print-info 0 *default-log-port* "WARNING: interface changed, refreshing iface and port info")
+            (set! iface new-iface)
+            (set! port  new-port)
+            (debug:print 0 *default-log-port* "SERVER STARTED: " iface ":" port " AT " (current-seconds))
+            (flush-output *default-log-port*)))
+      
+      ;; Transfer *db-last-access* to last-access to use in checking that we are still alive
+      (mutex-lock! *heartbeat-mutex*)
+      (set! last-access *db-last-access*)
+      (mutex-unlock! *heartbeat-mutex*)
+      
+      (if (common:low-noise-print 120 (conc "server running on " iface ":" port))
+          (begin
+            (debug:print 0 *default-log-port* "SERVER STARTED: " iface ":" port " AT " (current-seconds))
+            (flush-output *default-log-port*)))
+      (if (common:low-noise-print 60 "dbstats")
+          (begin
+            (debug:print 0 *default-log-port* "Server stats:")
+            (db:print-current-query-stats)))
+      (let* ((hrs-since-start  (/ (- (current-seconds) server-start-time) 3600)))
+        (cond
+         ;; still wanted and recently used: keep going
+         ((and *server-run*
+               (> (+ last-access server-timeout)
+                  (current-seconds)))
+          ;; either log that we continue, or (on the other passes) refresh the
+          ;; timestamps of the server log file unless the server is overloaded
+          (if (common:low-noise-print 120 "server continuing")
+              (debug:print-info 0 *default-log-port* "Server continuing, seconds since last db access: " (- (current-seconds) last-access))
+              (let ((curr-time (current-seconds)))
+                (handle-exceptions
+                    exn
+                    (debug:print 0 *default-log-port* "ERROR: Failed to change timestamp on log file " server-log-file ". Are you out of space on that disk?")
+                  (if (not *server-overloaded*)
+                      (change-file-times server-log-file curr-time curr-time)))))
+          (loop 0 server-state bad-sync-count (current-milliseconds)))
+         (else
+          (debug:print-info 0 *default-log-port* "Server timed out. seconds since last db access: " (- (current-seconds) last-access))
+          (http-transport:server-shutdown port)))))))
+
+;; Shut the server process down.  Does not return: the last step is (exit).
+;;
+;;   port : the port this server was using; it is handed to
+;;          portlogger:set-port together with the "released" marker.
+(define (http-transport:server-shutdown port)
+  ;;(BB> "http-transport:server-shutdown called")
+  (debug:print-info 0 *default-log-port* "Starting to shutdown the server. pid="(current-process-id))
+
+  ;; start_shutdown
+  ;; tell on-exit to be fast as we've already cleaned up
+  (set! *time-to-exit* #t)
+  (portlogger:open-run-close portlogger:set-port port "released")
+  (thread-sleep! 1)
+
+  ;; (debug:print-info 0 *default-log-port* "Max cached queries was    " *max-cache-size*)
+  ;; (debug:print-info 0 *default-log-port* "Number of cached writes   " *number-of-writes*)
+  ;; (debug:print-info 0 *default-log-port* "Average cached write time "
+  ;;                   (if (eq? *number-of-writes* 0)
+  ;;                       "n/a (no writes)"
+  ;;                       (/ *writes-total-delay*
+  ;;                          *number-of-writes*))
+  ;;                   " ms")
+  ;; (debug:print-info 0 *default-log-port* "Number non-cached queries "  *number-non-write-queries*)
+  ;; (debug:print-info 0 *default-log-port* "Average non-cached time   "
+  ;;                   (if (eq? *number-non-write-queries* 0)
+  ;;                       "n/a (no queries)"
+  ;;                       (/ *total-non-write-delay* 
+  ;;                          *number-non-write-queries*))
+  ;;                   " ms")
+
+  (db:print-current-query-stats)
+  ;; leave an "exit" pkt behind for this pid
+  (common:save-pkt (list (cons 'action 'exit)
+                         (cons 'T      'server)
+                         (cons 'pid    (current-process-id)))
+                   *configdat* #t)
+  (debug:print-info 0 *default-log-port* "Server shutdown complete. Exiting")
+  (exit))
+
+;; all routes through here end in exit ...
+;;
+;; start_server? 
+;;
+(define (http-transport:launch)
+  ;; Start a server in this process, unless another start looks to be in
+  ;; progress or too many servers already exist.  Never returns: every path
+  ;; ends in (exit).
+  ;;
+  ;; Steps:
+  ;;   1. compare the modification times of <tmp-area>/.server-start and
+  ;;      <tmp-area>/.server-started and bail out on a very recent start attempt
+  ;;   2. bail out if more than three servers are already alive
+  ;;   3. save a "start" pkt, then run http-transport:run and
+  ;;      http-transport:keep-running in two threads and wait for the former
+  (let* ((tmp-area            (common:get-db-tmp-area))
+         (server-start        (conc tmp-area "/.server-start"))
+         (server-started      (conc tmp-area "/.server-started"))
+         (start-time          (common:lazy-modification-time server-start))
+         (started-time        (common:lazy-modification-time server-started))
+         ;; NOTE(review): this is #t when .server-start is OLDER than .server-started,
+         ;; and the guard below fires on (not server-starting).  The name reads as the
+         ;; opposite of that - confirm the intended meaning before changing the test.
+         (server-starting     (< start-time started-time))
+         (start-time-old      (> (- (current-seconds) start-time) 5))
+         ;; log msg, move this process's server log to a "defunct-" name, then exit
+         (cleanup-proc        (lambda (msg)
+                                (let* ((serv-fname      (conc "server-" (current-process-id) "-" (get-host-name) ".log"))
+                                       (full-serv-fname (conc *toppath* "/logs/" serv-fname))
+                                       (new-serv-fname  (conc *toppath* "/logs/" "defunct-" serv-fname)))
+                                  (debug:print 0 *default-log-port* msg)
+                                  (if (common:file-exists? full-serv-fname)
+                                      (system (conc "sleep 1;mv -f " full-serv-fname " " new-serv-fname))
+                                      (debug:print 0 *default-log-port* "INFO: cannot move " full-serv-fname " to " new-serv-fname))
+                                  (exit)))))
+    ;; cleanup-proc exits the process itself, so nothing needs to follow it
+    (if (and (not start-time-old) ;; last server start try was less than five seconds ago
+             (not server-starting))
+        (cleanup-proc "NOT starting server, there is either a recently started server or a server in process of starting"))
+    ;; lets not even bother to start if there are already more than three server files ready to go
+    (let ((num-alive (server:get-num-alive (server:get-list *toppath*))))
+      (if (> num-alive 3)
+          (cleanup-proc (conc "ERROR: Aborting server start because there are already " num-alive " possible servers either running or starting up"))))
+    (common:save-pkt `((action . start)
+                       (T      . server)
+                       (pid    . ,(current-process-id)))
+                     *configdat* #t)
+    (let* ((th2 (make-thread (lambda ()
+                               (debug:print-info 0 *default-log-port* "Server run thread started")
+                               ;; "-" is passed when no -server argument was supplied
+                               (http-transport:run (or (args:get-arg "-server") "-")))
+                             "Server run"))
+           ;; the name is the second argument of make-thread, not part of the thunk body
+           (th3 (make-thread (lambda ()
+                               (debug:print-info 0 *default-log-port* "Server monitor thread started")
+                               (http-transport:keep-running))
+                             "Keep running")))
+      (thread-start! th2)
+      (thread-sleep! 0.25) ;; give the server time to settle before starting the keep-running monitor.
+      (thread-start! th3)
+      (set! *didsomething* #t)
+      (thread-join! th2)
+      (exit))))
+
+;; (define (http-transport:server-signal-handler signum)
+;;   (signal-mask! signum)
+;;   (handle-exceptions
+;;    exn
+;;    (debug:print 0 *default-log-port* " ... exiting ...")
+;;    (let ((th1 (make-thread (lambda ()
+;; 			     (thread-sleep! 1))
+;; 			   "eat response"))
+;; 	 (th2 (make-thread (lambda ()
+;; 			     (debug:print-error 0 *default-log-port* "Received ^C, attempting clean exit. Please be patient and wait a few seconds before hitting ^C again.")
+;; 			     (thread-sleep! 3) ;; give the flush three seconds to do it's stuff
+;; 			     (debug:print 0 *default-log-port* "       Done.")
+;; 			     (exit 4))
+;; 			   "exit on ^C timer")))
+;;      (thread-start! th2)
+;;      (thread-start! th1)
+;;      (thread-join! th2))))
+
+;;===============================================
+;; Java script
+;;===============================================
+;; Return the lines that tests:readlines yields for *java-script-lib*,
+;; joined into a single string with newlines between them.
+(define (http-transport:show-jquery)
+  (string-join (tests:readlines *java-script-lib*) "\n"))
+
+
+
+;;======================================================================
+;; web pages
+;;======================================================================
+
+#;(define (http-transport:html-test-log $)
+   (let* ((run-id ($ 'runid))
+         (test-item ($ 'testname))
+         (parts (string-split test-item ":"))
+         (test-name (car parts))
+             
+         (item-name (if (equal? (length parts) 1)
+             ""
+             (cadr parts))))
+  ;(print $) 
+     (tests:get-test-log run-id test-name item-name)))
+
+
+#;(define (http-transport:html-dboard $)
+  (let* ((page ($ 'page))
+         (oup       (open-output-string)) 
+         (bdy "--------------------------")
+
+         (ret  (tests:dynamic-dboard page)))
+    (s:output-new  oup  ret)
+   (close-output-port oup)
+
+  (set! bdy   (get-output-string oup))
+     (conc "<h1>Dashboard</h1>" bdy "<br/> <br/> "  )))
+
+;; Render the server's main HTML page as one string: a heading with the last
+;; path component of *toppath*, the full run area path, the server stats
+;; table and the list of run links found under (root-path).
+(define (http-transport:main-page)
+  (let ((link-root (root-path)))
+    (conc "<head><h1>" (pathname-strip-directory *toppath*) "</h1></head><body>Run area: "
+          *toppath*
+          "<h2>Server Stats</h2>"
+          (http-transport:stats-table)
+          "<hr>"
+          (http-transport:runs link-root)
+          ;; (http-transport:run-stats) would go between these two pieces
+          "<hr></body>")))
+
+;; Render the server statistics as an HTML table string.
+;;
+;; The counters (*number-of-writes*, *writes-total-delay*,
+;; *number-non-write-queries*, *db-last-access*) are read while holding
+;; *heartbeat-mutex*; the mutex is released before the string is returned.
+(define (http-transport:stats-table)
+  (mutex-lock! *heartbeat-mutex*)
+  (let ((res
+         (conc "<table>"
+               ;; "<tr><td>Max cached queries</td>        <td>" *max-cache-size* "</td></tr>"
+               "<tr><td>Number of cached writes</td>   <td>" *number-of-writes* "</td></tr>"
+               ;; the unit is only appended when there is an average to show
+               "<tr><td>Average cached write time</td> <td>" (if (eqv? *number-of-writes* 0)
+                                                                 "n/a (no writes)"
+                                                                 (conc (/ *writes-total-delay*
+                                                                          *number-of-writes*)
+                                                                       " ms"))
+               "</td></tr>"
+               "<tr><td>Number non-cached queries</td> <td>"  *number-non-write-queries* "</td></tr>"
+               ;; This row is disabled as a whole, including its closing " ms</td></tr>",
+               ;; which would otherwise be emitted as stray markup after the previous row.
+               ;; "<tr><td>Average non-cached time</td>   <td>" (if (eq? *number-non-write-queries* 0)
+               ;;                                                  "n/a (no queries)"
+               ;;                                                  (/ *total-non-write-delay* 
+               ;;                                                     *number-non-write-queries*))
+               ;; " ms</td></tr>"
+               "<tr><td>Last access</td><td>"              (seconds->time-string *db-last-access*) "</td></tr>"
+               "</table>")))
+    (mutex-unlock! *heartbeat-mutex*)
+    res))
+
+;; Render an HTML fragment listing every entry that glob finds directly
+;; under linkpath.  Each entry becomes a link whose target and label are the
+;; entry's name without its directory; the links are separated by a space.
+(define (http-transport:runs linkpath)
+  (let* ((entries (glob (conc linkpath "/*")))
+         (links   (map (lambda (entry)
+                         (let ((name (pathname-strip-directory entry)))
+                           (conc "<a href=\"" name "\">" name "</a><br>")))
+                       entries)))
+    (conc "<h3>Runs</h3>"
+          (string-intersperse links " "))))
+
+#;(define (http-transport:run-stats)
+  (let ((stats (open-run-close db:get-running-stats #f)))
+    (conc "<table>"
+	  (string-intersperse
+	   (map (lambda (stat)
+		  (conc "<tr><td>" (car stat) "</td><td>" (cadr stat) "</td></tr>"))
+		stats)
+	   " ")
+	  "</table>")))
+)

Index: launch-inc.scm
==================================================================
--- launch-inc.scm
+++ launch-inc.scm
@@ -14,1699 +14,5 @@
 ;;     GNU General Public License for more details.
 ;; 
 ;;     You should have received a copy of the GNU General Public License
 ;;     along with Megatest.  If not, see <http://www.gnu.org/licenses/>.
 
-;;======================================================================
-;; launch a task - this runs on the originating host, tests themselves
-;;
-;;======================================================================
-
-
-;;======================================================================
-;; ezsteps
-;;======================================================================
-
-;; ezsteps were going to be coded as
-;; stepname[,predstep1,predstep2 ...] [{VAR1=first,second,third}] command to execute
-;;   BUT
-;; now are
-;; stepname {VAR=first,second,third ...} command ...
-;; where the {VAR=first,second,third ...} is optional.
-
-;; given an exit code and whether or not logpro was used calculate OK/BAD
-;; return #t if we are ok, #f otherwise
-(define (steprun-good? logpro exitcode)
-  (or (eq? exitcode 0)
-      (and logpro (eq? exitcode 2))))
-
-;; if handed a string, process it, else look for MT_CMDINFO
-(define (launch:get-cmdinfo-assoc-list #!key (encoded-cmd #f))
-  (let ((enccmd (if encoded-cmd encoded-cmd (getenv "MT_CMDINFO"))))
-    (if enccmd
-	(common:read-encoded-string enccmd)
-	'())))
-
-;;                       0           1              2              3
-(defstruct launch:einf (pid #t)(exit-status #t)(exit-code #t)(rollup-status 0))
-
-;; return (conc status ": " comment) from the final section so that
-;;   the comment can be set in the step record in launch.scm
-;;
-(define (launch:load-logpro-dat run-id test-id stepname)
-  (let ((cname (conc stepname ".dat")))
-    (if (common:file-exists? cname)
-	(let* ((dat  (read-config cname #f #f))
-	       (csvr (db:logpro-dat->csv dat stepname))
-	       (csvt (let-values (((fmt-cell fmt-record fmt-csv) (make-format ",")))
-		       (fmt-csv (map list->csv-record csvr))))
-	       (status (configf:lookup dat "final" "exit-status"))
-	       (msg     (configf:lookup dat "final" "message")))
-          (if csvt  ;; this if blocked stack dump caused by .dat file from logpro being 0-byte.  fixed by upgrading logpro
-              (rmt:csv->test-data run-id test-id csvt)
-	      (debug:print 0 *default-log-port* "ERROR: no csvdat exists for run-id: " run-id " test-id: " test-id " stepname: " stepname ", check that logpro version is 1.15 or newer"))
-	  ;;  (debug:print-info 13 *default-log-port* "Error: run-id/test-id/stepname="run-id"/"test-id"/"stepname" => bad csvr="csvr)
-	  ;;  )
-	  (cond
-	   ((equal? status "PASS") "PASS") ;; skip the message part if status is pass
-	   (status (conc (configf:lookup dat "final" "exit-status") ": " (if msg msg "no message")))
-	   (else #f)))
-	#f)))
-
-(define (launch:runstep ezstep run-id test-id exit-info m tal testconfig) ;;; TODO: deprecate me in favor of ezsteps.scm
-  (let* ((stepname       (car ezstep))  ;; do stuff to run the step
-	 (stepinfo       (cadr ezstep))
-	;; (let ((info (cadr ezstep)))
-	;; 		   (if (proc? info) "" info)))
-	;; (stepproc       (let ((info (cadr ezstep)))
-	;; 		   (if (proc? info) info #f)))
-	 (stepparts      (string-match (regexp "^(\\{([^\\}\\{]*)\\}\\s*|)(.*)$") stepinfo))
-	 (stepparams     (list-ref stepparts 2)) ;; for future use, {VAR=1,2,3}, run step for each
-	 (paramparts     (if (string? stepparams)
-			     (map (lambda (x)(string-split x "=")) (string-split-fields "[^;]*=[^;]*" stepparams))
-			     '()))
-	 (subrun         (alist-ref "subrun" paramparts equal?))
-	 (stepcmd        (list-ref stepparts 3))
-	 (script         "") ; "#!/bin/bash\n") ;; yep, we depend on bin/bash FIXME!!!\
-	 (logpro-file    (conc stepname ".logpro"))
-	 (html-file      (conc stepname ".html"))
-	 (dat-file       (conc stepname ".dat"))
-	 (tconfig-logpro (configf:lookup testconfig "logpro" stepname))
-	 (logpro-used    (common:file-exists? logpro-file)))
-
-    (debug:print 0 *default-log-port* "stepparts: " stepparts ", stepparams: " stepparams
-                 ", paramparts: " paramparts ", subrun: " subrun ", stepcmd: " stepcmd)
-    
-    (if (and tconfig-logpro
-	     (not logpro-used)) ;; no logpro file found but have a defn in the testconfig
-	(begin
-	  (with-output-to-file logpro-file
-	    (lambda ()
-	      (print ";; logpro file extracted from testconfig\n"
-		     ";;")
-	      (print tconfig-logpro)))
-	  (set! logpro-used #t)))
-    
-    ;; NB// can safely assume we are in test-area directory
-    (debug:print 4 *default-log-port* "ezsteps:\n stepname: " stepname " stepinfo: " stepinfo " stepparts: " stepparts
-		 " stepparams: " stepparams " stepcmd: " stepcmd)
-    
-    ;; ;; first source the previous environment
-    ;; (let ((prev-env (conc ".ezsteps/" prevstep (if (string-search (regexp "csh") 
-    ;;      							 (get-environment-variable "SHELL")) ".csh" ".sh"))))
-    ;;   (if (and prevstep (common:file-exists? prev-env))
-    ;;       (set! script (conc script "source " prev-env))))
-    
-    ;; call the command using mt_ezstep
-    ;; (set! script (conc "mt_ezstep " stepname " " (if prevstep prevstep "x") " " stepcmd))
-    
-    (debug:print 4 *default-log-port* "script: " script)
-    (rmt:teststep-set-status! run-id test-id stepname "start" "-" #f #f)
-    ;; now launch the actual process
-    (call-with-environment-variables 
-     (list (cons "PATH" (conc (get-environment-variable "PATH") ":.")))
-     (lambda () ;; (process-run "/bin/bash" "-c" "exec ls -l /tmp/foobar > /tmp/delme-more.log 2>&1")
-       (let* ((cmd (conc stepcmd " > " stepname ".log 2>&1")) ;; >outfile 2>&1 
-	      (pid #f))
-	 (let ((proc (lambda ()
-		       (set! pid (process-run "/bin/bash" (list "-c" cmd))))))
-	   (if subrun
-               (begin
-                 (debug:print-info 0 *default-log-port* "Running without MT_.* environment variables.")
-                 (common:without-vars proc "^MT_.*"))
-	       (proc)))
-	 
-         (with-output-to-file "Makefile.ezsteps"
-           (lambda ()
-             (print stepname ".log :")
-             (print "\t" cmd)
-             (if (common:file-exists? (conc stepname ".logpro"))
-                 (print "\tlogpro " stepname ".logpro " stepname ".html < " stepname ".log"))
-             (print)
-             (print stepname " : " stepname ".log")
-             (print))
-           #:append)
-
-	 (rmt:test-set-top-process-pid run-id test-id pid)
-	 (let processloop ((i 0))
-	   (let-values (((pid-val exit-status exit-code)(process-wait pid #t)))
-		       (mutex-lock! m)
-		       (launch:einf-pid-set!         exit-info pid)         ;; (vector-set! exit-info 0 pid)
-		       (launch:einf-exit-status-set! exit-info exit-status) ;; (vector-set! exit-info 1 exit-status)
-		       (launch:einf-exit-code-set!   exit-info exit-code)   ;; (vector-set! exit-info 2 exit-code)
-		       (mutex-unlock! m)
-		       (if (eq? pid-val 0)
-			   (begin
-			     (thread-sleep! 2)
-			     (processloop (+ i 1))))
-		       )))))
-    (debug:print-info 0 *default-log-port* "step " stepname " completed with exit code " (launch:einf-exit-code exit-info)) ;; (vector-ref exit-info 2))
-    ;; now run logpro if needed
-    (if logpro-used
-	(let* ((logpro-exe (or (getenv "LOGPRO_EXE") "logpro"))
-               (pid        (process-run (conc "/bin/sh -c '"logpro-exe" "logpro-file " " (conc stepname ".html") " < " stepname ".log > /dev/null'"))))
-	  (let processloop ((i 0))
-	    (let-values (((pid-val exit-status exit-code)(process-wait pid #t)))
-			(mutex-lock! m)
-			;; (make-launch:einf pid: pid exit-status: exit-status exit-code: exit-code)
-			(launch:einf-pid-set!         exit-info pid)         ;; (vector-set! exit-info 0 pid)
-			(launch:einf-exit-status-set! exit-info exit-status) ;; (vector-set! exit-info 1 exit-status)
-			(launch:einf-exit-code-set!   exit-info exit-code)   ;; (vector-set! exit-info 2 exit-code)
-			(mutex-unlock! m)
-			(if (eq? pid-val 0)
-			    (begin
-			      (thread-sleep! 2)
-			      (processloop (+ i 1)))))
-	    (debug:print-info 0 *default-log-port* "logpro for step " stepname " exited with code " (launch:einf-exit-code exit-info))))) ;; (vector-ref exit-info 2)))))
-    
-    (let ((exinfo (launch:einf-exit-code exit-info)) ;; (vector-ref exit-info 2))
-	  (logfna (if logpro-used (conc stepname ".html") ""))
-	  (comment #f))
-      (if logpro-used
-	  (let ((datfile (conc stepname ".dat")))
-	    ;; load the .dat file into the test_data table if it exists
-	    (if (common:file-exists? datfile)
-		(set! comment (launch:load-logpro-dat run-id test-id stepname)))
-	    (rmt:test-set-log! run-id test-id (conc stepname ".html"))))
-      (rmt:teststep-set-status! run-id test-id stepname "end" exinfo comment logfna))
-    ;; set the test final status
-    (let* ((process-exit-status (launch:einf-exit-code exit-info)) ;; (vector-ref exit-info 2))
-	   (this-step-status (cond
-			      ((and (eq? process-exit-status 2) logpro-used) 'warn)   ;; logpro 2 = warnings
-			      ((and (eq? process-exit-status 3) logpro-used) 'check)  ;; logpro 3 = check
-			      ((and (eq? process-exit-status 4) logpro-used) 'waived) ;; logpro 4 = waived
-			      ((and (eq? process-exit-status 5) logpro-used) 'abort)  ;; logpro 5 = abort
-			      ((and (eq? process-exit-status 6) logpro-used) 'skip)   ;; logpro 6 = skip
-			      ((eq? process-exit-status 0)                   'pass)   ;; logpro 0 = pass
-			      (else 'fail)))
-	   (overall-status   (cond
-			      ((eq? (launch:einf-rollup-status exit-info) 2) 'warn) ;; rollup-status (vector-ref exit-info 3)
-			      ((eq? (launch:einf-rollup-status exit-info) 0) 'pass) ;; (vector-ref exit-info 3)
-			      (else 'fail)))
-	   (next-status      (cond 
-			      ((eq? overall-status 'pass) this-step-status)
-			      ((eq? overall-status 'warn)
-			       (if (eq? this-step-status 'fail) 'fail 'warn))
-			      ((eq? overall-status 'abort) 'abort)
-			      (else 'fail)))
-	   (next-state       ;; "RUNNING") ;; WHY WAS THIS CHANGED TO NOT USE (null? tal) ??
-	    (cond
-	     ((null? tal) ;; more to run?
-	      "COMPLETED")
-	     (else "RUNNING"))))
-      (debug:print 4 *default-log-port* "Exit value received: " (launch:einf-exit-code exit-info) " logpro-used: " logpro-used 
-		   " this-step-status: " this-step-status " overall-status: " overall-status 
-		   " next-status: " next-status " rollup-status: "  (launch:einf-rollup-status exit-info)) ;; (vector-ref exit-info 3))
-      (case next-status
-	((warn)
-	 (launch:einf-rollup-status-set! exit-info 2) ;; (vector-set! exit-info 3 2) ;; rollup-status
-	 ;; NB// test-set-status! does rdb calls under the hood
-	 (tests:test-set-status! run-id test-id next-state "WARN" 
-				 (if (eq? this-step-status 'warn) "Logpro warning found" #f)
-				 #f))
-	((check)
-	 (launch:einf-rollup-status-set! exit-info 3) ;; (vector-set! exit-info 3 3) ;; rollup-status
-	 ;; NB// test-set-status! does rdb calls under the hood
-	 (tests:test-set-status! run-id test-id next-state "CHECK" 
-				 (if (eq? this-step-status 'check) "Logpro check found" #f)
-				 #f))
-	((waived)
-	 (launch:einf-rollup-status-set! exit-info 4) ;; (vector-set! exit-info 3 3) ;; rollup-status
-	 ;; NB// test-set-status! does rdb calls under the hood
-	 (tests:test-set-status! run-id test-id next-state "WAIVED" 
-				 (if (eq? this-step-status 'check) "Logpro waived found" #f)
-				 #f))
-	((abort)
-	 (launch:einf-rollup-status-set! exit-info 5) ;; (vector-set! exit-info 3 4) ;; rollup-status
-	 ;; NB// test-set-status! does rdb calls under the hood
-	 (tests:test-set-status! run-id test-id next-state "ABORT" 
-				 (if (eq? this-step-status 'abort) "Logpro abort found" #f)
-				 #f))
-	((skip)
-	 (launch:einf-rollup-status-set! exit-info 6) ;; (vector-set! exit-info 3 4) ;; rollup-status
-	 ;; NB// test-set-status! does rdb calls under the hood
-	 (tests:test-set-status! run-id test-id next-state "SKIP" 
-				 (if (eq? this-step-status 'skip) "Logpro skip found" #f)
-				 #f))
-	((pass)
-	 (tests:test-set-status! run-id test-id next-state "PASS" #f #f))
-	(else ;; 'fail
-	 (launch:einf-rollup-status-set! exit-info 1) ;; (vector-set! exit-info 3 1) ;; force fail, this used to be next-state but that doesn't make sense. should always be "COMPLETED" 
-	 (tests:test-set-status! run-id test-id "COMPLETED" "FAIL" (conc "Failed at step " stepname) #f)
-	 )))
-    logpro-used))
-
-(define (launch:manage-steps run-id test-id item-path fullrunscript ezsteps subrun test-name tconfigreg exit-info m)
-  ;; (let-values
-  ;;  (((pid exit-status exit-code)
-  ;;    (run-n-wait fullrunscript)))
-  ;; (tests:test-set-status! test-id "RUNNING" "n/a" #f #f)
-  ;; Since we should have a clean slate at this time there is no need to do 
-  ;; any of the other stuff that tests:test-set-status! does. Let's just 
-  ;; force RUNNING/n/a
-
-  ;; (thread-sleep! 0.3)
-  ;; (tests:test-force-state-status! run-id test-id "RUNNING" "n/a")
-  (rmt:set-state-status-and-roll-up-items run-id test-name item-path "RUNNING" #f #f) 
-  ;; (thread-sleep! 0.3) ;; NFS slowness has caused grief here
-
-  ;; if there is a runscript do it first
-  (if fullrunscript
-      (let ((pid (process-run fullrunscript)))
-	(rmt:test-set-top-process-pid run-id test-id pid)
-	(let loop ((i 0))
-	  (let-values
-	   (((pid-val exit-status exit-code) (process-wait pid #t)))
-	   (mutex-lock! m)
-	   (launch:einf-pid-set!           exit-info  pid)         ;; (vector-set! exit-info 0 pid)
-	   (launch:einf-exit-status-set!   exit-info  exit-status) ;; (vector-set! exit-info 1 exit-status)
-	   (launch:einf-exit-code-set!     exit-info  exit-code)   ;; (vector-set! exit-info 2 exit-code)
-	   (launch:einf-rollup-status-set! exit-info  exit-code)   ;; (vector-set! exit-info 3 exit-code)  ;; rollup status
-	   (mutex-unlock! m)
-	   (if (eq? pid-val 0)
-	       (begin
-		 (thread-sleep! 2)
-		 (loop (+ i 1)))
-	       )))))
-  ;; then, if runscript ran ok (or did not get called)
-  ;; do all the ezsteps (if any)
-  (if (or ezsteps subrun)
-      (let* ((test-run-dir (tests:get-test-path-from-environment))
-             (testconfig ;; (read-config (conc work-area "/testconfig") #f #t environ-patt: "pre-launch-env-vars")) ;; FIXME??? is allow-system ok here?
-	      ;; NOTE: it is tempting to turn off force-create of testconfig but dynamic
-	      ;;       ezstep names need a full re-eval here.
-	      (tests:get-testconfig test-name item-path tconfigreg #t force-create: #t)) ;; 'return-procs)))
-	     (ezstepslst (if (hash-table? testconfig)
-			     (hash-table-ref/default testconfig "ezsteps" '())
-			     #f)))
-	(if testconfig
-	    (hash-table-set! *testconfigs* test-name testconfig) ;; cached for lazy reads later ...
-	    (begin
-	      (launch:setup)
-	      (debug:print 0 *default-log-port* "WARNING: no testconfig found for " test-name " in search path:\n  "
-			   (string-intersperse (tests:get-tests-search-path *configdat*) "\n  "))))
-	;; after all that, still no testconfig? Time to abort
-	(if (not testconfig)
-	    (begin
-	      (debug:print-error 0 *default-log-port* "Failed to resolve megatest.config, runconfigs.config and testconfig issues. Giving up now")
-	      (exit 1)))
-
-	;; create a proc for the subrun if requested, save that proc in the ezsteps table as the last entry
-	;; 1. get section [runarun]
-	;; 2. unset MT_* vars
-	;; 3. fix target
-	;; 4. fix runname
-	;; 5. fix testpatt or calculate it from contour
-	;; 6. launch the run
-	;; 7. roll up the run result and or roll up the logpro processed result
-	(when (configf:lookup testconfig "subrun" "runwait") ;; we use runwait as the flag that a subrun is requested
-            (subrun:initialize-toprun-test testconfig test-run-dir)
-	    (let* ((mt-cmd (subrun:launch-cmd test-run-dir)))
-              (debug:print-info 0 *default-log-port* "Subrun command is \"" mt-cmd "\"")
-              (set! ezsteps #t) ;; set the needed flag
-	      (set! ezstepslst
-                    (append (or ezstepslst '())
-                            (list (list "subrun" (conc "{subrun=true} " mt-cmd)))))))
-
-	;; process the ezsteps
-	(if ezsteps
-	    (begin
-	      (if (not (common:file-exists? ".ezsteps"))(create-directory ".ezsteps"))
-	      ;; if ezsteps was defined then we are sure to have at least one step but check anyway
-	      (if (not (> (length ezstepslst) 0))
-		  (debug:print-error 0 *default-log-port* "ezsteps defined but ezstepslst is zero length")
-		  (let loop ((ezstep (car ezstepslst))
-			     (tal    (cdr ezstepslst))
-			     (prevstep #f))
-                    (debug:print-info 0 *default-log-port* "Processing ezstep \"" (string-intersperse ezstep " ") "\"")
-		    ;; check exit-info (vector-ref exit-info 1)
-		    (if (launch:einf-exit-status exit-info) ;; (vector-ref exit-info 1)
-			(let ((logpro-used (launch:runstep ezstep run-id test-id exit-info m tal testconfig))
-			      (stepname    (car ezstep)))
-			  ;; if logpro-used read in the stepname.dat file
-			  (if (and logpro-used (common:file-exists? (conc stepname ".dat")))
-			      (launch:load-logpro-dat run-id test-id stepname))
-			  (if (steprun-good? logpro-used (launch:einf-exit-code exit-info))
-			      (if (not (null? tal))
-				  (loop (car tal) (cdr tal) stepname))
-			      (debug:print 0 *default-log-port* "WARNING: step " (car ezstep) " failed. Stopping")))
-			(debug:print 0 *default-log-port* "WARNING: a prior step failed, stopping at " ezstep)))))))))
-
-(define (launch:monitor-job run-id test-id item-path fullrunscript ezsteps test-name tconfigreg exit-info m work-area runtlim misc-flags)
-  (let* ((update-period (string->number (or (configf:lookup *configdat* "setup" "test-stats-update-period") "30")))
-         (start-seconds (current-seconds))
-	 (calc-minutes  (lambda ()
-			  (inexact->exact 
-			   (round 
-			    (- 
-			     (current-seconds) 
-			     start-seconds)))))
-	 (kill-tries 0))
-    ;; (tests:set-full-meta-info #f test-id run-id (calc-minutes) work-area)
-    ;; (tests:set-full-meta-info test-id run-id (calc-minutes) work-area)
-    (tests:set-full-meta-info #f test-id run-id (calc-minutes) work-area 10)
-
-    (let loop ((minutes   (calc-minutes))
-	       (cpu-load  (alist-ref 'adj-core-load (common:get-normalized-cpu-load #f)))
-	       (disk-free (get-df (current-directory)))
-               (last-sync (current-seconds)))
-      #;(common:telemetry-log "zombie" (conc "launch:monitor-job - top of loop encountered at "(current-seconds)" with last-sync="last-sync))
-      (let* ((over-time     (> (current-seconds) (+ last-sync update-period)))
-             (new-cpu-load  (let* ((load  (alist-ref 'adj-core-load (common:get-normalized-cpu-load #f)))
-                                   (delta (abs (- load cpu-load))))
-                              (if (> delta 0.1) ;; don't bother updating with small changes
-                                  load
-                                  #f)))
-             (new-disk-free (let* ((df    (if over-time ;; only get df every 30 seconds
-                                              (get-df (current-directory))
-                                              disk-free))
-                                   (delta (abs (- df disk-free))))
-                              (if (and (> df 0)
-                                       (> (/ delta df) 0.1)) ;; (> delta 200) ;; ignore changes under 200 Meg
-                                  df
-                                  #f)))
-             (do-sync       (or new-cpu-load new-disk-free over-time))
-
-             (test-info   (rmt:get-test-info-by-id run-id test-id))
-             (state       (db:test-get-state test-info))
-             (status      (db:test-get-status test-info))
-             (kill-reason  "no kill reason specified")
-             (kill-job?    #f))
-        #;(common:telemetry-log "zombie" (conc "launch:monitor-job - decision time encountered at "(current-seconds)" with last-sync="last-sync" do-sync="do-sync" over-time="over-time" update-period="update-period))
-        (cond
-         ((test-get-kill-request run-id test-id)
-          (set! kill-reason "KILLING TEST since received kill request (KILLREQ)")
-          (set! kill-job? #t))
-         ((and runtlim (> (- (current-seconds) start-seconds) runtlim))
-          (set! kill-reason (conc "KILLING TEST DUE TO TIME LIMIT EXCEEDED! Runtime=" (- (current-seconds) start-seconds) " seconds, limit=" runtlim))
-          (set! kill-job? #t))
-         ((equal? status "DEAD")
-          (tests:update-central-meta-info run-id test-id new-cpu-load new-disk-free (calc-minutes) #f #f)
-          (rmt:set-state-status-and-roll-up-items run-id test-id 'foo "RUNNING" "n/a" "was marked dead; really still running.")
-          ;;(set! kill-reason "KILLING TEST because it was marked as DEAD by launch:handle-zombie-tests (might indicate really overloaded server or else overzealous setup.deadtime)") ;; MARK RUNNING
-          (set! kill-job? #f)))
-
-        (debug:print 4 *default-log-port* "cpu: " new-cpu-load " disk: " new-disk-free " last-sync: " last-sync " do-sync: " do-sync)
-        (launch:handle-zombie-tests run-id)
-        (when do-sync
-          ;;(with-output-to-file (conc (getenv "MT_TEST_RUN_DIR") "/last-loadinfo.log" #:append)
-          ;;  (lambda () (pp (list (current-seconds) new-cpu-load new-disk-free (calc-minutes)))))
-          #;(common:telemetry-log "zombie" (conc  "launch:monitor-job - dosync started at "(current-seconds)))
-          (tests:update-central-meta-info run-id test-id new-cpu-load new-disk-free (calc-minutes) #f #f)
-          #;(common:telemetry-log "zombie" (conc "launch:monitor-job - dosync finished at "(current-seconds))))
-        
-	(if kill-job? 
-	    (begin
-              (debug:print-info 0 *default-log-port* "proceeding to kill test: "kill-reason)
-	      (mutex-lock! m)
-	      ;; NOTE: The pid can change as different steps are run. Do we need handshaking between this
-	      ;;       section and the runit section? Or add a loop that tries three times with a 1/4 second
-	      ;;       between tries?
-	      (let* ((pid1 (launch:einf-pid exit-info)) ;; (vector-ref exit-info 0))
-		     (pid2 (rmt:test-get-top-process-pid run-id test-id))
-		     (pids (delete-duplicates (filter number? (list pid1 pid2)))))
-		(if (not (null? pids))
-		    (begin
-		      (for-each
-		       (lambda (pid)
-			 (handle-exceptions
-			  exn
-			  (begin
-			    (debug:print-info 0 *default-log-port* "Unable to kill process with pid " pid ", possibly already killed.")
-			    (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)))
-			  (debug:print 0 *default-log-port* "WARNING: Request received to kill job " pid) ;;  " (attempt # " kill-tries ")")
-			  (debug:print-info 0 *default-log-port* "Signal mask=" (signal-mask))
-			  ;; (if (process:alive? pid)
-			  ;;     (begin
-			  (map (lambda (pid-num)
-				 (process-signal pid-num signal/term))
-			       (process:get-sub-pids pid))
-			  (thread-sleep! 5)
-			  ;; (if (process:process-alive? pid)
-			  (map (lambda (pid-num)
-				 (handle-exceptions
-				  exn
-				  #f
-				  (process-signal pid-num signal/kill)))
-			       (process:get-sub-pids pid))))
-		       ;;    (debug:print-info 0 *default-log-port* "not killing process " pid " as it is not alive"))))
-		       pids)
-                      ;; BB: question to Matt -- does the tests:test-state-status! encompass rollup to toplevel?  If not, should it?
-		      (tests:test-set-status! run-id test-id "KILLED"  "KILLED" (conc (args:get-arg "-m")" "kill-reason) #f)) ;; BB ADDED kill-reason -- confirm OK with Matt
-		    (begin
-		      (debug:print-error 0 *default-log-port* "Nothing to kill, pid1=" pid1 ", pid2=" pid2)
-		      (tests:test-set-status! run-id test-id "KILLED"  "FAILED TO KILL" (conc (args:get-arg "-m")" "kill-reason) #f) ;; BB ADDED kill-reason -- confirm OK with Matt
-		      )))
-	      (mutex-unlock! m)
-	      ;; no point in sticking around. Exit now. But run end of run before exiting?
-        (launch:end-of-run-check run-id)
-	      (exit)))
-	(if (hash-table-ref/default misc-flags 'keep-going #f)
-	    (begin
-	      (thread-sleep! 3) ;; (+ 3 (random 6))) ;; add some jitter to the call home time to spread out the db accesses
-	      (if (hash-table-ref/default misc-flags 'keep-going #f)  ;; keep originals for cpu-load and disk-free unless they change more than the allowed delta
-		  (loop (calc-minutes)
-                        (or new-cpu-load cpu-load)
-                        (or new-disk-free disk-free)
-                        (if do-sync (current-seconds) last-sync)))))))
-    (tests:update-central-meta-info run-id test-id (get-cpu-load) (get-df (current-directory))(calc-minutes) #f #f))) ;; NOTE: Checking twice for keep-going is intentional
-
-
-(define (launch:execute encoded-cmd)
-  (let* ((cmdinfo    (common:read-encoded-string encoded-cmd))
-	 (tconfigreg #f))
-    (setenv "MT_CMDINFO" encoded-cmd)
-    ;;(bb-check-path msg: "launch:execute incoming")
-    (if (list? cmdinfo) ;; ((testpath /tmp/mrwellan/jazzmind/src/example_run/tests/sqlitespeed)
-	;; (test-name sqlitespeed) (runscript runscript.rb) (db-host localhost) (run-id 1))
-	(let* ((testpath  (assoc/default 'testpath  cmdinfo))  ;; testpath is the test spec area
-	       (top-path  (assoc/default 'toppath   cmdinfo))
-	       (work-area (assoc/default 'work-area cmdinfo))  ;; work-area is the test run area
-	       (test-name (assoc/default 'test-name cmdinfo))
-	       (runscript (assoc/default 'runscript cmdinfo))
-	       (ezsteps   (assoc/default 'ezsteps   cmdinfo))
-	       (subrun    (assoc/default 'subrun    cmdinfo))
-	       ;; (runremote (assoc/default 'runremote cmdinfo))
-	       ;; (transport (assoc/default 'transport cmdinfo))  ;; not used
-	       ;; (serverinf (assoc/default 'serverinf cmdinfo))
-	       ;; (port      (assoc/default 'port      cmdinfo))
-	       (serverurl (assoc/default 'serverurl cmdinfo))
-	       (homehost  (assoc/default 'homehost  cmdinfo))
-	       (run-id    (assoc/default 'run-id    cmdinfo))
-	       (test-id   (assoc/default 'test-id   cmdinfo))
-	       (target    (assoc/default 'target    cmdinfo))
-	       (areaname  (assoc/default 'areaname  cmdinfo))
-	       (itemdat   (assoc/default 'itemdat   cmdinfo))
-	       (env-ovrd  (assoc/default 'env-ovrd  cmdinfo))
-	       (set-vars  (assoc/default 'set-vars  cmdinfo)) ;; pre-overrides from -setvar
-	       (runname   (assoc/default 'runname   cmdinfo))
-	       (megatest  (assoc/default 'megatest  cmdinfo))
-	       (runtlim   (assoc/default 'runtlim   cmdinfo))
-	       (contour   (assoc/default 'contour   cmdinfo))
-	       (item-path (item-list->path itemdat))
-	       (mt-bindir-path (assoc/default 'mt-bindir-path cmdinfo))
-	       (keys      #f)
-	       (keyvals   #f)
-	       (fullrunscript (if (not runscript)
-                                  #f
-                                  (if (substring-index "/" runscript)
-                                      runscript ;; use unadultered if contains slashes
-                                      (let ((fulln (conc work-area "/" runscript)))
-	                                  (if (and (common:file-exists? fulln)
-                                                   (file-execute-access? fulln))
-                                              fulln
-                                              runscript))))) ;; assume it is on the path
-               (check-work-area           (lambda ()
-                                            ;; NFS might not have propagated the directory meta data to the run host - give it time if needed
-                                            (let loop ((count 0))
-                                              (if (or (common:directory-exists? work-area)
-                                                      (> count 10))
-                                                  (change-directory work-area)
-                                                  (begin
-                                                    (debug:print 0 *default-log-port* "INFO: Not starting job yet - directory " work-area " not found")
-                                                    (thread-sleep! 10)
-                                                    (loop (+ count 1)))))
-
-                                            (if (not (string=?  (common:real-path work-area)(common:real-path (current-directory))))
-                                                (begin
-                                                  (debug:print 0 *default-log-port*
-                                                               "INFO: we are expecting to be in directory " work-area "\n"
-                                                               "     but we are actually in the directory " (current-directory) "\n"
-                                                               "     doing another change dir.")
-                                                  (change-directory work-area)))
-                                            
-                                            ;; spot check that the files in testpath are available. Too often NFS delays cause problems here.
-                                            (let ((files      (glob (conc testpath "/*")))
-                                                  (bad-files '()))
-                                              (for-each
-                                               (lambda (fullname)
-                                                 (let* ((fname (pathname-strip-directory fullname))
-                                                        (targn (conc work-area "/" fname)))
-                                                   (if (not (file-exists? targn))
-                                                       (set! bad-files (cons fname bad-files)))))
-                                               files)
-                                              (if (not (null? bad-files))
-                                                  (begin
-                                                    (debug:print 0 *default-log-port* "INFO: test data from " testpath " not copied properly or filesystem problems causing data to not be found. Re-running the copy command.")
-                                                    (debug:print 0 *default-log-port* "INFO: missing files from " work-area ": " (string-intersperse bad-files ", "))
-                                                    (launch:test-copy testpath work-area))))
-                                            ;; one more time, change to the work-area directory
-                                            (change-directory work-area)))
-	       ) ;; let*
-
-	  (if contour (setenv "MT_CONTOUR" contour))
-	  
-	  ;; immediated set some key variables from CMDINFO data, yes, these will be set again below ...
-	  ;;
-	  (setenv "MT_TESTSUITENAME" areaname)
-	  (setenv "MT_RUN_AREA_HOME" top-path)
-	  (set! *toppath* top-path)
-          (change-directory *toppath*) ;; temporarily switch to the run area home
-	  (setenv "MT_TEST_RUN_DIR"  work-area)
-
-	  (launch:setup) ;; should be properly in the run area home now
-
-	  (if contour (setenv "MT_CONTOUR" contour))
-	  
-	  ;; immediated set some key variables from CMDINFO data, yes, these will be set again below ...
-	  ;;
-	  (setenv "MT_TESTSUITENAME" areaname)
-	  (setenv "MT_RUN_AREA_HOME" top-path)
-	  (set! *toppath* top-path)
-          (change-directory *toppath*) ;; temporarily switch to the run area home
-	  (setenv "MT_TEST_RUN_DIR"  work-area)
-
-	  (launch:setup) ;; should be properly in the run area home now
-          
-	  (set! tconfigreg (tests:get-all)) ;; mapping of testname => test source path
-	  (let ((sighand (lambda (signum)
-			   ;; (signal-mask! signum) ;; to mask or not? seems to cause issues in exiting
-			   (if (eq? signum signal/stop)
-			       (debug:print-error 0 *default-log-port* "attempt to STOP process. Exiting."))
-			   (set! *time-to-exit* #t)
-			   (print "Received signal " signum ", cleaning up before exit (set this test to COMPLETED/ABORT) . Please wait...")
-			   (let ((th1 (make-thread (lambda ()
-                                                     (print "set test to COMPLETED/ABORT begin.")
-						     (rmt:test-set-state-status run-id test-id "COMPLETED" "ABORT" "received kill signal")
-                                                     (print "set test to COMPLETED/ABORT complete.")
-						     (print "Killed by signal " signum ". Exiting")
-						     (exit 1))))
-				 (th2 (make-thread (lambda ()
-						     (thread-sleep! 20)
-						     (debug:print 0 *default-log-port* "Done")
-						     (exit 4)))))
-			     (thread-start! th2)
-			     (thread-start! th1)
-			     (thread-join! th2)))))
-	    (set-signal-handler! signal/int sighand)
-	    (set-signal-handler! signal/term sighand)
-	    ) ;; (set-signal-handler! signal/stop sighand)
-	  
-	  ;; Do not run the test if it is REMOVING, RUNNING, KILLREQ or REMOTEHOSTSTART,
-	  ;; Mark the test as REMOTEHOSTSTART *IMMEDIATELY*
-	  ;;
-	  (let* ((test-info (rmt:get-test-info-by-id run-id test-id))
-		 (test-host (if test-info
-				(db:test-get-host        test-info)
-				(begin
-				  (debug:print 0 *default-log-port* "ERROR: failed to find a record for test-id " test-id ", exiting.")
-				  (exit))))
-		 (test-pid  (db:test-get-process_id  test-info)))
-	    (cond
-             ;; -mrw- I'm removing KILLREQ from this list so that a test in KILLREQ state is treated as a "do not run" flag.
-	     ((member (db:test-get-state test-info) '("INCOMPLETE" "KILLED" "UNKNOWN" "STUCK")) ;; prior run of this test didn't complete, go ahead and try to rerun
-	      (debug:print 0 *default-log-port* "INFO: test is INCOMPLETE or KILLED, treat this execute call as a rerun request")
-	      ;; (tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a")
-
-              (rmt:general-call 'set-test-start-time #f test-id)
-              (rmt:test-set-state-status run-id test-id "REMOTEHOSTSTART" "n/a" #f)
-	      ) ;; prime it for running
-	     ((member (db:test-get-state test-info) '("RUNNING" "REMOTEHOSTSTART"))
-	      (if (process:alive-on-host? test-host test-pid)
-		  (debug:print-error 0 *default-log-port* "test state is "  (db:test-get-state test-info) " and process " test-pid " is still running on host " test-host ", cannot proceed")
-		  (exit)))
-	     ((not (member (db:test-get-state test-info) '("REMOVING" "REMOTEHOSTSTART" "RUNNING" "KILLREQ")))
-	      ;; (tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a")
-              (rmt:general-call 'set-test-start-time #f test-id)
-	      (rmt:test-set-state-status run-id test-id "REMOTEHOSTSTART" "n/a" #f)
-	      )
-	     (else ;; (member (db:test-get-state test-info) '("REMOVING" "REMOTEHOSTSTART" "RUNNING" "KILLREQ"))
-	      (debug:print-error 0 *default-log-port* "test state is " (db:test-get-state test-info) ", cannot proceed")
-	      (exit))))
-
-          ;; cleanup prior execution's steps
-          (rmt:delete-steps-for-test! run-id test-id)
-          
-	  (debug:print 2 *default-log-port* "Executing " test-name " (id: " test-id ") on " (get-host-name))
-	  (set! keys       (rmt:get-keys))
-	  ;; (runs:set-megatest-env-vars run-id inkeys: keys inkeyvals: keyvals) ;; these may be needed by the launching process
-	  ;; one of these is defunct/redundant ...
-	  (if (not (launch:setup force-reread: #t))
-	      (begin
-		(debug:print 0 *default-log-port* "Failed to setup, exiting") 
-		;; (sqlite3:finalize! db)
-		;; (sqlite3:finalize! tdb)
-		(exit 1)))
-          ;; validate that the test run area is available
-          (check-work-area)
-          
-          ;; still need to go back to run area home for next couple steps
-	  (change-directory *toppath*) 
-
-	  ;; NOTE: Current order is to process runconfigs *before* setting the MT_ vars. This 
-	  ;;       seems non-ideal but could well break stuff
-	  ;;    BUG? BUG? BUG?
-	  
-	  (let ((rconfig (full-runconfigs-read)) ;; (read-config (conc  *toppath* "/runconfigs.config") #f #t sections: (list "default" target))))
-		(wconfig (read-config "waivers.config" #f #t sections: `( "default" ,target )))) ;; read the waivers config if it exists
-	    ;; (setup-env-defaults (conc *toppath* "/runconfigs.config") run-id (make-hash-table) keyvals target)
-	    ;; (set-run-config-vars run-id keyvals target) ;; (db:get-target db run-id))
-	    ;; Now have runconfigs data loaded, set environment vars
-	    (for-each
-	     (lambda (section)
-	       (for-each
-		(lambda (varval)
-		  (let ((var (car varval))
-			(val (cadr varval)))
-		    (if (and (string? var)(string? val))
-			(begin
-			  (safe-setenv var (configf:eval-string-in-environment val))) ;; val)
-			(debug:print-error 0 *default-log-port* "bad variable spec, " var "=" val))))
-		(configf:get-section rconfig section)))
-	     (list "default" target)))
-          ;;(bb-check-path msg: "launch:execute post block 1")
-
-	  ;; NFS might not have propagated the directory meta data to the run host - give it time if needed
-	  (let loop ((count 0))
-	    (if (or (common:file-exists? work-area)
-		    (> count 10))
-		(change-directory work-area)
-		(begin
-		  (debug:print 0 *default-log-port* "INFO: Not starting job yet - directory " work-area " not found")
-		  (thread-sleep! 10)
-		  (loop (+ count 1)))))
-
-          ;; now we can switch to the work-area?
-          (change-directory work-area)
-          ;;(bb-check-path msg: "launch:execute post block 1.5")
-	  ;; (change-directory work-area) 
-	  (set! keyvals    (keys:target->keyval keys target))
-	  ;; apply pre-overrides before other variables. The pre-override vars must not
-	  ;; clobbers things from the official sources such as megatest.config and runconfigs.config
-	  (if (string? set-vars)
-	      (let ((varpairs (string-split set-vars ",")))
-		(debug:print 4 *default-log-port* "varpairs: " varpairs)
-		(map (lambda (varpair)
-		       (let ((varval (string-split varpair "=")))
-			 (if (eq? (length varval) 2)
-			     (let ((var (car varval))
-				   (val (cadr varval)))
-			       (debug:print 1 *default-log-port* "Adding pre-var/val " var " = " val " to the environment")
-			       (setenv var val)))))
-		     varpairs)))
-          ;;(bb-check-path msg: "launch:execute post block 2")
-	  (for-each
-	   (lambda (varval)
-	     (let ((var (car varval))
-		   (val (cadr varval)))
-	       (if val
-		   (setenv var val)
-		   (begin
-		     (debug:print-error 0 *default-log-port* "required variable " var " does not have a valid value. Exiting")
-		     (exit)))))
-	     (list 
-	      (list  "MT_TEST_RUN_DIR" work-area)
-	      (list  "MT_TEST_NAME" test-name)
-	      (list  "MT_ITEM_INFO" (conc itemdat))
-	      (list  "MT_ITEMPATH"  item-path)
-	      (list  "MT_RUNNAME"   runname)
-	      (list  "MT_MEGATEST"  megatest)
-	      (list  "MT_TARGET"    target)
-	      (list  "MT_LINKTREE"  (common:get-linktree)) ;; (configf:lookup *configdat* "setup" "linktree"))
-	      (list  "MT_TESTSUITENAME" (common:get-testsuite-name))))
-          ;;(bb-check-path msg: "launch:execute post block 3")
-
-	  (if mt-bindir-path (setenv "PATH" (conc (getenv "PATH") ":" mt-bindir-path)))
-          ;;(bb-check-path msg: "launch:execute post block 4")
-	  ;; (change-directory top-path)
-	  ;; Can setup as client for server mode now
-	  ;; (client:setup)
-
-	  
-	  ;; environment overrides are done *before* the remaining critical envars.
-	  (alist->env-vars env-ovrd)
-          ;;(bb-check-path msg: "launch:execute post block 41")
-	  (runs:set-megatest-env-vars run-id inkeys: keys inkeyvals: keyvals)
-          ;;(bb-check-path msg: "launch:execute post block 42")
-	  (set-item-env-vars itemdat)
-          ;;(bb-check-path msg: "launch:execute post block 43")
-          (let ((blacklist (configf:lookup *configdat* "setup" "blacklistvars")))
-            (if blacklist
-		(let ((vars (string-split blacklist)))
-		  (save-environment-as-files "megatest" ignorevars: vars)
-		  (for-each (lambda (var)
-			      (unsetenv var))
-			    vars))
-                (save-environment-as-files "megatest")))
-          ;;(bb-check-path msg: "launch:execute post block 44")
-	  ;; open-run-close not needed for test-set-meta-info
-	  ;; (tests:set-full-meta-info #f test-id run-id 0 work-area)
-	  ;; (tests:set-full-meta-info test-id run-id 0 work-area)
-	  (tests:set-full-meta-info #f test-id run-id 0 work-area 10)
-
-	  ;; (thread-sleep! 0.3) ;; NFS slowness has caused grief here
-
-	  (if (args:get-arg "-xterm")
-	      (set! fullrunscript "xterm")
-	      (if (and fullrunscript 
-		       (common:file-exists? fullrunscript)
-		       (not (file-execute-access? fullrunscript)))
-		  (system (conc "chmod ug+x " fullrunscript))))
-
-	  ;; We are about to actually kick off the test
-	  ;; so this is a good place to remove the records for 
-	  ;; any previous runs
-	  ;; (db:test-remove-steps db run-id testname itemdat)
-	  ;; now is also a good time to write the .testconfig file
-	  (let* ((tconfig-fname   (conc work-area "/.testconfig"))
-		 (tconfig-tmpfile (conc tconfig-fname ".tmp"))
-		 (tconfig         (tests:get-testconfig test-name item-path tconfigreg #t force-create: #t))) ;; 'return-procs)))
-	    (configf:write-alist tconfig tconfig-tmpfile)
-	    (file-move tconfig-tmpfile tconfig-fname #t))
-	  ;; 
-	  (let* ((m            (make-mutex))
-		 (kill-job?    #f)
-		 (exit-info    (make-launch:einf pid: #t exit-status: #t exit-code: #t rollup-status: 0)) ;; pid exit-status exit-code (i.e. process was successfully run) rollup-status
-		 (job-thread   #f)
-		 ;; (keep-going   #t)
-		 (misc-flags   (let ((ht (make-hash-table)))
-				 (hash-table-set! ht 'keep-going #t)
-				 ht))
-		 (runit        (lambda ()
-				 (launch:manage-steps run-id test-id item-path fullrunscript ezsteps subrun test-name tconfigreg exit-info m)))
-		 (monitorjob   (lambda ()
-				 (launch:monitor-job  run-id test-id item-path fullrunscript ezsteps test-name tconfigreg exit-info m work-area runtlim misc-flags)))
-		 (th1          (make-thread monitorjob "monitor job"))
-		 (th2          (make-thread runit "run job")))
-	    (set! job-thread th2)
-	    (thread-start! th1)
-	    (thread-start! th2)
-	    (thread-join! th2)
-	    (debug:print-info 0 *default-log-port* "Megatest exectute of test " test-name ", item path " item-path " complete. Notifying the db ...")
-	    (hash-table-set! misc-flags 'keep-going #f)
-	    (thread-join! th1)
-	    (thread-sleep! 1)       ;; givbe thread th1 a chance to be done TODO: Verify this is needed. At 0.1 I was getting fail to stop, increased to total of 1.1 sec.
-	    (mutex-lock! m)
-	    (let* ((item-path (item-list->path itemdat))
-		   ;; only state and status needed - use lazy routine
-		   (testinfo  (rmt:get-testinfo-state-status run-id test-id)))
-	      ;; Am I completed?
-	      (if (member (db:test-get-state testinfo) '("REMOTEHOSTSTART" "RUNNING")) ;; NOTE: It should *not* be REMOTEHOSTSTART but for reasons I don't yet understand it sometimes gets stuck in that state ;; (not (equal? (db:test-get-state testinfo) "COMPLETED"))
-		  (let ((new-state  (if kill-job? "KILLED" "COMPLETED") ;; (if (eq? (vector-ref exit-info 2) 0) ;; exited with "good" status
-				                                        ;; "COMPLETED"							                ;; (db:test-get-state testinfo)))   ;; else preseve the state as set within the test
-				    )
-			(new-status (cond
-				     ((not (launch:einf-exit-status exit-info)) "FAIL") ;; job failed to run ... (vector-ref exit-info 1)
-				     ((eq? (launch:einf-rollup-status exit-info) 0)     ;; (vector-ref exit-info 3)
-				      ;; if the current status is AUTO then defer to the calculated value (i.e. leave this AUTO)
-				      (if (equal? (db:test-get-status testinfo) "AUTO") "AUTO" "PASS"))
-				     ((eq? (launch:einf-rollup-status exit-info) 1) "FAIL")  ;; (vector-ref exit-info 3)
-				     ((eq? (launch:einf-rollup-status exit-info) 2)	     ;;	(vector-ref exit-info 3)
-				      ;; if the current status is AUTO the defer to the calculated value but qualify (i.e. make this AUTO-WARN)
-				      (if (equal? (db:test-get-status testinfo) "AUTO") "AUTO-WARN" "WARN"))
-				     ((eq? (launch:einf-rollup-status exit-info) 3) "CHECK")
-				     ((eq? (launch:einf-rollup-status exit-info) 4) "WAIVED")
-				     ((eq? (launch:einf-rollup-status exit-info) 5) "ABORT")
-				     ((eq? (launch:einf-rollup-status exit-info) 6) "SKIP")
-				     (else "FAIL")))) ;; (db:test-get-status testinfo)))
-		    (debug:print-info 1 *default-log-port* "Test exited in state=" (db:test-get-state testinfo) ", setting state/status based on exit code of " (launch:einf-exit-status exit-info) " and rollup-status of " (launch:einf-rollup-status exit-info))
-		    (tests:test-set-status! run-id 
-					    test-id 
-					    new-state
-					    new-status
-					    (args:get-arg "-m") #f)
-		    ;; need to update the top test record if PASS or FAIL and this is a subtest
-		    ;; NO NEED TO CALL set-state-status-and-roll-up-items HERE, THIS IS DONE IN set-state-status-and-roll-up-items called by tests:test-set-status!
-		    ))
-	      ;; for automated creation of the rollup html file this is a good place...
-	      (if (not (equal? item-path ""))
-		  (tests:summarize-items run-id test-id test-name #f))
-	      (tests:summarize-test run-id test-id)  ;; don't force - just update if no
-	      (rmt:update-run-stats run-id (rmt:get-raw-run-stats run-id)))
-	    (mutex-unlock! m)
-            (launch:end-of-run-check run-id )
-	    (debug:print 2 *default-log-port* "Output from running " fullrunscript ", pid " (launch:einf-pid exit-info) " in work area " 
-			 work-area ":\n====\n exit code " (launch:einf-exit-code exit-info) "\n" "====\n")
-	    (if (not (launch:einf-exit-status exit-info))
-		(exit 4))))
-        )))
-
-;; Spec for End of test
-;; At end of each test call, after marking self as COMPLETED do run-state-status-rollup
-;; At transition to run COMPLETED/X do hooks
-;; Definition: test_dead if event_time + duration + 1 minute? < current_time AND
-;; we can prove the process is not alive (ssh host pstree -A pid)
-;; if dead safe to mark the test as killed in the db
-;; State/status table
-;; new
-;; 100% COMPLETED/ (PASS,FAIL,ABORT etc.) ==> COMPLETED / X where X is same as itemized rollup
-;; > 3 RUNNING with not test_dead do nothing (run should already be RUNNING/ na
-;; > 0 RUNNING and test_dead then send KILLREQ ==> COMPLETED
-;; 0 RUNNING ==> this is actually the first condition, should not get here
-
-;; Roll up the state/status of run-id, then pick the end-of-run action:
-;;
-;;   - launching flagged complete and nothing left uncompleted: run the post hook
-;;   - more than 3 tests running: only report the count
-;;   - 1 to 3 tests running: mark dead ones KILLREQ; if every running test was
-;;     marked and launching is flagged complete, run this check again
-;;   - otherwise: log each test that is still in a not-completed state
-;;
-(define (launch:end-of-run-check run-id)
-  (let* ((not-completed-cnt (rmt:get-not-completed-cnt run-id))
-         (running-cnt       (rmt:get-count-tests-running-for-run-id run-id))
-         ;; metadata var; only the value "yes" is treated as "all tests launched".
-         ;; A missing entry (#f) means nothing can be concluded yet.
-         (all-test-launched (rmt:get-var (conc "lunch-complete-" run-id)))
-         (current-state     (rmt:get-run-state run-id))
-         (current-status    (rmt:get-run-status run-id))
-         (launch-done?      (equal? all-test-launched "yes")))
-    (debug:print 0 *default-log-port* "rollup run state/status")
-    (rmt:set-state-status-and-roll-up-run run-id current-state current-status)
-    (cond
-     ((and launch-done? (eq? not-completed-cnt 0))
-      (debug:print 0 *default-log-port* "look for  post hook.")
-      (runs:run-post-hook run-id))
-     ((> running-cnt 3)
-      (debug:print 0 *default-log-port* "There are " running-cnt " tests running."))
-     ((> running-cnt 0)
-      (debug:print 0 *default-log-port* "running cnt > 0 but <= 3 kill-running-tests-if-dead")
-      (let ((kill-cnt (launch:kill-tests-if-dead run-id)))
-        (if (and launch-done? (eq? kill-cnt running-cnt))
-            (launch:end-of-run-check run-id)))) ;; todo
-     (else
-      (debug:print 0 *default-log-port* "Should it get here?? May be everything is not launched yet. Running test cnt:" running-cnt " Not completed test cnt:" not-completed-cnt)
-      ;; list the stragglers; indexes 2 and 11 of a test record are read as testname and item-path
-      (for-each
-       (lambda (test-rec)
-         (debug:print 0 *default-log-port* "test " (vector-ref test-rec 2) "/" (vector-ref test-rec 11) " not completed"))
-       (rmt:get-tests-for-run run-id "%" `("NOT_STARTED" "RUNNING" "LAUNCHED" "REMOTEHOSTSTART") `() #f #f #f #f #f #f #f #f))))))
- 
-;; Report whether process pid still shows up on host.
-;;
-;; Runs "pstree -A <pid>" on the host over ssh; any output line counts as alive.
-;; Returns #t without checking when host or pid is #f, or host is "n/a".
-;;
-(define (launch:is-test-alive host pid)
-  (cond
-   ((not (and host pid)) #t)
-   ((equal? host "n/a")  #t)
-   (else
-    (let* ((cmd   (conc "ssh " host " pstree -A " pid))
-           (lines (with-input-from-pipe cmd read-lines)))
-      (print "cmd: " cmd "\n op: " lines)
-      ;; no output at all means pstree found nothing for that pid
-      (not (null? lines))))))
- 
-;; Mark tests of run-id that appear to be dead as KILLREQ and return how many
-;; were marked.
-;;
-;; Tests in state RUNNING, LAUNCHED or REMOTEHOSTSTART are examined. A test is
-;; treated as dead when event-time + duration is more than 600 seconds in the
-;; past (no duration update in the last 10 minutes) AND launch:is-test-alive
-;; reports that its top process is gone.
-;;
-;; Returns 0 when no tests are in those states (previously this case raised an
-;; error from taking the car of an empty list).
-;;
-(define (launch:kill-tests-if-dead run-id)
-  (let loop ((remaining (rmt:get-tests-for-run run-id "%" `("RUNNING" "LAUNCHED" "REMOTEHOSTSTART") `() #f #f #f #f #f #f #f #f))
-             (kill-cnt  0))
-    (if (null? remaining)
-        kill-cnt
-        (let* ((running-test (car remaining))
-               (test-name    (vector-ref running-test 2))
-               (item-path    (vector-ref running-test 11))
-               (test-id      (vector-ref running-test 0))
-               (host         (vector-ref running-test 6))
-               (pid          (rmt:test-get-top-process-pid run-id test-id))
-               (event-time   (vector-ref running-test 5))
-               (duration     (vector-ref running-test 12))
-               (curr-time    (current-seconds))
-               ;; stale timestamps alone are not enough, confirm on the host before marking it as killed
-               (dead?        (and (< (+ event-time duration 600) curr-time)
-                                  (not (launch:is-test-alive host pid)))))
-          (if dead?
-              (begin
-                (debug:print 0 *default-log-port* "test " test-name "/" item-path " needs to be killed")
-                (rmt:set-state-status-and-roll-up-items run-id test-name item-path "KILLREQ" "n/a" #f)))
-          (loop (cdr remaining)
-                (if dead? (+ kill-cnt 1) kill-cnt))))))
-
-;; DO NOT USE - caching of configs is handled in launch:setup now.
-;;
-(define (launch:cache-config)
-  ;; With a loaded config and one of -run / -runtests / -execute given, dump the
-  ;; config to <linktree>/<target>/<runname>/.megatest.cfg.<seconds> and symlink
-  ;; the versioned .megatest.cfg-<version>-<hash> name to it.
-  (when (and *configdat*
-             (or (args:get-arg "-run")
-                 (args:get-arg "-runtests")
-                 (args:get-arg "-execute")))
-    (let* ((linktree (common:get-linktree)) ;; (get-environment-variable "MT_LINKTREE"))
-           (target   (common:args-get-target exit-if-bad: #t))
-           (runname  (or (args:get-arg "-runname")
-                         (args:get-arg ":runname")
-                         (getenv "MT_RUNNAME")))
-           (fulldir  (conc linktree "/" target "/" runname)))
-      (cond
-       ;; can't proceed without linktree
-       ((not (and linktree (common:file-exists? linktree)))
-        (debug:print-info 1 *default-log-port* "No linktree yet, no caching configs."))
-       (else
-        (debug:print-info 0 *default-log-port* "Have -run with target=" target ", runname=" runname ", fulldir=" fulldir ", testpatt=" (or (args:get-arg "-testpatt") "%"))
-        (if (not (common:file-exists? fulldir))
-            (create-directory fulldir #t)) ;; need to protect with exception handler
-        (when (and target
-                   runname
-                   (common:file-exists? fulldir))
-          (let ((tmpfile  (conc fulldir "/.megatest.cfg." (current-seconds)))
-                (targfile (conc fulldir "/.megatest.cfg-"  megatest-version "-" megatest-fossil-hash))
-                (rconfig  (conc fulldir "/.runconfig." megatest-version "-" megatest-fossil-hash)))
-            ;; only cache megatest.config AFTER runconfigs has been cached
-            (when (common:file-exists? rconfig)
-              (debug:print-info 0 *default-log-port* "Caching megatest.config in " tmpfile)
-              (if (not (common:in-running-test?))
-                  (configf:write-alist *configdat* tmpfile))
-              (system (conc "ln -sf " tmpfile " " targfile))))))))))
-
-
-;; gather available information, if legit read configs in this order:
-;;
-;;   if have cache;
-;;      read it and return it
-;;   else
-;;     megatest.config     (do not cache)
-;;     runconfigs.config   (cache if all vars avail)
-;;     megatest.config     (cache if all vars avail)
-;;   returns:
-;;     *toppath*
-;;   side effects:
-;;     sets; *configdat*    (megatest.config info)
-;;           *runconfigdat* (runconfigs.config info)
-;;           *configstatus* (status of the read data)
-;;
-(define (launch:setup #!key (force-reread #f) (areapath #f))
-  ;; Serialized entry point: holds *launch-setup-mutex* while deciding whether
-  ;; launch:setup-body has to run. Returns *toppath* when the full config data
-  ;; is already loaded, otherwise whatever launch:setup-body returns.
-  (mutex-lock! *launch-setup-mutex*)
-  (let ((result
-         (cond
-          ((and *toppath*
-                (eq? *configstatus* 'fulldata)
-                (not force-reread)) ;; got it all
-           (debug:print 2 *default-log-port* "NOTE: skipping launch:setup-body call since we have fulldata")
-           *toppath*)
-          (else
-           (launch:setup-body force-reread: force-reread areapath: areapath)))))
-    (mutex-unlock! *launch-setup-mutex*)
-    result))
-
-;; return paths depending on what info is available.
-;;
-(define (launch:get-cache-file-paths areapath toppath target mtconfig)
-  ;; Returns a pair (mtcachef . rccachef): the cache file names for
-  ;; megatest.config and runconfigs.config. Each is #f when neither the test
-  ;; directory nor the run directory is available as a cache location.
-  ;; Only target is used from the parameters.
-  (let* ((use-cache  (common:use-cache?)) ;; value is not used below; call retained as in the original
-         (runname    (common:args-get-runname))
-         (linktree   (common:get-linktree))
-         (testname   (common:get-full-test-name))
-         (rundir     (and runname target linktree
-                          (common:directory-writable? (conc linktree "/" target "/" runname))))
-         (testdir    (and rundir testname
-                          (common:directory-writable? (conc rundir "/" testname))))
-         ;; prefer the test directory, fall back to the run directory
-         (cachedir   (or testdir rundir))
-         (cache-file (lambda (prefix)
-                       (and cachedir
-                            (conc cachedir "/" prefix megatest-version "-" megatest-fossil-hash))))
-         (mtcachef   (cache-file ".megatest.cfg-"))
-         (rccachef   (cache-file ".runconfigs.cfg-")))
-    (debug:print-info 6 *default-log-port*
-                      "runname=" runname
-                      "\n  linktree=" linktree
-                      "\n  testname=" testname
-                      "\n  rundir=" rundir
-                      "\n  testdir=" testdir
-                      "\n  cachedir=" cachedir
-                      "\n  mtcachef=" mtcachef
-                      "\n  rccachef=" rccachef)
-    (cons mtcachef rccachef)))
-
-;; Do the actual work for launch:setup: read megatest.config and
-;; runconfigs.config (or their cached alists) and populate the globals.
-;;
-;;   force-reread: when true, skip the early return and the read-from-cache branch
-;;   areapath:     used as toppath when *toppath* is not already set
-;;
-;; Returns *toppath* (forced to #f near the end if it is not an existing directory).
-;; Side effects: sets *toppath*, *configdat*, *runconfigdat*, *configinfo* and
-;; *configstatus*; sets MT_RUN_AREA_HOME and MT_TESTSUITENAME; creates the
-;; linktree directory and the <toppath>/lt symlink when missing; may write the
-;; cache files; calls (exit 1) or (exit 2) on unrecoverable problems.
-;;
-(define (launch:setup-body #!key (force-reread #f) (areapath #f))
-  (if (and (eq? *configstatus* 'fulldata)
-	   *toppath*
-	   (not force-reread)) ;; no need to reprocess
-      *toppath*   ;; return toppath
-      (let* ((use-cache (common:use-cache?)) ;; BB- use-cache checks *configdat* for use-cache setting.  We do not have *configdat*.  Bootstrapping problem here.
-	     (toppath  (or *toppath* areapath (getenv "MT_RUN_AREA_HOME"))) ;; preserve toppath
-	     (target   (common:args-get-target))
-	     (sections (if target (list "default" target) #f)) ;; for runconfigs
-	     (mtconfig (or (args:get-arg "-config") "megatest.config")) ;; allow overriding megatest.config 
-             (cachefiles (launch:get-cache-file-paths areapath toppath target mtconfig))
-	     ;; checking for null cachefiles should not be necessary, I was seeing error car of '(), might be a chicken bug or a red herring ...
-	     (mtcachef   (if (null? cachefiles)
-			     #f
-			     (car cachefiles))) ;; (and cachedir (conc cachedir "/" ".megatest.cfg-"  megatest-version "-" megatest-fossil-hash)))
-	     (rccachef   (if (null? cachefiles)
-			     #f
-			     (cdr cachefiles)))) ;; (and cachedir (conc cachedir "/" ".runconfigs.cfg-"  megatest-version "-" megatest-fossil-hash)))
-	      ;; (cancreate (and cachedir (common:file-exists? cachedir)(file-write-access? cachedir) (not (common:in-running-test?)))))
-	(set! *toppath* toppath) ;; This is needed when we are running as a test using CMDINFO as a datasource
-        ;;(BB> "launch:setup-body -- cachefiles="cachefiles)
-	(cond
-	 ;; Branch 1: both cache files exist and caching is enabled.
-	 ;; if mtcachef exists just read it, however we need to assume toppath is available in $MT_RUN_AREA_HOME
-	 ((and (not force-reread)
-	       mtcachef  rccachef
-	       use-cache
-	       (get-environment-variable "MT_RUN_AREA_HOME")
-	       (common:file-exists? mtcachef)
-	       (common:file-exists? rccachef))
-          ;;(BB> "launch:setup-body -- cond branch 1 - use-cache")
-          (set! *configdat*    (configf:read-alist mtcachef))
-          ;;(BB> "launch:setup-body -- 1 set! *configdat*="*configdat*)
-	  (set! *runconfigdat* (configf:read-alist rccachef))
-	  (set! *configinfo*   (list *configdat*  (get-environment-variable "MT_RUN_AREA_HOME")))
-	  (set! *configstatus* 'fulldata)
-	  (set! *toppath*      (get-environment-variable "MT_RUN_AREA_HOME"))
-	  *toppath*)
-	 ;; Branch 2: cache file names are known but the caches were not used.
-	 ;; there are no existing cached configs, do full reads of the configs and cache them
-	 ;; we have all the info needed to fully process runconfigs and megatest.config
-	 ((and ;; (not force-reread) ;; force-reread is irrelevant in the AND, could however OR it?
-	       mtcachef
-	       rccachef) ;; BB- why are we doing this without asking if caching is desired?
-          ;;(BB> "launch:setup-body -- cond branch 2")
-	  (let* ((first-pass    (find-and-read-config        ;; NB// sets MT_RUN_AREA_HOME as side effect
-				 mtconfig
-				 environ-patt: "env-override"
-				 given-toppath: toppath
-				 pathenvvar: "MT_RUN_AREA_HOME"))
-		 ;; NOTE(review): when toppath is #f, first-pass is dereferenced with car here, before
-		 ;; the (if first-pass ...) test below - confirm find-and-read-config cannot return #f then.
-		 ;; A non-string result is replaced by $MT_RUN_AREA_HOME a few lines down.
-		 (first-rundat  (let ((toppath (if toppath 
-						   toppath
-						   (car first-pass))))
-				  (read-config ;; (conc toppath "/runconfigs.config") ;; this should be converted to runconfig:read but it is non-trivial, leaving it for now.
-				   (conc (if (string? toppath)
-					     toppath
-					     (get-environment-variable "MT_RUN_AREA_HOME"))
-					 "/runconfigs.config")
-				   *runconfigdat* #t 
-				   sections: sections))))
-	    (set! *runconfigdat* first-rundat)
-	    (if first-pass  ;; 
-		(begin
-                  ;;(BB> "launch:setup-body -- \"first-pass\"=first-pass")
-		  (set! *configdat*  (car first-pass))
-                  ;;(BB> "launch:setup-body -- 2 set! *configdat*="*configdat*)
-		  (set! *configinfo* first-pass)
-		  (set! *toppath*    (or toppath (cadr first-pass))) ;; use the gathered data unless already have it
-		  (set! toppath      *toppath*)
-		  (if (not *toppath*)
-		      (begin
-			(debug:print-error 0 *default-log-port* "you are not in a megatest area!")
-			(exit 1)))
-		  (setenv "MT_RUN_AREA_HOME" *toppath*)
-		  ;; the seed read is done, now read runconfigs, cache it then read megatest.config one more time and cache it
-		  (let* ((keys         (rmt:get-keys))
-			 (key-vals     (keys:target->keyval keys target))
-			 (linktree     (common:get-linktree)) ;; (or (getenv "MT_LINKTREE")(if *configdat* (configf:lookup *configdat* "setup" "linktree") #f)))
-					;     (if *configdat*
-					; 	   (configf:lookup *configdat* "setup" "linktree")
-					; 	   (conc *toppath* "/lt"))))
-			 ;; NOTE(review): second-pass and linktree are bound but not referenced in this let* body
-			 (second-pass  (find-and-read-config
-					mtconfig
-					environ-patt: "env-override"
-					given-toppath: toppath
-					pathenvvar: "MT_RUN_AREA_HOME"))
-			 (runconfigdat (begin     ;; this read of the runconfigs will see any adjustments made by re-reading megatest.config
-					 (for-each (lambda (kt)
-						     (setenv (car kt) (cadr kt)))
-						   key-vals)
-					 (read-config (conc toppath "/runconfigs.config") *runconfigdat* #t ;; consider using runconfig:read some day ...
-						      sections: sections)))
-                         (cachefiles   (launch:get-cache-file-paths areapath toppath target mtconfig))
-                         (mtcachef     (car cachefiles))
-                         (rccachef     (cdr cachefiles)))
-                    ;;  trap exception due to stale NFS handle -- Error: (open-output-file) cannot open file - Stale NFS file handle: "/p/fdk/gwa/lefkowit/mtTesting/qa/primbeqa/links/p1222/11/PDK_r1.1.1/prim/clean/pcell_testgen/.runconfigs.cfg-1.6427-7d1e789cb3f62f9cde719a4865bb51b3c17ea853" - ticket 220546342
-                    ;; TODO - consider 1) using simple-lock to bracket cache write
-                    ;;                 2) cache in hash on server, since need to do rmt: anyway to lock.
-
-		    (if rccachef
-                        (common:fail-safe
-                         (lambda ()
-                           (configf:write-alist runconfigdat rccachef))
-                         (conc "Could not write cache file - "rccachef)))
-                    (if mtcachef
-                        (common:fail-safe
-                         (lambda ()
-                           (configf:write-alist *configdat* mtcachef))
-                         (conc "Could not write cache file - "mtcachef)))
-		    (set! *runconfigdat* runconfigdat)
-		    (if (and rccachef mtcachef) (set! *configstatus* 'fulldata))))
-		;; no configs found? should not happen but let's try to recover gracefully, return an empty hash-table
-		(set! *configdat* (make-hash-table))
-		)))
-
-	 ;; Branch 3: else read what you can and set the flag accordingly
-	 ;; here we don't have either mtcachef or rccachef
-	 (else
-          ;;(BB> "launch:setup-body -- cond branch 3 - else")
-	  (let* ((cfgdat   (find-and-read-config 
-			    (or (args:get-arg "-config") "megatest.config")
-			    environ-patt: "env-override"
-			    given-toppath: (get-environment-variable "MT_RUN_AREA_HOME")
-			    pathenvvar: "MT_RUN_AREA_HOME")))
-
-            (if (and cfgdat (list? cfgdat) (> (length cfgdat) 0) (hash-table? (car cfgdat)))
-		(let* ((toppath  (or (get-environment-variable "MT_RUN_AREA_HOME")(cadr cfgdat)))
-		       (rdat     (read-config (conc toppath  ;; convert this to use runconfig:read!
-						    "/runconfigs.config") *runconfigdat* #t sections: sections)))
-		  (set! *configinfo*   cfgdat)
-		  (set! *configdat*    (car cfgdat))
-		  (set! *runconfigdat* rdat)
-		  (set! *toppath*      toppath)
-		  (set! *configstatus* 'partial))
-		(begin
-		  (debug:print-error 0 *default-log-port* "No " mtconfig " file found. Giving up.")
-		  (exit 2))))))
-	;; COND ends here.
-	
-	;; additional house keeping: make sure the linktree directory and the <toppath>/lt link exist
-	(let* ((linktree (or (common:get-linktree)
-			     (conc *toppath* "/lt"))))
-	  (if linktree
-	      (begin
-		(if (not (common:file-exists? linktree))
-		    (begin
-		      (handle-exceptions
-			  exn
-			  (begin
-			    (debug:print-error 0 *default-log-port* "Something went wrong when trying to create linktree dir at " linktree)
-			    (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
-			    (exit 1))
-			(create-directory linktree #t))))
-		;; a failure to create the lt symlink is logged but is not fatal
-		(handle-exceptions
-		    exn
-		    (begin
-		      (debug:print-error 0 *default-log-port* "Something went wrong when trying to create link to linktree at " *toppath*)
-		      (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)))
-		  (let ((tlink (conc *toppath* "/lt")))
-		    (if (not (common:file-exists? tlink))
-			(create-symbolic-link linktree tlink)))))
-	      (begin
-		(debug:print-error 0 *default-log-port* "linktree not defined in [setup] section of megatest.config")
-		)))
-	;; export the area settings to the environment, or invalidate *toppath* if it is not a directory
-	(if (and *toppath*
-		 (directory-exists? *toppath*))
-	    (begin
-	      (setenv "MT_RUN_AREA_HOME" *toppath*)
-	      (setenv "MT_TESTSUITENAME" (common:get-testsuite-name)))
-	    (begin
-	      (debug:print-error 0 *default-log-port* "failed to find the top path to your Megatest area.")
-	      (set! *toppath* #f) ;; force it to be false so we return #f
-	      #f))
-	
-        ;; one more attempt to cache the configs for future reading
-        (let* ((cachefiles   (launch:get-cache-file-paths areapath toppath target mtconfig))
-               (mtcachef     (car cachefiles))
-               (rccachef     (cdr cachefiles)))
-
-          ;; trap exception due to stale NFS handle -- Error: (open-output-file) cannot open file - Stale NFS file handle: "...somepath.../.runconfigs.cfg-1.6427-7d1e789cb3f62f9cde719a4865bb51b3c17ea853" - ticket 220546342
-          ;; TODO - consider 1) using simple-lock to bracket cache write
-          ;;                 2) cache in hash on server, since need to do rmt: anyway to lock.
-          (if (and rccachef *runconfigdat* (not (common:file-exists? rccachef)))
-              (common:fail-safe
-               (lambda ()
-                 (configf:write-alist *runconfigdat* rccachef))
-               (conc "Could not write cache file - "rccachef))
-              )
-          (if (and mtcachef *configdat*    (not (common:file-exists? mtcachef)))
-              (common:fail-safe
-               (lambda ()
-                 (configf:write-alist *configdat* mtcachef))
-               (conc "Could not write cache file - "mtcachef))
-              )
-          (if (and rccachef mtcachef *runconfigdat* *configdat*)
-              (set! *configstatus* 'fulldata)))
-
-	;; if have -append-config then read and append here
-	(let ((cfname (args:get-arg "-append-config")))
-	  (if (and cfname
-		   (file-read-access? cfname))
-	      (read-config cfname *configdat* #t))) ;; values are added to the hash, no need to do anything special.
-	*toppath*)))
-
-;; Pick a disk to run on from the "disks" section of testconfig (preferred)
-;; or confdat.
-;;
-;; If common:get-disk-with-most-free-space finds a disk with at least minspace
-;; ([setup] minspace, default "10000") its cdr is returned. Otherwise the
-;; configured disk paths are tried longest path first and the first one that
-;; create-directory can create (or that already exists) is returned. The last
-;; resort is (cons 1 "<toppath>/runs").
-;;
-;; NOTE(review): the last-resort value is a pair while the other results are
-;; not - confirm callers cope with both shapes. When no "disks" section exists
-;; the result is unspecified.
-;;
-(define (get-best-disk confdat testconfig)
-  (let* ((disks    (or (and testconfig (hash-table-ref/default testconfig "disks" #f))
-                       (hash-table-ref/default confdat "disks" #f)))
-         (minspace (string->number (or (configf:lookup confdat "setup" "minspace") "10000"))))
-    (if disks
-        (let ((best        (common:get-disk-with-most-free-space disks minspace))
-              (last-resort (lambda () (cons 1 (conc *toppath* "/runs")))))
-          (cond
-           (best           (cdr best))
-           ((null? disks)  (last-resort))
-           (else
-            ;; each entry is read as (name path); longest path first
-            (let try ((candidates (sort disks
-                                        (lambda (x y)
-                                          (> (string-length (cadr x))
-                                             (string-length (cadr y)))))))
-              (if (null? candidates)
-                  (last-resort)
-                  ;; creates the directory if it does not exist and returns the path
-                  (or (handle-exceptions exn #f (create-directory (cadr (car candidates)) #t))
-                      (try (cdr candidates)))))))))))
-
-
-;; Copy the test source area test-src-path into the work area test-path.
-;;
-;; Uses the [setup] testcopycmd setting when present, with TEST_SRC_PATH and
-;; TEST_TARG_PATH substituted; otherwise an rsync whose output is appended to
-;; <test-path>/mt_launch.log. A non-zero exit status is only logged.
-;;
-(define (launch:test-copy test-src-path test-path)
-  (let* ((template (configf:lookup *configdat* "setup" "testcopycmd"))
-         ;; substitute the TEST_SRC_PATH and TEST_TARG_PATH
-         (ovrcmd   (and template
-                        (string-substitute "TEST_TARG_PATH" test-path
-                                           (string-substitute "TEST_SRC_PATH" test-src-path template #t) #t)))
-         (cmd      (or ovrcmd
-                       (conc "rsync -av" (if (debug:debug-mode 1) "" "q") " " test-src-path "/ " test-path "/"
-                             " >> " test-path "/mt_launch.log 2>> " test-path "/mt_launch.log")))
-         (status   (system cmd)))
-    (when (not (eq? status 0))
-      (debug:print 2 *default-log-port* "ERROR: problem with running \"" cmd "\""))))
-
-
-;; Desired directory structure:
-;;
-;;  <linkdir> - <target> - <testname> -.
-;;                                     |
-;;                                     v
-;;  <rundir>  -  <target>  -    <testname> -|- <itempath(s)>
-;;
-;;  dir stored in test is:
-;; 
-;;  <linkdir> - <target> - <testname> [ - <itempath> ]
-;; 
-;; All log file links should be stored relative to the top of link path
-;;  
-;; <target> - <testname> [ - <itempath> ] 
-;;
-;; Create the physical run directory for a test plus its links in the link tree.
-;;
-;;   run-info:      a run-name string, or a run record from which "runname" is read
-;;   keyvals:       list of (key value) pairs; the values joined with "/" form the target
-;;   test-src-path: test source area copied into the work area (#f skips the copy)
-;;   disk-path:     base directory under which the physical run area is created
-;;   itemdat:       an item-path string, or an item list converted with item-list->path
-;;   remtries:      remaining retries when the work area could not be created
-;;
-;; Returns (list lnkpathf lnkpath) on success, or (list #f #f) when
-;; test-src-path is #f or the retries are used up. Several directory/link
-;; failures log an error and exit the process with status 1.
-;;
-(define (create-work-area run-id run-info keyvals test-id test-src-path disk-path testname itemdat #!key (remtries 2))
-  (let* ((item-path (if (string? itemdat) itemdat (item-list->path itemdat))) ;; if pass in string - just use it
-         (runname   (if (string? run-info) ;; if we pass in a string as run-info use it as run-name.
-                        run-info
-                        (db:get-value-by-header (db:get-rows run-info)
-                                                (db:get-header run-info)
-                                                "runname")))
-         (contour   #f) ;; NOT READY FOR THIS (args:get-arg "-contour"))
-         ;; convert back to db: from rdb: - this is always run at server end
-         (target   (string-intersperse (map cadr keyvals) "/"))
-
-         (not-iterated  (equal? "" item-path))
-
-         ;; all tests are found at <rundir>/test-base or <linkdir>/test-base
-         (testtop-base (conc target "/" runname "/" testname))
-         (test-base    (conc testtop-base (if not-iterated "" "/") item-path))
-
-         ;; nb// if itempath is not "" then it is prefixed with "/"
-         (toptest-path (conc disk-path (if contour (conc "/" contour) "") "/" testtop-base))
-         (test-path    (conc disk-path (if contour (conc "/" contour) "") "/" test-base))
-
-         ;; ensure this exists first as links to subtests must be created there
-         (linktree  (common:get-linktree))
-         ;; WAS: (let ((rd (configf:lookup *configdat* "setup" "linktree")))
-         ;;         (if rd rd (conc *toppath* "/runs"))))
-         ;; which seems wrong ...
-
-         (lnkbase   (conc linktree (if contour (conc "/" contour) "") "/" target "/" runname))
-         (lnkpath   (conc lnkbase "/" testname))
-         (lnkpathf  (conc lnkpath (if not-iterated "" "/") item-path))
-         (lnktarget (conc lnkpath "/" item-path)))
-
-    ;; Update the rundir path in the test record for all, rundir=physical, shortdir=logical
-    ;;                                                 rundir   shortdir
-    (rmt:general-call 'test-set-rundir-shortdir run-id lnkpathf test-path testname item-path run-id)
-
-    (debug:print 2 *default-log-port* "INFO:\n       lnkbase=" lnkbase "\n       lnkpath=" lnkpath "\n  toptest-path=" toptest-path "\n     test-path=" test-path)
-    (if (not (common:file-exists? linktree))
-        (begin
-          (debug:print 0 *default-log-port* "WARNING: linktree did not exist! Creating it now at " linktree)
-          (create-directory linktree #t))) ;; (system (conc "mkdir -p " linktree))))
-
-    ;; create the directory for the tests dir links, this is needed no matter what... try up to three times
-    ;; (the success flag used to be inverted, so a failed attempt was never retried)
-    (let loop ((attempts-left 3))
-      (let ((success (if (or (common:directory-exists? lnkbase)
-                             (common:file-exists? lnkbase))
-                         #t ;; already there, nothing to do
-                         (handle-exceptions
-                          exn
-                          (begin
-                            (debug:print-error 0 *default-log-port* "Problem creating linktree base at " lnkbase)
-                            (print-error-message exn (current-error-port))
-                            #f)
-                          (create-directory lnkbase #t)
-                          #t))))
-        (if (and (not success) (> attempts-left 1))
-            (loop (- attempts-left 1)))))
-
-    ;; update the toptest record with its location rundir, cache the path
-    ;; This was highly inefficient, one db write for every subtest, potentially
-    ;; thousands of unnecessary updates, cache the fact it was set and don't set it
-    ;; again.
-
-    ;; Now create the link from the test path to the link tree, however
-    ;; if the test is iterated it is necessary to create the parent path
-    ;; to the iteration. use pathname-directory to trim the path by one
-    ;; level
-    (if (not not-iterated) ;; i.e. iterated
-        (let ((iterated-parent  (pathname-directory (conc lnkpath "/" item-path))))
-          (debug:print-info 2 *default-log-port* "Creating iterated parent " iterated-parent)
-          (handle-exceptions
-           exn
-           (begin
-             (debug:print-error 0 *default-log-port* " Failed to create directory " iterated-parent ((condition-property-accessor 'exn 'message) exn) ", exiting")
-             (exit 1))
-           (create-directory iterated-parent #t))))
-
-    ;; a stale symlink at lnkpath is removed so it can be re-created below
-    (if (symbolic-link? lnkpath)
-        (handle-exceptions
-         exn
-         (begin
-           (debug:print-error 0 *default-log-port* " Failed to remove symlink " lnkpath ((condition-property-accessor 'exn 'message) exn) ", exiting")
-           (exit 1))
-         (delete-file lnkpath)))
-
-    (if (not (or (common:file-exists? lnkpath)
-                 (symbolic-link? lnkpath)))
-        (handle-exceptions
-         exn
-         (begin
-           (debug:print-error 0 *default-log-port* " Failed to create symlink " lnkpath ((condition-property-accessor 'exn 'message) exn) ", exiting")
-           (exit 1))
-         (create-symbolic-link toptest-path lnkpath)))
-
-    ;; NB - This was not working right - some top tests are not getting the path set!!!
-    ;;
-    ;; Do the setting of this record after the paths are created so that the shortdir can
-    ;; be set to the real directory location. This is safer for future clean up if the link
-    ;; tree is damaged or lost.
-    ;;
-    (if (not (hash-table-ref/default *toptest-paths* testname #f))
-        (let* ((testinfo       (rmt:get-test-info-by-id run-id test-id)) ;;  run-id testname item-path))
-               (curr-test-path (if testinfo
-                                   (db:test-get-rundir testinfo)
-                                   #f)))
-          (hash-table-set! *toptest-paths* testname curr-test-path)
-          ;; NB// Was this for the test or for the parent in an iterated test?
-          (rmt:general-call 'test-set-rundir-shortdir run-id lnkpath
-                            (if (common:file-exists? lnkpath)
-                                ;; (resolve-pathname lnkpath)
-                                (common:nice-path lnkpath)
-                                lnkpath)
-                            testname "" run-id)
-          ;; (rmt:general-call 'test-set-rundir run-id lnkpath testname "") ;; toptest-path)
-          (if (or (not curr-test-path)
-                  (not (directory-exists? toptest-path)))
-              (begin
-                (debug:print-info 2 *default-log-port* "Creating " toptest-path " and link " lnkpath)
-                (handle-exceptions
-                 exn
-                 #f ;; don't care to catch and deal with errors here for now.
-                 (create-directory toptest-path #t))
-                (hash-table-set! *toptest-paths* testname toptest-path)))))
-
-    ;; The toptest path has been created, the link to the test in the linktree has
-    ;; been created. Now, if this is an iterated test the real test dir must be created
-    (if (not not-iterated) ;; this is an iterated test
-        (begin ;; (let ((lnktarget (conc lnkpath "/" item-path)))
-          (debug:print 2 *default-log-port* "Setting up sub test run area")
-          (debug:print 2 *default-log-port* " - creating run area in " test-path)
-          (handle-exceptions
-           exn
-           (begin
-             (debug:print-error 0 *default-log-port* " Failed to create directory " test-path ((condition-property-accessor 'exn 'message) exn) ", exiting")
-             (exit 1))
-           (create-directory test-path #t))
-          (debug:print 2 *default-log-port*
-                       " - creating link from: " test-path "\n"
-                       "                   to: " lnktarget)
-
-          ;; If there is already a symlink delete it and recreate it.
-          (handle-exceptions
-           exn
-           (begin
-             (debug:print-error 0 *default-log-port* " Failed to re-create link " lnktarget ((condition-property-accessor 'exn 'message) exn) ", exiting")
-             (exit 1)) ;; non-zero status, consistent with the other failure exits in this procedure
-           (if (symbolic-link? lnktarget)     (delete-file lnktarget))
-           (if (not (common:file-exists? lnktarget)) (create-symbolic-link test-path lnktarget)))))
-
-    (if (not (directory? test-path))
-        (create-directory test-path #t)) ;; this is a hack, I don't know why out of the blue this path does not exist sometimes
-
-    (if (and test-src-path (directory? test-path))
-        (begin
-          (launch:test-copy test-src-path test-path)
-          (list lnkpathf lnkpath ))
-        (if (and test-src-path (> remtries 0))
-            (begin
-              (debug:print-error 0 *default-log-port* "Failed to create work area at " test-path " with link at " lnktarget ", remaining attempts " remtries)
-              ;; start over from the top with one fewer retry
-              (create-work-area run-id run-info keyvals test-id test-src-path disk-path testname itemdat remtries: (- remtries 1)))
-            (list #f #f)))))
-
-
-;; Rate-limited zombie scan for run-id.
-;;
-;; The time of the last scan is kept in the "zombiescan-runid-<run-id>" var.
-;; A new scan (rmt:find-and-mark-incomplete) runs when that var is unset or
-;; older than twice the [setup] deadtime setting (120 when not configured).
-;;
-(define (launch:handle-zombie-tests run-id)
-  (let* ((key       (conc "zombiescan-runid-" run-id))
-         (deadtime  (or (configf:lookup-number *configdat* "setup" "deadtime") 120))
-         (threshold (- (current-seconds) (* 2 deadtime)))
-         (last-scan (rmt:get-var key)))
-    (when (or (not last-scan)
-              (< last-scan threshold))
-      (debug:print 1 *default-log-port* "INFO: search and mark zombie tests")
-      (rmt:set-var key (current-seconds))
-      (rmt:find-and-mark-incomplete run-id #f))))
-
-
-
-
-
-;; 1. look through the disks list for the disk with the most space
-;; 2. create run dir on disk, path name is meaningful
-;; 3. create link from run dir to megatest runs area 
-;; 4. remotely run the test on allocated host
-;;    - could be ssh to host from hosts table (update regularly with load)
-;;    - could be netbatch
-;;      (launch-test db (cadr status) test-conf))
-(define (launch-test test-id run-id run-info keyvals runname test-conf test-name test-path itemdat params)
-  (mutex-lock! *launch-setup-mutex*) ;; setting variables and processing the testconfig is NOT thread-safe, reuse the launch-setup mutex
-  (let* ( ;; (lock-key        (conc "test-" test-id))
-	;; (got-lock        (let loop ((lock        (rmt:no-sync-get-lock lock-key))
-	;; 			     (expire-time (+ (current-seconds) 15))) ;; give up on getting the lock and steal it after 15 seconds
-	;; 		    (if (car lock)
-	;; 			#t
-	;; 			(if (> (current-seconds) expire-time)
-	;; 			    (begin
-	;; 			      (debug:print-info 0 *default-log-port* "Timed out waiting for a lock to launch test " keyvals " " runname " " test-name " " test-path)
-	;; 			      (rmt:no-sync-del! lock-key) ;; destroy the lock
-	;; 			      (loop (rmt:no-sync-get-lock lock-key) expire-time)) ;; 
-	;; 			    (begin
-	;; 			      (thread-sleep! 1)
-	;; 			      (loop (rmt:no-sync-get-lock lock-key) expire-time))))))
-	 (item-path       (item-list->path itemdat))
-	 (contour         #f)) ;; NOT READY FOR THIS (args:get-arg "-contour")))
-    (let loop ((delta        (- (current-seconds) *last-launch*))
-	       (launch-delay (configf:lookup-number *configdat* "setup" "launch-delay" default: 1)))
-      (if (> launch-delay delta)
-	  (begin
-	    (if (common:low-noise-print 1200 "test launch delay") ;; every two hours or so remind the user about launch delay.
-		(debug:print-info 0 *default-log-port* "NOTE: test launches are delayed by " launch-delay " seconds. See megatest.config launch-delay setting to adjust.")) ;; launch of " test-name " for " (- launch-delay delta) " seconds"))
-	    (thread-sleep! (- launch-delay delta))
-	    (loop (- (current-seconds) *last-launch*) launch-delay))))
-    (change-directory *toppath*)
-    (alist->env-vars ;; consolidate this code with the code in megatest.scm for "-execute", *maybe* - the longer they are set the longer each launch takes (must be non-overlapping with the vars)
-     (append
-      (list
-       (list "MT_RUN_AREA_HOME" *toppath*)
-       (list "MT_TEST_NAME" test-name)
-       (list "MT_RUNNAME"   runname)
-       (list "MT_ITEMPATH"  item-path)
-       (list "MT_CONTOUR"   contour)
-       )
-      itemdat))
-    (let* ((tregistry       (tests:get-all)) ;; third param (below) is system-allowed
-           ;; for tconfig, why do we allow fallback to test-conf?
-	   (tconfig         (or (tests:get-testconfig test-name item-path tregistry #t force-create: #t)
-				(begin
-                                  (debug:print 0 *default-log-port* "WARNING: falling back to pre-calculated testconfig. This is likely not desired.")
-                                  test-conf))) ;; force re-read now that all vars are set
-	   (useshell        (let ((ush (configf:lookup *configdat* "jobtools"     "useshell")))
-			      (if ush 
-				  (if (equal? ush "no") ;; must use "no" to NOT use shell
-				      #f
-				      ush)
-				  #t)))     ;; default is yes
-	   (runscript       (configf:lookup tconfig   "setup"        "runscript"))
-	   (ezsteps         (> (length (hash-table-ref/default tconfig "ezsteps" '())) 0)) ;; don't send all the steps, could be big, just send a flag
-	   (subrun          (> (length (hash-table-ref/default tconfig "subrun"  '())) 0)) ;; send a flag to process a subrun
-	   ;; (diskspace       (configf:lookup tconfig   "requirements" "diskspace"))
-	   ;; (memory          (configf:lookup tconfig   "requirements" "memory"))
-	   ;; (hosts           (configf:lookup *configdat* "jobtools"     "workhosts")) ;; I'm pretty sure this was never completed
-	   (remote-megatest (configf:lookup *configdat* "setup" "executable"))
-	   (run-time-limit  (or (configf:lookup  tconfig   "requirements" "runtimelim")
-				(configf:lookup  *configdat* "setup" "runtimelim")))
-	   ;; FIXME SOMEDAY: not good how this is so obtuse, this hack is to 
-	   ;;                allow running from dashboard. Extract the path
-	   ;;                from the called megatest and convert dashboard
-	   ;;             	  or dboard to megatest
-	   (local-megatest  (let* ((lm  (car (argv)))
-				   (dir (pathname-directory lm))
-				   (exe (pathname-strip-directory lm)))
-			      (conc (if dir (conc dir "/") "")
-				    (case (string->symbol exe)
-				      ((dboard)    "../megatest")
-				      ((mtest)     "../megatest")
-				      ((dashboard) "megatest")
-				      (else exe)))))
-	   (launcher        (common:get-launcher *configdat* test-name item-path)) ;; (configf:lookup *configdat* "jobtools"     "launcher"))
-	   (test-sig        (conc (common:get-testsuite-name) ":" test-name ":" item-path)) ;; (item-list->path itemdat))) ;; test-path is the full path including the item-path
-	   (work-area       #f)
-	   (toptest-work-area #f) ;; for iterated tests the top test contains data relevant for all
-	   (diskpath   #f)
-	   (cmdparms   #f)
-	   (fullcmd    #f) ;; (define a (with-output-to-string (lambda ()(write x))))
-	   (mt-bindir-path #f)
-	   (testinfo   (rmt:get-test-info-by-id run-id test-id))
-	   (mt_target  (string-intersperse (map cadr keyvals) "/"))
-	   (debug-param (append (if (args:get-arg "-debug")  (list "-debug" (args:get-arg "-debug")) '())
-				(if (args:get-arg "-logging")(list "-logging") '()))))
-      ;; (if hosts (set! hosts (string-split hosts)))
-      ;; set the megatest to be called on the remote host
-      (if (not remote-megatest)(set! remote-megatest local-megatest)) ;; "megatest"))
-      (set! mt-bindir-path (pathname-directory remote-megatest))
-      (if launcher (set! launcher (string-split launcher)))
-      ;; set up the run work area for this test
-      (if (and (args:get-arg "-preclean") ;; user has requested to preclean for this run
-	       (not (member (db:test-get-rundir testinfo)(list "n/a" "/tmp/badname")))) ;; n/a is a placeholder and thus not a read dir
-	  (begin
-	    (debug:print-info 0 *default-log-port* "attempting to preclean directory " (db:test-get-rundir testinfo) " for test " test-name "/" item-path)
-	    (runs:remove-test-directory testinfo 'remove-data-only))) ;; remove data only, do not perturb the record
-      
-      ;; prevent overlapping actions - set to LAUNCHED as early as possible
-      ;;
-      ;; the following call handles waiver propogation. cannot yet condense into roll-up-pass-fail
-      (tests:test-set-status! run-id test-id "LAUNCHED" "n/a" #f #f) ;; (if launch-results launch-results "FAILED"))
-      (rmt:set-state-status-and-roll-up-items run-id test-name item-path #f "LAUNCHED" #f)
-      ;; (pp (hash-table->alist tconfig))
-      (set! diskpath (get-best-disk *configdat* tconfig))
-      (if diskpath
-	  (let ((dat  (create-work-area run-id run-info keyvals test-id test-path diskpath test-name itemdat)))
-	    (set! work-area (car dat))
-	    (set! toptest-work-area (cadr dat))
-	    (debug:print-info 2 *default-log-port* "Using work area " work-area))
-	  (begin
-	    (set! work-area (conc test-path "/tmp_run"))
-	    (create-directory work-area #t)
-	    (debug:print 0 *default-log-port* "WARNING: No disk work area specified - running in the test directory under tmp_run")))
-      (set! cmdparms (base64:base64-encode 
-		      (z3:encode-buffer 
-		       (with-output-to-string
-			 (lambda () ;; (list 'hosts     hosts)
-			   (write (list (list 'testpath  test-path)
-					;; (list 'transport (conc *transport-type*))
-					;; (list 'serverinf *server-info*)
-					(list 'homehost  (let* ((hhdat (common:get-homehost)))
-							   (if hhdat
-							       (car hhdat)
-							       #f)))
-					(list 'serverurl (if *runremote*
-							     (remote-server-url *runremote*)
-							     #f)) ;;
-					(list 'areaname  (common:get-testsuite-name))
-					(list 'toppath   *toppath*)
-					(list 'work-area work-area)
-					(list 'test-name test-name) 
-					(list 'runscript runscript) 
-					(list 'run-id    run-id   )
-					(list 'test-id   test-id  )
-					;; (list 'item-path item-path )
-					(list 'itemdat   itemdat  )
-					(list 'megatest  remote-megatest)
-					(list 'ezsteps   ezsteps)
-					(list 'subrun    subrun)
-					(list 'target    mt_target)
-					(list 'contour   contour)
-					(list 'runtlim   (if run-time-limit (common:hms-string->seconds run-time-limit) #f))
-					(list 'env-ovrd  (hash-table-ref/default *configdat* "env-override" '())) 
-					(list 'set-vars  (if params (hash-table-ref/default params "-setvars" #f)))
-					(list 'runname   runname)
-					(list 'mt-bindir-path mt-bindir-path))))))))
-      
-      ;; clean out step records from previous run if they exist
-      ;; (rmt:delete-test-step-records run-id test-id)
-      ;; if the dir does not exist we may have a itempath where individual variables are a path, launch anyway
-      (if (common:file-exists? work-area)
-	  (change-directory work-area)) ;; so that log files from the launch process don't clutter the test dir
-      (cond
-       ;; ((and launcher hosts) ;; must be using ssh hostname
-       ;;    (set! fullcmd (append launcher (car hosts)(list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param)))
-       ;; (set! fullcmd (append launcher (car hosts)(list remote-megatest test-sig "-execute" cmdparms))))
-       (launcher
-	(set! fullcmd (append launcher (list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param)))
-       ;; (set! fullcmd (append launcher (list remote-megatest test-sig "-execute" cmdparms))))
-       (else
-	(if (not useshell)(debug:print 0 *default-log-port* "WARNING: internal launching will not work well without \"useshell yes\" in your [jobtools] section"))
-	(set! fullcmd (append (list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param (list (if useshell "&" ""))))))
-      ;; (set! fullcmd (list remote-megatest test-sig "-execute" cmdparms (if useshell "&" "")))))
-      (if (args:get-arg "-xterm")(set! fullcmd (append fullcmd (list "-xterm"))))
-      (debug:print 1 *default-log-port* "Launching " work-area)
-      ;; set pre-launch-env-vars before launching, keep the vars in prevvals and put the envionment back when done
-      (debug:print 4 *default-log-port* "fullcmd: " fullcmd)
-      (set! *last-launch* (current-seconds)) ;; all that junk above takes time, set this as late as possible.
-      (let* ((commonprevvals (alist->env-vars
-			      (hash-table-ref/default *configdat* "env-override" '())))
-	     (miscprevvals   (alist->env-vars ;; consolidate this code with the code in megatest.scm for "-execute"
-			      (append (list (list "MT_TEST_RUN_DIR" work-area)
-					    (list "MT_TEST_NAME" test-name)
-					    (list "MT_ITEM_INFO" (conc itemdat)) 
-					    (list "MT_RUNNAME"   runname)
-					    (list "MT_TARGET"    mt_target)
-					    (list "MT_ITEMPATH"  item-path)
-					    )
-				      itemdat)))
-	     (testprevvals   (alist->env-vars
-			      (hash-table-ref/default tconfig "pre-launch-env-overrides" '())))
-	     ;; Launchwait defaults to true, must override it to turn off wait
-	     (launchwait     (if (equal? (configf:lookup *configdat* "setup" "launchwait") "no") #f #t))
-	     (launch-results-prev (apply (if launchwait ;; BB: TODO: refactor this to examine return code of launcher, if nonzero, set state to launch failed.
-					process:cmd-run-with-stderr-and-exitcode->list
-					process-run)
-				    (if useshell
-					(let ((cmdstr (string-intersperse fullcmd " ")))
-					  (if launchwait
-					      cmdstr
-					      (conc cmdstr " >> mt_launch.log 2>&1 &")))
-					(car fullcmd))
-				    (if useshell
-					'()
-					(cdr fullcmd))))
-             (success        (if launchwait (equal? 0 (cadr launch-results-prev)) #t))
-             (launch-results (if launchwait (car launch-results-prev) launch-results-prev)))
-        (if (not success)
-            (tests:test-set-status! run-id test-id "COMPLETED" "DEAD" "launcher failed; exited non-zero; check mt_launch.log" #f)) ;; (if launch-results launch-results "FAILED"))
-        (mutex-unlock! *launch-setup-mutex*) ;; yes, really should mutex all the way to here. Need to put this entire process into a fork.
-	;; (rmt:no-sync-del! lock-key)         ;; release the lock for starting this test
-	(if (not launchwait) ;; give the OS a little time to allow the process to start
-	    (thread-sleep! 0.01))
-	(with-output-to-file "mt_launch.log"
-	  (lambda ()
-	    (print "LAUNCHCMD: " (string-intersperse fullcmd " "))
-	    (if (list? launch-results)
-		(apply print launch-results)
-		(print "NOTE: launched \"" fullcmd "\"\n  but did not wait for it to proceed. Add the following to megatest.config \n[setup]\nlaunchwait yes\n  if you have problems with this"))
-	    #:append))
-	(debug:print 2 *default-log-port* "Launching completed, updating db")
-	(debug:print 2 *default-log-port* "Launch results: " launch-results)
-	(if (not launch-results)
-	    (begin
-	      (print "ERROR: Failed to run " (string-intersperse fullcmd " ") ", exiting now")
-	      ;; (sqlite3:finalize! db)
-	      ;; good ole "exit" seems not to work
-	      ;; (_exit 9)
-	      ;; but this hack will work! Thanks go to Alan Post of the Chicken email list
-	      ;; NB// Is this still needed? Should be safe to go back to "exit" now?
-	      (process-signal (current-process-id) signal/kill)
-	      ))
-	(alist->env-vars miscprevvals)
-	(alist->env-vars testprevvals)
-	(alist->env-vars commonprevvals)
-	launch-results))
-    (change-directory *toppath*)))
-
-;; recover a test where the top controlling mtest may have died
-;;
-(define (launch:recover-test run-id test-id)
-  ;; this function is called on the test run host via ssh
-  ;;
-  ;; 1. look at the process from pid
-  ;;    - is it owned by calling user
-  ;;    - it it's run directory correct for the test
-  ;;    - is there a controlling mtest (maybe stuck)
-  ;; 2. if recovery is needed watch pid
-  ;;    - when it exits take the exit code and do the needful
-  ;;
-  (let* ((pid (rmt:test-get-top-process-pid run-id test-id))
-	 (psres (with-input-from-pipe
-		 (conc "ps -F -u " (current-user-name) " | grep -E '" pid " ' | grep -v 'grep -E " pid "'")
-		 (lambda ()
-		   (read-line))))
-	 (rundir (if (string? psres) ;; real process owned by user
-		     (read-symbolic-link (conc "/proc/" pid "/cwd"))
-		     #f)))
-    ;; now wait on that process if all is correct
-    ;; periodically update the db with runtime
-    ;; when the process exits look at the db, if still RUNNING after 10 seconds set
-    ;; state/status appropriately
-    (process-wait pid)))

Index: launchmod.scm
==================================================================
--- launchmod.scm
+++ launchmod.scm
@@ -18,18 +18,1829 @@
 
 ;;======================================================================
 
 (declare (unit launchmod))
 (declare (uses commonmod))
+(declare (uses rmtmod))
+(declare (uses mtconfigf))
+(declare (uses testsmod))
+(declare (uses mtargs))
 
 (module launchmod
 	*
 	
-(import scheme chicken data-structures extras)
-(import (prefix sqlite3 sqlite3:) posix typed-records srfi-18 srfi-69 format ports srfi-1 matchable)
+(import scheme chicken data-structures extras files)
+(import (prefix sqlite3 sqlite3:) posix typed-records
+	srfi-18 srfi-69 format ports srfi-1 matchable
+	z3 (prefix base64 base64:) regex
+	call-with-environment-variables csv)
 (import commonmod)
+(import rmtmod)
+(import (prefix mtconfigf configf:))
+(import testsmod)
+(import (prefix mtargs args:))
+
 ;; (use (prefix ulex ulex:))
 
 (include "common_records.scm")
+(include "db_records.scm")
+(include "run_records.scm")
+
+;;======================================================================
+;; launch a task - this runs on the originating host, tests themselves
+;;
+;;======================================================================
+
+
+;;======================================================================
+;; ezsteps
+;;======================================================================
+
+;; ezsteps were going to be coded as
+;; stepname[,predstep1,predstep2 ...] [{VAR1=first,second,third}] command to execute
+;;   BUT
+;; now are
+;; stepname {VAR=first,second,third ...} command ...
+;; where the {VAR=first,second,third ...} is optional.
+
+;; A step is good when it exited 0, or exited 2 while logpro was in use.
+;; logpro: true if logpro processed the step; result is #t (good) or #f (bad)
+(define (steprun-good? logpro exitcode)
+  (if (eq? exitcode 0) #t
+      (if logpro (eq? exitcode 2) #f)))
+
+;; decode the encoded-cmd string, or $MT_CMDINFO when none is passed; '() if neither is set
+(define (launch:get-cmdinfo-assoc-list #!key (encoded-cmd #f))
+  (let ((payload (or encoded-cmd (getenv "MT_CMDINFO"))))
+    (if (not payload)
+	'()
+	(common:read-encoded-string payload))))
+
+;; exit info record; old vector slots: 0=pid 1=exit-status 2=exit-code 3=rollup-status
+(defstruct launch:einf (pid #t)(exit-status #t)(exit-code #t)(rollup-status 0))
+
+;; Load <stepname>.dat if it exists and hand its csv form to rmt:csv->test-data.
+;; Returns "PASS", or "<exit-status>: <message>" for any other status in the [final] section
+;; (usable as the step comment); returns #f when the file or the exit-status is missing.
+(define (launch:load-logpro-dat run-id test-id stepname)
+  (let ((cname (conc stepname ".dat")))
+    (if (common:file-exists? cname)
+	(let* ((dat  (configf:read-config cname #f #f))
+	       (csvr (db:logpro-dat->csv dat stepname))
+	       (csvt (let-values (((fmt-cell fmt-record fmt-csv) (make-format ",")))
+		       (fmt-csv (map list->csv-record csvr))))
+	       (status (configf:lookup dat "final" "exit-status"))
+	       (msg     (configf:lookup dat "final" "message")))
+          (if csvt  ;; guards against a stack dump seen when logpro wrote a 0-byte .dat file; fixed by upgrading logpro
+              (rmt:csv->test-data run-id test-id csvt)
+	      (debug:print 0 *default-log-port* "ERROR: no csvdat exists for run-id: " run-id " test-id: " test-id " stepname: " stepname ", check that logpro version is 1.15 or newer"))
+	  ;;  (debug:print-info 13 *default-log-port* "Error: run-id/test-id/stepname="run-id"/"test-id"/"stepname" => bad csvr="csvr)
+	  ;;  )
+	  (cond
+	   ((equal? status "PASS") "PASS") ;; skip the message part if status is pass
+	   (status (conc (configf:lookup dat "final" "exit-status") ": " (if msg msg "no message")))
+	   (else #f)))
+	#f)))
+
+(define (launch:runstep ezstep run-id test-id exit-info m tal testconfig) ;;; returns logpro-used. TODO: deprecate me in favor of ezsteps.scm
+  (let* ((stepname       (car ezstep))  ;; ezstep is (stepname stepinfo)
+	 (stepinfo       (cadr ezstep))
+	;; (let ((info (cadr ezstep)))
+	;; 		   (if (proc? info) "" info)))
+	;; (stepproc       (let ((info (cadr ezstep)))
+	;; 		   (if (proc? info) info #f)))
+	 (stepparts      (string-match (regexp "^(\\{([^\\}\\{]*)\\}\\s*|)(.*)$") stepinfo))
+	 (stepparams     (list-ref stepparts 2)) ;; for future use, {VAR=1,2,3}, run step for each
+	 (paramparts     (if (string? stepparams)
+			     (map (lambda (x)(string-split x "=")) (string-split-fields "[^;]*=[^;]*" stepparams))
+			     '()))
+	 (subrun         (alist-ref "subrun" paramparts equal?))
+	 (stepcmd        (list-ref stepparts 3))
+	 (script         "") ; "#!/bin/bash\n") ;; yep, we depend on bin/bash FIXME!!!\
+	 (logpro-file    (conc stepname ".logpro"))
+	 (html-file      (conc stepname ".html"))
+	 (dat-file       (conc stepname ".dat"))
+	 (tconfig-logpro (configf:lookup testconfig "logpro" stepname))
+	 (logpro-used    (common:file-exists? logpro-file)))
+
+    (debug:print 0 *default-log-port* "stepparts: " stepparts ", stepparams: " stepparams
+                 ", paramparts: " paramparts ", subrun: " subrun ", stepcmd: " stepcmd)
+    
+    (if (and tconfig-logpro
+	     (not logpro-used)) ;; no logpro file found but have a defn in the testconfig
+	(begin
+	  (with-output-to-file logpro-file
+	    (lambda ()
+	      (print ";; logpro file extracted from testconfig\n"
+		     ";;")
+	      (print tconfig-logpro)))
+	  (set! logpro-used #t)))
+    
+    ;; NB// can safely assume we are in test-area directory
+    (debug:print 4 *default-log-port* "ezsteps:\n stepname: " stepname " stepinfo: " stepinfo " stepparts: " stepparts
+		 " stepparams: " stepparams " stepcmd: " stepcmd)
+    
+    ;; ;; first source the previous environment
+    ;; (let ((prev-env (conc ".ezsteps/" prevstep (if (string-search (regexp "csh") 
+    ;;      							 (get-environment-variable "SHELL")) ".csh" ".sh"))))
+    ;;   (if (and prevstep (common:file-exists? prev-env))
+    ;;       (set! script (conc script "source " prev-env))))
+    
+    ;; call the command using mt_ezstep
+    ;; (set! script (conc "mt_ezstep " stepname " " (if prevstep prevstep "x") " " stepcmd))
+    
+    (debug:print 4 *default-log-port* "script: " script)
+    (rmt:teststep-set-status! run-id test-id stepname "start" "-" #f #f)
+    ;; now launch the actual process
+    (call-with-environment-variables 
+     (list (cons "PATH" (conc (get-environment-variable "PATH") ":.")))
+     (lambda () ;; (process-run "/bin/bash" "-c" "exec ls -l /tmp/foobar > /tmp/delme-more.log 2>&1")
+       (let* ((cmd (conc stepcmd " > " stepname ".log 2>&1")) ;; >outfile 2>&1 
+	      (pid #f))
+	 (let ((proc (lambda ()
+		       (set! pid (process-run "/bin/bash" (list "-c" cmd))))))
+	   (if subrun
+               (begin
+                 (debug:print-info 0 *default-log-port* "Running without MT_.* environment variables.")
+                 (common:without-vars proc "^MT_.*"))
+	       (proc)))
+	 
+         (with-output-to-file "Makefile.ezsteps"
+           (lambda ()
+             (print stepname ".log :")
+             (print "\t" cmd)
+             (if (common:file-exists? (conc stepname ".logpro"))
+                 (print "\tlogpro " stepname ".logpro " stepname ".html < " stepname ".log"))
+             (print)
+             (print stepname " : " stepname ".log")
+             (print))
+           #:append)
+
+	 (rmt:test-set-top-process-pid run-id test-id pid)
+	 (let processloop ((i 0))
+	   (let-values (((pid-val exit-status exit-code)(process-wait pid #t)))
+		       (mutex-lock! m)
+		       (launch:einf-pid-set!         exit-info pid)         ;; (vector-set! exit-info 0 pid)
+		       (launch:einf-exit-status-set! exit-info exit-status) ;; (vector-set! exit-info 1 exit-status)
+		       (launch:einf-exit-code-set!   exit-info exit-code)   ;; (vector-set! exit-info 2 exit-code)
+		       (mutex-unlock! m)
+		       (if (eq? pid-val 0) ;; non-blocking wait reported pid 0: poll again in 2 seconds
+			   (begin
+			     (thread-sleep! 2)
+			     (processloop (+ i 1))))
+		       )))))
+    (debug:print-info 0 *default-log-port* "step " stepname " completed with exit code " (launch:einf-exit-code exit-info)) ;; (vector-ref exit-info 2))
+    ;; now run logpro if needed
+    (if logpro-used
+	(let* ((logpro-exe (or (getenv "LOGPRO_EXE") "logpro"))
+               (pid        (process-run (conc "/bin/sh -c '"logpro-exe" "logpro-file " " (conc stepname ".html") " < " stepname ".log > /dev/null'"))))
+	  (let processloop ((i 0))
+	    (let-values (((pid-val exit-status exit-code)(process-wait pid #t)))
+			(mutex-lock! m)
+			;; (make-launch:einf pid: pid exit-status: exit-status exit-code: exit-code)
+			(launch:einf-pid-set!         exit-info pid)         ;; (vector-set! exit-info 0 pid)
+			(launch:einf-exit-status-set! exit-info exit-status) ;; (vector-set! exit-info 1 exit-status)
+			(launch:einf-exit-code-set!   exit-info exit-code)   ;; (vector-set! exit-info 2 exit-code)
+			(mutex-unlock! m)
+			(if (eq? pid-val 0)
+			    (begin
+			      (thread-sleep! 2)
+			      (processloop (+ i 1)))))
+	    (debug:print-info 0 *default-log-port* "logpro for step " stepname " exited with code " (launch:einf-exit-code exit-info))))) ;; (vector-ref exit-info 2)))))
+    
+    (let ((exinfo (launch:einf-exit-code exit-info)) ;; (vector-ref exit-info 2))
+	  (logfna (if logpro-used (conc stepname ".html") ""))
+	  (comment #f))
+      (if logpro-used
+	  (let ((datfile (conc stepname ".dat")))
+	    ;; load the .dat file into the test_data table if it exists
+	    (if (common:file-exists? datfile)
+		(set! comment (launch:load-logpro-dat run-id test-id stepname)))
+	    (rmt:test-set-log! run-id test-id (conc stepname ".html"))))
+      (rmt:teststep-set-status! run-id test-id stepname "end" exinfo comment logfna))
+    ;; set the test final status
+    (let* ((process-exit-status (launch:einf-exit-code exit-info)) ;; (vector-ref exit-info 2))
+	   (this-step-status (cond
+			      ((and (eq? process-exit-status 2) logpro-used) 'warn)   ;; logpro 2 = warnings
+			      ((and (eq? process-exit-status 3) logpro-used) 'check)  ;; logpro 3 = check
+			      ((and (eq? process-exit-status 4) logpro-used) 'waived) ;; logpro 4 = waived
+			      ((and (eq? process-exit-status 5) logpro-used) 'abort)  ;; logpro 5 = abort
+			      ((and (eq? process-exit-status 6) logpro-used) 'skip)   ;; logpro 6 = skip
+			      ((eq? process-exit-status 0)                   'pass)   ;; logpro 0 = pass
+			      (else 'fail)))
+	   (overall-status   (cond
+			      ((eq? (launch:einf-rollup-status exit-info) 2) 'warn) ;; rollup-status (vector-ref exit-info 3)
+			      ((eq? (launch:einf-rollup-status exit-info) 0) 'pass) ;; (vector-ref exit-info 3)
+			      (else 'fail)))
+	   (next-status      (cond 
+			      ((eq? overall-status 'pass) this-step-status)
+			      ((eq? overall-status 'warn)
+			       (if (eq? this-step-status 'fail) 'fail 'warn))
+			      ((eq? overall-status 'abort) 'abort)
+			      (else 'fail)))
+	   (next-state       ;; "RUNNING") ;; WHY WAS THIS CHANGED TO NOT USE (null? tal) ??
+	    (cond
+	     ((null? tal) ;; more to run?
+	      "COMPLETED")
+	     (else "RUNNING"))))
+      (debug:print 4 *default-log-port* "Exit value received: " (launch:einf-exit-code exit-info) " logpro-used: " logpro-used 
+		   " this-step-status: " this-step-status " overall-status: " overall-status 
+		   " next-status: " next-status " rollup-status: "  (launch:einf-rollup-status exit-info)) ;; (vector-ref exit-info 3))
+      (case next-status
+	((warn)
+	 (launch:einf-rollup-status-set! exit-info 2) ;; (vector-set! exit-info 3 2) ;; rollup-status
+	 ;; NB// test-set-status! does rdb calls under the hood
+	 (tests:test-set-status! run-id test-id next-state "WARN" 
+				 (if (eq? this-step-status 'warn) "Logpro warning found" #f)
+				 #f))
+	((check)
+	 (launch:einf-rollup-status-set! exit-info 3) ;; (vector-set! exit-info 3 3) ;; rollup-status
+	 ;; NB// test-set-status! does rdb calls under the hood
+	 (tests:test-set-status! run-id test-id next-state "CHECK" 
+				 (if (eq? this-step-status 'check) "Logpro check found" #f)
+				 #f))
+	((waived)
+	 (launch:einf-rollup-status-set! exit-info 4) ;; (vector-set! exit-info 3 4) ;; rollup-status
+	 ;; NB// test-set-status! does rdb calls under the hood
+	 (tests:test-set-status! run-id test-id next-state "WAIVED" 
+				 (if (eq? this-step-status 'waived) "Logpro waived found" #f) ;; must test 'waived or the comment is never recorded
+				 #f))
+	((abort)
+	 (launch:einf-rollup-status-set! exit-info 5) ;; (vector-set! exit-info 3 5) ;; rollup-status
+	 ;; NB// test-set-status! does rdb calls under the hood
+	 (tests:test-set-status! run-id test-id next-state "ABORT" 
+				 (if (eq? this-step-status 'abort) "Logpro abort found" #f)
+				 #f))
+	((skip)
+	 (launch:einf-rollup-status-set! exit-info 6) ;; (vector-set! exit-info 3 6) ;; rollup-status
+	 ;; NB// test-set-status! does rdb calls under the hood
+	 (tests:test-set-status! run-id test-id next-state "SKIP" 
+				 (if (eq? this-step-status 'skip) "Logpro skip found" #f)
+				 #f))
+	((pass)
+	 (tests:test-set-status! run-id test-id next-state "PASS" #f #f))
+	(else ;; 'fail
+	 (launch:einf-rollup-status-set! exit-info 1) ;; (vector-set! exit-info 3 1) ;; force fail, this used to be next-state but that doesn't make sense. should always be "COMPLETED" 
+	 (tests:test-set-status! run-id test-id "COMPLETED" "FAIL" (conc "Failed at step " stepname) #f)
+	 )))
+    logpro-used))
+
+(define (launch:manage-steps run-id test-id item-path fullrunscript ezsteps subrun test-name tconfigreg exit-info m)
+  ;; Sets the test to RUNNING, runs fullrunscript (if given) while recording its pid and
+  ;; exit info in exit-info under mutex m, then, when ezsteps or subrun is set, re-reads
+  ;; the testconfig and runs each ezstep in order via launch:runstep until a step is not good.
+  ;; (tests:test-set-status! test-id "RUNNING" "n/a" #f #f)
+  ;; Since we should have a clean slate at this time there is no need to do 
+  ;; any of the other stuff that tests:test-set-status! does. Let's just 
+  ;; force RUNNING/n/a
+
+  ;; (thread-sleep! 0.3)
+  ;; (tests:test-force-state-status! run-id test-id "RUNNING" "n/a")
+  (rmt:set-state-status-and-roll-up-items run-id test-name item-path "RUNNING" #f #f) 
+  ;; (thread-sleep! 0.3) ;; NFS slowness has caused grief here
+
+  ;; if there is a runscript do it first
+  (if fullrunscript
+      (let ((pid (process-run fullrunscript)))
+	(rmt:test-set-top-process-pid run-id test-id pid)
+	(let loop ((i 0))
+	  (let-values
+	   (((pid-val exit-status exit-code) (process-wait pid #t)))
+	   (mutex-lock! m)
+	   (launch:einf-pid-set!           exit-info  pid)         ;; (vector-set! exit-info 0 pid)
+	   (launch:einf-exit-status-set!   exit-info  exit-status) ;; (vector-set! exit-info 1 exit-status)
+	   (launch:einf-exit-code-set!     exit-info  exit-code)   ;; (vector-set! exit-info 2 exit-code)
+	   (launch:einf-rollup-status-set! exit-info  exit-code)   ;; (vector-set! exit-info 3 exit-code)  ;; rollup status
+	   (mutex-unlock! m)
+	   (if (eq? pid-val 0)
+	       (begin
+		 (thread-sleep! 2)
+		 (loop (+ i 1)))
+	       )))))
+  ;; then, if runscript ran ok (or did not get called)
+  ;; do all the ezsteps (if any)
+  (if (or ezsteps subrun)
+      (let* ((test-run-dir (tests:get-test-path-from-environment))
+             (testconfig ;; (read-config (conc work-area "/testconfig") #f #t environ-patt: "pre-launch-env-vars")) ;; FIXME??? is allow-system ok here?
+	      ;; NOTE: it is tempting to turn off force-create of testconfig but dynamic
+	      ;;       ezstep names need a full re-eval here.
+	      (tests:get-testconfig test-name item-path tconfigreg #t force-create: #t)) ;; 'return-procs)))
+	     (ezstepslst (if (hash-table? testconfig)
+			     (hash-table-ref/default testconfig "ezsteps" '())
+			     #f)))
+	(if testconfig
+	    (hash-table-set! *testconfigs* test-name testconfig) ;; cached for lazy reads later ...
+	    (begin
+	      (launch:setup)
+	      (debug:print 0 *default-log-port* "WARNING: no testconfig found for " test-name " in search path:\n  "
+			   (string-intersperse (tests:get-tests-search-path *configdat*) "\n  "))))
+	;; after all that, still no testconfig? Time to abort
+	(if (not testconfig)
+	    (begin
+	      (debug:print-error 0 *default-log-port* "Failed to resolve megatest.config, runconfigs.config and testconfig issues. Giving up now")
+	      (exit 1)))
+
+	;; create a proc for the subrun if requested, save that proc in the ezsteps table as the last entry
+	;; 1. get section [runarun]
+	;; 2. unset MT_* vars
+	;; 3. fix target
+	;; 4. fix runname
+	;; 5. fix testpatt or calculate it from contour
+	;; 6. launch the run
+	;; 7. roll up the run result and or roll up the logpro processed result
+	(when (configf:lookup testconfig "subrun" "runwait") ;; we use runwait as the flag that a subrun is requested
+            (subrun:initialize-toprun-test testconfig test-run-dir)
+	    (let* ((mt-cmd (subrun:launch-cmd test-run-dir)))
+              (debug:print-info 0 *default-log-port* "Subrun command is \"" mt-cmd "\"")
+              (set! ezsteps #t) ;; set the needed flag
+	      (set! ezstepslst
+                    (append (or ezstepslst '())
+                            (list (list "subrun" (conc "{subrun=true} " mt-cmd)))))))
+
+	;; process the ezsteps
+	(if ezsteps
+	    (begin
+	      (if (not (common:file-exists? ".ezsteps"))(create-directory ".ezsteps"))
+	      ;; if ezsteps was defined then we are sure to have at least one step but check anyway
+	      (if (not (> (length ezstepslst) 0))
+		  (debug:print-error 0 *default-log-port* "ezsteps defined but ezstepslst is zero length")
+		  (let loop ((ezstep (car ezstepslst))
+			     (tal    (cdr ezstepslst))
+			     (prevstep #f))
+                    (debug:print-info 0 *default-log-port* "Processing ezstep \"" (string-intersperse ezstep " ") "\"")
+		    ;; a false exit-status in exit-info means an earlier process did not exit normally
+		    (if (launch:einf-exit-status exit-info) ;; (vector-ref exit-info 1)
+			(let ((logpro-used (launch:runstep ezstep run-id test-id exit-info m tal testconfig))
+			      (stepname    (car ezstep)))
+			  ;; if logpro-used read in the stepname.dat file
+			  (if (and logpro-used (common:file-exists? (conc stepname ".dat")))
+			      (launch:load-logpro-dat run-id test-id stepname))
+			  (if (steprun-good? logpro-used (launch:einf-exit-code exit-info))
+			      (if (not (null? tal))
+				  (loop (car tal) (cdr tal) stepname))
+			      (debug:print 0 *default-log-port* "WARNING: step " (car ezstep) " failed. Stopping")))
+			(debug:print 0 *default-log-port* "WARNING: a prior step failed, stopping at " ezstep)))))))))
+
+(define (launch:monitor-job run-id test-id item-path fullrunscript ezsteps test-name tconfigreg exit-info m work-area runtlim misc-flags)
+  (let* ((update-period (string->number (or (configf:lookup *configdat* "setup" "test-stats-update-period") "30")))
+         (start-seconds (current-seconds))
+	 (calc-minutes  (lambda ()
+			  (inexact->exact 
+			   (round 
+			    (- 
+			     (current-seconds) 
+			     start-seconds)))))
+	 (kill-tries 0))
+    ;; (tests:set-full-meta-info #f test-id run-id (calc-minutes) work-area)
+    ;; (tests:set-full-meta-info test-id run-id (calc-minutes) work-area)
+    (tests:set-full-meta-info #f test-id run-id (calc-minutes) work-area 10)
+
+    (let loop ((minutes   (calc-minutes))
+	       (cpu-load  (alist-ref 'adj-core-load (common:get-normalized-cpu-load #f)))
+	       (disk-free (get-df (current-directory)))
+               (last-sync (current-seconds)))
+      #;(common:telemetry-log "zombie" (conc "launch:monitor-job - top of loop encountered at "(current-seconds)" with last-sync="last-sync))
+      (let* ((over-time     (> (current-seconds) (+ last-sync update-period)))
+             (new-cpu-load  (let* ((load  (alist-ref 'adj-core-load (common:get-normalized-cpu-load #f)))
+                                   (delta (abs (- load cpu-load))))
+                              (if (> delta 0.1) ;; don't bother updating with small changes
+                                  load
+                                  #f)))
+             (new-disk-free (let* ((df    (if over-time ;; only get df every 30 seconds
+                                              (get-df (current-directory))
+                                              disk-free))
+                                   (delta (abs (- df disk-free))))
+                              (if (and (> df 0)
+                                       (> (/ delta df) 0.1)) ;; (> delta 200) ;; ignore changes under 200 Meg
+                                  df
+                                  #f)))
+             (do-sync       (or new-cpu-load new-disk-free over-time))
+
+             (test-info   (rmt:get-test-info-by-id run-id test-id))
+             (state       (db:test-get-state test-info))
+             (status      (db:test-get-status test-info))
+             (kill-reason  "no kill reason specified")
+             (kill-job?    #f))
+        #;(common:telemetry-log "zombie" (conc "launch:monitor-job - decision time encountered at "(current-seconds)" with last-sync="last-sync" do-sync="do-sync" over-time="over-time" update-period="update-period))
+        (cond
+         ((test-get-kill-request run-id test-id)
+          (set! kill-reason "KILLING TEST since received kill request (KILLREQ)")
+          (set! kill-job? #t))
+         ((and runtlim (> (- (current-seconds) start-seconds) runtlim))
+          (set! kill-reason (conc "KILLING TEST DUE TO TIME LIMIT EXCEEDED! Runtime=" (- (current-seconds) start-seconds) " seconds, limit=" runtlim))
+          (set! kill-job? #t))
+         ((equal? status "DEAD")
+          (tests:update-central-meta-info run-id test-id new-cpu-load new-disk-free (calc-minutes) #f #f)
+          (rmt:set-state-status-and-roll-up-items run-id test-id 'foo "RUNNING" "n/a" "was marked dead; really still running.")
+          ;;(set! kill-reason "KILLING TEST because it was marked as DEAD by launch:handle-zombie-tests (might indicate really overloaded server or else overzealous setup.deadtime)") ;; MARK RUNNING
+          (set! kill-job? #f)))
+
+        (debug:print 4 *default-log-port* "cpu: " new-cpu-load " disk: " new-disk-free " last-sync: " last-sync " do-sync: " do-sync)
+        (launch:handle-zombie-tests run-id)
+        (when do-sync
+          ;;(with-output-to-file (conc (getenv "MT_TEST_RUN_DIR") "/last-loadinfo.log" #:append)
+          ;;  (lambda () (pp (list (current-seconds) new-cpu-load new-disk-free (calc-minutes)))))
+          #;(common:telemetry-log "zombie" (conc  "launch:monitor-job - dosync started at "(current-seconds)))
+          (tests:update-central-meta-info run-id test-id new-cpu-load new-disk-free (calc-minutes) #f #f)
+          #;(common:telemetry-log "zombie" (conc "launch:monitor-job - dosync finished at "(current-seconds))))
+        
+	(if kill-job? 
+	    (begin
+              (debug:print-info 0 *default-log-port* "proceeding to kill test: "kill-reason)
+	      (mutex-lock! m)
+	      ;; NOTE: The pid can change as different steps are run. Do we need handshaking between this
+	      ;;       section and the runit section? Or add a loop that tries three times with a 1/4 second
+	      ;;       between tries?
+	      (let* ((pid1 (launch:einf-pid exit-info)) ;; (vector-ref exit-info 0))
+		     (pid2 (rmt:test-get-top-process-pid run-id test-id))
+		     (pids (delete-duplicates (filter number? (list pid1 pid2)))))
+		(if (not (null? pids))
+		    (begin
+		      (for-each
+		       (lambda (pid)
+			 (handle-exceptions
+			  exn
+			  (begin
+			    (debug:print-info 0 *default-log-port* "Unable to kill process with pid " pid ", possibly already killed.")
+			    (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)))
+			  (debug:print 0 *default-log-port* "WARNING: Request received to kill job " pid) ;;  " (attempt # " kill-tries ")")
+			  (debug:print-info 0 *default-log-port* "Signal mask=" (signal-mask))
+			  ;; (if (process:alive? pid)
+			  ;;     (begin
+			  (map (lambda (pid-num)
+				 (process-signal pid-num signal/term))
+			       (process:get-sub-pids pid))
+			  (thread-sleep! 5)
+			  ;; (if (process:process-alive? pid)
+			  (map (lambda (pid-num)
+				 (handle-exceptions
+				  exn
+				  #f
+				  (process-signal pid-num signal/kill)))
+			       (process:get-sub-pids pid))))
+		       ;;    (debug:print-info 0 *default-log-port* "not killing process " pid " as it is not alive"))))
+		       pids)
+                      ;; BB: question to Matt -- does the tests:test-state-status! encompass rollup to toplevel?  If not, should it?
+		      (tests:test-set-status! run-id test-id "KILLED"  "KILLED" (conc (args:get-arg "-m")" "kill-reason) #f)) ;; BB ADDED kill-reason -- confirm OK with Matt
+		    (begin
+		      (debug:print-error 0 *default-log-port* "Nothing to kill, pid1=" pid1 ", pid2=" pid2)
+		      (tests:test-set-status! run-id test-id "KILLED"  "FAILED TO KILL" (conc (args:get-arg "-m")" "kill-reason) #f) ;; BB ADDED kill-reason -- confirm OK with Matt
+		      )))
+	      (mutex-unlock! m)
+	      ;; no point in sticking around. Exit now. But run end of run before exiting?
+        (launch:end-of-run-check run-id)
+	      (exit)))
+	(if (hash-table-ref/default misc-flags 'keep-going #f)
+	    (begin
+	      (thread-sleep! 3) ;; (+ 3 (random 6))) ;; add some jitter to the call home time to spread out the db accesses
+	      (if (hash-table-ref/default misc-flags 'keep-going #f)  ;; keep originals for cpu-load and disk-free unless they change more than the allowed delta
+		  (loop (calc-minutes)
+                        (or new-cpu-load cpu-load)
+                        (or new-disk-free disk-free)
+                        (if do-sync (current-seconds) last-sync)))))))
+    (tests:update-central-meta-info run-id test-id (get-cpu-load) (get-df (current-directory))(calc-minutes) #f #f))) ;; NOTE: Checking twice for keep-going is intentional
+
+
+;; Run one test on the execution host.
+;;
+;; encoded-cmd is the encoded launch record. It is exported as MT_CMDINFO and
+;; decoded with common:read-encoded-string. When the decoded value is a list it
+;; is used as an alist of launch parameters (testpath, work-area, run-id,
+;; test-id, target, runscript, ezsteps, ...); otherwise nothing further is done.
+;;
+;; Sequence:
+;;   1. export the area/run environment and call launch:setup
+;;   2. install SIGINT/SIGTERM handlers that mark the test COMPLETED/ABORT
+;;   3. check the recorded test state; exit unless the test may be (re)started
+;;   4. load runconfigs, switch to the work area, export the MT_* variables
+;;   5. write .testconfig, then run launch:manage-steps and launch:monitor-job
+;;      in two threads
+;;   6. once the job thread is done, record the final state/status, refresh
+;;      the summaries and call launch:end-of-run-check
+;;
+;; Exits the process with code 4 when the recorded exit status is false.
+;;
+(define (launch:execute encoded-cmd)
+  (let* ((cmdinfo    (common:read-encoded-string encoded-cmd))
+         (tconfigreg #f))
+    (setenv "MT_CMDINFO" encoded-cmd)
+    ;;(bb-check-path msg: "launch:execute incoming")
+    (if (list? cmdinfo) ;; ((testpath /tmp/mrwellan/jazzmind/src/example_run/tests/sqlitespeed)
+        ;; (test-name sqlitespeed) (runscript runscript.rb) (db-host localhost) (run-id 1))
+        (let* ((testpath  (assoc/default 'testpath  cmdinfo))  ;; testpath is the test spec area
+               (top-path  (assoc/default 'toppath   cmdinfo))
+               (work-area (assoc/default 'work-area cmdinfo))  ;; work-area is the test run area
+               (test-name (assoc/default 'test-name cmdinfo))
+               (runscript (assoc/default 'runscript cmdinfo))
+               (ezsteps   (assoc/default 'ezsteps   cmdinfo))
+               (subrun    (assoc/default 'subrun    cmdinfo))
+               (serverurl (assoc/default 'serverurl cmdinfo))
+               (homehost  (assoc/default 'homehost  cmdinfo))
+               (run-id    (assoc/default 'run-id    cmdinfo))
+               (test-id   (assoc/default 'test-id   cmdinfo))
+               (target    (assoc/default 'target    cmdinfo))
+               (areaname  (assoc/default 'areaname  cmdinfo))
+               (itemdat   (assoc/default 'itemdat   cmdinfo))
+               (env-ovrd  (assoc/default 'env-ovrd  cmdinfo))
+               (set-vars  (assoc/default 'set-vars  cmdinfo)) ;; pre-overrides from -setvar
+               (runname   (assoc/default 'runname   cmdinfo))
+               (megatest  (assoc/default 'megatest  cmdinfo))
+               (runtlim   (assoc/default 'runtlim   cmdinfo))
+               (contour   (assoc/default 'contour   cmdinfo))
+               (item-path (item-list->path itemdat))
+               (mt-bindir-path (assoc/default 'mt-bindir-path cmdinfo))
+               (keys      #f)
+               (keyvals   #f)
+               ;; A runscript containing a slash is used untouched. Otherwise the
+               ;; copy in the work area wins when it exists and is executable;
+               ;; failing that the bare name is assumed to be on the PATH.
+               (fullrunscript (if (not runscript)
+                                  #f
+                                  (if (substring-index "/" runscript)
+                                      runscript
+                                      (let ((fulln (conc work-area "/" runscript)))
+                                        (if (and (common:file-exists? fulln)
+                                                 (file-execute-access? fulln))
+                                            fulln
+                                            runscript)))))
+               (check-work-area
+                (lambda ()
+                  ;; NFS might not have propagated the directory meta data to the run host - give it time if needed
+                  (let loop ((count 0))
+                    (if (or (common:directory-exists? work-area)
+                            (> count 10))
+                        (change-directory work-area)
+                        (begin
+                          (debug:print 0 *default-log-port* "INFO: Not starting job yet - directory " work-area " not found")
+                          (thread-sleep! 10)
+                          (loop (+ count 1)))))
+
+                  (if (not (string=? (common:real-path work-area) (common:real-path (current-directory))))
+                      (begin
+                        (debug:print 0 *default-log-port*
+                                     "INFO: we are expecting to be in directory " work-area "\n"
+                                     "     but we are actually in the directory " (current-directory) "\n"
+                                     "     doing another change dir.")
+                        (change-directory work-area)))
+
+                  ;; spot check that the files in testpath are available. Too often NFS delays cause problems here.
+                  (let ((files     (glob (conc testpath "/*")))
+                        (bad-files '()))
+                    (for-each
+                     (lambda (fullname)
+                       (let* ((fname (pathname-strip-directory fullname))
+                              (targn (conc work-area "/" fname)))
+                         (if (not (file-exists? targn))
+                             (set! bad-files (cons fname bad-files)))))
+                     files)
+                    (if (not (null? bad-files))
+                        (begin
+                          (debug:print 0 *default-log-port* "INFO: test data from " testpath " not copied properly or filesystem problems causing data to not be found. Re-running the copy command.")
+                          (debug:print 0 *default-log-port* "INFO: missing files from " work-area ": " (string-intersperse bad-files ", "))
+                          (launch:test-copy testpath work-area))))
+                  ;; one more time, change to the work-area directory
+                  (change-directory work-area))))
+
+          (if contour (setenv "MT_CONTOUR" contour))
+
+          ;; immediately set some key variables from CMDINFO data, yes, these will be set again below ...
+          ;;
+          (setenv "MT_TESTSUITENAME" areaname)
+          (setenv "MT_RUN_AREA_HOME" top-path)
+          (set! *toppath* top-path)
+          (change-directory *toppath*) ;; temporarily switch to the run area home
+          (setenv "MT_TEST_RUN_DIR"  work-area)
+
+          (launch:setup) ;; should be properly in the run area home now
+
+          ;; NOTE(review): the statements from here to the second launch:setup call
+          ;; repeat the block above verbatim. Kept as-is because it is not visible
+          ;; here whether launch:setup alters these settings - confirm before removing.
+          (if contour (setenv "MT_CONTOUR" contour))
+          (setenv "MT_TESTSUITENAME" areaname)
+          (setenv "MT_RUN_AREA_HOME" top-path)
+          (set! *toppath* top-path)
+          (change-directory *toppath*) ;; temporarily switch to the run area home
+          (setenv "MT_TEST_RUN_DIR"  work-area)
+
+          (launch:setup) ;; should be properly in the run area home now
+
+          (set! tconfigreg (tests:get-all)) ;; mapping of testname => test source path
+          (let ((sighand
+                 (lambda (signum)
+                   ;; (signal-mask! signum) ;; to mask or not? seems to cause issues in exiting
+                   (if (eq? signum signal/stop)
+                       (debug:print-error 0 *default-log-port* "attempt to STOP process. Exiting."))
+                   (set! *time-to-exit* #t)
+                   (print "Received signal " signum ", cleaning up before exit (set this test to COMPLETED/ABORT) . Please wait...")
+                   ;; th1 records the abort and exits 1; th2 exits 4 after 20 seconds
+                   ;; if th1 has not already ended the process.
+                   (let ((th1 (make-thread
+                               (lambda ()
+                                 (print "set test to COMPLETED/ABORT begin.")
+                                 (rmt:test-set-state-status run-id test-id "COMPLETED" "ABORT" "received kill signal")
+                                 (print "set test to COMPLETED/ABORT complete.")
+                                 (print "Killed by signal " signum ". Exiting")
+                                 (exit 1))))
+                         (th2 (make-thread
+                               (lambda ()
+                                 (thread-sleep! 20)
+                                 (debug:print 0 *default-log-port* "Done")
+                                 (exit 4)))))
+                     (thread-start! th2)
+                     (thread-start! th1)
+                     (thread-join! th2)))))
+            (set-signal-handler! signal/int sighand)
+            (set-signal-handler! signal/term sighand)
+            ) ;; (set-signal-handler! signal/stop sighand)
+
+          ;; Do not run the test if it is REMOVING, RUNNING, KILLREQ or REMOTEHOSTSTART,
+          ;; Mark the test as REMOTEHOSTSTART *IMMEDIATELY*
+          ;;
+          (let* ((test-info  (rmt:get-test-info-by-id run-id test-id))
+                 (test-host  (if test-info
+                                 (db:test-get-host test-info)
+                                 (begin
+                                   (debug:print 0 *default-log-port* "ERROR: failed to find a record for test-id " test-id ", exiting.")
+                                   (exit))))
+                 (test-pid   (db:test-get-process_id test-info))
+                 (test-state (db:test-get-state test-info)))
+            (cond
+             ;; -mrw- I'm removing KILLREQ from this list so that a test in KILLREQ state is treated as a "do not run" flag.
+             ((member test-state '("INCOMPLETE" "KILLED" "UNKNOWN" "STUCK")) ;; prior run of this test didn't complete, go ahead and try to rerun
+              (debug:print 0 *default-log-port* "INFO: test is INCOMPLETE or KILLED, treat this execute call as a rerun request")
+              (rmt:general-call 'set-test-start-time #f test-id)
+              (rmt:test-set-state-status run-id test-id "REMOTEHOSTSTART" "n/a" #f)) ;; prime it for running
+             ((member test-state '("RUNNING" "REMOTEHOSTSTART"))
+              ;; Never start a second copy of a test that is recorded as running.
+              ;; Previously only the "process not found" case exited; the "still
+              ;; running" case logged "cannot proceed" and then carried on anyway.
+              (if (process:alive-on-host? test-host test-pid)
+                  (debug:print-error 0 *default-log-port* "test state is " test-state " and process " test-pid " is still running on host " test-host ", cannot proceed")
+                  (debug:print-error 0 *default-log-port* "test state is " test-state " but process " test-pid " was not found on host " test-host ", cannot proceed"))
+              (exit))
+             ((not (member test-state '("REMOVING" "REMOTEHOSTSTART" "RUNNING" "KILLREQ")))
+              (rmt:general-call 'set-test-start-time #f test-id)
+              (rmt:test-set-state-status run-id test-id "REMOTEHOSTSTART" "n/a" #f))
+             (else ;; (member test-state '("REMOVING" "REMOTEHOSTSTART" "RUNNING" "KILLREQ"))
+              (debug:print-error 0 *default-log-port* "test state is " test-state ", cannot proceed")
+              (exit))))
+
+          ;; cleanup prior execution's steps
+          (rmt:delete-steps-for-test! run-id test-id)
+
+          (debug:print 2 *default-log-port* "Executing " test-name " (id: " test-id ") on " (get-host-name))
+          (set! keys       (rmt:get-keys))
+          ;; (runs:set-megatest-env-vars run-id inkeys: keys inkeyvals: keyvals) ;; these may be needed by the launching process
+          ;; one of these is defunct/redundant ...
+          (if (not (launch:setup force-reread: #t))
+              (begin
+                (debug:print 0 *default-log-port* "Failed to setup, exiting")
+                (exit 1)))
+          ;; validate that the test run area is available
+          (check-work-area)
+
+          ;; still need to go back to run area home for next couple steps
+          (change-directory *toppath*)
+
+          ;; NOTE: Current order is to process runconfigs *before* setting the MT_ vars. This
+          ;;       seems non-ideal but could well break stuff
+          ;;    BUG? BUG? BUG?
+
+          (let ((rconfig (full-runconfigs-read))
+                ;; wconfig is not referenced below; the read is kept in case it has side effects
+                (wconfig (configf:read-config "waivers.config" #f #t sections: `( "default" ,target )))) ;; read the waivers config if it exists
+            ;; Now have runconfigs data loaded, set environment vars
+            (for-each
+             (lambda (section)
+               (for-each
+                (lambda (varval)
+                  (let ((var (car varval))
+                        (val (cadr varval)))
+                    (if (and (string? var) (string? val))
+                        (safe-setenv var (configf:eval-string-in-environment val))
+                        (debug:print-error 0 *default-log-port* "bad variable spec, " var "=" val))))
+                (configf:get-section rconfig section)))
+             (list "default" target)))
+          ;;(bb-check-path msg: "launch:execute post block 1")
+
+          ;; NFS might not have propagated the directory meta data to the run host - give it time if needed
+          (let loop ((count 0))
+            (if (or (common:file-exists? work-area)
+                    (> count 10))
+                (change-directory work-area)
+                (begin
+                  (debug:print 0 *default-log-port* "INFO: Not starting job yet - directory " work-area " not found")
+                  (thread-sleep! 10)
+                  (loop (+ count 1)))))
+
+          ;; now we can switch to the work-area?
+          (change-directory work-area)
+          ;;(bb-check-path msg: "launch:execute post block 1.5")
+          (set! keyvals    (keys:target->keyval keys target))
+          ;; apply pre-overrides before other variables. The pre-override vars must not
+          ;; clobber things from the official sources such as megatest.config and runconfigs.config
+          (if (string? set-vars)
+              (let ((varpairs (string-split set-vars ",")))
+                (debug:print 4 *default-log-port* "varpairs: " varpairs)
+                ;; only well-formed var=val pairs are applied; anything else is skipped
+                (for-each
+                 (lambda (varpair)
+                   (let ((varval (string-split varpair "=")))
+                     (if (= (length varval) 2)
+                         (let ((var (car varval))
+                               (val (cadr varval)))
+                           (debug:print 1 *default-log-port* "Adding pre-var/val " var " = " val " to the environment")
+                           (setenv var val)))))
+                 varpairs)))
+          ;;(bb-check-path msg: "launch:execute post block 2")
+          ;; every variable listed here is mandatory; a false value aborts the run
+          (for-each
+           (lambda (varval)
+             (let ((var (car varval))
+                   (val (cadr varval)))
+               (if val
+                   (setenv var val)
+                   (begin
+                     (debug:print-error 0 *default-log-port* "required variable " var " does not have a valid value. Exiting")
+                     (exit)))))
+           (list
+            (list  "MT_TEST_RUN_DIR" work-area)
+            (list  "MT_TEST_NAME" test-name)
+            (list  "MT_ITEM_INFO" (conc itemdat))
+            (list  "MT_ITEMPATH"  item-path)
+            (list  "MT_RUNNAME"   runname)
+            (list  "MT_MEGATEST"  megatest)
+            (list  "MT_TARGET"    target)
+            (list  "MT_LINKTREE"  (common:get-linktree)) ;; (configf:lookup *configdat* "setup" "linktree"))
+            (list  "MT_TESTSUITENAME" (common:get-testsuite-name))))
+          ;;(bb-check-path msg: "launch:execute post block 3")
+
+          (if mt-bindir-path (setenv "PATH" (conc (getenv "PATH") ":" mt-bindir-path)))
+          ;;(bb-check-path msg: "launch:execute post block 4")
+
+          ;; environment overrides are done *before* the remaining critical envars.
+          (alist->env-vars env-ovrd)
+          ;;(bb-check-path msg: "launch:execute post block 41")
+          (runs:set-megatest-env-vars run-id inkeys: keys inkeyvals: keyvals)
+          ;;(bb-check-path msg: "launch:execute post block 42")
+          (set-item-env-vars itemdat)
+          ;;(bb-check-path msg: "launch:execute post block 43")
+          ;; save the environment to files, then drop any blacklisted variables
+          (let ((blacklist (configf:lookup *configdat* "setup" "blacklistvars")))
+            (if blacklist
+                (let ((vars (string-split blacklist)))
+                  (save-environment-as-files "megatest" ignorevars: vars)
+                  (for-each (lambda (var)
+                              (unsetenv var))
+                            vars))
+                (save-environment-as-files "megatest")))
+          ;;(bb-check-path msg: "launch:execute post block 44")
+          ;; open-run-close not needed for test-set-meta-info
+          (tests:set-full-meta-info #f test-id run-id 0 work-area 10)
+
+          ;; (thread-sleep! 0.3) ;; NFS slowness has caused grief here
+
+          (if (args:get-arg "-xterm")
+              (set! fullrunscript "xterm")
+              (if (and fullrunscript
+                       (common:file-exists? fullrunscript)
+                       (not (file-execute-access? fullrunscript)))
+                  (system (conc "chmod ug+x " fullrunscript))))
+
+          ;; We are about to actually kick off the test
+          ;; so this is a good place to remove the records for
+          ;; any previous runs
+          ;; now is also a good time to write the .testconfig file
+          ;; (written to a .tmp name first, then moved into place)
+          (let* ((tconfig-fname   (conc work-area "/.testconfig"))
+                 (tconfig-tmpfile (conc tconfig-fname ".tmp"))
+                 (tconfig         (tests:get-testconfig test-name item-path tconfigreg #t force-create: #t))) ;; 'return-procs)))
+            (configf:write-alist tconfig tconfig-tmpfile)
+            (file-move tconfig-tmpfile tconfig-fname #t))
+          ;;
+          (let* ((m            (make-mutex))
+                 (kill-job?    #f) ;; never set in this procedure, so new-state below is always "COMPLETED"
+                 (exit-info    (make-launch:einf pid: #t exit-status: #t exit-code: #t rollup-status: 0)) ;; pid exit-status exit-code (i.e. process was successfully run) rollup-status
+                 (job-thread   #f)
+                 ;; shared with the monitor thread; 'keep-going is cleared once the job thread is done
+                 (misc-flags   (let ((ht (make-hash-table)))
+                                 (hash-table-set! ht 'keep-going #t)
+                                 ht))
+                 (runit        (lambda ()
+                                 (launch:manage-steps run-id test-id item-path fullrunscript ezsteps subrun test-name tconfigreg exit-info m)))
+                 (monitorjob   (lambda ()
+                                 (launch:monitor-job  run-id test-id item-path fullrunscript ezsteps test-name tconfigreg exit-info m work-area runtlim misc-flags)))
+                 (th1          (make-thread monitorjob "monitor job"))
+                 (th2          (make-thread runit "run job")))
+            (set! job-thread th2)
+            (thread-start! th1)
+            (thread-start! th2)
+            (thread-join! th2)
+            (debug:print-info 0 *default-log-port* "Megatest exectute of test " test-name ", item path " item-path " complete. Notifying the db ...")
+            (hash-table-set! misc-flags 'keep-going #f)
+            (thread-join! th1)
+            (thread-sleep! 1)       ;; give thread th1 a chance to be done TODO: Verify this is needed. At 0.1 I was getting fail to stop, increased to total of 1.1 sec.
+            (mutex-lock! m)
+            (let* ((item-path (item-list->path itemdat))
+                   ;; only state and status needed - use lazy routine
+                   (testinfo  (rmt:get-testinfo-state-status run-id test-id)))
+              ;; Am I completed?
+              (if (member (db:test-get-state testinfo) '("REMOTEHOSTSTART" "RUNNING")) ;; NOTE: It should *not* be REMOTEHOSTSTART but for reasons I don't yet understand it sometimes gets stuck in that state
+                  (let ((new-state  (if kill-job? "KILLED" "COMPLETED"))
+                        ;; map the rollup status recorded in exit-info onto a status string
+                        (new-status (cond
+                                     ((not (launch:einf-exit-status exit-info)) "FAIL") ;; job failed to run
+                                     ((eq? (launch:einf-rollup-status exit-info) 0)
+                                      ;; if the current status is AUTO then defer to the calculated value (i.e. leave this AUTO)
+                                      (if (equal? (db:test-get-status testinfo) "AUTO") "AUTO" "PASS"))
+                                     ((eq? (launch:einf-rollup-status exit-info) 1) "FAIL")
+                                     ((eq? (launch:einf-rollup-status exit-info) 2)
+                                      ;; if the current status is AUTO then defer to the calculated value but qualify (i.e. make this AUTO-WARN)
+                                      (if (equal? (db:test-get-status testinfo) "AUTO") "AUTO-WARN" "WARN"))
+                                     ((eq? (launch:einf-rollup-status exit-info) 3) "CHECK")
+                                     ((eq? (launch:einf-rollup-status exit-info) 4) "WAIVED")
+                                     ((eq? (launch:einf-rollup-status exit-info) 5) "ABORT")
+                                     ((eq? (launch:einf-rollup-status exit-info) 6) "SKIP")
+                                     (else "FAIL"))))
+                    (debug:print-info 1 *default-log-port* "Test exited in state=" (db:test-get-state testinfo) ", setting state/status based on exit code of " (launch:einf-exit-status exit-info) " and rollup-status of " (launch:einf-rollup-status exit-info))
+                    (tests:test-set-status! run-id
+                                            test-id
+                                            new-state
+                                            new-status
+                                            (args:get-arg "-m") #f)
+                    ;; need to update the top test record if PASS or FAIL and this is a subtest
+                    ;; NO NEED TO CALL set-state-status-and-roll-up-items HERE, THIS IS DONE IN set-state-status-and-roll-up-items called by tests:test-set-status!
+                    ))
+              ;; for automated creation of the rollup html file this is a good place...
+              (if (not (equal? item-path ""))
+                  (tests:summarize-items run-id test-id test-name #f))
+              (tests:summarize-test run-id test-id)  ;; don't force - just update if no
+              (rmt:update-run-stats run-id (rmt:get-raw-run-stats run-id)))
+            (mutex-unlock! m)
+            (launch:end-of-run-check run-id)
+            (debug:print 2 *default-log-port* "Output from running " fullrunscript ", pid " (launch:einf-pid exit-info) " in work area "
+                         work-area ":\n====\n exit code " (launch:einf-exit-code exit-info) "\n" "====\n")
+            (if (not (launch:einf-exit-status exit-info))
+                (exit 4)))))))
+
+;; Spec for End of test
+;; At end of each test call, after marking self as COMPLETED do run-state-status-rollup
+;; At transition to run COMPLETED/X do hooks
+;; Definition: test_dead if event_time + duration + 1 minute? < current_time AND
+;; we can prove the process is not alive (ssh host pstree -A pid)
+;; if dead safe to mark the test as killed in the db
+;; State/status table
+;; new
+;; 100% COMPLETED/ (PASS,FAIL,ABORT etc.) ==> COMPLETED / X where X is same as itemized rollup
+;; > 3 RUNNING with not test_dead do nothing (run should already be RUNNING/ na
+;; > 0 RUNNING and test_dead then send KILLREQ ==> COMPLETED
+;; 0 RUNNING ==> this is actually the first condition, should not get here
+
+;; Roll up the state/status of run `run-id`, then decide what else is needed:
+;;   - everything launched and nothing left uncompleted => run the post hook
+;;   - more than 3 tests running                        => just report the count
+;;   - 1 to 3 tests running                             => flag dead tests with
+;;       launch:kill-tests-if-dead; when all launches are done and every running
+;;       test was flagged, run this check again
+;;   - otherwise                                        => list the tests that
+;;       have not completed
+;;
+(define (launch:end-of-run-check run-id )
+  (let* ((not-completed-cnt (rmt:get-not-completed-cnt run-id))
+         (running-cnt       (rmt:get-count-tests-running-for-run-id run-id))
+         (all-test-launched (rmt:get-var (conc "lunch-complete-" run-id)))
+         (current-state     (rmt:get-run-state run-id))
+         (current-status    (rmt:get-run-status run-id))
+         ;; the var holds "yes" once every test has been launched; a missing
+         ;; entry (#f) or any other value counts as "not yet"
+         (launch-done?      (equal? all-test-launched "yes")))
+    (debug:print 0 *default-log-port* "rollup run state/status")
+    (rmt:set-state-status-and-roll-up-run  run-id current-state current-status)
+
+    (cond
+     ((and launch-done? (eq? not-completed-cnt 0))
+      (debug:print 0 *default-log-port* "look for  post hook.")
+      (runs:run-post-hook run-id))
+     ((> running-cnt 3)
+      (debug:print 0 *default-log-port* "There are " running-cnt " tests running." ))
+     ((> running-cnt 0)
+      (debug:print 0 *default-log-port* "running cnt > 0 but <= 3 kill-running-tests-if-dead" )
+      (let ((kill-cnt (launch:kill-tests-if-dead run-id)))
+        (if (and launch-done? (eq? kill-cnt running-cnt))
+            (launch:end-of-run-check run-id)))) ;;todo
+     (else
+      (debug:print 0 *default-log-port* "Should it get here?? May be everything is not launched yet. Running test cnt:" running-cnt " Not completed test cnt:" not-completed-cnt)
+      (for-each
+       (lambda (pending-test)
+         (let ((test-name (vector-ref pending-test 2))
+               (item-path (vector-ref pending-test 11)))
+           (debug:print 0 *default-log-port* "test " test-name "/" item-path " not completed")))
+       (rmt:get-tests-for-run run-id "%" `("NOT_STARTED" "RUNNING" "LAUNCHED" "REMOTEHOSTSTART") `() #f #f #f #f #f #f #f #f))))))
+ 
+;; Ask `host` (over ssh, using pstree) whether process `pid` still exists.
+;; Returns #f only when the command produced no output lines. When host or
+;; pid is false, or host is "n/a", no check is made and #t is returned.
+;;
+(define (launch:is-test-alive host pid)
+  (cond
+   ((not (and host pid)) #t)
+   ((equal? host "n/a")  #t)
+   (else
+    (let* ((cmd    (conc "ssh " host " pstree -A " pid))
+           (output (with-input-from-pipe cmd read-lines)))
+      (print "cmd: " cmd "\n op: " output )
+      (not (null? output))))))
+ 
+;; Flag dead tests in run `run-id` for killing.
+;;
+;; Looks at every test in state RUNNING, LAUNCHED or REMOTEHOSTSTART. A test
+;; is set to KILLREQ when its event-time + duration is more than 600 seconds
+;; in the past AND launch:is-test-alive reports its top process as gone.
+;;
+;; Returns the number of tests set to KILLREQ; 0 when no tests are in those
+;; states (previously an empty list raised an error on `car`).
+;;
+(define (launch:kill-tests-if-dead run-id)
+  (let ((running-tests (rmt:get-tests-for-run run-id "%" `("RUNNING" "LAUNCHED" "REMOTEHOSTSTART") `() #f #f #f #f #f #f #f #f)))
+    (let loop ((tal      running-tests)
+               (kill-cnt 0))
+      (if (null? tal)
+          kill-cnt
+          (let* ((running-test (car tal))
+                 (test-name    (vector-ref running-test 2))
+                 (item-path    (vector-ref running-test 11))
+                 (test-id      (vector-ref running-test 0))
+                 (host         (vector-ref running-test 6))
+                 (pid          (rmt:test-get-top-process-pid run-id test-id))
+                 (event-time   (vector-ref running-test 5))
+                 (duration     (vector-ref running-test 12))
+                 (curr-time    (current-seconds))
+                 ;; test has not updated duration in last 10 min then likely its not
+                 ;; running but confirm before marking it as killed
+                 (dead?        (and (< (+ event-time duration 600) curr-time)
+                                    (not (launch:is-test-alive host pid)))))
+            (if dead?
+                (begin
+                  (debug:print 0 *default-log-port* "test " test-name "/" item-path " needs to be killed")
+                  (rmt:set-state-status-and-roll-up-items run-id test-name item-path "KILLREQ" "n/a" #f)))
+            (loop (cdr tal)
+                  (if dead? (+ kill-cnt 1) kill-cnt)))))))
+
+;; DO NOT USE - caching of configs is handled in launch:setup now.
+;;
+(define (launch:cache-config)
+  ;; Only acts when a config is loaded and one of -run, -runtests or -execute
+  ;; was given. With a usable linktree it makes sure <linktree>/<target>/<runname>
+  ;; exists and, once the cached runconfig file is present there, dumps
+  ;; *configdat* to .megatest.cfg.<seconds> and symlinks the versioned
+  ;; .megatest.cfg-<version>-<hash> name to it.
+  (when (and *configdat*
+             (or (args:get-arg "-run")
+                 (args:get-arg "-runtests")
+                 (args:get-arg "-execute")))
+    (let* ((linktree (common:get-linktree)) ;; (get-environment-variable "MT_LINKTREE"))
+           (target   (common:args-get-target exit-if-bad: #t))
+           (runname  (or (args:get-arg "-runname")
+                         (args:get-arg ":runname")
+                         (getenv "MT_RUNNAME")))
+           (fulldir  (conc linktree "/" target "/" runname)))
+      (cond
+       ;; can't proceed without linktree
+       ((not (and linktree (common:file-exists? linktree)))
+        (debug:print-info 1 *default-log-port* "No linktree yet, no caching configs."))
+       (else
+        (debug:print-info 0 *default-log-port* "Have -run with target=" target ", runname=" runname ", fulldir=" fulldir ", testpatt=" (or (args:get-arg "-testpatt") "%"))
+        (unless (common:file-exists? fulldir)
+          (create-directory fulldir #t)) ;; need to protect with exception handler
+        (when (and target
+                   runname
+                   (common:file-exists? fulldir))
+          (let* ((version-tag (conc megatest-version "-" megatest-fossil-hash))
+                 (rconfig     (conc fulldir "/.runconfig." version-tag)))
+            ;; only cache megatest.config AFTER runconfigs has been cached
+            (when (common:file-exists? rconfig)
+              (let ((tmpfile  (conc fulldir "/.megatest.cfg." (current-seconds)))
+                    (targfile (conc fulldir "/.megatest.cfg-" version-tag)))
+                (debug:print-info 0 *default-log-port* "Caching megatest.config in " tmpfile)
+                ;; the dump is skipped inside a running test; the link is made regardless
+                (unless (common:in-running-test?)
+                  (configf:write-alist *configdat* tmpfile))
+                (system (conc "ln -sf " tmpfile " " targfile)))))))))))
+
+
+;; gather available information, if legit read configs in this order:
+;;
+;;   if have cache;
+;;      read it and return it
+;;   else
+;;     megatest.config     (do not cache)
+;;     runconfigs.config   (cache if all vars avail)
+;;     megatest.config     (cache if all vars avail)
+;;   returns:
+;;     *toppath*
+;;   side effects:
+;;     sets; *configdat*    (megatest.config info)
+;;           *runconfigdat* (runconfigs.config info)
+;;           *configstatus* (status of the read data)
+;;   *launch-setup-mutex* is held throughout and released even on a non-local exit.
+(define (launch:setup #!key (force-reread #f) (areapath #f))
+  (dynamic-wind
+    (lambda () (mutex-lock! *launch-setup-mutex*))
+    (lambda ()
+      (if (and *toppath*
+               (eq? *configstatus* 'fulldata) (not force-reread)) ;; got it all
+          (begin
+            (debug:print 2 *default-log-port* "NOTE: skipping launch:setup-body call since we have fulldata")
+            *toppath*)
+          (launch:setup-body force-reread: force-reread areapath: areapath)))
+    (lambda () (mutex-unlock! *launch-setup-mutex*))))
+
+;; Work out where the cached configs would live, based on what is known so far.
+;; Returns a pair (mtcachef . rccachef); both are #f when runname, target or linktree
+;; is missing or common:directory-writable? yields #f for the candidate directories.
+;; The test directory is preferred over the run directory when it is usable.
+;; areapath, toppath and mtconfig are accepted but not used here.
+(define (launch:get-cache-file-paths areapath toppath target mtconfig)
+  (let* ((use-cache  (common:use-cache?)) ;; value is not used in this procedure
+         (run-name   (common:args-get-runname))
+         (link-root  (common:get-linktree))
+         (full-test  (common:get-full-test-name))
+         (run-area   (and run-name target link-root
+                          (common:directory-writable? (conc link-root "/" target "/" run-name))))
+         (test-area  (and run-area full-test
+                          (common:directory-writable? (conc run-area "/" full-test))))
+         (cache-area (or test-area run-area))
+         (cache-file (lambda (prefix) ;; versioned cache file name inside cache-area, or #f
+                       (and cache-area
+                            (conc cache-area "/" prefix megatest-version "-" megatest-fossil-hash))))
+         (mt-cache   (cache-file ".megatest.cfg-"))
+         (rc-cache   (cache-file ".runconfigs.cfg-")))
+    (debug:print-info 6 *default-log-port*
+                      "runname=" run-name "\n  linktree=" link-root
+                      "\n  testname=" full-test "\n  rundir=" run-area
+                      "\n  testdir=" test-area "\n  cachedir=" cache-area
+                      "\n  mtcachef=" mt-cache "\n  rccachef=" rc-cache)
+    (cons mt-cache rc-cache)))
+
+;; Worker for launch:setup. Locates and reads megatest.config and runconfigs.config
+;; (or their cached alists), fills in *configdat*, *runconfigdat*, *configinfo*,
+;; *configstatus* and *toppath*, tries to create the linktree directory and the
+;; <toppath>/lt link, and writes the cache files when their paths are known.
+;; force-reread bypasses both the early return and the read-from-cache branch;
+;; areapath is used as the area path when *toppath* is not yet set.
+;; Returns *toppath*, which is reset to #f when it is not an existing directory.
+;;
+(define (launch:setup-body #!key (force-reread #f) (areapath #f))
+  (if (and (eq? *configstatus* 'fulldata)
+           *toppath*
+           (not force-reread)) ;; no need to reprocess
+      *toppath*   ;; return toppath
+      (let* ((use-cache (common:use-cache?)) ;; BB- use-cache checks *configdat* for use-cache setting.  We do not have *configdat*.  Bootstrapping problem here.
+             (toppath  (or *toppath* areapath (getenv "MT_RUN_AREA_HOME"))) ;; preserve toppath
+             (target   (common:args-get-target))
+             (sections (if target (list "default" target) #f)) ;; for runconfigs
+             (mtconfig (or (args:get-arg "-config") "megatest.config")) ;; allow overriding megatest.config
+             (cachefiles (launch:get-cache-file-paths areapath toppath target mtconfig))
+             ;; checking for null cachefiles should not be necessary, I was seeing error car of '(), might be a chicken bug or a red herring ...
+             (mtcachef   (if (null? cachefiles)
+                             #f
+                             (car cachefiles)))
+             (rccachef   (if (null? cachefiles)
+                             #f
+                             (cdr cachefiles))))
+        (set! *toppath* toppath) ;; This is needed when we are running as a test using CMDINFO as a datasource
+        (cond
+         ;; if mtcachef exists just read it, however we need to assume toppath is available in $MT_RUN_AREA_HOME
+         ((and (not force-reread)
+               mtcachef  rccachef
+               use-cache
+               (get-environment-variable "MT_RUN_AREA_HOME")
+               (common:file-exists? mtcachef)
+               (common:file-exists? rccachef))
+          (set! *configdat*    (configf:read-alist mtcachef))
+          (set! *runconfigdat* (configf:read-alist rccachef))
+          (set! *configinfo*   (list *configdat*  (get-environment-variable "MT_RUN_AREA_HOME")))
+          (set! *configstatus* 'fulldata)
+          (set! *toppath*      (get-environment-variable "MT_RUN_AREA_HOME"))
+          *toppath*)
+         ;; there are no existing cached configs, do full reads of the configs and cache them
+         ;; we have all the info needed to fully process runconfigs and megatest.config
+         ((and ;; (not force-reread) ;; force-reread is irrelevant in the AND, could however OR it?
+               mtcachef
+               rccachef) ;; BB- why are we doing this without asking if caching is desired?
+          (let* ((first-pass    (configf:find-and-read-config        ;; NB// sets MT_RUN_AREA_HOME as side effect
+                                 mtconfig
+                                 environ-patt: "env-override"
+                                 given-toppath: toppath
+                                 pathenvvar: "MT_RUN_AREA_HOME"))
+                 (first-rundat  (let ((toppath (or toppath
+                                                   ;; first-pass is (configdat toppath), or #f when nothing was found
+                                                   (and first-pass (cadr first-pass)))))
+                                  (configf:read-config ;; (conc toppath "/runconfigs.config") ;; this should be converted to runconfig:read but it is non-trivial, leaving it for now.
+                                   (conc (if (string? toppath)
+                                             toppath
+                                             (get-environment-variable "MT_RUN_AREA_HOME"))
+                                         "/runconfigs.config")
+                                   *runconfigdat* #t
+                                   sections: sections))))
+            (set! *runconfigdat* first-rundat)
+            (if first-pass  ;;
+                (begin
+                  (set! *configdat*  (car first-pass))
+                  (set! *configinfo* first-pass)
+                  (set! *toppath*    (or toppath (cadr first-pass))) ;; use the gathered data unless already have it
+                  (set! toppath      *toppath*)
+                  (if (not *toppath*)
+                      (begin
+                        (debug:print-error 0 *default-log-port* "you are not in a megatest area!")
+                        (exit 1)))
+                  (setenv "MT_RUN_AREA_HOME" *toppath*)
+                  ;; the seed read is done, now read runconfigs, cache it then read megatest.config one more time and cache it
+                  (let* ((keys         (rmt:get-keys))
+                         (key-vals     (keys:target->keyval keys target))
+                         (linktree     (common:get-linktree)) ;; (or (getenv "MT_LINKTREE")(if *configdat* (configf:lookup *configdat* "setup" "linktree") #f)))
+                         ;; NOTE(review): second-pass is bound but never used below; *configdat* still holds the first-pass data - confirm intent
+                         (second-pass  (configf:find-and-read-config
+                                        mtconfig
+                                        environ-patt: "env-override"
+                                        given-toppath: toppath
+                                        pathenvvar: "MT_RUN_AREA_HOME"))
+                         (runconfigdat (begin     ;; this read of the runconfigs will see any adjustments made by re-reading megatest.config
+                                         (for-each (lambda (kt)
+                                                     (setenv (car kt) (cadr kt)))
+                                                   key-vals)
+                                         (configf:read-config (conc toppath "/runconfigs.config") *runconfigdat* #t ;; consider using runconfig:read some day ...
+                                                      sections: sections)))
+                         (cachefiles   (launch:get-cache-file-paths areapath toppath target mtconfig))
+                         (mtcachef     (car cachefiles))
+                         (rccachef     (cdr cachefiles)))
+                    ;;  trap exception due to stale NFS handle -- Error: (open-output-file) cannot open file - Stale NFS file handle: "/p/fdk/gwa/lefkowit/mtTesting/qa/primbeqa/links/p1222/11/PDK_r1.1.1/prim/clean/pcell_testgen/.runconfigs.cfg-1.6427-7d1e789cb3f62f9cde719a4865bb51b3c17ea853" - ticket 220546342
+                    ;; TODO - consider 1) using simple-lock to bracket cache write
+                    ;;                 2) cache in hash on server, since need to do rmt: anyway to lock.
+
+                    (if rccachef
+                        (common:fail-safe
+                         (lambda ()
+                           (configf:write-alist runconfigdat rccachef))
+                         (conc "Could not write cache file - "rccachef)))
+                    (if mtcachef
+                        (common:fail-safe
+                         (lambda ()
+                           (configf:write-alist *configdat* mtcachef))
+                         (conc "Could not write cache file - "mtcachef)))
+                    (set! *runconfigdat* runconfigdat)
+                    (if (and rccachef mtcachef) (set! *configstatus* 'fulldata))))
+                ;; no configs found? should not happen but let's try to recover gracefully, return an empty hash-table
+                (set! *configdat* (make-hash-table))
+                )))
+
+         ;; else read what you can and set the flag accordingly
+         ;; here we don't have either mtcachef or rccachef
+         (else
+          (let* ((cfgdat   (configf:find-and-read-config
+                            (or (args:get-arg "-config") "megatest.config")
+                            environ-patt: "env-override"
+                            given-toppath: (get-environment-variable "MT_RUN_AREA_HOME")
+                            pathenvvar: "MT_RUN_AREA_HOME")))
+
+            (if (and cfgdat (list? cfgdat) (> (length cfgdat) 0) (hash-table? (car cfgdat)))
+                (let* ((toppath  (or (get-environment-variable "MT_RUN_AREA_HOME")(cadr cfgdat)))
+                       (rdat     (configf:read-config (conc toppath  ;; convert this to use runconfig:read!
+                                                    "/runconfigs.config") *runconfigdat* #t sections: sections)))
+                  (set! *configinfo*   cfgdat)
+                  (set! *configdat*    (car cfgdat))
+                  (set! *runconfigdat* rdat)
+                  (set! *toppath*      toppath)
+                  (set! *configstatus* 'partial))
+                (begin
+                  (debug:print-error 0 *default-log-port* "No " mtconfig " file found. Giving up.")
+                  (exit 2))))))
+        ;; COND ends here.
+
+        ;; additional house keeping
+        ;; fall back to <toppath>/lt only when *toppath* is known, so a missing toppath is reported instead of used
+        (let* ((linktree (or (common:get-linktree)
+                             (and *toppath* (conc *toppath* "/lt")))))
+          (if linktree
+              (begin
+                (if (not (common:file-exists? linktree))
+                    (begin
+                      (handle-exceptions
+                          exn
+                          (begin
+                            (debug:print-error 0 *default-log-port* "Something went wrong when trying to create linktree dir at " linktree)
+                            (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
+                            (exit 1))
+                        (create-directory linktree #t))))
+                (handle-exceptions
+                    exn
+                    (begin
+                      (debug:print-error 0 *default-log-port* "Something went wrong when trying to create link to linktree at " *toppath*)
+                      (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)))
+                  (let ((tlink (conc *toppath* "/lt")))
+                    (if (not (common:file-exists? tlink))
+                        (create-symbolic-link linktree tlink)))))
+              (begin
+                (debug:print-error 0 *default-log-port* "linktree not defined in [setup] section of megatest.config")
+                )))
+        ;; the value of this form is discarded; only the setenv / set! side effects matter
+        (if (and *toppath*
+                 (directory-exists? *toppath*))
+            (begin
+              (setenv "MT_RUN_AREA_HOME" *toppath*)
+              (setenv "MT_TESTSUITENAME" (common:get-testsuite-name)))
+            (begin
+              (debug:print-error 0 *default-log-port* "failed to find the top path to your Megatest area.")
+              (set! *toppath* #f) ;; force it to be false so we return #f
+              #f))
+
+        ;; one more attempt to cache the configs for future reading
+        (let* ((cachefiles   (launch:get-cache-file-paths areapath toppath target mtconfig))
+               (mtcachef     (car cachefiles))
+               (rccachef     (cdr cachefiles)))
+
+          ;; trap exception due to stale NFS handle -- Error: (open-output-file) cannot open file - Stale NFS file handle: "...somepath.../.runconfigs.cfg-1.6427-7d1e789cb3f62f9cde719a4865bb51b3c17ea853" - ticket 220546342
+          ;; TODO - consider 1) using simple-lock to bracket cache write
+          ;;                 2) cache in hash on server, since need to do rmt: anyway to lock.
+          (if (and rccachef *runconfigdat* (not (common:file-exists? rccachef)))
+              (common:fail-safe
+               (lambda ()
+                 (configf:write-alist *runconfigdat* rccachef))
+               (conc "Could not write cache file - "rccachef))
+              )
+          (if (and mtcachef *configdat*    (not (common:file-exists? mtcachef)))
+              (common:fail-safe
+               (lambda ()
+                 (configf:write-alist *configdat* mtcachef))
+               (conc "Could not write cache file - "mtcachef))
+              )
+          (if (and rccachef mtcachef *runconfigdat* *configdat*)
+              (set! *configstatus* 'fulldata)))
+
+        ;; if have -append-config then read and append here
+        (let ((cfname (args:get-arg "-append-config")))
+          (if (and cfname
+                   (file-read-access? cfname))
+              (configf:read-config cfname *configdat* #t))) ;; values are added to the hash, no need to do anything special.
+        *toppath*)))
+
+;; Pick the directory under which test run areas are created.
+;; The disk list is the "disks" entry of testconfig when present, else of confdat; the
+;; free-space threshold is "minspace" from [setup] in confdat (default 10000).
+;; Returns a path string, or #f when there is no "disks" entry at all.
+;; NOTE(review): assumes common:get-disk-with-most-free-space returns a pair whose cdr is the path.
+(define (get-best-disk confdat testconfig)
+  (let* ((disks    (or (and testconfig (hash-table-ref/default testconfig "disks" #f))
+                       (hash-table-ref/default confdat "disks" #f)))
+         (minspace (string->number (or (configf:lookup confdat "setup" "minspace") "10000")))
+         (fallback (lambda () (conc *toppath* "/runs"))) ;; last-resort location, returned as a plain path
+         (best     (and disks (common:get-disk-with-most-free-space disks minspace))))
+    (cond
+     ((not disks)   #f) ;; explicit #f so callers can test the result
+     (best          (cdr best))
+     ((null? disks) (fallback))
+     (else
+      ;; no disk qualified: try to create the configured directories, longest path first,
+      ;; and return the first one that create-directory accepts
+      (let loop ((candidates (sort disks (lambda (x y) (> (string-length (cadr x)) (string-length (cadr y)))))))
+        (if (null? candidates)
+            (fallback)
+            (or (handle-exceptions exn #f (create-directory (cadr (car candidates)) #t))
+                (loop (cdr candidates)))))))))
+
+
+;; Copy the test source tree at test-src-path into test-path. Uses the [setup] testcopycmd
+;; template from *configdat* when set (TEST_SRC_PATH and TEST_TARG_PATH are substituted),
+;; otherwise rsync with its output appended to <test-path>/mt_launch.log.
+(define (launch:test-copy test-src-path test-path)
+  (let* ((template (configf:lookup *configdat* "setup" "testcopycmd"))
+         (custom   (and template
+                        (string-substitute "TEST_TARG_PATH" test-path
+                                           (string-substitute "TEST_SRC_PATH" test-src-path template #t) #t)))
+         (copy-cmd (or custom
+                       (conc "rsync -av" (if (debug:debug-mode 1) "" "q") " " test-src-path "/ " test-path "/"
+                             " >> " test-path "/mt_launch.log 2>> " test-path "/mt_launch.log")))
+         (rc       (system copy-cmd)))
+    (unless (eq? rc 0)
+      (debug:print 2 *default-log-port* "ERROR: problem with running \"" copy-cmd "\""))))
+
+
+;; Desired directory structure:
+;;
+;;  <linkdir> - <target> - <testname> -.
+;;                                     |
+;;                                     v
+;;  <rundir>  -  <target>  -    <testname> -|- <itempath(s)>
+;;
+;;  dir stored in test is:
+;;
+;;  <linkdir> - <target> - <testname> [ - <itempath> ]
+;;
+;; All log file links should be stored relative to the top of link path
+;;
+;; <target> - <testname> [ - <itempath> ]
+;;
+;; create-work-area builds the run directory under disk-path, links it into the linktree,
+;; records the paths with rmt:general-call and copies test-src-path into the new area.
+;;   run-info - run name string, or a run record from which "runname" is looked up
+;;   itemdat  - item path string, or an item list converted with item-list->path
+;;   remtries - how many more times to retry when the test directory is still missing
+;; Returns (list lnkpathf lnkpath) on success, (list #f #f) otherwise.
+;;
+(define (create-work-area run-id run-info keyvals test-id test-src-path disk-path testname itemdat #!key (remtries 2))
+  (let* ((item-path (if (string? itemdat) itemdat (item-list->path itemdat))) ;; if pass in string - just use it
+         (runname   (if (string? run-info) ;; if we pass in a string as run-info use it as run-name.
+                        run-info
+                        (db:get-value-by-header (db:get-rows run-info)
+                                                (db:get-header run-info)
+                                                "runname")))
+         (contour   #f) ;; NOT READY FOR THIS (args:get-arg "-contour"))
+         ;; convert back to db: from rdb: - this is always run at server end
+         (target   (string-intersperse (map cadr keyvals) "/"))
+
+         (not-iterated  (equal? "" item-path))
+
+         ;; all tests are found at <rundir>/test-base or <linkdir>/test-base
+         (testtop-base (conc target "/" runname "/" testname))
+         (test-base    (conc testtop-base (if not-iterated "" "/") item-path))
+
+         ;; nb// if itempath is not "" then it is prefixed with "/"
+         (toptest-path (conc disk-path (if contour (conc "/" contour) "") "/" testtop-base))
+         (test-path    (conc disk-path (if contour (conc "/" contour) "") "/" test-base))
+
+         ;; ensure this exists first as links to subtests must be created there
+         (linktree  (common:get-linktree))
+
+         (lnkbase   (conc linktree (if contour (conc "/" contour) "") "/" target "/" runname))
+         (lnkpath   (conc lnkbase "/" testname))
+         (lnkpathf  (conc lnkpath (if not-iterated "" "/") item-path))
+         (lnktarget (conc lnkpath "/" item-path)))
+
+    ;; Update the rundir path in the test record for all, rundir=physical, shortdir=logical
+    ;;                                                 rundir   shortdir
+    (rmt:general-call 'test-set-rundir-shortdir run-id lnkpathf test-path testname item-path run-id)
+
+    (debug:print 2 *default-log-port* "INFO:\n       lnkbase=" lnkbase "\n       lnkpath=" lnkpath "\n  toptest-path=" toptest-path "\n     test-path=" test-path)
+    (if (not (common:file-exists? linktree))
+        (begin
+          (debug:print 0 *default-log-port* "WARNING: linktree did not exist! Creating it now at " linktree)
+          (create-directory linktree #t))) ;; (system (conc "mkdir -p " linktree))))
+    ;; create the directory for the tests dir links, this is needed no matter what... try up to three times
+    (let loop ((attempts-left 3))
+      ;; success is true when lnkbase already exists or was just created, #f when creation raised
+      (let ((success (or (common:directory-exists? lnkbase)
+                         (common:file-exists? lnkbase)
+                         (handle-exceptions
+                          exn
+                          (begin
+                            (debug:print-error 0 *default-log-port* "Problem creating linktree base at " lnkbase)
+                            (print-error-message exn (current-error-port))
+                            #f) ;; failed: let the loop try again
+                          (create-directory lnkbase #t)
+                          #t))))
+        (if (and (not success) (> attempts-left 1))
+            (loop (- attempts-left 1)))))
+
+    ;; update the toptest record with its location rundir, cache the path
+    ;; This was highly inefficient, one db write for every subtest, potentially
+    ;; thousands of unnecessary updates, cache the fact it was set and don't set it
+    ;; again.
+
+    ;; Now create the link from the test path to the link tree, however
+    ;; if the test is iterated it is necessary to create the parent path
+    ;; to the iteration. use pathname-directory to trim the path by one
+    ;; level
+    (if (not not-iterated) ;; i.e. iterated
+        (let ((iterated-parent  (pathname-directory (conc lnkpath "/" item-path))))
+          (debug:print-info 2 *default-log-port* "Creating iterated parent " iterated-parent)
+          (handle-exceptions
+           exn
+           (begin
+             (debug:print-error 0 *default-log-port* " Failed to create directory " iterated-parent ((condition-property-accessor 'exn 'message) exn) ", exiting")
+             (exit 1))
+           (create-directory iterated-parent #t))))
+
+    (if (symbolic-link? lnkpath)
+        (handle-exceptions
+         exn
+         (begin
+           (debug:print-error 0 *default-log-port* " Failed to remove symlink " lnkpath ((condition-property-accessor 'exn 'message) exn) ", exiting")
+           (exit 1))
+         (delete-file lnkpath)))
+
+    (if (not (or (common:file-exists? lnkpath)
+                 (symbolic-link? lnkpath)))
+        (handle-exceptions
+         exn
+         (begin
+           (debug:print-error 0 *default-log-port* " Failed to create symlink " lnkpath ((condition-property-accessor 'exn 'message) exn) ", exiting")
+           (exit 1))
+         (create-symbolic-link toptest-path lnkpath)))
+
+    ;; NB - This was not working right - some top tests are not getting the path set!!!
+    ;;
+    ;; Do the setting of this record after the paths are created so that the shortdir can
+    ;; be set to the real directory location. This is safer for future clean up if the link
+    ;; tree is damaged or lost.
+    ;;
+    (if (not (hash-table-ref/default *toptest-paths* testname #f))
+        (let* ((testinfo       (rmt:get-test-info-by-id run-id test-id)) ;;  run-id testname item-path))
+               ;; rundir currently recorded for this test, #f when rmt:get-test-info-by-id returned #f
+               (curr-test-path (and testinfo (db:test-get-rundir testinfo))))
+          (hash-table-set! *toptest-paths* testname curr-test-path)
+          ;; NB// Was this for the test or for the parent in an iterated test?
+          (rmt:general-call 'test-set-rundir-shortdir run-id lnkpath
+                            (if (common:file-exists? lnkpath)
+                                (common:nice-path lnkpath)
+                                lnkpath)
+                            testname "" run-id)
+          (if (or (not curr-test-path)
+                  (not (directory-exists? toptest-path)))
+              (begin
+                (debug:print-info 2 *default-log-port* "Creating " toptest-path " and link " lnkpath)
+                (handle-exceptions
+                 exn
+                 #f ;; don't care to catch and deal with errors here for now.
+                 (create-directory toptest-path #t))
+                (hash-table-set! *toptest-paths* testname toptest-path)))))
+
+    ;; The toptest path has been created, the link to the test in the linktree has
+    ;; been created. Now, if this is an iterated test the real test dir must be created
+    (if (not not-iterated) ;; this is an iterated test
+        (begin
+          (debug:print 2 *default-log-port* "Setting up sub test run area")
+          (debug:print 2 *default-log-port* " - creating run area in " test-path)
+          (handle-exceptions
+           exn
+           (begin
+             (debug:print-error 0 *default-log-port* " Failed to create directory " test-path ((condition-property-accessor 'exn 'message) exn) ", exiting")
+             (exit 1))
+           (create-directory test-path #t))
+          (debug:print 2 *default-log-port*
+                       " - creating link from: " test-path "\n"
+                       "                   to: " lnktarget)
+
+          ;; If there is already a symlink delete it and recreate it.
+          (handle-exceptions
+           exn
+           (begin
+             (debug:print-error 0 *default-log-port* " Failed to re-create link " lnktarget ((condition-property-accessor 'exn 'message) exn) ", exiting")
+             (exit 1)) ;; non-zero status, matching the other failure exits in this procedure
+           (if (symbolic-link? lnktarget)     (delete-file lnktarget))
+           (if (not (common:file-exists? lnktarget)) (create-symbolic-link test-path lnktarget)))))
+
+    (if (not (directory? test-path))
+        (create-directory test-path #t)) ;; this is a hack, I don't know why out of the blue this path does not exist sometimes
+
+    ;; success needs a source path and an existing test directory; otherwise retry or give up
+    (if (and test-src-path (directory? test-path))
+        (begin
+          (launch:test-copy test-src-path test-path)
+          (list lnkpathf lnkpath ))
+        (if (and test-src-path (> remtries 0))
+            (begin
+              (debug:print-error 0 *default-log-port* "Failed to create work area at " test-path " with link at " lnktarget ", remaining attempts " remtries)
+              (create-work-area run-id run-info keyvals test-id test-src-path disk-path testname itemdat remtries: (- remtries 1)))
+            (list #f #f)))))
+
+
+;; Rate-limited zombie scan for run-id. The time of the last scan is kept in the
+;; variable "zombiescan-runid-<run-id>" (read with rmt:get-var, written with rmt:set-var).
+;; rmt:find-and-mark-incomplete is called only when no scan time is recorded or the
+;; recorded time is older than twice the [setup] deadtime (120 when not configured).
+;;
+(define (launch:handle-zombie-tests run-id)
+  (let* ((scan-key  (conc "zombiescan-runid-"run-id))
+         (deadtime  (or (configf:lookup-number *configdat* "setup" "deadtime") 120))
+         (cutoff    (- (current-seconds) (* 2 deadtime)))
+         (last-scan (rmt:get-var scan-key)))
+    (if (or (not last-scan)
+            (< last-scan cutoff))
+        (begin
+          (debug:print 1 *default-log-port* "INFO: search and mark zombie tests")
+          (rmt:set-var scan-key (current-seconds))
+          (rmt:find-and-mark-incomplete run-id #f)))))
+
+
+
+
+
+;; 1. look through disks list for disk with most space
+;; 2. create run dir on disk, path name is meaningful
+;; 3. create link from run dir to megatest runs area 
+;; 4. remotely run the test on allocated host
+;;    - could be ssh to host from hosts table (update regularly with load)
+;;    - could be netbatch
+;;      (launch-test db (cadr status) test-conf))
+(define (launch-test test-id run-id run-info keyvals runname test-conf test-name test-path itemdat params)
+  (mutex-lock! *launch-setup-mutex*) ;; setting variables and processing the testconfig is NOT thread-safe, reuse the launch-setup mutex
+  (let* ( ;; (lock-key        (conc "test-" test-id))
+	;; (got-lock        (let loop ((lock        (rmt:no-sync-get-lock lock-key))
+	;; 			     (expire-time (+ (current-seconds) 15))) ;; give up on getting the lock and steal it after 15 seconds
+	;; 		    (if (car lock)
+	;; 			#t
+	;; 			(if (> (current-seconds) expire-time)
+	;; 			    (begin
+	;; 			      (debug:print-info 0 *default-log-port* "Timed out waiting for a lock to launch test " keyvals " " runname " " test-name " " test-path)
+	;; 			      (rmt:no-sync-del! lock-key) ;; destroy the lock
+	;; 			      (loop (rmt:no-sync-get-lock lock-key) expire-time)) ;; 
+	;; 			    (begin
+	;; 			      (thread-sleep! 1)
+	;; 			      (loop (rmt:no-sync-get-lock lock-key) expire-time))))))
+	 (item-path       (item-list->path itemdat))
+	 (contour         #f)) ;; NOT READY FOR THIS (args:get-arg "-contour")))
+    (let loop ((delta        (- (current-seconds) *last-launch*))
+	       (launch-delay (configf:lookup-number *configdat* "setup" "launch-delay" default: 1)))
+      (if (> launch-delay delta)
+	  (begin
+	    (if (common:low-noise-print 1200 "test launch delay") ;; every two hours or so remind the user about launch delay.
+		(debug:print-info 0 *default-log-port* "NOTE: test launches are delayed by " launch-delay " seconds. See megatest.config launch-delay setting to adjust.")) ;; launch of " test-name " for " (- launch-delay delta) " seconds"))
+	    (thread-sleep! (- launch-delay delta))
+	    (loop (- (current-seconds) *last-launch*) launch-delay))))
+    (change-directory *toppath*)
+    (alist->env-vars ;; consolidate this code with the code in megatest.scm for "-execute", *maybe* - the longer they are set the longer each launch takes (must be non-overlapping with the vars)
+     (append
+      (list
+       (list "MT_RUN_AREA_HOME" *toppath*)
+       (list "MT_TEST_NAME" test-name)
+       (list "MT_RUNNAME"   runname)
+       (list "MT_ITEMPATH"  item-path)
+       (list "MT_CONTOUR"   contour)
+       )
+      itemdat))
+    (let* ((tregistry       (tests:get-all)) ;; third param (below) is system-allowed
+           ;; for tconfig, why do we allow fallback to test-conf?
+	   (tconfig         (or (tests:get-testconfig test-name item-path tregistry #t force-create: #t)
+				(begin
+                                  (debug:print 0 *default-log-port* "WARNING: falling back to pre-calculated testconfig. This is likely not desired.")
+                                  test-conf))) ;; force re-read now that all vars are set
+	   (useshell        (let ((ush (configf:lookup *configdat* "jobtools"     "useshell")))
+			      (if ush 
+				  (if (equal? ush "no") ;; must use "no" to NOT use shell
+				      #f
+				      ush)
+				  #t)))     ;; default is yes
+	   (runscript       (configf:lookup tconfig   "setup"        "runscript"))
+	   (ezsteps         (> (length (hash-table-ref/default tconfig "ezsteps" '())) 0)) ;; don't send all the steps, could be big, just send a flag
+	   (subrun          (> (length (hash-table-ref/default tconfig "subrun"  '())) 0)) ;; send a flag to process a subrun
+	   ;; (diskspace       (configf:lookup tconfig   "requirements" "diskspace"))
+	   ;; (memory          (configf:lookup tconfig   "requirements" "memory"))
+	   ;; (hosts           (configf:lookup *configdat* "jobtools"     "workhosts")) ;; I'm pretty sure this was never completed
+	   (remote-megatest (configf:lookup *configdat* "setup" "executable"))
+	   (run-time-limit  (or (configf:lookup  tconfig   "requirements" "runtimelim")
+				(configf:lookup  *configdat* "setup" "runtimelim")))
+	   ;; FIXME SOMEDAY: not good how this is so obtuse, this hack is to 
+	   ;;                allow running from dashboard. Extract the path
+	   ;;                from the called megatest and convert dashboard
+	   ;;             	  or dboard to megatest
+	   (local-megatest  (let* ((lm  (car (argv)))
+				   (dir (pathname-directory lm))
+				   (exe (pathname-strip-directory lm)))
+			      (conc (if dir (conc dir "/") "")
+				    (case (string->symbol exe)
+				      ((dboard)    "../megatest")
+				      ((mtest)     "../megatest")
+				      ((dashboard) "megatest")
+				      (else exe)))))
+	   (launcher        (common:get-launcher *configdat* test-name item-path)) ;; (configf:lookup *configdat* "jobtools"     "launcher"))
+	   (test-sig        (conc (common:get-testsuite-name) ":" test-name ":" item-path)) ;; (item-list->path itemdat))) ;; test-path is the full path including the item-path
+	   (work-area       #f)
+	   (toptest-work-area #f) ;; for iterated tests the top test contains data relevant for all
+	   (diskpath   #f)
+	   (cmdparms   #f)
+	   (fullcmd    #f) ;; (define a (with-output-to-string (lambda ()(write x))))
+	   (mt-bindir-path #f)
+	   (testinfo   (rmt:get-test-info-by-id run-id test-id))
+	   (mt_target  (string-intersperse (map cadr keyvals) "/"))
+	   (debug-param (append (if (args:get-arg "-debug")  (list "-debug" (args:get-arg "-debug")) '())
+				(if (args:get-arg "-logging")(list "-logging") '()))))
+      ;; (if hosts (set! hosts (string-split hosts)))
+      ;; set the megatest to be called on the remote host
+      (if (not remote-megatest)(set! remote-megatest local-megatest)) ;; "megatest"))
+      (set! mt-bindir-path (pathname-directory remote-megatest))
+      (if launcher (set! launcher (string-split launcher)))
+      ;; set up the run work area for this test
+      (if (and (args:get-arg "-preclean") ;; user has requested to preclean for this run
+	       (not (member (db:test-get-rundir testinfo)(list "n/a" "/tmp/badname")))) ;; n/a is a placeholder and thus not a read dir
+	  (begin
+	    (debug:print-info 0 *default-log-port* "attempting to preclean directory " (db:test-get-rundir testinfo) " for test " test-name "/" item-path)
+	    (runs:remove-test-directory testinfo 'remove-data-only))) ;; remove data only, do not perturb the record
+      
+      ;; prevent overlapping actions - set to LAUNCHED as early as possible
+      ;;
+      ;; the following call handles waiver propogation. cannot yet condense into roll-up-pass-fail
+      (tests:test-set-status! run-id test-id "LAUNCHED" "n/a" #f #f) ;; (if launch-results launch-results "FAILED"))
+      (rmt:set-state-status-and-roll-up-items run-id test-name item-path #f "LAUNCHED" #f)
+      ;; (pp (hash-table->alist tconfig))
+      (set! diskpath (get-best-disk *configdat* tconfig))
+      (if diskpath
+	  (let ((dat  (create-work-area run-id run-info keyvals test-id test-path diskpath test-name itemdat)))
+	    (set! work-area (car dat))
+	    (set! toptest-work-area (cadr dat))
+	    (debug:print-info 2 *default-log-port* "Using work area " work-area))
+	  (begin
+	    (set! work-area (conc test-path "/tmp_run"))
+	    (create-directory work-area #t)
+	    (debug:print 0 *default-log-port* "WARNING: No disk work area specified - running in the test directory under tmp_run")))
+      (set! cmdparms (base64:base64-encode 
+		      (z3:encode-buffer 
+		       (with-output-to-string
+			 (lambda () ;; (list 'hosts     hosts)
+			   (write (list (list 'testpath  test-path)
+					;; (list 'transport (conc *transport-type*))
+					;; (list 'serverinf *server-info*)
+					(list 'homehost  (let* ((hhdat (common:get-homehost)))
+							   (if hhdat
+							       (car hhdat)
+							       #f)))
+					(list 'serverurl (if *runremote*
+							     (remote-server-url *runremote*)
+							     #f)) ;;
+					(list 'areaname  (common:get-testsuite-name))
+					(list 'toppath   *toppath*)
+					(list 'work-area work-area)
+					(list 'test-name test-name) 
+					(list 'runscript runscript) 
+					(list 'run-id    run-id   )
+					(list 'test-id   test-id  )
+					;; (list 'item-path item-path )
+					(list 'itemdat   itemdat  )
+					(list 'megatest  remote-megatest)
+					(list 'ezsteps   ezsteps)
+					(list 'subrun    subrun)
+					(list 'target    mt_target)
+					(list 'contour   contour)
+					(list 'runtlim   (if run-time-limit (common:hms-string->seconds run-time-limit) #f))
+					(list 'env-ovrd  (hash-table-ref/default *configdat* "env-override" '())) 
+					(list 'set-vars  (if params (hash-table-ref/default params "-setvars" #f)))
+					(list 'runname   runname)
+					(list 'mt-bindir-path mt-bindir-path))))))))
+      
+      ;; clean out step records from previous run if they exist
+      ;; (rmt:delete-test-step-records run-id test-id)
+      ;; if the dir does not exist we may have a itempath where individual variables are a path, launch anyway
+      (if (common:file-exists? work-area)
+	  (change-directory work-area)) ;; so that log files from the launch process don't clutter the test dir
+      (cond
+       ;; ((and launcher hosts) ;; must be using ssh hostname
+       ;;    (set! fullcmd (append launcher (car hosts)(list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param)))
+       ;; (set! fullcmd (append launcher (car hosts)(list remote-megatest test-sig "-execute" cmdparms))))
+       (launcher
+	(set! fullcmd (append launcher (list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param)))
+       ;; (set! fullcmd (append launcher (list remote-megatest test-sig "-execute" cmdparms))))
+       (else
+	(if (not useshell)(debug:print 0 *default-log-port* "WARNING: internal launching will not work well without \"useshell yes\" in your [jobtools] section"))
+	(set! fullcmd (append (list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param (list (if useshell "&" ""))))))
+      ;; (set! fullcmd (list remote-megatest test-sig "-execute" cmdparms (if useshell "&" "")))))
+      (if (args:get-arg "-xterm")(set! fullcmd (append fullcmd (list "-xterm"))))
+      (debug:print 1 *default-log-port* "Launching " work-area)
+      ;; set pre-launch-env-vars before launching, keep the vars in prevvals and put the envionment back when done
+      (debug:print 4 *default-log-port* "fullcmd: " fullcmd)
+      (set! *last-launch* (current-seconds)) ;; all that junk above takes time, set this as late as possible.
+      (let* ((commonprevvals (alist->env-vars
+			      (hash-table-ref/default *configdat* "env-override" '())))
+	     (miscprevvals   (alist->env-vars ;; consolidate this code with the code in megatest.scm for "-execute"
+			      (append (list (list "MT_TEST_RUN_DIR" work-area)
+					    (list "MT_TEST_NAME" test-name)
+					    (list "MT_ITEM_INFO" (conc itemdat)) 
+					    (list "MT_RUNNAME"   runname)
+					    (list "MT_TARGET"    mt_target)
+					    (list "MT_ITEMPATH"  item-path)
+					    )
+				      itemdat)))
+	     (testprevvals   (alist->env-vars
+			      (hash-table-ref/default tconfig "pre-launch-env-overrides" '())))
+	     ;; Launchwait defaults to true, must override it to turn off wait
+	     (launchwait     (if (equal? (configf:lookup *configdat* "setup" "launchwait") "no") #f #t))
+	     (launch-results-prev (apply (if launchwait ;; BB: TODO: refactor this to examine return code of launcher, if nonzero, set state to launch failed.
+					process:cmd-run-with-stderr-and-exitcode->list
+					process-run)
+				    (if useshell
+					(let ((cmdstr (string-intersperse fullcmd " ")))
+					  (if launchwait
+					      cmdstr
+					      (conc cmdstr " >> mt_launch.log 2>&1 &")))
+					(car fullcmd))
+				    (if useshell
+					'()
+					(cdr fullcmd))))
+             (success        (if launchwait (equal? 0 (cadr launch-results-prev)) #t))
+             (launch-results (if launchwait (car launch-results-prev) launch-results-prev)))
+        (if (not success)
+            (tests:test-set-status! run-id test-id "COMPLETED" "DEAD" "launcher failed; exited non-zero; check mt_launch.log" #f)) ;; (if launch-results launch-results "FAILED"))
+        (mutex-unlock! *launch-setup-mutex*) ;; yes, really should mutex all the way to here. Need to put this entire process into a fork.
+	;; (rmt:no-sync-del! lock-key)         ;; release the lock for starting this test
+	(if (not launchwait) ;; give the OS a little time to allow the process to start
+	    (thread-sleep! 0.01))
+	(with-output-to-file "mt_launch.log"
+	  (lambda ()
+	    (print "LAUNCHCMD: " (string-intersperse fullcmd " "))
+	    (if (list? launch-results)
+		(apply print launch-results)
+		(print "NOTE: launched \"" fullcmd "\"\n  but did not wait for it to proceed. Add the following to megatest.config \n[setup]\nlaunchwait yes\n  if you have problems with this"))
+	    #:append))
+	(debug:print 2 *default-log-port* "Launching completed, updating db")
+	(debug:print 2 *default-log-port* "Launch results: " launch-results)
+	(if (not launch-results)
+	    (begin
+	      (print "ERROR: Failed to run " (string-intersperse fullcmd " ") ", exiting now")
+	      ;; (sqlite3:finalize! db)
+	      ;; good ole "exit" seems not to work
+	      ;; (_exit 9)
+	      ;; but this hack will work! Thanks go to Alan Post of the Chicken email list
+	      ;; NB// Is this still needed? Should be safe to go back to "exit" now?
+	      (process-signal (current-process-id) signal/kill)
+	      ))
+	(alist->env-vars miscprevvals)
+	(alist->env-vars testprevvals)
+	(alist->env-vars commonprevvals)
+	launch-results))
+    (change-directory *toppath*)))
+
+;; Recover a test whose top controlling mtest process may have died.
+;;
+(define (launch:recover-test run-id test-id)
+  ;; this function is called on the test run host via ssh
+  ;;
+  ;; Intended procedure (only partially implemented below):
+  ;; 1. look at the process from pid
+  ;;    - is it owned by the calling user
+  ;;    - is its run directory correct for the test
+  ;;    - is there a controlling mtest (maybe stuck)
+  ;; 2. if recovery is needed watch pid; when it exits take the exit code and act on it
+  ;;
+  (let* ((pid (rmt:test-get-top-process-pid run-id test-id))
+	 (psres (with-input-from-pipe
+		 (conc "ps -F -u " (current-user-name) " | grep -E '" pid " ' | grep -v 'grep -E " pid "'")
+		 (lambda ()
+		   (read-line))))
+	 (rundir (if (string? psres) ;; real process owned by user
+		     (read-symbolic-link (conc "/proc/" pid "/cwd"))
+		     #f)))
+    ;; NOTE(review): psres and rundir are computed but not used after this point, so the
+    ;; ownership / run-directory checks listed above are not enforced yet.
+    ;; TODO: periodically update the db with runtime; when the process exits look at the
+    ;; db and, if the test is still RUNNING after 10 seconds, set state/status appropriately
+    (process-wait pid)))
+
+
+;; Set the state, status, comment and optional test-data record for a test.
+;; Do not rpc this one, do the underlying calls!!!
+;;
+;;   run-id, test-id : identify the test record
+;;   state, status   : new values; the primary record is updated only when both are true
+;;   comment         : string or #f
+;;   dat             : hash table with ":category" ":variable" ":value" ":expected"
+;;                     ":tol" ":units" ":type" ":comment" entries, or #f
+;;   work-area       : accepted but not used in this procedure
+;;
+;; A "FAIL" status is recorded as "WAIVED" when the previous run of this test was
+;; COMPLETED/WAIVED and tests:check-waiver-eligibility accepts it.
+;;
+(define (tests:test-set-status! run-id test-id state status comment dat #!key (work-area #f))
+  (let* ((real-status status)
+         (otherdat    (or dat (make-hash-table)))
+         (testdat     (rmt:get-test-info-by-id run-id test-id))
+         (test-name   (db:test-get-testname  testdat))
+         (item-path   (db:test-get-item-path testdat))
+         ;; before proceeding we must find out if the previous test (where all keys
+         ;; matched except runname) was WAIVED if this test is FAIL
+         ;;
+         ;; NOTES:
+         ;;  1. Is the call to test:get-previous-run-record remotified?
+         ;;  2. Add test for testconfig waiver propagation control here
+         (prev-test   (if (equal? status "FAIL")
+                          (rmt:get-previous-test-run-record run-id test-name item-path)
+                          #f))
+         ;; waived is the comment to store (ours if given, else the previous one) or #f
+         (waived      (and prev-test ;; true if we found a previous test in this run series
+                           (let ((prev-status  (db:test-get-status  prev-test))
+                                 (prev-state   (db:test-get-state   prev-test))
+                                 (prev-comment (db:test-get-comment prev-test)))
+                             (debug:print 4 *default-log-port* "prev-status " prev-status ", prev-state " prev-state ", prev-comment " prev-comment)
+                             (if (and (equal? prev-state  "COMPLETED")
+                                      (equal? prev-status "WAIVED"))
+                                 (or comment prev-comment)
+                                 #f)))))
+    (if (and waived
+             (tests:check-waiver-eligibility testdat prev-test))
+        (set! real-status "WAIVED"))
+
+    (debug:print 4 *default-log-port* "real-status " real-status ", waived " waived ", status " status)
+
+    ;; update the primary record IF state AND status are defined
+    ;; (triggers are called in test-set-state-status)
+    (if (and state status)
+        (rmt:set-state-status-and-roll-up-items run-id test-id item-path state real-status (if waived waived comment)))
+
+    ;; if status is "AUTO" then call rollup (note, this one modifies data in test
+    ;; run area, it does remote calls under the hood.
+    ;; (if (and test-id state status (equal? status "AUTO"))
+    ;;     (rmt:test-data-rollup run-id test-id status))
+
+    ;; :first_err / :first_warn entries in dat are currently ignored; the former direct
+    ;; sqlite3 updates of tests.first_err and tests.first_warn are disabled:
+    ;; (let ((val (hash-table-ref/default otherdat ":first_err" #f)))
+    ;;   (if val (sqlite3:execute db "UPDATE tests SET first_err=? WHERE run_id=? AND testname=? AND item_path=?;" val run-id test-name item-path)))
+
+    ;; add metadata; the values are handed over as one csv line rather than being
+    ;; spliced into SQL (avoids SQL injection issues)
+    (let ((category (hash-table-ref/default otherdat ":category" ""))
+          (variable (hash-table-ref/default otherdat ":variable" ""))
+          (value    (hash-table-ref/default otherdat ":value"    #f))
+          (expected (hash-table-ref/default otherdat ":expected" "n/a"))
+          (tol      (hash-table-ref/default otherdat ":tol"      "n/a"))
+          (units    (hash-table-ref/default otherdat ":units"    ""))
+          (type     (hash-table-ref/default otherdat ":type"     ""))
+          (dcomment (hash-table-ref/default otherdat ":comment"  "")))
+      (debug:print 4 *default-log-port*
+                   "category: " category ", variable: " variable ", value: " value
+                   ", expected: " expected ", tol: " tol ", units: " units)
+      (if value ;; require only value; BB was- all three required
+          (let ((dat (conc category ","
+                           variable ","
+                           value    ","
+                           expected ","
+                           tol      ","
+                           units    ","
+                           dcomment ",," ;; extra comma for status
+                           type     )))
+            ;; This was run remote; perhaps that makes no sense, but it is the easiest path for the moment.
+            (rmt:csv->test-data run-id test-id dat)
+            ;; an arbitrary rmt: call to ensure that the server is reachable after the
+            ;; write (added with check-in a5adfa3f9a); almost certainly NOT needed
+            (rmt:get-var "MEGATEST_VERSION"))))
+
+    ;; need to update the top test record if PASS or FAIL and this is a subtest
+    ;;;;;; (if (not (equal? item-path ""))
+    ;;;;;;     (rmt:set-state-status-and-roll-up-items run-id test-name item-path state status #f) ;;;;;)
+
+    ;; store the comment when there is a waiver comment or when comment contains at
+    ;; least one non-whitespace character. string-search is used because string-match
+    ;; only succeeds when the whole string matches, which rejected every comment
+    ;; containing a space.
+    (if (or (and (string? comment)
+                 (string-search (regexp "\\S+") comment))
+            waived)
+        (rmt:general-call 'set-test-comment run-id (if waived waived comment) test-id))))
+
 
 
 )

Index: megamod.scm
==================================================================
--- megamod.scm
+++ megamod.scm
@@ -147,11 +147,10 @@
 ;;======================================================================
 ;; L O C K I N G   M E C H A N I S M S 
 ;;======================================================================
 
 ;; globals
-(define *writes-total-delay* 0)
 (define *exit-started* #f)
 (define *last-monitor-update-time* 0)
 (define *default-log-port* (current-error-port))
 
 ;; The watchdog is to keep an eye on things like db sync etc.
@@ -173,11 +172,11 @@
 (include "archive-inc.scm")  
 (include "client-inc.scm")
 (include "common-inc.scm")    ;; L5
 (include "db-inc.scm")        ;; L4
 (include "env-inc.scm")
-(include "http-transport-inc.scm")
+;; (include "http-transport-inc.scm")
 ;; (include "items-inc.scm")
 ;; (include "keys-inc.scm")
 (include "launch-inc.scm")     ;; L1
 ;; (include "margs-inc.scm")
 (include "mt-inc.scm")

Index: mtmod.scm
==================================================================
--- mtmod.scm
+++ mtmod.scm
@@ -21,20 +21,36 @@
 (declare (unit mtmod))
 (declare (uses mtconfigf))
 (declare (uses commonmod))
 (declare (uses rmtmod))
 (declare (uses testsmod))
+(declare (uses tasksmod))
+(declare (uses mtargs))
+(declare (uses dbmod))
+(declare (uses pgdbmod))
+(declare (uses servermod))
+(declare (uses launchmod))
 
 (module mtmod
 	*
 	
 (import scheme chicken data-structures extras posix)
 
 (import (prefix mtconfigf configf:)
+	(prefix sqlite3 sqlite3:)
+	srfi-69 regex srfi-18 srfi-13 srfi-1
 	commonmod
 	rmtmod
-	testsmod)
+	testsmod
+	tasksmod
+	dbmod
+	pgdbmod
+	servermod
+	(prefix mtargs args:))
+
+(include "run_records.scm")
+(include "db_records.scm")
 
 
 ;; This is the Megatest API. All generally "useful" routines will be wrapped or extended
 ;; here.
 
@@ -976,14 +992,10 @@
 
 ;;======================================================================
 ;; E X I T   H A N D L I N G
 ;;======================================================================
 
-(define (common:run-sync?)
-    (and (common:on-homehost?)
-	 (args:get-arg "-server")))
-
 ;;   (let ((ohh (common:on-homehost?))
 ;; 	(srv (args:get-arg "-server")))
 ;;     (and ohh srv)))
     ;; (debug:print-info 0 *default-log-port* "common:run-sync? ohh=" ohh ", srv=" srv)
 
@@ -1191,7 +1203,1246 @@
        ;; '())
        )
   (if (common:api-changed?)
       (common:set-last-run-version)))
 
+;; Ping a server; called in megatest.scm.
+;;
+;;   host-port-in : string "hostname:port", or #f
+;;   do-exit      : when true, exit the process with code 0 (login ok) or 1 (anything
+;;                  else) instead of returning
+;;
+;; Returns #t when the login succeeded, #f otherwise.
+;; Example: (server:ping "somehost:8080" do-exit: #t)
+;;
+;; NOTE: This is NOT called directly from clients as not all transports support a client
+;;       running in the same process as the server.
+;;
+(define (server:ping host-port-in #!key (do-exit #f))
+  ;; Parse "host:port" into (host port); #f when the address is missing or malformed.
+  ;; A port that string->number cannot convert is treated as malformed rather than
+  ;; being handed to the transport as #f.
+  (let ((host-port (and host-port-in
+                        (let* ((slst (string-split host-port-in ":"))
+                               (port (and (= (length slst) 2)
+                                          (string->number (cadr slst)))))
+                          (and port (list (car slst) port))))))
+    (if (not host-port)
+        ;; no usable address: complain only when the caller actually supplied one
+        (begin
+          (if host-port-in
+              (debug:print 0 *default-log-port*  "ERROR: bad host:port"))
+          (if do-exit (exit 1))
+          #f)
+        ;; connect and try to log in; a list result with a true car counts as success
+        (let* ((iface      (car host-port))
+               (port       (cadr host-port))
+               (server-dat (http-transport:client-connect iface port))
+               (login-res  (rmt:login-no-auto-client-setup server-dat)))
+          (if (and (list? login-res)
+                   (car login-res))
+              (begin
+                ;; (print "LOGIN_OK")
+                (if do-exit (exit 0))
+                #t)
+              (begin
+                ;; (print "LOGIN_FAILED")
+                (if do-exit (exit 1))
+                #f))))))
+
+;; Ping a server by running "<megatest-exe> -ping <ifaceport>" in a separate process,
+;; the safest way in some cases. Returns #t only when the last line printed by that
+;; process is LOGIN_OK; no output at all or any other last line gives #f.
+;;
+(define (server:ping-server ifaceport)
+  (let ((ping-cmd (conc (common:get-megatest-exe) " -ping " ifaceport)))
+    (with-input-from-pipe
+     ping-cmd
+     (lambda ()
+       ;; drain the pipe, remembering only the most recent line
+       (let next-line ((line (read-line)) (last-seen "NOREPLY"))
+         (if (eof-object? line)
+             (equal? last-seen "LOGIN_OK")
+             (next-line (read-line) line)))))))
+
+;; Ping the server described by server-record.
+;;
+;; Returns the server url when the ping result is true, #f when it is #f. Only the
+;; http transport is pinged; for any other *transport-type* the case form has no
+;; matching clause.
+;;
+(define (server:check-server server-record)
+  (let ((url (server:record->url server-record)))
+    (and (case *transport-type*
+           ((http) (server:ping url)))
+         url)))
+
+;; Find a live server for areapath. Multiple servers started by accident are no
+;; longer a concern; older servers will drop off in time.
+;;
+;; Returns the url of the first candidate that answers server:check-server, or #f when
+;; there are no candidates, none answers, or the number of candidates is below
+;; (random ns) where ns is (server:get-num-servers).
+;;
+(define (server:check-if-running areapath) ;;  #!key (numservers "2"))
+  (let* ((ns         (server:get-num-servers))
+         (candidates (server:get-best (server:get-list areapath))))
+    (cond
+     ((not candidates)   #f)
+     ((null? candidates) #f)
+     ((and (list? candidates)
+           (< (length candidates) (random ns))) ;; somewhere between 0 and numservers
+      #f)
+     (else
+      (let try ((srv (car candidates)) (others (cdr candidates)))
+        (or (server:check-server srv)
+            (if (null? others) #f (try (car others) (cdr others)))))))))
+
+
+;; NOT USED (well, ok, reference in rpc-transport but otherwise not used).
+;; NOTE(review): calling this returns a one-argument procedure instead of a boolean; the
+;; outer toppath is shadowed by the inner one. Confirm this is intended before relying on it.
+(define (server:login toppath)
+  (lambda (toppath)
+    (set! *db-last-access* (current-seconds)) ;; might not be needed.
+    ;; #t only when the given toppath equals *toppath*
+    (equal? *toppath* toppath)))
+;; Start a server process for the testsuite area at areapath.    ### NOTE ### > file 2>&1
+;;
+;; The server is launched with "nbfake <megatest-exe> -server <homehost or -> ..." from
+;; inside areapath. TARGETHOST is exported for the launch when the homehost is known
+;; and is not this host; TARGETHOST_LOGF always is. Both are unset again afterwards.
+;;   incidental: rotate logs in logs/ dir.
+;;
+(define  (server:run areapath) ;; areapath is *toppath* for a given testsuite area
+  (let* ((curr-host   (get-host-name))
+         (curr-ip     (server:get-best-guess-address curr-host))
+         (homehost    (common:get-homehost))
+         ;; other callers guard against a #f homehost, so do not take its car blindly;
+         ;; a missing homehost becomes "-server -" below
+         (target-host (and (pair? homehost) (car homehost)))
+         (testsuite   (common:get-testsuite-name))
+         (logfile     (conc areapath "/logs/server.log"))
+         (cmdln (conc (common:get-megatest-exe)
+                      " -server " (or target-host "-") (if (equal? (configf:lookup *configdat* "server" "daemonize") "yes")
+                                                           " -daemonize "
+                                                           "")
+                      ;; " -log " logfile
+                      " -m testsuite:" testsuite))
+         (log-rotate  (make-thread common:rotate-logs  "server run, rotate logs thread"))
+         (load-limit  (configf:lookup-number *configdat* "jobtools" "max-server-start-load" default: 3.0)))
+    ;; we want the remote server to start in *toppath* so push there
+    (push-directory areapath)
+    (debug:print 0 *default-log-port* "INFO: Trying to start server (" cmdln ") ...")
+    (thread-start! log-rotate)
+
+    ;; is the target host some other machine? It is compared against the current host
+    ;; name (bare or as host.domain.tld) and against the current ip address
+    (if (and target-host
+             (not (string-match (conc "("curr-host "|" curr-host"\\..*)") target-host))
+             (not (equal? curr-ip target-host)))
+        (begin
+          (debug:print-info 0 *default-log-port* "Starting server on " target-host ", logfile is " logfile)
+          (setenv "TARGETHOST" target-host)))
+
+    (setenv "TARGETHOST_LOGF" logfile)
+    (thread-sleep! (/ (random 5000) 1000)) ;; random initial delay (up to 5 seconds); many running tests commonly request a server at the same time
+    (common:wait-for-normalized-load load-limit " delaying server start due to load" target-host) ;; do not try starting servers on an already overloaded machine, just wait forever
+    (system (conc "nbfake " cmdln))
+    (unsetenv "TARGETHOST_LOGF")
+    (if (get-environment-variable "TARGETHOST")(unsetenv "TARGETHOST"))
+    (thread-join! log-rotate)
+    (pop-directory)))
+
+;; Kind start up of servers: back off between start attempts for a given area.
+;; The minimum gap grows with the number of previous calls (0, 20, 300, then 600
+;; seconds) plus a small random number, in case a lot of jobs hit the work hosts
+;; simultaneously. Nothing is attempted when a server is already running.
+;;
+(define (server:kind-run areapath)
+  (if (not (server:check-if-running areapath)) ;; why try if there is already a server running?
+      (let* ((history   (hash-table-ref/default *server-kind-run* areapath '(0 0))) ;; callnum, whenrun
+             (attempts  (car history))
+             (last-time (cadr history))
+             (backoff   (case attempts ((0) 0) ((1) 20) ((2) 300) (else 600)))
+             (min-gap   (+ backoff (random 5)))
+             (lockf     (conc areapath "/logs/server-start.lock"))
+             (elapsed   (- (current-seconds) last-time)))
+        (if (> elapsed min-gap)
+            (begin
+              (common:simple-file-lock-and-wait lockf expire-time: 15)
+              (server:run areapath)
+              (thread-sleep! 2) ;; don't release the lock for at least a few seconds
+              (common:simple-file-release-lock lockf)))
+        (hash-table-set! *server-kind-run* areapath (list (+ attempts 1) (current-seconds))))))
+
+(define server:try-running server:run) ;; there is no more per-run servers ;; REMOVE ME. BUG.
+
+;; Wait up to timeout seconds for a server on areapath, starting one via server:kind-run
+;; when (after the first pass) there are no candidates. Returns the server url or #f.
+(define (server:start-and-wait areapath #!key (timeout 60))
+  (let ((give-up-time (+ (current-seconds) timeout)))
+    (let loop ((server-url (server:check-if-running areapath))
+               (try-num    0))
+      (if (or server-url (> (current-seconds) give-up-time)) ;; server-url is #f if no server available
+          server-url
+          (let* ((best   (server:get-best (server:get-list areapath)))
+                 (num-ok (if (list? best) (length best) 0))) ;; server:get-best may give #f, see server:check-if-running
+            (if (and (> try-num 0) (< num-ok 1)) ;; first time through just wait, then start one if no candidates
+                (server:kind-run areapath))
+            (thread-sleep! 5)
+            (loop (server:check-if-running areapath) (+ try-num 1)))))))
+
+;;======================================================================
+;; make html output
+;;======================================================================
+
+(define (tests:test-set-toplog! run-id test-name logf) ;; issue the 'tests:test-set-toplog general call with logf for (run-id, test-name)
+  (rmt:general-call 'tests:test-set-toplog run-id logf run-id test-name))
+
+;; Regenerate megatest-rollup-<test-name>.html for an iterated test and record it as
+;; the top-level log. Unless force is true this is only done when the current logf is
+;; "logs/final.log" or already is the roll-up file.
+;;
+;; The working directory is changed to the test path while working and is restored
+;; on every normal return path (previously only when the lock was obtained).
+;;
+(define (tests:summarize-items run-id test-id test-name force)
+  (let* ((outputfilename (conc "megatest-rollup-" test-name ".html"))
+         (orig-dir       (current-directory))
+         (logf-info      (rmt:test-get-logfile-info run-id test-name))
+         (logf           (if logf-info (cadr logf-info) #f))
+         (path           (if logf-info (car  logf-info) #f)))
+    ;; change to the test directory; path can be #f here under some weird conditions
+    (if (and (string? path)
+             (directory? path))
+        (begin
+          (debug:print 4 *default-log-port* "Found path: " path)
+          (change-directory path))
+        (debug:print-error 0 *default-log-port* "summarize-items for run-id=" run-id ", test-name=" test-name ", no such path: " path))
+    (debug:print 4 *default-log-port* "summarize-items with logf " logf ", outputfilename " outputfilename " and force " force)
+    (let ((result
+           (if (or (equal? logf "logs/final.log")
+                   (equal? logf outputfilename)
+                   force)
+               (let ((my-start-time (current-seconds))
+                     (lockf         (conc outputfilename ".lock")))
+                 (let loop ((have-lock  (common:simple-file-lock lockf)))
+                   (if have-lock
+                       (let ((script (configf:lookup *configdat* "testrollup" test-name)))
+                         (print "Obtained lock for " outputfilename)
+                         (rmt:set-state-status-and-roll-up-items run-id test-name "" #f #f #f)
+                         (if script
+                             (system (conc script " > " outputfilename " & "))
+                             (tests:generate-html-summary-for-iterated-test run-id test-id test-name outputfilename))
+                         (common:simple-file-release-lock lockf)
+                         (change-directory orig-dir)
+                         (tests:test-set-toplog! run-id test-name outputfilename)) ;; NB// remote internal
+                       ;; no lock: retry only if we started after the update currently in flight
+                       (if (> my-start-time (handle-exceptions exn 0 (file-modification-time lockf)))
+                           (begin
+                             (debug:print-info 1 *default-log-port* "Waiting to update " outputfilename ", another test currently updating it")
+                             (thread-sleep! (+ 5 (random 5))) ;; delay of 5 to 9 seconds
+                             (loop (common:simple-file-lock lockf))))))))))
+      (change-directory orig-dir)
+      result)))
+
+;; Write the roll-up summary page for an iterated test to outputfilename.
+;;
+;; The page holds a table of per-state counts, a table of per-status counts and one
+;; row per item (link to <itempath>/test-summary.html, state, status, comment) built
+;; from rmt:test-get-records-for-index-file. test-id is accepted but not used.
+;;
+;; The page heading is written first and every row and cell is closed, so the
+;; output is a well-formed document (the heading used to end up inside the item
+;; table, whose header row was never closed).
+;;
+(define (tests:generate-html-summary-for-iterated-test run-id test-id test-name outputfilename)
+  (let* ((counts      (make-hash-table)) ;; status -> number of items
+         (statecounts (make-hash-table)) ;; state  -> number of items
+         (records     (rmt:test-get-records-for-index-file run-id test-name))
+         (testdat     (if (list? records) records '()))
+         ;; print one two-column count table followed by its total row
+         (print-counts
+          (lambda (title table key->cell)
+            (print "<table cellspacing=\"0\" border=\"1\"><tr><td colspan=\"2\"><h2>" title "</h2></td></tr>")
+            (let ((tot 0))
+              (for-each (lambda (key)
+                          (set! tot (+ tot (hash-table-ref table key)))
+                          (print "<tr><td>" (key->cell key) "</td><td>" (hash-table-ref table key) "</td></tr>"))
+                        (hash-table-keys table))
+              (print "<tr><td>Total</td><td>" tot "</td></tr></table>")))))
+    ;; a failed fetch is reported on the log port, not written into the page
+    (if (not (list? records))
+        (debug:print 0 *default-log-port* "ERROR: failed to get records with rmt:test-get-records-for-index-file run-id=" run-id " test-name=" test-name))
+    (with-output-to-file outputfilename
+      (lambda ()
+        ;; page heading first
+        (print "<html><title>Summary: " test-name
+               "</title><body><h2>Summary for " test-name "</h2>")
+        ;; tally states and statuses
+        (for-each
+         (lambda (testrecord)
+           (let ((state  (vector-ref testrecord 2))
+                 (status (vector-ref testrecord 3)))
+             (hash-table-set! counts status (+ 1 (hash-table-ref/default counts status 0)))
+             (hash-table-set! statecounts state (+ 1 (hash-table-ref/default statecounts state 0)))))
+         testdat)
+        ;; the two statistics tables sit side by side in an outer one-row table
+        (print "<table><tr><td valign=\"top\">")
+        (print-counts "State stats" statecounts (lambda (state) state))
+        (print "</td><td valign=\"top\">")
+        (print-counts "Status stats" counts
+                      (lambda (status)
+                        (conc "<font color=\"" (common:get-color-from-status status) "\">" status "</font>")))
+        (print "</td></tr></table>")
+        ;; one row per item
+        (print "<table cellspacing=\"0\" border=\"1\">"
+               "<tr><td>Item</td><td>State</td><td>Status</td><td>Comment</td></tr>")
+        (for-each
+         (lambda (testrecord)
+           (let ((itempath (vector-ref testrecord 1))
+                 (state    (vector-ref testrecord 2))
+                 (status   (vector-ref testrecord 3))
+                 (comment  (vector-ref testrecord 6)))
+             (print "<tr>"
+                    "<td><a href=\"" itempath "/test-summary.html\"> " itempath "</a></td>"
+                    "<td>" state "</td>"
+                    "<td><font color=\"" (common:get-color-from-status status)
+                    "\">" status "</font></td>"
+                    "<td>" (if (equal? comment "")
+                               "&nbsp;"
+                               comment) "</td>"
+                    "</tr>")))
+         testdat)
+        (print "</table></body></html>")))))
+
+(define tests:css-jscript-block
+#<<EOF
+<style type="text/css">
+ul.LinkedList { display: block; }
+/* ul.LinkedList ul { display: none; } */
+.HandCursorStyle { cursor: pointer; cursor: hand; }  /* For IE */
+th {background-color: #8c8c8c;}
+td.test {background-color: #d9dbdd;}
+td.PASS {background-color: #347533;}
+td.FAIL {background-color: #cc2812;}
+td.SKIP{background-color: #FFD733;}
+td.WARN {background-color: #EA8724;}
+td.WAIVED {background-color: #838A12;}
+td.ABORT{background-color: #EA24B7;}
+.PASS .link, .SKIP .link, .WARN .link,.WAIVED .link,.ABORT .link, .FAIL .link{color: #FFFFFF;}
+
+
+</style>
+
+
+  <script type="text/JavaScript">
+
+    function filtersome() {
+  $("tr").show();
+  $(".test").filter(
+    function() {
+      var names = $('#testname').val().split(',');
+      var good=1;
+      for (var i=0, len=names.length; i<len; i++) {
+        var uname=names[i];
+        console.log("Trying to check for " + uname); 
+        if($(this).text().indexOf(uname) != -1) {
+          good= 0;
+          console.log("Found "+uname);
+        }
+      }
+      return good; 
+    }
+  ).parent().hide();
+//  $(".sum").show();
+}
+  
+    // Add this to the onload event of the BODY element
+    function addEvents() {
+      activateTree(document.getElementById("LinkedList1"));
+    }
+
+    // This function traverses the list and add links 
+    // to nested list items
+    function activateTree(oList) {
+      // Collapse the tree
+      for (var i=0; i < oList.getElementsByTagName("ul").length; i++) {
+        oList.getElementsByTagName("ul")[i].style.display="none";            
+      }                                                                  
+      // Add the click-event handler to the list items
+      if (oList.addEventListener) {
+        oList.addEventListener("click", toggleBranch, false);
+      } else if (oList.attachEvent) { // For IE
+        oList.attachEvent("onclick", toggleBranch);
+      }
+      // Make the nested items look like links
+      addLinksToBranches(oList);
+    }
+
+    // This is the click-event handler
+    function toggleBranch(event) {
+      var oBranch, cSubBranches;
+      if (event.target) {
+        oBranch = event.target;
+      } else if (event.srcElement) { // For IE
+        oBranch = event.srcElement;
+      }
+      cSubBranches = oBranch.getElementsByTagName("ul");
+      if (cSubBranches.length > 0) {
+        if (cSubBranches[0].style.display == "block") {
+          cSubBranches[0].style.display = "none";
+        } else {
+          cSubBranches[0].style.display = "block";
+        }
+      }
+    }
+
+    // This function makes nested list items look like links
+    function addLinksToBranches(oList) {
+      var cBranches = oList.getElementsByTagName("li");
+      var i, n, cSubBranches;
+      if (cBranches.length > 0) {
+        for (i=0, n = cBranches.length; i < n; i++) {
+          cSubBranches = cBranches[i].getElementsByTagName("ul");
+          if (cSubBranches.length > 0) {
+            addLinksToBranches(cSubBranches[0]);
+            cBranches[i].className = "HandCursorStyle";
+            cBranches[i].style.color = "blue";
+            cSubBranches[0].style.color = "black";
+            cSubBranches[0].style.cursor = "auto";
+          }
+        }
+      }
+    }
+  </script>
+EOF
+)
+
+;; HTML fragment that loads jQuery from ./jquery3.1.0.js, i.e. relative to
+;; the page that includes it. tests:css-jscript-block-cond returns this
+;; fragment when it is called with #t.
+(define tests:css-jscript-block-dynamic 
+#<<EOF
+           <script src= ./jquery3.1.0.js></script> 
+EOF
+)
+
+;; Build a <script> element whose src attribute is javascript-lib.
+;; The value is inserted as given: it is neither quoted nor escaped.
+(define (test:js-block javascript-lib)
+  (let ((open-tag  "<script src=")
+        (close-tag "></script>"))
+    (conc open-tag javascript-lib close-tag)))
+
+
+;; <script> element built from *java-script-lib*. The value is computed once,
+;; when this definition is evaluated. tests:css-jscript-block-cond returns it
+;; for any argument other than #t.
+(define tests:css-jscript-block-static (test:js-block *java-script-lib*))
+
+;; Pick the jQuery <script> fragment for a page: the dynamic fragment only
+;; when dynamic is exactly #t, the static fragment for every other value.
+(define (tests:css-jscript-block-cond dynamic)
+  (cond
+   ((equal? dynamic #t) tests:css-jscript-block-dynamic)
+   (else                tests:css-jscript-block-static)))
+
+       
+;; Path components for a run record: the first numkeys fields of the run
+;; vector followed by the field at index numkeys+1.
+;; NOTE(review): index numkeys+1 is presumably the run name - confirm.
+(define (tests:run-record->test-path run numkeys)
+  (let* ((fields    (vector->list run))
+         (key-vals  (take fields numkeys))
+         (last-part (vector-ref run (+ 1 numkeys))))
+    (append key-vals (list last-part))))
+
+
+;; Collect the test results of a set of runs into a nested hash table:
+;;
+;;   test-name -> "test-name:item-path" -> run-id -> (status html-path)
+;;
+;;   runs    - list of run records (vectors)
+;;   header  - header used by db:get-value-by-header to locate the "id" field
+;;   numkeys - number of target keys; accepted for caller compatibility but
+;;             not needed to build the table
+;;
+;; html-path is field 10 of the test record joined with "/" to field 13.
+;; NOTE(review): fields 10 and 13 are presumably the run directory and the
+;; final log file - confirm against the test record layout.
+(define (tests:get-rest-data runs header numkeys)
+  ;; return the sub-table stored under key, creating and storing it on first use
+  (define (subtable! ht key)
+    (or (hash-table-ref/default ht key #f)
+        (let ((new-ht (make-hash-table)))
+          (hash-table-set! ht key new-ht)
+          new-ht)))
+  (let ((resh (make-hash-table)))
+    (for-each
+     (lambda (run)
+       (let* ((run-id    (db:get-value-by-header run header "id"))
+              (test-data (rmt:get-tests-for-run
+                          run-id
+                          "%"        ;; testnamepatt
+                          '()        ;; states
+                          '()        ;; statuses
+                          #f         ;; offset
+                          #f         ;; num-to-get
+                          #f         ;; hide/not-hide
+                          #f         ;; sort-by
+                          #f         ;; sort-order
+                          #f         ;; 'shortlist                           ;; qrytype
+                          0          ;; last update
+                          #f)))
+         ;; for-each, not map: only the side effect on resh is wanted
+         (for-each
+          (lambda (test)
+            (let* ((test-name      (vector-ref test 2))
+                   (test-html-path (conc (vector-ref test 10) "/" (vector-ref test 13)))
+                   (test-item      (conc test-name ":" (vector-ref test 11)))
+                   (test-status    (vector-ref test 4))
+                   (item-hash      (subtable! resh test-name))
+                   (run-hash       (subtable! item-hash test-item)))
+              (hash-table-set! run-hash run-id (list test-status test-html-path))))
+          test-data)))
+     runs)
+    resh))
+
+
+;; Render a tree of nested hash tables as nested HTML lists.
+;;
+;;   ht      - hash table whose keys are strings (they are ordered with string<)
+;;   path    - list of the keys followed to reach ht
+;;   tipfunc - procedure of two arguments, the tip value and the path to it;
+;;             its result becomes the content of the list item
+;;
+;; A value counts as a tip when it is not a hash table or is an empty one.
+;; An empty ht yields (tipfunc #f path); that case is not expected in practice.
+(define (common:htree->html ht path tipfunc)
+  (define (entry<? a b)
+    (string< (car a) (car b)))
+  (define (render-entry entry)
+    (let* ((name    (car entry))
+           (sub     (cdr entry))
+           (subpath (append path (list name))))
+      (if (and (hash-table? sub)
+               (not (null? (hash-table-keys sub))))
+          (s:li (list name (common:htree->html sub subpath tipfunc)))
+          (s:li (tipfunc sub subpath)))))
+  (let ((entries (sort (hash-table->alist ht) entry<?)))
+    (if (null? entries)
+        (tipfunc #f path)
+        (s:ul (map render-entry entries)))))
+
+
+;; tests:dashboard-body - generate the dashboard page for one page of runs
+;;
+;;   page           - zero-based page index; the query offset is page * pg-size
+;;   pg-size        - number of runs requested per page
+;;   keys           - target keys; one header row is emitted per key
+;;   numkeys        - number of keys, passed on to tests:get-rest-data
+;;   total-runs     - only handed to get-next-links
+;;   linktree       - link tree root; stripped from result paths when flag is not #t
+;;   area-name      - shown in the title and heading
+;;   get-prev-links - called as (get-prev-links page linktree)
+;;   get-next-links - called as (get-next-links page linktree total-runs)
+;;   flag           - #t: cells link to ./test_log?runid=..&testname=..;
+;;                    otherwise cells link to the stored html path relative to linktree
+;;   run-patt, target-patt - patterns given to rmt:get-runs-by-patt
+;;
+;; The table has one row per key, a "Run Name" row, then one row per test item
+;; with one cell per run; a run without a result for the item shows "n/a".
+;;
+;; NOTE(review): ctr is incremented inside the map over keys, so the key rows
+;; are only correct if map visits keys left to right - Scheme does not
+;; guarantee that order; confirm for the implementation in use.
+;;
+
+(define (tests:dashboard-body page pg-size keys numkeys  total-runs linktree area-name get-prev-links get-next-links flag run-patt target-patt)
+  (let* ((start (* page pg-size)) 
+	       ;(runsdat   (rmt:get-runs "%" pg-size start (map (lambda (x)(list x "%")) keys)))
+         (runsdat   (rmt:get-runs-by-patt  keys run-patt target-patt start pg-size #f 0 sort-order: "desc"))
+                    ; db:get-runs-by-patt   keys runnamepatt targpatt offset limit fields last-update   
+	       (header    (vector-ref runsdat 0))
+	       (runs      (vector-ref runsdat 1))
+         (ctr 0)
+         (test-runs-hash (tests:get-rest-data runs header numkeys))
+         (test-list (hash-table-keys test-runs-hash))) 
+  
+  (s:html tests:css-jscript-block (tests:css-jscript-block-cond flag)
+		   (s:title "Summary for " area-name)
+		   (s:body 'onload "addEvents();"
+                          (get-prev-links page linktree)
+                          (get-next-links page linktree total-runs)
+                           
+			   (s:h1 "Summary for " area-name)
+                           (s:h3 "Filter" )
+                           (s:input 'type "text"  'name "testname" 'id "testname" 'length "30" 'onkeyup "filtersome()")
+			   ;; top list
+         
+			   (s:table 'id "LinkedList1" 'border "1" 'cellspacing 0
+                            (map (lambda (key)
+				 (let* ((res (s:tr 'class "something" 
+				  (s:th key )
+                                   (map (lambda (run)
+                                   (s:th  (vector-ref run ctr)))
+                                  runs))))
+                             (set! ctr (+ ctr 1))
+                               res))
+                               keys)
+                               (s:tr
+				 (s:th "Run Name")
+                                  (map (lambda (run)
+                                   (s:th (db:get-value-by-header run header "runname")))
+                                  runs))
+                              
+                               (map (lambda (test-name)
+                                 (let* ((item-hash (hash-table-ref/default test-runs-hash test-name  #f))
+                                         (item-keys (sort (hash-table-keys item-hash) string<=?))) 
+                                          (map (lambda (item-name)  
+  		                             (let* ((res (s:tr  'class item-name
+				                         (s:td  item-name 'class "test" )
+                                                           (map (lambda (run)
+                                                               (let* ((run-test (hash-table-ref/default item-hash item-name  #f))
+                                                                      (run-id (db:get-value-by-header run header "id"))
+                                                                      (result (hash-table-ref/default run-test run-id "n/a"))
+                                                                      ;(relative-path (get-relative-path)) 
+                                                                      (status (if (string? result)
+									                                                            	result
+										                                                            (car result)))
+                                                                        (link (if (string? result)
+										                                                            result
+                                                                                (if (equal? flag #t) 
+                                                                                (s:a (car result) 'href (conc "./test_log?runid=" run-id "&testname="  item-name ))
+  																																						  (s:a (car result) 'href (string-substitute  (conc linktree "/")  "" (cadr result)  "-"))))))
+                                                                       (s:td  link 'class status)))
+                                                                runs))))
+                                                        res))
+                                                   item-keys)))
+                               test-list)))))) 
+
+;; (tests:create-html-tree "test-index.html")
+;;
+;; Write the paged dashboard as static HTML, ten runs per page.
+;;
+;;   outf - output file name; also used to derive the lock file (outf ".lock")
+;;
+;; Run and target patterns come from -run-patt/-runname and
+;; -target-patt/-target, each defaulting to "%". A target with fewer parts than
+;; there are keys is padded with "%" before being joined with "/".
+;; Returns #f when the lock could not be taken, otherwise the result of
+;; releasing it.
+;;
+;; NOTE(review): the output file is (or outf <linktree>/page<N>.html), so when
+;; outf is given every page is written to that same file while the prev/next
+;; links still point at page<N>.html - confirm this is intended.
+;; NOTE(review): get-next-links uses a literal 10 where pg-size looks intended;
+;; the two agree only while pg-size is 10.
+;; NOTE(review): runs-to-process is bound but never used, and the lock is not
+;; released if writing a page raises an error.
+;;
+(define (tests:create-html-tree outf)
+   (let* ((lockfile  (conc outf ".lock"))
+	 			 (runs-to-process '())
+         (linktree  (common:get-linktree))
+         (area-name (common:get-testsuite-name))
+	  		 (keys      (rmt:get-keys))
+	  		 (numkeys   (length keys))
+         (run-patt (or (args:get-arg "-run-patt")
+                        (args:get-arg "-runname")
+                        "%"))
+         (target (or  (args:get-arg "-target-patt") 
+											(args:get-arg "-target")
+                      "%"))
+         (targlist (string-split target "/"))
+         (numtarg  (length targlist))  
+         (targtweaked (if (> numkeys numtarg)
+			   								(append targlist (make-list (- numkeys numtarg) "%"))
+			  								targlist))
+         (target-patt (string-join targtweaked "/"))
+         ;(total-runs  (rmt:get-num-runs "%")) ;;this needs to be changed to filter by target
+          (total-runs (rmt:get-runs-cnt-by-patt run-patt target-patt keys )) 
+         (pg-size 10))
+    (if (common:simple-file-lock lockfile)
+        (begin
+         ;(print total-runs)    
+        (let loop ((page 0))
+	(let* ((oup       (open-output-file (or outf (conc linktree "/page" page ".html"))))
+               (get-prev-links (lambda (page linktree )   
+                            (let* ((link  (if (not (eq? page 0))
+                                   (s:a "&lt;&lt;prev" 'href (conc  "page" (- page 1) ".html"))
+                                   (s:a "" 'href (conc   "page"  page ".html")))))
+                               link)))
+               (get-next-links (lambda (page linktree total-runs)   
+                            (let* ((link  (if (> total-runs (+ 10 (* page pg-size)))
+                                   (s:a "next&gt;&gt;" 'href (conc  "page"  (+ page 1) ".html"))
+                                   (s:a "" 'href (conc   "page" page  ".html")))))
+                               link))) )
+          (print "total runs: " total-runs) 
+          (s:output-new
+	   			 oup
+	   					(tests:dashboard-body page pg-size keys numkeys total-runs linktree area-name get-prev-links get-next-links #f run-patt target-patt)) ;; update this function
+          (close-output-port oup)
+         ; (set! page (+ 1 page))
+          (if (> total-runs (* (+ 1 page) pg-size))
+           (loop (+ 1  page)))))
+	  (common:simple-file-release-lock lockfile))
+	            
+	#f)))
+
+
+;; Read filename and return its lines, in file order, as a list of strings.
+(define (tests:readlines filename)
+  (call-with-input-file filename
+    (lambda (inp)
+      (let collect ((acc '()))
+        (let ((ln (read-line inp)))
+          (if (eof-object? ln)
+              (reverse acc)
+              (collect (cons ln acc))))))))
+
+;; Return the contents of the file recorded for one test item.
+;;
+;;   run-id    - run id as a string (converted with string->number)
+;;   test-name - test name pattern handed to rmt:get-tests-for-run
+;;   item-name - compared with field 11 of each returned test record
+;;
+;; The file is <field 10>/<field 13> of the first matching record. Returns its
+;; lines joined with "\n", or "<H2>Data not found</H2>" when the query returns
+;; nothing, no record matches, or the file does not exist.
+(define (tests:get-test-log run-id test-name item-name)
+  (let* ((test-data (rmt:get-tests-for-run
+                     (string->number run-id)
+                     test-name  ;; testnamepatt
+                     '()        ;; states
+                     '()        ;; statuses
+                     #f         ;; offset
+                     #f         ;; num-to-get
+                     #f         ;; hide/not-hide
+                     #f         ;; sort-by
+                     #f         ;; sort-order
+                     #f         ;; 'shortlist                           ;; qrytype
+                     0          ;; last update
+                     #f))
+         (path  "")
+         (found 0))
+    (debug:print-info 0 *default-log-port* "found: " found)
+    ;; an empty result list has no car, so only search when there are records
+    (if (not (null? test-data))
+        (let loop ((hed (car test-data))
+                   (tal (cdr test-data)))
+          (debug:print-info 0 *default-log-port* "item: " (vector-ref hed 11) (vector-ref hed 10) "/" (vector-ref hed 13))
+          (if (equal? (vector-ref hed 11) item-name)
+              (begin
+                (set! found 1)
+                (set! path (conc (vector-ref hed 10) "/" (vector-ref hed 13)))))
+          ;; stop at the first match
+          (if (and (not (null? tal)) (equal? found 0))
+              (loop (car tal) (cdr tal)))))
+    ;; a recorded path whose file is gone is reported like a missing record
+    ;; instead of raising from tests:readlines
+    (if (or (equal? path "")
+            (not (file-exists? path)))
+        "<H2>Data not found</H2>"
+        (string-join (tests:readlines path) "\n"))))
+
+
+;; Build the dashboard page for dynamic serving: navigation links point at
+;; "dashboard?page=N" and tests:dashboard-body is called with its flag set
+;; to #t.
+;;
+;;   page - 1-based page number as a string, or #f for the first page.
+;;          Anything that is not a positive exact integer falls back to the
+;;          first page instead of raising an error.
+;;
+;; Returns the value of tests:dashboard-body for that page.
+(define (tests:dynamic-dboard page)
+  (let* ((linktree    (common:get-linktree))
+         (area-name   (common:get-testsuite-name))
+         (keys        (rmt:get-keys))
+         (numkeys     (length keys))
+         (targtweaked (make-list numkeys "%"))
+         (target-patt (string-join targtweaked "/"))
+         (total-runs  (rmt:get-num-runs "%"))
+         (pg-size     10)
+         ;; string->number yields #f for non-numeric text; guard before subtracting
+         (page-num    (and (string? page) (string->number page)))
+         ;; pg is the zero-based page index used by tests:dashboard-body
+         (pg          (if (and page-num
+                               (exact? page-num)
+                               (integer? page-num)
+                               (> page-num 0))
+                          (- page-num 1)
+                          0))
+         (get-prev-links (lambda (pg linktree)
+                           (debug:print-info 0 *default-log-port* "val: " (- 1 pg))
+                           ;; pg is zero-based, so the 1-based number of the previous page is pg
+                           (if (not (eq? pg 0))
+                               (s:a "&lt;&lt;prev " 'href (conc "dashboard?page=" pg))
+                               (s:a "" 'href (conc "dashboard?page=" pg)))))
+         (get-next-links (lambda (pg linktree total-runs)
+                           (debug:print-info 0 *default-log-port* "val: " pg)
+                           (debug:print-info 0 *default-log-port* "val: " total-runs " size" pg-size)
+                           ;; more runs remain after this page; the next 1-based number is pg + 2
+                           (if (> total-runs (* (+ pg 1) pg-size))
+                               (s:a "next&gt;&gt; " 'href (conc "dashboard?page=" (+ pg 2)))
+                               (s:a "" 'href (conc "dashboard?page=" pg))))))
+    (tests:dashboard-body pg pg-size keys numkeys total-runs linktree area-name
+                          get-prev-links get-next-links #t "%" target-patt)))
+
+;; Write the target overview page and the per-run run.html pages.
+;;
+;;   outf - output file for the target overview; when #f the overview goes to
+;;          <linktree>/targets.html. Also used to derive the lock file name.
+;;
+;; Run and target patterns come from -run-patt/-runname and
+;; -target-patt/-target, each defaulting to "%"; a target with fewer parts than
+;; there are keys is padded with "%". The matching runs are grouped by target
+;; (test:create-target-hash), the overview is written by
+;; test:create-target-html (which also closes the port) and one page per run
+;; is written by test:create-run-html.
+;; Returns #f when the lock could not be taken, otherwise the result of
+;; releasing it.
+;; NOTE(review): the lock is not released if any of the steps raises an error.
+(define (tests:create-html-summary outf)
+ (let* ((lockfile  (conc outf ".lock"))
+        (linktree  (common:get-linktree))
+				(keys      (rmt:get-keys))
+        (area-name (common:get-testsuite-name))
+        (run-patt (or (args:get-arg "-run-patt")
+                        (args:get-arg "-runname")
+                        "%"))
+        (target (or (args:get-arg "-target-patt")
+                        (args:get-arg "-target")
+                        "%"))
+         (targlist (string-split target "/"))
+         (numkeys  (length keys))
+	       (numtarg  (length targlist))  
+         (targtweaked (if (> numkeys numtarg)
+			   								(append targlist (make-list (- numkeys numtarg) "%"))
+			  								targlist))
+        (target-patt (string-join targtweaked "/")))
+    (if (common:simple-file-lock lockfile)
+        (begin
+          (let* (;(runsdat1   (rmt:get-runs run-patt #f #f (map (lambda (x)(list x "%")) keys)))
+                 (runsdat   (rmt:get-runs-by-patt  keys run-patt target-patt #f #f #f 0))
+					       (runs      (vector-ref runsdat 1))
+                 (header      (vector-ref runsdat 0))
+        	       (oup       (open-output-file (or outf (conc linktree "/targets.html"))))
+                 (target-hash (test:create-target-hash runs header (length keys))))
+           (test:create-target-html target-hash oup area-name linktree)
+          (test:create-run-html  runs area-name linktree (length keys) header))
+	  (common:simple-file-release-lock lockfile))
+	#f)))
+
+;; Index the test records of one run by item path and then by test name:
+;;
+;;   item-path -> test-name -> (status html-path)
+;;
+;; html-path is <field 10>/test-summary.html when that file exists, otherwise
+;; <field 10>/<field 13>. Item path, test name and status are fields 11, 2
+;; and 4 of the record.
+(define (test:get-test-hash test-data)
+  (let ((by-item (make-hash-table)))
+    (for-each
+     (lambda (rec)
+       (let* ((name         (vector-ref rec 2))
+              (base-dir     (vector-ref rec 10))
+              (summary-file (conc base-dir "/test-summary.html"))
+              (html-path    (if (file-exists? summary-file)
+                                summary-file
+                                (conc base-dir "/" (vector-ref rec 13))))
+              (item-path    (vector-ref rec 11))
+              (status       (vector-ref rec 4)))
+         ;; first record for this item path: create its table
+         (if (not (hash-table-ref/default by-item item-path #f))
+             (hash-table-set! by-item item-path (make-hash-table)))
+         (hash-table-set! (hash-table-ref/default by-item item-path #f)
+                          name
+                          (list status html-path))))
+     test-data)
+    by-item))
+
+;; Gather the keys of every sub-table of ordered-data named in a-keys, sort
+;; them with string>=? and drop duplicates. Every key in a-keys must be
+;; present in ordered-data (hash-table-ref is used without a default).
+(define (test:get-data->b-keys ordered-data a-keys)
+  (let* ((key-lists (map (lambda (sub-key)
+                           (hash-table-keys (hash-table-ref ordered-data sub-key)))
+                         a-keys))
+         (all-keys  (apply append key-lists))
+         (sorted    (sort all-keys string>=?)))
+    (delete-duplicates sorted)))
+
+
+;; Write <linktree>/<target>/<run-name>/run.html for every run in runs.
+;;
+;;   runs      - list of run records (vectors); the first numkeys fields joined
+;;               with "/" form the target
+;;   area-name - not used here; kept for caller compatibility
+;;   linktree  - root directory under which the run directories live
+;;   numkeys   - number of target key fields at the start of a run record
+;;   header    - header used by db:get-value-by-header
+;;
+;; Each page holds a table with one row per item path and one column per test
+;; name; a cell shows the test status linked to the test's html path made
+;; relative to the run directory. A run whose directory does not exist is
+;; skipped with a log message.
+(define (test:create-run-html runs area-name linktree numkeys header)
+  (map
+   (lambda (run)
+     (let* ((target   (string-join (take (vector->list run) numkeys) "/"))
+            (run-name (db:get-value-by-header run header "runname"))
+            (run-time (seconds->work-week/day-time (db:get-value-by-header run header "event_time")))
+            (run-dir  (conc linktree "/" target "/" run-name))
+            (oup      (if (file-exists? run-dir)
+                          (open-output-file (conc run-dir "/run.html"))
+                          #f))
+            (run-id   (db:get-value-by-header run header "id"))
+            (test-data (rmt:get-tests-for-run
+                        run-id
+                        "%"        ;; testnamepatt
+                        '()        ;; states
+                        '()        ;; statuses
+                        #f         ;; offset
+                        #f         ;; num-to-get
+                        #f         ;; hide/not-hide
+                        #f         ;; sort-by
+                        #f         ;; sort-order
+                        #f         ;; 'shortlist                           ;; qrytype
+                        0          ;; last update
+                        #f))
+            (item-test-hash (test:get-test-hash test-data))
+            (items          (hash-table-keys item-test-hash))
+            (test-names     (test:get-data->b-keys item-test-hash items)))
+       (if oup
+           (begin
+             (s:output-new
+              oup
+              (s:html tests:css-jscript-block (tests:css-jscript-block-cond #f)
+                      (s:title "Runs View " run-name)
+                      (s:body
+                       (s:h1 "Runs View " )
+                       (s:h3 "Target" target)
+                       (s:p
+                        (s:b "Run name" ) run-name)
+                       (s:p
+                        (s:b "Run Date" ) run-time)
+                       (s:table 'border 1 'cellspacing 0
+                                (s:tr
+                                 (s:th "Items")
+                                 (map (lambda (test)
+                                        (s:th test))
+                                      test-names))
+                                (map (lambda (item)
+                                       (let* ((test-hash (hash-table-ref/default item-test-hash item #f)))
+                                         (if test-hash
+                                             (begin
+                                               (s:tr
+                                                (s:td 'class "test" item)
+                                                (map (lambda (test)
+                                                       (let* ((test-details (hash-table-ref/default test-hash test #f))
+                                                              (status (if test-details
+                                                                          (car test-details)))
+                                                              ;; html path made relative to the run directory
+                                                              (link (if test-details
+                                                                        (string-substitute (conc run-dir "/") "" (cadr test-details) "-"))))
+                                                         (if test-details
+                                                             (s:td 'class status
+                                                                   (s:a 'class "link" 'href link status ))
+                                                             (s:td ""))))
+                                                     test-names))))))
+                                     (sort items string<=?))))))
+             (close-output-port oup))
+           ;; the log port must be the second argument, as at every other call site
+           (debug:print-info 0 *default-log-port* "Skip: Dirctory structure " linktree "/" target "/" run-name " does not exist. Megatest will not create run.html"))))
+   runs))
+
+;; Group run names by target. The target of a run is its first numkeys
+;; fields joined with "/". Each new run name is consed onto the front of the
+;; list already stored for its target, so lists are in reverse order of
+;; appearance in runs.
+(define (test:create-target-hash runs header numkeys)
+  (let ((by-target (make-hash-table)))
+    (for-each
+     (lambda (run)
+       (let* ((name     (db:get-value-by-header run header "runname"))
+              (target   (string-join (take (vector->list run) numkeys) "/"))
+              (existing (hash-table-ref/default by-target target #f)))
+         (hash-table-set! by-target
+                          target
+                          (if existing
+                              (cons name existing)
+                              (list name)))))
+     runs)
+    by-target))
+
+;; Length of the longest run list stored in target-hash for any of targets.
+;; A target without an entry counts as length 0; the result is 0 when
+;; targets is empty.
+(define (test:get-max-run-cnt target-hash targets)
+  (let scan ((pending targets)
+             (longest 0))
+    (if (null? pending)
+        longest
+        (let* ((run-list (hash-table-ref/default target-hash (car pending) #f))
+               (len      (if run-list
+                             (length run-list)
+                             0)))
+          (scan (cdr pending)
+                (if (< longest len) len longest))))))
+ 
+;; Pad the run list of every target in targets to max-row-length entries by
+;; putting "" entries in front of it. target-hash is updated in place and
+;; returned. Every target must already have an entry in target-hash.
+(define (test:pad-runs target-hash targets max-row-length)
+  (for-each
+   (lambda (target)
+     (let* ((run-list (hash-table-ref/default target-hash target #f))
+            (missing  (- max-row-length (length run-list))))
+       (if (> missing 0)
+           (hash-table-set! target-hash
+                            target
+                            (append (make-list missing "") run-list)))))
+   targets)
+  target-hash)
+
+;; Write the "Target View" page to oup and close the port.
+;;
+;;   target-hash - target -> list of run names; padded in place by test:pad-runs
+;;   oup         - open output port; closed before returning
+;;   area-name   - shown in the title and heading
+;;   linktree    - root directory used to check that a run directory exists
+;;
+;; The table has one row per target. The run list is reversed before
+;; rendering; "" padding entries become empty cells and a run whose directory
+;; <linktree>/<target>/<run> exists becomes a link to <target>/<run>/run.html.
+;; NOTE(review): a run whose directory does not exist hits a one-armed if, so
+;; no cell is produced for it - confirm s:tr tolerates that value and that
+;; the shortened row is acceptable.
+(define (test:create-target-html target-hash oup area-name linktree)
+  (let* ((targets (hash-table-keys target-hash))
+         (max-row-length (test:get-max-run-cnt target-hash targets))
+         (pad-runs-hash (test:pad-runs target-hash targets max-row-length)))
+   (s:output-new
+	   oup
+	   (s:html tests:css-jscript-block (tests:css-jscript-block-cond #f)
+
+		   (s:title "Target View " area-name)
+		   (s:body
+		   (s:h1 "Target View " area-name)
+					(s:table 'id "LinkedList1" 'border "1" 'cellspacing 0
+             (s:tr 'class "something" 
+               (s:th "Target")
+								(s:th 'colspan max-row-length "Runs"))                                              
+                (let* ((tbl (map (lambda (target)
+                      (s:tr
+                      (s:td 'class "test" target)
+										  (let* ((runs  (hash-table-ref/default target-hash target  #f))
+														 (rest-row (map (lambda (run)
+																				(if (equal? run "")
+																						(s:td run)
+                                            (if (file-exists?(conc linktree "/" target "/" run ))
+																						(begin 
+																							(s:td 
+																							(s:a 'href (conc  target "/" run "/run.html") run))))))
+																				(reverse runs))))
+                              rest-row)))
+                                   targets)))
+                           tbl)))))
+          (close-output-port oup)))
+
+
+;; Older tree-style summary generator.
+;;
+;;   outf - output file for the runs index; when #f the index goes to
+;;          <linktree>/runs-index.html. Also used to derive the lock file name.
+;;
+;; Under the lock it writes a collapsible list of all runs in which each tip
+;; links to <target path>/run-summary.html when that directory exists and is
+;; writable. The lock is released once the index is closed; after that a
+;; run-summary.html listing the run's tests is written into each writable run
+;; directory.
+;; Returns #t after processing, or #f when the lock could not be taken.
+(define (tests:create-html-tree-old outf)
+   (let* ((lockfile  (conc outf ".lock"))
+	 (runs-to-process '()))
+    (if (common:simple-file-lock lockfile)
+	(let* ((linktree  (common:get-linktree))
+	       (oup       (open-output-file (or outf (conc linktree "/runs-index.html"))))
+	       (area-name (common:get-testsuite-name))
+	       (keys      (rmt:get-keys))
+	       (numkeys   (length keys))
+	       (runsdat   (rmt:get-runs "%" #f #f (map (lambda (x)(list x "%")) keys)))
+	       (header    (vector-ref runsdat 0))
+	       (runs      (vector-ref runsdat 1))
+	       (runtreedat (map (lambda (x)
+				  (tests:run-record->test-path x numkeys))
+				runs))
+	       (runs-htree (common:list->htree runtreedat)))
+	  (set! runs-to-process runs)
+	  (s:output-new
+	   oup
+	   (s:html tests:css-jscript-block
+		   (s:title "Summary for " area-name)
+		   (s:body 'onload "addEvents();"
+			   (s:h1 "Summary for " area-name)
+			   ;; top list
+			   (s:ul 'id "LinkedList1" 'class "LinkedList"
+				 (s:li
+				  "Runs"
+				  (common:htree->html runs-htree
+						      '()
+						      (lambda (x p)
+							(let* ((targ-path (string-intersperse p "/"))
+                                                               (full-path (conc linktree "/" targ-path))
+                                                               (run-name  (car (reverse p))))
+                                                          (if (and (common:file-exists? full-path)
+                                                                   (directory?   full-path)
+                                                                   (file-write-access? full-path))
+                                                              (s:a run-name 'href (conc targ-path "/run-summary.html"))
+                                                              (begin
+                                                                (debug:print 0 *default-log-port* "INFO: Can't create " targ-path "/run-summary.html")
+                                                                (conc run-name " (Not able to create summary at " targ-path ")")))))))))))
+          (close-output-port oup)
+	  (common:simple-file-release-lock lockfile)
+               
+	  (for-each
+	   (lambda (run)
+	     (let* ((test-subpath (tests:run-record->test-path run numkeys))
+		    (run-id       (db:get-value-by-header run header "id"))
+                    (run-dir      (tests:run-record->test-path run numkeys))
+		    (test-dats    (rmt:get-tests-for-run
+				   run-id
+                                   "%/"       ;; testnamepatt
+				   '()        ;; states
+				   '()        ;; statuses
+				   #f         ;; offset
+				   #f         ;; num-to-get
+				   #f         ;; hide/not-hide
+				   #f         ;; sort-by
+				   #f         ;; sort-order
+				   #f         ;; 'shortlist                           ;; qrytype
+                                   0         ;; last update
+				   #f))
+                    (tests-tree-dat (map (lambda (test-dat)
+                                         ;; (tests:run-record->test-path x numkeys))
+                                         (let* ((test-name  (db:test-get-testname test-dat))
+                                                (item-path  (db:test-get-item-path test-dat))
+                                                (full-name  (db:test-make-full-name test-name item-path))
+                                                (path-parts (string-split full-name)))
+                                           path-parts))
+                                       test-dats))
+                    (tests-htree (common:list->htree tests-tree-dat))
+                    (html-dir    (conc linktree "/" (string-intersperse run-dir "/")))
+                    (html-path   (conc html-dir "/run-summary.html"))
+                    (oup         (if (and (common:file-exists? html-dir)
+                                          (directory?   html-dir)
+                                          (file-write-access? html-dir))
+                                     (open-output-file  html-path)
+                                     #f)))
+               ;; (print "run-dir: " run-dir ", tests-tree-dat: " tests-tree-dat)
+               (if oup
+                   (begin
+                     (s:output-new
+                      oup
+                      (s:html tests:css-jscript-block
+                              (s:title "Summary for " area-name)
+                              (s:body 'onload "addEvents();"
+                                      (s:h1 "Summary for " (string-intersperse run-dir "/"))
+                                      ;; top list
+                                      (s:ul 'id "LinkedList1" 'class "LinkedList"
+                                            (s:li
+                                             "Tests"
+                                             (common:htree->html tests-htree
+                                                                 '()
+                                                                 (lambda (x p)
+                                                                   (let* ((targ-path (string-intersperse p "/"))
+                                                                          (test-name (car p))
+                                                                          (item-path ;; (if (> (length p) 2) ;; test-name + run-name
+                                                                           (string-intersperse p "/"))
+                                                                          (full-targ (conc html-dir "/" targ-path))
+                                                                          (std-file  (conc full-targ "/test-summary.html"))
+                                                                          (alt-file  (conc full-targ "/megatest-rollup-" test-name ".html"))
+                                                                          (html-file (if (common:file-exists? alt-file)
+                                                                                         alt-file
+                                                                                         std-file))
+                                                                          (run-name  (car (reverse p))))
+                                                                     ;; NOTE(review): this asks for a path that does not exist
+                                                                     ;; yet is a writable directory, so tests:summarize-test
+                                                                     ;; looks unreachable here - confirm the intended test.
+                                                                     (if (and (not (common:file-exists? full-targ))
+                                                                              (directory? full-targ)
+                                                                              (file-write-access? full-targ))
+                                                                         (tests:summarize-test 
+                                                                          run-id 
+                                                                          (rmt:get-test-id run-id test-name item-path)))
+                                                                     (if (common:file-exists? full-targ)
+                                                                         (s:a run-name 'href html-file)
+                                                                         (begin
+                                                                           (debug:print 0 *default-log-port* "ERROR: can't access " full-targ)
+                                                                           (conc "No summary for " run-name)))))
+                                                                 ))))))
+                     (close-output-port oup)))))
+           runs)
+          #t)
+	#f)))
+
+
+
+
+;; Push host and resource figures for a test to the central db via
+;; rmt:general-call.
+;;
+;; The 'update-test-rundat call is always made; cpuload, diskfree and minutes
+;; are sent as -1 when they are #f. The remaining calls are made only when
+;; their inputs are present: cpuload and diskfree together, minutes alone,
+;; uname and hostname together.
+(define (tests:update-central-meta-info run-id test-id cpuload diskfree minutes uname hostname)
+  (let ((or-unknown (lambda (val) (or val -1))))
+    (rmt:general-call 'update-test-rundat
+                      run-id
+                      test-id
+                      (current-seconds)
+                      (or-unknown cpuload)
+                      (or-unknown diskfree)
+                      -1
+                      (or-unknown minutes)))
+  (cond
+   ((and cpuload diskfree)
+    (rmt:general-call 'update-cpuload-diskfree run-id cpuload diskfree test-id)))
+  (cond
+   (minutes
+    (rmt:general-call 'update-run-duration run-id minutes test-id)))
+  (cond
+   ((and uname hostname)
+    (rmt:general-call 'update-uname-host run-id uname hostname test-id))))
+  
+;; This one is for running with no db access (i.e. via rmt: internally)
+;;
+;; Samples the cpu load, the free disk space of the current directory, the
+;; uname string and the host name, then forwards them together with minutes
+;; to tests:update-central-meta-info.
+;; db, work-area and remtries are accepted but not used.
+(define (tests:set-full-meta-info db test-id run-id minutes work-area remtries)
+  (let* ((load-now  (get-cpu-load))
+         (free-disk (get-df (current-directory)))
+         (os-info   (get-uname "-srvpio"))
+         (host      (get-host-name)))
+    (tests:update-central-meta-info run-id test-id load-now free-disk minutes os-info host)))
+    
+;; Return the steps of a test as a list of 8-element vectors
+;;
+;;   #<stepname start end status duration logfile comment id>
+;;
+;; built from the table produced by tests:process-steps-table. Numeric start
+;; and end times are converted with seconds->time-string. Steps are ordered by
+;; start time, with ties broken by id; when either start time is not a number
+;; the two are compared as strings.
+(define (tests:get-compressed-steps run-id test-id)
+  ;; numeric times become time strings, anything else passes through unchanged
+  (define (->time-string t)
+    (if (number? t)
+        (seconds->time-string t)
+        t))
+  ;; ordering predicate on the raw (unconverted) step vectors
+  (define (step<? a b)
+    (let ((time-a (vector-ref a 1))
+          (time-b (vector-ref b 1))
+          (id-a   (vector-ref a 7))
+          (id-b   (vector-ref b 7)))
+      (cond
+       ((not (and (number? time-a) (number? time-b)))
+        (string<? (conc time-a) (conc time-b)))
+       ((< time-a time-b)   #t)
+       ((eq? time-a time-b) (< id-a id-b))
+       (else                #f))))
+  (let* ((steps-data (rmt:get-steps-for-test run-id test-id))
+         (comprsteps (tests:process-steps-table steps-data)))
+    (map (lambda (step)
+           (vector
+            (vector-ref step 0)                  ;; stepname  0
+            (->time-string (vector-ref step 1))  ;; starttime 1
+            (->time-string (vector-ref step 2))  ;; endtime   2
+            (vector-ref step 3)                  ;; status    3
+            (vector-ref step 4)                  ;; duration  4
+            (vector-ref step 5)                  ;; logfile   5
+            (vector-ref step 6)                  ;; comment   6
+            (vector-ref step 7)))                ;; id        7
+         (sort (hash-table-values comprsteps) step<?))))
+
+
+;; Summarize one test into the file test-summary.html inside the test's run directory:
+;; a table of run/test ids, name, item path, state/status, date and duration, then the final log link and a per-step table.
+(define (tests:summarize-test run-id test-id)
+  (let* ((test-dat  (rmt:get-test-info-by-id run-id test-id))
+	 (out-dir   (db:test-get-rundir test-dat))
+	 (out-file  (conc out-dir "/test-summary.html")))
+    ;; first verify we are able to write the output file; if not, log an error and skip the summary
+    (if (not (file-write-access? out-dir))
+	(debug:print 0 *default-log-port* "ERROR: cannot write test-summary.html to " out-dir)
+	(let* (;; (steps-dat (rmt:get-steps-for-test run-id test-id))
+	       (test-name (db:test-get-testname test-dat))
+	       (item-path (db:test-get-item-path test-dat))
+	       (full-name (db:test-make-full-name test-name item-path))
+	       (oup       (open-output-file out-file))
+	       (status    (db:test-get-status   test-dat))
+	       (color     (common:get-color-from-status status))
+	       (logf      (db:test-get-final_logf test-dat))
+	       (steps-dat (tests:get-compressed-steps run-id test-id)))
+	  ;; (dcommon:get-compressed-steps #f 1 30045)
+	  ;; (#("wasting_time" "23:36:13" "23:36:21" "0" "8.0s" "wasting_time.log"))
+	  
+	  (s:output-new
+	   oup
+	   (s:html
+	    (s:title "Summary for " full-name)
+	    (s:body 
+	     (s:h2 "Summary for " full-name)
+	     (s:table 'cellspacing "0" 'border "1"
+		      (s:tr (s:td "run id")   (s:td (db:test-get-run_id   test-dat))
+			    (s:td "test id")  (s:td (db:test-get-id       test-dat)))
+		      (s:tr (s:td "testname") (s:td test-name)
+			    (s:td "itempath") (s:td item-path))
+		      (s:tr (s:td "state")    (s:td (db:test-get-state    test-dat))
+			    (s:td "status")   (s:td (s:a 'href logf (s:font 'color color status))))
+		      (s:tr (s:td "TestDate") (s:td (seconds->work-week/day-time 
+						     (db:test-get-event_time test-dat)))
+			    (s:td "Duration") (s:td (seconds->hr-min-sec (db:test-get-run_duration test-dat)))))
+	     (s:h3 "Log files")
+	     (s:table 
+	      'cellspacing "0" 'border "1"
+	      (s:tr (s:td "Final log")(s:td (s:a 'href logf logf))))
+	     (s:table
+	      'cellspacing "0" 'border "1"
+	      (s:tr (s:td "Step Name")(s:td "Start")(s:td "End")(s:td "Status")(s:td "Duration")(s:td "Log File"))
+	      (map (lambda (step-dat)
+		     (s:tr (s:td (tdb:steps-table-get-stepname step-dat))
+			   (s:td (tdb:steps-table-get-start    step-dat))
+			   (s:td (tdb:steps-table-get-end      step-dat))
+			   (s:td (tdb:steps-table-get-status   step-dat))
+			   (s:td (tdb:steps-table-get-runtime  step-dat))
+			   (s:td (let ((step-log (tdb:steps-table-get-log-file step-dat)))
+				   (s:a 'href step-log step-log)))))
+		   steps-dat))
+	     )))
+	  (close-output-port oup)))))
+	  
+	  
+;; MUST BE CALLED local!
+;; Returns the matching test paths from the db; when fnamepatt is given, each existing path is globbed for fnamepatt and the hits are appended together.
+(define (tests:test-get-paths-matching keynames target fnamepatt #!key (res '()))
+  ;; BUG: Move the values derived from args to parameters and push to megatest.scm
+  (let* ((testpatt   (or (args:get-arg "-testpatt")(args:get-arg ":testpatt") "%")) ;; was "-testpatt" twice; now mirrors the -x / :x pairs below
+	 (statepatt  (or (args:get-arg "-state")   (args:get-arg ":state")    "%"))
+	 (statuspatt (or (args:get-arg "-status")  (args:get-arg ":status")   "%"))
+	 (runname    (or (args:get-arg "-runname") (args:get-arg ":runname")  "%"))
+	 (paths-from-db (rmt:test-get-paths-matching-keynames-target-new keynames target res
+					testpatt
+					statepatt
+					statuspatt
+					runname)))
+    (if fnamepatt
+	(apply append 
+	       (map (lambda (p)
+		      (if (directory-exists? p)
+			  (let ((glob-query (conc p "/" fnamepatt)))
+			    (handle-exceptions
+				exn
+				(with-input-from-pipe
+				    (conc "echo " glob-query)
+				  read-lines)  ;; we aren't going to try too hard. If glob breaks it is likely because someone tried to do */*/*.log or similar
+			      (glob glob-query)))
+			  '()))
+		    paths-from-db))
+	paths-from-db)))
+
+			      
+;; Return the keys from testkeynames whose tests are still worth running (result is in reverse order of testkeynames):
+;;   drop tests already COMPLETED with a passing-type status, INCOMPLETE/KILLED tests, and tests whose waiton has failed.
+(define (tests:filter-non-runnable run-id testkeynames testrecordshash)
+  (let ((runnables '()))
+    (for-each
+     (lambda (testkeyname)
+       (let* ((test-record (hash-table-ref testrecordshash testkeyname))
+	      (test-name   (tests:testqueue-get-testname  test-record))
+	      (itemdat     (tests:testqueue-get-itemdat   test-record))
+	      (item-path   (tests:testqueue-get-item_path test-record))
+	      (waitons     (tests:testqueue-get-waitons   test-record))
+	      (keep-test   #t)
+	      (test-id     (rmt:get-test-id run-id test-name item-path))
+	      (tdat        (rmt:get-testinfo-state-status run-id test-id))) ;; (cdb:get-test-info-by-id *runremote* test-id)))
+	 (if tdat
+	     (begin
+	       ;; Look at the test state and status
+	       (if (or (and (member (db:test-get-status tdat) 
+				    '("PASS" "WARN" "WAIVED" "CHECK" "SKIP"))
+			    (equal? (db:test-get-state tdat) "COMPLETED"))
+		       (member (db:test-get-state tdat)
+				    '("INCOMPLETE" "KILLED")))
+		   (set! keep-test #f))
+
+	       ;; examine waitons for any fails. If it is FAIL or INCOMPLETE then eliminate this test
+	       ;; from the runnable list
+	       (if keep-test
+		   (for-each (lambda (waiton)
+			       ;; for now we are waiting only on the parent test
+			       (let* ((parent-test-id (rmt:get-test-id run-id waiton ""))
+				      ;; look up the waiton (parent) record, not this test's own record
+				      (wtdat          (and parent-test-id
+							   (rmt:get-testinfo-state-status run-id parent-test-id))))
+				 ;; a waiton with no record yet cannot have failed, so it does not disqualify
+				 (if (and wtdat
+					  (or (and (equal? (db:test-get-state wtdat) "COMPLETED")
+						   (member (db:test-get-status wtdat) '("FAIL" "ABORT")))
+					      (member (db:test-get-status wtdat)  '("KILLED"))
+					      (member (db:test-get-state wtdat)   '("INCOMPLETE"))))
+				     (set! keep-test #f)))) ;; no point in running this one again
+			     waitons))))
+	 (if keep-test (set! runnables (cons testkeyname runnables)))))
+     testkeynames)
+    runnables))
+
+;;======================================================================
+;; test steps
+;;======================================================================
+
+;; teststep-set-status! used to be here
+
+(define (test-get-kill-request run-id test-id) ;; #t when the test record exists and its state is "KILLREQ", else #f
+  (let ((rec (rmt:get-test-info-by-id run-id test-id)))
+    (if rec
+	(equal? (test:get-state rec) "KILLREQ") #f)))
+
+(define (test:tdb-get-rundat-count tdb) ;; => number of rows in test_rundat, or 0 when tdb is #f
+  (if tdb
+      (let ((res 0))
+	(sqlite3:for-each-row
+	 (lambda (count)
+	   (set! res count))
+	 tdb
+	 "SELECT count(id) FROM test_rundat;")
+	res)   ;; previously this value was discarded and 0 was always returned
+      0))
+
+;; (define (tests:set-partial-meta-info test-id run-id minutes work-area)
+#;(define (tests:set-partial-meta-info test-id run-id minutes work-area remtries)
+  (let* ((cpuload  (get-cpu-load))
+	 (diskfree (get-df (current-directory)))
+	 (remtries 10))
+    (handle-exceptions
+     exn
+     (if (> remtries 0)
+	 (begin
+	   (print-call-chain (current-error-port))
+	   (debug:print-info 0 *default-log-port* "WARNING: failed to set meta info. Will try " remtries " more times")
+	   (set! remtries (- remtries 1))
+	   (thread-sleep! 10)
+	   (tests:set-full-meta-info db test-id run-id minutes work-area (- remtries 1)))
+	 (let ((err-status ((condition-property-accessor 'sqlite3 'status #f) exn)))
+	   (debug:print-error 0 *default-log-port* "tried for over a minute to update meta info and failed. Giving up")
+	   (debug:print 0 *default-log-port* "EXCEPTION: database probably overloaded or unreadable.")
+	   (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
+	   (debug:print 5 *default-log-port* "exn=" (condition->list exn))
+	   (debug:print 0 *default-log-port* " status:  " ((condition-property-accessor 'sqlite3 'status) exn))
+	   (print-call-chain (current-error-port))))
+     (tests:update-testdat-meta-info db test-id work-area cpuload diskfree minutes)
+  )))
+	 
 )
 	

Index: rmtmod.scm
==================================================================
--- rmtmod.scm
+++ rmtmod.scm
@@ -60,11 +60,11 @@
 ;;======================================================================
 
 ;; if a server is either running or in the process of starting call client:setup
 ;; else return #f to let the calling proc know that there is no server available
 ;;
-(define (rmt:get-connection-info areapath #!key (area-dat #f)) ;; TODO: push areapath down.
+#;(define (rmt:get-connection-info areapath #!key (area-dat #f)) ;; TODO: push areapath down.
   (let* ((runremote (or area-dat *runremote*))
 	 (cinfo     (if (remote? runremote)
 			(remote-conndat runremote)
 			#f)))
 	  (if cinfo
@@ -210,11 +210,11 @@
 ;;      (else (extras-case-11 *default-log-port* runremote cmd params attemptnum rid)))))
 
 ;; bunch of small functions factored out of send-receive to make debug easier
 ;;
 
-(define (extras-case-11 *default-log-port* runremote cmd params attemptnum rid)
+#;(define (extras-case-11 *default-log-port* runremote cmd params attemptnum rid)
   ;; (mutex-unlock! *rmt-mutex*)
   (debug:print-info 12 *default-log-port* "rmt:send-receive, case  9")
   ;; (mutex-lock! *rmt-mutex*)
   (let* ((conninfo (remote-conndat runremote))
 	 (dat      (case (remote-transport runremote)
@@ -224,25 +224,25 @@
 					     ;; is needed to deal with
 					     ;; attemtped
 					     ;; communication to
 					     ;; servers that have gone
 					     ;; away
-			      (http-transport:client-api-send-receive 0 conninfo cmd params)
+				 #;(http-transport:client-api-send-receive 0 conninfo cmd params)
 			      ((commfail)(vector #f "communications fail"))
 			      ((exn)(vector #f "other fail" (print-call-chain)))))
 		     (else
 		      (debug:print 0 *default-log-port* "ERROR: transport " (remote-transport runremote) " not supported")
 		      (exit))))
 	 (success  (if (vector? dat) (vector-ref dat 0) #f))
 	 (res      (if (vector? dat) (vector-ref dat 1) #f)))
     (if (and (vector? conninfo) (< 5 (vector-length conninfo)))
-	(http-transport:server-dat-update-last-access conninfo) ;; refresh access time
+	#t #;(http-transport:server-dat-update-last-access conninfo) ;; refresh access time
 	(begin
 	  (debug:print 0 *default-log-port* "INFO: Should not get here! conninfo=" conninfo)
 	  (set! conninfo #f)
 	  (remote-conndat-set! *runremote* #f) ;; NOTE: *runremote* is global copy of runremote. Purpose: factor out global.
-	  (http-transport:close-connections  area-dat: runremote)))
+	  #;(http-transport:close-connections  area-dat: runremote)))
     (debug:print-info 13 *default-log-port* "rmt:send-receive, case  9. conninfo=" conninfo " dat=" dat " runremote = " runremote)
     (mutex-unlock! *rmt-mutex*)
     (if success ;; success only tells us that the transport was
 	;; successful, have to examine the data to see if
 	;; there was a detected issue at the other end
@@ -352,11 +352,11 @@
 (define (rmt:send-receive-no-auto-client-setup connection-info cmd run-id params)
   (let* ((run-id   (if run-id run-id 0))
 	 (res  	   (handle-exceptions
 		    exn
 		    #f
-		    (http-transport:client-api-send-receive run-id connection-info cmd params))))
+		    #f #;(http-transport:client-api-send-receive run-id connection-info cmd params)))) ;; transport call disabled: give handle-exceptions an explicit #f body so res is #f
     (if (and res (vector-ref res 0))
 	(vector-ref res 1) ;;; YES!! THIS IS CORRECT!! CHANGE IT HERE, THEN CHANGE rmt:send-receive ALSO!!!
 	#f)))
 
 ;; ;; Wrap json library for strings (why the ports crap in the first place?)
@@ -934,11 +934,11 @@
 
 (define (extras-transport-failed *default-log-port* *rmt-mutex* attemptnum runremote cmd rid params)
   (debug:print 0 *default-log-port* "WARNING: communication failed. Trying again, try num: " attemptnum)
   (mutex-lock! *rmt-mutex*)
   (remote-conndat-set!    runremote #f)
-  (http-transport:close-connections area-dat: runremote)
+  #;(http-transport:close-connections area-dat: runremote)
   (remote-server-url-set! runremote #f)
   (mutex-unlock! *rmt-mutex*)
   (debug:print-info 12 *default-log-port* "rmt:send-receive, case  9.1")
   (rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1)))
   
@@ -962,14 +962,14 @@
 						 ;; want to ease off
 						 ;; the queries
       (let ((wait-delay (+ attemptnum (* attemptnum 10))))
 	(debug:print 0 *default-log-port* "WARNING: server is overloaded. Delaying " wait-delay " seconds and trying call again.")
 	(mutex-lock! *rmt-mutex*)
-	(http-transport:close-connections area-dat: runremote)
+	#;(http-transport:close-connections area-dat: runremote)
 	(set! *runremote* #f) ;; force starting over
 	(mutex-unlock! *rmt-mutex*)
 	(thread-sleep! wait-delay)
 	(rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1)))
       res)) ;; All good, return res
 
 ;; (include "common_records.scm")
 )

Index: servermod.scm
==================================================================
--- servermod.scm
+++ servermod.scm
@@ -19,18 +19,27 @@
 ;;======================================================================
 
 (declare (unit servermod))
 (declare (uses commonmod))
 (declare (uses dbmod))
+(declare (uses mtconfigf))
+(declare (uses mtargs))
+(declare (uses tasksmod))
 
 (module servermod
 	*
 	
-(import scheme chicken data-structures extras)
-(import (prefix sqlite3 sqlite3:) posix typed-records srfi-18 srfi-69 format ports srfi-1 matchable)
+(import scheme chicken data-structures extras files)
+(import (prefix sqlite3 sqlite3:) posix typed-records srfi-18
+	srfi-69 format ports srfi-1 matchable
+	directory-utils md5 message-digest regex
+	stack)
 (import commonmod)
 (import dbmod)
+(import tasksmod)
+(import (prefix mtargs args:))
+(import (prefix mtconfigf configf:))
 
 ;; (use (prefix ulex ulex:))
 
 (include "common_records.scm")
 
@@ -314,53 +323,19 @@
 	       servr))
     (if (and host port)
 	(conc host ":" port)
 	#f)))
 
-
 ;; timeout is hms string: 1h 5m 3s, default is 1 minute
 ;;
 (define (server:expiration-timeout)
   (let ((tmo (configf:lookup *configdat* "server" "timeout")))
     (if (and (string? tmo)
 	     (common:hms-string->seconds tmo)) ;; BUG: hms-string->seconds is broken, if given "10" returns 0. Also, it doesn't belong in this logic unless the string->number is changed below
         (* 3600 (string->number tmo))
 	60)))
 
-;; ping the given server
-;;
-(define (server:check-server server-record)
-  (let* ((server-url (server:record->url server-record))
-         (res        (case *transport-type*
-                       ((http)(server:ping server-url))
-                       ;; ((nmsg)(nmsg-transport:ping (tasks:hostinfo-get-interface server)
-                       )))
-    (if res
-        server-url
-	#f)))
-
-;; no longer care if multiple servers are started by accident. older servers will drop off in time.
-;;
-(define (server:check-if-running areapath) ;;  #!key (numservers "2"))
-  (let* ((ns            (server:get-num-servers))
-	 (servers       (server:get-best (server:get-list areapath))))
-    ;; (print "servers: " servers " ns: " ns)
-    (if (or (and servers
-		 (null? servers))
-	    (not servers)
-	    (and (list? servers)
-		 (< (length servers) (random ns)))) ;; somewhere between 0 and numservers
-        #f
-        (let loop ((hed (car servers))
-                   (tal (cdr servers)))
-          (let ((res (server:check-server hed)))
-            (if res
-                res
-                (if (null? tal)
-                    #f
-                    (loop (car tal)(cdr tal)))))))))
-
 ;;======================================================================
 ;; P K T S   S T U F F 
 ;;======================================================================
 
 ;; ???
@@ -376,21 +351,10 @@
 ;;======================================================================
 
 ;; Call this to start the actual server
 ;;
 
-;; all routes though here end in exit ...
-;;
-;; start_server
-;;
-(define (server:launch run-id transport-type)
-  (case transport-type
-    ((http)(http-transport:launch))
-    ;;((nmsg)(nmsg-transport:launch run-id))
-    ;;((rpc)  (rpc-transport:launch run-id))
-    (else (debug:print-error 0 *default-log-port* "unknown server type " transport-type))))
-
 ;;======================================================================
 ;; S E R V E R   U T I L I T I E S 
 ;;======================================================================
 
 ;; Get the transport
@@ -425,176 +389,22 @@
     ((fs)   result)
     (else 
      (debug:print-error 0 *default-log-port* "unrecognised transport type: " *transport-type*)
      result)))
 
-;; Given a run id start a server process    ### NOTE ### > file 2>&1 
-;; if the run-id is zero and the target-host is set 
-;; try running on that host
-;;   incidental: rotate logs in logs/ dir.
-;;
-(define  (server:run areapath) ;; areapath is *toppath* for a given testsuite area
-  (let* ((curr-host   (get-host-name))
-         ;; (attempt-in-progress (server:start-attempted? areapath))
-         ;; (dot-server-url (server:check-if-running areapath))
-	 (curr-ip     (server:get-best-guess-address curr-host))
-	 (curr-pid    (current-process-id))
-	 (homehost    (common:get-homehost)) ;; configf:lookup *configdat* "server" "homehost" ))
-	 (target-host (car homehost))
-	 (testsuite   (common:get-testsuite-name))
-	 (logfile     (conc areapath "/logs/server.log")) ;; -" curr-pid "-" target-host ".log"))
-	 (cmdln (conc (common:get-megatest-exe)
-		      " -server " (or target-host "-") (if (equal? (configf:lookup *configdat* "server" "daemonize") "yes")
-							   " -daemonize "
-							   "")
-		      ;; " -log " logfile
-		      " -m testsuite:" testsuite)) ;; (conc " >> " logfile " 2>&1 &")))))
-	 (log-rotate  (make-thread common:rotate-logs  "server run, rotate logs thread"))
-         (load-limit  (configf:lookup-number *configdat* "jobtools" "max-server-start-load" default: 3.0)))
-    ;; we want the remote server to start in *toppath* so push there
-    (push-directory areapath)
-    (debug:print 0 *default-log-port* "INFO: Trying to start server (" cmdln ") ...")
-    (thread-start! log-rotate)
-    
-    ;; host.domain.tld match host?
-    (if (and target-host 
-	     ;; look at target host, is it host.domain.tld or ip address and does it 
-	     ;; match current ip or hostname
-	     (not (string-match (conc "("curr-host "|" curr-host"\\..*)") target-host))
-	     (not (equal? curr-ip target-host)))
-	(begin
-	  (debug:print-info 0 *default-log-port* "Starting server on " target-host ", logfile is " logfile)
-	  (setenv "TARGETHOST" target-host)))
-      
-    (setenv "TARGETHOST_LOGF" logfile)
-    (thread-sleep! (/ (random 5000) 1000)) ;; add about a random (up to 5 seconds) initial delay. It seems pretty common that many running tests request a server at the same time
-    (common:wait-for-normalized-load load-limit " delaying server start due to load" target-host) ;; do not try starting servers on an already overloaded machine, just wait forever
-    (system (conc "nbfake " cmdln))
-    (unsetenv "TARGETHOST_LOGF")
-    (if (get-environment-variable "TARGETHOST")(unsetenv "TARGETHOST"))
-    (thread-join! log-rotate)
-    (pop-directory)))
-
 (define (server:get-client-signature) 
   ;; (if *my-client-signature* *my-client-signature*
   ;; (let ((sig
   (server:mk-signature)) ;; )
 ;; (set! *my-client-signature* sig)
 ;;        *my-client-signature*)))
 
-;; kind start up of servers, wait 40 seconds before allowing another server for a given
-;; run-id to be launched
-(define (server:kind-run areapath)
-  (if (not (server:check-if-running areapath)) ;; why try if there is already a server running?
-      (let* ((last-run-dat (hash-table-ref/default *server-kind-run* areapath '(0 0))) ;; callnum, whenrun
-	     (call-num     (car last-run-dat))
-	     (when-run     (cadr last-run-dat))
-	     (run-delay    (+ (case call-num
-				((0)    0)
-				((1)   20)
-				((2)  300)
-				(else 600))
-			      (random 5)))   ;; add a small random number just in case a lot of jobs hit the work hosts simultaneously
-	     (lock-file    (conc areapath "/logs/server-start.lock")))
-	(if	(> (- (current-seconds) when-run) run-delay)
-		(begin
-		  (common:simple-file-lock-and-wait lock-file expire-time: 15)
-		  (server:run areapath)
-		  (thread-sleep! 2) ;; don't release the lock for at least a few seconds
-		  (common:simple-file-release-lock lock-file)))
-	(hash-table-set! *server-kind-run* areapath (list (+ call-num 1)(current-seconds))))))
-
-(define (server:start-and-wait areapath #!key (timeout 60))
-  (let ((give-up-time (+ (current-seconds) timeout)))
-    (let loop ((server-url (server:check-if-running areapath))
-	       (try-num    0))
-      (if (or server-url
-	      (> (current-seconds) give-up-time)) ;; server-url will be #f if no server available.
-	  server-url
-	  (let ((num-ok (length (server:get-best (server:get-list areapath)))))
-	    (if (and (> try-num 0)  ;; first time through simply wait a little while then try again
-		     (< num-ok 1))  ;; if there are no decent candidates for servers then try starting a new one
-		(server:kind-run areapath))
-	    (thread-sleep! 5)
-	    (loop (server:check-if-running areapath)
-		  (+ try-num 1)))))))
-
-(define server:try-running server:run) ;; there is no more per-run servers ;; REMOVE ME. BUG.
-
 (define (server:kill servr)
   (match-let (((mod-time hostname port start-time pid)
 	       servr))
     (tasks:kill-server hostname pid)))
 
-;; called in megatest.scm, host-port is string hostname:port
-;;
-;; NOTE: This is NOT called directly from clients as not all transports support a client running
-;;       in the same process as the server.
-;;
-(define (server:ping host-port-in #!key (do-exit #f))
-  (let ((host:port (if (not host-port-in) ;; use read-dotserver to find
-		       #f ;; (server:check-if-running *toppath*)
-		;; (if (number? host-port-in) ;; we were handed a server-id
-		;; 	   (let ((srec (tasks:get-server-by-id (db:delay-if-busy (tasks:open-db)) host-port-in)))
-		;; 	     ;; (print "srec: " srec " host-port-in: " host-port-in)
-		;; 	     (if srec
-		;; 		 (conc (vector-ref srec 3) ":" (vector-ref srec 4))
-		;; 		 (conc "no such server-id " host-port-in)))
-		       host-port-in))) ;; )
-    (let* ((host-port (if host:port
-			  (let ((slst (string-split   host:port ":")))
-			    (if (eq? (length slst) 2)
-				(list (car slst)(string->number (cadr slst)))
-				#f))
-			  #f)))
-;;	   (toppath       (launch:setup)))
-      ;; (print "host-port=" host-port)
-      (if (not host-port)
-	  (begin
-	    (if host-port-in
-		(debug:print 0 *default-log-port*  "ERROR: bad host:port"))
-	    (if do-exit (exit 1))
-	    #f)
-	  (let* ((iface      (car host-port))
-		 (port       (cadr host-port))
-		 (server-dat (http-transport:client-connect iface port))
-		 (login-res  (rmt:login-no-auto-client-setup server-dat)))
-	    (if (and (list? login-res)
-		     (car login-res))
-		(begin
-		  ;; (print "LOGIN_OK")
-		  (if do-exit (exit 0))
-		  #t)
-		(begin
-		  ;; (print "LOGIN_FAILED")
-		  (if do-exit (exit 1))
-		  #f)))))))
-
-;; run ping in separate process, safest way in some cases
-;;
-(define (server:ping-server ifaceport)
-  (with-input-from-pipe 
-   (conc (common:get-megatest-exe) " -ping " ifaceport)
-   (lambda ()
-     (let loop ((inl (read-line))
-		(res "NOREPLY"))
-       (if (eof-object? inl)
-	   (case (string->symbol res)
-	     ((NOREPLY)  #f)
-	     ((LOGIN_OK) #t)
-	     (else       #f))
-	   (loop (read-line) inl))))))
-
-;; NOT USED (well, ok, reference in rpc-transport but otherwise not used).
-;;
-(define (server:login toppath)
-  (lambda (toppath)
-    (set! *db-last-access* (current-seconds)) ;; might not be needed.
-    (if (equal? *toppath* toppath)
-	#t
-	#f)))
-
 ;; (define server:sync-lock-token "SERVER_SYNC_LOCK")
 ;; (define (server:release-sync-lock)
 ;;   (db:no-sync-del! *no-sync-db* server:sync-lock-token))
 ;; (define (server:have-sync-lock?)
 ;;   (let* ((have-lock-pair (db:no-sync-get-lock *no-sync-db* server:sync-lock-token))

Index: subrun-inc.scm
==================================================================
--- subrun-inc.scm
+++ subrun-inc.scm
@@ -14,231 +14,5 @@
 ;;     GNU General Public License for more details.
 ;; 
 ;;     You should have received a copy of the GNU General Public License
 ;;     along with Megatest.  If not, see <http://www.gnu.org/licenses/>.
 
-;;  strftime('%m/%d/%Y %H:%M:%S','now','localtime')
-
-(define (subrun:subrun-test-initialized? test-run-dir)
-  (if (and (common:file-exists? (conc test-run-dir "/subrun-area") )
-           (common:file-exists? (conc test-run-dir "/testconfig.subrun") ))
-      #t
-      #f))
-
-(define (subrun:launch-dashboard test-run-dir)
-  (if (subrun:subrun-test-initialized? test-run-dir)
-      (let* ((subarea (subrun:get-runarea test-run-dir)))
-        (if (and subarea (common:file-exists? subarea))
-            (system (conc "cd " subarea ";env -i PATH=$PATH DISPLAY=$DISPLAY HOME=$HOME USER=$USER dashboard &"))))))
-
-(define (subrun:subrun-removed? test-run-dir)
-  (if (subrun:subrun-test-initialized? test-run-dir)
-      (let ((flagfile (conc test-run-dir "/subrun.removed")))
-        (if (common:file-exists? flagfile)
-            #t
-            #f))
-      #t))
-
-(define (subrun:set-subrun-removed test-run-dir)
-  (let ((flagfile (conc test-run-dir "/subrun.removed")))
-    (if (and (subrun:subrun-test-initialized? test-run-dir) (not (common:file-exists? flagfile)))
-        (with-output-to-file flagfile
-          (lambda () (print (current-seconds)))))))
-
-(define (subrun:unset-subrun-removed test-run-dir)
-  (let ((flagfile (conc test-run-dir "/subrun.removed")))
-    (if (and (subrun:subrun-test-initialized? test-run-dir) (common:file-exists? flagfile))
-        (delete-file flagfile))))
-
-
-(define (subrun:testconfig-defines-subrun? testconfig)
-  (configf:lookup testconfig "subrun" "runwait")) ;; we use runwait as the flag that a subrun is requested
-
-(define (subrun:initialize-toprun-test  testconfig test-run-dir)
-  (let ((ra (configf:lookup testconfig "subrun" "run-area"))
-        (logpro (configf:lookup testconfig "subrun" "logpro"))
-        (symlink-target (conc test-run-dir "/subrun-area"))
-        )
-    (if (not ra)      ;; when runarea is not set we default to *toppath*. However 
-	(let ((fallback-run-area (or *toppath* (conc test-run-dir "/subrun"))))
-	  ;; we need to force the setting in the testconfig so it will
-          ;; be preserved in the testconfig.subrun file
-	  (configf:set-section-var testconfig "subrun" "run-area" fallback-run-area)
-	  (set! ra fallback-run-area)))
-    (configf:set-section-var testconfig "logpro" "subrun" logpro) ;; append the logpro rules to the logpro section as stepname subrun
-    (if (common:file-exists? symlink-target)
-        (delete-file symlink-target))
-    (create-symbolic-link ra symlink-target)
-    (configf:write-alist testconfig "testconfig.subrun")))
-
-(define (subrun:set-state-status test-run-dir state status new-state-status)
-  (if (and (not (subrun:subrun-removed? test-run-dir)) (subrun:subrun-test-initialized? test-run-dir))
-      (let* ((action-switches-str
-              (conc "-set-state-status "new-state-status
-                    (if state (conc " -state "state) "")
-                    (if status (conc " -status "status) "")))
-             (log-prefix
-              (subrun:sanitize-path
-               (conc "set-state-status="new-state-status
-                     (if state (conc ":state="state) "")
-                     (if status (conc "+status="status) ""))))
-             (submt-result 
-              (subrun:exec-sub-megatest test-run-dir action-switches-str log-prefix)))
-        submt-result)))
-
-(define (subrun:remove-subrun test-run-dir keep-records )
-  (if (and (not (subrun:subrun-removed? test-run-dir)) (subrun:subrun-test-initialized? test-run-dir))
-      (let* ((action-switches-str
-              (conc "-remove-runs"
-                    (if keep-records "-keep-records " "")
-                    ))
-             (remove-result
-              (subrun:exec-sub-megatest test-run-dir action-switches-str "remove")))
-        (if remove-result
-            (begin
-              (subrun:set-subrun-removed test-run-dir)
-              #t)
-            #f))
-      #t))
-
-(define (subrun:kill-subrun test-run-dir )
-  (if (and (not (subrun:subrun-removed? test-run-dir)) (subrun:subrun-test-initialized? test-run-dir))
-      (let* ((action-switches-str
-              (conc "-kill-runs" ))
-             (kill-result
-              (subrun:exec-sub-megatest test-run-dir action-switches-str "kill")))
-        kill-result)
-      #t))
-
-(define (subrun:launch-cmd test-run-dir #!optional (sub-cmd "-run")) ;; BUG: "-run" should be changed to "-rerun-clean" but current doesn't work
-  (if (subrun:subrun-removed? test-run-dir)
-      (subrun:unset-subrun-removed test-run-dir))      
-
-  (let* ((log-prefix "run")
-         (switches (subrun:selector+log-switches test-run-dir log-prefix))
-         (run-wait #t)
-         (cmd      (conc "megatest " sub-cmd " " switches" "
-                         (if run-wait "-run-wait " ""))))
-    cmd))
-
-
-(define (subrun:sanitize-path inpath)
-  (let* ((insane-pattern (irregex "[^[a-zA-Z0-9_\\-]")))
-    (regex#string-substitute insane-pattern "_" inpath #t)))
-
-(define (subrun:get-runarea test-run-dir)
-  (if (subrun:subrun-test-initialized? test-run-dir)
-      (let* ((info-alist (subrun:selector+log-alist
-                          test-run-dir
-                          "foo"))
-             (run-area   (if (list? info-alist)
-                             (alist-ref "-start-dir" info-alist equal? #f)
-                             #f)))
-        run-area)
-      #f))
-
-(define (subrun:selector+log-alist test-run-dir log-prefix)
-  (let* ((switch-def-alist (common:get-param-mapping flavor: 'config))
-         (subrunfile   (conc test-run-dir "/testconfig.subrun" ))
-         (subrundata   (with-input-from-file subrunfile read))
-         (subrunconfig (configf:alist->config subrundata))
-         (run-area     (configf:lookup subrunconfig "subrun" "run-area"))
-         (defvals      `(("start-dir" . ,(or run-area  ;; default values if not specified in subrun section of tconf
-                                             (get-environment-variable "MT_RUN_AREA_HOME")
-                                             "/no/rundir/found")) 
-                         ("run-name"  . ,(or (get-environment-variable "MT_RUNNAME") "NO-RUNNAME"))
-                         ("target"    . ,(or (get-environment-variable "MT_TARGET")  "NO-TARGET"))))
-         (switch-alist-pre  (filter-map (lambda (item)
-                                          (let* ((config-key (car item))
-                                                 (switch     (cdr item))
-                                                 (defval     (alist-ref config-key defvals equal? #f))
-                                                 (val        (or (configf:lookup subrunconfig "subrun" config-key)
-                                                                 defval)))
-                                            (if val
-                                                (cons switch val)
-                                                #f)))
-                                        switch-def-alist))
-
-         ;; testpatt may be modified if all three of mode-patt, tag-expr, and testpatt are null
-         (mode-patt     (alist-ref "-modepatt" switch-alist-pre equal? #f))
-         (tag-expr      (alist-ref "-tagexpr" switch-alist-pre equal? #f))
-         (testpatt      (alist-ref "-testpatt" switch-alist-pre equal?
-                                   (if (not (or mode-patt tag-expr)) "%" #f))) ;; testpatt is % if not
-                                                                               ;; otherwise specified
-
-         ;; define compact-stem for logfile
-         (target        (alist-ref "-target" switch-alist-pre equal? #f)) ;; want data-structures alist-ref, not alist-lib alist-ref
-         (runname       (alist-ref "-runname" switch-alist-pre equal? #f))
-
-
-         (compact-stem  (subrun:sanitize-path
-                         (conc
-                          target
-                          "-"
-                          runname
-                          "-" (or testpatt mode-patt tag-expr "NO-TESTPATT"))))
-         (logfile       (conc
-                         test-run-dir "/"
-                         (if log-prefix
-                             (conc (subrun:sanitize-path log-prefix) "-")
-                             "")
-                         compact-stem
-                         ".log"))
-         ;; swap out testpatt with modified test-patt and add -log
-         (switch-alist  (cons
-                         (cons "-log" logfile)
-                         (map (lambda (item)
-                                (if (equal? (car item) "-testpatt")
-                                    (cons "-testpatt" testpatt)
-                                    item))
-                                switch-alist-pre))))
-    switch-alist))
-    ;; note - get precmd from subrun section
-    ;;   apply to submegatest commands
-
-(define (subrun:get-log-path test-run-dir log-prefix)
-  (let* ((alist (subrun:selector+log-alist test-run-dir log-prefix))
-         (res   (alist-ref "-log" alist equal? #f)))
-    res))
-
-(define (subrun:selector+log-switches test-run-dir log-prefix)
-  (let* ((switch-alist (subrun:selector+log-alist test-run-dir log-prefix))
-         (res
-          (string-intersperse
-           (apply
-            append
-            (map
-             (lambda (x)
-               (list (car x) (cdr x)))
-             switch-alist))
-           " ")))
-    res))
-
-(define (subrun:exec-sub-megatest test-run-dir action-switches-str log-prefix)
-  (let* ((selector-switches  (subrun:selector+log-switches test-run-dir log-prefix))
-         (cmd (conc "megatest " selector-switches " " action-switches-str ))
-         (pid #f)
-         (proc (lambda ()
-                 (debug:print-info 0 *default-log-port* "Running sub megatest command: "cmd)
-                 ;;(set! pid (process-run "/usr/bin/xterm" (list ))))))
-                 (set! pid (process-run "/bin/bash" (list "-c" cmd))))))
-    (call-with-environment-variables 
-     (list (cons "PATH" (conc (get-environment-variable "PATH") ":.")))
-     (lambda  ()
-       (common:without-vars proc "^MT_.*")))
-    (let processloop ((i 0))
-      (let-values (((pid-val exit-status exit-code)(process-wait pid #t)))
-        (if (eq? pid-val 0)
-            (begin
-              (thread-sleep! 2)
-              (processloop (+ i 1)))
-            (begin
-              (debug:print-info 0 *default-log-port* "sub megatest " action-switches-str " completed with exit code " exit-code)
-              (if (eq? 0 exit-code)
-                  (begin
-                    #t)
-                  (begin
-                    #f))))))))
-
-
-
-;; (subrun:exec-sub-megatest "/nfs/pdx/disks/icf_env_disk001/bjbarcla/gwa/issues/mtdev/165/megatest/ext-tests/tests/subrun-usecases/toparea/links/SYSTEM_val/RELEASE_val/go/toptest" "-foo" "foo")

Index: subrunmod.scm
==================================================================
--- subrunmod.scm
+++ subrunmod.scm
@@ -18,18 +18,249 @@
 
 ;;======================================================================
 
 (declare (unit subrunmod))
 (declare (uses commonmod))
+(declare (uses mtconfigf))
 
 (module subrunmod
 	*
 	
 (import scheme chicken data-structures extras)
-(import (prefix sqlite3 sqlite3:) posix typed-records srfi-18 srfi-69 format ports srfi-1 matchable)
-(import commonmod)
+(import (prefix sqlite3 sqlite3:) posix typed-records srfi-18
+	srfi-69 format ports srfi-1 matchable
+	call-with-environment-variables)
+(import
+  commonmod
+  (prefix mtconfigf configf:))
 ;; (use (prefix ulex ulex:))
 
 (include "common_records.scm")
+;;  strftime('%m/%d/%Y %H:%M:%S','now','localtime')
+
+;; A test area counts as subrun-initialized only when both the
+;; "subrun-area" entry and the "testconfig.subrun" file exist in it.
+(define (subrun:subrun-test-initialized? test-run-dir)
+  (let ((present? (lambda (leaf) (common:file-exists? (conc test-run-dir leaf)))))
+    (if (and (present? "/subrun-area") (present? "/testconfig.subrun")) #t #f)))
+
+;; Start a background dashboard (env -i with only PATH, DISPLAY, HOME, USER) in the subrun's run area, if that area exists.
+(define (subrun:launch-dashboard test-run-dir)
+  (let ((run-area (and (subrun:subrun-test-initialized? test-run-dir) (subrun:get-runarea test-run-dir))))
+    (if (and run-area (common:file-exists? run-area))
+        (system (conc "cd " run-area ";env -i PATH=$PATH DISPLAY=$DISPLAY HOME=$HOME USER=$USER dashboard &")))))
+
+;; Report whether the subrun belonging to test-run-dir has been removed.
+;; A test area that was never initialized as a subrun is reported as
+;; removed (#t); otherwise the answer is whether the "subrun.removed"
+;; flag file exists in test-run-dir.
+(define (subrun:subrun-removed? test-run-dir)
+  (or (not (subrun:subrun-test-initialized? test-run-dir))
+      (if (common:file-exists? (conc test-run-dir "/subrun.removed")) #t #f)))
+
+;; Create the "subrun.removed" flag file (holding the current seconds) in an initialized subrun area, unless it exists.
+(define (subrun:set-subrun-removed test-run-dir)
+  (let ((marker (conc test-run-dir "/subrun.removed")))
+    (when (and (subrun:subrun-test-initialized? test-run-dir) (not (common:file-exists? marker)))
+      (with-output-to-file marker (lambda () (print (current-seconds)))))))
+
+;; Delete the "subrun.removed" flag file from an initialized subrun area, if the file is there.
+(define (subrun:unset-subrun-removed test-run-dir)
+  (let ((marker (conc test-run-dir "/subrun.removed")))
+    (when (and (subrun:subrun-test-initialized? test-run-dir) (common:file-exists? marker)) (delete-file marker))))
+
+
+;; "runwait" in the "subrun" section is the flag that a subrun is requested; the looked-up value is returned as-is.
+(define (subrun:testconfig-defines-subrun? testconfig) (configf:lookup testconfig "subrun" "runwait"))
+
+;; Prepare test-run-dir as the top-level side of a subrun: ensure the "subrun"
+;; section has a "run-area" (default: *toppath*, else <test-run-dir>/subrun),
+;; copy the subrun "logpro" value into section "logpro" as step "subrun",
+;; (re)create the "subrun-area" symlink and save the config as
+;; <test-run-dir>/testconfig.subrun (was cwd-relative; readers use this path).
+(define (subrun:initialize-toprun-test  testconfig test-run-dir)
+  (let* ((logpro    (configf:lookup testconfig "subrun" "logpro"))
+         (area-link (conc test-run-dir "/subrun-area"))
+         (run-area  (or (configf:lookup testconfig "subrun" "run-area")
+                        (let ((fallback (or *toppath* (conc test-run-dir "/subrun"))))
+                          (configf:set-section-var testconfig "subrun" "run-area" fallback) ;; keep the default in the saved config
+                          fallback))))
+    (configf:set-section-var testconfig "logpro" "subrun" logpro)
+    (if (common:file-exists? area-link) (delete-file area-link))
+    (create-symbolic-link run-area area-link)
+    (configf:write-alist testconfig (conc test-run-dir "/testconfig.subrun"))))
+
+;; Propagate a state/status change into the subrun by running the
+;; sub-megatest with "-set-state-status <new-state-status>", optionally
+;; followed by " -state <state>" and " -status <status>" when those
+;; arguments are not #f.  Nothing is run (and the result is unspecified)
+;; unless test-run-dir is an initialized subrun that has not been
+;; removed; otherwise the result of subrun:exec-sub-megatest is
+;; returned.  The log prefix is derived from the same three values.
+(define (subrun:set-state-status test-run-dir state status new-state-status)
+  (if (and (not (subrun:subrun-removed? test-run-dir)) (subrun:subrun-test-initialized? test-run-dir))
+      (let* ((opt      (lambda (lead val) (if val (conc lead val) ""))) ;; "" when val is #f
+             (switches (conc "-set-state-status " new-state-status (opt " -state " state) (opt " -status " status)))
+             (log-tag  (subrun:sanitize-path
+                        (conc "set-state-status=" new-state-status (opt ":state=" state) (opt "+status=" status)))))
+        (subrun:exec-sub-megatest test-run-dir switches log-tag))))
+
+;; Remove the runs of the subrun attached to test-run-dir by calling the
+;; sub-megatest with "-remove-runs" (plus "-keep-records" when keep-records
+;; is true).  Returns #t when there is nothing to remove (not a subrun, or
+;; already removed) or when the removal succeeded -- in which case the
+;; "subrun.removed" flag is set -- and #f when the sub-megatest failed.
+;; The two switches are separated by a space; they used to be glued
+;; together as the single word "-remove-runs-keep-records".
+(define (subrun:remove-subrun test-run-dir keep-records )
+  (cond
+   ((or (subrun:subrun-removed? test-run-dir) (not (subrun:subrun-test-initialized? test-run-dir))) #t)
+   ((subrun:exec-sub-megatest test-run-dir (conc "-remove-runs" (if keep-records " -keep-records" "")) "remove")
+    (subrun:set-subrun-removed test-run-dir)
+    #t)
+   (else #f)))
+
+;; Kill the runs of the subrun attached to test-run-dir ("-kill-runs" via
+;; the sub-megatest).  Returns the sub-megatest result, or #t when the
+;; area is not an initialized subrun or the subrun was already removed.
+(define (subrun:kill-subrun test-run-dir )
+  (let ((live-subrun? (and (not (subrun:subrun-removed? test-run-dir))
+                           (subrun:subrun-test-initialized? test-run-dir))))
+    (or (not live-subrun?)
+        (subrun:exec-sub-megatest test-run-dir (conc "-kill-runs" ) "kill"))))
+
+;; Build (but do not run) the shell command that launches the subrun:
+;;   "megatest <sub-cmd> <selector and -log switches> -run-wait "
+;; If the subrun is currently reported as removed, the "subrun.removed"
+;; flag is cleared first.
+;; BUG: "-run" should be changed to "-rerun-clean" but current doesn't work
+(define (subrun:launch-cmd test-run-dir #!optional (sub-cmd "-run"))
+  (if (subrun:subrun-removed? test-run-dir) (subrun:unset-subrun-removed test-run-dir))
+  (let* ((run-wait  #t) ;; waiting is hard-wired on
+         (selectors (subrun:selector+log-switches test-run-dir "run")))
+    (conc "megatest " sub-cmd " " selectors " " (if run-wait "-run-wait " ""))))
+
+
+;; Replace every char of inpath outside a-z A-Z 0-9 _ - with "_" (a stray "[" in the old class let "[" pass through).
+(define (subrun:sanitize-path inpath)
+  (regex#string-substitute (irregex "[^a-zA-Z0-9_\\-]") "_" inpath #t))
+
+;; Return the run area (the "-start-dir" switch value) of the subrun
+;; configured in test-run-dir, or #f when the area is not an initialized
+;; subrun or no such switch is produced.
+(define (subrun:get-runarea test-run-dir)
+  (and (subrun:subrun-test-initialized? test-run-dir)
+       ;; the log prefix only feeds the "-log" entry, which is not used
+       ;; here, so a dummy value will do
+       (let ((switches (subrun:selector+log-alist test-run-dir "foo")))
+         (and (list? switches)
+              (alist-ref "-start-dir" switches equal? #f)))))
+
+(define (subrun:selector+log-alist test-run-dir log-prefix) ;; -> alist of (switch . value) for a sub-megatest call, "-log" entry first
+  (let* ((switch-def-alist (common:get-param-mapping flavor: 'config)) ;; consumed below as (config-key . switch) pairs
+         (subrunfile   (conc test-run-dir "/testconfig.subrun" ))
+         (subrundata   (with-input-from-file subrunfile read)) ;; one datum is read from the file
+         (subrunconfig (configf:alist->config subrundata))
+         (run-area     (configf:lookup subrunconfig "subrun" "run-area"))
+         (defvals      `(("start-dir" . ,(or run-area  ;; default values if not specified in subrun section of tconf
+                                             (get-environment-variable "MT_RUN_AREA_HOME")
+                                             "/no/rundir/found")) 
+                         ("run-name"  . ,(or (get-environment-variable "MT_RUNNAME") "NO-RUNNAME"))
+                         ("target"    . ,(or (get-environment-variable "MT_TARGET")  "NO-TARGET"))))
+         (switch-alist-pre  (filter-map (lambda (item) ;; keeps only the switches that end up with a value
+                                          (let* ((config-key (car item))
+                                                 (switch     (cdr item))
+                                                 (defval     (alist-ref config-key defvals equal? #f))
+                                                 (val        (or (configf:lookup subrunconfig "subrun" config-key) ;; config value wins over the default
+                                                                 defval)))
+                                            (if val
+                                                (cons switch val)
+                                                #f)))
+                                        switch-def-alist))
+
+         ;; testpatt may be modified if all three of mode-patt, tag-expr, and testpatt are null
+         (mode-patt     (alist-ref "-modepatt" switch-alist-pre equal? #f))
+         (tag-expr      (alist-ref "-tagexpr" switch-alist-pre equal? #f))
+         (testpatt      (alist-ref "-testpatt" switch-alist-pre equal?
+                                   (if (not (or mode-patt tag-expr)) "%" #f))) ;; testpatt is % if not
+                                                                               ;; otherwise specified
+
+         ;; define compact-stem for logfile
+         (target        (alist-ref "-target" switch-alist-pre equal? #f)) ;; want data-structures alist-ref, not alist-lib alist-ref
+         (runname       (alist-ref "-runname" switch-alist-pre equal? #f))
+
+
+         (compact-stem  (subrun:sanitize-path ;; <target>-<runname>-<first available pattern>, sanitized
+                         (conc
+                          target
+                          "-"
+                          runname
+                          "-" (or testpatt mode-patt tag-expr "NO-TESTPATT"))))
+         (logfile       (conc ;; <test-run-dir>/[<sanitized log-prefix>-]<compact-stem>.log
+                         test-run-dir "/"
+                         (if log-prefix
+                             (conc (subrun:sanitize-path log-prefix) "-")
+                             "")
+                         compact-stem
+                         ".log"))
+         ;; swap out testpatt with modified test-patt and add -log
+         (switch-alist  (cons
+                         (cons "-log" logfile)
+                         (map (lambda (item) ;; only rewrites an existing "-testpatt" entry; none is added when absent
+                                (if (equal? (car item) "-testpatt")
+                                    (cons "-testpatt" testpatt)
+                                    item))
+                                switch-alist-pre))))
+    switch-alist))
+    ;; note - get precmd from subrun section
+    ;;   apply to submegatest commands
+
+;; Value of the "-log" switch that subrun:selector+log-alist produces for
+;; this test-run-dir and log-prefix, or #f when there is no such entry.
+(define (subrun:get-log-path test-run-dir log-prefix)
+  (alist-ref "-log" (subrun:selector+log-alist test-run-dir log-prefix) equal? #f))
+
+;; Render the switch alist from subrun:selector+log-alist as a single
+;; command-line fragment: each (switch . value) pair contributes the two
+;; words "switch" and "value", and all words are joined by one space.
+;; Values are inserted verbatim -- no shell quoting is applied here.
+(define (subrun:selector+log-switches test-run-dir log-prefix)
+  (let ((pair->words (lambda (pair) (list (car pair) (cdr pair)))))
+    (string-intersperse
+     ;; append-map (srfi-1) flattens the per-pair word lists into one
+     ;; list, in the order of the alist
+     (append-map pair->words
+                 (subrun:selector+log-alist test-run-dir log-prefix))
+     " ")))
+
+;; Run "megatest <selector/log switches> <action-switches-str>" through
+;; /bin/bash and block until it finishes.  The child is started with ":."
+;; appended to PATH and through common:without-vars with the pattern
+;; "^MT_.*" (presumably hiding MT_* variables from the child -- confirm
+;; against common:without-vars).  The child is then polled with a
+;; non-blocking process-wait, sleeping 2 seconds between polls.
+;; Returns #t when the reported exit code is 0, #f otherwise.
+(define (subrun:exec-sub-megatest test-run-dir action-switches-str log-prefix)
+  (let* ((cmd       (conc "megatest " (subrun:selector+log-switches test-run-dir log-prefix) " " action-switches-str ))
+         (child-pid #f)
+         (spawn     (lambda ()
+                      (debug:print-info 0 *default-log-port* "Running sub megatest command: "cmd)
+                      (set! child-pid (process-run "/bin/bash" (list "-c" cmd))))))
+    (call-with-environment-variables
+     `(("PATH" . ,(conc (get-environment-variable "PATH") ":.")))
+     (lambda () (common:without-vars spawn "^MT_.*")))
+    (let poll ()
+      (receive (waited-pid normal-exit? exit-code) (process-wait child-pid #t)
+        (cond
+         ((eq? waited-pid 0)            ;; pid 0: the child has not finished yet
+          (thread-sleep! 2)
+          (poll))
+         (else
+          (debug:print-info 0 *default-log-port* "sub megatest " action-switches-str " completed with exit code " exit-code)
+          (eq? 0 exit-code)))))))
+
+
+
+;; (subrun:exec-sub-megatest "/nfs/pdx/disks/icf_env_disk001/bjbarcla/gwa/issues/mtdev/165/megatest/ext-tests/tests/subrun-usecases/toparea/links/SYSTEM_val/RELEASE_val/go/toptest" "-foo" "foo")
 
 
 )

Index: tests-inc.scm
==================================================================
--- tests-inc.scm
+++ tests-inc.scm
@@ -16,1742 +16,5 @@
 ;;     You should have received a copy of the GNU General Public License
 ;;     along with Megatest.  If not, see <http://www.gnu.org/licenses/>.
 ;;
 ;;======================================================================
 
-;;======================================================================
-;; Tests
-;;======================================================================
-
-;; return items given config
-;;
-(define (tests:get-items tconfig)
-  (let ((items      (hash-table-ref/default tconfig "items" #f)) ;; items 4
-	(itemstable (hash-table-ref/default tconfig "itemstable" #f))) 
-    ;; if either items or items table is a proc return it so test running
-    ;; process can know to call items:get-items-from-config
-    ;; if either is a list and none is a proc go ahead and call get-items
-    ;; otherwise return #f - this is not an iterated test
-    (cond
-     ((procedure? items)      
-      (debug:print-info 4 *default-log-port* "items is a procedure, will calc later")
-      items)            ;; calc later
-     ((procedure? itemstable)
-      (debug:print-info 4 *default-log-port* "itemstable is a procedure, will calc later")
-      itemstable)       ;; calc later
-     ((filter (lambda (x)
-		(let ((val (car x)))
-		  (if (procedure? val) val #f)))
-	      (append (if (list? items) items '())
-		      (if (list? itemstable) itemstable '())))
-      'have-procedure)
-     ((or (list? items)(list? itemstable)) ;; calc now
-      (debug:print-info 4 *default-log-port* "items and itemstable are lists, calc now\n"
-			"    items: " items " itemstable: " itemstable)
-      (items:get-items-from-config tconfig))
-     (else #f))))                           ;; not iterated
-
-
-;; returns waitons waitors tconfigdat
-;;
-(define (tests:get-waitons test-name all-tests-registry)
-   (let* ((config  (tests:get-testconfig test-name #f all-tests-registry 'return-procs))) ;; assuming no problems with immediate evaluation, this could be simplified ('return-procs -> #t)
-     (let ((instr (if config 
-		      (configf:lookup config "requirements" "waiton")
-		      (begin ;; No config means this is a non-existant test
-			(debug:print-error 0 *default-log-port* "non-existent required test \"" test-name "\"")
-			(exit 1))))
-	   (instr2 (if config
-		       (configf:lookup config "requirements" "waitor")
-		       "")))
-       (debug:print-info 8 *default-log-port* "waitons string is " instr ", waitors string is " instr2)
-       (let ((newwaitons
-	      (string-split (cond
-			     ((procedure? instr) ;; here 
-			      (let ((res (instr)))
-				(debug:print-info 8 *default-log-port* "waiton procedure results in string " res " for test " test-name)
-				res))
-			     ((string? instr)     instr)
-			     (else 
-			      ;; NOTE: This is actually the case of *no* waitons! ;; (debug:print-error 0 *default-log-port* "something went wrong in processing waitons for test " test-name)
-			      ""))))
-	     (newwaitors
-	      (string-split (cond
-			     ((procedure? instr2)
-			      (let ((res (instr2)))
-				(debug:print-info 8 *default-log-port* "waitor procedure results in string " res " for test " test-name)
-				res))
-			     ((string? instr2)     instr2)
-			     (else 
-			      ;; NOTE: This is actually the case of *no* waitons! ;; (debug:print-error 0 *default-log-port* "something went wrong in processing waitons for test " test-name)
-			      "")))))
-	 (values
-	  ;; the waitons
-	  (filter (lambda (x)
-		    (if (hash-table-ref/default all-tests-registry x #f)
-			#t
-			(begin
-			  (debug:print-error 0 *default-log-port* "test " test-name " has unrecognised waiton testname " x)
-			  #f)))
-		  newwaitons)
-	  (filter (lambda (x)
-		    (if (hash-table-ref/default all-tests-registry x #f)
-			#t
-			(begin
-			  (debug:print-error 0 *default-log-port* "test " test-name " has unrecognised waiton testname " x)
-			  #f)))
-		  newwaitors)
-	  config)))))
-					     
-;; Check for waiver eligibility
-;;
-(define (tests:check-waiver-eligibility testdat prev-testdat)
-  (let* ((test-registry (make-hash-table))
-	 (testconfig  (tests:get-testconfig (db:test-get-testname testdat) (db:test-get-item-path testdat) test-registry #f))
-	 (test-rundir ;; (sdb:qry 'passstr 
-	  (db:test-get-rundir testdat)) ;; )
-	 (prev-rundir ;; (sdb:qry 'passstr 
-	  (db:test-get-rundir prev-testdat)) ;; )
-	 (waivers     (if testconfig (configf:section-vars testconfig "waivers") '()))
-	 (waiver-rx   (regexp "^(\\S+)\\s+(.*)$"))
-	 (diff-rule   "diff %file1% %file2%")
-	 (logpro-rule "diff %file1% %file2% | logpro %waivername%.logpro %waivername%.html"))
-    (if (not (common:file-exists? test-rundir))
-	(begin
-	  (debug:print-error 0 *default-log-port* "test run directory is gone, cannot propagate waiver")
-	  #f)
-	(begin
-	  (push-directory test-rundir)
-	  (let ((result (if (null? waivers)
-			    #f
-			    (let loop ((hed (car waivers))
-				       (tal (cdr waivers)))
-			      (debug:print 0 *default-log-port* "INFO: Applying waiver rule \"" hed "\"")
-			      (let* ((waiver      (configf:lookup testconfig "waivers" hed))
-				     (wparts      (if waiver (string-match waiver-rx waiver) #f))
-				     (waiver-rule (if wparts (cadr wparts)  #f))
-				     (waiver-glob (if wparts (caddr wparts) #f))
-				     (logpro-file (if waiver
-						      (let ((fname (conc hed ".logpro")))
-							(if (common:file-exists? fname)
-							    fname 
-							    (begin
-							      (debug:print 0 *default-log-port* "INFO: No logpro file " fname " falling back to diff")
-							      #f)))
-						      #f))
-				     ;; if rule by name of waiver-rule is found in testconfig - use it
-				     ;; else if waivername.logpro exists use logpro-rule
-				     ;; else default to diff-rule
-				     (rule-string (let ((rule (configf:lookup testconfig "waiver_rules" waiver-rule)))
-						    (if rule
-							rule
-							(if logpro-file
-							    logpro-rule
-							    (begin
-							      (debug:print 0 *default-log-port* "INFO: No logpro file " logpro-file " found, using diff rule")
-							      diff-rule)))))
-				     ;; (string-substitute "%file1%" "foofoo.txt" "This is %file1% and so is this %file1%." #t)
-				     (processed-cmd (string-substitute 
-						     "%file1%" (conc test-rundir "/" waiver-glob)
-						     (string-substitute
-						      "%file2%" (conc prev-rundir "/" waiver-glob)
-						      (string-substitute
-						       "%waivername%" hed rule-string #t) #t) #t))
-				     (res            #f))
-				(debug:print 0 *default-log-port* "INFO: waiver command is \"" processed-cmd "\"")
-				(if (eq? (system processed-cmd) 0)
-				    (if (null? tal)
-					#t
-					(loop (car tal)(cdr tal)))
-				    #f))))))
-	    (pop-directory)
-	    result)))))
-
-;; Do not rpc this one, do the underlying calls!!!
-(define (tests:test-set-status! run-id test-id state status comment dat #!key (work-area #f))
-  (let* ((real-status status)
-	 (otherdat    (if dat dat (make-hash-table)))
-	 (testdat     (rmt:get-test-info-by-id run-id test-id))
-	 (test-name   (db:test-get-testname  testdat))
-	 (item-path   (db:test-get-item-path testdat))
-	 ;; before proceeding we must find out if the previous test (where all keys matched except runname)
-	 ;; was WAIVED if this test is FAIL
-
-	 ;; NOTES:
-	 ;;  1. Is the call to test:get-previous-run-record remotified?
-	 ;;  2. Add test for testconfig waiver propagation control here
-	 ;;
-	 (prev-test   (if (equal? status "FAIL")
-			  (rmt:get-previous-test-run-record run-id test-name item-path)
-			  #f))
-	 (waived   (if prev-test
-		       (if prev-test ;; true if we found a previous test in this run series
-			   (let ((prev-status  (db:test-get-status  prev-test))
-				 (prev-state   (db:test-get-state   prev-test))
-				 (prev-comment (db:test-get-comment prev-test)))
-			     (debug:print 4 *default-log-port* "prev-status " prev-status ", prev-state " prev-state ", prev-comment " prev-comment)
-			     (if (and (equal? prev-state  "COMPLETED")
-				      (equal? prev-status "WAIVED"))
-				 (if comment
-				     comment
-				     prev-comment) ;; waived is either the comment or #f
-				 #f))
-			   #f)
-		       #f)))
-    (if (and waived 
-	     (tests:check-waiver-eligibility testdat prev-test))
-	(set! real-status "WAIVED"))
-
-    (debug:print 4 *default-log-port* "real-status " real-status ", waived " waived ", status " status)
-
-    ;; update the primary record IF state AND status are defined
-    (if (and state status)
-	(begin
-	  (rmt:set-state-status-and-roll-up-items run-id test-id item-path state real-status (if waived waived comment))
-	  ;; (mt:process-triggers run-id test-id state real-status) ;; triggers are called in test-set-state-status
-	  ))
-    
-    ;; if status is "AUTO" then call rollup (note, this one modifies data in test
-    ;; run area, it does remote calls under the hood.
-    ;; (if (and test-id state status (equal? status "AUTO")) 
-    ;; 	(rmt:test-data-rollup run-id test-id status))
-
-    ;; add metadata (need to do this way to avoid SQL injection issues)
-
-    ;; :first_err
-    ;; (let ((val (hash-table-ref/default otherdat ":first_err" #f)))
-    ;;   (if val
-    ;;       (sqlite3:execute db "UPDATE tests SET first_err=? WHERE run_id=? AND testname=? AND item_path=?;" val run-id test-name item-path)))
-    ;; 
-    ;; ;; :first_warn
-    ;; (let ((val (hash-table-ref/default otherdat ":first_warn" #f)))
-    ;;   (if val
-    ;;       (sqlite3:execute db "UPDATE tests SET first_warn=? WHERE run_id=? AND testname=? AND item_path=?;" val run-id test-name item-path)))
-
-    (let ((category (hash-table-ref/default otherdat ":category" ""))
-	  (variable (hash-table-ref/default otherdat ":variable" ""))
-	  (value    (hash-table-ref/default otherdat ":value"    #f))
-	  (expected (hash-table-ref/default otherdat ":expected" "n/a"))
-	  (tol      (hash-table-ref/default otherdat ":tol"      "n/a"))
-	  (units    (hash-table-ref/default otherdat ":units"    ""))
-	  (type     (hash-table-ref/default otherdat ":type"     ""))
-	  (dcomment (hash-table-ref/default otherdat ":comment"  "")))
-      (debug:print 4 *default-log-port* 
-		   "category: " category ", variable: " variable ", value: " value
-		   ", expected: " expected ", tol: " tol ", units: " units)
-      (if (and value) ;; require only value; BB was- all three required
-	  (let ((dat (conc category ","
-			   variable ","
-			   value    ","
-			   expected ","
-			   tol      ","
-			   units    ","
-			   dcomment ",," ;; extra comma for status
-			   type     )))
-	    ;; This was run remote, don't think that makes sense. Perhaps not, but that is the easiest path for the moment.
-	    (rmt:csv->test-data run-id test-id
-				dat)
-	    ;; This was added in check-in a5adfa3f9a. Message was: "...added delay in set-values to allow for delayed write on server start"
-	    ;; I'm inserting an arbitrary rmt: call to force/ensure that the server is available to (hopefully) prevent a communication issue.
-	    (rmt:get-var "MEGATEST_VERSION") ;; this does NOTHING but ensure the server is reachable. This is almost certainly NOT needed :)
-            ;; BB - commentiong out arbitrary 10 second wait (thread-sleep! 10) ;; add 10 second delay before quit incase rmt needs time to start a server.
-            )))
-      
-    ;; need to update the top test record if PASS or FAIL and this is a subtest
-    ;;;;;; (if (not (equal? item-path ""))
-    ;;;;;;     (rmt:set-state-status-and-roll-up-items run-id test-name item-path state status #f) ;;;;;)
-
-    (if (or (and (string? comment)
-		 (string-match (regexp "\\S+") comment))
-	    waived)
-	(let ((cmt  (if waived waived comment)))
-	  (rmt:general-call 'set-test-comment run-id cmt test-id)))))
-
-(define (tests:test-set-toplog! run-id test-name logf) 
-  (rmt:general-call 'tests:test-set-toplog run-id logf run-id test-name))
-
-(define (tests:summarize-items run-id test-id test-name force)
-  ;; if not force then only update the record if one of these is true:
-  ;;   1. logf is "log/final.log
-  ;;   2. logf is same as outputfilename
-  (let* ((outputfilename (conc "megatest-rollup-" test-name ".html"))
-	 (orig-dir       (current-directory))
-	 (logf-info      (rmt:test-get-logfile-info run-id test-name))
-	 (logf           (if logf-info (cadr logf-info) #f))
-	 (path           (if logf-info (car  logf-info) #f)))
-    ;; This query finds the path and changes the directory to it for the test
-    (if (and (string? path)
-	     (directory? path)) ;; can get #f here under some wierd conditions. why, unknown ...
-	(begin
-	  (debug:print 4 *default-log-port* "Found path: " path)
-	  (change-directory path))
-	;; (set! outputfilename (conc path "/" outputfilename)))
-	(debug:print-error 0 *default-log-port* "summarize-items for run-id=" run-id ", test-name=" test-name ", no such path: " path))
-    (debug:print 4 *default-log-port* "summarize-items with logf " logf ", outputfilename " outputfilename " and force " force)
-    (if (or (equal? logf "logs/final.log")
-	    (equal? logf outputfilename)
-	    force)
-	(let ((my-start-time (current-seconds))
-	      (lockf         (conc outputfilename ".lock")))
-	  (let loop ((have-lock  (common:simple-file-lock lockf)))
-	    (if have-lock
-		(let ((script (configf:lookup *configdat* "testrollup" test-name)))
-		  (print "Obtained lock for " outputfilename)
-		  (rmt:set-state-status-and-roll-up-items run-id test-name "" #f #f #f)
-		  (if script
-		      (system (conc script " > " outputfilename " & "))
-		      (tests:generate-html-summary-for-iterated-test run-id test-id test-name outputfilename))
-		  (common:simple-file-release-lock lockf)
-		  (change-directory orig-dir)
-		  ;; NB// tests:test-set-toplog! is remote internal...
-		  (tests:test-set-toplog! run-id test-name outputfilename))
-		;; didn't get the lock, check to see if current update started later than this 
-		;; update, if so we can exit without doing any work
-		(if (> my-start-time (handle-exceptions
-					 exn
-					 0
-				       (file-modification-time lockf)))
-		    ;; we started since current re-gen in flight, delay a little and try again
-		    (begin
-		      (debug:print-info 1 *default-log-port* "Waiting to update " outputfilename ", another test currently updating it")
-		      (thread-sleep! (+ 5 (random 5))) ;; delay between 5 and 10 seconds
-		      (loop (common:simple-file-lock lockf))))))))))
-
-;; Write an HTML roll-up page for an iterated test to outputfilename.
-;;
-;; Each record from rmt:test-get-records-for-index-file is read as a vector
-;; #(id itempath state status run_duration logf comment). The page contains
-;; a table of counts per state, a table of counts per status and one row
-;; per item linking to <itempath>/test-summary.html.
-;;
-;; test-id is accepted for interface compatibility; it is not used here.
-;;
-(define (tests:generate-html-summary-for-iterated-test run-id test-id test-name outputfilename)
-  (let* ((counts      (make-hash-table)) ;; status => number of items
-         (statecounts (make-hash-table)) ;; state  => number of items
-         (rawdat      (rmt:test-get-records-for-index-file run-id test-name))
-         (testdat     (if (list? rawdat)
-                          rawdat
-                          (begin
-                            ;; report on the log port; a plain print inside
-                            ;; with-output-to-file would end up in the html
-                            (debug:print 0 *default-log-port*
-                                         "ERROR: failed to get records with rmt:test-get-records-for-index-file run-id="
-                                         run-id " test-name=" test-name)
-                            '())))
-         (bump!       (lambda (tbl key)
-                        (hash-table-set! tbl key (+ 1 (hash-table-ref/default tbl key 0)))))
-         ;; print one two-column count table followed by a Total row
-         (print-stats (lambda (title tbl key->label)
-                        (let ((tot 0))
-                          (print "<table cellspacing=\"0\" border=\"1\"><tr><td colspan=\"2\"><h2>" title "</h2></td></tr>")
-                          (for-each
-                           (lambda (key)
-                             (let ((n (hash-table-ref tbl key)))
-                               (set! tot (+ tot n))
-                               (print "<tr><td>" (key->label key) "</td><td>" n "</td></tr>")))
-                           (hash-table-keys tbl))
-                          (print "<tr><td>Total</td><td>" tot "</td></tr></table>")))))
-    ;; tally before any output so both stats tables can precede the item rows
-    (for-each
-     (lambda (testrecord)
-       (bump! counts      (vector-ref testrecord 3))
-       (bump! statecounts (vector-ref testrecord 2)))
-     testdat)
-    (with-output-to-file outputfilename
-      (lambda ()
-        ;; the document preamble must come first, before any table markup
-        (print "<html><title>Summary: " test-name
-               "</title><body><h2>Summary for " test-name "</h2>")
-        (print "<table><tr><td valign=\"top\">")
-        (print-stats "State stats" statecounts (lambda (state) state))
-        (print "</td><td valign=\"top\">")
-        (print-stats "Status stats" counts
-                     (lambda (status)
-                       (conc "<font color=\"" (common:get-color-from-status status) "\">"
-                             status "</font>")))
-        (print "</td></tr></table>")
-        (print "<table cellspacing=\"0\" border=\"1\">"
-               "<tr><td>Item</td><td>State</td><td>Status</td><td>Comment</td></tr>")
-        (for-each
-         (lambda (testrecord)
-           (let ((itempath (vector-ref testrecord 1))
-                 (state    (vector-ref testrecord 2))
-                 (status   (vector-ref testrecord 3))
-                 (comment  (vector-ref testrecord 6)))
-             (print "<tr>"
-                    "<td><a href=\"" itempath "/test-summary.html\"> " itempath "</a></td>"
-                    "<td>" state "</td>"
-                    "<td><font color=\"" (common:get-color-from-status status) "\">"
-                    status "</font></td>"
-                    ;; keep empty comment cells from collapsing
-                    "<td>" (if (equal? comment "") "&nbsp;" comment) "</td>"
-                    "</tr>")))
-         testdat)
-        (print "</table></body></html>")))))
-
-;; Static <style> and <script> text placed at the top of the generated html
-;; pages. The script defines:
-;;   filtersome()  - shows all rows, then hides the parent of every ".test"
-;;                   element whose text contains none of the comma separated
-;;                   names typed into the #testname input. It relies on
-;;                   jQuery ($), which this block does not load itself.
-;;   addEvents()   - calls activateTree on the element with id "LinkedList1"
-;;   activateTree / toggleBranch / addLinksToBranches
-;;                 - collapse the nested <ul> lists and toggle them on click
-;; Everything between the EOF markers is string data, so Scheme comments
-;; must not be added inside it.
-;;
-(define tests:css-jscript-block
-#<<EOF
-<style type="text/css">
-ul.LinkedList { display: block; }
-/* ul.LinkedList ul { display: none; } */
-.HandCursorStyle { cursor: pointer; cursor: hand; }  /* For IE */
-th {background-color: #8c8c8c;}
-td.test {background-color: #d9dbdd;}
-td.PASS {background-color: #347533;}
-td.FAIL {background-color: #cc2812;}
-td.SKIP{background-color: #FFD733;}
-td.WARN {background-color: #EA8724;}
-td.WAIVED {background-color: #838A12;}
-td.ABORT{background-color: #EA24B7;}
-.PASS .link, .SKIP .link, .WARN .link,.WAIVED .link,.ABORT .link, .FAIL .link{color: #FFFFFF;}
-
-
-</style>
-
-
-  <script type="text/JavaScript">
-
-    function filtersome() {
-  $("tr").show();
-  $(".test").filter(
-    function() {
-      var names = $('#testname').val().split(',');
-      var good=1;
-      for (var i=0, len=names.length; i<len; i++) {
-        var uname=names[i];
-        console.log("Trying to check for " + uname); 
-        if($(this).text().indexOf(uname) != -1) {
-          good= 0;
-          console.log("Found "+uname);
-        }
-      }
-      return good; 
-    }
-  ).parent().hide();
-//  $(".sum").show();
-}
-  
-    // Add this to the onload event of the BODY element
-    function addEvents() {
-      activateTree(document.getElementById("LinkedList1"));
-    }
-
-    // This function traverses the list and add links 
-    // to nested list items
-    function activateTree(oList) {
-      // Collapse the tree
-      for (var i=0; i < oList.getElementsByTagName("ul").length; i++) {
-        oList.getElementsByTagName("ul")[i].style.display="none";            
-      }                                                                  
-      // Add the click-event handler to the list items
-      if (oList.addEventListener) {
-        oList.addEventListener("click", toggleBranch, false);
-      } else if (oList.attachEvent) { // For IE
-        oList.attachEvent("onclick", toggleBranch);
-      }
-      // Make the nested items look like links
-      addLinksToBranches(oList);
-    }
-
-    // This is the click-event handler
-    function toggleBranch(event) {
-      var oBranch, cSubBranches;
-      if (event.target) {
-        oBranch = event.target;
-      } else if (event.srcElement) { // For IE
-        oBranch = event.srcElement;
-      }
-      cSubBranches = oBranch.getElementsByTagName("ul");
-      if (cSubBranches.length > 0) {
-        if (cSubBranches[0].style.display == "block") {
-          cSubBranches[0].style.display = "none";
-        } else {
-          cSubBranches[0].style.display = "block";
-        }
-      }
-    }
-
-    // This function makes nested list items look like links
-    function addLinksToBranches(oList) {
-      var cBranches = oList.getElementsByTagName("li");
-      var i, n, cSubBranches;
-      if (cBranches.length > 0) {
-        for (i=0, n = cBranches.length; i < n; i++) {
-          cSubBranches = cBranches[i].getElementsByTagName("ul");
-          if (cSubBranches.length > 0) {
-            addLinksToBranches(cSubBranches[0]);
-            cBranches[i].className = "HandCursorStyle";
-            cBranches[i].style.color = "blue";
-            cSubBranches[0].style.color = "black";
-            cSubBranches[0].style.cursor = "auto";
-          }
-        }
-      }
-    }
-  </script>
-EOF
-)
-
-;; Script tag that loads jquery3.1.0.js relative to the page being served.
-;; Selected by tests:css-jscript-block-cond when its argument is #t.
-;; The text between the EOF markers is string data.
-;;
-(define tests:css-jscript-block-dynamic 
-#<<EOF
-           <script src= ./jquery3.1.0.js></script> 
-EOF
-)
-
-;; Build a <script> tag whose src attribute is javascript-lib.
-;; The value is inserted as-is: no quoting or escaping is applied.
-;;
-(define (test:js-block javascript-lib)
-  (let ((open-tag  "<script src=")
-        (close-tag "></script>"))
-    (conc open-tag javascript-lib close-tag)))
-
-
-;; Script tag for statically generated pages, built once at load time from
-;; *java-script-lib* (defined elsewhere).
-(define tests:css-jscript-block-static (test:js-block *java-script-lib*))
-
-;; Pick the script-loading block: the dynamic variant only when dynamic is
-;; exactly #t, the static variant for every other value.
-;;
-(define (tests:css-jscript-block-cond dynamic)
-  (cond
-   ((eq? dynamic #t) tests:css-jscript-block-dynamic)
-   (else             tests:css-jscript-block-static)))
-
-       
-;; Convert a run record (a vector) into a list of path components: the
-;; first numkeys fields followed by the field at index numkeys+1
-;; (presumably the run name - confirm against the runs table layout).
-;;
-(define (tests:run-record->test-path run numkeys)
-  (let ((key-values (take (vector->list run) numkeys))
-        (last-part  (vector-ref run (+ numkeys 1))))
-    (append key-values (list last-part))))
-
-
-;; Collect the test results of all runs into a three level hash table:
-;;
-;;   test-name => "test-name:<field 11>" => run-id => (status html-path)
-;;
-;; where status is field 4 of the test record and html-path is
-;; "<field 10>/<field 13>". All tests of every run are fetched
-;; (test name pattern "%", no state/status filter).
-;;
-;; numkeys is accepted for interface compatibility; it is not used here.
-;;
-(define (tests:get-rest-data runs header numkeys)
-  (let ((resh (make-hash-table))
-        ;; return the sub-table stored under key, creating it on first use
-        (subtable! (lambda (tbl key)
-                     (or (hash-table-ref/default tbl key #f)
-                         (let ((fresh (make-hash-table)))
-                           (hash-table-set! tbl key fresh)
-                           fresh)))))
-    (for-each
-     (lambda (run)
-       (let* ((run-id    (db:get-value-by-header run header "id"))
-              (test-data (rmt:get-tests-for-run
-                          run-id
-                          "%"  ;; testnamepatt
-                          '()  ;; states
-                          '()  ;; statuses
-                          #f   ;; offset
-                          #f   ;; num-to-get
-                          #f   ;; hide/not-hide
-                          #f   ;; sort-by
-                          #f   ;; sort-order
-                          #f   ;; qrytype
-                          0    ;; last update
-                          #f)))
-         ;; for-each, not map: this is executed only for its side effects
-         (for-each
-          (lambda (test)
-            (let* ((test-name      (vector-ref test 2))
-                   (test-html-path (conc (vector-ref test 10) "/" (vector-ref test 13)))
-                   (test-item      (conc test-name ":" (vector-ref test 11)))
-                   (test-status    (vector-ref test 4))
-                   (item-hash      (subtable! resh test-name))
-                   (run-hash       (subtable! item-hash test-item)))
-              (hash-table-set! run-hash run-id (list test-status test-html-path))))
-          test-data)))
-     runs)
-    resh))
-
-
-;; Render a tree of nested hash tables as nested html lists.
-;;
-;;   ht      - hash table with string keys; a value is either another hash
-;;             table (a branch) or anything else (a tip)
-;;   path    - list of key names leading to ht
-;;   tipfunc - called as (tipfunc value path-to-value) for every tip; an
-;;             empty hash table counts as a tip
-;;
-;; Entries are emitted sorted by key.
-;;
-(define (common:htree->html ht path tipfunc)
-  (let ((entries (sort (hash-table->alist ht)
-                       (lambda (p q)
-                         (string< (car p) (car q))))))
-    (cond
-     ;; an empty table at this level is not expected; give the tip function
-     ;; a chance to render something anyway
-     ((null? entries) (tipfunc #f path))
-     (else
-      (s:ul
-       (map (lambda (entry)
-              (let* ((name    (car entry))
-                     (sub     (cdr entry))
-                     (subpath (append path (list name)))
-                     (branch? (and (hash-table? sub)
-                                   (not (null? (hash-table-keys sub))))))
-                (if branch?
-                    (s:li (list name
-                                (common:htree->html sub subpath tipfunc)))
-                    (s:li (tipfunc sub subpath)))))
-            entries))))))
-
-
-;; tests:dashboard-body - generate the html for one page of the dashboard
-;;
-;;   page, pg-size   - zero based page number and number of runs per page
-;;   keys            - list of key names; one header row is emitted per key
-;;   numkeys         - passed through to tests:get-rest-data
-;;   total-runs      - passed through to get-next-links
-;;   linktree        - link tree path; stripped from the log paths in
-;;                     static mode
-;;   area-name       - shown in the title and heading
-;;   get-prev-links  - (lambda (page linktree) ...) => html for "prev"
-;;   get-next-links  - (lambda (page linktree total-runs) ...) => html for "next"
-;;   flag            - #t: cells link to ./test_log?runid=..&testname=..
-;;                     otherwise: cells link to the log path relative to linktree
-;;   run-patt, target-patt - patterns handed to rmt:get-runs-by-patt
-;;
-;; The table has one column per run and one row per "test:item"; a cell
-;; shows "n/a" when the run has no result for that item.
-;;
-(define (tests:dashboard-body page pg-size keys numkeys  total-runs linktree area-name get-prev-links get-next-links flag run-patt target-patt)
-  (let* ((start          (* page pg-size))
-         (runsdat        (rmt:get-runs-by-patt keys run-patt target-patt start pg-size #f 0 sort-order: "desc"))
-         (header         (vector-ref runsdat 0))
-         (runs           (vector-ref runsdat 1))
-         (test-runs-hash (tests:get-rest-data runs header numkeys))
-         (test-list      (hash-table-keys test-runs-hash)))
-    (s:html tests:css-jscript-block (tests:css-jscript-block-cond flag)
-            (s:title "Summary for " area-name)
-            (s:body 'onload "addEvents();"
-                    (get-prev-links page linktree)
-                    (get-next-links page linktree total-runs)
-                    (s:h1 "Summary for " area-name)
-                    (s:h3 "Filter" )
-                    (s:input 'type "text"  'name "testname" 'id "testname" 'length "30" 'onkeyup "filtersome()")
-                    (s:table 'id "LinkedList1" 'border "1" 'cellspacing 0
-                             ;; One header row per key. The key's column in the
-                             ;; run record is its position in keys; it is passed
-                             ;; explicitly because map does not guarantee the
-                             ;; order in which it applies the procedure.
-                             (map (lambda (key idx)
-                                    (s:tr 'class "something"
-                                          (s:th key )
-                                          (map (lambda (run)
-                                                 (s:th (vector-ref run idx)))
-                                               runs)))
-                                  keys
-                                  (iota (length keys)))
-                             (s:tr
-                              (s:th "Run Name")
-                              (map (lambda (run)
-                                     (s:th (db:get-value-by-header run header "runname")))
-                                   runs))
-                             ;; one row per item of every test
-                             (map (lambda (test-name)
-                                    (let* ((item-hash (hash-table-ref/default test-runs-hash test-name  #f))
-                                           (item-keys (sort (hash-table-keys item-hash) string<=?)))
-                                      (map (lambda (item-name)
-                                             ;; run-id => (status html-path) for this item
-                                             (let ((run-test (hash-table-ref/default item-hash item-name  #f)))
-                                               (s:tr 'class item-name
-                                                     (s:td  item-name 'class "test" )
-                                                     (map (lambda (run)
-                                                            (let* ((run-id (db:get-value-by-header run header "id"))
-                                                                   ;; the string "n/a" marks a missing result
-                                                                   (result (hash-table-ref/default run-test run-id "n/a"))
-                                                                   (status (if (string? result)
-                                                                               result
-                                                                               (car result)))
-                                                                   (link   (cond
-                                                                            ((string? result) result)
-                                                                            ((equal? flag #t)
-                                                                             (s:a (car result) 'href (conc "./test_log?runid=" run-id "&testname="  item-name )))
-                                                                            (else
-                                                                             (s:a (car result) 'href (string-substitute  (conc linktree "/")  "" (cadr result)  "-"))))))
-                                                              (s:td  link 'class status)))
-                                                          runs))))
-                                           item-keys)))
-                                  test-list))))))
-
-;; (tests:create-html-tree "test-index.html")
-;;
-;; Generate the static dashboard, pg-size runs per page.
-;;
-;; Runs are selected with -run-patt / -runname and -target-patt / -target
-;; (each defaulting to "%"); a target with fewer parts than there are keys
-;; is padded with "%". Pages are written while holding the lock file
-;; "<outf>.lock". Returns #f when the lock cannot be obtained, otherwise
-;; the result of releasing the lock.
-;;
-;; NOTE(review): when outf is not #f every page is written to that same
-;; file, so only the last page survives - confirm callers pass #f when more
-;; than one page is expected.
-;;
-(define (tests:create-html-tree outf)
-  (let* ((lockfile    (conc outf ".lock"))
-         (linktree    (common:get-linktree))
-         (area-name   (common:get-testsuite-name))
-         (keys        (rmt:get-keys))
-         (numkeys     (length keys))
-         (run-patt    (or (args:get-arg "-run-patt")
-                          (args:get-arg "-runname")
-                          "%"))
-         (target      (or (args:get-arg "-target-patt")
-                          (args:get-arg "-target")
-                          "%"))
-         (targlist    (string-split target "/"))
-         (numtarg     (length targlist))
-         (targtweaked (if (> numkeys numtarg)
-                          (append targlist (make-list (- numkeys numtarg) "%"))
-                          targlist))
-         (target-patt (string-join targtweaked "/"))
-         (total-runs  (rmt:get-runs-cnt-by-patt run-patt target-patt keys ))
-         (pg-size     10)
-         ;; the link text is empty on the first page
-         (get-prev-links (lambda (page linktree )
-                           (if (not (eq? page 0))
-                               (s:a "&lt;&lt;prev" 'href (conc  "page" (- page 1) ".html"))
-                               (s:a "" 'href (conc   "page"  page ".html")))))
-         ;; the link text is empty when no run remains after this page; the
-         ;; test uses pg-size so it always agrees with the loop below
-         (get-next-links (lambda (page linktree total-runs)
-                           (if (> total-runs (* (+ 1 page) pg-size))
-                               (s:a "next&gt;&gt;" 'href (conc  "page"  (+ page 1) ".html"))
-                               (s:a "" 'href (conc   "page" page  ".html"))))))
-    (if (common:simple-file-lock lockfile)
-        (begin
-          (let loop ((page 0))
-            (let ((oup (open-output-file (or outf (conc linktree "/page" page ".html")))))
-              (print "total runs: " total-runs)
-              (s:output-new
-               oup
-               (tests:dashboard-body page pg-size keys numkeys total-runs linktree area-name get-prev-links get-next-links #f run-patt target-patt))
-              (close-output-port oup)
-              ;; continue while runs remain beyond the page just written
-              (if (> total-runs (* (+ 1 page) pg-size))
-                  (loop (+ 1  page)))))
-          (common:simple-file-release-lock lockfile))
-        #f)))
-
-
-;; Read filename and return its lines, in order, as a list of strings.
-;;
-(define (tests:readlines filename)
-  (call-with-input-file filename
-    (lambda (port)
-      (let collect ((acc '()))
-        (let ((next (read-line port)))
-          (cond
-           ((eof-object? next) (reverse acc))
-           (else               (collect (cons next acc)))))))))
-
-;; Return the contents of the log file of one test item as a single string.
-;;
-;;   run-id    - run id as a string (converted with string->number)
-;;   test-name - test name pattern handed to rmt:get-tests-for-run
-;;   item-name - compared against field 11 of each returned test record
-;;
-;; The log file is "<field 10>/<field 13>" of the first matching record.
-;; Returns "<H2>Data not found</H2>" when the query returns no records, no
-;; record matches, or the log file does not exist.
-;;
-(define (tests:get-test-log run-id test-name item-name)
-  (let* ((test-data (rmt:get-tests-for-run
-                     (string->number run-id)
-                     test-name  ;; testnamepatt
-                     '()        ;; states
-                     '()        ;; statuses
-                     #f         ;; offset
-                     #f         ;; num-to-get
-                     #f         ;; hide/not-hide
-                     #f         ;; sort-by
-                     #f         ;; sort-order
-                     #f         ;; qrytype
-                     0          ;; last update
-                     #f))
-         ;; path of the first record whose item matches, #f if there is none;
-         ;; an empty or non-list result simply yields #f
-         (path      (let loop ((remaining (if (list? test-data) test-data '())))
-                      (if (null? remaining)
-                          #f
-                          (let ((rec (car remaining)))
-                            (debug:print-info 0 *default-log-port* "item: " (vector-ref rec 11) (vector-ref rec 10) "/" (vector-ref rec 13))
-                            (if (equal? (vector-ref rec 11) item-name)
-                                (conc (vector-ref rec 10) "/" (vector-ref rec 13))
-                                (loop (cdr remaining))))))))
-    (if (and path (file-exists? path))
-        (string-join (tests:readlines path) "\n")
-        "<H2>Data not found</H2>")))
-
-
-;; Build the dashboard html for the dynamically served "dashboard" page.
-;;
-;;   page - one based page number as a string, or #f for the first page.
-;;          Anything that is not a positive integer is treated as page 1,
-;;          so a malformed request parameter cannot raise an error or
-;;          produce a negative offset.
-;;
-;; All runs and all targets are shown ("%" patterns), pg-size runs per page.
-;; Links use the one based "dashboard?page=N" form.
-;;
-(define (tests:dynamic-dboard page)
-  (let* ((linktree    (common:get-linktree))
-         (area-name   (common:get-testsuite-name))
-         (keys        (rmt:get-keys))
-         (numkeys     (length keys))
-         (targtweaked (make-list numkeys "%"))
-         (target-patt (string-join targtweaked "/"))
-         (total-runs  (rmt:get-num-runs "%"))
-         (pg-size     10)
-         ;; zero based page index used internally
-         (pg          (let ((n (and (string? page) (string->number page))))
-                        (if (and n (exact? n) (integer? n) (> n 0))
-                            (- n 1)
-                            0)))
-         ;; pg is zero based, so the one based number of the previous page
-         ;; is pg itself; the link text is empty on the first page
-         (get-prev-links (lambda (pg linktree)
-                           (if (not (eq? pg 0))
-                               (s:a  "&lt;&lt;prev " 'href (conc  "dashboard?page="  pg  ))
-                               (s:a "" 'href (conc  "dashboard?page=" pg)))))
-         ;; one based number of the following page is pg + 2; the link text
-         ;; is empty when no run remains after this page
-         (get-next-links (lambda (pg linktree total-runs)
-                           (if (> total-runs (* (+ 1 pg) pg-size))
-                               (s:a  "next&gt;&gt; "  'href (conc  "dashboard?page="  (+ pg 2)  ))
-                               (s:a "" 'href (conc  "dashboard?page=" pg  ))))))
-    (tests:dashboard-body pg pg-size keys numkeys total-runs linktree area-name get-prev-links get-next-links #t "%" target-patt)))
-
-;; Generate the target overview page (outf, or <linktree>/targets.html when
-;; outf is #f) followed by the per-run pages.
-;;
-;; Runs are selected with -run-patt / -runname and -target-patt / -target
-;; (each defaulting to "%"); a target with fewer parts than there are keys
-;; is padded with "%". The work is done while holding "<outf>.lock".
-;; Returns #f when the lock cannot be obtained, otherwise the result of
-;; releasing the lock.
-;;
-(define (tests:create-html-summary outf)
-  (let* ((lockfile    (conc outf ".lock"))
-         (linktree    (common:get-linktree))
-         (keys        (rmt:get-keys))
-         (area-name   (common:get-testsuite-name))
-         (run-patt    (or (args:get-arg "-run-patt")
-                          (args:get-arg "-runname")
-                          "%"))
-         (target      (or (args:get-arg "-target-patt")
-                          (args:get-arg "-target")
-                          "%"))
-         (targlist    (string-split target "/"))
-         (numkeys     (length keys))
-         (numtarg     (length targlist))
-         ;; wildcards for the key positions the given target leaves out
-         (padding     (if (> numkeys numtarg)
-                          (make-list (- numkeys numtarg) "%")
-                          '()))
-         (target-patt (string-join (append targlist padding) "/")))
-    (cond
-     ((common:simple-file-lock lockfile)
-      (let* ((runsdat     (rmt:get-runs-by-patt  keys run-patt target-patt #f #f #f 0))
-             (runs        (vector-ref runsdat 1))
-             (header      (vector-ref runsdat 0))
-             (oup         (open-output-file (or outf (conc linktree "/targets.html"))))
-             (target-hash (test:create-target-hash runs header numkeys)))
-        ;; test:create-target-html closes oup when it is done
-        (test:create-target-html target-hash oup area-name linktree)
-        (test:create-run-html  runs area-name linktree numkeys header))
-      (common:simple-file-release-lock lockfile))
-     (else #f))))
-
-;; Index the test records of one run as a two level hash table:
-;;
-;;   <field 11> (item) => <field 2> (test name) => (status html-path)
-;;
-;; status is field 4. html-path is "<field 10>/test-summary.html" when that
-;; file exists, else "<field 10>/<field 13>".
-;;
-(define (test:get-test-hash test-data)
-  (let ((by-item (make-hash-table)))
-    (for-each
-     (lambda (rec)
-       (let* ((name         (vector-ref rec 2))
-              (summary-file (conc (vector-ref rec 10) "/test-summary.html"))
-              (html-path    (if (file-exists? summary-file)
-                                (conc (vector-ref rec 10) "/test-summary.html" )
-                                (conc (vector-ref rec 10) "/" (vector-ref rec 13))))
-              (item         (vector-ref rec 11))
-              (status       (vector-ref rec 4)))
-         ;; create the per-item table the first time the item is seen
-         (if (not (hash-table-ref/default by-item item #f))
-             (hash-table-set! by-item item (make-hash-table)))
-         (hash-table-set! (hash-table-ref/default by-item item #f)
-                          name
-                          (list status html-path))))
-     test-data)
-    by-item))
-
-;; Return the second level keys found under the given first level keys of a
-;; two level hash table, sorted with string>=? and with duplicates removed.
-;; Every member of a-keys must be present in ordered-data.
-;;
-(define (test:get-data->b-keys ordered-data a-keys)
-  (let* ((keys-per-entry (map (lambda (sub-key)
-                                (hash-table-keys (hash-table-ref ordered-data sub-key)))
-                              a-keys))
-         (all-keys       (apply append keys-per-entry))
-         (ordered        (sort all-keys string>=?)))
-    (delete-duplicates ordered)))
-
-
-;; Write <linktree>/<target>/<runname>/run.html for every run.
-;;
-;; The page is a table with one row per item and one column per test name;
-;; each filled cell carries the test status as its class and links to the
-;; test's html path relative to the run directory.
-;;
-;; A run whose directory does not exist is skipped with a log message, and
-;; its tests are not queried at all.
-;;
-;;   runs      - list of run records (vectors)
-;;   area-name - accepted for interface compatibility; not used here
-;;   linktree  - root of the link tree
-;;   numkeys   - number of leading key fields in a run record
-;;   header    - field names for db:get-value-by-header
-;;
-(define (test:create-run-html runs area-name linktree numkeys header)
-  (for-each
-   (lambda (run)
-     (let* ((target   (string-join (take (vector->list run) numkeys) "/"))
-            (run-name (db:get-value-by-header run header "runname"))
-            (run-dir  (conc linktree "/" target "/" run-name)))
-       (if (not (file-exists? run-dir))
-           (debug:print-info 0 *default-log-port* "Skip: Directory structure " run-dir " does not exist. Megatest will not create run.html")
-           (let* ((run-time       (seconds->work-week/day-time (db:get-value-by-header run header "event_time")))
-                  (run-id         (db:get-value-by-header run header "id"))
-                  (test-data      (rmt:get-tests-for-run
-                                   run-id
-                                   "%"  ;; testnamepatt
-                                   '()  ;; states
-                                   '()  ;; statuses
-                                   #f   ;; offset
-                                   #f   ;; num-to-get
-                                   #f   ;; hide/not-hide
-                                   #f   ;; sort-by
-                                   #f   ;; sort-order
-                                   #f   ;; qrytype
-                                   0    ;; last update
-                                   #f))
-                  (item-test-hash (test:get-test-hash test-data))
-                  (items          (hash-table-keys item-test-hash))
-                  (test-names     (test:get-data->b-keys item-test-hash items))
-                  (oup            (open-output-file (conc run-dir "/run.html"))))
-             (s:output-new
-              oup
-              (s:html tests:css-jscript-block (tests:css-jscript-block-cond #f)
-                      (s:title "Runs View " run-name)
-                      (s:body
-                       (s:h1 "Runs View " )
-                       (s:h3 "Target" target)
-                       (s:p
-                        (s:b "Run name" ) run-name)
-                       (s:p
-                        (s:b "Run Date" ) run-time)
-                       (s:table 'border 1 'cellspacing 0
-                                (s:tr
-                                 (s:th "Items")
-                                 (map (lambda (test)
-                                        (s:th test))
-                                      test-names))
-                                (map (lambda (item)
-                                       ;; items are the keys of item-test-hash,
-                                       ;; so this lookup always succeeds
-                                       (let ((test-hash (hash-table-ref item-test-hash item)))
-                                         (s:tr
-                                          (s:td 'class "test" item)
-                                          (map (lambda (test)
-                                                 (let ((test-details (hash-table-ref/default test-hash test  #f)))
-                                                   (if test-details
-                                                       (let ((status (car test-details))
-                                                             ;; make the link relative to the run directory
-                                                             (link   (string-substitute  (conc run-dir "/")  "" (cadr test-details) "-")))
-                                                         (s:td 'class status
-                                                               (s:a 'class "link" 'href link status )))
-                                                       ;; this item has no result for this test
-                                                       (s:td ""))))
-                                               test-names))))
-                                     (sort items string<=?))))))
-             (close-output-port oup)))))
-   runs))
-
-;; Group run names by target: returns a hash table mapping the target
-;; string (the first numkeys fields of the run record joined with "/") to
-;; the list of run names, the most recently processed run first.
-;;
-(define (test:create-target-hash runs header numkeys)
-  (let ((by-target (make-hash-table)))
-    (for-each
-     (lambda (run)
-       (let* ((name     (db:get-value-by-header run header "runname"))
-              (tgt      (string-join (take (vector->list run) numkeys) "/"))
-              (existing (hash-table-ref/default by-target tgt #f)))
-         (hash-table-set! by-target
-                          tgt
-                          (if existing
-                              (cons name existing)
-                              (list name)))))
-     runs)
-    by-target))
-
-;; Return the length of the longest run list stored in target-hash under
-;; any of targets; a target without an entry counts as 0. Returns 0 for an
-;; empty targets list.
-;;
-(define (test:get-max-run-cnt target-hash targets)
-  (let scan ((remaining targets)
-             (longest   0))
-    (if (null? remaining)
-        longest
-        (let* ((entry (hash-table-ref/default target-hash (car remaining) #f))
-               (n     (if entry (length entry) 0)))
-          (scan (cdr remaining)
-                (if (< longest n) n longest))))))
- 
-;; Pad every run list in target-hash (modified in place) to max-row-length
-;; entries by putting "" in front of it. Lists that are already long enough
-;; are left untouched. Every target must have an entry in target-hash.
-;; Returns target-hash.
-;;
-(define (test:pad-runs target-hash targets max-row-length)
-  (for-each
-   (lambda (target)
-     (let* ((run-list (hash-table-ref/default target-hash target #f))
-            (missing  (- max-row-length (length run-list))))
-       (if (> missing 0)
-           (hash-table-set! target-hash
-                            target
-                            (append (make-list missing "") run-list)))))
-   targets)
-  target-hash)
-
-;; Write the target overview page to oup and close oup.
-;;
-;; The page is a table with one row per target. target-hash maps a target
-;; to its list of run names; the lists are padded in place with "" (see
-;; test:pad-runs) so that every row has the same number of cells, and are
-;; shown in reverse order.
-;;
-;; A run whose directory <linktree>/<target>/<run> exists links to
-;; <target>/<run>/run.html; a run without a directory is shown as plain
-;; text so the row keeps its cell count.
-;;
-(define (test:create-target-html target-hash oup area-name linktree)
-  (let* ((targets        (hash-table-keys target-hash))
-         (max-row-length (test:get-max-run-cnt target-hash targets)))
-    ;; called for its side effect on target-hash
-    (test:pad-runs target-hash targets max-row-length)
-    (s:output-new
-     oup
-     (s:html tests:css-jscript-block (tests:css-jscript-block-cond #f)
-             (s:title "Target View " area-name)
-             (s:body
-              (s:h1 "Target View " area-name)
-              (s:table 'id "LinkedList1" 'border "1" 'cellspacing 0
-                       (s:tr 'class "something"
-                             (s:th "Target")
-                             (s:th 'colspan max-row-length "Runs"))
-                       (map (lambda (target)
-                              (s:tr
-                               (s:td 'class "test" target)
-                               (map (lambda (run)
-                                      (cond
-                                       ;; padding cell
-                                       ((equal? run "") (s:td run))
-                                       ((file-exists? (conc linktree "/" target "/" run ))
-                                        (s:td
-                                         (s:a 'href (conc  target "/" run "/run.html") run)))
-                                       ;; no run directory: nothing to link to
-                                       (else (s:td run))))
-                                    (reverse (hash-table-ref/default target-hash target  #f)))))
-                            targets)))))
-    (close-output-port oup)))
-
-
-;; Older tree-style html generator.
-;;
-;; While holding the lock file "<outf>.lock" it writes an index page (outf,
-;; or <linktree>/runs-index.html when outf is #f) holding a collapsible
-;; tree of all runs. After releasing the lock it writes a run-summary.html
-;; holding a tree of tests into each run directory that exists, is a
-;; directory and is writable.
-;;
-;; Returns #t when the lock was obtained, #f otherwise.
-;;
-(define (tests:create-html-tree-old outf)
-   (let* ((lockfile  (conc outf ".lock"))
-	 (runs-to-process '()))
-    (if (common:simple-file-lock lockfile)
-	(let* ((linktree  (common:get-linktree))
-	       (oup       (open-output-file (or outf (conc linktree "/runs-index.html"))))
-	       (area-name (common:get-testsuite-name))
-	       (keys      (rmt:get-keys))
-	       (numkeys   (length keys))
-	       (runsdat   (rmt:get-runs "%" #f #f (map (lambda (x)(list x "%")) keys)))
-	       (header    (vector-ref runsdat 0))
-	       (runs      (vector-ref runsdat 1))
-	       (runtreedat (map (lambda (x)
-				  (tests:run-record->test-path x numkeys))
-				runs))
-	       (runs-htree (common:list->htree runtreedat)))
-	  ;; runs-to-process is assigned here but not read again in this procedure
-	  (set! runs-to-process runs)
-	  (s:output-new
-	   oup
-	   (s:html tests:css-jscript-block
-		   (s:title "Summary for " area-name)
-		   (s:body 'onload "addEvents();"
-			   (s:h1 "Summary for " area-name)
-			   ;; top list
-			   (s:ul 'id "LinkedList1" 'class "LinkedList"
-				 (s:li
-				  "Runs"
-				  (common:htree->html runs-htree
-						      '()
-						      ;; tip function: x is the tip value (unused), p the list of
-						      ;; path components; links to the run's run-summary.html
-						      ;; when the run directory is usable
-						      (lambda (x p)
-							(let* ((targ-path (string-intersperse p "/"))
-                                                               (full-path (conc linktree "/" targ-path))
-                                                               (run-name  (car (reverse p))))
-                                                          (if (and (common:file-exists? full-path)
-                                                                   (directory?   full-path)
-                                                                   (file-write-access? full-path))
-                                                              (s:a run-name 'href (conc targ-path "/run-summary.html"))
-                                                              (begin
-                                                                (debug:print 0 *default-log-port* "INFO: Can't create " targ-path "/run-summary.html")
-                                                                (conc run-name " (Not able to create summary at " targ-path ")")))))))))))
-          (close-output-port oup)
-	  ;; the lock is released here, before the per-run pages below are written
-	  (common:simple-file-release-lock lockfile)
-               
-	  (for-each
-	   (lambda (run)
-	     ;; test-subpath is bound but not used; run-dir holds the same value
-	     (let* ((test-subpath (tests:run-record->test-path run numkeys))
-		    (run-id       (db:get-value-by-header run header "id"))
-                    (run-dir      (tests:run-record->test-path run numkeys))
-		    (test-dats    (rmt:get-tests-for-run
-				   run-id
-                                   "%/"       ;; testnamepatt
-				   '()        ;; states
-				   '()        ;; statuses
-				   #f         ;; offset
-				   #f         ;; num-to-get
-				   #f         ;; hide/not-hide
-				   #f         ;; sort-by
-				   #f         ;; sort-order
-				   #f         ;; 'shortlist                           ;; qrytype
-                                   0         ;; last update
-				   #f))
-                    ;; NOTE(review): string-split is called without a delimiter
-                    ;; argument here; if the full name is "/" separated this
-                    ;; presumably needs "/" - confirm
-                    (tests-tree-dat (map (lambda (test-dat)
-                                         ;; (tests:run-record->test-path x numkeys))
-                                         (let* ((test-name  (db:test-get-testname test-dat))
-                                                (item-path  (db:test-get-item-path test-dat))
-                                                (full-name  (db:test-make-full-name test-name item-path))
-                                                (path-parts (string-split full-name)))
-                                           path-parts))
-                                       test-dats))
-                    (tests-htree (common:list->htree tests-tree-dat))
-                    (html-dir    (conc linktree "/" (string-intersperse run-dir "/")))
-                    (html-path   (conc html-dir "/run-summary.html"))
-                    ;; #f when the run directory is missing or not writable
-                    (oup         (if (and (common:file-exists? html-dir)
-                                          (directory?   html-dir)
-                                          (file-write-access? html-dir))
-                                     (open-output-file  html-path)
-                                     #f)))
-               ;; (print "run-dir: " run-dir ", tests-tree-dat: " tests-tree-dat)
-               (if oup
-                   (begin
-                     (s:output-new
-                      oup
-                      (s:html tests:css-jscript-block
-                              (s:title "Summary for " area-name)
-                              (s:body 'onload "addEvents();"
-                                      (s:h1 "Summary for " (string-intersperse run-dir "/"))
-                                      ;; top list
-                                      (s:ul 'id "LinkedList1" 'class "LinkedList"
-                                            (s:li
-                                             "Tests"
-                                             (common:htree->html tests-htree
-                                                                 '()
-                                                                 ;; tip function: x is the tip value (unused), p the
-                                                                 ;; list of path components below the run directory
-                                                                 (lambda (x p)
-                                                                   (let* ((targ-path (string-intersperse p "/"))
-                                                                          (test-name (car p))
-                                                                          (item-path ;; (if (> (length p) 2) ;; test-name + run-name
-                                                                           (string-intersperse p "/"))
-                                                                          (full-targ (conc html-dir "/" targ-path))
-                                                                          (std-file  (conc full-targ "/test-summary.html"))
-                                                                          (alt-file  (conc full-targ "/megatest-rollup-" test-name ".html"))
-                                                                          ;; prefer the rollup file when it exists
-                                                                          (html-file (if (common:file-exists? alt-file)
-                                                                                         alt-file
-                                                                                         std-file))
-                                                                          (run-name  (car (reverse p))))
-                                                                     ;; NOTE(review): this condition requires full-targ to
-                                                                     ;; not exist and also to be a directory, which looks
-                                                                     ;; unsatisfiable; possibly html-file was meant in the
-                                                                     ;; first test - confirm
-                                                                     (if (and (not (common:file-exists? full-targ))
-                                                                              (directory? full-targ)
-                                                                              (file-write-access? full-targ))
-                                                                         (tests:summarize-test 
-                                                                          run-id 
-                                                                          (rmt:get-test-id run-id test-name item-path)))
-                                                                     (if (common:file-exists? full-targ)
-                                                                         (s:a run-name 'href html-file)
-                                                                         (begin
-                                                                           (debug:print 0 *default-log-port* "ERROR: can't access " full-targ)
-                                                                           (conc "No summary for " run-name)))))
-                                                                 ))))))
-                     (close-output-port oup)))))
-           runs)
-          #t)
-	#f)))
-
-
-
-
-
-
-
-;; CHECK - WAS THIS ADDED OR REMOVED? MANUAL MERGE WITH API STUFF!!!
-;;
-;; get a pretty table to summarize steps
-;;
-;; (define (dcommon:process-steps-table steps);; db test-id #!key (work-area #f))
-(define (tests:process-steps-table steps);; db test-id #!key (work-area #f))
-;;  (let ((steps   (db:get-steps-for-test db test-id work-area: work-area)))
-    ;; organise the steps for better readability
-    (let ((res (make-hash-table)))
-      (for-each 
-       (lambda (step)
-	 (debug:print 6 *default-log-port* "step=" step)
-	 (let ((record (hash-table-ref/default 
-			res 
-			(tdb:step-get-stepname step)
-			;;           0                      1    2    3       4         5       6       7
-			;;        stepname                start end status Duration  Logfile Comment  first-id
-			(vector (tdb:step-get-stepname step) ""   "" ""     ""        ""     ""       #f))))
-	   (debug:print 6 *default-log-port* "record(before) = " record 
-			"\nid:       " (tdb:step-get-id step)
-			"\nstepname: " (tdb:step-get-stepname step)
-			"\nstate:    " (tdb:step-get-state step)
-			"\nstatus:   " (tdb:step-get-status step)
-			"\ntime:     " (tdb:step-get-event_time step))
-	   (if (not (vector-ref record 7))(vector-set! record 7 (tdb:step-get-id step))) ;; do not clobber the id if previously set
-	   (case (string->symbol (tdb:step-get-state step))
-	     ((start)(vector-set! record 1 (tdb:step-get-event_time step))
-	      (vector-set! record 3 (if (equal? (vector-ref record 3) "")
-					(tdb:step-get-status step)))
-	      (if (> (string-length (tdb:step-get-logfile step))
-		     0)
-		  (vector-set! record 5 (tdb:step-get-logfile step))))
-	     ((end)  
-	      (vector-set! record 2 (any->number (tdb:step-get-event_time step)))
-	      (vector-set! record 3 (tdb:step-get-status step))
-	      (vector-set! record 4 (let ((startt (any->number (vector-ref record 1)))
-					  (endt   (any->number (vector-ref record 2))))
-				      (debug:print 4 *default-log-port* "record[1]=" (vector-ref record 1) 
-						   ", startt=" startt ", endt=" endt
-						   ", get-status: " (tdb:step-get-status step))
-				      (if (and (number? startt)(number? endt))
-					  (seconds->hr-min-sec (- endt startt)) "-1")))
-	      (if (> (string-length (tdb:step-get-logfile step))
-		     0)
-		  (vector-set! record 5 (tdb:step-get-logfile step)))
-	      (if (> (string-length (tdb:step-get-comment step))
-		     0)
-		  (vector-set! record 6 (tdb:step-get-comment step))))
-	     (else
-	      (vector-set! record 2 (tdb:step-get-state step))
-	      (vector-set! record 3 (tdb:step-get-status step))
-	      (vector-set! record 4 (tdb:step-get-event_time step))
-	      (vector-set! record 6 (tdb:step-get-comment step))))
-	   (hash-table-set! res (tdb:step-get-stepname step) record)
-	   (debug:print 6 *default-log-port* "record(after)  = " record 
-			"\nid:       " (tdb:step-get-id step)
-			"\nstepname: " (tdb:step-get-stepname step)
-			"\nstate:    " (tdb:step-get-state step)
-			"\nstatus:   " (tdb:step-get-status step)
-			"\ntime:     " (tdb:step-get-event_time step))))
-       ;; (else   (vector-set! record 1 (tdb:step-get-event_time step)))
-       (sort steps (lambda (a b)
-		     (cond
-		      ((<   (tdb:step-get-event_time a)(tdb:step-get-event_time b)) #t)
-		      ((eq? (tdb:step-get-event_time a)(tdb:step-get-event_time b)) 
-		       (<   (tdb:step-get-id a)        (tdb:step-get-id b)))
-		      (else #f)))))
-      res))
-
-;; 
-;;
-(define (tests:get-compressed-steps run-id test-id)
-  (let* ((steps-data  (rmt:get-steps-for-test run-id test-id)) ;;      0       1    2    3       4       5       6      7       
-	 (comprsteps  (tests:process-steps-table steps-data))) ;; #<stepname start end status Duration Logfile Comment id>
-    (map (lambda (x)
-	   ;; take advantage of the \n on time->string
-	   (vector    ;; we are constructing basically the original vector but collapsing start end records
-	    (vector-ref x 0)                              ;; id        0
-	    (let ((s (vector-ref x 1)))
-	      (if (number? s)(seconds->time-string s) s)) ;; starttime 1
-	    (let ((s (vector-ref x 2)))
-	      (if (number? s)(seconds->time-string s) s)) ;; endtime   2
-	    (vector-ref x 3)                              ;; status    3    
-	    (vector-ref x 4)                              ;; duration  4
-	    (vector-ref x 5)                              ;; logfile   5
-	    (vector-ref x 6)                              ;; comment   6
-	    (vector-ref x 7)))                            ;; id        7
-	 (sort (hash-table-values comprsteps)
-	       (lambda (a b)
-		 (let ((time-a (vector-ref a 1))
-		       (time-b (vector-ref b 1))
-		       (id-a   (vector-ref a 7))
-		       (id-b   (vector-ref b 7)))
-		   (if (and (number? time-a)(number? time-b))
-		       (if (< time-a time-b)
-			   #t
-			   (if (eq? time-a time-b)
-			       (< id-a id-b)
-			       ;; (string<? (conc (vector-ref a 2))
-			       ;;	    (conc (vector-ref b 2)))
-			       #f))
-		       (string<? (conc time-a)(conc time-b)))))))))
-
-
-;; summarize test in to a file test-summary.html in the test directory
-;;
-(define (tests:summarize-test run-id test-id)
-  (let* ((test-dat  (rmt:get-test-info-by-id run-id test-id))
-	 (out-dir   (db:test-get-rundir test-dat))
-	 (out-file  (conc out-dir "/test-summary.html")))
-    ;; first verify we are able to write the output file
-    (if (not (file-write-access? out-dir))
-	(debug:print 0 *default-log-port* "ERROR: cannot write test-summary.html to " out-dir)
-	(let* (;; (steps-dat (rmt:get-steps-for-test run-id test-id))
-	       (test-name (db:test-get-testname test-dat))
-	       (item-path (db:test-get-item-path test-dat))
-	       (full-name (db:test-make-full-name test-name item-path))
-	       (oup       (open-output-file out-file))
-	       (status    (db:test-get-status   test-dat))
-	       (color     (common:get-color-from-status status))
-	       (logf      (db:test-get-final_logf test-dat))
-	       (steps-dat (tests:get-compressed-steps run-id test-id)))
-	  ;; (dcommon:get-compressed-steps #f 1 30045)
-	  ;; (#("wasting_time" "23:36:13" "23:36:21" "0" "8.0s" "wasting_time.log"))
-	  
-	  (s:output-new
-	   oup
-	   (s:html
-	    (s:title "Summary for " full-name)
-	    (s:body 
-	     (s:h2 "Summary for " full-name)
-	     (s:table 'cellspacing "0" 'border "1"
-		      (s:tr (s:td "run id")   (s:td (db:test-get-run_id   test-dat))
-			    (s:td "test id")  (s:td (db:test-get-id       test-dat)))
-		      (s:tr (s:td "testname") (s:td test-name)
-			    (s:td "itempath") (s:td item-path))
-		      (s:tr (s:td "state")    (s:td (db:test-get-state    test-dat))
-			    (s:td "status")   (s:td (s:a 'href logf (s:font 'color color status))))
-		      (s:tr (s:td "TestDate") (s:td (seconds->work-week/day-time 
-						     (db:test-get-event_time test-dat)))
-			    (s:td "Duration") (s:td (seconds->hr-min-sec (db:test-get-run_duration test-dat)))))
-	     (s:h3 "Log files")
-	     (s:table 
-	      'cellspacing "0" 'border "1"
-	      (s:tr (s:td "Final log")(s:td (s:a 'href logf logf))))
-	     (s:table
-	      'cellspacing "0" 'border "1"
-	      (s:tr (s:td "Step Name")(s:td "Start")(s:td "End")(s:td "Status")(s:td "Duration")(s:td "Log File"))
-	      (map (lambda (step-dat)
-		     (s:tr (s:td (tdb:steps-table-get-stepname step-dat))
-			   (s:td (tdb:steps-table-get-start    step-dat))
-			   (s:td (tdb:steps-table-get-end      step-dat))
-			   (s:td (tdb:steps-table-get-status   step-dat))
-			   (s:td (tdb:steps-table-get-runtime  step-dat))
-			   (s:td (let ((step-log (tdb:steps-table-get-log-file step-dat)))
-				   (s:a 'href step-log step-log)))))
-		   steps-dat))
-	     )))
-	  (close-output-port oup)))))
-	  
-	  
-;; MUST BE CALLED local!
-;;
-(define (tests:test-get-paths-matching keynames target fnamepatt #!key (res '()))
-  ;; BUG: Move the values derived from args to parameters and push to megatest.scm
-  (let* ((testpatt   (or (args:get-arg "-testpatt")(args:get-arg "-testpatt") "%"))
-	 (statepatt  (or (args:get-arg "-state")   (args:get-arg ":state")    "%"))
-	 (statuspatt (or (args:get-arg "-status")  (args:get-arg ":status")   "%"))
-	 (runname    (or (args:get-arg "-runname") (args:get-arg ":runname")  "%"))
-	 (paths-from-db (rmt:test-get-paths-matching-keynames-target-new keynames target res
-					testpatt
-					statepatt
-					statuspatt
-					runname)))
-    (if fnamepatt
-	(apply append 
-	       (map (lambda (p)
-		      (if (directory-exists? p)
-			  (let ((glob-query (conc p "/" fnamepatt)))
-			    (handle-exceptions
-				exn
-				(with-input-from-pipe
-				    (conc "echo " glob-query)
-				  read-lines)  ;; we aren't going to try too hard. If glob breaks it is likely because someone tried to do */*/*.log or similar
-			      (glob glob-query)))
-			  '()))
-		    paths-from-db))
-	paths-from-db)))
-
-			      
-;;======================================================================
-;; Gather data from test/task specifications
-;;======================================================================
-
-;; (define (tests:get-valid-tests testsdir test-patts) ;;  #!key (test-names '()))
-;;   (let ((tests (glob (conc testsdir "/tests/*")))) ;; " (string-translate patt "%" "*")))))
-;;     (set! tests (filter (lambda (test)(common:file-exists? (conc test "/testconfig"))) tests))
-;;     (delete-duplicates
-;;      (filter (lambda (testname)
-;; 	       (tests:match test-patts testname #f))
-;; 	     (map (lambda (testp)
-;; 		    (last (string-split testp "/")))
-;; 		  tests)))))
-
-(define (tests:get-test-path-from-environment)
-  (if (and (getenv "MT_LINKTREE")
-	   (getenv "MT_TARGET")
-	   (getenv "MT_RUNNAME")
-	   (getenv "MT_TEST_NAME")
-	   (getenv "MT_ITEMPATH"))
-      (conc (getenv "MT_LINKTREE")  "/"
-	    (getenv "MT_TARGET")    "/"
-	    (getenv "MT_RUNNAME")   "/"
-	    (getenv "MT_TEST_NAME")
-	    (if (and (getenv "MT_ITEMPATH")
-                     (not (string=? "" (getenv "MT_ITEMPATH"))))
-		(conc "/" (getenv "MT_ITEMPATH"))
-                ""))
-      #f))
-
-;; if .testconfig exists in test directory read and return it
-;; else if have cached copy in *testconfigs* return it IFF there is a section "have fulldata"
-;; else read the testconfig file
-;;   if have path to test directory save the config as .testconfig and return it
-;;
-(define (tests:get-testconfig test-name item-path test-registry system-allowed
-			      #!key (force-create #f)(allow-write-cache #t)(wait-a-minute #f))
-  (let* ((use-cache    (common:use-cache?))
-	 (cache-path   (tests:get-test-path-from-environment))
-	 (cache-file   (and cache-path (conc cache-path "/.testconfig")))
-	 (cache-exists (and cache-file
-			    (not force-create)  ;; if force-create then pretend there is no cache to read
-			    (common:file-exists? cache-file)))
-	 (cached-dat   (if (and (not force-create)
-				cache-exists
-				use-cache)
-			   (handle-exceptions
-			    exn
-			    #f ;; any issues, just give up with the cached version and re-read
-			    (configf:read-alist cache-file))
-			   #f))
-         (test-full-name (if (and item-path (not (string-null? item-path)))
-                             (conc test-name "/" item-path)
-                             test-name)))
-    (if cached-dat
-	cached-dat
-	(let ((dat (hash-table-ref/default *testconfigs* test-full-name #f)))
-	  (if (and  dat ;; have a locally cached version
-		    (hash-table-ref/default dat "have fulldata" #f)) ;; marked as good data?
-	      dat
-	      ;; no cached data available
-	      (let* ((treg         (or test-registry
-				       (tests:get-all)))
-		     (test-path    (or (hash-table-ref/default treg test-name #f)
-                                       (let* ((local-tcdir (conc (getenv "MT_LINKTREE") "/"
-                                                                 (getenv "MT_TARGET") "/"
-                                                                 (getenv "MT_RUNNAME") "/"
-                                                                 test-name "/" item-path))
-                                              (local-tcfg (conc local-tcdir "/testconfig")))
-                                         (if (common:file-exists? local-tcfg)
-                                             local-tcdir
-                                             #f))
-				       (conc *toppath* "/tests/" test-name)))
-		     (test-configf (conc test-path "/testconfig"))
-		     (testexists   (let loopa ((tries-left 30))
-                                     (cond
-                                      (
-                                       (and (common:file-exists? test-configf)(file-read-access? test-configf))
-                                       #t)
-                                      (
-                                       (common:file-exists? test-configf)
-                                       (debug:print 0 *default-log-port* "WARNING: Cannot read testconfig file: "test-configf)
-                                       #f)
-                                      (
-                                       (and wait-a-minute (> tries-left 0))
-                                       (thread-sleep! 10)
-                                       (debug:print 0 *default-log-port* "WARNING: testconfig file does not exist: "test-configf" will retry in 10 seconds.  Tries left: "tries-left) ;; BB: this fires
-                                       (loopa (sub1 tries-left)))
-                                      (else
-                                       (debug:print 0 *default-log-port* "WARNING: testconfig file does not exist: "test-configf) ;; BB: this fires
-                                       #f))))
-		     (tcfg         (if testexists
-				       (read-config test-configf #f system-allowed
-						    environ-patt: (if system-allowed
-								      "pre-launch-env-vars"
-								      #f))
-				       #f)))
-		(if (and tcfg cache-file) (hash-table-set! tcfg "have fulldata" #t)) ;; mark this as fully read data
-		(if tcfg (hash-table-set! *testconfigs* test-full-name tcfg))
-		(if (and testexists
-			 cache-file
-			 (file-write-access? cache-path)
-			 allow-write-cache)
-		    (let ((tpath (conc cache-path "/.testconfig")))
-		      (debug:print-info 1 *default-log-port* "Caching testconfig for " test-name " in " tpath)
-                      (if (and tcfg (not (common:in-running-test?)))
-                          (configf:write-alist tcfg tpath))))
-		tcfg))))))
-  
-;; sort tests by priority and waiton
-;; Move test specific stuff to a test unit FIXME one of these days
-(define (tests:sort-by-priority-and-waiton test-records)
-  (if (eq? (hash-table-size test-records) 0)
-      '()
-      (let* ((mungepriority (lambda (priority)
-			      (if priority
-				  (let ((tmp (any->number priority)))
-				    (if tmp tmp (begin (debug:print-error 0 *default-log-port* "bad priority value " priority ", using 0") 0)))
-				  0)))
-	     (all-tests      (hash-table-keys test-records))
-	     (all-waited-on  (let loop ((hed (car all-tests))
-					(tal (cdr all-tests))
-					(res '()))
-			       (let* ((trec    (hash-table-ref test-records hed))
-				      (waitons (or (tests:testqueue-get-waitons trec) '())))
-				 (if (null? tal)
-				     (append res waitons)
-				     (loop (car tal)(cdr tal)(append res waitons))))))
-	     (sort-fn1 
-	      (lambda (a b)
-		(let* ((a-record   (hash-table-ref test-records a))
-		       (b-record   (hash-table-ref test-records b))
-		       (a-waitons  (or (tests:testqueue-get-waitons a-record) '()))
-		       (b-waitons  (or (tests:testqueue-get-waitons b-record) '()))
-		       (a-config   (tests:testqueue-get-testconfig  a-record))
-		       (b-config   (tests:testqueue-get-testconfig  b-record))
-		       (a-raw-pri  (configf:lookup a-config "requirements" "priority"))
-		       (b-raw-pri  (configf:lookup b-config "requirements" "priority"))
-		       (a-priority (mungepriority a-raw-pri))
-		       (b-priority (mungepriority b-raw-pri)))
-		  (tests:testqueue-set-priority! a-record a-priority)
-		  (tests:testqueue-set-priority! b-record b-priority)
-		  ;; (debug:print 0 *default-log-port* "a=" a ", b=" b ", a-waitons=" a-waitons ", b-waitons=" b-waitons)
-		  (cond
-		   ;; is 
-		   ((member a b-waitons)          ;; is b waiting on a?
-		    ;; (debug:print 0 *default-log-port* "case1")
-		    #t)
-		   ((member b a-waitons)          ;; is a waiting on b?
-		    ;; (debug:print 0 *default-log-port* "case2")
-		    #f)
-		   ((and (not (null? a-waitons))  ;; both have waitons - do not disturb
-			 (not (null? b-waitons)))
-		    ;; (debug:print 0 *default-log-port* "case2.1")
-		    #t)
-		   ((and (null? a-waitons)        ;; no waitons for a but b has waitons
-			 (not (null? b-waitons)))
-		    ;; (debug:print 0 *default-log-port* "case3")
-		    #f)
-		   ((and (not (null? a-waitons))  ;; a has waitons but b does not
-			 (null? b-waitons)) 
-		    ;; (debug:print 0 *default-log-port* "case4")
-		    #t)
-		   ((not (eq? a-priority b-priority)) ;; use
-		    (> a-priority b-priority))
-		   (else
-		    ;; (debug:print 0 *default-log-port* "case5")
-		    (string>? a b))))))
-	     
-	     (sort-fn2
-	      (lambda (a b)
-		(> (mungepriority (tests:testqueue-get-priority (hash-table-ref test-records a)))
-		   (mungepriority (tests:testqueue-get-priority (hash-table-ref test-records b)))))))
-	;; (let ((dot-res (tests:run-dot (tests:tests->dot test-records) "plain")))
-	;;   (debug:print "dot-res=" dot-res))
-	;; (let ((data (map cdr (filter
-	;;     		  (lambda (x)(equal? "node" (car x)))
-	;;     		  (map string-split (tests:easy-dot test-records "plain"))))))
-	;;   (map car (sort data (lambda (a b)
-	;;     		    (> (string->number (caddr a))(string->number (caddr b)))))))
-	;; ))
-	(sort all-tests sort-fn1)))) ;; avoid dealing with deleted tests, look at the hash table
-
-(define (tests:easy-dot test-records outtype)
-  (let-values (((fd temp-path) (file-mkstemp (conc "/tmp/" (current-user-name) ".XXXXXX"))))
-    (let ((all-testnames (hash-table-keys test-records))
-	  (temp-port     (open-output-file* fd)))
-      ;; (format temp-port "This file is ~A.~%" temp-path)
-      (format temp-port "digraph tests {\n")
-      (format temp-port "  size=4,8\n")
-      ;; (format temp-port "   splines=none\n")
-      (for-each
-       (lambda (testname)
-	 (let* ((testrec (hash-table-ref test-records testname))
-		(waitons (or (tests:testqueue-get-waitons testrec) '())))
-	   (for-each
-	    (lambda (waiton)
-	      (format temp-port (conc "   " waiton " -> " testname " [splines=ortho]\n")))
-	    waitons)))
-       all-testnames)
-      (format temp-port "}\n")
-      (close-output-port temp-port)
-      (with-input-from-pipe
-       (conc "env -i PATH=$PATH dot -T" outtype " < " temp-path)
-       (lambda ()
-	 (let ((res (read-lines)))
-	   ;; (delete-file temp-path)
-	   res))))))
-
-(define (tests:write-dot-file test-records fname sizex sizey)
-  (if (file-write-access? (pathname-directory fname))
-      (with-output-to-file fname
-	(lambda ()
-	  (map print (tests:tests->dot test-records sizex sizey))))))
-
-(define (tests:tests->dot test-records sizex sizey)
-  (let ((all-testnames (hash-table-keys test-records)))
-    (if (null? all-testnames)
-	'()
-	(let loop ((hed (car all-testnames))
-		   (tal (cdr all-testnames))
-		   (res (list "digraph tests {"
-			      (conc " size=\"" (or sizex 11) "," (or sizey 11) "\";")
-			      " ratio=0.95;"
-			      )))
-	  (let* ((testrec (hash-table-ref test-records hed))
-		 (waitons (or (tests:testqueue-get-waitons testrec) '()))
-		 (newres  (append res
-				  (if (null? waitons)
-				      (list (conc "   \"" hed "\" [shape=box];"))
-				      (map (lambda (waiton)
-					     (conc "   \"" waiton "\" -> \"" hed "\" [shape=box];"))
-					   waitons)
-				      ))))
-	    (if (null? tal)
-		(append newres (list "}"))
-		(loop (car tal)(cdr tal) newres)
-		))))))
-
-;; (tests:run-dot (list "digraph tests {" "a -> b" "}") "plain")
-
-(define (tests:run-dot indat outtype) ;; outtype is plain, fig, dot, etc. http://www.graphviz.org/content/output-formats
-  (let-values (((inp oup pid)(process "env -i PATH=$PATH dot" (list "-T" outtype))))
-    (with-output-to-port oup
-      (lambda ()
-	(map print indat)))
-    (close-output-port oup)
-    (let ((res (with-input-from-port inp
-		 (lambda ()
-		   (read-lines)))))
-      (close-input-port inp)
-      res)))
-
-;; read data from tmp file or create if not exists
-;; if exists regen in background
-;;
-(define (tests:lazy-dot testrecords  outtype sizex sizey)
-  (let ((dfile (conc "/tmp/." (current-user-name) "-" (server:mk-signature) ".dot"))
-	(fname (conc "/tmp/." (current-user-name) "-" (server:mk-signature) ".dotdat")))
-    (tests:write-dot-file testrecords dfile sizex sizey)
-    (if (common:file-exists? fname)
-	(let ((res (with-input-from-file fname
-		     (lambda ()
-		       (read-lines)))))
-	  (system (conc "env -i PATH=$PATH dot -T " outtype " < " dfile " > " fname "&"))
-	  res)
-	(begin
-	  (system (conc "env -i PATH=$PATH dot -T " outtype " < " dfile " > " fname))
-	  (with-input-from-file fname
-	    (lambda ()
-	      (read-lines)))))))
-	  
-
-;; for each test:
-;;   
-(define (tests:filter-non-runnable run-id testkeynames testrecordshash)
-  (let ((runnables '()))
-    (for-each
-     (lambda (testkeyname)
-       (let* ((test-record (hash-table-ref testrecordshash testkeyname))
-	      (test-name   (tests:testqueue-get-testname  test-record))
-	      (itemdat     (tests:testqueue-get-itemdat   test-record))
-	      (item-path   (tests:testqueue-get-item_path test-record))
-	      (waitons     (tests:testqueue-get-waitons   test-record))
-	      (keep-test   #t)
-	      (test-id     (rmt:get-test-id run-id test-name item-path))
-	      (tdat        (rmt:get-testinfo-state-status run-id test-id))) ;; (cdb:get-test-info-by-id *runremote* test-id)))
-	 (if tdat
-	     (begin
-	       ;; Look at the test state and status
-	       (if (or (and (member (db:test-get-status tdat) 
-				    '("PASS" "WARN" "WAIVED" "CHECK" "SKIP"))
-			    (equal? (db:test-get-state tdat) "COMPLETED"))
-		       (member (db:test-get-state tdat)
-				    '("INCOMPLETE" "KILLED")))
-		   (set! keep-test #f))
-
-	       ;; examine waitons for any fails. If it is FAIL or INCOMPLETE then eliminate this test
-	       ;; from the runnable list
-	       (if keep-test
-		   (for-each (lambda (waiton)
-			       ;; for now we are waiting only on the parent test
-			       (let* ((parent-test-id (rmt:get-test-id run-id waiton ""))
-				      (wtdat          (rmt:get-testinfo-state-status run-id test-id))) ;; (cdb:get-test-info-by-id *runremote* test-id)))
-				 (if (or (and (equal? (db:test-get-state wtdat) "COMPLETED")
-					      (member (db:test-get-status wtdat) '("FAIL" "ABORT")))
-					 (member (db:test-get-status wtdat)  '("KILLED"))
-					 (member (db:test-get-state wtdat)   '("INCOMPETE")))
-				 ;; (if (or (member (db:test-get-status wtdat)
-				 ;;        	 '("FAIL" "KILLED"))
-				 ;;         (member (db:test-get-state wtdat)
-				 ;;        	 '("INCOMPETE")))
-				     (set! keep-test #f)))) ;; no point in running this one again
-			     waitons))))
-	 (if keep-test (set! runnables (cons testkeyname runnables)))))
-     testkeynames)
-    runnables))
-
-;;======================================================================
-;; refactoring this block into tests:get-full-data from line 263 of runs.scm
-;;======================================================================
-;; hed is the test name
-;; test-records is a hash of test-name => test record
-(define (tests:get-full-data test-names test-records required-tests all-tests-registry)
-  (if (not (null? test-names))
-      (let loop ((hed (car test-names))
-		 (tal (cdr test-names)))         ;; 'return-procs tells the config reader to prep running system but return a proc
-	(debug:print-info 4 *default-log-port* "hed=" hed " at top of loop")
-        ;; don't know item-path at this time, let the testconfig get the top level testconfig
-	(let* ((config  (tests:get-testconfig hed #f all-tests-registry 'return-procs))
-	       (waitons (let ((instr (if config 
-					 (configf:lookup config "requirements" "waiton")
-					 (begin ;; No config means this is a non-existant test
-					   (debug:print-error 0 *default-log-port* "non-existent required test \"" hed "\", grep through your testconfigs to find and remove or create the test. Discarding and continuing.")
-					     ""))))
-			  (debug:print-info 8 *default-log-port* "waitons string is " instr)
-			  (string-split (cond
-					 ((procedure? instr)
-					  (let ((res (instr)))
-					    (debug:print-info 8 *default-log-port* "waiton procedure results in string " res " for test " hed)
-					    res))
-					 ((string? instr)     instr)
-					 (else 
-					  ;; NOTE: This is actually the case of *no* waitons! ;; (debug:print-error 0 *default-log-port* "something went wrong in processing waitons for test " hed)
-					  ""))))))
-	  (if (not config) ;; this is a non-existant test called in a waiton. 
-	      (if (null? tal)
-		  test-records
-		  (loop (car tal)(cdr tal)))
-	      (begin
-		(debug:print-info 8 *default-log-port* "waitons: " waitons)
-		;; check for hed in waitons => this would be circular, remove it and issue an
-		;; error
-		(if (member hed waitons)
-		    (begin
-		      (debug:print-error 0 *default-log-port* "test " hed " has listed itself as a waiton, please correct this!")
-		      (set! waitons (filter (lambda (x)(not (equal? x hed))) waitons))))
-		
-		;; (items   (items:get-items-from-config config)))
-		(if (not (hash-table-ref/default test-records hed #f))
-		    (hash-table-set! test-records
-				     hed (vector hed     ;; 0
-						 config  ;; 1
-						 waitons ;; 2
-						 (configf:lookup config "requirements" "priority")     ;; priority 3
-						 (let ((items      (hash-table-ref/default config "items" #f)) ;; items 4
-						       (itemstable (hash-table-ref/default config "itemstable" #f))) 
-						   ;; if either items or items table is a proc return it so test running
-						   ;; process can know to call items:get-items-from-config
-						   ;; if either is a list and none is a proc go ahead and call get-items
-						   ;; otherwise return #f - this is not an iterated test
-						   (cond
-						    ((procedure? items)      
-						     (debug:print-info 4 *default-log-port* "items is a procedure, will calc later")
-						     items)            ;; calc later
-						    ((procedure? itemstable)
-						     (debug:print-info 4 *default-log-port* "itemstable is a procedure, will calc later")
-						     itemstable)       ;; calc later
-						    ((filter (lambda (x)
-							       (let ((val (car x)))
-								 (if (procedure? val) val #f)))
-							     (append (if (list? items) items '())
-								     (if (list? itemstable) itemstable '())))
-						     'have-procedure)
-						    ((or (list? items)(list? itemstable)) ;; calc now
-						     (debug:print-info 4 *default-log-port* "items and itemstable are lists, calc now\n"
-								       "    items: " items " itemstable: " itemstable)
-						     (items:get-items-from-config config))
-						    (else #f)))                           ;; not iterated
-						 #f      ;; itemsdat 5
-						 #f      ;; spare - used for item-path
-						 )))
-		(for-each 
-		 (lambda (waiton)
-		   (if (and waiton (not (member waiton test-names)))
-		       (begin
-			 (set! required-tests (cons waiton required-tests))
-			 (set! test-names (cons waiton test-names))))) ;; was an append, now a cons
-		 waitons)
-		(let ((remtests (delete-duplicates (append waitons tal))))
-		  (if (not (null? remtests))
-		      (loop (car remtests)(cdr remtests))
-		      test-records))))))))
-
-;;======================================================================
-;; test steps
-;;======================================================================
-
-;; teststep-set-status! used to be here
-
-(define (test-get-kill-request run-id test-id) ;; run-id test-name itemdat)
-  (let* ((testdat   (rmt:get-test-info-by-id run-id test-id)))
-    (and testdat
-	 (equal? (test:get-state testdat) "KILLREQ"))))
-
-(define (test:tdb-get-rundat-count tdb)
-  (if tdb
-      (let ((res 0))
-	(sqlite3:for-each-row
-	 (lambda (count)
-	   (set! res count))
-	 tdb
-	 "SELECT count(id) FROM test_rundat;")
-	res))
-  0)
-
-(define (tests:update-central-meta-info run-id test-id cpuload diskfree minutes uname hostname)
-  (rmt:general-call 'update-test-rundat run-id test-id (current-seconds) (or cpuload -1)(or diskfree -1) -1 (or minutes -1))
-  (if (and cpuload diskfree)
-      (rmt:general-call 'update-cpuload-diskfree run-id cpuload diskfree test-id))
-  (if minutes 
-      (rmt:general-call 'update-run-duration run-id minutes test-id))
-  (if (and uname hostname)
-      (rmt:general-call 'update-uname-host run-id uname hostname test-id)))
-  
-;; This one is for running with no db access (i.e. via rmt: internally)
-(define (tests:set-full-meta-info db test-id run-id minutes work-area remtries)
-;; (define (tests:set-full-meta-info test-id run-id minutes work-area)
-;;  (let ((remtries 10))
-  (let* ((cpuload  (get-cpu-load))
-	 (diskfree (get-df (current-directory)))
-	 (uname    (get-uname "-srvpio"))
-	 (hostname (get-host-name)))
-    (tests:update-central-meta-info run-id test-id cpuload diskfree minutes uname hostname)))
-    
-;; (define (tests:set-partial-meta-info test-id run-id minutes work-area)
-#;(define (tests:set-partial-meta-info test-id run-id minutes work-area remtries)
-  (let* ((cpuload  (get-cpu-load))
-	 (diskfree (get-df (current-directory)))
-	 (remtries 10))
-    (handle-exceptions
-     exn
-     (if (> remtries 0)
-	 (begin
-	   (print-call-chain (current-error-port))
-	   (debug:print-info 0 *default-log-port* "WARNING: failed to set meta info. Will try " remtries " more times")
-	   (set! remtries (- remtries 1))
-	   (thread-sleep! 10)
-	   (tests:set-full-meta-info db test-id run-id minutes work-area (- remtries 1)))
-	 (let ((err-status ((condition-property-accessor 'sqlite3 'status #f) exn)))
-	   (debug:print-error 0 *default-log-port* "tried for over a minute to update meta info and failed. Giving up")
-	   (debug:print 0 *default-log-port* "EXCEPTION: database probably overloaded or unreadable.")
-	   (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
-	   (debug:print 5 *default-log-port* "exn=" (condition->list exn))
-	   (debug:print 0 *default-log-port* " status:  " ((condition-property-accessor 'sqlite3 'status) exn))
-	   (print-call-chain (current-error-port))))
-     (tests:update-testdat-meta-info db test-id work-area cpuload diskfree minutes)
-  )))
-	 
-;;======================================================================
-;; A R C H I V I N G
-;;======================================================================
-
-(define (test:archive db test-id)
-  #f)
-
-(define (test:archive-tests db keynames target)
-  #f)
-

Index: testsmod.scm
==================================================================
--- testsmod.scm
+++ testsmod.scm
@@ -19,42 +19,41 @@
 ;;======================================================================
 
 (declare (unit testsmod))
 (declare (uses commonmod))
 (declare (uses mtargs))
+(declare (uses servermod))
+(declare (uses mtconfigf))
+(declare (uses itemsmod))
+(declare (uses dbmod))
 
 (module testsmod
 	*
 	
-(import scheme chicken data-structures extras)
+(import scheme chicken data-structures extras files)
 
-(import (prefix sqlite3 sqlite3:) posix typed-records srfi-18 srfi-69 format ports srfi-1 matchable
-	(prefix mtconfigf configf:)
+(import (prefix sqlite3 sqlite3:) posix typed-records srfi-18 srfi-69
+	format ports srfi-1 matchable
+	directory-utils
 	regex srfi-13
 	commonmod
+	servermod
+	itemsmod
+	dbmod
+	(prefix mtconfigf configf:)
 	(prefix mtargs args:))
+
+(include "run_records.scm")
+(include "test_records.scm")
+(include "db_records.scm")
 
 (define *java-script-lib* #f)
 
 (define (init-java-script-lib)
   (set! *java-script-lib* (conc  (common:get-install-area) "/share/js/jquery-3.1.0.slim.min.js"))
   )
 
-;; A routine to map itempaths using a itemmap
-;; patha and pathb must be strings or this will fail
-;;
-;; path-b is waiting on path-a
-;;
-(define (db:compare-itempaths test-b-name path-a path-b itemmaps )
-  (debug:print-info 6 *default-log-port* "ITEMMAPS: " itemmaps)
-  (let* ((itemmap    (tests:lookup-itemmap itemmaps test-b-name)))
-    (if itemmap
-	(let ((path-b-mapped (db:multi-pattern-apply path-b itemmap)))
-	  (debug:print-info 6 *default-log-port* "ITEMMAP is " itemmap ", path: " path-b ", mapped path: " path-b-mapped)
-	  (equal? path-a path-b-mapped))
-	(equal? path-b path-a))))
-
 ;; A routine to convert test/itempath using a itemmap
 ;; NOTE: to process only an itempath (i.e. no prepended testname)
 ;;       just call db:multi-pattern-apply
 ;;
 (define (db:convert-test-itempath path-in itemmap)
@@ -63,18 +62,11 @@
 	 (test-name   (if (null? path-parts) "" (car path-parts)))
 	 (item-path   (string-intersperse (if (null? path-parts) '() (cdr path-parts)) "/")))
     (conc test-name "/" 
 	  (db:multi-pattern-apply item-path itemmap))))
 
-;;======================================================================
-;; Run keys, these are used to hierarchially organise tests and run areas
-;;======================================================================
-
-(define (keys->keystr keys) ;; => key1,key2,key3,additiona1, ...
-  (string-intersperse keys ","))
-
-(define (args:usage . a) #f)
+#; (define (args:usage . a) #f)
 
 ;;======================================================================
 ;; key <=> target routines
 ;;======================================================================
 
@@ -95,72 +87,10 @@
 		      vals)
 	    (debug:print-error 0 *default-log-port* "wrong number of values in " target ", should match " keys))
 	vals)
       (debug:print 4 *default-log-port* "ERROR: keys:target-set-args called with no target.")))
 
-;; given the keys (a list of vectors <key field> or a list of keys) and a target return a keyval list
-;; keyval list ( (key1 val1) (key2 val2) ...)
-(define (keys:target->keyval keys target)
-  (let* ((targlist (string-split target "/"))
-	 (numkeys  (length keys))
-	 (numtarg  (length targlist))
-	 (targtweaked (if (> numkeys numtarg)
-			  (append targlist (make-list (- numkeys numtarg) ""))
-			  targlist)))
-    (map (lambda (key targ)
-	   (list key targ))
-	 keys targtweaked)))
-
-;;======================================================================
-;; config file related routines
-;;======================================================================
-
-(define keys:config-get-fields common:get-fields)
-(define (keys:make-key/field-string confdat)
-  (let ((fields (configf:get-section confdat "fields")))
-    (string-join
-     (map (lambda (field)(conc (car field) " " (cadr field)))
-	  fields)
-     ",")))
-
-;; patterns are:
-;;    "rx1"  "replacement1"\n
-;;    "rx2"  "replacement2"
-;; etc.
-;;
-(define (db:multi-pattern-apply item-path itemmap)
-  (let ((all-patts (string-split itemmap "\n")))
-    (if (null? all-patts)
-	item-path
-	(let loop ((hed (car all-patts))
-		   (tal (cdr all-patts))
-		   (res item-path))
-	  (let* ((parts (string-split hed))
-		 (patt  (car parts))
-
-		 (repl  (if (> (length parts) 1)(cadr parts) ""))
-
-		 (newr  (if (and patt repl)
-			    (begin
-                              (handle-exceptions
-                               exn
-                               (begin
-                                  (debug:print 0 *default-log-port*
-                                  "WARNING: itemmap has problem \"" itemmap "\", patt: " patt ", repl: " repl)
-                                 res)
-                              (string-substitute patt repl res))
-
-
-                              )
-			    (begin
-                              (debug:print 0 *default-log-port*
-                               "WARNING: itemmap has problem \"" itemmap "\", patt: " patt ", repl: " repl)
-			      res))))
-	    (if (null? tal)
-		newr
-		(loop (car tal)(cdr tal) newr)))))))
-
 ;; given waiting-test that is waiting on waiton-test extend test-patt appropriately
 ;;
 ;;  genlib/testconfig               sim/testconfig
 ;;  genlib/sch                      sim/sch/cell1
 ;;
@@ -215,121 +145,18 @@
                           (cons waiton-test patts))))
       (string-intersperse (delete-duplicates new-patts) ",")))))
 
 
   
-;; tests:glob-like-match 
-(define (tests:glob-like-match patt str) 
-  (let ((like (substring-index "%" patt)))
-    (let* ((notpatt  (equal? (substring-index "~" patt) 0))
-	   (newpatt  (if notpatt (substring patt 1) patt))
-	   (finpatt  (if like
-			(string-substitute (regexp "%") ".*" newpatt #f)
-			(string-substitute (regexp "\\*") ".*" newpatt #f)))
-	   (res      #f))
-      ;; (print "tests:glob-like-match => notpatt: " notpatt ", newpatt: " newpatt ", finpatt: " finpatt)
-      (set! res (string-match (regexp finpatt (if like #t #f)) str))
-      (if notpatt (not res) res))))
-
-;; if itempath is #f then look only at the testname part
-;;
-(define (tests:match patterns testname itempath #!key (required '()))
-  (if (string? patterns)
-      (let ((patts (append (string-split patterns ",") required)))
-	(if (null? patts) ;;; no pattern(s) means no match
-	    #f
-	    (let loop ((patt (car patts))
-		       (tal  (cdr patts)))
-	      ;; (print "loop: patt: " patt ", tal " tal)
-	      (if (string=? patt "")
-		  #f ;; nothing ever matches empty string - policy
-		  (let* ((patt-parts (string-match (regexp "^([^\\/]*)(\\/(.*)|)$") patt))
-			 (test-patt  (cadr patt-parts))
-			 (item-patt  (cadddr patt-parts)))
-		    ;; special case: test vs. test/
-		    ;;   test  => "test" "%"
-		    ;;   test/ => "test" ""
-		    (if (and (not (substring-index "/" patt)) ;; no slash in the original
-			     (or (not item-patt)
-				 (equal? item-patt "")))      ;; should always be true that item-patt is ""
-			(set! item-patt "%"))
-		    ;; (print "tests:match => patt-parts: " patt-parts ", test-patt: " test-patt ", item-patt: " item-patt)
-		    (if (and (tests:glob-like-match test-patt testname)
-			     (or (not itempath)
-				 (tests:glob-like-match (if item-patt item-patt "") itempath)))
-			#t
-			(if (null? tal)
-			    #f
-			    (loop (car tal)(cdr tal)))))))))))
-
-;; if itempath is #f then look only at the testname part
-;;
-(define (tests:match->sqlqry patterns)
-  (if (string? patterns)
-      (let ((patts (string-split patterns ",")))
-	(if (null? patts) ;;; no pattern(s) means no match, we will do no query
-	    #f
-	    (let loop ((patt (car patts))
-		       (tal  (cdr patts))
-		       (res  '()))
-	      ;; (print "loop: patt: " patt ", tal " tal)
-	      (let* ((patt-parts (string-match (regexp "^([^\\/]*)(\\/(.*)|)$") patt))
-		     (test-patt  (cadr patt-parts))
-		     (item-patt  (cadddr patt-parts))
-		     (test-qry   (db:patt->like "testname" test-patt))
-		     (item-qry   (db:patt->like "item_path" item-patt))
-		     (qry        (conc "(" test-qry " AND " item-qry ")")))
-		;; (print "tests:match => patt-parts: " patt-parts ", test-patt: " test-patt ", item-patt: " item-patt)
-		(if (null? tal)
-		    (string-intersperse (append (reverse res)(list qry)) " OR ")
-		    (loop (car tal)(cdr tal)(cons qry res)))))))
-      #f))
-
-;; keys list to key1,key2,key3 ...
-(define (runs:get-std-run-fields keys remfields)
-  (let* ((header    (append keys remfields))
-	 (keystr    (conc (keys->keystr keys) ","
-			  (string-intersperse remfields ","))))
-    (list keystr header)))
-
-;; make a query (fieldname like 'patt1' OR fieldname 
-(define (db:patt->like fieldname pattstr #!key (comparator " OR "))
-  (let ((patts (if (string? pattstr)
-		   (string-split pattstr ",")
-		   '("%"))))
-    (string-intersperse (map (lambda (patt)
-			       (let ((wildtype (if (substring-index "%" patt) "LIKE" "GLOB")))
-				 (conc fieldname " " wildtype " '" patt "'")))
-			     (if (null? patts)
-				 '("")
-				 patts))
-			comparator)))
-
 ;; Call this one to do all the work and get a standardized list of tests
 ;;   gets paths from configs and finds valid tests 
 ;;   returns hash of testname --> fullpath
 ;;
 (define (tests:get-all)
   (let* ((test-search-path   (tests:get-tests-search-path *configdat*)))
     (tests:get-valid-tests (make-hash-table) test-search-path)))
 
-(define (tests:get-tests-search-path cfgdat)
-  (let ((paths (let ((section (if cfgdat
-				  (configf:get-section cfgdat "tests-paths")
-				  #f)))
-		 (if section
-		     (map cadr section)
-		     '()))))
-    (filter (lambda (d)
-	      (if (directory-exists? d)
-		  d
-		  (begin
-		    (if (common:low-noise-print 60 "tests:get-tests-search-path" d)
-			(debug:print 0 *default-log-port* "WARNING: problem with directory " d ", dropping it from tests path"))
-		    #f)))
-	    (append paths (list (conc *toppath* "/tests"))))))
-
 (define (tests:get-valid-tests test-registry tests-paths)
   (if (null? tests-paths) 
       test-registry
       (let loop ((hed (car tests-paths))
 		 (tal (cdr tests-paths)))
@@ -374,49 +201,615 @@
 		'())
 	    (if itemmap-table
 		itemmap-table
 		'()))))
 
-;; given a list of itemmaps (testname . map), return the first match
+;;======================================================================
+;; Tests
+;;======================================================================
+
+;; Return the items for a test given its config (tconfig is read with hash-table-ref/default).
+;; Result is a procedure, 'have-procedure, the value of items:get-items-from-config, or #f.
+(define (tests:get-items tconfig)
+  (let ((items      (hash-table-ref/default tconfig "items" #f)) ;; items 4
+	(itemstable (hash-table-ref/default tconfig "itemstable" #f))) 
+    ;; if either items or items table is a proc return it so test running
+    ;; process can know to call items:get-items-from-config
+    ;; if either is a list and none is a proc go ahead and call get-items
+    ;; otherwise return #f - this is not an iterated test
+    (cond
+     ((procedure? items)      
+      (debug:print-info 4 *default-log-port* "items is a procedure, will calc later")
+      items)            ;; calc later
+     ((procedure? itemstable)
+      (debug:print-info 4 *default-log-port* "itemstable is a procedure, will calc later")
+      itemstable)       ;; calc later
+     ((filter (lambda (x) ;; NOTE(review): filter returns a list, and every list (even '()) is true in Scheme,
+		(let ((val (car x))) ;; so this clause fires whenever it is reached; presumably `any` was intended
+		  (if (procedure? val) val #f)))
+	      (append (if (list? items) items '())
+		      (if (list? itemstable) itemstable '())))
+      'have-procedure)
+     ((or (list? items)(list? itemstable)) ;; calc now -- NOTE(review): looks unreachable, see the filter clause above
+      (debug:print-info 4 *default-log-port* "items and itemstable are lists, calc now\n"
+			"    items: " items " itemstable: " itemstable)
+      (items:get-items-from-config tconfig))
+     (else #f))))                           ;; not iterated
+
+
+;; Read the [requirements] waiton and waitor settings for test-name. Returns three values:
+;; waitons, waitors (both filtered to names found in all-tests-registry) and the test config.
+(define (tests:get-waitons test-name all-tests-registry)
+   (let* ((config  (tests:get-testconfig test-name #f all-tests-registry 'return-procs))) ;; assuming no problems with immediate evaluation, this could be simplified ('return-procs -> #t)
+     (let ((instr (if config
+		      (configf:lookup config "requirements" "waiton")
+		      (begin ;; No config means this is a non-existent test
+			(debug:print-error 0 *default-log-port* "non-existent required test \"" test-name "\"")
+			(exit 1))))
+	   (instr2 (if config
+		       (configf:lookup config "requirements" "waitor")
+		       "")))
+       (debug:print-info 8 *default-log-port* "waitons string is " instr ", waitors string is " instr2)
+       (let ((newwaitons
+	      (string-split (cond
+			     ((procedure? instr) ;; the setting was returned as a procedure: call it to get the string
+			      (let ((res (instr)))
+				(debug:print-info 8 *default-log-port* "waiton procedure results in string " res " for test " test-name)
+				res))
+			     ((string? instr)     instr)
+			     (else
+			      ;; NOTE: This is actually the case of *no* waitons! ;; (debug:print-error 0 *default-log-port* "something went wrong in processing waitons for test " test-name)
+			      ""))))
+	     (newwaitors
+	      (string-split (cond
+			     ((procedure? instr2)
+			      (let ((res (instr2)))
+				(debug:print-info 8 *default-log-port* "waitor procedure results in string " res " for test " test-name)
+				res))
+			     ((string? instr2)     instr2)
+			     (else
+			      ;; NOTE: This is actually the case of *no* waitors!
+			      "")))))
+	 (values
+	  ;; the waitons
+	  (filter (lambda (x)
+		    (if (hash-table-ref/default all-tests-registry x #f)
+			#t
+			(begin
+			  (debug:print-error 0 *default-log-port* "test " test-name " has unrecognised waiton testname " x)
+			  #f)))
+		  newwaitons)
+	  (filter (lambda (x) ;; the waitors: report them as waitors so the log points at the right setting
+		    (if (hash-table-ref/default all-tests-registry x #f)
+			#t
+			(begin
+			  (debug:print-error 0 *default-log-port* "test " test-name " has unrecognised waitor testname " x)
+			  #f)))
+		  newwaitors)
+	  config)))))
+					     
+;; Check for waiver eligibility: run every [waivers] rule of the test as a shell command on files
+;; from testdat's run dir and prev-testdat's run dir; #t only if every command exits 0, else #f.
+(define (tests:check-waiver-eligibility testdat prev-testdat)
+  (let* ((test-registry (make-hash-table))
+	 (testconfig  (tests:get-testconfig (db:test-get-testname testdat) (db:test-get-item-path testdat) test-registry #f))
+	 (test-rundir ;; (sdb:qry 'passstr 
+	  (db:test-get-rundir testdat)) ;; )
+	 (prev-rundir ;; (sdb:qry 'passstr 
+	  (db:test-get-rundir prev-testdat)) ;; )
+	 (waivers     (if testconfig (configf:section-vars testconfig "waivers") '()))
+	 (waiver-rx   (regexp "^(\\S+)\\s+(.*)$"))
+	 (diff-rule   "diff %file1% %file2%")
+	 (logpro-rule "diff %file1% %file2% | logpro %waivername%.logpro %waivername%.html"))
+    (if (not (common:file-exists? test-rundir))
+	(begin
+	  (debug:print-error 0 *default-log-port* "test run directory is gone, cannot propagate waiver")
+	  #f)
+	(begin
+	  (push-directory test-rundir) ;; undone by the pop-directory after the loop
+	  (let ((result (if (null? waivers)
+			    #f
+			    (let loop ((hed (car waivers))
+				       (tal (cdr waivers)))
+			      (debug:print 0 *default-log-port* "INFO: Applying waiver rule \"" hed "\"")
+			      (let* ((waiver      (configf:lookup testconfig "waivers" hed))
+				     (wparts      (if waiver (string-match waiver-rx waiver) #f))
+				     (waiver-rule (if wparts (cadr wparts)  #f))
+				     (waiver-glob (if wparts (caddr wparts) #f))
+				     (logpro-file (if waiver
+						      (let ((fname (conc hed ".logpro")))
+							(if (common:file-exists? fname)
+							    fname 
+							    (begin
+							      (debug:print 0 *default-log-port* "INFO: No logpro file " fname " falling back to diff")
+							      #f)))
+						      #f))
+				     ;; if rule by name of waiver-rule is found in testconfig - use it
+				     ;; else if waivername.logpro exists use logpro-rule
+				     ;; else default to diff-rule
+				     (rule-string (let ((rule (configf:lookup testconfig "waiver_rules" waiver-rule)))
+						    (if rule
+							rule
+							(if logpro-file
+							    logpro-rule
+							    (begin ;; NOTE(review): logpro-file is always #f in this branch
+							      (debug:print 0 *default-log-port* "INFO: No logpro file " logpro-file " found, using diff rule")
+							      diff-rule)))))
+				     ;; (string-substitute "%file1%" "foofoo.txt" "This is %file1% and so is this %file1%." #t)
+				     (processed-cmd (string-substitute 
+						     "%file1%" (conc test-rundir "/" waiver-glob)
+						     (string-substitute
+						      "%file2%" (conc prev-rundir "/" waiver-glob)
+						      (string-substitute
+						       "%waivername%" hed rule-string #t) #t) #t))
+				     (res            #f)) ;; NOTE(review): res is never referenced
+				(debug:print 0 *default-log-port* "INFO: waiver command is \"" processed-cmd "\"")
+				(if (eq? (system processed-cmd) 0) ;; a non-zero exit status ends the loop with #f
+				    (if (null? tal)
+					#t
+					(loop (car tal)(cdr tal)))
+				    #f))))))
+	    (pop-directory)
+	    result)))))
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+;; CHECK - WAS THIS ADDED OR REMOVED? MANUAL MERGE WITH API STUFF!!!
+;;
+;; Summarize steps: returns a hash table keyed by step name whose values are 8-slot vectors
+;; (slot layout is listed inside the loop). Steps are processed in event_time order, then id order.
+;; (define (dcommon:process-steps-table steps);; db test-id #!key (work-area #f))
+(define (tests:process-steps-table steps);; db test-id #!key (work-area #f))
+;;  (let ((steps   (db:get-steps-for-test db test-id work-area: work-area)))
+    ;; organise the steps for better readability
+    (let ((res (make-hash-table)))
+      (for-each 
+       (lambda (step)
+	 (debug:print 6 *default-log-port* "step=" step)
+	 (let ((record (hash-table-ref/default 
+			res 
+			(tdb:step-get-stepname step)
+			;;           0                      1    2    3       4         5       6       7
+			;;        stepname                start end status Duration  Logfile Comment  first-id
+			(vector (tdb:step-get-stepname step) ""   "" ""     ""        ""     ""       #f))))
+	   (debug:print 6 *default-log-port* "record(before) = " record 
+			"\nid:       " (tdb:step-get-id step)
+			"\nstepname: " (tdb:step-get-stepname step)
+			"\nstate:    " (tdb:step-get-state step)
+			"\nstatus:   " (tdb:step-get-status step)
+			"\ntime:     " (tdb:step-get-event_time step))
+	   (if (not (vector-ref record 7))(vector-set! record 7 (tdb:step-get-id step))) ;; do not clobber the id if previously set
+	   (case (string->symbol (tdb:step-get-state step))
+	     ((start)(vector-set! record 1 (tdb:step-get-event_time step))
+	      (if (equal? (vector-ref record 3) "") ;; only fill in the status when none is recorded yet; the old
+		  (vector-set! record 3 (tdb:step-get-status step))) ;; one-armed if stored an unspecified value otherwise
+	      (if (> (string-length (tdb:step-get-logfile step))
+		     0)
+		  (vector-set! record 5 (tdb:step-get-logfile step))))
+	     ((end)  
+	      (vector-set! record 2 (any->number (tdb:step-get-event_time step)))
+	      (vector-set! record 3 (tdb:step-get-status step))
+	      (vector-set! record 4 (let ((startt (any->number (vector-ref record 1)))
+					  (endt   (any->number (vector-ref record 2))))
+				      (debug:print 4 *default-log-port* "record[1]=" (vector-ref record 1) 
+						   ", startt=" startt ", endt=" endt
+						   ", get-status: " (tdb:step-get-status step))
+				      (if (and (number? startt)(number? endt))
+					  (seconds->hr-min-sec (- endt startt)) "-1")))
+	      (if (> (string-length (tdb:step-get-logfile step))
+		     0)
+		  (vector-set! record 5 (tdb:step-get-logfile step)))
+	      (if (> (string-length (tdb:step-get-comment step))
+		     0)
+		  (vector-set! record 6 (tdb:step-get-comment step))))
+	     (else
+	      (vector-set! record 2 (tdb:step-get-state step))
+	      (vector-set! record 3 (tdb:step-get-status step))
+	      (vector-set! record 4 (tdb:step-get-event_time step))
+	      (vector-set! record 6 (tdb:step-get-comment step))))
+	   (hash-table-set! res (tdb:step-get-stepname step) record)
+	   (debug:print 6 *default-log-port* "record(after)  = " record 
+			"\nid:       " (tdb:step-get-id step)
+			"\nstepname: " (tdb:step-get-stepname step)
+			"\nstate:    " (tdb:step-get-state step)
+			"\nstatus:   " (tdb:step-get-status step)
+			"\ntime:     " (tdb:step-get-event_time step))))
+       ;; (else   (vector-set! record 1 (tdb:step-get-event_time step)))
+       (sort steps (lambda (a b)
+		     (cond
+		      ((<   (tdb:step-get-event_time a)(tdb:step-get-event_time b)) #t)
+		      ((=   (tdb:step-get-event_time a)(tdb:step-get-event_time b)) ;; numeric equality; eq? is identity
+		       (<   (tdb:step-get-id a)        (tdb:step-get-id b)))
+		      (else #f)))))
+      res))
+
+;;======================================================================
+;; Gather data from test/task specifications
+;;======================================================================
+
+;; (define (tests:get-valid-tests testsdir test-patts) ;;  #!key (test-names '()))
+;;   (let ((tests (glob (conc testsdir "/tests/*")))) ;; " (string-translate patt "%" "*")))))
+;;     (set! tests (filter (lambda (test)(common:file-exists? (conc test "/testconfig"))) tests))
+;;     (delete-duplicates
+;;      (filter (lambda (testname)
+;; 	       (tests:match test-patts testname #f))
+;; 	     (map (lambda (testp)
+;; 		    (last (string-split testp "/")))
+;; 		  tests)))))
+
+;; Build <linktree>/<target>/<runname>/<testname>[/<itempath>] from the MT_LINKTREE, MT_TARGET,
+;; MT_RUNNAME, MT_TEST_NAME and MT_ITEMPATH environment variables.
+;; Returns #f unless all five variables are set.
+(define (tests:get-test-path-from-environment)
+  (let ((linktree (getenv "MT_LINKTREE"))
+	(target   (getenv "MT_TARGET"))
+	(runname  (getenv "MT_RUNNAME"))
+	(testname (getenv "MT_TEST_NAME"))
+	(itempath (getenv "MT_ITEMPATH")))
+    (and linktree target runname testname itempath
+	 (conc linktree "/" target "/" runname "/" testname
+	       ;; an empty item path adds no trailing path component
+	       (if (string=? "" itempath)
+		   ""
+		   (conc "/" itempath))))))
+
+;; if .testconfig exists in test directory read and return it
+;; else if have cached copy in *testconfigs* return it IFF there is a section "have fulldata"
+;; else read the testconfig file
+;;   if have path to test directory save the config as .testconfig and return it
+;; keys: force-create skips reading .testconfig, allow-write-cache gates writing it, wait-a-minute enables retries
+(define (tests:get-testconfig test-name item-path test-registry system-allowed
+			      #!key (force-create #f)(allow-write-cache #t)(wait-a-minute #f))
+  (let* ((use-cache    (common:use-cache?))
+	 (cache-path   (tests:get-test-path-from-environment))
+	 (cache-file   (and cache-path (conc cache-path "/.testconfig")))
+	 (cache-exists (and cache-file
+			    (not force-create)  ;; if force-create then pretend there is no cache to read
+			    (common:file-exists? cache-file)))
+	 (cached-dat   (if (and (not force-create)
+				cache-exists
+				use-cache)
+			   (handle-exceptions
+			    exn
+			    #f ;; any issues, just give up with the cached version and re-read
+			    (configf:read-alist cache-file))
+			   #f))
+         (test-full-name (if (and item-path (not (string-null? item-path)))
+                             (conc test-name "/" item-path)
+                             test-name)))
+    (if cached-dat
+	cached-dat
+	(let ((dat (hash-table-ref/default *testconfigs* test-full-name #f)))
+	  (if (and  dat ;; have a locally cached version
+		    (hash-table-ref/default dat "have fulldata" #f)) ;; marked as good data?
+	      dat
+	      ;; no cached data available
+	      (let* ((treg         (or test-registry
+				       (tests:get-all)))
+		     (test-path    (or (hash-table-ref/default treg test-name #f)
+                                       (let* ((local-tcdir (conc (getenv "MT_LINKTREE") "/" ;; NOTE(review): getenv results and item-path may be #f here
+                                                                 (getenv "MT_TARGET") "/"   ;; (tests:get-waitons passes item-path as #f) - confirm conc copes
+                                                                 (getenv "MT_RUNNAME") "/"
+                                                                 test-name "/" item-path))
+                                              (local-tcfg (conc local-tcdir "/testconfig")))
+                                         (if (common:file-exists? local-tcfg)
+                                             local-tcdir
+                                             #f))
+				       (conc *toppath* "/tests/" test-name)))
+		     (test-configf (conc test-path "/testconfig"))
+		     (testexists   (let loopa ((tries-left 30)) ;; #t when the file exists and is readable, else #f
+                                     (cond
+                                      (
+                                       (and (common:file-exists? test-configf)(file-read-access? test-configf))
+                                       #t)
+                                      (
+                                       (common:file-exists? test-configf)
+                                       (debug:print 0 *default-log-port* "WARNING: Cannot read testconfig file: "test-configf)
+                                       #f)
+                                      (
+                                       (and wait-a-minute (> tries-left 0))
+                                       (thread-sleep! 10) ;; NOTE(review): the "will retry" warning is printed after this sleep, not before it
+                                       (debug:print 0 *default-log-port* "WARNING: testconfig file does not exist: "test-configf" will retry in 10 seconds.  Tries left: "tries-left) ;; BB: this fires
+                                       (loopa (sub1 tries-left)))
+                                      (else
+                                       (debug:print 0 *default-log-port* "WARNING: testconfig file does not exist: "test-configf) ;; BB: this fires
+                                       #f))))
+		     (tcfg         (if testexists
+				       (configf:read-config test-configf #f system-allowed
+						    environ-patt: (if system-allowed
+								      "pre-launch-env-vars"
+								      #f))
+				       #f)))
+		(if (and tcfg cache-file) (hash-table-set! tcfg "have fulldata" #t)) ;; mark this as fully read data
+		(if tcfg (hash-table-set! *testconfigs* test-full-name tcfg))
+		(if (and testexists
+			 cache-file
+			 (file-write-access? cache-path)
+			 allow-write-cache)
+		    (let ((tpath (conc cache-path "/.testconfig")))
+		      (debug:print-info 1 *default-log-port* "Caching testconfig for " test-name " in " tpath) ;; logged even when the write below is skipped
+                      (if (and tcfg (not (common:in-running-test?)))
+                          (configf:write-alist tcfg tpath))))
+		tcfg))))))
+  
+;; sort tests by priority and waiton; returns the keys of test-records as a sorted list ('() when empty)
+;; Move test specific stuff to a test unit FIXME one of these days
+(define (tests:sort-by-priority-and-waiton test-records)
+  (if (eq? (hash-table-size test-records) 0)
+      '()
+      (let* ((mungepriority (lambda (priority) ;; any->number of priority, or 0 when it is #f or not a number
+			      (if priority
+				  (let ((tmp (any->number priority)))
+				    (if tmp tmp (begin (debug:print-error 0 *default-log-port* "bad priority value " priority ", using 0") 0)))
+				  0)))
+	     (all-tests      (hash-table-keys test-records))
+	     (all-waited-on  (let loop ((hed (car all-tests)) ;; NOTE(review): all-waited-on is bound but never referenced below
+					(tal (cdr all-tests))
+					(res '()))
+			       (let* ((trec    (hash-table-ref test-records hed))
+				      (waitons (or (tests:testqueue-get-waitons trec) '())))
+				 (if (null? tal)
+				     (append res waitons)
+				     (loop (car tal)(cdr tal)(append res waitons))))))
+	     (sort-fn1 
+	      (lambda (a b)
+		(let* ((a-record   (hash-table-ref test-records a))
+		       (b-record   (hash-table-ref test-records b))
+		       (a-waitons  (or (tests:testqueue-get-waitons a-record) '()))
+		       (b-waitons  (or (tests:testqueue-get-waitons b-record) '()))
+		       (a-config   (tests:testqueue-get-testconfig  a-record))
+		       (b-config   (tests:testqueue-get-testconfig  b-record))
+		       (a-raw-pri  (configf:lookup a-config "requirements" "priority"))
+		       (b-raw-pri  (configf:lookup b-config "requirements" "priority"))
+		       (a-priority (mungepriority a-raw-pri))
+		       (b-priority (mungepriority b-raw-pri)))
+		  (tests:testqueue-set-priority! a-record a-priority) ;; side effect: the comparator stores the parsed
+		  (tests:testqueue-set-priority! b-record b-priority) ;; priority on both records
+		  ;; (debug:print 0 *default-log-port* "a=" a ", b=" b ", a-waitons=" a-waitons ", b-waitons=" b-waitons)
+		  (cond
+		   ;; is 
+		   ((member a b-waitons)          ;; is b waiting on a?
+		    ;; (debug:print 0 *default-log-port* "case1")
+		    #t)
+		   ((member b a-waitons)          ;; is a waiting on b?
+		    ;; (debug:print 0 *default-log-port* "case2")
+		    #f)
+		   ((and (not (null? a-waitons))  ;; both have waitons - do not disturb
+			 (not (null? b-waitons))) ;; NOTE(review): yields #t for (a b) and for (b a), so not a strict ordering
+		    ;; (debug:print 0 *default-log-port* "case2.1")
+		    #t)
+		   ((and (null? a-waitons)        ;; no waitons for a but b has waitons
+			 (not (null? b-waitons)))
+		    ;; (debug:print 0 *default-log-port* "case3")
+		    #f)
+		   ((and (not (null? a-waitons))  ;; a has waitons but b does not
+			 (null? b-waitons)) 
+		    ;; (debug:print 0 *default-log-port* "case4")
+		    #t)
+		   ((not (eq? a-priority b-priority)) ;; priorities differ: the larger number sorts first
+		    (> a-priority b-priority))
+		   (else
+		    ;; (debug:print 0 *default-log-port* "case5")
+		    (string>? a b))))))
+	     
+	     (sort-fn2 ;; NOTE(review): sort-fn2 is never used; only sort-fn1 is passed to sort below
+	      (lambda (a b)
+		(> (mungepriority (tests:testqueue-get-priority (hash-table-ref test-records a)))
+		   (mungepriority (tests:testqueue-get-priority (hash-table-ref test-records b)))))))
+	;; (let ((dot-res (tests:run-dot (tests:tests->dot test-records) "plain")))
+	;;   (debug:print "dot-res=" dot-res))
+	;; (let ((data (map cdr (filter
+	;;     		  (lambda (x)(equal? "node" (car x)))
+	;;     		  (map string-split (tests:easy-dot test-records "plain"))))))
+	;;   (map car (sort data (lambda (a b)
+	;;     		    (> (string->number (caddr a))(string->number (caddr b)))))))
+	;; ))
+	(sort all-tests sort-fn1)))) ;; avoid dealing with deleted tests, look at the hash table
+
+;; Write the waiton graph of test-records to a temp file, run "dot -T<outtype>" on it and return
+;; dot's output as a list of lines. The temp file is removed once the output has been read.
+(define (tests:easy-dot test-records outtype)
+  (let-values (((fd temp-path) (file-mkstemp (conc "/tmp/" (current-user-name) ".XXXXXX"))))
+    (let ((all-testnames (hash-table-keys test-records))
+	  (temp-port     (open-output-file* fd)))
+      (format temp-port "digraph tests {\n")
+      (format temp-port "  size=\"4,8\"\n") ;; quoted like the size attribute written by tests:tests->dot
+      (for-each
+       (lambda (testname)
+	 (let* ((testrec (hash-table-ref test-records testname))
+		(waitons (or (tests:testqueue-get-waitons testrec) '())))
+	   (for-each
+	    (lambda (waiton)
+	      ;; names go in as ~A arguments so a "~" in a name is not read as a format directive
+	      (format temp-port "   ~A -> ~A [splines=ortho]\n" waiton testname))
+	    waitons)))
+       all-testnames)
+      (format temp-port "}\n")
+      (close-output-port temp-port)
+      (let ((res (with-input-from-pipe
+		  (conc "env -i PATH=$PATH dot -T" outtype " < " temp-path)
+		  read-lines)))
+	(delete-file* temp-path) ;; do not leave one temp file per call behind in /tmp
+	res))))
+
+(define (tests:write-dot-file test-records fname sizex sizey) ;; write the tests:tests->dot lines to fname, one per line
+  (if (file-write-access? (or (pathname-directory fname) ".")) ;; a bare file name has no directory part: check the cwd
+      (with-output-to-file fname
+	(lambda ()                                               ;; for-each, since map does not guarantee call order
+	  (for-each print (tests:tests->dot test-records sizex sizey))))))
+
+;; Return the dot source for the waiton graph of test-records as a list of lines
+;; ('() when there are no tests); sizex and sizey fall back to 11 when #f.
+(define (tests:tests->dot test-records sizex sizey)
+  (let ((all-testnames (hash-table-keys test-records)))
+    (if (null? all-testnames)
+	'()
+	(append
+	 (list "digraph tests {"
+	       (conc " size=\"" (or sizex 11) "," (or sizey 11) "\";")
+	       " ratio=0.95;")
+	 ;; one node line for a test without waitons, otherwise one edge line per waiton
+	 (append-map
+	  (lambda (testname)
+	    (let ((waitons (or (tests:testqueue-get-waitons
+				(hash-table-ref test-records testname))
+			       '())))
+	      (if (null? waitons)
+		  (list (conc "   \"" testname "\" [shape=box];"))
+		  (map (lambda (waiton)
+			 (conc "   \"" waiton "\" -> \"" testname "\" [shape=box];"))
+		       waitons))))
+	  all-testnames)
+	 (list "}")))))
+
+;; (tests:run-dot (list "digraph tests {" "a -> b" "}") "plain")
+
+(define (tests:run-dot indat outtype) ;; outtype is plain, fig, dot, etc. http://www.graphviz.org/content/output-formats
+  ;; Feed the lines of indat to dot and return dot's output as a list of lines. With an argument list,
+  ;; process runs the command directly (no shell), so env and each of its arguments are separate strings.
+  (let-values (((inp oup pid)(process "env" (list "-i" (conc "PATH=" (or (getenv "PATH") "")) "dot" (conc "-T" outtype)))))
+    (with-output-to-port oup
+      (lambda ()
+	(for-each print indat))) ;; for-each keeps the lines in order; map's application order is unspecified
+    (close-output-port oup)
+    (let ((res (with-input-from-port inp read-lines)))
+      (close-input-port inp)
+      res)))
+
+;; read data from tmp file or create if not exists
+;; if exists regen in background
 ;;
-(define (tests:lookup-itemmap itemmaps testname)
-  (let ((best-matches (filter (lambda (itemmap)
-				(tests:match (car itemmap) testname #f))
-			      itemmaps)))
-    (if (null? best-matches)
-	#f
-	(let ((res (car best-matches)))
-	  ;; (debug:print 0 *default-log-port* "res=" res)
-	  (cond
-	   ((string? res) res) ;;; FIX THE ROOT CAUSE HERE ....
-	   ((null? res)   #f)
-	   ((string? (cdr res)) (cdr res))  ;; it is a pair
-	   ((string? (cadr res))(cadr res)) ;; it is a list
-	   (else cadr res))))))
-
-(define (mt:lazy-read-test-config test-name)
-  (let ((tconf (hash-table-ref/default *testconfigs* test-name #f)))
-    (if tconf
-	tconf
-	(let ((test-dirs (tests:get-tests-search-path *configdat*)))
-	  (let loop ((hed (car test-dirs))
-		     (tal (cdr test-dirs)))
-	    ;; Setting MT_LINKTREE here is almost certainly unnecessary. 
-	    (let ((tconfig-file (conc hed "/" test-name "/testconfig")))
-	      (if (and (common:file-exists? tconfig-file)
-		       (file-read-access? tconfig-file))
-		  (let ((link-tree-path (common:get-linktree)) ;; (configf:lookup *configdat* "setup" "linktree"))
-			(old-link-tree  (get-environment-variable "MT_LINKTREE")))
-		    (if link-tree-path (setenv "MT_LINKTREE" link-tree-path))
-		    (let ((newtcfg (configf:read-config tconfig-file #f #f))) ;; NOTE: Does NOT run [system ...]
-		      (hash-table-set! *testconfigs* test-name newtcfg)
-		      (if old-link-tree 
-			  (setenv "MT_LINKTREE" old-link-tree)
-			  (unsetenv "MT_LINKTREE"))
-		      newtcfg))
-		  (if (null? tal)
-		      (begin
-			(debug:print-error 0 *default-log-port* "No readable testconfig found for " test-name)
-			#f)
-		      (loop (car tal)(cdr tal))))))))))
+(define (tests:lazy-dot testrecords  outtype sizex sizey)
+  ;; Rewrites the .dot source on every call, then returns the lines of the rendered .dotdat file.
+  (let* ((dfile   (conc "/tmp/." (current-user-name) "-" (server:mk-signature) ".dot"))
+	 (fname   (conc "/tmp/." (current-user-name) "-" (server:mk-signature) ".dotdat"))
+	 (dot-cmd (lambda (tail) (conc "env -i PATH=$PATH dot -T " outtype " < " dfile " > " fname tail)))
+	 (slurp   (lambda () (with-input-from-file fname (lambda () (read-lines))))))
+    (tests:write-dot-file testrecords dfile sizex sizey)
+    (cond
+     ((common:file-exists? fname) ;; serve the previous render, refresh it in the background
+      (let ((cached (slurp)))
+	(system (dot-cmd "&"))
+	cached))
+     (else                        ;; no previous render: run dot synchronously, then read it
+      (system (dot-cmd ""))
+      (slurp)))))
+	  
+
+;;======================================================================
+;; refactoring this block into tests:get-full-data from line 263 of runs.scm
+;;======================================================================
+;; Populate test-records (hash of test-name => test record vector) for every name in
+;; test-names plus, transitively, every test named in a "waiton" requirement.
+;; hed is the test name currently being processed. Tests with no testconfig are
+;; reported and skipped. Returns test-records on every path, including empty test-names.
+(define (tests:get-full-data test-names test-records required-tests all-tests-registry)
+  (if (null? test-names)
+      test-records ;; nothing to expand; same return value as every other exit
+      (let loop ((hed (car test-names))
+		 (tal (cdr test-names)))         ;; 'return-procs tells the config reader to prep running system but return a proc
+	(debug:print-info 4 *default-log-port* "hed=" hed " at top of loop")
+        ;; don't know item-path at this time, let the testconfig get the top level testconfig
+	(let* ((config  (tests:get-testconfig hed #f all-tests-registry 'return-procs))
+	       (waitons (let ((instr (if config 
+					 (configf:lookup config "requirements" "waiton")
+					 (begin ;; No config means this is a non-existent test
+					   (debug:print-error 0 *default-log-port* "non-existent required test \"" hed "\", grep through your testconfigs to find and remove or create the test. Discarding and continuing.")
+					     ""))))
+			  (debug:print-info 8 *default-log-port* "waitons string is " instr)
+			  (string-split (cond
+					 ((procedure? instr)
+					  (let ((res (instr)))
+					    (debug:print-info 8 *default-log-port* "waiton procedure results in string " res " for test " hed)
+					    res))
+					 ((string? instr)     instr)
+					 (else 
+					  ;; NOTE: This is actually the case of *no* waitons! ;; (debug:print-error 0 *default-log-port* "something went wrong in processing waitons for test " hed)
+					  ""))))))
+	  (if (not config) ;; this is a non-existent test called in a waiton.
+	      (if (null? tal)
+		  test-records
+		  (loop (car tal)(cdr tal)))
+	      (begin
+		(debug:print-info 8 *default-log-port* "waitons: " waitons)
+		;; check for hed in waitons => this would be circular, remove it and issue an
+		;; error
+		(if (member hed waitons)
+		    (begin
+		      (debug:print-error 0 *default-log-port* "test " hed " has listed itself as a waiton, please correct this!")
+		      (set! waitons (filter (lambda (x)(not (equal? x hed))) waitons))))
+		(if (not (hash-table-ref/default test-records hed #f))
+		    (hash-table-set! test-records
+				     hed (vector hed     ;; 0
+						 config  ;; 1
+						 waitons ;; 2
+						 (configf:lookup config "requirements" "priority")     ;; priority 3
+						 (let ((items      (hash-table-ref/default config "items" #f)) ;; items 4
+						       (itemstable (hash-table-ref/default config "itemstable" #f))) 
+						   ;; if either items or items table is a proc return it so test running
+						   ;; process can know to call items:get-items-from-config
+						   ;; if either is a list and none is a proc go ahead and call get-items
+						   ;; otherwise return #f - this is not an iterated test
+						   (cond
+						    ((procedure? items)      
+						     (debug:print-info 4 *default-log-port* "items is a procedure, will calc later")
+						     items)            ;; calc later
+						    ((procedure? itemstable)
+						     (debug:print-info 4 *default-log-port* "itemstable is a procedure, will calc later")
+						     itemstable)       ;; calc later
+						    ((filter (lambda (x)
+							       (let ((val (car x)))
+								 (if (procedure? val) val #f)))
+							     (append (if (list? items) items '())
+								     (if (list? itemstable) itemstable '())))
+						     'have-procedure) ;; NOTE(review): filter always returns a list and even '() is true in Scheme, so this clause always fires and the two below are unreachable; left as is - confirm how callers treat 'have-procedure before changing
+						    ((or (list? items)(list? itemstable)) ;; calc now
+						     (debug:print-info 4 *default-log-port* "items and itemstable are lists, calc now\n"
+								       "    items: " items " itemstable: " itemstable)
+						     (items:get-items-from-config config))
+						    (else #f)))                           ;; not iterated
+						 #f      ;; itemsdat 5
+						 #f)))   ;; spare - used for item-path
+		(for-each 
+		 (lambda (waiton)
+		   (if (and waiton (not (member waiton test-names)))
+		       (begin
+			 (set! required-tests (cons waiton required-tests)) ;; NOTE(review): only rebinds the local parameter; the list is never read or returned in this function
+			 (set! test-names (cons waiton test-names))))) ;; was an append, now a cons
+		 waitons)
+		(let ((remtests (delete-duplicates (append waitons tal))))
+		  (if (not (null? remtests))
+		      (loop (car remtests)(cdr remtests))
+		      test-records))))))))
+
+;;======================================================================
+;; A R C H I V I N G
+;;======================================================================
+
+(define (test:archive db test-id) ;; ignores db and test-id and always returns #f (presumably a placeholder - confirm)
+  #f)
+
+(define (test:archive-tests db keynames target) ;; ignores all three arguments and always returns #f (presumably a placeholder - confirm)
+  #f)
 
 )