Index: Makefile
==================================================================
--- Makefile
+++ Makefile
@@ -234,12 +234,13 @@
 	$(INSTALL) dboard $(PREFIX)/bin/.$(ARCHSTR)/dboard
 
 install : $(PREFIX)/bin/.$(ARCHSTR) $(PREFIX)/bin/.$(ARCHSTR)/mtest $(PREFIX)/bin/megatest \
           $(PREFIX)/bin/.$(ARCHSTR)/dboard $(PREFIX)/bin/dashboard $(HELPERS) $(PREFIX)/bin/nbfake \
 	  $(PREFIX)/bin/nbfind $(PREFIX)/bin/loadrunner $(PREFIX)/bin/viewscreen $(PREFIX)/bin/mt_xterm \
-	  $(PREFIX)/share/docs/megatest_manual.html $(PREFIX)/bin/remrun $(PREFIX)/bin/mtutil
-#          $(PREFIX)/share/db/mt-pg.sql $(PREFIX)/bin/.$(ARCHSTR)/ndboard  $(PREFIX)/bin/tcmt
+	  $(PREFIX)/share/docs/megatest_manual.html $(PREFIX)/bin/remrun $(PREFIX)/bin/mtutil \
+	  $(PREFIX)/bin/tcmt $(PREFIX)/share/db/mt-pg.sql
+#         $(PREFIX)/bin/.$(ARCHSTR)/ndboard
 
 # $(PREFIX)/bin/newdashboard
 
 $(PREFIX)/bin/.$(ARCHSTR) : 
 	mkdir -p $(PREFIX)/bin/.$(ARCHSTR)

DELETED emergency-patch-1.scm
Index: emergency-patch-1.scm
==================================================================
--- emergency-patch-1.scm
+++ /dev/null
@@ -1,203 +0,0 @@
-
-
-;; These are called by the server on recipt of /api calls
-;;    - keep it simple, only return the actual result of the call, i.e. no meta info here
-;;
-;;    - returns #( flag result )
-;;
-(define (api:execute-requests dbstruct dat)
-  (handle-exceptions
-   exn
-   (let ((call-chain (get-call-chain)))
-     (debug:print 0 *default-log-port* "WARNING: api:execute-requests received an exception from peer, dat=" dat)
-     (print-call-chain (current-error-port))
-     (debug:print 0 *default-log-port* " message: "  ((condition-property-accessor 'exn 'message) exn))       
-     (vector #f (vector exn call-chain dat))) ;; return some stuff for debug if an exception happens
-   (cond
-    ((not (vector? dat))                    ;; it is an error to not receive a vector
-     (vector #f (vector #f "remote must be called with a vector")))
-    ((> *api-process-request-count* 20) ;; 20)
-     (debug:print 0 *default-log-port* "WARNING: api:execute-requests received an overloaded message.")
-     (set! *server-overloaded* #t)
-     (vector #f (vector #f 'overloaded))) ;; the inner vector is what gets returned. nope, don't know why. please refactor!
-    (else  
-     (let* ((cmd-in            (vector-ref dat 0))
-            (cmd               (if (symbol? cmd-in)
-				   cmd-in
-				   (string->symbol cmd-in)))
-            (params            (vector-ref dat 1))
-            (start-t           (current-milliseconds))
-            (readonly-mode     (dbr:dbstruct-read-only dbstruct))
-            (readonly-command  (member cmd api:read-only-queries))
-            (writecmd-in-readonly-mode (and readonly-mode (not readonly-command)))
-            (res    
-             (if writecmd-in-readonly-mode
-                 (conc "attempt to run write command "cmd" on a read-only database")
-                 (case cmd
-                   ;;===============================================
-                   ;; READ/WRITE QUERIES
-                   ;;===============================================
-
-                   ((get-keys-write)                        (db:get-keys dbstruct)) ;; force a dummy "write" query to force server; for debug in -repl
-                   
-                   ;; SERVERS
-                   ((start-server)                    (apply server:kind-run params))
-                   ((kill-server)                     (set! *server-run* #f))
-
-                   ;; TESTS
-
-                   ;;((test-set-state-status-by-id)     (apply mt:test-set-state-status-by-id dbstruct params))
-                   ;;BB - commented out above because it was calling below, eventually, incorrectly (dbstruct passed to mt:test-set-state-status-by-id, which previosly did more, but now only passes thru to db:set-state-status-and-roll-up-items.
-                   ((test-set-state-status-by-id)
-
-                    ;; (define (db:set-state-status-and-roll-up-items dbstruct run-id test-name item-path state status comment)
-                    (db:set-state-status-and-roll-up-items
-                     dbstruct
-                     (list-ref params 0) ; run-id
-                     (list-ref params 1) ; test-name
-                     #f                  ; item-path
-                     (list-ref params 2) ; state
-                     (list-ref params 3) ; status
-                     (list-ref params 4) ; comment
-                     ))
-                   
-                   ((delete-test-records)             (apply db:delete-test-records dbstruct params))
-                   ((delete-old-deleted-test-records) (apply db:delete-old-deleted-test-records dbstruct params))
-                   ((test-set-state-status)           (apply db:test-set-state-status dbstruct params))
-                   ((test-set-top-process-pid)        (apply db:test-set-top-process-pid dbstruct params))
-                   ((set-state-status-and-roll-up-items) (apply db:set-state-status-and-roll-up-items dbstruct params))
-                   ((top-test-set-per-pf-counts)      (apply db:top-test-set-per-pf-counts dbstruct params))
-                   ((test-set-archive-block-id)       (apply db:test-set-archive-block-id dbstruct params))
-
-                   ;; RUNS
-                   ((register-run)                 (apply db:register-run dbstruct params))
-                   ((set-tests-state-status)       (apply db:set-tests-state-status dbstruct params))
-                   ((delete-run)                   (apply db:delete-run dbstruct params))
-                   ((lock/unlock-run)              (apply db:lock/unlock-run dbstruct params))
-                   ((update-run-event_time)        (apply db:update-run-event_time dbstruct params))
-                   ((update-run-stats)             (apply db:update-run-stats dbstruct params))
-                   ((set-var)                      (apply db:set-var dbstruct params))
-                   ((del-var)                      (apply db:del-var dbstruct params))
-
-                   ;; STEPS
-                   ((teststep-set-status!)         (apply db:teststep-set-status! dbstruct params))
-
-                   ;; TEST DATA
-                   ((test-data-rollup)             (apply db:test-data-rollup dbstruct params))
-                   ((csv->test-data)               (apply db:csv->test-data dbstruct params))
-
-                   ;; MISC
-                   ((sync-inmem->db)               (let ((run-id (car params)))
-                                                     (db:sync-touched dbstruct run-id force-sync: #t)))
-                   ((mark-incomplete)              (apply db:find-and-mark-incomplete dbstruct params))
-
-                   ;; TESTMETA
-                   ((testmeta-add-record)       (apply db:testmeta-add-record dbstruct params))
-                   ((testmeta-update-field)     (apply db:testmeta-update-field dbstruct params))
-                   ((get-tests-tags)            (db:get-tests-tags dbstruct))
-
-                   ;; TASKS
-                   ((tasks-add)                 (apply tasks:add dbstruct params))   
-                   ((tasks-set-state-given-param-key) (apply tasks:set-state-given-param-key dbstruct params))
-                   ((tasks-get-last)            (apply tasks:get-last dbstruct params))
-
-		   ;; NO SYNC DB
-		   ((no-sync-set)               (apply db:no-sync-set         *no-sync-db* params))
-		   ((no-sync-get/default)       (apply db:no-sync-get/default *no-sync-db* params))
-		   ((no-sync-del!)              (apply db:no-sync-del!        *no-sync-db* params))
-		 
-                   ;; ARCHIVES
-                   ;; ((archive-get-allocations)   
-                   ((archive-register-disk)     (apply db:archive-register-disk dbstruct params))
-                   ((archive-register-block-name)(apply db:archive-register-block-name dbstruct params))
-                   ((archive-allocate-testsuite/area-to-block)(apply db:archive-allocate-testsuite/area-to-block dbstruct block-id testsuite-name areakey))
-
-                   ;;======================================================================
-                   ;; READ ONLY QUERIES
-                   ;;======================================================================
-
-                   ;; KEYS
-                   ((get-key-val-pairs)               (apply db:get-key-val-pairs dbstruct params))
-                   ((get-keys)                        (db:get-keys dbstruct))
-                   ((get-key-vals)                    (apply db:get-key-vals dbstruct params))
-                   ((get-target)                      (apply db:get-target dbstruct params))
-                   ((get-targets)                     (db:get-targets dbstruct))
-
-                   ;; ARCHIVES
-                   ((test-get-archive-block-info)     (apply db:test-get-archive-block-info dbstruct params))
-                   
-                   ;; TESTS
-                   ((test-toplevel-num-items)         (apply db:test-toplevel-num-items dbstruct params))
-                   ((get-test-info-by-id)	       (apply db:get-test-info-by-id dbstruct params))
-                   ((test-get-rundir-from-test-id)    (apply db:test-get-rundir-from-test-id dbstruct params))
-                   ((get-count-tests-running-for-testname) (apply db:get-count-tests-running-for-testname dbstruct params))
-                   ((get-count-tests-running)         (apply db:get-count-tests-running dbstruct params))
-                   ((get-count-tests-running-in-jobgroup) (apply db:get-count-tests-running-in-jobgroup dbstruct params))
-                   ;; ((delete-test-step-records)        (apply db:delete-test-step-records dbstruct params))
-                   ((get-previous-test-run-record)    (apply db:get-previous-test-run-record dbstruct params))
-                   ((get-matching-previous-test-run-records)(apply db:get-matching-previous-test-run-records dbstruct params))
-                   ((test-get-logfile-info)           (apply db:test-get-logfile-info dbstruct params))
-                   ((test-get-records-for-index-file)  (apply db:test-get-records-for-index-file dbstruct params))
-                   ((get-testinfo-state-status)       (apply db:get-testinfo-state-status dbstruct params))
-                   ((test-get-top-process-pid)        (apply db:test-get-top-process-pid dbstruct params))
-                   ((test-get-paths-matching-keynames-target-new) (apply db:test-get-paths-matching-keynames-target-new dbstruct params))
-                   ((get-prereqs-not-met)             (apply db:get-prereqs-not-met dbstruct params))
-                   ((get-count-tests-running-for-run-id) (apply db:get-count-tests-running-for-run-id dbstruct params))
-                   ((synchash-get)                    (apply synchash:server-get dbstruct params))
-                   ((get-raw-run-stats)               (apply db:get-raw-run-stats dbstruct params))
-
-                   ;; RUNS
-                   ((get-run-info)                 (apply db:get-run-info dbstruct params))
-                   ((get-run-status)               (apply db:get-run-status dbstruct params))
-                   ((set-run-status)               (apply db:set-run-status dbstruct params))
-                   ((get-tests-for-run)            (apply db:get-tests-for-run dbstruct params))
-                   ((get-test-id)                  (apply db:get-test-id dbstruct params))
-                   ((get-tests-for-run-mindata)    (apply db:get-tests-for-run-mindata dbstruct params))
-                   ((get-runs)                     (apply db:get-runs dbstruct params))
-                   ((get-num-runs)                 (apply db:get-num-runs dbstruct params))
-                   ((get-all-run-ids)              (db:get-all-run-ids dbstruct))
-                   ((get-prev-run-ids)             (apply db:get-prev-run-ids dbstruct params))
-                   ((get-run-ids-matching-target)  (apply db:get-run-ids-matching-target dbstruct params))
-                   ((get-runs-by-patt)             (apply db:get-runs-by-patt dbstruct params))
-                   ((get-run-name-from-id)         (apply db:get-run-name-from-id dbstruct params))
-                   ((get-main-run-stats)           (apply db:get-main-run-stats dbstruct params))
-                   ((get-var)                      (apply db:get-var dbstruct params))
-                   ((get-run-stats)                (apply db:get-run-stats dbstruct params))
-
-                   ;; STEPS
-                   ((get-steps-data)               (apply db:get-steps-data dbstruct params))
-                   ((get-steps-for-test)           (apply db:get-steps-for-test dbstruct params))
-
-                   ;; TEST DATA
-                   ((read-test-data)               (apply db:read-test-data dbstruct params))
-                   ((read-test-data*)              (apply db:read-test-data* dbstruct params))
-
-                   ;; MISC
-                   ((get-latest-host-load)         (apply db:get-latest-host-load dbstruct params))
-                   ((have-incompletes?)            (apply db:have-incompletes? dbstruct params))
-                   ((login)                        (apply db:login dbstruct params))
-                   ((general-call)                 (let ((stmtname   (car params))
-                                                         (run-id     (cadr params))
-                                                         (realparams (cddr params)))
-                                                     (db:general-call dbstruct stmtname realparams)))
-                   ((sdb-qry)                      (apply sdb:qry params))
-                   ((ping)                         (current-process-id))
-		   ((get-changed-record-ids)       (apply db:get-changed-record-ids dbstruct params))
-		   
-                   ;; TESTMETA
-                   ((testmeta-get-record)       (apply db:testmeta-get-record dbstruct params))
-
-                   ;; TASKS 
-                   ((find-task-queue-records)   (apply tasks:find-task-queue-records dbstruct params))
-		   (else
-		    (debug:print 0 *default-log-port* "ERROR: bad api call " cmd)
-		    (conc "ERROR: BAD api call " cmd))))))
-       
-       ;; save all stats
-       (let ((delta-t (- (current-milliseconds)
-			 start-t)))
-	 (hash-table-set! *db-api-call-time* cmd
-			  (cons delta-t (hash-table-ref/default *db-api-call-time* cmd '()))))
-       (if writecmd-in-readonly-mode
-	   (vector #f res)
-           (vector #t res)))))))

DELETED emergency-patch-2.scm
Index: emergency-patch-2.scm
==================================================================
--- emergency-patch-2.scm
+++ /dev/null
@@ -1,311 +0,0 @@
-(include "common_records.scm")
-(include "key_records.scm")
-(include "db_records.scm")
-(include "run_records.scm")
-(include "test_records.scm")
-
-(define (common:wait-for-cpuload maxload numcpus waitdelay #!key (count 1000) (msg #f)(remote-host #f))
-  (let* ((loadavg (common:get-cpu-load remote-host))
-	 (first   (car loadavg))
-	 (next    (cadr loadavg))
-	 (adjload (* maxload numcpus))
-	 (loadjmp (- first next)))
-    (cond
-     ((and (> first adjload)
-	   (> count 0))
-      (debug:print-info 0 *default-log-port* "waiting " waitdelay " seconds due to load " first " exceeding max of " adjload " " (if msg msg ""))
-      (thread-sleep! waitdelay)
-      (common:wait-for-cpuload maxload numcpus waitdelay count: (- count 1)))
-     ((and (> loadjmp numcpus)
-	   (> count 0))
-      (debug:print-info 0 *default-log-port* "waiting " waitdelay " seconds due to load jump " loadjmp " > numcpus " numcpus (if msg msg ""))
-      (thread-sleep! waitdelay)
-      (common:wait-for-cpuload maxload numcpus waitdelay count: (- count 1))))))
-
-(define (common:wait-for-homehost-load maxload msg)
-  (let* ((hh-dat (if (common:on-homehost?) ;; if we are on the homehost then pass in #f so the calls are local.
-                     #f
-                     (common:get-homehost)))
-         (hh     (if hh-dat (car hh-dat) #f))
-         (numcpus (common:get-num-cpus hh)))
-    (common:wait-for-normalized-load maxload msg: msg remote-host: hh)))
-
-;; wait for normalized cpu load to drop below maxload
-;;
-(define (common:wait-for-normalized-load maxload #!key (msg #f)(remote-host #f))
-  (let ((num-cpus (common:get-num-cpus remote-host)))
-    (common:wait-for-cpuload maxload num-cpus 15 msg: msg remote-host: remote-host)))
-
-;;  hed tal reg reruns reglen regfull test-record runname test-name item-path jobgroup max-concurrent-jobs run-id waitons item-path testmode test-patts required-tests test-registry registry-mutex flags keyvals run-info newtal all-tests-registry itemmaps)
-(define (runs:process-expanded-tests runsdat testdat)
-  ;; unroll the contents of runsdat and testdat (due to ongoing refactoring).
-  (let* ((hed                    (runs:testdat-hed testdat))
-	 (tal                    (runs:testdat-tal testdat))
-	 (reg                    (runs:testdat-reg testdat))
-	 (reruns                 (runs:testdat-reruns testdat))
-	 (test-name              (runs:testdat-test-name testdat))
-	 (item-path              (runs:testdat-item-path testdat))
-	 (jobgroup               (runs:testdat-jobgroup testdat))
-	 (waitons                (runs:testdat-waitons testdat))
-	 (item-path              (runs:testdat-item-path testdat))
-	 (testmode               (runs:testdat-testmode testdat))
-	 (newtal                 (runs:testdat-newtal testdat))
-	 (itemmaps               (runs:testdat-itemmaps testdat))
-	 (test-record            (runs:testdat-test-record testdat))
-	 (prereqs-not-met        (runs:testdat-prereqs-not-met testdat))
-
-	 (reglen                 (runs:dat-reglen runsdat))
-	 (regfull                (runs:dat-regfull runsdat))
-	 (runname                (runs:dat-runname runsdat))
-	 (max-concurrent-jobs    (runs:dat-max-concurrent-jobs runsdat))
-	 (run-id                 (runs:dat-run-id runsdat))
-	 (test-patts             (runs:dat-test-patts runsdat))
-	 (required-tests         (runs:dat-required-tests runsdat))
-	 (test-registry          (runs:dat-test-registry runsdat))
-	 (registry-mutex         (runs:dat-registry-mutex runsdat))
-	 (flags                  (runs:dat-flags runsdat))
-	 (keyvals                (runs:dat-keyvals runsdat))
-	 (run-info               (runs:dat-run-info runsdat))
-	 (all-tests-registry     (runs:dat-all-tests-registry runsdat))
-	 (run-limits-info        (runs:dat-can-run-more-tests runsdat))
-	 ;; (runs:can-run-more-tests run-id jobgroup max-concurrent-jobs)) ;; look at the test jobgroup and tot jobs running
-	 (have-resources         (car run-limits-info))
-	 (num-running            (list-ref run-limits-info 1))
-	 (num-running-in-jobgroup(list-ref run-limits-info 2)) 
-	 (max-concurrent-jobs    (list-ref run-limits-info 3))
-	 (job-group-limit        (list-ref run-limits-info 4))
-	 ;; (prereqs-not-met        (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps))
-	 ;; (prereqs-not-met         (mt:lazy-get-prereqs-not-met run-id waitons item-path mode: testmode itemmap: itemmap))
-	 (fails                  (if (list? prereqs-not-met)
-				      (runs:calc-fails prereqs-not-met)
-				      (begin
-					(debug:print-error 0 *default-log-port* "prereqs-not-met is not a list! " prereqs-not-met)
-					'())))
-	 (non-completed           (filter (lambda (x)             ;; remove hed from not completed list, duh, of course it is not completed!
-					    (not (equal? x hed)))
-					  (runs:calc-not-completed prereqs-not-met)))
-	 (loop-list               (list hed tal reg reruns))
-	 ;; configure the load runner
-	 (numcpus                 (common:get-num-cpus #f))
-	 (maxload                 (string->number (or (configf:lookup *configdat* "jobtools" "maxload") "3.0")))         ;; use a non-number string to disable
-         (maxhomehostload         (string->number (or (configf:lookup *configdat* "jobtools" "maxhomehostload") "1.2"))) ;; use a non-number string to disable
-         (waitdelay               (string->number (or (configf:lookup *configdat* "jobtools" "waitdelay") "60"))))
-    (debug:print-info 4 *default-log-port* "have-resources: " have-resources " prereqs-not-met: (" 
-		      (string-intersperse 
-		       (map (lambda (t)
-			      (if (vector? t)
-				  (conc (db:test-get-state t) "/" (db:test-get-status t))
-				  (conc " WARNING: t is not a vector=" t )))
-			    prereqs-not-met)
-		       ", ") ") fails: " fails
-		       "\nregistered? " (hash-table-ref/default test-registry (db:test-make-full-name test-name item-path) #f))
-			    
-
-    
-    (if (and (not (null? prereqs-not-met))
-	     (runs:lownoise (conc "waiting on tests " prereqs-not-met hed) 60))
-	(debug:print-info 2 *default-log-port* "waiting on tests; " (string-intersperse (runs:mixed-list-testname-and-testrec->list-of-strings prereqs-not-met) ", ")))
-
-    ;; Don't know at this time if the test have been launched at some time in the past
-    ;; i.e. is this a re-launch?
-    (debug:print-info 4 *default-log-port* "run-limits-info = " run-limits-info)
-    
-    (cond
-     
-     ;; Check item path against item-patts, 
-     ;;
-     ((not (tests:match test-patts (tests:testqueue-get-testname test-record) item-path required: required-tests)) ;; This test/itempath is not to be run
-      ;; else the run is stuck, temporarily or permanently
-      ;; but should check if it is due to lack of resources vs. prerequisites
-      (debug:print-info 1 *default-log-port* "Skipping " (tests:testqueue-get-testname test-record) " " item-path " as it doesn't match " test-patts)
-      (if (or (not (null? tal))(not (null? reg)))
-	  (list (runs:queue-next-hed tal reg reglen regfull)
-		(runs:queue-next-tal tal reg reglen regfull)
-		(runs:queue-next-reg tal reg reglen regfull)
-		reruns)
-	  #f))
-     
-     ;; Register tests 
-     ;;
-     ((not (hash-table-ref/default test-registry (db:test-make-full-name test-name item-path) #f))
-      (debug:print-info 4 *default-log-port* "Pre-registering test " test-name "/" item-path " to create placeholder" )
-      ;; always do firm registration now in v1.60 and greater ;; (eq? *transport-type* 'fs) ;; no point in parallel registration if use fs
-      (let register-loop ((numtries 15))
-	(rmt:register-test run-id test-name item-path)
-	(if (rmt:get-test-id run-id test-name item-path)
-	    (hash-table-set! test-registry (db:test-make-full-name test-name item-path) 'done)
-	    (if (> numtries 0)
-		(begin
-		  (thread-sleep! 0.5)
-		  (register-loop (- numtries 1)))
-		(debug:print-error 0 *default-log-port* "failed to register test " (db:test-make-full-name test-name item-path)))))
-      (if (not (eq? (hash-table-ref/default test-registry (db:test-make-full-name test-name "") #f) 'done))
-	  (begin
-	    (rmt:register-test run-id test-name "")
-	    (if (rmt:get-test-id run-id test-name "")
-		(hash-table-set! test-registry (db:test-make-full-name test-name "") 'done))))
-      (runs:shrink-can-run-more-tests-count runsdat)   ;; DELAY TWEAKER (still needed?)
-      (if (and (null? tal)(null? reg))
-	  (list hed tal (append reg (list hed)) reruns)
-	  (list (runs:queue-next-hed tal reg reglen regfull)
-		(runs:queue-next-tal tal reg reglen regfull)
-		;; NB// Here we are building reg as we register tests
-		;; if regfull we must pop the front item off reg
-		(if regfull
-		    (append (cdr reg) (list hed))
-		    (append reg (list hed)))
-		reruns)))
-     
-     ;; At this point hed test registration must be completed.
-     ;;
-     ((eq? (hash-table-ref/default test-registry (db:test-make-full-name test-name item-path) #f)
-	   'start)
-      (debug:print-info 0 *default-log-port* "Waiting on test registration(s): "
-			(string-intersperse 
-			 (filter (lambda (x)
-				   (eq? (hash-table-ref/default test-registry x #f) 'start))
-				 (hash-table-keys test-registry))
-			 ", "))
-      (thread-sleep! 0.051)
-      (list hed tal reg reruns))
-     
-     ;; If no resources are available just kill time and loop again
-     ;;
-     ((not have-resources) ;; simply try again after waiting a second
-      (if (runs:lownoise "no resources" 60)
-	  (debug:print-info 1 *default-log-port* "no resources to run new tests, waiting ..."))
-      ;; Have gone back and forth on this but db starvation is an issue.
-      ;; wait one second before looking again to run jobs.
-      (thread-sleep! 1)
-      ;; could have done hed tal here but doing car/cdr of newtal to rotate tests
-      (list (car newtal)(cdr newtal) reg reruns))
-     
-     ;; This is the final stage, everything is in place so launch the test
-     ;;
-     ((and have-resources
-	   (or (null? prereqs-not-met)
-	       (and (member 'toplevel testmode) ;;  'toplevel)
-		    (null? non-completed)
-		    (not (member 'exclusive testmode)))))
-      ;; (hash-table-delete! *max-tries-hash* (db:test-make-full-name test-name item-path))
-      ;; we are going to reset all the counters for test retries by setting a new hash table
-      ;; this means they will increment only when nothing can be run
-      (set! *max-tries-hash* (make-hash-table))
-      ;; well, first lets see if cpu load throttling is enabled. If so wait around until the
-      ;; average cpu load is under the threshold before continuing
-      (if maxload ;; only gate if maxload is specified
-          (common:wait-for-cpuload maxload numcpus waitdelay))
-      (if maxhomehostload
-          (common:wait-for-homehost-load maxhomehostload (conc "Waiting for homehost load to drop below normalized value of " maxhomehostload)))
-      
-      (run:test run-id run-info keyvals runname test-record flags #f test-registry all-tests-registry)
-      (runs:incremental-print-results run-id)
-      (hash-table-set! test-registry (db:test-make-full-name test-name item-path) 'running)
-      (runs:shrink-can-run-more-tests-count runsdat)  ;; DELAY TWEAKER (still needed?)
-      ;; (thread-sleep! *global-delta*)
-      (if (or (not (null? tal))(not (null? reg)))
-	  (list (runs:queue-next-hed tal reg reglen regfull)
-		(runs:queue-next-tal tal reg reglen regfull)
-		(runs:queue-next-reg tal reg reglen regfull)
-		reruns)
-	  #f))
-     
-     ;; must be we have unmet prerequisites
-     ;;
-     (else
-      (debug:print 4 *default-log-port* "FAILS: " fails)
-      ;; If one or more of the prereqs-not-met are FAIL then we can issue
-      ;; a message and drop hed from the items to be processed.
-      ;; (runs:mixed-list-testname-and-testrec->list-of-strings prereqs-not-met)
-      (if (and (not (null? prereqs-not-met))
-	       (runs:lownoise (conc "waiting on tests " prereqs-not-met hed) 60))
-	  (debug:print-info 1 *default-log-port* "waiting on tests; " (string-intersperse 
-						    (runs:mixed-list-testname-and-testrec->list-of-strings 
-						     prereqs-not-met) ", ")))
-      (if (or (null? fails)
-	      (member 'toplevel testmode))
-	  (begin
-	    ;; couldn't run, take a breather
-	    (if  (runs:lownoise "Waiting for more work to do..." 60)
-		 (debug:print-info 0 *default-log-port* "Waiting for more work to do..."))
-	    (thread-sleep! 1)
-	    (list (car newtal)(cdr newtal) reg reruns))
-	  ;; the waiton is FAIL so no point in trying to run hed ever again
-	  (if (or (not (null? reg))(not (null? tal)))
-	      (if (vector? hed)
-		  (begin
-		    (debug:print 1 *default-log-port* "WARNING: Dropping test " test-name "/" item-path
-				 " from the launch list as it has prerequistes that are FAIL")
-		    (let ((test-id (rmt:get-test-id run-id hed "")))
-		      (if test-id (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_FAIL" "Failed to run due to failed prerequisites")))
-		    (runs:shrink-can-run-more-tests-count runsdat) ;; DELAY TWEAKER (still needed?)
-		    ;; (thread-sleep! *global-delta*)
-		    ;; This next is for the items
-		    (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "BLOCKED" #f)
-		    (hash-table-set! test-registry (db:test-make-full-name test-name item-path) 'removed)
-		    (list (runs:queue-next-hed tal reg reglen regfull)
-			  (runs:queue-next-tal tal reg reglen regfull)
-			  (runs:queue-next-reg tal reg reglen regfull)
-			  reruns ;; WAS: (cons hed reruns) ;; but that makes no sense?
-			  ))
-		  (let ((nth-try (hash-table-ref/default test-registry hed 0)))
-		    (cond
-		     ((member "RUNNING" (map db:test-get-state prereqs-not-met))
-		      (if (runs:lownoise (conc "possible RUNNING prerequistes " hed) 60)
-			  (debug:print 0 *default-log-port* "WARNING: test " hed " has possible RUNNING prerequisites, don't give up on it yet."))
-		      (thread-sleep! 4)
-		      (list (runs:queue-next-hed newtal reg reglen regfull)
-			    (runs:queue-next-tal newtal reg reglen regfull)
-			    (runs:queue-next-reg newtal reg reglen regfull)
-			    reruns))
-		     ((or (not nth-try)
-			  (and (number? nth-try)
-			       (< nth-try 10)))
-		      (hash-table-set! test-registry hed (if (number? nth-try)
-							     (+ nth-try 1)
-							     0))
-		      (if (runs:lownoise (conc "not removing test " hed) 60)
-			  (debug:print 1 *default-log-port* "WARNING: not removing test " hed " from queue although it may not be runnable due to FAILED prerequisites"))
-		      ;; may not have processed correctly. Could be a race condition in your test implementation? Dropping test " hed) ;;  " as it has prerequistes that are FAIL. (NOTE: hed is not a vector)")
-		      (runs:shrink-can-run-more-tests-count runsdat) ;; DELAY TWEAKER (still needed?)
-		      ;; (list hed tal reg reruns)
-		      ;; (list (car newtal)(cdr newtal) reg reruns)
-		      ;; (hash-table-set! test-registry hed 'removed)
-		      (list (runs:queue-next-hed newtal reg reglen regfull)
-			    (runs:queue-next-tal newtal reg reglen regfull)
-			    (runs:queue-next-reg newtal reg reglen regfull)
-			    reruns))
-		     ((symbol? nth-try)
-		      (if (eq? nth-try 'removed) ;; removed is removed - drop it NOW
-			  (if (null? tal)
-			      #f ;; yes, really
-			      (list (car tal)(cdr tal) reg reruns))
-			  (begin
-			    (if (runs:lownoise (conc "FAILED prerequisites or other issue" hed) 60)
-				(debug:print 0 *default-log-port* "WARNING: test " hed " has FAILED prerequisites or other issue. Internal state " nth-try " will be overridden and we'll retry."))
-			    (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "KEEP_TRYING" #f)
-			    (hash-table-set! test-registry hed 0)
-			    (list (runs:queue-next-hed newtal reg reglen regfull)
-				  (runs:queue-next-tal newtal reg reglen regfull)
-				  (runs:queue-next-reg newtal reg reglen regfull)
-				  reruns))))
-		     (else
-		      (if (runs:lownoise (conc "FAILED prerequitests and we tried" hed) 60)
-			  (debug:print 0 *default-log-port* "WARNING: test " hed " has FAILED prerequitests and we've tried at least 10 times to run it. Giving up now."))
-		      ;; (debug:print 0 *default-log-port* "         prereqs: " prereqs-not-met)
-		      (hash-table-set! test-registry hed 'removed)
-		      (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "TEN_STRIKES" #f)
-		      ;; I'm unclear on if this roll up is needed - it may be the root cause of the "all set to FAIL" bug.
-		      (rmt:set-state-status-and-roll-up-items run-id test-name item-path #f "FAIL" #f) ;; treat as FAIL
-		      (list (if (null? tal)(car newtal)(car tal))
-			    tal
-			    reg
-			    reruns)))))
-	      ;; can't drop this - maybe running? Just keep trying
-	      (let ((runable-tests (runs:runable-tests prereqs-not-met)))
-		(if (null? runable-tests)
-		    #f   ;; I think we are truly done here
-		    (list (runs:queue-next-hed newtal reg reglen regfull)
-			    (runs:queue-next-tal newtal reg reglen regfull)
-			    (runs:queue-next-reg newtal reg reglen regfull)
-			    reruns)))))))))

DELETED emergency-patch-3.scm
Index: emergency-patch-3.scm
==================================================================
--- emergency-patch-3.scm
+++ /dev/null
@@ -1,81 +0,0 @@
- ;; To build patch:
- ;;;;;;;;;;;;;;;;;;;;;;;;;
- ;; ldd /p/foundry/env/pkgs/megatest/1.64/19/bin/.11/mtest
- ;;        linux-vdso.so.1 =>  (0x00002aaaaaaab000)
- ;;        libchicken.so.7 => /p/foundry/env/pkgs/megatest/1.64/chicken-4.10.0//lib/libchicken.so.7 (0x00002aaaaaaad000)
- ;;        libm.so.6 => /lib64/libm.so.6 (0x00002aaaab0a6000)
- ;;        libdl.so.2 => /lib64/libdl.so.2 (0x00002aaaab31f000)
- ;;        libc.so.6 => /lib64/libc.so.6 (0x00002aaaab523000)
- ;;        /lib64/ld-linux-x86-64.so.2 (0x0000555555554000)
- ;;
- ;;  /p/foundry/env/pkgs/megatest/1.64/chicken-4.10.0/bin/csc -s emergency-patch-3.scm
- ;;
-
-
- ;; to test patch:
- ;;;;;;;;;;;;;;;;;;;;;;;;;
- ;; in .megatestrc, add:
- ;; (if (and (> megatest-version 1.64)
- ;;         (< megatest-version 1.6421))
- ;;   (begin
- ;;      (load "/p/foundry/env/pkgs/megatest/1.64/19/share/epatch-1.so")
- ;;      (load "/p/foundry/env/pkgs/megatest/1.64/19/share/epatch-2.so"))) 
- ;;
-
-
- ;; to productize patch:
- ;;;;;;;;;;;;;;;;;;;;;;;;;
- ;; 
-(use directory-utils regex)
-
-(include "common_records.scm")
-(include "key_records.scm")
-(include "db_records.scm")
-(include "run_records.scm")
-(include "test_records.scm")
-
-;; Given a run id start a server process    ### NOTE ### > file 2>&1 
-;; if the run-id is zero and the target-host is set 
-;; try running on that host
-;;   incidental: rotate logs in logs/ dir.
-;;
-(define  (server:run areapath) ;; areapath is *toppath* for a given testsuite area
-  (let* ((curr-host   (get-host-name))
-         ;; (attempt-in-progress (server:start-attempted? areapath))
-         ;; (dot-server-url (server:check-if-running areapath))
-	 (curr-ip     (server:get-best-guess-address curr-host))
-	 (curr-pid    (current-process-id))
-	 (homehost    (common:get-homehost)) ;; configf:lookup *configdat* "server" "homehost" ))
-	 (target-host (car homehost))
-	 (testsuite   (common:get-testsuite-name))
-	 (logfile     (conc areapath "/logs/server.log")) ;; -" curr-pid "-" target-host ".log"))
-	 (cmdln (conc (common:get-megatest-exe)
-		      " -server " (or target-host "-") (if (equal? (configf:lookup *configdat* "server" "daemonize") "yes")
-							   " -daemonize "
-							   "")
-		      ;; " -log " logfile
-		      " -m testsuite:" testsuite)) ;; (conc " >> " logfile " 2>&1 &")))))
-	 (log-rotate  (make-thread common:rotate-logs  "server run, rotate logs thread"))
-         (load-limit  (configf:lookup-number *configdat* "server" "load-limit" default: 0.9)))
-    ;; we want the remote server to start in *toppath* so push there
-    (push-directory areapath)
-    (debug:print 0 *default-log-port* "INFO: Trying to start server (" cmdln ") ...")
-    (thread-start! log-rotate)
-    
-    ;; host.domain.tld match host?
-    (if (and target-host 
-	     ;; look at target host, is it host.domain.tld or ip address and does it 
-	     ;; match current ip or hostname
-	     (not (string-match (conc "("curr-host "|" curr-host"\\..*)") target-host))
-	     (not (equal? curr-ip target-host)))
-	(begin
-	  (debug:print-info 0 *default-log-port* "Starting server on " target-host ", logfile is " logfile)
-	  (setenv "TARGETHOST" target-host)))
-      
-    (setenv "TARGETHOST_LOGF" logfile)
-    (common:wait-for-normalized-load load-limit " delaying server start due to load" remote-host: (get-environment-variable "TARGETHOST")) ;; do not try starting servers on an already overloaded machine, just wait forever
-    (system (conc "nbfake " cmdln))
-    (unsetenv "TARGETHOST_LOGF")
-    (if (get-environment-variable "TARGETHOST")(unsetenv "TARGETHOST"))
-    (thread-join! log-rotate)
-    (pop-directory)))

ADDED   emergency-patches/emergency-patch-1.scm
Index: emergency-patches/emergency-patch-1.scm
==================================================================
--- /dev/null
+++ emergency-patches/emergency-patch-1.scm
@@ -0,0 +1,203 @@
+
+
+;; Called by the server on receipt of /api calls. dat is #( cmd params ); cmd may be a symbol or a string.
+;;    - keep it simple, only return the actual result of the call, i.e. no meta info here
+;;    - cmd selects the db:/tasks:/server: procedure below, which is applied to dbstruct and params
+;;    - returns #( flag result ); flag is #f on an exception, a non-vector dat, overload, or a
+;;      write command sent while dbstruct is read-only
+(define (api:execute-requests dbstruct dat)
+  (handle-exceptions
+   exn
+   (let ((call-chain (get-call-chain)))
+     (debug:print 0 *default-log-port* "WARNING: api:execute-requests received an exception from peer, dat=" dat)
+     (print-call-chain (current-error-port))
+     (debug:print 0 *default-log-port* " message: "  ((condition-property-accessor 'exn 'message) exn))
+     (vector #f (vector exn call-chain dat))) ;; return some stuff for debug if an exception happens
+   (cond
+    ((not (vector? dat))                    ;; it is an error to not receive a vector
+     (vector #f (vector #f "remote must be called with a vector")))
+    ((> *api-process-request-count* 20) ;; 20)
+     (debug:print 0 *default-log-port* "WARNING: api:execute-requests received an overloaded message.")
+     (set! *server-overloaded* #t)
+     (vector #f (vector #f 'overloaded))) ;; the inner vector is what gets returned. nope, don't know why. please refactor!
+    (else  
+     (let* ((cmd-in            (vector-ref dat 0))
+            (cmd               (if (symbol? cmd-in)
+				   cmd-in
+				   (string->symbol cmd-in)))
+            (params            (vector-ref dat 1))
+            (start-t           (current-milliseconds))
+            (readonly-mode     (dbr:dbstruct-read-only dbstruct))
+            (readonly-command  (member cmd api:read-only-queries))
+            (writecmd-in-readonly-mode (and readonly-mode (not readonly-command)))
+            (res    
+             (if writecmd-in-readonly-mode
+                 (conc "attempt to run write command "cmd" on a read-only database")
+                 (case cmd
+                   ;;===============================================
+                   ;; READ/WRITE QUERIES
+                   ;;===============================================
+
+                   ((get-keys-write)                        (db:get-keys dbstruct)) ;; force a dummy "write" query to force server; for debug in -repl
+                   
+                   ;; SERVERS
+                   ((start-server)                    (apply server:kind-run params))
+                   ((kill-server)                     (set! *server-run* #f))
+
+                   ;; TESTS
+
+                   ;;((test-set-state-status-by-id)     (apply mt:test-set-state-status-by-id dbstruct params))
+                   ;;BB - commented out above because it was calling below, eventually, incorrectly (dbstruct passed to mt:test-set-state-status-by-id, which previously did more, but now only passes thru to db:set-state-status-and-roll-up-items.
+                   ((test-set-state-status-by-id)
+
+                    ;; (define (db:set-state-status-and-roll-up-items dbstruct run-id test-name item-path state status comment)
+                    (db:set-state-status-and-roll-up-items
+                     dbstruct
+                     (list-ref params 0) ; run-id
+                     (list-ref params 1) ; test-name
+                     #f                  ; item-path
+                     (list-ref params 2) ; state
+                     (list-ref params 3) ; status
+                     (list-ref params 4) ; comment
+                     ))
+                   
+                   ((delete-test-records)             (apply db:delete-test-records dbstruct params))
+                   ((delete-old-deleted-test-records) (apply db:delete-old-deleted-test-records dbstruct params))
+                   ((test-set-state-status)           (apply db:test-set-state-status dbstruct params))
+                   ((test-set-top-process-pid)        (apply db:test-set-top-process-pid dbstruct params))
+                   ((set-state-status-and-roll-up-items) (apply db:set-state-status-and-roll-up-items dbstruct params))
+                   ((top-test-set-per-pf-counts)      (apply db:top-test-set-per-pf-counts dbstruct params))
+                   ((test-set-archive-block-id)       (apply db:test-set-archive-block-id dbstruct params))
+
+                   ;; RUNS
+                   ((register-run)                 (apply db:register-run dbstruct params))
+                   ((set-tests-state-status)       (apply db:set-tests-state-status dbstruct params))
+                   ((delete-run)                   (apply db:delete-run dbstruct params))
+                   ((lock/unlock-run)              (apply db:lock/unlock-run dbstruct params))
+                   ((update-run-event_time)        (apply db:update-run-event_time dbstruct params))
+                   ((update-run-stats)             (apply db:update-run-stats dbstruct params))
+                   ((set-var)                      (apply db:set-var dbstruct params))
+                   ((del-var)                      (apply db:del-var dbstruct params))
+
+                   ;; STEPS
+                   ((teststep-set-status!)         (apply db:teststep-set-status! dbstruct params))
+
+                   ;; TEST DATA
+                   ((test-data-rollup)             (apply db:test-data-rollup dbstruct params))
+                   ((csv->test-data)               (apply db:csv->test-data dbstruct params))
+
+                   ;; MISC
+                   ((sync-inmem->db)               (let ((run-id (car params)))
+                                                     (db:sync-touched dbstruct run-id force-sync: #t)))
+                   ((mark-incomplete)              (apply db:find-and-mark-incomplete dbstruct params))
+
+                   ;; TESTMETA
+                   ((testmeta-add-record)       (apply db:testmeta-add-record dbstruct params))
+                   ((testmeta-update-field)     (apply db:testmeta-update-field dbstruct params))
+                   ((get-tests-tags)            (db:get-tests-tags dbstruct))
+
+                   ;; TASKS
+                   ((tasks-add)                 (apply tasks:add dbstruct params))   
+                   ((tasks-set-state-given-param-key) (apply tasks:set-state-given-param-key dbstruct params))
+                   ((tasks-get-last)            (apply tasks:get-last dbstruct params))
+
+		   ;; NO SYNC DB
+		   ((no-sync-set)               (apply db:no-sync-set         *no-sync-db* params))
+		   ((no-sync-get/default)       (apply db:no-sync-get/default *no-sync-db* params))
+		   ((no-sync-del!)              (apply db:no-sync-del!        *no-sync-db* params))
+		 
+                   ;; ARCHIVES
+                   ;; ((archive-get-allocations)   
+                   ((archive-register-disk)     (apply db:archive-register-disk dbstruct params))
+                   ((archive-register-block-name)(apply db:archive-register-block-name dbstruct params))
+                   ((archive-allocate-testsuite/area-to-block)(apply db:archive-allocate-testsuite/area-to-block dbstruct params))
+
+                   ;;======================================================================
+                   ;; READ ONLY QUERIES
+                   ;;======================================================================
+
+                   ;; KEYS
+                   ((get-key-val-pairs)               (apply db:get-key-val-pairs dbstruct params))
+                   ((get-keys)                        (db:get-keys dbstruct))
+                   ((get-key-vals)                    (apply db:get-key-vals dbstruct params))
+                   ((get-target)                      (apply db:get-target dbstruct params))
+                   ((get-targets)                     (db:get-targets dbstruct))
+
+                   ;; ARCHIVES
+                   ((test-get-archive-block-info)     (apply db:test-get-archive-block-info dbstruct params))
+                   
+                   ;; TESTS
+                   ((test-toplevel-num-items)         (apply db:test-toplevel-num-items dbstruct params))
+                   ((get-test-info-by-id)	       (apply db:get-test-info-by-id dbstruct params))
+                   ((test-get-rundir-from-test-id)    (apply db:test-get-rundir-from-test-id dbstruct params))
+                   ((get-count-tests-running-for-testname) (apply db:get-count-tests-running-for-testname dbstruct params))
+                   ((get-count-tests-running)         (apply db:get-count-tests-running dbstruct params))
+                   ((get-count-tests-running-in-jobgroup) (apply db:get-count-tests-running-in-jobgroup dbstruct params))
+                   ;; ((delete-test-step-records)        (apply db:delete-test-step-records dbstruct params))
+                   ((get-previous-test-run-record)    (apply db:get-previous-test-run-record dbstruct params))
+                   ((get-matching-previous-test-run-records)(apply db:get-matching-previous-test-run-records dbstruct params))
+                   ((test-get-logfile-info)           (apply db:test-get-logfile-info dbstruct params))
+                   ((test-get-records-for-index-file)  (apply db:test-get-records-for-index-file dbstruct params))
+                   ((get-testinfo-state-status)       (apply db:get-testinfo-state-status dbstruct params))
+                   ((test-get-top-process-pid)        (apply db:test-get-top-process-pid dbstruct params))
+                   ((test-get-paths-matching-keynames-target-new) (apply db:test-get-paths-matching-keynames-target-new dbstruct params))
+                   ((get-prereqs-not-met)             (apply db:get-prereqs-not-met dbstruct params))
+                   ((get-count-tests-running-for-run-id) (apply db:get-count-tests-running-for-run-id dbstruct params))
+                   ((synchash-get)                    (apply synchash:server-get dbstruct params))
+                   ((get-raw-run-stats)               (apply db:get-raw-run-stats dbstruct params))
+
+                   ;; RUNS
+                   ((get-run-info)                 (apply db:get-run-info dbstruct params))
+                   ((get-run-status)               (apply db:get-run-status dbstruct params))
+                   ((set-run-status)               (apply db:set-run-status dbstruct params))
+                   ((get-tests-for-run)            (apply db:get-tests-for-run dbstruct params))
+                   ((get-test-id)                  (apply db:get-test-id dbstruct params))
+                   ((get-tests-for-run-mindata)    (apply db:get-tests-for-run-mindata dbstruct params))
+                   ((get-runs)                     (apply db:get-runs dbstruct params))
+                   ((get-num-runs)                 (apply db:get-num-runs dbstruct params))
+                   ((get-all-run-ids)              (db:get-all-run-ids dbstruct))
+                   ((get-prev-run-ids)             (apply db:get-prev-run-ids dbstruct params))
+                   ((get-run-ids-matching-target)  (apply db:get-run-ids-matching-target dbstruct params))
+                   ((get-runs-by-patt)             (apply db:get-runs-by-patt dbstruct params))
+                   ((get-run-name-from-id)         (apply db:get-run-name-from-id dbstruct params))
+                   ((get-main-run-stats)           (apply db:get-main-run-stats dbstruct params))
+                   ((get-var)                      (apply db:get-var dbstruct params))
+                   ((get-run-stats)                (apply db:get-run-stats dbstruct params))
+
+                   ;; STEPS
+                   ((get-steps-data)               (apply db:get-steps-data dbstruct params))
+                   ((get-steps-for-test)           (apply db:get-steps-for-test dbstruct params))
+
+                   ;; TEST DATA
+                   ((read-test-data)               (apply db:read-test-data dbstruct params))
+                   ((read-test-data*)              (apply db:read-test-data* dbstruct params))
+
+                   ;; MISC
+                   ((get-latest-host-load)         (apply db:get-latest-host-load dbstruct params))
+                   ((have-incompletes?)            (apply db:have-incompletes? dbstruct params))
+                   ((login)                        (apply db:login dbstruct params))
+                   ((general-call)                 (let ((stmtname   (car params))
+                                                         (run-id     (cadr params))
+                                                         (realparams (cddr params)))
+                                                     (db:general-call dbstruct stmtname realparams)))
+                   ((sdb-qry)                      (apply sdb:qry params))
+                   ((ping)                         (current-process-id))
+		   ((get-changed-record-ids)       (apply db:get-changed-record-ids dbstruct params))
+		   
+                   ;; TESTMETA
+                   ((testmeta-get-record)       (apply db:testmeta-get-record dbstruct params))
+
+                   ;; TASKS 
+                   ((find-task-queue-records)   (apply tasks:find-task-queue-records dbstruct params))
+		   (else
+		    (debug:print 0 *default-log-port* "ERROR: bad api call " cmd)
+		    (conc "ERROR: BAD api call " cmd))))))
+       
+       ;; save all stats
+       (let ((delta-t (- (current-milliseconds)
+			 start-t)))
+	 (hash-table-set! *db-api-call-time* cmd
+			  (cons delta-t (hash-table-ref/default *db-api-call-time* cmd '()))))
+       (if writecmd-in-readonly-mode
+	   (vector #f res)
+           (vector #t res)))))))

ADDED   emergency-patches/emergency-patch-2.scm
Index: emergency-patches/emergency-patch-2.scm
==================================================================
--- /dev/null
+++ emergency-patches/emergency-patch-2.scm
@@ -0,0 +1,311 @@
+(include "common_records.scm")
+(include "key_records.scm")
+(include "db_records.scm")
+(include "run_records.scm")
+(include "test_records.scm")
+
+;; Sleep waitdelay seconds per retry (at most count retries) while the first load value for remote-host
+;; exceeds maxload * numcpus, or tops the second value by more than numcpus; msg is added to the log line.
+(define (common:wait-for-cpuload maxload numcpus waitdelay #!key (count 1000) (msg #f)(remote-host #f))
+  (let* ((loadavg (common:get-cpu-load remote-host))
+	 (first   (car loadavg)) (next (cadr loadavg))
+	 (adjload (* maxload numcpus))
+	 (loadjmp (- first next)))
+    (cond
+     ((and (> first adjload) (> count 0))
+      (debug:print-info 0 *default-log-port* "waiting " waitdelay " seconds due to load " first " exceeding max of " adjload " " (if msg msg ""))
+      (thread-sleep! waitdelay)
+      ;; forward msg and remote-host so every retry samples the same host and logs the same message
+      (common:wait-for-cpuload maxload numcpus waitdelay count: (- count 1) msg: msg remote-host: remote-host))
+     ((and (> loadjmp numcpus) (> count 0))
+      (debug:print-info 0 *default-log-port* "waiting " waitdelay " seconds due to load jump " loadjmp " > numcpus " numcpus (if msg msg ""))
+      (thread-sleep! waitdelay)
+      (common:wait-for-cpuload maxload numcpus waitdelay count: (- count 1) msg: msg remote-host: remote-host)))))
+
+;; Hand off to common:wait-for-normalized-load for the homehost; msg is passed through for its log line.
+(define (common:wait-for-homehost-load maxload msg)
+  ;; hh stays #f when we are already on the homehost so the calls are local
+  (let* ((hh-dat (if (common:on-homehost?) #f (common:get-homehost)))
+         (hh     (if hh-dat (car hh-dat) #f)))
+    ;; the cpu count is looked up inside common:wait-for-normalized-load, so it is not fetched here
+    (common:wait-for-normalized-load maxload msg: msg remote-host: hh)))
+
+;; Wait for the cpu load, normalized by the cpu count of remote-host, to drop below maxload.
+;; Looks up the cpu count and hands off to common:wait-for-cpuload with a 15 second retry delay.
+;;
+(define (common:wait-for-normalized-load maxload #!key (msg #f)(remote-host #f))
+  (common:wait-for-cpuload maxload (common:get-num-cpus remote-host) 15 msg: msg remote-host: remote-host))
+
+;; Handle the test at the head of the queue (hed): skip, register, wait on, launch or drop it. Returns the next (hed tal reg reruns) list, or #f when nothing is left.
+(define (runs:process-expanded-tests runsdat testdat)
+  ;; unroll the contents of runsdat and testdat (due to ongoing refactoring).
+  (let* ((hed                    (runs:testdat-hed testdat))
+	 (tal                    (runs:testdat-tal testdat))
+	 (reg                    (runs:testdat-reg testdat))
+	 (reruns                 (runs:testdat-reruns testdat))
+	 (test-name              (runs:testdat-test-name testdat))
+	 (item-path              (runs:testdat-item-path testdat))
+	 (jobgroup               (runs:testdat-jobgroup testdat))
+	 (waitons                (runs:testdat-waitons testdat))
+	 (item-path              (runs:testdat-item-path testdat)) ;; duplicate of the item-path binding above
+	 (testmode               (runs:testdat-testmode testdat))
+	 (newtal                 (runs:testdat-newtal testdat))
+	 (itemmaps               (runs:testdat-itemmaps testdat))
+	 (test-record            (runs:testdat-test-record testdat))
+	 (prereqs-not-met        (runs:testdat-prereqs-not-met testdat))
+
+	 (reglen                 (runs:dat-reglen runsdat))
+	 (regfull                (runs:dat-regfull runsdat))
+	 (runname                (runs:dat-runname runsdat))
+	 (max-concurrent-jobs    (runs:dat-max-concurrent-jobs runsdat))
+	 (run-id                 (runs:dat-run-id runsdat))
+	 (test-patts             (runs:dat-test-patts runsdat))
+	 (required-tests         (runs:dat-required-tests runsdat))
+	 (test-registry          (runs:dat-test-registry runsdat))
+	 (registry-mutex         (runs:dat-registry-mutex runsdat))
+	 (flags                  (runs:dat-flags runsdat))
+	 (keyvals                (runs:dat-keyvals runsdat))
+	 (run-info               (runs:dat-run-info runsdat))
+	 (all-tests-registry     (runs:dat-all-tests-registry runsdat))
+	 (run-limits-info        (runs:dat-can-run-more-tests runsdat))
+	 ;; was: (runs:can-run-more-tests run-id jobgroup max-concurrent-jobs) - look at the test jobgroup and tot jobs running
+	 (have-resources         (car run-limits-info))
+	 (num-running            (list-ref run-limits-info 1))
+	 (num-running-in-jobgroup(list-ref run-limits-info 2)) 
+	 (max-concurrent-jobs    (list-ref run-limits-info 3)) ;; shadows the value taken from runsdat above
+	 (job-group-limit        (list-ref run-limits-info 4))
+	 ;; (prereqs-not-met        (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps))
+	 ;; (prereqs-not-met         (mt:lazy-get-prereqs-not-met run-id waitons item-path mode: testmode itemmap: itemmap))
+	 (fails                  (if (list? prereqs-not-met)
+				      (runs:calc-fails prereqs-not-met)
+				      (begin
+					(debug:print-error 0 *default-log-port* "prereqs-not-met is not a list! " prereqs-not-met)
+					'())))
+	 (non-completed           (filter (lambda (x)             ;; remove hed from not completed list, duh, of course it is not completed!
+					    (not (equal? x hed)))
+					  (runs:calc-not-completed prereqs-not-met)))
+	 (loop-list               (list hed tal reg reruns))
+	 ;; configure the load runner
+	 (numcpus                 (common:get-num-cpus #f))
+	 (maxload                 (string->number (or (configf:lookup *configdat* "jobtools" "maxload") "3.0")))         ;; use a non-number string to disable
+         (maxhomehostload         (string->number (or (configf:lookup *configdat* "jobtools" "maxhomehostload") "1.2"))) ;; use a non-number string to disable
+         (waitdelay               (string->number (or (configf:lookup *configdat* "jobtools" "waitdelay") "60"))))
+    (debug:print-info 4 *default-log-port* "have-resources: " have-resources " prereqs-not-met: (" 
+		      (string-intersperse 
+		       (map (lambda (t)
+			      (if (vector? t)
+				  (conc (db:test-get-state t) "/" (db:test-get-status t))
+				  (conc " WARNING: t is not a vector=" t )))
+			    prereqs-not-met)
+		       ", ") ") fails: " fails
+		       "\nregistered? " (hash-table-ref/default test-registry (db:test-make-full-name test-name item-path) #f))
+			    
+
+    
+    (if (and (not (null? prereqs-not-met))
+	     (runs:lownoise (conc "waiting on tests " prereqs-not-met hed) 60))
+	(debug:print-info 2 *default-log-port* "waiting on tests; " (string-intersperse (runs:mixed-list-testname-and-testrec->list-of-strings prereqs-not-met) ", ")))
+
+    ;; Don't know at this time if the test have been launched at some time in the past
+    ;; i.e. is this a re-launch?
+    (debug:print-info 4 *default-log-port* "run-limits-info = " run-limits-info)
+    
+    (cond
+     
+     ;; Check item path against item-patts, 
+     ;;
+     ((not (tests:match test-patts (tests:testqueue-get-testname test-record) item-path required: required-tests)) ;; This test/itempath is not to be run
+      ;; else the run is stuck, temporarily or permanently
+      ;; but should check if it is due to lack of resources vs. prerequisites
+      (debug:print-info 1 *default-log-port* "Skipping " (tests:testqueue-get-testname test-record) " " item-path " as it doesn't match " test-patts)
+      (if (or (not (null? tal))(not (null? reg)))
+	  (list (runs:queue-next-hed tal reg reglen regfull)
+		(runs:queue-next-tal tal reg reglen regfull)
+		(runs:queue-next-reg tal reg reglen regfull)
+		reruns)
+	  #f))
+     
+     ;; Register tests 
+     ;;
+     ((not (hash-table-ref/default test-registry (db:test-make-full-name test-name item-path) #f))
+      (debug:print-info 4 *default-log-port* "Pre-registering test " test-name "/" item-path " to create placeholder" )
+      ;; always do firm registration now in v1.60 and greater ;; (eq? *transport-type* 'fs) ;; no point in parallel registration if use fs
+      (let register-loop ((numtries 15))
+	(rmt:register-test run-id test-name item-path)
+	(if (rmt:get-test-id run-id test-name item-path)
+	    (hash-table-set! test-registry (db:test-make-full-name test-name item-path) 'done)
+	    (if (> numtries 0)
+		(begin
+		  (thread-sleep! 0.5)
+		  (register-loop (- numtries 1)))
+		(debug:print-error 0 *default-log-port* "failed to register test " (db:test-make-full-name test-name item-path)))))
+      (if (not (eq? (hash-table-ref/default test-registry (db:test-make-full-name test-name "") #f) 'done))
+	  (begin
+	    (rmt:register-test run-id test-name "")
+	    (if (rmt:get-test-id run-id test-name "")
+		(hash-table-set! test-registry (db:test-make-full-name test-name "") 'done))))
+      (runs:shrink-can-run-more-tests-count runsdat)   ;; DELAY TWEAKER (still needed?)
+      (if (and (null? tal)(null? reg))
+	  (list hed tal (append reg (list hed)) reruns)
+	  (list (runs:queue-next-hed tal reg reglen regfull)
+		(runs:queue-next-tal tal reg reglen regfull)
+		;; NB// Here we are building reg as we register tests
+		;; if regfull we must pop the front item off reg
+		(if regfull
+		    (append (cdr reg) (list hed))
+		    (append reg (list hed)))
+		reruns)))
+     
+     ;; At this point hed test registration must be completed.
+     ;;
+     ((eq? (hash-table-ref/default test-registry (db:test-make-full-name test-name item-path) #f)
+	   'start)
+      (debug:print-info 0 *default-log-port* "Waiting on test registration(s): "
+			(string-intersperse 
+			 (filter (lambda (x)
+				   (eq? (hash-table-ref/default test-registry x #f) 'start))
+				 (hash-table-keys test-registry))
+			 ", "))
+      (thread-sleep! 0.051)
+      (list hed tal reg reruns))
+     
+     ;; If no resources are available just kill time and loop again
+     ;;
+     ((not have-resources) ;; simply try again after waiting a second
+      (if (runs:lownoise "no resources" 60)
+	  (debug:print-info 1 *default-log-port* "no resources to run new tests, waiting ..."))
+      ;; Have gone back and forth on this but db starvation is an issue.
+      ;; wait one second before looking again to run jobs.
+      (thread-sleep! 1)
+      ;; could have done hed tal here but doing car/cdr of newtal to rotate tests
+      (list (car newtal)(cdr newtal) reg reruns))
+     
+     ;; This is the final stage, everything is in place so launch the test
+     ;;
+     ((and have-resources
+	   (or (null? prereqs-not-met)
+	       (and (member 'toplevel testmode) ;;  'toplevel)
+		    (null? non-completed)
+		    (not (member 'exclusive testmode)))))
+      ;; (hash-table-delete! *max-tries-hash* (db:test-make-full-name test-name item-path))
+      ;; we are going to reset all the counters for test retries by setting a new hash table
+      ;; this means they will increment only when nothing can be run
+      (set! *max-tries-hash* (make-hash-table))
+      ;; well, first lets see if cpu load throttling is enabled. If so wait around until the
+      ;; average cpu load is under the threshold before continuing
+      (if maxload ;; only gate if maxload is specified
+          (common:wait-for-cpuload maxload numcpus waitdelay))
+      (if maxhomehostload ;; #f when the configured value is not a number, which disables this gate
+          (common:wait-for-homehost-load maxhomehostload (conc "Waiting for homehost load to drop below normalized value of " maxhomehostload)))
+      
+      (run:test run-id run-info keyvals runname test-record flags #f test-registry all-tests-registry)
+      (runs:incremental-print-results run-id)
+      (hash-table-set! test-registry (db:test-make-full-name test-name item-path) 'running)
+      (runs:shrink-can-run-more-tests-count runsdat)  ;; DELAY TWEAKER (still needed?)
+      ;; (thread-sleep! *global-delta*)
+      (if (or (not (null? tal))(not (null? reg)))
+	  (list (runs:queue-next-hed tal reg reglen regfull)
+		(runs:queue-next-tal tal reg reglen regfull)
+		(runs:queue-next-reg tal reg reglen regfull)
+		reruns)
+	  #f))
+     
+     ;; must be we have unmet prerequisites
+     ;;
+     (else
+      (debug:print 4 *default-log-port* "FAILS: " fails)
+      ;; If one or more of the prereqs-not-met are FAIL then we can issue
+      ;; a message and drop hed from the items to be processed.
+      ;; (runs:mixed-list-testname-and-testrec->list-of-strings prereqs-not-met)
+      (if (and (not (null? prereqs-not-met))
+	       (runs:lownoise (conc "waiting on tests " prereqs-not-met hed) 60))
+	  (debug:print-info 1 *default-log-port* "waiting on tests; " (string-intersperse 
+						    (runs:mixed-list-testname-and-testrec->list-of-strings 
+						     prereqs-not-met) ", ")))
+      (if (or (null? fails)
+	      (member 'toplevel testmode))
+	  (begin
+	    ;; couldn't run, take a breather
+	    (if  (runs:lownoise "Waiting for more work to do..." 60)
+		 (debug:print-info 0 *default-log-port* "Waiting for more work to do..."))
+	    (thread-sleep! 1)
+	    (list (car newtal)(cdr newtal) reg reruns))
+	  ;; the waiton is FAIL so no point in trying to run hed ever again
+	  (if (or (not (null? reg))(not (null? tal)))
+	      (if (vector? hed)
+		  (begin
+		    (debug:print 1 *default-log-port* "WARNING: Dropping test " test-name "/" item-path
+				 " from the launch list as it has prerequistes that are FAIL")
+		    (let ((test-id (rmt:get-test-id run-id hed "")))
+		      (if test-id (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_FAIL" "Failed to run due to failed prerequisites")))
+		    (runs:shrink-can-run-more-tests-count runsdat) ;; DELAY TWEAKER (still needed?)
+		    ;; (thread-sleep! *global-delta*)
+		    ;; This next is for the items
+		    (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "BLOCKED" #f)
+		    (hash-table-set! test-registry (db:test-make-full-name test-name item-path) 'removed)
+		    (list (runs:queue-next-hed tal reg reglen regfull)
+			  (runs:queue-next-tal tal reg reglen regfull)
+			  (runs:queue-next-reg tal reg reglen regfull)
+			  reruns ;; WAS: (cons hed reruns) ;; but that makes no sense?
+			  ))
+		  (let ((nth-try (hash-table-ref/default test-registry hed 0))) ;; registry entry for hed: a retry count or a state symbol
+		    (cond
+		     ((member "RUNNING" (map db:test-get-state prereqs-not-met))
+		      (if (runs:lownoise (conc "possible RUNNING prerequistes " hed) 60)
+			  (debug:print 0 *default-log-port* "WARNING: test " hed " has possible RUNNING prerequisites, don't give up on it yet."))
+		      (thread-sleep! 4)
+		      (list (runs:queue-next-hed newtal reg reglen regfull)
+			    (runs:queue-next-tal newtal reg reglen regfull)
+			    (runs:queue-next-reg newtal reg reglen regfull)
+			    reruns))
+		     ((or (not nth-try) ;; fewer than 10 tries so far: bump the counter and requeue
+			  (and (number? nth-try)
+			       (< nth-try 10)))
+		      (hash-table-set! test-registry hed (if (number? nth-try)
+							     (+ nth-try 1)
+							     0))
+		      (if (runs:lownoise (conc "not removing test " hed) 60)
+			  (debug:print 1 *default-log-port* "WARNING: not removing test " hed " from queue although it may not be runnable due to FAILED prerequisites"))
+		      ;; may not have processed correctly. Could be a race condition in your test implementation? Dropping test " hed) ;;  " as it has prerequistes that are FAIL. (NOTE: hed is not a vector)")
+		      (runs:shrink-can-run-more-tests-count runsdat) ;; DELAY TWEAKER (still needed?)
+		      ;; (list hed tal reg reruns)
+		      ;; (list (car newtal)(cdr newtal) reg reruns)
+		      ;; (hash-table-set! test-registry hed 'removed)
+		      (list (runs:queue-next-hed newtal reg reglen regfull)
+			    (runs:queue-next-tal newtal reg reglen regfull)
+			    (runs:queue-next-reg newtal reg reglen regfull)
+			    reruns))
+		     ((symbol? nth-try)
+		      (if (eq? nth-try 'removed) ;; removed is removed - drop it NOW
+			  (if (null? tal)
+			      #f ;; yes, really
+			      (list (car tal)(cdr tal) reg reruns))
+			  (begin
+			    (if (runs:lownoise (conc "FAILED prerequisites or other issue" hed) 60)
+				(debug:print 0 *default-log-port* "WARNING: test " hed " has FAILED prerequisites or other issue. Internal state " nth-try " will be overridden and we'll retry."))
+			    (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "KEEP_TRYING" #f)
+			    (hash-table-set! test-registry hed 0)
+			    (list (runs:queue-next-hed newtal reg reglen regfull)
+				  (runs:queue-next-tal newtal reg reglen regfull)
+				  (runs:queue-next-reg newtal reg reglen regfull)
+				  reruns))))
+		     (else
+		      (if (runs:lownoise (conc "FAILED prerequitests and we tried" hed) 60)
+			  (debug:print 0 *default-log-port* "WARNING: test " hed " has FAILED prerequitests and we've tried at least 10 times to run it. Giving up now."))
+		      ;; (debug:print 0 *default-log-port* "         prereqs: " prereqs-not-met)
+		      (hash-table-set! test-registry hed 'removed)
+		      (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "TEN_STRIKES" #f)
+		      ;; I'm unclear on if this roll up is needed - it may be the root cause of the "all set to FAIL" bug.
+		      (rmt:set-state-status-and-roll-up-items run-id test-name item-path #f "FAIL" #f) ;; treat as FAIL
+		      (list (if (null? tal)(car newtal)(car tal))
+			    tal
+			    reg
+			    reruns)))))
+	      ;; reg and tal are both empty here. can't drop this - maybe running? Just keep trying
+	      (let ((runable-tests (runs:runable-tests prereqs-not-met)))
+		(if (null? runable-tests)
+		    #f   ;; I think we are truly done here
+		    (list (runs:queue-next-hed newtal reg reglen regfull)
+			    (runs:queue-next-tal newtal reg reglen regfull)
+			    (runs:queue-next-reg newtal reg reglen regfull)
+			    reruns)))))))))

ADDED   emergency-patches/emergency-patch-3.scm
Index: emergency-patches/emergency-patch-3.scm
==================================================================
--- /dev/null
+++ emergency-patches/emergency-patch-3.scm
@@ -0,0 +1,81 @@
+ ;; To build patch:
+ ;;;;;;;;;;;;;;;;;;;;;;;;;
+ ;; ldd /p/foundry/env/pkgs/megatest/1.64/19/bin/.11/mtest
+ ;;        linux-vdso.so.1 =>  (0x00002aaaaaaab000)
+ ;;        libchicken.so.7 => /p/foundry/env/pkgs/megatest/1.64/chicken-4.10.0//lib/libchicken.so.7 (0x00002aaaaaaad000)
+ ;;        libm.so.6 => /lib64/libm.so.6 (0x00002aaaab0a6000)
+ ;;        libdl.so.2 => /lib64/libdl.so.2 (0x00002aaaab31f000)
+ ;;        libc.so.6 => /lib64/libc.so.6 (0x00002aaaab523000)
+ ;;        /lib64/ld-linux-x86-64.so.2 (0x0000555555554000)
+ ;;
+ ;;  /p/foundry/env/pkgs/megatest/1.64/chicken-4.10.0/bin/csc -s emergency-patch-3.scm
+ ;;
+
+
+ ;; to test patch:
+ ;;;;;;;;;;;;;;;;;;;;;;;;;
+ ;; in .megatestrc, add:
+ ;; (if (and (> megatest-version 1.64)
+ ;;         (< megatest-version 1.6421))
+ ;;   (begin
+ ;;      (load "/p/foundry/env/pkgs/megatest/1.64/19/share/epatch-1.so")
+ ;;      (load "/p/foundry/env/pkgs/megatest/1.64/19/share/epatch-2.so"))) 
+ ;;
+
+
+ ;; to productize patch:
+ ;;;;;;;;;;;;;;;;;;;;;;;;;
+ ;; 
+(use directory-utils regex)
+
+(include "common_records.scm")
+(include "key_records.scm")
+(include "db_records.scm")
+(include "run_records.scm")
+(include "test_records.scm")
+
+;; Start a megatest server for the testsuite area at areapath by shelling out to:
+;;   nbfake MEGATEST-EXE -server TARGET-HOST-OR-DASH [-daemonize] -m testsuite:NAME
+;; TARGETHOST is exported only when the homehost differs from the current host/ip.
+;;   incidental: rotate logs (common:rotate-logs) in a side thread.
+;;
+(define  (server:run areapath) ;; areapath is *toppath* for a given testsuite area
+  (let* ((curr-host   (get-host-name))
+         ;; (attempt-in-progress (server:start-attempted? areapath))
+         ;; (dot-server-url (server:check-if-running areapath))
+	 (curr-ip     (server:get-best-guess-address curr-host))
+	 (curr-pid    (current-process-id)) ;; only referenced by the commented-out logfile suffix below
+	 (homehost    (common:get-homehost)) ;; configf:lookup *configdat* "server" "homehost" ))
+	 (target-host (car homehost)) ;; NOTE(review): assumes homehost is a pair whose car is a host string or #f - confirm
+	 (testsuite   (common:get-testsuite-name))
+	 (logfile     (conc areapath "/logs/server.log")) ;; -" curr-pid "-" target-host ".log"))
+	 (cmdln (conc (common:get-megatest-exe)
+		      " -server " (or target-host "-") (if (equal? (configf:lookup *configdat* "server" "daemonize") "yes")
+							   " -daemonize "
+							   "")
+		      ;; " -log " logfile
+		      " -m testsuite:" testsuite)) ;; (conc " >> " logfile " 2>&1 &")))))
+	 (log-rotate  (make-thread common:rotate-logs  "server run, rotate logs thread"))
+         (load-limit  (configf:lookup-number *configdat* "server" "load-limit" default: 0.9)))
+    ;; we want the remote server to start in *toppath* so push there
+    (push-directory areapath)
+    (debug:print 0 *default-log-port* "INFO: Trying to start server (" cmdln ") ...")
+    (thread-start! log-rotate)
+    
+    ;; only export TARGETHOST when the target is some other machine
+    (if (and target-host 
+	     ;; look at target host, is it host.domain.tld or ip address and does it 
+	     ;; match current ip or hostname
+	     (not (string-match (conc "("curr-host "|" curr-host"\\..*)") target-host))
+	     (not (equal? curr-ip target-host)))
+	(begin
+	  (debug:print-info 0 *default-log-port* "Starting server on " target-host ", logfile is " logfile)
+	  (setenv "TARGETHOST" target-host)))
+      
+    (setenv "TARGETHOST_LOGF" logfile) ;; NOTE(review): presumably read by nbfake as its log destination - confirm
+    (common:wait-for-normalized-load load-limit " delaying server start due to load" remote-host: (get-environment-variable "TARGETHOST")) ;; do not try starting servers on an already overloaded machine, just wait forever
+    (system (conc "nbfake " cmdln)) ;; launch through the nbfake helper; the exit status is not checked
+    (unsetenv "TARGETHOST_LOGF")
+    (if (get-environment-variable "TARGETHOST")(unsetenv "TARGETHOST"))
+    (thread-join! log-rotate) ;; wait for the log rotation thread before leaving areapath
+    (pop-directory)))

Index: server.scm
==================================================================
--- server.scm
+++ server.scm
@@ -245,11 +245,11 @@
 				    (mod-time   (list-ref rec 0)))
 				;; (print "start-time: " start-time " mod-time: " mod-time)
 				(and start-time mod-time
 				     (> (- now start-time) 0)    ;; been running at least 0 seconds
 				     (< (- now mod-time)   16)   ;; still alive - file touched in last 16 seconds
-				     (< (- now start-time) 
+				     (< (- now start-time)       
 					(+ (- (string->number (or (configf:lookup *configdat* "server" "runtime") "3600"))
 					      180)
 					   (random 360))) ;; under one hour running time +/- 180
 				     ))
 			      #f))
@@ -444,11 +444,11 @@
 ;; timeout is hms string: 1h 5m 3s, default is 1 minute
 ;;
 (define (server:expiration-timeout)
   (let ((tmo (configf:lookup *configdat* "server" "timeout")))
     (if (and (string? tmo)
-	     (common:hms-string->seconds tmo))
+	     (common:hms-string->seconds tmo)) ;; BUG: hms-string->seconds is broken, if given "10" returns 0. Also, it doesn't belong in this logic unless the string->number is changed below
         (* 3600 (string->number tmo))
 	60)))
 
 ;; moving this here as it needs access to db and cannot be in common.
 ;;

Index: tcmt.scm
==================================================================
--- tcmt.scm
+++ tcmt.scm
@@ -41,12 +41,13 @@
 ;; ##teamcity[testStdOut name='suite.testName' out='text']
 ;; ##teamcity[testStdErr name='suite.testName' out='error text']
 ;; ##teamcity[testFailed name='suite.testName' message='failure message' details='message and stack trace']
 ;; ##teamcity[testFinished name='suite.testName' duration='50']
 ;; 
-
-(define (print-changes-since data run-ids last-update tsname target runname)
+;; flush: #f for a normal call; #t for the last call, where tests that are neither RUNNING nor COMPLETED (NOT_STARTED, etc.) are also printed.
+;;
+(define (print-changes-since data run-ids last-update tsname target runname flowid flush) ;; 
   (let ((now   (current-seconds)))
     (handle-exceptions
      exn
      (begin (print-call-chain) (print "Error message: " ((condition-property-accessor 'exn 'message) exn)))
      (for-each
@@ -63,18 +64,22 @@
 		    (status   (db:test-get-status       testdat))
 		    (duration (or (any->number (db:test-get-run_duration testdat)) 0))
 		    (comment  (db:test-get-comment      testdat))
 		    (logfile  (db:test-get-final_logf   testdat))
 		    (prevstat (hash-table-ref/default data testn #f))
-		    (newstat  (if (equal? state "RUNNING")
-				  "RUNNING"
-				  (if (equal? state "COMPLETED")
-				      status
-				      "UNK")))
-		    (cmtstr   (if comment
+		    (newstat  (cond
+			       ((equal? state "RUNNING")   "RUNNING")
+			       ((equal? state "COMPLETED") status)
+			       (flush   (conc state "/" status))
+			       (else "UNK")))
+		    (cmtstr   (if (and (not flush) comment)
 				  (conc " message='" comment "' ")
-				  " "))
+				  (if flush
+				      (conc "message='Test ended in state/status=" state "/" status  (if  (string-match "^\\s*$" comment)
+													  ", no Megatest comment found.' "
+													  (conc ", Megatest comment='" comment "' "))) ;; special case, we are handling stragglers
+				      " ")))
 		    (details  (if (string-match ".*html$" logfile)
 				  (conc " details='" *toppath* "/lt/" target "/" runname "/" testname (if (equal? itempath "") "/" (conc "/" itempath "/")) logfile "' ")
 				  "")))
 		    
 	       ;; (print "DEBUG: testn=" testn " state=" state " status=" status " prevstat=" prevstat " newstat=" newstat)
@@ -81,35 +86,36 @@
 	       (if (or (not prevstat)
 		       (not (equal? prevstat newstat)))
 		   (begin
 		     (case (string->symbol newstat)
 		       ((UNK)       ) ;; do nothing
-		       ((RUNNING)   (print "##teamcity[testStarted name='" tctname "']"))
-		       ((PASS SKIP WARN WAIVED) (print "##teamcity[testFinished name='" tctname "' duration='" (* 1e3 duration) "'" cmtstr details " ]"))
+		       ((RUNNING)   (print "##teamcity[testStarted name='" tctname "' flowId='" flowid "']"))
+		       ((PASS SKIP WARN WAIVED) (print "##teamcity[testFinished name='" tctname "' duration='" (* 1e3 duration) "'" cmtstr details " flowId='" flowid "']"))
 		       (else
-			(print "##teamcity[testFailed name='" tctname "' " cmtstr details " ]")))
+			(print "##teamcity[testFailed name='" tctname "' " cmtstr details " flowId='" flowid "']")))
 		     (flush-output)
 		     (hash-table-set! data testn newstat)))))
 	   tests)))
       run-ids))
     now))
 
 (define (monitor pid)
-  (let ((run-ids #f)
-	(testdat (make-hash-table))
-	(keys    #f)
-	(last-update 0)
-	(target  (or (args:get-arg "-target")
-		     (args:get-arg "-reqtarg")))
-	(runname (args:get-arg "-runname"))
-	(tsname  #f))
+  (let* ((run-ids #f)
+	 (testdat (make-hash-table))
+	 (keys    #f)
+	 (last-update 0)
+	 (target  (or (args:get-arg "-target")
+		      (args:get-arg "-reqtarg")))
+	 (runname (args:get-arg "-runname"))
+	 (tsname  #f)
+	 (flowid  (conc target "/" runname)))
     (if (and target runname)
 	(begin
 	  (launch:setup)
 	  (set! keys (rmt:get-keys))))
     (set! tsname  (common:get-testsuite-name))
-    (print "TCMT: for testsuite=" tsname " found runname=" runname ", target=" target ", keys=" keys " and successfully ran launch:setup.")
+    (print "TCMT: for testsuite=" tsname " found runname=" runname ", target=" target ", keys=" keys " and successfully ran launch:setup. Using " flowid " as the flowId.")
     (let loop ()
       (handle-exceptions
        exn
        ;; (print "Process done.")
        (begin (print-call-chain) (print "Error message: " ((condition-property-accessor 'exn 'message) exn)))
@@ -131,18 +137,20 @@
 		    (run-ids-in (map (lambda (row)
 				       (db:get-value-by-header row header "id"))
 				     rows)))
 	       (set! run-ids run-ids-in)))
 	 ;; (print "TCMT: pidres=" pidres " exittype=" exittype " exitstatus=" exitstatus " run-ids=" run-ids)
-	 (if keys
-	     (set! last-update (print-changes-since testdat run-ids last-update tsname target runname)))
 	 (if (eq? pidres 0)
 	     (begin
+	       (if keys
+		   (set! last-update (print-changes-since testdat run-ids last-update tsname target runname flowid #f)))
 	       (thread-sleep! 3)
 	       (loop))
 	     (begin
 	       ;; (print "TCMT: pidres=" pidres " exittype=" exittype " exitstatus=" exitstatus " run-ids=" run-ids)
+	       (print "TCMT: processing any tests that did not formally complete.")
+	       (print-changes-since testdat run-ids 0 tsname target runname flowid #t) ;; call in flush mode
 	       (print "TCMT: All done.")
 	       )))))))
 
 ;; (if (not (eq? pidres 0))	  ;; (not exitstatus))
 ;; 	  (begin