Megatest: commonmod.scm at [5751790037]

File commonmod.scm artifact 8296b7d920 part of check-in 5751790037

;;======================================================================
;; Copyright 2017, Matthew Welland.
;; 
;; This file is part of Megatest.
;; 
;;     Megatest is free software: you can redistribute it and/or modify
;;     it under the terms of the GNU General Public License as published by
;;     the Free Software Foundation, either version 3 of the License, or
;;     (at your option) any later version.
;; 
;;     Megatest is distributed in the hope that it will be useful,
;;     but WITHOUT ANY WARRANTY; without even the implied warranty of
;;     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;;     GNU General Public License for more details.
;; 
;;     You should have received a copy of the GNU General Public License
;;     along with Megatest.  If not, see <http://www.gnu.org/licenses/>.

;;======================================================================

(declare (unit commonmod))
(declare (uses configfmod))
(declare (uses processmod))

(module commonmod
	*
	
(import scheme chicken data-structures extras)
(import (prefix sqlite3 sqlite3:) posix typed-records srfi-18
	srfi-1 files format srfi-13 matchable 
	srfi-69 ports
	regex-case regex hostinfo srfi-4
	pkts (prefix dbi dbi:))

(import configfmod)
(import processmod)
(import stml2)

(include "common_records.scm")
(include "megatest-fossil-hash.scm")
(include "megatest-version.scm")

(define (common:low-noise-print alldat waitval . keys)
  (let* ((key      (string-intersperse (map conc keys) "-" ))
	 (lasttime (hash-table-ref/default (alldat-denoise alldat) key 0))
	 (currtime (current-seconds)))
    (if (> (- currtime lasttime) waitval)
	(begin
	  (hash-table-set! (alldat-denoise alldat) key currtime)
	  #t)
	#f)))

(define (common:version-signature alldat)
  (conc (alldat-megatest-version alldat)
	"-" (substring (alldat-megatest-fossil-hash alldat) 0 4)))

(define (common:get-fields cfgdat)
  (let ((fields (hash-table-ref/default cfgdat "fields" '())))
    (map car fields)))

;;======================================================================
;; T I M E   A N D   D A T E
;;======================================================================

;; Convert strings like "5s 2h 3m" => 60x60x2 + 3x60 + 5
(define (common:hms-string->seconds tstr)
  (let ((parts     (string-split-fields "\\w+" tstr))
	(time-secs 0)
	;; s=seconds, m=minutes, h=hours, d=days, M=months, y=years, w=weeks
	(trx       (regexp "(\\d+)([smhdMyw])")))
    (for-each (lambda (part)
		(let ((match  (string-match trx part)))
		  (if match
		      (let ((val (string->number (cadr match)))
			    (unt (caddr match)))
			(if val 
			    (set! time-secs (+ time-secs (* val
							    (case (string->symbol unt)
							      ((s) 1)
							      ((m) 60) ;; minutes
							      ((h) 3600)
							      ((d) 86400)
							      ((w) 604800)
							      ((M) 2628000) ;; aproximately one month
							      ((y) 31536000)
							      (else #f))))))))))
	      parts)
    time-secs))
		       
(define (seconds->hr-min-sec secs)
  (let* ((hrs (quotient secs 3600))
	 (min (quotient (- secs (* hrs 3600)) 60))
	 (sec (- secs (* hrs 3600)(* min 60))))
    (conc (if (> hrs 0)(conc hrs "hr ") "")
	  (if (> min 0)(conc min "m ")  "")
	  sec "s")))

(define (seconds->time-string sec)
  (time->string 
   (seconds->local-time sec) "%H:%M:%S"))

(define (seconds->work-week/day-time sec)
  (time->string
   (seconds->local-time sec) "ww%V.%u %H:%M"))

(define (seconds->work-week/day sec)
  (time->string
   (seconds->local-time sec) "ww%V.%u"))

(define (seconds->year-work-week/day sec)
  (time->string
   (seconds->local-time sec) "%yww%V.%w"))

(define (seconds->year-work-week/day-time sec)
  (time->string
   (seconds->local-time sec) "%Yww%V.%w %H:%M"))

(define (seconds->year-week/day-time sec)
  (time->string
   (seconds->local-time sec) "%Yw%V.%w %H:%M"))

(define (seconds->quarter sec)
  (case (string->number
	 (time->string 
	  (seconds->local-time sec)
	  "%m"))
    ((1 2 3) 1)
    ((4 5 6) 2)
    ((7 8 9) 3)
    ((10 11 12) 4)
    (else #f)))

;; basic ISO8601 format (e.g. "2017-02-28 06:02:54") date time => Unix epoch
;;
(define (common:date-time->seconds datetime)
  (local-time->seconds (string->time datetime "%Y-%m-%d %H:%M:%S")))

;; given span of seconds tstart to tend
;; find start time to mark and mark delta
;;
(define (common:find-start-mark-and-mark-delta tstart tend)
  (let* ((deltat   (- (max tend (+ tend 10)) tstart)) ;; can't handle runs of less than 4 seconds. Pad it to 10 seconds ...
	 (result   #f)
	 (min      60)
	 (hr       (* 60 60))
	 (day      (* 24 hr))
	 (yr       (* 365 day)) ;; year
	 (mo       (/ yr 12))
	 (wk       (* day 7)))
    (for-each
     (lambda (max-blks)
       (for-each
	(lambda (span) ;; 5 2 1
	  (if (not result)
	      (for-each 
	       (lambda (timeunit timesym) ;; year month day hr min sec
		 (if (not result)
		     (let* ((time-blk (* span timeunit))
			    (num-blks (quotient deltat time-blk)))
		       (if (and (> num-blks 4)(< num-blks max-blks))
			   (let ((first (* (quotient tstart time-blk) time-blk)))
			     (set! result (list span timeunit time-blk first timesym))
			     )))))
	       (list yr mo wk day hr min 1)
	       '(     y  mo w  d   h  m   s))))
	(list 8 6 5 2 1)))
     '(5 10 15 20 30 40 50 500))
    (if values
	(apply values result)
	(values 0 day 1 0 'd))))

;; given x y lim return the cron expansion
;;
(define (common:expand-cron-slash x y lim)
  (let loop ((curr x)
	     (res  `()))
    (if (< curr lim)
	(loop (+ curr y) (cons curr res))
	(reverse res))))

;; expand a complex cron string to a list of cron strings
;;
;;  x/y   => x, x+y, x+2y, x+3y while x+Ny<max_for_field
;;  a,b,c => a, b ,c
;;
;;   NOTE: with flatten a lot of the crud below can be factored down.
;;
(define (common:cron-expand cron-str)
  (if (list? cron-str)
      (flatten
       (fold (lambda (x res)
	       (if (list? x)
		   (let ((newres (map common:cron-expand x)))
		     (append x newres))
		   (cons x res)))
	     '()
	     cron-str)) ;; (map common:cron-expand cron-str))
      (let ((cron-items (string-split cron-str))
	    (slash-rx   (regexp "(\\d+)/(\\d+)"))
	    (comma-rx   (regexp ".*,.*"))
	    (max-vals   '((min        . 60)
			  (hour       . 24)
			  (dayofmonth . 28) ;;; BUG!!!! This will be a bug for some combinations
			  (month      . 12)
			  (dayofweek  . 7))))
	(if (< (length cron-items) 5) ;; bad spec
	    cron-str ;; `(,cron-str)              ;; just return the string, something downstream will fix it
	    (let loop ((hed  (car cron-items))
		       (tal  (cdr cron-items))
		       (type 'min)
		       (type-tal '(hour dayofmonth month dayofweek))
		       (res  '()))
	      (regex-case
		  hed
		(slash-rx ( _ base incr ) (let* ((basen          (string->number base))
						 (incrn          (string->number incr))
						 (expanded-vals  (common:expand-cron-slash basen incrn (alist-ref type max-vals)))
						 (new-list-crons (fold (lambda (x myres)
									 (cons (conc (if (null? res)
											 ""
											 (conc (string-intersperse res " ") " "))
										     x " " (string-intersperse tal " "))
									       myres))
								       '() expanded-vals)))
					    ;; (print "new-list-crons: " new-list-crons)
					    ;; (fold (lambda (x res)
					    ;; 	    (if (list? x)
					    ;; 		(let ((newres (map common:cron-expand x)))
					    ;; 		  (append x newres))
					    ;; 		(cons x res)))
					    ;; 	  '()
					    (flatten (map common:cron-expand new-list-crons))))
		;;					    (map common:cron-expand (map common:cron-expand new-list-crons))))
		(else (if (null? tal)
			  cron-str
			  (loop (car tal)(cdr tal)(car type-tal)(cdr type-tal)(append res (list hed)))))))))))
		      
	    
;; given a cron string and the last time event was processed return #t to run or #f to not run
;;
;;  min    hour   dayofmonth month  dayofweek
;; 0-59    0-23   1-31       1-12   0-6          ### NOTE: dayofweek does not include 7
;;
;;  #t => yes, run the job
;;  #f => no, do not run the job
;;
(define (common:cron-event cron-str now-seconds-in last-done) ;; ref-seconds = #f is NOW.
  (let* ((cron-items     (map string->number (string-split cron-str)))
	 (now-seconds    (or now-seconds-in (current-seconds)))
	 (now-time       (seconds->local-time now-seconds))
	 (last-done-time (seconds->local-time last-done))
	 (all-times      (make-hash-table)))
    ;; (print "cron-items: " cron-items "(length cron-items): " (length cron-items))
    (if (not (eq? (length cron-items) 5)) ;; don't even try to figure out junk strings
	#f
	(match-let (((     cmin chour cdayofmonth cmonth    cdayofweek)
		     cron-items)
		    ;; 0     1    2        3         4    5      6
		    ((nsec nmin nhour ndayofmonth nmonth nyr ndayofweek n7 n8 n9)
		     (vector->list now-time))
		    ((lsec lmin lhour ldayofmonth lmonth lyr ldayofweek l7 l8 l9)
		     (vector->list last-done-time)))
	  ;; create all possible time slots
	  ;; remove invalid slots due to (for example) day of week
	  ;; get the start and end entries for the ref-seconds (current) time
	  ;; if last-done > ref-seconds => this is an ERROR!
	  ;; does the last-done time fall in the legit region?
	  ;;    yes => #f  do not run again this command
	  ;;    no  => #t  ok to run the command
	  (for-each ;; month
	   (lambda (month)
	     (for-each ;; dayofmonth
	      (lambda (dom)
		(for-each
		 (lambda (hr) ;; hour
		   (for-each
		    (lambda (minute) ;; minute
		      (let ((copy-now (apply vector (vector->list now-time))))
			(vector-set! copy-now 0 0) ;; force seconds to zero
			(vector-set! copy-now 1 minute)
			(vector-set! copy-now 2 hr)
			(vector-set! copy-now 3 dom)  ;; dom is already corrected for zero referenced
			(vector-set! copy-now 4 month)
			(let* ((copy-now-secs (local-time->seconds copy-now))
			       (new-copy      (seconds->local-time copy-now-secs))) ;; remake the time vector
			  (if (or (not cdayofweek)
				  (equal? (vector-ref new-copy 6)
					  cdayofweek)) ;; if the day is specified and a match OR if the day is NOT specified
			      (if (or (not cdayofmonth)
				      (equal? (vector-ref new-copy 3)
					      (+ 1 cdayofmonth))) ;; if the month is specified and a match OR if the month is NOT specified
				  (hash-table-set! all-times copy-now-secs new-copy))))))
		    (if cmin
			`(,cmin)  ;; if given cmin, have to use it
			(list (- nmin 1) nmin (+ nmin 1))))) ;; minute
		 (if chour
		     `(,chour)
		     (list (- nhour 1) nhour (+ nhour 1))))) ;; hour
	      (if cdayofmonth
		  `(,cdayofmonth)
		  (list (- ndayofmonth 1) ndayofmonth (+ ndayofmonth 1)))))
	   (if cmonth
	       `(,cmonth)
	       (list (- nmonth 1) nmonth (+ nmonth 1))))
	  (let ((before #f)
		(is-in  #f))
	    (for-each
	     (lambda (moment)
	       (if (and before
			(<= before now-seconds)
			(>= moment now-seconds))
		   (begin
		     ;; (print)
		     ;; (print "Before: " (time->string (seconds->local-time before)))
		     ;; (print "Now:    " (time->string (seconds->local-time now-seconds)))
		     ;; (print "After:  " (time->string (seconds->local-time moment)))
		     ;; (print "Last:   " (time->string (seconds->local-time last-done)))
		     (if (<  last-done before)
			 (set! is-in before))
		     ))
	       (set! before moment))
	     (sort (hash-table-keys all-times) <))
	    is-in)))))

(define (common:extended-cron  cron-str now-seconds-in last-done)
  (let ((expanded-cron (common:cron-expand cron-str)))
    (if (string? expanded-cron)
	(common:cron-event expanded-cron now-seconds-in last-done)
	(let loop ((hed (car expanded-cron))
		   (tal (cdr expanded-cron)))
	  (if (common:cron-event hed now-seconds-in last-done)
	      #t
	      (if (null? tal)
		  #f
		  (loop (car tal)(cdr tal))))))))

;;======================================================================
;; C O L O R S
;;======================================================================
      
(define (common:name->iup-color name)
  (case (string->symbol (string-downcase name))
    ((red)    "223 33 49")
    ((grey)   "192 192 192")
    ((orange) "255 172 13")
    ((purple) "This is unfinished ...")))

;; (define (common:get-color-for-state-status state status)
;;   (case (string->symbol state)
;;     ((COMPLETED)
;;      (case (string->symbol status)
;;        ((PASS)        "70  249 73")
;;        ((WARN WAIVED) "255 172 13")
;;        ((SKIP)        "230 230 0")
;;        (else "223 33 49")))
;;     ((LAUNCHED)         "101 123 142")
;;     ((CHECK)            "255 100 50")
;;     ((REMOTEHOSTSTART)  "50  130 195")
;;     ((RUNNING)          "9   131 232")
;;     ((KILLREQ)          "39  82  206")
;;     ((KILLED)           "234 101 17")
;;     ((NOT_STARTED)      "240 240 240")
;;     (else               "192 192 192")))

(define (common:iup-color->rgb-hex instr)
  (string-intersperse 
   (map (lambda (x)
          (number->string x 16))
        (map string->number
             (string-split instr)))
   "/"))

;; dot-locking egg seems not to work, using this for now
;; if lock is older than expire-time then remove it and try again
;; to get the lock
;;
(define (common:simple-file-lock fname #!key (expire-time 300))
    (if (file-exists? fname)
	(if (> (- (current-seconds)(file-modification-time fname)) expire-time)
	    (begin
              (handle-exceptions exn #f (delete-file* fname))	
	      (common:simple-file-lock fname expire-time: expire-time))
	    #f)
	(let ((key-string (conc (get-host-name) "-" (current-process-id))))
	  (with-output-to-file fname
	    (lambda ()
	      (print key-string)))
	  (thread-sleep! 0.25)
	  (if (file-exists? fname)
	      (handle-exceptions exn
                #f 
                (with-input-from-file fname
	  	  (lambda ()
		    (equal? key-string (read-line)))))
	      #f))))

(define (common:simple-file-lock-and-wait fname #!key (expire-time 300))
  (let ((end-time (+ expire-time (current-seconds))))
    (let loop ((got-lock (common:simple-file-lock fname expire-time: expire-time)))
      (if got-lock
	  #t
	  (if (> end-time (current-seconds))
	      (begin
		(thread-sleep! 3)
		(loop (common:simple-file-lock fname expire-time: expire-time)))
	      #f)))))

(define (common:simple-file-release-lock fname)
  (handle-exceptions
      exn
      #f ;; I don't really care why this failed (at least for now)
    (delete-file* fname)))

;; lazy-safe get file mod time. on any error (file not existing etc.) return 0
;;
(define (common:lazy-modification-time fpath)
  (handle-exceptions
      exn
      0
      (file-modification-time fpath)))

;; find timestamp of newest file associated with a sqlite db file
(define (common:lazy-sqlite-db-modification-time fpath)
  (let* ((glob-list (handle-exceptions
			exn
			`(,(conc "/no/such/file, message: " ((condition-property-accessor 'exn 'message) exn)))
		      (glob (conc fpath "*"))))
         (file-list (if (eq? 0 (length glob-list))
			'("/no/such/file")
			glob-list)))
  (apply max
   (map
    common:lazy-modification-time 
    file-list))))


;; execute thunk, return value.  If exception thrown, trap exception, return #f, and emit nonfatal condition note to *default-log-port* .
;; arguments - thunk, message
(define (common:fail-safe thunk warning-message-on-exception)
  (handle-exceptions
   exn
   (begin
     (debug:print-info 0 *default-log-port* "notable but nonfatal condition - "warning-message-on-exception)
     (debug:print-info 0 *default-log-port*
                       (string-substitute "\n?Error:" "nonfatal condition:"
                                          (with-output-to-string
                                            (lambda ()
                                              (print-error-message exn) ))))
     (debug:print-info 0 *default-log-port* "    -- continuing after nonfatal condition...")
     #f)
   (thunk)))

(define getenv get-environment-variable)
(define (safe-setenv key val)
  (if (or (substring-index "!" key) (substring-index ":" key)) ;; variables containing : are for internal use and cannot be environment variables.
      (debug:print-error 4 *default-log-port* "skip setting internal use only variables containing \":\" or starting with \"!\"")
      (if (and (string? val)
	       (string? key))
	  (handle-exceptions
	      exn
	      (debug:print-error 0 *default-log-port* "bad value for setenv, key=" key ", value=" val)
	    (setenv key val))
	  (debug:print-error 0 *default-log-port* "bad value for setenv, key=" key ", value=" val))))

(define home (getenv "HOME"))
(define user (getenv "USER"))


;; returns list of fd count, socket count
(define (get-file-descriptor-count #!key  (pid (current-process-id )))
  (list
    (length (glob (conc "/proc/" pid "/fd/*")))
    (length  (filter identity (map socket? (glob (conc "/proc/" pid "/fd/*")))))
  )
)

  

;; GLOBALS

;; CONTEXTS
(defstruct cxt
  (taskdb #f)
  (cmutex (make-mutex)))
;; (define *contexts* (make-hash-table))
;; (define *context-mutex* (make-mutex))

;; ;; safe method for accessing a context given a toppath
;; ;;
;; (define (common:with-cxt toppath proc)
;;   (mutex-lock! *context-mutex*)
;;   (let ((cxt (hash-table-ref/default *contexts* toppath #f)))
;;     (if (not cxt)
;;         (set! cxt (let ((x (make-cxt)))(hash-table-set! *contexts* toppath x) x)))
;;     (let ((cxt-mutex (cxt-mutex cxt)))
;;       (mutex-unlock! *context-mutex*)
;;       (mutex-lock! cxt-mutex)
;;       (let ((res (proc cxt)))
;;         (mutex-unlock! cxt-mutex)
;;         res))))
        
;; A hash table that can be accessed by #{scheme ...} calls in
;; config files. Allows communicating between confgs
;;
(define *user-hash-data* (make-hash-table))

(define *db-keys* #f)

(define *pkts-info*    (make-hash-table)) ;; store stuff like the last parent here
(define *configinfo*   #f)   ;; raw results from setup, includes toppath and table from megatest.config
(define *runconfigdat* #f)   ;; run configs data
(define *configdat*    #f)   ;; megatest.config data
(define *configstatus* #f)   ;; status of data; 'fulldata : all processing done, #f : no data yet, 'partialdata : partial read done
(define *toppath*      #f)
(define *already-seen-runconfig-info* #f)

(define *test-meta-updated* (make-hash-table))
(define *globalexitstatus*  0) ;; attempt to work around possible thread issues
(define *passnum*           0) ;; when running track calls to run-tests or similar
;; (define *alt-log-file* #f)  ;; used by -log
(define *common:denoise*    (make-hash-table)) ;; for low noise printing
(define *default-log-port*  (current-error-port))
(define *time-zero* (current-seconds)) ;; for the watchdog
(define *default-area-tag* "local")

;; DATABASE
(define *dbstruct-db*         #f) ;; used to cache the dbstruct in db:setup. Goal is to remove this.
;; db stats
(define *db-stats*            (make-hash-table)) ;; hash of vectors < count duration-total >
(define *db-stats-mutex*      (make-mutex))
;; db access
(define *db-last-access*      (current-seconds)) ;; last db access, used in server
(define *db-write-access*     #t)
;; db sync
(define *db-last-sync*        0)                 ;; last time the sync to megatest.db happened
(define *db-sync-in-progress* #f)                ;; if there is a sync in progress do not try to start another
(define *db-multi-sync-mutex* (make-mutex))      ;; protect access to *db-sync-in-progress*, *db-last-sync*
;; task db
(define *task-db*             #f) ;; (vector db path-to-db)
(define *db-access-allowed*   #t) ;; flag to allow access
(define *db-access-mutex*     (make-mutex))
(define *db-transaction-mutex* (make-mutex))
(define *db-cache-path*       #f)
(define *db-with-db-mutex*    (make-mutex))

;; no sync db
(define *no-sync-db*          #f)

;; SERVER
(define *my-client-signature* #f)
(define *transport-type*    'http)             ;; override with [server] transport http|rpc|nmsg
;; (define *max-cache-size*    0)
(define *logged-in-clients* (make-hash-table))
(define *server-id*         #f)
(define *server-info*       #f)  ;; good candidate for easily convert to non-global
(define *time-to-exit*      #f)
(define *server-run*        #t)
(define *run-id*            #f)
(define *server-kind-run*   (make-hash-table))
(define *home-host*         #f)
;; (define *total-non-write-delay* 0)
(define *heartbeat-mutex*   (make-mutex))
(define *server-overloaded*  #f)

;; client
(define *rmt-mutex*         (make-mutex))     ;; remote access calls mutex 

;; RPC transport
(define *rpc:listener*      #f)

;; KEY info
(define *target*            (make-hash-table)) ;; cache the target here; target is keyval1/keyval2/.../keyvalN
(define *keys*              (make-hash-table)) ;; cache the keys here
(define *keyvals*           (make-hash-table))
(define *toptest-paths*     (make-hash-table)) ;; cache toptest path settings here
(define *test-paths*        (make-hash-table)) ;; cache test-id to test run paths here
(define *test-ids*          (make-hash-table)) ;; cache run-id, testname, and item-path => test-id
(define *test-info*         (make-hash-table)) ;; cache the test info records, update the state, status, run_duration etc. from testdat.db

(define *run-info-cache*     (make-hash-table)) ;; run info is stable, no need to reget
(define *launch-setup-mutex* (make-mutex))     ;; need to be able to call launch:setup often so mutex it and re-call the real deal only if *toppath* not set
(define *homehost-mutex*     (make-mutex))

;; Miscellaneous
(define *triggers-mutex*     (make-mutex))     ;; block overlapping processing of triggers

(use posix-extras pathname-expand files)

;; this plugs a hole in posix-extras in recent chicken versions > 4.9)
(let-values (( (chicken-release-number chicken-major-version)
               (apply values
                      (map string->number
                           (take
                            (string-split (chicken-version) ".")
                            2)))))
  (let ((resolve-pathname-broken?
         (or (> chicken-release-number 4)
             (and (eq? 4 chicken-release-number) (> chicken-major-version 9)))))
    (if resolve-pathname-broken?
        (define ##sys#expand-home-path pathname-expand))))
      
(define (realpath x) (resolve-pathname  (pathname-expand (or x "/dev/null")) ))

(define (common:get-this-exe-fullpath #!key (argv (argv)))
  (let* ((this-script
          (cond
           ((and (> (length argv) 2)
                 (string-match "^(.*/csi|csi)$" (car argv))
                 (string-match "^-(s|ss|sx|script)$" (cadr argv)))
            (caddr argv))
           (else (car argv))))
         (fullpath (realpath this-script)))
    fullpath))
(define *common:this-exe-fullpath* (common:get-this-exe-fullpath))
(define *common:this-exe-dir*      (pathname-directory *common:this-exe-fullpath*))
(define *common:this-exe-name*     (pathname-strip-directory *common:this-exe-fullpath*))

(define *common:logpro-exit-code->status-sym-alist*
  '( ( 0 . pass )
     ( 1 . fail )
     ( 2 . warn )
     ( 3 . check )
     ( 4 . waived )
     ( 5 . abort )
     ( 6 . skip )))

(define (common:logpro-exit-code->status-sym exit-code)
  (or (alist-ref exit-code *common:logpro-exit-code->status-sym-alist*) 'fail))

(define (common:worse-status-sym ss1 ss2)
  (let loop ((status-syms-remaining '(abort fail check skip warn waived pass)))
    (cond
     ((null? status-syms-remaining)
      'fail)
     ((eq? (car status-syms-remaining) ss1)
      ss1)
     ((eq? (car status-syms-remaining) ss2)
      ss2)
     (else
      (loop (cdr status-syms-remaining))))))

(define (common:steps-can-proceed-given-status-sym status-sym)
  (if (member status-sym '(warn waived pass))
      #t
      #f))

(define (status-sym->string status-sym)
  (case status-sym
      ((pass) "PASS")
    ((fail) "FAIL")
    ((warn) "WARN")
    ((check) "CHECK")
    ((waived) "WAIVED")
    ((abort) "ABORT")
    ((skip) "SKIP")
    (else "FAIL")))

(define (common:logpro-exit-code->test-status exit-code)
  (status-sym->string (common:logpro-exit-code->status-sym exit-code)))

;; launching and hosts
(defstruct host
  (reachable    #f)
  (last-update  0)
  (last-used    0)
  (last-cpuload 1))

(define *host-loads*         (make-hash-table))

;; cache environment vars for each run here
(define *env-vars-by-run-id* (make-hash-table))

;; Testconfig and runconfig caches. 
(define *testconfigs*        (make-hash-table)) ;; test-name => testconfig
(define *runconfigs*         (make-hash-table)) ;; target    => runconfig

;; This is a cache of pre-reqs met, don't re-calc in cases where called with same params less than
;; five seconds ago
(define *pre-reqs-met-cache* (make-hash-table))

;; cache of verbosity given string
;;
(define *verbosity-cache*    (make-hash-table))

(define (common:clear-caches)
  (set! *target*             (make-hash-table))
  (set! *keys*               (make-hash-table))
  (set! *keyvals*            (make-hash-table))
  (set! *toptest-paths*      (make-hash-table))
  (set! *test-paths*         (make-hash-table))
  (set! *test-ids*           (make-hash-table))
  (set! *test-info*          (make-hash-table))
  (set! *run-info-cache*     (make-hash-table))
  (set! *env-vars-by-run-id* (make-hash-table))
  (set! *test-id-cache*      (make-hash-table)))

;; Generic string database
(define sdb:qry #f) ;; (make-sdb:qry)) ;;  'init #f)
;; Generic path database
(define *fdb* #f)

(define *last-launch* (current-seconds)) ;; use for throttling the launch rate. Would be better to use the db and last time of a test in LAUNCHED state.

;;======================================================================
;; V E R S I O N
;;======================================================================

(define (common:get-full-version)
  (conc megatest-version "-" megatest-fossil-hash))

;; from metadat lookup MEGATEST_VERSION
;;
(define (common:get-last-run-version) ;; RADT => How does this work in send-receive function??; assume it is the value saved in some DB
  (rmt:get-var "MEGATEST_VERSION"))

(define (common:get-last-run-version-number)
  (string->number 
   (substring (common:get-last-run-version) 0 6)))

(define (common:set-last-run-version)
  (rmt:set-var "MEGATEST_VERSION" (common:version-signature)))

;; postive number if megatest version > db version
;; negative number if megatest version < db version
(define (common:version-db-delta)
         (- megatest-version (common:get-last-run-version-number)))

(define (common:version-changed?)
  (not (equal? (common:get-last-run-version)
               (common:version-signature))))

(define (common:api-changed?)
  (not (equal? (substring (->string megatest-version) 0 4)
               (substring (conc (common:get-last-run-version)) 0 4))))

(define (common:get-sync-lock-filepath alldat)
  (let* ((tmp-area     (common:get-db-tmp-area alldat))
         (lockfile     (conc tmp-area "/megatest.db.sync-lock")))
    lockfile))
    
;; Move me elsewhere ...
;; RADT => Why do we meed the version check here, this is called only if version misma
;;
(define (common:cleanup-db dbstruct #!key (full #f))
  (apply db:multi-db-sync 
   dbstruct
   'schema
   ;; 'new2old
   'killservers
   'adj-target
   ;; 'old2new
   'new2old
   ;; (if full
       '(dejunk)
       ;; '())
       )
  (if (common:api-changed?)
      (common:set-last-run-version)))

(define (common:snapshot-file filepath #!key (subdir  ".") )
  (if (file-exists? filepath)
      (let* ((age-sec  (lambda (file)
                         (if (file-exists? file)
                             (- (current-seconds) (file-modification-time file))
                             1000000000))) ;; return really old value if file doesn't exist.  we want to clobber it if old or not exist.
             (ok-flag  #t)
             (age-mins (lambda (file) (/ (age-sec   file) 60)))
             (age-hrs  (lambda (file) (/ (age-mins file) 60)))
             (age-days (lambda (file) (/ (age-hrs   file) 24)))
             (age-wks  (lambda (file) (/ (age-days  file) 7)))
             (docmd    (lambda (cmd)
                         (cond
                          (ok-flag
                           (let ((res (system cmd)))
                             (cond
                              ((eq? 0 res)
                               #t)
                              (else
                               (set! ok-flag #f)
                               (debug:print 0 *default-log-port* "ERROR: ["(common:human-time)"] Command failed with exit code "
                                            (if (< res 0)
                                                res
                                                (/ res 8)) " ["cmd"]" )
                               #f))))
                          (else
                           (debug:print 0 *default-log-port* "ERROR: ["(common:human-time)"] Not runnining command due to prior error. ["cmd"]")
                           #f))))
             (copy     (lambda (src dest) (docmd (conc "/bin/cp '"src"' '"dest"'"))))
             (copy+zip (lambda (src dest) (docmd (conc "gzip -c - < '"src"' > '"dest"'"))))
             (fullpath (realpath filepath))
             (basedir  (pathname-directory fullpath))
             (basefile (pathname-strip-directory fullpath))
             ;;(prevfile (conc filepath ".prev.gz"))
             (minsfile (conc basedir "/" subdir "/" basefile ".mins.gz"))
             (hrsfile  (conc basedir "/" subdir "/" basefile ".hrs.gz"))
             (daysfile (conc basedir "/" subdir "/" basefile ".days.gz"))
             (wksfile  (conc basedir "/" subdir "/" basefile ".weeks.gz")))

        ;; create subdir it not exists
        (if (not (directory-exists? (conc basedir "/" subdir)))
            (docmd (conc "/bin/mkdir -p '"(conc basedir "/" subdir)"'")))
        
        ;; copy&zip <file> to <file>.mins if not exists
        (if (not (file-exists? minsfile))
            (copy+zip filepath minsfile))
        ;; copy <file>.mins to <file>.hrs if not exists
        (if (not (file-exists? hrsfile))
            (copy minsfile hrsfile))
        ;; copy <file>.hrs to <file>.days if not exists
        (if (not (file-exists? daysfile))
            (copy hrsfile daysfile))
        ;; copy <file>.days to <file>.weeks if not exists
        (if (not (file-exists? wksfile))
            (copy daysfile wksfile))


        ;; if age(<file>.mins.gz) >= 1h:
        ;;     copy <file>.mins.gz <file>.hrs.gz
        ;;     copy <prev file> <file>.mins.gz
        (when (>= (age-mins minsfile) 1)
          (copy minsfile hrsfile)
          (copy+zip filepath  minsfile))
        
        ;; if age(<file>.hrs.gz) >= 1d:
        ;;     copy <file>.hrs.gz <file>.days.gz
        ;;     copy <file>.mins.gz <file>.hrs.gz
        (when (>= (age-days hrsfile) 1)
          (copy hrsfile daysfile)
          (copy minsfile hrsfile))
        
        ;; if age(<file>.days.gz) >= 1w:
        ;;     copy <file>.days.gz <file>.weeks.gz
        ;;     copy <file>.hrs.gz <file>.days.gz
        (when (>= (age-wks daysfile) 1)
          (copy daysfile wksfile)
          (copy hrsfile daysfile))
        #t)
      #f))
  
        
        
;; Rotate logs, logic: 
;;                 if > 500k and older than 1 week:
;;                     remove previous compressed log and compress this log
;; WARNING: This proc operates assuming that it is in the directory above the
;;          logs directory you wish to log-rotate.
;;
(define (common:rotate-logs)
  (let* ((all-files (make-hash-table))
	 (stats     (make-hash-table))
	 (inc-stat  (lambda (key)
		      (hash-table-set! stats key (+ (hash-table-ref/default stats key 0) 1))))
	(max-allowed (string->number (or (configf:lookup *configdat* "setup" "max-logfiles") "300")))) ;; name -> age
    (if (not (directory-exists? "logs"))(create-directory "logs"))
    (directory-fold 
     (lambda (file rem)
       (handle-exceptions
	exn
	(begin
	  (debug:print-info 0 *default-log-port* "unable to rotate log " file ", probably handled by another process.")
	  (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
	  (print-call-chain (current-error-port)))
	(let* ((fullname  (conc "logs/" file))
	       (mod-time  (file-modification-time fullname))
	       (file-age  (- (current-seconds) mod-time)))
	  (hash-table-set! all-files file mod-time)
	  (if (or (and (string-match "^.*.log" file)
		       (> (file-size fullname) 200000))
		  (and (string-match "^server-.*.log" file)
		       (> (- (current-seconds) (file-modification-time fullname))
			  (* 8 60 60))))
	      (let ((gzfile (conc fullname ".gz")))
		(if (common:file-exists? gzfile)
		    (begin
		      (debug:print-info 0 *default-log-port* "removing " gzfile)
		      (delete-file* gzfile)
		      (hash-table-delete!  all-files gzfile) ;; needed?
		      ))
		(debug:print-info 0 *default-log-port* "compressing " file)
		(system (conc "gzip " fullname))
		(inc-stat "gzipped")
		(hash-table-set! all-files (conc file ".gz") file-age)  ;; add the .gz file and remove the base file
		(hash-table-delete! all-files file)
		)
	      (if (> file-age (* (string->number (or (configf:lookup *configdat* "setup" "log-expire-days") "30")) 24 3600))
		  (handle-exceptions
		   exn
		   #f
		   (delete-file* fullname)
		   (inc-stat "deleted")
		   (hash-table-delete! all-files file)))))))
     '()
     "logs")
    (debug:print-info 0 *default-log-port* "Deleted log files: " (hash-table-ref/default stats "deleted" 0))
    (debug:print-info 0 *default-log-port* "Gzipped log files: " (hash-table-ref/default stats "gzipped" 0))    
    (let ((num-logs (hash-table-size all-files)))
      (if (> num-logs max-allowed) ;; because NFS => don't let number of logs exceed 300
	  (let ((files (take (sort (hash-table-keys all-files)
				   (lambda (a b)
				     (< (hash-table-ref all-files a)(hash-table-ref all-files b))))
			     (- num-logs max-allowed))))
	    (for-each
	     (lambda (file)
	       (delete-file* (conc "logs/" file)))
	     files)
	    (debug:print-info 0 *default-log-port* "Deleted " (length files) " from logs, keeping " max-allowed " files."))))))
  
;; Force a megatest cleanup-db if version is changed and skip-version-check not specified
;; Do NOT check if not on homehost!
;;
(define (common:exit-on-version-changed)
  (if (common:on-homehost?)
      (if (common:api-changed?)
	  (let* ((mtconf (conc (get-environment-variable "MT_RUN_AREA_HOME") "/megatest.config"))
                (dbfile (conc (get-environment-variable "MT_RUN_AREA_HOME") "/megatest.db"))
                (read-only (not (file-write-access? dbfile)))
                (dbstruct (db:setup #t)))
	    (debug:print 0 *default-log-port*
			 "WARNING: Version mismatch!\n"
			 "   expected: " (common:version-signature) "\n"
			 "   got:      " (common:get-last-run-version))
            (cond
             ((get-environment-variable "MT_SKIP_DB_MIGRATE") #t)
             ((and (common:file-exists? mtconf) (common:file-exists? dbfile) (not read-only)
                   (eq? (current-user-id)(file-owner mtconf))) ;; safe to run -cleanup-db
              (debug:print 0 *default-log-port* "   I see you are the owner of megatest.config, attempting to cleanup and reset to new version")
              (handle-exceptions
               exn
               (begin
                 (debug:print 0 *default-log-port* "Failed to switch versions.")
                 (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
                 (print-call-chain (current-error-port))
                 (exit 1))
               (common:cleanup-db dbstruct)))
             ((not (common:file-exists? mtconf))
              (debug:print 0 *default-log-port* "   megatest.config does not exist in this area.  Cannot proceed with megatest version migration.")
              (exit 1))
             ((not (common:file-exists? dbfile))
              (debug:print 0 *default-log-port* "   megatest.db does not exist in this area.  Cannot proceed with megatest version migration.")
              (exit 1))
             ((not (eq? (current-user-id)(file-owner mtconf)))
              (debug:print 0 *default-log-port* "   You do not own megatest.db in this area.  Cannot proceed with megatest version migration.")
              (exit 1))
             (read-only
              (debug:print 0 *default-log-port* "   You have read-only access to this area.  Cannot proceed with megatest version migration.")
              (exit 1))
             (else
              (debug:print 0 *default-log-port* " to switch versions you can run: \"megatest -cleanup-db\"")
              (exit 1)))))))
;;      (begin
;;	(debug:print 0 *default-log-port* "ERROR: cannot migrate version unless on homehost. Exiting.")
;;	(exit 1))))

;;======================================================================
;; S P A R S E   A R R A Y S
;;======================================================================

(define (make-sparse-array)
  (let ((a (make-sparse-vector)))
    (sparse-vector-set! a 0 (make-sparse-vector))
    a))

(define (sparse-array? a)
  (and (sparse-vector? a)
       (sparse-vector? (sparse-vector-ref a 0))))

(define (sparse-array-ref a x y)
  (let ((row (sparse-vector-ref a x)))
    (if row
	(sparse-vector-ref row y)
	#f)))

(define (sparse-array-set! a x y val)
  (let ((row (sparse-vector-ref a x)))
    (if row
	(sparse-vector-set! row y val)
	(let ((new-row (make-sparse-vector)))
	  (sparse-vector-set! a x new-row)
	  (sparse-vector-set! new-row y val)))))

;;======================================================================
;; L O C K E R S   A N D   B L O C K E R S 
;;======================================================================

;; block further accesses to databases. Call this before shutting db down
(define (common:db-block-further-queries)
  (mutex-lock! *db-access-mutex*)
  (set! *db-access-allowed* #f)
  (mutex-unlock! *db-access-mutex*))

(define (common:db-access-allowed?)
  (let ((val (begin
	       (mutex-lock! *db-access-mutex*)
	       *db-access-allowed*
	       (mutex-unlock! *db-access-mutex*))))
    val))

;;======================================================================
;; U S E F U L   S T U F F
;;======================================================================

;; convert things to an alist or assoc list, #f gets converted to ""
;;
(define (common:to-alist dat)
  (cond
   ((list? dat)   (map common:to-alist dat))
   ((vector? dat)
    (map common:to-alist (vector->list dat)))
   ((pair? dat)
    (cons (common:to-alist (car dat))
	  (common:to-alist (cdr dat))))
   ((hash-table? dat)
    (map common:to-alist (hash-table->alist dat)))
   (else
    (if dat
	dat
	""))))

(define (common:alist-ref/default key alist default)
  (or (alist-ref key alist) default))

(define (common:get-megatest-exe)
  (or (getenv "MT_MEGATEST") "megatest"))

(define (common:read-encoded-string instr)
  (handle-exceptions
   exn
   (handle-exceptions
    exn
    (begin
      (debug:print-error 0 *default-log-port* "received bad encoded string \"" instr "\", message: " ((condition-property-accessor 'exn 'message) exn))
      (print-call-chain (current-error-port))
      #f)
    (read (open-input-string (base64:base64-decode instr))))
   (read (open-input-string (z3:decode-buffer (base64:base64-decode instr))))))

;;======================================================================
;; S T A T E S   A N D   S T A T U S E S
;;======================================================================

;; BBnote: *common:std-states* - dashboard filter control and test control state buttons defined here; used in set-fields-panel and dboard:make-controls
(define *common:std-states*   ;; for toggle buttons in dashboard
  '(
    (0 "ARCHIVED")
    (1 "STUCK")
    (2 "KILLREQ")
    (3 "KILLED")
    (4 "NOT_STARTED")
    (5 "COMPLETED")
    (6 "LAUNCHED")
    (7 "REMOTEHOSTSTART")
    (8 "RUNNING")
    ))

(define *common:dont-roll-up-states*
  '("DELETED"
    "REMOVING"
    "CLEANING"
    "ARCHIVE_REMOVING"
    ))

;; BBnote: *common:std-statuses* dashboard filter control and test control status buttons defined here; used in set-fields-panel and dboard:make-controls
;; note these statuses are sorted from better to worse.
;; This sort order is important to dcommon:status-compare3 and db:set-state-status-and-roll-up-items
(define *common:std-statuses*
  '(;; (0 "DELETED")  
    (1 "n/a")
    (2 "PASS")
    (3 "SKIP")
    (4 "WARN")
    (5 "WAIVED")
    (6 "CHECK")
    (7 "STUCK/DEAD")
    (8 "DEAD")
    (9 "FAIL")
    (10 "PREQ_FAIL")
    (11 "PREQ_DISCARDED")
    (12 "ABORT")))

(define *common:ended-states*       ;; states which indicate the test is stopped and will not proceed
  '("COMPLETED" "ARCHIVED" "KILLED" "KILLREQ" "STUCK" "INCOMPLETE" ))

(define *common:badly-ended-states* ;; these roll up as CHECK, i.e. results need to be checked
  '("KILLED" "KILLREQ" "STUCK" "INCOMPLETE" "DEAD"))

(define *common:well-ended-states* ;; an item's prereq in this state allows item to proceed
  '("PASS" "WARN" "CHECK" "WAIVED" "SKIP"))

;; BBnote: *common:running-states* used from db:set-state-status-and-roll-up-items
(define *common:running-states*     ;; test is either running or can be run
  '("RUNNING" "REMOTEHOSTSTART" "LAUNCHED" "STARTED"))

(define *common:cant-run-states*    ;; These are stopping conditions that prevent a test from being run
  '("COMPLETED" "KILLED" "UNKNOWN" "INCOMPLETE" "ARCHIVED"))

(define *common:not-started-ok-statuses* ;; if not one of these statuses when in not_started state treat as dead
  '("n/a" "na" "PASS" "FAIL" "WARN" "CHECK" "WAIVED" "DEAD" "SKIP"))

;; group tests into buckets corresponding to rollup
;;; Running, completed-pass,  completed-non-pass + worst status, not started.
;; filter out 
;(define (common:categorize-items-for-rollup in-tests)
;  (

(define (common:special-sort items order comp)
  (let ((items-order (map reverse order))
        (acomp       (or comp >)))
    (sort items
        (lambda (a b)
          (let ((a-num (cadr (or (assoc a items-order) '(0 0))))
                (b-num (cadr (or (assoc b items-order) '(0 0)))))
            (acomp a-num b-num))))))

;; ;; given a toplevel with currstate, currstatus apply state and status
;; ;;  => (newstate . newstatus)
;; (define (common:apply-state-status currstate currstatus state status)
;;   (let* ((cstate  (string->symbol (string-downcase currstate)))
;;          (cstatus (string->symbol (string-downcase currstatus)))
;;          (sstate  (string->symbol (string-downcase state)))
;;          (sstatus (string->symbol (string-downcase status)))
;;          (nstate  #f)
;;          (nstatus #f))
;;     (set! nstate
;;           (case cstate
;;             ((completed not_started killed killreq stuck archived) 
;;              (case sstate ;; completed -> sstate
;;                ((completed killed killreq stuck archived) completed)
;;                ((running remotehoststart launched)        running)
;;                (else                                      unknown-error-1)))
;;             ((running remotehoststart launched)
;;              (case sstate
;;                ((completed killed killreq stuck archived) #f) ;; need to look at all items
;;                ((running remotehoststart launched)        running)
;;                (else                                      unknown-error-2)))
;;             (else unknown-error-3)))
;;     (set! nstatus
;;           (case sstatus
;;             ((pass)
;;              (case nstate
;;                ((pass n/a deleted)     pass)
;;                ((warn)                 warn)
;;                ((fail)                 fail)
;;                ((check)               check)
;;                ((waived)             waived)
;;                ((skip)                 skip)
;;                ((stuck/dead)          stuck)
;;                ((abort)               abort)
;;                (else        unknown-error-4)))
;;             ((warn)
;;              (case nstate
;;                ((pass warn n/a skip deleted)   warn)
;;                ((fail)                         fail)
;;                ((check)                       check)
;;                ((waived)                     waived)
;;                ((stuck/dead)                  stuck)
;;                (else                unknown-error-5)))
;;             ((fail)
;;              (case nstate
;;                ((pass warn fail check n/a waived skip deleted stuck/dead stuck)  fail)
;;                ((abort)                                                         abort)
;;                (else                                                  unknown-error-6)))
;;             (else    unknown-error-7)))
;;     (cons 
;;      (if nstate  (symbol->string nstate)  nstate)
;;      (if nstatus (symbol->string nstatus) nstatus))))
               
;;======================================================================
;; D E B U G G I N G   S T U F F 
;;======================================================================

(define *verbosity*         1)
(define *logging*           #f)

(define (get-with-default val default)
  (let ((val (args:get-arg val)))
    (if val val default)))

(define (assoc/default key lst . default)
  (let ((res (assoc key lst)))
    (if res (cadr res)(if (null? default) #f (car default)))))


(define (common:get-area-path-signature)
  (message-digest-string (md5-primitive) *toppath*))

(define (common:get-signature str)
  (message-digest-string (md5-primitive) str))

;;======================================================================
;; E X I T   H A N D L I N G
;;======================================================================

(define (common:run-sync?)
    (and (common:on-homehost?)
	 (args:get-arg "-server")))

;;   (let ((ohh (common:on-homehost?))
;; 	(srv (args:get-arg "-server")))
;;     (and ohh srv)))
    ;; (debug:print-info 0 *default-log-port* "common:run-sync? ohh=" ohh ", srv=" srv)



(define *wdnum* 0)
(define *wdnum*mutex (make-mutex))


(define (common:human-time)
  (time->string (seconds->local-time (current-seconds)) "%Y-%m-%d %H:%M:%S"))


;; currently the primary job of the watchdog is to run the sync back to megatest.db from the db in /tmp
;; if we are on the homehost and we are a server (by definition we are on the homehost if we are a server)
;;
(define (common:readonly-watchdog dbstruct)
  (thread-sleep! 0.05) ;; delay for startup
  (debug:print-info 13 *default-log-port* "common:readonly-watchdog entered.")
  ;; sync megatest.db to /tmp/.../megatst.db
  (let* ((sync-cool-off-duration   3)
        (golden-mtdb     (dbr:dbstruct-mtdb dbstruct))
        (golden-mtpath   (db:dbdat-get-path golden-mtdb))
        (tmp-mtdb        (dbr:dbstruct-tmpdb dbstruct))
        (tmp-mtpath      (db:dbdat-get-path tmp-mtdb)))
    (debug:print-info 0 *default-log-port* "Read-only periodic sync thread started.")
    (let loop ((last-sync-time 0))
      (debug:print-info 13 *default-log-port* "loop top tmp-mtpath="tmp-mtpath" golden-mtpath="golden-mtpath)
      (let* ((duration-since-last-sync (- (current-seconds) last-sync-time)))
        (debug:print-info 13 *default-log-port* "duration-since-last-sync="duration-since-last-sync)
        (if (and (not *time-to-exit*)
                 (< duration-since-last-sync sync-cool-off-duration))
            (thread-sleep! (- sync-cool-off-duration duration-since-last-sync)))
        (if (not *time-to-exit*)
            (let ((golden-mtdb-mtime (file-modification-time golden-mtpath))
                  (tmp-mtdb-mtime    (file-modification-time tmp-mtpath)))
	      (if (> golden-mtdb-mtime tmp-mtdb-mtime)
		  (if (< golden-mtdb-mtime (- (current-seconds) 3)) ;; file has NOT been touched in past three seconds, this way multiple servers won't fight to sync back
		      (let ((res (db:multi-db-sync dbstruct 'old2new)))
			(debug:print-info 13 *default-log-port* "rosync called, " res " records transferred."))))
              (loop (current-seconds)))
            #t)))
    (debug:print-info 0 *default-log-port* "Exiting readonly-watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id)" mtpath="golden-mtpath)))


(define (std-exit-procedure)
  ;;(common:telemetry-log-close)
  (on-exit (lambda () 0))
  ;;(debug:print-info 13 *default-log-port* "std-exit-procedure called; *time-to-exit*="*time-to-exit*)
  (let ((no-hurry  (if *time-to-exit* ;; hurry up
		       #f
		       (begin
			 (set! *time-to-exit* #t)
			 #t))))
    (debug:print-info 4 *default-log-port* "starting exit process, finalizing databases.")
    (if (and no-hurry (debug:debug-mode 18))
	(rmt:print-db-stats))
    (let ((th1 (make-thread (lambda () ;; thread for cleaning up, give it five seconds
                              (if *dbstruct-db* (db:close-all *dbstruct-db*)) ;; one second allocated
			      (if *task-db*    
				  (let ((db (cdr *task-db*)))
				    (if (sqlite3:database? db)
					(begin
					  (sqlite3:interrupt! db)
					  (sqlite3:finalize! db #t)
					  ;; (vector-set! *task-db* 0 #f)
					  (set! *task-db* #f)))))
                              (http-client#close-all-connections!)
                              (if (not (eq? *default-log-port* (current-error-port)))
                                  (close-output-port *default-log-port*))
			      (set! *default-log-port* (current-error-port))) "Cleanup db exit thread"))
	  (th2 (make-thread (lambda ()
			      (debug:print 4 *default-log-port* "Attempting clean exit. Please be patient and wait a few seconds...")
			      (if no-hurry
                                  (begin
                                    (thread-sleep! 5)) ;; give the clean up few seconds to do it's stuff
                                  (begin
      				  (thread-sleep! 2)))
      			      (debug:print 4 *default-log-port* " ... done")
      			      )
			    "clean exit")))
      (thread-start! th1)
      (thread-start! th2)
      (thread-join! th1)
      )
    )

  0)

(define (std-signal-handler signum)
  ;; (signal-mask! signum)
  (set! *time-to-exit* #t) 
  ;;(debug:print-info 13 *default-log-port* "got signal "signum)
  (debug:print-error 0 *default-log-port* "Received signal " signum " aaa exiting promptly")
  ;; (std-exit-procedure) ;; shouldn't need this since we are exiting and it will be called anyway
  (exit))

(define (special-signal-handler signum)
  ;; (signal-mask! signum)
  (set! *time-to-exit* #t)
  ;;(debug:print-info 13 *default-log-port* "got signal "signum)
  (debug:print-error 0 *default-log-port* "Received signal " signum " sending email befor exiting!!")
  ;;TODO send email to notify admin contact listed in the config that the lisner got killed
  ;; (std-exit-procedure) ;; shouldn't need this since we are exiting and it will be called anyway
  (exit))


(set-signal-handler! signal/int  std-signal-handler)  ;; ^C
(set-signal-handler! signal/term std-signal-handler)

;; (set-signal-handler! signal/stop std-signal-handler)  ;; ^Z NO, do NOT handle ^Z!

;;======================================================================
;; M I S C   U T I L S
;;======================================================================

;; convert stuff to a number if possible
(define (any->number val)
  (cond 
   ((number? val) val)
   ((string? val) (string->number val))
   ((symbol? val) (any->number (symbol->string val)))
   (else #f)))

(define (any->number-if-possible val)
  (let ((num (any->number val)))
    (if num num val)))

(define (patt-list-match item patts)
  (debug:print-info 8 *default-log-port* "patt-list-match item=" item " patts=" patts)
  (if (and item patts)  ;; here we are filtering for matches with item patterns
      (let ((res #f))   ;; look through all the item-patts if defined, format is patt1,patt2,patt3 ... wildcard is %
	(for-each 
	 (lambda (patt)
	   (let ((modpatt (string-substitute "%" ".*" patt #t)))
	     (debug:print-info 10 *default-log-port* "patt " patt " modpatt " modpatt)
	     (if (string-match (regexp modpatt) item)
		 (set! res #t))))
	 (string-split patts ","))
	res)
      #t))

;; return first command that exists, else #f
;;
(define (common:which cmds)
  (if (null? cmds)
      #f
      (let loop ((hed (car cmds))
		 (tal (cdr cmds)))
	(let ((res (with-input-from-pipe (conc "which " hed) read-line)))
	  (if (and (string? res)
		   (common:file-exists? res))
	      res
	      (if (null? tal)
		  #f
		  (loop (car tal)(cdr tal))))))))
  
(define (common:get-install-area)
  (let ((exe-path (car (argv))))
    (if (common:file-exists? exe-path)
	(handle-exceptions
	 exn
	 #f
	 (pathname-directory
	  (pathname-directory 
	   (pathname-directory exe-path))))
	#f)))

;; return the youngest timestamp . filename
;;
(define (common:get-youngest glob-list)
  (let ((all-files (apply append
			  (map (lambda (patt)
				 (handle-exceptions
				     exn
				     '()
				   (glob patt)))
			       glob-list))))
    (fold (lambda (fname res)
	    (let ((last-mod (car res))
		  (curmod   (handle-exceptions
				exn
				0
			      (file-modification-time fname))))
	      (if (> curmod last-mod)
		  (list curmod fname)
		  res)))
	  '(0 "n/a")
	  all-files)))

;; use bash to expand a glob. Does NOT handle paths with spaces!
;;
(define (common:bash-glob instr)
  (string-split
   (with-input-from-pipe
       (conc "/bin/bash -c \"echo " instr "\"")
     read-line)))
  



(define (common:false-on-exception thunk #!key (message #f))
  (handle-exceptions exn
                     (begin
                       (if message
                           (debug:print-info 0 *default-log-port* message))
                       #f) (thunk) ))

(define (common:file-exists? path-string #!key (silent #f))
  ;; this avoids stack dumps in the case where 

  ;;;; TODO: catch permission denied exceptions and emit appropriate warnings, eg:  system error while trying to access file: "/nfs/pdx/disks/icf_env_disk001/bjbarcla/gwa/issues/mtdev/randy-slow/reproduce/q...
  (common:false-on-exception (lambda () (file-exists? path-string))
                             message: (if (not silent)
                                          (conc "Unable to access path: " path-string)
                                          #f)
                             ))

(define (common:directory-exists? path-string)
  ;;;; TODO: catch permission denied exceptions and emit appropriate warnings, eg:  system error while trying to access file: "/nfs/pdx/disks/icf_env_disk001/bjbarcla/gwa/issues/mtdev/randy-slow/reproduce/q...
  (common:false-on-exception (lambda () (directory-exists? path-string))
                             message: (conc "Unable to access path: " path-string)
                             ))

;; does the directory exist and do we have write access?
;;
;;    returns the directory or #f
;;
(define (common:directory-writable? path-string)
  (handle-exceptions
   exn
   #f
   (if (and (directory-exists? path-string)
            (file-write-access? path-string))
       path-string
       #f)))

(define (common:get-linktree)
  (or (getenv "MT_LINKTREE")
      (if *configdat*
	  (configf:lookup *configdat* "setup" "linktree")
	  (if *toppath*
	      (conc *toppath* "/lt")
	      #f))))

(define (common:args-get-runname)
  (let ((res (or (args:get-arg "-runname")
		 (args:get-arg ":runname")
		 (getenv "MT_RUNNAME"))))
    ;; (if res (set-environment-variable "MT_RUNNAME" res)) ;; not sure if this is a good idea. side effect and all ...
    res))

(define (common:args-get-target #!key (split #f)(exit-if-bad #f))
  (let* ((keys    (if (hash-table? *configdat*) (common:get-fields *configdat*) '()))
	 (numkeys (length keys))
	 (target  (or (args:get-arg "-reqtarg")
		      (args:get-arg "-target")
		      (getenv "MT_TARGET")))
	 (tlist   (if target (string-split target "/" #t) '()))
	 (valid   (if target
		      (or (null? keys) ;; probably don't know our keys yet
			  (and (not (null? tlist))
			       (eq? numkeys (length tlist))
			       (null? (filter string-null? tlist))))
		      #f)))
    (if valid
	(if split
	    tlist
	    target)
	(if target
	    (begin
	      (debug:print-error 0 *default-log-port* "Invalid target, spaces or blanks not allowed \"" target "\", target should be: " (string-intersperse keys "/") ", have " tlist " for elements")
	      (if exit-if-bad (exit 1))
	      #f)
	    #f))))

;; looking only (at least for now) at the MT_ variables craft the full testname
;;
(define (common:get-full-test-name)
  (if (getenv "MT_TEST_NAME")
      (if (and (getenv "MT_ITEMPATH")
               (not (equal? (getenv "MT_ITEMPATH") "")))
          (getenv "MT_TEST_NAME")
          (conc (getenv "MT_TEST_NAME") "/" (getenv "MT_ITEMPATH")))
      #f))

;; logic for getting homehost. Returns (host . at-home)
;; IF *toppath* is not set, wait up to five seconds trying every two seconds
;; (this is to accomodate the watchdog)
;;
(define (common:get-homehost #!key (trynum 5))
  ;; called often especially at start up. use mutex to eliminate collisions
  (mutex-lock! *homehost-mutex*)
  (cond
   (*home-host*
    (mutex-unlock! *homehost-mutex*)
    *home-host*)
   ((not *toppath*)
    (mutex-unlock! *homehost-mutex*)
    (launch:setup) ;; safely mutexed now
    (if (> trynum 0)
	(begin
	  (thread-sleep! 2)
	  (common:get-homehost trynum: (- trynum 1)))
	#f))
   (else
    (let* ((currhost (get-host-name))
	   (bestadrs (server:get-best-guess-address currhost))
	   ;; first look in config, then look in file .homehost, create it if not found
	   (homehost (or (configf:lookup *configdat* "server" "homehost" )
			 (handle-exceptions
			     exn
			     (if (> trynum 0)
				 (let ((delay-time (* (- 5 trynum) 5)))
				   (mutex-unlock! *homehost-mutex*)
				   (debug:print 0 *default-log-port* "ERROR: ["(common:human-time)"] Failed to read .homehost file, delaying " delay-time " seconds and trying again, message: "  ((condition-property-accessor 'exn 'message) exn))
				   (thread-sleep! delay-time)
				   (common:get-homehost trynum: (- trynum 1)))
				 (begin
				   (mutex-unlock! *homehost-mutex*)
				   (debug:print 0 *default-log-port* "ERROR: ["(common:human-time)"] Failed to read .homehost file after trying five times. Giving up and exiting, message: "  ((condition-property-accessor 'exn 'message) exn))
				   (exit 1)))
			   (let ((hhf (conc *toppath* "/.homehost")))
			     (if (common:file-exists? hhf)
				 (with-input-from-file hhf read-line)
				 (if (file-write-access? *toppath*)
				     (begin
				       (with-output-to-file hhf
					 (lambda ()
					   (print bestadrs)))
				       (begin
					 (mutex-unlock! *homehost-mutex*)
					 (car (common:get-homehost))))
				     #f))))))
	   (at-home  (or (equal? homehost currhost)
			 (equal? homehost bestadrs))))
      (set! *home-host* (cons homehost at-home))
      (mutex-unlock! *homehost-mutex*)
      *home-host*))))

;; am I on the homehost?
;;
(define (common:on-homehost?)
  (let ((hh (common:get-homehost)))
    (if hh
	(cdr hh)
	#f)))

;; do we honor the caches of the config files?
;;
(define (common:use-cache?)
  (let ((res #t)) ;; priority by order of evaluation
    (if *configdat* ;; sillyness here. can't use setup/use-cache to know if we can use the cached files!
	(if (equal? (configf:lookup *configdat* "setup" "use-cache") "no")
	    (set! res #f)
	    (if (equal? (configf:lookup *configdat* "setup" "use-cache") "yes")
		(set! res #t))))
    (if (args:get-arg "-no-cache")(set! res #f)) ;; overrides setting in "setup"
    (if (getenv "MT_USE_CACHE")
	(if (equal? (getenv "MT_USE_CACHE") "yes")
	    (set! res #t)
	    (if (equal? (getenv "MT_USE_CACHE") "no")
		(set! res #f))))    ;; overrides -no-cache switch
    res))
  
;; force use of server?
;;
(define (common:force-server?)
  (let* ((force-setting (configf:lookup *configdat* "server" "force"))
	 (force-type    (if force-setting (string->symbol force-setting) #f))
	 (force-result  (case force-type
			  ((#f)     #f)
			  ((always) #t)
			  ((test)   (if (args:get-arg "-execute") ;; we are in a test
					#t
					#f))
			  (else
			   (debug:print 0 *default-log-port* "ERROR: Bad server force setting " force-setting ", forcing server.")
			   #t)))) ;; default to requiring server
    (if force-result
	(begin
	  (debug:print-info 0 *default-log-port* "forcing use of server, force setting is \"" force-setting "\".")
	  #t)
	#f)))

;;======================================================================
;; M I S C   L I S T S
;;======================================================================

;; items in lista are matched value and position in listb
;; return the remaining items in listb or #f
;;
(define (common:list-is-sublist lista listb)
  (if (null? lista)
      listb ;; all items in listb are "remaining"
      (if (> (length lista)(length listb)) 
	  #f
	  (let loop ((heda (car lista))
		     (tala (cdr lista))
		     (hedb (car listb))
		     (talb (cdr listb)))
	    (if (equal? heda hedb)
		(if (null? tala) ;; we are done
		    talb
		    (loop (car tala)
			  (cdr tala)
			  (car talb)
			  
			  (cdr talb)))
		#f)))))

;; Needed for long lists to be sorted where (apply max ... ) dies
;;
(define (common:max inlst)
  (let loop ((max-val (car inlst))
	     (hed     (car inlst))
	     (tal     (cdr inlst)))
    (if (not (null? tal))
	(loop (max hed max-val)
	      (car tal)
	      (cdr tal))
	(max hed max-val))))

;; get min or max, use > for max and < for min, this works around the limits on apply
;;
(define (common:min-max comp lst)
  (if (null? lst)
      #f ;; better than an exception for my needs
      (fold (lambda (a b)
	      (if (comp a b) a b))
	    (car lst)
	    lst)))

;; get min or max, use > for max and < for min, this works around the limits on apply
;;
(define (common:sum lst)
  (if (null? lst)
      0
      (fold (lambda (a b)
	      (+ a b))
	    (car lst)
	    lst)))

;; path list to hash-table tree
;;   ((a b c)(a b d)(e b c)) => ((a (b (d) (c))) (e (b (c))))
;;
(define (common:list->htree lst)
  (let ((resh (make-hash-table)))
    (for-each
     (lambda (inlst)
       (let loop ((ht  resh)
		  (hed (car inlst))
		  (tal (cdr inlst)))
	 (if (hash-table-ref/default ht hed #f)
	     (if (not (null? tal))
		 (loop (hash-table-ref ht hed)
		       (car tal)
		       (cdr tal)))
	     (begin
	       (hash-table-set! ht hed (make-hash-table))
	       (loop ht hed tal)))))
     lst)
    resh))

;; hash-table tree to html list tree
;;
;;   tipfunc takes two parameters: y the tip value and path the path to that point
;;
(define (common:htree->html ht path tipfunc)
  (let ((datlist 	(sort (hash-table->alist ht)
                              (lambda (a b)
                                (string< (car a)(car b))))))
    (if (null? datlist)
    	(tipfunc #f path) ;; really shouldn't get here
	(s:ul
	 (map (lambda (x)
		(let* ((levelname (car x))
		       (y         (cdr x))
		       (newpath   (append path (list levelname)))
		       (leaf      (or (not (hash-table? y))
				      (null? (hash-table-keys y)))))
		  (if leaf
		      (s:li (tipfunc y newpath))
		      (s:li
		       (list 
			levelname
			(common:htree->html y newpath tipfunc))))))
	      datlist)))))

;; hash-table tree to alist tree
;;
(define (common:htree->atree ht)
  (map (lambda (x)
	 (cons (car x)
	       (let ((y (cdr x)))
		 (if (hash-table? y)
		     (common:htree->atree y)
		     y))))
       (hash-table->alist ht)))

;;======================================================================
;; M U N G E   D A T A   I N T O   N I C E   F O R M S
;;======================================================================

;; Generate an index for a sparse list of key values
;;   ( (rowname1 colname1 val1)(rowname2 colname2 val2) )
;;
;; => 
;;
;;   ( (rowname1 0)(rowname2 1))    ;; rownames -> num
;;     (colname1 0)(colname2 1)) )  ;; colnames -> num
;; 
;; optional apply proc to rownum colnum value
(define (common:sparse-list-generate-index data #!key (proc #f))
  (if (null? data)
      (list '() '())
      (let loop ((hed (car data))
		 (tal (cdr data))
		 (rownames '())
		 (colnames '())
		 (rownum   0)
		 (colnum   0))
	(let* ((rowkey          (car   hed))
	       (colkey          (cadr  hed))
	       (value           (caddr hed))
	       (existing-rowdat (assoc rowkey rownames))
	       (existing-coldat (assoc colkey colnames))
	       (curr-rownum     (if existing-rowdat rownum (+ rownum 1)))
	       (curr-colnum     (if existing-coldat colnum (+ colnum 1)))
	       (new-rownames    (if existing-rowdat rownames (cons (list rowkey curr-rownum) rownames)))
	       (new-colnames    (if existing-coldat colnames (cons (list colkey curr-colnum) colnames))))
	  ;; (debug:print-info 0 *default-log-port* "Processing record: " hed )
	  (if proc (proc curr-rownum curr-colnum rowkey colkey value))
	  (if (null? tal)
	      (list new-rownames new-colnames)
	      (loop (car tal)
		    (cdr tal)
		    new-rownames
		    new-colnames
		    (if (> curr-rownum rownum) curr-rownum rownum)
		    (if (> curr-colnum colnum) curr-colnum colnum)
		    ))))))

;; if it looks like a number -> convert it to a number, else return it
;;
(define (common:lazy-convert inval)
  (let* ((as-num (if (string? inval)(string->number inval) #f)))
    (or as-num inval)))

;; convert string a=1; b=2; c=a silly thing; d=
;; to '((a . 1)(b . 2)(c . "a silly thing")(d . ""))
;;
(define (common:val->alist val #!key (convert #f))
  (let ((val-list (string-split-fields ";\\s*" val #:infix)))
    (if val-list
	(map (lambda (x)
	       (let ((f (string-split-fields "\\s*=\\s*" x #:infix)))
		 (case (length f)
		   ((0) `(,#f))  ;; null string case
		   ((1) `(,(string->symbol (car f))))
		   ((2) `(,(string->symbol (car f)) . ,(let ((inval (cadr f)))
							 (if convert (common:lazy-convert inval) inval))))
		   (else f))))
	     val-list)
	'())))

;;======================================================================
;; S Y S T E M   S T U F F
;;======================================================================

;; return a nice clean pathname made absolute
(define (common:nice-path dir)
  (let ((match (string-match "^(~[^\\/]*)(\\/.*|)$" dir)))
    (if match ;; using ~ for home?
	(common:nice-path (conc (common:read-link-f (cadr match)) "/" (caddr match)))
	(normalize-pathname (if (absolute-pathname? dir)
				dir
				(conc (current-directory) "/" dir))))))

;; make "nice-path" available in config files and the repl
(define nice-path common:nice-path)

(define (common:read-link-f path)
  (handle-exceptions
      exn
      (begin
	(debug:print-error 0 *default-log-port* "command \"/bin/readlink -f " path "\" failed.")
	path) ;; just give up
    (with-input-from-pipe
	(conc "/bin/readlink -f " path)
      (lambda ()
	(read-line)))))

(define (get-cpu-load #!key (remote-host #f))
  (car (common:get-cpu-load remote-host)))
;;   (let* ((load-res (process:cmd-run->list "uptime"))
;; 	 (load-rx  (regexp "load average:\\s+(\\d+)"))
;; 	 (cpu-load #f))
;;     (for-each (lambda (l)
;; 		(let ((match (string-search load-rx l)))
;; 		  (if match
;; 		      (let ((newval (string->number (cadr match))))
;; 			(if (number? newval)
;; 			    (set! cpu-load newval))))))
;; 	      (car load-res))
;;     cpu-load))

;; get values from cached info from dropping file in logs dir
;;  e.g. key is host and dtype is normalized-load
;;
(define (common:get-cached-info key dtype #!key (age 5))
  (let* ((fullpath (conc *toppath* "/logs/" key "-" dtype ".log")))
    (if (and (file-exists? fullpath)
	     (file-read-access? fullpath))
	(handle-exceptions
	 exn
	 #f
	 (debug:print 2 *default-log-port* "reading file " fullpath)
	 (let ((real-age (- (current-seconds)(file-change-time fullpath)))) 
	   (if (< real-age age)
	       (with-input-from-file fullpath read)
	       (begin
		 (debug:print 2 *default-log-port* "file " fullpath " is too old (" real-age" seconds)to trust, skipping reading it")
		 #f))))
	(begin
	  (debug:print 2 *default-log-port* "not reading file " fullpath)
	  #f))))
 
(define (common:write-cached-info key dtype dat)
  (let* ((fullpath (conc *toppath* "/logs/" key "-" dtype ".log")))
    (handle-exceptions
     exn
     #f
     (with-output-to-file fullpath (lambda ()(pp dat))))))

;; get cpu load by reading from /proc/loadavg, return all three values
;;
(define (common:get-cpu-load remote-host)
  (handle-exceptions
   exn
   '(99 99 99)
   (let* ((actual-hostname (or remote-host (get-host-name) "localhost")))
     (or (common:get-cached-info actual-hostname "cpu-load")
	 (let ((result (if remote-host
			   (map (lambda (res)
				  (if (eof-object? res) 9e99 res))
			        (with-input-from-pipe 
				 (conc "ssh " remote-host " cat /proc/loadavg")
				 (lambda ()(list (read)(read)(read)))))
			   (with-input-from-file "/proc/loadavg" 
			     (lambda ()(list (read)(read)(read)))))))
	   (common:write-cached-info actual-hostname "cpu-load" result)
	                     result)))))

;; get normalized cpu load by reading from /proc/loadavg and /proc/cpuinfo return all three values and the number of real cpus and the number of threads
;; returns alist '((adj-cpu-load . normalized-proc-load) ... etc.
;;  keys: adj-proc-load, adj-core-load, 1m-load, 5m-load, 15m-load
;;
(define (common:get-normalized-cpu-load remote-host)
  (let ((res (common:get-normalized-cpu-load-raw remote-host))
	(default `((adj-proc-load . 2) ;; there is no right answer
		   (adj-core-load . 2)
		   (1m-load       . 2)
		   (5m-load       . 0) ;; causes a large delta - thus causing default of throttling if stuff goes wrong
		   (15m-load      . 0)
		   (proc          . 1)
		   (core          . 1)
		   (phys          . 1)
		   (error         . #t))))
    (cond
     ((and (list? res)
	   (> (length res) 2))
      res)
     ((eq? res #f)   default) ;; add messages?
     ((eq? res #f) default)   ;; this would be the #eof
     (else default))))

(define (common:get-normalized-cpu-load-raw remote-host)
  (let* ((actual-host (or remote-host (get-host-name)))) ;; #f is localhost
    (or (common:get-cached-info actual-host "normalized-load")
	(let ((data (if remote-host
			(with-input-from-pipe 
			    (conc "ssh " remote-host " cat /proc/loadavg;cat /proc/cpuinfo;echo end")
			  read-lines)
			(append 
			 (with-input-from-file "/proc/loadavg" 
			   read-lines)
			 (with-input-from-file "/proc/cpuinfo"
			   read-lines)
			 (list "end"))))
	      (load-rx  (regexp "^([\\d\\.]+)\\s+([\\d\\.]+)\\s+([\\d\\.]+)\\s+.*$"))
	      (proc-rx  (regexp "^processor\\s+:\\s+(\\d+)\\s*$"))
	      (core-rx  (regexp "^core id\\s+:\\s+(\\d+)\\s*$"))
	      (phys-rx  (regexp "^physical id\\s+:\\s+(\\d+)\\s*$"))
	      (max-num  (lambda (p n)(max (string->number p) n))))
	  ;; (print "data=" data)
	  (if (null? data) ;; something went wrong
	      #f
	      (let loop ((hed      (car data))
			 (tal      (cdr data))
			 (loads    #f)
			 (proc-num 0)  ;; processor includes threads
			 (phys-num 0)  ;; physical chip on motherboard
			 (core-num 0)) ;; core
		;; (print hed ", " loads ", " proc-num ", " phys-num ", " core-num)
		(if (null? tal) ;; have all our data, calculate normalized load and return result
		    (let* ((act-proc (+ proc-num 1))
			   (act-phys (+ phys-num 1))
			   (act-core (+ core-num 1))
			   (adj-proc-load (/ (car loads) act-proc))
			   (adj-core-load (/ (car loads) act-core))
			   (result
			    (append (list (cons 'adj-proc-load adj-proc-load)
					  (cons 'adj-core-load adj-core-load))
				    (list (cons '1m-load (car loads))
					  (cons '5m-load (cadr loads))
					  (cons '15m-load (caddr loads)))
				    (list (cons 'proc act-proc)
					  (cons 'core act-core)
					  (cons 'phys act-phys)))))
		      (common:write-cached-info actual-host "normalized-load" result)
		      result)
		  (regex-case
		   hed
		   (load-rx  ( x l1 l5 l15 ) (loop (car tal)(cdr tal)(map string->number (list l1 l5 l15)) proc-num phys-num core-num))
		   (proc-rx  ( x p         ) (loop (car tal)(cdr tal) loads           (max-num p proc-num) phys-num core-num))
		   (phys-rx  ( x p         ) (loop (car tal)(cdr tal) loads           proc-num (max-num p phys-num) core-num))
		   (core-rx  ( x c         ) (loop (car tal)(cdr tal) loads           proc-num phys-num (max-num c core-num)))
		   (else 
		    (begin
		      ;; (print "NO MATCH: " hed)
		      (loop (car tal)(cdr tal) loads proc-num phys-num core-num)))))))))))

(define (common:unix-ping hostname)
  (let ((res (system (conc "ping -c 1 " hostname " > /dev/null"))))
    (eq? res 0)))


(define (common:wait-for-cpuload maxload-in numcpus-in waitdelay #!key (count 1000) (msg #f)(remote-host #f)(force-maxload #f))
  (let* ((loadavg (common:get-cpu-load remote-host))
	 (numcpus (if (<= 1 numcpus-in) ;; not possible to have zero.  If we get 1, it's possible that we got the previous default, and we should check again
		      (common:get-num-cpus remote-host)
		      numcpus-in))
	 (maxload (if force-maxload
		      maxload-in
		      (max maxload-in 0.5))) ;; so maxload must be greater than 0.5 for now BUG - FIXME?
	 (first   (car loadavg))
	 (next    (cadr loadavg))
	 (adjload (* maxload (max 1 numcpus))) ;; possible bug where numcpus (or could be maxload) is zero, crude fallback is to at least use 1
	 (loadjmp (- first next))
         (adjwait (min (+ 300 (random 10)) (abs (* (+ (random 10)(/ (- 1000 count) 10) waitdelay) (- first adjload) ))  ))) ;; add some randomness to the time to break any alignment where netbatch dumps many jobs to machines simultaneously
    (debug:print-info 1 *default-log-port* "Checking cpuload on " (or remote-host "localhost") ", maxload: " maxload
		      ", load: " first ", adjload: " adjload ", loadjmp: " loadjmp)
    (cond
     ((and (> first adjload)
	   (> count 0))
      (debug:print-info 0 *default-log-port* "server start delayed " adjwait " seconds due to load " first " exceeding max of " adjload " on server " (or remote-host (get-host-name)) " (normalized load-limit: " maxload ") " (if msg msg ""))
      (thread-sleep! adjwait)
      (common:wait-for-cpuload maxload numcpus waitdelay count: (- count 1) msg: msg remote-host: remote-host))
     ((and (> loadjmp numcpus)
	   (> count 0))
      (debug:print-info 0 *default-log-port* "waiting " adjwait " seconds due to load jump " loadjmp " > numcpus " numcpus (if msg msg ""))
      (thread-sleep! adjwait)
      (common:wait-for-cpuload maxload numcpus waitdelay count: (- count 1) msg: msg remote-host: remote-host)))))

(define (common:wait-for-homehost-load maxload msg)
  (let* ((hh-dat (if (common:on-homehost?) ;; if we are on the homehost then pass in #f so the calls are local.
                     #f
                     (common:get-homehost)))
         (hh     (if hh-dat (car hh-dat) #f))
         (numcpus (common:get-num-cpus hh)))
    (common:wait-for-normalized-load maxload msg hh)))

(define (common:get-num-cpus remote-host)
  (let* ((actual-host (or remote-host (get-host-name))))
    (or (common:get-cached-info actual-host "num-cpus" age: 86400) ;; hosts had better not be changing the number of cpus too often!
	(let* ((proc   (lambda ()
			 (let loop ((numcpu 0)
				    (inl    (read-line)))
			   (if (eof-object? inl)
			       (begin
				 (common:write-cached-info remote-host "num-cpus" numcpu)
				 numcpu)
			       (loop (if (string-match "^processor\\s+:\\s+\\d+$" inl)
					 (+ numcpu 1)
					 numcpu)
				     (read-line))))))
	       (result (if remote-host
			   (with-input-from-pipe 
			       (conc "ssh " remote-host " cat /proc/cpuinfo")
			     proc)
			   (with-input-from-file "/proc/cpuinfo" proc))))
	  (common:write-cached-info actual-host "num-cpus" result)
	  result))))

;; wait for normalized cpu load to drop below maxload
;;
(define (common:wait-for-normalized-load maxload msg remote-host)
  (let ((num-cpus (common:get-num-cpus remote-host)))
    (common:wait-for-cpuload maxload num-cpus 15 msg: msg remote-host: remote-host)))

(define (get-uname . params)
  (let* ((uname-res (process:cmd-run->list (conc "uname " (if (null? params) "-a" (car params)))))
	 (uname #f))
    (if (null? (car uname-res))
	"unknown"
	(caar uname-res))))

;; for reasons I don't understand multiple calls to real-path in parallel threads
;; must be protected by mutexes
;;
(define (common:real-path inpath)
  ;; (process:cmd-run-with-stderr->list "readlink" "-f" inpath)) ;; cmd . params)
  ;; (let-values 
  ;;  (((inp oup pid) (process "readlink" (list "-f" inpath))))
  ;;  (with-input-from-port inp
  ;;    (let loop ((inl (read-line))
  ;;       	(res #f))
  ;;      (print "inl=" inl)
  ;;      (if (eof-object? inl)
  ;;          (begin
  ;;            (close-input-port inp)
  ;;            (close-output-port oup)
  ;;            ;; (process-wait pid)
  ;;            res)
  ;;          (loop (read-line) inl))))))
  (with-input-from-pipe (conc "readlink -f " inpath) read-line))

;;======================================================================
;; D I S K   S P A C E 
;;======================================================================

(define (common:get-disk-space-used fpath)
  (with-input-from-pipe (conc "/usr/bin/du -s " fpath) read))

;; given path get free space, allows override in [setup]
;; with free-space-script /path/to/some/script.sh
;;
(define (get-df path)
  (if (configf:lookup *configdat* "setup" "free-space-script")
      (with-input-from-pipe 
       (conc (configf:lookup *configdat* "setup" "free-space-script") " " path)
       (lambda ()
	 (let ((res (read-line)))
	   (if (string? res)
	       (string->number res)))))
      (get-unix-df path)))

(define (get-free-inodes path)
  (if (configf:lookup *configdat* "setup" "free-inodes-script")
      (with-input-from-pipe 
	  (conc (configf:lookup *configdat* "setup" "free-inodes-script") " " path)
	(lambda ()
	  (let ((res (read-line)))
	    (if (string? res)
		(string->number res)))))
      (get-unix-inodes path)))

(define (get-unix-df path)
  (let* ((df-results (process:cmd-run->list (conc "df " path)))
	 (space-rx   (regexp "([0-9]+)\\s+([0-9]+)%"))
	 (freespc    #f))
    ;; (write df-results)
    (for-each (lambda (l)
		(let ((match (string-search space-rx l)))
		  (if match 
		      (let ((newval (string->number (cadr match))))
			(if (number? newval)
			    (set! freespc newval))))))
	      (car df-results))
    freespc))

(define (get-unix-inodes path)
  (let* ((df-results (process:cmd-run->list (conc "df -i " path)))
	 (space-rx   (regexp "([0-9]+)\\s+([0-9]+)%"))
	 (freenodes    0)) ;; 0 is a better failsafe than #f here.
    ;; (write df-results)
    (for-each (lambda (l)
		(let ((match (string-search space-rx l)))
		  (if match 
		      (let ((newval (string->number (cadr match))))
			(if (number? newval)
			    (set! freenodes newval))))))
	      (car df-results))
    freenodes))

(define (common:check-space-in-dir dirpath required)
  (let* ((dbspace  (if (directory? dirpath)
		       (get-df dirpath)
		       0)))
    (list (> dbspace required)
	  dbspace
	  required
	  dirpath)))

;; check space in dbdir and in megatest dir
;; returns: ok/not dbspace required-space
;;
(define (common:check-db-dir-space alldat)
  (let* ((required (string->number 
		    (or (and (alldat-mtconfig alldat)
			     (configf:lookup (alldat-mtconfig alldat) "setup" "dbdir-space-required"))
			"100000")))
	 (dbdir    (common:get-db-tmp-area alldat)) ;; (db:get-dbdir))
	 (tdbspace (common:check-space-in-dir dbdir required))
	 (mdbspace (common:check-space-in-dir (alldat-areapath alldat) required)))
    (sort (list tdbspace mdbspace) (lambda (a b)
				     (< (cadr a)(cadr b))))))
    
;; check available space in dbdir, exit if insufficient
;;
(define (common:check-db-dir-and-exit-if-insufficient)
  (let* ((spacedat (car (common:check-db-dir-space *alldat*))) ;; look only at worst for now
	 (is-ok    (car spacedat))
	 (dbspace  (cadr spacedat))
	 (required (caddr spacedat))
	 (dbdir    (cadddr spacedat)))
    (if (not is-ok)
	(begin
	  (debug:print-error 0 *default-log-port* "Insufficient space in " dbdir ", require " required ", have " dbspace  ", exiting now.")
	  (exit 1)))))
  
;; paths is list of lists ((name path) ... )
;;
(define (common:get-disk-with-most-free-space disks minsize)
  (let ((best     #f)
	(bestsize 0)
        (min-inodes (or (string->number (if (configf:lookup *configdat* "setup" "min_inodes") (configf:lookup *configdat* "setup" "min_inodes") "0")) 0)))
    (for-each 
     (lambda (disk-num)
       (let* ((dirpath    (cadr (assoc disk-num disks)))
	      (freespc    (cond
			   ((not (directory? dirpath))
			    (if (common:low-noise-print 300 "disks not a dir " disk-num)
				(debug:print 0 *default-log-port* "WARNING: disk " disk-num " at path \"" dirpath "\" is not a directory - ignoring it."))
			    -1)
			   ((not (file-write-access? dirpath))
			    (if (common:low-noise-print 300 "disks not writeable " disk-num)
				(debug:print 0 *default-log-port* "WARNING: disk " disk-num " at path \"" dirpath "\" is not writeable - ignoring it."))
			    -1)
			   ((not (eq? (string-ref dirpath 0) #\/))
			    (if (common:low-noise-print 300 "disks not a proper path " disk-num)
				(debug:print 0 *default-log-port* "WARNING: disk " disk-num " at path \"" dirpath "\" is not a fully qualified path - ignoring it."))
			    -1)
			   (else
			    (get-df dirpath))))
	      (free-inodes (cond
			   ((not (directory? dirpath))
			    (if (common:low-noise-print 300 "disks not a dir " disk-num)
				(debug:print 0 *default-log-port* "WARNING: disk " disk-num " at path \"" dirpath "\" is not a directory - ignoring it."))
			    -1)
			   ((not (file-write-access? dirpath))
			    (if (common:low-noise-print 300 "disks not writeable " disk-num)
				(debug:print 0 *default-log-port* "WARNING: disk " disk-num " at path \"" dirpath "\" is not writeable - ignoring it."))
			    -1)
			   ((not (eq? (string-ref dirpath 0) #\/))
			    (if (common:low-noise-print 300 "disks not a proper path " disk-num)
				(debug:print 0 *default-log-port* "WARNING: disk " disk-num " at path \"" dirpath "\" is not a fully qualified path - ignoring it."))
			    -1)
			   (else
			    (get-free-inodes dirpath))))
             ;;(free-inodes (get-free-inodes dirpath))
             )
	 (if (and (> freespc bestsize)(> free-inodes min-inodes ))
	     (begin
	       (set! best     (cons disk-num dirpath))
	       (set! bestsize freespc)))
        ;;(print "Processing: " disk-num " bestsize: " bestsize " best: " best " freespc: " freespc " min-inodes: " min-inodes " free-inodes: " free-inodes)
      ))
     (map car disks))
    (if (and best (> bestsize minsize))
	best
	#f))) ;; #f means no disk candidate found

;; convert a spec string to a list of vectors #( rx  action rx-string )
(define (common:spec-string->list-of-specs spec-string actions)
  (let ((spec-strings  (string-split-fields "\\s*;\\s*" spec-string #:infix))
	(actions-regex (regexp (conc "^(.*)\\s+(" (string-intersperse (map conc actions) "|") ")"))))
    (filter
     (lambda (x) x)
     (map (lambda (s)
	    (let ((m (string-match actions-regex s)))
	      (if m
		  (vector (regexp (cadr m))(string->symbol (caddr m))(cadr m))
		  (begin
		    (debug:print 0 *default-log-port* "WARNING: Unrecognised rule \"" s "\" in clean-up specification.")
		    #f))))
	  spec-strings))))
  
;; given a list of specs rx . rule and a file return the first matching rule
;; 
(define (common:file-find-rule fname rules) ;; rule is vector #( rx action rx-string)
  (let loop ((rule   (car rules))
	     (tail   (cdr rules)))
    (let ((rx (vector-ref rule 0))
	  (rn (vector-ref rule 1))) ;; rule name
      (if (string-match rx fname)
	  rule ;; return the whole rule so regex can be printed etc.
	  (if (null? tail)
	      #f
	      (loop (car tail)(cdr tail)))))))

;; given a spec apply some rules to a directory
;;
;;  WARNING: This function will REMOVE files - be sure your spec and path is correct!
;;
;; spec format:
;;   file-regex1 action; file-regex2 action; ...
;; e.g.
;;   .*\.log$ keep; .* remove
;;   --> keep all .log files, remove everything else
;; limitations:
;;   cannot have a rule with ; as part of the spec
;;   not very flexible, would be nice to return binned file names?
;; supported rules:
;;   keep     - keep this file
;;   remove   - remove this file
;;   compress - compress this file
;;
(define (common:dir-clean-up path spec-string #!key (compress "gzip")(actions '(keep remove compress))(remove-empty #f))
  (let* ((specs         (common:spec-string->list-of-specs spec-string actions))
	 (keepers       (make-hash-table))
	 (directories   (make-hash-table)))
    (find-files 
     path
     action: (lambda (p res)
	       (let ((rule  (common:file-find-rule p specs)))
		 (cond
		  ((directory? p)(hash-table-set! directories p #t))
		  (else
		   (case (vector-ref rule 1)
		     ((keep)(hash-table-set! keepers p rule))
		     ((remove)
		      (print "Removing file " p)
		      (delete-file p))
		     ((compress)
		      (print "Compressing file " p)
		      (system (conc compress " " p)))
		     (else
		      (print "No match for file " p))))))))
    (if remove-empty
	(for-each
	 (lambda (d)
	   (if (null? (glob (conc d "/.*")(conc d "/*")))
	       (begin
		 (print "Removing empty directory " d)
		 (delete-directory d))))
	 (sort (hash-table-keys directories) (lambda (a b)(> (string-length a)(string-length b))))))
    ))

;;======================================================================
;; E N V I R O N M E N T   V A R S
;;======================================================================
(define (bb-check-path #!key (msg "check-path: "))
  (let ((path (or (get-environment-variable "PATH") "none")))
    (debug:print-info 0 *default-log-port* (conc msg" : $PATH="path))
    (if (string-match "^.*/isoenv-core/.*" path)
        (debug:print-error 0 *default-log-port* (conc msg" : !!ISOENV PRESENT!!")) ;; remove for prod
        (debug:print-info 1 *default-log-port* (conc msg" : **no isoenv present**")))))

	      
(define (save-environment-as-files fname #!key (ignorevars (list "USER" "HOME" "DISPLAY" "LS_COLORS" "XKEYSYMDB" "EDITOR" "MAKEFLAGS" "MAKEF" "MAKEOVERRIDES")))
  ;;(bb-check-path msg: "save-environment-as-files entry")
  (let ((envvars (get-environment-variables))
        (whitesp (regexp "[^a-zA-Z0-9_\\-:,\\.\\/%$]"))
	(mungeval (lambda (val)
		    (cond
		     ((eq? val #t) "") ;; convert #t to empty string
		     ((eq? val #f) #f) ;; convert #f to itself (still thinking about this one
		     (else val)))))
    (with-output-to-file (conc fname ".csh")
       (lambda ()
          (for-each (lambda (keyval)
		      (let* ((key   (car keyval))
			     (val   (cdr keyval))
			     (delim (if (string-search whitesp val) 
					"\""
					"")))
			(print (if (or (member key ignorevars)
				       (string-search whitesp key))
				   "# setenv "
				   "setenv ")
			       key " " delim (mungeval val) delim)))
		    envvars)))
     (with-output-to-file (conc fname ".sh")
       (lambda ()
          (for-each (lambda (keyval)
		      (let* ((key (car keyval))
			     (val (cdr keyval))
			     (delim (if (string-search whitesp val) 
					"\""
					"")))
			(print (if (or (member key ignorevars)
				       (string-search whitesp key)
				       (string-search ":" key)) ;; internal only values to be skipped.
				   "# export "
				   "export ")
			       key "=" delim (mungeval val) delim)))
                    envvars)))))


(define (common:get-param-mapping #!key (flavor #f))
  "returns alist mapping string keys in testconfig/subrun to megatest command line switches; if flavor is switch-symbol, maps tcmt symbolic switches to megatest switches"
  (let ((default '(("tag-expr"  . "-tagexpr")
                   ("mode-patt" . "-modepatt")
                   ("run-name"  . "-runname")
                   ("contour"   . "-contour")
                   ("target"    . "-target")
                   ("test-patt" . "-testpatt")
                   ("msg"       . "-m")
                   ("log"       . "-log")
                   ("start-dir" . "-start-dir")
                   ("new"       . "-set-state-status"))))
    (if (eq? flavor 'switch-symbol)
        (map (lambda (x)
               (cons (string->symbol (conc "-" (car x))) (cdr x)))
             default)
        default)))

;; set some env vars from an alist, return an alist with original values
;; (("VAR" "value") ...)
;; a value of #f means "unset this var"
;;
(define (alist->env-vars lst)
  (if (list? lst)
      (let ((res '()))
	(for-each (lambda (p)
		    (let* ((var (car  p))
			   (val (cadr p))
			   (prv (get-environment-variable var)))
		      (set! res (cons (list var prv) res))
		      (if val 
			  (safe-setenv var (->string val))
			  (unsetenv var))))
		  lst)
	res)
      '()))


;; clear vars matching pattern, run proc, set vars back
;; if proc is a string run that string as a command with
;; system.
;;
(define *common:orig-env*
  (let ((envvars (get-environment-variables)))
    (if (get-environment-variable "MT_ORIG_ENV")
        (with-input-from-string
            (z3:decode-buffer (base64:base64-decode (get-environment-variable "MT_ORIG_ENV")))
          read)
        (filter-map (lambda (x)
                      (if (string-match "^MT_.*" (car x))
                          #f
                          x))
                    envvars))))

(define (common:with-orig-env proc)
  (let  ((current-env (get-environment-variables)))
    (for-each (lambda (x) (unsetenv (car x)))             current-env)
    (for-each (lambda (x) (setenv (car x) (cdr x))) *common:orig-env*)
    (let ((rv (cond
               ((string? proc)(system proc))
               (proc          (proc)))))
      (for-each (lambda (x) (unsetenv (car x))) *common:orig-env*)
      (for-each (lambda (x) (setenv (car x) (cdr x))) current-env)
      rv)))

(define (common:without-vars proc . var-patts)
  (let ((vars (make-hash-table)))
    (for-each
     (lambda (vardat) ;; each env var
       (for-each
	(lambda (var-patt)
	  (if (string-match var-patt (car vardat))
	      (let ((var (car vardat))
		    (val (cdr vardat)))
		(hash-table-set! vars var val)
		(unsetenv var))))
	var-patts))
     (get-environment-variables))
    (cond
     ((string? proc)(system proc))
     (proc          (proc)))
    (hash-table-for-each
     vars
     (lambda (var val)
       (setenv var val)))
    vars))

;;======================================================================
;;
;;======================================================================

(define (common:in-running-test?)
  (and (args:get-arg "-execute") (get-environment-variable "MT_CMDINFO")))

(define (common:get-color-from-status status)
  (cond
   ((equal? status "PASS")    "green")
   ((equal? status "FAIL")    "red")
   ((equal? status "WARN")    "orange")
   ((equal? status "KILLED")  "orange")
   ((equal? status "KILLREQ") "purple")
   ((equal? status "RUNNING") "blue")
   ((equal? status "ABORT")   "brown")
   (else "black")))

;; ;;======================================================================
;; ;; N A N O M S G   C L I E N T
;; ;;======================================================================
;; 
;; 
;; 
;; (define (common:send-dboard-main-changed)
;;   (let* ((dashboard-ips (mddb:get-dashboards)))
;;     (for-each
;;      (lambda (ipadr)
;;        (let* ((soc (common:open-nm-req (conc "tcp://" ipadr)))
;; 	      (msg (conc "main " *toppath*))
;; 	      (res (common:nm-send-receive-timeout soc msg)))
;; 	 (if (not res) ;; couldn't reach that dashboard - remove it from db
;; 	     (print "ERROR: couldn't reach dashboard " ipadr))
;; 	 res))
;;      dashboard-ips)))
;;     
;;     
;; ;;======================================================================
;; ;; D A S H B O A R D   D B 
;; ;;======================================================================
;; 
;; (define (mddb:open-db)
;;   (let* ((db (open-database (conc (get-environment-variable "HOME") "/.dashboard.db"))))
;;     (set-busy-handler! db (busy-timeout 10000))
;;     (for-each
;;      (lambda (qry)
;;        (exec (sql db qry)))
;;      (list 
;;       "CREATE TABLE IF NOT EXISTS vars       (id INTEGER PRIMARY KEY,key TEXT, val TEXT, CONSTRAINT varsconstraint UNIQUE (key));"
;;       "CREATE TABLE IF NOT EXISTS dashboards (
;;           id         INTEGER PRIMARY KEY,
;;           pid        INTEGER,
;;           username   TEXT,
;;           hostname   TEXT,
;;           ipaddr     TEXT,
;;           portnum    INTEGER,
;;           start_time TIMESTAMP DEFAULT (strftime('%s','now')),
;;              CONSTRAINT hostport UNIQUE (hostname,portnum)
;;         );"
;;       ))
;;     db))
;; 
;; ;; register a dashboard 
;; ;;
;; (define (mddb:register-dashboard port)
;;   (let* ((pid      (current-process-id))
;; 	 (hostname (get-host-name))
;; 	 (ipaddr   (server:get-best-guess-address hostname))
;; 	 (username (current-user-name)) ;; (car userinfo)))
;; 	 (db      (mddb:open-db)))
;;     (print "Register monitor, pid: " pid ", hostname: " hostname ", port: " port ", username: " username)
;;     (exec (sql db "INSERT OR REPLACE INTO dashboards (pid,username,hostname,ipaddr,portnum) VALUES (?,?,?,?,?);")
;; 	   pid username hostname ipaddr port)
;;     (close-database db)))
;; 
;; ;; unregister a monitor
;; ;;
;; (define (mddb:unregister-dashboard host port)
;;   (let* ((db      (mddb:open-db)))
;;     (print "Register unregister monitor, host:port=" host ":" port)
;;     (exec (sql db "DELETE FROM dashboards WHERE hostname=? AND portnum=?;") host port)
;;     (close-database db)))
;; 
;; ;; get registered dashboards
;; ;;
;; (define (mddb:get-dashboards)
;;   (let ((db (mddb:open-db)))
;;     (query fetch-column
;; 	   (sql db "SELECT ipaddr || ':' || portnum FROM dashboards;"))))
    
;;======================================================================
;; NMSG AND NEW API
;;======================================================================

;; nm based server experiment, keep around for now.
;;
#;(define (nm:start-server dbconn #!key (given-host-name #f))
  (let* ((srvdat    (start-raw-server given-host-name: given-host-name))
	 (host-name (srvdat-host srvdat))
	 (soc       (srvdat-soc srvdat)))
    
    ;; start the queue processor (save for second round of development)
    ;;
    (thread-start! (make-thread! (lambda ()(queue-processor dbconn) "Queue processor")))
    ;; msg is an alist
    ;;  'r host:port  <== where to return the data
    ;;  'p params     <== data to apply the command to
    ;;  'e j|s|l      <== encoding of the params. default is s (sexp), if not specified is assumed to be default
    ;;  'c command    <== look up the function to call using this key
    ;;
    (let loop ((msg-in (nn-recv soc)))
      (if (not (equal? msg-in "quit"))
	  (let* ((dat        (decode msg-in))
		 (host-port  (alist-ref 'r dat)) ;; this is for the reverse req rep where the server is a client of the original client
		 (params     (alist-ref 'p dat))
		 (command    (let ((c (alist-ref 'c dat)))(if c (string->symbol c) #f)))
		 (all-good   (and host-port params command (hash-table-exists? *commands* command))))
	    (if all-good
		(let ((cmddat (make-qitem
			       command:   command
			       host-port: host-port
			       params:    params)))
		  (queue-push cmddat) 		;; put request into the queue
		  (nn-send soc "queued"))         ;; reply with "queued"
		(print "ERROR: ["(common:human-time)"] BAD request " dat))
	    (loop (nn-recv soc)))))
    (nn-close soc)))
  


;;======================================================================
;; H I E R A R C H I C A L   H A S H   T A B L E S
;;======================================================================

;; Every element including top element is a vector:
;;   <vector subhash value>

(define (hh:make-hh #!key (ht #f)(value #f))
  (vector (or ht    (make-hash-table)) value))

;; used internally
(define-inline (hh:set-ht! hh ht)       (vector-set! hh 0 ht))
(define-inline (hh:get-ht hh)           (vector-ref  hh 0))
(define-inline (hh:set-value! hh value) (vector-set! hh 1 value))
(define-inline (hh:get-value  hh value) (vector-ref  hh 1))

;; given a hierarchial hash and some keys look up the value ...
;;
(define (hh:get hh . keys)
  (if (null? keys)
      (vector-ref hh 1) ;; we have reached the end of the line, return the value sought
      (let ((sub-ht (hh:get-ht hh)))
	(if sub-ht ;; yes, there is more hierarchy
	    (let ((sub-hh (hash-table-ref/default sub-ht (car keys) #f)))
	      (if sub-hh
		  (apply hh:get sub-hh (cdr keys))
		  #f))
	    #f))))

;; given a hierarchial hash, a value and some keys, add needed hierarcy and insert the value
;;
(define (hh:set! hh value . keys)
  (if (null? keys)
      (hh:set-value! hh value) ;; we have reached the end of the line, store the value
      (let ((sub-ht (hh:get-ht hh)))
	(if sub-ht ;; yes, there is more hierarchy
	    (let ((sub-hh (hash-table-ref/default sub-ht (car keys) #f)))
	      (if (not sub-hh) ;; we'll need to add the next level of hierarchy
		  (let ((new-sub-hh (hh:make-hh)))
		    (hash-table-set! sub-ht (car keys) new-sub-hh)
		    (apply hh:set! new-sub-hh value (cdr keys)))
		  (apply hh:set! sub-hh value (cdr keys))))    ;; call the sub-hierhash with remaining keys
	    (begin
	      (hh:set-ht! hh (make-hash-table))
	      (apply hh:set! hh value keys))))))
  
;; Manage pkts, used in servers, tests and likely other contexts so put
;; in common
;;======================================================================

(define common:pkts-spec
  '((default . ((parent    . P)
                (action    . a)
                (filename  . f)))
    (configf . ((parent    . P)
                (action    . a)
                (filename  . f)))
    (server  . ((action    . a)
		(pid       . d)
		(ipaddr    . i)
		(port      . p)
		(parent    . P)))
    			  
    (test    . ((cpuuse    . c)
		(diskuse   . d)
		(item-path . i)
		(runname   . r)
		(state     . s)
		(target    . t)
		(status    . u)
		(parent    . P)))))

(define (common:get-pkts-dirs mtconf use-lt)
  (let* ((pktsdirs-str (or (configf:lookup mtconf "setup"  "pktsdirs")
			   (and use-lt
				(conc (or *toppath*
					  (current-directory))
				      "/lt/.pkts"))))
	 (pktsdirs  (if pktsdirs-str
			(string-split pktsdirs-str " ")
			#f)))
    pktsdirs))

;; use-lt is use linktree "lt" link to find pkts dir
(define (common:save-pkt pktalist-in mtconf use-lt #!key (add-only #f)) ;; add-only saves the pkt only if there is a parent already
  (if (or add-only
	  (hash-table-exists? *pkts-info* 'last-parent))
      (let* ((parent   (hash-table-ref/default *pkts-info* 'last-parent #f))
	     (pktalist (if parent
			   (cons `(parent . ,parent)
				 pktalist-in)
			   pktalist-in)))
	(let-values (((uuid pkt)
		      (alist->pkt pktalist common:pkts-spec)))
	  (hash-table-set! *pkts-info* 'last-parent uuid)
	  (let ((pktsdir (or (hash-table-ref/default *pkts-info* 'pkts-dir #f)
			     (let* ((pktsdirs (common:get-pkts-dirs mtconf use-lt))
				    (pktsdir   (car pktsdirs))) ;; assume it is there
			       (hash-table-set! *pkts-info* 'pkts-dir pktsdir)
			       pktsdir))))
            (handle-exceptions
             exn
             (debug:print-info 0 "failed to write out packet to " pktsdir) ;; don't care if this failed for now but MUST FIX - BUG!!
             (if (not (file-exists? pktsdir))
                 (create-directory pktsdir #t))
             (with-output-to-file
                 (conc pktsdir "/" uuid ".pkt")
               (lambda ()
                 (print pkt)))))))))
	
(define (common:with-queue-db mtconf proc #!key (use-lt #f)(toppath-in #f))
  (let* ((pktsdirs (common:get-pkts-dirs mtconf use-lt))
	 (pktsdir  (if pktsdirs (car pktsdirs) #f))
	 (toppath  (or (configf:lookup mtconf "scratchdat" "toppath")
		       toppath-in))
	 (pdbpath  (or (configf:lookup mtconf "setup"  "pdbpath") pktsdir)))
    (cond
     ((not (and  pktsdir toppath pdbpath))
      (debug:print 0 *default-log-port* "ERROR: settings are missing in your megatest.config for area management.")
      (debug:print  0 *default-log-port* "  you need to have pktsdirs in the [setup] section."))
     ((not (common:file-exists? pktsdir))
      (debug:print 0 *default-log-port* "ERROR: pkts directory not found " pktsdir))
     ((not (equal? (file-owner pktsdir)(current-effective-user-id)))
      (debug:print 0 *default-log-port* "ERROR: directory " pktsdir " is not owned by " (current-effective-user-name)))
     (else
	(let* ((pdb  (open-queue-db pdbpath "pkts.db"
				    schema: '("CREATE TABLE groups (id INTEGER PRIMARY KEY,groupname TEXT, CONSTRAINT group_constraint UNIQUE (groupname));"))))
	  (proc pktsdirs pktsdir pdb)
	  (dbi:close pdb))))))

(define (common:load-pkts-to-db mtconf #!key (use-lt #f))
  (common:with-queue-db
   mtconf
   (lambda (pktsdirs pktsdir pdb)
     (for-each
      (lambda (pktsdir) ;; look at all
	(cond
	 ((not (common:file-exists? pktsdir))
	  (debug:print 0 *default-log-port* "ERROR: packets directory " pktsdir " does not exist."))
	 ((not (directory? pktsdir))
	  (debug:print 0 *default-log-port* "ERROR: packets directory path " pktsdir " is not a directory."))
	 ((not (file-read-access? pktsdir))
	  (debug:print 0 *default-log-port* "ERROR: packets directory path " pktsdir " is not readable."))
	 (else
	  (debug:print-info 0 *default-log-port* "Loading packets found in " pktsdir)
	  (let ((pkts (glob (conc pktsdir "/*.pkt"))))
	    (for-each
	     (lambda (pkt)
	       (let* ((uuid    (cadr (string-match ".*/([0-9a-f]+).pkt" pkt)))
		      (exists  (lookup-by-uuid pdb uuid #f)))
		 (if (not exists)
		     (let* ((pktdat (string-intersperse
				     (with-input-from-file pkt read-lines)
				     "\n"))
			    (apkt   (pkt->alist pktdat))
			    (ptype  (alist-ref 'T apkt)))
		       (add-to-queue pdb pktdat uuid (or ptype 'cmd) #f 0)
		       (debug:print 4 *default-log-port* "Added " uuid " of type " ptype " to queue"))
		     (debug:print 4 *default-log-port* "pkt: " uuid " exists, skipping...")
		     )))
	     pkts)))))
      pktsdirs))
   use-lt: use-lt))

(define (common:get-pkt-alists pkts)
  (map (lambda (x)
	 (alist-ref 'apkt x)) ;; 'pkta pulls out the alist from the read pkt
       pkts))

;; given list of pkts (alist mode) return list of D cards as Unix epoch, sorted descending
;; also delete duplicates by target i.e. (car pkt)
;;
(define (common:get-pkt-times pkts)
  (delete-duplicates
   (sort 
    (map (lambda (x)
	   `(,(alist-ref 't x) . ,(string->number (alist-ref 'D x))))
	 pkts)
    (lambda (a b)(> (cdr a)(cdr b))))      ;; sort descending
   (lambda (a b)(equal? (car a)(car b))))) ;; remove duplicates by target


(define *common:thread-punchlist* (make-hash-table))
(define (common:send-thunk-to-background-thread thunk #!key (name #f))
  ;;(BB> "launched thread " name)

  ;; we need a unique name for the thread.
  (let* ((realname (if name
                       (if (not (hash-table-ref/default *common:thread-punchlist* name #f))
                           name
                           (conc name"-" (symbol->string (gensym))))
                       (conc "anonymous-"(symbol->string (gensym)))))
         (realthunk (lambda ()
                      (let ((res (thunk)))
                        (hash-table-delete! *common:thread-punchlist* realname)
                        res)))
         (thread   (make-thread realthunk realname)))
    (hash-table-set! *common:thread-punchlist* realname thread)
    (thread-start! thread)
    ))

(define (common:join-backgrounded-threads)
  ;; may need to trap and ignore exceptions -- dunno how atomic threads are...
  (for-each
   (lambda (thread-name)
     (let* ((thread (hash-table-ref/default *common:thread-punchlist* thread-name #f)))
       (if thread
           (handle-exceptions
           exn
           #t ;; just ignore it, it might have died in the meantime so joining it will throw an exception
           (thread-join! thread))
           )))
   (hash-table-keys *common:thread-punchlist*)))

;; DO NOT REMOVE TELEMETRY STUFF!! I WANT TO RE-ENABLE IT SOMEDAY!

#;(define *common:telemetry-log-state* 'startup)
#;(define *common:telemetry-log-socket* #f)

#;(define (common:telemetry-log-open)
  (if (eq? *common:telemetry-log-state* 'startup)
      (let* ((serverhost (configf:lookup *configdat* "telemetry" "host"))
             (serverport (configf:lookup-number *configdat* "telemetry" "port"))
             (user (or (get-environment-variable "USER") "unknown"))
             (host (or (get-environment-variable "HOST") "unknown")))
        (set! *common:telemetry-log-state*
              (handle-exceptions
               exn
               (begin
                 (debug:print-info 0 *default-log-port* "common-telemetry-log open udp port failure")
                 'broken)
               (if (and serverhost serverport user host)
                   (let* ((s (udp-open-socket)))
                     ;;(udp-bind! s #f 0)
                     (udp-connect! s serverhost serverport)
                     (set! *common:telemetry-log-socket* s)
                     'open)
                   'not-needed))))))
  
#;(define (common:telemetry-log event #!key (payload '()))
  (if (eq? *common:telemetry-log-state* 'startup)
      (common:telemetry-log-open))

  (if (eq? 'open *common:telemetry-log-state*)
      (handle-exceptions
       exn
       (begin
         (debug:print-info 0 *default-log-port* "common-telemetry-log comms failure ; disabled (no server?)")
         ;;(define *common:telemetry-log-state* 'broken-or-no-server-preclose)
         ;;(common:telemetry-log-close)
         (define *common:telemetry-log-state* 'broken-or-no-server)
         (set! *common:telemetry-log-socket* #f)
         )
       (if (and *common:telemetry-log-socket* event) ;; TODO - filter on event against telemetry.want-events
           (let* ((user (or (get-environment-variable "USER") "unknown"))
                  (host (or (get-environment-variable "HOST") "unknown"))
                  (start (conc "[megatest "event"]"))
                  (toppath (or *toppath* "/dev/null"))
                  (payload-serialized
                   (base64:base64-encode
                    (z3:encode-buffer
                     (with-output-to-string (lambda () (pp payload))))))
                  (msg     (conc user":"host":"start":"(current-process-id)":"(car (argv))":"
                                 toppath":"payload-serialized)))
             (udp-send *common:telemetry-log-socket* msg))))))
  
#;(define (common:telemetry-log-close)
  (when (or (member *common:telemetry-log-state* '(broken-or-no-server-preclose open)) *common:telemetry-log-socket*)
    (handle-exceptions
     exn
     (begin
       (define *common:telemetry-log-state* 'closed-fail)
       (debug:print-info 0 *default-log-port* "common-telemetry-log closure failure")
       )
     (begin
       (define *common:telemetry-log-state* 'closed)
       (udp-close-socket *common:telemetry-log-socket*)
       (set! *common:telemetry-log-socket* #f)))))

;; (define (debug:print . params) #f)
;; (define (debug:print-info . params) #f)
;; 
;; (define (set-functions dbgp dbgpinfo)
;;   (set! debug:print dbgp)
;;   (set! debug:print-info dbgpinfo))

(define (server:get-best-guess-address hostname)
  (let ((res #f))
    (for-each 
     (lambda (adr)
       (if (not (eq? (u8vector-ref adr 0) 127))
	   (set! res adr)))
     ;; NOTE: This can fail when there is no mention of the host in /etc/hosts. FIXME
     (vector->list (hostinfo-addresses (hostname->hostinfo hostname))))
    (string-intersperse 
     (map number->string
	  (u8vector->list
	   (if res res (hostname->ip hostname)))) ".")))

;;======================================================================
;; D A S H B O A R D   U S E R   V I E W S
;;======================================================================

;; first read ~/views.config if it exists, then read $MTRAH/views.config if it exists
;;
(define (common:load-views-config)
  (let* ((view-cfgdat    (make-hash-table))
	 (home-cfgfile   (conc (get-environment-variable "HOME") "/.mtviews.config"))
	 (mthome-cfgfile (conc *toppath* "/.mtviews.config")))
    (if (common:file-exists? mthome-cfgfile)
	(read-config mthome-cfgfile view-cfgdat #t))
    ;; we load the home dir file AFTER the MTRAH file so the user can clobber settings when running the dashboard in read-only areas
    (if (common:file-exists? home-cfgfile)
	(read-config home-cfgfile view-cfgdat #t))
    view-cfgdat))


;; (require-library margs)
;; (include "margs.scm")

;; (define old-exit exit)
;; 
;; (define (exit . code)
;;   (if (null? code)
;;       (old-exit)
;;       (old-exit code)))

;;======================================================================
;; T A R G E T S  ,   S T A T E ,   S T A T U S ,   
;;                    R U N N A M E    A N D   T E S T P A T T
;;======================================================================

;; (map print (map car (hash-table->alist (read-config "runconfigs.config" #f #t))))
;;
(define (common:get-runconfig-targets #!key (configf #f))
  (let ((targs       (sort (map car (hash-table->alist
				     (or configf ;; NOTE: There is no value in using runconfig:read here.
					 (read-config (conc *toppath* "/runconfigs.config")
						      #f #t)
					 (make-hash-table))))
			   string<?))
	(target-patt (args:get-arg "-target")))
    (if target-patt
	(filter (lambda (x)
		  (patt-list-match x target-patt))
		targs)
	targs)))

;; Lookup a value in runconfigs based on -reqtarg or -target
;; 
(define (runconfigs-get config var)
  (let ((targ (common:args-get-target))) ;; (or (args:get-arg "-reqtarg")(args:get-arg "-target")(getenv "MT_TARGET"))))
    (if targ
	(or (configf:lookup config targ var)
	    (configf:lookup config "default" var))
	(configf:lookup config "default" var))))

(define (common:args-get-state)
  (or (args:get-arg "-state")(args:get-arg ":state")))

(define (common:args-get-status)
  (or (args:get-arg "-status")(args:get-arg ":status")))

(define (common:args-get-testpatt rconf)
  (let* (;; (tagexpr       (args:get-arg "-tagexpr"))
         ;; (tags-testpatt (if tagexpr (string-join (runs:get-tests-matching-tags tagexpr) ",") #f))
         (testpatt-key  (or (args:get-arg "-modepatt") (args:get-arg "--modepatt") "TESTPATT"))
         (args-testpatt (or (args:get-arg "-testpatt") (args:get-arg "-runtests") "%"))
         (rtestpatt     (if rconf (runconfigs-get rconf testpatt-key) #f)))
    (cond
     ((args:get-arg "--modepatt") ;; modepatt is a forced setting, when set it MUST refer to an existing PATT in the runconfig
      (if rconf
	  (runconfigs-get rconf testpatt-key)
	  #f))     ;; We do NOT fall back to "%"
     ;; (tags-testpatt
     ;;  (debug:print-info 0 *default-log-port* "-tagexpr "tagexpr" selects testpatt "tags-testpatt)
     ;;  tags-testpatt)
     ((and (equal? args-testpatt "%") rtestpatt)
      (debug:print-info 0 *default-log-port* "testpatt defined in "testpatt-key" from runconfigs: " rtestpatt)
      rtestpatt)
     (else args-testpatt))))

;; '(print (string-intersperse (map cadr (hash-table-ref/default (read-config "megatest.config" \#f \#t) "disks" '"'"'("none" ""))) "\n"))'
(define (common:get-disks #!key (configf #f))
  (hash-table-ref/default 
   (or configf (read-config "megatest.config" #f #t))
   "disks" '("none" "")))

;; TODO: for multiple areas, we will have multiple watchdogs; and multiple threads to manage
(define (common:watchdog)
  (debug:print-info 13 *default-log-port* "common:watchdog entered.")
  (if (launch:setup)
      (if (common:on-homehost?)
	  (let ((dbstruct (db:setup #t)))
	    (debug:print-info 13 *default-log-port* "after db:setup with dbstruct=" dbstruct)
	    (cond
	     ((dbr:dbstruct-read-only dbstruct)
	      (debug:print-info 13 *default-log-port* "loading read-only watchdog")
	      (common:readonly-watchdog dbstruct))
	     (else
	      (debug:print-info 13 *default-log-port* "loading writable-watchdog.")
              (let* ((syncer (or (configf:lookup *configdat* "server" "sync-method") "brute-force-sync")))
                (cond
                 ((equal? syncer "brute-force-sync")
                  (server:writable-watchdog-bruteforce dbstruct))
                 ((equal? syncer "delta-sync")
                  (server:writable-watchdog-deltasync dbstruct))
                 (else
                  (debug:print-error 0 *default-log-port* "Unknown server/sync-method specified ("syncer") - valid values are brute-force-sync and delta-sync.")
                  (exit 1)))
                ;;(debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] Syncer started (method="syncer")")
                )))
	    (debug:print-info 13 *default-log-port* "watchdog done."))
	  (debug:print-info 13 *default-log-port* "no need for watchdog on non-homehost"))))

;;======================================================================
;; server stuff
;;======================================================================

(define (server:make-server-url hostport)
  (if (not hostport)
      #f
      (conc "http://" (car hostport) ":" (cadr hostport))))

(define  *server-loop-heart-beat* (current-seconds))

;;======================================================================
;; P K T S   S T U F F 
;;======================================================================

;; ???

;;======================================================================
;; P K T S   S T U F F 
;;======================================================================

;; ???

;;======================================================================
;; S E R V E R
;;======================================================================

;; Call this to start the actual server
;;

;; all routes though here end in exit ...
;;
;; start_server
;;
(define (server:launch run-id transport-type)
  (http-transport:launch))

;;======================================================================
;; S E R V E R   U T I L I T I E S 
;;======================================================================

;; Get the transport
(define (server:get-transport) 'http)
	    
;; Generate a unique signature for this server
(define (server:mk-signature)
  (message-digest-string (md5-primitive) 
			 (with-output-to-string
			   (lambda ()
			     (write (list (current-directory)
					  (argv)))))))

;; When using zmq this would send the message back (two step process)
;; with spiffy or rpc this simply returns the return data to be returned
;; 
(define (server:reply return-addr query-sig success/fail result)
  (debug:print-info 11 *default-log-port* "server:reply return-addr=" return-addr ", result=" result)
  (db:obj->string (vector success/fail query-sig result)))  ;; (send-message pubsock target send-more: #t)

;; Given a run id start a server process    ### NOTE ### > file 2>&1 
;; if the run-id is zero and the target-host is set 
;; try running on that host
;;   incidental: rotate logs in logs/ dir.
;;
(define  (server:run areapath) ;; areapath is *toppath* for a given testsuite area
  (let* ((curr-host   (get-host-name))
         ;; (attempt-in-progress (server:start-attempted? areapath))
         ;; (dot-server-url (server:check-if-running areapath))
	 (curr-ip     (server:get-best-guess-address curr-host))
	 (curr-pid    (current-process-id))
	 (homehost    (common:get-homehost)) ;; configf:lookup *configdat* "server" "homehost" ))
	 (target-host (car homehost))
	 (testsuite   (common:get-area-name *alldat*))
	 (logfile     (conc areapath "/logs/server.log")) ;; -" curr-pid "-" target-host ".log"))
	 (cmdln (conc (common:get-megatest-exe)
		      " -server " (or target-host "-") (if (equal? (configf:lookup *configdat* "server" "daemonize") "yes")
							   " -daemonize "
							   "")
		      ;; " -log " logfile
		      " -m testsuite:" testsuite)) ;; (conc " >> " logfile " 2>&1 &")))))
	 (log-rotate  (make-thread common:rotate-logs  "server run, rotate logs thread"))
         (load-limit  (configf:lookup-number *configdat* "jobtools" "max-server-start-load" default: 3.0)))
    ;; we want the remote server to start in *toppath* so push there
    (push-directory areapath)
    (debug:print 0 *default-log-port* "INFO: Trying to start server (" cmdln ") ...")
    (thread-start! log-rotate)
    
    ;; host.domain.tld match host?
    (if (and target-host 
	     ;; look at target host, is it host.domain.tld or ip address and does it 
	     ;; match current ip or hostname
	     (not (string-match (conc "("curr-host "|" curr-host"\\..*)") target-host))
	     (not (equal? curr-ip target-host)))
	(begin
	  (debug:print-info 0 *default-log-port* "Starting server on " target-host ", logfile is " logfile)
	  (setenv "TARGETHOST" target-host)))
      
    (setenv "TARGETHOST_LOGF" logfile)
    (thread-sleep! (/ (random 5000) 1000)) ;; add about a random (up to 5 seconds) initial delay. It seems pretty common that many running tests request a server at the same time
    (common:wait-for-normalized-load load-limit " delaying server start due to load" target-host) ;; do not try starting servers on an already overloaded machine, just wait forever
    (system (conc "nbfake " cmdln))
    (unsetenv "TARGETHOST_LOGF")
    (if (get-environment-variable "TARGETHOST")(unsetenv "TARGETHOST"))
    (thread-join! log-rotate)
    (pop-directory)))

;; given a path to a server log return: host port startseconds
;;
(define (server:logf-get-start-info logf)
  (let ((rx (regexp "^SERVER STARTED: (\\S+):(\\d+) AT ([\\d\\.]+)"))) ;; SERVER STARTED: host:port AT timesecs
    (handle-exceptions
	exn
	(list #f #f #f) ;; no idea what went wrong, call it a bad server
      (with-input-from-file
	  logf
	(lambda ()
	  (let loop ((inl  (read-line))
		     (lnum 0))
	    (if (not (eof-object? inl))
		(let ((mlst (string-match rx inl)))
		  (if (not mlst)
		      (if (< lnum 500) ;; give up if more than 500 lines of server log read
			  (loop (read-line)(+ lnum 1))
			  (list #f #f #f))
		      (let ((dat  (cdr mlst)))
			(list (car dat) ;; host
			      (string->number (cadr dat)) ;; port
			      (string->number (caddr dat))))))
		(list #f #f #f))))))))

;; get a list of servers with all relevant data
;; ( mod-time host port start-time pid )
;;
(define (server:get-list areapath #!key (limit #f))
  (let ((fname-rx    (regexp "^(|.*/)server-(\\d+)-(\\S+).log$"))
	(day-seconds (* 24 60 60)))
    ;; if the directory exists continue to get the list
    ;; otherwise attempt to create the logs dir and then
    ;; continue
    (if (if (directory-exists? (conc areapath "/logs"))
	    '()
	    (if (file-write-access? areapath)
		(begin
		  (condition-case
		      (create-directory (conc areapath "/logs") #t)
		    (exn (i/o file)(debug:print 0 *default-log-port* "ERROR: Cannot create directory at " (conc areapath "/logs")))
		    (exn ()(debug:print 0 *default-log-port* "ERROR: Unknown error attemtping to get server list.")))
		  (directory-exists? (conc areapath "/logs")))
		'()))
	(let* ((server-logs   (glob (conc areapath "/logs/server-*.log")))
	       (num-serv-logs (length server-logs)))
	  (if (null? server-logs)
	      '()
	      (let loop ((hed  (car server-logs))
			 (tal  (cdr server-logs))
			 (res '()))
		(let* ((mod-time  (handle-exceptions
				      exn
				      (current-seconds) ;; 0
				    (file-modification-time hed))) ;; default to *very* old so log gets ignored if deleted
		       (down-time (- (current-seconds) mod-time))
		       (serv-dat  (if (or (< num-serv-logs 10)
				  	  (< down-time 900)) ;; day-seconds))
				      (server:logf-get-start-info hed)
				      '())) ;; don't waste time processing server files not touched in the 15 minutes if there are more than ten servers to look at
		       (serv-rec (cons mod-time serv-dat))
		       (fmatch   (string-match fname-rx hed))
		       (pid      (if fmatch (string->number (list-ref fmatch 2)) #f))
		       (new-res  (if (null? serv-dat)
				     res
				     (cons (append serv-rec (list pid)) res))))
		(if (null? tal)
		    (if (and limit
			     (> (length new-res) limit))
			new-res ;; (take new-res limit)  <= need intelligent sorting before this will work
			new-res)
		    (loop (car tal)(cdr tal) new-res)))))))))

(define (server:get-num-alive srvlst)
  (let ((num-alive 0))
    (for-each
     (lambda (server)
       (match-let (((mod-time host port start-time pid)
		    server))
	 (let* ((uptime  (- (current-seconds) mod-time))
		(runtime (if start-time
			     (- mod-time start-time)
			     0)))
	   (if (< uptime 5)(set! num-alive (+ num-alive 1))))))
     srvlst)
    num-alive))

;; given a list of servers get a list of valid servers, i.e. at least
;; 10 seconds old, has started and is less than 1 hour old and is
;; active (i.e. mod-time < 10 seconds
;;
;; mod-time host port start-time pid
;;
;; sort by start-time descending. I.e. get the oldest first. Young servers will thus drop off
;; and servers should stick around for about two hours or so.
;;
(define (server:get-best srvlst)
  (let* ((nums (server:get-num-servers))
	 (now  (current-seconds))
	 (slst (sort
		(filter (lambda (rec)
			  (if (and (list? rec)
				   (> (length rec) 2))
			      (let ((start-time (list-ref rec 3))
				    (mod-time   (list-ref rec 0)))
				;; (print "start-time: " start-time " mod-time: " mod-time)
				(and start-time mod-time
				     (> (- now start-time) 0)    ;; been running at least 0 seconds
				     (< (- now mod-time)   16)   ;; still alive - file touched in last 16 seconds
				     (< (- now start-time)       
					(+ (- (string->number (or (configf:lookup *configdat* "server" "runtime") "3600"))
					      180)
					   (random 360))) ;; under one hour running time +/- 180
				     ))
			      #f))
			srvlst)
		(lambda (a b)
		  (< (list-ref a 3)
		     (list-ref b 3))))))
    (if (> (length slst) nums)
	(take slst nums)
	slst)))

(define (server:get-first-best areapath)
  (let ((srvrs (server:get-best (server:get-list areapath))))
    (if (and srvrs
	     (not (null? srvrs)))
	(car srvrs)
	#f)))

(define (server:get-rand-best areapath)
  (let ((srvrs (server:get-best (server:get-list areapath))))
    (if (and (list? srvrs)
	     (not (null? srvrs)))
	(let* ((len (length srvrs))
	       (idx (random len)))
	  (list-ref srvrs idx))
	#f)))


(define (server:record->url servr)
  (match-let (((mod-time host port start-time pid)
	       servr))
    (if (and host port)
	(conc host ":" port)
	#f)))

(define (server:get-client-signature) ;; BB> why is this proc named "get-"?  it returns nothing -- set! has not return value.
  (if *my-client-signature* *my-client-signature*
      (let ((sig (server:mk-signature)))
        (set! *my-client-signature* sig)
        *my-client-signature*)))

;; kind start up of servers, wait 40 seconds before allowing another server for a given
;; run-id to be launched
(define (server:kind-run areapath)
  (if (not (server:check-if-running areapath)) ;; why try if there is already a server running?
      (let* ((last-run-dat (hash-table-ref/default *server-kind-run* areapath '(0 0))) ;; callnum, whenrun
	     (call-num     (car last-run-dat))
	     (when-run     (cadr last-run-dat))
	     (run-delay    (+ (case call-num
				((0)    0)
				((1)   20)
				((2)  300)
				(else 600))
			      (random 5)))   ;; add a small random number just in case a lot of jobs hit the work hosts simultaneously
	     (lock-file    (conc areapath "/logs/server-start.lock")))
	(if	(> (- (current-seconds) when-run) run-delay)
		(begin
		  (common:simple-file-lock-and-wait lock-file expire-time: 15)
		  (server:run areapath)
		  (thread-sleep! 2) ;; don't release the lock for at least a few seconds
		  (common:simple-file-release-lock lock-file)))
	(hash-table-set! *server-kind-run* areapath (list (+ call-num 1)(current-seconds))))))

(define (server:start-and-wait areapath #!key (timeout 60))
  (let ((give-up-time (+ (current-seconds) timeout)))
    (let loop ((server-url (server:check-if-running areapath))
	       (try-num    0))
      (if (or server-url
	      (> (current-seconds) give-up-time)) ;; server-url will be #f if no server available.
	  server-url
	  (let ((num-ok (length (server:get-best (server:get-list areapath)))))
	    (if (and (> try-num 0)  ;; first time through simply wait a little while then try again
		     (< num-ok 1))  ;; if there are no decent candidates for servers then try starting a new one
		(server:kind-run areapath))
	    (thread-sleep! 5)
	    (loop (server:check-if-running areapath)
		  (+ try-num 1)))))))

(define server:try-running server:run) ;; there is no more per-run servers ;; REMOVE ME. BUG.

(define (server:get-num-servers #!key (numservers 2))
  (let ((ns (string->number
	     (or (configf:lookup *configdat* "server" "numservers") "notanumber"))))
    (or ns numservers)))

;; no longer care if multiple servers are started by accident. older servers will drop off in time.
;;
(define (server:check-if-running areapath) ;;  #!key (numservers "2"))
  (let* ((ns            (server:get-num-servers))
	 (servers       (server:get-best (server:get-list areapath))))
    ;; (print "servers: " servers " ns: " ns)
    (if (or (and servers
		 (null? servers))
	    (not servers)
	    (and (list? servers)
		 (< (length servers) (random ns)))) ;; somewhere between 0 and numservers
        #f
        (let loop ((hed (car servers))
                   (tal (cdr servers)))
          (let ((res (server:check-server hed)))
            (if res
                res
                (if (null? tal)
                    #f
                    (loop (car tal)(cdr tal)))))))))

;; ping the given server
;;
(define (server:check-server server-record)
  (let* ((server-url (server:record->url server-record))
         (res        (case *transport-type*
                       ((http)(server:ping server-url))
                       ;; ((nmsg)(nmsg-transport:ping (tasks:hostinfo-get-interface server)
                       )))
    (if res
        server-url
	#f)))

(define (server:kill servr)
  (match-let (((mod-time hostname port start-time pid)
	       servr))
    (tasks:kill-server hostname pid)))

;; called in megatest.scm, host-port is string hostname:port
;;
;; NOTE: This is NOT called directly from clients as not all transports support a client running
;;       in the same process as the server.
;;
(define (server:ping host-port-in #!key (do-exit #f))
  (let ((host:port (if (not host-port-in) ;; use read-dotserver to find
		       #f ;; (server:check-if-running *toppath*)
		;; (if (number? host-port-in) ;; we were handed a server-id
		;; 	   (let ((srec (tasks:get-server-by-id (db:delay-if-busy (tasks:open-db)) host-port-in)))
		;; 	     ;; (print "srec: " srec " host-port-in: " host-port-in)
		;; 	     (if srec
		;; 		 (conc (vector-ref srec 3) ":" (vector-ref srec 4))
		;; 		 (conc "no such server-id " host-port-in)))
		       host-port-in))) ;; )
    (let* ((host-port (if host:port
			  (let ((slst (string-split   host:port ":")))
			    (if (eq? (length slst) 2)
				(list (car slst)(string->number (cadr slst)))
				#f))
			  #f)))
;;	   (toppath       (launch:setup)))
      ;; (print "host-port=" host-port)
      (if (not host-port)
	  (begin
	    (if host-port-in
		(debug:print 0 *default-log-port*  "ERROR: bad host:port"))
	    (if do-exit (exit 1))
	    #f)
	  (let* ((iface      (car host-port))
		 (port       (cadr host-port))
		 (server-dat (http-transport:client-connect iface port))
		 (login-res  (rmt:login-no-auto-client-setup server-dat)))
	    (if (and (list? login-res)
		     (car login-res))
		(begin
		  ;; (print "LOGIN_OK")
		  (if do-exit (exit 0))
		  #t)
		(begin
		  ;; (print "LOGIN_FAILED")
		  (if do-exit (exit 1))
		  #f)))))))

;; run ping in separate process, safest way in some cases
;;
(define (server:ping-server ifaceport)
  (with-input-from-pipe 
   (conc (common:get-megatest-exe) " -ping " ifaceport)
   (lambda ()
     (let loop ((inl (read-line))
		(res "NOREPLY"))
       (if (eof-object? inl)
	   (case (string->symbol res)
	     ((NOREPLY)  #f)
	     ((LOGIN_OK) #t)
	     (else       #f))
	   (loop (read-line) inl))))))

;; NOT USED (well, ok, was referenced in rpc-transport but otherwise
;; not used).
;;
(define (server:login toppath)
  (lambda (toppath)
    (set! *db-last-access* (current-seconds)) ;; might not be needed.
    (if (equal? *toppath* toppath)
	#t
	#f)))

;; timeout is hms string: 1h 5m 3s, default is 1 minute
;;
(define (server:expiration-timeout)
  (let ((tmo (configf:lookup *configdat* "server" "timeout")))
    (if (and (string? tmo)
	     (common:hms-string->seconds tmo)) ;; BUG: hms-string->seconds is broken, if given "10" returns 0. Also, it doesn't belong in this logic unless the string->number is changed below
        (* 3600 (string->number tmo))
	60)))

;; (define server:sync-lock-token "SERVER_SYNC_LOCK")
;; (define (server:release-sync-lock)
;;   (db:no-sync-del! *no-sync-db* server:sync-lock-token))
;; (define (server:have-sync-lock?)
;;   (let* ((have-lock-pair (db:no-sync-get-lock *no-sync-db* server:sync-lock-token))
;;          (have-lock?     (car have-lock-pair))
;;          (lock-time      (cdr have-lock-pair))
;;          (lock-age       (- (current-seconds) lock-time)))
;;     (cond
;;      (have-lock? #t)
;;      ((>lock-age
;;        (* 3 (configf:lookup-number *configdat* "server" "minimum-intersync-delay" default: 180)))
;;       (server:release-sync-lock)
;;       (server:have-sync-lock?))
;;      (else #f))))

;; moving this here as it needs access to db and cannot be in common.
;;

(define (server:get-bruteforce-syncer dbstruct #!key (fork-to-background #f) (persist-until-sync #f))
  (let* ((sqlite-exe   (or (get-environment-variable "MT_SQLITE3_EXE"))) ;; defined in cfg.sh
         (sync-log     (or (args:get-arg "-sync-log") (conc *toppath* "/logs/sync-" (current-process-id) "-" (get-host-name) ".log")))
	 (tmp-area     (common:get-db-tmp-area *alldat*))
	 (tmp-db       (conc tmp-area "/megatest.db"))
	 (staging-file (conc *toppath* "/.megatest.db"))
	 (mtdbfile     (conc *toppath* "/megatest.db"))
	 (lockfile     (common:get-sync-lock-filepath *alldat*))
         (sync-cmd-core     (conc sqlite-exe" " tmp-db " .dump | "sqlite-exe" " staging-file "&>"sync-log))
         (sync-cmd     (if fork-to-background 
                           (conc "/usr/bin/env NBFAKE_LOG="*toppath*"/logs/last-server-sync-"(current-process-id)".log nbfake \""sync-cmd-core" && /bin/mv -f " staging-file " " mtdbfile" \"")
                           sync-cmd-core))
         (default-min-intersync-delay 2)
	 (min-intersync-delay (configf:lookup-number *configdat* "server" "minimum-intersync-delay" default: default-min-intersync-delay))
         (default-duty-cycle 0.1)
         (duty-cycle   (configf:lookup-number *configdat* "server" "sync-duty-cycle" default: default-duty-cycle))
         (last-sync-seconds 10) ;; we will adjust this to a measurement and delay last-sync-seconds * (1 - duty-cycle)
         (calculate-off-time (lambda (work-duration duty-cycle)
                                  (* (/ (- 1 duty-cycle) duty-cycle) last-sync-seconds)))
         (off-time min-intersync-delay) ;; adjusted in closure below.
         (do-a-sync
          (lambda ()
            (BB> "Start do-a-sync with fork-to-background="fork-to-background" persist-until-sync="persist-until-sync)
            (let* ((finalres
                    (let retry-loop ((num-tries 0))
                         (if (common:simple-file-lock lockfile)
	                     (begin
                               (cond
                                ((not (or fork-to-background persist-until-sync))
                                 (debug:print 0 *default-log-port* "INFO: syncer thread sleeping for max of (server.minimum-intersync-delay="min-intersync-delay
                                              " , off-time="off-time" seconds ]")
                                 (thread-sleep! (max off-time min-intersync-delay)))
                                (else
                                 (debug:print 0 *default-log-port* "INFO: syncer thread NOT sleeping ; maybe time-to-exit...")))

                               (if (not (configf:lookup *configdat* "server" "disable-db-snapshot"))
                                   (common:snapshot-file mtdbfile subdir: ".db-snapshot"))
		               (delete-file* staging-file)
		               (let* ((start-time (current-milliseconds))
                                      (res (system sync-cmd))
                                      (res2 
                                       (cond
                                        ((eq? 0 res)
		                         (delete-file* (conc mtdbfile ".backup"))
                                         (if (eq? 0 (file-size sync-log))
                                             (delete-file sync-log))
		                         (system (conc "/bin/mv " staging-file " " mtdbfile))
                                         
                                         (set! last-sync-seconds (/ (- (current-milliseconds) start-time) 1000))
                                         (set! off-time (calculate-off-time
                                                         last-sync-seconds
                                                         (cond
                                                          ((and (number? duty-cycle) (> duty-cycle 0) (< duty-cycle 1))
                                                           duty-cycle)
                                                          (else
                                                           (debug:print 0 *default-log-port* "WARNING: ["(common:human-time)"] server.sync-duty-cycle is invalid.  Should be a number between 0 and 1, but "duty-cycle" was specified.  Using default value: "default-duty-cycle)
                                                           default-duty-cycle))))
                                         
                                         (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" SYNC took "last-sync-seconds" sec")
                                         (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" SYNC took "last-sync-seconds" sec ; with duty-cycle of "duty-cycle" off time is now "off-time)
                                         'sync-completed)
                                        (else
                                         (system (conc "/bin/cp "sync-log" "sync-log".fail"))
                                         (debug:print 0 *default-log-port* "ERROR: ["(common:human-time)"] Sync failed. See log at "sync-log".fail")
                                         (if (file-exists? (conc mtdbfile ".backup"))
                                             (system (conc "/bin/cp "mtdbfile ".backup " mtdbfile)))
                                         #f))))
                                 (common:simple-file-release-lock lockfile)
                                 (BB> "released lockfile: " lockfile)
                                 (when (common:file-exists? lockfile)
                                   (BB> "DID NOT ACTUALLY RELEASE LOCKFILE"))
                                 res2) ;; end let
                               );; end begin
                             ;; else
                             (cond
                              (persist-until-sync
                               (thread-sleep! 1)
                               (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" other SYNC in progress; we're in a fork-to-background so we need to succeed.  Let's wait a jiffy and and try again. num-tries="num-tries" (waiting for lockfile="lockfile" to disappear)")
                               (retry-loop (add1 num-tries)))
                              (else
                               (thread-sleep! (max off-time (+ last-sync-seconds min-intersync-delay)))
                               (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" other SYNC in progress; not syncing.")
                               'parallel-sync-in-progress))
                             ) ;; end if got lockfile
                         )
                    ))
              (BB> "End do-a-sync with fork-to-background="fork-to-background" persist-until-sync="persist-until-sync" and result="finalres)
              finalres)
            ) ;; end lambda
          ))
    do-a-sync))

(define (server:writable-watchdog-bruteforce dbstruct)
  (thread-sleep! 1) ;; delay for startup
  (let* ((do-a-sync  (server:get-bruteforce-syncer dbstruct))
         (final-sync (server:get-bruteforce-syncer dbstruct fork-to-background: #t persist-until-sync: #t)))
    (when (and (not (args:get-arg "-sync-to-megatest.db")) ;; conditions under which we do not run the sync
	       (args:get-arg "-server"))
      
      (let loop ()
	(do-a-sync)
        (if (not *time-to-exit*) (loop))) ;; keep going unless time to exit

      ;; time to exit, close the no-sync db here
      (final-sync)

      (if (common:low-noise-print 30)
	  (debug:print-info 0 *default-log-port* "Exiting watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id)
			    )))))

(define (server:writable-watchdog-deltasync dbstruct)
  (thread-sleep! 0.05) ;; delay for startup
  (let ((legacy-sync  (common:run-sync?))
        (sync-stale-seconds (configf:lookup-number *configdat* "server" "sync-stale-seconds" default: 300))
	(debug-mode   (debug:debug-mode 1))
	(last-time    (current-seconds))
	(no-sync-db   (db:open-no-sync-db))
        (sync-duration 0) ;; run time of the sync in milliseconds
        ;;(this-wd-num  (begin (mutex-lock! *wdnum*mutex) (let ((x *wdnum*)) (set! *wdnum* (add1 *wdnum*)) (mutex-unlock! *wdnum*mutex) x)))
        )
    (set! *no-sync-db* no-sync-db) ;; make the no sync db available to api calls
    (debug:print-info 2 *default-log-port* "Periodic sync thread started.")
    (debug:print-info 3 *default-log-port* "watchdog starting. legacy-sync is " legacy-sync" pid="(current-process-id)  );;  " this-wd-num="this-wd-num)
    (if (and legacy-sync (not *time-to-exit*))
	(let* (;;(dbstruct (db:setup))
	       (mtdb       (dbr:dbstruct-mtdb dbstruct))
	       (mtpath     (db:dbdat-get-path mtdb))
	       (tmp-area   (common:get-db-tmp-area *alldat*))
	       (start-file (conc tmp-area "/.start-sync"))
	       (end-file   (conc tmp-area "/.end-sync")))
	  (debug:print-info 0 *default-log-port* "Server running, periodic sync started.")
	  (let loop ()
	    ;; sync for filesystem local db writes
	    ;;
	    (mutex-lock! *db-multi-sync-mutex*)
	    (let* ((need-sync        (>= *db-last-access* *db-last-sync*)) ;; no sync since last write
		   (sync-in-progress *db-sync-in-progress*)
                   (min-intersync-delay (configf:lookup-number *configdat* "server" "minimum-intersync-delay" default: 5))
		   (should-sync      (and (not *time-to-exit*)
                                          (> (- (current-seconds) *db-last-sync*) min-intersync-delay))) ;; sync every five seconds minimum, deprecated logic, can probably be removed
		   (start-time       (current-seconds))
                   (cpu-load-adj     (alist-ref 'adj-proc-load (common:get-normalized-cpu-load #f)))
		   (mt-mod-time      (file-modification-time mtpath))
		   (last-sync-start  (if (common:file-exists? start-file)
					 (file-modification-time start-file)
					 0))
		   (last-sync-end    (if (common:file-exists? end-file)
					 (file-modification-time end-file)
					 10))
                   (sync-period      (+ 3 (* cpu-load-adj 30))) ;; as adjusted load increases increase the sync period
		   (recently-synced  (and (< (- start-time mt-mod-time) sync-period) ;; not useful if sync didn't modify megatest.db!
					  (< mt-mod-time last-sync-start)))
		   (sync-done        (<= last-sync-start last-sync-end))
                   (sync-stale       (> start-time (+ last-sync-start sync-stale-seconds)))
		   (will-sync        (and (not *time-to-exit*)       ;; do not start a sync if we are in the process of exiting
                                          (or need-sync should-sync)
					  (or sync-done sync-stale)
					  (not sync-in-progress)
					  (not recently-synced))))
              (debug:print-info 13 *default-log-port* "WD writable-watchdog top of loop.  need-sync="need-sync" sync-in-progress=" sync-in-progress
				" should-sync="should-sync" start-time="start-time" mt-mod-time="mt-mod-time" recently-synced="recently-synced" will-sync="will-sync
				" sync-done=" sync-done " sync-period=" sync-period)
              (if (and (> sync-period 5)
                       (common:low-noise-print 30 "sync-period"))
                  (debug:print-info 0 *default-log-port* "Increased sync period due to long sync times, sync took: " sync-period " seconds."))
	      ;; (if recently-synced (debug:print-info 0 *default-log-port* "Skipping sync due to recently-synced flag=" recently-synced))
	      ;; (debug:print-info 0 *default-log-port* "need-sync: " need-sync " sync-in-progress: " sync-in-progress " should-sync: " should-sync " will-sync: " will-sync)
	      (if will-sync (set! *db-sync-in-progress* #t))
	      (mutex-unlock! *db-multi-sync-mutex*)
	      (if will-sync
                  (let (;; (max-sync-duration  (configf:lookup-number *configdat* "server" "max-sync-duration")) ;; KEEPING THIS AVAILABLE BUT SHOULD NOT USE, I'M PRETTY SURE IT DOES NOT WORK!
                        (sync-start         (current-milliseconds)))
		    (with-output-to-file start-file (lambda ()(print (current-process-id))))
		    
		    ;; put lock here
		    
                    ;; (if (or (not max-sync-duration)
                    ;;        (< sync-duration max-sync-duration)) ;; NOTE: db:sync-to-megatest.db keeps track of time of last sync and syncs incrementally
                        (let ((res        (db:sync-to-megatest.db dbstruct no-sync-db: no-sync-db))) ;; did we sync any data? If so need to set the db touched flag to keep the server alive
                          (set! sync-duration (- (current-milliseconds) sync-start))
                          (if (> res 0) ;; some records were transferred, keep the db alive
                              (begin
                                (mutex-lock! *heartbeat-mutex*)
                                (set! *db-last-access* (current-seconds))
                                (mutex-unlock! *heartbeat-mutex*)
                                (debug:print-info 0 *default-log-port* "sync called, " res " records transferred."))
                              (debug:print-info 2 *default-log-port* "sync called but zero records transferred")))))
;;                         ;; TODO: factor this next routine out into a function
;;                         (with-input-from-pipe ;; this should not block other threads but need to verify this
;;                          (conc "megatest -sync-to-megatest.db -m testsuite:" (common:get-area-name) ":" *toppath*)
;;                          (lambda ()
;;                            (let loop ((inl (read-line))
;;                                       (res #f))
;;                              (if (eof-object? inl)
;;                                  (begin
;;                                    (set! sync-duration (- (current-milliseconds) sync-start))
;;                                    (cond
;;                                     ((not res)
;;                                      (debug:print 0 *default-log-port* "ERROR: sync from /tmp db to megatest.db appears to have failed. Recommended that you stop your runs and run \"megatest -cleanup-db\""))
;;                                     ((> res 0)
;;                                      (mutex-lock! *heartbeat-mutex*)
;;                                      (set! *db-last-access* (current-seconds))
;;                                      (mutex-unlock! *heartbeat-mutex*))))
;;                                  (let ((num-synced (let ((matches (string-match "^Synced (\\d+).*$" inl)))
;;                                                      (if matches
;;                                                          (string->number (cadr matches))
;;                                                          #f))))
;;                                    (loop (read-line)
;;                                          (or num-synced res))))))))))
	      (if will-sync
		  (begin
		    (mutex-lock! *db-multi-sync-mutex*)
		    (set! *db-sync-in-progress* #f)
		    (set! *db-last-sync* start-time)
		    (with-output-to-file end-file (lambda ()(print (current-process-id))))

		    ;; release lock here

		    (mutex-unlock! *db-multi-sync-mutex*)))
	      (if (and debug-mode
		       (> (- start-time last-time) 60))
		  (begin
		    (set! last-time start-time)
		    (debug:print-info 4 *default-log-port* "timestamp -> " (seconds->time-string (current-seconds)) ", time since start -> " (seconds->hr-min-sec (- (current-seconds) *time-zero*))))))
	    
	    ;; keep going unless time to exit
	    ;;
	    (if (not *time-to-exit*)
		(let delay-loop ((count 0))
                  ;;(debug:print-info 13 *default-log-port* "delay-loop top; count="count" pid="(current-process-id)" this-wd-num="this-wd-num" *time-to-exit*="*time-to-exit*)
                                                            
		  (if (and (not *time-to-exit*)
			   (< count 6)) ;; was 11, changing to 4. 
		      (begin
			(thread-sleep! 1)
			(delay-loop (+ count 1))))
		  (if (not *time-to-exit*) (loop))))
	    ;; time to exit, close the no-sync db here
	    (db:no-sync-close-db no-sync-db)
	    (if (common:low-noise-print 30)
		(debug:print-info 0 *default-log-port* "Exiting watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id) ))))))) ;;" this-wd-num="this-wd-num)))))))

;;======================================================================
;; archive
;;======================================================================
;;======================================================================
;; 
;;======================================================================

;; NOT CURRENTLY USED
;;
(define (archive:main linktree target runname testname itempath options)
  (let ((testdir (conc linktree "/" target "/" runname "/" testname "/" itempatt))
	(flavor  'plain) ;; type of machine to run jobs on
	(maxload 1.5)   ;; max allowed load for this work
	(adisks  (archive:get-archive-disks)))
    ;; get testdir size
    ;;   - hand off du to job mgr
    (if (and (common:file-exists? testdir)
	     (file-is-writable? testdir))
	(let* ((dused  (jobrunner:run-job 
			flavor  ;; machine type
			maxload ;; max allowed load
			'()     ;; prevars - environment vars to set for the job
			common:get-disk-space-used  ;; if a proc call it, if a string it is a unix command
			(list testdir)))
	       (apath  (archive:get-archive testname itempath dused)))
	  (jobrunner:run-job
	   flavor
	   maxload
	   '()
	   archive:run-bup
	   (list testdir apath))))))
	  
;; Get archive disks from megatest.config
;;
(define (archive:get-archive-disks)
  (let ((section (configf:get-section *configdat* "archive-disks")))
    (if section
	section
	'())))

;; look for the best candidate archive area, else create new 
;; area
;;
(define (archive:get-archive testname itempath dused)
  ;; look up in archive_allocations if there is a pre-used archive
  ;; with adequate diskspace
  ;;
  (let* ((existing-blocks (rmt:archive-get-allocations testname itempath dused))
	 (candidate-disks (map (lambda (block)
				 (list
				  (vector-ref block 1)   ;; archive-area-name
				  (vector-ref block 2))) ;; disk-path
			       existing-blocks)))
    (or (common:get-disk-with-most-free-space candidate-disks dused)
	(archive:allocate-new-archive-block #f #f #f)))) ;; BROKEN. testname itempath))))

;; allocate a new archive area
;;
(define (archive:allocate-new-archive-block blockid-cache run-area-home testsuite-name dneeded target run-name test-name)
  (let ((key (conc testsuite-name "/" target "/" run-name "/" test-name)))
    (if (hash-table-exists? blockid-cache key)
	(hash-table-ref blockid-cache key)
	(let* ((pscript     (configf:lookup *configdat* "archive" "pathscript"))
	       (pscript-cmd (conc pscript " " testsuite-name " " target " " run-name " " test-name))
	       (apath       (if pscript
				(handle-exceptions
				 exn
				 (begin
				   (debug:print 0 "ERROR: script \"" pscript-cmd "\" failed to run properly.")
				   (exit 1))
				 (with-input-from-pipe
				  pscript-cmd
				  read-line))
				#f)) ;; this is the user-calculated archive path
	       (adisks    (archive:get-archive-disks))
	       (best-disk (common:get-disk-with-most-free-space adisks dneeded)))
	  (if best-disk
	      (let* ((bdisk-name    (car best-disk))
		     (bdisk-path    (cdr best-disk))
		     (area-key      (substring (message-digest-string (md5-primitive) run-area-home) 0 5))
		     (bdisk-id      (rmt:archive-register-disk bdisk-name bdisk-path (get-df bdisk-path)))
		     (archive-name  (if apath
					apath
					(let ((sec (current-seconds)))
					  (conc (time->string (seconds->local-time sec) "%Y")
						"_q" (seconds->quarter sec) "/"
						testsuite-name "_" area-key))))
		     (archive-path  (conc bdisk-path "/" archive-name))
		     (block-id      (rmt:archive-register-block-name bdisk-id archive-path)))
		;;   (allocation-id (rmt:archive-allocate-testsuite/area-to-block block-id testsuite-name area-key)))
		(if block-id ;; (and block-id allocation-id)
		    (let ((res (cons block-id archive-path)))
		      (hash-table-set! blockid-cache key res)
		      res)
		    #f))
	      #f)) ;; no best disk found
	  )))

;; archive - run bup
;;
;; 1. create the bup dir if not exists
;; 2. start the du of each directory
;; 3. gen index
;; 4. save
;;
(define (archive:run-bup archive-command run-id run-name tests rp-mutex bup-mutex)
  ;; move the getting of archive space down into the below block so that a single run can 
  ;; allocate as needed should a disk fill up
  ;;
  (let* ((blockid-cache  (make-hash-table))
	 (tsname         (common:get-area-name *alldat*))
	 (min-space      (string->number (or (configf:lookup *configdat* "archive" "minspace") "1000")))
	 (arch-groups    (make-hash-table)) ;; archive groups, each corrosponds to a bup area
	 (disk-groups    (make-hash-table)) ;; 
	 (test-groups    (make-hash-table)) ;; these two (disk and test groups) could be combined nicely
	 (test-dirs      (make-hash-table))
	 (bup-exe        (or (configf:lookup *configdat* "archive" "bup") "bup"))
	 (compress       (or (configf:lookup *configdat* "archive" "compress") "9"))
	 (linktree       (common:get-linktree)) ;; (configf:lookup *configdat* "setup" "linktree")))
	 (archiver       (let ((s (configf:lookup *configdat* "archive" "archiver")))
			   (if s (string->symbol s) 'bup)))
	 (archiver-cmd   (case archiver
			   ((tar) "tar cfj ARCHIVE_NAME.tar.bz2 ")
			   ((7z)  " 7z u -t7z -m0=lzma -mx=9 -mfb=64 -md=32m -ms=on ARCHIVE_NAME.7z ")
			   (else #f)))
	 (print-prefix      "Running: ") ;; change to #f to turn off printing
	 (preclean-spec  (configf:get-section *configdat* "archive-preclean")))

    ;;     (tests:match patt testname itempath)
    
    ;; from the test info bin the path to the test by stem
    ;;
    (for-each
     (lambda (test-dat)
       (let* ((item-path         (db:test-get-item-path test-dat))
	      (test-name         (db:test-get-testname  test-dat))
	      (test-id           (db:test-get-id        test-dat))
	      (run-id            (db:test-get-run_id    test-dat))
	      (target            (string-intersperse (map cadr (rmt:get-key-val-pairs run-id)) "/"))
	      
	      (toplevel/children (and (db:test-get-is-toplevel test-dat)
				      (> (rmt:test-toplevel-num-items run-id test-name) 0)))
	      (test-partial-path (conc target "/" run-name "/" (db:test-make-full-name test-name item-path)))
	      ;; note the trailing slash to get the dir inspite of it being a link
	      (test-path         (conc linktree "/" test-partial-path))
	      (mutex-lock! rp-mutex)
	      (test-physical-path (if (common:file-exists? test-path) 
				      (common:real-path test-path)
				      #f))
	      (mutex-unlock! rp-mutex)
	      (partial-path-index (if test-physical-path (substring-index test-partial-path test-physical-path) #f))
	      (test-base         (if (and partial-path-index 
					  test-physical-path )
				     (substring test-physical-path
						0
						partial-path-index)
				     #f))
	      ;; we need our archive dir checked for every test to enable folks who want to store other ways.
	      (archive-info (archive:allocate-new-archive-block blockid-cache *toppath* tsname min-space target run-name test-name))
	      (archive-dir  (if archive-info (cdr archive-info) #f))
	      (archive-id   (if archive-info (car archive-info) -1))

	      )
	 
	 (if (not archive-dir) ;; no archive disk found, this is fatal
	     (begin
	       (debug:print 0 *default-log-port* "FATAL: No archive disks found. Please add disks with at least "
			    min-space " MB space to the [archive-disks] section of megatest.config")
	       (debug:print 0 *default-log-port* "       use [archive] minspace to specify minimum available space")
	       (debug:print 0 *default-log-port* "   disks: "
			    (string-intersperse (map cadr (archive:get-archive-disks)) "\n         "))
	       (exit 1))
	     (debug:print-info 0 *default-log-port* "Using path " archive-dir " for archiving test " test-path))

	 ;; preclean the test directory per the spec if provided
	 (if (not (null? preclean-spec)) ;; we've been asked to preclean before archiving
	     (let loop ((spec (car preclean-spec))
			(tail (cdr preclean-spec)))
	       (if (> (length spec) 1)
		   (let ((testspec (car spec))
			 (rules    (cadr spec)))
		     (if (tests:match testspec test-name item-path)
			 (begin
			   (debug:print 0 *default-log-port* "INFO: cleanup requested for " test-physical-path)
			   (common:dir-clean-up test-physical-path rules remove-empty: #t))
			 (if (not (null? tail))
			     (loop (car tail)(cdr tail)))))
		   (begin
		     (debug:print 0 *default-log-port* "ERROR: bad spec line in [archive-preclean] section. \"" spec "\"")
		     (if (not (null? tail))(loop (car tail)(cdr tail)))))))
	 (cond
	  (toplevel/children
	   (debug:print 0 *default-log-port* "WARNING: cannot archive " test-name " with id " test-id
			" as it is a toplevel test with children"))
	  ((not (common:file-exists? test-path))
	   (debug:print 0 *default-log-port* "WARNING: Cannot archive " test-name "/" item-path
			" as path " test-path " does not exist"))
	  (else
	   (debug:print 0 *default-log-port*
			"From test-dat=" test-dat " derived the following:\n"
			"test-partial-path  = " test-partial-path "\n"
			"test-path          = " test-path "\n"
			"test-physical-path = " test-physical-path "\n"
			"partial-path-index = " partial-path-index "\n"
			"test-base          = " test-base)
	   (hash-table-set! disk-groups test-base
			    (cons test-physical-path (hash-table-ref/default disk-groups test-base '())))
	   (hash-table-set! test-groups test-base
			    (cons test-dat (hash-table-ref/default test-groups test-base '())))
	   (hash-table-set! arch-groups test-base
			    (cons archive-info (hash-table-ref/default arch-groups test-base '())))
	   (hash-table-set! test-dirs test-id test-path)))))
	   ;; test-path))))
     tests)
    (debug:print 0 *default-log-port* "INFO: DISK GROUPS=" (hash-table->alist disk-groups))
    ;; for each disk-group, initialize the bup area if needed
    (for-each 
     (lambda (test-base)
       (let* ((disk-group (hash-table-ref disk-groups test-base))
	      (arch-group (hash-table-ref arch-groups test-base))
	      (arch-info  (car arch-group)) ;; don't know yet how this will work, can I get more than one possibility?
	      (archive-id    (car arch-info))
	      (archive-dir   (cdr arch-info)))
	 (debug:print 0 *default-log-port* "Processing disk-group " test-base)
	 (let* ((test-paths (hash-table-ref disk-groups test-base)))
	   (if (not (common:file-exists? archive-dir))
	       (create-directory archive-dir #t))
	   (case archiver
	     ((bup) ;; Archive using bup
	      (let* ((bup-init-params  (list "-d" archive-dir "init"))
		     (bup-index-params (append (list "-d" archive-dir "index") test-paths))
		     (bup-save-params  (append (list "-d" archive-dir "save" ;; (conc "--strip-path=" linktree)
						     (conc "-" compress) ;; or (conc "--compress=" compress)
						     "-n" (conc (common:get-area-name *alldat*) "-" run-id)
						     (conc "--strip-path=" test-base) ;; if we push to the directory do we need this?
						     )
					       test-paths)))
		(if (not (common:file-exists? (conc archive-dir "/HEAD")))
		    (begin
		      ;; replace this with jobrunner stuff enventually
		      (debug:print-info 0 *default-log-port* "Init bup in " archive-dir)
		      ;; (mutex-lock! bup-mutex)
		      (run-n-wait bup-exe params: bup-init-params print-cmd: print-prefix)
		      ;; (mutex-unlock! bup-mutex)
		      ))
		(debug:print-info 0 *default-log-port* "Indexing data to be archived")
		;; (mutex-lock! bup-mutex)
		(run-n-wait bup-exe params: bup-index-params print-cmd: print-prefix)
		(debug:print-info 0 *default-log-port* "Archiving data with bup")
		(run-n-wait bup-exe params: bup-save-params print-cmd: print-prefix)))
	     ((7z tar)
	      (for-each
	       (lambda (test-dat)
		 (let* ((test-id           (db:test-get-id        test-dat))
			(test-name         (db:test-get-testname  test-dat))
			(item-path         (db:test-get-item-path test-dat))
			(test-full-name    (db:test-make-full-name test-name item-path))
			(run-id            (db:test-get-run_id    test-dat))
			(target            (string-intersperse (map cadr (rmt:get-key-val-pairs run-id)) "/"))
			(run-name          (rmt:get-run-name-from-id run-id))
			(source-dir        (hash-table-ref test-dirs test-id)) ;; (conc test-base "/" test-name "/" item-path))
			(target-dir        (string-substitute "/$" "" (conc archive-dir "/" target "/" run-name "/" test-full-name))))
		   ;; create the test and item-path levels under archive-dir
		   (create-directory (pathname-directory target-dir) #t)
		   (run-n-wait
		    (conc
		     (string-substitute "ARCHIVE_NAME" target-dir archiver-cmd) " "
		     "."
		     )
		    print-cmd: print-prefix
		    run-dir: source-dir)))
	       (hash-table-ref test-groups test-base))))
	   ;; (mutex-unlock! bup-mutex)
	   (for-each
	    (lambda (test-dat)
	      (let ((test-id           (db:test-get-id        test-dat))
		    (run-id            (db:test-get-run_id    test-dat)))
		(rmt:test-set-archive-block-id run-id test-id archive-id)
		(if (member archive-command '("save-remove"))
		    (runs:remove-test-directory test-dat 'archive-remove))))
	    (hash-table-ref test-groups test-base)))))
       (hash-table-keys disk-groups))
    #t))

(define (archive:bup-restore archive-command run-id run-name tests rp-mutex bup-mutex)  ;; move the getting of archive space down into the below block so that a single run can 
  ;; allocate as needed should a disk fill up
  ;;
  (let* ((bup-exe      (or (configf:lookup *configdat* "archive" "bup") "bup"))
	 (linktree     (common:get-linktree))) ;; (configf:lookup *configdat* "setup" "linktree")))

    ;; from the test info bin the path to the test by stem
    ;;
    (for-each
     (lambda (test-dat)
       ;; When restoring test-dat will initially contain an old and invalid path to the test
       (let* ((best-disk         (get-best-disk *configdat* #f)) ;; BUG: get the testconfig and use it here. Otherwise data pulled out of archive could end up on the wrong kind of disk.
	      (item-path         (db:test-get-item-path test-dat))
	      (test-name         (db:test-get-testname  test-dat))
	      (test-id           (db:test-get-id        test-dat))
	      (run-id            (db:test-get-run_id    test-dat))
	      (keyvals           (rmt:get-key-val-pairs run-id))
	      (target            (string-intersperse (map cadr keyvals) "/"))
	      
	      (toplevel/children (and (db:test-get-is-toplevel test-dat)
				      (> (rmt:test-toplevel-num-items run-id test-name) 0)))
	      (test-partial-path (conc target "/" run-name "/" (db:test-make-full-name test-name item-path)))
	      ;; note the trailing slash to get the dir inspite of it being a link
	      (test-path         (conc linktree "/" test-partial-path))
	      ;; if the old path was not deleted then prev-test-physical-path will end up pointing to a real directory
	      (mutex-lock! rp-mutex)
	      (prev-test-physical-path (if (common:file-exists? test-path)
					   ;; (read-symbolic-link test-path #t)
					   (common:real-path test-path)
					   #f))
	      (mutex-unlock! rp-mutex)
	      (new-test-physical-path  (conc best-disk "/" test-partial-path))
	      (archive-block-id        (db:test-get-archived test-dat))
	      (archive-block-info      (rmt:test-get-archive-block-info archive-block-id))
	      (archive-path            (if (vector? archive-block-info)
					   (vector-ref archive-block-info 2) ;; look in db.scm for test-get-archive-block-info for the vector record info
					   #f)) ;; no archive found?
	      (archive-internal-path   (conc (common:get-area-name *alldat*) "-" run-id "/latest/" test-partial-path)))
	 
	 ;; some sanity checks, move an existing path out of the way - iif it is not a toplevel with children
	 ;;
	 (if (and (not toplevel/children)  ;; special handling needed for toplevel with children
		  prev-test-physical-path
		  (common:file-exists? prev-test-physical-path)) ;; what to do? abort or clean up or link it in?
	     (let* ((base (pathname-directory prev-test-physical-path))
		    (dirn (pathname-file      prev-test-physical-path))
		    (newn (conc base "/." dirn)))
	       (debug:print-error 0 *default-log-port* "the old directory " prev-test-physical-path ", still exists! Moving it to " newn)
	       (rename-file prev-test-physical-path newn)))

	 (if (and archive-path ;; no point in proceeding if there is no actual archive
		  (not toplevel/children))
	     (begin
	       ;; CREATE WORK AREA
	       ;; test-src-path == #f     ==> don't copy in data from tests directory
	       ;; itemdat       == string ==> use directly
	       (create-work-area run-id run-name keyvals test-id #f best-disk test-name item-path) ;; #!key (remtries 2))

	       ;; 1. Get the block id from the test info
	       ;; 2. Get the block data given the block id
	       ;; 3. Construct the paths etc. for the following command:
	       ;; 
	       ;; bup -d /tmp/matt/adisk1/2015_q1/fullrun_e1a40/ restore -C /tmp/seeme fullrun-30/latest/ubuntu/nfs/none/w02.1.20.54_b/

	       ;; DO BUP RESTORE
	       (let* ((new-test-dat        (rmt:get-test-info-by-id run-id test-id))
		      (new-test-path       (if (vector? new-test-dat )
					       (db:test-get-rundir new-test-dat)
					       (begin
						 (debug:print-error 0 *default-log-port* "unable to get data for run-id=" run-id ", test-id=" test-id)
						 (exit 1))))
		      ;; new-test-path won't work - must use best-disk instead? Nope, new-test-path but tack on /..
		      (bup-restore-params  (list "-d" archive-path "restore" "-C" (conc new-test-path "/..") archive-internal-path)))
		 (debug:print-info 0 *default-log-port* "Restoring archived data to " new-test-physical-path " from archive in " archive-path " ... " archive-internal-path)
		 ;; (mutex-lock! bup-mutex)
		 (run-n-wait bup-exe params: bup-restore-params print-cmd: #f)
		 ;; (mutex-unlock! bup-mutex)
		 (mt:test-set-state-status-by-id run-id test-id "COMPLETED" #f #f)))
	     (debug:print-error 0 *default-log-port* "No archive path in the record for run-id=" run-id " test-id=" test-id))))
     (filter vector? tests))))
	 
;;======================================================================
;; client stuff
;;======================================================================
;; client:get-signature
(define (client:get-signature)
  (if *my-client-signature* *my-client-signature*
      (let ((sig (conc (get-host-name) " " (current-process-id))))
	(set! *my-client-signature* sig)
	*my-client-signature*)))

;; Not currently used! But, I think it *should* be used!!!
(define (client:logout serverdat)
  (let ((ok (and (socket? serverdat)
		 (cdb:logout serverdat *toppath* (client:get-signature)))))
    ok))

(define (client:connect iface port)
  (http:client-connect iface port))

(define (client:setup areapath #!key (remaining-tries 100) (failed-connects 0))
  (client:setup-http *alldat* areapath remaining-tries: remaining-tries failed-connects: failed-connects))

(set-fn 'client:setup client:setup)


;; Do all the connection work, look up the transport type and set up the
;; connection if required.
;;
;; There are two scenarios. 
;;   1. We are a test manager and we received *transport-type* and *runremote* via cmdline
;;   2. We are a run tests, list runs or other interactive process and we must figure out
;;      *transport-type* and *runremote* from the monitor.db
;;
;; client:setup
;;
;; lookup_server, need to remove *runremote* stuff -> replace with *alldat* for now
;;

(define (client:setup-http runremote areapath #!key (remaining-tries 100) (failed-connects 0)) ;; (area-dat #f))
  (debug:print-info 2 *default-log-port* "client:setup remaining-tries=" remaining-tries)
  (server:start-and-wait areapath)
  (if (<= remaining-tries 0)
      (begin
	(debug:print-error 0 *default-log-port* "failed to start or connect to server")
	(exit 1))
      ;;
      ;; Alternatively here, we can get the list of candidate servers and work our way
      ;; through them searching for a good one.
      ;;
      (let* ((server-dat (server:get-rand-best areapath))) ;; (server:get-first-best areapath))
	(if (not server-dat) ;; no server found
	    (client:setup-http runremote areapath remaining-tries: (- remaining-tries 1))
	    (let ((host  (cadr  server-dat))
		  (port  (caddr server-dat)))
	      (debug:print-info 4 *default-log-port* "client:setup server-dat=" server-dat ", remaining-tries=" remaining-tries)
	      (if (and host port)
		  (let* ((start-res (case *transport-type*
				      ((http)(http-transport:client-connect host port))))
			 (ping-res  (case *transport-type* 
				      ((http)(rmt:login-no-auto-client-setup start-res)))))
		    (if (and start-res
			     ping-res)
			(begin
			  (alldat-conndat-set! runremote start-res)
			  (debug:print-info 2 *default-log-port* "connected to " (http-transport:server-dat-make-url start-res))
			  start-res)
			(begin    ;; login failed but have a server record, clean out the record and try again
			  (debug:print-info 0 *default-log-port* "client:setup, login unsuccessful, will attempt to start server ... start-res=" start-res ", server-dat=" server-dat) ;; had runid.  Fixes part of Randy;s ticket 1405717332
			  (case *transport-type* 
			    ((http)(http-transport:close-connections)))
			  (alldat-conndat-set! runremote #f)  ;; (hash-table-delete! runremote run-id)
			  (thread-sleep! 1)
			  (client:setup-http runremote areapath remaining-tries: (- remaining-tries 1))
			  )))
		  (begin    ;; no server registered
		    ;; (server:kind-run areapath)
		    (server:start-and-wait areapath)
		    (debug:print-info 0 *default-log-port* "client:setup, no server registered, remaining-tries=" remaining-tries)
		    (thread-sleep! 1) ;; (+ 5 (random (- 20 remaining-tries))))  ;; give server a little time to start up, randomize a little to avoid start storms.
		    (client:setup-http runremote areapath remaining-tries: (- remaining-tries 1)))))))))

;;======================================================================
;; configf
;;======================================================================
;; return list (path fullpath configname)
(define (find-config configname #!key (toppath #f))
  (if toppath
      (let ((cfname (conc toppath "/" configname)))
	(if (common:file-exists? cfname)
	    (list toppath cfname configname)
	    (list #f      #f     #f)))
      (let* ((cwd (string-split (current-directory) "/")))
	(let loop ((dir cwd))
	  (let* ((path     (conc "/" (string-intersperse dir "/")))
		 (fullpath (conc path "/" configname)))
	    (if (common:file-exists? fullpath)
		(list path fullpath configname)
		(let ((remcwd (take dir (- (length dir) 1))))
		  (if (null? remcwd)
		      (list #f #f #f) ;;  #f #f) 
		  (loop remcwd)))))))))

(define (config:assoc-safe-add alist key val #!key (metadata #f))
  (let ((newalist (filter (lambda (x)(not (equal? key (car x)))) alist)))
    (append newalist (list (if metadata
			       (list key val metadata)
			       (list key val))))))

(define (configf:section-var-set! cfgdat section-name var value #!key (metadata #f))
  (hash-table-set! cfgdat section-name
		   (config:assoc-safe-add
		    (hash-table-ref/default cfgdat section-name '())
		    var value metadata: metadata)))

(define (config:eval-string-in-environment str)
  ;; (if (or (string-null? str)
  ;;	  (equal? "!" (substring str 0 1))) ;; null string or starts with ! are preserved but NOT set in the environment
      str
      (handle-exceptions
       exn
       (begin
	 (debug:print-error 0 *default-log-port* "problem evaluating \"" str "\" in the shell environment")
	 #f)
       (let ((cmdres (process:cmd-run->list (conc "echo " str))))
	 (if (null? cmdres) ""
	     (caar cmdres))))) ;; )

;;======================================================================
;; Make the regexp's needed globally available
;;======================================================================

(define configf:include-rx (regexp "^\\[include\\s+(.*)\\]\\s*$"))
(define configf:script-rx  (regexp "^\\[scriptinc\\s+(\\S+)([^\\]]*)\\]\\s*$")) ;; include output from a script
(define configf:section-rx (regexp "^\\[(.*)\\]\\s*$"))
(define configf:blank-l-rx (regexp "^\\s*$"))
(define configf:key-sys-pr (regexp "^(\\S+)\\s+\\[system\\s+(\\S+.*)\\]\\s*$"))
(define configf:key-val-pr (regexp "^(\\S+)(\\s+(.*)|())$"))
(define configf:key-no-val (regexp "^(\\S+)(\\s*)$"))
(define configf:comment-rx (regexp "^\\s*#.*"))
(define configf:cont-ln-rx (regexp "^(\\s+)(\\S+.*)$"))
(define configf:settings   (regexp "^\\[configf:settings\\s+(\\S+)\\s+(\\S+)]\\s*$"))

;; read a line and process any #{ ... } constructs

(define configf:var-expand-regex (regexp "^(.*)#\\{(scheme|system|shell|getenv|get|runconfigs-get|rget|scm|sh|rp|gv|g|mtrah)\\s+([^\\}\\{]*)\\}(.*)"))

(define (configf:system ht cmd)
  (system cmd)
  )

(define (configf:process-line l ht allow-system #!key (linenum #f))
  (let loop ((res l))
    (if (string? res)
	(let ((matchdat (string-search configf:var-expand-regex res)))
	  (if matchdat
	      (let* ((prestr  (list-ref matchdat 1))
		     (cmdtype (list-ref matchdat 2)) ;; eval, system, shell, getenv
		     (cmd     (list-ref matchdat 3))
		     (poststr (list-ref matchdat 4))
		     (result  #f)
		     (start-time (current-seconds))
		     (cmdsym  (string->symbol cmdtype))
		     (fullcmd (case cmdsym
				((scheme scm) (conc "(lambda (ht)" cmd ")"))
				((system)     (conc "(lambda (ht)(configf:system ht \"" cmd "\"))"))
				((shell sh)   (conc "(lambda (ht)(string-translate (shell \""  cmd "\") \"\n\" \" \"))"))
				((realpath rp)(conc "(lambda (ht)(common:nice-path \"" cmd "\"))"))
				((getenv gv)  (conc "(lambda (ht)(get-environment-variable \"" cmd "\"))"))
				((mtrah)      (conc "(lambda (ht)"
                                                    "    (let ((extra \"" cmd "\"))"
						    "       (conc (or *toppath* (get-environment-variable \"MT_RUN_AREA_HOME\"))"
						    "             (if (string-null? extra) \"\" \"/\")"
						    "             extra)))"))
				((get g)   
				 (let* ((parts (string-split cmd))
					(sect  (car parts))
					(var   (cadr parts)))
				   (conc "(lambda (ht)(config-lookup ht \"" sect "\" \"" var "\"))")))
				((runconfigs-get rget) (conc "(lambda (ht)(runconfigs-get ht \"" cmd "\"))"))
				;; ((rget)           (conc "(lambda (ht)(runconfigs-get ht \"" cmd "\"))"))
				(else "(lambda (ht)(print \"ERROR\") \"ERROR\")"))))
		;; (print "fullcmd=" fullcmd)
		(handle-exceptions
		 exn
		 (begin
		   (debug:print 0 *default-log-port* "WARNING: failed to process config input \"" l "\"")
		   (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
		   ;; (print "exn=" (condition->list exn))
		   (set! result (conc "#{( " cmdtype ") " cmd "}, full expansion: " fullcmd)))
		 (if (or allow-system
			 (not (member cmdtype '("system" "shell" "sh"))))
		     (with-input-from-string fullcmd
		       (lambda ()
			 (set! result ((eval (read)) ht))))
		     (set! result (conc "#{(" cmdtype ") "  cmd "}"))))
		(case cmdsym
		  ((system shell scheme)
		   (let ((delta (- (current-seconds) start-time)))
		     (if (> delta 2)
			 (debug:print-info 0 *default-log-port* "for line \"" l "\"\n command:  " cmd " took " delta " seconds to run with output:\n   " result)
			 (debug:print-info 9 *default-log-port* "for line \"" l "\"\n command:  " cmd " took " delta " seconds to run with output:\n   " result)))))
		(loop (conc prestr result poststr)))
	      res))
	res)))

;; Run a shell command and return the output as a string
(define (shell cmd)
  (let* ((output (process:cmd-run->list cmd))
	 (res    (car output))
	 (status (cadr output)))
    (if (equal? status 0)
	(let ((outres (string-intersperse 
		       res
		       "\n")))
	  (debug:print-info 4 *default-log-port* "shell result:\n" outres)
	  outres)
	(begin
	  (with-output-to-port (current-error-port)
	    (lambda ()
	      (print "ERROR: " cmd " returned bad exit code " status)))
	  ""))))

;; this was inline but I'm pretty sure that is a hold over from when it was *very* simple ...
;;
(define (configf:read-line p ht allow-processing settings)
  (let loop ((inl (read-line p)))
    (let ((cont-line (and (string? inl)
			  (not (string-null? inl))
			  (equal? "\\" (string-take-right inl 1)))))
      (if cont-line ;; last character is \ 
	  (let ((nextl (read-line p)))
	    (if (not (eof-object? nextl))
		(loop (string-append (if cont-line 
					 (string-take inl (- (string-length inl) 1))
					 inl)
				     nextl))))
	  (let ((res (case allow-processing ;; if (and allow-processing 
		       ;;	   (not (eq? allow-processing 'return-string)))
		       ((#t #f)
			(configf:process-line inl ht allow-processing))
		       ((return-string)
			inl)
		       (else
			(configf:process-line inl ht allow-processing)))))
	    (if (and (string? res)
		     (not (equal? (hash-table-ref/default settings "trim-trailing-spaces" "no") "no")))
		(string-substitute "\\s+$" "" res)
		res))))))

(define (configf:cfgdat->env-alist section cfgdat-ht allow-system)
  (filter
   (lambda (pair)
     (let* ((var (car pair))
            (val (cdr pair)))
       (cons var
             (cond
              ((and allow-system (procedure? val)) ;; if we decided to use something other than #t or #f for allow-system ('return-procs or 'return-string) , this may become problematic
               (val))
              ((procedure? val) #f)
              ((string? val) val)
              (else "#f")))))
   (append
    (hash-table-ref/default cfgdat-ht "default" '())
    (if (equal? section "default") '() (hash-table-ref/default cfgdat-ht section '())))))

(define (calc-allow-system allow-system section sections)
  (if sections
      (and (or (equal? "default" section)
	       (member section sections))
	   allow-system) ;; account for sections and return allow-system as it might be a symbol such as return-strings
      allow-system))
    
;; given a config hash and a section name, apply that section to all matching sections (using wildcard % or regex if /..../)
;; remove the section when done so that there is no downstream clobbering
;;
(define (configf:apply-wildcards ht section-name)
  (if (hash-table-exists? ht section-name)
      (let* ((vars  (hash-table-ref ht section-name))
	     (rxstr (if (string-contains section-name "%")
			(string-substitute (regexp "%") ".*" section-name)
			(string-substitute (regexp "^/(.*)/$") "\\1" section-name)))
	     (rx    (regexp rxstr)))
	;; (print "\nsection-name: " section-name " rxstr: " rxstr)
        (for-each
         (lambda (section)
	   (if section
	       (let ((same-section (string=? section-name section))
		     (rx-match     (string-match rx section)))
		 ;; (print "section: " section " vars: " vars " same-section: " same-section " rx-match: " rx-match)
		 (if (and (not same-section) rx-match)
		     (for-each
		      (lambda (bundle)
			;; (print "bundle: " bundle)
			(let ((key  (car bundle))
			      (val  (cadr bundle))
			      (meta (if (> (length bundle) 2)(caddr bundle) #f)))
			  (hash-table-set! ht section (config:assoc-safe-add (hash-table-ref ht section) key val metadata: meta))))
		      vars)))))
         (hash-table-keys ht))))
  ht)

;; read a config file, returns hash table of alists

;; read a config file, returns hash table of alists
;; adds to ht if given (must be #f otherwise)
;; allow-system:
;;    #f - do not evaluate [system
;;    #t - immediately evaluate [system and store result as string
;;    'return-procs -- return a proc taking ht as an argument that may be evaulated at some future time
;;    'return-string -- return a string representing a proc taking ht as an argument that may be evaulated at some future time
;; envion-patt is a regex spec that identifies sections that will be eval'd
;; in the environment on the fly
;; sections: #f => get all, else list of sections to gather
;; post-section-procs alist of section-pattern => proc, where: (proc section-name next-section-name ht curr-path)
;; apply-wildcards: #t/#f - apply vars from targets with % wildcards to all matching sections
;;
(define (read-config path ht allow-system #!key (environ-patt #f)            (curr-section #f)   
		     (sections #f)              (settings (make-hash-table)) (keep-filenames #f)
		     (post-section-procs '())   (apply-wildcards #t) )
  (debug:print 9 *default-log-port* "START: " path)
;; (if *configdat*
;;     (common:save-pkt `((action . read-config)
;;       		 (f      . ,(cond ((string? path) path)
;;       				  ((port?   path) "port")
;;       				  (else (conc path))))
;;                        (T      . configf))
;;       	       *configdat* #t add-only: #t))
  (if (and (not (port? path))
	   (not (common:file-exists? path))) ;; for case where we are handed a port
      (begin 
	(debug:print-info 1 *default-log-port* "read-config - file not found " path " current path: " (current-directory))
	;; WARNING: This is a risky change but really, we should not return an empty hash table if no file read?
	#f) ;; (if (not ht)(make-hash-table) ht))
      (let ((inp        (if (string? path)
			    (open-input-file path)
			      path)) ;; we can be handed a port
	    (res        (if (not ht)(make-hash-table) ht))
	    (metapath   (if (or (debug:debug-mode 9)
				keep-filenames)
			    path #f))
            (process-wildcards  (lambda (res curr-section-name)
                                  (if (and apply-wildcards
                                           (or (string-contains curr-section-name "%")   ;; wildcard
                                               (string-match "/.*/" curr-section-name))) ;; regex
                                      (begin
                                        (configf:apply-wildcards res curr-section-name)
                                        (hash-table-delete! res curr-section-name))))))  ;; NOTE: if the section is a wild card it will be REMOVED from res 
	(let loop ((inl               (configf:read-line inp res (calc-allow-system allow-system curr-section sections) settings)) ;; (read-line inp))
		   (curr-section-name (if curr-section curr-section "default"))
		   (var-flag #f);; turn on for key-var-pr and cont-ln-rx, turn off elsewhere
		   (lead     #f))
	  (debug:print-info 8 *default-log-port* "curr-section-name: " curr-section-name " var-flag: " var-flag "\n   inl: \"" inl "\"")
	  (if (eof-object? inl) 
	      (begin
                ;; process last section for wildcards
                (process-wildcards res curr-section-name)
		(if (string? path) ;; we received a path, not a port, thus we are responsible for closing it.
		    (close-input-port inp))
		(if (list? sections) ;; delete all sections except given when sections is provided
		    (for-each
		     (lambda (section)
		       (if (not (member section sections))
			   (hash-table-delete! res section))) ;; we are using "" as a dumping ground and must remove it before returning the ht
		     (hash-table-keys res)))
		(debug:print 9 *default-log-port* "END: " path)
                res
                ) ;; retval
	      (regex-case 
	       inl 
	       (configf:comment-rx _                  (loop (configf:read-line inp res (calc-allow-system allow-system curr-section-name sections) settings)
                                                            curr-section-name #f #f))
               
	       (configf:blank-l-rx _                  (loop (configf:read-line inp res (calc-allow-system allow-system curr-section-name sections) settings)
                                                            curr-section-name #f #f))
	       (configf:settings   ( x setting val  )
                                   (begin
                                     (hash-table-set! settings setting val)
                                     (loop (configf:read-line inp res (calc-allow-system allow-system curr-section-name sections) settings)
                                           curr-section-name #f #f)))
               
	       (configf:include-rx ( x include-file )
                                   (let* ((curr-conf-dir (pathname-directory path))
                                          (full-conf     (if (and (absolute-pathname? include-file) (file-exists? include-file))
                                                             include-file
                                                             (common:nice-path 
                                                              (conc (if curr-conf-dir
                                                                        curr-conf-dir
                                                                        ".")
                                                                    "/" include-file)))))
				     (let ((all-matches (sort (handle-exceptions exn (list) (glob full-conf)) string<=?)))
				       (if (null? all-matches)
					   (begin
					     (debug:print '(2 9) #f "INFO: include file(s) matching " include-file " not found (called from " path ")")
					     (debug:print 2 *default-log-port* "        " full-conf))
					   (for-each
					    (lambda (fpath)
					      ;; (push-directory conf-dir)
					      (debug:print 9 *default-log-port* "Including: " full-conf)
					      (read-config fpath res allow-system environ-patt: environ-patt
							   curr-section: curr-section-name sections: sections settings: settings
							   keep-filenames: keep-filenames))
					    all-matches))
				       (loop (configf:read-line inp res (calc-allow-system allow-system curr-section-name sections) settings)
					     curr-section-name #f #f))))
	       (configf:script-rx ( x include-script params);; handle-exceptions
                                  ;;    exn
                                  ;;    (begin
                                  ;;      (debug:print '(0 2 9) #f "INFO: include from script " include-script " failed.")
                                  ;;      (loop (configf:read-line inp res (calc-allow-system allow-system curr-section-name sections) settings) curr-section-name #f #f))
                                  (if (and (common:file-exists? include-script)(file-execute-access? include-script))
                                      (let* ((local-allow-system  (calc-allow-system allow-system curr-section-name sections))
                                             (env-delta  (configf:cfgdat->env-alist curr-section-name res local-allow-system))
                                             (new-inp-port
                                              (common:with-env-vars
                                               env-delta
                                               (lambda ()
                                                 (open-input-pipe (conc include-script " " params))))))
                                        (debug:print '(2 9) *default-log-port* "Including from script output: " include-script)
                                        ;;  (print "We got here, calling read-config next. Port is: " new-inp-port)
                                        (read-config new-inp-port res allow-system environ-patt: environ-patt curr-section: curr-section-name sections: sections settings: settings keep-filenames: keep-filenames)
                                        (close-input-port new-inp-port)
                                        (loop (configf:read-line inp res (calc-allow-system allow-system curr-section-name sections) settings) curr-section-name #f #f))
                                      (begin
                                        (debug:print 0 *default-log-port* "Script not found or not exectutable: " include-script)
                                        (loop (configf:read-line inp res (calc-allow-system allow-system curr-section-name sections) settings) curr-section-name #f #f)))
                                  ) ;; )
	       (configf:section-rx ( x section-name )
                                   (begin
                                     ;; call post-section-procs
                                     (for-each 
                                      (lambda (dat)
                                        (let ((patt (car dat))
                                              (proc (cdr dat)))
                                          (if (string-match patt curr-section-name)
                                              (proc curr-section-name section-name res path))))
                                      post-section-procs)
                                     ;; after gathering the vars for a section and if apply-wildcards is true and if there is a wildcard in the section name process wildcards
                                     ;; NOTE: we are processing the curr-section-name, NOT section-name.
                                     (process-wildcards res curr-section-name)
                                     (if (not (hash-table-ref/default res section-name #f))(hash-table-set! res section-name '())) ;; ensure that mere mention of a section is not lost
                                     (loop (configf:read-line inp res (calc-allow-system allow-system curr-section-name sections) settings)
                                           ;; if we have the sections list then force all settings into "" and delete it later?
                                           ;; (if (or (not sections) 
                                           ;;	      (member section-name sections))
                                           ;;	  section-name "") ;; stick everything into "". NOPE: We need new strategy. Put stuff in correct sections and then delete all sections later.
                                           section-name
                                           #f #f)))
	       (configf:key-sys-pr ( x key cmd      )
                                   (if (calc-allow-system allow-system curr-section-name sections)
                                       (let ((alist    (hash-table-ref/default res curr-section-name '()))
                                             (val-proc (lambda ()
                                                         (let* ((start-time (current-seconds))
                                                                (local-allow-system  (calc-allow-system allow-system curr-section-name sections))
                                                                (env-delta  (configf:cfgdat->env-alist curr-section-name res local-allow-system))
                                                                (cmdres     (process:cmd-run->list cmd delta-env-alist-or-hash-table: env-delta)) ;; BB: here is where [system is exec'd.  needs to have env from other vars!
                                                                (delta      (- (current-seconds) start-time))
                                                                (status     (cadr cmdres))
                                                                (res        (car  cmdres)))
                                                           (debug:print-info 4 *default-log-port* "" inl "\n => " (string-intersperse res "\n"))
                                                           (if (not (eq? status 0))
                                                               (begin
                                                                 (debug:print-error 0 *default-log-port* "problem with " inl ", return code " status
                                                                                    " output: " cmdres)))
                                                           (if (> delta 2)
                                                               (debug:print-info 0 *default-log-port* "for line \"" inl "\"\n  command: " cmd " took " delta " seconds to run with output:\n   " res)
                                                               (debug:print-info 9 *default-log-port* "for line \"" inl "\"\n  command: " cmd " took " delta " seconds to run with output:\n   " res))
                                                           (if (null? res)
                                                               ""
                                                               (string-intersperse res " "))))))
                                         (hash-table-set! res curr-section-name 
                                                          (config:assoc-safe-add alist
                                                                                 key 
                                                                                 (case (calc-allow-system allow-system curr-section-name sections)
                                                                                   ((return-procs) val-proc)
                                                                                   ((return-string) cmd)
                                                                                   (else (val-proc)))
                                                                                 metadata: metapath))
                                         (loop (configf:read-line inp res (calc-allow-system allow-system curr-section-name sections) settings) curr-section-name #f #f))
                                       (loop (configf:read-line inp res
                                                                (calc-allow-system allow-system curr-section-name sections)
                                                                settings)
                                             curr-section-name #f #f)))
               
	       (configf:key-no-val ( x key val)
                                   (let* ((alist   (hash-table-ref/default res curr-section-name '()))
                                          (fval    (or (if (string? val) val #f) ""))) ;; fval should be either "" or " " (one or more spaces)
                                     (debug:print 10 *default-log-port* "   setting: [" curr-section-name "] " key " = #t")
                                     (safe-setenv key fval)
                                     (hash-table-set! res curr-section-name 
                                                      (config:assoc-safe-add alist key fval metadata: metapath))
                                     (loop (configf:read-line inp res
                                                              (calc-allow-system allow-system curr-section-name sections)
                                                              settings)
                                           curr-section-name key #f)))
               
	       (configf:key-val-pr ( x key unk1 val unk2 )
                                   (let* ((alist   (hash-table-ref/default res curr-section-name '()))
                                          (envar   (and environ-patt
							(string-search (regexp environ-patt) curr-section-name) ;; does the section match the envionpatt?
							(and (not (string-null? key))
							     (not (equal? "!" (substring key 0 1)))) ;; ! as leading character is a signature to NOT export to the environment
							;; (string-match "^.*:.*:.*$" key) ;; ;; something:something:something reserved for triggers in runconfigs
							)) 
                                          (realval (if envar
                                                       (config:eval-string-in-environment val)
                                                       val)))
                                     (debug:print-info 6 *default-log-port* "read-config env setting, envar: " envar " realval: " realval " val: " val " key: " key " curr-section-name: " curr-section-name)
                                     (if envar (safe-setenv key realval))
                                     (debug:print 10 *default-log-port* "   setting: [" curr-section-name "] " key " = " val)
                                     (hash-table-set! res curr-section-name 
                                                      (config:assoc-safe-add alist key realval metadata: metapath))
                                     (loop (configf:read-line inp res
                                                              (calc-allow-system allow-system curr-section-name sections) settings)
                                           curr-section-name key #f)))
	       ;; if a continued line
	       (configf:cont-ln-rx ( x whsp val     )
                                   (let ((alist (hash-table-ref/default res curr-section-name '())))
                                     (if var-flag             ;; if set to a string then we have a continued var
                                         (let ((newval (conc 
                                                        (config-lookup res curr-section-name var-flag) "\n"
                                                        ;; trim lead from the incoming whsp to support some indenting.
                                                        (if lead
                                                            (string-substitute (regexp lead) "" whsp)
                                                            "")
                                                        val)))
                                           ;; (print "val: " val "\nnewval: \"" newval "\"\nvarflag: " var-flag)
                                           (hash-table-set! res curr-section-name 
                                                            (config:assoc-safe-add alist var-flag newval metadata: metapath))
                                           (loop (configf:read-line inp res (calc-allow-system allow-system curr-section-name sections) settings) curr-section-name var-flag (if lead lead whsp)))
                                         (loop (configf:read-line inp res (calc-allow-system allow-system curr-section-name sections) settings) curr-section-name #f #f))))
	       (else (debug:print-error 0 *default-log-port* "problem parsing " path ",\n   \"" inl "\"")
		     (set! var-flag #f)
		     (loop (configf:read-line inp res (calc-allow-system allow-system curr-section-name sections) settings) curr-section-name #f #f))))
          ) ;; end loop
        )))
  
;; pathenvvar will set the named var to the path of the config
(define (find-and-read-config fname #!key (environ-patt #f)(given-toppath #f)(pathenvvar #f))
  (let* ((curr-dir   (current-directory))
         (configinfo (find-config fname toppath: given-toppath))
	 (toppath    (car configinfo))
	 (configfile (cadr configinfo))
	 (set-fields (lambda (curr-section next-section ht path)
		       (let ((field-names (if ht (common:get-fields ht) '()))
			     (target      (or (getenv "MT_TARGET")(args:get-arg "-reqtarg")(args:get-arg "-target"))))
			 (debug:print-info 9 *default-log-port* "set-fields with field-names=" field-names " target=" target " curr-section=" curr-section " next-section=" next-section " path=" path " ht=" ht)
			 (if (not (null? field-names))(keys:target-set-args field-names target #f))))))
    (if toppath (change-directory toppath)) 
    (if (and toppath pathenvvar)(setenv pathenvvar toppath))
    (let ((configdat  (if configfile 
			  (read-config configfile #f #t environ-patt: environ-patt post-section-procs: (list (cons "^fields$" set-fields)) #f))))
      (if toppath (change-directory curr-dir))
      (list configdat toppath configfile fname))))

(define (config-lookup cfgdat section var)
  (if (hash-table? cfgdat)
      (let ((sectdat (hash-table-ref/default cfgdat section '())))
	(if (null? sectdat)
	    #f
	    (let ((match (assoc var sectdat)))
	      (if match ;; (and match (list? match)(> (length match) 1))
		  (cadr match)
		  #f))
	    ))
      #f))

;; use to have definitive setting:
;;  [foo]
;;  var yes
;;
;;  (configf:var-is? cfgdat "foo" "var" "yes") => #t
;;
(define (configf:var-is? cfgdat section var expected-val)
  (equal? (configf:lookup cfgdat section var) expected-val))

(define configf:lookup config-lookup)
(define configf:read-file read-config)

(define (configf:section-vars cfgdat section)
  (let ((sectdat (hash-table-ref/default cfgdat section '())))
    (if (null? sectdat)
	'()
	(map car sectdat))))

(define (configf:set-section-var cfgdat section var val)
  (let ((sectdat (configf:get-section cfgdat section)))
    (hash-table-set! cfgdat section
                     (config:assoc-safe-add sectdat var val))))

    ;;(append (filter (lambda (x)(not (assoc var sectdat))) sectdat)
    ;;	    (list var val))))

(define (setup)
  (let* ((configf (find-config "megatest.config"))
	 (config  (if configf (read-config configf #f #t) #f)))
    (if config
	(setenv "RUN_AREA_HOME" (pathname-directory configf)))
    config))

;;======================================================================
;; Non destructive writing of config file
;;======================================================================

(define (configf:compress-multi-lines fdat)
  ;; step 1.5 - compress any continued lines
  (if (null? fdat) fdat
	(let loop ((hed (car fdat))
		   (tal (cdr fdat))
		   (cur "")
		   (led #f)
		   (res '()))
	  ;; ALL WHITESPACE LEADING LINES ARE TACKED ON!!
	  ;;  1. remove led whitespace
	  ;;  2. tack on to hed with "\n"
	  (let ((match (string-match configf:cont-ln-rx hed)))
	    (if match ;; blast! have to deal with a multiline
		(let* ((lead (cadr match))
		       (lval (caddr match))
		       (newl (conc cur "\n" lval)))
		  (if (not led)(set! led lead))
		  (if (null? tal) 
		      (set! fdat (append fdat (list newl)))
		      (loop (car tal)(cdr tal) newl led res))) ;; NB// not tacking newl onto res
		(let ((newres (if led 
				  (append res (list cur hed))
				  (append res (list hed)))))
		  ;; prev was a multiline
		  (if (null? tal)
		      newres
		      (loop (car tal)(cdr tal) "" #f newres))))))))

;; note: I'm cheating a little here. I merely replace "\n" with "\n         "
(define (configf:expand-multi-lines fdat)
  ;; step 1.5 - compress any continued lines
  (if (null? fdat) fdat
      (let loop ((hed (car fdat))
		 (tal (cdr fdat))
		 (res '()))
	(let ((newres (append res (list (string-substitute (regexp "\n") "\n         " hed #t)))))
	  (if (null? tal)
	      newres
	      (loop (car tal)(cdr tal) newres))))))

(define (configf:file->list fname)
  (if (common:file-exists? fname)
      (let ((inp (open-input-file fname)))
	(let loop ((inl (read-line inp))
		   (res '()))
	  (if (eof-object? inl)
	      (begin
		(close-input-port inp)
		(reverse res))
	      (loop (read-line inp)(cons inl res)))))
      '()))

;;======================================================================
;; Write a config
;;   0. Given a refererence data structure "indat"
;;   1. Open the output file and read it into a list
;;   2. Flatten any multiline entries
;;   3. Modify values per contents of "indat" and remove absent values
;;   4. Append new values to the section (immediately after last legit entry)
;;   5. Write out the new list 
;;======================================================================

(define (configf:write-config indat fname #!key (required-sections '()))
  (let* (;; step 1: Open the output file and read it into a list
	 (fdat       (configf:file->list fname))
	 (refdat  (make-hash-table))
	 (sechash (make-hash-table)) ;; current section hash, init with hash for "default" section
	 (new     #f) ;; put the line to be used in new, if it is to be deleted the set new to #f
	 (secname #f))

    ;; step 2: Flatten multiline entries
    (if (not (null? fdat))(set! fdat (configf:compress-multi-line fdat)))

    ;; step 3: Modify values per contents of "indat" and remove absent values
    (if (not (null? fdat))
	(let loop ((hed  (car fdat))
		   (tal  (cadr fdat))
		   (res  '())
		   (lnum 0))
	  (regex-case 
	   hed
	   (configf:comment-rx _                  (set! res (append res (list hed)))) ;; (loop (read-line inp) curr-section-name #f #f))
	   (configf:blank-l-rx _                  (set! res (append res (list hed)))) ;; (loop (read-line inp) curr-section-name #f #f))
	   (configf:section-rx ( x section-name ) (let ((section-hash (hash-table-ref/default refdat section-name #f)))
					    (if (not section-hash)
						(let ((newhash (make-hash-table)))
						  (hash-table-set! refhash section-name newhash)
						  (set! sechash newhash))
						(set! sechash section-hash))
					    (set! new hed) ;; will append this at the bottom of the loop
					    (set! secname section-name)
					    ))
	   ;; No need to process key cmd, let it fall though to key val
	   (configf:key-val-pr ( x key val      )
		       (let ((newval (config-lookup indat sec key)))
			 ;; can handle newval == #f here => that means key is removed
			 (cond 
			  ((equal? newval val)
			   (set! res (append res (list hed))))
			  ((not newval) ;; key has been removed
			   (set! new #f))
			  ((not (equal? newval val))
			     (hash-table-set! sechash key newval)
			     (set! new (conc key " " newval)))
			  (else
			   (debug:print-error 0 *default-log-port* "problem parsing line number " lnum "\"" hed "\"")))))
	   (else
	    (debug:print-error 0 *default-log-port* "Problem parsing line num " lnum " :\n   " hed )))
	  (if (not (null? tal))
	      (loop (car tal)(cdr tal)(if new (append res (list new)) res)(+ lnum 1)))
	  ;; drop to here when done processing, res contains modified list of lines
	  (set! fdat res)))

    ;; step 4: Append new values to the section
    (for-each 
     (lambda (section)
       (let ((sdat   '()) ;; append needed bits here
	     (svars  (configf:section-vars indat section)))
	 (for-each 
	  (lambda (var)
	    (let ((val (config-lookup refdat section var)))
	      (if (not val) ;; this one is new
		  (begin
		    (if (null? sdat)(set! sdat (list (conc "[" section "]"))))
		    (set! sdat (append sdat (list (conc var " " val))))))))
	  svars)
	 (set! fdat (append fdat sdat))))
     (delete-duplicates (append require-sections (hash-table-keys indat))))

    ;; step 5: Write out new file
    (with-output-to-file fname 
      (lambda ()
	(for-each 
	 (lambda (line)
	   (print line))
	 (configf:expand-multi-lines fdat))))))

;;======================================================================
;; refdb
;;======================================================================

;; reads a refdb into an assoc array of assoc arrays
;;   returns (list dat msg)
(define (configf:read-refdb refdb-path)
  (let ((sheets-file  (conc refdb-path "/sheet-names.cfg")))
    (if (not (common:file-exists? sheets-file))
	(list #f (conc "ERROR: no refdb found at " refdb-path))
	(if (not (file-read-access? sheets-file))
	    (list #f (conc "ERROR: refdb file not readable at " refdb-path))
	    (let* ((sheets (with-input-from-file sheets-file
			     (lambda ()
			       (let loop ((inl (read-line))
					  (res '()))
				 (if (eof-object? inl)
				     (reverse res)
				     (loop (read-line)(cons inl res)))))))
		   (data   '()))
	      (for-each 
	       (lambda (sheet-name)
		 (let* ((dat-path  (conc refdb-path "/" sheet-name ".dat"))
			(ref-dat   (configf:read-file dat-path #f #t))
			(ref-assoc (map (lambda (key)
					  (list key (hash-table-ref ref-dat key)))
					(hash-table-keys ref-dat))))
				   ;; (hash-table->alist ref-dat)))
		   ;; (set! data (append data (list (list sheet-name ref-assoc))))))
		   (set! data (cons (list sheet-name ref-assoc) data))))
	       sheets)
	      (list data "NO ERRORS"))))))

;; map over all pairs in a three level hierarchial alist and apply a function to the keys/val
;;
(define (configf:map-all-hier-alist data proc #!key (initproc1 #f)(initproc2 #f)(initproc3 #f))
  (for-each 
   (lambda (sheetname)
     (let* ((sheettmp  (assoc sheetname data))
	    (sheetdat  (if sheettmp (cadr sheettmp) '())))
       (if initproc1 (initproc1 sheetname))
       (for-each 
	(lambda (sectionname)
	  (let* ((sectiontmp  (assoc sectionname sheetdat))
		 (sectiondat  (if sectiontmp (cadr sectiontmp) '())))
	    (if initproc2 (initproc2 sheetname sectionname))
	    (for-each
	     (lambda (varname)
	       (let* ((valtmp (assoc varname sectiondat))
		      (val    (if valtmp (cadr valtmp) "")))
		 (proc sheetname sectionname varname val)))
	     (map car sectiondat))))
	(map car sheetdat))))
   (map car data))
  data)

;;======================================================================
;;  C O N F I G   T O / F R O M   A L I S T
;;======================================================================

(define (configf:config->alist cfgdat)
  (hash-table->alist cfgdat))

(define (configf:alist->config adat)
  (let ((ht (make-hash-table)))
    (for-each
     (lambda (section)
       (hash-table-set! ht (car section)(cdr section)))
     adat)
    ht))

;; if 
(define (configf:read-alist fname)
  (handle-exceptions
   exn
   #f
   (configf:alist->config
    (with-input-from-file fname read))))

(define (configf:write-alist cdat fname)
  (if (not (common:faux-lock fname))
      (debug:print 0 *default-log-port* "INFO: Could not get lock on " fname))
  (let* ((dat  (configf:config->alist cdat))
         (res
          (begin
            (with-output-to-file fname ;; first write out the file
              (lambda ()
                (pp dat)))
            
            (if (common:file-exists? fname)   ;; now verify it is readable
                (if (configf:read-alist fname)
                    #t ;; data is good.
                    (begin
                      (handle-exceptions
                       exn
                       #f
                       (debug:print 0 *default-log-port* "WARNING: content " dat " for cache " fname " is not readable. Deleting generated file.")
                       (delete-file fname))
                      #f))
                #f))))
    (common:faux-unlock fname)
    res))
  
;; convert hierarchial list to ini format
;;
(define (configf:config->ini data)
  (map 
   (lambda (section)
     (let ((section-name (car section))
	   (section-dat  (cdr section)))
       (print "\n[" section-name "]")
       (map (lambda (dat-pair)
	      (let* ((var (car dat-pair))
		     (val (cadr dat-pair))
		     (fname (if (> (length dat-pair) 2)(caddr dat-pair) #f)))
		(if fname (print "# " var "=>" fname))
		(print var " " val)))
	    section-dat))) ;;       (print "section-dat: " section-dat))
   (hash-table->alist data)))

;;======================================================================
;; dashboard-context-menus
;;======================================================================

(define (dboard:launch-testpanel run-id test-id)
  (let* (;; (cfg-sh  (conc *common:this-exe-dir* "/cfg.sh"))
         ;; (cmd (conc
         ;;       (if (common:file-exists? cfg-sh)
         ;;           (conc "source "cfg-sh" && ")
         ;;           "")
         ;;       *common:this-exe-fullpath*
         ;;       " -test " run-id "," test-id
         ;;       " &"))
         (cmd (conc *common:this-exe-dir*"/../dashboard "
                    "-test " run-id "," test-id
                    " &")))
    (system cmd)))


(define (dashboard:run-menu-items  run-id test-id target runname test-name testpatt item-test-path test-info)
  (list
   (iup:menu-item
    (conc "Rerun " testpatt)
    #:action
    (lambda (obj)
      ;; (print  " run-id: " run-id " test-id: " test-id " target: " target " runname: " runname " test-name: " test-name " testpatt: " testpatt "item-path : " item-path)
      (common:run-a-command
       (conc "megatest -run -target " target
             " -runname " runname
             " -testpatt " testpatt
             " -preclean -clean-cache")
       )))
   (iup:menu-item
    "Rerun Complete Run"
    #:action
    (lambda (obj)
      (common:run-a-command
       (conc "megatest -set-state-status NOT_STARTED,n/a -run -target " target
             " -runname " runname
             " -testpatt % "
             " -preclean -clean-cache"))))
   (iup:menu-item
    "Clean Complete Run"
    #:action
    (lambda (obj)
      (common:run-a-command
       (conc "megatest -remove-runs -target " target
             " -runname " runname
             " -testpatt % "))))
   (iup:menu-item 
    "Kill Complete Run"
    #:action
    (lambda (obj)
      (common:run-a-command
       (conc "megatest -set-state-status KILLREQ,n/a -target " target
             " -runname " runname
             " -testpatt % "
             "  -state RUNNING,REMOTEHOSTSTART,LAUNCHED,NOT_STARTED"))))
   (iup:menu-item 
    "Delete Run Data"
    #:action
    (lambda (obj)
      (common:run-a-command
       (conc "megatest -remove-runs -target " target
             " -runname " runname
             " -testpatt % "
             "  -keep-records"))))))

(define (dashboard:test-menu-items  run-id test-id target runname test-name testpatt item-test-path test-info)
  (list
   (iup:menu-item
    (conc "Rerun " item-test-path)
    #:action
    (lambda (obj)
      (common:run-a-command
       (conc "megatest -set-state-status NOT_STARTED,n/a -run -target " target
             " -runname " runname
             " -testpatt " item-test-path
             " -preclean -clean-cache"))))
   (iup:menu-item
    (conc "Kill " item-test-path)
    #:action
    (lambda (obj)
      ;; (rmt:test-set-state-status-by-id run-id test-id "KILLREQ" #f #f)
      (common:run-a-command
       (conc "megatest -set-state-status KILLREQ,n/a -target " target
             " -runname " runname
             " -testpatt " item-test-path 
             " -state RUNNING,REMOTEHOSTSTART,LAUNCHED"))))
   (iup:menu-item
    (conc "Delete data : " item-test-path)
    #:action
    (lambda (obj)
      (common:run-a-command
       (conc "megatest -remove-runs -target " target
             " -runname " runname
             " -testpatt " item-test-path 
             " -keep-records"))))
   (iup:menu-item
    (conc "Clean "item-test-path)
    #:action
    (lambda (obj)
      (common:run-a-command
       (conc "megatest -remove-runs -target " target
             " -runname " runname
             " -testpatt " item-test-path))))
   (iup:menu-item
    "Start xterm"
    #:action
    (lambda (obj)
      (dcommon:examine-xterm run-id test-id)))
   ;;(let* ((cmd (conc (car (argv)) " -xterm " run-id "," test-id "&")))
   ;; (system cmd))))
   (iup:menu-item
    "Edit testconfig"
    #:action
    (lambda (obj)
      (let* ((all-tests (tests:get-all))
             (editor-rx (or (configf:lookup *configdat* "setup" "editor-regex") 
                            "\\b(vim?|nano|pico)\\b"))
             (editor (or (configf:lookup *configdat* "setup" "editor")
                         (get-environment-variable "VISUAL")
                         (get-environment-variable "EDITOR") "vi"))
             (tconfig (conc (hash-table-ref all-tests test-name) "/testconfig"))
             (cmd (conc (if (string-search editor-rx editor)
                            (conc "xterm -e " editor)
                            editor)
                        " " tconfig " &")))
        (system cmd))))))

(define (dashboard:step-logs-menu-item  run-id test-id target runname test-name testpatt item-test-path test-info)
  (let* ((steps (tests:get-compressed-steps run-id test-id))   ;; #<stepname start end status Duration Logfile Comment id>
         (rundir (db:test-get-rundir test-info)))
    
    (iup:menu-item
     "Step logs"
     (apply iup:menu
            (map (lambda (step)
                   (let ((stepname (vector-ref step 0))
                         (logfile  (vector-ref step 5))
                         (status   (vector-ref step 3)))
                     (iup:menu-item
                      (conc stepname "/" (if (string=? logfile "") "no log!" logfile) " (" status ")")
                      #:action (lambda (obj)
                                 (let ((fullfile (conc rundir "/" logfile)))
                                   (if (common:file-exists? fullfile)
                                       (dcommon:run-html-viewer fullfile)
                                       (message-window (conc "file " fullfile " not found"))))))))
                 steps)))))

(define (dashboard:toplevel-menu-items  run-id test-id target runname test-name testpatt item-test-path test-info)
  (list

   (iup:menu-item
    "Test Control Panel"
    #:action
    (lambda (obj)
      (dboard:launch-testpanel run-id test-id)))
   
   (dashboard:step-logs-menu-item  run-id test-id target runname test-name testpatt item-test-path test-info)

   (iup:menu-item
    (conc "Rerun " item-test-path)
    #:action
    (lambda (obj)
      (common:run-a-command
       (conc "megatest -set-state-status NOT_STARTED,n/a -run -target " target
             " -runname " runname
             " -testpatt " item-test-path
             " -preclean -clean-cache"))))
   
   (iup:menu-item
    "Start xterm"
    #:action
    (lambda (obj)
      (dcommon:examine-xterm run-id test-id)))
   
   (iup:menu-item
    (conc "Kill " item-test-path)
    #:action
    (lambda (obj)
      ;; (rmt:test-set-state-status-by-id run-id test-id "KILLREQ" #f #f)
      (common:run-a-command
       (conc "megatest -set-state-status KILLREQ,n/a -target " target
             " -runname " runname
             " -testpatt " item-test-path 
             " -state RUNNING,REMOTEHOSTSTART,LAUNCHED,NOT_STARTED"))))
   
   (let* ((rundir    (db:test-get-rundir      test-info))
          (has-subrun (subrun:subrun-test-initialized? rundir)))
     (if has-subrun
         (iup:menu-item
          "Launch subrun dashboard"
          #:action
          (lambda (obj)
            (subrun:launch-dashboard rundir)))
         (iup:vbox)))
    
    (iup:menu-item
     (conc "View Log " item-test-path)
     #:action
     (lambda (obj)
       (let* ((rundir    (db:test-get-rundir      test-info))
              (logf      (db:test-get-final_logf  test-info))
              (fullfile  (conc rundir "/" logf)))
         (if (common:file-exists? fullfile)
             (dcommon:run-html-viewer fullfile)
             (message-window (conc "file " fullfile " not found.")))))
     )
    ))
;; example section for megatest.config:
;;
;; 
;; [custom-context-menu-items]
;; #<unique var> <menu item text, can have template variables> : <command line with template %variable%s>
;; item1  custom show run-id (%run-id%):echo "%run-id%"
;; item2  custom show test-id (%test-id%):echo "%test-id%"
;; item3  custom show target (%target%):echo "%target%"
;; item4  custom show test-name (%test-name%):echo "%test-name%"
;; item5  custom show test-patt (%test-patt%):echo "%test-patt%"
;; item6  custom show test-run-dir (%test-run-dir%):echo "%test-run-dir%"
;; item7  custom show run-area-home (%run-area-home%):echo "%run-area-home%"
;; item8  custom show megatest root (%mt-root%):echo "%mt-root%"
;; item9  custom ls :  ls -lrt
;; item10 custom see $MT_RUN_AREA_HOME (not yet implemented) :  echo $MT_RUN_AREA_HOME

(define (dashboard:custom-menu-items  run-id test-id target run-name test-name testpatt item-test-path test-info)
  (let* ((vars (configf:section-vars *configdat* "custom-context-menu-items"))
         (item-path (db:test-get-item-path test-info))
         (mt-root (pathname-directory  (pathname-directory *common:this-exe-dir* ))))
    (filter-map
     (lambda (var)
       (let* ((val (configf:lookup *configdat* "custom-context-menu-items" var))
              (m   (string-match "^\\s*([^:]+?)\\s*:\\s*(.*?)\\s*$" val)))
         (if m
             (let* ((menu-item-text-raw (list-ref m 1))
                    (command-line-raw   (list-ref m 2))
                    (subst-alist ;; template vars
                          `(( "%run-id%"    . ,run-id   )
                            ( "%test-id%"   . ,test-id  )
                            ( "%target%"    . ,target   )
                            ( "%test-name%" . ,test-name)
                            ( "%test-patt%" . ,testpatt)
                            ( "%test-run-dir%" . ,(db:test-get-rundir test-info))
                            ( "%mt-root%" . ,mt-root)
                            ( "%run-name%" . ,run-name)
                            ( "%run-area-home%" . ,*toppath*)
                            ( "%item-path%" . ,item-path)
                            ( "%item-test-patt%" . ,item-test-path )))
                    (command-line ;; replace template vars
                          (foldr
                           (lambda (x i)
                             (string-substitute
                              (car x)
                              (->string (cdr x))
                              i
                              #t))
                           command-line-raw
                           subst-alist))
                    (menu-item-text ;; replace template vars
                          (foldr
                           (lambda (x i)
                             (string-substitute
                              (car x)
                              (->string (cdr x))
                              i
                              #t))
                           menu-item-text-raw
                           subst-alist)))
               (iup:menu-item
                (conc "*"menu-item-text)
                #:action
                (lambda (obj)

                  (let* ((scheme-match (string-match "^#(\\(.*)" command-line)))
                    ;;(BB> "cmdline is >"command-line"<")
                    (common:with-env-vars
                     ;; TODO: with-env-vars <runconfig target vars, env-override vars from mtest>
                     ;; TODO: with-env-vars MT_*
                     (runs:get-mt-env-alist run-id run-name target test-name item-path)
                     
                     (lambda ()
                       (if scheme-match
                           (begin
                             (handle-exceptions
                              exn
                              (print "error with custom menu scheme")
                              (begin
                                ;;(BB> "gonna eval it!")
                                (eval (with-input-from-string (cadr scheme-match) read)))))
                           (common:run-a-command command-line with-vars: #t))))))))
             #f)))
     vars)))

(define (dashboard:context-menu  run-id test-id target runname test-name testpatt item-test-path test-info)
  (let* ((run-menu-items
          (dashboard:run-menu-items  run-id test-id target runname test-name testpatt item-test-path test-info))
         (test-menu-items
          (dashboard:test-menu-items  run-id test-id target runname test-name testpatt item-test-path test-info))
         (custom-menu-items
          (dashboard:custom-menu-items run-id test-id target runname test-name testpatt item-test-path test-info))
         (toplevel-menu-items
          (dashboard:toplevel-menu-items  run-id test-id target runname test-name testpatt item-test-path test-info))
         )
    (apply iup:menu
           `(,@toplevel-menu-items
             ,(iup:menu-item
              "Run"
              (apply iup:menu run-menu-items))
             ,(iup:menu-item
              "Test"
              (apply iup:menu test-menu-items))
             ,@custom-menu-items))))
;;======================================================================
;; dashboard-gui-monitor
;;======================================================================
(define (control-panel db tdb keys)
  (let* ((var-params (make-hash-table)) ;; register all the widgets here for querying on run, rollup, remove?
	 (key-params (make-hash-table))
	 (monitordat '()) ;; list of monitor records
	 (keyentries (iup:frame 
		      #:title "Keys"
		      (apply
		       iup:vbox
		       (map (lambda (key)
			      (iup:hbox (iup:label (vector-ref key 0) #:size "60x15") ; #:expand "HORIZONTAL")
					(iup:textbox #:expand "HORIZONTAL"
						     #:action (lambda (obj a val)
								(hash-table-set! key-params (vector-ref key 0) val)))))
			    keys))))
	 (othervars  (iup:frame
		      #:title "Run Vars"
		      (apply
		       iup:vbox
		       (map (lambda (var)
			      (iup:hbox (iup:label var #:size "60x15")
					(iup:textbox   #:expand "HORIZONTAL"
						       #:action (lambda (obj a val)
								  (hash-table-set! var-params var val)))))
			    (list "runname" "testpatts" "params")))))
	 (controls   (iup:frame
		      #:title "Controls"
		      (iup:hbox 
		       (iup:frame
			#:title "Runs"
			(iup:hbox 
			 (iup:button "Start"  
				     #:expand "HORIZONTAL"
				     #:action (lambda (obj)
						(tasks:add-from-params tdb "run" keys key-params var-params)
						(print "Launch Run")))
			 (iup:button "Remove" 
				     #:expand "HORIZONTAL"
				     #:action (lambda (obj)
						(print "Remove Run")
						(tasks:add-from-params tdb "remove" keys key-params var-params)
						))
			 (iup:button "Rollup" 
				     #:expand "HORIZONTAL"
				     #:action (lambda (obj)
						(print "Rollup Run")
						(tasks:add-from-params tdb "rollup" keys key-params var-params)))))
		       (iup:frame 
			#:title "Misc"
			(iup:hbox
			 (iup:button "Quit" 
				     #:expand "HORIZONTAL"
				     #:action (lambda (obj)
						(sqlite3:finalize! db)
						(sqlite3:finalize! tdb)
						(exit))))))))
	 (monitors     (iup:textbox 
			#:expand "YES" ; HORIZONTAL"
			; #:size   "x40"
			#:multiline "YES"
			#:font "Courier New, -10"
			#:value "None..............................................."))
	 (actions      (iup:textbox
			#:expand "YES"
			#:multiline "YES"
			#:font "Courier New, -10"
			#:value "None..............................................."))
	 (lastmodtime 0)
	 (next-touch  0) ;; the last time the "last_update" field was updated
	 (refreshdat (lambda ()
		       (let* ((monitordbpath  (conc *toppath* "/monitor.db"))
			      (megatestdbpath (conc *toppath* "/megatest.db"))
			      (modtime        (max (file-modification-time megatestdbpath)
						   (file-modification-time monitordbpath))))
			 ;; do stuff here when the db is updated by some other process
			 (if (> modtime lastmodtime)
			     (let ((tlst (tasks:get-tasks tdb '() '()))
				   (mlst (tasks:get-monitors tdb)))
			       (set! tasksdat tlst)
			       (set! monitorsdat mlst)
			       (iup:attribute-set! monitors "VALUE" (tasks:monitors->text-table mlst))
			       (iup:attribute-set! actions  "VALUE" (tasks:tasks->text tlst))
			       (tasks:process-queue db tdb)
			       (set! lastmodtime (max (file-modification-time megatestdbpath)
						      (file-modification-time monitordbpath)))
			       (tasks:reset-stuck-tasks tdb)))
			 ;; stuff to do every 10 seconds
			 (if (> (current-seconds) next-touch)
			     (begin
			       ;; (tasks:process-queue db tdb monitordbpath)
			       (tasks:monitors-update tdb)
			       (tasks:reset-stuck-tasks tdb)
			       (set! monitorsdat (tasks:get-monitors tdb))
			       (set! next-touch (+ (current-seconds) 10))
			       )))))
	 (topdialog  #f))
    (set! topdialog (iup:dialog 
		     #:close_cb (lambda (a)(exit))
		     #:title "Run Controls"
		     (iup:vbox
		      (iup:hbox keyentries othervars)
		      controls
		      (let ((tabtop (iup:tabs 
				     (iup:vbox 
				       (let* ((tb (iup:textbox #:expand "HORIZONTAL"))
					      (bt (iup:button "Remove tasks by id"
							      #:action (lambda (obj)
									 (let ((val (iup:attribute tb "VALUE")))
									   (tasks:remove-queue-entries tdb val)))))
					      (lb (iup:label "(comma separated)")))
					 (iup:hbox bt tb lb))
				       actions)
				     monitors
				     )))
			(iup:attribute-set! tabtop "TABTITLE0" "Actions")
			(iup:attribute-set! tabtop "TABTITLE1" "Monitors")
			tabtop)
		      )))
		      ; (iup:frame
		      ;  #:title "Monitors"
		      ;  monitors)
		      ; (iup:frame
		      ;  #:title "Actions"
		      ;  actions))))

    (iup:show topdialog)
    (iup:callback-set! *tim* "ACTION_CB"
		       (lambda (x)
			 (refreshdat)
			 (if *exit-started*
			     (set! *exit-started* 'ok))))))

(define (main-window setuptab fsltab collateraltab toolstab)
  (iup:show
   (iup:dialog #:title "FSL Power Window" #:size "290x190" ; #:expand "YES"
               (let ((tabtop (iup:tabs setuptab collateraltab fsltab toolstab)))
                 (iup:attribute-set! tabtop "TABTITLE0" "Setup") 
                 (iup:attribute-set! tabtop "TABTITLE1" "Collateral")
                 (iup:attribute-set! tabtop "TABTITLE2" "Fossil")
                 (iup:attribute-set! tabtop "TABTITLE3" "Tools")
                 tabtop))))

;; BUG: Remember to re-instate this!!!!
;; (on-exit (lambda ()
;; 	   (let ((tdb (tasks:open-db)))
;; 	     ;; (print "On-exit called")
;; 	     (tasks:remove-monitor-record tdb)
;; 	     (sqlite3:finalize! tdb))))

(define (gui-monitor db)
  (let ((keys (db:get-keys db))
	(tdb  (tasks:open-db)))
    (tasks:register-monitor db tdb) ;;; let the other monitors know we are here
    (control-panel db tdb keys)
    ;(tasks:remove-monitor-record db)
    ;(sqlite3:finalize! db)
   ))
	
;;======================================================================
;; dashboard-tests
;;======================================================================

;;======================================================================
;; C O M M O N
;;======================================================================

(define *dashboard-comment-share-slot* #f)

(define (dtests:get-pre-command #!key (default-override #f))
  (let* ((orig-pre-command "export CMD='")
         (viewscreen-pre-command  "viewscreen ")
         (use-viewscreen (configf:lookup *configdat* "dashboard" "use-viewscreen"))
         (default-pre-command (if use-viewscreen viewscreen-pre-command orig-pre-command))
         (cfg-ovrd (configf:lookup *configdat* "dashboard" "pre-command")))
    (or cfg-ovrd default-override default-pre-command))) ;; "xterm -geometry 180x20 -e \""))

  
(define (dtests:get-post-command #!key (default-override #f))
  (let* ((orig-post-command (conc "';xterm -geometry 180x20 -e \"(echo; echo -n START:;date +ww%U.%w-$H:%M:%S;echo;echo $CMD;echo;$CMD)|&"
                                 "tee -a runlog-`date +ww%U.%w-%H:%M`.log;echo Press any key to continue;bash -c 'read -n 1 -s'\" &"))
         (viewscreen-post-command  "")
         (use-viewscreen (configf:lookup *configdat* "dashboard" "use-viewscreen"))
         (default-post-command (if use-viewscreen viewscreen-post-command orig-post-command))
         (cfg-ovrd (configf:lookup *configdat* "dashboard" "post-command")))
    (or cfg-ovrd default-override default-post-command))) ;; ";echo Press any key to continue;bash -c 'read -n 1 -s'\" &")))

(define (common:run-a-command cmd #!key (with-vars #f) (with-orig-env #f))
  (let* ((pre-cmd  (dtests:get-pre-command))
         (post-cmd (dtests:get-post-command))
         (fullcmd  (if (or pre-cmd post-cmd)
                       (conc pre-cmd cmd post-cmd)
                       (conc "viewscreen " cmd))))
    (debug:print-info 02 *default-log-port* "Running command: " fullcmd)
    (cond
     (with-vars     (common:without-vars  fullcmd))
     (with-orig-env (common:with-orig-env fullcmd))
     (else          (common:without-vars  fullcmd "MT_.*")))))

(define (test-info-panel testdat store-label widgets)
  (iup:frame 
   #:title "Test Info" ; #:expand "YES"
   (iup:hbox ; #:expand "YES"
    (apply iup:vbox ; #:expand "YES"
	   (append (map (lambda (val)
			  (iup:label val ; #:expand "HORIZONTAL"
				     ))
			(list "Testname: "
			      "Item path: "
			      "Current state: "
			      "Current status: "
			      "Test comment: "
			      "Test id: "
			      "Test date: "))
		   (list (iup:label "" #:expand "VERTICAL"))))
    (apply iup:vbox  ; #:expand "YES"
	   (list 
	    (store-label "testname"
			 (iup:label (db:test-get-testname  testdat) #:expand "HORIZONTAL")
			 (lambda (testdat)(db:test-get-testname testdat)))
	    (store-label "item-path"
			 (iup:label (db:test-get-item-path testdat) #:expand "HORIZONTAL")
			 (lambda (testdat)(db:test-get-item-path testdat)))
	    (store-label "teststate" 
			 (iup:label (db:test-get-state testdat) #:expand "HORIZONTAL")
			 (lambda (testdat)
			   (db:test-get-state testdat)))
	    (let ((lbl   (iup:label (db:test-get-status testdat) #:expand "HORIZONTAL")))
	      (hash-table-set! widgets "teststatus"
			       (lambda (testdat)
				 (let ((newstatus (db:test-get-status testdat))
				       (oldstatus (iup:attribute lbl "TITLE")))
				   (if (not (equal? oldstatus newstatus))
				       (begin
					 (iup:attribute-set! lbl "FGCOLOR" (car (gutils:get-color-for-state-status (db:test-get-state testdat)
														   (db:test-get-status testdat))))
					 (iup:attribute-set! lbl "TITLE" (db:test-get-status testdat)))))))
	      lbl)
	    (store-label "testcomment"
			 (iup:label "TestComment                             "
				    #:expand "HORIZONTAL")
			 (lambda (testdat)
			   (let ((newcomment (db:test-get-comment testdat)))
			     (if *dashboard-comment-share-slot*
				 (if (not (equal? (iup:attribute *dashboard-comment-share-slot* "VALUE")
						  newcomment))
				     (iup:attribute-set! *dashboard-comment-share-slot*
							 "VALUE"
							 newcomment)))
			     newcomment)))
	    (store-label "testid"
			 (iup:label "TestId                             "
				    #:expand "HORIZONTAL")
			 (lambda (testdat)
			   (db:test-get-id testdat)))
	    (store-label "testdate" 
			 (iup:label "TestDate                           "
				    #:expand "HORIZONTAL")
			 (lambda (testdat)
			   (seconds->work-week/day-time (db:test-get-event_time testdat))))
	    )))))

;;======================================================================
;; Test meta panel
;;======================================================================

(define (test-meta-panel-get-description testmeta)
  (fmt #f (with-width 40 (wrap-lines (db:testmeta-get-description testmeta)))))

(define (test-meta-panel testmeta store-meta)
  (iup:frame 
   #:title "Test Meta Data" ; #:expand "YES"
   (iup:hbox ; #:expand "YES"
    (apply iup:vbox ; #:expand "YES"
	   (append (map (lambda (val)
			  (iup:label val ; #:expand "HORIZONTAL"
				     ))
			(list "Author: "
			      "Owner: "
			      "Reviewed: "
			      "Tags: "
			      "Description: "))
		   (list (iup:label "" #:expand "VERTICAL"))))
    (apply iup:vbox  ; #:expand "YES"
	   (list 
	    (store-meta "author"
			 (iup:label (db:testmeta-get-author testmeta) #:expand "HORIZONTAL")
			 (lambda (testmeta)(db:testmeta-get-author testmeta)))
	    (store-meta "owner"
			 (iup:label (db:testmeta-get-owner testmeta) #:expand "HORIZONTAL")
			 (lambda (testmeta)(db:testmeta-get-owner testmeta)))
	    (store-meta "reviewed" 
			 (iup:label (db:testmeta-get-reviewed testmeta) #:expand "HORIZONTAL")
			 (lambda (testmeta)(db:testmeta-get-reviewed testmeta)))
	    (store-meta "tags" 
			 (iup:label (db:testmeta-get-tags testmeta) #:expand "HORIZONTAL")
			 (lambda (testmeta)(db:testmeta-get-tags testmeta)))
	    (store-meta "description" 
			 (iup:label (test-meta-panel-get-description testmeta) #:size "x50"); #:expand "HORIZONTAL")
			 (lambda (testmeta)
			   (test-meta-panel-get-description testmeta)))
	    )))))


;;======================================================================
;; Run info panel
;;======================================================================
(define (run-info-panel db keydat testdat runname)
  (let* ((run-id     (db:test-get-run_id testdat))
	 (rundat     (rmt:get-run-info run-id))
	 (header     (db:get-header rundat))
	 (event_time (db:get-value-by-header (db:get-rows rundat)
					     (db:get-header rundat)
					     "event_time")))
    (iup:frame 
     #:title "Megatest Run Info" ; #:expand "YES"
     (iup:hbox ; #:expand "YES"
      (apply iup:vbox ; #:expand "YES"
	     (append (map (lambda (keyval)
			    (iup:label (conc (car keyval) " ")))
			  keydat)
		     (list (iup:label "runname ")
			   (iup:label "run-id")
			   (iup:label "run-date"))))
      (apply iup:vbox
	     (append (map (lambda (keyval)
			    (iup:label (cadr keyval) #:expand "HORIZONTAL"))
			  keydat)
		     (list (iup:label runname)
			   (iup:label (conc run-id))
			   (iup:label (seconds->year-work-week/day-time event_time))
			   (iup:label "" #:expand "VERTICAL"))))))))
  
;;======================================================================
;; Host info panel
;;======================================================================
(define (host-info-panel testdat store-label)
  (iup:frame
   #:title "Remote host and Test Run Info" ; #:expand "YES"
   (iup:hbox ; #:expand "YES"
    (apply iup:vbox ; #:expand "YES" ;; The heading labels
	   (append (map (lambda (val)
			  (iup:label val ; #:expand "HORIZONTAL"
				     ))
			(list "Hostname: "
			      "Disk free: "
			      "CPU Load: "
			      "Run duration: "
			      "Logfile: "
			      "Top process id: "
			      "Uname -a: "))
		   (iup:label "" #:expand "VERTICAL")))
    (apply iup:vbox ; #:expand "YES"
	   (list
	    ;; NOTE: Yes, the host can change!
	    (store-label "HostName"
			 (iup:label ;; (sdb:qry 'getstr 
			  (db:test-get-host testdat) ;; )
			  #:expand "HORIZONTAL")
			 (lambda (testdat)(db:test-get-host testdat)))
	    (store-label "DiskFree"
			 (iup:label (conc (db:test-get-diskfree testdat)) #:expand "HORIZONTAL")
			 (lambda (testdat)(conc (db:test-get-diskfree testdat))))
	    (store-label "CPULoad"
			 (iup:label (conc (db:test-get-cpuload testdat)) #:expand "HORIZONTAL")
			 (lambda (testdat)(conc (db:test-get-cpuload testdat))))
	    (store-label "RunDuration"
			 (iup:label (conc (seconds->hr-min-sec (db:test-get-run_duration testdat))) #:expand "HORIZONTAL")
			 (lambda (testdat)(conc (seconds->hr-min-sec (db:test-get-run_duration testdat)))))
	    (store-label "LogFile"
			 (iup:label (conc (db:test-get-final_logf testdat)) #:expand "HORIZONTAL")
			 (lambda (testdat)(conc (db:test-get-final_logf testdat))))
	    (store-label "ProcessId"
			 (iup:label (conc (db:test-get-process_id testdat)) #:expand "HORIZONTAL")
			 (lambda (testdat)(conc (db:test-get-process_id testdat))))
	    (store-label "Uname"
			 (iup:label "                                                   " #:expand "HORIZONTAL") ;;  #:wordwrap "YES")
			 (lambda (testdat) ;; (sdb:qry 'getstr 
			   (db:test-get-uname testdat))) ;; )
	    )))))

;; if there is a submegatest create a button to launch dashboard in that area
;;
(define (submegatest-panel dbstruct keydat testdat runname testconfig)
  (let* ((test-run-dir      (db:test-get-rundir testdat))
	 (subarea           (subrun:get-runarea test-run-dir))
	 (area-exists       (and subarea (common:file-exists? subarea silent: #t))))
    (if subarea
	(iup:frame 
	 #:title "Megatest Run Info" ; #:expand "YES"
	 (iup:button
	  "Launch Dashboard"
	  #:action (lambda (obj)
                     (subrun:launch-dashboard test-run-dir))))
	(iup:vbox))))

;; use a global for setting the buttons colors
;;                           state status teststeps
(define *state-status* (vector #f #f #f))
(define (update-state-status-buttons testdat)
  (let* ((state  (db:test-get-state  testdat))
	 (status (db:test-get-status testdat))
	 (color  (car (gutils:get-color-for-state-status state status))))
    ((vector-ref *state-status* 0) state color)
    ((vector-ref *state-status* 1) status color)))

(define *dashboard-test-db* #t)
(define *dashboard-comment-share-slot* #f)

;;======================================================================
;; Set fields 
;;======================================================================
(define (set-fields-panel dbstruct run-id test-id testdat #!key (db #f))
  (let ((newcomment #f)
	(newstatus  #f)
	(newstate   #f)
	(wtxtbox    #f))
    (iup:frame
     #:title "Set fields"
     (iup:vbox
      (iup:hbox (iup:label "Comment:")
		(let ((txtbox (iup:textbox #:action (lambda (val a b)
						      ;; (rmt:test-set-state-status-by-id run-id test-id #f #f b)
						      (rmt:test-set-state-status run-id test-id #f #f b)
						      ;; IDEA: Just set a variable with the proc to call?
						      ;; (rmt:test-set-state-status-by-id run-id test-id #f #f b)
						      (set! newcomment b))
					   #:value (db:test-get-comment testdat)
					   #:expand "HORIZONTAL")))
		  (set! wtxtbox txtbox)
		  txtbox))
		  
      (apply iup:hbox
	     (iup:label "STATE:" #:size "30x")
	     (let* ((btns  (map (lambda (state)
				  (let ((btn (iup:button state
							 #:expand "HORIZONTAL" #:size "50x" #:font "Courier New, -10"
							 #:action (lambda (x)
								    ;; (rmt:test-set-state-status-by-id run-id test-id state #f #f)
								    (rmt:set-state-status-and-roll-up-items run-id test-id #f state #f #f) ;; test-name passed in as test-id is respected
								    (db:test-set-state! testdat state)))))
				    btn))
				(map cadr *common:std-states*)))) ;; (list "COMPLETED" "NOT_STARTED" "RUNNING" "REMOTEHOSTSTART" "LAUNCHED" "KILLED" "KILLREQ"))))
	       (vector-set! *state-status* 0
			    (lambda (state color)
			      (for-each 
			       (lambda (btn)
				 (let* ((name     (iup:attribute btn "TITLE"))
					(newcolor (if (equal? name state) color "192 192 192")))
				   (if (not (colors-similar? newcolor (iup:attribute btn "BGCOLOR")))
				       (iup:attribute-set! btn "BGCOLOR" newcolor))))
			       btns)))
	       btns))
      (apply iup:hbox
	     (iup:label "STATUS:" #:size "30x")
	     (let* ((btns  (map (lambda (status)
				  (let ((btn (iup:button status
							 #:expand "HORIZONTAL" #:size "50x" #:font "Courier New, -10"
							 #:action (lambda (x)
								    (let ((t (iup:attribute x "TITLE")))
								      (if (equal? t "WAIVED")
									  (iup:show (dashboard-tests:waiver run-id testdat 
													    (if wtxtbox (iup:attribute wtxtbox "VALUE") #f)
													    (lambda (c)
													      (set! newcomment c)
													      (if wtxtbox 
														  (begin
														    (iup:attribute-set! wtxtbox "VALUE" c)
														    (if (not *dashboard-comment-share-slot*)
															(set! *dashboard-comment-share-slot* wtxtbox)))
														  ))))
									  (begin
									    ;; (rmt:test-set-state-status-by-id run-id test-id #f status #f)
									    (rmt:set-state-status-and-roll-up-items run-id test-id #f #f status #f) ;; test-name passed in as test-id is respected
									    (db:test-set-status! testdat status))))))))
				    btn))
				(map cadr *common:std-statuses*)))) ;; (list  "PASS" "WARN" "FAIL" "CHECK" "n/a" "WAIVED" "SKIP"))))
	       (vector-set! *state-status* 1
			    (lambda (status color)
			      (for-each 
			       (lambda (btn)
				 (let* ((name     (iup:attribute btn "TITLE"))
					(newcolor (if (equal? name status) color "192 192 192")))
				   (if (not (colors-similar? newcolor (iup:attribute btn "BGCOLOR")))
				       (iup:attribute-set! btn "BGCOLOR" newcolor))))
			       btns)))
	       btns))))))

(define (dashboard-tests:run-a-step info)
  #t)

;; (define (dashboard-tests:step-run-control testdat stepname testconfig)
;;   (let* ((mutex (make-mutex)))
;;     (letrec ((dlg
;;               (iup:dialog ;; #:close_cb (lambda (a)(exit)) ; #:expand "YES"
;;                #:title stepname
;;                (iup:vbox ; #:expand "YES"
;;                 (iup:label (conc "Step: " stepname "\nNB// These buttons only run the test step\nfor the purpose of debugging.\nNot all database updates are done."))
;;                 (iup:button "Re-run"            
;; 		            #:expand "HORIZONTAL" 
;; 		            #:action (lambda (obj)
;;                                             (debug:catch-and-dump (lambda ()
;; 			                                            (thread-start! 
;; 			                                             (make-thread
;;                                                                       (lambda ()
;;                                                                         (print "BB> started ezsteps:run-from")
;;                                                                         (debug:catch-and-dump 
;;                                                                          (lambda ()
;; 			                                                   (ezsteps:run-from testdat stepname #t))
;;                                                                          "dashboard-tests:step-run-control -> ezstep:run-from (1)")
;;                                                                         (print "BB> done ezsteps:run-from")
;;                                                                         'foo)
;;                                                                       (conc "ezstep run single step " stepname)))
;;                                                                     )
;;                                                                   "step-run-control action")))
;;                 (iup:button "Re-run and continue"         
;; 		            #:expand "HORIZONTAL" 
;; 		            #:action (lambda (obj)
;;                                        (debug:catch-and-dump
;;                                         (lambda ()
;; 			                  (thread-start!
;; 			                   (make-thread (lambda ()
;; 					                  (ezsteps:run-from testdat stepname #f))
;; 					                (conc "ezstep run from step " stepname))))
;;                                         "dashboard-tests:step-run-control -> ezstep:run-from (2)")))
;;                 (iup:button "Close"
;;                             #:action (lambda (obj)
;;                                        (iup:destroy! dlg)))
;;                 ;; (iup:button "Refresh test data"
;;                 ;;     	#:expand "HORIZONTAL"
;;                 ;;     	#:action (lambda (obj)
;;                 ;;     		   (print "Refresh test data " stepname))
;;                 ))))
;;       dlg)))

(define (dashboard-tests:waiver run-id testdat ovrdval cmtcmd)
  (let* ((wpatt (configf:lookup *configdat* "setup" "waivercommentpatt"))
	 (wregx (if (string? wpatt)(regexp wpatt) #f))
	 (wmesg (iup:label (if wpatt (conc "Comment must match pattern " wpatt) "")))
	 (comnt (iup:textbox #:action (lambda (val a b)
					(if wpatt
					    (if (string-match wregx b)
						(iup:attribute-set! wmesg "TITLE" (conc "Comment matches " wpatt))
						(iup:attribute-set! wmesg "TITLE" (conc "Comment does not match " wpatt))
						)))
			     #:value (if ovrdval ovrdval (db:test-get-comment testdat))
			     #:expand "HORIZONTAL"))
	 (dlog  #f))
    (set! dlog (iup:dialog ;; #:close_cb (lambda (a)(exit)) ; #:expand "YES"
		#:title "SET WAIVER"
		(iup:vbox ; #:expand "YES"
		 (iup:label (conc "Enter justification for waiving test "
				  (db:test-get-testname testdat)
				  (if (equal? (db:test-get-item-path testdat) "") 
				      ""
				      (conc "/" (db:test-get-item-path testdat)))))
		 wmesg ;; the informational msg on whether it matches
		 comnt
		 (iup:hbox
		  (iup:button "Apply and Close "
			      #:expand "HORIZONTAL"
			      #:action (lambda (obj)
					 (let ((comment (iup:attribute comnt "VALUE"))
					       (test-id (db:test-get-id testdat)))
					   (if (or (not wpatt)
						   (string-match wregx comment))
					       (begin
						 ;; (rmt:test-set-state-status-by-id run-id test-id #f "WAIVED" comment)
						 (rmt:test-set-state-status run-id test-id #f "WAIVED" comment)
						 (db:test-set-status! testdat "WAIVED")
						 (cmtcmd comment)
						 (iup:destroy! dlog))))))
		  (iup:button "Cancel"
			      #:expand "HORIZONTAL" 
			      #:action (lambda (obj)
					 (iup:destroy! dlog)))))))
    dlog))


;;======================================================================
;;
;;======================================================================
(define (dashboard-tests:examine-test run-id test-id) ;; run-id run-key origtest)
  (let* ((db-path       (common:get-db-tmp-area *alldat*)) 
	 (dbstruct      #f) ;; NOT ACTUALLY USED (db:setup))
			    ;;		   local: #t))
	 (testdat        (rmt:get-test-info-by-id run-id test-id)) ;; (db:get-test-info-by-id dbstruct run-id test-id))
	 (db-mod-time   0) ;; (file-modification-time db-path))
	 (last-update   0) ;; (current-seconds))
	 (request-update #t))
    (if (not testdat)
	(begin
	  (debug:print 2 *default-log-port* "ERROR: No test data found for test " test-id ", exiting")
	  (exit 1))
	(let* (;; (run-id        (if testdat (db:test-get-run_id testdat) #f))
	       (test-registry (tests:get-all))
	       (keydat        (if testdat (rmt:get-key-val-pairs run-id) #f))
	       (rundat        (if testdat (rmt:get-run-info run-id) #f))
	       (runname       (if testdat (db:get-value-by-header (db:get-rows rundat)
								  (db:get-header rundat)
								  "runname") #f))
	       ;; (tdb           (tdb:open-test-db-by-test-id-local dbstruct run-id test-id))
	       ;; These next two are intentional bad values to ensure errors if they should not
	       ;; get filled in properly.
	       (logfile       "/this/dir/better/not/exist")
	       (rundir        (if testdat 
				  (db:test-get-rundir testdat)
				  logfile))
	       ;; (testdat-path  (conc rundir "/testdat.db")) ;; this gets recalculated until found 
               (augment-teststeps (lambda (inlov)
                                    (map
                                     (lambda (invec)
                                       (list->vector
                                        `(
                                          ,@(reverse (cdr (reverse (vector->list invec))))
                                          "rerun this step" "restart from here" )))
                                     inlov)))
	       (teststeps     (if testdat (augment-teststeps (tests:get-compressed-steps run-id test-id)) '()))
	       (testfullname  (if testdat (db:test-get-fullname testdat) "Gathering data ..."))
	       (testname      (if testdat (db:test-get-testname testdat) "n/a"))
	       ;; (tests:get-testconfig testdat testname 'return-procs))
	       (testmeta      (if testdat 
				  (let ((tm (rmt:testmeta-get-record testname)))
				    (if tm tm (make-db:testmeta)))
				  (make-db:testmeta)))

	       (keystring  (string-intersperse 
			    (map (lambda (keyval)
				   ;; (conc ":" (car keyval) " " (cadr keyval)))
				   (cadr keyval))
				 keydat)
			    "/"))
	       (item-path  (db:test-get-item-path testdat))
	       ;; this next block was added to fix a bug where variables were
               ;; needed. Revisit this.
	       (runconfig  (let ((runconfigf (conc  *toppath* "/runconfigs.config"))) ;; no rush but it would be good to convert this call to use runconfig:read
	 		     (if (common:file-exists? runconfigf)
	 			 (handle-exceptions
                                   exn
                                   #f  ;; do nothing, just keep on trucking ....
                                   (setup-env-defaults runconfigf run-id (make-hash-table) keydat environ-patt: keystring))
	 			 (make-hash-table))))
	       (testconfig    (begin
				;; (runs:set-megatest-env-vars run-id inrunname: runname testname: test-name itempath: item-path)
				(runs:set-megatest-env-vars run-id inkeyvals: keydat inrunname: runname intarget: keystring testname: testname itempath: item-path) ;; these may be needed by the launching process
				(handle-exceptions
				 exn  ;; NOTE: I've no idea why this was written this way. Research, study and fix needed!
				 (tests:get-testconfig (db:test-get-testname testdat) (db:test-get-item-path testdat) test-registry #f allow-write-cache: #f)
				 (tests:get-testconfig (db:test-get-testname testdat) item-path test-registry #t allow-write-cache: #f))))
	       (viewlog    (lambda (x)
			     (if (common:file-exists? logfile)
					;(system (conc "firefox " logfile "&"))
				 (dcommon:run-html-viewer logfile)
				 (message-window (conc "File " logfile " not found")))))
	       (view-a-log (lambda (lfile) 
			     (let ((lfilename (conc rundir "/" lfile)))
			       ;; (print "lfilename: " lfilename)
			       (if (common:file-exists? lfilename)
					;(system (conc "firefox " logfile "&"))
				   (dcommon:run-html-viewer lfilename)
				   (message-window (conc "File " lfilename " not found"))))))
	       (xterm      (lambda (x)
			     (if (directory-exists? rundir)
				 (let ((shell (if (get-environment-variable "SHELL") 
						  (conc "-e " (get-environment-variable "SHELL"))
						  "")))
				   (common:without-vars
				    (conc "cd " rundir 
					  ";mt_xterm -T \"" (string-translate testfullname "()" "  ") "\" " shell "&")
				    "MT_.*"))
				 (message-window  (conc "Directory " rundir " not found")))))
	       (widgets    (make-hash-table))
	       (refreshdat (lambda ()
			     (let* ((curr-mod-time (file-modification-time db-path))
				                   ;;     (max ..... (if (common:file-exists? testdat-path)
						   ;;      	      (file-modification-time testdat-path)
						   ;;      	      (begin
						   ;;      		(set! testdat-path (conc rundir "/testdat.db"))
						   ;;      		0))))
				    (need-update   (or (and (>= curr-mod-time db-mod-time)
							    (> (current-milliseconds)(+ last-update 250))) ;; every half seconds if db touched
						       (> (current-milliseconds)(+ last-update 10000))     ;; force update even 10 seconds
						       request-update))
				    (newtestdat (if need-update 
						    ;; NOTE: BUG HIDER, try to eliminate this exception handler
						    (handle-exceptions
						     exn 
						     (debug:print-info 0 *default-log-port* "test db access issue in examine test for run-id " run-id ", test-id " test-id ": " ((condition-property-accessor 'exn 'message) exn))
						     (rmt:get-test-info-by-id run-id test-id )))))
			       ;; (print "INFO: need-update= " need-update " curr-mod-time = " curr-mod-time)
			       (cond
				((and need-update newtestdat)
				 (set! testdat newtestdat)
				 (set! teststeps    (augment-teststeps (tests:get-compressed-steps run-id test-id)))
				 (set! logfile      (conc (db:test-get-rundir testdat) "/" (db:test-get-final_logf testdat)))
				 (set! rundir       ;; (filedb:get-path *fdb* 
				       (db:test-get-rundir testdat)) ;; )
				 (set! testfullname (db:test-get-fullname testdat))
				 ;; (debug:print 0 *default-log-port* "INFO: teststeps=" (intersperse teststeps "\n    "))
				 
				 ;; I don't see why this was implemented this way. Please comment it ...
				 ;; (if (eq? curr-mod-time db-mod-time) ;; do only once if same
				 ;;     (set! db-mod-time (+ curr-mod-time 1))
				 ;;     (set! db-mod-time curr-mod-time))

				 (if (not (eq? curr-mod-time db-mod-time))
				     (set! db-mod-time curr-mod-time))
				 (set! last-update (current-milliseconds))
				 (set! request-update #f) ;; met the need ...
				 )
				(need-update ;; if this was true and yet there is no data ....
				 (db:test-set-testname! testdat "DEAD OR DELETED TEST")))
			       (if need-update
				   (begin
				     ;; update the gui elements here
				     (for-each 
				      (lambda (key)
					;; (print "Updating " key)
					((hash-table-ref widgets key) testdat))
				      (hash-table-keys widgets))
				     (update-state-status-buttons testdat)))
			       ;; (iup:refresh self)
			       )))
	       (meta-widgets (make-hash-table))
	       (self         #f)
	       (store-label  (lambda (name lbl cmd)
			       (hash-table-set! widgets name 
						(lambda (testdat)
						  (let ((newval (cmd testdat))
							(oldval (iup:attribute lbl "TITLE")))
						    (if (not (equal? newval oldval))
							(begin
					;(mutex-lock! mx1)
							  (iup:attribute-set! lbl "TITLE" newval)
					;(mutex-unlock! mx1)
							  )))))
			       lbl))
	       (store-meta  (lambda (name lbl cmd)
			      (hash-table-set! meta-widgets name 
					       (lambda (testmeta)
						 (let ((newval (cmd testmeta))
						       (oldval (iup:attribute lbl "TITLE")))
						   (if (not (equal? newval oldval))
						       (begin
					;(mutex-lock! mx1)
							 (iup:attribute-set! lbl "TITLE" newval)
					;(mutex-unlock! mx1)
							 )))))
			      lbl))
	       (store-button store-label)
	       (command-proc (lambda (command-text-box)
			       (let* ((cmd     (iup:attribute command-text-box "VALUE")))
				 (common:run-a-command cmd with-orig-env: #t))))
	       (command-text-box (iup:textbox
				  #:expand "HORIZONTAL"
				  #:font "Courier New, -10"
				  #:action (lambda (obj cnum val)
					     ;; (print "cnum=" cnum)
					     (if (eq? cnum 13)
						 (command-prox obj)))
				  ))
	       (command-launch-button (iup:button "Execute!" #:action (lambda (x)
									(command-proc command-text-box))))
	;; (lambda (x)
	;; 								(let* ((cmd     (iup:attribute command-text-box "VALUE"))
	;; 								       (fullcmd (conc (dtests:get-pre-command)
	;; 										      cmd 
	;; 										      (dtests:get-post-command))))
	;; 								  (debug:print-info 02 *default-log-port* "Running command: " fullcmd)
	;; 								  (common:without-vars fullcmd "MT_.*")))))
	       (kill-jobs (lambda (x)
			    (iup:attribute-set! 
			     command-text-box "VALUE"
			     (conc "megatest -target " keystring " -runname "  runname 
				   " -set-state-status KILLREQ,n/a -testpatt %/% "
				   " -state RUNNING,REMOTEHOSTSTART,LAUNCHED"))))
	       (run-test  (lambda (x)
			    (iup:attribute-set! 
			     command-text-box "VALUE"
			     (conc "megatest -target " keystring " -runname " runname 
				   " -run -testpatt " (conc testname "/" (if (equal? item-path "")
									"%" 
									item-path))
				   " -clean-cache"
				   ))))
	       (remove-test (lambda (x)
			      (iup:attribute-set!
			       command-text-box "VALUE"
			       (conc "megatest -remove-runs -target " keystring " -runname " runname
				     " -testpatt " (conc testname "/" (if (equal? item-path "")
									  "%"
									  item-path))
				     " -v"))))
	       (clean-run-execute  (lambda (x)
				     (let ((cmd (conc ;; "megatest -remove-runs -target " keystring " -runname " runname
                                                 "megatest -set-state-status NOT_STARTED,n/a -target " keystring " -runname " runname
						      " -testpatt " (conc testname "/" (if (equal? item-path "")
						       					   "%"
						       					   item-path))
                                                      ";megatest -target " keystring " -runname " runname 
						      " -run -preclean -testpatt " (conc testname "/" (if (equal? item-path "")
											   "%" 
											   item-path))
						      " -clean-cache"
						      )))
                                       (thread-start! (make-thread (lambda ()
                                                                     (common:run-a-command cmd))
                                                                   "clean-run-execute")))))
	       (remove-test (lambda (x)
			      (iup:attribute-set!
			       command-text-box "VALUE"
			       (conc "megatest -remove-runs -target " keystring " -runname " runname
				     " -testpatt " (conc testname "/" (if (equal? item-path "")
									  "%"
									  item-path))
				     " -v"))))
	       (archive-test  (lambda (x)
				(iup:attribute-set! 
				 command-text-box "VALUE"
				 (conc "megatest -target " keystring " -runname " runname 
				       " -archive save-remove -testpatt " (conc testname "/" (if (equal? item-path "")
												 "%" 
												 item-path))
				       )))))
	  (cond
	   ((not testdat)(begin (print "ERROR: bad test info for " test-id)(exit 1)))
	   ((not rundat)(begin (print "ERROR: found test info but there is a problem with the run info for " run-id)(exit 1)))
	   (else
	    ;;  (test-set-status! db run-id test-name state status itemdat)
	    (set! self ; 
		  (iup:dialog #:close_cb (lambda (a)(exit)) ; #:expand "YES"
			      #:title testfullname
			      (iup:vbox ; #:expand "YES"
			       ;; The run and test info
			       (iup:hbox  ; #:expand "YES"
				(run-info-panel dbstruct keydat testdat runname)
				(test-info-panel testdat store-label widgets)
				(test-meta-panel testmeta store-meta))
			       (iup:hbox
				(host-info-panel testdat store-label)
				(submegatest-panel dbstruct keydat testdat runname testconfig))
			       ;; The controls
			       (iup:frame #:title "Actions" 
					  (iup:vbox
					   (iup:hbox 
					    (iup:button "View Log"      #:action viewlog      #:size "80x")
					    (iup:button "Start Xterm"   #:action xterm        #:size "80x")
					    (iup:button "Run Test"      #:action run-test     #:size "80x")
					    (iup:button "Clean Test"    #:action remove-test  #:size "80x")
					    (iup:button "CleanRunExecute!"    #:action clean-run-execute #:size "80x")
					    (iup:button "Kill All Jobs" #:action kill-jobs    #:size "80x")
					    (iup:button "Archive Test"  #:action archive-test #:size "80x")
					    (iup:button "Close"         #:action (lambda (x)(exit)) #:size "80x"))
					   (apply 
					    iup:hbox
					    (list command-text-box command-launch-button))))
			       (set-fields-panel dbstruct run-id test-id testdat)
			       (let ((tabs 
				      (iup:tabs
				       ;; Replace here with matrix
				       (let ((steps-matrix (iup:matrix
							    #:font   "Courier New, -8"
							    #:expand "YES"
							    #:scrollbar "YES"
							    #:numcol 9
							    #:numlin 100
							    #:numcol-visible 9
							    #:numlin-visible 5
							    #:click-cb (lambda (obj lin col status)
									 ;; (if (equal? col 6)
									 (let* ((mtrx-rc  (conc lin ":" 6))
										(fname    (iup:attribute obj mtrx-rc))
                                                                                (stepname (iup:attribute obj (conc lin ":" 1)))                                                                                            (comment  (iup:attribute obj (conc lin ":" 7))))
                                                                           (case col
                                                                             
                                                                             ((7) (print "Comment from step "stepname": "comment))
                                                                             ((8) (ezsteps:spawn-run-from testdat stepname #t))
                                                                             ((9) (ezsteps:spawn-run-from testdat stepname #f))
                                                                             (else (view-a-log fname))))))))
					 ;; (let loop ((count 0))
					 ;;   (iup:attribute-set! steps-matrix "FITTOTEXT" (conc "L" count))
					 ;;   (if (< count 30)
					 ;;       (loop (+ count 1))))
					 (iup:attribute-set! steps-matrix "0:1" "Step Name")
					 (iup:attribute-set! steps-matrix "0:2" "Start")
					 (iup:attribute-set! steps-matrix "0:3" "End")
					 (iup:attribute-set! steps-matrix "WIDTH3" "50")
					 (iup:attribute-set! steps-matrix "0:4" "Status")
					 (iup:attribute-set! steps-matrix "WIDTH4" "50")
					 (iup:attribute-set! steps-matrix "0:5" "Duration")
					 (iup:attribute-set! steps-matrix "0:6" "Log File")
					 (iup:attribute-set! steps-matrix "0:7" "Comment")
                                         (iup:attribute-set! steps-matrix "0:8" "rerun only")
                                         (iup:attribute-set! steps-matrix "BGCOLOR0:9" "149 208 252")
                                         (iup:attribute-set! steps-matrix "BGCOLOR0:8" "149 208 252")
                                         (iup:attribute-set! steps-matrix "BGCOLOR0:7" "149 208 252")
                                         (iup:attribute-set! steps-matrix "0:9" "rerun & continue")
					 (iup:attribute-set! steps-matrix "ALIGNMENT1" "ALEFT")
					 ;; (iup:attribute-set! steps-matrix "FIXTOTEXT" "C1")
					 (iup:attribute-set! steps-matrix "RESIZEMATRIX" "YES")
					 (let ((proc
						(lambda (testdat)
						  (dcommon:populate-steps teststeps steps-matrix run-id test-id))))
					   (hash-table-set! widgets "StepsMatrix" proc)
					   (proc testdat))
					 steps-matrix)
				       ;; populate the Test Data panel
				       (iup:frame
					#:title "Test Data"
					(let ((test-data
					       (iup:textbox  ;; #:action (lambda (obj char val)
						;;   	#f)
						#:expand "YES"
						#:multiline "YES"
						#:font "Courier New, -10"
						#:size "100x100")))
					  (hash-table-set! widgets "Test Data"
							   (lambda (testdat) ;; 
							     (let* ((currval (iup:attribute test-data "VALUE")) ;; "TITLE"))
								    (fmtstr  "~10a~10a~10a~10a~7a~7a~6a~7a~a") ;; category,variable,value,expected,tol,units,type,comment
								    (newval  (string-intersperse 
									      (append
									       (list 
										(format #f fmtstr "Category" "Variable" "Value" "Expected" "Tol" "Status" "Units" "Type" "Comment")
										(format #f fmtstr "========" "========" "=====" "========" "===" "======" "=====" "====" "======="))
									       (map (lambda (x)
										      (format #f fmtstr
											      (db:test-data-get-category x)
											      (db:test-data-get-variable x)
											      (db:test-data-get-value    x)
											      (db:test-data-get-expected x)
											      (db:test-data-get-tol      x)
											      (db:test-data-get-status   x)
											      (db:test-data-get-units    x)
											      (db:test-data-get-type     x)
											      (db:test-data-get-comment  x)))
										    (rmt:read-test-data run-id test-id "%")))
									      "\n")))
							       (if (not (equal? currval newval))
								   (iup:attribute-set! test-data "VALUE" newval ))))) ;; "TITLE" newval)))))
					  test-data))
				       ;;(dashboard:run-controls)
				       )))
				 (iup:attribute-set! tabs "TABTITLE0" "Steps")
				 (iup:attribute-set! tabs "TABTITLE1" "Test Data")
				 tabs))))
	    (iup:show self)
	    (iup:callback-set! *tim* "ACTION_CB"
			       (lambda (x)
				 ;; Now start keeping the gui updated from the db
				 (refreshdat) ;; update from the db here
					;(thread-suspend! other-thread)
				 (if *exit-started*
				     (set! *exit-started* 'ok))))))))))

;;======================================================================
;; dcommon
;;======================================================================

;; yes, this is non-ideal 
(define dashboard:update-summary-tab #f)
(define dashboard:update-servers-table #f)

;;======================================================================
;; C O M M O N   D A T A   S T R U C T U R E
;;======================================================================
;; 

;;======================================================================
;; D O T F I L E
;;======================================================================

(define (dcommon:write-dotfile fname dat)
  (with-output-to-file fname
    (lambda ()
      (pp dat))))

;;======================================================================
;; TARGET AND PATTERN MANIPULATIONS
;;======================================================================

;; Convert to and from list of lines (for a text box)
;; "," => "\n"
(define (dboard:test-patt->lines test-patt)
  (string-substitute (regexp ",") "\n" test-patt))

(define (dboard:lines->test-patt lines)
  (string-substitute (regexp "\n") "," lines #t))


;;======================================================================
;; P R O C E S S   R U N S
;;======================================================================

;; MOVE THIS INTO *data*
(define *cachedata* (make-hash-table))
(hash-table-set! *cachedata* "runid-to-col"    (make-hash-table))
(hash-table-set! *cachedata* "testname-to-row" (make-hash-table))

;; modify a cell if the data is changed, return #t or-ed with previous if modified, #f elsewise
;;
(define (dcommon:modifiy-if-different mtrx cell-name new-val prev-changed)
  (let ((curr-val (iup:attribute mtrx cell-name)))
    (if (not (equal? curr-val new-val)) 
	(begin
	  (iup:attribute-set! mtrx cell-name col-name)
	  #t) ;; need a re-draw
	prev-changed)))


;; TO-DO
;;  1. Make "data" hash-table hierarchial store of all displayed data
;;  2. Update synchash to understand "get-runs", "get-tests" etc.
;;  3. Add extraction of filters to synchash calls
;;
;;    NOTE: Used in newdashboard
;;
;; Mode is 'full or 'incremental for full refresh or incremental refresh
;; (define (dcommon:run-update keys data runname keypatts testpatt states statuses mode window-id)
;;   (let* (;; count and offset => #f so not used
;; 	 ;; the synchash calls modify the "data" hash
;; 	 (changed         #f)
;; 	 (get-runs-sig    (conc (client:get-signature) " get-runs"))
;; 	 (get-tests-sig   (conc (client:get-signature) " get-tests"))
;; 	 (get-details-sig (conc (client:get-signature) " get-test-details"))
;; 
;; 	 ;; test-ids to get and display are indexed on window-id in curr-test-ids hash
;; 	 (test-ids        (hash-table-values (dboard:tabdat-curr-test-ids data)))
;; 	 ;; run-id is #f in next line to send the query to server 0
;;  	 (run-changes     (synchash:client-get 'db:get-runs get-runs-sig (length keypatts) data #f runname #f #f keypatts))
;; 	 (tests-detail-changes (if (not (null? test-ids))
;; 				   (synchash:client-get 'db:get-test-info-by-ids get-details-sig 0  data #f test-ids)
;; 				   '()))
;; 
;; 	 ;; Now can calculate the run-ids
;; 	 (run-hash    (hash-table-ref/default data get-runs-sig #f))
;; 	 (run-ids     (if run-hash (filter number? (hash-table-keys run-hash)) '()))
;; 
;; 	 (all-test-changes (let ((res (make-hash-table)))
;; 			     (for-each (lambda (run-id)
;; 					 (if (> run-id 0)
;; 					     (hash-table-set! res run-id (synchash:client-get 'db:get-tests-for-run-mindata get-tests-sig 0 data run-id 1 testpatt states statuses #f))))
;; 				       run-ids)
;; 			     res))
;; 	 (runs-hash    (hash-table-ref/default data get-runs-sig #f))
;; 	 (header       (hash-table-ref/default runs-hash "header" #f))
;; 	 (run-ids      (sort (filter number? (hash-table-keys runs-hash))
;; 			     (lambda (a b)
;; 			       (let* ((record-a (hash-table-ref runs-hash a))
;; 				      (record-b (hash-table-ref runs-hash b))
;; 				      (time-a   (db:get-value-by-header record-a header "event_time"))
;; 				      (time-b   (db:get-value-by-header record-b header "event_time")))
;; 				 (> time-a time-b)))
;; 			     ))
;; 	 (runid-to-col    (hash-table-ref *cachedata* "runid-to-col"))
;; 	 (testname-to-row (hash-table-ref *cachedata* "testname-to-row")) 
;; 	 (colnum       1)
;; 	 (rownum       0)
;; 	 (cellname (conc rownum ":" colnum))) ;; rownum = 0 is the header
;; ;; (debug:print 0 *default-log-port* "test-ids " test-ids ", tests-detail-changes " tests-detail-changes)
;;     
;; 	 ;; tests related stuff
;; 	 ;; (all-testnames (delete-duplicates (map db:test-get-testname test-changes))))
;; 
;;     ;; Given a run-id and testname/item_path calculate a cell R:C
;; 
;;     ;; NOTE: Also build the test tree browser and look up table
;;     ;;
;;     ;; Each run is unique on its keys and runname or run-id, store in hash on colnum
;;     (for-each (lambda (run-id)
;; 		(let* ((run-record (hash-table-ref/default runs-hash run-id #f))
;; 		       (key-vals   (map (lambda (key)(db:get-value-by-header run-record header key))
;; 					keys))
;; 		       (run-name   (db:get-value-by-header run-record header "runname"))
;; 		       (col-name   (conc (string-intersperse key-vals "\n") "\n" run-name))
;; 		       (run-path   (append key-vals (list run-name))))
;; 		  (hash-table-set! (dboard:tabdat-run-keys data) run-id run-path)
;; 		  ;; modify cell - but only if changed
;; 		  (set! changed (dcommon:modifiy-if-different (dboard:tabdat-runs-matrix data) cellname col-name changed))
;; 		  (hash-table-set! runid-to-col run-id (list colnum run-record))
;; 		  ;; Here we update the tests treebox and tree keys
;; 		  (tree:add-node (dboard:tabdat-tests-tree data) "Runs" (append key-vals (list run-name))
;; 				 userdata: (conc "run-id: " run-id))
;; 		  (set! colnum (+ colnum 1))))
;; 	      run-ids)
;; 
;;     ;; Scan all tests to be displayed and organise all the test names, respecting what is in the hash table
;;     ;; Do this analysis in the order of the run-ids, the most recent run wins
;;     (for-each (lambda (run-id)
;; 		(let* ((run-path       (hash-table-ref (dboard:tabdat-run-keys data) run-id))
;; 		       (test-changes   (hash-table-ref all-test-changes run-id))
;; 		       (new-test-dat   (car test-changes))
;; 		       (removed-tests  (cadr test-changes))
;; 		       (tests          (sort (map cadr (filter (lambda (testrec)
;; 								 (eq? run-id (db:mintest-get-run_id (cadr testrec))))
;; 							       new-test-dat))
;; 					     (lambda (a b)
;; 					       (let ((time-a (db:mintest-get-event_time a))
;; 						     (time-b (db:mintest-get-event_time b)))
;; 						 (> time-a time-b)))))
;; 		       ;; test-changes is a list of (( id record ) ... )
;; 		       ;; Get list of test names sorted by time, remove tests
;; 		       (test-names (delete-duplicates (map (lambda (t)
;; 							     (let ((i (db:mintest-get-item_path t))
;; 								   (n (db:mintest-get-testname  t)))
;; 							       (if (string=? i "")
;; 								   (conc "   " i)
;; 								   n)))
;; 							   tests)))
;; 		       (colnum     (car (hash-table-ref runid-to-col run-id))))
;; 		  ;; for each test name get the slot if it exists and fill in the cell
;; 		  ;; or take the next slot and fill in the cell, deal with items in the
;; 		  ;; run view panel? The run view panel can have a tree selector for
;; 		  ;; browsing the tests/items
;; 
;; 		  ;; SWITCH THIS TO USING CHANGED TESTS ONLY
;; 		  (for-each (lambda (test)
;; 			      (let* ((test-id   (db:mintest-get-id test))
;; 				     (state     (db:mintest-get-state test))
;; 				     (status    (db:mintest-get-status test))
;; 				     (testname  (db:mintest-get-testname test))
;; 				     (itempath  (db:mintest-get-item_path test))
;; 				     (fullname  (conc testname "/" itempath))
;; 				     (dispname  (if (string=? itempath "") testname (conc "   " itempath)))
;; 				     (rownum    (hash-table-ref/default testname-to-row fullname #f))
;; 				     (test-path (append run-path (if (equal? itempath "") 
;; 								     (list testname)
;; 								     (list testname itempath))))
;; 				     (tb         (dboard:tabdat-tests-tree data)))
;; 				(print "INFONOTE: run-path: " run-path)
;; 				(tree:add-node (dboard:tabdat-tests-tree data) "Runs" 
;; 					       test-path
;; 					       userdata: (conc "test-id: " test-id))
;; 				(let ((node-num (tree:find-node tb (cons "Runs" test-path)))
;; 				      (color    (car (gutils:get-color-for-state-status state status))))
;; 				  (debug:print 0 *default-log-port* "node-num: " node-num ", color: " color)
;; 
;; 				  (set! changed (dcommon:modifiy-if-different 
;; 						 tb
;; 						 (conc "COLOR" node-num)
;; 						 color changed))
;; 
;; 				  ;; (iup:attribute-set! tb (conc "COLOR" node-num) color)
;; 				  )
;; 				(hash-table-set! (dboard:tabdat-path-test-ids data) test-path test-id)
;; 				(if (not rownum)
;; 				    (let ((rownums (hash-table-values testname-to-row)))
;; 				      (set! rownum (if (null? rownums)
;; 						       1
;; 						       (+ 1 (common:max rownums))))
;; 				      (hash-table-set! testname-to-row fullname rownum)
;; 				      ;; create the label
;; 				      (set! changed (dcommon:modifiy-if-different 
;; 						     (dboard:tabdat-runs-matrix data)
;; 						     (conc rownum ":" 0)
;; 						     dispname
;; 						     changed))
;; 				      ;; (iup:attribute-set! (dboard:tabdat-runs-matrix data)
;; 				      ;;   		  (conc rownum ":" 0) dispname)
;; 				      ))
;; 				;; set the cell text and color
;; 				;; (debug:print 2 *default-log-port* "rownum:colnum=" rownum ":" colnum ", state=" status)
;; 				(set! changed (dcommon:modifiy-if-different 
;; 						     (dboard:tabdat-runs-matrix data)
;; 						     (conc rownum ":" colnum)
;; 						     (if (member state '("ARCHIVED" "COMPLETED"))
;; 							 status
;; 							 state)
;; 						     changed))
;; 				;; (iup:attribute-set! (dboard:tabdat-runs-matrix data)
;; 				;; 		    (conc rownum ":" colnum)
;; 				;; 		    (if (member state '("ARCHIVED" "COMPLETED"))
;; 				;; 			status
;; 				;; 			state))
;; 				(set! changed (dcommon:modifiy-if-different 
;; 					       (dboard:tabdat-runs-matrix data)
;; 					       (conc "BGCOLOR" rownum ":" colnum)
;; 					       (car (gutils:get-color-for-state-status state status))
;; 					       changed))
;; 				;; (iup:attribute-set! (dboard:tabdat-runs-matrix data)
;; 				;; 		    (conc "BGCOLOR" rownum ":" colnum)
;; 				;; 		    (car (gutils:get-color-for-state-status state status)))
;; 				))
;; 			    tests)))
;; 	      run-ids)
;; 
;;     (let ((updater (hash-table-ref/default  (dboard:commondat-updaters commondat) window-id #f)))
;;       (if updater (updater (hash-table-ref/default data get-details-sig #f))))
;; 
;;     (if changed (iup:attribute-set! (dboard:tabdat-runs-matrix data) "REDRAW" "ALL"))
;;     ;; (debug:print 2 *default-log-port* "run-changes: " run-changes)
;;     ;; (debug:print 2 *default-log-port* "test-changes: " test-changes)
;;     (list run-changes all-test-changes)))

(define (dcommon:runsdat-get-col-num dat target runname force-set)
  (let* ((runs-index (dboard:runsdat-runs-index dat))
	 (col-name   (conc target "/" runname))
	 (res        (hash-table-ref/default runs-index col-name #f)))
    (if res
	res
	(if force-set
	    (let ((max-col-num (+ 1 (common:max (cons-1 (hash-table-values runs-index))))))
	      (hash-table-set! runs-index col-name max-col-num)
	      max-col-num)))))

(define (dcommon:runsdat-get-row-num dat testname itempath force-set)
  (let* ((tests-index (dboard:runsdat-runs-index dat))
	 (row-name    (conc testname "/" itempath))
	 (res         (hash-table-ref/default runs-index row-name #f)))
    (if res
	res
	(if force-set
	    (let ((max-row-num (+ 1 (common:max (cons -1 (hash-table-values tests-index))))))
	      (hash-table-set! runs-index row-name max-row-num)
	      max-row-num)))))

(define (dcommon:rundat-copy-tests-to-by-name rundat)
  (let ((src-ht (dboard:rundat-tests rundat))
	(trg-ht (dboard:rundat-tests-by-name rundat)))
    (if (and (hash-table? src-ht)(hash-table? trg-ht))
	(begin
	  (hash-table-clear! trg-ht)
	  (for-each
	   (lambda (testdat)
	     (hash-table-set! trg-ht (test:test-get-fullname testdat) testdat))
	   (hash-table-values src-ht)))
	(debug:print 0 *default-log-port* "WARNING: src-ht " src-ht " trg-ht " trg-ht))))
  

;;======================================================================
;; TESTS DATA
;;======================================================================

;; Produce a list of lists ready for common:sparse-list-generate-index
;;
(define (dcommon:minimize-test-data tests-dat)
  (if (null? tests-dat) 
      '()
      (let loop ((hed (car tests-dat))
		 (tal (cdr tests-dat))
		 (res '()))
	(let* ((test-id    (db:test-get-id hed)) ;; look at the tests-dat spec for locations
	       (test-name  (db:test-get-testname hed))
	       (item-path  (db:test-get-item-path hed))
	       (state      (db:test-get-state hed))
	       (status     (db:test-get-status hed))
               (event-time (db:test-get-event_time hed))
	       (newitem    (list test-name item-path (list test-id state status event-time))))
	  (if (null? tal)
	      (reverse (cons newitem res))
	      (loop (car tal)(cdr tal)(cons newitem res)))))))

(define (dcommon:tests-mindat->hash tests-mindat)
  (let* ((res (make-hash-table)))
    (for-each
     (lambda (item)
       (let* ((test-name+item-path (cons (list-ref item 0) (list-ref item 1)))
              (value (list-ref item 2)))
         (hash-table-set! res test-name+item-path value)))
     tests-mindat)
    res))

;; return 1 if status1 is better
;; return 0 if status1 and 2 are equally good
;; return -1 if status2 is better
(define (dcommon:status-compare3 status1 status2)
  (let*
      ((status-goodness-ranking  (cdr ;; cdr to drop first item -- "n/a"
                                  (append (map cadr *common:std-statuses*)
                                          '(#f)) ;; algorithm requres last item to be #f
                                  )  )
       (mem1 (member status1 status-goodness-ranking))
       (mem2 (member status2 status-goodness-ranking))
       )
    (cond
     ((and (not mem1) (not mem2)) 0)
     ((not mem1) -1)
     ((not mem2) 1)
     ((= (length mem1) (length mem2)) 0)
     ((> (length mem1) (length mem2)) 1)
     (else -1))))
     
(define (dcommon:xor-tests-mindat src-tests-mindat dest-tests-mindat #!key (hide-clean #f))
  (let* ((src-hash (dcommon:tests-mindat->hash src-tests-mindat))
         (dest-hash (dcommon:tests-mindat->hash dest-tests-mindat))
         (all-keys
          (reverse (sort 
           (delete-duplicates
            (append (hash-table-keys src-hash) (hash-table-keys dest-hash)))

           (lambda (a b) 
             (cond
              ((< 0 (string-compare3 (car a) (car b))) #t)
              ((> 0 (string-compare3 (car a) (car b))) #f)
              ((< 0 (string-compare3 (cdr a) (cdr b))) #t)
              (else #f)))

           ))))
    (let ((res
           (map ;; TODO: rename xor to delta globally in dcommon and dashboard
            (lambda (key)
              (let* ((test-name (car key))
                     (item-path (cdr key))

                     (dest-value (hash-table-ref/default dest-hash key #f)) ;; (list test-id state status)
                     (dest-test-id  (if dest-value (list-ref dest-value 0) #f))
                     (dest-state    (if dest-value (list-ref dest-value 1) #f))
                     (dest-status   (if dest-value (list-ref dest-value 2) #f))

                     (src-value     (hash-table-ref/default src-hash key #f))   ;; (list test-id state status)
                     (src-test-id   (if src-value (list-ref src-value 0) #f))
                     (src-state     (if src-value (list-ref src-value 1) #f))
                     (src-status    (if src-value (list-ref src-value 2) #f))

                     (incomplete-statuses '("DELETED" "INCOMPLETE" "STUCK/DEAD" "N/A")) ;; if any of these statuses apply, treat test as incomplete

                     (dest-complete
                      (and dest-value dest-state dest-status
                           (equal? dest-state "COMPLETED")
                           (not (member dest-status incomplete-statuses))))
                     (src-complete
                      (and src-value src-state src-status
                           (equal? src-state "COMPLETED")
                           (not (member src-status incomplete-statuses))))
                     (status-compare-result (dcommon:status-compare3 src-status dest-status))
                     (xor-new-item
                      (cond
                       ;; complete, for this case means: state=compelte AND status not in ( deleted uncomplete stuck/dead n/a )
                       ;; neither complete -> bad

                       ;; src !complete, dest complete -> better
                       ((and (not dest-complete) (not src-complete))
                        (list dest-test-id "BOTH-BAD" "BOTH-INCOMPLETE"))
                       ((not dest-complete)
                        (list src-test-id "DIFF-MISSING" "DEST-INCOMPLETE"))  
                       ((not src-complete)
                        (list dest-test-id "DIFF-NEW" "SRC-INCOMPLETE"))      
                       ((and
                         (equal? src-state dest-state)
                         (equal? src-status dest-status))
                        (list dest-test-id  (conc "CLEAN") (conc "CLEAN-" dest-status) )) 
                       ;;    better or worse: pass > warn > waived > skip > fail > abort
                       ;;     pass > warn > waived > skip > fail > abort
                       
                       ((= 1 status-compare-result) ;; src is better, dest is worse
                        (list dest-test-id "DIRTY-WORSE" (conc src-status "->" dest-status)))
                       (else
                        (list dest-test-id "DIRTY-BETTER" (conc src-status "->" dest-status)))
                       )))
                (list test-name item-path  xor-new-item)))
            all-keys)))

      (if hide-clean
          (filter
           (lambda (item)
             ;;(print item)
             (not
              (equal?
               "CLEAN"
               (list-ref (list-ref item 2) 1))))
           res)
          res))))

(define (dcommon:examine-xterm run-id test-id)
  (let* ((testdat (rmt:get-test-info-by-id run-id test-id)))
    (if (not testdat)
        (begin
          (debug:print 2 "ERROR: No test data found for test " test-id ", exiting")
          (exit 1))
        (let*
            ((rundir        (if testdat 
                                (db:test-get-rundir testdat)
                                logfile))
             (testfullname  (if testdat (db:test-get-fullname testdat) "Gathering data ..."))
             (xterm      (lambda ()
                           (if (directory-exists? rundir)
                               (let* ((shell (if (get-environment-variable "SHELL") 
                                                 (conc "-e " (get-environment-variable "SHELL"))
                                                 ""))
                                      (command (conc "cd " rundir 
                                                     ";mt_xterm -T \"" (string-translate testfullname "()" "  ") "\" " shell "&")))
                                 (print "Command =" command)
                                 (common:without-vars
                                  command
                                  "MT_.*"))
                               (message-window  (conc "Directory " rundir " not found"))))))
          (xterm)
          (print "Adding xterm code")))))

;;======================================================================
;; D A T A   T A B L E S
;;======================================================================

;; Table of keys
(define (dcommon:keys-matrix rawconfig)
  (let* ((curr-row-num 1)
         (key-vals     (configf:section-vars rawconfig "fields"))
         (keys-matrix  (iup:matrix
                        #:alignment1 "ALEFT"
                        #:expand "YES" ;; "HORIZONTAL" ;; "VERTICAL"
                        ;; #:scrollbar "YES"
                        #:numcol 1
                        #:numlin (length key-vals)
                        #:numcol-visible 1
                        #:numlin-visible (length key-vals)
                        #:click-cb (lambda (obj lin col status)
                                     (print "obj: " obj " lin: " lin " col: " col " status: " status)))))
    ;; (iup:attribute-set! keys-matrix "0:0" "Run Keys")
    (iup:attribute-set! keys-matrix "WIDTH0" 0)
    (iup:attribute-set! keys-matrix "0:1" "Key Name")
    ;; (iup:attribute-set! keys-matrix "WIDTH1" "100")
    ;; fill in keys
    (for-each 
     (lambda (var)
       ;; (iup:attribute-set! keys-matrix "ADDLIN" (conc curr-row-num))
       (iup:attribute-set! keys-matrix (conc curr-row-num ":0") curr-row-num)
       (iup:attribute-set! keys-matrix (conc curr-row-num ":1") var)
       (set! curr-row-num (+ 1 curr-row-num))) ;; (config-lookup *configdat* "fields" var)))
     key-vals)
    (iup:attribute-set! keys-matrix "WIDTHDEF" "40")
    keys-matrix))

;; Section to table
(define (dcommon:section-matrix rawconfig sectionname varcolname valcolname #!key (title #f))
  (let* ((curr-row-num    1)
         (key-vals        (configf:section-vars rawconfig sectionname))
         (section-matrix  (iup:matrix
                           #:alignment1 "ALEFT"
                           ;; #:expand "YES" ;; "HORIZONTAL"
                           #:numcol 1
                           #:numlin (length key-vals)
                           #:numcol-visible 1
                           #:numlin-visible (min 10 (length key-vals))
			   #:scrollbar "YES")))
    (iup:attribute-set! section-matrix "0:0" varcolname)
    (iup:attribute-set! section-matrix "0:1" valcolname)
    (iup:attribute-set! section-matrix "WIDTH1" "200")
    ;; fill in keys
    (for-each 
     (lambda (var)
       ;; (iup:attribute-set! keys-matrix "ADDLIN" (conc curr-row-num))
       (iup:attribute-set! section-matrix (conc curr-row-num ":0") var)
       (iup:attribute-set! section-matrix (conc curr-row-num ":1") (configf:lookup rawconfig sectionname var))
       (set! curr-row-num (+ 1 curr-row-num))) ;; (config-lookup *configdat* "fields" var)))
     key-vals)
    (iup:vbox
     (iup:label (if title title (conc "Settings from [" sectionname "]"))  
         	;; #:size   "5x"
         	#:expand "HORIZONTAL"
         	)
     section-matrix)))
    
;; General data
;;
(define (dcommon:general-info)
  (let ((general-matrix (iup:matrix
			 #:alignment1 "ALEFT"
			 #:expand "YES" ;; "HORIZONTAL"
			 #:numcol 1
			 #:numlin 2
			 #:numcol-visible 1
			 #:numlin-visible 2)))
    (iup:attribute-set! general-matrix "WIDTH1" "150")
    (iup:attribute-set! general-matrix "0:1" "About this Megatest area") 
    ;; User (this is not always obvious - it is common to run as a different user
    (iup:attribute-set! general-matrix "1:0" "User")
    (iup:attribute-set! general-matrix "1:1" (current-user-name))
    ;; Megatest area
    ;; (iup:attribute-set! general-matrix "2:0" "Area")
    ;; (iup:attribute-set! general-matrix "2:1" *toppath*)
    ;; Megatest version
    (iup:attribute-set! general-matrix "2:0" "Version")
    (iup:attribute-set! general-matrix "2:1" (conc megatest-version "-" (substring megatest-fossil-hash 0 4)))

    general-matrix))

(define (dcommon:run-stats commondat tabdat #!key (tab-num #f))
  (let* ((stats-matrix (iup:matrix expand: "YES"))
	 (changed      #f)
	 (stats-updater (lambda ()
			 (if (dashboard:database-changed? commondat tabdat context-key: 'run-stats)
			     (let* ((run-stats    (rmt:get-run-stats))
				    (indices      (common:sparse-list-generate-index run-stats)) ;;  proc: set-cell))
				    (row-indices  (car indices))
				    (col-indices  (cadr indices))
				    (max-row      (if (null? row-indices) 1 (common:max (map cadr row-indices))))
				    (max-col      (if (null? col-indices) 1 
						      (common:max (map cadr col-indices))))
				    (max-visible  (max (- (dboard:tabdat-num-tests tabdat) 15) 3))
				    (max-col-vis  (if (> max-col 10) 10 max-col))
				    (numrows      1)
				    (numcols      1))
			       (iup:attribute-set! stats-matrix "CLEARVALUE" "CONTENTS")
			       (iup:attribute-set! stats-matrix "NUMCOL" max-col )
			       (iup:attribute-set! stats-matrix "NUMLIN" (if (< max-row max-visible) max-visible max-row)) ;; min of 20
			       (iup:attribute-set! stats-matrix "NUMCOL_VISIBLE" max-col-vis)
			       (iup:attribute-set! stats-matrix "NUMLIN_VISIBLE" (if (> max-row max-visible) max-visible max-row))

			       ;; Row labels
			       (for-each (lambda (ind)
					   (let* ((name (car ind))
						  (num  (cadr ind))
						  (key  (conc num ":0")))
					     (if (not (equal? (iup:attribute stats-matrix key) name))
						 (begin
						   (set! changed #t)
						   (iup:attribute-set! stats-matrix key name)))))
					 row-indices)

			       ;; Col labels
			       (for-each (lambda (ind)
					   (let* ((name (car ind))
						  (num  (cadr ind))
						  (key  (conc "0:" num)))
					     (if (not (equal? (iup:attribute stats-matrix key) name))
						 (begin
						   (set! changed #t)
						   (iup:attribute-set! stats-matrix key name)))))
					 col-indices)

			       ;; Cell contents
			       (for-each (lambda (entry)
					   (let* ((row-name (car entry))
						  (col-name (cadr entry))
						  (value    (caddr entry))
						  (row-num  (cadr (assoc row-name row-indices)))
						  (col-num  (cadr (assoc col-name col-indices)))
						  (key      (conc row-num ":" col-num)))
					     (if (not (equal? (iup:attribute stats-matrix key) value))
						 (begin
						   (set! changed #t)
						   (iup:attribute-set! stats-matrix key value)))))
					 run-stats)
			       (if changed (iup:attribute-set! stats-matrix "REDRAW" "ALL")))
                             ))))
    ;; (dboard:commondat-please-update-set! commondat #t) ;; force redraw on first pass 
    ;; (mark-for-update tabdat)
    ;; (stats-updater)
    (dboard:commondat-add-updater commondat stats-updater tab-num: tab-num)
    ;; (set! dashboard:update-summary-tab updater)
    (iup:attribute-set! stats-matrix "WIDTHDEF" "40")
    (iup:vbox
     ;; (iup:label "Run statistics"  #:expand "HORIZONTAL")
     stats-matrix)))

(define (dcommon:servers-table commondat tabdat)
  (let* ((colnum         0)
	 (rownum         0)
	 (servers-matrix (iup:matrix #:expand "YES"
				     #:numcol 7
				     #:numcol-visible 7
				     #:numlin-visible 5
				     ))
	 (colnames       (list "Id" "MTver" "Pid" "Host" "Interface:OutPort" "RunTime" "State" "RunId"))
	 (updater        (lambda ()
			   (if (dashboard:monitor-changed? commondat tabdat)
			       (let ((servers  (server:get-list *toppath* limit: 10)))
				 (iup:attribute-set! servers-matrix "NUMLIN" (length servers))
				 ;; (set! colnum 0)
				 ;; (for-each (lambda (colname)
				 ;;    	 ;; (print "colnum: " colnum " colname: " colname)
				 ;;    	 (iup:attribute-set! servers-matrix (conc "0:" colnum) colname)
				 ;;    	 (set! colnum (+ 1 colnum)))
				 ;;           colnames)
				 (set! rownum 1)
				 (for-each 
				  (lambda (server)
				    (set! colnum 0)
				    (match-let (((mod-time host port start-time pid)
						 server))
				      (let* ((uptime  (- (current-seconds) mod-time))
					     (runtime (if start-time
							  (- mod-time start-time)
							  0))
					     (vals (list "-"  ;; (vector-ref server 0) ;; Id
							 "-"  ;; (vector-ref server 9) ;; MT-Ver
							 pid  ;; (vector-ref server 1) ;; Pid
							 host ;; (vector-ref server 2) ;; Hostname
							 (conc host ":" port) ;; (conc (vector-ref server 3) ":" (vector-ref server 4)) ;; IP:Port
							 (seconds->hr-min-sec runtime) ;; (- (current-seconds) start-time)) ;; (vector-ref server 6)))
							 (cond
							  ((< uptime 5)  "alive")
							  ((< uptime 16) "probably alive");; less than 15 seconds since mod, call it alive (vector-ref server 8) ;; State
							  (else "dead"))
							 "-" ;; (vector-ref server 12)  ;; RunId
							 )))
					(for-each (lambda (val)
						    (let* ((row-col (conc rownum ":" colnum))
							   (curr-val (iup:attribute servers-matrix row-col)))
						      (if (not (equal? (conc val) curr-val))
							  (begin
							    (iup:attribute-set! servers-matrix row-col val)
							    (iup:attribute-set! servers-matrix "FITTOTEXT" (conc "C" colnum))))
						      (set! colnum (+ 1 colnum))))
						  vals)
					(set! rownum (+ rownum 1)))
				      (iup:attribute-set! servers-matrix "REDRAW" "ALL")))
				    (sort servers (lambda (a b)(> (car a)(car b))))))))))
    (set! colnum 0)
    (for-each (lambda (colname)
		(iup:attribute-set! servers-matrix (conc "0:" colnum) colname)
		(iup:attribute-set! servers-matrix "FITTOTEXT" (conc "C" colnum))
		(set! colnum (+ colnum 1)))
	      colnames)
    ;; (set! dashboard:update-servers-table updater) 
    (dboard:commondat-add-updater commondat updater)
    ;; (iup:attribute-set! servers-matrix "WIDTHDEF" "40")
    ;;  (iup:hbox
    ;;   (iup:vbox
    ;;    (iup:button "Start"
    ;;      	  ;; #:size "50x"
    ;;      	  #:expand "YES"
    ;;      	  #:action (lambda (obj)
    ;;      		     (let ((cmd (conc ;; "xterm -geometry 180x20 -e \""
    ;;      				      "megatest -server - &")))
    ;;      				      ;; ";echo Press any key to continue;bash -c 'read -n 1 -s'\" &")))
    ;;      		       (system cmd))))
    ;;    (iup:button "Stop"
    ;;      	  #:expand "YES"
    ;;      	  ;; #:size "50x"
    ;;      	  #:action (lambda (obj)
    ;;      		     (let ((cmd (conc ;; "xterm -geometry 180x20 -e \""
    ;;      				      "megatest -stop-server 0 &")))
    ;;      				      ;; ";echo Press any key to continue;bash -c 'read -n 1 -s'\" &")))
    ;;      		       (system cmd))))
    ;;    (iup:button "Restart"
    ;;      	  #:expand "YES"
    ;;      	  ;; #:size "50x"
    ;;      	  #:action (lambda (obj)
    ;;      		     (let ((cmd (conc ;; "xterm -geometry 180x20 -e \""
    ;;      				      "megatest -stop-server 0;megatest -server - &")))
    ;;      				      ;; ";echo Press any key to continue;bash -c 'read -n 1 -s'\" &")))
    ;;      		       (system cmd)))))
    ;;    servers-matrix
    ;;   )))
    servers-matrix
    ))

;; The main menu
(define (dcommon:main-menu)
  (iup:menu ;; a menu is a special attribute to a dialog (think Gnome putting the menu at screen top)
   (iup:menu-item "Files" (iup:menu   ;; Note that you can use either #:action or action: for options
			   (iup:menu-item "Open"  action: (lambda (obj)
							    (let* ((area-name (iup:textbox #:expand "HORIZONTAL"))
								   (fd        (iup:file-dialog #:dialogtype "DIR"))
								   (top       (iup:show fd #:modal? "YES")))
							      (iup:attribute-set! source-tb "VALUE"
										  (iup:attribute fd "VALUE"))
							      (iup:destroy! fd))))
			   ;; (lambda (obj)
			   ;;  (iup:show (iup:file-dialog))
			   ;;  (print "File->open " obj)))
			   (iup:menu-item "Save"  #:action (lambda (obj)(print "File->save " obj)))
			   (iup:menu-item "Exit"  #:action (lambda (obj)(exit)))))
   (iup:menu-item "Tools" (iup:menu
			   (iup:menu-item "Create new blah" #:action (lambda (obj)(print "Tools->new blah")))
			   ;; (iup:menu-item "Show dialog"     #:action (lambda (obj)
			   ;;  					   (show message-window
			   ;;  					     #:modal? #t
			   ;;  					     ;; set positon using coordinates or center, start, top, left, end, bottom, right, parent-center, current
			   ;;  					     ;; #:x 'mouse
			   ;;  					     ;; #:y 'mouse
			   ;;  )					     
			   ))))

;;======================================================================
;; CANVAS STUFF FOR TESTS
;;======================================================================

(define (dcommon:draw-test cnv xoffset yoffset scalef x y w h name selected)
  (let* ((llx (dcommon:x->canvas x scalef xoffset))
	 (lly (dcommon:y->canvas y scalef yoffset))
	 (urx (dcommon:x->canvas (+ x w) scalef xoffset))
	 (ury (dcommon:y->canvas (+ y h) scalef yoffset)))
    (canvas-text! cnv (+ llx 5)(+ lly 5) name)
    (canvas-rectangle! cnv llx urx lly ury)
    (if selected (canvas-box! cnv llx (+ llx 5) lly (+ lly 5)))))

(define (dcommon:draw-arrow cnv test-box-center waiton-center)
  (let* ((test-box-center-x (vector-ref test-box-center 0))
	 (test-box-center-y (vector-ref test-box-center 1))
	 (waiton-center-x   (vector-ref waiton-center   0))
	 (waiton-center-y   (vector-ref waiton-center   1))
	 (delta-y           (- waiton-center-y test-box-center-y))
	 (delta-x           (- waiton-center-x test-box-center-x))
	 (abs-delta-x       (abs delta-x))
	 (abs-delta-y       (abs delta-y))
	 (use-delta-x       (> abs-delta-x abs-delta-y)) ;; use the larger one
	 (delta-ratio       (if use-delta-x
				(if (> abs-delta-x 0)
				    (/ abs-delta-y abs-delta-x)
				    1)
				(if (> abs-delta-y 0)
				    (/ abs-delta-x abs-delta-y)
				    1)))
	 (x-adj             (if use-delta-x
				8
				(* delta-ratio 8)))
	 (y-adj             (if use-delta-x
				(* x-adj delta-ratio)
				8))
	 (new-waiton-x      (inexact->exact
			     (round (if (> delta-x 0) ;; have positive x
					(- waiton-center-x x-adj)
					(+ waiton-center-x x-adj)))))
	 (new-waiton-y      (inexact->exact
			     (round (if (> delta-y 0)
					(- waiton-center-y y-adj)
					(+ waiton-center-y y-adj))))))
  ;; (canvas-line-width-set! cnv 5)
  (canvas-line! cnv
		test-box-center-x
		test-box-center-y
		new-waiton-x
		new-waiton-y
		)
  (canvas-mark! cnv new-waiton-x new-waiton-y)))

(define (dcommon:get-box-center box)
  (let* ((llx  (list-ref box 0))
	 (lly  (list-ref box 1))
	 (boxw (list-ref box 4))
	 (boxh (list-ref box 5)))
    (vector (+ llx (/ boxw 2))
	    (+ lly (/ boxh 2)))))

(define-inline (num->int num)
  (inexact->exact (round num)))

(define (dcommon:draw-edges cnv xoffset yoffset scalef edges)
  (for-each
   (lambda (e)
     (let loop ((x1 (car e))
		(y1 (cadr e))
		(x2 #f)
		(y2 #f)
		(tal (cddr e)))
       (if (and x1 y1 x2 y2)
	   (canvas-line! 
	    cnv 
	    (num->int (dcommon:x->canvas x1 scalef xoffset))
	    (num->int (dcommon:y->canvas y1 scalef yoffset))
	    (num->int (dcommon:x->canvas x2 scalef xoffset))
	    (num->int (dcommon:y->canvas y2 scalef yoffset)))) ;; (num->int x1)(num->int y1)(num->int x2)(num->int y2)))
       (if (< (length tal) 2)
	   (canvas-mark! cnv
			 (num->int (dcommon:x->canvas x1 scalef xoffset))
			 (num->int (dcommon:y->canvas y1 scalef yoffset))) ;; (num->int x1)(num->int y1))
	   (loop (car tal)(cadr tal) x1 y1 (cddr tal)))))
   ;; (map (lambda (e)(map (lambda (x)(num->int (* x scalef))) e)) edges)))
   edges))


(define (dcommon:draw-arrows cnv testname tests-hash test-records)
  (let* ((test-box-info   (hash-table-ref tests-hash testname))
	 (test-box-center (dcommon:get-box-center test-box-info))
	 (test-record     (hash-table-ref test-records testname))
	 (waitons         (vector-ref test-record 2)))
    (for-each
     (lambda (waiton)
       (let* ((waiton-box-info (hash-table-ref/default tests-hash waiton #f))
	      (waiton-center   (dcommon:get-box-center (or waiton-box-info test-box-info))))
	 (dcommon:draw-arrow cnv test-box-center waiton-center)))
     waitons)
    ;; (debug:print 0 *default-log-port* "test-box-info=" test-box-info)
    ;; (debug:print 0 *default-log-port* "test-record=" test-record)
    ))

(define (dcommon:estimate-scale sizex sizey originx originy nodes)
  ;; (print "sizex: " sizex " sizey: " sizey " originx: " originx " originy: " originy " nodes: " nodes)
  (let* ((maxx 1)
	 (maxy 1))
    (for-each
     (lambda (node)
       (if (equal? (car node) "node")
	   (let ((x (string->number (list-ref node 2)))
		 (y (string->number (list-ref node 3))))
	     (if (and x (> x maxx))(set! maxx x))
	     (if (and y (> y maxy))(set! maxy y)))))
     nodes)
    (let ((scalex (/ sizex maxx))
	  (scaley (/ sizey maxy)))
      ;; (print "maxx: " maxx " maxy: " maxy " scalex: " scalex " scaley: " scaley)
      (min scalex scaley))))

(define (dcommon:get-xoffset tests-draw-state sizex-in xadj-in)
  (let ((xadj  (or xadj-in  (hash-table-ref/default tests-draw-state 'xadj 0)))
	(sizex (or sizex-in (hash-table-ref/default tests-draw-state 'sizex 500))))
    (hash-table-set! tests-draw-state 'xadj xadj) ;; for use in de-scaling when handling mouse clicks
    (hash-table-set! tests-draw-state 'sizex sizex)
    (* (/ sizex 2) (- 0.5 xadj))))

(define (dcommon:get-yoffset tests-draw-state sizey-in yadj-in)
  (let ((yadj  (or yadj-in  (hash-table-ref/default tests-draw-state 'yadj 0)))
	(sizey (or sizey-in (hash-table-ref/default tests-draw-state 'sizey 500))))
    (hash-table-set! tests-draw-state 'yadj yadj) ;; for use in de-scaling when handling mouse clicks
    (hash-table-set! tests-draw-state 'sizey sizey)
    (* (/ sizey 2) (- yadj 0.5))))

(define (dcommon:x->canvas x scalef xoffset)
  (+ xoffset (* x scalef)))

(define (dcommon:y->canvas y scalef yoffset)
  (+ yoffset (* y scalef)))

;; sizex, sizey     - canvas size
;; originx, originy - canvas origin
;;
(define (dcommon:initial-draw-tests cnv xadj yadj sizex sizey sizexmm sizeymm originx originy tests-draw-state sorted-testnames test-records)
  (let* ((dot-data ;; (map cdr (filter
		   ;; 	  (lambda (x)(equal? "node" (car x)))
	  (map string-split (tests:lazy-dot test-records "plain" sizex sizey))) ;; (tests:easy-dot test-records "plain")))
	 (xoffset	 (dcommon:get-xoffset tests-draw-state sizex xadj))
	 (yoffset        (dcommon:get-yoffset tests-draw-state sizey yadj))
	 (no-dot         (configf:lookup *configdat* "setup" "nodot"))
	 (boxh           15)
	 (boxw           10)
	 (margin         5)
	 (tests-info     (hash-table-ref tests-draw-state 'tests-info))
	 (selected-tests (hash-table-ref tests-draw-state 'selected-tests ))
	 (scalef         (if no-dot
			     1
			     (dcommon:estimate-scale sizex sizey originx originy dot-data)))
	 (sorted-testnames (if no-dot
			       (sort sorted-testnames string>=?)
			       sorted-testnames))
	 (curr-x         0)  ;; NB// NOT screen units
	 (curr-y         (/ (- sizey boxh margin) scalef)) ;; used when no-dot
	 (scaled-sizex   (/ sizex scalef)))

    (hash-table-set! tests-draw-state 'scalef scalef)
    
    (let ((longest-str   (if (null? sorted-testnames) "         " (car (sort sorted-testnames (lambda (a b)(>= (string-length a)(string-length b))))))))
      (let-values (((x-max y-max) (canvas-text-size cnv longest-str)))
	(if (> x-max boxw)(set! boxw (+ 10 x-max)))))
    ;; (print "sizex: " sizex " sizey: " sizey " font: " (canvas-font cnv) " originx: " originx " originy: " originy " xtorig: " xtorig " ytorig: " ytorig " xadj: " xadj " yadj: " yadj)
    (if (not (null? sorted-testnames))
	(let loop ((hed (car (reverse sorted-testnames)))
		   (tal (cdr (reverse sorted-testnames))))
	  (let* ((nodedat (if no-dot
			      #f
			      (let ((tmpres (filter (lambda (x)
						      (if (and (not (null? x))
							       (equal? (car x) "node"))
							  (equal? hed (cadr x))
							  #f))
						    dot-data)))
				(if (null? tmpres)
				    ;;           llx  lly boxw boxh
				    (list "0" "1" "1" (conc (length tal)) "2" "0.5") ;; return some placeholder junk if no dat found
				    (car tmpres)))))
		 (edgedat (if no-dot
			      '()
			      (let ((edges (filter (lambda (x)  ;; filter for edge
						     (if (and (not (null? x))
							      (equal? (car x) "edge"))
							 (equal? hed (cadr x))
							 #f))
						   dot-data)))
				(map (lambda (inlst)
				       (dcommon:process-polyline 
					(map (lambda (instr)
					       (string->number instr)) ;; convert to number and scale
					     (let ((il (cddddr inlst)))
					       (take il (- (length il) 2))))
					(lambda (x y)
					  (list (+ x 0)   ;; xtorig)
						(+ y 0))) ;; ytorig)))
					#f #f)) ;; process polyline
				     edges))))
		 (cx   (if no-dot ;; this is the centerpoint!
			   curr-x
			   (string->number (list-ref nodedat 2))))
		 (cy   (if no-dot
			   curr-y
			   (string->number (list-ref nodedat 3))))
		 (boxw (if no-dot
			   boxw
			   (string->number (list-ref nodedat 4))))
		 (boxh (if no-dot
			   boxh
			   (string->number (list-ref nodedat 5))))
		 (boxw/2  (/ boxw 2))
		 (boxh/2  (/ boxh 2))
		 (urx     (+ cx boxw/2))
		 (ury     (+ cy boxh/2))
		 (llx     (- cx boxw/2))
		 (lly     (- cy boxh/2)))

	    ;; if we are in no-dot mode then increment curr-x and curr-y as needed
	    (if no-dot
		(begin
		  (cond 
		   ((< curr-x (- scaled-sizex boxw boxw margin))
		    (set! curr-x (+ curr-x boxw margin)))
		   ((> curr-x (- scaled-sizex boxw boxw margin))
		    (set! curr-x 0)
		    (set! curr-y (- curr-y (+ boxh margin)))))))
					; (print "hed " hed " llx " llx " lly " lly " urx " urx " ury " ury)
	    (dcommon:draw-test cnv xoffset yoffset scalef llx lly boxw boxh hed (hash-table-ref/default selected-tests hed #f))
	    ;; (dcommon:draw-arrows cnv testname tests-info test-records))
	    (dcommon:draw-edges cnv xoffset yoffset scalef edgedat)
	    
	    ;; data used by mouse click calc. keep the wacky order for now.
	    (hash-table-set! tests-info hed  (list llx lly urx ury boxw boxh edgedat)) 
	    (if (not (null? tal))
		(loop (car tal)
		      (cdr tal))))))
    ))

;; per-point-proc required, remainder optional
;;
(define (dcommon:process-polyline line per-point-proc per-segment-proc last-segment-proc)
  (if (< (length line) 2)
      '()
      (let loop ((x1   (car  line))
		 (y1   (cadr line))
		 (x2   #f)
		 (y2   #f)
		 (tal  (cddr line))
		 (res  '()))
	(if (and x1 y1 x2 y2 per-segment-proc)
	    (per-segment-proc x1 y1 x2 y2))
	(if (< (length tal) 2)
	    (begin
	      (if last-segment-proc (last-segment-proc x1 y1 x2 y2))
	      (append res (per-point-proc x1 y1)))
	    (loop (car tal)(cadr tal) x1 y1 (cddr tal) (append res (per-point-proc x1 y1)))))))

(define (dcommon:redraw-tests cnv xadj yadj sizex sizey sizexmm sizeymm originx originy tests-draw-state sorted-testnames test-records)
  (let* ((scalef              (hash-table-ref tests-draw-state 'scalef))
	 (xoffset             (dcommon:get-xoffset tests-draw-state sizex xadj))
	 (yoffset             (dcommon:get-yoffset tests-draw-state sizey yadj))
	 (tests-info          (hash-table-ref tests-draw-state 'tests-info))
	 (selected-tests      (hash-table-ref tests-draw-state 'selected-tests )))
    (if (not (null? sorted-testnames))
	(let loop ((hed (car (reverse sorted-testnames)))
		   (tal (cdr (reverse sorted-testnames))))
	  (let* ((tvals (hash-table-ref tests-info hed))
		 (llx   (list-ref tvals 0))
		 (lly   (list-ref tvals 1))
		 (boxw  (list-ref tvals 4))
		 (boxh  (list-ref tvals 5))
		 (edges (map (lambda (pline)
			       (dcommon:process-polyline pline
							 (lambda (x1 y1)
							   (list x1 y1))
							 #f #f))
			     (list-ref tvals 6)))
		 (urx   (+ llx boxw))
		 (ury   (+ lly boxh)))
	    (dcommon:draw-test cnv xoffset yoffset scalef llx lly boxw boxh hed (hash-table-ref/default selected-tests hed #f))
	    (dcommon:draw-edges cnv xoffset yoffset scalef edges)
	    (if (not (null? tal))
		;; leave a column of space to the right to list items
		(loop (car tal)
		      (cdr tal))))))))

;;======================================================================
;; RUN CONTROLS
;;======================================================================

(define (dcommon:command-execution-control data)
  ;; The command line display/exectution control
  (iup:frame
   #:title "Command to be exectuted"
   (iup:hbox
    (iup:label "Run on" #:size "40x")
    (iup:radio 
     (iup:hbox
      (iup:toggle "Local" #:size "40x")
      (iup:toggle "Server" #:size "40x")))
    (let ((tb (iup:textbox 
	       #:value "megatest "
	       #:expand "HORIZONTAL"
	       #:readonly "YES"
	       #:font "Courier New, -12"
	       )))
      (dboard:tabdat-command-tb-set! data tb)
      tb)
    (iup:button "Execute" #:size "50x"
		#:action (lambda (obj)
			   ;; (let ((cmd (conc ;; "xterm -geometry 180x20 -e \""
                           (common:run-a-command (iup:attribute (dboard:tabdat-command-tb data) "VALUE")))))))
    ;; ";echo Press any key to continue;bash -c 'read -n 1 -s'\" &")))
    ;; (system cmd)))))))

(define (dcommon:command-action-selector commondat tabdat #!key (tab-num #f))
  (iup:frame
   #:title "Set the action to take"
   (iup:hbox
    ;; (iup:label "Command to run" #:expand "HORIZONTAL" #:size "70x" #:alignment "LEFT:ACENTER")
    (let* ((cmds-list '("run" "remove-runs")) ;;  "set-state-status" "lock-runs" "unlock-runs"))
	   (lb         (iup:listbox #:expand "HORIZONTAL"
				    #:dropdown "YES"
				    #:action (lambda (obj val index lbstate)
					       ;; (print obj " " val " " index " " lbstate)
					       (dboard:tabdat-command-set! tabdat val)
					       (dashboard:update-run-command tabdat))))
	   (default-cmd (car cmds-list)))
      (iuplistbox-fill-list lb cmds-list selected-item: default-cmd)
      (dboard:tabdat-command-set! tabdat default-cmd)
      lb))))

(define (dcommon:command-runname-selector commondat tabdat #!key (tab-num #f)) ;; alldat data)
  (iup:frame
   #:title "Runname"
   (let* ((default-run-name (seconds->work-week/day (current-seconds)))
	  (tb (iup:textbox #:expand "HORIZONTAL"
			   #:action (lambda (obj val txt)
				      (debug:catch-and-dump
				       (lambda ()
					 ;; (print "obj: " obj " val: " val " unk: " unk)
					 (dboard:tabdat-run-name-set! tabdat txt) ;; (iup:attribute obj "VALUE"))
					 (dashboard:update-run-command tabdat))
				       "command-runname-selector tb action"))
			   #:value (or default-run-name (dboard:tabdat-run-name tabdat))))
	  (lb (iup:listbox #:expand "HORIZONTAL"
			   #:dropdown "YES"
			   #:action (lambda (obj val index lbstate)
				      (debug:catch-and-dump
				       (lambda ()
					 (if (not (equal? val ""))
					     (begin
					       (iup:attribute-set! tb "VALUE" val)
					       (dboard:tabdat-run-name-set! tabdat val)
					       (dashboard:update-run-command tabdat))))
				       "command-runname-selector lb action"))))
	  (refresh-runs-list (lambda ()
			       (if (dashboard:database-changed? commondat tabdat context-key: 'runname-selector-runs-list)
				   (let* (;; (target        (dboard:tabdat-target-string tabdat))
					  (runs-for-targ (rmt:get-runs-by-patt (dboard:tabdat-keys tabdat) "%" #f #f #f #f 0))
					  (runs-header   (vector-ref runs-for-targ 0))
					  (runs-dat      (vector-ref runs-for-targ 1))
					  (run-names     (cons default-run-name 
							       (map (lambda (x)
								      (db:get-value-by-header x runs-header "runname"))
								    runs-dat))))
				     ;; (print "DEBUGINFO: run-names=" run-names)
				     ;; (iup:attribute-set! lb "REMOVEITEM" "ALL")
				     (iuplistbox-fill-list lb run-names selected-item: default-run-name))))))
     ;; (dboard:tabdat-updater-for-runs-set! tabdat refresh-runs-list)
     (dboard:commondat-add-updater commondat refresh-runs-list tab-num: tab-num)
     ;; (refresh-runs-list)
     (dboard:tabdat-run-name-set! tabdat default-run-name)
     (iup:hbox
      tb
      lb))))

(define (dcommon:command-testname-selector commondat tabdat update-keyvals) ;;  key-listboxes)
  (iup:vbox
   ;; Text box for test patterns
   (iup:frame
    #:title "Test patterns (one per line)"
    (let ((tb (iup:textbox #:action (lambda (val a b)
				      (debug:catch-and-dump
				       (lambda ()
					 (dboard:tabdat-test-patts-set!-use
					  tabdat
					  (dboard:lines->test-patt b))
					 (dashboard:update-run-command tabdat))
				       "command-testname-selector tb action"))
			   #:value (dboard:test-patt->lines
				    (dboard:tabdat-test-patts-use tabdat))
			   #:expand "YES"
			   #:size "x30" ;; was 10x30
			   #:multiline "YES")))
      (set! test-patterns-textbox tb)
      (dboard:tabdat-test-patterns-textbox-set! tabdat tb)
      tb))
;; (iup:frame
;;  #:title "Target"
;;  ;; Target selectors
;;  (apply iup:hbox
;; 	   (let* ((dat      (dashboard:update-target-selector tabdat action-proc: update-keyvals))
;; 		  (key-lb   (car dat))
;; 		  (combos   (cadr dat)))
;; 	     combos)))
   ;; (iup:hbox
   ;;  ;; Text box for STATES
   ;;  (iup:frame
   ;;   #:title "States"
   ;;   (dashboard:text-list-toggle-box 
   ;;    ;; Move these definitions to common and find the other useages and replace!
   ;;    (map cadr *common:std-states*) ;; '("COMPLETED" "RUNNING" "STUCK" "INCOMPLETE" "LAUNCHED" "REMOTEHOSTSTART" "KILLED")
   ;;    (lambda (all)
   ;;      (dboard:tabdat-states-set! tabdat all)
   ;;      (dashboard:update-run-command tabdat))))
   ;;  ;; Text box for STATES
   ;;  (iup:frame
   ;;   #:title "Statuses"
   ;;   (dashboard:text-list-toggle-box 
   ;;    (map cadr *common:std-statuses*) ;; '("PASS" "FAIL" "n/a" "CHECK" "WAIVED" "SKIP" "DELETED" "STUCK/DEAD")
   ;;    (lambda (all)
   ;;      (dboard:tabdat-statuses-set! tabdat all)
   ;;      (dashboard:update-run-command tabdat)))))
   ))

(define (dcommon:command-tests-tasks-canvas tabdat test-records sorted-testnames tests-draw-state)
  (iup:frame
   #:title "Tests and Tasks"
   (let* ((updater #f)
	  (last-xadj 0)
	  (last-yadj 0)
	  (the-cnv   #f)
	  (canvas-obj 
	   (iup:canvas #:action (make-canvas-action
				 (lambda (cnv xadj yadj)
				   (if (not updater)
				       (set! updater (lambda (xadj yadj)
						       ;; (print "cnv: " cnv " xadj: " xadj " yadj: " yadj)
						       (dashboard:draw-tests cnv xadj yadj tests-draw-state sorted-testnames test-records)
						       (set! last-xadj xadj)
						       (set! last-yadj yadj))))
				   (updater xadj yadj)
				   (set! the-cnv cnv)
				   ))
		       ;; Following doesn't work 
		       #:wheel-cb (lambda (obj step x y dir) ;; dir is 4 for up and 5 for down. I think.
				    (let ((scalef (hash-table-ref tests-draw-state 'scalef)))
				      (hash-table-set! tests-draw-state 'scalef (+ scalef
										   (if (> step 0)
										       (* scalef 0.01)
										       (* scalef -0.01))))
				      (if the-cnv
					  (dashboard:draw-tests the-cnv last-xadj last-yadj tests-draw-state sorted-testnames test-records))
				      ))
		       ;; #:size "250x250"
		       #:expand "YES"
		       #:scrollbar "YES"
		       #:posx "0.5"
		       #:posy "0.5"
		       #:button-cb (lambda (obj btn pressed x y status)
				     ;; (print "obj: " obj ", pressed " pressed ", status " status)
					; (print "canvas-origin: " (canvas-origin the-cnv))
				     ;; (let-values (((xx yy)(canvas-origin the-cnv)))
				     ;; (canvas-transform-set! the-cnv #f)
				     ;; (print "canvas-origin: " xx " " yy " click at " x " " y))
				     (let* ((tests-info     (hash-table-ref tests-draw-state 'tests-info))
					    (selected-tests (hash-table-ref tests-draw-state 'selected-tests))
					    (scalef         (hash-table-ref tests-draw-state 'scalef))
					    (sizey          (hash-table-ref tests-draw-state 'sizey))
					    (xoffset        (dcommon:get-xoffset tests-draw-state #f #f))
					    (yoffset        (dcommon:get-yoffset tests-draw-state #f #f))
					    (new-y          (- sizey y))
					    (test-patterns-textbox (dboard:tabdat-test-patterns-textbox tabdat)))
				       ;; (print "xoffset=" xoffset ", yoffset=" yoffset)
				       ;; (print "\tx\ty\tllx\tlly\turx\tury")
				       (for-each (lambda (test-name)
						   (let* ((rec-coords (hash-table-ref tests-info test-name))
							  (llx        (dcommon:x->canvas (list-ref rec-coords 0) scalef xoffset))
							  (lly        (dcommon:y->canvas (list-ref rec-coords 1) scalef yoffset))
							  (urx        (dcommon:x->canvas (list-ref rec-coords 2) scalef xoffset))
							  (ury        (dcommon:y->canvas (list-ref rec-coords 3) scalef yoffset)))
						     ;; (if (eq? pressed 1)
						     ;;    (print "\tx=" x "\ty=" y "\tnew-y=" new-y "\tllx=" llx "\tlly=" lly "\turx=" urx "\tury=" ury "\t" test-name " "))
						     (if (and (eq? pressed 1)
							      (>= x llx)
							      (>= new-y lly)
							      (<= x urx)
							      (<= new-y ury))
							 (let* ((box-patterns (string-split (iup:attribute test-patterns-textbox "VALUE")))
                                                                (test-patts   (string-split (or (dboard:tabdat-test-patts tabdat)
                                                                                                "")
                                                                                            ","))
                                                                (patterns     (delete-duplicates (append box-patterns test-patts)))) 
							   (let* ((selected     (not (member test-name patterns)))
								  (newpatt-list (if selected
										    (cons test-name patterns)
										    (delete test-name patterns)))
								  (newpatt      (string-intersperse newpatt-list "\n")))
							     (iup:attribute-set! test-patterns-textbox "VALUE" newpatt)
							     (iup:attribute-set! obj "REDRAW" "ALL")
							     (hash-table-set! selected-tests test-name selected)
							     (dboard:tabdat-test-patts-set!-use tabdat (dboard:lines->test-patt newpatt))
							     (dashboard:update-run-command tabdat)
							     (if updater (updater last-xadj last-yadj)))))))
						 (hash-table-keys tests-info)))))))
     canvas-obj)))

;;======================================================================
;;  S T E P S
;;======================================================================

(define (dcommon:populate-steps teststeps steps-matrix run-id test-id)
  (let* ((max-row       0)
	 (max-col       9)
         (white         "255 255 255")
         
         (testinfo      (rmt:get-testinfo-state-status run-id test-id))
         (state         (db:test-get-state testinfo))
         (status        (db:test-get-status testinfo))
         (test-status-color (car (gutils:get-color-for-state-status state status)))
         (running-color (car (gutils:get-color-for-state-status "RUNNING" "STARTED")))
         (failcolor     (car (gutils:get-color-for-state-status "COMPLETED" "FAIL"))))
    (if (null? teststeps)
	(begin
          (iup:attribute-set! steps-matrix "CLEARATTRIB" "CONTENTS")
          (iup:attribute-set! steps-matrix "CLEARVALUE" "CONTENTS"))
	(let loop ((hed    (car teststeps))
		   (tal    (cdr teststeps))
		   (rownum 1)
		   (colnum 1))
	  (if (> rownum max-row)(set! max-row rownum))
	  (let* ((status  (vector-ref hed 3))
                 (val     (vector-ref hed (- colnum 1)))
                 (bgcolor (cond
                           ((member (conc status) '("" "-" "#<unspecified>"))
                            running-color)
                           
                           ((member (conc status) '("0" 0))
                            white)
                           (else test-status-color)))
                          ; (else failcolor)))
		 (mtrx-rc (conc rownum ":" colnum)))
            ;;(print "BB> status=>"status"< bgcolor="bgcolor)
	    (iup:attribute-set! steps-matrix  mtrx-rc (if val (conc val) ""))
            (if (< colnum 5)
                (iup:attribute-set! steps-matrix  (conc "BGCOLOR" mtrx-rc) bgcolor))
	    (if (< colnum max-col)
		(loop hed tal rownum (+ colnum 1))
		(if (not (null? tal))
		    (loop (car tal) (cdr tal) (+ rownum 1) 1))))))
    (if (> max-row 0)
	(begin
	  ;; we are going to speculatively clear rows until we find a row that is already cleared
	  (let loop ((rownum  (+ max-row 1))
		     (colnum  0)
		     (deleted #f))
	    ;; (debug:print-info 0 *default-log-port* "cleaning " rownum ":" colnum)
	    (let* ((next-row (if (eq? colnum max-col) (+ rownum 1) rownum))
		   (next-col (if (eq? colnum max-col) 1 (+ colnum 1)))
		   (mtrx-rc  (conc rownum ":" colnum))
		   (curr-val (iup:attribute steps-matrix mtrx-rc)))
	      ;; (debug:print-info 0 *default-log-port* "cleaning " rownum ":" colnum " currval= " curr-val)
	      (if (and (string? curr-val)
		       (not (equal? curr-val "")))
		  (begin
		    (iup:attribute-set! steps-matrix mtrx-rc "")
		    (loop next-row next-col #t))
		  (if (eq? colnum max-col) ;; not done, didn't get a full blank row
		      (if deleted (loop next-row next-col #f)) ;; exit on this not met
		      (loop next-row next-col deleted)))))
	  (iup:attribute-set! steps-matrix "REDRAW" "ALL")))))

;;======================================================================
;; U T I L I T I E S
;;======================================================================

(define (dcommon:run-html-viewer lfilename)
  (let ((htmlviewercmd (configf:lookup *configdat* "setup" "htmlviewercmd")))
    (if htmlviewercmd
	(system (conc "(" htmlviewercmd " " lfilename " ) &")) 
	(iup:send-url lfilename))))

;;======================================================================
;; diff-report
;;======================================================================

(define css "")

(define (diff:tests-mindat->hash tests-mindat)
  (let* ((res (make-hash-table)))
    (for-each
     (lambda (item)
       (let* ((test-name+item-path (cons (list-ref item 0) (list-ref item 1)))
              (value (list-ref item 2)))
         (hash-table-set! res test-name+item-path value)))
     tests-mindat)
    res))

;; return 1 if status1 is better
;; return 0 if status1 and 2 are equally good
;; return -1 if status2 is better
(define (diff:status-compare3 status1 status2)
  (let*
      ((status-goodness-ranking  (list "PASS" "WARN" "WAIVED" "SKIP" "FAIL" "ABORT" #f))
       (mem1 (member status1 status-goodness-ranking))
       (mem2 (member status2 status-goodness-ranking))
       )
    (cond
     ((and (not mem1) (not mem2)) 0)
     ((not mem1) -1)
     ((not mem2) 1)
     ((= (length mem1) (length mem2)) 0)
     ((> (length mem1) (length mem2)) 1)
     (else -1))))


(define (diff:xor-tests-mindat src-tests-mindat dest-tests-mindat #!key (hide-clean #f) (consistent-fail-not-clean #f))
  (let* ((src-hash (diff:tests-mindat->hash src-tests-mindat))
         (dest-hash (diff:tests-mindat->hash dest-tests-mindat))
         (all-keys
          (reverse (sort 
           (delete-duplicates
            (append (hash-table-keys src-hash) (hash-table-keys dest-hash)))

           (lambda (a b) 
             (cond
              ((< 0 (string-compare3 (car a) (car b))) #t)
              ((> 0 (string-compare3 (car a) (car b))) #f)
              ((< 0 (string-compare3 (cdr a) (cdr b))) #t)
              (else #f)))

           ))))
    (let ((res
           (map ;; TODO: rename xor to delta globally in dcommon and dashboard
            (lambda (key)
              (let* ((test-name (car key))
                     (item-path (cdr key))

                     (dest-value    (hash-table-ref/default dest-hash key (list 0 "NULL" "NULL"))) ;; (list test-id state status)
                     (dest-test-id  (list-ref dest-value 0))
                     (dest-state    (list-ref dest-value 1))
                     (dest-status   (list-ref dest-value 2))

                     (src-value     (hash-table-ref/default src-hash key (list 0 "NULL" "NULL")))   ;; (list test-id state status)
                     (src-test-id   (list-ref src-value 0))
                     (src-state     (list-ref src-value 1))
                     (src-status    (list-ref src-value 2))

                     (incomplete-statuses '("DELETED" "INCOMPLETE" "STUCK/DEAD" "N/A")) ;; if any of these statuses apply, treat test as incomplete

                     (dest-complete
                      (and dest-value dest-state dest-status
                           (equal? dest-state "COMPLETED")
                           (not (member dest-status incomplete-statuses))))
                     (src-complete
                      (and src-value src-state src-status
                           (equal? src-state "COMPLETED")
                           (not (member src-status incomplete-statuses))))
                     (status-compare-result (diff:status-compare3 src-status dest-status))
                     (xor-new-item
                      (cond
                       ;; complete, for this case means: state=compelte AND status not in ( deleted uncomplete stuck/dead n/a )
                       ;; neither complete -> bad

                       ;; src !complete, dest complete -> better
                       ((and (not dest-complete) (not src-complete))
                        (list dest-test-id "BOTH-BAD" "BOTH-INCOMPLETE") src-value dest-value)
                       ((not dest-complete)
                        (list src-test-id "NOT-IN-DEST" "DEST-INCOMPLETE") src-value dest-value)  
                       ((not src-complete)
                        (list dest-test-id "NOT-IN-SRC" "SRC-INCOMPLETE") src-value dest-value)      
                       ((and
                         (equal? src-state dest-state)
                         (equal? src-status dest-status))
                        (if (and consistent-fail-not-clean (not (member dest-status '("PASS" "SKIP" "WAIVED" "WARN"))))
                            (list dest-test-id  (conc "BOTH-BAD") (conc "CLEAN-" dest-status) src-value dest-value)
                            (list dest-test-id  (conc "CLEAN") (conc "CLEAN-" dest-status) src-value dest-value)))
                       ;;    better or worse: pass > warn > waived > skip > fail > abort
                       ;;     pass > warn > waived > skip > fail > abort
                       
                       ((= 1 status-compare-result) ;; src is better, dest is worse
                        (list dest-test-id "WORSE" (conc src-status "->" dest-status) src-value dest-value))
                       (else
                        (list dest-test-id "BETTER" (conc src-status "->" dest-status) src-value dest-value)))))
                (list test-name item-path  xor-new-item)))
            all-keys)))

      (if hide-clean
          (filter
           (lambda (item)
             (not
              (equal?
               "CLEAN"
               (list-ref (list-ref item 2) 1))))
           res)
          res))))

(define (diff:run-name->run-id run-name)
  (if (number? run-name)
      run-name
      (let* ((qry-res (rmt:get-runs run-name 1 0 '())))
        (if (eq? 2 (vector-length qry-res))
            (vector-ref (car (vector-ref qry-res 1)) 1)
            #f))))

(define (diff:target+run-name->run-id target run-name)
  (let* ((keys (rmt:get-keys))
         (target-parts (if target (string-split target "/") (map (lambda (x) "%") keys))))
    (if (not (eq? (length keys) (length keys)))
        (begin
          (print "Error: Target ("target") item count does not match fields count target tokens="target-parts" fields="keys)
          #f)
        (let* ((target-map (zip keys target-parts))
               (qry-res (rmt:get-runs run-name 1 0 target-map)))

          (if (eq? 2 (vector-length qry-res))
              (let ((first-ent (vector-ref qry-res 1)))
                (if (> (length first-ent) 0)
                    (vector-ref (car first-ent) 1)
                    #f))
              #f)))))

(define (diff:run-id->tests-mindat run-id #!key (testpatt "%/%"))
  (let* ((states '())
         (statuses '())
         (offset #f)
         (limit #f)
         (not-in #t)
         (sort-by #f)
         (sort-order #f)
         (qryvals "id,testname,item_path,state,status")
         (qryvals "id,testname,item_path,state,status")
         (last-update 0)
         (mode #f)
         )
    (map
     ;; (lambda (row)
     ;;   (match row
     ;;     ((#(id test-name item-path state status)
     ;;       (list test-name item-path (list id state status))))
     ;;     (else #f)))
     (lambda (row)
       (let* ((id        (vector-ref row 0))
              (test-name  (vector-ref row 1))
              (item-path (vector-ref row 2))
              (state     (vector-ref row 3))
              (status    (vector-ref row 4)))
             (list test-name item-path (list id state status))))
     
     (rmt:get-tests-for-run run-id
                            testpatt states statuses
                            offset limit
                            not-in sort-by sort-order
                            qryvals
                            last-update
                            mode))))


(define (diff:diff-runs src-run-id dest-run-id)
  (let* ((src-tests-mindat  (diff:run-id->tests-mindat src-run-id))
         (dest-tests-mindat (diff:run-id->tests-mindat dest-run-id)))
    (diff:xor-tests-mindat src-tests-mindat dest-tests-mindat consistent-fail-not-clean: #t)))


(define (diff:rundiff-find-by-state run-diff state)
    (filter
     (lambda (x)
       (equal? (list-ref (caddr x) 1) state))
     run-diff))

(define (diff:rundiff-clean-breakdown run-diff)
  (map
   (lambda (run-diff-item)
     (match run-diff-item
       ((test-name item-path (junk-id diff-state diff-status (src-test-id src-state src-status) (dest-test-id dest-state dest-status)))
        (list test-name item-path "CLEAN" src-status))
       (else "")))
   (diff:rundiff-find-by-state run-diff "CLEAN")))
  
(define (diff:summarize-run-diff run-diff)
  
  (let* ((diff-states (list "CLEAN" "BETTER" "WORSE" "BOTH-BAD" "NOT-IN-DEST" "NOT-IN-SRC" )))
    (map
     (lambda (state)
       (list state 
             (length (diff:rundiff-find-by-state run-diff state))))
     diff-states)))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Presentation code below, business logic above ;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(define (diff:stml->string in-stml)
  (with-output-to-string
    (lambda ()
      (s:output-new
       (current-output-port)
       in-stml))))

(define (diff:state-status->bgcolor state status)
  (match (list state status)
    (("CLEAN"           _) "#88ff88")
    (("BETTER"          _) "#33ff33")
    (("WORSE"           _) "#ff3333")
    (("BOTH-BAD"        _) "#ff3333")
    ((_            "WARN") "#ffff88")
    ((_            "FAIL") "#ff8888")
    ((_           "ABORT") "#ff0000")
    ((_            "PASS") "#88ff88")
    ((_            "SKIP") "#ffff00")           
    (else                  "#ffffff")))

(define (diff:test-state-status->diff-report-cell state status)
  (s:td 'bgcolor (diff:state-status->bgcolor state status) status))

(define (diff:diff-state-status->diff-report-cell state status)
  (s:td state 'bgcolor (diff:state-status->bgcolor state status)))


(define (diff:megatest-html-logo)

  "<pre>
___  ___                 _            _
|  \\/  | ___  __ _  __ _| |_ ___  ___| |_
| |\\/| |/ _ \\/ _` |/ _` | __/ _ \\/ __| __|
| |  | |  __/ (_| | (_| | ||  __/\\__ \\ |_
|_|  |_|\\___|\\__, |\\__,_|\\__\\___||___/\\__|
             |___/
</pre>")

(define (diff:megatest-html-diff-logo)
  "<pre>
___  ___                 _            _
|  \\/  | ___  __ _  __ _| |_ ___  ___| |_  |  _ \\(_)/ _|/ _|
| |\\/| |/ _ \\/ _` |/ _` | __/ _ \\/ __| __| | | | | | |_| |_
| |  | |  __/ (_| | (_| | ||  __/\\__ \\ |_  | |_| | |  _|  _|
|_|  |_|\\___|\\__, |\\__,_|\\__\\___||___/\\__| |____/|_|_| |_|
             |___/
</pre>")


(define (diff:run-id->target+run-name+starttime run-id)
  (let* ((target      (rmt:get-target run-id))
         (runinfo     (rmt:get-run-info run-id)) ; vector of header (list) and result (vector)
         (info-hash   (alist->hash-table
                       (map (lambda (x) (cons (car x) (cadr x)))  ; make it a useful hash
                            (zip (vector-ref runinfo 0) (vector->list (vector-ref runinfo 1))))))
         (run-name    (hash-table-ref/default info-hash "runname" "N/A"))
         (start-time  (hash-table-ref/default info-hash "event_time" 0)))
    (list target run-name start-time)))

(define (diff:deliver-diff-report src-run-id dest-run-id
                                    #!key
                                    (html-output-file #f)
                                    (email-subject-prefix "[MEGATEST DIFF]")
                                    (email-recipients-list '())  )
  (let* ((src-info         (diff:run-id->target+run-name+starttime src-run-id))
         (src-target       (car src-info))
         (src-run-name     (cadr src-info))
         (src-start        (conc (seconds->string (caddr src-info)) " " (local-timezone-abbreviation)))
         (dest-info        (diff:run-id->target+run-name+starttime dest-run-id))
         (dest-target      (car dest-info))
         (dest-run-name    (cadr dest-info))
         (dest-start       (conc (seconds->string (caddr dest-info)) " " (local-timezone-abbreviation)))


         (run-diff (diff:diff-runs src-run-id dest-run-id ))
         (test-count (length run-diff))
         (summary-table
          (apply s:table 'cellspacing "0" 'border "1"
                 (s:tr
                  (s:th "Diff type")
                  (s:th "% share")
                  (s:th "Count"))
                 
                 (map
                  (lambda (state-count)
                    (s:tr
                     (diff:diff-state-status->diff-report-cell (car state-count) #f)
                     (s:td 'align "right" (fmt #f
                                (decimal-align 3
                                               (fix 2
                                                    (num/fit 6
                                                             (* 100 (/ (cadr state-count) test-count)))))))
                     (s:td 'align "right" (cadr state-count))))
                  (diff:summarize-run-diff run-diff))))
         (meta-table
          (s:table 'cellspacing "0" 'border "1"
                   
           (s:tr
            (s:td 'colspan "2"
                  (s:table 'cellspacing "0" 'border "1"
                           (s:tr
                            (s:th 'align "LEFT" "")          (s:th "SOURCE RUN")     (s:th "DESTINATION RUN"))
                           (s:tr
                            (s:th 'align "LEFT" "Started")  (s:td src-start)  (s:td dest-start))
                           (s:tr
                            (s:th 'align "LEFT" "TARGET")  (s:td src-target)  (s:td dest-target))
                           (s:tr
                            (s:th 'align "LEFT" "RUN NAME")  (s:td src-run-name)  (s:td dest-run-name)))))))
           
         (main-table
          (apply s:table 'cellspacing "0" 'border "1"
                 (s:tr
                  (s:th "Test name")
                  (s:th "Item Path")
                  (s:th (conc "SOURCE"))
                  (s:th (conc "DEST"))
                  (s:th "Diff"))
                 (map
                  (lambda (run-diff-item)
                    (match run-diff-item
                      ((test-name item-path (junk-id diff-state diff-status (src-test-id src-state src-status) (dest-test-id dest-state dest-status)))
                       (s:tr
                        (s:td test-name)
                        (s:td item-path)
                        (diff:test-state-status->diff-report-cell src-state src-status)
                        (diff:test-state-status->diff-report-cell dest-state dest-status)
                        (diff:diff-state-status->diff-report-cell diff-state diff-status)))
                      (else "")))
                  (filter (lambda (run-diff-item)
                            (match run-diff-item
                              ((test-name item-path (junk-id diff-state diff-status (src-test-id src-state src-status) (dest-test-id dest-state dest-status)))
                               (not (equal? diff-state "CLEAN")))
                              (else #f)))
                            run-diff))))
         (email-subject (conc email-subject-prefix " " src-target "/" src-run-name" vs. "dest-target"/"dest-run-name))
         (html-body     (diff:stml->string (s:body
                   (diff:megatest-html-diff-logo)
                   (s:h2 "Summary")
                   (s:table 'border "0"
                            (s:tr
                             (s:td "Diff calculated at")
                             (s:td (conc (seconds->string) " " (local-timezone-abbreviation))))
                            (s:tr
                             (s:td "MT_RUN_AREA_HOME" ) (s:td *toppath*))
                            (s:tr 'valign "TOP"
                     (s:td summary-table)
                     (s:td meta-table)))
                   (s:h2 "Diffs + consistently failing tests")
                   main-table)))

         )
    (if html-output-file
        (with-output-to-file html-output-file (lambda () (print html-body))))
    (when (and email-recipients-list (> (length email-recipients-list) 0))
      (sendmail (string-join email-recipients-list ",") email-subject html-body use_html: #t))
    html-body))
      

  


;; (let* ((src-run-name "all57")
;;        (dest-run-name "all60")
;;        (src-run-id (diff:run-name->run-id src-run-name))
;;        (dest-run-id (diff:run-name->run-id dest-run-name))
;;        (to-list (list "bjbarcla")))
;;   (diff:deliver-diff-report src-run-id dest-run-id email-recipients-list: to-list html-output-file: "/tmp/bjbarcla/zippy.html")
;;   )

(define (do-diff-report src-target src-runname dest-target dest-runname html-file to-list-raw)
  (let* (;;(src-target "nope%")
         ;;(src-runname "all57")
         ;;(dest-target "%")
         ;;(dest-runname "all60")
         (src-run-id (diff:target+run-name->run-id src-target src-runname))
         (dest-run-id (diff:target+run-name->run-id dest-target dest-runname))
         ;(html-file "/tmp/bjbarcla/zippy.html")
         (to-list (if (string? to-list-raw) (string-split to-list-raw ",:") #f))
         )
    
    (cond
     ((not src-run-id)
      (print "No match for source target/runname="src-target"/"src-runname)
      (print "Cannot proceed.")
      #f)
     ((not dest-run-id)
      (print "No match for source target/runname="dest-target"/"dest-runname)
      (print "Cannot proceed.")
      #f)
     (else
      (diff:deliver-diff-report src-run-id dest-run-id email-recipients-list: to-list html-output-file: html-file)))))

  
;;======================================================================
;; env
;;======================================================================

(define (env:open-db fname)
  (let* ((db-exists (common:file-exists? fname))
	 (db        (open-database fname)))
    (if (not db-exists)
	(begin
	  (exec (sql db "CREATE TABLE envvars (
                    id INTEGER PRIMARY KEY,
                    context TEXT NOT NULL,
                    var TEXT NOT NULL,
                    val TEXT NOT NULL,
                       CONSTRAINT envvars_constraint UNIQUE (context,var))"))))
    (set-busy-handler! db (busy-timeout 10000))
    db))

;; save vars in given context, this is NOT incremental by default
;;
(define (env:save-env-vars db context #!key (incremental #f)(vardat #f))
  (with-transaction
   db
   (lambda ()
     ;; first clear out any vars for this context
     (if (not incremental)(exec (sql db "DELETE FROM envvars WHERE context=?") context))
     (for-each
      (lambda (varval)
	(let ((var (car varval))
	      (val (cdr varval)))
	  (if incremental (exec (sql db "DELETE FROM envvars WHERE context=? AND var=?") context var))
	  (exec (sql db "INSERT INTO envvars (context,var,val) VALUES (?,?,?)") context var val)))
	(if vardat
	    (hash-table->alist vardat)
	    (get-environment-variables))))))

;; merge contexts in the order given
;;  - each context is applied in the given order
;;  - variables in the paths list are split on the separator and the components
;;    merged using simple delta addition
;;    returns a hash of the merged vars
;;
(define (env:merge-contexts db basecontext contexts paths)
  (let ((result (make-hash-table)))
    (for-each
     (lambda (context)
       (query
	(for-each-row
	 (lambda (row)
	   (let ((var  (car row))
		 (val  (cadr row)))
	     (hash-table-set! result var 
			      (if (and (hash-table-ref/default results var #f)
				       (assoc var paths)) ;; this var is a path and there is a previous path
				  (let ((sep (cadr (assoc var paths))))
				    (env:merge-path-envvar sep (hash-table-ref results var) valb))
				  valb)))))
	(sql db "SELECT var,val FROM envvars WHERE context=?")
	context))
     contexts)
    result))

;;  get list of removed variables between two contexts
;;
(define (env:get-removed db contexta contextb)
  (let ((result (make-hash-table)))
    (query
     (for-each-row
      (lambda (row)
	(let ((var  (car row))
	      (val  (cadr row)))
	  (hash-table-set! result var val))))
     (sql db "SELECT var,val FROM envvars WHERE context=? AND var NOT IN (SELECT var FROM envvars WHERE context=?)")
     contexta contextb)
    result))

;;  get list of variables added to contextb from contexta
;;
(define (env:get-added db contexta contextb)
  (let ((result (make-hash-table)))
    (query
     (for-each-row
      (lambda (row)
	(let ((var  (car row))
	      (val  (cadr row)))
	  (hash-table-set! result var val))))
     (sql db "SELECT var,val FROM envvars WHERE context=? AND var NOT IN (SELECT var FROM envvars WHERE context=?)")
     contextb contexta)
    result))

;;  get list of variables in both contexta and contexb that have been changed
;;
(define (env:get-changed db contexta contextb)
  (let ((result (make-hash-table)))
    (query
     (for-each-row
      (lambda (row)
	(let ((var  (car row))
	      (val  (cadr row)))
	  (hash-table-set! result var val))))
     (sql db "SELECT var,val FROM envvars AS a WHERE context=? AND val != (SELECT val FROM envvars WHERE var=a.var AND context=?)")
     contextb contexta)
    result))

;;
(define (env:blind-merge l1 l2)
  (if (null? l1) l2
      (if (null? l2) l1
	  (cons (car l1) (cons (car l2) (env:blind-merge (cdr l1) (cdr l2)))))))

;; given a before and an after envvar calculate a new merged path
;;
(define (env:merge-path-envvar separator patha pathb)
  (let* ((patha-parts  (string-split patha separator))
	 (pathb-parts  (string-split pathb separator))
	 (common-parts (lset-intersection equal? patha-parts pathb-parts))
	 (final        (delete-duplicates ;; env:blind-merge 
			(append pathb-parts common-parts patha-parts))))
;;     (print "BEFORE:   " (string-intersperse patha-parts  "\n       "))
;;     (print "AFTER:    " (string-intersperse pathb-parts  "\n       "))
;;     (print "COMMON:   " (string-intersperse common-parts "\n       "))
    (string-intersperse final separator)))

(define (env:process-path-envvar varname separator patha pathb)
  (let ((newpath (env:merge-path-envvar separator patha pathb)))
    (setenv varname newpath)))

(define (env:have-context db context)
  (> (query fetch-value (sql db "SELECT count(id) FROM envvars WHERE context=?") context)
     0))

;; this is so the calling block does not need to import sql-de-lite
(define (env:close-database db)
  (close-database db))

(define (env:lazy-hash-table->alist indat)
  (if (hash-table? indat)
      (let ((dat (hash-table->alist indat)))
	(if (null? dat)
	    #f 
	    dat))
      #f))

(define (env:inc-path path)
  (print "PATH "
	 (conc "#{scheme (env:min-path \"" path "\" \"#{getenv PATH}\")}")))
;; 	 (conc
;; 	  "#{scheme (string-intersperse "
;; 	  "(delete-duplicates "
;; 	  "(append (string-split \"" path "\" \":\") "
;; 	  "(string-split \"#{getenv PATH}\" \":\")))"
;; 	  " \":\")}")))

(define (env:min-path path1 path2)
  (string-intersperse
   (delete-duplicates
    (append
     (string-split path1 ":")
     (string-split path2 ":")))
   ":"))

;; inc path will set a PATH that is incrementally modified when read - config mode only
;;
(define (env:print added removed changed #!key (inc-path #t))
  (let ((a  (env:lazy-hash-table->alist added))
	(r  (env:lazy-hash-table->alist removed))
	(c  (env:lazy-hash-table->alist changed)))
    (case (if (args:get-arg "-dumpmode")
	      (string->symbol (args:get-arg "-dumpmode"))
	      'bash)
      ((bash)
       (if a
	   (begin
	     (print "# Added vars")
	     (map (lambda (dat)(print "export " (car dat) "=" (cdr dat)))
		  (hash-table->alist added))))
       (if r
	   (begin
	     (print "# Removed vars")
	     (map (lambda (dat)(print "unset " (car dat)))
		  (hash-table->alist removed))))
       (if c
	   (begin
	     (print "# Changed vars")
	     (map (lambda (dat)(print "export " (car dat) "=" (cdr dat)))
		  (hash-table->alist changed)))))
      ((csh)
       (if a
	   (begin
	     (print "# Added vars")
	     (map (lambda (dat)(print "setenv " (car dat) " " (cdr dat)))
		  (hash-table->alist added))))
       (if r
	   (begin
	     (print "# Removed vars")
	     (map (lambda (dat)(print "unsetenv " (car dat)))
		  (hash-table->alist removed))))
       (if c
	   (begin
	     (print "# Changed vars")
	     (map (lambda (dat)(print "setenv " (car dat) " " (cdr dat)))
		  (hash-table->alist changed)))))
      ((config ini)
       (if a
	   (begin
	     (print "# Added vars")
	     (map (lambda (dat)
		    (let ((var (car dat))
			  (val (cdr dat)))
		      (if (and inc-path
			       (equal? var "PATH"))
			  (env:inc-path val)
			  (print var " " val))))
		  (hash-table->alist added))))
       (if r
	   (begin
	     (print "# Removed vars")
	     (map (lambda (dat)(print "#{scheme (unsetenv \"" (car dat) "\")}"))
		  (hash-table->alist removed))))
       (if c
	   (begin
	     (print "# Changed vars")
	     (map (lambda (dat)
		    (let ((var (car dat))
			  (val (cdr dat)))
		      (if (and inc-path
			       (equal? var "PATH"))
			  (env:inc-path val)
			  (print var " " val))))
		  (hash-table->alist changed)))))
      (else
       (debug:print-error 0 *default-log-port* "No dumpmode specified, use -dumpmode [bash|csh|config]")))))
;;======================================================================
;; ezsteps
;;======================================================================


(define (ezsteps:run-from testdat start-step-name run-one)
  ;;# TODO - recapture item variables, debug repeated step eval; regen logpro from test
  (let* ((do-update-test-state-status #f)
         (test-run-dir  ;; (filedb:get-path *fdb* 
	  (db:test-get-rundir testdat)) ;; )
	 (testconfig    (read-config (conc test-run-dir "/testconfig") #f #t environ-patt: "pre-launch-env-vars"))
	 (ezstepslst    (hash-table-ref/default testconfig "ezsteps" '()))
	 (run-mutex     (make-mutex))
	 (rollup-status 0)
         (rollup-status-string #f)
         (rollup-status-sym #f)
	 (exit-info     (vector #t #t #t))
	 (test-id       (db:test-get-id        testdat))
	 (run-id        (db:test-get-run_id    testdat))
	 (test-name     (db:test-get-testname  testdat))
         (orig-test-state (db:test-get-state   testdat))
         (orig-test-status (db:test-get-status testdat))
	 (kill-job      #f)) ;; for future use (on re-factoring with launch.scm code

    ;; keep trying till NFS deigns to populate test run dir on this host
    (let loop ((count 5))
      (if (not (common:file-exists? test-run-dir))
	  ;;(push-directory test-run-dir)
	  (if (> count 0)
	      (begin
		(debug:print 0 *default-log-port* "WARNING: ezsteps attempting to run but test run directory " test-run-dir " is not there. Waiting and trying again " count " more times")
		(sleep 3)
		(loop (- count 1))))))
    
    (debug:print-info 0 *default-log-port* "Running in directory " test-run-dir)
    (if (not (common:file-exists? ".ezsteps"))(create-directory ".ezsteps"))
    ;; if ezsteps was defined then we are sure to have at least one step but check anyway
    
    (if (not (> (length ezstepslst) 0))
	(message-window "ERROR: You can only re-run steps defined via ezsteps")
	(begin
	  (let loop ((ezstep   (car ezstepslst))
		     (tal      (cdr ezstepslst))
                     (status-sym-so-far 'pass)
		     ;;(runflag  #f)
                     (saw-start-step-name #f)) ;; flag used to skip steps when not starting at the beginning
	    (if (vector-ref exit-info 1)
		(let* ((stepname    (car ezstep))  ;; do stuff to run the step
                       (logpro-used (common:file-exists? (conc test-run-dir "/" stepname ".logpro")))
		       (stepinfo    (cadr ezstep))
		       (stepparts   (string-match (regexp "^(\\{([^\\}]*)\\}\\s*|)(.*)$") stepinfo))
		       (stepparms   (list-ref stepparts 2)) ;; for future use, {VAR=1,2,3}, run step for each 
		       (stepcmd     (list-ref stepparts 3))
		       (script      (conc "mt_ezstep '"test-run-dir"' '"stepname"' '"stepcmd"'")) ;; call the command using mt_ezstep
                       (saw-start-step-name-next (or saw-start-step-name (equal? stepname start-step-name)))
                       (proceed-with-this-step
                        (or (not start-step-name)
                            (equal? stepname start-step-name)
                            (and saw-start-step-name (not run-one))
                            saw-start-step-name-next
                            (and start-step-name (equal? stepname start-step-name))))
                       )
                  (set! do-update-test-state-status (and proceed-with-this-step (null? tal)))
                  ;;(BB> "stepname="stepname" proceed-with-this-step="proceed-with-this-step " do-update-test-state-status="do-update-test-state-status " orig-test-state="orig-test-state" orig-test-status="orig-test-status)
                  (cond
                   ((and (not proceed-with-this-step) (null? tal))
                    'done)
                   ((not proceed-with-this-step)
                      (loop (car tal)
                            (cdr tal)
                            status-sym-so-far
                            saw-start-step-name-next))
                   (else
		    (debug:print 4 *default-log-port* "ezsteps:\n stepname: " stepname " stepinfo: " stepinfo " stepparts: " stepparts
			         " stepparms: " stepparms " stepcmd: " stepcmd)
		    (debug:print 4 *default-log-port* "script: " script)
		    (rmt:teststep-set-status! run-id test-id stepname "start" "-" #f #f)

		    ;; now launch the script
		    (let ((pid (process-run script)))
		      (let processloop ((i 0))
		        (let-values (((pid-val exit-status exit-code)(process-wait pid #t)))
			  (mutex-lock! run-mutex)
			  (vector-set! exit-info 0 pid)
			  (vector-set! exit-info 1 exit-status)
			  (vector-set! exit-info 2 exit-code)
			  (mutex-unlock! run-mutex)
			  (if (eq? pid-val 0)
			      (begin
			        (thread-sleep! 1)
			        (processloop (+ i 1))))
			  ))
		      (let ((exinfo (vector-ref exit-info 2))
			    (logfna (if logpro-used (conc stepname ".html") "")))
		        (rmt:teststep-set-status! run-id test-id stepname "end" exinfo #f logfna))
                      
		      (if logpro-used
			  (rmt:test-set-log! run-id test-id (conc stepname ".html")))
                      
		      ;; set the test final status
		      (let* ((this-step-status      (cond
                                                     (logpro-used
                                                      (common:logpro-exit-code->status-sym (vector-ref exit-info 2)))
					             ((eq? (vector-ref exit-info 2) 0)
                                                      'pass)
					             (else
                                                      'fail)))
			     (overall-status-sym    (common:worse-status-sym this-step-status status-sym-so-far))
                             (overall-status-string (status-sym->string overall-status-sym)))
		        (debug:print 4 *default-log-port* "Exit value received: " (vector-ref exit-info 2) " logpro-used: " logpro-used 
				     " this-step-status: " this-step-status " overall-status: " overall-status-sym) 
		        ;;" next-status: " next-status " rollup-status: " rollup-status)
                        (set! rollup-status-string overall-status-string)
                        (set! rollup-status-sym overall-status-sym)
                        (tests:test-set-status! run-id test-id "RUNNING" overall-status-string #f #f)))

                    (if (and
                         (not run-one)
                         (common:steps-can-proceed-given-status-sym rollup-status-sym)
                         (not (null? tal)))
                        (loop (car tal)
                              (cdr tal)
                              rollup-status-sym
                              saw-start-step-name-next)))))
		(debug:print 4 *default-log-port* "WARNING: a prior step failed, stopping at " ezstep)))
	  
	  ;; Once done with step/steps update the test record
	  ;;
	  (let* ((item-path (db:test-get-item-path testdat)) ;; (item-list->path itemdat))
		 (testinfo  (rmt:get-testinfo-state-status run-id test-id))) ;; refresh the testdat, call it iteminfo in case need prev/curr
	    ;; Am I completed?
	    (if (equal? (db:test-get-state testinfo) "RUNNING") ;; (not (equal? (db:test-get-state testinfo) "COMPLETED"))
		(let ((new-state  (if kill-job "KILLED" "COMPLETED") ;; (if (eq? (vector-ref exit-info 2) 0) ;; exited with "good" status
				  ;; "COMPLETED"
				  ;; (db:test-get-state testinfo)))   ;; else preseve the state as set within the test
				  )
                      (new-status rollup-status-string)
                      ) ;; (db:test-get-status testinfo)))
		  (debug:print-info 2 *default-log-port* "Test NOT logged as COMPLETED, (state=" (db:test-get-state testinfo) "), updating result, rollup-status is " rollup-status)
		  (tests:test-set-status! run-id test-id 
					  (if do-update-test-state-status new-state orig-test-state)
					  (if do-update-test-state-status new-status orig-test-status)
					  (args:get-arg "-m") #f)
		  ;; need to update the top test record if PASS or FAIL and this is a subtest
		  (if (and (not (equal? item-path "")) do-update-test-state-status)
                      (rmt:set-state-status-and-roll-up-items run-id test-name item-path new-state new-status #f))))
	    ;; for automated creation of the rollup html file this is a good place...
	    (if (not (equal? item-path ""))
	      (tests:summarize-items run-id test-id test-name #f)) ;; don't force - just update if no
	    )))
    ;;(pop-directory)
    rollup-status-string))

(define (ezsteps:spawn-run-from testdat start-step-name run-one)
  (thread-start! 
   (make-thread
    (lambda ()
      (ezsteps:run-from testdat start-step-name run-one))
    (conc "ezstep run single step " start-step-name " run-one="run-one)))
  )

;;======================================================================
;; genexample
;;======================================================================

(define genexample:example-logpro
#<<EOF
  ;; You should have at least one expect:required. This ensures that your process ran
  ;; comment out the line below and replace "put pattern here" with a pattern that will
  ;; always be seen in your log file if the step runs successfully.
  ;;
  ;; (expect:required in "LogFileBody" > 0 "Put description here" #/put pattern here/)
  ;;
  ;; You may need ignores to suppress false error or warning hits from the later expects
  ;; NOTE: Order is important here!
  (expect:ignore   in "LogFileBody"  < 99 "Ignore the word error in comments" #/^\/\/.*error/)
  (expect:warning  in "LogFileBody"  = 0 "Any warning" #/warn/)
  (expect:error    in "LogFileBody"  = 0 "Any error"  (list #/ERROR/ #/error/)) ;; but disallow any other errors
EOF
)

(define genexample:example-script
#<<EOF
#!/usr/bin/env bash

# Run your step here
EOF
)

(define (genexample:mk-megatest.config)
  (let ((keystr #f)
	(keys   #f)
	(lntree #f)
	(path   #f)
	(firstd #f))
    (print "Note: don't worry too much about typos in this process, you will be able to edit
the generated files before starting your first runs")

    ;; create the needed area
    (print "==================\nWhere can I create your Megatest regresssion/continuous build area? Note, your \n"
	   "tests will not necessarily be run in this area, disk space needs are modest. Current directory is:\n\n"
	   (current-directory) "\n")
    (display "Enter your megatest directory: ")
    (set! path (read-line))

    (if (not (directory? path))
	(begin
	  (print "The path " path " does not exist or is not a directory. Attempting to create it now")
	  (create-directory path #t)))

    ;; First check that the directory is empty!
    (if (and (common:file-exists? path)
	     (not (null? (glob (conc path "/*")))))
	(begin
	  (print "WARNING: directory " path " is not empty, are you sure you want to continue?")
	  (display "Enter y/n: ")
	  (if (equal? "y" (read-line))
	      (print "Using directory " path " for your Megatest area.")
	      (begin
		(print "INFO: Creation of megatest files in " path " aborted")
		(exit 1)))))

    ;; first prompt user for fields
    ;;
    (print
     "==================

Next you must specify fields or keys for your megatest area. These
will be used to organise your runs. One field should probably be
\"RELEASE\".  Other examples of useful fields might be \"PLATFORM\",
\"TARGET_OS\" or if you are in the semiconductor business perhaps
things like \"TECHNOLOGY_NODE\", \"DESIGN_KIT\" or \"METAL_STACK\".

The all caps is a convention because the variables you choose will be
available to your tests as environment variables. You can edit these
values later but it is generally a good idea to settle on them
early.

Your runs will be stored in directories specified by your
keys. Example, if you have keys OSFAMILY/VARIANT/OSVER/RELEASE you may
get a test \"build\" in a directory like this:
linux/ubuntu/11.04/rev_1.2/build

Please enter your keys now, separated by spaces or slashes. Only alpha-numeric characters, 
upper case recommended. Example: COMPILER_VER/RELEASE_NAME/QUAL_LEVEL
")
    (set! keys (let loop ((keystr ""))
		 (if (equal? keystr "q")
		     (begin
		       (print "Quiting ...")
		       (exit))
		     (let ((keylst (apply append
					  (map string-split (string-split keystr "/")))))
		       (if (or (null? keylst)
			       (not (null? (filter string-null? keylst))))
			   (begin
			     (display "Enter keys separated by spaces or slashes: ")
			     (loop (read-line)))
			   keylst)))))

    (print "You have choosen " (string-intersperse keys ", ") " for your keys.")
    
    ;; Now get the link tree location and a first disk
    (print
     "==================

Now you need an initial place to store your runs. These are called \"disks\" and you
can add more at any time. To get going provide a writeable directory name. 

")
    (display "Enter your test runs directory: ")
    (set! firstd (read-line))
    (if (not (directory? firstd))
	(begin
	  (print "WARNING: you have specified a path " firstd " that does not exist. Attempting to create it...\n")
	  (create-directory firstd #t)))

    (print
     "==================

Megatest uses a tree of symlinks to provide a uniform structure for finding all the tests
you run over time. Please provide a path where we can create this link tree.

")
    (display "Enter link tree directory: ")
    (set! lntree (read-line))
    (if (not (directory? lntree))
	(begin
	  (print "WARNING: you have specified a path " lntree "that does not exist. Attempting to create it...\n")
	  (create-directory lntree #t)))
    
    (with-output-to-file (conc path "/megatest.config")
      (lambda ()
	(print "# This area uses Megatest. Learn more at http://www.kiatoa.com/fossils/megatest.")
	(print "#\n")
	(print "[fields]")
	(map (lambda (k)(print k " TEXT")) keys)
	(print "")
	(print "[setup]")
	(print "# Adjust max_concurrent_jobs to limit how much you load your machines")
	(print "max_concurrent_jobs 50\n")
	(print "# This is your link path. Avoid moving it once set.")
	(print "linktree " (common:real-path lntree))
	(print "\n# Job tools are more advanced ways to control how your jobs are launched")
	(print "[jobtools]\nuseshell yes\nlauncher nbfake\nmaxload 1.5\n")
	(print "# You can override environment variables for all your tests here")
	(print "[env-override]\nEXAMPLE_VAR example value\n")
	(print "# As you run more tests you may need to add additional disks, the names are arbitrary but must be unique")
	(print "[disks]\ndisk0 " (common:real-path firstd))))

    (print
     "==================

I'm now creating a runconfigs.config file for you with a default section.
You can use this file to set variables for your tests based on the \"target\" (the combination
of keys).

")
    (with-output-to-file (conc path "/runconfigs.config")
      (lambda ()
	(print "# The variables in the default category will be seen in all runs\n[default]\nALLTESTS see this variable\n")
	
	(print "# Your variables here are grouped by targets [" (string-intersperse keys "/") "]")
	(let ((example-target (string-intersperse (map (lambda (k)(conc k "_val")) keys) "/")))
	  (print "[" example-target "]")
	  (print "ANOTHERVAR only defined if target is " example-target))
	(print "\n# It can be handy to include a file based on the users unix username.\n"
	       "# To prevent cluttering up the top level directory we'll put this file\n# in a directory called \"configs\".")
	(print "[include #{getenv USER}.config]")
	))

    (create-directory (conc path "/configs") #t)
    (with-output-to-file (conc path "/configs/" (current-user-name) ".config")
      (lambda ()
	(print "# Override settings in ../runconfigs.config for user " (current-user-name) " here.")))
    
    ;; Now create a test and logpro file
    (print
     "==================

You now have the basic common files for your megatest setup. Next run
\"megatest -gen-test\" to create a test.

Thank you for using Megatest. 

You can edit your config files and create tests in the " path " directory

")))


;;======================================================================
;; create skeleton files for a test
;;======================================================================

(define (genexample:mk-megatest-test testname)
  ;; Gather needed data
  (let ((waiton   #f)
	(priority #f)
	(description #f)
	(steps    '())
	(scripts  '())
	(items    '())
	(rel-path #f))

    (cond
     ((common:file-exists? "megatest.config")         (set! rel-path "./"))
     ((common:file-exists? "../megatest.config")      (set! rel-path "../"))
     ((common:file-exists? "../../megatest.config")   (set! rel-path "../../"))
     ((common:file-exists? "../../../megatest.config")(set! rel-path "../../../"))) ;; good enough dang it.

    ;; Don't gather data or continue if a) megatest.config can't be found or b) testconfig already exists
    (if (not rel-path)
	(begin
	  (print "ERROR: I could not find megatest.config, please run -create-test in the top dir of your megatest area")
	  (exit 1)))

    (if (common:file-exists? (conc rel-path "tests/" testname "/testconfig"))
	(begin
	  (print "WARNING: You already have a testconfig in " rel-path "tests/" testname ", do you want to clobber your files?")
	  (display "Enter y/n: ")
	  (if (not (equal? "y" (read-line)))
	      (begin
		(print "INFO: user abort of creation of test " testname)
		(exit 1)))))

    (print "We are going to generate a skeleton set of files for your test " testname "\n"
	   " *** Note: do not worry too much about typos, you can edit the files created when you are done.")

    (print "\n==================\nPlease describe this test. The description will be visible in various dialogs and reports")
    (display "Enter one line description for this test: ")
    (set! description (read-line))

    (print "\n\n==================\nDoes this test, " testname ", require any other test be run prior to launch?")
    (display (conc "Enter space delimited list of tests which " testname " must wait for (default is no waiton): "))
    (set! waiton (read-line))

    (print "\n\n==================\nDo you wish to prioritize the running of this test over other tests? If so")
    (print "enter a number greater than zero here")
    (display "Enter a priority of 0 (default) or higher: ")
    (set! priority (read-line))

    ;; Get the steps
    (print "\n==================\nNow to enter the one or more steps that make up your test, note; you can add more later")
    (print "Hint; use .sh extension on the script names and we'll create a placeholder script."

    (let ((stepname   #f)
	  (scriptname #f))
      (let loop ((done #f))
	(display "Enter the name for this step (blank to stop): ")
	(set! stepname (read-line))
	(if (not (equal? stepname ""))
	    (begin
	      (display "Enter the script or progam to run: ")
	      (set! scriptname (read-line))
	      (set! steps (append steps (list (list stepname scriptname))))))
	(if (not (equal? stepname ""))
	    (begin
	      (print "Added step " stepname " to list of steps.\n")
	      (loop #f)))))

    ;; Get the items
    (print "\n\n==================\nNext we need to get the variables and values you wish to iterate this test over (blank for none)")
    (let ((varname #f)
	  (values  #f))
      (let loop ((done #f))
	(display "Enter the variable name: ")
	(set! varname (read-line))
	(if (not (equal? varname ""))
	    (begin
	      (display (conc "Enter the space separated list of values for " varname ": "))
	      (set! values (read-line))
	      (set! items (append items (list (conc varname " " values))))))
	(if (not (equal? varname ""))
	    (loop #f))))

    ;; Now create the test
    (if (not rel-path)
	(begin
	  (print "ERROR: You must run this command in a megatest area under where the megatest.config file exists.")
	  (exit 1))
	(let ((testdir (conc rel-path "tests/" testname)))
	  (create-directory testdir #t)
	  (with-output-to-file (conc testdir "/testconfig")
	    (lambda ()
	      (print "# Add additional steps here. Format is \"stepname script\"\n[ezsteps]")
	      (map (lambda (stp)(print (string-intersperse stp " "))) steps)
	      (print "")
	      (print "# Test requirements are specified here\n[requirements]")
	      (print (if (string-null? waiton) "# " "") "waiton " waiton)
	      (print "priority " (if (string-null? priority) 0 priority) "\n")
	      (print "# Iteration for your test is controlled by the items section\n[items]")
	      (map print items)
	      (print "")
	      (print "# Alternatively you could use a [itemstable] section")
	      (print "# [itemstable]")
	      (print "# ITEMVAR1  a    b    c")
	      (print "# ITEMVAR2  d    e    f")
	      (print "#\n# would result in items: a/d  b/e   c/f\n#\n")
	      (print "# Logpro rules for each step can be captured here in the testconfig")
	      (print "# note: The ;; after the stepname and the leading whitespace are required")
	      (print "#\n[logpro]\n")
	      (for-each (lambda (step)
			  (let ((stepname   (car step))
				(scriptname (cadr step)))
			    (print stepname " ;; rules for checking output from running step " step " with command " scriptname)
			    (print genexample:example-logpro "\n")))
			steps)
	      (print "# test_meta is a section for storing additional data on your test\n[test_meta]")
	      (print "author " (get-environment-variable "USER"))
	      (print "owner  " (get-environment-variable "USER"))
	      (print "description " description)
	      (print "tags tagone,tagtwo")
	      (print "reviewed never")))
	  ;; Now create shell scripts (if extension is .sh) and logpro files
	  (for-each (lambda (stp)
		      (let ((stepname (car stp))
			    (script   (cadr stp)))
			(if (string-match ".*\\.sh$" script)
			    (begin
			      (with-output-to-file (conc testdir "/" script)
				(lambda ()
				  (print genexample:example-script)))
			      (system (conc "chmod ug+r,a+x " (conc testdir "/" script)))))))
		    steps))))))

;;======================================================================
;; gutils
;;======================================================================

;; NOTE: These functions will move to iuputils

(define (gutils:colors-similar? color1 color2)
  (let* ((c1 (map string->number (string-split color1)))
	 (c2 (map string->number (string-split color2)))
	 (delta (map (lambda (a b)(abs (- a b))) c1 c2)))
    (null? (filter (lambda (x)(> x 3)) delta))))

(define gutils:colors
  '((PASS . "70 249 73")
    (FAIL . "253 33 49")
    (SKIP . "230 230 0")))

(define (gutils:get-color-spec effective-state)
  (or (alist-ref effective-state gutils:colors)
      (alist-ref 'FAIL gutils:colors)))

;; BBnote - state status dashboard button color / text defined here
(define (gutils:get-color-for-state-status state status);; #!key (get-label #f))
  ;; ((if get-label cadr car)
  (case (string->symbol state)
    ((COMPLETED) ;; ARCHIVED)
     (case (string->symbol status)
       ((PASS)        (list "70  249 73" status))
       ((PREQ_FAIL PREQ_DISCARDED) (list "255 127 127" status))
       ((WARN WAIVED) (list "255 172 13" status))
       ((SKIP)        (list (gutils:get-color-spec 'SKIP) status))
       ((ABORT)       (list "198 36 166" status))
       (else (list "253 33 49" status))))
    ((ARCHIVED)
     (case (string->symbol status)
       ((PASS)        (list "70  170 73" status))
       ((WARN WAIVED) (list "200 130 13" status))
       ((SKIP)        (list (gutils:get-color-spec 'SKIP) status))
       (else (list "180 33 49" status))))
    ;;      (if (equal? status "PASS")
    ;;	  '("70 249 73" "PASS")
    ;;	  (if (or (equal? status "WARN")
    ;;		  (equal? status "WAIVED"))
    ;;	      (list "255 172 13" status)
    ;;	      (list "223 33 49"  status)))) ;; greenish orangeish redish
    ((LAUNCHED)         (list "101 123 142"  state))
    ((CHECK)            (list "255 100 50"   state))
    ((REMOTEHOSTSTART)  (list "50 130 195"   state))
    ((RUNNING STARTED)          (list "9 131 232"    state))
    ((KILLREQ)          (list "39 82 206"    state))
    ((KILLED)           (list "234 101 17"   state))
    ((NOT_STARTED)      (case (string->symbol status)
			  ((CHECK STARTED)(list (gutils:get-color-spec 'SKIP) state))
			  (else   (list "240 240 240"                 state))))
    ;; for xor mode below
    ;;
    ((CLEAN)
     (case (string->symbol status)
       ((CLEAN-FAIL CLEAN-CHECK CLEAN-ABORT)  (list "200 130 13" status)) ;; orange requested for these
       (else  (list "60  235 63" status))))
    ((DIRTY-BETTER)     (list "160  255 153" status))
    ((DIRTY-WORSE)      (list "165 42  42" status))
    ((BOTH-BAD)         (list "180 33 49" status))

    (else               (list "192 192 192"  state))))

;;======================================================================
;; http-transport
;;======================================================================

(define (http-transport:make-server-url hostport)
  (if (not hostport)
      #f
      (conc "http://" (car hostport) ":" (cadr hostport))))

(define *server-loop-heart-beat* (current-seconds))

;;======================================================================
;; S E R V E R
;; ======================================================================

;; Call this to start the actual server
;;

(define *db:process-queue-mutex* (make-mutex))

(define (http-transport:run hostn)
  (debug:print 2 *default-log-port* "Attempting to start the server ...")
  (let* ((db              #f) ;;        (open-db)) ;; we don't want the server to be opening and closing the db unnecesarily
	 (hostname        (get-host-name))
	 (ipaddrstr       (let ((ipstr (if (string=? "-" hostn)
					   ;; (string-intersperse (map number->string (u8vector->list (hostname->ip hostname))) ".")
					   (server:get-best-guess-address hostname)
					   #f)))
			    (if ipstr ipstr hostn))) ;; hostname))) 
	 (start-port      (portlogger:open-run-close portlogger:find-port))
	 (link-tree-path  (common:get-linktree))
	 (tmp-area        (common:get-db-tmp-area *alldat*))
	 (start-file      (conc tmp-area "/.server-start")))
    (debug:print-info 0 *default-log-port* "portlogger recommended port: " start-port)
    ;; set some parameters for the server
    (root-path     (if link-tree-path 
		       link-tree-path
		       (current-directory))) ;; WARNING: SECURITY HOLE. FIX ASAP!
    (handle-directory spiffy-directory-listing)
    (handle-exception (lambda (exn chain)
			(signal (make-composite-condition
				 (make-property-condition 
				  'server
				  'message "server error")))))

    ;; http-transport:handle-directory) ;; simple-directory-handler)
    ;; Setup the web server and a /ctrl interface
    ;;
    (vhost-map `(((* any) . ,(lambda (continue)
			       ;; open the db on the first call 
				 ;; This is were we set up the database connections
			       (let* (($   (request-vars source: 'both))
				      (dat ($ 'dat))
				      (res #f))
				 (cond
				  ((equal? (uri-path (request-uri (current-request)))
					   '(/ "api"))
				   (send-response body:    (api:process-request *dbstruct-db* $) ;; the $ is the request vars proc
						  headers: '((content-type text/plain)))
				   (mutex-lock! *heartbeat-mutex*)
				   (set! *db-last-access* (current-seconds))
				   (mutex-unlock! *heartbeat-mutex*))
				  ((equal? (uri-path (request-uri (current-request))) 
					   '(/ ""))
				   (send-response body: (http-transport:main-page)))
				  ((equal? (uri-path (request-uri (current-request))) 
					   '(/ "json_api"))
				   (send-response body: (http-transport:main-page)))
				  ((equal? (uri-path (request-uri (current-request))) 
					   '(/ "runs"))
				   (send-response body: (http-transport:main-page)))
				  ((equal? (uri-path (request-uri (current-request))) 
					   '(/ any))
				   (send-response body: "hey there!\n"
						  headers: '((content-type text/plain))))
				  ((equal? (uri-path (request-uri (current-request))) 
					   '(/ "hey"))
				   (send-response body: "hey there!\n" 
						  headers: '((content-type text/plain))))
                                  ((equal? (uri-path (request-uri (current-request))) 
					   '(/ "jquery3.1.0.js"))
				   (send-response body: (http-transport:show-jquery) 
						  headers: '((content-type application/javascript))))
                                  ((equal? (uri-path (request-uri (current-request))) 
					   '(/ "test_log"))
				   (send-response body: (http-transport:html-test-log $) 
						  headers: '((content-type text/HTML))))    
                                  ((equal? (uri-path (request-uri (current-request))) 
					   '(/ "dashboard"))
				   (send-response body: (http-transport:html-dboard $) 
						  headers: '((content-type text/HTML)))) 
				  (else (continue))))))))
    (with-output-to-file start-file (lambda ()(print (current-process-id))))
    (http-transport:try-start-server ipaddrstr start-port)))

;; This is recursively run by http-transport:run until sucessful
;;
(define (http-transport:try-start-server ipaddrstr portnum)
  (let ((config-hostname (configf:lookup *configdat* "server" "hostname"))
	(config-use-proxy (equal? (configf:lookup *configdat* "client" "use-http_proxy") "yes")))
    (if (not config-use-proxy)
	(determine-proxy (constantly #f)))
    (debug:print-info 0 *default-log-port* "http-transport:try-start-server time=" (seconds->time-string (current-seconds)) " ipaddrsstr=" ipaddrstr " portnum=" portnum " config-hostname=" config-hostname)
    (handle-exceptions
	exn
	(begin
	  (print-error-message exn)
	  (if (< portnum 64000)
	      (begin 
		(debug:print 0 *default-log-port* "WARNING: attempt to start server failed. Trying again ...")
		(debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
		(debug:print 5 *default-log-port* "exn=" (condition->list exn))
		(portlogger:open-run-close portlogger:set-failed portnum)
		(debug:print 0 *default-log-port* "WARNING: failed to start on portnum: " portnum ", trying next port")
		(thread-sleep! 0.1)
		
		;; get_next_port goes here
		(http-transport:try-start-server ipaddrstr
						 (portlogger:open-run-close portlogger:find-port)))
	      (begin
		(print "ERROR: Tried and tried but could not start the server"))))
      ;; any error in following steps will result in a retry
      (set! *server-info* (list ipaddrstr portnum))
      (debug:print 0 *default-log-port* "INFO: Trying to start server on " ipaddrstr ":" portnum)
      ;; This starts the spiffy server
      ;; NEED WAY TO SET IP TO #f TO BIND ALL
      ;; (start-server bind-address: ipaddrstr port: portnum)
      (if config-hostname ;; this is a hint to bind directly
	  (start-server port: portnum bind-address: (if (equal? config-hostname "-")
							ipaddrstr
							config-hostname))
	  (start-server port: portnum))
      (portlogger:open-run-close portlogger:set-port portnum "released")
      (debug:print 1 *default-log-port* "INFO: server has been stopped"))))

;;======================================================================
;; S E R V E R   U T I L I T I E S 
;;======================================================================

;;======================================================================
;; C L I E N T S
;;======================================================================

(define *http-mutex* (make-mutex))

;; NOTE: Large block of code from 32436b426188080f72fceb6894af541fbad9921e removed here
;;       I'm pretty sure it is defunct.

;; This next block all imported en-mass from the api branch
(define *http-requests-in-progress* 0)
(define *http-connections-next-cleanup* (current-seconds))

(define (http-transport:get-time-to-cleanup)
  (let ((res #f))
    (mutex-lock! *http-mutex*)
    (set! res (> (current-seconds) *http-connections-next-cleanup*))
    (mutex-unlock! *http-mutex*)
    res))

(define (http-transport:inc-requests-count)
  (mutex-lock! *http-mutex*)
  (set! *http-requests-in-progress* (+ 1 *http-requests-in-progress*))
  ;; Use this opportunity to slow things down iff there are too many requests in flight
  (if (> *http-requests-in-progress* 5)
      (begin
	(debug:print-info 0 *default-log-port* "Whoa there buddy, ease up...")
	(thread-sleep! 1)))
  (mutex-unlock! *http-mutex*))

(define (http-transport:dec-requests-count proc) 
  (mutex-lock! *http-mutex*)
  (proc)
  (set! *http-requests-in-progress* (- *http-requests-in-progress* 1))
  (mutex-unlock! *http-mutex*))

(define (http-transport:dec-requests-count-and-close-all-connections)
  (set! *http-requests-in-progress* (- *http-requests-in-progress* 1))
  (let loop ((etime (+ (current-seconds) 5))) ;; give up in five seconds
    (if (> *http-requests-in-progress* 0)
	(if (> etime (current-seconds))
	    (begin
	      (thread-sleep! 0.05)
	      (loop etime))
	    (debug:print-error 0 *default-log-port* "requests still in progress after 5 seconds of waiting. I'm going to pass on cleaning up http connections"))
	(close-all-connections!)))
  (set! *http-connections-next-cleanup* (+ (current-seconds) 10))
  (mutex-unlock! *http-mutex*))

(define (http-transport:inc-requests-and-prep-to-close-all-connections)
  (mutex-lock! *http-mutex*)
  (set! *http-requests-in-progress* (+ 1 *http-requests-in-progress*)))

;; Send "cmd" with json payload "params" to serverdat and receive result
;;
(define (http-transport:client-api-send-receive run-id serverdat cmd params #!key (numretries 3)(area-dat #f))
  (let* ((fullurl    (if (vector? serverdat)
			 (http-transport:server-dat-get-api-req serverdat)
			 (begin
			   (debug:print 0 *default-log-port* "FATAL ERROR: http-transport:client-api-send-receive called with no server info")
			   (exit 1))))
	 (res        (vector #f "uninitialized"))
	 (success    #t)
	 (sparams    (db:obj->string params transport: 'http))
	 (areadat  (or area-dat *areadat*)))
       (debug:print-info 11 *default-log-port* "fullurl=" fullurl ", cmd=" cmd ", params=" params ", run-id=" run-id "\n")
       ;; set up the http-client here
       (max-retry-attempts 1)
       ;; consider all requests indempotent
       (retry-request? (lambda (request)
			 #f))
       ;; send the data and get the response
       ;; extract the needed info from the http data and 
       ;; process and return it.
       (let* ((send-recieve (lambda ()
			      (mutex-lock! *http-mutex*)
			      ;; (condition-case (with-input-from-request "http://localhost"; #f read-lines)
			      ;;					       ((exn http client-error) e (print e)))
			      (set! res (vector                ;;; DON'T FORGET - THIS IS THE CLIENT SIDE! NOTE: consider moving this to client.scm since we are only supporting http transport at this time.
					 success
					 (db:string->obj 
					  (handle-exceptions
					      exn
					      (let ((call-chain (get-call-chain))
						    (msg        ((condition-property-accessor 'exn 'message) exn)))
						(set! success #f)
                                                (if (debug:debug-mode 1)
                                                    (debug:print-info 0 *default-log-port* "couldn't talk to server, trying again ...")
                                                    (begin
                                                      (debug:print 0 *default-log-port* "WARNING: failure in with-input-from-request to " fullurl ".")
                                                      (debug:print 0 *default-log-port* " message: " msg)
                                                      (debug:print 0 *default-log-port* " cmd: " cmd " params: " params)
                                                      (debug:print 0 *default-log-port* " call-chain: " call-chain)))
                                                (if areadat
						    (areadat-conndat-set! areadat #f))
						;; Killing associated server to allow clean retry.")
						;; (tasks:kill-server-run-id run-id)  ;; better to kill the server in the logic that called this routine?
						(mutex-unlock! *http-mutex*)
					     ;;; (signal (make-composite-condition
					     ;;;          (make-property-condition 'commfail 'message "failed to connect to server")))
					     ;;; "communications failed"
						(db:obj->string #f))
					    (with-input-from-request ;; was dat
					     fullurl 
					     (list (cons 'key (or *server-id* "thekey"))
						   (cons 'cmd cmd)
						   (cons 'params sparams))
					     read-string))
					  transport: 'http)
                                         0)) ;; added this speculatively
			      ;; Shouldn't this be a call to the managed call-all-connections stuff above?
			      (close-all-connections!)
			      (mutex-unlock! *http-mutex*)
			      ))
	      (time-out     (lambda ()
			      (thread-sleep! 45)
			      #f))
	      (th1 (make-thread send-recieve "with-input-from-request"))
	      (th2 (make-thread time-out     "time out")))
	 (thread-start! th1)
	 (thread-start! th2)
	 (thread-join! th1)
	 (thread-terminate! th2)
	 (debug:print-info 11 *default-log-port* "got res=" res)
	 (if (vector? res)
	     (if (vector-ref res 0) ;; this is the first flag or the second flag?
		 res ;; this is the *inner* vector? seriously? why?
                 (if (debug:debug-mode 11)
                     (let ((call-chain (get-call-chain))) ;; note: this code also called in nmsg-transport - consider consolidating it
                       (print-call-chain (current-error-port))
                       (debug:print-error 11 *default-log-port* "error above occured at server, res=" res " message: " ((condition-property-accessor 'exn 'message) exn))
                       (debug:print 11 *default-log-port* " server call chain:")
                       (pp (vector-ref res 1) (current-error-port))
                       (signal (vector-ref res 0)))
                     res))
	     (signal (make-composite-condition
		      (make-property-condition 
		       'timeout
		       'message "nmsg-transport:client-api-send-receive-raw timed out talking to server")))))))

;; careful closing of connections stored in *alldat*
;;
(define (http-transport:close-connections #!key (all-dat #f))
  (let* ((alldat  (or all-dat *alldat*))
	 (server-dat (if alldat
                         (alldat-conndat alldat)
                         #f))) ;; (hash-table-ref/default *areadat* run-id #f)))
    (if (vector? server-dat)
	(let ((api-dat (http-transport:server-dat-get-api-uri server-dat)))
	  (handle-exceptions
	    exn
	    (begin
	      (print-call-chain *default-log-port*)
	      (debug:print-error 0 *default-log-port* " closing connection failed with error: " ((condition-property-accessor 'exn 'message) exn)))
	    (close-connection! api-dat)
            ;;(close-idle-connections!)
	    #t))
	#f)))

;; http-transport:server-dat definition moved to common_records.scm

;;
;; connect
;;
(define (http-transport:client-connect iface port)
  (let* ((api-url      (conc "http://" iface ":" port "/api"))
	 (api-uri      (uri-reference (conc "http://" iface ":" port "/api")))
	 (api-req      (make-request method: 'POST uri: api-uri))
	 (server-dat   (vector iface port api-uri api-url api-req (current-seconds))))
    server-dat))

;; run http-transport:keep-running in a parallel thread to monitor that the db is being 
;; used and to shutdown after sometime if it is not.
;;
(define (http-transport:keep-running) 
  ;; if none running or if > 20 seconds since 
  ;; server last used then start shutdown
  ;; This thread waits for the server to come alive
  (debug:print-info 0 *default-log-port* "Starting the sync-back, keep alive thread in server")
  (let* ((tmp-area          (common:get-db-tmp-area *alldat*))
	 (started-file      (conc tmp-area "/.server-started"))
	 (server-start-time (current-seconds))
	 (server-info (let loop ((start-time (current-seconds))
				 (changed    #t)
				 (last-sdat  "not this"))
                        (let ((sdat #f))
			  (thread-sleep! 0.01)
			  (debug:print-info 0 *default-log-port* "Waiting for server alive signature")
                          (mutex-lock! *heartbeat-mutex*)
                          (set! sdat *server-info*)
                          (mutex-unlock! *heartbeat-mutex*)
                          (if (and sdat
				   (not changed)
				   (> (- (current-seconds) start-time) 2))
			      (begin
				(debug:print-info 0 *default-log-port* "Received server alive signature")
                                (common:save-pkt `((action . alive)
                                                   (T      . server)
                                                   (pid    . ,(current-process-id))
                                                   (ipaddr . ,(car sdat))
                                                   (port   . ,(cadr sdat)))
                                                 *configdat* #t)
				sdat)
                              (begin
				(debug:print-info 0 *default-log-port* "Still waiting, last-sdat=" last-sdat)
                                (sleep 4)
				(if (> (- (current-seconds) start-time) 120) ;; been waiting for two minutes
				    (begin
				      (debug:print-error 0 *default-log-port* "transport appears to have died, exiting server")
                                      (common:save-pkt `((action . died)
                                                         (T      . server)
                                                         (pid    . ,(current-process-id))
                                                         (ipaddr . ,(car sdat))
                                                         (port   . ,(cadr sdat))
                                                         (msg    . "Transport died?"))
                                                 *configdat* #t)
				      (exit))
				    (loop start-time
					  (equal? sdat last-sdat)
					  sdat)))))))
         (iface       (car server-info))
         (port        (cadr server-info))
         (last-access 0)
	 (server-timeout (server:expiration-timeout))
	 (server-going  #f)
	 (server-log-file (args:get-arg "-log"))) ;; always set when we are a server

    (with-output-to-file started-file (lambda ()(print (current-process-id))))

    (let loop ((count         0)
	       (server-state 'available)
	       (bad-sync-count 0)
	       (start-time     (current-milliseconds)))
      ;; Use this opportunity to sync the tmp db to megatest.db
      (if (not server-going) ;; *dbstruct-db* 
	  (begin
	    (debug:print 0 *default-log-port* "SERVER: dbprep")
	    (set! *dbstruct-db*  (db:setup #t)) ;;  run-id))
	    (set! server-going #t)
	    (debug:print 0 *default-log-port* "SERVER: running, megatest version: " (common:get-full-version)) ;; NOTE: the server is NOT yet marked as running in the log. We do that in the keep-running routine.
	    (thread-start! *watchdog*)))
      
      ;; when things go wrong we don't want to be doing the various queries too often
      ;; so we strive to run this stuff only every four seconds or so.
      (let* ((sync-time (- (current-milliseconds) start-time))
	    (rem-time  (quotient (- 4000 sync-time) 1000)))
	(if (and (<= rem-time 4)
		 (>  rem-time 0))
	    (thread-sleep! rem-time)))
      
      (if (< count 1) ;; 3x3 = 9 secs aprox
	  (loop (+ count 1) 'running bad-sync-count (current-milliseconds)))
      
      ;; Check that iface and port have not changed (can happen if server port collides)
      (mutex-lock! *heartbeat-mutex*)
      (set! sdat *server-info*)
      (mutex-unlock! *heartbeat-mutex*)
      
      (if (not (equal? sdat (list iface port)))
	  (let ((new-iface (car sdat))
		(new-port  (cadr sdat)))
	    (debug:print-info 0 *default-log-port* "WARNING: interface changed, refreshing iface and port info")
	    (set! iface new-iface)
	    (set! port  new-port)
	    (debug:print 0 *default-log-port* "SERVER STARTED: " iface ":" port " AT " (current-seconds))
	    (flush-output *default-log-port*)))
      
      ;; Transfer *db-last-access* to last-access to use in checking that we are still alive
      (mutex-lock! *heartbeat-mutex*)
      (set! last-access *db-last-access*)
      (mutex-unlock! *heartbeat-mutex*)
      
      (if (common:low-noise-print 120 (conc "server running on " iface ":" port))
	  (begin
	    (debug:print 0 *default-log-port* "SERVER STARTED: " iface ":" port " AT " (current-seconds))
	    (flush-output *default-log-port*)))
      (if (common:low-noise-print 60 "dbstats")
	  (begin
	    (debug:print 0 *default-log-port* "Server stats:")
	    (db:print-current-query-stats)))
      (let* ((hrs-since-start  (/ (- (current-seconds) server-start-time) 3600)))
	(cond
         ((and *server-run*
	       (> (+ last-access server-timeout)
		  (current-seconds)))
          (if (common:low-noise-print 120 "server continuing")
              (debug:print-info 0 *default-log-port* "Server continuing, seconds since last db access: " (- (current-seconds) last-access))
	      (let ((curr-time (current-seconds)))
		(handle-exceptions
		    exn
		    (debug:print 0 *default-log-port* "ERROR: Failed to change timestamp on log file " server-log-file ". Are you out of space on that disk?")
		  (if (not *server-overloaded*)
		      (change-file-times server-log-file curr-time curr-time)))))
          (loop 0 server-state bad-sync-count (current-milliseconds)))
         (else
          (debug:print-info 0 *default-log-port* "Server timed out. seconds since last db access: " (- (current-seconds) last-access))
          (http-transport:server-shutdown port)))))))

(define (http-transport:server-shutdown port)
  (begin
    ;;(BB> "http-transport:server-shutdown called")
    (debug:print-info 0 *default-log-port* "Starting to shutdown the server. pid="(current-process-id))
    ;;
    ;; start_shutdown
    ;;
    (set! *time-to-exit* #t) ;; tell on-exit to be fast as we've already cleaned up
    (portlogger:open-run-close portlogger:set-port port "released")
    (thread-sleep! 1)

    ;; (debug:print-info 0 *default-log-port* "Max cached queries was    " *max-cache-size*)
    ;; (debug:print-info 0 *default-log-port* "Number of cached writes   " *number-of-writes*)
    ;; (debug:print-info 0 *default-log-port* "Average cached write time "
    ;; 		      (if (eq? *number-of-writes* 0)
    ;; 			  "n/a (no writes)"
    ;; 			  (/ *writes-total-delay*
    ;; 			     *number-of-writes*))
    ;; 		      " ms")
    ;; (debug:print-info 0 *default-log-port* "Number non-cached queries "  *number-non-write-queries*)
    ;; (debug:print-info 0 *default-log-port* "Average non-cached time   "
    ;; 		      (if (eq? *number-non-write-queries* 0)
    ;; 			  "n/a (no queries)"
    ;; 			  (/ *total-non-write-delay* 
    ;; 			     *number-non-write-queries*))
    ;; 		      " ms")
    
    (db:print-current-query-stats)
    (common:save-pkt `((action . exit)
                       (T      . server)
                       (pid    . ,(current-process-id)))
                     *configdat* #t)
    (debug:print-info 0 *default-log-port* "Server shutdown complete. Exiting")
    (exit)))

;; all routes though here end in exit ...
;;
;; start_server? 
;;
(define (http-transport:launch)
  ;; check that a server start is in progress, pause or exit if so
  (let* ((tmp-area            (common:get-db-tmp-area *alldat*))
	 (server-start        (conc tmp-area "/.server-start"))
	 (server-started      (conc tmp-area "/.server-started"))
	 (start-time          (common:lazy-modification-time server-start))
	 (started-time        (common:lazy-modification-time server-started))
	 (server-starting     (< start-time started-time)) ;; if start-time is less than started-time then a server is still starting
	 (start-time-old      (> (- (current-seconds) start-time) 5))
         (cleanup-proc        (lambda (msg)
                                (let* ((serv-fname      (conc "server-" (current-process-id) "-" (get-host-name) ".log"))
                                       (full-serv-fname (conc *toppath* "/logs/" serv-fname))
                                       (new-serv-fname  (conc *toppath* "/logs/" "defunct-" serv-fname)))
                                  (debug:print 0 *default-log-port* msg)
                                  (if (common:file-exists? full-serv-fname)
                                      (system (conc "sleep 1;mv -f " full-serv-fname " " new-serv-fname))
                                      (debug:print 0 *default-log-port* "INFO: cannot move " full-serv-fname " to " new-serv-fname))
                                  (exit)))))
    (if (and (not start-time-old) ;; last server start try was less than five seconds ago
	     (not server-starting))
	(begin
	  (cleanup-proc "NOT starting server, there is either a recently started server or a server in process of starting")
	  (exit)))
    ;; lets not even bother to start if there are already three or more server files ready to go
    (let* ((num-alive   (server:get-num-alive (server:get-list *toppath*))))
      (if (> num-alive 3)
          (begin
            (cleanup-proc (conc "ERROR: Aborting server start because there are already " num-alive " possible servers either running or starting up"))
            (exit))))
  (common:save-pkt `((action . start)
		     (T      . server)
		     (pid    . ,(current-process-id)))
		   *configdat* #t)
    (let* ((th2 (make-thread (lambda ()
                               (debug:print-info 0 *default-log-port* "Server run thread started")
                               (http-transport:run 
                                (if (args:get-arg "-server")
                                    (args:get-arg "-server")
                                    "-")
                                )) "Server run"))
           (th3 (make-thread (lambda ()
                               (debug:print-info 0 *default-log-port* "Server monitor thread started")
                               (http-transport:keep-running)
                               "Keep running"))))
      (thread-start! th2)
      (thread-sleep! 0.25) ;; give the server time to settle before starting the keep-running monitor.
      (thread-start! th3)
      (set! *didsomething* #t)
      (thread-join! th2)
      (exit))))

;; (define (http-transport:server-signal-handler signum)
;;   (signal-mask! signum)
;;   (handle-exceptions
;;    exn
;;    (debug:print 0 *default-log-port* " ... exiting ...")
;;    (let ((th1 (make-thread (lambda ()
;; 			     (thread-sleep! 1))
;; 			   "eat response"))
;; 	 (th2 (make-thread (lambda ()
;; 			     (debug:print-error 0 *default-log-port* "Received ^C, attempting clean exit. Please be patient and wait a few seconds before hitting ^C again.")
;; 			     (thread-sleep! 3) ;; give the flush three seconds to do it's stuff
;; 			     (debug:print 0 *default-log-port* "       Done.")
;; 			     (exit 4))
;; 			   "exit on ^C timer")))
;;      (thread-start! th2)
;;      (thread-start! th1)
;;      (thread-join! th2))))

;;===============================================
;; Java script
;;===============================================
(define (http-transport:show-jquery)
  (let* ((data  (tests:readlines *java-script-lib*)))
(string-join data "\n")))



;;======================================================================
;; web pages
;;======================================================================

(define (http-transport:html-test-log $)
   (let* ((run-id ($ 'runid))
         (test-item ($ 'testname))
         (parts (string-split test-item ":"))
         (test-name (car parts))
             
         (item-name (if (equal? (length parts) 1)
             ""
             (cadr parts))))
  ;(print $) 
(tests:get-test-log run-id test-name item-name)))


(define (http-transport:html-dboard $)
  (let* ((page ($ 'page))
         (oup       (open-output-string)) 
         (bdy "--------------------------")

         (ret  (tests:dynamic-dboard page)))
    (s:output-new  oup  ret)
   (close-output-port oup)

  (set! bdy   (get-output-string oup))
     (conc "<h1>Dashboard</h1>" bdy "<br/> <br/> "  )))

(define (http-transport:main-page)
  (let ((linkpath (root-path)))
    (conc "<head><h1>" (pathname-strip-directory *toppath*) "</h1></head>"
	  "<body>"
	  "Run area: " *toppath*
	  "<h2>Server Stats</h2>"
	  (http-transport:stats-table) 
	  "<hr>"
	  (http-transport:runs linkpath)
	  "<hr>"
	  (http-transport:run-stats)
	  "</body>"
	  )))

(define (http-transport:stats-table)
  (mutex-lock! *heartbeat-mutex*)
  (let ((res 
	 (conc "<table>"
	       ;; "<tr><td>Max cached queries</td>        <td>" *max-cache-size* "</td></tr>"
	       "<tr><td>Number of cached writes</td>   <td>" *number-of-writes* "</td></tr>"
	       "<tr><td>Average cached write time</td> <td>" (if (eq? *number-of-writes* 0)
								 "n/a (no writes)"
								 (/ *writes-total-delay*
								    *number-of-writes*))
	       " ms</td></tr>"
	       "<tr><td>Number non-cached queries</td> <td>"  *number-non-write-queries* "</td></tr>"
	       ;; "<tr><td>Average non-cached time</td>   <td>" (if (eq? *number-non-write-queries* 0)
	       ;; 							 "n/a (no queries)"
	       ;; 							 (/ *total-non-write-delay* 
	       ;; 							    *number-non-write-queries*))
	       " ms</td></tr>"
	       "<tr><td>Last access</td><td>"              (seconds->time-string *db-last-access*) "</td></tr>"
	       "</table>")))
    (mutex-unlock! *heartbeat-mutex*)
    res))

(define (http-transport:runs linkpath)
  (conc "<h3>Runs</h3>"
	(string-intersperse
	 (let ((files (map pathname-strip-directory (glob (conc linkpath "/*")))))
	   (map (lambda (p)
		  (conc "<a href=\"" p "\">" p "</a><br>"))
		files))
	 " ")))

(define (http-transport:run-stats)
  (let ((stats (open-run-close db:get-running-stats #f)))
    (conc "<table>"
	  (string-intersperse
	   (map (lambda (stat)
		  (conc "<tr><td>" (car stat) "</td><td>" (cadr stat) "</td></tr>"))
		stats)
	   " ")
	  "</table>")))

;; Configurations for server
(tcp-buffer-size 2048)
(max-connections 2048) 

;;======================================================================
;;
;;======================================================================


;; Puts out all combinations
(define (process-itemlist hierdepth curritemkey itemlist)
  (let ((res '()))
    (if (not hierdepth)
	(set! hierdepth (length itemlist)))
    (let loop ((hed (car itemlist))
	       (tal (cdr itemlist)))
      (if (null? tal)
	  (for-each (lambda (item)
		      (if (> (length curritemkey) (- hierdepth 2))
			  (set! res (append res (list (append curritemkey (list (list (car hed) item))))))))
		    (cadr hed))
	  (begin
	    (for-each (lambda (item)
			(set! res (append res (process-itemlist hierdepth (append curritemkey (list (list (car hed) item))) tal))))
		      (cadr hed))
	    (loop (car tal)(cdr tal)))))
    res))

;; (item-assoc->item-list '(("ANIMAL" "Elephant Lion")("SEASON" "Spring Fall")))
;;   => ((("ANIMAL" "Elephant") ("SEASON" "Spring")) 
;;       (("ANIMAL" "Elephant") ("SEASON" "Fall")) 
;;       (("ANIMAL" "Lion")     ("SEASON" "Spring"))
;;       (("ANIMAL" "Lion")     ("SEASON" "Fall")))
(define (item-assoc->item-list itemsdat)
  (if (and itemsdat (not (null? itemsdat)))
      (let ((itemlst (filter (lambda (x)
			       (list? x))
			     (map (lambda (x)
				    (debug:print 6 *default-log-port* "item-assoc->item-list x: " x)
				    (if (< (length x) 2)
					(begin
					  (debug:print-error 0 *default-log-port* "malformed items spec " (string-intersperse x " "))
					  (list (car x)'()))
					(let* ((name (car x))
					       (items (cadr x))
					       (ilist (list name (if (string? items)
								     (string-split items)
								     '()))))
					  (if (null? ilist)
					      (debug:print-error 0 *default-log-port* "No items specified for " name))
					  ilist)))
				  itemsdat))))
	(let ((debuglevel 5))
	  (debug:print 5 *default-log-port* "item-assoc->item-list: itemsdat => itemlst ")
	  (if (debug:debug-mode 5)
	      (begin
		(pp itemsdat)
		(print " => ")
		(pp itemlst))))
	(if (> (length itemlst) 0)
	    (process-itemlist #f '() itemlst)
	    '()))
      '())) ;; return a list consisting on a single null list for non-item runs
            ;; Nope, not now, return null as of 6/6/2011

;; (item-table->item-list '(("ANIMAL" "Elephant Lion")("SEASON" "Spring Winter")))
;;   => ((("ANIMAL" "Elephant")("SEASON" "Spring"))
;;       (("ANIMAL" "Lion")    ("SEASON" "Winter")))
(define (item-table->item-list itemtable)
  (let ((newlst (map (lambda (x)
		       (if (> (length x) 1)
			   (list (car x)
				 (string-split (cadr x)))
			   (list x '())))
		     itemtable))
	(res     '())) ;; a list of items
    (let loop ((indx    0)
	       (item   '()) ;; an item will be ((KEYNAME1 VAL1)(KEYNAME2 VAL2) ...)
	       (elflag  #f))
      (for-each (lambda (row)
		  (let ((rowname (car row))
			(rowdat  (cadr row)))
		    (set! item (append item 
				       (list 
					(if (< indx (length rowdat))
					    (let ((new (list rowname (list-ref rowdat indx))))
					      ;; (debug:print 0 *default-log-port* "New: " new)
					      (set! elflag #t)
					      new
					      ) ;; i.e. had at least on legit value to use
					    (list rowname "-")))))))
		newlst)
      (if elflag
	  (begin
	    (set! res (append res (list item)))
	    (loop (+ indx 1)
		  '()
		  #f)))
      res)))
            ;; Nope, not now, return null as of 6/6/2011
		
(define (items:check-valid-items class item)
  (let ((valid-values (let ((s (config-lookup *configdat* "validvalues" class)))
			(if s (string-split s) #f))))
    (if valid-values
	(if (member item valid-values)
	    item #f)
	item)))

(define (items:get-items-from-config tconfig)
  (let* ((have-items  (hash-table-ref/default tconfig "items"      #f))
	 (have-itable (hash-table-ref/default tconfig "itemstable" #f))
	 (items       (hash-table-ref/default tconfig "items"      '()))
	 (itemstable  (hash-table-ref/default tconfig "itemstable" '())))
    (debug:print 5 *default-log-port* "items: " items " itemstable: " itemstable)
    (set! items (map (lambda (item)
		       (if (procedure? (cadr item))
			   (list (car item)((cadr item)))  ;; evaluate the proc
			   item))
		     items))
    (set! itemstable (map (lambda (item)
			    (if (procedure? (cadr item))
				(list (car item)((cadr item)))  ;; evaluate the proc
				item))
			  itemstable))
    (if (and have-items  (null? items))     (debug:print 0 *default-log-port* "WARNING:[items] section in testconfig but no entries defined"))
    (if (and have-itable (null? itemstable))(debug:print 0 *default-log-port* "WARNNG:[itemstable] section in testconfig but no entries defined"))
    (if (or (not (null? items))(not (null? itemstable)))
	(append (item-assoc->item-list items)
		(item-table->item-list itemstable))
	'(()))))

;; (pp (item-assoc->item-list itemdat))

;;======================================================================
;; keys
;;======================================================================


(define (args:usage . a) #f)

;;======================================================================
;; key <=> target routines
;;======================================================================

;; This invalidates using "/" in item names. Every key will be
;; available via args:get-arg as :keyfield. Since this only needs to
;; be called once let's use it to set the environment vars
;;
;; The setting of :keyfield in args should be turned off ASAP
;;
(define (keys:target-set-args keys target ht)
  (if target
      (let ((vals (string-split target "/")))
	(if (eq? (length vals)(length keys))
	    (for-each (lambda (key val)
			(setenv key val)
			(if ht (hash-table-set! ht (conc ":" key) val)))
		      keys
		      vals)
	    (debug:print-error 0 *default-log-port* "wrong number of values in " target ", should match " keys))
	vals)
      (debug:print 4 *default-log-port* "ERROR: keys:target-set-args called with no target.")))

;; given the keys (a list of vectors <key field> or a list of keys) and a target return a keyval list
;; keyval list ( (key1 val1) (key2 val2) ...)
(define (keys:target->keyval keys target)
  (let* ((targlist (string-split target "/"))
	 (numkeys  (length keys))
	 (numtarg  (length targlist))
	 (targtweaked (if (> numkeys numtarg)
			  (append targlist (make-list (- numkeys numtarg) ""))
			  targlist)))
    (map (lambda (key targ)
	   (list key targ))
	 keys targtweaked)))

;;======================================================================
;; launch
;;======================================================================

;;======================================================================
;; ezsteps
;;======================================================================

;; ezsteps were going to be coded as
;; stepname[,predstep1,predstep2 ...] [{VAR1=first,second,third}] command to execute
;;   BUT
;; now are
;; stepname {VAR=first,second,third ...} command ...
;; where the {VAR=first,second,third ...} is optional.

;; given an exit code and whether or not logpro was used calculate OK/BAD
;; return #t if we are ok, #f otherwise
(define (steprun-good? logpro exitcode)
  (or (eq? exitcode 0)
      (and logpro (eq? exitcode 2))))

;; if handed a string, process it, else look for MT_CMDINFO
(define (launch:get-cmdinfo-assoc-list #!key (encoded-cmd #f))
  (let ((enccmd (if encoded-cmd encoded-cmd (getenv "MT_CMDINFO"))))
    (if enccmd
	(common:read-encoded-string enccmd)
	'())))

;;                       0           1              2              3
(defstruct launch:einf (pid #t)(exit-status #t)(exit-code #t)(rollup-status 0))

;; return (conc status ": " comment) from the final section so that
;;   the comment can be set in the step record in launch.scm
;;
(define (launch:load-logpro-dat run-id test-id stepname)
  (let ((cname (conc stepname ".dat")))
    (if (common:file-exists? cname)
	(let* ((dat  (read-config cname #f #f))
	       (csvr (db:logpro-dat->csv dat stepname))
	       (csvt (let-values (((fmt-cell fmt-record fmt-csv) (make-format ",")))
		       (fmt-csv (map list->csv-record csvr))))
	       (status (configf:lookup dat "final" "exit-status"))
	       (msg     (configf:lookup dat "final" "message")))
          (if csvt  ;; this if blocked stack dump caused by .dat file from logpro being 0-byte.  fixed by upgrading logpro
              (rmt:csv->test-data run-id test-id csvt)
	      (debug:print 0 *default-log-port* "ERROR: no csvdat exists for run-id: " run-id " test-id: " test-id " stepname: " stepname ", check that logpro version is 1.15 or newer"))
	  ;;  (debug:print-info 13 *default-log-port* "Error: run-id/test-id/stepname="run-id"/"test-id"/"stepname" => bad csvr="csvr)
	  ;;  )
	  (cond
	   ((equal? status "PASS") "PASS") ;; skip the message part if status is pass
	   (status (conc (configf:lookup dat "final" "exit-status") ": " (if msg msg "no message")))
	   (else #f)))
	#f)))

(define (launch:runstep ezstep run-id test-id exit-info m tal testconfig) ;;; TODO: deprecate me in favor of ezsteps.scm
  (let* ((stepname       (car ezstep))  ;; do stuff to run the step
	 (stepinfo       (cadr ezstep))
	;; (let ((info (cadr ezstep)))
	;; 		   (if (proc? info) "" info)))
	;; (stepproc       (let ((info (cadr ezstep)))
	;; 		   (if (proc? info) info #f)))
	 (stepparts      (string-match (regexp "^(\\{([^\\}\\{]*)\\}\\s*|)(.*)$") stepinfo))
	 (stepparams     (list-ref stepparts 2)) ;; for future use, {VAR=1,2,3}, run step for each
	 (paramparts     (if (string? stepparams)
			     (map (lambda (x)(string-split x "=")) (string-split-fields "[^;]*=[^;]*" stepparams))
			     '()))
	 (subrun         (alist-ref "subrun" paramparts equal?))
	 (stepcmd        (list-ref stepparts 3))
	 (script         "") ; "#!/bin/bash\n") ;; yep, we depend on bin/bash FIXME!!!\
	 (logpro-file    (conc stepname ".logpro"))
	 (html-file      (conc stepname ".html"))
	 (dat-file       (conc stepname ".dat"))
	 (tconfig-logpro (configf:lookup testconfig "logpro" stepname))
	 (logpro-used    (common:file-exists? logpro-file)))

    (debug:print 0 *default-log-port* "stepparts: " stepparts ", stepparams: " stepparams
                 ", paramparts: " paramparts ", subrun: " subrun ", stepcmd: " stepcmd)
    
    (if (and tconfig-logpro
	     (not logpro-used)) ;; no logpro file found but have a defn in the testconfig
	(begin
	  (with-output-to-file logpro-file
	    (lambda ()
	      (print ";; logpro file extracted from testconfig\n"
		     ";;")
	      (print tconfig-logpro)))
	  (set! logpro-used #t)))
    
    ;; NB// can safely assume we are in test-area directory
    (debug:print 4 *default-log-port* "ezsteps:\n stepname: " stepname " stepinfo: " stepinfo " stepparts: " stepparts
		 " stepparams: " stepparams " stepcmd: " stepcmd)
    
    ;; ;; first source the previous environment
    ;; (let ((prev-env (conc ".ezsteps/" prevstep (if (string-search (regexp "csh") 
    ;;      							 (get-environment-variable "SHELL")) ".csh" ".sh"))))
    ;;   (if (and prevstep (common:file-exists? prev-env))
    ;;       (set! script (conc script "source " prev-env))))
    
    ;; call the command using mt_ezstep
    ;; (set! script (conc "mt_ezstep " stepname " " (if prevstep prevstep "x") " " stepcmd))
    
    (debug:print 4 *default-log-port* "script: " script)
    (rmt:teststep-set-status! run-id test-id stepname "start" "-" #f #f)
    ;; now launch the actual process
    (call-with-environment-variables 
     (list (cons "PATH" (conc (get-environment-variable "PATH") ":.")))
     (lambda () ;; (process-run "/bin/bash" "-c" "exec ls -l /tmp/foobar > /tmp/delme-more.log 2>&1")
       (let* ((cmd (conc stepcmd " > " stepname ".log 2>&1")) ;; >outfile 2>&1 
	      (pid #f))
	 (let ((proc (lambda ()
		       (set! pid (process-run "/bin/bash" (list "-c" cmd))))))
	   (if subrun
               (begin
                 (debug:print-info 0 *default-log-port* "Running without MT_.* environment variables.")
                 (common:without-vars proc "^MT_.*"))
	       (proc)))
	 
         (with-output-to-file "Makefile.ezsteps"
           (lambda ()
             (print stepname ".log :")
             (print "\t" cmd)
             (if (common:file-exists? (conc stepname ".logpro"))
                 (print "\tlogpro " stepname ".logpro " stepname ".html < " stepname ".log"))
             (print)
             (print stepname " : " stepname ".log")
             (print))
           #:append)

	 (rmt:test-set-top-process-pid run-id test-id pid)
	 (let processloop ((i 0))
	   (let-values (((pid-val exit-status exit-code)(process-wait pid #t)))
		       (mutex-lock! m)
		       (launch:einf-pid-set!         exit-info pid)         ;; (vector-set! exit-info 0 pid)
		       (launch:einf-exit-status-set! exit-info exit-status) ;; (vector-set! exit-info 1 exit-status)
		       (launch:einf-exit-code-set!   exit-info exit-code)   ;; (vector-set! exit-info 2 exit-code)
		       (mutex-unlock! m)
		       (if (eq? pid-val 0)
			   (begin
			     (thread-sleep! 2)
			     (processloop (+ i 1))))
		       )))))
    (debug:print-info 0 *default-log-port* "step " stepname " completed with exit code " (launch:einf-exit-code exit-info)) ;; (vector-ref exit-info 2))
    ;; now run logpro if needed
    (if logpro-used
	(let* ((logpro-exe (or (getenv "LOGPRO_EXE") "logpro"))
               (pid        (process-run (conc "/bin/sh -c '"logpro-exe" "logpro-file " " (conc stepname ".html") " < " stepname ".log > /dev/null'"))))
	  (let processloop ((i 0))
	    (let-values (((pid-val exit-status exit-code)(process-wait pid #t)))
			(mutex-lock! m)
			;; (make-launch:einf pid: pid exit-status: exit-status exit-code: exit-code)
			(launch:einf-pid-set!         exit-info pid)         ;; (vector-set! exit-info 0 pid)
			(launch:einf-exit-status-set! exit-info exit-status) ;; (vector-set! exit-info 1 exit-status)
			(launch:einf-exit-code-set!   exit-info exit-code)   ;; (vector-set! exit-info 2 exit-code)
			(mutex-unlock! m)
			(if (eq? pid-val 0)
			    (begin
			      (thread-sleep! 2)
			      (processloop (+ i 1)))))
	    (debug:print-info 0 *default-log-port* "logpro for step " stepname " exited with code " (launch:einf-exit-code exit-info))))) ;; (vector-ref exit-info 2)))))
    
    (let ((exinfo (launch:einf-exit-code exit-info)) ;; (vector-ref exit-info 2))
	  (logfna (if logpro-used (conc stepname ".html") ""))
	  (comment #f))
      (if logpro-used
	  (let ((datfile (conc stepname ".dat")))
	    ;; load the .dat file into the test_data table if it exists
	    (if (common:file-exists? datfile)
		(set! comment (launch:load-logpro-dat run-id test-id stepname)))
	    (rmt:test-set-log! run-id test-id (conc stepname ".html"))))
      (rmt:teststep-set-status! run-id test-id stepname "end" exinfo comment logfna))
    ;; set the test final status
    (let* ((process-exit-status (launch:einf-exit-code exit-info)) ;; (vector-ref exit-info 2))
	   (this-step-status (cond
			      ((and (eq? process-exit-status 2) logpro-used) 'warn)   ;; logpro 2 = warnings
			      ((and (eq? process-exit-status 3) logpro-used) 'check)  ;; logpro 3 = check
			      ((and (eq? process-exit-status 4) logpro-used) 'waived) ;; logpro 4 = waived
			      ((and (eq? process-exit-status 5) logpro-used) 'abort)  ;; logpro 5 = abort
			      ((and (eq? process-exit-status 6) logpro-used) 'skip)   ;; logpro 6 = skip
			      ((eq? process-exit-status 0)                   'pass)   ;; logpro 0 = pass
			      (else 'fail)))
	   (overall-status   (cond
			      ((eq? (launch:einf-rollup-status exit-info) 2) 'warn) ;; rollup-status (vector-ref exit-info 3)
			      ((eq? (launch:einf-rollup-status exit-info) 0) 'pass) ;; (vector-ref exit-info 3)
			      (else 'fail)))
	   (next-status      (cond 
			      ((eq? overall-status 'pass) this-step-status)
			      ((eq? overall-status 'warn)
			       (if (eq? this-step-status 'fail) 'fail 'warn))
			      ((eq? overall-status 'abort) 'abort)
			      (else 'fail)))
	   (next-state       ;; "RUNNING") ;; WHY WAS THIS CHANGED TO NOT USE (null? tal) ??
	    (cond
	     ((null? tal) ;; more to run?
	      "COMPLETED")
	     (else "RUNNING"))))
      (debug:print 4 *default-log-port* "Exit value received: " (launch:einf-exit-code exit-info) " logpro-used: " logpro-used 
		   " this-step-status: " this-step-status " overall-status: " overall-status 
		   " next-status: " next-status " rollup-status: "  (launch:einf-rollup-status exit-info)) ;; (vector-ref exit-info 3))
      (case next-status
	((warn)
	 (launch:einf-rollup-status-set! exit-info 2) ;; (vector-set! exit-info 3 2) ;; rollup-status
	 ;; NB// test-set-status! does rdb calls under the hood
	 (tests:test-set-status! run-id test-id next-state "WARN" 
				 (if (eq? this-step-status 'warn) "Logpro warning found" #f)
				 #f))
	((check)
	 (launch:einf-rollup-status-set! exit-info 3) ;; (vector-set! exit-info 3 3) ;; rollup-status
	 ;; NB// test-set-status! does rdb calls under the hood
	 (tests:test-set-status! run-id test-id next-state "CHECK" 
				 (if (eq? this-step-status 'check) "Logpro check found" #f)
				 #f))
	((waived)
	 (launch:einf-rollup-status-set! exit-info 4) ;; (vector-set! exit-info 3 3) ;; rollup-status
	 ;; NB// test-set-status! does rdb calls under the hood
	 (tests:test-set-status! run-id test-id next-state "WAIVED" 
				 (if (eq? this-step-status 'check) "Logpro waived found" #f)
				 #f))
	((abort)
	 (launch:einf-rollup-status-set! exit-info 5) ;; (vector-set! exit-info 3 4) ;; rollup-status
	 ;; NB// test-set-status! does rdb calls under the hood
	 (tests:test-set-status! run-id test-id next-state "ABORT" 
				 (if (eq? this-step-status 'abort) "Logpro abort found" #f)
				 #f))
	((skip)
	 (launch:einf-rollup-status-set! exit-info 6) ;; (vector-set! exit-info 3 4) ;; rollup-status
	 ;; NB// test-set-status! does rdb calls under the hood
	 (tests:test-set-status! run-id test-id next-state "SKIP" 
				 (if (eq? this-step-status 'skip) "Logpro skip found" #f)
				 #f))
	((pass)
	 (tests:test-set-status! run-id test-id next-state "PASS" #f #f))
	(else ;; 'fail
	 (launch:einf-rollup-status-set! exit-info 1) ;; (vector-set! exit-info 3 1) ;; force fail, this used to be next-state but that doesn't make sense. should always be "COMPLETED" 
	 (tests:test-set-status! run-id test-id "COMPLETED" "FAIL" (conc "Failed at step " stepname) #f)
	 )))
    logpro-used))

(define (launch:manage-steps run-id test-id item-path fullrunscript ezsteps subrun test-name tconfigreg exit-info m)
  ;; (let-values
  ;;  (((pid exit-status exit-code)
  ;;    (run-n-wait fullrunscript)))
  ;; (tests:test-set-status! test-id "RUNNING" "n/a" #f #f)
  ;; Since we should have a clean slate at this time there is no need to do 
  ;; any of the other stuff that tests:test-set-status! does. Let's just 
  ;; force RUNNING/n/a

  ;; (thread-sleep! 0.3)
  ;; (tests:test-force-state-status! run-id test-id "RUNNING" "n/a")
  (rmt:set-state-status-and-roll-up-items run-id test-name item-path "RUNNING" #f #f) 
  ;; (thread-sleep! 0.3) ;; NFS slowness has caused grief here

  ;; if there is a runscript do it first
  (if fullrunscript
      (let ((pid (process-run fullrunscript)))
	(rmt:test-set-top-process-pid run-id test-id pid)
	(let loop ((i 0))
	  (let-values
	   (((pid-val exit-status exit-code) (process-wait pid #t)))
	   (mutex-lock! m)
	   (launch:einf-pid-set!           exit-info  pid)         ;; (vector-set! exit-info 0 pid)
	   (launch:einf-exit-status-set!   exit-info  exit-status) ;; (vector-set! exit-info 1 exit-status)
	   (launch:einf-exit-code-set!     exit-info  exit-code)   ;; (vector-set! exit-info 2 exit-code)
	   (launch:einf-rollup-status-set! exit-info  exit-code)   ;; (vector-set! exit-info 3 exit-code)  ;; rollup status
	   (mutex-unlock! m)
	   (if (eq? pid-val 0)
	       (begin
		 (thread-sleep! 2)
		 (loop (+ i 1)))
	       )))))
  ;; then, if runscript ran ok (or did not get called)
  ;; do all the ezsteps (if any)
  (if (or ezsteps subrun)
      (let* ((test-run-dir (tests:get-test-path-from-environment))
             (testconfig ;; (read-config (conc work-area "/testconfig") #f #t environ-patt: "pre-launch-env-vars")) ;; FIXME??? is allow-system ok here?
	      ;; NOTE: it is tempting to turn off force-create of testconfig but dynamic
	      ;;       ezstep names need a full re-eval here.
	      (tests:get-testconfig test-name item-path tconfigreg #t force-create: #t)) ;; 'return-procs)))
	     (ezstepslst (if (hash-table? testconfig)
			     (hash-table-ref/default testconfig "ezsteps" '())
			     #f)))
	(if testconfig
	    (hash-table-set! *testconfigs* test-name testconfig) ;; cached for lazy reads later ...
	    (begin
	      (launch:setup)
	      (debug:print 0 *default-log-port* "WARNING: no testconfig found for " test-name " in search path:\n  "
			   (string-intersperse (tests:get-tests-search-path *configdat*) "\n  "))))
	;; after all that, still no testconfig? Time to abort
	(if (not testconfig)
	    (begin
	      (debug:print-error 0 *default-log-port* "Failed to resolve megatest.config, runconfigs.config and testconfig issues. Giving up now")
	      (exit 1)))

	;; create a proc for the subrun if requested, save that proc in the ezsteps table as the last entry
	;; 1. get section [runarun]
	;; 2. unset MT_* vars
	;; 3. fix target
	;; 4. fix runname
	;; 5. fix testpatt or calculate it from contour
	;; 6. launch the run
	;; 7. roll up the run result and or roll up the logpro processed result
	(when (configf:lookup testconfig "subrun" "runwait") ;; we use runwait as the flag that a subrun is requested
            (subrun:initialize-toprun-test testconfig test-run-dir)
	    (let* ((mt-cmd (subrun:launch-cmd test-run-dir)))
              (debug:print-info 0 *default-log-port* "Subrun command is \"" mt-cmd "\"")
              (set! ezsteps #t) ;; set the needed flag
	      (set! ezstepslst
                    (append (or ezstepslst '())
                            (list (list "subrun" (conc "{subrun=true} " mt-cmd)))))))

	;; process the ezsteps
	(if ezsteps
	    (begin
	      (if (not (common:file-exists? ".ezsteps"))(create-directory ".ezsteps"))
	      ;; if ezsteps was defined then we are sure to have at least one step but check anyway
	      (if (not (> (length ezstepslst) 0))
		  (debug:print-error 0 *default-log-port* "ezsteps defined but ezstepslst is zero length")
		  (let loop ((ezstep (car ezstepslst))
			     (tal    (cdr ezstepslst))
			     (prevstep #f))
                    (debug:print-info 0 *default-log-port* "Processing ezstep \"" (string-intersperse ezstep " ") "\"")
		    ;; check exit-info (vector-ref exit-info 1)
		    (if (launch:einf-exit-status exit-info) ;; (vector-ref exit-info 1)
			(let ((logpro-used (launch:runstep ezstep run-id test-id exit-info m tal testconfig))
			      (stepname    (car ezstep)))
			  ;; if logpro-used read in the stepname.dat file
			  (if (and logpro-used (common:file-exists? (conc stepname ".dat")))
			      (launch:load-logpro-dat run-id test-id stepname))
			  (if (steprun-good? logpro-used (launch:einf-exit-code exit-info))
			      (if (not (null? tal))
				  (loop (car tal) (cdr tal) stepname))
			      (debug:print 0 *default-log-port* "WARNING: step " (car ezstep) " failed. Stopping")))
			(debug:print 0 *default-log-port* "WARNING: a prior step failed, stopping at " ezstep)))))))))

(define (launch:monitor-job run-id test-id item-path fullrunscript ezsteps test-name tconfigreg exit-info m work-area runtlim misc-flags)
  (let* ((update-period (string->number (or (configf:lookup *configdat* "setup" "test-stats-update-period") "30")))
         (start-seconds (current-seconds))
	 (calc-minutes  (lambda ()
			  (inexact->exact 
			   (round 
			    (- 
			     (current-seconds) 
			     start-seconds)))))
	 (kill-tries 0))
    ;; (tests:set-full-meta-info #f test-id run-id (calc-minutes) work-area)
    ;; (tests:set-full-meta-info test-id run-id (calc-minutes) work-area)
    (tests:set-full-meta-info #f test-id run-id (calc-minutes) work-area 10)

    (let loop ((minutes   (calc-minutes))
	       (cpu-load  (alist-ref 'adj-core-load (common:get-normalized-cpu-load #f)))
	       (disk-free (get-df (current-directory)))
               (last-sync (current-seconds)))
      (common:telemetry-log "zombie" (conc "launch:monitor-job - top of loop encountered at "(current-seconds)" with last-sync="last-sync))
      (let* ((over-time     (> (current-seconds) (+ last-sync update-period)))
             (new-cpu-load  (let* ((load  (alist-ref 'adj-core-load (common:get-normalized-cpu-load #f)))
                                   (delta (abs (- load cpu-load))))
                              (if (> delta 0.1) ;; don't bother updating with small changes
                                  load
                                  #f)))
             (new-disk-free (let* ((df    (if over-time ;; only get df every 30 seconds
                                              (get-df (current-directory))
                                              disk-free))
                                   (delta (abs (- df disk-free))))
                              (if (and (> df 0)
                                       (> (/ delta df) 0.1)) ;; (> delta 200) ;; ignore changes under 200 Meg
                                  df
                                  #f)))
             (do-sync       (or new-cpu-load new-disk-free over-time))

             (test-info   (rmt:get-test-info-by-id run-id test-id))
             (state       (db:test-get-state test-info))
             (status      (db:test-get-status test-info))
             (kill-reason  "no kill reason specified")
             (kill-job?    #f))
        (common:telemetry-log "zombie" (conc "launch:monitor-job - decision time encountered at "(current-seconds)" with last-sync="last-sync" do-sync="do-sync" over-time="over-time" update-period="update-period))
        (cond
         ((test-get-kill-request run-id test-id)
          (set! kill-reason "KILLING TEST since received kill request (KILLREQ)")
          (set! kill-job? #t))
         ((and runtlim (> (- (current-seconds) start-seconds) runtlim))
          (set! kill-reason (conc "KILLING TEST DUE TO TIME LIMIT EXCEEDED! Runtime=" (- (current-seconds) start-seconds) " seconds, limit=" runtlim))
          (set! kill-job? #t))
         ((equal? status "DEAD")
          (tests:update-central-meta-info run-id test-id new-cpu-load new-disk-free (calc-minutes) #f #f)
          (rmt:set-state-status-and-roll-up-items run-id test-id 'foo "RUNNING" "n/a" "was marked dead; really still running.")
          ;;(set! kill-reason "KILLING TEST because it was marked as DEAD by launch:handle-zombie-tests (might indicate really overloaded server or else overzealous setup.deadtime)") ;; MARK RUNNING
          (set! kill-job? #f)))

        (debug:print 4 *default-log-port* "cpu: " new-cpu-load " disk: " new-disk-free " last-sync: " last-sync " do-sync: " do-sync)
        (launch:handle-zombie-tests run-id)
        (when do-sync
          ;;(with-output-to-file (conc (getenv "MT_TEST_RUN_DIR") "/last-loadinfo.log" #:append)
          ;;  (lambda () (pp (list (current-seconds) new-cpu-load new-disk-free (calc-minutes)))))
          (common:telemetry-log "zombie" (conc  "launch:monitor-job - dosync started at "(current-seconds)))
          (tests:update-central-meta-info run-id test-id new-cpu-load new-disk-free (calc-minutes) #f #f)
          (common:telemetry-log "zombie" (conc "launch:monitor-job - dosync finished at "(current-seconds))))
        
	(if kill-job? 
	    (begin
              (debug:print-info 0 *default-log-port* "proceeding to kill test: "kill-reason)
	      (mutex-lock! m)
	      ;; NOTE: The pid can change as different steps are run. Do we need handshaking between this
	      ;;       section and the runit section? Or add a loop that tries three times with a 1/4 second
	      ;;       between tries?
	      (let* ((pid1 (launch:einf-pid exit-info)) ;; (vector-ref exit-info 0))
		     (pid2 (rmt:test-get-top-process-pid run-id test-id))
		     (pids (delete-duplicates (filter number? (list pid1 pid2)))))
		(if (not (null? pids))
		    (begin
		      (for-each
		       (lambda (pid)
			 (handle-exceptions
			  exn
			  (begin
			    (debug:print-info 0 *default-log-port* "Unable to kill process with pid " pid ", possibly already killed.")
			    (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)))
			  (debug:print 0 *default-log-port* "WARNING: Request received to kill job " pid) ;;  " (attempt # " kill-tries ")")
			  (debug:print-info 0 *default-log-port* "Signal mask=" (signal-mask))
			  ;; (if (process:alive? pid)
			  ;;     (begin
			  (map (lambda (pid-num)
				 (process-signal pid-num signal/term))
			       (process:get-sub-pids pid))
			  (thread-sleep! 5)
			  ;; (if (process:process-alive? pid)
			  (map (lambda (pid-num)
				 (handle-exceptions
				  exn
				  #f
				  (process-signal pid-num signal/kill)))
			       (process:get-sub-pids pid))))
		       ;;    (debug:print-info 0 *default-log-port* "not killing process " pid " as it is not alive"))))
		       pids)
                      ;; BB: question to Matt -- does the tests:test-state-status! encompass rollup to toplevel?  If not, should it?
		      (tests:test-set-status! run-id test-id "KILLED"  "KILLED" (conc (args:get-arg "-m")" "kill-reason) #f)) ;; BB ADDED kill-reason -- confirm OK with Matt
		    (begin
		      (debug:print-error 0 *default-log-port* "Nothing to kill, pid1=" pid1 ", pid2=" pid2)
		      (tests:test-set-status! run-id test-id "KILLED"  "FAILED TO KILL" (conc (args:get-arg "-m")" "kill-reason) #f) ;; BB ADDED kill-reason -- confirm OK with Matt
		      )))
	      (mutex-unlock! m)
	      ;; no point in sticking around. Exit now. But run end of run before exiting?
        (launch:end-of-run-check run-id)
	      (exit)))
	(if (hash-table-ref/default misc-flags 'keep-going #f)
	    (begin
	      (thread-sleep! 3) ;; (+ 3 (random 6))) ;; add some jitter to the call home time to spread out the db accesses
	      (if (hash-table-ref/default misc-flags 'keep-going #f)  ;; keep originals for cpu-load and disk-free unless they change more than the allowed delta
		  (loop (calc-minutes)
                        (or new-cpu-load cpu-load)
                        (or new-disk-free disk-free)
                        (if do-sync (current-seconds) last-sync)))))))
    (tests:update-central-meta-info run-id test-id (get-cpu-load) (get-df (current-directory))(calc-minutes) #f #f))) ;; NOTE: Checking twice for keep-going is intentional


(define (launch:execute encoded-cmd)
  (let* ((cmdinfo    (common:read-encoded-string encoded-cmd))
	 (tconfigreg #f))
    (setenv "MT_CMDINFO" encoded-cmd)
    ;;(bb-check-path msg: "launch:execute incoming")
    (if (list? cmdinfo) ;; ((testpath /tmp/mrwellan/jazzmind/src/example_run/tests/sqlitespeed)
	;; (test-name sqlitespeed) (runscript runscript.rb) (db-host localhost) (run-id 1))
	(let* ((testpath  (assoc/default 'testpath  cmdinfo))  ;; testpath is the test spec area
	       (top-path  (assoc/default 'toppath   cmdinfo))
	       (work-area (assoc/default 'work-area cmdinfo))  ;; work-area is the test run area
	       (test-name (assoc/default 'test-name cmdinfo))
	       (runscript (assoc/default 'runscript cmdinfo))
	       (ezsteps   (assoc/default 'ezsteps   cmdinfo))
	       (subrun    (assoc/default 'subrun    cmdinfo))
	       (serverurl (assoc/default 'serverurl cmdinfo))
	       (homehost  (assoc/default 'homehost  cmdinfo))
	       (run-id    (assoc/default 'run-id    cmdinfo))
	       (test-id   (assoc/default 'test-id   cmdinfo))
	       (target    (assoc/default 'target    cmdinfo))
	       (areaname  (assoc/default 'areaname  cmdinfo))
	       (itemdat   (assoc/default 'itemdat   cmdinfo))
	       (env-ovrd  (assoc/default 'env-ovrd  cmdinfo))
	       (set-vars  (assoc/default 'set-vars  cmdinfo)) ;; pre-overrides from -setvar
	       (runname   (assoc/default 'runname   cmdinfo))
	       (megatest  (assoc/default 'megatest  cmdinfo))
	       (runtlim   (assoc/default 'runtlim   cmdinfo))
	       (contour   (assoc/default 'contour   cmdinfo))
	       (item-path (item-list->path itemdat))
	       (mt-bindir-path (assoc/default 'mt-bindir-path cmdinfo))
	       (keys      #f)
	       (keyvals   #f)
	       (fullrunscript (if (not runscript)
                                  #f
                                  (if (substring-index "/" runscript)
                                      runscript ;; use unadultered if contains slashes
                                      (let ((fulln (conc work-area "/" runscript)))
	                                  (if (and (common:file-exists? fulln)
                                                   (file-execute-access? fulln))
                                              fulln
                                              runscript))))) ;; assume it is on the path
               (check-work-area           (lambda ()
                                            ;; NFS might not have propagated the directory meta data to the run host - give it time if needed
                                            (let loop ((count 0))
                                              (if (or (common:directory-exists? work-area)
                                                      (> count 10))
                                                  (change-directory work-area)
                                                  (begin
                                                    (debug:print 0 *default-log-port* "INFO: Not starting job yet - directory " work-area " not found")
                                                    (thread-sleep! 10)
                                                    (loop (+ count 1)))))

                                            (if (not (string=?  (common:real-path work-area)(common:real-path (current-directory))))
                                                (begin
                                                  (debug:print 0 *default-log-port*
                                                               "INFO: we are expecting to be in directory " work-area "\n"
                                                               "     but we are actually in the directory " (current-directory) "\n"
                                                               "     doing another change dir.")
                                                  (change-directory work-area)))
                                            
                                            ;; spot check that the files in testpath are available. Too often NFS delays cause problems here.
                                            (let ((files      (glob (conc testpath "/*")))
                                                  (bad-files '()))
                                              (for-each
                                               (lambda (fullname)
                                                 (let* ((fname (pathname-strip-directory fullname))
                                                        (targn (conc work-area "/" fname)))
                                                   (if (not (file-exists? targn))
                                                       (set! bad-files (cons fname bad-files)))))
                                               files)
                                              (if (not (null? bad-files))
                                                  (begin
                                                    (debug:print 0 *default-log-port* "INFO: test data from " testpath " not copied properly or filesystem problems causing data to not be found. Re-running the copy command.")
                                                    (debug:print 0 *default-log-port* "INFO: missing files from " work-area ": " (string-intersperse bad-files ", "))
                                                    (launch:test-copy testpath work-area))))
                                            ;; one more time, change to the work-area directory
                                            (change-directory work-area)))
	       ) ;; let*

	  (if contour (setenv "MT_CONTOUR" contour))
	  
	  ;; immediated set some key variables from CMDINFO data, yes, these will be set again below ...
	  ;;
	  (setenv "MT_TESTSUITENAME" areaname)
	  (setenv "MT_RUN_AREA_HOME" top-path)
	  (set! *toppath* top-path)
          (change-directory *toppath*) ;; temporarily switch to the run area home
	  (setenv "MT_TEST_RUN_DIR"  work-area)

	  (launch:setup) ;; should be properly in the run area home now

	  (if contour (setenv "MT_CONTOUR" contour))
	  
	  ;; immediated set some key variables from CMDINFO data, yes, these will be set again below ...
	  ;;
	  (setenv "MT_TESTSUITENAME" areaname)
	  (setenv "MT_RUN_AREA_HOME" top-path)
	  (set! *toppath* top-path)
          (change-directory *toppath*) ;; temporarily switch to the run area home
	  (setenv "MT_TEST_RUN_DIR"  work-area)

	  (launch:setup) ;; should be properly in the run area home now
          
	  (set! tconfigreg (tests:get-all)) ;; mapping of testname => test source path
	  (let ((sighand (lambda (signum)
			   ;; (signal-mask! signum) ;; to mask or not? seems to cause issues in exiting
			   (if (eq? signum signal/stop)
			       (debug:print-error 0 *default-log-port* "attempt to STOP process. Exiting."))
			   (set! *time-to-exit* #t)
			   (print "Received signal " signum ", cleaning up before exit (set this test to COMPLETED/ABORT) . Please wait...")
			   (let ((th1 (make-thread (lambda ()
                                                     (print "set test to COMPLETED/ABORT begin.")
						     (rmt:test-set-state-status run-id test-id "COMPLETED" "ABORT" "received kill signal")
                                                     (print "set test to COMPLETED/ABORT complete.")
						     (print "Killed by signal " signum ". Exiting")
						     (exit 1))))
				 (th2 (make-thread (lambda ()
						     (thread-sleep! 20)
						     (debug:print 0 *default-log-port* "Done")
						     (exit 4)))))
			     (thread-start! th2)
			     (thread-start! th1)
			     (thread-join! th2)))))
	    (set-signal-handler! signal/int sighand)
	    (set-signal-handler! signal/term sighand)
	    ) ;; (set-signal-handler! signal/stop sighand)
	  
	  ;; Do not run the test if it is REMOVING, RUNNING, KILLREQ or REMOTEHOSTSTART,
	  ;; Mark the test as REMOTEHOSTSTART *IMMEDIATELY*
	  ;;
	  (let* ((test-info (rmt:get-test-info-by-id run-id test-id))
		 (test-host (if test-info
				(db:test-get-host        test-info)
				(begin
				  (debug:print 0 *default-log-port* "ERROR: failed to find a record for test-id " test-id ", exiting.")
				  (exit))))
		 (test-pid  (db:test-get-process_id  test-info)))
	    (cond
             ;; -mrw- I'm removing KILLREQ from this list so that a test in KILLREQ state is treated as a "do not run" flag.
	     ((member (db:test-get-state test-info) '("INCOMPLETE" "KILLED" "UNKNOWN" "STUCK")) ;; prior run of this test didn't complete, go ahead and try to rerun
	      (debug:print 0 *default-log-port* "INFO: test is INCOMPLETE or KILLED, treat this execute call as a rerun request")
	      ;; (tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a")

              (rmt:general-call 'set-test-start-time #f test-id)
              (rmt:test-set-state-status run-id test-id "REMOTEHOSTSTART" "n/a" #f)
	      ) ;; prime it for running
	     ((member (db:test-get-state test-info) '("RUNNING" "REMOTEHOSTSTART"))
	      (if (process:alive-on-host? test-host test-pid)
		  (debug:print-error 0 *default-log-port* "test state is "  (db:test-get-state test-info) " and process " test-pid " is still running on host " test-host ", cannot proceed")
		  (exit)))
	     ((not (member (db:test-get-state test-info) '("REMOVING" "REMOTEHOSTSTART" "RUNNING" "KILLREQ")))
	      ;; (tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a")
              (rmt:general-call 'set-test-start-time #f test-id)
	      (rmt:test-set-state-status run-id test-id "REMOTEHOSTSTART" "n/a" #f)
	      )
	     (else ;; (member (db:test-get-state test-info) '("REMOVING" "REMOTEHOSTSTART" "RUNNING" "KILLREQ"))
	      (debug:print-error 0 *default-log-port* "test state is " (db:test-get-state test-info) ", cannot proceed")
	      (exit))))

          ;; cleanup prior execution's steps
          (rmt:delete-steps-for-test! run-id test-id)
          
	  (debug:print 2 *default-log-port* "Executing " test-name " (id: " test-id ") on " (get-host-name))
	  (set! keys       (rmt:get-keys))
	  ;; (runs:set-megatest-env-vars run-id inkeys: keys inkeyvals: keyvals) ;; these may be needed by the launching process
	  ;; one of these is defunct/redundant ...
	  (if (not (launch:setup force-reread: #t))
	      (begin
		(debug:print 0 *default-log-port* "Failed to setup, exiting") 
		;; (sqlite3:finalize! db)
		;; (sqlite3:finalize! tdb)
		(exit 1)))
          ;; validate that the test run area is available
          (check-work-area)
          
          ;; still need to go back to run area home for next couple steps
	  (change-directory *toppath*) 

	  ;; NOTE: Current order is to process runconfigs *before* setting the MT_ vars. This 
	  ;;       seems non-ideal but could well break stuff
	  ;;    BUG? BUG? BUG?
	  
	  (let ((rconfig (full-runconfigs-read)) ;; (read-config (conc  *toppath* "/runconfigs.config") #f #t sections: (list "default" target))))
		(wconfig (read-config "waivers.config" #f #t sections: `( "default" ,target )))) ;; read the waivers config if it exists
	    ;; (setup-env-defaults (conc *toppath* "/runconfigs.config") run-id (make-hash-table) keyvals target)
	    ;; (set-run-config-vars run-id keyvals target) ;; (db:get-target db run-id))
	    ;; Now have runconfigs data loaded, set environment vars
	    (for-each
	     (lambda (section)
	       (for-each
		(lambda (varval)
		  (let ((var (car varval))
			(val (cadr varval)))
		    (if (and (string? var)(string? val))
			(begin
			  (safe-setenv var (config:eval-string-in-environment val))) ;; val)
			(debug:print-error 0 *default-log-port* "bad variable spec, " var "=" val))))
		(configf:get-section rconfig section)))
	     (list "default" target)))
          ;;(bb-check-path msg: "launch:execute post block 1")

	  ;; NFS might not have propagated the directory meta data to the run host - give it time if needed
	  (let loop ((count 0))
	    (if (or (common:file-exists? work-area)
		    (> count 10))
		(change-directory work-area)
		(begin
		  (debug:print 0 *default-log-port* "INFO: Not starting job yet - directory " work-area " not found")
		  (thread-sleep! 10)
		  (loop (+ count 1)))))

          ;; now we can switch to the work-area?
          (change-directory work-area)
          ;;(bb-check-path msg: "launch:execute post block 1.5")
	  ;; (change-directory work-area) 
	  (set! keyvals    (keys:target->keyval keys target))
	  ;; apply pre-overrides before other variables. The pre-override vars must not
	  ;; clobbers things from the official sources such as megatest.config and runconfigs.config
	  (if (string? set-vars)
	      (let ((varpairs (string-split set-vars ",")))
		(debug:print 4 *default-log-port* "varpairs: " varpairs)
		(map (lambda (varpair)
		       (let ((varval (string-split varpair "=")))
			 (if (eq? (length varval) 2)
			     (let ((var (car varval))
				   (val (cadr varval)))
			       (debug:print 1 *default-log-port* "Adding pre-var/val " var " = " val " to the environment")
			       (setenv var val)))))
		     varpairs)))
          ;;(bb-check-path msg: "launch:execute post block 2")
	  (for-each
	   (lambda (varval)
	     (let ((var (car varval))
		   (val (cadr varval)))
	       (if val
		   (setenv var val)
		   (begin
		     (debug:print-error 0 *default-log-port* "required variable " var " does not have a valid value. Exiting")
		     (exit)))))
	     (list 
	      (list  "MT_TEST_RUN_DIR" work-area)
	      (list  "MT_TEST_NAME" test-name)
	      (list  "MT_ITEM_INFO" (conc itemdat))
	      (list  "MT_ITEMPATH"  item-path)
	      (list  "MT_RUNNAME"   runname)
	      (list  "MT_MEGATEST"  megatest)
	      (list  "MT_TARGET"    target)
	      (list  "MT_LINKTREE"  (common:get-linktree)) ;; (configf:lookup *configdat* "setup" "linktree"))
	      (list  "MT_TESTSUITENAME" (common:get-area-name *alldat*))))
          ;;(bb-check-path msg: "launch:execute post block 3")

	  (if mt-bindir-path (setenv "PATH" (conc (getenv "PATH") ":" mt-bindir-path)))
          ;;(bb-check-path msg: "launch:execute post block 4")
	  ;; (change-directory top-path)
	  ;; Can setup as client for server mode now
	  ;; (client:setup)

	  
	  ;; environment overrides are done *before* the remaining critical envars.
	  (alist->env-vars env-ovrd)
          ;;(bb-check-path msg: "launch:execute post block 41")
	  (runs:set-megatest-env-vars run-id inkeys: keys inkeyvals: keyvals)
          ;;(bb-check-path msg: "launch:execute post block 42")
	  (set-item-env-vars itemdat)
          ;;(bb-check-path msg: "launch:execute post block 43")
          (let ((blacklist (configf:lookup *configdat* "setup" "blacklistvars")))
            (if blacklist
                (save-environment-as-files "megatest" ignorevars: (string-split blacklist))
                (save-environment-as-files "megatest")))
          ;;(bb-check-path msg: "launch:execute post block 44")
	  ;; open-run-close not needed for test-set-meta-info
	  ;; (tests:set-full-meta-info #f test-id run-id 0 work-area)
	  ;; (tests:set-full-meta-info test-id run-id 0 work-area)
	  (tests:set-full-meta-info #f test-id run-id 0 work-area 10)

	  ;; (thread-sleep! 0.3) ;; NFS slowness has caused grief here

	  (if (args:get-arg "-xterm")
	      (set! fullrunscript "xterm")
	      (if (and fullrunscript 
		       (common:file-exists? fullrunscript)
		       (not (file-execute-access? fullrunscript)))
		  (system (conc "chmod ug+x " fullrunscript))))

	  ;; We are about to actually kick off the test
	  ;; so this is a good place to remove the records for 
	  ;; any previous runs
	  ;; (db:test-remove-steps db run-id testname itemdat)
	  ;; now is also a good time to write the .testconfig file
	  (let* ((tconfig-fname   (conc work-area "/.testconfig"))
		 (tconfig-tmpfile (conc tconfig-fname ".tmp"))
		 (tconfig         (tests:get-testconfig test-name item-path tconfigreg #t force-create: #t))) ;; 'return-procs)))
	    (configf:write-alist tconfig tconfig-tmpfile)
	    (file-move tconfig-tmpfile tconfig-fname #t))
	  ;; 
	  (let* ((m            (make-mutex))
		 (kill-job?    #f)
		 (exit-info    (make-launch:einf pid: #t exit-status: #t exit-code: #t rollup-status: 0)) ;; pid exit-status exit-code (i.e. process was successfully run) rollup-status
		 (job-thread   #f)
		 ;; (keep-going   #t)
		 (misc-flags   (let ((ht (make-hash-table)))
				 (hash-table-set! ht 'keep-going #t)
				 ht))
		 (runit        (lambda ()
				 (launch:manage-steps run-id test-id item-path fullrunscript ezsteps subrun test-name tconfigreg exit-info m)))
		 (monitorjob   (lambda ()
				 (launch:monitor-job  run-id test-id item-path fullrunscript ezsteps test-name tconfigreg exit-info m work-area runtlim misc-flags)))
		 (th1          (make-thread monitorjob "monitor job"))
		 (th2          (make-thread runit "run job")))
	    (set! job-thread th2)
	    (thread-start! th1)
	    (thread-start! th2)
	    (thread-join! th2)
	    (debug:print-info 0 *default-log-port* "Megatest exectute of test " test-name ", item path " item-path " complete. Notifying the db ...")
	    (hash-table-set! misc-flags 'keep-going #f)
	    (thread-join! th1)
	    (thread-sleep! 1)       ;; givbe thread th1 a chance to be done TODO: Verify this is needed. At 0.1 I was getting fail to stop, increased to total of 1.1 sec.
	    (mutex-lock! m)
	    (let* ((item-path (item-list->path itemdat))
		   ;; only state and status needed - use lazy routine
		   (testinfo  (rmt:get-testinfo-state-status run-id test-id)))
	      ;; Am I completed?
	      (if (member (db:test-get-state testinfo) '("REMOTEHOSTSTART" "RUNNING")) ;; NOTE: It should *not* be REMOTEHOSTSTART but for reasons I don't yet understand it sometimes gets stuck in that state ;; (not (equal? (db:test-get-state testinfo) "COMPLETED"))
		  (let ((new-state  (if kill-job? "KILLED" "COMPLETED") ;; (if (eq? (vector-ref exit-info 2) 0) ;; exited with "good" status
				                                        ;; "COMPLETED"							                ;; (db:test-get-state testinfo)))   ;; else preseve the state as set within the test
				    )
			(new-status (cond
				     ((not (launch:einf-exit-status exit-info)) "FAIL") ;; job failed to run ... (vector-ref exit-info 1)
				     ((eq? (launch:einf-rollup-status exit-info) 0)     ;; (vector-ref exit-info 3)
				      ;; if the current status is AUTO then defer to the calculated value (i.e. leave this AUTO)
				      (if (equal? (db:test-get-status testinfo) "AUTO") "AUTO" "PASS"))
				     ((eq? (launch:einf-rollup-status exit-info) 1) "FAIL")  ;; (vector-ref exit-info 3)
				     ((eq? (launch:einf-rollup-status exit-info) 2)	     ;;	(vector-ref exit-info 3)
				      ;; if the current status is AUTO the defer to the calculated value but qualify (i.e. make this AUTO-WARN)
				      (if (equal? (db:test-get-status testinfo) "AUTO") "AUTO-WARN" "WARN"))
				     ((eq? (launch:einf-rollup-status exit-info) 3) "CHECK")
				     ((eq? (launch:einf-rollup-status exit-info) 4) "WAIVED")
				     ((eq? (launch:einf-rollup-status exit-info) 5) "ABORT")
				     ((eq? (launch:einf-rollup-status exit-info) 6) "SKIP")
				     (else "FAIL")))) ;; (db:test-get-status testinfo)))
		    (debug:print-info 1 *default-log-port* "Test exited in state=" (db:test-get-state testinfo) ", setting state/status based on exit code of " (launch:einf-exit-status exit-info) " and rollup-status of " (launch:einf-rollup-status exit-info))
		    (tests:test-set-status! run-id 
					    test-id 
					    new-state
					    new-status
					    (args:get-arg "-m") #f)
		    ;; need to update the top test record if PASS or FAIL and this is a subtest
		    ;; NO NEED TO CALL set-state-status-and-roll-up-items HERE, THIS IS DONE IN set-state-status-and-roll-up-items called by tests:test-set-status!
		    ))
	      ;; for automated creation of the rollup html file this is a good place...
	      (if (not (equal? item-path ""))
		  (tests:summarize-items run-id test-id test-name #f))
	      (tests:summarize-test run-id test-id)  ;; don't force - just update if no
	      (rmt:update-run-stats run-id (rmt:get-raw-run-stats run-id)))
	    (mutex-unlock! m)
            (launch:end-of-run-check run-id )
	    (debug:print 2 *default-log-port* "Output from running " fullrunscript ", pid " (launch:einf-pid exit-info) " in work area " 
			 work-area ":\n====\n exit code " (launch:einf-exit-code exit-info) "\n" "====\n")
	    (if (not (launch:einf-exit-status exit-info))
		(exit 4))))
        )))

;; Spec for End of test
;; At end of each test call, after marking self as COMPLETED do run-state-status-rollup
;; At transition to run COMPLETED/X do hooks
;; Definition: test_dead if event_time + duration + 1 minute? < current_time AND
;; we can prove the process is not alive (ssh host pstree -A pid)
;; if dead safe to mark the test as killed in the db
;; State/status table
;; new
;; 100% COMPLETED/ (PASS,FAIL,ABORT etc.) ==> COMPLETED / X where X is same as itemized rollup
;; > 3 RUNNING with not test_dead do nothing (run should already be RUNNING/ na
;; > 0 RUNNING and test_dead then send KILLREQ ==> COMPLETED
;; 0 RUNNING ==> this is actually the first condition, should not get here

(define (launch:end-of-run-check run-id )
    (let*	((not-completed-cnt (rmt:get-not-completed-cnt run-id))  
           (running-cnt (rmt:get-count-tests-running-for-run-id run-id))
           (all-test-launched (rmt:get-var (conc "lunch-complete-" run-id)))
           (current-state (rmt:get-run-state run-id))
           (current-status (rmt:get-run-status run-id)))
     ;;get-vars run-id to query metadata table to check if all completed. if all-test-launched = yes then only not-completed-cnt = 0 means everyting is completed if no entry found in the table do nothing 
     (debug:print 0 *default-log-port* "rollup run state/status")                      
     (rmt:set-state-status-and-roll-up-run  run-id current-state current-status)
 
     (cond 
       ((and all-test-launched (eq? not-completed-cnt 0) (equal? all-test-launched "yes" ))
           	(debug:print 0 *default-log-port* "look for  post hook.")
          	(runs:run-post-hook run-id))
        ((> running-cnt 3) 
        	  (debug:print 0 *default-log-port* "There are " running-cnt " tests running." ))
        ((> running-cnt 0)
            (debug:print 0 *default-log-port* "running cnt > 0 but <= 3 kill-running-tests-if-dead" )
   				  (let ((kill-cnt (launch:kill-tests-if-dead run-id)))
           			(if (and all-test-launched  (equal? all-test-launched "yes") (eq? kill-cnt running-cnt))
           					(launch:end-of-run-check run-id)))) ;;todo
        (else  (debug:print 0 *default-log-port* "Should it get here?? May be everything is not launched yet. Running test cnt:" running-cnt " Not completed test cnt:" not-completed-cnt)
         (let* ((not-completed-tests (rmt:get-tests-for-run run-id "%" `("NOT_STARTED" "RUNNING" "LAUNCHED" "REMOTEHOSTSTART") `() #f #f #f #f #f #f #f #f)))
       (if (> (length not-completed-tests) 0) 
           (let loop ((running-test (car not-completed-tests))
			     (tal    (cdr not-completed-tests)))
		       (let* ((test-name (vector-ref running-test 2))
                 (item-path (vector-ref running-test 11)))
			       	(debug:print 0 *default-log-port* "test " test-name "/" item-path " not completed")
              (if (not (null? tal))
				  (loop (car tal) (cdr tal)))))))))))        
 
(define (launch:is-test-alive host pid)
(if (and host pid (not (equal? host "n/a")))
(let* ((cmd (conc "ssh " host " pstree -A " pid))
      (output (with-input-from-pipe cmd read-lines)))
  (print "cmd: " cmd "\n op: " output )
  (if(eq? (length output) 0)
     #f
     #t))
#t))
 
(define (launch:kill-tests-if-dead run-id)
  (let* ((running-tests (rmt:get-tests-for-run run-id "%" `("RUNNING" "LAUNCHED" "REMOTEHOSTSTART") `() #f #f #f #f #f #f #f #f)))
       (let loop ((running-test (car running-tests))
			     (tal    (cdr running-tests))
			     (kill-cnt 0))
		       (let* ((test-name (vector-ref running-test 2))
                 (item-path (vector-ref running-test 11))
								 (test-id (vector-ref running-test 0))
                 (host (vector-ref running-test 6))
                 (pid  (rmt:test-get-top-process-pid run-id test-id))   
                 (event-time (vector-ref running-test 5))
                 (duration (vector-ref running-test 12))
                 (flag 0)   
                 (curr-time (current-seconds)))
       (if (and (< (+ event-time duration 600) curr-time) (not (launch:is-test-alive host pid))) ;;test has not updated duration in last 10 min then likely its not running but confirm before marking it as killed
           (begin    
			       	(debug:print 0 *default-log-port* "test " test-name "/" item-path " needs to be killed")
              (set! flag 1) 
              (rmt:set-state-status-and-roll-up-items run-id test-name item-path "KILLREQ" "n/a" #f)))
               (if (not (null? tal))
				  (loop (car tal) (cdr tal) (+ kill-cnt flag))
                 (+ kill-cnt flag))))))

;; DO NOT USE - caching of configs is handled in launch:setup now.
;;
(define (launch:cache-config)
  ;; if we have a linktree and -runtests and -target and the directory exists dump the config
  ;; to megatest-(current-seconds).cfg and symlink it to megatest.cfg
  (if (and *configdat* 
	   (or (args:get-arg "-run")
	       (args:get-arg "-runtests")
	       (args:get-arg "-execute")))
      (let* ((linktree (common:get-linktree)) ;; (get-environment-variable "MT_LINKTREE"))
	     (target   (common:args-get-target exit-if-bad: #t))
	     (runname  (or (args:get-arg "-runname")
			   (args:get-arg ":runname")
			   (getenv "MT_RUNNAME")))
	     (fulldir  (conc linktree "/"
			     target "/"
			     runname)))
	(if (and linktree (common:file-exists? linktree)) ;; can't proceed without linktree
	    (begin
	      (debug:print-info 0 *default-log-port* "Have -run with target=" target ", runname=" runname ", fulldir=" fulldir ", testpatt=" (or (args:get-arg "-testpatt") "%"))
	      (if (not (common:file-exists? fulldir))
		  (create-directory fulldir #t)) ;; need to protect with exception handler 
	      (if (and target
		       runname
		       (common:file-exists? fulldir))
		  (let ((tmpfile  (conc fulldir "/.megatest.cfg." (current-seconds)))
			(targfile (conc fulldir "/.megatest.cfg-"  megatest-version "-" megatest-fossil-hash))
			(rconfig  (conc fulldir "/.runconfig." megatest-version "-" megatest-fossil-hash)))
		    (if (common:file-exists? rconfig) ;; only cache megatest.config AFTER runconfigs has been cached
			(begin
			  (debug:print-info 0 *default-log-port* "Caching megatest.config in " tmpfile)
                          (if (not (common:in-running-test?))
                              (configf:write-alist *configdat* tmpfile))
			  (system (conc "ln -sf " tmpfile " " targfile))))
		    )))
	    (debug:print-info 1 *default-log-port* "No linktree yet, no caching configs.")))))


;; gather available information, if legit read configs in this order:
;;
;;   if have cache;
;;      read it a return it
;;   else
;;     megatest.config     (do not cache)
;;     runconfigs.config   (cache if all vars avail)
;;     megatest.config     (cache if all vars avail)
;;   returns:
;;     *toppath*
;;   side effects:
;;     sets; *configdat*    (megatest.config info)
;;           *runconfigdat* (runconfigs.config info)
;;           *configstatus* (status of the read data)
;;
(define (launch:setup #!key (force-reread #f) (areapath #f))
  (mutex-lock! *launch-setup-mutex*)
  (if (and *toppath*
	   (eq? *configstatus* 'fulldata) (not force-reread)) ;; got it all
      (begin
	(debug:print 2 *default-log-port* "NOTE: skipping launch:setup-body call since we have fulldata")
	(mutex-unlock! *launch-setup-mutex*)
	*toppath*)
      (let ((res (launch:setup-body force-reread: force-reread areapath: areapath)))
	(mutex-unlock! *launch-setup-mutex*)
	res)))

;; return paths depending on what info is available.
;;
(define (launch:get-cache-file-paths areapath toppath target mtconfig)
  (let* ((use-cache (common:use-cache?))
         (runname  (common:args-get-runname))
         (linktree (common:get-linktree))
         (testname (common:get-full-test-name))
         (rundir   (if (and runname target linktree)
                       (common:directory-writable? (conc linktree "/" target "/" runname))
                       #f))
         (testdir  (if (and rundir testname)
                       (common:directory-writable? (conc rundir "/" testname))
                       #f))
         (cachedir (or testdir rundir))
         (mtcachef (and cachedir (conc cachedir "/" ".megatest.cfg-"  megatest-version "-" megatest-fossil-hash)))
         (rccachef (and cachedir (conc cachedir "/" ".runconfigs.cfg-"  megatest-version "-" megatest-fossil-hash))))
    (debug:print-info 6 *default-log-port* 
                      "runname=" runname 
                      "\n  linktree=" linktree
                      "\n  testname=" testname
                      "\n  rundir=" rundir 
                      "\n  testdir=" testdir 
                      "\n  cachedir=" cachedir
                      "\n  mtcachef=" mtcachef
                      "\n  rccachef=" rccachef)
    (cons mtcachef rccachef)))

(define (launch:setup-body #!key (force-reread #f) (areapath #f))
  (if (and (eq? *configstatus* 'fulldata)
	   *toppath*
	   (not force-reread)) ;; no need to reprocess
      *toppath*   ;; return toppath
      (let* ((use-cache (common:use-cache?))  ;; BB- use-cache checks
					      ;; *configdat* for
					      ;; use-cache setting.
					      ;; We do not have
					      ;; *configdat*.
					      ;; Bootstrapping problem
					      ;; here.
	     (toppath  (or *toppath* areapath (getenv "MT_RUN_AREA_HOME"))) ;; preserve toppath
	     (target   (common:args-get-target))
	     (sections (if target (list "default" target) #f)) ;; for runconfigs
	     (mtconfig (or (args:get-arg "-config") "megatest.config")) ;; allow overriding megatest.config 
             (cachefiles (launch:get-cache-file-paths areapath toppath target mtconfig))
	     ;; checking for null cachefiles should not be necessary,
	     ;; I was seeing error car of '(), might be a chicken bug
	     ;; or a red herring ...
	     (mtcachef   (if (null? cachefiles)
			     #f
			     (car cachefiles))) ;; (and cachedir (conc
						;; cachedir "/"
						;; ".megatest.cfg-"
						;; megatest-version
						;; "-"
						;; megatest-fossil-hash)))
	     (rccachef   (if (null? cachefiles)
			     #f
			     (cdr cachefiles)))) ;; (and cachedir
						 ;; (conc cachedir "/"
						 ;; ".runconfigs.cfg-"
						 ;; megatest-version
						 ;; "-"
						 ;; megatest-fossil-hash)))
						 ;; (cancreate (and
						 ;; cachedir
						 ;; (common:file-exists?
						 ;; cachedir)(file-write-access?
						 ;; cachedir) (not
						 ;; (common:in-running-test?)))))
	(set! *toppath* toppath) ;; This is needed when we are running
				 ;; as a test using CMDINFO as a
				 ;; datasource (BB> "launch:setup-body
				 ;; -- cachefiles="cachefiles)
	(cond
	 ;; if mtcachef exists just read it, however we need to assume
	 ;; toppath is available in $MT_RUN_AREA_HOME
	 ((and (not force-reread)
	       mtcachef  rccachef
	       use-cache
	       (get-environment-variable "MT_RUN_AREA_HOME")
	       (common:file-exists? mtcachef)
	       (common:file-exists? rccachef))
          ;;(BB> "launch:setup-body -- cond branch 1 - use-cache")
          (set! *configdat*    (configf:read-alist mtcachef))
          ;;(BB> "launch:setup-body -- 1 set! *configdat*="*configdat*)
	  (set! *runconfigdat* (configf:read-alist rccachef))
	  (set! *configinfo*   (list *configdat*  (get-environment-variable "MT_RUN_AREA_HOME")))
	  (set! *configstatus* 'fulldata)
	  (set! *toppath*      (get-environment-variable "MT_RUN_AREA_HOME"))
	  *toppath*)
	 ;; there are no existing cached configs, do full reads of the
	 ;; configs and cache them we have all the info needed to
	 ;; fully process runconfigs and megatest.config
	 ((and ;; (not force-reread) ;; force-reread is irrelevant in the AND, could however OR it?
	       mtcachef
	       rccachef) ;; BB- why are we doing this without asking if caching is desired?
          ;;(BB> "launch:setup-body -- cond branch 2")
	  (let* ((first-pass    (find-and-read-config        ;; NB// sets MT_RUN_AREA_HOME as side effect
				 mtconfig
				 environ-patt: "env-override"
				 given-toppath: toppath
				 pathenvvar: "MT_RUN_AREA_HOME"))
		 (first-rundat  (let ((toppath (if toppath 
						   toppath
						   (car first-pass))))
				  (read-config ;; (conc toppath "/runconfigs.config") ;; this should be converted to runconfig:read but it is non-trivial, leaving it for now.
				   (conc (if (string? toppath)
					     toppath
					     (get-environment-variable "MT_RUN_AREA_HOME"))
					 "/runconfigs.config")
				   *runconfigdat* #t 
				   sections: sections))))
	    (set! *runconfigdat* first-rundat)
	    (if first-pass  ;; 
		(begin
                  ;;(BB> "launch:setup-body -- \"first-pass\"=first-pass")
		  (set! *configdat*  (car first-pass))
                  ;;(BB> "launch:setup-body -- 2 set! *configdat*="*configdat*)
		  (set! *configinfo* first-pass)
		  (set! *toppath*    (or toppath (cadr first-pass))) ;; use the gathered data unless already have it
		  (set! toppath      *toppath*)
		  (if (not *toppath*)
		      (begin
			(debug:print-error 0 *default-log-port* "you are not in a megatest area!")
			(exit 1)))
		  (setenv "MT_RUN_AREA_HOME" *toppath*)
		  ;; the seed read is done, now read runconfigs, cache it then read megatest.config one more time and cache it
		  (let* ((keys         (rmt:get-keys))
			 (key-vals     (keys:target->keyval keys target))
			 (linktree     (common:get-linktree)) ;; (or (getenv "MT_LINKTREE")(if *configdat* (configf:lookup *configdat* "setup" "linktree") #f)))
					;     (if *configdat*
					; 	   (configf:lookup *configdat* "setup" "linktree")
					; 	   (conc *toppath* "/lt"))))
			 (second-pass  (find-and-read-config
					mtconfig
					environ-patt: "env-override"
					given-toppath: toppath
					pathenvvar: "MT_RUN_AREA_HOME"))
			 (runconfigdat (begin     ;; this read of the runconfigs will see any adjustments made by re-reading megatest.config
					 (for-each (lambda (kt)
						     (setenv (car kt) (cadr kt)))
						   key-vals)
					 (read-config (conc toppath "/runconfigs.config") *runconfigdat* #t ;; consider using runconfig:read some day ...
						      sections: sections)))
                         (cachefiles   (launch:get-cache-file-paths areapath toppath target mtconfig))
                         (mtcachef     (car cachefiles))
                         (rccachef     (cdr cachefiles)))
                    ;;  trap exception due to stale NFS handle -- Error: (open-output-file) cannot open file - Stale NFS file handle: "/p/fdk/gwa/lefkowit/mtTesting/qa/primbeqa/links/p1222/11/PDK_r1.1.1/prim/clean/pcell_testgen/.runconfigs.cfg-1.6427-7d1e789cb3f62f9cde719a4865bb51b3c17ea853" - ticket 220546342
                    ;; TODO - consider 1) using simple-lock to bracket cache write
                    ;;                 2) cache in hash on server, since need to do rmt: anyway to lock.

		    (if rccachef
                        (common:fail-safe
                         (lambda ()
                           (configf:write-alist runconfigdat rccachef))
                         (conc "Could not write cache file - "rccachef)))
                    (if mtcachef
                        (common:fail-safe
                         (lambda ()
                           (configf:write-alist *configdat* mtcachef))
                         (conc "Could not write cache file - "mtcachef)))
		    (set! *runconfigdat* runconfigdat)
		    (if (and rccachef mtcachef) (set! *configstatus* 'fulldata))))
		;; no configs found? should not happen but let's try to recover gracefully, return an empty hash-table
		(set! *configdat* (make-hash-table))
		)))

	 ;; else read what you can and set the flag accordingly
	 ;; here we don't have either mtconfig or rccachef
	 (else
          ;;(BB> "launch:setup-body -- cond branch 3 - else")
	  (let* ((cfgdat   (find-and-read-config 
			    (or (args:get-arg "-config") "megatest.config")
			    environ-patt: "env-override"
			    given-toppath: (get-environment-variable "MT_RUN_AREA_HOME")
			    pathenvvar: "MT_RUN_AREA_HOME")))

            (if (and cfgdat (list? cfgdat) (> (length cfgdat) 0) (hash-table? (car cfgdat)))
		(let* ((toppath  (or (get-environment-variable "MT_RUN_AREA_HOME")(cadr cfgdat)))
		       (rdat     (read-config (conc toppath  ;; convert this to use runconfig:read!
						    "/runconfigs.config") *runconfigdat* #t sections: sections)))
		  (set! *configinfo*   cfgdat)
		  (set! *configdat*    (car cfgdat))
		  (set! *runconfigdat* rdat)
		  (set! *toppath*      toppath)
		  (set! *configstatus* 'partial)
		  ;; set up as many vars in *alldat* as possible here
		  (alldat-areapath-set! *alldat* toppath)
		  (alldat-log-port-set! *alldat* *default-log-port*)
		  (alldat-mtconfig-set! *alldat* *configdat*)
		  
		  )
		(begin
		  (debug:print-error 0 *default-log-port* "No " mtconfig " file found. Giving up.")
		  (exit 2))))))
	;; COND ends here.
	
	;; additional house keeping
	(let* ((linktree (or (common:get-linktree)
			     (conc *toppath* "/lt"))))
	  (if linktree
	      (begin
		(if (not (common:file-exists? linktree))
		    (begin
		      (handle-exceptions
			  exn
			  (begin
			    (debug:print-error 0 *default-log-port* "Something went wrong when trying to create linktree dir at " linktree)
			    (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
			    (exit 1))
			(create-directory linktree #t))))
		(handle-exceptions
		    exn
		    (begin
		      (debug:print-error 0 *default-log-port* "Something went wrong when trying to create link to linktree at " *toppath*)
		      (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)))
		  (let ((tlink (conc *toppath* "/lt")))
		    (if (not (common:file-exists? tlink))
			(create-symbolic-link linktree tlink)))))
	      (begin
		(debug:print-error 0 *default-log-port* "linktree not defined in [setup] section of megatest.config")
		)))
	(if (and *toppath*
		 (directory-exists? *toppath*))
	    (begin
	      (setenv "MT_RUN_AREA_HOME" *toppath*)
	      (setenv "MT_TESTSUITENAME" (common:get-area-name *alldat*)))
	    (begin
	      (debug:print-error 0 *default-log-port* "failed to find the top path to your Megatest area.")
	      (set! *toppath* #f) ;; force it to be false so we return #f
	      #f))
	
        ;; one more attempt to cache the configs for future reading
        (let* ((cachefiles   (launch:get-cache-file-paths areapath toppath target mtconfig))
               (mtcachef     (car cachefiles))
               (rccachef     (cdr cachefiles)))

          ;; trap exception due to stale NFS handle -- Error: (open-output-file) cannot open file - Stale NFS file handle: "...somepath.../.runconfigs.cfg-1.6427-7d1e789cb3f62f9cde719a4865bb51b3c17ea853" - ticket 220546342
          ;; TODO - consider 1) using simple-lock to bracket cache write
          ;;                 2) cache in hash on server, since need to do rmt: anyway to lock.
          (if (and rccachef *runconfigdat* (not (common:file-exists? rccachef)))
              (common:fail-safe
               (lambda ()
                 (configf:write-alist *runconfigdat* rccachef))
               (conc "Could not write cache file - "rccachef))
              )
          (if (and mtcachef *configdat*    (not (common:file-exists? mtcachef)))
              (common:fail-safe
               (lambda ()
                 (configf:write-alist *configdat* mtcachef))
               (conc "Could not write cache file - "mtcachef))
              )
          (if (and rccachef mtcachef *runconfigdat* *configdat*)
              (set! *configstatus* 'fulldata)))

	;; if have -append-config then read and append here
	(let ((cfname (args:get-arg "-append-config")))
	  (if (and cfname
		   (file-read-access? cfname))
	      (read-config cfname *configdat* #t))) ;; values are added to the hash, no need to do anything special.
	*toppath*)))

(define (get-best-disk confdat testconfig)
  (let* ((disks   (or (and testconfig (hash-table-ref/default testconfig "disks" #f))
		      (hash-table-ref/default confdat "disks" #f)))
	 (minspace (let ((m (configf:lookup confdat "setup" "minspace")))
		     (string->number (or m "10000")))))
    (if disks 
	(let ((res (common:get-disk-with-most-free-space disks minspace))) ;; min size of 1000, seems tad dumb
	  (if res
	      (cdr res)
	      (begin
;;		(if (common:low-noise-print 20 "No valid disks or no disk with enough space")
;;		    (debug:print-error 0 *default-log-port* "No valid disks found in megatest.config. Please add some to your [disks] section and ensure the directory exists and has enough space!\n    You can change minspace in the [setup] section of megatest.config. Current setting is: " minspace))
		;;(exit 1)
                 (if (null? disks)
                     (cons 1 (conc *toppath* "/runs"))
                     (let ((paths (sort disks (lambda (x y) (> (string-length (cadr x)) (string-length (cadr y)))))))
                       (let loop ((head (car paths)) (tail (cdr paths)))
                         (let ((result (handle-exceptions exn #f (create-directory (cadr head) #t))))
                           (if result
                               result
                               (if (null? tail)
                                   (cons 1 (conc *toppath* "/runs"))
                                   (loop (car tail) (cdr tail)))))))))))))) ;; the code creates the necessary directories if it does not exist and returns the path.


(define (launch:test-copy test-src-path test-path)
  (let* ((ovrcmd (let ((cmd (config-lookup *configdat* "setup" "testcopycmd")))
		   (if cmd
		       ;; substitute the TEST_SRC_PATH and TEST_TARG_PATH
		       (string-substitute "TEST_TARG_PATH" test-path
					  (string-substitute "TEST_SRC_PATH" test-src-path cmd #t) #t)
		       #f)))
	 (cmd    (if ovrcmd 
		     ovrcmd
		     (conc "rsync -av" (if (debug:debug-mode 1) "" "q") " " test-src-path "/ " test-path "/"
			   " >> " test-path "/mt_launch.log 2>> " test-path "/mt_launch.log")))
	 (status (system cmd)))
    (if (not (eq? status 0))
	(debug:print 2 *default-log-port* "ERROR: problem with running \"" cmd "\""))))


;; Desired directory structure:
;;
;;  <linkdir> - <target> - <testname> -.
;;                                     |
;;                                     v
;;  <rundir>  -  <target>  -    <testname> -|- <itempath(s)>
;;
;;  dir stored in test is:
;; 
;;  <linkdir> - <target> - <testname> [ - <itempath> ]
;; 
;; All log file links should be stored relative to the top of link path
;;  
;; <target> - <testname> [ - <itempath> ] 
;;
(define (create-work-area run-id run-info keyvals test-id test-src-path disk-path testname itemdat #!key (remtries 2))
  (let* ((item-path (if (string? itemdat) itemdat (item-list->path itemdat))) ;; if pass in string - just use it
	 (runname   (if (string? run-info) ;; if we pass in a string as run-info use it as run-name.
			run-info
			(db:get-value-by-header (db:get-rows run-info)
						(db:get-header run-info)
						"runname")))
	 (contour   #f) ;; NOT READY FOR THIS (args:get-arg "-contour"))
	 ;; convert back to db: from rdb: - this is always run at server end
	 (target   (string-intersperse (map cadr keyvals) "/"))

	 (not-iterated  (equal? "" item-path))

	 ;; all tests are found at <rundir>/test-base or <linkdir>/test-base
	 (testtop-base (conc target "/" runname "/" testname))
	 (test-base    (conc testtop-base (if not-iterated "" "/") item-path))

	 ;; nb// if itempath is not "" then it is prefixed with "/"
	 (toptest-path (conc disk-path (if contour (conc "/" contour) "") "/" testtop-base))
	 (test-path    (conc disk-path (if contour (conc "/" contour) "") "/" test-base))

	 ;; ensure this exists first as links to subtests must be created there
	 (linktree  (common:get-linktree))
	 ;; WAS: (let ((rd (config-lookup *configdat* "setup" "linktree")))
	 ;;         (if rd rd (conc *toppath* "/runs"))))
	 ;; which seems wrong ...

	 (lnkbase   (conc linktree (if contour (conc "/" contour) "") "/" target "/" runname))
	 (lnkpath   (conc lnkbase "/" testname))
	 (lnkpathf  (conc lnkpath (if not-iterated "" "/") item-path))
	 (lnktarget (conc lnkpath "/" item-path)))

    ;; Update the rundir path in the test record for all, rundir=physical, shortdir=logical
    ;;                                                 rundir   shortdir
    (rmt:general-call 'test-set-rundir-shortdir run-id lnkpathf test-path testname item-path run-id)

    (debug:print 2 *default-log-port* "INFO:\n       lnkbase=" lnkbase "\n       lnkpath=" lnkpath "\n  toptest-path=" toptest-path "\n     test-path=" test-path)
    (if (not (common:file-exists? linktree))
	(begin
	  (debug:print 0 *default-log-port* "WARNING: linktree did not exist! Creating it now at " linktree)
	  (create-directory linktree #t))) ;; (system (conc "mkdir -p " linktree))))
    ;; create the directory for the tests dir links, this is needed no matter what... try up to three times
    (let loop ((done 3)) 
      (let ((success (if (and (not (common:directory-exists? lnkbase))
			      (not (common:file-exists? lnkbase)))
			 (handle-exceptions
			  exn
			  (begin
			    (debug:print-error 0 *default-log-port* "Problem creating linktree base at " lnkbase)
			    (print-error-message exn (current-error-port))
			    #t)
			  (create-directory lnkbase #t)
			  #f))))
	(if (and (not success)(> done 0))
	    (loop (- done 1)))))
    
    ;; update the toptest record with its location rundir, cache the path
    ;; This wass highly inefficient, one db write for every subtest, potentially
    ;; thousands of unnecessary updates, cache the fact it was set and don't set it 
    ;; again. 

    ;; Now create the link from the test path to the link tree, however
    ;; if the test is iterated it is necessary to create the parent path
    ;; to the iteration. use pathname-directory to trim the path by one
    ;; level
    (if (not not-iterated) ;; i.e. iterated
	(let ((iterated-parent  (pathname-directory (conc lnkpath "/" item-path))))
	  (debug:print-info 2 *default-log-port* "Creating iterated parent " iterated-parent)
	  (handle-exceptions
	   exn
	   (begin
	     (debug:print-error 0 *default-log-port* " Failed to create directory " iterated-parent ((condition-property-accessor 'exn 'message) exn) ", exiting")
	     (exit 1))
	   (create-directory iterated-parent #t))))

    (if (symbolic-link? lnkpath) 
	(handle-exceptions
	 exn
	 (begin
	   (debug:print-error 0 *default-log-port* " Failed to remove symlink " lnkpath ((condition-property-accessor 'exn 'message) exn) ", exiting")
	   (exit 1))
	 (delete-file lnkpath)))

    (if (not (or (common:file-exists? lnkpath)
		 (symbolic-link? lnkpath)))
	(handle-exceptions
	 exn
	 (begin
	   (debug:print-error 0 *default-log-port* " Failed to create symlink " lnkpath ((condition-property-accessor 'exn 'message) exn) ", exiting")
	   (exit 1))
	 (create-symbolic-link toptest-path lnkpath)))
    
    ;; NB - This was not working right - some top tests are not getting the path set!!!
    ;;
    ;; Do the setting of this record after the paths are created so that the shortdir can 
    ;; be set to the real directory location. This is safer for future clean up if the link
    ;; tree is damaged or lost.
    ;; 
    (if (not (hash-table-ref/default *toptest-paths* testname #f))
	(let* ((testinfo       (rmt:get-test-info-by-id run-id test-id)) ;;  run-id testname item-path))
	       (curr-test-path (if testinfo ;; (filedb:get-path *fdb*
							     ;; (db:get-path dbstruct
				   ;; (rmt:sdb-qry 'getstr 
				   (db:test-get-rundir testinfo) ;; ) ;; )
				   #f)))
	  (hash-table-set! *toptest-paths* testname curr-test-path)
	  ;; NB// Was this for the test or for the parent in an iterated test?
	  (rmt:general-call 'test-set-rundir-shortdir run-id lnkpath 
			    (if (common:file-exists? lnkpath)
				;; (resolve-pathname lnkpath)
				(common:nice-path lnkpath)
				lnkpath)
			    testname "" run-id)
	  ;; (rmt:general-call 'test-set-rundir run-id lnkpath testname "") ;; toptest-path)
	  (if (or (not curr-test-path)
		  (not (directory-exists? toptest-path)))
	      (begin
		(debug:print-info 2 *default-log-port* "Creating " toptest-path " and link " lnkpath)
		(handle-exceptions
		 exn
		 #f ;; don't care to catch and deal with errors here for now.
		 (create-directory toptest-path #t))
		(hash-table-set! *toptest-paths* testname toptest-path)))))

    ;; The toptest path has been created, the link to the test in the linktree has
    ;; been created. Now, if this is an iterated test the real test dir must be created
    (if (not not-iterated) ;; this is an iterated test
	(begin ;; (let ((lnktarget (conc lnkpath "/" item-path)))
	  (debug:print 2 *default-log-port* "Setting up sub test run area")
	  (debug:print 2 *default-log-port* " - creating run area in " test-path)
	  (handle-exceptions
	   exn
	   (begin
	     (debug:print-error 0 *default-log-port* " Failed to create directory " test-path ((condition-property-accessor 'exn 'message) exn) ", exiting")
	     (exit 1))
	   (create-directory test-path #t))
	  (debug:print 2 *default-log-port* 
		       " - creating link from: " test-path "\n"
		       "                   to: " lnktarget)

	  ;; If there is already a symlink delete it and recreate it.
	  (handle-exceptions
	   exn
	   (begin
	     (debug:print-error 0 *default-log-port* " Failed to re-create link " lnktarget ((condition-property-accessor 'exn 'message) exn) ", exiting")
	     (exit))
	   (if (symbolic-link? lnktarget)     (delete-file lnktarget))
	   (if (not (common:file-exists? lnktarget)) (create-symbolic-link test-path lnktarget)))))

    (if (not (directory? test-path))
	(create-directory test-path #t)) ;; this is a hack, I don't know why out of the blue this path does not exist sometimes

    (if (and test-src-path (directory? test-path))
	(begin
	  (launch:test-copy test-src-path test-path)
	  (list lnkpathf lnkpath ))
	(if (and test-src-path (> remtries 0))
	    (begin
	      (debug:print-error 0 *default-log-port* "Failed to create work area at " test-path " with link at " lnktarget ", remaining attempts " remtries)
	      ;; 
	      (create-work-area run-id run-info keyvals test-id test-src-path disk-path testname itemdat remtries: (- remtries 1)))
	    (list #f #f)))))


(define (launch:handle-zombie-tests run-id)
  (let* ((key (conc "zombiescan-runid-"run-id))
         (now (current-seconds))
         (threshold (- (current-seconds)  (* 2 (or (configf:lookup-number *configdat* "setup" "deadtime") 120))))
         (val (rmt:get-var key))
         (do-scan?
          (cond
           ((not val)
            #t)
           ((< val threshold)
            #t)
           (else #f))))
    (when do-scan?
      (debug:print 1 *default-log-port* "INFO: search and mark zombie tests")
      (rmt:set-var key (current-seconds))
      (rmt:find-and-mark-incomplete run-id #f))))





;; 1. look though disks list for disk with most space
;; 2. create run dir on disk, path name is meaningful
;; 3. create link from run dir to megatest runs area 
;; 4. remotely run the test on allocated host
;;    - could be ssh to host from hosts table (update regularly with load)
;;    - could be netbatch
;;      (launch-test db (cadr status) test-conf))
(define (launch-test test-id run-id run-info keyvals runname test-conf test-name test-path itemdat params)
  (mutex-lock! *launch-setup-mutex*) ;; setting variables and processing the testconfig is NOT thread-safe, reuse the launch-setup mutex
  (let* ( ;; (lock-key        (conc "test-" test-id))
	;; (got-lock        (let loop ((lock        (rmt:no-sync-get-lock lock-key))
	;; 			     (expire-time (+ (current-seconds) 15))) ;; give up on getting the lock and steal it after 15 seconds
	;; 		    (if (car lock)
	;; 			#t
	;; 			(if (> (current-seconds) expire-time)
	;; 			    (begin
	;; 			      (debug:print-info 0 *default-log-port* "Timed out waiting for a lock to launch test " keyvals " " runname " " test-name " " test-path)
	;; 			      (rmt:no-sync-del! lock-key) ;; destroy the lock
	;; 			      (loop (rmt:no-sync-get-lock lock-key) expire-time)) ;; 
	;; 			    (begin
	;; 			      (thread-sleep! 1)
	;; 			      (loop (rmt:no-sync-get-lock lock-key) expire-time))))))
	 (item-path       (item-list->path itemdat))
	 (contour         #f)) ;; NOT READY FOR THIS (args:get-arg "-contour")))
    (let loop ((delta        (- (current-seconds) *last-launch*))
	       (launch-delay (configf:lookup-number *configdat* "setup" "launch-delay" default: 1)))
      (if (> launch-delay delta)
	  (begin
	    (if (common:low-noise-print 1200 "test launch delay") ;; every two hours or so remind the user about launch delay.
		(debug:print-info 0 *default-log-port* "NOTE: test launches are delayed by " launch-delay " seconds. See megatest.config launch-delay setting to adjust.")) ;; launch of " test-name " for " (- launch-delay delta) " seconds"))
	    (thread-sleep! (- launch-delay delta))
	    (loop (- (current-seconds) *last-launch*) launch-delay))))
    (change-directory *toppath*)
    (alist->env-vars ;; consolidate this code with the code in megatest.scm for "-execute", *maybe* - the longer they are set the longer each launch takes (must be non-overlapping with the vars)
     (append
      (list
       (list "MT_RUN_AREA_HOME" *toppath*)
       (list "MT_TEST_NAME" test-name)
       (list "MT_RUNNAME"   runname)
       (list "MT_ITEMPATH"  item-path)
       (list "MT_CONTOUR"   contour)
       )
      itemdat))
    (let* ((tregistry       (tests:get-all)) ;; third param (below) is system-allowed
           ;; for tconfig, why do we allow fallback to test-conf?
	   (tconfig         (or (tests:get-testconfig test-name item-path tregistry #t force-create: #t)
				(begin
                                  (debug:print 0 *default-log-port* "WARNING: falling back to pre-calculated testconfig. This is likely not desired.")
                                  test-conf))) ;; force re-read now that all vars are set
	   (useshell        (let ((ush (config-lookup *configdat* "jobtools"     "useshell")))
			      (if ush 
				  (if (equal? ush "no") ;; must use "no" to NOT use shell
				      #f
				      ush)
				  #t)))     ;; default is yes
	   (runscript       (config-lookup tconfig   "setup"        "runscript"))
	   (ezsteps         (> (length (hash-table-ref/default tconfig "ezsteps" '())) 0)) ;; don't send all the steps, could be big, just send a flag
	   (subrun          (> (length (hash-table-ref/default tconfig "subrun"  '())) 0)) ;; send a flag to process a subrun
	   ;; (diskspace       (config-lookup tconfig   "requirements" "diskspace"))
	   ;; (memory          (config-lookup tconfig   "requirements" "memory"))
	   ;; (hosts           (config-lookup *configdat* "jobtools"     "workhosts")) ;; I'm pretty sure this was never completed
	   (remote-megatest (config-lookup *configdat* "setup" "executable"))
	   (run-time-limit  (or (configf:lookup  tconfig   "requirements" "runtimelim")
				(configf:lookup  *configdat* "setup" "runtimelim")))
	   ;; FIXME SOMEDAY: not good how this is so obtuse, this hack is to 
	   ;;                allow running from dashboard. Extract the path
	   ;;                from the called megatest and convert dashboard
	   ;;             	  or dboard to megatest
	   (local-megatest  (let* ((lm  (car (argv)))
				   (dir (pathname-directory lm))
				   (exe (pathname-strip-directory lm)))
			      (conc (if dir (conc dir "/") "")
				    (case (string->symbol exe)
				      ((dboard)    "../megatest")
				      ((mtest)     "../megatest")
				      ((dashboard) "megatest")
				      (else exe)))))
	   (launcher        (common:get-launcher *configdat* test-name item-path)) ;; (config-lookup *configdat* "jobtools"     "launcher"))
	   (test-sig        (conc (common:get-area-name *alldat*) ":" test-name ":" item-path)) ;; (item-list->path itemdat))) ;; test-path is the full path including the item-path
	   (work-area       #f)
	   (toptest-work-area #f) ;; for iterated tests the top test contains data relevant for all
	   (diskpath   #f)
	   (cmdparms   #f)
	   (fullcmd    #f) ;; (define a (with-output-to-string (lambda ()(write x))))
	   (mt-bindir-path #f)
	   (testinfo   (rmt:get-test-info-by-id run-id test-id))
	   (mt_target  (string-intersperse (map cadr keyvals) "/"))
	   (debug-param (append (if (args:get-arg "-debug")  (list "-debug" (args:get-arg "-debug")) '())
				(if (args:get-arg "-logging")(list "-logging") '()))))
      ;; (if hosts (set! hosts (string-split hosts)))
      ;; set the megatest to be called on the remote host
      (if (not remote-megatest)(set! remote-megatest local-megatest)) ;; "megatest"))
      (set! mt-bindir-path (pathname-directory remote-megatest))
      (if launcher (set! launcher (string-split launcher)))
      ;; set up the run work area for this test
      (if (and (args:get-arg "-preclean") ;; user has requested to preclean for this run
	       (not (member (db:test-get-rundir testinfo)(list "n/a" "/tmp/badname")))) ;; n/a is a placeholder and thus not a read dir
	  (begin
	    (debug:print-info 0 *default-log-port* "attempting to preclean directory " (db:test-get-rundir testinfo) " for test " test-name "/" item-path)
	    (runs:remove-test-directory testinfo 'remove-data-only))) ;; remove data only, do not perturb the record
      
      ;; prevent overlapping actions - set to LAUNCHED as early as possible
      ;;
      ;; the following call handles waiver propogation. cannot yet condense into roll-up-pass-fail
      (tests:test-set-status! run-id test-id "LAUNCHED" "n/a" #f #f) ;; (if launch-results launch-results "FAILED"))
      (rmt:set-state-status-and-roll-up-items run-id test-name item-path #f "LAUNCHED" #f)
      ;; (pp (hash-table->alist tconfig))
      (set! diskpath (get-best-disk *configdat* tconfig))
      (if diskpath
	  (let ((dat  (create-work-area run-id run-info keyvals test-id test-path diskpath test-name itemdat)))
	    (set! work-area (car dat))
	    (set! toptest-work-area (cadr dat))
	    (debug:print-info 2 *default-log-port* "Using work area " work-area))
	  (begin
	    (set! work-area (conc test-path "/tmp_run"))
	    (create-directory work-area #t)
	    (debug:print 0 *default-log-port* "WARNING: No disk work area specified - running in the test directory under tmp_run")))
      (set! cmdparms (base64:base64-encode 
		      (z3:encode-buffer 
		       (with-output-to-string
			 (lambda () ;; (list 'hosts     hosts)
			   (write (list (list 'testpath  test-path)
					;; (list 'transport (conc *transport-type*))
					;; (list 'serverinf *server-info*)
					(list 'homehost  (let* ((hhdat (common:get-homehost)))
							   (if hhdat
							       (car hhdat)
							       #f)))
					(list 'serverurl (if *alldat* 
							     (alldat-server-url *alldat*)
							     #f)) ;;
					(list 'areaname  (common:get-area-name *alldat*))
					(list 'toppath   *toppath*)
					(list 'work-area work-area)
					(list 'test-name test-name) 
					(list 'runscript runscript) 
					(list 'run-id    run-id   )
					(list 'test-id   test-id  )
					;; (list 'item-path item-path )
					(list 'itemdat   itemdat  )
					(list 'megatest  remote-megatest)
					(list 'ezsteps   ezsteps)
					(list 'subrun    subrun)
					(list 'target    mt_target)
					(list 'contour   contour)
					(list 'runtlim   (if run-time-limit (common:hms-string->seconds run-time-limit) #f))
					(list 'env-ovrd  (hash-table-ref/default *configdat* "env-override" '())) 
					(list 'set-vars  (if params (hash-table-ref/default params "-setvars" #f)))
					(list 'runname   runname)
					(list 'mt-bindir-path mt-bindir-path))))))))
      
      ;; clean out step records from previous run if they exist
      ;; (rmt:delete-test-step-records run-id test-id)
      ;; if the dir does not exist we may have a itempath where individual variables are a path, launch anyway
      (if (common:file-exists? work-area)
	  (change-directory work-area)) ;; so that log files from the launch process don't clutter the test dir
      (cond
       ;; ((and launcher hosts) ;; must be using ssh hostname
       ;;    (set! fullcmd (append launcher (car hosts)(list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param)))
       ;; (set! fullcmd (append launcher (car hosts)(list remote-megatest test-sig "-execute" cmdparms))))
       (launcher
	(set! fullcmd (append launcher (list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param)))
       ;; (set! fullcmd (append launcher (list remote-megatest test-sig "-execute" cmdparms))))
       (else
	(if (not useshell)(debug:print 0 *default-log-port* "WARNING: internal launching will not work well without \"useshell yes\" in your [jobtools] section"))
	(set! fullcmd (append (list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param (list (if useshell "&" ""))))))
      ;; (set! fullcmd (list remote-megatest test-sig "-execute" cmdparms (if useshell "&" "")))))
      (if (args:get-arg "-xterm")(set! fullcmd (append fullcmd (list "-xterm"))))
      (debug:print 1 *default-log-port* "Launching " work-area)
      ;; set pre-launch-env-vars before launching, keep the vars in prevvals and put the envionment back when done
      (debug:print 4 *default-log-port* "fullcmd: " fullcmd)
      (set! *last-launch* (current-seconds)) ;; all that junk above takes time, set this as late as possible.
      (let* ((commonprevvals (alist->env-vars
			      (hash-table-ref/default *configdat* "env-override" '())))
	     (miscprevvals   (alist->env-vars ;; consolidate this code with the code in megatest.scm for "-execute"
			      (append (list (list "MT_TEST_RUN_DIR" work-area)
					    (list "MT_TEST_NAME" test-name)
					    (list "MT_ITEM_INFO" (conc itemdat)) 
					    (list "MT_RUNNAME"   runname)
					    (list "MT_TARGET"    mt_target)
					    (list "MT_ITEMPATH"  item-path)
					    )
				      itemdat)))
	     (testprevvals   (alist->env-vars
			      (hash-table-ref/default tconfig "pre-launch-env-overrides" '())))
	     ;; Launchwait defaults to true, must override it to turn off wait
	     (launchwait     (if (equal? (configf:lookup *configdat* "setup" "launchwait") "no") #f #t))
	     (launch-results-prev (apply (if launchwait ;; BB: TODO: refactor this to examine return code of launcher, if nonzero, set state to launch failed.
					process:cmd-run-with-stderr-and-exitcode->list
					process-run)
				    (if useshell
					(let ((cmdstr (string-intersperse fullcmd " ")))
					  (if launchwait
					      cmdstr
					      (conc cmdstr " >> mt_launch.log 2>&1 &")))
					(car fullcmd))
				    (if useshell
					'()
					(cdr fullcmd))))
             (success        (if launchwait (equal? 0 (cadr launch-results-prev)) #t))
             (launch-results (if launchwait (car launch-results-prev) launch-results-prev)))
        (if (not success)
            (tests:test-set-status! run-id test-id "COMPLETED" "DEAD" "launcher failed; exited non-zero; check mt_launch.log" #f)) ;; (if launch-results launch-results "FAILED"))
        (mutex-unlock! *launch-setup-mutex*) ;; yes, really should mutex all the way to here. Need to put this entire process into a fork.
	;; (rmt:no-sync-del! lock-key)         ;; release the lock for starting this test
	(if (not launchwait) ;; give the OS a little time to allow the process to start
	    (thread-sleep! 0.01))
	(with-output-to-file "mt_launch.log"
	  (lambda ()
	    (print "LAUNCHCMD: " (string-intersperse fullcmd " "))
	    (if (list? launch-results)
		(apply print launch-results)
		(print "NOTE: launched \"" fullcmd "\"\n  but did not wait for it to proceed. Add the following to megatest.config \n[setup]\nlaunchwait yes\n  if you have problems with this"))
	    #:append))
	(debug:print 2 *default-log-port* "Launching completed, updating db")
	(debug:print 2 *default-log-port* "Launch results: " launch-results)
	(if (not launch-results)
	    (begin
	      (print "ERROR: Failed to run " (string-intersperse fullcmd " ") ", exiting now")
	      ;; (sqlite3:finalize! db)
	      ;; good ole "exit" seems not to work
	      ;; (_exit 9)
	      ;; but this hack will work! Thanks go to Alan Post of the Chicken email list
	      ;; NB// Is this still needed? Should be safe to go back to "exit" now?
	      (process-signal (current-process-id) signal/kill)
	      ))
	(alist->env-vars miscprevvals)
	(alist->env-vars testprevvals)
	(alist->env-vars commonprevvals)
	launch-results))
    (change-directory *toppath*)))

;; recover a test where the top controlling mtest may have died
;;
(define (launch:recover-test run-id test-id)
  ;; this function is called on the test run host via ssh
  ;;
  ;; 1. look at the process from pid
  ;;    - is it owned by calling user
  ;;    - it it's run directory correct for the test
  ;;    - is there a controlling mtest (maybe stuck)
  ;; 2. if recovery is needed watch pid
  ;;    - when it exits take the exit code and do the needful
  ;;
  (let* ((pid (rmt:test-get-top-process-id run-id test-id))
	 (psres (with-input-from-pipe
		 (conc "ps -F -u " (current-user-name) " | grep -E '" pid " ' | grep -v 'grep -E " pid "'")
		 (lambda ()
		   (read-line))))
	 (rundir (if (string? psres) ;; real process owned by user
		     (read-symbolic-link (conc "/proc/" pid "/cwd"))
		     #f)))
    ;; now wait on that process if all is correct
    ;; periodically update the db with runtime
    ;; when the process exits look at the db, if still RUNNING after 10 seconds set
    ;; state/status appropriately
    (process-wait pid)))

;;======================================================================
;; lock-queue
;;======================================================================


;;======================================================================
;; attempt to prevent overlapping updates of rollup files by queueing
;; update requests in an sqlite db
;;======================================================================

;;======================================================================
;; db record, <vector db path-to-db>
;;======================================================================

(define (make-lock-queue:db-dat)(make-vector 3))
(define-inline (lock-queue:db-dat-get-db        vec)    (vector-ref  vec 0))
(define-inline (lock-queue:db-dat-get-path      vec)    (vector-ref  vec 1))
(define-inline (lock-queue:db-dat-set-db!       vec val)(vector-set! vec 0 val))
(define-inline (lock-queue:db-dat-set-path!     vec val)(vector-set! vec 1 val))

(define (lock-queue:delete-lock-db dbdat)
  (let ((fname (lock-queue:db-dat-get-path dbdat)))
    (system (conc "rm -f " fname "*"))))

(define (lock-queue:open-db fname #!key (count 10))
  (let* ((actualfname (conc fname ".lockdb"))
	 (dbexists (common:file-exists? actualfname))
	 (db       (sqlite3:open-database actualfname))
	 (handler  (make-busy-timeout 136000)))
    (if dbexists
	(vector db actualfname)
	(begin
	  (handle-exceptions
	   exn
	   (begin
	     (thread-sleep! 10)
	     (if (> count 0)
		 (lock-queue:open-db fname count: (- count 1))
		 (vector db actualfname)))
	   (sqlite3:with-transaction
	    db
	    (lambda ()
	      (sqlite3:execute 
	       db
	       "CREATE TABLE IF NOT EXISTS queue (
     	         id         INTEGER PRIMARY KEY,
                 test_id    INTEGER,
                 start_time INTEGER,
                 state      TEXT,
                 CONSTRAINT queue_constraint UNIQUE (test_id));")
	      (sqlite3:execute
	       db
	       "CREATE TABLE IF NOT EXISTS runlocks (
                 id         INTEGER PRIMARY KEY,
                 test_id    INTEGER,
                 run_lock   TEXT,
                 CONSTRAINT runlock_constraint UNIQUE (run_lock));"))))))
    (sqlite3:set-busy-handler! db handler)
    (vector db actualfname)))

(define (lock-queue:set-state dbdat test-id newstate #!key (remtries 10))
  (tasks:wait-on-journal (lock-queue:db-dat-get-path dbdat) 1200)
  (handle-exceptions
   exn
   (if (> remtries 0)
       (begin
	 (debug:print 0 *default-log-port* "WARNING: exception on lock-queue:set-state. Trying again in 30 seconds.")
	 (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
	 (thread-sleep! 30)
	 (lock-queue:set-state dbdat test-id newstate remtries: (- remtries 1)))
       (begin
	 (debug:print-error 0 *default-log-port* " Failed to set lock state for test with id " test-id ", error: " ((condition-property-accessor 'exn 'message) exn) ", giving up.")
	 #f))
   (sqlite3:execute (lock-queue:db-dat-get-db dbdat) "UPDATE queue SET state=? WHERE test_id=?;"
		    newstate
		    test-id)))

(define (lock-queue:any-younger? dbdat mystart test-id #!key (remtries 10))
  ;; no need to wait on journal on read only queries
  ;; (tasks:wait-on-journal (lock-queue:db-dat-get-path dbdat) 1200)
  (handle-exceptions
   exn
   (if (> remtries 0)
       (begin
	 (debug:print 0 *default-log-port* "WARNING: exception on lock-queue:any-younger. Removing lockdb and trying again in 5 seconds.")
	 (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
	 (thread-sleep! 5)
         (lock-queue:delete-lock-db dbdat)
	 (lock-queue:any-younger? dbdat mystart test-id remtries: (- remtries 1)))
       (begin
	 (debug:print-error 0 *default-log-port* " Failed to find younger locks for test with id " test-id ", error: " ((condition-property-accessor 'exn 'message) exn) ", giving up.")
	 #f))
   (let ((res #f))
     (sqlite3:for-each-row
      (lambda (tid)
	;; Actually this should not be needed as mystart cannot be simultaneously less than and test-id same as 
	(if (not (equal? tid test-id)) 
	    (set! res tid)))
      (lock-queue:db-dat-get-db dbdat)
      "SELECT test_id FROM queue WHERE start_time > ?;" mystart)
     res)))

(define (lock-queue:get-lock dbdat test-id #!key (count 10)(waiting-msg #f))
  (tasks:wait-on-journal (lock-queue:db-dat-get-path dbdat) 1200 remove: #t waiting-msg: "lock-queue:get-lock, waiting on journal")
  (let* ((res       #f)
	 (db        (lock-queue:db-dat-get-db dbdat))
	 (lckqry    (sqlite3:prepare db "SELECT test_id,run_lock FROM runlocks WHERE run_lock='locked';"))
	 (mklckqry  (sqlite3:prepare db "INSERT INTO runlocks (test_id,run_lock) VALUES (?,'locked');")))
    (let ((result 
	   (handle-exceptions
	    exn
	    (begin
	      (debug:print 0 *default-log-port* "WARNING: failed to get queue lock. Removing lock db and returning fail") ;; Will try again in a few seconds")
	      (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
	      (thread-sleep! 10)
	      ;; (if (> count 0)	
	      ;;  #f ;; (lock-queue:get-lock dbdat test-id count: (- count 1)) - give up on retries 
	      ;; (begin ;; never recovered, remote the lock file and return #f, no lock obtained
	      (lock-queue:delete-lock-db dbdat)
	      #f)
	    (sqlite3:with-transaction
	     db
	     (lambda ()
	       (sqlite3:for-each-row (lambda (tid lockstate)
				       (set! res (list tid lockstate)))
				     lckqry)
	       (if res
		   (if (equal? (car res) test-id)
		       #t ;; already have the lock
		       #f)
		   (begin
		     (sqlite3:execute mklckqry test-id)
		     ;; if no error handled then return #t for got the lock
		     #t)))))))
      (sqlite3:finalize! lckqry)
      (sqlite3:finalize! mklckqry)
      result)))

(define (lock-queue:release-lock fname test-id #!key (count 10))
  (let* ((dbdat (lock-queue:open-db fname)))
    (tasks:wait-on-journal (lock-queue:db-dat-get-path dbdat) 1200 "lock-queue:release-lock; waiting on journal")
    (handle-exceptions
     exn
     (begin
       (debug:print 0 *default-log-port* "WARNING: Failed to release queue lock. Will try again in few seconds")
       (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
       (thread-sleep! (/ count 10))
       (if (> count 0)
	   (begin
	     (sqlite3:finalize! (lock-queue:db-dat-get-db dbdat))
	     (lock-queue:release-lock fname test-id count: (- count 1)))
	   (let ((journal (conc fname "-journal")))
	     ;; If we've tried ten times and failed there is a serious problem
	     ;; try to remove the lock db and allow it to be recreated
	     (handle-exceptions
	      exn
	      #f
	      (if (common:file-exists? journal)(delete-file journal))
	      (if (common:file-exists? fname)  (delete-file fname))
	      #f))))
     (sqlite3:execute (lock-queue:db-dat-get-db dbdat) "DELETE FROM runlocks WHERE test_id=?;" test-id)
     (sqlite3:finalize! (lock-queue:db-dat-get-db dbdat)))))

(define (lock-queue:steal-lock dbdat test-id #!key (count 10))
  (debug:print-info 0 *default-log-port* "Attempting to steal lock at " (lock-queue:db-dat-get-path dbdat))
  (tasks:wait-on-journal (lock-queue:db-dat-get-path dbdat) 1200 "lock-queue:steal-lock; waiting on journal")
  (handle-exceptions
   exn
   (begin
     (debug:print 0 *default-log-port* "WARNING: Failed to steal queue lock. Will try again in few seconds")
     (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
     (thread-sleep! 10)
     (if (> count 0)
	 (lock-queue:steal-lock dbdat test-id count: (- count 1))
	 #f))
   (sqlite3:execute (lock-queue:db-dat-get-db dbdat) "DELETE FROM runlocks WHERE run_lock='locked';"))
  (lock-queue:get-lock dbdat test-it))

;; returns #f if ok to skip the task
;; returns #t if ok to proceed with task
;; otherwise waits
;;
(define (lock-queue:wait-turn fname test-id #!key (count 10)(waiting-msg #f))
  (let* ((dbdat   (lock-queue:open-db fname))
	 (mystart (current-seconds))
	 (db      (lock-queue:db-dat-get-db dbdat)))
    ;; (tasks:wait-on-journal (lock-queue:db-dat-get-path dbdat) 1200 waiting-msg: "lock-queue:wait-turn; waiting on journal file")
    (handle-exceptions
     exn
     (begin
       (debug:print 0 *default-log-port* "WARNING: Failed to find out if it is ok to skip the wait queue. Will try again in few seconds")
       (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
       (print-call-chain (current-error-port))
       (thread-sleep! 10)
       (if (> count 0)
	   (begin
	     (sqlite3:finalize! db)
	     (lock-queue:wait-turn fname test-id count: (- count 1)))
	   (begin
	     (debug:print 0 *default-log-port* "Giving up calls to lock-queue:wait-turn for test-id " test-id " at path " fname ", printing call chain")
	     (print-call-chain (current-error-port))
	     #f)))
     ;; wait 10 seconds and then check to see if someone is already updating the html
     (thread-sleep! 10)
     (if (not (lock-queue:any-younger? dbdat mystart test-id)) ;; no processing in flight, must try to start processing
	 (begin
	   (tasks:wait-on-journal (lock-queue:db-dat-get-path dbdat) 1200 waiting-msg: "lock-queue:wait-turn; waiting on journal file")
	   (sqlite3:execute
	    db
	    "INSERT OR REPLACE INTO queue (test_id,start_time,state) VALUES (?,?,'waiting');"
	    test-id mystart)
	   ;; (thread-sleep! 1) ;; give other tests a chance to register
	   (let ((result 
		  (let loop ((younger-waiting (lock-queue:any-younger? dbdat mystart test-id)))
		    (if younger-waiting
			(begin
			  ;; no need for us to wait. mark in the lock queue db as skipping
			  ;; no point in marking anything in the queue - simply never register this
			  ;; test as it is *covered* by a previously started update to the html file
			  ;; (lock-queue:set-state dbdat test-id "skipping")
			  #f) ;; let the calling process know that nothing needs to be done
			(if (lock-queue:get-lock dbdat test-id)
			    #t
			    (if (> (- (current-seconds) mystart) 36000) ;; waited too long, steal the lock
				(lock-queue:steal-lock dbdat test-id)
				(begin
				  (thread-sleep! 1)
				  (loop (lock-queue:any-younger? dbdat mystart test-id)))))))))
	     (sqlite3:finalize! db)
	     result))))))
	  
            
;; (use trace)
;; (trace lock-queue:get-lock lock-queue:release-lock lock-queue:wait-turn lock-queue:any-younger? lock-queue:set-state)

;;======================================================================
;; margs
;;======================================================================
(define args:arg-hash (make-hash-table))

(define (args:get-arg arg . default)
  (if (null? default)
      (hash-table-ref/default args:arg-hash arg #f)
      (hash-table-ref/default args:arg-hash arg (car default))))

(define (args:any? . args)
  (not (null? (filter (lambda (x) x)
		      (map args:get-arg args)))))

(define (args:get-arg-from ht arg . default)
  (if (null? default)
      (hash-table-ref/default ht arg #f)
      (hash-table-ref/default ht arg (car default))))

(define (args:usage . args)
  (if (> (length args) 0)
      (apply print "ERROR: " args))
  (if (string? help)
      (print help)
      (print "Usage: " (car (argv)) " ... "))
  (exit 0))

 ;; one-of args defined
(define (args:any-defined? . param)
  (let ((res #f))
    (for-each 
     (lambda (arg)
       (if (args:get-arg arg)(set! res #t)))
     param)
    res))

;; args: 
(define (args:get-args args params switches arg-hash num-needed)
  (let* ((numargs (length args))
	 (adj-num-needed (if num-needed (+ num-needed 2) #f)))
    (if (< numargs (if adj-num-needed adj-num-needed 2))
	(if (>= num-needed 1)
	    (args:usage "No arguments provided")
	    '())
	(let loop ((arg (cadr args))
		   (tail (cddr args))
		   (remargs '()))
	  (cond 
	   ((member arg params) ;; args with params
	    (if (< (length tail) 1)
		(args:usage "param given without argument " arg)
		(let ((val     (car tail))
		      (newtail (cdr tail)))
		  (hash-table-set! arg-hash arg val)
		  (if (null? newtail) remargs
		      (loop (car newtail)(cdr newtail) remargs)))))
	   ((member arg switches)         ;; args with no params (i.e. switches)
	    (hash-table-set! arg-hash arg #t)
	    (if (null? tail) remargs
		(loop (car tail)(cdr tail) remargs)))
	   (else
	    (if (null? tail)(append remargs (list arg)) ;; return the non-used args
		(loop (car tail)(cdr tail)(append remargs (list arg))))))))
    ))

(define (args:print-args remargs arg-hash)
  (print "ARGS: " remargs)
  (for-each (lambda (arg)
	      (print "   " arg "   " (hash-table-ref/default arg-hash arg #f)))
	    (hash-table-keys arg-hash)))

;;======================================================================
;; mt
;;======================================================================

;; This is the Megatest API. All generally "useful" routines will be wrapped or extended
;; here.

;;======================================================================
;;  R U N S
;;======================================================================

;; runs:get-runs-by-patt
;; get runs by list of criteria
;; register a test run with the db
;;
;; Use: (db-get-value-by-header (db:get-header runinfo)(db:get-rows runinfo))
;;  to extract info from the structure returned
;;
(define (mt:get-runs-by-patt keys runnamepatt targpatt)
  (let loop ((runsdat  (rmt:get-runs-by-patt keys runnamepatt targpatt 0 500 #f 0))
	     (res      '())
	     (offset   0)
	     (limit    500))
    ;; (print "runsdat: " runsdat)
    (let* ((header    (vector-ref runsdat 0))
	   (runslst   (vector-ref runsdat 1))
	   (full-list (append res runslst))
	   (have-more (eq? (length runslst) limit)))
      ;; (debug:print 0 *default-log-port* "header: " header " runslst: " runslst " have-more: " have-more)
      (if have-more 
	  (let ((new-offset (+ offset limit))
		(next-batch (rmt:get-runs-by-patt keys runnamepatt targpatt offset limit #f 0)))
	    (debug:print-info 4 *default-log-port* "More than " limit " runs, have " (length full-list) " runs so far.")
	    (debug:print-info 0 *default-log-port* "next-batch: " next-batch)
	    (loop next-batch
		  full-list
		  new-offset
		  limit))
	 (vector header full-list)))))

;;======================================================================
;;  T E S T S
;;======================================================================

(define (mt:get-tests-for-run run-id testpatt states status #!key (not-in #t) (sort-by 'event_time) (sort-order "ASC") (qryvals #f)(last-update #f))
  (let loop ((testsdat (rmt:get-tests-for-run run-id testpatt states status 0 500 not-in sort-by sort-order qryvals last-update 'normal))
	     (res      '())
	     (offset   0)
	     (limit    500))
    (let* ((full-list (append res testsdat))
	   (have-more (eq? (length testsdat) limit)))
      (if have-more 
	  (let ((new-offset (+ offset limit)))
	    (debug:print-info 4 *default-log-port* "More than " limit " tests, have " (length full-list) " tests so far.")
	    (loop (rmt:get-tests-for-run run-id testpatt states status new-offset limit not-in sort-by sort-order qryvals last-update 'normal)
		  full-list
		  new-offset
		  limit))
	  full-list))))

(define (mt:lazy-get-prereqs-not-met run-id waitons ref-item-path #!key (mode '(normal))(itemmaps #f) )
  (let* ((key    (list run-id waitons ref-item-path mode))
	 (res    (hash-table-ref/default *pre-reqs-met-cache* key #f))
	 (useres (let ((last-time (if (vector? res) (vector-ref res 0) #f)))
		   (if last-time
		       (< (current-seconds)(+ last-time 5))
		       #f))))
    (if useres
	(let ((result (vector-ref res 1)))
	  (debug:print 4 *default-log-port* "Using lazy value res: " result)
	  result)
	(let ((newres (rmt:get-prereqs-not-met run-id waitons ref-item-path mode: mode itemmaps: itemmaps)))
	  (hash-table-set! *pre-reqs-met-cache* key (vector (current-seconds) newres))
	  newres))))

(define (mt:get-run-stats dbstruct run-id)
;;  Get run stats from local access, move this ... but where?
  (db:get-run-stats dbstruct run-id))

(define (mt:discard-blocked-tests run-id failed-test tests test-records)
  (if (null? tests)
      tests
      (begin
	(debug:print-info 1 *default-log-port* "Discarding tests from " tests " that are waiting on " failed-test)
	(let loop ((testn (car tests))
		   (remt  (cdr tests))
		   (res   '()))
	  (let* ((test-dat (hash-table-ref/default test-records testn (vector #f #f '())))
		 (waitons  (vector-ref test-dat 2)))
	    ;; (print "mt:discard-blocked-tests run-id: " run-id " failed-test: " failed-test " testn: " testn " with waitons: " waitons)
	    (if (null? remt)
		(let ((new-res (reverse res)))
		  ;; (print "       new-res: " new-res)
		  new-res)
		(loop (car remt)
		      (cdr remt)
		      (if (member failed-test waitons)
			  (begin
			    (debug:print 0 *default-log-port* "Discarding test " testn "(" test-dat ") due to " failed-test)
			    res)
			  (cons testn res)))))))))

;;======================================================================
;;  T R I G G E R S
;;======================================================================

(define (mt:run-trigger cmd test-id test-rundir trigger logname test-name item-path event-time actual-state actual-status)
  ;; Putting the commandline into ( )'s means no control over the shell. 
  ;; stdout and stderr will be caught in the NBFAKE or mt_launch.log files
  ;; or equivalent. No need to do this. Just run it?
  (let* ((fullcmd (conc "nbfake "
			cmd           " "
			test-id       " "
			test-rundir   " "
			trigger       " "
			test-name     " "
			item-path     " " ;; has / prepended to deal with toplevel tests
			actual-state  " "
			actual-status " "
			event-time
			))
	 (prev-nbfake-log (get-environment-variable "NBFAKE_LOG")))
    (setenv "NBFAKE_LOG" (conc (cond
				((and (directory-exists? test-rundir)
				      (file-write-access? test-rundir))
				 test-rundir)
				((and (directory-exists? *toppath*)
				      (file-write-access? *toppath*))
				 *toppath*)
				(else (conc "/tmp/" (current-user-name))))
			       "/" logname))
    (debug:print-info 0 *default-log-port* "TRIGGERED on " trigger ", running command " fullcmd " output at " (get-environment-variable "NBFAKE_LOG"))
    ;; (call-with-environment-variables
    ;;  `(("NBFAKE_LOG" . ,(conc test-rundir "/" logname)))
    ;;  (lambda ()
    (process-run fullcmd)
    (if prev-nbfake-log
	(setenv "NBFAKE_LOG" prev-nbfake-log)
	(unsetenv "NBFAKE_LOG"))
    )) ;; ))

(define (mt:process-triggers dbstruct run-id test-id newstate newstatus)
  (if test-id 
      (let* ((test-dat      (db:get-test-info-by-id dbstruct run-id test-id)))
	(if test-dat
	    (let* ((test-rundir   (db:test-get-rundir       test-dat)) ;; ) ;; )
		   (test-name     (db:test-get-testname     test-dat))
		   (item-path     (db:test-get-item-path    test-dat))
		   (duration      (db:test-get-run_duration test-dat))
		   (comment       (db:test-get-comment      test-dat))
		   (event-time    (db:test-get-event_time   test-dat))
		   (tconfig       #f)
		   (state         (if newstate  newstate  (db:test-get-state  test-dat)))
		   (status        (if newstatus newstatus (db:test-get-status test-dat))))
	      ;; (mutex-lock! *triggers-mutex*)
              (handle-exceptions
               exn
               (begin
                 (debug:print-error 0 *default-log-port* " Exception in mt:process-triggers for run-id="run-id" test-id="test-id" newstate="newstate" newstatus="newstatus
                                    "\n   error: " ((condition-property-accessor 'exn 'message) exn)
                                    "\n   test-rundir="test-rundir
                                    "\n   test-name="test-name
                                    "\n   item-path="item-path
                                    "\n   state="state
                                    "\n   status="status
                                    "\n")
                 (print-call-chain (current-error-port))
                 #f)
               (if (and test-name
                        test-rundir)   ;; #f means no dir set yet
                   ;; (common:file-exists? test-rundir)
                   ;; (directory? test-rundir))
                   (call-with-environment-variables
                    (list (cons "MT_TEST_NAME"    (or test-name "no such test"))
                          (cons "MT_TEST_RUN_DIR" (or test-rundir "no test directory yet"))
                          (cons "MT_ITEMPATH"     (or item-path "")))
                    (lambda ()
                      (if (directory-exists? test-rundir)
                          (push-directory test-rundir)
                          (push-directory *toppath*))
                      (set! tconfig (mt:lazy-read-test-config test-name))
                      (for-each (lambda (trigger)
                                  (let* ((munged-trigger (string-translate trigger "/ " "--"))
					(logname        (conc "last-trigger-" munged-trigger ".log")))
                                    ;; first any triggers from the testconfig
                                    (let ((cmd  (configf:lookup tconfig "triggers" trigger)))
                                      (if cmd (mt:run-trigger cmd test-id test-rundir trigger (conc "tconfig-" logname) test-name item-path event-time state status)))
                                    ;; next any triggers from megatest.config
                                    (let ((cmd  (configf:lookup *configdat* "triggers" trigger)))
                                      (if cmd (mt:run-trigger cmd test-id test-rundir trigger (conc "mtconfig-" logname) test-name item-path event-time state status)))))
                                (list
                                 (conc state "/" status)
                                 (conc state "/")
                                 (conc "/" status)))
		     (pop-directory))
                    )))
	      ;; (mutex-unlock! *triggers-mutex*)
	      )))))

;;======================================================================
;;  S T A T E   A N D   S T A T U S   F O R   T E S T S 
;;======================================================================

;; speed up for common cases with a little logic
(define (mt:test-set-state-status-by-id run-id test-id newstate newstatus newcomment)
  (if (not (and run-id test-id))
      (begin
	(debug:print-error 0 *default-log-port* "bad data handed to mt:test-set-state-status-by-id, run-id=" run-id ", test-id=" test-id ", newstate=" newstate)
	(print-call-chain (current-error-port))
	#f)
      (begin
	;; cond
	;; ((and newstate newstatus newcomment)
	;;  (rmt:general-call 'state-status-msg run-id newstate newstatus newcomment test-id))
	;; ((and newstate newstatus)
	;;  (rmt:general-call 'state-status run-id newstate newstatus test-id))
	;; (else
	;;  (if newstate   (rmt:general-call 'set-test-state   run-id newstate   test-id))
	;;  (if newstatus  (rmt:general-call 'set-test-status  run-id newstatus  test-id))
	;;  (if newcomment (rmt:general-call 'set-test-comment run-id newcomment test-id))))
	(rmt:set-state-status-and-roll-up-items run-id test-id #f newstate newstatus newcomment)
	;; (mt:process-triggers run-id test-id newstate newstatus)
	#t)))


(define (mt:test-set-state-status-by-id-unless-completed run-id test-id newstate newstatus newcomment)
  (let* ((test-vec   (rmt:get-testinfo-state-status run-id test-id))
         (state     (vector-ref test-vec 3)))
    (if (equal? state "COMPLETED")
        #t
        (rmt:set-state-status-and-roll-up-items run-id test-id #f newstate newstatus newcomment))))

  
(define (mt:test-set-state-status-by-testname run-id test-name item-path new-state new-status new-comment)
  ;(let ((test-id (rmt:get-test-id run-id test-name item-path)))
  (rmt:set-state-status-and-roll-up-items run-id test-name item-path new-state new-status new-comment)
  ;; (mt:process-triggers run-id test-id new-state new-status)
  #t);)
	;;(mt:test-set-state-status-by-id run-id test-id new-state new-status new-comment)))

(define (mt:test-set-state-status-by-testname-unless-completed run-id test-name item-path new-state new-status new-comment)
  (let ((test-id (rmt:get-test-id run-id test-name item-path)))
    (mt:test-set-state-status-by-id-unless-completed run-id test-id new-state new-status new-comment)))
    
(define (mt:lazy-read-test-config test-name)
  (let ((tconf (hash-table-ref/default *testconfigs* test-name #f)))
    (if tconf
	tconf
	(let ((test-dirs (tests:get-tests-search-path *configdat*)))
	  (let loop ((hed (car test-dirs))
		     (tal (cdr test-dirs)))
	    ;; Setting MT_LINKTREE here is almost certainly unnecessary. 
	    (let ((tconfig-file (conc hed "/" test-name "/testconfig")))
	      (if (and (common:file-exists? tconfig-file)
		       (file-read-access? tconfig-file))
		  (let ((link-tree-path (common:get-linktree)) ;; (configf:lookup *configdat* "setup" "linktree"))
			(old-link-tree  (get-environment-variable "MT_LINKTREE")))
		    (if link-tree-path (setenv "MT_LINKTREE" link-tree-path))
		    (let ((newtcfg (read-config tconfig-file #f #f))) ;; NOTE: Does NOT run [system ...]
		      (hash-table-set! *testconfigs* test-name newtcfg)
		      (if old-link-tree 
			  (setenv "MT_LINKTREE" old-link-tree)
			  (unsetenv "MT_LINKTREE"))
		      newtcfg))
		  (if (null? tal)
		      (begin
			(debug:print-error 0 *default-log-port* "No readable testconfig found for " test-name)
			#f)
		      (loop (car tal)(cdr tal))))))))))

;;======================================================================
;; portlogger
;;======================================================================



(define (portlogger:open-db fname)
  (let* ((avail    (tasks:wait-on-journal fname 5 remove: #t)) ;; wait up to about 10 seconds for the journal to go away
	 (exists   (common:file-exists? fname))
	 (db       (if avail 
		       (sqlite3:open-database fname)
		       (begin
			 (system (conc "rm -f " fname))
			 (sqlite3:open-database fname))))
	 (handler  (make-busy-timeout 136000))
	 (canwrite (file-write-access? fname)))
	 ;; (db-init  (lambda ()
	 ;;             (sqlite3:execute 
	 ;;              db
	 ;;              "CREATE TABLE IF NOT EXISTS ports (
         ;;                 port INTEGER PRIMARY KEY,
         ;;                 state TEXT DEFAULT 'not-used',
         ;;                 fail_count INTEGER DEFAULT 0,
         ;;                 update_time TIMESTAMP DEFAULT (strftime('%s','now')) );"))))
    (sqlite3:set-busy-handler! db handler)
    (db:set-sync db) ;; (sqlite3:execute db "PRAGMA synchronous = 0;")
    ;; (if (not exists) ;; needed with IF NOT EXISTS?
    (sqlite3:execute 
     db
     "CREATE TABLE IF NOT EXISTS ports (
            port INTEGER PRIMARY KEY,
            state TEXT DEFAULT 'not-used',
            fail_count INTEGER DEFAULT 0,
            update_time TIMESTAMP DEFAULT (strftime('%s','now')) );")
    db))

(define (portlogger:open-run-close proc . params)
  (let* ((fname  (conc "/tmp/." (current-user-name) "-portlogger.db"))
	 (avail  (tasks:wait-on-journal fname 10))) ;; wait up to about 10 seconds for the journal to go away
    (handle-exceptions
     exn
     (begin
       ;; (release-dot-lock fname)
       (debug:print-error 0 *default-log-port* "portlogger:open-run-close failed. " proc " " params)
       (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
       (debug:print 5 *default-log-port* "exn=" (condition->list exn))
       (if (common:file-exists? fname)(delete-file fname)) ;; brutally get rid of it
       (print-call-chain (current-error-port)))
     (let* (;; (lock   (obtain-dot-lock fname 2 9 10))
	    (db     (portlogger:open-db fname))
	    (res    (apply proc db params)))
       (sqlite3:finalize! db)
       ;; (release-dot-lock fname)
       res))))

;; (fold-row PROC INIT DATABASE SQL . PARAMETERS) 
(define (portlogger:take-port db portnum)
  (let* ((qry1 (sqlite3:prepare db "INSERT INTO ports (port,state) VALUES (?,?);"))
	 (qry2 (sqlite3:prepare db "UPDATE ports SET state=?,update_time=strftime('%s','now') WHERE port=?;"))
	 (qry3 (sqlite3:prepare db "SELECT state FROM ports WHERE port=?;"))
	 (res  (sqlite3:with-transaction
		db
		(lambda ()
		  ;; (fold-row (lambda (var curr) (or var curr)) #f db "SELECT var FROM foo WHERE id=100;")
		  (let* ((curr #f)
			 (res  #f))
		    (set! curr (sqlite3:fold-row
				(lambda (var curr)
				  (or curr var curr))
				"not-tried"
				qry3
				portnum))
		    ;; (print "curr=" curr)
		    (set! res (case (string->symbol curr)
				((released)  (sqlite3:execute qry2 "taken" portnum) 'taken)
				((not-tried) (sqlite3:execute qry1 portnum "taken") 'taken)
				((taken)                                            'already-taken)
				((failed)                                           'failed)
				(else                                               'error)))
		    ;; (print "res=" res)
		    res)))))
    (sqlite3:finalize! qry1)
    (sqlite3:finalize! qry2)
    (sqlite3:finalize! qry3)
    res))

(define (portlogger:get-prev-used-port db)
  (handle-exceptions
      exn
      (begin
	(debug:print 0 *default-log-port* "EXCEPTION: portlogger database probably overloaded or unreadable. If you see this message again remove /tmp/.$USER-portlogger.db")
	(debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
	(debug:print 5 *default-log-port* "exn=" (condition->list exn))
	(print-call-chain (current-error-port))
	(debug:print 0 *default-log-port* "Continuing anyway.")
	#f)
    (sqlite3:fold-row
     (lambda (var curr)
       (or curr var curr))
     #f
     db
     "SELECT (port) FROM ports WHERE state='released' LIMIT 1;")))

(define (portlogger:find-port db)
  (let* ((lowport (let ((val (configf:lookup *configdat* "server" "lowport")))
		    (if (and val 
			     (string->number val))
			(string->number val)
			32768)))
	 (portnum (or (portlogger:get-prev-used-port db)
		      (+ lowport ;; top of registered ports is 49152 but lets use ports in the registered range
			 (random (- 64000 lowport))))))
    (handle-exceptions
     exn
     (begin
       (debug:print 0 *default-log-port* "EXCEPTION: portlogger database probably overloaded or unreadable. If you see this message again remove /tmp/.$USER-portlogger.db")
       (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
       (debug:print 5 *default-log-port* "exn=" (condition->list exn))
       (print-call-chain (current-error-port))
       (debug:print 0 *default-log-port* "Continuing anyway."))
     (portlogger:take-port db portnum))
    portnum))

;; set port to "released", "failed" etc.
;; 
(define (portlogger:set-port db portnum value)
  (sqlite3:execute db "UPDATE ports SET state=?,update_time=strftime('%s','now') WHERE port=?;" value portnum))

;; set port to failed (attempted to take but got error)
;;
(define (portlogger:set-failed db portnum)
  (sqlite3:execute db "UPDATE ports SET state='failed',fail_count=fail_count+1,update_time=strftime('%s','now') WHERE port=?;" portnum))

;;======================================================================
;; MAIN
;;======================================================================

(define (portlogger:main . args)
  (let* ((dbfname (conc "/tmp/." (current-user-name) "-portlogger.db"))
	 (db      (portlogger:open-db dbfname))
	 (numargs (length args))
	 (result  
	  (handle-exceptions
	   exn
	   (begin
	     (debug:print 0 *default-log-port* "EXCEPTION: portlogger database at " dbfname " probably overloaded or unreadable. Try removing it.")
	     (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
	     (debug:print 5 *default-log-port* "exn=" (condition->list exn))
	     (debug:print 0 *default-log-port* " status:  " ((condition-property-accessor 'sqlite3 'status) exn))
	     (print-call-chain (current-error-port))
	     #f)
	   (case (string->symbol (car args)) ;; commands with two or more params
	     ((take)(portlogger:take-port db (string->number (cadr args))))
	     ((find)(portlogger:find-port db))
	     ((set) (let ((port  (cadr  args))
			  (state (caddr args)))
		      (portlogger:set-port db 
					   (if (number? port) port (string->number port))
					   state)
		      state))
	     ((failed)(portlogger:set-failed db (string->number (cadr args))) 'failed)))))
    (sqlite3:finalize! db)
    result))
     
;; (print (apply portlogger:main (cdr (argv))))
;;======================================================================
;; runconfig
;;======================================================================

(define (runconfig:read fname target environ-patt)
  (let ((ht (make-hash-table)))
    (if target (hash-table-set! ht target '()))
    (read-config fname ht #t environ-patt: environ-patt sections: (if target (list "default" target) #f))))

;; NB// to process a runconfig ensure to use environ-patt with target!
;;
(define (setup-env-defaults fname run-id already-seen keyvals #!key (environ-patt #f)(change-env #t))
  (let* ((keys    (map car keyvals))
	 (thekey  (if keyvals 
		      (string-intersperse (map (lambda (x)(if x x "-na-")) (map cadr keyvals)) "/")
		      (or (common:args-get-target)
			  (get-environment-variable "MT_TARGET")
			  (begin
			    (debug:print-error 0 *default-log-port* "setup-env-defaults called with no run-id or -target or -reqtarg")
			    "nothing matches this I hope"))))
	 ;; Why was system disallowed in the reading of the runconfigs file?
	 ;; NOTE: Should be setting env vars based on (target|default)
	 (confdat   (runconfig:read fname thekey environ-patt))
	 (whatfound (make-hash-table))
	 (finaldat  (make-hash-table))
	 (sections (list "default" thekey)))
    (if (not *target*)(set! *target* thekey)) ;; may save a db access or two but repeats db:get-target code
    (debug:print 4 *default-log-port* "Using key=\"" thekey "\"")

    (if change-env
	(for-each ;; NB// This can be simplified with new content of keyvals having all that is needed.
	 (lambda (keyval)
	   (safe-setenv (car keyval)(cadr keyval)))
	 keyvals))
	
    (for-each 
     (lambda (section)
       (let ((section-dat (hash-table-ref/default confdat section #f)))
	 (if section-dat
	     (for-each 
	      (lambda (envvar)
		(let ((val (cadr (assoc envvar section-dat))))
		(hash-table-set! whatfound section (+ (hash-table-ref/default whatfound section 0) 1))
		(if (and (string? envvar)
			 (string? val)
			 change-env)
		    (safe-setenv envvar val))
		(hash-table-set! finaldat envvar val)))
	      (map car section-dat)))))
     sections)
    (if already-seen
	(begin
	  (debug:print 2 *default-log-port* "Key settings found in runconfigs.config:")
	  (for-each (lambda (fullkey)
		      (debug:print 2 *default-log-port* (format #f "~20a ~a\n" fullkey (hash-table-ref/default whatfound fullkey 0))))
		    sections)
	  (debug:print 2 *default-log-port* "---")
	  (set! *already-seen-runconfig-info* #t)))
    ;; finaldat ;; was returning this "finaldat" which would be good but conflicts with other uses
    confdat
    ))

(define (set-run-config-vars run-id keyvals targ-from-db)
  (push-directory *toppath*) ;; the push/pop doesn't appear to do anything ...
  (let ((runconfigf (conc  *toppath* "/runconfigs.config"))
	(targ       (or (common:args-get-target)
			targ-from-db
			(get-environment-variable "MT_TARGET"))))
    (pop-directory)
    (if (common:file-exists? runconfigf)
	(setup-env-defaults runconfigf run-id #t keyvals
			    environ-patt: (conc "(default"
						(if targ
						    (conc "|" targ ")")
						    ")")))
	(debug:print 0 *default-log-port* "WARNING: You do not have a run config file: " runconfigf))))

;; given (a (b c) d) return ((a b d)(a c d))
;; NOTE: this feels like it has been done before - perhaps with items handling?
;;
(define (runconfig:combinations inlst)
  (let loop ((hed (car inlst))
	     (tal (cdr inlst))
	     (res '()))
    ;; (print "res: " res " hed: " hed)
    (if (list? hed)
	(let ((newres (if (null? res) ;; first time through convert incoming items to list of items
			  (map list hed)
			  (apply append
				 (map (lambda (r)  ;; iterate over items in res
					(map (lambda (h) ;; iterate over items in hed
					       (append r (list h)))
					     hed))
				      res)))))
	  ;; (print "newres1: " newres)
	  (if (null? tal)
	      newres
	      (loop (car tal)(cdr tal) newres)))
	(let ((newres (if (null? res)
			  (list (list hed))
			  (map (lambda (r)
				 (append r (list hed)))
			       res))))
	  ;; (print "newres2: " newres)
	  (if (null? tal)
	      newres
	      (loop (car tal)(cdr tal) newres))))))

;; multi-part expand
;; Given a/b,c,d/e,f return a/b/e a/b/f a/c/e a/c/f a/d/e a/d/f
;;
(define (runconfig:expand target)
  (let* ((parts (map (lambda (x)
		       (string-split x ","))
		     (string-split target "/"))))
    (map (lambda (x)
	   (string-intersperse x "/"))
	 (runconfig:combinations parts))))

;; multi-target expansion
;; a/b/c/x,y,z a/b/d/x,y => a/b/c/x a/b/c/y a/b/c/z a/b/d/x a/b/d/y
;; 
(define (runconfig:expand-target target-strs)
  (delete-duplicates
   (apply append (map runconfig:expand (string-split target-strs " ")))))
;;======================================================================
;; runs
;;======================================================================
;; (include "debugger.scm")

;; use this struct to facilitate refactoring
;;

(defstruct runs:dat
  reglen regfull
  runname max-concurrent-jobs run-id
  test-patts required-tests test-registry
  registry-mutex flags keyvals run-info all-tests-registry
  can-run-more-tests
  ((can-run-more-tests-count 0) : fixnum))

(defstruct runs:testdat
  hed tal reg reruns  test-record
  test-name item-path jobgroup
  waitons testmode  newtal itemmaps prereqs-not-met)


(define (runs:get-mt-env-alist run-id runname target testname itempath)
  ;;(bb-check-path msg: "runs:set-megatest-env-vars entry")
  `(("MT_TEST_NAME"     . ,testname)
    
    ("MT_ITEMPATH"      . ,itempath)

    ("MT_TARGET"        . ,target)
    
    ("MT_RUNNAME"       . ,runname)
    
    ("MT_RUN_AREA_HOME" . ,*toppath*)
    
    ,@(let* ((link-tree (common:get-linktree))) ;; (configf:lookup *configdat* "setup" "linktree")))
        (if link-tree
            (list (cons "MT_LINKTREE" link-tree)
                  
                  (cons "MT_TEST_RUN_DIR"
                        (conc link-tree  "/" target "/" runname "/" testname
                              (if (and (string? itempath) (not (equal? itempath "")))
                                  (conc "/" itempath)
                                  "")))
                  )
            '()))
    
    ,@(map
       (lambda (key)
         (cons  (car key) (cadr key)))
       (keys:target->keyval (rmt:get-keys) target))
    
    ,@(map (lambda (var)
             (let ((val (configf:lookup *configdat* "env-override" var)))
               (cons var val)))
           (configf:section-vars *configdat* "env-override"))))


    

    
    
;; set up needed environment variables given a run-id and optionally a target, itempath etc.
;;
(define (runs:set-megatest-env-vars run-id #!key (inkeys #f)(inrunname #f)(inkeyvals #f)(intarget #f)(testname #f)(itempath #f))
  ;;(bb-check-path msg: "runs:set-megatest-env-vars entry")
  (let* ((target    (or intarget 
			(common:args-get-target)
			(get-environment-variable "MT_TARGET")))
	 (keys      (if inkeys    inkeys    (rmt:get-keys)))
	 (keyvals   (if inkeyvals inkeyvals (keys:target->keyval keys target)))
	 (vals      (hash-table-ref/default *env-vars-by-run-id* run-id #f))
	 (link-tree (common:get-linktree))) ;; (configf:lookup *configdat* "setup" "linktree")))
    (if testname (setenv "MT_TEST_NAME" testname))
    (if itempath (setenv "MT_ITEMPATH"  itempath))

    ;; get the info from the db and put it in the cache
    (if link-tree
	(setenv "MT_LINKTREE" link-tree)
	(debug:print-error 0 *default-log-port* "linktree not set, should be set in megatest.config in [setup] section."))
    (if (not vals)
	(let ((ht (make-hash-table)))
	  (hash-table-set! *env-vars-by-run-id* run-id ht)
	  (set! vals ht)
	  (for-each
	   (lambda (key)
	     (hash-table-set! vals (car key) (cadr key)))
	   keyvals)))
    ;; from the cached data set the vars
    
    (hash-table-for-each
     vals
     (lambda (key val)
       (debug:print 2 *default-log-port* "setenv " key " " val)
       (safe-setenv key val)))
    ;;(bb-check-path msg: "runs:set-megatest-env-vars block 1")
    ;;(BB> "*env-vars-by-run-id*/runid("run-id" vals="(hash-table->alist vals))

    (if (not (get-environment-variable "MT_TARGET"))(setenv "MT_TARGET" target))
    ;; we had a case where there was an exception generated by the hash-table-ref
    ;; due to *configdat* being #f Adding a handle and exit
    (let fatal-loop ((count 0)) 
      (handle-exceptions
	  exn
	  (let ((call-chain (get-call-chain))
		(msg        ((condition-property-accessor 'exn 'message) exn)))
	    (if (< count 5)
		(begin ;; this call is colliding, do some crude stuff to fix it.
		  (debug:print 0 *default-log-port* "ERROR: *configdat* was inaccessible! This should never happen. Retry #" count)
		  (launch:setup force-reread: #t)
		  (fatal-loop (+ count 1))) 
		(begin
		  (debug:print 0 *default-log-port* "FATAL: *configdat* was inaccessible! This should never happen. Retried " count " times. Message: " msg)
		  (debug:print 0 *default-log-port* "Call chain:")
		  (with-output-to-port *default-log-port*

                    (lambda ()
                      (print "*configdat* is >>"*configdat*"<<")
                      (pp *configdat*)
                      (pp call-chain)))
                  
		  (exit 1))))
          ;;(bb-check-path msg: "runs:set-megatest-env-vars block 1.5")
          (when (or (not *configdat*) (not (hash-table? *configdat*)))
              (debug:print 0 *default-log-port* "WARNING: *configdat* was inaccessible! This should never happen.  Brute force reread.")
              ;;(BB> "ERROR: *configdat* was inaccessible! This should never happen.  Brute force reread.")
              (thread-sleep! 2) ;; assuming nfs lag.
              (launch:setup force-reread: #t))
          (alist->env-vars (hash-table-ref/default *configdat* "env-override" '())))) ;;;; environment is tainted HERE in this let block.
    ;;(bb-check-path msg: "runs:set-megatest-env-vars block 2")
    ;; Lets use this as an opportunity to put MT_RUNNAME in the environment
    (let ((runname  (if inrunname inrunname (rmt:get-run-name-from-id run-id))))
      (if runname
	  (setenv "MT_RUNNAME" runname)
	  (debug:print-error 0 *default-log-port* "no value for runname for id " run-id)))
    (setenv "MT_RUN_AREA_HOME" *toppath*)
    ;; if a testname and itempath are available set the remaining appropriate variables
    (if testname (setenv "MT_TEST_NAME" testname))
    (if itempath (setenv "MT_ITEMPATH"  itempath))
    ;;(bb-check-path msg: "runs:set-megatest-env-vars block 3")
    (if (and testname link-tree)
	(setenv "MT_TEST_RUN_DIR" (conc (getenv "MT_LINKTREE")  "/"
					(getenv "MT_TARGET")    "/"
					(getenv "MT_RUNNAME")   "/"
					(getenv "MT_TEST_NAME")
					(if (and itempath
						 (not (equal? itempath "")))
					    (conc "/" itempath)
					    ""))))))

(define (set-item-env-vars itemdat)
  (for-each (lambda (item)
	      (debug:print 2 *default-log-port* "setenv " (car item) " " (cadr item))
	      (setenv (car item) (cadr item)))
	    itemdat))

;; Every time can-run-more-tests is called increment the delay
;;
;; NOTE: We run this server-side!! Do not use this global except in the runs:can-run-more-tests routine
;;
(define *last-num-running-tests* 0)
;; (define *runs:can-run-more-tests-count* 0)
(define (runs:shrink-can-run-more-tests-count runsdat)
  (runs:dat-can-run-more-tests-count-set! runsdat 0))

(define (runs:inc-can-run-more-tests-count runsdat)
  (runs:dat-can-run-more-tests-count-set!
   runsdat
   (+ (runs:dat-can-run-more-tests-count runsdat) 1)))

;;  (set! *runs:can-run-more-tests-count* 0)) ;; (/ *runs:can-run-more-tests-count* 2)))

;; Temporary globals. Move these into the logic or into common
;;
(define *seen-cant-run-tests* (make-hash-table)) ;; use to track tests that we suspect cannot be run
(define (runs:inc-cant-run-tests testname)
  (hash-table-set! *seen-cant-run-tests* testname
		   (+ (hash-table-ref/default *seen-cant-run-tests* testname 0) 1)))

(define (runs:can-keep-running? testname n)
  (< (hash-table-ref/default *seen-cant-run-tests* testname 0) n))

(define *runs:denoise* (make-hash-table)) ;; key => last-time-ran

;; mechanism to limit printing info to the screen that is repetitive.
;;
;; Example: 
;; (if (runs:lownoise "waiting on tasks" 60)
;;     (debug:print-info 2 *default-log-port* "waiting for tasks to complete, sleeping briefly ..."))
;;
(define (runs:lownoise key waitval)
  (let ((lasttime (hash-table-ref/default *runs:denoise* key 0))
	(currtime (current-seconds)))
    (if (> (- currtime lasttime) waitval)
	(begin
	  (hash-table-set! *runs:denoise* key currtime)
	  #t)
	#f)))

(define (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs)

  ;; Take advantage of a good place to exit if running the one-pass methodology
  (if (and (> (runs:dat-can-run-more-tests-count runsdat) 20)
	   (args:get-arg "-one-pass"))
      (exit 0))

  (thread-sleep! (cond ;; BB: check with Matt.  Should this sleep move to cond clauses below where we determine we have too many jobs running rather than each time the and condition above is true (which seems like always)?
        	  ((> (runs:dat-can-run-more-tests-count runsdat) 20)
		   (if (runs:lownoise "waiting on tasks" 60)(debug:print-info 2 *default-log-port* "waiting for tasks to complete, sleeping briefly ..."))
                   (configf:lookup-number *configdat* "setup" "inter-test-delay" default: 0.1) ;; was 2
		   );; obviously haven't had any work to do for a while
        	  (else 0)))
  
  (let* ((num-running             (rmt:get-count-tests-running run-id))
	 (num-running-in-jobgroup (rmt:get-count-tests-running-in-jobgroup run-id jobgroup))
	 (job-group-limit         (let ((jobg-count (config-lookup *configdat* "jobgroups" jobgroup)))
				    (if (string? jobg-count)
					(string->number jobg-count)
					jobg-count))))
    (if (> (+ num-running num-running-in-jobgroup) 0)
	(runs:inc-can-run-more-tests-count runsdat)) ;; (set! *runs:can-run-more-tests-count* (+ *runs:can-run-more-tests-count* 1)))
    (if (not (eq? *last-num-running-tests* num-running))
	(begin
	  (debug:print 2 *default-log-port* "max-concurrent-jobs: " max-concurrent-jobs ", num-running: " num-running)
	  (set! *last-num-running-tests* num-running)))
    (if (not (eq? 0 *globalexitstatus*))
	(list #f num-running num-running-in-jobgroup max-concurrent-jobs job-group-limit)
	(let* ((can-not-run-more (cond
				 ;; if max-concurrent-jobs is set and the number running is greater 
				 ;; than it then cannot run more jobs
				 ((and max-concurrent-jobs (>= num-running max-concurrent-jobs))
				  (if (runs:lownoise "mcj msg" 60)
				      (debug:print 0 *default-log-port* "WARNING: Max running jobs exceeded, current number running: " num-running 
						   ", max_concurrent_jobs: " max-concurrent-jobs))
				  #t)
				 ;; if job-group-limit is set and number of jobs in the group is greater
				 ;; than the limit then cannot run more jobs of this kind
				 ((and job-group-limit
				       (>= num-running-in-jobgroup job-group-limit))
				  (if (runs:lownoise (conc "maxjobgroup " jobgroup) 60)
				      (debug:print 1 *default-log-port* "WARNING: number of jobs " num-running-in-jobgroup 
						   " in jobgroup \"" jobgroup "\" exceeds limit of " job-group-limit))
				  #t)
				 (else #f))))
	  (list (not can-not-run-more) num-running num-running-in-jobgroup max-concurrent-jobs job-group-limit)))))

(define (runs:run-pre-hook run-id)
    (let* ((run-pre-hook   (configf:lookup *configdat* "runs" "pre-hook"))
           (existing-tests (if run-pre-hook
                               (rmt:get-tests-for-run run-id "%" '() '() ;; run-id testpatt states statuses
                                                      #f #f ;; offset limit
                                                      #f ;; not-in
                                                      #f ;; sort-by
                                                      #f ;; sort-order
                                                      #f ;; get full data (not 'shortlist)
                                                      0 ;; (runs:gendat-inc-results-last-update *runs:general-data*) ;; last update time
                                                      'dashboard)
                               '()))
           (log-dir         (conc *toppath* "/logs"))
           (log-file        (conc "pre-hook-" (string-translate (getenv "MT_TARGET") "/" "-") "-" (getenv "MT_RUNNAME") ".log"))
           (full-log-fname  (conc log-dir "/" log-file)))
      (if run-pre-hook
          (if (null? existing-tests)
              (let* ((use-log-dir (if (not (directory-exists? log-dir))
                                      (handle-exceptions
                                       exn
                                       (begin
                                         (debug:print 0 *default-log-port* "WARNING: Failed to create " log-dir)
                                         #f)
                                       (create-directory log-dir #t)
                                       #t)
                                      #t))
                     (start-time   (current-seconds))
                     (actual-logf  (if use-log-dir full-log-fname log-file)))
                (handle-exceptions
                 exn
                 (begin
                   (print-call-chain *default-log-port*)
                   (debug:print 0 *default-log-port* "Message: " ((condition-property-accessor 'exn 'message) exn))
                   (debug:print 0 *default-log-port* "ERROR: failed to run pre-hook " run-pre-hook ", check the log " log-file))
                 (debug:print-info 0 *default-log-port* "running run-pre-hook: \"" run-pre-hook "\", log is " actual-logf)
                 (system (conc run-pre-hook " >> " actual-logf " 2>&1"))
                 (debug:print-info 0 *default-log-port* "pre-hook \"" run-pre-hook "\" took " (- (current-seconds) start-time) " seconds to run.")))
              (debug:print 0 *default-log-port* "Skipping pre-hook call \"" run-pre-hook "\" as there are existing tests for this run.")))))
    
(define (runs:run-post-hook run-id)
    (let* ((run-post-hook   (configf:lookup *configdat* "runs" "post-hook"))
           (existing-tests (if run-post-hook
                               (rmt:get-tests-for-run run-id "%" '() '() ;; run-id testpatt states statuses
                                                      #f #f ;; offset limit
                                                      #f ;; not-in
                                                      #f ;; sort-by
                                                      #f ;; sort-order
                                                      #f ;; get full data (not 'shortlist)
                                                      0 ;; (runs:gendat-inc-results-last-update *runs:general-data*) ;; last update time
                                                      'dashboard)
                               '()))
           (log-dir         (conc *toppath* "/logs"))
           (log-file        (conc "post-hook-" (string-translate (getenv "MT_TARGET") "/" "-") "-" (getenv "MT_RUNNAME") ".log"))
           (full-log-fname  (conc log-dir "/" log-file)))
      (if run-post-hook
          ;; (if (null? existing-tests)
          ;;    (debug:print 0 *default-log-port* "Skipping post-hook call \"" run-post-hook "\" as there are existing tests for this run.")))))
	  (let* ((use-log-dir (if (not (directory-exists? log-dir))
				  (handle-exceptions
				      exn
				      (begin
					(debug:print 0 *default-log-port* "WARNING: Failed to create " log-dir)
					#f)
				    (create-directory log-dir #t)
				    #t)
				  #t))
		 (start-time   (current-seconds))
		 (actual-logf  (if use-log-dir full-log-fname log-file)))
	    (handle-exceptions
		exn
		(begin
		  (print-call-chain *default-log-port*)
		  (debug:print 0 *default-log-port* "Message: " ((condition-property-accessor 'exn 'message) exn))
		  (debug:print 0 *default-log-port* "ERROR: failed to run post-hook " run-post-hook ", check the log " log-file))
	      (debug:print-info 0 *default-log-port* "running run-post-hook: \"" run-post-hook "\", log is " actual-logf)
	      (system (conc run-post-hook " >> " actual-logf " 2>&1"))
	      (debug:print-info 0 *default-log-port* "post-hook \"" run-post-hook "\" took " (- (current-seconds) start-time) " seconds to run."))))))

;; return #t when all items in waitors-upon list are represented in test-patt, #f otherwise.
(define (runs:testpatts-mention-waitors-upon? test-patt waitors-upon)
  (null? (tests:filter-test-names-not-matched waitors-upon test-patt)))

;;======================================================================
;; runs:run-tests is called from megatest.scm and itself
;;======================================================================
;;
;;  test-names: Comma separated patterns same as test-patts but used in selection 
;;              of tests to run. The item portions are not respected.
;;              FIXME: error out if /patt specified
;;            
(define (runs:run-tests target runname test-patts user flags #!key (run-count 1)) ;; test-names
  (let* ((keys               (keys:config-get-fields *configdat*))
	 (keyvals            (keys:target->keyval keys target))
	 (run-id             (rmt:register-run keyvals runname "new" "n/a" user (args:get-arg "-contour")))  ;;  test-name)))
	 ;; (deferred          '()) ;; delay running these since they have a waiton clause
	 (runconfigf         (conc  *toppath* "/runconfigs.config"))
         (dbfile             (conc  *toppath* "/megatest.db"))
         (readonly-mode      (not (file-write-access? dbfile)))
	 (test-records       (make-hash-table))
	 ;; need to process runconfigs before generating these lists
	 (all-tests-registry #f)  ;; (tests:get-all)) ;; (tests:get-valid-tests (make-hash-table) test-search-path)) ;; all valid tests to check waiton names
	 (all-test-names     #f)  ;; (hash-table-keys all-tests-registry))
	 (test-names         #f)  ;; Generated by a call to (tests:filter-test-names all-test-names test-patts))
	 (required-tests     #f)  ;; Put fully qualified test/testpath names in this list to be done
         (waitors-upon       (make-hash-table)) ;; given a test, return list of tests waiting upon this test.
	 (task-key           (conc (hash-table->alist flags) " " (get-host-name) " " (current-process-id)))
	 ;; (tdbdat             (tasks:open-db))
	 (config-reruns      (let ((x (configf:lookup *configdat* "setup" "reruns")))
			       (if x (string->number x) #f)))
	 (allowed-tests      #f))

    ;; check if readonly
    (when readonly-mode
      (debug:print-error 0 *default-log-port* "megatest.db is readonly.  Cannot proceed.")
      (exit 1))

    ;; per user request. If less than 100Meg space on dbdir partition, bail out with error
    ;; this will reduce issues in database corruption
    (common:check-db-dir-and-exit-if-insufficient)

    ;; override the number of reruns from the configs
    ;; this needs to be done at the place where is first runs:run-tests called 
    ;(if (and config-reruns
	  ;   (> run-count config-reruns))
 	  ;(set! run-count config-reruns))

    ;; (if (tasks:need-server run-id)(tasks:start-and-wait-for-server tdbdat run-id 10))

    (let ((sighand (lambda (signum)
		     ;; (signal-mask! signum) ;; to mask or not? seems to cause issues in exiting
		     (set! *time-to-exit* #t)
		     (print "Received signal " signum ", cleaning up before exit. Please wait...")
		     (let ((th1 (make-thread (lambda ()
					       ;; (let ((tdbdat (tasks:open-db)))
						 (rmt:tasks-set-state-given-param-key task-key "killed") ;; )
					       (print "Killed by signal " signum ". Exiting")
					       (thread-sleep! 3)
					       (exit))))
			   (th2 (make-thread (lambda ()
					       (thread-sleep! 5)
					       (debug:print 0 *default-log-port* "Done")
					       (exit 4)))))
		       (thread-start! th2)
		       (thread-start! th1)
		       (thread-join! th2)))))
      (set-signal-handler! signal/int sighand)
      (set-signal-handler! signal/term sighand))

    ;; force the starting of a server -- removed BB 17ww28 - no longer needed.
    ;;(debug:print 0 *default-log-port* "waiting on server...")
    ;;(server:start-and-wait *toppath*)
    
    (runs:set-megatest-env-vars run-id inkeys: keys inrunname: runname) ;; these may be needed by the launching process
    (set! runconf (if (common:file-exists? runconfigf)
		      (setup-env-defaults runconfigf run-id *already-seen-runconfig-info* keyvals target)
		      (begin
			(debug:print 0 *default-log-port* "WARNING: You do not have a run config file: " runconfigf)
			#f)))

    (if (not test-patts) ;; first time in - adjust testpatt
	(set! test-patts (common:args-get-testpatt runconf)))
    ;; if test-patts is #f at this point there is something wrong and we need to bail out
    (if (not test-patts)
	(begin
	  (debug:print 0 *default-log-port* "WARNING: there is no test pattern for this run. Exiting now.")
	  (exit 0)))
    
    (if (args:get-arg "-tagexpr")
	(begin
	  (set! allowed-tests (string-join (runs:get-tests-matching-tags (args:get-arg "-tagexpr")) ","))
	  	  (debug:print-info 0 *default-log-port* "filtering initial test list with tagexpr: " (args:get-arg "-tagexpr") " => " allowed-tests)
		  ));; tests will be ANDed with this list

    ;; register this run in monitor.db
    (rmt:tasks-add "run-tests" user target runname test-patts task-key) ;; params)
    (rmt:tasks-set-state-given-param-key task-key "running")

    (common:telemetry-log "run-tests"
                          payload:
                          `( (target . ,target)
                             (run-name . ,runname)
                             (test-patts . ,test-patts) ) )

    
    ;; Now generate all the tests lists
    (set! all-tests-registry (tests:get-all))   ;; hash of testname => path-to-test
    (set! all-test-names     (hash-table-keys all-tests-registry))
    ;; filter first for allowed-tests (from -tagexpr) then for test-patts.
    (set! test-names         (tests:filter-test-names
			      (if allowed-tests
				  (tests:filter-test-names all-test-names allowed-tests)
				  all-test-names)
			      test-patts))

    ;; I think seeding required-tests with all test-names makes sense but lack analysis to back that up.

    ;; NEW STRATEGY HERE:
    ;; 1. fill required tests with test-patts
    ;; 2. scan testconfigs and if waitons, itemwait, itempatt calc prior test test-patt
    ;; 3. repeat until all deps propagated
    
    ;; any tests with direct mention in test-patts can be added to required
    ;;(set! required-tests     (lset-intersection equal? (string-split test-patts ",") all-test-names))
    (set! required-tests     (tests:filter-test-names all-test-names test-patts))
    ;;
    ;; (set! required-tests     (lset-intersection equal? test-names all-test-names))
    
    ;; look up all tests matching the comma separated list of globs in
    ;; test-patts (using % as wildcard)

    ;; (set! test-names (delete-duplicates (tests:get-valid-tests *toppath* test-patts)))
    (debug:print-info 0 *default-log-port* "tests search path: " (string-intersperse (tests:get-tests-search-path *configdat*) " "))
    (debug:print-info 0 *default-log-port* "all tests:         " (string-intersperse (sort all-test-names string<) " "))
    (debug:print-info 0 *default-log-port* "test names:        " (string-intersperse (sort test-names string<) " "))
    (debug:print-info 0 *default-log-port* "required tests:    " (string-intersperse (sort required-tests string<) " "))

    ;; on the first pass or call to run-tests set FAILS to NOT_STARTED if
    ;; -keepgoing is specified
    (if (eq? *passnum* 0)
	(begin
	  ;; Is this still necessary? I think not. Unreachable tests are marked as such and 
	  ;; should not cause problems here.
	  ;;
	  ;; have to delete test records where NOT_STARTED since they can cause -keepgoing to 
	  ;; get stuck due to becoming inaccessible from a failed test. I.e. if test B depends 
	  ;; on test A but test B reached the point on being registered as NOT_STARTED and test
	  ;; A failed for some reason then on re-run using -keepgoing the run can never complete.
	  ;;
	  ;; (rmt:general-call 'delete-tests-in-state run-id "NOT_STARTED")
	  
	  ;; Now convert anything in allow-auto-rerun to NOT_STARTED
	  ;;
	  (for-each
	   (lambda (state-status)
	     (let* ((ss-lst (string-split-fields "/" state-status #:infix))
		    (state  (if (> (length ss-lst) 0)(car  ss-lst) #f))
		    (status (if (> (length ss-lst) 1)(cadr ss-lst) #f)))
	       (rmt:set-tests-state-status run-id test-names state status "NOT_STARTED" status)))
	   ;; list of state/status pairs separated by spaces
	   (string-split (or (configf:lookup *configdat* "setup" "allow-auto-rerun") "")))))

    ;; Ensure all tests are registered in the test_meta table
    (runs:update-all-test_meta #f)

    ;; run the run prehook if there are no tests yet run for this run:
    ;;
    (runs:run-pre-hook run-id)
    ;; mark all test launced flag as false in the meta table 
    (rmt:set-var (conc "lunch-complete-" run-id) "no")
    (rmt:set-run-state-status run-id "new" "n/a")
    ;; now add non-directly referenced dependencies (i.e. waiton)
    ;;======================================================================
    ;; refactoring this block into tests:get-full-data
    ;;
    ;; What happended, this code is now duplicated in tests!?
    ;;
    ;;======================================================================
    
    (if (not (null? test-names)) ;; BEGIN test-names loop
	(let loop ((hed (car test-names))   ;; NOTE: This is the main loop that iterates over the test-names
		   (tal (cdr test-names)))         ;; 'return-procs tells the config reader to prep running system but return a proc
          (debug:print-info 4 *default-log-port* "\n\ntestpatt elaboration loop => hed="hed " tal="tal" test-patts="test-patts" test-names="test-names)
	  (change-directory *toppath*) ;; PLEASE OPTIMIZE ME!!! I think this should be a no-op but there are several places where change-directories could be happening.
	  (setenv "MT_TEST_NAME" hed) ;; 
	  (let*-values (((waitons waitors config) (tests:get-waitons hed all-tests-registry))

			;; NOTE: Have the config - can extract [waitons] section
			
                        ((hed-mode)
                         (let ((m (config-lookup config "requirements" "mode")))
                           (if m (map string->symbol (string-split m)) '(normal))))
                        ((hed-itemized-waiton) ;; are items in hed waiting on items of waiton?
                         (not (null? (lset-intersection eq? hed-mode '(itemmatch itemwait)))))
                        )
	    (debug:print-info 8 *default-log-port* "waitons: " waitons)
	    ;; check for hed in waitons => this would be circular, remove it and issue an
	    ;; error
	    (if (or (member hed waitons)
		    (member hed waitors))
		(begin
		  (debug:print-error 0 *default-log-port* "test " hed " has listed itself as a waiton or waitor, please correct this!")
		  (set! waitons (filter (lambda (x)(not (equal? x hed))) waitons))
		  (set! waitors (filter (lambda (x)(not (equal? x hed))) waitors))))
	    
	    ;; (items   (items:get-items-from-config config)))
	    (if (not (hash-table-ref/default test-records hed #f)) ;; waiton-tconfig below will be #f until that test is visted here at least once
		(hash-table-set! test-records ;; BB: we are doing a manual make-tests:testqueue
				 hed (vector hed     ;; 0 ;; testname
					     config  ;; 1 
					     waitons ;; 2 
					     (config-lookup config "requirements" "priority")     ;; priority 3
					     (tests:get-items config) ;; 4 ;; expand the [items] and or [itemstable] into explict items
					     #f      ;; itemsdat 5
					     #f      ;; spare - used for item-path
					     waitors ;; 
					     )))
            ;; update waitors-upon here
            (for-each
             (lambda (waiton)
               (let* ((current-waitors-upon (hash-table-ref/default waitors-upon waiton '())))
                 (debug:print-info 8 *default-log-port* " current-waiters-upon["waiton"] is "current-waitors-upon )
                 (when (not (member hed current-waitors-upon))
                   (debug:print-info 8 *default-log-port* " current-waiters-upon["waiton"] << "hed  )
                   (hash-table-set! waitors-upon waiton (cons hed current-waitors-upon)))))
             (if (list? waitons) waitons '()))
            (debug:print-info 8 *default-log-port* " process waitons&waitors of "hed": "(delete-duplicates (append waitons waitors)))
            (for-each 
	     (lambda (waiton)
	       (if (and waiton (not (member waiton test-names)))
		   (let* ((waitors-in-testpatt (runs:testpatts-mention-waitors-upon? test-patts (hash-table-ref/default waitors-upon waiton '())))
                          (waiton-record   (hash-table-ref/default test-records waiton #f))
			  (waiton-tconfig  (if waiton-record (vector-ref waiton-record 1) #f))
			  (waiton-itemized (and waiton-tconfig
						(or (hash-table-ref/default waiton-tconfig "items" #f)
						    (hash-table-ref/default waiton-tconfig "itemstable" #f))))
			  (itemmaps        (tests:get-itemmaps config))  ;; (configf:lookup config "requirements" "itemmap"))
			  (new-test-patts  (tests:extend-test-patts test-patts hed waiton itemmaps hed-itemized-waiton))) 
		     (debug:print-info 0 *default-log-port* "Test " waiton " has " (if waiton-record "a" "no") " waiton-record and" (if waiton-itemized " " " no ") "items")
		     ;; need to account for test-patt here, if I am test "a", selected with a test-patt of "hed/b%"
		     ;; and we are waiting on "waiton" we need to add "waiton/,waiton/b%" to test-patt
		     ;; is this satisfied by merely appending "/" to the waiton name added to the list?
		     ;;
		     ;; This approach causes all of the items in an upstream test to be run 
		     ;; if we have this waiton already processed once we can analzye it for extending
		     ;; tests to be run, since we can't properly process waitons unless they have been
		     ;; initially added we add them again to be processed on second round AND add the hed
		     ;; back in to also be processed on second round
		     (if waiton-tconfig ;; BB: waiter should be in test-patts as well as the waiton have a tconfig.
                         (if waiton-itemized
                             (if waitors-in-testpatt
                                 (begin
                                   (debug:print-info 0 *default-log-port* "New test patts: " new-test-patts ", prev test patts: " test-patts)
                                   (set! test-names (cons waiton test-names)) ;; need to process this one, only add once the waiton tconfig read
                                   (set! required-tests (cons (conc waiton "/") required-tests))
                                   (set! test-patts new-test-patts))
                                 (begin
                                   (debug:print-info 0 *default-log-port* "Waitor(s) not yet on testpatt for " waiton ", setting up to re-process it")
                                   (set! tal (append (cons waiton tal)(list hed)))))
                             (begin
                               (debug:print-info 0 *default-log-port* "Adding non-itemized test " waiton " to required-tests")
                               (set! required-tests (cons waiton required-tests))
                               (set! test-patts new-test-patts)))
			 (begin
			   (debug:print-info 0 *default-log-port* "No testconfig info yet for " waiton ", setting up to re-process it")
			   (set! tal (append (cons waiton tal)(list hed))))) ;; (cons (conc waiton "/") required-tests))
		     ;; NOPE: didn't work. required needs to be plain test names. Try tacking on to test-patts
		     ;;  - doesn't work
		     ;; (set! test-patts (conc test-patts "," waiton "/"))
		     ;; (set! test-names (cons waiton test-names))))) ;; was an append, now a cons
		     )))
	     (delete-duplicates (append waitons waitors)))
	    (let ((remtests (delete-duplicates (append waitons tal))))
              (debug:print-info 8 *default-log-port* " remtests are "remtests)
	      (if (not (null? remtests))
		  (begin
		    ;; (debug:print-info 0 *default-log-port* "Preprocessing continues for " (string-intersperse remtests ", "))
		    (loop (car remtests)(cdr remtests)))))))) ;; END test-names loop

    (if (not (null? required-tests))
	(debug:print-info 1 *default-log-port* "Adding \"" (string-intersperse required-tests " ") "\" to the run queue"))
    ;; NOTE: these are all parent tests, items are not expanded yet.
    (debug:print-info 4 *default-log-port* "test-records=" (hash-table->alist test-records))
    (let ((reglen (configf:lookup *configdat* "setup" "runqueue")))
      (if (> (length (hash-table-keys test-records)) 0)
	  (let* ((keep-going        #t)
		 (run-queue-retries 5)
		 #;(th1        (make-thread (lambda ()
					    (handle-exceptions
						exn
						(begin
						  (print-call-chain)
						  (print " message: " ((condition-property-accessor 'exn 'message) exn)))
					      (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests
								    (any->number reglen) all-tests-registry)))
					  "runs:run-tests-queue"))
		 (th2        (make-thread (lambda ()			 ;; BBQ: why are we visiting ALL runs here?	    
					    ;; (rmt:find-and-mark-incomplete-all-runs))))) CAN'T INTERRUPT IT ...
					    (let ((run-ids (rmt:get-all-run-ids)))
					      (for-each (lambda (run-id)
							  (if keep-going
							      (handle-exceptions
							       exn
							       (debug:print 0 *default-log-port* "error in calling find-and-mark-incomplete for run-id " run-id)
							       (rmt:find-and-mark-incomplete run-id #f)))) ;; ovr-deadtime))) ;; could be root of https://hsdes.intel.com/appstore/article/#/220546828/main -- Title: Megatest jobs show DEAD even though they are still running (1.64/27)
							run-ids)))
					  "runs: mark-incompletes")))
	    ;; (thread-start! th1)
	    (thread-start! th2)
	    ;; (thread-join! th1)
	    ;; just do the main stuff in the main thread
	    (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests
								    (any->number reglen) all-tests-registry)
	    (set! keep-going #f)
	    (thread-join! th2)
	    ;; if run-count > 0 call, set -preclean and -rerun STUCK/DEAD
	    (if (> run-count 0) ;; handle reruns
		(begin
		  (if (not (hash-table-ref/default flags "-preclean" #f))
		      (hash-table-set! flags "-preclean" #t))
		  (if (not (hash-table-ref/default flags "-rerun" #f))
		      (hash-table-set! flags "-rerun" "STUCK/DEAD,n/a,ZERO_ITEMS"))
		  ;; recursive call to self
      (runs:run-tests target runname test-patts user flags run-count: (- run-count 1)))))
	  (debug:print-info 0 *default-log-port* "No tests to run")))
    (debug:print-info 4 *default-log-port* "All done by here")
    ;; TODO: try putting post hook call here
    ;(if (eq? run-count 0)
    ; (begin  
    ;  (debug:print-info 0 *default-log-port* "Calling Post Hook")  
    ;  (debug:print-info 2 *default-log-port* " run-count " run-count)
    ;  (runs:run-post-hook run-id))
    ;  (debug:print-info 2 *default-log-port* "Not calling post hook runcount = " run-count ))   
    (rmt:tasks-set-state-given-param-key task-key "done")
    ;; (sqlite3:finalize! tasks-db)
    ))


;; loop logic. These are used in runs:run-tests-queue to make it a bit more readable.
;;
;; If reg not full and have items in tal then loop with (car tal)(cdr tal) reg reruns
;; If reg is full (i.e. length >= n
;;   loop with (car reg) tal (cdr reg) reruns
;; If tal is empty
;;   but have items in reg; loop with (car reg)(cdr reg) '() reruns
;;   If reg is empty => all done

(define (runs:queue-next-hed tal reg n regfull)
  (if regfull
      (if (null? reg) #f (car reg))
      (if (null? tal) ;; tal is used up, pop from reg
	  (if (null? reg) #f (car reg))
	  (car tal))))

(define (runs:queue-next-tal tal reg n regfull)
  (if regfull
      tal
      (if (null? tal) ;; must transfer from reg
	  (if (null? reg) '() (cdr reg))
	  (cdr tal))))

(define (runs:queue-next-reg tal reg n regfull)
  (if regfull
      (if (null? reg) '() (cdr reg)) ;; EXPLORE: reorder (cdr reg) such that looping is more efficient
      (if (null? tal) ;; if tal is null and reg not full then '() as reg contents moved to tal
	  '()
	  reg)))

;; this is the list of parameters to the named loop "loop" near the top of runs:run-tests-queue, look around line 1216
;;
(define (runs:loop-values tal reg reglen regfull reruns)
  (list (runs:queue-next-hed tal reg reglen regfull)      ;; hed
        (runs:queue-next-tal tal reg reglen regfull)      ;; tal
        (runs:queue-next-reg tal reg reglen regfull)      ;; reg
        reruns))                                          ;; reruns

;; objective - iterate thru tests
;;    => want to prioritize tests we haven't seen before
;;    => sometimes need to squeeze things in (added to reg)
;;    => review of a previously seen test is higher priority of never visited test
;; reg - list of previously visited tests
;; tal - list of never visited tests
;;   prefer next hed to be from reg than tal.

(define runs:nothing-left-in-queue-count 0)

;;======================================================================
;; runs:expand-items is called by runs:run-tests-queue
;;======================================================================
;;
;; return value of runs:expand-items is passed back to runs-tests-queue and is fed to named loop with this signature:
;;    (let loop ((hed         (car sorted-test-names))
;;	         (tal         (cdr sorted-test-names))
;;	         (reg         '()) ;; registered, put these at the head of tal 
;;	         (reruns      '()))
(define (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry test-records itemmaps)
  (let* ((loop-list       (list hed tal reg reruns))
	 (prereqs-not-met (let ((res (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps)))
			    (if (list? res)
				res
				(begin
				  (debug:print 0 *default-log-port*
					       "ERROR: rmt:get-prereqs-not-met returned non-list!\n"
					       "  res=" res " run-id=" run-id " waitons=" waitons " hed=" hed " item-path=" item-path " testmode=" testmode " itemmaps=" itemmaps)
				  '()))))
         (have-itemized (not (null? (lset-intersection eq? testmode '(itemmatch itemwait)))))
	 ;; (prereqs-not-met (mt:lazy-get-prereqs-not-met run-id waitons item-path mode: testmode itemmap: itemmap))
	 (fails           (runs:calc-fails prereqs-not-met))
	 (prereq-fails    (runs:calc-prereq-fail prereqs-not-met))
	 (non-completed   (runs:calc-not-completed prereqs-not-met))
	 (runnables       (runs:calc-runnable prereqs-not-met))
         (unexpanded-prereqs
          (filter (lambda (testname)
                    (let* ((test-rec (hash-table-ref test-records testname))
                           (items       (tests:testqueue-get-items  test-rec)))
                      ;;(BB> "HEY " testname "=>"items)
                      (or (procedure? items)(eq? items 'have-procedure))))
                  waitons))


         )
    (debug:print-info 4 *default-log-port* "START OF INNER COND #2 "
		      "\n can-run-more:    " can-run-more
		      "\n testname:        " hed
		      "\n prereqs-not-met: " (runs:pretty-string prereqs-not-met)
		      "\n non-completed:   " (runs:pretty-string non-completed) 
		      "\n prereq-fails:    " (runs:pretty-string prereq-fails)
		      "\n fails:           " (runs:pretty-string fails)
		      "\n testmode:        " testmode
		      "\n (member 'toplevel testmode): " (member 'toplevel testmode)
		      "\n (null? non-completed):    " (null? non-completed)
		      "\n reruns:          " reruns
		      "\n items:           " items
		      "\n can-run-more:    " can-run-more)

   (cond
     ;; all prereqs met, fire off the test
     ;; or, if it is a 'toplevel test and all prereqs not met are COMPLETED then launch

    ((and (not (member 'toplevel testmode))
	   (member (hash-table-ref/default test-registry (db:test-make-full-name hed item-path) 'n/a)
		   '(DONOTRUN removed CANNOTRUN))) ;; *common:cant-run-states-sym*) ;; '(COMPLETED KILLED WAIVED UNKNOWN INCOMPLETE)) ;; try to catch repeat processing of COMPLETED tests here
      (debug:print-info 4 *default-log-port* "cond branch - "  "ei-1")
      (debug:print-info 1 *default-log-port* "Test " hed " set to \"" (hash-table-ref test-registry (db:test-make-full-name hed item-path)) "\". Removing it from the queue")
      (if (or (not (null? tal))
	      (not (null? reg)))
          (runs:loop-values tal reg reglen regfull reruns)
	  (begin
	    (debug:print-info 0 *default-log-port* "Nothing left in the queue!")
	    ;; If get here twice then we know we've tried to expand all items
	    ;; since there must be a logic issue with the handling of loops in the 
	    ;; items expand phase we will brute force an exit here.
	    (if (> runs:nothing-left-in-queue-count 2)
		(begin
		  (debug:print 0 *default-log-port* "WARNING: this condition is triggered when there were no items to expand and nothing to run. Please check your run for completeness")
		  (exit 0))
		(set! runs:nothing-left-in-queue-count (+ runs:nothing-left-in-queue-count 1)))
	    #f)))

     ;; desired result of below cond branch:
     ;;   we want to expand items in our test of interest (hed) in the following cases:
     ;;    case 1 - mode is itemmatch or itemwait: 
     ;;       - all prereq tests have been expanded
     ;;       - at least one prereq's items have completed
     ;;    case 2 - mode is toplevel   
     ;;       - prereqs are completed.
     ;;       - or no prereqs can complete 
     ;;    case 3 - mode not specified 
     ;;       - prereqs are completed and passed (we could consider removing "and passed" -- it would change behavior from current)          
    ((or (null? prereqs-not-met)
	  (and (member 'toplevel testmode)
	       (null? non-completed)))
      (debug:print-info 4 *default-log-port* "cond branch - "  "ei-2")
      (debug:print-info 4 *default-log-port* "runs:expand-items: (or (null? prereqs-not-met) (and (member 'toplevel testmode)(null? non-completed)))")
      (let ((test-name (tests:testqueue-get-testname test-record)))
	(setenv "MT_TEST_NAME" test-name) ;; 
	(setenv "MT_RUNNAME"   runname)
	(runs:set-megatest-env-vars run-id inrunname: runname) ;; these may be needed by the launching process
	(let ((items-list (items:get-items-from-config tconfig)))
	  (if (list? items-list)
	      (begin
		(if (null? items-list)
		    (let ((test-id   (rmt:get-test-id run-id test-name ""))
			  (num-items (rmt:test-toplevel-num-items run-id test-name)))
		      (if (and test-id
			       (not (> num-items 0)))
			  (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "ZERO_ITEMS" "Failed to run due to failed prerequisites"))))
		(tests:testqueue-set-items! test-record items-list)
		(list hed tal reg reruns))
	      (begin
		(debug:print-error 0 *default-log-port* "The proc from reading the items table did not yield a list - please report this")
		(exit 1))))))

    ((and (null? fails)
	   (null? prereq-fails)
	   (not (null? non-completed)))
      (debug:print-info 4 *default-log-port* "cond branch - "  "ei-3")
      (let* ((allinqueue (map (lambda (x)(if (string? x) x (db:test-get-testname x)))
        		      (append newtal reruns)))
	     ;; prereqstrs is a list of test names as strings that are prereqs for hed
             (prereqstrs (delete-duplicates (map (lambda (x)(if (string? x) x (db:test-get-testname x)))
						 prereqs-not-met)))
	     ;; a prereq that is not found in allinqueue will be put in the notinqueue list
	     ;; 
             ;; (notinqueue (filter (lambda (x)
             ;;    		   (not (member x allinqueue)))
             ;;    		 prereqstrs))
	     (give-up    #f))

	;; We can get here when a prereq has not been run due to *it* having a prereq that failed.
	;; We need to use this to dequeue this item as CANNOTRUN
	;; 
	(if (member 'toplevel testmode) ;; '(toplevel)) ;; NOTE: this probably should be (member 'toplevel testmode)
	    (for-each (lambda (prereq)
			(if (eq? (hash-table-ref/default test-registry prereq 'justfine) 'CANNOTRUN)
			    (set! give-up #t)))
		      prereqstrs))

	(if (and give-up
		 (not (and (null? tal)(null? reg))))
	    (let ((trimmed-tal (mt:discard-blocked-tests run-id hed tal test-records))
		  (trimmed-reg (mt:discard-blocked-tests run-id hed reg test-records)))
	      (debug:print 1 *default-log-port* "WARNING: test " hed " has discarded prerequisites, removing it from the queue")

	      (let ((test-id (rmt:get-test-id run-id hed "")))
		(if test-id (mt:test-set-state-status-by-id run-id test-id "COMPLETED" "PREQ_DISCARDED" "Failed to run due to discarded prerequisites")))
	      
	      (if (and (null? trimmed-tal)
		       (null? trimmed-reg))
		  #f
                  (runs:loop-values trimmed-tal trimmed-reg reglen regfull reruns)
                  ))
	      (list (car newtal)(append (cdr newtal) reg) '() reruns))))

    ((and (null? fails) ;; have not-started tests, but unable to run them.  everything looks completed with no prospect of unsticking something that is stuck.  we should mark hed as moribund and exit or continue if there are more tests to consider
	   (null? prereq-fails)
	   (null? non-completed))
     (debug:print-info 4 *default-log-port* "cond branch - "  "ei-4")
      (if  (runs:can-keep-running? hed 20)
	  (begin
	    (runs:inc-cant-run-tests hed)
	    (debug:print-info 0 *default-log-port* "no fails in prerequisites for " hed " but also none running, keeping " hed " for now. Try count: " (hash-table-ref/default *seen-cant-run-tests* hed 0)) ;; 
	    ;; getting here likely means the system is way overloaded, kill a full minute before continuing
	    (thread-sleep! 60) ;; TODO: gate by normalized server load > 1.0 (maxload config thing)
	    ;; num-retries code was here
	    ;; we use this opportunity to move contents of reg to tal
	    (list (car newtal)(append (cdr newtal) reg) '() reruns)) ;; an issue with prereqs not yet met?
	  (begin
	    (debug:print-info 1 *default-log-port* "no fails in prerequisites for " hed " but nothing seen running in a while, dropping test " hed " from the run queue")
	    (let ((test-id (rmt:get-test-id run-id hed "")))
	      (if test-id (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "TIMED_OUT" "Nothing seen running in a while.")))
            (runs:loop-values tal reg reglen regfull reruns)
            )))

    ((and 
       (or (not (null? fails))
	   (not (null? prereq-fails)))
       (member 'normal testmode))
      (debug:print-info 4 *default-log-port* "cond branch - "  "ei-5")
      (debug:print-info 1 *default-log-port* "test "  hed " (mode=" testmode ") has failed prerequisite(s); "
			(string-intersperse (map (lambda (t)(conc (db:test-get-testname t) ":" (db:test-get-state t)"/"(db:test-get-status t))) fails) ", ")
			", removing it from to-do list")
      (let ((test-id (rmt:get-test-id run-id hed "")))
	(if test-id
	    (if (not (null? prereq-fails))
		(mt:test-set-state-status-by-id-unless-completed run-id test-id "COMPLETED" "PREQ_DISCARDED" "Failed to run due to prior failed prerequisites")
		(mt:test-set-state-status-by-id-unless-completed run-id test-id "COMPLETED" "PREQ_FAIL"      "Failed to run due to failed prerequisites"))))
      ;; (debug:print 4 *default-log-port*"BB> set PREQ_FAIL on "hed)
      ;; (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_FAIL"      "Failed to run due to failed prerequisites")))) ;; BB: this works, btu equivalent for itemwait mode does not work.
      (if (or (not (null? reg))(not (null? tal)))
	  (begin
	    (hash-table-set! test-registry hed 'CANNOTRUN)
            (runs:loop-values tal reg reglen regfull (cons hed reruns))
            )
	  #f)) ;; #f flags do not loop
      
     ((and (not (null? fails))(member 'toplevel testmode))
      (debug:print-info 4 *default-log-port* "cond branch - "  "ei-6")
      (if (or (not (null? reg))(not (null? tal)))
	   (list (car newtal)(append (cdr newtal) reg) '() reruns)
	  #f)) 
     ((null? runnables)
      (debug:print-info 4 *default-log-port* "cond branch - "  "ei-7")
      #f) ;; if we get here and non-completed is null then it is all over.
     (else
      (debug:print-info 4 *default-log-port* "cond branch - "  "ei-8")
      (debug:print 0 *default-log-port* "WARNING: FAILS or incomplete tests maybe preventing completion of this run. Watch for issues with test " hed ", continuing for now")
      (list (car newtal)(cdr newtal) reg reruns)))))

(define (runs:mixed-list-testname-and-testrec->list-of-strings inlst)
  (if (null? inlst)
      '()
      (map (lambda (t)
	     (cond
	      ((vector? t)
	       (let ((test-name (db:test-get-testname t))
		     (item-path (db:test-get-item-path t))
		     (test-state (db:test-get-state t))
		     (test-status (db:test-get-status t)))
		 (conc test-name (if (equal? item-path "") "" "/") item-path ":" test-state "/" test-status)))
	      ((string? t)
	       t)
	      (else 
	       (conc t))))
	   inlst)))


;;  hed tal reg reruns reglen regfull test-record runname test-name item-path jobgroup max-concurrent-jobs run-id waitons item-path testmode test-patts required-tests test-registry registry-mutex flags keyvals run-info newtal all-tests-registry itemmaps)
(define (runs:process-expanded-tests runsdat testdat)
  ;; unroll the contents of runsdat and testdat (due to ongoing refactoring).
  (debug:print 2 *default-log-port* "runs:process-expanded-tests; testdat:" )
  (debug:print 2 *default-log-port* (with-output-to-string
                                            (lambda () (pp (runs:testdat->alist testdat) ))))
  (let* ((hed                    (runs:testdat-hed testdat))
	 (tal                    (runs:testdat-tal testdat))
	 (reg                    (runs:testdat-reg testdat))
	 (reruns                 (runs:testdat-reruns testdat))
	 (test-name              (runs:testdat-test-name testdat))
	 (item-path              (runs:testdat-item-path testdat))
	 (jobgroup               (runs:testdat-jobgroup testdat))
	 (waitons                (runs:testdat-waitons testdat))
	 (item-path              (runs:testdat-item-path testdat))
	 (testmode               (runs:testdat-testmode testdat))
	 (newtal                 (runs:testdat-newtal testdat))
	 (itemmaps               (runs:testdat-itemmaps testdat))
	 (test-record            (runs:testdat-test-record testdat))
	 (prereqs-not-met        (runs:testdat-prereqs-not-met testdat))

	 (reglen                 (runs:dat-reglen runsdat))
	 (regfull                (runs:dat-regfull runsdat))
	 (runname                (runs:dat-runname runsdat))
	 (max-concurrent-jobs    (runs:dat-max-concurrent-jobs runsdat))
	 (run-id                 (runs:dat-run-id runsdat))
	 (test-patts             (runs:dat-test-patts runsdat))
	 (required-tests         (runs:dat-required-tests runsdat))
	 (test-registry          (runs:dat-test-registry runsdat))
	 (registry-mutex         (runs:dat-registry-mutex runsdat))
	 (flags                  (runs:dat-flags runsdat))
	 (keyvals                (runs:dat-keyvals runsdat))
	 (run-info               (runs:dat-run-info runsdat))
	 (all-tests-registry     (runs:dat-all-tests-registry runsdat))
	 (run-limits-info        (runs:dat-can-run-more-tests runsdat))
	 ;; (runs:can-run-more-tests run-id jobgroup max-concurrent-jobs)) ;; look at the test jobgroup and tot jobs running
	 (have-resources         (car run-limits-info))
	 (num-running            (list-ref run-limits-info 1))
	 (num-running-in-jobgroup(list-ref run-limits-info 2)) 
	 (max-concurrent-jobs    (list-ref run-limits-info 3))
	 (job-group-limit        (list-ref run-limits-info 4))
	 ;; (prereqs-not-met        (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps))
	 ;; (prereqs-not-met         (mt:lazy-get-prereqs-not-met run-id waitons item-path mode: testmode itemmap: itemmap))
	 (fails                  (if (list? prereqs-not-met) ;; TODO: rename fails to failed-prereqs
				      (runs:calc-fails prereqs-not-met)
				      (begin
					(debug:print-error 0 *default-log-port* "prereqs-not-met is not a list! " prereqs-not-met)
					'())))
	 (non-completed           (filter (lambda (x)             ;; remove hed from not completed list, duh, of course it is not completed!
					    (not (equal? x hed)))
					  (runs:calc-not-completed prereqs-not-met)))
	 (loop-list               (list hed tal reg reruns))
	 ;; configure the load runner
	 (numcpus                 (common:get-num-cpus #f))
	 (maxload                 (string->number (or (configf:lookup *configdat* "jobtools" "maxload") "3.0")))         ;; use a non-number string to disable
         (maxhomehostload         (string->number (or (configf:lookup *configdat* "jobtools" "maxhomehostload") "2.0"))) ;; use a non-number string to disable
         (waitdelay               (string->number (or (configf:lookup *configdat* "jobtools" "waitdelay") "60"))))
    (debug:print-info 4 *default-log-port* "have-resources: " have-resources " prereqs-not-met: (" 
		      (string-intersperse 
		       (map (lambda (t)
			      (if (vector? t)
				  (conc (db:test-get-state t) "/" (db:test-get-status t))
				  (conc " WARNING: t is not a vector=" t )))
			    prereqs-not-met)
		       ", ") ") fails: " fails
		       "\nregistered? " (hash-table-ref/default test-registry (db:test-make-full-name test-name item-path) #f))
			    

    
    (if (and (not (null? prereqs-not-met))
	     (runs:lownoise (conc "waiting on tests " prereqs-not-met hed) 60))
	(debug:print-info 2 *default-log-port* "waiting on tests; " (string-intersperse (runs:mixed-list-testname-and-testrec->list-of-strings prereqs-not-met) ", ")))

    ;; Don't know at this time if the test have been launched at some time in the past
    ;; i.e. is this a re-launch?
    (debug:print-info 4 *default-log-port* "run-limits-info = " run-limits-info)
    
    (cond ; cond 894- 1067
     
     ;; Check item path against item-patts, 
     ;;
     ((not (tests:match test-patts (tests:testqueue-get-testname test-record) item-path required: required-tests)) ;; This test/itempath is not to be run
      ;; else the run is stuck, temporarily or permanently
      ;; but should check if it is due to lack of resources vs. prerequisites
      (debug:print-info 1 *default-log-port* "Skipping " (tests:testqueue-get-testname test-record) " " item-path " as it doesn't match " test-patts)
      (if (or (not (null? tal))(not (null? reg)))
	  (runs:loop-values tal reg reglen regfull reruns)
	  #f))
     
     ;; Register tests 
     ;;
     ((not (hash-table-ref/default test-registry (db:test-make-full-name test-name item-path) #f))
      (debug:print-info 4 *default-log-port* "Pre-registering test " test-name "/" item-path " to create placeholder" )
      ;; always do firm registration now in v1.60 and greater ;; (eq? *transport-type* 'fs) ;; no point in parallel registration if use fs
      (let register-loop ((numtries 15))
	(rmt:register-test run-id test-name item-path)
	(if (rmt:get-test-id run-id test-name item-path)
	    (hash-table-set! test-registry (db:test-make-full-name test-name item-path) 'done)
	    (if (> numtries 0)
		(begin
		  (thread-sleep! 0.5)
		  (register-loop (- numtries 1)))
		(debug:print-error 0 *default-log-port* "failed to register test " (db:test-make-full-name test-name item-path)))))
      (if (not (eq? (hash-table-ref/default test-registry (db:test-make-full-name test-name "") #f) 'done))
	  (begin
	    (rmt:register-test run-id test-name "")
	    (if (rmt:get-test-id run-id test-name "")
		(hash-table-set! test-registry (db:test-make-full-name test-name "") 'done))))
      (runs:shrink-can-run-more-tests-count runsdat)   ;; DELAY TWEAKER (still needed?)
      (if (and (null? tal)(null? reg))
	  (list hed tal (append reg (list hed)) reruns)
	  (list (runs:queue-next-hed tal reg reglen regfull) ;; cannot replace with a call to runs:loop-values as the logic is different for reg
		(runs:queue-next-tal tal reg reglen regfull)
		;; NB// Here we are building reg as we register tests
		;; if regfull we must pop the front item off reg
		(if regfull
		    (append (cdr reg) (list hed))
		    (append reg (list hed)))
		reruns)))
     
     ;; At this point hed test registration must be completed.
     ;;
     ((eq? (hash-table-ref/default test-registry (db:test-make-full-name test-name item-path) #f)
	   'start)
      (debug:print-info 0 *default-log-port* "Waiting on test registration(s): "
			(string-intersperse 
			 (filter (lambda (x)
				   (eq? (hash-table-ref/default test-registry x #f) 'start))
				 (hash-table-keys test-registry))
			 ", "))
      (thread-sleep! 0.051)
      (list hed tal reg reruns))
     
     ;; If no resources are available just kill time and loop again
     ;;
     ((not have-resources) ;; simply try again after waiting a second
      (if (runs:lownoise "no resources" 60)
	  (debug:print-info 1 *default-log-port* "no resources to run new tests, waiting ..."))
      ;; Have gone back and forth on this but db starvation is an issue.
      ;; wait one second before looking again to run jobs.
      (thread-sleep! 1)
      ;; could have done hed tal here but doing car/cdr of newtal to rotate tests
      (list (car newtal)(cdr newtal) reg reruns))
     
     ;; This is the final stage, everything is in place so launch the test
     ;;
     ((and have-resources
	   (or (null? prereqs-not-met)
	       (and (member 'toplevel testmode) ;;  'toplevel)
		    (null? non-completed)
		    (not (member 'exclusive testmode)))))
      ;; (hash-table-delete! *max-tries-hash* (db:test-make-full-name test-name item-path))
      ;; we are going to reset all the counters for test retries by setting a new hash table
      ;; this means they will increment only when nothing can be run
      (set! *max-tries-hash* (make-hash-table))
      ;; well, first lets see if cpu load throttling is enabled. If so wait around until the
      ;; average cpu load is under the threshold before continuing
      (if maxload ;; only gate if maxload is specified
          (common:wait-for-cpuload maxload numcpus waitdelay))
      (if maxhomehostload
          (common:wait-for-homehost-load maxhomehostload (conc "Waiting for homehost load to drop below normalized value of " maxhomehostload)))
      
      (run:test run-id run-info keyvals runname test-record flags #f test-registry all-tests-registry)
      (runs:incremental-print-results run-id)
      (hash-table-set! test-registry (db:test-make-full-name test-name item-path) 'running)
      (runs:shrink-can-run-more-tests-count runsdat)  ;; DELAY TWEAKER (still needed?)
      ;; (thread-sleep! *global-delta*)
      (if (or (not (null? tal))(not (null? reg)))
	  (runs:loop-values tal reg reglen regfull reruns)
	  #f))
     
     ;; must be we have unmet prerequisites
     ;;
     (else
      (debug:print 4 *default-log-port* "FAILS: " fails)
      ;; If one or more of the prereqs-not-met are FAIL then we can issue
      ;; a message and drop hed from the items to be processed.
      ;; (runs:mixed-list-testname-and-testrec->list-of-strings prereqs-not-met)
      (if (and (not (null? prereqs-not-met))
	       (runs:lownoise (conc "waiting on tests " prereqs-not-met hed) 60))
	  (debug:print-info 1 *default-log-port* "waiting on tests; " (string-intersperse 
						    (runs:mixed-list-testname-and-testrec->list-of-strings 
						     prereqs-not-met) ", ")))
      (if (or (null? fails)
	      (member 'toplevel testmode))
	  (begin
	    ;; couldn't run, take a breather
	    (if  (runs:lownoise "Waiting for more work to do..." 60)
		 (debug:print-info 0 *default-log-port* "Waiting for more work to do..."))
	    (thread-sleep! 1)
	    (list (car newtal)(cdr newtal) reg reruns))
	  ;; the waiton is FAIL so no point in trying to run hed ever again
	  (begin
            (let ((my-test-id (rmt:get-test-id run-id test-name item-path)))
              (mt:test-set-state-status-by-id-unless-completed run-id my-test-id "COMPLETED" "PREQ_FAIL" "Failed to run due to failed prerequisites2"))


            
            (if (or (not (null? reg))(not (null? tal)))
                (if (vector? hed)
		  (begin
		    (debug:print 1 *default-log-port* "WARNING: Dropping test " test-name "/" item-path
				 " from the launch list as it has prerequistes that are FAIL")
		    (let ((test-id (rmt:get-test-id run-id hed "")))
		      (if test-id (mt:test-set-state-status-by-id-unless-completed run-id test-id "COMPLETED" "PREQ_FAIL" "Failed to run due to failed prerequisites")))
		    (runs:shrink-can-run-more-tests-count runsdat) ;; DELAY TWEAKER (still needed?)
		    ;; (thread-sleep! *global-delta*)
		    ;; This next is for the items

                    (if (not (null? fails))
                        ;;(mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "PREQ_FAIL" #f)
                        (rmt:set-state-status-and-roll-up-items run-id test-name item-path "NOT_STARTED" "PREQ_FAIL" #f) 
                        ;;(mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "BLOCKED" #f)
                        (rmt:set-state-status-and-roll-up-items run-id test-name item-path "NOT_STARTED" "BLOCKED" #f) )
		    (hash-table-set! test-registry (db:test-make-full-name test-name item-path) 'removed)
		    (runs:loop-values tal reg reglen regfull reruns))
		  (let ((nth-try (hash-table-ref/default test-registry hed 0))) ;; hed not a vector...
                    (debug:print 2 *default-log-port* "nth-try("hed")="nth-try)
		    (cond
		     ((member "RUNNING" (map db:test-get-state prereqs-not-met))
		      (if (runs:lownoise (conc "possible RUNNING prerequistes " hed) 60)
			  (debug:print 0 *default-log-port* "WARNING: test " hed " has possible RUNNING prerequisites, don't give up on it yet."))
		      (thread-sleep! 0.1)
		      (runs:loop-values tal reg reglen regfull reruns))
		     ((or (not nth-try) ;; BB: condition on subsequent tries, condition below fires on first try 
			  (and (number? nth-try)
			       (< nth-try 2)))
		      (hash-table-set! test-registry hed (if (number? nth-try)
							     (+ nth-try 1)
							     0))
		      (if (runs:lownoise (conc "not removing test " hed) 60)
			  (debug:print 1 *default-log-port* "WARNING: not removing test " hed " from queue although it may not be runnable due to FAILED prerequisites"))
		      ;; may not have processed correctly. Could be a race condition in your test implementation? Dropping test " hed) ;;  " as it has prerequistes that are FAIL. (NOTE: hed is not a vector)")
		      (runs:shrink-can-run-more-tests-count runsdat) ;; DELAY TWEAKER (still needed?)
		      (runs:loop-values newtal reg reglen regfull reruns))
		     ((symbol? nth-try) ;; BB: 'done matches here in one case where prereq itemwait failed.  This is first "try"
		      (if (eq? nth-try 'removed) ;; removed is removed - drop it NOW
			  (if (null? tal)
			      #f ;; yes, really
			      (list (car tal)(cdr tal) reg reruns))
			  (begin
			    (if (runs:lownoise (conc "FAILED prerequisites or other issue" hed) 60)
				(debug:print 0 *default-log-port* "WARNING: test " hed " has FAILED prerequisites or other issue. Internal state >" nth-try "< will be overridden and we'll retry."))
			    ;; was: (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "KEEP_TRYING" #f)
                            (mt:test-set-state-status-by-testname-unless-completed run-id test-name item-path "COMPLETED" "PREQ_FAIL" #f)
			    (hash-table-set! test-registry hed 'removed) ;; was 0
                            (if (not (and (null? reg) (null? tal)))
                                (runs:loop-values tal reg reglen regfull reruns)
                                #f))))
		     (else
		      (if (runs:lownoise (conc "FAILED prerequitests and we tried" hed) 60)
			  (debug:print 0 *default-log-port* "WARNING: test " hed " has FAILED prerequitests and we've tried at least 10 times to run it. Giving up now."))
		      ;; (debug:print 0 *default-log-port* "         prereqs: " prereqs-not-met)
		      (hash-table-set! test-registry hed 'removed)
		      (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "TEN_STRIKES" #f)
		      ;; I'm unclear on if this roll up is needed - it may be the root cause of the "all set to FAIL" bug.
		      (rmt:set-state-status-and-roll-up-items run-id test-name item-path #f "FAIL" #f) ;; treat as FAIL
		      (list (if (null? tal)(car newtal)(car tal))
			    tal
			    reg
			    reruns)))))
                ;; ELSE: can't drop this - maybe running? Just keep trying

                ;;(if (not (or (not (null? reg))(not (null? tal)))) ;; old experiment
                (let ((runable-tests (runs:runable-tests prereqs-not-met))) ;; SUSPICIOUS: Should look at more than just prereqs-not-met?
                  (if (null? runable-tests)
                      #f   ;; I think we are truly done here
                      (runs:loop-values newtal reg reglen regfull reruns)))
                ;;) ;;from old experiment
                ) ;; end if (or (not (null? reg))(not (null? tal)))
            ))))))

;; scan a list of tests looking to see if any are potentially runnable
;;
(define (runs:runable-tests tests)
  (filter (lambda (t)
	    (if (not (vector? t))
		t
		(let ((state  (db:test-get-state t))
		      (status (db:test-get-status t)))
		  (case (string->symbol state)
		    ((COMPLETED INCOMPLETE) #f)
		    ((NOT_STARTED)
		     (if (member status '("TEN_STRIKES" "BLOCKED" "PREQ_FAIL" "ZERO_ITEMS" "PREQ_DISCARDED" "TIMED_OUT" ))
			 #f
			 t))
		    ((DELETED) #f)
		    (else t)))))
	  tests))

;; move all the miscellanea into this struct
;;
(defstruct runs:gendat inc-results inc-results-last-update inc-results-fmt run-info runname target)

(define *runs:general-data* 
  (make-runs:gendat
   inc-results: (make-hash-table)
   inc-results-last-update: 0
   inc-results-fmt: "~12a~12a~20a~12a~40a\n" ;; state status time duration test-name item-path
   run-info: #f
   runname: #f
   target: #f
   )
  )

(define (runs:incremental-print-results run-id)
  (let ((curr-sec (current-seconds)))
    (if (> (- curr-sec (runs:gendat-inc-results-last-update *runs:general-data*)) 5) ;; at least five seconds since last update
	(let* ((run-dat  (or (runs:gendat-run-info *runs:general-data*)(rmt:get-run-info run-id)))
	       (runname  (or (runs:gendat-runname *runs:general-data*)
			     (db:get-value-by-header (db:get-rows run-dat)
						     (db:get-header run-dat) "runname")))
	       (target   (or (runs:gendat-target *runs:general-data*)(rmt:get-target run-id)))
	       (testsdat (rmt:get-tests-for-run run-id "%" '() '() ;; run-id testpatt states statuses
						#f #f ;; offset limit
						#f ;; not-in
						#f ;; sort-by
						#f ;; sort-order
						#f ;; get full data (not 'shortlist)
						(runs:gendat-inc-results-last-update *runs:general-data*) ;; last update time
						'dashboard)))
	  (if (not (runs:gendat-run-info *runs:general-data*))
	      (runs:gendat-run-info-set! *runs:general-data* run-dat))
	  (if (not (runs:gendat-runname  *runs:general-data*))
	      (runs:gendat-runname-set! *runs:general-data* runname))
	  (if (not (runs:gendat-target *runs:general-data*))
	      (runs:gendat-target-set! *runs:general-data* target))
	  (for-each
	   (lambda (testdat)
	     (let* ((test-id    (db:test-get-id           testdat))
		    (prevdat    (hash-table-ref/default   (runs:gendat-inc-results *runs:general-data*)
							  (conc run-id "," test-id) #f))
		    (test-name  (db:test-get-testname     testdat))
		    (item-path  (db:test-get-item-path    testdat))
		    (state      (db:test-get-state        testdat))
		    (status     (db:test-get-status       testdat))
		    (event-time (db:test-get-event_time   testdat))
		    (duration   (db:test-get-run_duration testdat)))
	       (if (and (not (member state '("DELETED" "REMOTEHOSTSTART" "RUNNING" "LAUNCHED""NOT_STARTED")))
			(not (and prevdat
				  (equal? state  (db:test-get-state  prevdat))
				  (equal? status (db:test-get-status prevdat)))))
		   (let ((fmt   (runs:gendat-inc-results-fmt *runs:general-data*))
			 (dtime (seconds->year-work-week/day-time event-time))) 
		     (if (runs:lownoise "inc-print" 600)
			 (format #t fmt "State" "Status" "Start Time" "Duration" "Test path"))
		     ;; (debug:print 0 *default-log-port* "fmt: " fmt " state: " state " status: " status " test-name: " test-name " item-path: " item-path " dtime: " dtime)
		     ;; (debug:print 0 #f "event-time: " event-time " duration: " duration)
		     (format #t fmt
			     state
			     status
			     dtime
			     (seconds->hr-min-sec duration)
			     (conc "lt/" target "/" runname "/" test-name (if (string-null? item-path) "" (conc "/" item-path))))
		     (hash-table-set! (runs:gendat-inc-results *runs:general-data*) (conc run-id "," test-id) testdat)))))
	   testsdat)))
    (runs:gendat-inc-results-last-update-set! *runs:general-data* (- curr-sec 10))))

;; every time though the loop increment the test/itempatt val.
;; when the min is > max-allowed and none running then force exit
;;
(define *max-tries-hash* (make-hash-table))

;;======================================================================
;; runs:run-tests-queue is called by runs:run-tests
;;======================================================================
;;
;; test-records is a hash table testname:item_path => vector < testname testconfig waitons priority items-info ... >
(define (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests reglen-in all-tests-registry)
  ;; At this point the list of parent tests is expanded 
  ;; NB// Should expand items here and then insert into the run queue.
  (debug:print 5 *default-log-port* "test-records: " test-records ", flags: " (hash-table->alist flags))

  ;; Do mark-and-find clean up of db before starting runing of quue
  ;;
  ;; (rmt:find-and-mark-incomplete)

  (let* ((run-info              (rmt:get-run-info run-id))
         (tests-info            (mt:get-tests-for-run run-id #f '() '())) ;;  qryvals: "id,testname,item_path"))
         (sorted-test-names     (tests:sort-by-priority-and-waiton test-records))
         (test-registry         (make-hash-table))
         (registry-mutex        (make-mutex))
         (num-retries           0)
         (max-retries           (config-lookup *configdat* "setup" "maxretries"))
         (max-concurrent-jobs   (configf:lookup-number *configdat* "setup" "max_concurrent_jobs" default: 50))
         (reglen                (if (number? reglen-in) reglen-in 1))
         (last-time-incomplete  (- (current-seconds) 900)) ;; force at least one clean up cycle
         (last-time-some-running (current-seconds))
         ;; (tdbdat                (tasks:open-db))
         (runsdat (make-runs:dat
                   ;; hed: hed
                   ;; tal: tal
                   ;; reg: reg
                   ;; reruns: reruns
                   reglen: reglen
                   regfull: #f ;; regfull
                   ;; test-record: test-record
                   runname: runname
                   ;; test-name: test-name
                   ;; item-path: item-path
                   ;; jobgroup: jobgroup
                   max-concurrent-jobs: max-concurrent-jobs
                   run-id: run-id
                   ;; waitons: waitons
                   ;; testmode: testmode
                   test-patts: test-patts
                   required-tests: required-tests
                   test-registry: test-registry
                   registry-mutex: registry-mutex
                   flags: flags
                   keyvals: keyvals
                   run-info: run-info
                   ;; newtal: newtal
                   all-tests-registry: all-tests-registry
                   ;; itemmaps: itemmaps
                   ;; prereqs-not-met: (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps)
                   ;; can-run-more-tests: (runs:can-run-more-tests run-id jobgroup max-concurrent-jobs) ;; look at the test jobgroup and tot jobs running
                   )))

    ;; Initialize the test-registery hash with tests that already have a record
    ;; convert state to symbol and use that as the hash value
    (for-each (lambda (trec)
		(let ((id (db:test-get-id        trec))
		      (tn (db:test-get-testname  trec))
		      (ip (db:test-get-item-path trec))
		      (st (db:test-get-state     trec)))
		  (if (not (equal? st "DELETED"))
		      (hash-table-set! test-registry (db:test-make-full-name tn ip) (string->symbol st)))))
	      tests-info)
    (set! max-retries (if (and max-retries (string->number max-retries))(string->number max-retries) 100))

    (let loop ((hed         (car sorted-test-names))
               (tal         (cdr sorted-test-names))
	       (reg         '()) ;; registered, put these at the head of tal 
	       (reruns      '()))

      (runs:incremental-print-results run-id)

      (if (not (null? reruns))(debug:print-info 4 *default-log-port* "reruns=" reruns))

      ;; Here we mark any old defunct tests as incomplete. Do this every fifteen minutes
      ;; moving this to a parallel thread and just run it once.
      ;;
      (if (> (current-seconds)(+ last-time-incomplete 900))
          (begin
            (set! last-time-incomplete (current-seconds))
            ;; (rmt:find-and-mark-incomplete-all-runs)
	    ))

      ;; (print "Top of loop, hed=" hed ", tal=" tal " ,reruns=" reruns)
      (let* ((test-record (hash-table-ref test-records hed))
	     (test-name   (tests:testqueue-get-testname test-record))
	     (tconfig     (tests:testqueue-get-testconfig test-record))
	     (jobgroup    (config-lookup tconfig "test_meta" "jobgroup"))
	     (testmode    (let ((m (config-lookup tconfig "requirements" "mode")))
			    (if m (map string->symbol (string-split m)) '(normal))))
	     (itemmaps    (tests:get-itemmaps tconfig)) ;;  (configf:lookup tconfig "requirements" "itemmap"))
	     (priority    (tests:testqueue-get-priority   test-record))
	     (itemdat     (tests:testqueue-get-itemdat    test-record)) ;; itemdat can be a string, list or #f
	     (items       (tests:testqueue-get-items      test-record))
	     (item-path   (item-list->path itemdat))
	     (tfullname   (db:test-make-full-name test-name item-path))
	      ;; these are hard coded item-item waits test/item-path => test/item-path2 ...
	     (extra-waits (let* ((section (configf:get-section (tests:testqueue-get-testconfig test-record) "waitons"))
				 (myextra (alist-ref tfullname section equal?)))
			    (if myextra
				(let ((extras (string-split (car myextra))))
				  (if (runs:lownoise (conc tfullname "extra-waitons" tfullname) 60)
				      (debug:print-info 0 *default-log-port* "HAVE EXTRA WAITONS for test " tfullname ": " myextra))
				  (for-each
				   (lambda (extra)
				     ;; (debug:print 0 *default-log-port* "FYI: extra = " extra " reruns = " reruns)
				     (let ((basetestname (car (string-split extra "/"))))
				       #;(if (not (member extra tal))
					   (set! reruns (append tal (list extra))))
				       (if (not (member basetestname tal))
					   (set! reruns (append tal (list basetestname))))
				       ))
				   extras)
				  extras)
				'())))
	     (waitons     (delete-duplicates (append (tests:testqueue-get-waitons test-record) extra-waits) equal?))
	     (newtal      (append tal (list hed)))
	     (regfull     (>= (length reg) reglen))
	     (num-running (rmt:get-count-tests-running-for-run-id run-id))
	     (testdat     (make-runs:testdat
			   hed: hed
			   tal: tal
			   reg: reg
			   reruns: reruns
			   test-record: test-record
			   test-name:   test-name
			   item-path:   item-path
			   jobgroup:    jobgroup
			   waitons:     waitons
			   testmode:    testmode
			   newtal:      newtal
			   itemmaps:    itemmaps
			   ;; prereqs-not-met: prereqs-not-met
			   )))
	(runs:dat-regfull-set! runsdat regfull)
    
	(if (> num-running 0)
            (set! last-time-some-running (current-seconds)))

        (if (> (current-seconds)(+ last-time-some-running (or (configf:lookup *configdat* "setup" "give-up-waiting") 36000)))
            (hash-table-set! *max-tries-hash* tfullname (+ (hash-table-ref/default *max-tries-hash* tfullname 0) 1)))
	;; (debug:print 0 *default-log-port* "max-tries-hash: " (hash-table->alist *max-tries-hash*))

	;; Ensure all top level tests get registered. This way they show up as "NOT_STARTED" on the dashboard
	;; and it is clear they *should* have run but did not.
	(if (not (hash-table-ref/default test-registry (db:test-make-full-name test-name "") #f))
	    (begin
	      (rmt:register-test run-id test-name "")
	      (hash-table-set! test-registry (db:test-make-full-name test-name "") 'done)))
	
	;; Fast skip of tests that are already "COMPLETED" - NO! Cannot do that as the items may not have been expanded yet :(
	;;
	(if (member (hash-table-ref/default test-registry tfullname #f) 
		    '(DONOTRUN removed)) ;; *common:cant-run-states-sym*) ;; '(COMPLETED KILLED WAIVED UNKNOWN INCOMPLETE))
	    (begin
	      (if (runs:lownoise (conc "been marked do not run " tfullname) 60)
		  (debug:print-info 0 *default-log-port* "Skipping test " tfullname " as it has been marked do not run due to being completed or not runnable"))
	      (if (or (not (null? tal))(not (null? reg)))
		  (loop (runs:queue-next-hed tal reg reglen regfull)
			(runs:queue-next-tal tal reg reglen regfull)
			(runs:queue-next-reg tal reg reglen regfull)
			reruns))))
        ;; (loop (car tal)(cdr tal) reg reruns))))

	(runs:incremental-print-results run-id)
	(debug:print 4 *default-log-port* "TOP OF LOOP => "
		     "test-name: " test-name
		     "\n  hed:         " hed
		     "\n  tal:         " tal
		     "\n  reg:         " reg
                     "\n  test-record  " test-record
                     "\n  itemdat:     " itemdat
		     "\n  items:       " items
		     "\n  item-path:   " item-path
		     "\n  waitons:     " waitons
		     "\n  num-retries: " num-retries
                     "\n  reruns:      " reruns
		     "\n  regfull:     " regfull
		     "\n  reglen:      " reglen
		     "\n  length reg:  " (length reg)
                     )

	;; check for hed in waitons => this would be circular, remove it and issue an
	;; error
	(if (member test-name waitons)
	    (begin
	      (debug:print-error 0 *default-log-port* "test " test-name " has listed itself as a waiton, please correct this!")
	      (set! waiton (filter (lambda (x)(not (equal? x hed))) waitons))))

	(cond 
	 
	 ;; We want to catch tests that have waitons that are NOT in the queue and discard them IFF 
	 ;; they have been through the wringer 10 or more times
	 ((and (list? waitons)
	       (not (null? waitons))
	       (> (hash-table-ref/default *max-tries-hash* tfullname 0) 10)
	       (not (null? (filter
			    number?
			    (map (lambda (waiton)
				   (if (and (not (member waiton tal))            ;; this waiton is not in the list to be tried to run
					    (not (member waiton reruns)))
				       1
				       #f))
				 waitons))))) ;; could do this more elegantly with a marker....
          (debug:print-info 4 *default-log-port* "cond branch - "  "rtq-1")
	  (debug:print 0 *default-log-port* "WARNING: Marking test " tfullname " as not runnable. It is waiting on tests that cannot be run. Giving up now.")
	  (hash-table-set! test-registry tfullname 'removed))

	 ;; items is #f then the test is ok to be handed off to launch (but not before)
	 ;; 
	 ((not items)
          (debug:print-info 4 *default-log-port* "cond branch - "  "rtq-2")
	  (debug:print-info 4 *default-log-port* "OUTER COND: (not items)")
	  (if (and (not (tests:match test-patts (tests:testqueue-get-testname test-record) item-path required: required-tests))
		   (not (null? tal)))
	      (loop (car tal)(cdr tal) reg reruns))
	  (runs:testdat-prereqs-not-met-set! testdat (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps))
	  (runs:dat-can-run-more-tests-set! runsdat (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs)) ;; look at the test jobgroup and tot jobs running
	  (let ((loop-list (runs:process-expanded-tests runsdat testdat)))
            (if loop-list (apply loop loop-list))))

	 ;; items processed into a list but not came in as a list been processed
	 ;;
	 ((and (list? items)     ;; thus we know our items are already calculated
	       (not   itemdat))  ;; and not yet expanded into the list of things to be done
          (debug:print-info 4 *default-log-port* "cond branch - "  "rtq-3")
	  (debug:print-info 4 *default-log-port* "OUTER COND: (and (list? items)(not itemdat))")
	  ;; Must determine if the items list is valid. Discard the test if it is not.
	  (if (and (list? items)
		   (> (length items) 0)
		   (and (list? (car items))
			(> (length (car items)) 0))
		   (debug:debug-mode 1))
	      (debug:print 2 *default-log-port* (map (lambda (row)
                                                       (conc (string-intersperse
                                                              (map (lambda (varval)
                                                                     (string-intersperse varval "="))
                                                                   row)
                                                              " ")
                                                             "\n"))
                                                     items)))

          (let* ((items-in-testpatt
                  (filter
                   (lambda (my-itemdat)
                     (tests:match test-patts hed (item-list->path my-itemdat) ))
                   ;; was: (tests:match test-patts hed (item-list->path my-itemdat) required: required-tests))
                   items) ))
            (if (null? items-in-testpatt)
                (let ((test-id   (rmt:get-test-id run-id test-name "")))
                  (debug:print-info 0 *default-log-port* "Test " (tests:testqueue-get-testname test-record) " is itemized but has no items matching test pattern -- marking status ZERO_ITEMS")
                  (if test-id
                      (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "ZERO_ITEMS" "This test has no items which match test pattern.")))
                
                (for-each (lambda (my-itemdat)
                            (let* ((new-test-record (let ((newrec (make-tests:testqueue)))
                                                      (vector-copy! test-record newrec)
                                                      newrec))
                                   (my-item-path (item-list->path my-itemdat))

                                   (newtestname (db:test-make-full-name hed my-item-path)))    ;; test names are unique on testname/item-path
                              (tests:testqueue-set-items!     new-test-record #f)
                              (tests:testqueue-set-itemdat!   new-test-record my-itemdat)
                              (tests:testqueue-set-item_path! new-test-record my-item-path)
                              (hash-table-set! test-records newtestname new-test-record)
                              (set! tal (append tal (list newtestname)))))  ;; since these are itemized create new test names testname/itempath
                          items-in-testpatt)))
          
          

	  ;; At this point we have possibly added items to tal but all must be handed off to 
	  ;; INNER COND logic. I think loop without rotating the queue 
	  ;; (loop hed tal reg reruns))
	  ;; (let ((newtal (append tal (list hed))))  ;; We should discard hed as it has been expanded into it's items? Yes, but only if this *is* an itemized test
	  ;; (loop (car newtal)(cdr newtal) reg reruns)
	  (if (null? tal)
	      #f
	      (loop (car tal)(cdr tal) reg reruns)))
         
	 ;; if items is a proc then need to run items:get-items-from-config, get the list and loop 
	 ;;    - but only do that if resources exist to kick off the job
	 ;; EXPAND ITEMS
	 ((or (procedure? items)(eq? items 'have-procedure))
          (debug:print-info 4 *default-log-port* "cond branch - "  "rtq-4")
	  (let ((can-run-more    (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs)))
	    (if (and (list? can-run-more)
		     (car can-run-more))
		(let ((loop-list (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry test-records itemmaps))) ;; itemized test expanded here
		  (if loop-list
		      (apply loop loop-list)
                      (debug:print-info 4 *default-log-port* " -- Can't expand hed="hed)
                      )
                  )
		;; if can't run more just loop with next possible test
		(loop (car newtal)(cdr newtal) reg reruns))))
         
	 ;; this case should not happen, added to help catch any bugs
	 ((and (list? items) itemdat)
          (debug:print-info 4 *default-log-port* "cond branch - "  "rtq-5")
	  (debug:print-error 0 *default-log-port* "Should not have a list of items in a test and the itemspath set - please report this")
	  (exit 1))
	 ((not (null? reruns))
          (debug:print-info 4 *default-log-port* "cond branch - "  "rtq-6")
	  (let* ((newlst (tests:filter-non-runnable run-id tal test-records)) ;; i.e. not FAIL, WAIVED, INCOMPLETE, PASS, KILLED,
		 (junked (lset-difference equal? tal newlst)))
	    (debug:print-info 4 *default-log-port* "full drop through, if reruns is less than 100 we will force retry them, reruns=" reruns ", tal=" tal)
	    (if (< num-retries max-retries)
		(set! newlst (append reruns newlst)))
	    (set! num-retries (+ num-retries 1))
	    ;; (thread-sleep! (+ 1 *global-delta*))
	    (if (not (null? newlst))
		;; since reruns have been tacked on to newlst create new reruns from junked
		(loop (car newlst)(cdr newlst) reg (delete-duplicates junked)))))
	 ((not (null? tal))
          (debug:print-info 4 *default-log-port* "cond branch - "  "rtq-7")
	  (debug:print-info 4 *default-log-port* "I'm pretty sure I shouldn't get here."))
	 ((not (null? reg)) ;; could we get here with leftovers?
          (debug:print-info 4 *default-log-port* "cond branch - "  "rtq-8")
	  (debug:print-info 0 *default-log-port* "Have leftovers!")
	  (loop (car reg)(cdr reg) '() reruns))
	 (else
          (debug:print-info 4 *default-log-port* "cond branch - "  "rtq-9")
	  (debug:print-info 4 *default-log-port* "Exiting loop with...\n  hed=" hed "\n  tal=" tal "\n  reruns=" reruns))
	 ))) ;; end loop on sorted test names
    ;; this is the point where everything is launced and now you can mark the run in metadata table as all launced 
    (rmt:set-var (conc "lunch-complete-" run-id) "yes")
  
    ;; now *if* -run-wait we wait for all tests to be done
    ;; Now wait for any RUNNING tests to complete (if in run-wait mode)
    (thread-sleep! 10) ;; I think there is a race condition here. Let states/statuses settle
    (let wait-loop ((num-running      (rmt:get-count-tests-running-for-run-id run-id))
		    (prev-num-running 0))
      ;; (debug:print-info 13 *default-log-port* "num-running=" num-running ", prev-num-running=" prev-num-running)
      (if (and (or (args:get-arg "-run-wait")
		   (equal? (configf:lookup *configdat* "setup" "run-wait") "yes"))
	       (> num-running 0))
	  (begin
	    ;; Here we mark any old defunct tests as incomplete. Do this every fifteen minutes
	    ;; (debug:print 0 *default-log-port* "Got here eh! num-running=" num-running " (> num-running 0) " (> num-running 0))
	    (if (> (current-seconds)(+ last-time-incomplete 900))
		(begin
		  (debug:print-info 0 *default-log-port* "Marking stuck tests as INCOMPLETE while waiting for run " run-id ". Running as pid " (current-process-id) " on " (get-host-name))
		  (set! last-time-incomplete (current-seconds))
		  (rmt:find-and-mark-incomplete run-id #f)))
	    (if (not (eq? num-running prev-num-running))
		(debug:print-info 0 *default-log-port* "run-wait specified, waiting on " num-running " tests in RUNNING, REMOTEHOSTSTART or LAUNCHED state at " (time->string (seconds->local-time (current-seconds)))))
	    (thread-sleep! 5)
	    ;; (wait-loop (rmt:get-count-tests-running-for-run-id run-id) num-running))))
	    (wait-loop (rmt:get-count-tests-running-for-run-id run-id) num-running))))
    ;; LET* ((test-record
    ;; we get here on "drop through". All done!
    ;; this is moved to runs:run-testes since this function is getting called twice to ensure everthing is completed. 
    ;; (debug:print-info 0 *default-log-port* "Calling Post Hook")    
    ;; (runs:run-post-hook run-id)
    (debug:print-info 1 *default-log-port* "All tests launched")))

(define (runs:calc-fails prereqs-not-met)
  (filter (lambda (test)
	    (and (vector? test) ;; not (string? test))
		 (member (db:test-get-state test) '("INCOMPLETE" "COMPLETED")) ;; TODO: pull from *common:stuff...*
		 (not (member (db:test-get-status test)
			      '("PASS" "WARN" "CHECK" "WAIVED" "SKIP")))))
	  prereqs-not-met))

(define (runs:calc-prereq-fail prereqs-not-met) ;; REMOVEME since NOT_STARTED/PREQ_FAIL is now COMPLETED/PREQ_FAIL
  (filter (lambda (test)
	    (and (vector? test) ;; not (string? test))
		 (equal? (db:test-get-state test) "NOT_STARTED")
		 (not (member (db:test-get-status test)
			      '("n/a" "KEEP_TRYING")))))
	  prereqs-not-met))

(define (runs:calc-not-completed prereqs-not-met)
  (filter
   (lambda (t)
     (or (not (vector? t))
	 (not (member (db:test-get-state t) '("INCOMPLETE" "COMPLETED")))))
   prereqs-not-met))

;; (define (runs:calc-not-completed prereqs-not-met)
;;   (filter
;;    (lambda (t)
;;      (or (not (vector? t))
;; 	 (not (equal? "COMPLETED" (db:test-get-state t)))))
;;    prereqs-not-met))

(define (runs:calc-runnable prereqs-not-met)
  (filter 
   (lambda (t)
     (or (not (vector? t))
	 (and (equal? "NOT_STARTED" (db:test-get-state t))
	      (member (db:test-get-status t)
		      '("n/a" "KEEP_TRYING")))
	 (and (equal? "RUNNING" (db:test-get-state t))))) ;; account for a test that is running
   prereqs-not-met))

(define (runs:pretty-string lst)
  (map (lambda (t)
	 (if (not (vector? t))
	     (conc t)
	     (conc (db:test-get-testname t) ":" (db:test-get-state t) "/" (db:test-get-status t))))
       lst))

;; parent-test is there as a placeholder for when parent-tests can be run as a setup step
;;
(define (run:test run-id run-info keyvals runname test-record flags parent-test test-registry all-tests-registry)
  ;; All these vars might be referenced by the testconfig file reader
  (let* ((test-name    (tests:testqueue-get-testname   test-record))
	 (test-waitons (tests:testqueue-get-waitons    test-record))
	 (test-conf    (tests:testqueue-get-testconfig test-record))
	 (itemdat      (tests:testqueue-get-itemdat    test-record))
	 (test-path    (hash-table-ref all-tests-registry test-name)) ;; (conc *toppath* "/tests/" test-name)) ;; could use tests:get-testconfig here ...
	 (force        (hash-table-ref/default flags "-force" #f))
	 (rerun        (hash-table-ref/default flags "-rerun" #f))
	 (keepgoing    (hash-table-ref/default flags "-keepgoing" #f))
	 (incomplete-timeout (string->number (or (configf:lookup *configdat* "setup" "incomplete-timeout") "x")))
	 (item-path     "")
	 (db           #f)
	 (full-test-name #f))

    ;; setting itemdat to a list if it is #f
    (if (not itemdat)(set! itemdat '()))
    (set! item-path (item-list->path itemdat))
    (set! full-test-name (db:test-make-full-name test-name item-path))
    (debug:print-info 4 *default-log-port*
		      "\nTESTNAME: " full-test-name 
		      "\n   test-config: " (hash-table->alist test-conf)
		      "\n   itemdat: " itemdat
		      )
    (debug:print 2 *default-log-port* "Attempting to launch test " full-test-name)
    ;; (setenv "MT_TEST_NAME" test-name) ;; 
    ;; (setenv "MT_ITEMPATH"  item-path)
    ;; (setenv "MT_RUNNAME"   runname)
    (runs:set-megatest-env-vars run-id inrunname: runname testname: test-name itempath: item-path) ;; these may be needed by the launching process
    (change-directory *toppath*)

    ;; Here is where the test_meta table is best updated
    ;; Yes, another use of a global for caching. Need a better way?
    ;;
    ;; There is now a single call to runs:update-all-test_meta and this 
    ;; per-test call is not needed. Given the delicacy of the move to 
    ;; v1.55 this code is being left in place for the time being.
    ;;
    (if (not (hash-table-ref/default *test-meta-updated* test-name #f))
        (begin
          (hash-table-set! *test-meta-updated* test-name #t)
          (runs:update-test_meta test-name test-conf)))
    
    ;; itemdat => ((ripeness "overripe") (temperature "cool") (season "summer"))
    (let* ((new-test-path (string-intersperse (cons test-path (map cadr itemdat)) "/"))
	   (test-id       (rmt:get-test-id run-id test-name item-path))
	   (testdat       (if test-id (rmt:get-test-info-by-id run-id test-id) #f)))
      (if (not testdat)
	  (let loop ()
	    ;; ensure that the path exists before registering the test
	    ;; NOPE: Cannot! Don't know yet which disk area will be assigned....
	    ;; (system (conc "mkdir -p " new-test-path))
	    ;;
	    ;; (open-run-close tests:register-test db run-id test-name item-path)
	    ;;
	    ;; NB// for the above line. I want the test to be registered long before this routine gets called!
	    ;;
	    (if (not test-id)(set! test-id (rmt:get-test-id run-id test-name item-path)))
	    (if (not test-id)
		(begin
		  (debug:print 2 *default-log-port* "WARN: Test not pre-created? test-name=" test-name ", item-path=" item-path ", run-id=" run-id)
		  (rmt:register-test run-id test-name item-path)
		  (set! test-id (rmt:get-test-id run-id test-name item-path))))
	    (debug:print-info 4 *default-log-port* "test-id=" test-id ", run-id=" run-id ", test-name=" test-name ", item-path=\"" item-path "\"")
	    (set! testdat (rmt:get-test-info-by-id run-id test-id))
	    (if (not testdat)
		(begin
		  (debug:print-info 0 *default-log-port* "WARNING: server is overloaded, trying again in one second")
		  (thread-sleep! 1)
		  (loop)))))
      (if (not testdat) ;; should NOT happen
	  (debug:print-error 0 *default-log-port* "failed to get test record for test-id " test-id))
      (set! test-id (db:test-get-id testdat))
      (if (common:file-exists? test-path)
	  (change-directory test-path)
	  (begin
	    (debug:print-error 0 *default-log-port* "test run path not created before attempting to run the test. Perhaps you are running -remove-runs at the same time?")
	    (change-directory *toppath*)))
      (case (if force ;; (args:get-arg "-force")
		'NOT_STARTED
		(if testdat
		    (string->symbol (test:get-state testdat))
		    'failed-to-insert))
	((failed-to-insert)
	 (debug:print-error 0 *default-log-port* "Failed to insert the record into the db"))
	((NOT_STARTED COMPLETED DELETED INCOMPLETE)
	 (let ((runflag #f))
	   (cond
	    ;; -force, run no matter what
	    (force (set! runflag #t))
	    ;; NOT_STARTED, run no matter what
	    ((member (test:get-state testdat) '("DELETED" "NOT_STARTED" "INCOMPLETE"))(set! runflag #t))
	    ;; not -rerun and PASS, WARN or CHECK, do no run
	    ((and (or (not rerun)
		      keepgoing)
		  ;; Require to force re-run for COMPLETED or *anything* + PASS,WARN or CHECK
		  (or (member (test:get-status testdat) '("PASS" "WARN" "CHECK" "SKIP" "WAIVED"))
		      (member (test:get-state  testdat) '("COMPLETED")))) 
	     (debug:print-info 2 *default-log-port* "running test " test-name "/" item-path " suppressed as it is " (test:get-state testdat) " and " (test:get-status testdat))
	     (hash-table-set! test-registry full-test-name 'DONOTRUN) ;; COMPLETED)
	     (set! runflag #f))
	    ;; -rerun and status is one of the specifed, run it
	    ((and rerun
		  (let* ((rerunlst   (string-split rerun ","))
			 (must-rerun (member (test:get-status testdat) rerunlst)))
		    (debug:print-info 3 *default-log-port* "-rerun list: " rerun ", test-status: " (test:get-status testdat)", must-rerun: " must-rerun)
		    must-rerun))
	     (debug:print-info 2 *default-log-port* "Rerun forced for test " test-name "/" item-path)
	     (set! runflag #t))
	    ;; -keepgoing, do not rerun FAIL
	    ((and keepgoing
		  (member (test:get-status testdat) '("FAIL")))
	     (set! runflag #f))
	    ((and (not rerun)
		  (member (test:get-status testdat) '("FAIL" "n/a")))
	     (set! runflag #t))
	    (else (set! runflag #f)))
	   (debug:print 4 *default-log-port* "RUNNING => runflag: " runflag " STATE: " (test:get-state testdat) " STATUS: " (test:get-status testdat))
	   (if (not runflag)
	       (if (not parent-test)
		   (if (runs:lownoise (conc "not starting test" full-test-name) 60)
		       (debug:print 1 *default-log-port* "NOTE: Not starting test " full-test-name " as it is state \"" (test:get-state testdat) 
				    "\" and status \"" (test:get-status testdat) "\", use -rerun \"" (test:get-status testdat)
				    "\" or -force to override")))
	       ;; NOTE: No longer be checking prerequisites here! Will never get here unless prereqs are
	       ;;       already met.
	       ;; This would be a great place to do the process-fork
	       ;; 
	       (let ((skip-test   #f)
		     (skip-check  (configf:get-section test-conf "skip")))
		 (cond 
		  ;; Have to check for skip conditions. This one skips if there are same-named tests
		  ;; currently running
		  ((and skip-check
			(configf:lookup test-conf "skip" "prevrunning"))
		   ;; run-ids = #f means *all* runs
		   (let ((running-tests (rmt:get-tests-for-runs-mindata #f full-test-name '("RUNNING" "REMOTEHOSTSTART" "LAUNCHED") '() #f)))
		     (if (not (null? running-tests)) ;; have to skip 
			 (set! skip-test "Skipping due to previous tests running"))))
		  ((and skip-check
			(configf:lookup test-conf "skip" "fileexists"))
		   (if (common:file-exists? (configf:lookup test-conf "skip" "fileexists"))
		       (set! skip-test (conc "Skipping due to existance of file " (configf:lookup test-conf "skip" "fileexists")))))
		  ((and skip-check
			(configf:lookup test-conf "skip" "rundelay"))
		   ;; run-ids = #f means *all* runs
		   (let* ((numseconds      (common:hms-string->seconds (configf:lookup test-conf "skip" "rundelay")))
			  (running-tests   (rmt:get-tests-for-runs-mindata #f full-test-name '("RUNNING" "REMOTEHOSTSTART" "LAUNCHED") '() #f))
			  (completed-tests (rmt:get-tests-for-runs-mindata #f full-test-name '("COMPLETED" "INCOMPLETE") '("PASS" "FAIL" "ABORT") #f)) ;; ironically INCOMPLETE is same as COMPLETED in this contex
			  (last-run-times  (map db:mintest-get-event_time completed-tests))
			  (time-since-last (- (current-seconds) (if (null? last-run-times) 0 (common:max last-run-times)))))
		     (if (or (not (null? running-tests)) ;; have to skip if test is running
			     (> numseconds time-since-last))
			 (set! skip-test (conc "Skipping due to previous test run less than " (configf:lookup test-conf "skip" "rundelay") " ago"))))))
		 
		 (if skip-test
		     (begin
		       (mt:test-set-state-status-by-id run-id test-id "COMPLETED" "SKIP" skip-test)
		       (debug:print-info 1 *default-log-port* "SKIPPING Test " full-test-name " due to " skip-test))
		     ;;
		     ;; Here the test is handed off to launch.scm for launch-test to complete the launch process
		     ;;
		     (if (not (launch-test test-id run-id run-info keyvals runname test-conf test-name test-path itemdat flags))
			 (begin
			   (print "ERROR: Failed to launch the test. Exiting as soon as possible")
			   (set! *globalexitstatus* 1) ;; 
			   (process-signal (current-process-id) signal/kill))))))))
	((KILLED) 
	 (debug:print 1 *default-log-port* "NOTE: " full-test-name " is already running or was explictly killed, use -force to launch it.")
	 (hash-table-set! test-registry (db:test-make-full-name test-name test-path) 'DONOTRUN)) ;; KILLED))
	((LAUNCHED REMOTEHOSTSTART RUNNING)  
	 (debug:print 2 *default-log-port* "NOTE: " test-name " is already running"))
	;; (if (> (- (current-seconds)(+ (db:test-get-event_time testdat)
	;; 			       (db:test-get-run_duration testdat)))
	;; 	(or incomplete-timeout
	;; 	    6000)) ;; i.e. no update for more than 6000 seconds
	;;      (begin
	;;        (debug:print 0 *default-log-port* "WARNING: Test " test-name " appears to be dead. Forcing it to state INCOMPLETE and status STUCK/DEAD")
	;;        (tests:test-set-status! run-id test-id "INCOMPLETE" "STUCK/DEAD" "" #f))
	;;        ;; (tests:test-set-status! test-id "INCOMPLETE" "STUCK/DEAD" "" #f))
	;;      (debug:print 2 *default-log-port* "NOTE: " test-name " is already running")))
	(else      
	 (debug:print-error 0 *default-log-port* "Failed to launch test " full-test-name ". Unrecognised state " (test:get-state testdat))
	 (case (string->symbol (test:get-state testdat)) 
	   ((COMPLETED INCOMPLETE)
	    (hash-table-set! test-registry (db:test-make-full-name test-name test-path) 'DONOTRUN))
	   (else
	    (hash-table-set! test-registry (db:test-make-full-name test-name test-path) 'DONOTRUN))))))))

;;======================================================================
;; END OF NEW STUFF
;;======================================================================

(define (get-dir-up-n dir . params) 
  (let ((dparts  (string-split dir "/"))
	(count   (if (null? params) 1 (car params))))
    (conc "/" (string-intersperse 
	       (take dparts (- (length dparts) count))
	       "/"))))

(define (runs:recursive-delete-with-error-msg real-dir)
  (if (> (system (conc "rm -rf " real-dir)) 0)
      (begin
	;; FAILED, possibly due to permissions, do chmod a+rwx then try one more time
	(system (conc "chmod -R a+rwx " real-dir))
	(if (> (system (conc "rm -rf " real-dir)) 0)
	    (debug:print-error 0 *default-log-port* "There was a problem removing " real-dir " with rm -f")))))

(define (runs:safe-delete-test-dir real-dir)
  ;; first delete all sub-directories
  (directory-fold 
   (lambda (f x)
     (let ((fullname (conc real-dir "/" f)))
       (if (directory? fullname)(runs:recursive-delete-with-error-msg fullname)))
     (+ 1 x))
   0 real-dir)
  ;; then files other than *testdat.db*
  (directory-fold 
   (lambda (f x)
     (let ((fullname (conc real-dir "/" f)))
       (if (not (string-search (regexp "testdat.db") f))
	   (runs:recursive-delete-with-error-msg fullname)))
     (+ 1 x))
   0 real-dir)
  ;; then the entire directory
  (runs:recursive-delete-with-error-msg real-dir))

;; cleanup often needs to remove all but the last N runs per target
;;
;; target-patts a1/b1/c1,a2/b2/c2 ...
;;
;; This will fail if called with empty target or a bad target (i.e. missing or extra fields)
;;
(define (runs:get-hash-by-target target-patts runpatt)
  (let* ((targets (string-split target-patts ","))
	 (keys    (rmt:get-keys))
	 (res-ht  (make-hash-table))) ;; target -> ( runrecord1 runrecord2 ... )
    (for-each
     (lambda (target-patt)
       (let ((runs     (rmt:simple-get-runs runpatt #f #f target-patt)))
	 (for-each
	  (lambda (run)
	    (let ((target (simple-run-target run)))
	      (hash-table-set! res-ht target (cons run (hash-table-ref/default res-ht target '())))))
	  runs)))
     targets)
    res-ht))

;; delete runs older than X (weeks, days, months years etc.)
;; delete redundant runs within a target - N is the input
;; delete redundant runs within a target IFF older than given date/time AND keep at least N
;; 
(define (runs:remove-all-but-last-n-runs-per-target target-patts runpatt num-to-keep #!key (actions '(print)))
  (let* ((runs-ht  (runs:get-hash-by-target target-patts runpatt))
	 (age      (if (args:get-arg "-age")(common:hms-string->seconds (args:get-arg "-age")) #f))
	 (age-mark (if age (- (current-seconds) age) (+ (current-seconds) 86400)))
	 (precmd   (or (args:get-arg "-precmd") "")))
    (print "Actions: " actions)
    (for-each
     (lambda (target)
       (let* ((runs      (hash-table-ref runs-ht target))
	      (sorted    (sort runs (lambda (a b)(< (simple-run-event_time a)(simple-run-event_time b)))))
	      (to-remove (let* ((len      (length sorted))
                                (trim-amt (- len num-to-keep)))
                           (if (> trim-amt 0)
                               (take sorted trim-amt)
                               '()))))
	 (hash-table-set! runs-ht target to-remove)
         (print target ":")
         (for-each
          (lambda (run)
            (let ((remove (member run to-remove (lambda (a b)
                                                  (eq? (simple-run-id a)
                                                       (simple-run-id b))))))
	      (if (and age (> (simple-run-event_time run) age-mark))
		  (print "Skipping handling of " target "/" (simple-run-runname run) " as it is younger than " (args:get-arg "-age"))
		  (for-each
		   (lambda (action)
		     (case action
		       ((print)
			(print " " (simple-run-runname run)
			       " " (time->string (seconds->local-time (simple-run-event_time run)) "WW%V.%u %H:%M:%S")
			       " " (if remove "REMOVE" "")))
		       ((remove-runs)
			(if remove (system (conc precmd " megatest -remove-runs -target " target " -runname " (simple-run-runname run) " -testpatt %"
						 (if (member 'kill-runs actions) ;; if kill-runs is specified then set -kill-wait to 0
						     " -kill-wait 0"
						     "")))))
		       ((archive)
			(if remove (system (conc precmd " megatest -archive save-remove -target " target " -runname " (simple-run-runname run) " -testpatt %"))))
		       ((kill-runs)
			(if remove (system (conc precmd " megatest -kill-runs -target " target " -runname " (simple-run-runname run) " -testpatt %"))))
			))
		   actions))))
          sorted)))
     ;; (print "Sorted: " (map simple-run-event_time sorted))
     ;; (print "Remove: " (map simple-run-event_time to-remove))))
     (hash-table-keys runs-ht))
    runs-ht))

;; (define (runs:remove-all-but-last-n-runs-per-target target-patts runpatt num-to-keep)
;;   (let ((data (runs:get-all-but-most-recent-n-per-target target-patts runpatt num-to-keep)))
;;     (for-each
;;      (lambda (target)
;;        (let ((runs-to-remove (hash-table-ref data target )))
;;          (for-each
;;           (lambda (run)
;;             (print "megatest -remove-runs -target " target " -runname " (simple-run-runname run) " -testpatt %"))
;;           runs-to-remove)))
;;      (hash-table-keys data))))

;; Remove runs
;; fields are passing in through 
;; action:
;;    'remove-runs
;;    'set-state-status
;;
;; NB// should pass in keys?
;;
(define (runs:operate-on action target runnamepatt testpatt #!key (state #f)(status #f)(new-state-status #f)(mode #f)(options '()))
  (common:clear-caches) ;; clear all caches
  (let* ((db           #f)
	 ;; (tdbdat       (tasks:open-db))
	 (keys         (rmt:get-keys))
	 (rundat       (mt:get-runs-by-patt keys runnamepatt target))
	 (header       (vector-ref rundat 0))
	 (runs         (vector-ref rundat 1))
	 (states       (if state  (string-split state  ",") '()))
	 (statuses     (if status (string-split status ",") '()))
	 (state-status (if (string? new-state-status) (string-split new-state-status ",") '(#f #f)))
	 (rp-mutex     (make-mutex))
	 (bup-mutex    (make-mutex))
         (keep-records (args:get-arg "-keep-records"))) ;; used in conjunction with -remove-runs to keep the records, TODO: consolidate this with "mode".

    (let* ((write-access-actions '(remove-runs set-state-status archive run-wait kill-runs))
           (dbfile             (conc  *toppath* "/megatest.db"))
           (readonly-mode      (not (file-write-access? dbfile))))
      (when (and readonly-mode
                 (member action write-access-actions))
        (debug:print-error 0 *default-log-port* "megatest.db is readonly.  Cannot proceed with action ["action"] in which write-access isrequired .")
        (exit 1)))
    
    (debug:print-info 4 *default-log-port* "runs:operate-on => Header: " header " action: " action " new-state-status: " new-state-status)
    (if (> 2 (length state-status))
	(begin
	  (debug:print-error 0 *default-log-port* "the parameter to -set-state-status is a comma delimited string. E.g. COMPLETED,FAIL")
	  (exit)))
    (for-each
     (lambda (run)
       (let ((runkey (string-intersperse (map (lambda (k)
						(db:get-value-by-header run header k)) keys) "/"))
	     (dirs-to-remove (make-hash-table))
	     (proc-get-tests (lambda (run-id)
                               (mt:get-tests-for-run run-id
                                                     testpatt states statuses
                                                     not-in:  #f
                                                     sort-by: (case action
                                                                ((remove-runs) 'rundir)
                                                                (else          'event_time))))))
	 (let* ((run-id    (db:get-value-by-header run header "id"))
		(run-state (db:get-value-by-header run header "state"))
		(run-name  (db:get-value-by-header run header "runname"))
		(tests     (if (not (equal? run-state "locked"))
			       (proc-get-tests run-id)
			       '()))
		(lasttpath "/does/not/exist/I/hope")
		(worker-thread #f))
	   (debug:print-info 4 *default-log-port* "runs:operate-on run=" run ", header=" header)
	   (if (not (null? tests))
	       (begin
		 (case action
                   ((kill-runs)
                    (tasks:kill-runner target run-name "%")
                    (debug:print 1 *default-log-port* "Killing tests for run: " runkey " " (db:get-value-by-header run header "runname"))
                    )
		   ((remove-runs)
		    ;; (if (tasks:need-server run-id)(tasks:start-and-wait-for-server tdbdat run-id 10))
		    ;; seek and kill in flight -runtests with % as testpatt here
		    ;; (if (equal? testpatt "%")
		    (tasks:kill-runner target run-name testpatt)
		    ;; (debug:print 0 *default-log-port* "not attempting to kill any run launcher processes as testpatt is " testpatt))
		    (debug:print 1 *default-log-port* "Removing tests for run: " runkey " " (db:get-value-by-header run header "runname")))
		   ((set-state-status)
		    ;; (if (tasks:need-server run-id)(tasks:start-and-wait-for-server tdbdat run-id 10))
		    (debug:print 1 *default-log-port* "Modifying state and staus for tests for run: " runkey " " (db:get-value-by-header run header "runname")))
		   ((print-run)
		    (debug:print 1 *default-log-port* "Printing info for run " runkey ", run=" run ", tests=" tests ", header=" header)
		    action)
		   ((run-wait)
		    (debug:print 1 *default-log-port* "Waiting for run " runkey ", run=" runnamepatt " to complete"))
		   ((archive)
		    (debug:print 1 *default-log-port* "Archiving/restoring (" (args:get-arg "-archive") ") data for run: " runkey " " (db:get-value-by-header run header "runname"))
		    (set! worker-thread
			  (make-thread
			   (lambda ()
			     (case (string->symbol (args:get-arg "-archive"))
			       ((save save-remove keep-html)
				(archive:run-bup (args:get-arg "-archive") run-id run-name tests rp-mutex bup-mutex))
			       ((restore)
				(archive:bup-restore (args:get-arg "-archive") run-id run-name tests rp-mutex bup-mutex))
			       (else 
				(debug:print-error 0 *default-log-port* "unrecognised sub command to -archive. Run \"megatest\" to see help")
				(exit))))
			   "archive-bup-thread"))
		    (thread-start! worker-thread))
		   (else
		    (debug:print-info 0 *default-log-port* "action not recognised " action)))
		 
		 ;; actions that operate on one test at a time can be handled below
		 ;;
		 (let ((sorted-tests     (filter 
					  vector?
					  (sort tests (lambda (a b)(let ((dira ;; (rmt:sdb-qry 'getstr 
									  (db:test-get-rundir a)) ;; )  ;; (filedb:get-path *fdb* (db:test-get-rundir a)))
									 (dirb ;; (rmt:sdb-qry 'getstr 
									  (db:test-get-rundir b))) ;; ) ;; ((filedb:get-path *fdb* (db:test-get-rundir b))))
								     (if (and (string? dira)(string? dirb))
									 (> (string-length dira)(string-length dirb))
									 #f))))))
		       (toplevel-retries (make-hash-table)) ;; try three times to loop through and remove top level tests
		       (test-retry-time  (make-hash-table))
                       (backgrounded-remove-status     (make-hash-table))
                       (backgrounded-remove-last-visit (make-hash-table))
                       (backgrounded-remove-result     (make-hash-table))
		       (allow-run-time   (string->number (or (args:get-arg "-kill-wait") "10")))) ;; seconds to allow for killing tests before just brutally killing 'em
		   (let loop ((test (car sorted-tests))
			      (tal  (cdr sorted-tests)))
		     (let* ((test-id       (db:test-get-id test))
			    (new-test-dat  (rmt:get-test-info-by-id run-id test-id)))
		       (if (not new-test-dat)
			   (begin
			     (debug:print-error 0 *default-log-port* "We have a test-id of " test-id " but no record was found. NOTE: No locking of records is done between processes, do not simultaneously remove the same run from two processes!")
			     (if (not (null? tal))
				 (loop (car tal)(cdr tal))))
			   (let* ((item-path     (db:test-get-item-path new-test-dat))
				  (test-name     (db:test-get-testname new-test-dat))
				  (run-dir       ;;(filedb:get-path *fdb*
				   ;; (rmt:sdb-qry 'getid 
				   (db:test-get-rundir new-test-dat)) ;; )    ;; run dir is from the link tree
                                  (has-subrun    (and (subrun:subrun-test-initialized? run-dir)
                                                      (not (subrun:subrun-removed? run-dir))))
				  (test-state    (db:test-get-state new-test-dat))
                                  (test-status    (db:test-get-status new-test-dat))
				  (test-fulln    (db:test-get-fullname new-test-dat))
				  (uname         (db:test-get-uname    new-test-dat))
				  (toplevel-with-children (and (db:test-get-is-toplevel test)
							       (> (rmt:test-toplevel-num-items run-id test-name) 0))))
                             
			     (case action
			       ((remove-runs)
				;; if the test is a toplevel-with-children issue an error and do not remove
				(cond
                                 (toplevel-with-children
                                  (debug:print 0 *default-log-port* "WARNING: skipping removal of " test-fulln " with run-id " run-id " as it has sub tests")
                                  (hash-table-set! toplevel-retries test-fulln (+ (hash-table-ref/default toplevel-retries test-fulln 0) 1))
                                  (if (> (hash-table-ref toplevel-retries test-fulln) 3)
                                      (if (not (null? tal))
                                          (loop (car tal)(cdr tal))) ;; no else clause - drop it if no more in queue and > 3 tries
                                      (let ((newtal (append tal (list test))))
                                        (loop (car newtal)(cdr newtal))))) ;; loop with test still in queue
                                 (has-subrun
                                  ;; 
                                  (let ((last-visit (hash-table-ref/default backgrounded-remove-last-visit test-fulln 0))
                                        (now        (current-seconds))
                                        (rem-status (hash-table-ref/default backgrounded-remove-status test-fulln 'not-started)))
                                    (case rem-status
                                      ((not-started)
                                       (debug:print 0 *default-log-port* "WARNING: postponing removal of " test-fulln " with run-id " run-id " as it has a subrun")
                                       (hash-table-set! backgrounded-remove-status test-fulln 'started)
                                       (hash-table-set! backgrounded-remove-last-visit test-fulln (current-seconds))
                                       (common:send-thunk-to-background-thread
                                        (lambda ()
                                          (let* ((subrun-remove-succeeded
                                                  (subrun:remove-subrun run-dir keep-records)))
                                            (hash-table-set! backgrounded-remove-result test-fulln subrun-remove-succeeded)
                                            (hash-table-set! backgrounded-remove-status test-fulln 'done)))
                                        name: (conc "remove-subrun:"test-fulln))
                                       
                                       ;; send to back of line, loop
                                       (let ((newtal (append tal (list test))))
                                        (loop (car newtal)(cdr newtal)))
                                       )
                                      ((started)
                                       ;; if last visit was within last second, sleep 1 second
                                       (if (< (- now last-visit) 1.0)
                                           (thread-sleep! 1.0))
                                       (hash-table-set! backgrounded-remove-last-visit test-fulln (current-seconds))
                                       ;; send to back of line, loop
                                       (let ((newtal (append tal (list test))))
                                         (loop (car newtal)(cdr newtal)))
                                       )
                                      ((done)
                                       ;; drop this one; if remaining, loop, else finish
                                       (hash-table-set! backgrounded-remove-last-visit test-fulln (current-seconds))
                                       (let ((subrun-remove-succeeded (hash-table-ref/default backgrounded-remove-result test-fulln 'exception)))
                                         (cond
                                          ((eq? subrun-remove-succeeded 'exception)
                                           (let* ((logfile (subrun:get-log-path run-dir "remove")))
                                             (debug:print 0 *default-log-port* "ERROR: removing subrun of of " test-fulln " with run-id " run-id " ; see logfile @ "logfile))
					   (if (not (null? tal))
					       (loop (car tal)(cdr tal))))
					  (subrun-remove-succeeded
					   (debug:print 0 *default-log-port* "Now removing of " test-fulln " with run-id " run-id " since subrun was removed.")
					   ;;(runs:remove-test-directory new-test-dat mode) ;; let normal case handle this. it will go thru loop again as non-subrun
					   (let ((newtal (append tal (list test))))
					     (loop (car newtal)(cdr newtal))))
					  (else
					   (let* ((logfile (subrun:get-log-path run-dir "remove")))
					     (debug:print 0 *default-log-port* "WARNING: removal of subrun failed.  Please check "logfile" for details."))
					   ;; send to back of line, loop (will not match has-subrun next time through)
					    (if (not (null? tal))
						(loop (car tal)(cdr tal))))))
				       )
				      ) ; end case rem-status
                                    ) ; end let
                                  ); end cond has-subrun

                                 (else
                                  ;; BB - TODO - consider backgrounding to threads to delete tests (work below) 
                                  (debug:print-info 0 *default-log-port* "test: " test-name " itest-state: " test-state)
                                  (if (member test-state (list "RUNNING" "LAUNCHED" "REMOTEHOSTSTART" "KILLREQ"))
                                      (begin
                                        (if (not (hash-table-ref/default test-retry-time test-fulln #f))
                                            (begin
                                              ;; want to set to REMOVING BUT CANNOT do it here?
                                              (hash-table-set! test-retry-time test-fulln (current-seconds))))
                                        (if (> (- (current-seconds)(hash-table-ref test-retry-time test-fulln)) allow-run-time)
                                            ;; This test is not in a correct state for cleaning up. Let's try some graceful shutdown steps first
                                            ;; Set the test to "KILLREQ" and wait five seconds then try again. Repeat up to five times then give
                                            ;; up and blow it away.
                                            (begin
                                              (debug:print 0 *default-log-port* "WARNING: could not gracefully remove test " test-fulln ", tried to kill it to no avail. Forcing state to FAILEDKILL and continuing")
                                              (mt:test-set-state-status-by-id run-id (db:test-get-id test) "FAILEDKILL" "n/a" #f)
                                              (thread-sleep! 1))
                                            (begin
                                              (mt:test-set-state-status-by-id run-id (db:test-get-id test) "KILLREQ" "n/a" #f)
                                              (thread-sleep! 1)))
                                        ;; NOTE: This is suboptimal as the testdata will be used later and the state/status may have changed ...
                                        (if (null? tal)
                                            (loop new-test-dat tal)
                                            (loop (car tal)(append tal (list new-test-dat)))))
                                      (begin
                                        (runs:remove-test-directory new-test-dat mode) ;; 'remove-all)
                                        (if (not (null? tal))
                                            (loop (car tal)(cdr tal)))))))
				(rmt:update-run-stats run-id (rmt:get-raw-run-stats run-id)))
                               ((kill-runs)
                                ;; RUNNING -> KILLREQ
                                ;; LAUNCHED,RUNNING,REMOTEHOSTSTART -> NOT STARTED
                                (cond
                                 ((and has-subrun (member test-state (list "RUNNING" "LAUNCHED" "REMOTEHOSTSTART" "KILLREQ")))
                                  (common:send-thunk-to-background-thread
                                   (lambda ()
                                     (let* ((subrun-remove-succeeded
                                             (subrun:kill-subrun run-dir keep-records)))
                                       #t)))
                                  (if (not (null? tal))
				    (loop (car tal)(cdr tal)))
                                  )
                                 ((member test-state (list "RUNNING" "LAUNCHED" "REMOTEHOSTSTART" "KILLREQ"))
                                  (debug:print 1 *default-log-port* "INFO: issuing killreq to test "test-fulln)
                                  (mt:test-set-state-status-by-id run-id (db:test-get-id test) "KILLREQ" "n/a" #f)
                                  (if (not (null? tal))
				      (loop (car tal)(cdr tal))))
                                 ((and (member test-status '("PREQ_FAIL" "PREQ_DISCARDED" "BLOCKED" "ZERO_ITEMS" "KEEP_TRYING" "TEN_STRIKES" "TIMED_OUT")))
                                  (rmt:set-state-status-and-roll-up-items run-id (db:test-get-id test) 'foo "NOT_STARTED" "n/a" (conc "kill-run moved from "test-state":"test-status" to NOT_STARTED:n/a"))
                                  ;;(mt:test-set-state-status-by-id run-id (db:test-get-id test) "NOT_STARTED" "n/a" (conc "kill-run moved from "test-state":"test-status" to NOT_STARTED:n/a"))
                                  (if (not (null? tal))
				      (loop (car tal)(cdr tal)))
                                  )
                                 (else
                                  (if (not (null? tal))
				      (loop (car tal)(cdr tal)))
                                  )))
			       ((set-state-status)
                                (let* ((new-state (car state-status))
                                       (new-status (cadr state-status))
                                       (test-id (db:test-get-id test))
                                       (test-run-dir (db:test-get-rundir new-test-dat))
                                       (has-subrun (and (subrun:subrun-test-initialized? test-run-dir)
                                                      (not (subrun:subrun-removed? test-run-dir)))))
                                  (when has-subrun
                                     (common:send-thunk-to-background-thread
                                        (lambda ()
                                          (subrun:set-state-status test-run-dir state status new-state-status)
                                          )
                                        )
                                    )
                                  (debug:print-info 2 *default-log-port* "new state " new-state ", new status " new-status )
                                  (mt:test-set-state-status-by-id run-id test-id new-state new-status #f))
				(if (not (null? tal))
				    (loop (car tal)(cdr tal))))
			       ((run-wait)
                                ;; BB TODO - manage has-subrun case
				(debug:print-info 2 *default-log-port* "still waiting, " (length tests) " tests still running")
				(thread-sleep! 10)
				(let ((new-tests (proc-get-tests run-id)))
				  (if (null? new-tests)
				      (debug:print-info 1 *default-log-port* "Run completed according to zero tests matching provided criteria.")
				      (loop (car new-tests)(cdr new-tests)))))
			       ((archive)
                                ;; BB TODO - manage has-subrun case
				(if (and run-dir (not toplevel-with-children))
				    (let ((ddir (conc run-dir "/")))
				      (case (string->symbol (args:get-arg "-archive"))
					((save save-remove keep-html)
					 (if (common:file-exists? ddir)
					     (debug:print-info 0 *default-log-port* "Estimating disk space usage for " test-fulln ": " (common:get-disk-space-used ddir)))))))
				(if (not (null? tal))
				    (loop (car tal)(cdr tal))))
			       )))
		       )
		     (if worker-thread (thread-join! worker-thread)))
                   (common:join-backgrounded-threads))))
	   ;; remove the run if zero tests remain
	   (if (eq? action 'remove-runs)
	       (let* ((run-id   (db:get-value-by-header run header "id")) ;; NB// masks run-id from above?
                      (remtests (mt:get-tests-for-run run-id #f '("DELETED") '("n/a") not-in: #t)))
		 (if (null? remtests) ;; no more tests remaining
		     (let* ((dparts  (string-split lasttpath "/"))
			    (runpath (conc "/" (string-intersperse 
						(take dparts (- (length dparts) 1))
						"/"))))
		       (debug:print 1 *default-log-port* "Removing run: " runkey " " (db:get-value-by-header run header "runname") " and related record")
                       (if (not keep-records)
                           (begin
                             (rmt:delete-run run-id)
                             (rmt:delete-old-deleted-test-records)))
                           ;; (rmt:set-var "DELETED_TESTS" (current-seconds))
		       ;; need to figure out the path to the run dir and remove it if empty
		       ;;    (if (null? (glob (conc runpath "/*")))
		       ;;        (begin
		       ;; 	 (debug:print 1 *default-log-port* "Removing run dir " runpath)
		       ;; 	 (system (conc "rmdir -p " runpath))))
		       )))))
	 ))
     runs)
    ;; (sqlite3:finalize! (db:delay-if-busy tdbdat))
    )
  #t)

(define (runs:remove-test-directory test mode) ;; remove-data-only)
  (let* ((run-dir       (db:test-get-rundir test))    ;; run dir is from the link tree
	 (real-dir      (if (common:file-exists? run-dir)
			    ;; (resolve-pathname run-dir)
			    (common:nice-path run-dir)
			    #f))
         (clean-mode    (or mode 'remove-all))
         (test-id       (db:test-get-id test))
        ;; (lock-key      (conc "test-" test-id))
        ;; (got-lock      (let loop ((lock        (rmt:no-sync-get-lock lock-key))
	;; 			     (expire-time (+ (current-seconds) 30))) ;; give up on getting the lock and steal it after 15 seconds
	;; 		    (if (car lock)
	;; 			#t
	;; 			(if (> (current-seconds) expire-time)
	;; 			    (begin
	;; 			      (debug:print-info 0 *default-log-port* "Timed out waiting for a lock to clean test with id " test-id)
	;; 			      (rmt:no-sync-del! lock-key) ;; destroy the lock
	;; 			      (loop (rmt:no-sync-get-lock lock-key) expire-time)) ;; 
	;; 			    (begin
	;; 			      (thread-sleep! 1)
	 ;; 			      (loop (rmt:no-sync-get-lock lock-key) expire-time)))))))
	 )
    (case clean-mode
      ((remove-data-only)(mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "CLEANING" "LOCKED" #f))
      ((remove-all)      (mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "REMOVING" "LOCKED" #f))
      ((archive-remove)  (mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "ARCHIVE_REMOVING" #f #f)))
    (debug:print-info 1 *default-log-port* "Attempting to remove " (if real-dir (conc " dir " real-dir " and ") "") " link " run-dir)
    (if (and real-dir 
	     (> (string-length real-dir) 5)
	     (common:file-exists? real-dir)) ;; bad heuristic but should prevent /tmp /home etc.
	(begin ;; let* ((realpath (resolve-pathname run-dir)))
	  (debug:print-info 1 *default-log-port* "Recursively removing " real-dir)
	  (if (common:file-exists? real-dir)
	      (runs:safe-delete-test-dir real-dir)
	      (debug:print 0 *default-log-port* "WARNING: test dir " real-dir " appears to not exist or is not readable")))
	(if real-dir 
	    (debug:print 0 *default-log-port* "WARNING: directory " real-dir " does not exist")
	    (debug:print 0 *default-log-port* "WARNING: no real directory corrosponding to link " run-dir ", nothing done")))
    (if (symbolic-link? run-dir)
	(begin
	  (debug:print-info 1 *default-log-port* "Removing symlink " run-dir)
	  (handle-exceptions
	   exn
	   (debug:print-error 0 *default-log-port* " Failed to remove symlink " run-dir ((condition-property-accessor 'exn 'message) exn) ", attempting to continue")
	   (delete-file run-dir)))
	(if (directory? run-dir)
	    (if (> (directory-fold (lambda (f x)(+ 1 x)) 0 run-dir) 0)
		(debug:print 0 *default-log-port* "WARNING: refusing to remove " run-dir " as it is not empty")
		(handle-exceptions
		 exn
		 (debug:print-error 0 *default-log-port* " Failed to remove directory " run-dir ((condition-property-accessor 'exn 'message) exn) ", attempting to continue")
		 (delete-directory run-dir)))
	    (if (and run-dir
		     (not (member run-dir (list "n/a" "/tmp/badname"))))
		(debug:print 0 *default-log-port* "WARNING: not removing " run-dir " as it either doesn't exist or is not a symlink")
		(debug:print 0 *default-log-port* "NOTE: the run dir for this test is undefined. Test may have already been deleted."))
	    ))
    ;; Only delete the records *after* removing the directory. If things fail we have a record 
    (case clean-mode
      ((remove-data-only)(mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) (db:test-get-state test)(db:test-get-status test) #f))
      ((archive-remove)  (mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "ARCHIVED" #f #f))
      (else (rmt:delete-test-records (db:test-get-run_id test) (db:test-get-id test))))
    ;; (rmt:no-sync-del! lock-key)
    ))

;;======================================================================
;; Routines for manipulating runs
;;======================================================================

;; Since many calls to a run require pretty much the same setup 
;; this wrapper is used to reduce the replication of code
(define (general-run-call switchname action-desc proc)
  (let ((runname (or (args:get-arg "-runname")(args:get-arg ":runname")))
	(target  (common:args-get-target)))
    (cond
     ((not target)
      (debug:print-error 0 *default-log-port* "Missing required parameter for " switchname ", you must specify the target with -target")
      (exit 3))
     ((not runname)
      (debug:print-error 0 *default-log-port* "Missing required parameter for " switchname ", you must specify the run name with -runname runname")
      (exit 3))
     (else
      (let (;; (db   #f)
	    (keys #f))
	(if (launch:setup)
	    (begin
	      (full-runconfigs-read) ;; cache the run config
	      ;; (launch:cache-config) ;; there are two independent config cache locations, turning this one off for now. MRW.
	      ) ;; do not cache here - need to be sure runconfigs is processed
	    (begin 
	      (debug:print 0 *default-log-port* "Failed to setup, exiting")
	      (exit 1)))

        
	(set! keys (keys:config-get-fields *configdat*))
	;; have enough to process -target or -reqtarg here
	(if (args:get-arg "-reqtarg")
	    (let* ((runconfigf (conc  *toppath* "/runconfigs.config")) ;; DO NOT EVALUATE ALL 
		   (runconfig  (read-config runconfigf #f #t environ-patt: #f)))
	      (if (hash-table-ref/default runconfig (args:get-arg "-reqtarg") #f)
		  (keys:target-set-args keys (args:get-arg "-reqtarg") args:arg-hash)
		    
		  (begin
		    (debug:print-error 0 *default-log-port* "[" (args:get-arg "-reqtarg") "] not found in " runconfigf)
		    ;; (if db (sqlite3:finalize! db))
		    (exit 1)
		    )))
	    (if (args:get-arg "-target")
		(keys:target-set-args keys (args:get-arg "-target" args:arg-hash) args:arg-hash)))
	(if (not (car *configinfo*))
	    (begin
	      (debug:print-error 0 *default-log-port* "Attempted to " action-desc " but run area config file not found")
	      (exit 1))
	    ;; Extract out stuff needed in most or many calls
	    ;; here then call proc
	    (let* ((keyvals    (keys:target->keyval keys target)))
	      (proc target runname keys keyvals)))
	;; (if db (sqlite3:finalize! db))
	(set! *didsomething* #t))))))

;;======================================================================
;; Lock/unlock runs
;;======================================================================

(define (runs:handle-locking target keys runname lock unlock user)
  (let* ((db       #f)
	 (rundat   (mt:get-runs-by-patt keys runname target))
	 (header   (vector-ref rundat 0))
	 (runs     (vector-ref rundat 1)))
    (for-each (lambda (run)
		(let ((run-id (db:get-value-by-header run header "id"))
           (str (if lock 
									"lock"
                  "unlock")))
		  (if (or lock
			  (and unlock
			       (or (args:get-arg "-force")
                (begin
				 (print "Do you really wish to unlock run " run-id "?\n   y/n: ")
				 (equal? "y" (read-line))))))
          (begin 
		      (rmt:lock/unlock-run run-id lock unlock user)
					(debug:print-info 0 *default-log-port* "Done " str " on run id " run-id))
		      (debug:print-info 0 *default-log-port* "Skipping lock/unlock on " run-id))))
	      runs)))
;;======================================================================
;; Rollup runs
;;======================================================================

;; Update the test_meta table for this test
(define (runs:update-test_meta test-name test-conf)
  (let ((currrecord (rmt:testmeta-get-record test-name)))
    (if (not currrecord)
	(begin
	  (set! currrecord (make-vector 11 #f))
	  (rmt:testmeta-add-record test-name)))
    (for-each 
     (lambda (key)
       (let* ((idx (cadr key))
	      (fld (car  key))
	      (val (config-lookup test-conf "test_meta" fld)))
	 ;; (debug:print 5 *default-log-port* "idx: " idx " fld: " fld " val: " val)
	 (if (and val (not (equal? (vector-ref currrecord idx) val)))
	     (begin
	       (print "Updating " test-name " " fld " to " val)
	       (rmt:testmeta-update-field test-name fld val)))))
     '(("author" 2)("owner" 3)("description" 4)("reviewed" 5)("tags" 9)("jobgroup" 10)))))

;; find tests with matching tags, tagpatt is a string "tagpatt1,tagpatt2%, ..."
;;
(define (runs:get-tests-matching-tags tagpatt)
  (let* ((tagdata (rmt:get-tests-tags))
         (res     '())) ;; list of tests that match one or more tags
    (for-each
     (lambda (row)
       (let* ((tag (car row))
              (tests (cdr row)))
       (if (patt-list-match tag tagpatt)
           (set! res (append tests res)))))
     tagdata)
    res))
    

;; Update test_meta for all tests
(define (runs:update-all-test_meta db)
  (let ((test-names (tests:get-all))) ;; (tests:get-valid-tests)))
    (for-each 
     (lambda (test-name)
       (let* ((test-conf    (mt:lazy-read-test-config test-name)))
	 (if test-conf (runs:update-test_meta test-name test-conf))))
     (hash-table-keys test-names))))

;; This could probably be refactored into one complex query ...
;; NOT PORTED - DO NOT USE YET
;;
(define (runs:rollup-run keys runname user keyvals)
  (debug:print 4 *default-log-port* "runs:rollup-run, keys: " keys " -runname " runname " user: " user)
  (let* ((db              #f)
	 ;; register run operates on the main db
	 (new-run-id      (rmt:register-run keyvals runname "new" "n/a" user (args:get-arg "-contour")))
	 (prev-tests      (rmt:get-matching-previous-test-run-records new-run-id "%" "%"))
	 (curr-tests      (mt:get-tests-for-run new-run-id "%/%" '() '()))
	 (curr-tests-hash (make-hash-table)))
    (rmt:update-run-event_time new-run-id)
    ;; index the already saved tests by testname and itemdat in curr-tests-hash
    (for-each
     (lambda (testdat)
       (let* ((testname  (db:test-get-testname testdat))
	      (item-path (db:test-get-item-path testdat))
	      (full-name (conc testname "/" item-path)))
	 (hash-table-set! curr-tests-hash full-name testdat)))
     curr-tests)
    ;; NOPE: Non-optimal approach. Try this instead.
    ;;   1. tests are received in a list, most recent first
    ;;   2. replace the rollup test with the new *always*
    (for-each 
     (lambda (testdat)
       (let* ((testname  (db:test-get-testname testdat))
	      (item-path (db:test-get-item-path testdat))
	      (full-name (conc testname "/" item-path))
	      (prev-test-dat (hash-table-ref/default curr-tests-hash full-name #f))
	      (test-steps    (rmt:get-steps-for-test (db:test-get-id testdat)))
	      (new-test-record #f))
	 ;; replace these with insert ... select
	 (apply sqlite3:execute 
		db 
		(conc "INSERT OR REPLACE INTO tests (run_id,testname,state,status,event_time,host,cpuload,diskfree,uname,rundir,item_path,run_duration,final_logf,comment) "
		      "VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?);")
		new-run-id (cddr (vector->list testdat)))
	 (set! new-testdat (car (mt:get-tests-for-run new-run-id (conc testname "/" item-path) '() '())))
	 (hash-table-set! curr-tests-hash full-name new-testdat) ;; this could be confusing, which record should go into the lookup table?
	 ;; Now duplicate the test steps
	 (debug:print 4 *default-log-port* "Copying records in test_steps from test_id=" (db:test-get-id testdat) " to " (db:test-get-id new-testdat))
	 (cdb:remote-run ;; to be replaced, note: this routine is not used currently
	  (lambda ()
	    (sqlite3:execute 
	     db 
	     (conc "INSERT OR REPLACE INTO test_steps (test_id,stepname,state,status,event_time,comment) "
		   "SELECT " (db:test-get-id new-testdat) ",stepname,state,status,event_time,comment FROM test_steps WHERE test_id=?;")
	     (db:test-get-id testdat))
	    ;; Now duplicate the test data
	    (debug:print 4 *default-log-port* "Copying records in test_data from test_id=" (db:test-get-id testdat) " to " (db:test-get-id new-testdat))
	    (sqlite3:execute 
	     db 
	     (conc "INSERT OR REPLACE INTO test_data (test_id,category,variable,value,expected,tol,units,comment) "
		   "SELECT " (db:test-get-id new-testdat) ",category,variable,value,expected,tol,units,comment FROM test_data WHERE test_id=?;")
	     (db:test-get-id testdat))))
	 ))
     prev-tests)))
	 
     
;; clean cache files
(define (runs:clean-cache target runname toppath)
  (if target
      (if runname
	  (let* ((linktree (common:get-linktree)) ;; (if toppath (configf:lookup *configdat* "setup" "linktree")))
		 (runtop   (conc linktree "/" target "/" runname))
		 (files    (if (common:file-exists? runtop)
			       (append (glob (conc runtop "/.megatest*"))
				       (glob (conc runtop "/.runconfig*")))
			       '())))
	    (if (null? files)
		(debug:print-info 0 *default-log-port* "No cached megatest or runconfigs files found. None removed.")
		(begin
		  (debug:print-info 0 *default-log-port* "Removing cached files:\n    " (string-intersperse files "\n    "))
		  (for-each 
		   (lambda (f)
		     (handle-exceptions
			 exn
			 (debug:print 0 *default-log-port* "WARNING: Failed to remove file " f)
		       (delete-file f)))
		   files))))
	  (debug:print-error 0 *default-log-port* "-clean-cache requires -runname."))
      (debug:print-error 0 *default-log-port* "-clean-cache requires -target or -reqtarg")))

;;======================================================================
;; strings db sdb
;;======================================================================

;; 
(define (sdb:open fname)
  (let* ((dbpath    (pathname-directory fname))
	 (dbexists  (let ((fe (common:file-exists? fname)))
		      (if fe 
			  fe
			  (begin
			    (create-directory dbpath #t)
			    #f))))
	 (sdb        (sqlite3:open-database fname))
	 (handler   (make-busy-timeout 136000)))
    (sqlite3:set-busy-handler! sdb handler)
    (if (not dbexists)
	(sdb:initialize sdb))
    (sqlite3:execute sdb "PRAGMA synchronous = 1;")
    sdb))

(define (sdb:initialize sdb)
  (sqlite3:execute sdb "CREATE TABLE IF NOT EXISTS strs
                           (id  INTEGER PRIMARY KEY,
                            str TEXT,
                        CONSTRAINT str UNIQUE (str));")
  (sqlite3:execute sdb "CREATE INDEX IF NOT EXISTS strindx ON strs (str);"))

;; (define sumup (let ((a 0))(lambda (x)(set! a (+ x a)) a)))

(define (sdb:register-string sdb str)
  (sqlite3:execute sdb "INSERT OR IGNORE INTO strs (str) VALUES (?);" str))

(define (sdb:string->id sdb str-cache str)
  (let ((id (hash-table-ref/default str-cache str #f)))
    (if (not id)
	(sqlite3:for-each-row
	 (lambda (sid)
	   (set! id sid)
	   (hash-table-set! str-cache str id))
	 sdb
	 "SELECT id FROM strs WHERE str=?;" str))
    id))

(define (sdb:id->string sdb id-cache id)
  (let ((str (hash-table-ref/default id-cache id #f)))
    (if (not str)
	(sqlite3:for-each-row
	 (lambda (istr)
	   (set! str istr)
	   (hash-table-set! id-cache id str))
	 sdb
	 "SELECT str FROM strs WHERE id=?;" id))
    str))

;; Numbers get passed though in both directions
;;
(define (make-sdb:qry fname)
  (let ((sdb    #f)
	(scache (make-hash-table))
	(icache (make-hash-table)))
    (lambda (cmd var)
      (case cmd
	((setup)   (set! sdb (if (not sdb)
				 (sdb:open (if var var fname)))))
	((setdb)    (set! sdb var))
	((getdb)    sdb)
	((finalize) (if sdb
			(begin
			  (sqlite3:finalize! sdb)
			  (set! sdb #f))))
	((getid)     (let ((id (if (or (number? var)
				       (string->number var))
				   var
				   (sdb:string->id sdb scache var))))
		       (if id
			   id
			   (begin
			     (sdb:register-string sdb var)
			     (sdb:string->id sdb scache var)))))
	((getstr)    (if (or (number? var)
			     (string->number var))
			 (sdb:id->string sdb icache var)
			 var))
	((passid)    var)
	((passstr)   var)
	(else #f)))))

;;======================================================================
;; subrun
;;======================================================================

(define (subrun:subrun-test-initialized? test-run-dir)
  (if (and (common:file-exists? (conc test-run-dir "/subrun-area") )
           (common:file-exists? (conc test-run-dir "/testconfig.subrun") ))
      #t
      #f))

(define (subrun:launch-dashboard test-run-dir)
  (if (subrun:subrun-test-initialized? test-run-dir)
      (let* ((subarea (subrun:get-runarea test-run-dir)))
        (if (and subarea (common:file-exists? subarea))
            (system (conc "cd " subarea ";env -i PATH=$PATH DISPLAY=$DISPLAY HOME=$HOME USER=$USER dashboard &"))))))

(define (subrun:subrun-removed? test-run-dir)
  (if (subrun:subrun-test-initialized? test-run-dir)
      (let ((flagfile (conc test-run-dir "/subrun.removed")))
        (if (common:file-exists? flagfile)
            #t
            #f))
      #t))

(define (subrun:set-subrun-removed test-run-dir)
  (let ((flagfile (conc test-run-dir "/subrun.removed")))
    (if (and (subrun:subrun-test-initialized? test-run-dir) (not (common:file-exists? flagfile)))
        (with-output-to-file flagfile
          (lambda () (print (current-seconds)))))))

(define (subrun:unset-subrun-removed test-run-dir)
  (let ((flagfile (conc test-run-dir "/subrun.removed")))
    (if (and (subrun:subrun-test-initialized? test-run-dir) (common:file-exists? flagfile))
        (delete-file flagfile))))


(define (subrun:testconfig-defines-subrun? testconfig)
  (configf:lookup testconfig "subrun" "runwait")) ;; we use runwait as the flag that a subrun is requested

(define (subrun:initialize-toprun-test  testconfig test-run-dir)
  (let ((ra (configf:lookup testconfig "subrun" "run-area"))
        (logpro (configf:lookup testconfig "subrun" "logpro"))
        (symlink-target (conc test-run-dir "/subrun-area"))
        )
    (if (not ra)      ;; when runarea is not set we default to *toppath*. However 
	(let ((fallback-run-area (or *toppath* (conc test-run-dir "/subrun"))))
	  ;; we need to force the setting in the testconfig so it will
          ;; be preserved in the testconfig.subrun file
	  (configf:set-section-var testconfig "subrun" "run-area" fallback-run-area)
	  (set! ra fallback-run-area)))
    (configf:set-section-var testconfig "logpro" "subrun" logpro) ;; append the logpro rules to the logpro section as stepname subrun
    (if (common:file-exists? symlink-target)
        (delete-file symlink-target))
    (create-symbolic-link ra symlink-target)
    (configf:write-alist testconfig "testconfig.subrun")))

(define (subrun:set-state-status test-run-dir state status new-state-status)
  (if (and (not (subrun:subrun-removed? test-run-dir)) (subrun:subrun-test-initialized? test-run-dir))
      (let* ((action-switches-str
              (conc "-set-state-status "new-state-status
                    (if state (conc " -state "state) "")
                    (if status (conc " -status "status) "")))
             (log-prefix
              (subrun:sanitize-path
               (conc "set-state-status="new-state-status
                     (if state (conc ":state="state) "")
                     (if status (conc "+status="status) ""))))
             (submt-result 
              (subrun:exec-sub-megatest test-run-dir action-switches-str log-prefix)))
        submt-result)))

(define (subrun:remove-subrun test-run-dir keep-records )
  (if (and (not (subrun:subrun-removed? test-run-dir)) (subrun:subrun-test-initialized? test-run-dir))
      (let* ((action-switches-str
              (conc "-remove-runs"
                    (if keep-records "-keep-records " "")
                    ))
             (remove-result
              (subrun:exec-sub-megatest test-run-dir action-switches-str "remove")))
        (if remove-result
            (begin
              (subrun:set-subrun-removed test-run-dir)
              #t)
            #f))
      #t))

(define (subrun:kill-subrun test-run-dir )
  (if (and (not (subrun:subrun-removed? test-run-dir)) (subrun:subrun-test-initialized? test-run-dir))
      (let* ((action-switches-str
              (conc "-kill-runs" ))
             (kill-result
              (subrun:exec-sub-megatest test-run-dir action-switches-str "kill")))
        kill-result)
      #t))

(define (subrun:launch-cmd test-run-dir #!optional (sub-cmd "-run")) ;; BUG: "-run" should be changed to "-rerun-clean" but current doesn't work
  (if (subrun:subrun-removed? test-run-dir)
      (subrun:unset-subrun-removed test-run-dir))      

  (let* ((log-prefix "run")
         (switches (subrun:selector+log-switches test-run-dir log-prefix))
         (run-wait #t)
         (cmd      (conc "megatest " sub-cmd " " switches" "
                         (if run-wait "-run-wait " ""))))
    cmd))


(define (subrun:sanitize-path inpath)
  (let* ((insane-pattern (irregex "[^[a-zA-Z0-9_\\-]")))
    (regex#string-substitute insane-pattern "_" inpath #t)))

(define (subrun:get-runarea test-run-dir)
  (if (subrun:subrun-test-initialized? test-run-dir)
      (let* ((info-alist (subrun:selector+log-alist
                          test-run-dir
                          "foo"))
             (run-area   (if (list? info-alist)
                             (alist-ref "-start-dir" info-alist equal? #f)
                             #f)))
        run-area)
      #f))

(define (subrun:selector+log-alist test-run-dir log-prefix)
  (let* ((switch-def-alist (common:get-param-mapping flavor: 'config))
         (subrunfile   (conc test-run-dir "/testconfig.subrun" ))
         (subrundata   (with-input-from-file subrunfile read))
         (subrunconfig (configf:alist->config subrundata))
         (run-area     (configf:lookup subrunconfig "subrun" "run-area"))
         (defvals      `(("start-dir" . ,(or run-area  ;; default values if not specified in subrun section of tconf
                                             (get-environment-variable "MT_RUN_AREA_HOME")
                                             "/no/rundir/found")) 
                         ("run-name"  . ,(or (get-environment-variable "MT_RUNNAME") "NO-RUNNAME"))
                         ("target"    . ,(or (get-environment-variable "MT_TARGET")  "NO-TARGET"))))
         (switch-alist-pre  (filter-map (lambda (item)
                                          (let* ((config-key (car item))
                                                 (switch     (cdr item))
                                                 (defval     (alist-ref config-key defvals equal? #f))
                                                 (val        (or (configf:lookup subrunconfig "subrun" config-key)
                                                                 defval)))
                                            (if val
                                                (cons switch val)
                                                #f)))
                                        switch-def-alist))

         ;; testpatt may be modified if all three of mode-patt, tag-expr, and testpatt are null
         (mode-patt     (alist-ref "-modepatt" switch-alist-pre equal? #f))
         (tag-expr      (alist-ref "-tagexpr" switch-alist-pre equal? #f))
         (testpatt      (alist-ref "-testpatt" switch-alist-pre equal?
                                   (if (not (or mode-patt tag-expr)) "%" #f))) ;; testpatt is % if not
                                                                               ;; otherwise specified

         ;; define compact-stem for logfile
         (target        (alist-ref "-target" switch-alist-pre equal? #f)) ;; want data-structures alist-ref, not alist-lib alist-ref
         (runname       (alist-ref "-runname" switch-alist-pre equal? #f))


         (compact-stem  (subrun:sanitize-path
                         (conc
                          target
                          "-"
                          runname
                          "-" (or testpatt mode-patt tag-expr "NO-TESTPATT"))))
         (logfile       (conc
                         test-run-dir "/"
                         (if log-prefix
                             (conc (subrun:sanitize-path log-prefix) "-")
                             "")
                         compact-stem
                         ".log"))
         ;; swap out testpatt with modified test-patt and add -log
         (switch-alist  (cons
                         (cons "-log" logfile)
                         (map (lambda (item)
                                (if (equal? (car item) "-testpatt")
                                    (cons "-testpatt" testpatt)
                                    item))
                                switch-alist-pre))))
    switch-alist))
    ;; note - get precmd from subrun section
    ;;   apply to submegatest commands

(define (subrun:get-log-path test-run-dir log-prefix)
  (let* ((alist (subrun:selector+log-alist test-run-dir log-prefix))
         (res   (alist-ref "-log" alist equal? #f)))
    res))

(define (subrun:selector+log-switches test-run-dir log-prefix)
  (let* ((switch-alist (subrun:selector+log-alist test-run-dir log-prefix))
         (res
          (string-intersperse
           (apply
            append
            (map
             (lambda (x)
               (list (car x) (cdr x)))
             switch-alist))
           " ")))
    res))

(define (subrun:exec-sub-megatest test-run-dir action-switches-str log-prefix)
  (let* ((selector-switches  (subrun:selector+log-switches test-run-dir log-prefix))
         (cmd (conc "megatest " selector-switches " " action-switches-str ))
         (pid #f)
         (proc (lambda ()
                 (debug:print-info 0 *default-log-port* "Running sub megatest command: "cmd)
                 ;;(set! pid (process-run "/usr/bin/xterm" (list ))))))
                 (set! pid (process-run "/bin/bash" (list "-c" cmd))))))
    (call-with-environment-variables 
     (list (cons "PATH" (conc (get-environment-variable "PATH") ":.")))
     (lambda  ()
       (common:without-vars proc "^MT_.*")))
    (let processloop ((i 0))
      (let-values (((pid-val exit-status exit-code)(process-wait pid #t)))
        (if (eq? pid-val 0)
            (begin
              (thread-sleep! 2)
              (processloop (+ i 1)))
            (begin
              (debug:print-info 0 *default-log-port* "sub megatest " action-switches-str " completed with exit code " exit-code)
              (if (eq? 0 exit-code)
                  (begin
                    #t)
                  (begin
                    #f))))))))



;; (subrun:exec-sub-megatest "/nfs/pdx/disks/icf_env_disk001/bjbarcla/gwa/issues/mtdev/165/megatest/ext-tests/tests/subrun-usecases/toparea/links/SYSTEM_val/RELEASE_val/go/toptest" "-foo" "foo")

;;======================================================================
;; synchash
;;======================================================================

(define (synchash:make)
   (make-hash-table))

;; given an alist of objects '((id obj) ...) 
;;   1. remove unchanged objects from the list
;;   2. create a list of removed objects by id
;;   3. remove removed objects from synchash
;;   4. replace or add new or changed objects to synchash
;;
(define (synchash:get-delta indat synchash)
  (let ((deleted '())
	(changed '())
	(found   '())
	(orig-keys (hash-table-keys synchash)))
    (for-each
     (lambda (item)
       (let* ((id  (car  item))
	      (dat (cadr item))
	      (ref (hash-table-ref/default synchash id #f)))
	 (if (not (equal? dat ref)) ;; item changed or new
	     (begin
	       (set! changed (cons item changed))
	       (hash-table-set! synchash id dat)))
	 (set! found (cons id found))))
     indat)
    (for-each 
     (lambda (id)
       (if (not (member id found))
	   (begin
	     (set! deleted (cons id deleted))
	     (hash-table-delete! synchash id))))
     orig-keys)
    (list changed deleted)
    ;; (list indat '()) ;; just for debugging
    ))
    
;; keynum => the field to use as the unique key (usually 0 but can be other field)
;;
(define (synchash:client-get proc synckey keynum synchash run-id . params)
  (let* ((data   (rmt:synchash-get run-id proc synckey keynum params))
	 (newdat (car data))
	 (removs (cadr data))
	 (myhash (hash-table-ref/default synchash synckey #f)))
    (if (not myhash)
	(begin
	  (set! myhash (make-hash-table))
	  (hash-table-set! synchash synckey myhash)))
    (for-each 
     (lambda (item)
       (let ((id  (car item))
	     (dat (cadr item)))
	 ;; (debug:print-info 2 *default-log-port* "Processing item: " item)
	 (hash-table-set! myhash id dat)))
     newdat)
    (for-each
     (lambda (id)
       (hash-table-delete! myhash id))
     removs)
    ;; WHICH ONE!?
    ;; data)) ;; return the changed and deleted list
    (list newdat removs))) ;; synchash))

(define *synchashes* (make-hash-table))

(define (synchash:server-get dbstruct run-id proc synckey keynum params)
  ;; (debug:print-info 2 *default-log-port* "synckey: " synckey ", keynum: " keynum ", params: " params)
  (let* ((dbdat     (db:get-db dbstruct run-id))
	 (db        (db:dbdat-get-db dbdat))
	 (synchash  (hash-table-ref/default *synchashes* synckey #f))
	 (newdat    (apply (case proc
			     ((db:get-runs)                   db:get-runs)
			     ((db:get-tests-for-run-mindata)  db:get-tests-for-run-mindata)
			     ((db:get-test-info-by-ids)       db:get-test-info-by-ids)
			     (else
			      (print "ERROR: sync for hash " proc " not setup! Edits needed in synchash.scm")
			      print))
			   db params))
	 (postdat  #f)
	 (make-indexed (lambda (x)
			 (list (vector-ref x keynum) x))))
    ;; Now process newdat based on the query type
    (set! postdat (case proc
		    ((db:get-runs)
		     ;; (debug:print-info 2 *default-log-port* "Get runs call")
		     (let ((header (vector-ref newdat 0))
			   (data   (vector-ref newdat 1)))
		       ;; (debug:print-info 2 *default-log-port* "header: " header ", data: " data)
		       (cons (list "header" header)         ;; add the header keyed by the word "header"
			     (map make-indexed data))))        ;; add each element keyed by the keynum'th val
		    (else 
		     ;; (debug:print-info 2 *default-log-port* "Non-get runs call")
		     (map make-indexed newdat))))
    ;; (debug:print-info 2 *default-log-port* "postdat: " postdat)
    ;; (if (not indb)(sqlite3:finalize! db))
    (if (not synchash)
	(begin
	  (set! synchash (make-hash-table))
	  (hash-table-set! *synchashes* synckey synchash)))
    (synchash:get-delta postdat synchash)))

;;======================================================================
;; tree
;;======================================================================

;;======================================================================
;; T R E E   S T U F F 
;;======================================================================

;; path is a list of nodes, each the child of the previous
;; this routine returns the id so another node can be added
;; either as a leaf or as a branch
;;
;; BUG: This needs a stop sensor for when a branch is exhausted
;;
(define (tree:find-node obj path)
  ;; start at the base of the tree
  (if (null? path)
      #f ;; or 0 ????
      (let loop ((hed      (car path))
		 (tal      (cdr path))
		 (depth    0)
		 (nodenum  0))
	;; nodes in iup tree are 100% sequential so iterate over nodenum
	(if (iup:attribute obj (conc "DEPTH" nodenum)) ;; end when no more nodes
	    (let ((node-depth (string->number (iup:attribute obj (conc "DEPTH" nodenum))))
		  (node-title (iup:attribute obj (conc "TITLE" nodenum))))
	      (if (and (equal? depth node-depth)
		       (equal? hed   node-title)) ;; yep, this is the one!
		  (if (null? tal) ;; end of the line
		      nodenum
		      (loop (car tal)(cdr tal)(+ depth 1)(+ 1 nodenum)))
		  ;; this is the case where we found part of the hierarchy but not 
		  ;; all of it, i.e. the node-depth went from deep to less deep
		  (if (> depth node-depth) ;; (+ 1 node-depth))
		      #f
		      (loop hed tal depth (+ nodenum 1)))))
	    #f))))

;; top is the top node name zeroeth node VALUE=0
(define (tree:add-node obj top nodelst #!key (userdata #f))
  (let ((curr-top (iup:attribute obj "TITLE0")))
    (if (or (not (string? curr-top))
	    (string-null? curr-top)
	    (string-match "^\\s*$" curr-top))
          (iup:attribute-set! obj "ADDBRANCH0" top))


    
    (cond
     ((not (equal? top (iup:attribute obj "TITLE0")))
      (print "ERROR: top name " top " doesn't match " (iup:attribute obj "TITLE0")))
     ((null? nodelst))
     (else
      (let loop ((hed      (car nodelst))
		 (tal      (cdr nodelst))
		 (depth    1)
		 (pathl    (list top)))
	;; Because the tree dialog changes node numbers when
	;; nodes are added or removed we must look up nodes
	;; each and every time. 0 is the top node so default
	;; to that.
	(let* ((newpath    (append pathl (list hed)))
	       (parentnode (tree:find-node obj pathl))
	       (nodenum    (tree:find-node obj newpath)))
	  ;; Add the branch under lastnode if not found
	  (if (not nodenum)
	      (begin
		(iup:attribute-set! obj (conc "ADDBRANCH" parentnode) hed)
		;; ERROR? ADDING DATA TO PARENT, DONT WE WANT IT ON CREATED NODE?
		(if userdata
		    (iup:attribute-set! obj (conc "USERDATA"   parentnode) userdata))
		(if (null? tal)
		    #t
		    ;; reset to top
		    (loop (car nodelst)(cdr nodelst) 1 (list top)))) 
	      (if (null? tal) ;; if null here then this path has already been added
		  #t
		  (loop (car tal)(cdr tal)(+ depth 1) newpath)))))))))

(define (tree:node->path obj nodenum)
  (let loop ((currnode 0)
	     (path     '()))
    (let* ((node-depth (string->number (iup:attribute obj (conc "DEPTH" currnode))))
	   (node-title (iup:attribute obj (conc "TITLE" currnode)))
	   (trimpath   (if (and (not (null? path))
				(> (length path) node-depth))
			   (take path node-depth)
			   path))
	   (newpath    (append trimpath (list node-title))))
      (if (>= currnode nodenum)
	  newpath
	  (loop (+ currnode 1)
		newpath)))))

(define (tree:delete-node obj top node-path) ;; node-path is a list of strings
  (let ((id  (tree:find-node obj (cons top node-path))))
    (print "Found node to remove " id " for path " top " " node-path)
    (iup:attribute-set! obj (conc "DELNODE" id) "SELECTED")))
	
#|

  (let* ((tb      (iup:treebox
                   #:value 0
                   #:name "Runs"
                   #:expand "YES"
                   #:addexpanded "NO"
                   #:selection-cb
                   (lambda (obj id state)
                     ;; (print "obj: " obj ", id: " id ", state: " state)
                     (let* ((run-path (tree:node->path obj id))
                            (run-id   (tree-path->run-id (cdr run-path))))
                       (if run-id
                           (begin
                             (dboard:data-curr-run-id-set! data run-id)
                             (dashboard:update-run-summary-tab)))
                       ;; (print "path: " (tree:node->path obj id) " run-id: " run-id)
                       ))))
|#

;;======================================================================
;; db
;;======================================================================

;; Use db:test-get* to access
;; Get test data using test_ids. NB// Only works within a single run!!
;;
(define (db:get-test-info-by-ids dbstruct run-id test-ids)
  (db:with-db
   dbstruct
   run-id
   #f
   (lambda (db)
     (let ((res '()))
       (sqlite3:for-each-row
	(lambda (a . b)
	  ;;                 0    1       2      3      4        5       6      7        8     9     10      11          12          13       14
	  (set! res (cons (apply vector a b) res)))
	db
	(conc "SELECT " db:test-record-qry-selector " FROM tests WHERE id in ("
	      (string-intersperse (map conc test-ids) ",") ");"))
       res))))




)