Megatest

Artifact [3c7c56e5e8]
Login

Artifact 3c7c56e5e8ab5ebb1e1e5673bbd28a11f2fe4982:


;;======================================================================
;; Copyright 2017, Matthew Welland.
;; 
;; This file is part of Megatest.
;; 
;;     Megatest is free software: you can redistribute it and/or modify
;;     it under the terms of the GNU General Public License as published by
;;     the Free Software Foundation, either version 3 of the License, or
;;     (at your option) any later version.
;; 
;;     Megatest is distributed in the hope that it will be useful,
;;     but WITHOUT ANY WARRANTY; without even the implied warranty of
;;     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;;     GNU General Public License for more details.
;; 
;;     You should have received a copy of the GNU General Public License
;;     along with Megatest.  If not, see <http://www.gnu.org/licenses/>.

;;======================================================================

(declare (unit commonmod))
(declare (uses margsmod))
(declare (uses debugprint))
(declare (uses configfmod))

(module commonmod
	*
	
;;(import scheme chicken data-structures extras files ports)
(import scheme chicken)
(import margsmod)
(import debugprint)
(import configfmod)

(use data-structures extras files ports)
(use 
  (prefix base64 base64:)
  (prefix sqlite3 sqlite3:) 
  (srfi 18)
  (prefix dbi dbi:)
  directory-utils
  hostinfo
  format
  matchable
  md5
  message-digest
  pkts
  posix 
  regex
  regex-case
  sparse-vectors
  srfi-1
  srfi-4
  srfi-13
  srfi-69
  stack
  stml2
  typed-records 
  z3
 )
	
(include "common_records.scm")

;;======================================================================
;; (require-library margs)
;; (include "margs.scm")

;; (define old-exit exit)
;; 
;; (define (exit . code)
;;   (if (null? code)
;;       (old-exit)
;;       (old-exit code)))

(define *numcpus-cache* (make-hash-table))



;;======================================================================
;; use to transition to area-name

(define (common:get-sync-lock-filepath)
  (let* ((tmp-area     (common:get-db-tmp-area))
         (lockfile     (conc tmp-area "/megatest.db.sync-lock")))
    lockfile))


(define (common:get-sync-lock-filepath)
  (let* ((tmp-area     (common:get-db-tmp-area))
         (lockfile     (conc tmp-area "/megatest.db.sync-lock")))
    lockfile))

;;======================================================================
;; (debug:print 0 *default-log-port* "Failed to find this executable! Using what can be found on the path")

(define (common:get-testsuite-name)
  (or (configf:lookup *configdat* "setup" "area-name") ;; megatest is a flexible tool, testsuite is too limiting a description.
      (configf:lookup *configdat* "setup" "testsuite" )
      (getenv "MT_TESTSUITE_NAME")
      (pathname-file (or (if (string? *toppath* )
			     (pathname-file *toppath*)
			     #f)
			 (common:get-toppath #f)))
      "please-set-setup-area-name")) ;; (pathname-file (current-directory)))))

(define common:get-area-name common:get-testsuite-name)


(define (get-area-name configdat toppath #!optional (short #f))
  ;; look up my area name in areas table (future)
  ;; generate auto name
  (conc (get-area-path-signature toppath short)
	"-"
	(common:get-testsuite-name toppath configdat)))

;; pathenvvar will set the named var to the path of the config
(define (find-and-read-config fname #!key (environ-patt #f)(given-toppath #f)(pathenvvar #f))
  (let* ((curr-dir   (current-directory))
         (configinfo (find-config fname toppath: given-toppath))
	 (toppath    (car configinfo))
	 (configfile (cadr configinfo))
	 (set-fields (lambda (curr-section next-section ht path)
		       (let ((field-names (if ht (common:get-fields ht) '()))
			     (target      (or (getenv "MT_TARGET")(args:get-arg "-reqtarg")(args:get-arg "-target"))))
			 (debug:print-info 9 *default-log-port* "set-fields with field-names=" field-names " target=" target " curr-section=" curr-section " next-section=" next-section " path=" path " ht=" ht)
			 (if (not (null? field-names))(keys:target-set-args field-names target #f))))))
    (if toppath (change-directory toppath)) 
    (if (and toppath pathenvvar)(setenv pathenvvar toppath))
    (let ((configdat  (if configfile 
			  (read-config configfile #f #t environ-patt: environ-patt post-section-procs: (list (cons "^fields$" set-fields)) #f))))
      (if toppath (change-directory curr-dir))
      (list configdat toppath configfile fname))))


;;======================================================================
;; L O C K I N G   M E C H A N I S M S 
;;======================================================================
(define (common:get-db-tmp-area . junk)
  (if *db-cache-path*
      *db-cache-path*
      (if *toppath* ;; common:get-create-writeable-dir
	  (handle-exceptions
	   exn
	   (begin
	     (debug:print-error 0 *default-log-port* "Couldn't create path to " *db-cache-path* ", exn=" exn)
	     (exit 1))
	   (let* ((tsname (common:get-testsuite-name))
		  (dbpath (common:get-create-writeable-dir
			   (list (conc "/tmp/" (current-user-name)
				       "/megatest_localdb/"
				       tsname "/"
				       (string-translate *toppath* "/" "."))
				 (conc "/tmp/" (current-process-id) ;; just in case we have an issue with the dir by own user name
				       "/megatest_localdb/"
				       tsname
				       (string-translate *toppath* "/" "."))
				 ))))
	     (set! *db-cache-path* dbpath)
	     dbpath))
	  #f)))



;;======================================================================
;; CONTENTS
;;
;;  config file utils
;;  misc conversion, data manipulation functions
;;  testsuite and area utilites
;;
;;======================================================================

(include "megatest-version.scm")
(include "megatest-fossil-hash.scm")

;; Move globals here
(define *toppath*           #f)
(define *configinfo*        #f)   ;; raw results from setup, includes toppath and table from megatest.config
(define *db-with-db-mutex*  (make-mutex))
(define *max-api-process-requests* 0)
(define *common:denoise*    (make-hash-table)) ;; for low noise printing
(define *api-process-request-count* 0)
(define *db-keys*           #f)
(define *db-cache-path*     #f)

(define *number-of-writes* 0)
(define *number-non-write-queries* 0)

(define *glob-like-match-cache* (make-hash-table))
(define *numcpus-cache* (make-hash-table))

(define (get-full-version)
  (conc megatest-version "-" megatest-fossil-hash))

(define (version-signature)
  (conc megatest-version "-" (substring megatest-fossil-hash 0 4)))

(defstruct remote
  (hh-dat            #f) ;; (common:get-homehost)) ;; homehost record ( addr . hhflag )
  (server-url        #f) ;; (server:check-if-running *toppath*) #f))
  (server-id         #f)
  (server-info       #f) ;; (if *toppath* (server:check-if-running *toppath*) #f))
  (last-server-check 0)  ;; last time we checked to see if the server was alive
  (conndat           #f)
  (transport         #f) ;; *transport-type*)
  (server-timeout    #f) ;; (server:expiration-timeout))
  (force-server      #f)
  (ro-mode           #f)  
  (ro-mode-checked   #f)) ;; flag that indicates we have checked for ro-mode

;; launching and hosts
(defstruct host
  (reachable    #f)
  (last-update  0)
  (last-used    0)
  (last-cpuload 1))

;;======================================================================
;; V E R S I O N
;;======================================================================

(define (common:get-full-version)
  (conc megatest-version "-" megatest-fossil-hash))

(define (common:version-signature)
  (conc megatest-version "-" (substring megatest-fossil-hash 0 4)))

;;======================================================================
;; config file utils
;;======================================================================

(define (lookup cfgdat section var)
  (if (hash-table? cfgdat)
      (let ((sectdat (hash-table-ref/default cfgdat section '())))
	(if (null? sectdat)
	    #f
	    (let ((match (assoc var sectdat)))
	      (if match ;; (and match (list? match)(> (length match) 1))
		  (cadr match)
		  #f))
	    ))
      #f))

;; returns var key1=val1; key2=val2 ... as alist
(define (get-key-list cfgdat section var)
  ;; convert string a=1; b=2; c=a silly thing; d=
  (let ((valstr (lookup cfgdat section var)))
    (if valstr
	(val->alist valstr)
	'()))) ;; should it return empty list or #f to indicate not set?


(define (get-section cfgdat section)
  (hash-table-ref/default cfgdat section '()))

;;======================================================================
;; misc conversion, data manipulation functions
;;======================================================================

;;======================================================================
;; if it looks like a number -> convert it to a number, else return it
;;
(define (lazy-convert inval)
  (let* ((as-num (if (string? inval)(string->number inval) #f)))
    (or as-num inval)))

;;======================================================================
;; to '((a . 1)(b . 2)(c . "a silly thing")(d . ""))
;;
(define (val->alist val #!key (convert #f))
  (let ((val-list (string-split-fields ";\\s*" val #:infix)))
    (if val-list
	(map (lambda (x)
	       (let ((f (string-split-fields "\\s*=\\s*" x #:infix)))
		 (case (length f)
		   ((0) `(,#f))  ;; null string case
		   ((1) `(,(string->symbol (car f))))
		   ((2) `(,(string->symbol (car f)) .
			  ,(let ((inval (cadr f)))
			     (if convert (lazy-convert inval) inval))))
		   (else f))))
	     (filter (lambda (x)
		       (not (string-match "^\\s*" x)))
		     val-list))
	'())))

;;======================================================================
;; testsuite and area utilites
;;======================================================================

(define (get-area-path-signature toppath #!optional (short #f))
  (let ((res (message-digest-string (md5-primitive) toppath)))
    (if short
	(substring res 0 4)
	res)))

;;======================================================================
;; need generic find-record-with-var-nmatching-val
;;
(define (path->area-record cfgdat path)
  (let* ((areadat (get-cfg-areas cfgdat))
	 (all     (filter (lambda (x)
			    (let* ((keyvals (cdr x))
				   (pth     (alist-ref 'path keyvals)))
			      (equal? path pth)))
			  areadat)))
    (if (null? all)
	#f
	(car all)))) ;; return first match

;;======================================================================
;; given a config return an alist of alists
;;   area-name => data
;;
(define (get-cfg-areas cfgdat)
  (let ((adat (get-section cfgdat "areas")))
    (map (lambda (entry)
	   `(,(car entry) . 
	     ,(val->alist (cadr entry))))
	 adat)))

;;======================================================================
;; debug
;;======================================================================

(define (debug:setup debug-arg verbose-arg) ;; debug-arg= #f, #t or 'noprop
  (let ((debugstr (or debug-arg ;; (args:get-arg "-debug")
		      ;; (args:get-arg "-debug-noprop")
		      (get-environment-variable "MT_DEBUG_MODE"))))
    (debug:calc-verbosity debugstr verbose-arg)
    ;; (debug:check-verbosity *verbosity* debugstr)
    ;; if we were handed a bad verbosity rule then we will override it with 1 and continue
    (if (not (verbosity))(set! (verbosity) 1))
    (if (and (not (eq? debug-arg 'noprop))
	     (or debug-arg
		 (not (get-environment-variable "MT_DEBUG_MODE"))))
	(setenv "MT_DEBUG_MODE" (if (list? (verbosity))
				    (string-intersperse (map conc (verbosity)) ",")
				    (conc (verbosity)))))))


;;======================================================================
;; Safe utilities
;;======================================================================

(define (common:false-on-exception thunk #!key (message #f))
  (handle-exceptions exn
                     (begin
                       (if message
                           (debug:print-info 0 *default-log-port* message))
                       #f) (thunk) ))

(define (common:file-exists? path-string #!key (silent #f))
  ;;;; TODO: catch permission denied exceptions and emit appropriate warnings. Might be able to get rid of with chicken 5?
  (common:false-on-exception (lambda () (file-exists? path-string))
                             message: (if (not silent)
                                          (conc "Unable to access path: " path-string)
                                          #f)
                             ))

(define (common:directory-exists? path-string)
  ;;;; TODO: catch permission denied exceptions and emit appropriate warnings
  (common:false-on-exception (lambda () (directory-exists? path-string))
                             message: (conc "Unable to access path: " path-string)
                             ))

;;======================================================================
;; does the directory exist and do we have write access?
;;
;;    returns the directory or #f
;;
(define (common:directory-writable? path-string)
  (handle-exceptions
   exn
   (begin
     (debug:print 0 *default-log-port* "Failed to identify access to " path-string ", exn=" exn)
     #f)
   (if (and (directory-exists? path-string)
            (file-write-access? path-string))
       path-string
       #f)))

;;======================================================================
;; T I M E   A N D   D A T E
;;======================================================================

;; Convert strings like "5s 2h 3m" => 60x60x2 + 3x60 + 5
(define (common:hms-string->seconds tstr)
  (let ((parts     (string-split-fields "\\w+" tstr))
	(time-secs 0)
	;; s=seconds, m=minutes, h=hours, d=days, M=months, y=years, w=weeks
	(trx       (regexp "(\\d+)([smhdMyw])")))
    (for-each (lambda (part)
		(let ((match  (string-match trx part)))
		  (if match
		      (let ((val (string->number (cadr match)))
			    (unt (caddr match)))
			(if val 
			    (set! time-secs (+ time-secs (* val
							    (case (string->symbol unt)
							      ((s) 1)
							      ((m) 60) ;; minutes
							      ((h) 3600)
							      ((d) 86400)
							      ((w) 604800)
							      ((M) 2628000) ;; aproximately one month
							      ((y) 31536000)
							      (else #f))))))))))
	      parts)
    time-secs))

(define (seconds->hr-min-sec secs)
  (let* ((hrs (quotient secs 3600))
	 (min (quotient (- secs (* hrs 3600)) 60))
	 (sec (- secs (* hrs 3600)(* min 60))))
    (conc (if (> hrs 0)(conc hrs "hr ") "")
	  (if (> min 0)(conc min "m ")  "")
	  sec "s")))

(define (seconds->time-string sec)
  (time->string 
   (seconds->local-time sec) "%H:%M:%S"))

(define (seconds->work-week/day-time sec)
  (time->string
   (seconds->local-time sec) "ww%V.%u %H:%M"))

(define (seconds->work-week/day sec)
  (time->string
   (seconds->local-time sec) "ww%V.%u"))

(define (seconds->year-work-week/day sec)
  (time->string
   (seconds->local-time sec) "%yww%V.%w"))

(define (seconds->year-work-week/day-time sec)
  (time->string
   (seconds->local-time sec) "%Yww%V.%w %H:%M"))

(define (seconds->year-week/day-time sec)
  (time->string
   (seconds->local-time sec) "%Yw%V.%w %H:%M"))

(define (seconds->quarter sec)
  (case (string->number
	 (time->string 
	  (seconds->local-time sec)
	  "%m"))
    ((1 2 3) 1)
    ((4 5 6) 2)
    ((7 8 9) 3)
    ((10 11 12) 4)
    (else #f)))

;;======================================================================
;; basic ISO8601 format (e.g. "2017-02-28 06:02:54") date time => Unix epoch
;;
(define (common:date-time->seconds datetime)
  (local-time->seconds (string->time datetime "%Y-%m-%d %H:%M:%S")))

;;======================================================================
;; given span of seconds tstart to tend
;; find start time to mark and mark delta
;;
(define (common:find-start-mark-and-mark-delta tstart tend)
  (let* ((deltat   (- (max tend (+ tend 10)) tstart)) ;; can't handle runs of less than 4 seconds. Pad it to 10 seconds ...
	 (result   #f)
	 (min      60)
	 (hr       (* 60 60))
	 (day      (* 24 hr))
	 (yr       (* 365 day)) ;; year
	 (mo       (/ yr 12))
	 (wk       (* day 7)))
    (for-each
     (lambda (max-blks)
       (for-each
	(lambda (span) ;; 5 2 1
	  (if (not result)
	      (for-each 
	       (lambda (timeunit timesym) ;; year month day hr min sec
		 (if (not result)
		     (let* ((time-blk (* span timeunit))
			    (num-blks (quotient deltat time-blk)))
		       (if (and (> num-blks 4)(< num-blks max-blks))
			   (let ((first (* (quotient tstart time-blk) time-blk)))
			     (set! result (list span timeunit time-blk first timesym))
			     )))))
	       (list yr mo wk day hr min 1)
	       '(     y  mo w  d   h  m   s))))
	(list 8 6 5 2 1)))
     '(5 10 15 20 30 40 50 500))
    (if values
	(apply values result)
	(values 0 day 1 0 'd))))

;;======================================================================
;; given x y lim return the cron expansion
;;
(define (common:expand-cron-slash x y lim)
  (let loop ((curr x)
	     (res  `()))
    (if (< curr lim)
	(loop (+ curr y) (cons curr res))
	(reverse res))))

;;======================================================================
;; expand a complex cron string to a list of cron strings
;;
;;  x/y   => x, x+y, x+2y, x+3y while x+Ny<max_for_field
;;  a,b,c => a, b ,c
;;
;;   NOTE: with flatten a lot of the crud below can be factored down.
;;
(define (common:cron-expand cron-str)
  (if (list? cron-str)
      (flatten
       (fold (lambda (x res)
	       (if (list? x)
		   (let ((newres (map common:cron-expand x)))
		     (append x newres))
		   (cons x res)))
	     '()
	     cron-str)) ;; (map common:cron-expand cron-str))
      (let ((cron-items (string-split cron-str))
	    (slash-rx   (regexp "(\\d+)/(\\d+)"))
	    (comma-rx   (regexp ".*,.*"))
	    (max-vals   '((min        . 60)
			  (hour       . 24)
			  (dayofmonth . 28) ;;; BUG!!!! This will be a bug for some combinations
			  (month      . 12)
			  (dayofweek  . 7))))
	(if (< (length cron-items) 5) ;; bad spec
	    cron-str ;; `(,cron-str)              ;; just return the string, something downstream will fix it
	    (let loop ((hed  (car cron-items))
		       (tal  (cdr cron-items))
		       (type 'min)
		       (type-tal '(hour dayofmonth month dayofweek))
		       (res  '()))
	      (regex-case
	       hed
	       (slash-rx ( _ base incr ) (let* ((basen          (string->number base))
						(incrn          (string->number incr))
						(expanded-vals  (common:expand-cron-slash basen incrn (alist-ref type max-vals)))
						(new-list-crons (fold (lambda (x myres)
									(cons (conc (if (null? res)
											""
											(conc (string-intersperse res " ") " "))
										    x " " (string-intersperse tal " "))
									      myres))
								      '() expanded-vals)))
					   ;; (print "new-list-crons: " new-list-crons)
					   ;; (fold (lambda (x res)
					   ;; 	    (if (list? x)
					   ;; 		(let ((newres (map common:cron-expand x)))
					   ;; 		  (append x newres))
					   ;; 		(cons x res)))
					   ;; 	  '()
					   (flatten (map common:cron-expand new-list-crons))))
	       ;;					    (map common:cron-expand (map common:cron-expand new-list-crons))))
	       (else (if (null? tal)
			 cron-str
			 (loop (car tal)(cdr tal)(car type-tal)(cdr type-tal)(append res (list hed)))))))))))

;;======================================================================
;; given a cron string and the last time event was processed return #t to run or #f to not run
;;
;;  min    hour   dayofmonth month  dayofweek
;; 0-59    0-23   1-31       1-12   0-6          ### NOTE: dayofweek does not include 7
;;
;;  #t => yes, run the job
;;  #f => no, do not run the job
;;
(define (common:cron-event cron-str now-seconds-in last-done) ;; ref-seconds = #f is NOW.
  (let* ((cron-items     (map string->number (string-split cron-str)))
	 (now-seconds    (or now-seconds-in (current-seconds)))
	 (now-time       (seconds->local-time now-seconds))
	 (last-done-time (seconds->local-time last-done))
	 (all-times      (make-hash-table)))
    ;; (print "cron-items: " cron-items "(length cron-items): " (length cron-items))
    (if (not (eq? (length cron-items) 5)) ;; don't even try to figure out junk strings
	#f
	(match-let (((     cmin chour cdayofmonth cmonth    cdayofweek)
		     cron-items)
		    ;; 0     1    2        3         4    5      6
		    ((nsec nmin nhour ndayofmonth nmonth nyr ndayofweek n7 n8 n9)
		     (vector->list now-time))
		    ((lsec lmin lhour ldayofmonth lmonth lyr ldayofweek l7 l8 l9)
		     (vector->list last-done-time)))
		   ;; create all possible time slots
		   ;; remove invalid slots due to (for example) day of week
		   ;; get the start and end entries for the ref-seconds (current) time
		   ;; if last-done > ref-seconds => this is an ERROR!
		   ;; does the last-done time fall in the legit region?
		   ;;    yes => #f  do not run again this command
		   ;;    no  => #t  ok to run the command
		   (for-each ;; month
		    (lambda (month)
		      (for-each ;; dayofmonth
		       (lambda (dom)
			 (for-each
			  (lambda (hr) ;; hour
			    (for-each
			     (lambda (minute) ;; minute
			       (let ((copy-now (apply vector (vector->list now-time))))
				 (vector-set! copy-now 0 0) ;; force seconds to zero
				 (vector-set! copy-now 1 minute)
				 (vector-set! copy-now 2 hr)
				 (vector-set! copy-now 3 dom)  ;; dom is already corrected for zero referenced
				 (vector-set! copy-now 4 month)
				 (let* ((copy-now-secs (local-time->seconds copy-now))
					(new-copy      (seconds->local-time copy-now-secs))) ;; remake the time vector
				   (if (or (not cdayofweek)
					   (equal? (vector-ref new-copy 6)
						   cdayofweek)) ;; if the day is specified and a match OR if the day is NOT specified
				       (if (or (not cdayofmonth)
					       (equal? (vector-ref new-copy 3)
						       (+ 1 cdayofmonth))) ;; if the month is specified and a match OR if the month is NOT specified
					   (hash-table-set! all-times copy-now-secs new-copy))))))
			     (if cmin
				 `(,cmin)  ;; if given cmin, have to use it
				 (list (- nmin 1) nmin (+ nmin 1))))) ;; minute
			  (if chour
			      `(,chour)
			      (list (- nhour 1) nhour (+ nhour 1))))) ;; hour
		       (if cdayofmonth
			   `(,cdayofmonth)
			   (list (- ndayofmonth 1) ndayofmonth (+ ndayofmonth 1)))))
		    (if cmonth
			`(,cmonth)
			(list (- nmonth 1) nmonth (+ nmonth 1))))
		   (let ((before #f)
			 (is-in  #f))
		     (for-each
		      (lambda (moment)
			(if (and before
				 (<= before now-seconds)
				 (>= moment now-seconds))
			    (begin
			      ;; (print)
			      ;; (print "Before: " (time->string (seconds->local-time before)))
			      ;; (print "Now:    " (time->string (seconds->local-time now-seconds)))
			      ;; (print "After:  " (time->string (seconds->local-time moment)))
			      ;; (print "Last:   " (time->string (seconds->local-time last-done)))
			      (if (<  last-done before)
				  (set! is-in before))
			      ))
			(set! before moment))
		      (sort (hash-table-keys all-times) <))
		     is-in)))))

(define (common:extended-cron  cron-str now-seconds-in last-done)
  (let ((expanded-cron (common:cron-expand cron-str)))
    (if (string? expanded-cron)
	(common:cron-event expanded-cron now-seconds-in last-done)
	(let loop ((hed (car expanded-cron))
		   (tal (cdr expanded-cron)))
	  (if (common:cron-event hed now-seconds-in last-done)
	      #t
	      (if (null? tal)
		  #f
		  (loop (car tal)(cdr tal))))))))

;;======================================================================
;; Keys
;;======================================================================

(define (keys->keystr keys) ;; => key1,key2,key3,additiona1, ...
  (string-intersperse keys ","))

(define (args:usage . a) #f)

;;======================================================================
;; key <=> target routines
;;======================================================================

;;======================================================================
;; This invalidates using "/" in item names. Every key will be
;; available via args:get-arg as :keyfield. Since this only needs to
;; be called once let's use it to set the environment vars
;;
;; The setting of :keyfield in args should be turned off ASAP
;;
(define (keys:target-set-args keys target ht)
  (if target
      (let ((vals (string-split target "/")))
	(if (eq? (length vals)(length keys))
	    (for-each (lambda (key val)
			(setenv key val)
			(if ht (hash-table-set! ht (conc ":" key) val)))
		      keys
		      vals)
	    (debug:print-error 0 *default-log-port* "wrong number of values in " target ", should match " keys))
	vals)
      (debug:print 4 *default-log-port* "ERROR: keys:target-set-args called with no target.")))

;;======================================================================
;; given the keys (a list of vectors <key field> or a list of keys) and a target return a keyval list
;; keyval list ( (key1 val1) (key2 val2) ...)
(define (keys:target->keyval keys target)
  (let* ((targlist (string-split target "/"))
	 (numkeys  (length keys))
	 (numtarg  (length targlist))
	 (targtweaked (if (> numkeys numtarg)
			  (append targlist (make-list (- numkeys numtarg) ""))
			  targlist)))
    (map (lambda (key targ)
	   (list key targ))
	 keys targtweaked)))

(define (keys:make-key/field-string keys)
  (string-join
   (map (lambda (key)(conc key " TEXT"))
	keys)
   ","))

;;======================================================================
;; (define keys:config-get-fields common:get-fields)

(define (common:get-area-path-signature)
  (message-digest-string (md5-primitive) *toppath*))

(define (common:get-signature str)
  (message-digest-string (md5-primitive) str))


;;======================================================================
;; S Y S T E M   S T U F F
;;======================================================================

;;======================================================================
;; lazy-safe get file mod time. on any error (file not existing etc.) return 0
;;
(define (common:lazy-modification-time fpath)
  (handle-exceptions
   exn
   (begin
     (debug:print 0 *default-log-port* "Failed to get modifcation time for " fpath ", treating it as zero. exn=" exn)
     0)
   (if (file-exists? fpath)
       (file-modification-time fpath)
       0)))

;;======================================================================
;; find timestamp of newest file associated with a sqlite db file
(define (common:lazy-sqlite-db-modification-time fpath)
  (let* ((glob-list (handle-exceptions
		     exn
		     (begin
		       (debug:print 0 *default-log-port* "Failed to glob " fpath "*, exn=" exn)
		       `(,(conc "/no/such/file, message: " ((condition-property-accessor 'exn 'message) exn))))
		     (glob (conc fpath "*"))))
         (file-list (if (eq? 0 (length glob-list))
			'("/no/such/file")
			glob-list)))
    (apply max
	   (map
	    common:lazy-modification-time 
	    file-list))))

;;======================================================================
;; execute thunk, return value.  If exception thrown, trap exception, return #f, and emit nonfatal condition note to *default-log-port* .
;; arguments - thunk, message
(define (common:fail-safe thunk warning-message-on-exception)
  (handle-exceptions
   exn
   (begin
     (debug:print-info 0 *default-log-port* "notable but nonfatal condition - "warning-message-on-exception", exn=" exn)
     (debug:print-info 0 *default-log-port*
                       (string-substitute "\n?Error:" "nonfatal condition:"
                                          (with-output-to-string
                                            (lambda ()
                                              (print-error-message exn) ))))
     (debug:print-info 0 *default-log-port* "    -- continuing after nonfatal condition...")
     #f)
   (thunk)))

(define home (getenv "HOME"))
(define user (getenv "USER"))


;;======================================================================
;; returns list of fd count, socket count
(define (get-file-descriptor-count #!key  (pid (current-process-id )))
  (list
   (length (glob (conc "/proc/" pid "/fd/*")))
   (length  (filter identity (map socket? (glob (conc "/proc/" pid "/fd/*")))))
   )
  )



;;======================================================================
;; GLOBALS

;; CONTEXTS
(defstruct cxt
  (taskdb #f)
  (cmutex (make-mutex)))
;; (define *contexts* (make-hash-table))
;; (define *context-mutex* (make-mutex))

;; ;; safe method for accessing a context given a toppath
;; ;;
;; (define (common:with-cxt toppath proc)
;;   (mutex-lock! *context-mutex*)
;;   (let ((cxt (hash-table-ref/default *contexts* toppath #f)))
;;     (if (not cxt)
;;         (set! cxt (let ((x (make-cxt)))(hash-table-set! *contexts* toppath x) x)))
;;     (let ((cxt-mutex (cxt-mutex cxt)))
;;       (mutex-unlock! *context-mutex*)
;;       (mutex-lock! cxt-mutex)
;;       (let ((res (proc cxt)))
;;         (mutex-unlock! cxt-mutex)
;;         res))))

;;======================================================================
;; A hash table that can be accessed by #{scheme ...} calls in
;; config files. Allows communicating between confgs
;;
(define *user-hash-data* (make-hash-table))

(define *pkts-info*    (make-hash-table)) ;; store stuff like the last parent here

(define *runconfigdat* #f)   ;; run configs data
(define *configdat*    #f)   ;; megatest.config data
(define *configstatus* #f)   ;; status of data; 'fulldata : all processing done, #f : no data yet, 'partialdata : partial read done

(define *already-seen-runconfig-info* #f)

(define *test-meta-updated* (make-hash-table))
(define *globalexitstatus*  0) ;; attempt to work around possible thread issues
(define *passnum*           0) ;; when running track calls to run-tests or similar
;; (define *alt-log-file* #f)  ;; used by -log

(define *time-zero* (current-seconds)) ;; for the watchdog
(define *default-area-tag* "local")

;; DATABASE
(define *dbstruct-db*         #f) ;; used to cache the dbstruct in db:setup. Goal is to remove this.
;; db stats
(define *db-stats*            (make-hash-table)) ;; hash of vectors < count duration-total >
(define *db-stats-mutex*      (make-mutex))
;; db access
(define *db-last-access*      (current-seconds)) ;; last db access, used in server
(define *db-write-access*     #t)
;; db sync
(define *db-last-sync*        0)                 ;; last time the sync to megatest.db happened
(define *db-sync-in-progress* #f)                ;; if there is a sync in progress do not try to start another
(define *db-multi-sync-mutex* (make-mutex))      ;; protect access to *db-sync-in-progress*, *db-last-sync*
;; task db
(define *task-db*             #f) ;; (vector db path-to-db)
(define *db-access-allowed*   #t) ;; flag to allow access
(define *db-access-mutex*     (make-mutex))
(define *db-transaction-mutex* (make-mutex))

(define *db-api-call-time*    (make-hash-table)) ;; hash of command => (list of times)
;; no sync db
(define *no-sync-db*          #f)

;; SERVER
(define *my-client-signature* #f)
(define *transport-type*    'http)             ;; override with [server] transport http|rpc|nmsg
(define *runremote*         #f)                ;; if set up for server communication this will hold <host port>
;; (define *max-cache-size*    0)
(define *logged-in-clients* (make-hash-table))
(define *server-id*         #f)
(define *server-info*       #f)  ;; good candidate for easily convert to non-global
(define *time-to-exit*      #f)
(define *server-run*        #t)
(define *run-id*            #f)
(define *server-kind-run*   (make-hash-table))
(define *home-host*         #f)
;; (define *total-non-write-delay* 0)
(define *heartbeat-mutex*   (make-mutex))

(define *server-overloaded*  #f)
(define *send-receive-mutex* (make-mutex)) ;; should have separate mutex per run-id

;; client
(define *rmt-mutex*         (make-mutex))     ;; remote access calls mutex 

;; RPC transport
(define *rpc:listener*      #f)

;; KEY info
(define *target*            (make-hash-table)) ;; cache the target here; target is keyval1/keyval2/.../keyvalN
(define *keys*              (make-hash-table)) ;; cache the keys here
(define *keyvals*           (make-hash-table))
(define *toptest-paths*     (make-hash-table)) ;; cache toptest path settings here
(define *test-paths*        (make-hash-table)) ;; cache test-id to test run paths here
(define *test-ids*          (make-hash-table)) ;; cache run-id, testname, and item-path => test-id
(define *test-info*         (make-hash-table)) ;; cache the test info records, update the state, status, run_duration etc. from testdat.db

(define *run-info-cache*     (make-hash-table)) ;; run info is stable, no need to reget
(define *launch-setup-mutex* (make-mutex))     ;; need to be able to call launch:setup often so mutex it and re-call the real deal only if *toppath* not set
(define *homehost-mutex*     (make-mutex))

;; Miscellaneous
(define *triggers-mutex*     (make-mutex))     ;; block overlapping processing of triggers

(define *host-loads*         (make-hash-table))

;;======================================================================
;; cache environment vars for each run here
(define *env-vars-by-run-id* (make-hash-table))

;;======================================================================
;; Testconfig and runconfig caches. 
(define *testconfigs*        (make-hash-table)) ;; test-name => testconfig
(define *runconfigs*         (make-hash-table)) ;; target    => runconfig

;; This is a cache of pre-reqs met, don't re-calc in cases where called with same params less than
;; five seconds ago
(define *pre-reqs-met-cache* (make-hash-table))

(use posix-extras pathname-expand files)

;; this plugs a hole in posix-extras in recent chicken versions > 4.9)
(let-values (( (chicken-release-number chicken-major-version)
               (apply values
                      (map string->number
                           (take
                            (string-split (chicken-version) ".")
                            2)))))
  (let ((resolve-pathname-broken?
         (or (> chicken-release-number 4)
             (and (eq? 4 chicken-release-number) (> chicken-major-version 9)))))
    (if resolve-pathname-broken?
        (define ##sys#expand-home-path pathname-expand))))

(define (realpath x) (resolve-pathname  (pathname-expand (or x "/dev/null")) ))

(define (common:get-this-exe-fullpath #!key (argv (argv)))
  (let* ((this-script
          (cond
           ((and (> (length argv) 2)
                 (string-match "^(.*/csi|csi)$" (car argv))
                 (string-match "^-(s|ss|sx|script)$" (cadr argv)))
            (caddr argv))
           (else (car argv))))
         (fullpath (realpath this-script)))
    fullpath))

;;======================================================================

(define *common:this-exe-fullpath* (common:get-this-exe-fullpath))
(define *common:this-exe-dir*      (pathname-directory *common:this-exe-fullpath*))
(define *common:this-exe-name*     (pathname-strip-directory *common:this-exe-fullpath*))

;;======================================================================
;; when called from a wrapper I need sometimes to find the calling
;; wrapper, this is for dashboard to find the correct megatest.
;;
(define (common:find-local-megatest #!optional (progname "megatest"))
  (let ((res (filter file-exists?
		     (map (lambda (updir)
			    (let* ((lm  (car (argv)))
				   (dir (pathname-directory lm))
				   (exe (pathname-strip-directory lm)))
			      (conc (if dir (conc dir "/") "")
				    (case (string->symbol exe)
				      ((dboard)    (conc updir progname))
				      ((mtest)     (conc updir progname))
				      ((dashboard) progname)
				      (else exe)))))
			  '("../../" "../")))))
    (if (null? res)
	(begin
	  (debug:print 0 *default-log-port* "Failed to find this executable! Using what can be found on the path")
	  progname)
	(car res))))

(define *common:logpro-exit-code->status-sym-alist*
  '( ( 0 . pass )
     ( 1 . fail )
     ( 2 . warn )
     ( 3 . check )
     ( 4 . waived )
     ( 5 . abort )
     ( 6 . skip )))

(define (common:logpro-exit-code->status-sym exit-code)
  (or (alist-ref exit-code *common:logpro-exit-code->status-sym-alist*) 'fail))

(define (common:worse-status-sym ss1 ss2)
  (let loop ((status-syms-remaining '(abort fail check skip warn waived pass)))
    (cond
     ((null? status-syms-remaining)
      'fail)
     ((eq? (car status-syms-remaining) ss1)
      ss1)
     ((eq? (car status-syms-remaining) ss2)
      ss2)
     (else
      (loop (cdr status-syms-remaining))))))

(define (common:steps-can-proceed-given-status-sym status-sym)
  (if (member status-sym '(warn waived pass))
      #t
      #f))

(define (status-sym->string status-sym)
  (case status-sym
    ((pass) "PASS")
    ((fail) "FAIL")
    ((warn) "WARN")
    ((check) "CHECK")
    ((waived) "WAIVED")
    ((abort) "ABORT")
    ((skip) "SKIP")
    (else "FAIL")))

(define (common:logpro-exit-code->test-status exit-code)
  (status-sym->string (common:logpro-exit-code->status-sym exit-code)))

(define (common:clear-caches)
  (set! *target*             (make-hash-table))
  (set! *keys*               (make-hash-table))
  (set! *keyvals*            (make-hash-table))
  (set! *toptest-paths*      (make-hash-table))
  (set! *test-paths*         (make-hash-table))
  (set! *test-ids*           (make-hash-table))
  (set! *test-info*          (make-hash-table))
  (set! *run-info-cache*     (make-hash-table))
  (set! *env-vars-by-run-id* (make-hash-table))
  (set! *test-id-cache*      (make-hash-table)))

;; Generic string database
(define sdb:qry #f) ;; (make-sdb:qry)) ;;  'init #f)
;; Generic path database
(define *fdb* #f)

(define *last-launch* (current-seconds)) ;; use for throttling the launch rate. Would be better to use the db and last time of a test in LAUNCHED state.

(define (common:snapshot-file filepath #!key (subdir  ".") )
  (if (file-exists? filepath)
      (let* ((age-sec  (lambda (file)
                         (if (file-exists? file)
                             (- (current-seconds) (file-modification-time file))
                             1000000000))) ;; return really old value if file doesn't exist.  we want to clobber it if old or not exist.
             (ok-flag  #t)
             (age-mins (lambda (file) (/ (age-sec   file) 60)))
             (age-hrs  (lambda (file) (/ (age-mins file) 60)))
             (age-days (lambda (file) (/ (age-hrs   file) 24)))
             (age-wks  (lambda (file) (/ (age-days  file) 7)))
             (docmd    (lambda (cmd)
                         (cond
                          (ok-flag
                           (let ((res (system cmd)))
                             (cond
                              ((eq? 0 res)
                               #t)
                              (else
                               (set! ok-flag #f)
                               (debug:print 0 *default-log-port* "ERROR: ["(common:human-time)"] Command failed with exit code "
                                            (if (< res 0)
                                                res
                                                (/ res 8)) " ["cmd"]" )
                               #f))))
                          (else
                           (debug:print 0 *default-log-port* "ERROR: ["(common:human-time)"] Not runnining command due to prior error. ["cmd"]")
                           #f))))
             (copy     (lambda (src dest) (docmd (conc "/bin/cp '"src"' '"dest"'"))))
             (copy+zip (lambda (src dest) (docmd (conc "gzip -c - < '"src"' > '"dest"'"))))
             (fullpath (realpath filepath))
             (basedir  (pathname-directory fullpath))
             (basefile (pathname-strip-directory fullpath))
             ;;(prevfile (conc filepath ".prev.gz"))
             (minsfile (conc basedir "/" subdir "/" basefile ".mins.gz"))
             (hrsfile  (conc basedir "/" subdir "/" basefile ".hrs.gz"))
             (daysfile (conc basedir "/" subdir "/" basefile ".days.gz"))
             (wksfile  (conc basedir "/" subdir "/" basefile ".weeks.gz")))

        ;; create subdir it not exists
        (if (not (directory-exists? (conc basedir "/" subdir)))
            (docmd (conc "/bin/mkdir -p '"(conc basedir "/" subdir)"'")))
        
        ;; copy&zip <file> to <file>.mins if not exists
        (if (not (file-exists? minsfile))
            (copy+zip filepath minsfile))
        ;; copy <file>.mins to <file>.hrs if not exists
        (if (not (file-exists? hrsfile))
            (copy minsfile hrsfile))
        ;; copy <file>.hrs to <file>.days if not exists
        (if (not (file-exists? daysfile))
            (copy hrsfile daysfile))
        ;; copy <file>.days to <file>.weeks if not exists
        (if (not (file-exists? wksfile))
            (copy daysfile wksfile))


        ;; if age(<file>.mins.gz) >= 1h:
        ;;     copy <file>.mins.gz <file>.hrs.gz
        ;;     copy <prev file> <file>.mins.gz
        (when (>= (age-mins minsfile) 1)
	      (copy minsfile hrsfile)
	      (copy+zip filepath  minsfile))
        
        ;; if age(<file>.hrs.gz) >= 1d:
        ;;     copy <file>.hrs.gz <file>.days.gz
        ;;     copy <file>.mins.gz <file>.hrs.gz
        (when (>= (age-days hrsfile) 1)
	      (copy hrsfile daysfile)
	      (copy minsfile hrsfile))
        
        ;; if age(<file>.days.gz) >= 1w:
        ;;     copy <file>.days.gz <file>.weeks.gz
        ;;     copy <file>.hrs.gz <file>.days.gz
        (when (>= (age-wks daysfile) 1)
	      (copy daysfile wksfile)
	      (copy hrsfile daysfile))
        #t)
      #f))


;;======================================================================
;; S P A R S E   A R R A Y S
;;======================================================================

(define (make-sparse-array)
  (let ((a (make-sparse-vector)))
    (sparse-vector-set! a 0 (make-sparse-vector))
    a))

(define (sparse-array? a)
  (and (sparse-vector? a)
       (sparse-vector? (sparse-vector-ref a 0))))

(define (sparse-array-ref a x y)
  (let ((row (sparse-vector-ref a x)))
    (if row
	(sparse-vector-ref row y)
	#f)))

(define (sparse-array-set! a x y val)
  (let ((row (sparse-vector-ref a x)))
    (if row
	(sparse-vector-set! row y val)
	(let ((new-row (make-sparse-vector)))
	  (sparse-vector-set! a x new-row)
	  (sparse-vector-set! new-row y val)))))

;;======================================================================
;; L O C K E R S   A N D   B L O C K E R S 
;;======================================================================

;;======================================================================
;; block further accesses to databases. Call this before shutting db down
(define (common:db-block-further-queries)
  (mutex-lock! *db-access-mutex*)
  (set! *db-access-allowed* #f)
  (mutex-unlock! *db-access-mutex*))

(define (common:db-access-allowed?)
  (let ((val (begin
	       (mutex-lock! *db-access-mutex*)
	       *db-access-allowed*
	       (mutex-unlock! *db-access-mutex*))))
    val))

;;======================================================================
;; File locking
;;======================================================================

;;======================================================================
;; dot-locking egg seems not to work, using this for now
;; if lock is older than expire-time then remove it and try again
;; to get the lock
;;
(define (common:simple-file-lock fname #!key (expire-time 300))
  (let ((fmod-time (handle-exceptions
		    ext
		    (current-seconds)
		    (file-modification-time fname))))
    (if (common:file-exists? fname)
	(if (> (- (current-seconds) fmod-time) expire-time)
	    (begin
	      (handle-exceptions exn #f (delete-file* fname))	
	      (common:simple-file-lock fname expire-time: expire-time))
	    #f)
	(let ((key-string (conc (get-host-name) "-" (current-process-id))))
	  (with-output-to-file fname
	    (lambda ()
	      (print key-string)))
	  (thread-sleep! 0.25)
	  (if (common:file-exists? fname)
	      (handle-exceptions exn
				 #f 
				 (with-input-from-file fname
				   (lambda ()
				     (equal? key-string (read-line)))))
	      #f)))))

(define (common:simple-file-lock-and-wait fname #!key (expire-time 300))
  (let ((end-time (+ expire-time (current-seconds))))
    (let loop ((got-lock (common:simple-file-lock fname expire-time: expire-time)))
      (if got-lock
	  #t
	  (if (> end-time (current-seconds))
	      (begin
		(thread-sleep! 3)
		(loop (common:simple-file-lock fname expire-time: expire-time)))
	      #f)))))

(define (common:simple-file-release-lock fname)
  (handle-exceptions
   exn
   #f ;; I don't really care why this failed (at least for now)
   (delete-file* fname)))

;;======================================================================
;; U S E F U L   S T U F F
;;======================================================================

;;======================================================================
;; convert things to an alist or assoc list, #f gets converted to ""
;;
(define (common:to-alist dat)
  (cond
   ((list? dat)   (map common:to-alist dat))
   ((vector? dat)
    (map common:to-alist (vector->list dat)))
   ((pair? dat)
    (cons (common:to-alist (car dat))
	  (common:to-alist (cdr dat))))
   ((hash-table? dat)
    (map common:to-alist (hash-table->alist dat)))
   (else
    (if dat
	dat
	""))))

(define (common:alist-ref/default key alist default)
  (or (alist-ref key alist) default))

(define (common:low-noise-print waitval . keys)
  (let* ((key      (string-intersperse (map conc keys) "-" ))
	 (lasttime (hash-table-ref/default *common:denoise* key 0))
	 (currtime (current-seconds)))
    (if (> (- currtime lasttime) waitval)
	(begin
	  (hash-table-set! *common:denoise* key currtime)
	  #t)
	#f)))

(define (common:get-megatest-exe)
  (or (getenv "MT_MEGATEST") "megatest"))

(define (common:read-encoded-string instr)
  (handle-exceptions
   exn
   (handle-exceptions
    exn
    (begin
      (debug:print-error 0 *default-log-port* "received bad encoded string \"" instr "\", message: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn)
      (print-call-chain (current-error-port))
      #f)
    (read (open-input-string (base64:base64-decode instr))))
   (read (open-input-string (z3:decode-buffer (base64:base64-decode instr))))))

;;======================================================================
;; S T A T E S   A N D   S T A T U S E S
;;======================================================================

;; BBnote: *common:std-states* - dashboard filter control and test control state buttons defined here; used in set-fields-panel and dboard:make-controls
(define *common:std-states*   ;; for toggle buttons in dashboard
  '(
    (0 "ARCHIVED")
    (1 "STUCK")
    (2 "KILLREQ")
    (3 "KILLED")
    (4 "NOT_STARTED")
    (5 "COMPLETED")
    (6 "LAUNCHED")
    (7 "REMOTEHOSTSTART")
    (8 "RUNNING")
    ))

(define *common:dont-roll-up-states*
  '("DELETED"
    "REMOVING"
    "CLEANING"
    "ARCHIVE_REMOVING"
    ))

;; BBnote: *common:std-statuses* dashboard filter control and test control status buttons defined here; used in set-fields-panel and dboard:make-controls
;; note these statuses are sorted from better to worse.
;; This sort order is important to dcommon:status-compare3 and db:set-state-status-and-roll-up-items
(define *common:std-statuses*
  '(;; (0 "DELETED")  
    (1 "n/a")
    (2 "PASS")
    (3 "SKIP")
    (4 "WARN")
    (5 "WAIVED")
    (6 "CHECK")
    (7 "STUCK/DEAD")
    (8 "DEAD")
    (9 "FAIL")
    (10 "PREQ_FAIL")
    (11 "PREQ_DISCARDED")
    (12 "ABORT")))

(define (common:status>? s1 s2)
  (let* ((munged (map (lambda (x) `(,(cadr x) . ,(car x))) *common:std-statuses*))
	 (v1     (alist-ref s1 munged equal?))
	 (v2     (alist-ref s2 munged equal?)))
    (> v1 v2)))

(define *common:ended-states*       ;; states which indicate the test is stopped and will not proceed
  '("COMPLETED" "ARCHIVED" "KILLED" "KILLREQ" "STUCK" "INCOMPLETE" ))

(define *common:badly-ended-states* ;; these roll up as CHECK, i.e. results need to be checked
  '("KILLED" "KILLREQ" "STUCK" "INCOMPLETE" "DEAD"))

(define *common:well-ended-states* ;; an item's prereq in this state allows item to proceed
  '("PASS" "WARN" "CHECK" "WAIVED" "SKIP"))

;; BBnote: *common:running-states* used from db:set-state-status-and-roll-up-items
(define *common:running-states*     ;; test is either running or can be run
  '("RUNNING" "REMOTEHOSTSTART" "LAUNCHED" "STARTED"))

(define *common:cant-run-states*    ;; These are stopping conditions that prevent a test from being run
  '("COMPLETED" "KILLED" "UNKNOWN" "INCOMPLETE" "ARCHIVED"))

(define *common:not-started-ok-statuses* ;; if not one of these statuses when in not_started state treat as dead
  '("n/a" "na" "PASS" "FAIL" "WARN" "CHECK" "WAIVED" "DEAD" "SKIP"))

;; group tests into buckets corresponding to rollup
;;; Running, completed-pass,  completed-non-pass + worst status, not started.
;; filter out 
					;(define (common:categorize-items-for-rollup in-tests)
					;  (

(define (common:special-sort items order comp)
  (let ((items-order (map reverse order))
        (acomp       (or comp >)))
    (sort items
	  (lambda (a b)
	    (let ((a-num (cadr (or (assoc a items-order) '(0 0))))
		  (b-num (cadr (or (assoc b items-order) '(0 0)))))
	      (acomp a-num b-num))))))

;;======================================================================
;; ;; given a toplevel with currstate, currstatus apply state and status
;; ;;  => (newstate . newstatus)
;; (define (common:apply-state-status currstate currstatus state status)
;;   (let* ((cstate  (string->symbol (string-downcase currstate)))
;;          (cstatus (string->symbol (string-downcase currstatus)))
;;          (sstate  (string->symbol (string-downcase state)))
;;          (sstatus (string->symbol (string-downcase status)))
;;          (nstate  #f)
;;          (nstatus #f))
;;     (set! nstate
;;           (case cstate
;;             ((completed not_started killed killreq stuck archived) 
;;              (case sstate ;; completed -> sstate
;;                ((completed killed killreq stuck archived) completed)
;;                ((running remotehoststart launched)        running)
;;                (else                                      unknown-error-1)))
;;             ((running remotehoststart launched)
;;              (case sstate
;;                ((completed killed killreq stuck archived) #f) ;; need to look at all items
;;                ((running remotehoststart launched)        running)
;;                (else                                      unknown-error-2)))
;;             (else unknown-error-3)))
;;     (set! nstatus
;;           (case sstatus
;;             ((pass)
;;              (case nstate
;;                ((pass n/a deleted)     pass)
;;                ((warn)                 warn)
;;                ((fail)                 fail)
;;                ((check)               check)
;;                ((waived)             waived)
;;                ((skip)                 skip)
;;                ((stuck/dead)          stuck)
;;                ((abort)               abort)
;;                (else        unknown-error-4)))
;;             ((warn)
;;              (case nstate
;;                ((pass warn n/a skip deleted)   warn)
;;                ((fail)                         fail)
;;                ((check)                       check)
;;                ((waived)                     waived)
;;                ((stuck/dead)                  stuck)
;;                (else                unknown-error-5)))
;;             ((fail)
;;              (case nstate
;;                ((pass warn fail check n/a waived skip deleted stuck/dead stuck)  fail)
;;                ((abort)                                                         abort)
;;                (else                                                  unknown-error-6)))
;;             (else    unknown-error-7)))
;;     (cons 
;;      (if nstate  (symbol->string nstate)  nstate)
;;      (if nstatus (symbol->string nstatus) nstatus))))

;;======================================================================
;; D E B U G G I N G   S T U F F 
;;======================================================================

(define (common:debug-setup)
  (debug:setup (cond ;; debug arg
		((args:get-arg "-debug-noprop") 'noprop)
		((args:get-arg "-debug")         #t)
		(else #f))
	       (cond ;; verbosity arg
		((args:get-arg "-q") 'v)
		((args:get-arg "-q") 'q)
		(else #f))))

(define *verbosity*         1)
(define *logging*           #f)

(define (assoc/default key lst . default)
  (let ((res (assoc key lst)))
    (if res (cadr res)(if (null? default) #f (car default)))))

;;======================================================================
;; safe getting of toppath
(define (common:get-toppath areapath)
  (or *toppath*
      (if areapath
	  (begin
	    (set! *toppath* areapath)
	    (setenv "MT_RUN_AREA_HOME" areapath)
	    areapath)
	  #f)
      (if (getenv "MT_RUN_AREA_HOME")
	  (begin
	    (set! *toppath* (getenv "MT_RUN_AREA_HOME"))
	    *toppath*)
	  #f)
      ;; last resort, look for megatest.config
      (let loop ((thepath (realpath ".")))
	(if (file-exists? (conc thepath "/megatest.config"))
	    thepath
	    (if (equal? thepath "/")
		(begin
		  (debug:print-error 0 *default-log-port* "Unable to find megatest home directory.")
		  #f)
		(loop (pathname-directory thepath)))))
      ))

;;======================================================================
;; E X I T   H A N D L I N G
;;======================================================================

;;   (let ((ohh (common:on-homehost?))
;; 	(srv (args:get-arg "-server")))
;;     (and ohh srv)))
;; (debug:print-info 0 *default-log-port* "common:run-sync? ohh=" ohh ", srv=" srv)



(define *wdnum* 0)
(define *wdnum*mutex (make-mutex))


(define (common:human-time)
  (time->string (seconds->local-time (current-seconds)) "%Y-%m-%d %H:%M:%S"))

(define (std-signal-handler signum)
  ;; (signal-mask! signum)
  (set! *time-to-exit* #t) 
  ;;(debug:print-info 13 *default-log-port* "got signal "signum)
  (debug:print-error 0 *default-log-port* "Received signal " signum " aaa exiting promptly")
  ;; (std-exit-procedure) ;; shouldn't need this since we are exiting and it will be called anyway
  (exit))

(define (special-signal-handler signum)
  ;; (signal-mask! signum)
  (set! *time-to-exit* #t)
  ;;(debug:print-info 13 *default-log-port* "got signal "signum)
  (debug:print-error 0 *default-log-port* "Received signal " signum " sending email befor exiting!!")
  ;;TODO send email to notify admin contact listed in the config that the lisner got killed
  ;; (std-exit-procedure) ;; shouldn't need this since we are exiting and it will be called anyway
  (exit))


(set-signal-handler! signal/int  std-signal-handler)  ;; ^C
(set-signal-handler! signal/term std-signal-handler)

;; (set-signal-handler! signal/stop std-signal-handler)  ;; ^Z NO, do NOT handle ^Z!

;;======================================================================
;; M I S C   U T I L S
;;======================================================================

;;======================================================================
;; convert stuff to a number if possible
(define (any->number val)
  (cond 
   ((number? val) val)
   ((string? val) (string->number val))
   ((symbol? val) (any->number (symbol->string val)))
   (else #f)))

(define (any->number-if-possible val)
  (let ((num (any->number val)))
    (if num num val)))

(define (patt-list-match item patts)
  (debug:print-info 8 *default-log-port* "patt-list-match item=" item " patts=" patts)
  (if (and item patts)  ;; here we are filtering for matches with item patterns
      (let ((res #f))   ;; look through all the item-patts if defined, format is patt1,patt2,patt3 ... wildcard is %
	(for-each 
	 (lambda (patt)
	   (let ((modpatt (string-substitute "%" ".*" patt #t)))
	     (debug:print-info 10 *default-log-port* "patt " patt " modpatt " modpatt)
	     (if (string-match (regexp modpatt) item)
		 (set! res #t))))
	 (string-split patts ","))
	res)
      #t))

;;======================================================================
;; return first command that exists, else #f
;;
(define (common:which cmds)
  (if (null? cmds)
      #f
      (let loop ((hed (car cmds))
		 (tal (cdr cmds)))
	(let ((res (with-input-from-pipe (conc "which " hed) read-line)))
	  (if (and (string? res)
		   (common:file-exists? res))
	      res
	      (if (null? tal)
		  #f
		  (loop (car tal)(cdr tal))))))))

(define (common:get-install-area)
  (let ((exe-path (car (argv))))
    (if (common:file-exists? exe-path)
	(handle-exceptions
	 exn
	 #f
	 (pathname-directory
	  (pathname-directory 
	   (pathname-directory exe-path))))
	#f)))

;;======================================================================
;; return first path that can be created or already exists and is writable
;;
(define (common:get-create-writeable-dir dirs)
  (if (null? dirs)
      #f
      (let loop ((hed (car dirs))
		 (tal (cdr dirs)))
	(let ((res (or (and (directory? hed)
			    (file-write-access? hed)
			    hed)
		       (handle-exceptions
			exn
			(begin
			  (debug:print-info 0 *default-log-port* "could not create " hed
					    ", this might cause problems down the road. exn=" exn)
			  #f)
			(create-directory hed #t)))))
	  (if (and (string? res)
		   (directory? res))
	      res
	      (if (null? tal)
		  #f
		  (loop (car tal)(cdr tal))))))))

;;======================================================================
;; return the youngest timestamp . filename
;;
(define (common:get-youngest glob-list)
  (let ((all-files (apply append
			  (map (lambda (patt)
				 (handle-exceptions
				  exn
				  '()
				  (glob patt)))
			       glob-list))))
    (fold (lambda (fname res)
	    (let ((last-mod (car res))
		  (curmod   (handle-exceptions
			     exn
			     0
			     (file-modification-time fname))))
	      (if (> curmod last-mod)
		  (list curmod fname)
		  res)))
	  '(0 "n/a")
	  all-files)))

;;======================================================================
;; use bash to expand a glob. Does NOT handle paths with spaces!
;;
(define (common:bash-glob instr)
  (string-split
   (with-input-from-pipe
    (conc "/bin/bash -c \"echo " instr "\"")
    read-line)))

;;======================================================================
;; Some safety net stuff
;;======================================================================

;;======================================================================
;; return input if it is a list or return null
(define (common:list-or-null inlst #!key (ovrd #f)(message #f))
  (if (list? inlst)
      inlst
      (begin
	(if message (debug:print-error 0 *default-log-port* message))
	(or ovrd '()))))

(define (common:get-fields cfgdat)
  (let ((fields (hash-table-ref/default cfgdat "fields" '())))
    (map car fields)))

(define keys:config-get-fields common:get-fields)

;;======================================================================
;; looking only (at least for now) at the MT_ variables craft the full testname
;;
(define (common:get-full-test-name)
  (if (getenv "MT_TEST_NAME")
      (if (and (getenv "MT_ITEMPATH")
               (not (equal? (getenv "MT_ITEMPATH") "")))
          (getenv "MT_TEST_NAME")
          (conc (getenv "MT_TEST_NAME") "/" (getenv "MT_ITEMPATH")))
      #f))

;;======================================================================
;; M I S C   L I S T S
;;======================================================================

;;======================================================================
;; items in lista are matched value and position in listb
;; return the remaining items in listb or #f
;;
(define (common:list-is-sublist lista listb)
  (if (null? lista)
      listb ;; all items in listb are "remaining"
      (if (> (length lista)(length listb)) 
	  #f
	  (let loop ((heda (car lista))
		     (tala (cdr lista))
		     (hedb (car listb))
		     (talb (cdr listb)))
	    (if (equal? heda hedb)
		(if (null? tala) ;; we are done
		    talb
		    (loop (car tala)
			  (cdr tala)
			  (car talb)
			  
			  (cdr talb)))
		#f)))))

;;======================================================================
;; Needed for long lists to be sorted where (apply max ... ) dies
;;
(define (common:max inlst)
  (let loop ((max-val (car inlst))
	     (hed     (car inlst))
	     (tal     (cdr inlst)))
    (if (not (null? tal))
	(loop (max hed max-val)
	      (car tal)
	      (cdr tal))
	(max hed max-val))))

;; get min or max, use > for max and < for min, this works around the limits on apply
;;
(define (common:min-max comp lst)
  (if (null? lst)
      #f ;; better than an exception for my needs
      (fold (lambda (a b)
	      (if (comp a b) a b))
	    (car lst)
	    lst)))

;;======================================================================
;; get min or max, use > for max and < for min, this works around the limits on apply
;;
(define (common:sum lst)
  (if (null? lst)
      0
      (fold (lambda (a b)
	      (+ a b))
	    (car lst)
	    lst)))

;;======================================================================
;; path list to hash-table tree
;;   ((a b c)(a b d)(e b c)) => ((a (b (d) (c))) (e (b (c))))
;;
(define (common:list->htree lst)
  (let ((resh (make-hash-table)))
    (for-each
     (lambda (inlst)
       (let loop ((ht  resh)
		  (hed (car inlst))
		  (tal (cdr inlst)))
	 (if (hash-table-ref/default ht hed #f)
	     (if (not (null? tal))
		 (loop (hash-table-ref ht hed)
		       (car tal)
		       (cdr tal)))
	     (begin
	       (hash-table-set! ht hed (make-hash-table))
	       (loop ht hed tal)))))
     lst)
    resh))

;;======================================================================
;; hash-table tree to alist tree
;;
(define (common:htree->atree ht)
  (map (lambda (x)
	 (cons (car x)
	       (let ((y (cdr x)))
		 (if (hash-table? y)
		     (common:htree->atree y)
		     y))))
       (hash-table->alist ht)))

;;======================================================================
;; M U N G E   D A T A   I N T O   N I C E   F O R M S
;;======================================================================

;;======================================================================
;; Generate an index for a sparse list of key values
;;   ( (rowname1 colname1 val1)(rowname2 colname2 val2) )
;;
;; => 
;;
;;   ( (rowname1 0)(rowname2 1))    ;; rownames -> num
;;     (colname1 0)(colname2 1)) )  ;; colnames -> num
;; 
;; optional apply proc to rownum colnum value
(define (common:sparse-list-generate-index data #!key (proc #f))
  (if (null? data)
      (list '() '())
      (let loop ((hed (car data))
		 (tal (cdr data))
		 (rownames '())
		 (colnames '())
		 (rownum   0)
		 (colnum   0))
	(let* ((rowkey          (car   hed))
	       (colkey          (cadr  hed))
	       (value           (caddr hed))
	       (existing-rowdat (assoc rowkey rownames))
	       (existing-coldat (assoc colkey colnames))
	       (curr-rownum     (if existing-rowdat rownum (+ rownum 1)))
	       (curr-colnum     (if existing-coldat colnum (+ colnum 1)))
	       (new-rownames    (if existing-rowdat rownames (cons (list rowkey curr-rownum) rownames)))
	       (new-colnames    (if existing-coldat colnames (cons (list colkey curr-colnum) colnames))))
	  ;; (debug:print-info 0 *default-log-port* "Processing record: " hed )
	  (if proc (proc curr-rownum curr-colnum rowkey colkey value))
	  (if (null? tal)
	      (list new-rownames new-colnames)
	      (loop (car tal)
		    (cdr tal)
		    new-rownames
		    new-colnames
		    (if (> curr-rownum rownum) curr-rownum rownum)
		    (if (> curr-colnum colnum) curr-colnum colnum)
		    ))))))

;;======================================================================
;; if it looks like a number -> convert it to a number, else return it
;;
(define (common:lazy-convert inval)
  (let* ((as-num (if (string? inval)(string->number inval) #f)))
    (or as-num inval)))

;;======================================================================
;; convert string a=1; b=2; c=a silly thing; d=
;; to '((a . 1)(b . 2)(c . "a silly thing")(d . ""))
;;
(define (common:val->alist val #!key (convert #f))
  (let ((val-list (string-split-fields ";\\s*" val #:infix)))
    (if val-list
	(map (lambda (x)
	       (let ((f (string-split-fields "\\s*=\\s*" x #:infix)))
		 (case (length f)
		   ((0) `(,#f))  ;; null string case
		   ((1) `(,(string->symbol (car f))))
		   ((2) `(,(string->symbol (car f)) . ,(let ((inval (cadr f)))
							 (if convert (common:lazy-convert inval) inval))))
		   (else f))))
	     val-list)
	'())))

;;======================================================================
;; S Y S T E M   S T U F F
;;======================================================================

;;======================================================================
;; returns *effective load* (not normalized)
;;
(define (common:get-intercept onemin fivemin)
  (if (< onemin fivemin) ;; load is decreasing, just use the onemin load
      onemin
      (let* ((load-change (- onemin fivemin))
	     (tchange     (- 300 60)))
	(max (+ onemin (* 60 (/ load-change tchange))) 0))))

(define (get-cpu-load #!key (remote-host #f))
  (car (common:get-cpu-load remote-host)))

;;======================================================================
;;   (let* ((load-res (process:cmd-run->list "uptime"))
;; 	 (load-rx  (regexp "load average:\\s+(\\d+)"))
;; 	 (cpu-load #f))
;;     (for-each (lambda (l)
;; 		(let ((match (string-search load-rx l)))
;; 		  (if match
;; 		      (let ((newval (string->number (cadr match))))
;; 			(if (number? newval)
;; 			    (set! cpu-load newval))))))
;; 	      (car load-res))
;;     cpu-load))

;;======================================================================
;; get values from cached info from dropping file in logs dir
;;  e.g. key is host and dtype is normalized-load
;;
(define (common:get-cached-info key dtype #!key (age 10))
  (if *toppath*
      (let* ((fullpath (conc *toppath* "/.sysdata/" key "-" dtype ".log"))
	     (delfile  (lambda (exn)
			 (debug:print-info 1 *default-log-port* " removing bad file " fullpath ", exn=" exn)
			 (delete-file* fullpath)
			 #f)))
	(if (and (file-exists? fullpath)
		 (file-read-access? fullpath))
	    (handle-exceptions
	     exn
	     (begin
	       (debug:print 0 *default-log-port* "failed to get cached info from " fullpath ", exn=" exn)
	       #f)
	     (debug:print 2 *default-log-port* "reading file " fullpath)
	     (let ((real-age (- (current-seconds)
				(handle-exceptions
				 exn
				 (begin
				   (debug:print 1 *default-log-port* "Failed to read mod time on file "
						fullpath ", using 0, exn=" exn)
				   0)
				 (file-change-time fullpath)))))
	       (if (< real-age age)
		   (handle-exceptions
		    exn
		    (delfile exn)
		    (let* ((res (with-input-from-file fullpath read)))
		      (if (eof-object? res)
			  (begin
			    (delfile "n/a")
			    #f)
			  res)))
		   (begin
		     (debug:print-info 2 *default-log-port* "file " fullpath
				       " is too old (" real-age" seconds) to trust, skipping reading it")
		     #f))))
	    (begin
	      (debug:print 2 *default-log-port* "not reading file " fullpath)
	      #f)))
      #f))

(define (common:write-cached-info key dtype dat)
  (if *toppath*
      (let* ((fulldir  (conc *toppath* "/.sysdata"))
	     (fullpath (conc fulldir "/" key "-" dtype ".log")))
	(if (not (file-exists? fulldir))(create-directory fulldir #t))
	(handle-exceptions
	 exn
	 (begin
	   (debug:print 0 *default-log-port* "failed to write file " fullpath ", exn=" exn)
	   #f)
	 (with-output-to-file fullpath (lambda ()(pp dat)))))
      #f))

(define (common:raw-get-remote-host-load remote-host)
  (handle-exceptions
   exn
   (begin
     (debug:print 0 *default-log-port* "failed to ssh to " remote-host " and get loadavg. exn=" exn)
     #f) ;; more specific handling of errors needed
   (with-input-from-pipe 
    (conc "ssh " remote-host " cat /proc/loadavg")
    (lambda ()(list (read)(read)(read))))))

;;======================================================================
;; get cpu load by reading from /proc/loadavg, return all three values
;;
(define (common:get-cpu-load remote-host)
  ;; (handle-exceptions
  ;;     exn
  ;;   (begin
  ;;     (debug:print 0 *default-log-port* "failed to ssh or read loadavg from host " remote-host ", exn=" exn)
  ;;     '(-99 -99 -99))
  (let* ((actual-hostname (or remote-host (get-host-name) "localhost")))
    (or (common:get-cached-info actual-hostname "cpu-load")
	(let ((result (if remote-host
			  (map (lambda (res)
				 (if (eof-object? res) 9e99 res))
			       (with-input-from-pipe 
				(conc "ssh " remote-host " cat /proc/loadavg")
				(lambda ()(list (read)(read)(read)))))
			  (with-input-from-file "/proc/loadavg" 
			    (lambda ()(list (read)(read)(read)))))))
	  (match
	   result
	   ((l1 l2 l3)
	    (if (and (number? l1)
		     (number? l2)
		     (number? l3))
		(begin
		  (common:write-cached-info actual-hostname "cpu-load" result)
		  result)
		'(-1 -1 -1))) ;; -1 is bad result
	   (else '(-2 -2 -2))))))) ;; )

;;======================================================================
;; get normalized cpu load by reading from /proc/loadavg and /proc/cpuinfo return all three values and the number of real cpus and the number of threads
;; returns alist '((adj-cpu-load . normalized-proc-load) ... etc.
;;  keys: adj-proc-load, adj-core-load, 1m-load, 5m-load, 15m-load
;;
(define (common:get-normalized-cpu-load remote-host)
  (let ((res (common:get-normalized-cpu-load-raw remote-host))
	(default `((adj-proc-load . 2) ;; there is no right answer
		   (adj-core-load . 2)
		   (1m-load       . 2)
		   (5m-load       . 0) ;; causes a large delta - thus causing default of throttling if stuff goes wrong
		   (15m-load      . 0)
		   (proc          . 1)
		   (core          . 1)
		   (phys          . 1)
		   (error         . #t))))
    (cond
     ((and (list? res)
	   (> (length res) 2))
      res)
     ((eq? res #f)   default) ;; add messages?
     ((eq? res #f) default)   ;; this would be the #eof
     (else default))))

(define (common:get-normalized-cpu-load-raw remote-host)
  (let* ((actual-host (or remote-host (get-host-name)))) ;; #f is localhost
    (or (common:get-cached-info actual-host "normalized-load")
	(let ((data (if remote-host
			(with-input-from-pipe 
			 (conc "ssh " remote-host " \"cat /proc/loadavg;cat /proc/cpuinfo;echo end\"")
			 read-lines)
			(append 
			 (with-input-from-file "/proc/loadavg" 
			   read-lines)
			 (with-input-from-file "/proc/cpuinfo"
			   read-lines)
			 (list "end"))))
	      (load-rx  (regexp "^([\\d\\.]+)\\s+([\\d\\.]+)\\s+([\\d\\.]+)\\s+.*$"))
	      (proc-rx  (regexp "^processor\\s+:\\s+(\\d+)\\s*$"))
	      (core-rx  (regexp "^core id\\s+:\\s+(\\d+)\\s*$"))
	      (phys-rx  (regexp "^physical id\\s+:\\s+(\\d+)\\s*$"))
	      (max-num  (lambda (p n)(max (string->number p) n))))
	  ;; (print "data=" data)
	  (if (null? data) ;; something went wrong
	      #f
	      (let loop ((hed      (car data))
			 (tal      (cdr data))
			 (loads    #f)
			 (proc-num 0)  ;; processor includes threads
			 (phys-num 0)  ;; physical chip on motherboard
			 (core-num 0)) ;; core
		;;; (print hed ", " loads ", " proc-num ", " phys-num ", " core-num)
		(if (null? tal) ;; have all our data, calculate normalized load and return result
		    (let* ((act-proc (+ proc-num 1))
			   (act-phys (+ phys-num 1))
			   (act-core (+ core-num 1))
			   (adj-proc-load (/ (car loads) act-proc))
			   (adj-core-load (/ (car loads) act-core))
			   (result
			    (append (list (cons 'adj-proc-load adj-proc-load)
					  (cons 'adj-core-load adj-core-load))
				    (list (cons '1m-load (car loads))
					  (cons '5m-load (cadr loads))
					  (cons '15m-load (caddr loads)))
				    (list (cons 'proc act-proc)
					  (cons 'core act-core)
					  (cons 'phys act-phys)))))
		      (common:write-cached-info actual-host "normalized-load" result)
		      result)
		    (regex-case
		     hed
		     (load-rx  ( x l1 l5 l15 ) (loop (car tal)(cdr tal)(map string->number (list l1 l5 l15)) proc-num phys-num core-num))
		     (proc-rx  ( x p         ) (loop (car tal)(cdr tal) loads           (max-num p proc-num) phys-num core-num))
		     (phys-rx  ( x p         ) (loop (car tal)(cdr tal) loads           proc-num (max-num p phys-num) core-num))
		     (core-rx  ( x c         ) (loop (car tal)(cdr tal) loads           proc-num phys-num (max-num c core-num)))
		     (else 
		      (begin
			;; (print "NO MATCH: " hed)
			(loop (car tal)(cdr tal) loads proc-num phys-num core-num)))))))))))

(define (common:unix-ping hostname)
  (let ((res (system (conc "ping -c 1 " hostname " > /dev/null"))))
    (eq? res 0)))

(define (common:get-num-cpus remote-host)
  (let* ((actual-host (or remote-host (get-host-name))))
    ;; hosts had better not be changing the number of cpus too often!
    (or (hash-table-ref/default *numcpus-cache* actual-host #f)
	(let* ((numcpus (or (common:get-cached-info actual-host "num-cpus" age: (+ 2592000 (random 3600)))
			    (let* ((proc   (lambda ()
					     (let loop ((numcpu 0)
							(inl    (read-line)))
					       (if (eof-object? inl)
						   (if (> numcpu 0)
						       numcpu
						       #f) ;; if zero return #f so caller knows that things are not working
						   (loop (if (string-match "^processor\\s+:\\s+\\d+$" inl)
							     (+ numcpu 1)
							     numcpu)
							 (read-line))))))
				   (result (if remote-host
					       (with-input-from-pipe 
						(conc "ssh " remote-host " cat /proc/cpuinfo")
						proc)
					       (with-input-from-file "/proc/cpuinfo" proc))))
			      (if (and (number? result)
				       (> result 0))
				  (common:write-cached-info actual-host "num-cpus" result))
			      result))))
	  (hash-table-set! *numcpus-cache* actual-host numcpus)
	  numcpus))))

;;======================================================================
;; DO NOT CALL THIS DIRECTLY. It is called from common:wait-for-normalized-load
;;
;; (define (common:wait-for-cpuload maxload-in numcpus-in waitdelay #!key (count 1000) (msg #f)(remote-host #f)(force-maxload #f)(num-tries 5))
;;   (let* ((loadavg (common:get-cpu-load remote-host))
;; 	 (numcpus (if (<= 1 numcpus-in) ;; not possible to have zero.  If we get 1, it's possible that we got the previous default, and we should check again
;; 		      (common:get-num-cpus remote-host)
;; 		      numcpus-in))
;; 	 (maxload (if force-maxload
;; 		      maxload-in
;; 		      (if (number? maxload-in)
;; 			  (max maxload-in 0.5)
;; 			  0.5))) ;; so maxload must be greater than 0.5 for now BUG - FIXME?
;; 	 (first   (car loadavg))
;; 	 (next    (cadr loadavg))
;; 	 (adjmaxload (* maxload (max 1 numcpus))) ;; possible bug where
;; 					       ;; numcpus (or could be
;; 					       ;; maxload) is zero,
;; 					       ;; crude fallback is to
;; 					       ;; at least use 1
;; 	 (loadjmp (- first (if (> next (* numcpus 0.7)) ;; could do something with average of first and next?
;; 			       0
;; 			       next))) ;; we will force a conservative calculation any time next is large.
;; 	 (first-next-avg    (/ (+ first next) 2))
;; 	 ;; add some randomness to the time to break any alignment
;; 	 ;; where netbatch dumps many jobs to machines simultaneously
;;          (adjwait           (min (+ 300 (random 10)) (abs (* (+ (random 10)
;; 								(/ (- 1000 count) 10)
;; 								waitdelay)
;; 							     (- first adjmaxload) ))))
;; 	 (load-jump-limit   (configf:lookup-number *configdat* "setup" "load-jump-limit"))
;; 	 ;; effective load accounts for load jumps, this should elminate all the first-next-avg, adjwait, load-jump-limit
;; 	 ;; etc.
;; 	 (effective-load    (common:get-intercept first next))
;; 	 (effective-host    (or remote-host "localhost"))
;; 	 (normalized-effective-load (/ effective-load numcpus))
;; 	 (will-wait                 (> normalized-effective-load maxload)))
;; 	 
;;     ;; let's let the user know once in a long while that load checking
;;     ;; is happening but not constantly report it
;;     #;(if (common:low-noise-print 30 (conc "cpuload" (or remote-host "localhost"))) ;; (> (random 100) 75) ;; about 25% of the time
;; 	(debug:print-info 1 *default-log-port* "Checking cpuload on " (or remote-host "localhost") ", maxload: " maxload
;;     ", load: " first ", adjmaxload: " adjmaxload ", loadjmp: " loadjmp))
;; 
;;     (debug:print-info 1 *default-log-port*
;; 		      "On host: " effective-host
;; 		      ", effective load: " effective-load
;; 		      ", numcpus: " numcpus
;; 		      ", normalized effective load: " normalized-effective-load
;; 		      )
;;     
;;     (cond
;;      ;; bad data, try again to get the data
;;      ((and (< first 0) ;; this indicates the loadavg data is bad - machine may not be reachable
;; 	   (> num-tries 0))
;;       (debug:print 0 *default-log-port* "WARNING: received bad data from get-cpu-load " first ", we'll sleep 10s and try " num-tries " more times.")
;;       (thread-sleep! 10)
;;       (common:wait-for-cpuload maxload-in numcpus-in waitdelay
;; 			       count: count remote-host: remote-host force-maxload: force-maxload num-tries: (- num-tries 1)))
;;      ;; need to wait for load to drop
;;      ((and will-wait ;; (> first adjmaxload)
;; 	   (> count 0))
;;       (debug:print-info 0 *default-log-port*
;; 			"Delaying " 15 ;; adjwait
;; 			" seconds due to normalized effective load " normalized-effective-load ;; first
;; 			" exceeding max of " adjmaxload
;; 			" on server " (or remote-host (get-host-name))
;; 			" (normalized load-limit: " maxload ") " (if msg msg ""))
;;       (thread-sleep! 15) ;; adjwait)
;;       (common:wait-for-cpuload maxload numcpus waitdelay count: (- count 1) msg: msg remote-host: remote-host))
;;      ((and (> loadjmp (cond
;; 		       (load-jump-limit load-jump-limit)
;; 		       ((> numcpus 8)(/ numcpus 2))
;; 		       ((> numcpus 4)(/ numcpus 1.2))
;; 		       (else 0.5)))
;; 	   (> count 0))
;;       (debug:print-info 0 *default-log-port* "waiting " adjwait " seconds due to possible load jump " loadjmp ". "
;; 			(if msg msg ""))
;;       (thread-sleep! adjwait)
;;       (common:wait-for-cpuload maxload numcpus waitdelay count: (- count 1) msg: msg remote-host: remote-host))
;;      (else
;;       (if (> num-tries 0)
;; 	  (if (common:low-noise-print 30 (conc (round first) "-load-acceptable-" (or remote-host "localhost")))
;; 	      (debug:print 0 *default-log-port* "Load on " (or remote-host "localhost") " is acceptable at " first " continuing."))
;; 	  (debug:print 0 *default-log-port* "Load on " (or remote-host "localhost") ", "first" could not be retrieved. Giving up and continuing."))))))
;; 
(define (get-uname . params)
  (let* ((uname-res (process:cmd-run->list (conc "uname " (if (null? params) "-a" (car params)))))
	 (uname #f))
    (if (null? (car uname-res))
	"unknown"
	(caar uname-res))))

;;======================================================================
;; for reasons I don't understand multiple calls to real-path in parallel threads
;; must be protected by mutexes
;;
(define (common:real-path inpath)
  ;; (process:cmd-run-with-stderr->list "readlink" "-f" inpath)) ;; cmd . params)
  ;; (let-values 
  ;;  (((inp oup pid) (process "readlink" (list "-f" inpath))))
  ;;  (with-input-from-port inp
  ;;    (let loop ((inl (read-line))
  ;;       	(res #f))
  ;;      (print "inl=" inl)
  ;;      (if (eof-object? inl)
  ;;          (begin
  ;;            (close-input-port inp)
  ;;            (close-output-port oup)
  ;;            ;; (process-wait pid)
  ;;            res)
  ;;          (loop (read-line) inl))))))
  (with-input-from-pipe (conc "readlink -f " inpath) read-line))

;;======================================================================
;; D I S K   S P A C E 
;;======================================================================

(define (common:get-disk-space-used fpath)
  (with-input-from-pipe (conc "/usr/bin/du -s " fpath) read))

(define (get-unix-df path)
  (let* ((df-results (process:cmd-run->list (conc "df " path)))
	 (space-rx   (regexp "([0-9]+)\\s+([0-9]+)%"))
	 (freespc    #f))
    ;; (write df-results)
    (for-each (lambda (l)
		(let ((match (string-search space-rx l)))
		  (if match 
		      (let ((newval (string->number (cadr match))))
			(if (number? newval)
			    (set! freespc newval))))))
	      (car df-results))
    freespc))

(define (get-unix-inodes path)
  (let* ((df-results (process:cmd-run->list (conc "df -i " path)))
	 (space-rx   (regexp "([0-9]+)\\s+([0-9]+)%"))
	 (freenodes    0)) ;; 0 is a better failsafe than #f here.
    ;; (write df-results)
    (for-each (lambda (l)
		(let ((match (string-search space-rx l)))
		  (if match 
		      (let ((newval (string->number (cadr match))))
			(if (number? newval)
			    (set! freenodes newval))))))
	      (car df-results))
    freenodes))

;;======================================================================
;; convert a spec string to a list of vectors #( rx  action rx-string )
(define (common:spec-string->list-of-specs spec-string actions)
  (let ((spec-strings  (string-split-fields "\\s*;\\s*" spec-string #:infix))
	(actions-regex (regexp (conc "^(.*)\\s+(" (string-intersperse (map conc actions) "|") ")"))))
    (filter
     (lambda (x) x)
     (map (lambda (s)
	    (let ((m (string-match actions-regex s)))
	      (if m
		  (vector (regexp (cadr m))(string->symbol (caddr m))(cadr m))
		  (begin
		    (debug:print 0 *default-log-port* "WARNING: Unrecognised rule \"" s "\" in clean-up specification.")
		    #f))))
	  spec-strings))))

;;======================================================================
;; given a list of specs rx . rule and a file return the first matching rule
;; 
(define (common:file-find-rule fname rules) ;; rule is vector #( rx action rx-string)
  (let loop ((rule   (car rules))
	     (tail   (cdr rules)))
    (let ((rx (vector-ref rule 0))
	  (rn (vector-ref rule 1))) ;; rule name
      (if (string-match rx fname)
	  rule ;; return the whole rule so regex can be printed etc.
	  (if (null? tail)
	      #f
	      (loop (car tail)(cdr tail)))))))

;;======================================================================
;; given a spec apply some rules to a directory
;;
;;  WARNING: This function will REMOVE files - be sure your spec and path is correct!
;;
;; spec format:
;;   file-regex1 action; file-regex2 action; ...
;; e.g.
;;   .*\.log$ keep; .* remove
;;   --> keep all .log files, remove everything else
;; limitations:
;;   cannot have a rule with ; as part of the spec
;;   not very flexible, would be nice to return binned file names?
;; supported rules:
;;   keep     - keep this file
;;   remove   - remove this file
;;   compress - compress this file
;;
(define (common:dir-clean-up path spec-string #!key (compress "gzip")(actions '(keep remove compress))(remove-empty #f))
  (let* ((specs         (common:spec-string->list-of-specs spec-string actions))
	 (keepers       (make-hash-table))
	 (directories   (make-hash-table)))
    (find-files 
     path
     action: (lambda (p res)
	       (let ((rule  (common:file-find-rule p specs)))
		 (cond
		  ((directory? p)(hash-table-set! directories p #t))
		  (else
		   (case (vector-ref rule 1)
		     ((keep)(hash-table-set! keepers p rule))
		     ((remove)
		      (print "Removing file " p)
		      (delete-file p))
		     ((compress)
		      (print "Compressing file " p)
		      (system (conc compress " " p)))
		     (else
		      (print "No match for file " p))))))))
    (if remove-empty
	(for-each
	 (lambda (d)
	   (if (null? (glob (conc d "/.*")(conc d "/*")))
	       (begin
		 (print "Removing empty directory " d)
		 (delete-directory d))))
	 (sort (hash-table-keys directories) (lambda (a b)(> (string-length a)(string-length b))))))
    ))

;;======================================================================
;; E N V I R O N M E N T   V A R S
;;======================================================================
(define (bb-check-path #!key (msg "check-path: "))
  (let ((path (or (get-environment-variable "PATH") "none")))
    (debug:print-info 0 *default-log-port* (conc msg" : $PATH="path))
    (if (string-match "^.*/isoenv-core/.*" path)
        (debug:print-error 0 *default-log-port* (conc msg" : !!ISOENV PRESENT!!")) ;; remove for prod
        (debug:print-info 1 *default-log-port* (conc msg" : **no isoenv present**")))))


(define (save-environment-as-files fname #!key (ignorevars (list "USER" "HOME" "DISPLAY" "LS_COLORS" "XKEYSYMDB" "EDITOR" "MAKEFLAGS" "MAKEF" "MAKEOVERRIDES")))
  ;;(bb-check-path msg: "save-environment-as-files entry")
  (let ((envvars (get-environment-variables))
        (whitesp (regexp "[^a-zA-Z0-9_\\-:,\\.\\/%$]"))
	(mungeval (lambda (val)
		    (cond
		     ((eq? val #t) "") ;; convert #t to empty string
		     ((eq? val #f) #f) ;; convert #f to itself (still thinking about this one
		     (else val)))))
    (with-output-to-file (conc fname ".csh")
      (lambda ()
	(for-each (lambda (keyval)
		    (let* ((key   (car keyval))
			   (val   (cdr keyval))
			   (delim (if (string-search whitesp val) 
				      "\""
				      "")))
		      (print (if (or (member key ignorevars)
				     (string-search whitesp key))
				 "# setenv "
				 "setenv ")
			     key " " delim (mungeval val) delim)))
		  envvars)))
    (with-output-to-file (conc fname ".sh")
      (lambda ()
	(for-each (lambda (keyval)
		    (let* ((key (car keyval))
			   (val (cdr keyval))
			   (delim (if (string-search whitesp val) 
				      "\""
				      "")))
		      (print (if (or (member key ignorevars)
				     (string-search whitesp key)
				     (string-search ":" key)) ;; internal only values to be skipped.
				 "# export "
				 "export ")
			     key "=" delim (mungeval val) delim)))
		  envvars)))))

;;======================================================================
;;
(define (common:propogate-mt-vars-to-subrun proc propogate-vars)
  (let ((vars      (make-hash-table))
        (var-patt  "^MT_.*"))
    (for-each
     (lambda (vardat) ;; each env var
					;(for-each
					;(lambda (var-patt)
       (if (string-match var-patt (car vardat))
	   (let ((var (car vardat))
		 (val (cdr vardat)))
	     (hash-table-set! vars var val)
	     (if (member var propogate-vars)
		 (begin
		   (print var "  " (string-substitute "MT_" "PARENT_" var))
		   (setenv (string-substitute "MT_" "PARENT_"  var) val)))
	     (unsetenv var))))
					;       var-patts))
     (get-environment-variables))
    (cond
     ((string? proc)(system proc))
     (proc          (proc)))
    (hash-table-for-each
     vars
     (lambda (var val)
       (if (member var propogate-vars)
	   (unsetenv (string-substitute "MT_" "PARENT_" var)))  
       (setenv var val)))
    vars))


(define (common:get-param-mapping #!key (flavor #f))
  "returns alist mapping string keys in testconfig/subrun to megatest command line switches; if flavor is switch-symbol, maps tcmt symbolic switches to megatest switches"
  (let ((default '(("tag-expr"  . "-tagexpr")
                   ("mode-patt" . "-modepatt")
                   ("run-name"  . "-runname")
                   ("contour"   . "-contour")
                   ("target"    . "-target")
                   ("test-patt" . "-testpatt")
		   ("rerun"     . "-rerun")
		   ("setvars"   . "-setvars")
                   ("msg"       . "-m")
                   ("log"       . "-log")
                   ("start-dir" . "-start-dir")
                   ("new"       . "-set-state-status"))))
    (if (eq? flavor 'switch-symbol)
        (map (lambda (x)
               (cons (string->symbol (conc "-" (car x))) (cdr x)))
             default)
        default)))

;;======================================================================
;; set some env vars from an alist, return an alist with original values
;; (("VAR" "value") ...)
;; a value of #f means "unset this var"
;;
(define (alist->env-vars lst)
  (if (list? lst)
      (let ((res '()))
	(for-each (lambda (p)
		    (let* ((var (car  p))
			   (val (cadr p))
			   (prv (get-environment-variable var)))
		      (set! res (cons (list var prv) res))
		      (if val 
			  (safe-setenv var (->string val))
			  (unsetenv var))))
		  lst)
	res)
      '()))

;;======================================================================
;; clear vars matching pattern, run proc, set vars back
;; if proc is a string run that string as a command with
;; system.
;;
(define *common:orig-env*
  (let ((envvars (get-environment-variables)))
    (if (get-environment-variable "MT_ORIG_ENV")
        (with-input-from-string
            (z3:decode-buffer (base64:base64-decode (get-environment-variable "MT_ORIG_ENV")))
          read)
        (filter-map (lambda (x)
                      (if (string-match "^MT_.*" (car x))
                          #f
                          x))
                    envvars))))

(define (common:with-orig-env proc)
  (let  ((current-env (get-environment-variables)))
    (for-each (lambda (x) (unsetenv (car x)))             current-env)
    (for-each (lambda (x) (setenv (car x) (cdr x))) *common:orig-env*)
    (let ((rv (cond
               ((string? proc)(system proc))
               (proc          (proc)))))
      (for-each (lambda (x) (unsetenv (car x))) *common:orig-env*)
      (for-each (lambda (x) (setenv (car x) (cdr x))) current-env)
      rv)))

(define (common:without-vars proc . var-patts)
  (let ((vars (make-hash-table)))
    (for-each
     (lambda (vardat) ;; each env var
       (for-each
	(lambda (var-patt)
	  (if (string-match var-patt (car vardat))
	      (let ((var (car vardat))
		    (val (cdr vardat)))
		(hash-table-set! vars var val)
		(unsetenv var))))
	var-patts))
     (get-environment-variables))
    (cond
     ((string? proc)(system proc))
     (proc          (proc)))
    (hash-table-for-each
     vars
     (lambda (var val)
       (setenv var val)))
    vars))

;;======================================================================
;; C O L O R S
;;======================================================================

(define (common:name->iup-color name)
  (case (string->symbol (string-downcase name))
    ((red)    "223 33 49")
    ((grey)   "192 192 192")
    ((orange) "255 172 13")
    ((purple) "This is unfinished ...")))

;;======================================================================
;; (define (common:get-color-for-state-status state status)
;;   (case (string->symbol state)
;;     ((COMPLETED)
;;      (case (string->symbol status)
;;        ((PASS)        "70  249 73")
;;        ((WARN WAIVED) "255 172 13")
;;        ((SKIP)        "230 230 0")
;;        (else "223 33 49")))
;;     ((LAUNCHED)         "101 123 142")
;;     ((CHECK)            "255 100 50")
;;     ((REMOTEHOSTSTART)  "50  130 195")
;;     ((RUNNING)          "9   131 232")
;;     ((KILLREQ)          "39  82  206")
;;     ((KILLED)           "234 101 17")
;;     ((NOT_STARTED)      "240 240 240")
;;     (else               "192 192 192")))

(define (common:iup-color->rgb-hex instr)
  (string-intersperse 
   (map (lambda (x)
          (number->string x 16))
        (map string->number
             (string-split instr)))
   "/"))


;;======================================================================
;;
;;======================================================================

(define (common:get-color-from-status status)
  (cond
   ((equal? status "PASS")    "green")
   ((equal? status "FAIL")    "red")
   ((equal? status "WARN")    "orange")
   ((equal? status "KILLED")  "orange")
   ((equal? status "KILLREQ") "purple")
   ((equal? status "RUNNING") "blue")
   ((equal? status "ABORT")   "brown")
   (else "black")))

(define (tests:cache-regexp str-in flag)
  (let* ((key (conc str-in flag)))
    (or (hash-table-ref/default *glob-like-match-cache* key #f)
	(let* ((newrx (regexp str-in flag)))
	  (hash-table-set! *glob-like-match-cache* key newrx)
	  newrx))))

;;======================================================================
;; tests:glob-like-match 
(define (tests:glob-like-match patt str) 
  (let* ((like     (substring-index "%" patt))
	 (notpatt  (equal? (substring-index "~" patt) 0))
	 (newpatt  (if notpatt (substring patt 1) patt))
	 (finpatt  (if like
		       (string-substitute (regexp "%") ".*" newpatt #f)
		       (string-substitute (regexp "\\*") ".*" newpatt #f)))
	 (rx       (tests:cache-regexp finpatt (if like #t #f)))
	 (res      (string-match rx str)))
    (if notpatt (not res) res)))

;;======================================================================
;; if itempath is #f then look only at the testname part
;;
(define (tests:match patterns testname itempath #!key (required '()))
  (if (string? patterns)
      (let ((patts (append (string-split patterns ",") required)))
	(if (null? patts) ;;; no pattern(s) means no match
	    #f
	    (let loop ((patt (car patts))
		       (tal  (cdr patts)))
	      ;; (print "loop: patt: " patt ", tal " tal)
	      (if (string=? patt "")
		  #f ;; nothing ever matches empty string - policy
		  (let* ((patt-parts (string-match (regexp "^([^\\/]*)(\\/(.*)|)$") patt))
			 (test-patt  (cadr patt-parts))
			 (item-patt  (cadddr patt-parts)))
		    ;; special case: test vs. test/
		    ;;   test  => "test" "%"
		    ;;   test/ => "test" ""
		    (if (and (not (substring-index "/" patt)) ;; no slash in the original
			     (or (not item-patt)
				 (equal? item-patt "")))      ;; should always be true that item-patt is ""
			(set! item-patt "%"))
		    ;; (print "tests:match => patt-parts: " patt-parts ", test-patt: " test-patt ", item-patt: " item-patt)
		    (if (and (tests:glob-like-match test-patt testname)
			     (or (not itempath)
				 (tests:glob-like-match (if item-patt item-patt "") itempath)))
			#t
			(if (null? tal)
			    #f
			    (loop (car tal)(cdr tal)))))))))))

;;======================================================================
;; make a query (fieldname like 'patt1' OR fieldname 
(define (db:patt->like fieldname pattstr #!key (comparator " OR "))
  (let ((patts (if (string? pattstr)
		   (string-split pattstr ",")
		   '("%"))))
    (string-intersperse (map (lambda (patt)
			       (let ((wildtype (if (substring-index "%" patt) "LIKE" "GLOB")))
				 (conc fieldname " " wildtype " '" patt "'")))
			     (if (null? patts)
				 '("")
				 patts))
			comparator)))

;;======================================================================
;; if itempath is #f then look only at the testname part
;;
(define (tests:match->sqlqry patterns)
  (if (string? patterns)
      (let ((patts (string-split patterns ",")))
	(if (null? patts) ;;; no pattern(s) means no match, we will do no query
	    #f
	    (let loop ((patt (car patts))
		       (tal  (cdr patts))
		       (res  '()))
	      ;; (print "loop: patt: " patt ", tal " tal)
	      (let* ((patt-parts (string-match (regexp "^([^\\/]*)(\\/(.*)|)$") patt))
		     (test-patt  (cadr patt-parts))
		     (item-patt  (cadddr patt-parts))
		     (test-qry   (db:patt->like "testname" test-patt))
		     (item-qry   (db:patt->like "item_path" item-patt))
		     (qry        (conc "(" test-qry " AND " item-qry ")")))
		;; (print "tests:match => patt-parts: " patt-parts ", test-patt: " test-patt ", item-patt: " item-patt)
		(if (null? tal)
		    (string-intersperse (append (reverse res)(list qry)) " OR ")
		    (loop (car tal)(cdr tal)(cons qry res)))))))
      #f))

;;======================================================================
;; A routine to map itempaths using a itemmap
;; patha and pathb must be strings or this will fail
;;
;; path-b is waiting on path-a
;;
(define (db:compare-itempaths test-b-name path-a path-b itemmaps )
  (debug:print-info 6 *default-log-port* "ITEMMAPS: " itemmaps)
  (let* ((itemmap    (tests:lookup-itemmap itemmaps test-b-name)))
    (if itemmap
	(let ((path-b-mapped (db:multi-pattern-apply path-b itemmap)))
	  (debug:print-info 6 *default-log-port* "ITEMMAP is " itemmap ", path: " path-b ", mapped path: " path-b-mapped)
	  (equal? path-a path-b-mapped))
	(equal? path-b path-a))))

;;======================================================================
;; A routine to convert test/itempath using a itemmap
;; NOTE: to process only an itempath (i.e. no prepended testname)
;;       just call db:multi-pattern-apply
;;
(define (db:convert-test-itempath path-in itemmap)
  (debug:print-info 6 *default-log-port* "ITEMMAP is " itemmap)
  (let* ((path-parts  (string-split path-in "/"))
	 (test-name   (if (null? path-parts) "" (car path-parts)))
	 (item-path   (string-intersperse (if (null? path-parts) '() (cdr path-parts)) "/")))
    (conc test-name "/" 
	  (db:multi-pattern-apply item-path itemmap))))

;;======================================================================
;; patterns are:
;;    "rx1"  "replacement1"\n
;;    "rx2"  "replacement2"
;; etc.
;;
(define (db:multi-pattern-apply item-path itemmap)
  (let ((all-patts (string-split itemmap "\n")))
    (if (null? all-patts)
	item-path
	(let loop ((hed (car all-patts))
		   (tal (cdr all-patts))
		   (res item-path))
	  (let* ((parts (string-split hed))
		 (patt  (car parts))

		 (repl  (if (> (length parts) 1)(cadr parts) ""))

		 (newr  (if (and patt repl)
			    (begin
                              (handle-exceptions
                               exn
                               (begin
				 (debug:print 0 *default-log-port*
					      "WARNING: itemmap has problem \"" itemmap "\", patt: " patt ", repl: " repl ", exn=" exn)
                                 res)
			       (string-substitute patt repl res))


                              )
			    (begin
                              (debug:print 0 *default-log-port*
					   "WARNING: itemmap has problem \"" itemmap "\", patt: " patt ", repl: " repl)
			      res))))
	    (if (null? tal)
		newr
		(loop (car tal)(cdr tal) newr)))))))


(define (keys->valslots keys) ;; => ?,?,? ....
  (string-intersperse (map (lambda (x) "?") keys) ","))

;;======================================================================
;; (define-inline (keys->key/field keys . additional)
;;   (string-join (map (lambda (k)(conc k " TEXT"))
;; 		    (append keys additional)) ","))

(define (item-list->path itemdat)
  (if (list? itemdat)
      (string-intersperse  (map cadr itemdat) "/")
      ""))

(define (launch:is-test-alive host pid)
  (if (and host pid (not (equal? host "n/a")))
      (let* ((is-local (equal? host (get-host-name)))
	     (ssh-cmd   (if is-local " " (conc "ssh " host " ")))
	     (cmd (conc ssh-cmd "pstree -A " pid))
	     (output (with-input-from-pipe cmd read-lines)))
	(debug:print 2 *default-log-port* "Running " cmd " received " output)
	(if (eq? (length output) 0)
	    #f
	    #t))
      #t))

;;======================================================================
;; N A N O M S G   C L I E N T
;;======================================================================
;; 
;; 
;; 
;; (define (common:send-dboard-main-changed)
;;   (let* ((dashboard-ips (mddb:get-dashboards)))
;;     (for-each
;;      (lambda (ipadr)
;;        (let* ((soc (common:open-nm-req (conc "tcp://" ipadr)))
;; 	      (msg (conc "main " *toppath*))
;; 	      (res (common:nm-send-receive-timeout soc msg)))
;; 	 (if (not res) ;; couldn't reach that dashboard - remove it from db
;; 	     (print "ERROR: couldn't reach dashboard " ipadr))
;; 	 res))
;;      dashboard-ips)))
;;     
;;     
;; ;;======================================================================
;; ;; D A S H B O A R D   D B 
;; ;;======================================================================
;; 
;; (define (mddb:open-db)
;;   (let* ((db (open-database (conc (get-environment-variable "HOME") "/.dashboard.db"))))
;;     (set-busy-handler! db (busy-timeout 10000))
;;     (for-each
;;      (lambda (qry)
;;        (exec (sql db qry)))
;;      (list 
;;       "CREATE TABLE IF NOT EXISTS vars       (id INTEGER PRIMARY KEY,key TEXT, val TEXT, CONSTRAINT varsconstraint UNIQUE (key));"
;;       "CREATE TABLE IF NOT EXISTS dashboards (
;;           id         INTEGER PRIMARY KEY,
;;           pid        INTEGER,
;;           username   TEXT,
;;           hostname   TEXT,
;;           ipaddr     TEXT,
;;           portnum    INTEGER,
;;           start_time TIMESTAMP DEFAULT (strftime('%s','now')),
;;              CONSTRAINT hostport UNIQUE (hostname,portnum)
;;         );"
;;       ))
;;     db))
;; 
;; ;; register a dashboard 
;; ;;
;; (define (mddb:register-dashboard port)
;;   (let* ((pid      (current-process-id))
;; 	 (hostname (get-host-name))
;; 	 (ipaddr   (server:get-best-guess-address hostname))
;; 	 (username (current-user-name)) ;; (car userinfo)))
;; 	 (db      (mddb:open-db)))
;;     (print "Register monitor, pid: " pid ", hostname: " hostname ", port: " port ", username: " username)
;;     (exec (sql db "INSERT OR REPLACE INTO dashboards (pid,username,hostname,ipaddr,portnum) VALUES (?,?,?,?,?);")
;; 	   pid username hostname ipaddr port)
;;     (close-database db)))
;; 
;; ;; unregister a monitor
;; ;;
;; (define (mddb:unregister-dashboard host port)
;;   (let* ((db      (mddb:open-db)))
;;     (print "Register unregister monitor, host:port=" host ":" port)
;;     (exec (sql db "DELETE FROM dashboards WHERE hostname=? AND portnum=?;") host port)
;;     (close-database db)))
;; 
;; ;; get registered dashboards
;; ;;
;; (define (mddb:get-dashboards)
;;   (let ((db (mddb:open-db)))
;;     (query fetch-column
;; 	   (sql db "SELECT ipaddr || ':' || portnum FROM dashboards;"))))

;;======================================================================
;; NMSG AND NEW API
;;======================================================================

;; nm based server experiment, keep around for now.
;;
#;(define (nm:start-server dbconn #!key (given-host-name #f))
(let* ((srvdat    (start-raw-server given-host-name: given-host-name))
       (host-name (srvdat-host srvdat))
       (soc       (srvdat-soc srvdat)))
  
  ;; start the queue processor (save for second round of development)
  ;;
  (thread-start! (make-thread! (lambda ()(queue-processor dbconn) "Queue processor")))
  ;; msg is an alist
  ;;  'r host:port  <== where to return the data
  ;;  'p params     <== data to apply the command to
  ;;  'e j|s|l      <== encoding of the params. default is s (sexp), if not specified is assumed to be default
  ;;  'c command    <== look up the function to call using this key
  ;;
  (let loop ((msg-in (nn-recv soc)))
    (if (not (equal? msg-in "quit"))
	(let* ((dat        (decode msg-in))
	       (host-port  (alist-ref 'r dat)) ;; this is for the reverse req rep where the server is a client of the original client
	       (params     (alist-ref 'p dat))
	       (command    (let ((c (alist-ref 'c dat)))(if c (string->symbol c) #f)))
	       (all-good   (and host-port params command (hash-table-exists? *commands* command))))
	  (if all-good
	      (let ((cmddat (make-qitem
			     command:   command
			     host-port: host-port
			     params:    params)))
		(queue-push cmddat) 		;; put request into the queue
		(nn-send soc "queued"))         ;; reply with "queued"
	      (print "ERROR: ["(common:human-time)"] BAD request " dat))
	  (loop (nn-recv soc)))))
  (nn-close soc)))

;;======================================================================
;; H I E R A R C H I C A L   H A S H   T A B L E S
;;======================================================================

;;======================================================================
;; Every element including top element is a vector:
;;   <vector subhash value>

(define (hh:make-hh #!key (ht #f)(value #f))
  (vector (or ht    (make-hash-table)) value))

;; used internally
(define-inline (hh:set-ht! hh ht)       (vector-set! hh 0 ht))
(define-inline (hh:get-ht hh)           (vector-ref  hh 0))
(define-inline (hh:set-value! hh value) (vector-set! hh 1 value))
(define-inline (hh:get-value  hh value) (vector-ref  hh 1))

;;======================================================================
;; given a hierarchial hash and some keys look up the value ...
;;
(define (hh:get hh . keys)
  (if (null? keys)
      (vector-ref hh 1) ;; we have reached the end of the line, return the value sought
      (let ((sub-ht (hh:get-ht hh)))
	(if sub-ht ;; yes, there is more hierarchy
	    (let ((sub-hh (hash-table-ref/default sub-ht (car keys) #f)))
	      (if sub-hh
		  (apply hh:get sub-hh (cdr keys))
		  #f))
	    #f))))

;;======================================================================
;; given a hierarchial hash, a value and some keys, add needed hierarcy and insert the value
;;
(define (hh:set! hh value . keys)
  (if (null? keys)
      (hh:set-value! hh value) ;; we have reached the end of the line, store the value
      (let ((sub-ht (hh:get-ht hh)))
	(if sub-ht ;; yes, there is more hierarchy
	    (let ((sub-hh (hash-table-ref/default sub-ht (car keys) #f)))
	      (if (not sub-hh) ;; we'll need to add the next level of hierarchy
		  (let ((new-sub-hh (hh:make-hh)))
		    (hash-table-set! sub-ht (car keys) new-sub-hh)
		    (apply hh:set! new-sub-hh value (cdr keys)))
		  (apply hh:set! sub-hh value (cdr keys))))    ;; call the sub-hierhash with remaining keys
	    (begin
	      (hh:set-ht! hh (make-hash-table))
	      (apply hh:set! hh value keys))))))

;;======================================================================
;; Manage pkts, used in servers, tests and likely other contexts so put
;; in common
;;======================================================================

(define common:pkts-spec
  '((default . ((parent    . P)
                (action    . a)
                (filename  . f)))
    (configf . ((parent    . P)
                (action    . a)
                (filename  . f)))
    (server  . ((action    . a)
		(pid       . d)
		(ipaddr    . i)
		(port      . p)
		(parent    . P)))
    
    (test    . ((cpuuse    . c)
		(diskuse   . d)
		(item-path . i)
		(runname   . r)
		(state     . s)
		(target    . t)
		(status    . u)
		(parent    . P)))))

(define (common:get-pkt-alists pkts)
  (map (lambda (x)
	 (alist-ref 'apkt x)) ;; 'pkta pulls out the alist from the read pkt
       pkts))

;;======================================================================
;; given list of pkts (alist mode) return list of D cards as Unix epoch, sorted descending
;; also delete duplicates by target i.e. (car pkt)
;;
(define (common:get-pkt-times pkts)
  (delete-duplicates
   (sort 
    (map (lambda (x)
	   `(,(alist-ref 't x) . ,(string->number (alist-ref 'D x))))
	 pkts)
    (lambda (a b)(> (cdr a)(cdr b))))      ;; sort descending
   (lambda (a b)(equal? (car a)(car b))))) ;; remove duplicates by target


(define *common:thread-punchlist* (make-hash-table))
(define (common:send-thunk-to-background-thread thunk #!key (name #f))
  ;;(BB> "launched thread " name)

  ;; we need a unique name for the thread.
  (let* ((realname (if name
                       (if (not (hash-table-ref/default *common:thread-punchlist* name #f))
                           name
                           (conc name"-" (symbol->string (gensym))))
                       (conc "anonymous-"(symbol->string (gensym)))))
         (realthunk (lambda ()
                      (let ((res (thunk)))
                        (hash-table-delete! *common:thread-punchlist* realname)
                        res)))
         (thread   (make-thread realthunk realname)))
    (hash-table-set! *common:thread-punchlist* realname thread)
    (thread-start! thread)
    ))

(define (common:join-backgrounded-threads)
  ;; may need to trap and ignore exceptions -- dunno how atomic threads are...
  (for-each
   (lambda (thread-name)
     (let* ((thread (hash-table-ref/default *common:thread-punchlist* thread-name #f)))
       (if thread
           (handle-exceptions
	    exn
	    (begin
	      (debug:print 0 *default-log-port* "joining threads failed. exn=" exn)
	      #t) ;; just ignore it, it might have died in the meantime so joining it will throw an exception
	    (thread-join! thread))
           )))
   (hash-table-keys *common:thread-punchlist*)))

;;======================================================================
;; (define *common:telemetry-log-state* 'startup)
;; (define *common:telemetry-log-socket* #f)
;; 
;; (define (common:telemetry-log-open)
;;   (if (eq? *common:telemetry-log-state* 'startup)
;;       (let* ((serverhost (configf:lookup *configdat* "telemetry" "host"))
;;              (serverport (configf:lookup-number *configdat* "telemetry" "port"))
;;              (user (or (get-environment-variable "USER") "unknown"))
;;              (host (or (get-environment-variable "HOST") "unknown")))
;;         (set! *common:telemetry-log-state*
;;               (handle-exceptions
;;                exn
;;                (begin
;;                  (debug:print-info 0 *default-log-port* "common-telemetry-log open udp port failure")
;;                  'broken)
;;                (if (and serverhost serverport user host)
;;                    (let* ((s (udp-open-socket)))
;;                      ;;(udp-bind! s #f 0)
;;                      (udp-connect! s serverhost serverport)
;;                      (set! *common:telemetry-log-socket* s)
;;                      'open)
;;                    'not-needed))))))
;;   
;; (define (common:telemetry-log event #!key (payload '()))
;;   (if (eq? *common:telemetry-log-state* 'startup)
;;       (common:telemetry-log-open))
;; 
;;   (if (eq? 'open *common:telemetry-log-state*)
;;       (handle-exceptions
;;        exn
;;        (begin
;;          (debug:print-info 0 *default-log-port* "common-telemetry-log comms failure ; disabled (no server?)")
;;          ;;(define *common:telemetry-log-state* 'broken-or-no-server-preclose)
;;          ;;(common:telemetry-log-close)
;;          (define *common:telemetry-log-state* 'broken-or-no-server)
;;          (set! *common:telemetry-log-socket* #f)
;;          )
;;        (if (and *common:telemetry-log-socket* event) ;; TODO - filter on event against telemetry.want-events
;;            (let* ((user (or (get-environment-variable "USER") "unknown"))
;;                   (host (or (get-environment-variable "HOST") "unknown"))
;;                   (start (conc "[megatest "event"]"))
;;                   (toppath (or *toppath* "/dev/null"))
;;                   (payload-serialized
;;                    (base64:base64-encode
;;                     (z3:encode-buffer
;;                      (with-output-to-string (lambda () (pp payload))))))
;;                   (msg     (conc user":"host":"start":"(current-process-id)":"(car (argv))":"
;;                                  toppath":"payload-serialized)))
;;              (udp-send *common:telemetry-log-socket* msg))))))
;;   
;; (define (common:telemetry-log-close)
;;   (when (or (member *common:telemetry-log-state* '(broken-or-no-server-preclose open)) *common:telemetry-log-socket*)
;;     (handle-exceptions
;;      exn
;;      (begin
;;        (define *common:telemetry-log-state* 'closed-fail)
;;        (debug:print-info 0 *default-log-port* "common-telemetry-log closure failure")
;;        )
;;      (begin
;;        (define *common:telemetry-log-state* 'closed)
;;        (udp-close-socket *common:telemetry-log-socket*)
;;        (set! *common:telemetry-log-socket* #f)))))

(define (process:conservative-read port)
  (let loop ((res ""))
    (if (not (eof-object? (peek-char port)))
	(loop (conc res (read-char port)))
	res)))

(define (process:cmd-run-with-stderr->list cmd . params)
  ;; (print "Called with cmd=" cmd ", proc=" proc ", params=" params)
  ;;  (handle-exceptions
  ;;   exn
  ;;   (begin
  ;;     (print "ERROR:  Failed to run command: " cmd " " (string-intersperse params " "))
  ;;     (print "       " ((condition-property-accessor 'exn 'message) exn))
  ;;     #f)
  (let-values (((fh fho pid fhe) (if (null? params)
				     (process* cmd)
				     (process* cmd params))))
    (let loop ((curr (read-line fh))
	       (result  '()))
      (let ((errstr (process:conservative-read fhe)))
	(if (not (string=? errstr ""))
	    (set! result (append result (list errstr)))))
      (if (not (eof-object? curr))
	  (loop (read-line fh)
		(append result (list curr)))
	  (begin
	    (close-input-port fh)
	    (close-input-port fhe)
	    (close-output-port fho)
	    result))))) ;; )

(define (process:cmd-run-with-stderr-and-exitcode->list cmd . params)
  ;; (print "Called with cmd=" cmd ", proc=" proc ", params=" params)
  ;;  (handle-exceptions
  ;;   exn
  ;;   (begin
  ;;     (print "ERROR:  Failed to run command: " cmd " " (string-intersperse params " "))
  ;;     (print "       " ((condition-property-accessor 'exn 'message) exn))
  ;;     #f)
  (let-values (((fh fho pid fhe) (if (null? params)
				     (process* cmd)
				     (process* cmd params))))
    (let loop ((curr (read-line fh))
	       (result  '()))
      (let ((errstr (process:conservative-read fhe)))
	(if (not (string=? errstr ""))
	    (set! result (append result (list errstr)))))
      (if (not (eof-object? curr))
	  (loop (read-line fh)
		(append result (list curr)))
	  (begin
	    (let-values (((anotherpid normalexit? exitstatus)  (process-wait pid)))
	      (close-input-port fh)
	      (close-input-port fhe)
	      (close-output-port fho)
	      (list result (if normalexit? exitstatus -1))))))))

(define (process:cmd-run-proc-each-line cmd proc . params)
  ;; (print "Called with cmd=" cmd ", proc=" proc ", params=" params)
  (handle-exceptions
   exn
   (begin
     (print "ERROR:  Failed to run command: " cmd " " (string-intersperse params " "))
     (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
     (debug:print 5 *default-log-port* "exn=" (condition->list exn))
     #f)
   (let-values (((fh fho pid) (if (null? params)
				  (process cmd)
				  (process cmd params))))
     (let loop ((curr (read-line fh))
		(result  '()))
       (if (not (eof-object? curr))
	   (loop (read-line fh)
		 (append result (list (proc curr))))
	   (begin
	     (close-input-port fh)
	     ;;(close-input-port fhe)
	     (close-output-port fho)
	     result))))))

(define (process:cmd-run-proc-each-line-alt cmd proc)
  (let* ((fh (open-input-pipe cmd))
         (res (port-proc->list fh proc))
         (status (close-input-pipe fh)))
    (if (eq? status 0) res #f)))


(define (port-proc->list fh proc)
  (if (eof-object? fh) #f
      (let loop ((curr (proc (read-line fh)))
                 (result '()))
        (if (not (eof-object? curr))
            (loop (let ((l (read-line fh)))
                    (if (eof-object? l) l (proc l)))
                  (append result (list curr)))
            result))))

;;======================================================================
;; here is an example line where the shell is sh or bash
;; "find / -print 2&>1 > findall.log"
(define (run-n-wait cmdline #!key (params #f)(print-cmd #f)(run-dir #f))
  (if print-cmd 
      (debug:print 0 *default-log-port* 
		   (if (string? print-cmd)
		       print-cmd
		       "")
		   (if run-dir (conc "Run in " run-dir ";") "")
		   cmdline
		   (if params
		       (conc " " (string-intersperse params " "))
		       "")))
  (if (and run-dir
	   (directory-exists? run-dir))
      (push-directory run-dir))
  (let ((pid (if params
		 (process-run cmdline params)
		 (process-run cmdline))))
    (let loop ((i 0))
      (let-values (((pid-val exit-status exit-code) (process-wait pid #t)))
	(if (eq? pid-val 0)
	    (begin
	      (thread-sleep! 2)
	      (loop (+ i 1)))
	    (begin
	      (if (and run-dir
		       (directory-exists? run-dir))
		  (pop-directory))
	      (values pid-val exit-status exit-code)))))))

;;======================================================================
;; MISC PROCESS RELATED STUFF
;;======================================================================

(define (process:children proc)
  (with-input-from-pipe
   (conc "ps h --ppid " (current-process-id) " -o pid")
   (lambda ()
     (let loop ((inl (read-line))
		(res '()))
       (if (eof-object? inl)
	   (reverse res)
	   (let ((pid (string->number inl)))
	     (if proc (proc pid))
	     (loop (read-line) (cons pid res))))))))

(define (process:alive? pid)
  (handle-exceptions
   exn
   ;; possibly pid is a process not a child, look in /proc to see if it is running still
   (common:file-exists? (conc "/proc/" pid))
   (let-values (((rpid exit-type exit-signal)(process-wait pid #t)))
     (and (number? rpid)
	  (equal? rpid pid)))))

(define (process:alive-on-host? host pid)
  (let ((cmd (conc "ssh " host " ps -o pid= -p " pid)))
    (handle-exceptions
     exn
     (begin
       (debug:print 0 *default-log-port* "failed to identify if process " pid ", on host " host " is alive. exn=" exn)
       #f) ;; anything goes wrong - assume the process in NOT running.
     (with-input-from-pipe 
      cmd
      (lambda ()
	(let loop ((inl (read-line)))
	  (if (eof-object? inl)
	      #f
	      (let* ((clean-str (string-substitute "^[^\\d]*([0-9]+)[^\\d]*$" "\\1" inl))
		     (innum     (string->number clean-str)))
		(and innum
		     (eq? pid innum))))))))))

(define (process:get-sub-pids pid)
  (with-input-from-pipe
   (conc "pstree -A -p " pid) ;; | tr 'a-z\\-+`()\\.' ' ' " pid)
   (lambda ()
     (let loop ((inl (read-line))
		(res '()))
       (if (eof-object? inl)
	   (reverse res)
	   (let ((nums (map string->number
			    (string-split-fields "\\d+" inl))))
	     (loop (read-line)
		   (append res nums))))))))

;;======================================================================
;; stuff from tests.scm
;;======================================================================

;;======================================================================
;; given a list of itemmaps (testname . map), return the first match
;;
(define (tests:lookup-itemmap itemmaps testname)
  (let ((best-matches (filter (lambda (itemmap)
				(tests:match (car itemmap) testname #f))
			      itemmaps)))
    (if (null? best-matches)
	#f
	(let ((res (car best-matches)))
	  ;; (debug:print 0 *default-log-port* "res=" res)
	  (cond
	   ((string? res) res) ;;; FIX THE ROOT CAUSE HERE ....
	   ((null? res)   #f)
	   ((string? (cdr res)) (cdr res))  ;; it is a pair
	   ((string? (cadr res))(cadr res)) ;; it is a list
	   (else cadr res))))))

;;======================================================================
;; stuff from tasks.scm
;;======================================================================

;;======================================================================
;; no elegance here ...
;;
(define (tasks:kill-server hostname pid #!key (kill-switch ""))
  (debug:print-info 0 *default-log-port* "Attempting to kill server process " pid " on host " hostname)
  (setenv "TARGETHOST" hostname)
  (let* ((logdir (if (directory-exists? "logs")
		     "logs/"
		     ""))
         (logfile (if logdir (conc "logs/server-"pid"-"hostname".log") #f))
         (gzfile  (if logfile (conc logfile ".gz"))))
    (setenv "TARGETHOST_LOGF" (conc logdir "server-kills.log"))

    (system (conc "nbfake kill "kill-switch" "pid))

    (when logfile
	  (thread-sleep! 0.5)
	  (if (common:file-exists? gzfile) (delete-file gzfile))
	  (system (conc "gzip " logfile))
	  
	  (unsetenv "TARGETHOST_LOGF")
	  (unsetenv "TARGETHOST"))))


;;======================================================================
;; hash-table tree to html list tree
;;
;;   tipfunc takes two parameters: y the tip value and path the path to that point
;;
(define (common:htree->html ht path tipfunc)
  (let ((datlist 	(sort (hash-table->alist ht)
			      ;;======================================================================
			      ;;======================================================================
                              (lambda (a b)
                                (string< (car a)(car b))))))
    (if (null? datlist)
    	(tipfunc #f path) ;; really shouldn't get here
	(s:ul
	 (map (lambda (x)
		(let* ((levelname (car x))
		       (y         (cdr x))
		       (newpath   (append path (list levelname)))
		       (leaf      (or (not (hash-table? y))
				      (null? (hash-table-keys y)))))
		  (if leaf
		      (s:li (tipfunc y newpath))
		      (s:li
		       (list 
			levelname
			(common:htree->html y newpath tipfunc))))))
	      datlist)))))

;;======================================================================


;;======================================================================
;;======================================================================
;;======================================================================
;;======================================================================
;; D A S H B O A R D   U S E R   V I E W S
;;======================================================================

;;======================================================================

(define (common:args-get-runname)
  (let ((res (or (args:get-arg "-runname")
		 (args:get-arg ":runname")
		 (getenv "MT_RUNNAME"))))
    ;; (if res (set-environment-variable "MT_RUNNAME" res)) ;; not sure if this is a good idea. side effect and all ...
    res))

(define (get-with-default val default)
  (let ((val (args:get-arg val)))
    (if val val default)))

(define (common:args-get-target #!key (split #f)(exit-if-bad #f))
  (let* ((keys    (if (hash-table? *configdat*) (common:get-fields *configdat*) '()))
	 (numkeys (length keys))
	 (target  (or (args:get-arg "-reqtarg")
		      (args:get-arg "-target")
		      (getenv "MT_TARGET")))
	 (tlist   (if target (string-split target "/" #t) '()))
	 (valid   (if target
		      (or (null? keys) ;; probably don't know our keys yet
			  (and (not (null? tlist))
			       (eq? numkeys (length tlist))
			       (null? (filter string-null? tlist))))
		      #f)))
    (if valid
	(if split
	    tlist
	    target)
	(if target
	    (begin
	      (debug:print-error 0 *default-log-port* "Invalid target, spaces or blanks not allowed \"" target "\", target should be: " (string-intersperse keys "/") ", have " tlist " for elements")
	      (if exit-if-bad (exit 1))
	      #f)
	    #f))))

;;======================================================================
;;======================================================================
;;======================================================================
;;======================================================================
;; do we honor the caches of the config files?
;;
(define (common:use-cache?)
  (let ((res #t)) ;; priority by order of evaluation
    (if *configdat* ;; sillyness here. can't use setup/use-cache to know if we can use the cached files!
	(if (equal? (configf:lookup *configdat* "setup" "use-cache") "no")
	    (set! res #f)
	    (if (equal? (configf:lookup *configdat* "setup" "use-cache") "yes")
		(set! res #t))))
    (if (args:get-arg "-no-cache")(set! res #f)) ;; overrides setting in "setup"
    (if (getenv "MT_USE_CACHE")
	(if (equal? (getenv "MT_USE_CACHE") "yes")
	    (set! res #t)
	    (if (equal? (getenv "MT_USE_CACHE") "no")
		(set! res #f))))    ;; overrides -no-cache switch
    res))

;;======================================================================
;; force use of server?
;;
(define (common:force-server?)
  (let* ((force-setting (configf:lookup *configdat* "server" "force"))
	 (force-type    (if force-setting (string->symbol force-setting) #f))
	 (force-result  (case force-type
			  ((#f)     #f)
			  ((always) #t)
			  ((test)   (if (args:get-arg "-execute") ;; we are in a test
					#t
					#f))
			  (else
			   (debug:print 0 *default-log-port* "ERROR: Bad server force setting " force-setting ", forcing server.")
			   #t)))) ;; default to requiring server
    (if force-result
	(begin
	  (debug:print-info 0 *default-log-port* "forcing use of server, force setting is \"" force-setting "\".")
	  #t)
	#f)))

(define (common:in-running-test?)
  (and (args:get-arg "-execute") (get-environment-variable "MT_CMDINFO")))
;;======================================================================
;;======================================================================
;;======================================================================
;;======================================================================
;;======================================================================
;;======================================================================
;;======================================================================
;;======================================================================
;;======================================================================


;;======================================================================
;;======================================================================
;;======================================================================
;;======================================================================
;;======================================================================
;;======================================================================
;;======================================================================

;;======================================================================
;;======================================================================
;;======================================================================
;;======================================================================
;;======================================================================
;;======================================================================

;;======================================================================
;; Rotate logs, logic: 
;;                 if > 500k and older than 1 week:
;;                     remove previous compressed log and compress this log
;; WARNING: This proc operates assuming that it is in the directory above the
;;          logs directory you wish to log-rotate.
;;
(define (common:rotate-logs)
  (let* ((all-files (make-hash-table))
	 (stats     (make-hash-table))
	 (inc-stat  (lambda (key)
		      (hash-table-set! stats key (+ (hash-table-ref/default stats key 0) 1))))
	 (max-allowed (string->number (or (configf:lookup *configdat* "setup" "max-logfiles") "300")))) ;; name -> age
    (if (not (directory-exists? "logs"))(create-directory "logs"))
    (directory-fold 
     (lambda (file rem)
       (handle-exceptions
	exn
	(begin
	  (debug:print-info 2 *default-log-port* "unable to rotate log " file ", probably handled by another process, this is safe to ignore. exn=" exn)
	  (debug:print 2 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
	  ;; (print-call-chain (current-error-port)) ;; 
	  )
	(let* ((fullname  (conc "logs/" file))
	       (mod-time  (file-modification-time fullname))
	       (file-age  (- (current-seconds) mod-time))
	       (file-old  (> file-age (* 48 60 60)))
	       (file-big  (> (file-size fullname) 200000)))
	  (hash-table-set! all-files file mod-time)
	  (if (or (and (string-match "^.*.log" file)
		       file-old
		       file-big)
		  (and (string-match "^server-.*.log" file)
		       file-old))
	      (let ((gzfile (conc fullname ".gz")))
		(if (common:file-exists? gzfile)
		    (begin
		      (debug:print-info 0 *default-log-port* "removing " gzfile)
		      (delete-file* gzfile)
		      (hash-table-delete!  all-files gzfile) ;; needed?
		      ))
		(debug:print-info 0 *default-log-port* "compressing " file)
		(system (conc "gzip " fullname))
		(inc-stat "gzipped")
		(hash-table-set! all-files (conc file ".gz") file-age)  ;; add the .gz file and remove the base file
		(hash-table-delete! all-files file)
		)
	      (if (and (> file-age (* (string->number (or (configf:lookup *configdat* "setup" "log-expire-days") "30")) 24 3600))
		       (file-exists? fullname)) ;; just in case it was gzipped - will get it next time
		  (handle-exceptions
		   exn
		   #f
		   (if (directory? fullname)
		       (begin
			 (debug:print-info 0 *default-log-port* fullname " in logs directory is a directory! Cannot rotate it, it is best to not put subdirectories in the logs dir.")
			 (inc-stat "directories"))
		       (begin
			 (delete-file* fullname)
			 (inc-stat "deleted")))
		   (hash-table-delete! all-files file)))))))
     '()
     "logs")
    (for-each
     (lambda (category)
       (let ((quant (hash-table-ref/default stats category 0)))
	 (if (> quant 0)
	     (debug:print-info 0 *default-log-port* category " log files: " quant))))
     `("deleted" "gzipped" "directories"))
    (let ((num-logs (hash-table-size all-files)))
      (if (> num-logs max-allowed) ;; because NFS => don't let number of logs exceed 300
	  (let ((files (take (sort (hash-table-keys all-files)
				   (lambda (a b)
				     (< (hash-table-ref all-files a)(hash-table-ref all-files b))))
			     (- num-logs max-allowed))))
	    (for-each
	     (lambda (file)
	       (let* ((fullname (conc "logs/" file)))
		 (if (directory? fullname)
		     (debug:print-info 0 *default-log-port* fullname " in logs directory is a directory! Cannot rotate it, it is best to not put subdirectories in the logs dir.")
		     (handle-exceptions
		      exn
		      (debug:print-error 0 *default-log-port* "failed to remove " fullname ", exn=" exn)
		      (delete-file* fullname)))))
	     files)
	    (debug:print-info 0 *default-log-port* "Deleted " (length files) " files from logs, keeping " max-allowed " files."))))))

;;======================================================================
;; calculate a delay number based on a droop curve
;;    inputs are:
;;      - load-in, load as from uptime, NOT normalized
;;      - numcpus, number of cpus, ideally use the real cpus, not threads
;;
(define (common:get-delay load-in numcpus)
  (let* ((ratio (/ load-in numcpus))
         (new-option (configf:lookup *configdat* "load" "new-load-method"))
	 (paramstr   (or (configf:lookup *configdat* "load" "exp-params")
			 "15 12 1281453987.9543 0.75")) ;; 5 4 10 1"))
	 (paramlst   (map string->number (string-split paramstr))))
    (if new-option
	(begin
          (cond ((and (>= ratio 0) (< ratio .5))
		 0)
		((and (>= ratio 0.5) (<= ratio .9))
		 (* ratio (/ 5 .9)))
		((and (> ratio .9) (<= ratio 1.1))
		 (+ 5 (* (- ratio .9) (/ 55 .2)))) 
		((> ratio 1.1)
		 60)))
	(match paramlst
	       ((r1 r2 s1 s2)
		(debug:print 3 *default-log-port* "Using params r1=" r1 " r2=" r2 " s1=" s1 " s2=" s2)
		(min (max (/ (expt r1 (* r2 s2 ratio)) s1) 0) 30))
	       (else
		(debug:print 0 *default-log-port* "BAD exp-params, should be \"r1 r2 s1 s2\" but got " paramstr)
		30)))))

(define (common:print-delay-table)
  (let loop ((x 0))
    (print x "," (common:get-delay x 1))
    (if (< x 2)
	(loop (+ x 0.1)))))

;;======================================================================
;; DO NOT CALL THIS DIRECTLY. It is called from common:wait-for-normalized-load
;;   count     - count down to zero, at some point we'd give up if the load never drops
;;   num-tries - count down to zero number tries to get numcpus
;;
(define (common:wait-for-cpuload maxnormload numcpus-in
				 #!key (count 1000)
				 (msg #f)(remote-host #f)(num-tries 5))
  (let* ((loadavg     (common:get-cpu-load remote-host))
	 ;; not possible to have zero.  If we get 1, it's possible that we got the previous default, and we should check again
	 (numcpus     (if (<= 1 numcpus-in)
			  (common:get-num-cpus remote-host)
			  numcpus-in))
	 (first       (car loadavg))
	 (next        (cadr loadavg))
	 (adjmaxload  (* maxnormload (max 1 numcpus))) ;; possible bug
	 ;; where numcpus
	 ;; (or could be
	 ;; maxload) is
	 ;; zero, crude
	 ;; fallback is to
	 ;; at least use 1
	 ;; effective load accounts for load jumps, this should elminate all the first-next-avg, adjwait, load-jump-limit
	 ;; etc.
	 (effective-load    (common:get-intercept first next))
	 (recommended-delay (common:get-delay effective-load numcpus))
	 (effective-host    (or remote-host "localhost"))
	 (normalized-effective-load (/ effective-load numcpus))
	 (will-wait                 (> normalized-effective-load maxnormload)))
    (if (> recommended-delay 1) 
	(let* ((actual-delay (min recommended-delay 30)))
	  (if (common:low-noise-print 30 (conc (round actual-delay) "-safe-load"))
	      (debug:print-info 0 *default-log-port* "Load control, delaying "
				actual-delay " seconds to maintain safe load. current normalized effective load is "
				normalized-effective-load". maxnormload = " maxnormload " numcpus = " numcpus " loadavg = " loadavg " effective-load = " effective-load))
	  (thread-sleep! actual-delay)))
    
    (cond
     ;; bad data, try again to get the data
     ((not will-wait)
      (if (common:low-noise-print 3600 (conc (round normalized-effective-load) "-load-acceptable-" effective-host))
      	  (debug:print 0 *default-log-port* "Effective load on " effective-host " is acceptable at " effective-load " continuing.")))

     ((and (< first 0) ;; this indicates the loadavg data is bad - machine may not be reachable
	   (> num-tries 0))
      (debug:print 0 *default-log-port* "WARNING: received bad data from get-cpu-load "
		   first ", we'll sleep 10s and try " num-tries " more times.")
      (thread-sleep! 10)
      (common:wait-for-cpuload maxnormload numcpus-in
			       count: count remote-host: remote-host num-tries: (- num-tries 1)))

     ;; need to wait for load to drop
     ((and will-wait ;; (> first adjmaxload)
	   (> count 0))
      (debug:print-info 0 *default-log-port*
			"Delaying 15" ;; adjwait
			" seconds due to normalized effective load " normalized-effective-load ;; first
			" exceeding max of " adjmaxload
			" on server " (or remote-host (get-host-name))
			" (normalized load-limit: " maxnormload ") " (if msg msg ""))
      (thread-sleep! 15) ;; adjwait)
      (common:wait-for-cpuload maxnormload numcpus count: (- count 1) msg: msg remote-host: remote-host)
      ;; put the message here to indicate came out of waiting
      (debug:print-info 1 *default-log-port*
			"On host: " effective-host
			", effective load: " effective-load
			", numcpus: " numcpus
			", normalized effective load: " normalized-effective-load
			))
     ;; overloaded and count expired (i.e. went to zero)
     (else
      (if (> num-tries 0) ;; should be "num-tries-left". 
	  (if (common:low-noise-print 30 (conc (round effective-load) "-load-acceptable-" effective-host))
	      (debug:print 0 *default-log-port* "Load on " effective-host " is acceptable at effective normalized load of "
			   normalized-effective-load "  continuing."))
	  (debug:print 0 *default-log-port* "Load on " effective-host ", "
		       first" could not be retrieved. Giving up and continuing."))))))

;;======================================================================
;; wait for normalized cpu load to drop below maxload
;;
(define (common:wait-for-normalized-load maxnormload msg remote-host #!optional (rem-tries 5))
  (let ((num-cpus (common:get-num-cpus remote-host)))
    (if num-cpus
	(common:wait-for-cpuload maxnormload num-cpus 15 msg: msg remote-host: remote-host)
	(begin
	  (thread-sleep! (random 60)) ;; we failed to get num cpus. wait a bit and try again
	  (if (> rem-tries 0)
	      (common:wait-for-normalized-load maxnormload msg remote-host (- rem-tries 1))
	      #f)))))

;;======================================================================
;;======================================================================
;; given path get free space, allows override in [setup]
;; with free-space-script /path/to/some/script.sh
;;
(define (get-df path)
  (if (configf:lookup *configdat* "setup" "free-space-script")
      (with-input-from-pipe 
       (conc (configf:lookup *configdat* "setup" "free-space-script") " " path)
       (lambda ()
	 (let ((res (read-line)))
	   (if (string? res)
	       (string->number res)))))
      (get-unix-df path)))

(define (common:check-space-in-dir dirpath required)
  (let* ((dbspace  (if (directory? dirpath)
		       (get-df dirpath)
		       0)))
    (list (> dbspace required)
	  dbspace
	  required
	  dirpath)))

(define (get-free-inodes path)
  (if (configf:lookup *configdat* "setup" "free-inodes-script")
      (with-input-from-pipe 
       (conc (configf:lookup *configdat* "setup" "free-inodes-script") " " path)
       ;;======================================================================
       ;;======================================================================
       ;;======================================================================
       ;;======================================================================
       ;;======================================================================

       (lambda ()
	 (let ((res (read-line)))
	   (if (string? res)
	       (string->number res)))))
      (get-unix-inodes path)))
;;======================================================================
;;======================================================================
;;======================================================================
;;======================================================================
;;======================================================================
;;======================================================================
;;======================================================================
;;======================================================================
;;======================================================================

;; 		(let ((cmddat (make-qitem
;; 			       command:   command
;; 			       host-port: host-port
;; 			       params:    params)))
;; 		  (queue-push cmddat) 		;; put request into the queue
;; 		  (nn-send soc "queued"))         ;; reply with "queued"
;; 		(print "ERROR: ["(common:human-time)"] BAD request " dat))
;; 	    (loop (nn-recv soc)))))
;;     (nn-close soc)))
;;======================================================================
;;======================================================================
;;======================================================================
;;======================================================================
;;======================================================================
(define (common:get-pkts-dirs mtconf use-lt)
  (let* ((pktsdirs-str (or (configf:lookup mtconf "setup"  "pktsdirs")
			   (and use-lt
				(conc (or *toppath*
					  (current-directory))
				      "/lt/.pkts"))))
	 (pktsdirs  (if pktsdirs-str
			(string-split pktsdirs-str " ")
			#f)))
    pktsdirs))

;;======================================================================
(define (common:with-queue-db mtconf proc #!key (use-lt #f)(toppath-in #f))
  (let* ((pktsdirs (common:get-pkts-dirs mtconf use-lt))
	 (pktsdir  (if pktsdirs (car pktsdirs) #f))
	 (toppath  (or (configf:lookup mtconf "scratchdat" "toppath")
		       toppath-in))
	 (pdbpath  (or (configf:lookup mtconf "setup"  "pdbpath") pktsdir)))
    (cond
     ((not (and  pktsdir toppath pdbpath))
      (debug:print 0 *default-log-port* "ERROR: settings are missing in your megatest.config for area management.")
      (debug:print  0 *default-log-port* "  you need to have pktsdirs in the [setup] section."))
     ((not (common:file-exists? pktsdir))
      (debug:print 0 *default-log-port* "ERROR: pkts directory not found " pktsdir))
     ((not (equal? (file-owner pktsdir)(current-effective-user-id)))
      (debug:print 0 *default-log-port* "ERROR: directory " pktsdir " is not owned by " (current-effective-user-name)))
     (else
      (let* ((pdb  (open-queue-db pdbpath "pkts.db"
				  schema: '("CREATE TABLE groups (id INTEGER PRIMARY KEY,groupname TEXT, CONSTRAINT group_constraint UNIQUE (groupname));"))))
	(proc pktsdirs pktsdir pdb)
	(dbi:close pdb))))))

;;======================================================================
;; check space in dbdir and in megatest dir
;; returns: ok/not dbspace required-space
;;
(define (common:check-db-dir-space)
  (let* ((required (string->number 
                    ;; default is 1GB (or actually a billion bytes) This is the number of 1 kB blocks.
		    (or (configf:lookup *configdat* "setup" "dbdir-space-required")
			"1000000")))
	 (dbdir    (common:get-db-tmp-area)) ;; (db:get-dbdir))
	 (tdbspace (common:check-space-in-dir dbdir required))
	 (mdbspace (common:check-space-in-dir *toppath* required)))
    (sort (list tdbspace mdbspace) (lambda (a b)
				     (< (cadr a)(cadr b))))))

;;======================================================================
;; check available space in dbdir, exit if insufficient
;;
(define (common:check-db-dir-and-exit-if-insufficient)
  (let* ((spacedat (car (common:check-db-dir-space))) ;; look only at worst for now
	 (is-ok    (car spacedat))
	 (dbspace  (cadr spacedat))
	 (required (caddr spacedat))
	 (dbdir    (cadddr spacedat)))
    (if (not is-ok)
	(begin
	  (debug:print-error 0 *default-log-port* "Insufficient space in " dbdir ", require " required ", have " dbspace  ", exiting now.")
	  (exit 1)))))

;;======================================================================
;; paths is list of lists ((name path) ... )
;;
(define (common:get-disk-with-most-free-space disks minsize)
  (let* ((best     #f)
	 (bestsize 0)
	 (default-min-inodes-string "1000000")
	 (default-min-inodes (string->number default-min-inodes-string))
	 (min-inodes (or (string->number (if (configf:lookup *configdat* "setup" "min_inodes") (configf:lookup *configdat* "setup" "min_inodes") default-min-inodes-string)) default-min-inodes)))

    (for-each 
     (lambda (disk-num)
       (let* ((dirpath    (cadr (assoc disk-num disks)))
	      (freespc    (cond
			   ((not (directory? dirpath))
			    (if (common:low-noise-print 300 "disks not a dir " disk-num)
				(debug:print 0 *default-log-port* "WARNING: disk " disk-num " at path \"" dirpath "\" is not a directory - ignoring it."))
			    -1)
			   ((not (file-write-access? dirpath))
			    (if (common:low-noise-print 300 "disks not writeable " disk-num)
				(debug:print 0 *default-log-port* "WARNING: disk " disk-num " at path \"" dirpath "\" is not writeable - ignoring it."))
			    -1)
			   ((not (eq? (string-ref dirpath 0) #\/))
			    (if (common:low-noise-print 300 "disks not a proper path " disk-num)
				(debug:print 0 *default-log-port* "WARNING: disk " disk-num " at path \"" dirpath "\" is not a fully qualified path - ignoring it."))
			    -1)
			   (else
			    (get-df dirpath))))
	      (free-inodes (cond
			    ((not (directory? dirpath))
			     (if (common:low-noise-print 300 "disks not a dir " disk-num)
				 (debug:print 0 *default-log-port* "WARNING: disk " disk-num " at path \"" dirpath "\" is not a directory - ignoring it."))
			     -1)
			    ((not (file-write-access? dirpath))
			     (if (common:low-noise-print 300 "disks not writeable " disk-num)
				 (debug:print 0 *default-log-port* "WARNING: disk " disk-num " at path \"" dirpath "\" is not writeable - ignoring it."))
			     -1)
			    ((not (eq? (string-ref dirpath 0) #\/))
			     (if (common:low-noise-print 300 "disks not a proper path " disk-num)
				 (debug:print 0 *default-log-port* "WARNING: disk " disk-num " at path \"" dirpath "\" is not a fully qualified path - ignoring it."))
			     -1)
			    (else
			     (get-free-inodes dirpath))))
	      ;;(free-inodes (get-free-inodes dirpath))
	      )
	 (debug:print 2 *default-log-port* "INFO: disk " disk-num " path " dirpath " free space " freespc " free inodes " free-inodes)
	 (if (and (> freespc bestsize)(> free-inodes min-inodes ))
	     (begin
	       (set! best     (cons disk-num dirpath))
	       (set! bestsize freespc)))
	 ;;(print "Processing: " disk-num " bestsize: " bestsize " best: " best " freespc: " freespc " min-inodes: " min-inodes " free-inodes: " free-inodes)
	 ))
     (map car disks))
    (if (and best (> bestsize minsize))
	best
	#f))) ;; #f means no disk candidate found

(define (common:load-pkts-to-db mtconf #!key (use-lt #f))
  (common:with-queue-db
   mtconf
   (lambda (pktsdirs pktsdir pdb)
     (for-each
      (lambda (pktsdir) ;; look at all
	(cond
	 ((not (common:file-exists? pktsdir))
	  (debug:print 0 *default-log-port* "ERROR: packets directory " pktsdir " does not exist."))
	 ((not (directory? pktsdir))
	  (debug:print 0 *default-log-port* "ERROR: packets directory path " pktsdir " is not a directory."))
	 ((not (file-read-access? pktsdir))
	  (debug:print 0 *default-log-port* "ERROR: packets directory path " pktsdir " is not readable."))
	 (else
	  (debug:print-info 0 *default-log-port* "Loading packets found in " pktsdir)
	  (let ((pkts (glob (conc pktsdir "/*.pkt"))))
	    (for-each
	     (lambda (pkt)
	       (let* ((uuid    (cadr (string-match ".*/([0-9a-f]+).pkt" pkt)))
		      (exists  (lookup-by-uuid pdb uuid #f)))
		 (if (not exists)
		     (let* ((pktdat (string-intersperse
				     (with-input-from-file pkt read-lines)
				     "\n"))
			    (apkt   (pkt->alist pktdat))
			    (ptype  (alist-ref 'T apkt)))
		       (add-to-queue pdb pktdat uuid (or ptype 'cmd) #f 0)
		       (debug:print 4 *default-log-port* "Added " uuid " of type " ptype " to queue"))
		     (debug:print 4 *default-log-port* "pkt: " uuid " exists, skipping...")
		     )))
	     pkts)))))
      pktsdirs))
   use-lt: use-lt))

;;======================================================================
;; use-lt is use linktree "lt" link to find pkts dir
(define (common:save-pkt pktalist-in mtconf use-lt #!key (add-only #f)) ;; add-only saves the pkt only if there is a parent already
  (if (or add-only
	  (hash-table-exists? *pkts-info* 'last-parent))
      (let* ((parent   (hash-table-ref/default *pkts-info* 'last-parent #f))
	     (pktalist (if parent
			   (cons `(parent . ,parent)
				 pktalist-in)
			   pktalist-in)))
	(let-values (((uuid pkt)
		      (alist->pkt pktalist common:pkts-spec)))
	  (hash-table-set! *pkts-info* 'last-parent uuid)
	  (let ((pktsdir (or (hash-table-ref/default *pkts-info* 'pkts-dir #f)
			     (let* ((pktsdirs (common:get-pkts-dirs mtconf use-lt))
				    (pktsdir   (car pktsdirs))) ;; assume it is there
			       (hash-table-set! *pkts-info* 'pkts-dir pktsdir)
			       pktsdir))))
            (handle-exceptions
             exn
             (debug:print-info 0 "failed to write out packet to " pktsdir ", exn=" exn) ;; don't care if this failed for now but MUST FIX - BUG!!
             (if (not (file-exists? pktsdir))
                 (create-directory pktsdir #t))
             (with-output-to-file
                 (conc pktsdir "/" uuid ".pkt")
               (lambda ()
                 (print pkt)))))))))
;;        (set! *common:telemetry-log-socket* #f)))))

(define (common:get-linktree)
  (or (getenv "MT_LINKTREE")
      (if *configdat*
	  (configf:lookup *configdat* "setup" "linktree")
	  #f)
      (if (or *toppath* (getenv "MT_RUN_AREA_HOME"))
	  (conc (or *toppath* (getenv "MT_RUN_AREA_HOME")) "/lt")
	  #f)
      (let* ((tp (common:get-toppath #f))
	     (lt (conc tp "/lt")))
	(if (not tp)(debug:print 0 *default-log-port* "WARNING: cannot calculate best path for linktree, using " lt))
	lt)))


(define (tests:get-tests-search-path cfgdat)
  (let ((paths (let ((section (if cfgdat
				  (configf:get-section cfgdat "tests-paths")
				  #f)))
		 (if section
		     (map cadr section)
		     '()))))
    (filter (lambda (d)
	      (if (directory-exists? d)
		  d
		  (begin
		    ;; (if (common:low-noise-print 60 "tests:get-tests-search-path" d)
		    ;;	(debug:print 0 *default-log-port* "WARNING: problem with directory " d ", dropping it from tests path"))
		    #f)))
	    (append paths (list (conc *toppath* "/tests"))))))

(define (server:get-best-guess-address hostname)
  (let ((res #f))
    (for-each 
     (lambda (adr)
       (if (not (eq? (u8vector-ref adr 0) 127))
	   (set! res adr)))
     ;; NOTE: This can fail when there is no mention of the host in /etc/hosts. FIXME
     (vector->list (hostinfo-addresses (hostname->hostinfo hostname))))
    (string-intersperse 
     (map number->string
	  (u8vector->list
	   (if res res (hostname->ip hostname)))) ".")))
;;======================================================================
;; logic for getting homehost. Returns (host . at-home)
;; IF *toppath* is not set, wait up to five seconds trying every two seconds
;; (this is to accomodate the watchdog)
;;
(define (common:get-homehost #!key (trynum 5))
  ;; called often especially at start up. use mutex to eliminate collisions
  (mutex-lock! *homehost-mutex*)
  (cond
   (*home-host*
    (mutex-unlock! *homehost-mutex*)
    *home-host*)
   ((not *toppath*)
    (mutex-unlock! *homehost-mutex*)
    (launch:setup) ;; safely mutexed now
    (if (> trynum 0)
	(begin
	  (thread-sleep! 2)
	  (common:get-homehost trynum: (- trynum 1)))
	#f))
   (else
    (let* ((currhost (get-host-name))
	   (bestadrs (server:get-best-guess-address currhost))
	   ;; first look in config, then look in file .homehost, create it if not found
	   (homehost (or (configf:lookup *configdat* "server" "homehost" )
			 (handle-exceptions
			  exn
			  (if (> trynum 0)
			      (let ((delay-time (* (- 5 trynum) 5)))
				(mutex-unlock! *homehost-mutex*)
				(debug:print 0 *default-log-port* "ERROR: ["(common:human-time)"] Failed to read .homehost file, delaying "
					     delay-time " seconds and trying again, message: "  ((condition-property-accessor 'exn 'message) exn)
					     ", exn=" exn)
				(thread-sleep! delay-time)
				(common:get-homehost trynum: (- trynum 1)))
			      (begin
				(mutex-unlock! *homehost-mutex*)
				(debug:print 0 *default-log-port* "ERROR: ["(common:human-time)
					     "] Failed to read .homehost file after trying five times. Giving up and exiting, message: "
					     ((condition-property-accessor 'exn 'message) exn))
				(exit 1)))
			  (let ((hhf (conc *toppath* "/.homehost")))
			    (if (common:file-exists? hhf)
				(with-input-from-file hhf read-line)
				(if (file-write-access? *toppath*)
				    (begin
				      (with-output-to-file hhf
					(lambda ()
					  (print bestadrs)))
				      (begin
					(mutex-unlock! *homehost-mutex*)
					(car (common:get-homehost))))
				    #f))))))
	   (at-home  (or (equal? homehost currhost)
			 (equal? homehost bestadrs))))
      (set! *home-host* (cons homehost at-home))
      (mutex-unlock! *homehost-mutex*)
      *home-host*))))

(define (common:wait-for-homehost-load maxnormload msg)
  (let* ((hh-dat (if (common:on-homehost?) ;; if we are on the homehost then pass in #f so the calls are local.
		     #f
		     (common:get-homehost)))
	 (hh     (if hh-dat (car hh-dat) #f)))
    (common:wait-for-normalized-load maxnormload msg hh)))

;;======================================================================
;; am I on the homehost?
;;======================================================================
;;======================================================================
;;======================================================================
;;
(define (common:on-homehost?)
  (let ((hh (common:get-homehost)))
    (if hh
	(cdr hh)
	#f)))

(define (common:run-sync?)
  (and (common:on-homehost?)
       (args:get-arg "-server")))


;; gather available information, if legit read configs in this order:
;;
;;   if have cache;
;;      read it a return it
;;   else
;;     megatest.config     (do not cache)
;;     runconfigs.config   (cache if all vars avail)
;;     megatest.config     (cache if all vars avail)
;;   returns:
;;     *toppath*
;;   side effects:
;;     sets; *configdat*    (megatest.config info)
;;           *runconfigdat* (runconfigs.config info)
;;           *configstatus* (status of the read data)
;;
(define (launch:setup #!key (force-reread #f) (areapath #f))
  (mutex-lock! *launch-setup-mutex*)
  (if (and *toppath*
	   (eq? *configstatus* 'fulldata) (not force-reread)) ;; got it all
      (begin
	(debug:print 2 *default-log-port* "NOTE: skipping launch:setup-body call since we have fulldata")
	(mutex-unlock! *launch-setup-mutex*)
	*toppath*)
      (let ((res (launch:setup-body force-reread: force-reread areapath: areapath)))
	(mutex-unlock! *launch-setup-mutex*)
	res)))

(define (launch:setup-body #!key (force-reread #f) (areapath #f))
  (if (and (eq? *configstatus* 'fulldata)
	   *toppath*
	   (not force-reread)) ;; no need to reprocess
      *toppath*   ;; return toppath
      (let* ((use-cache (common:use-cache?)) ;; BB- use-cache checks *configdat* for use-cache setting.  We do not have *configdat*.  Bootstrapping problem here.
	     (toppath  (common:get-toppath areapath))
	     (target   (common:args-get-target))
	     (sections (if target (list "default" target) #f)) ;; for runconfigs
	     (mtconfig (or (args:get-arg "-config") "megatest.config")) ;; allow overriding megatest.config 
	     (cachefiles (launch:get-cache-file-paths areapath toppath target mtconfig))
	     ;; checking for null cachefiles should not be necessary, I was seeing error car of '(), might be a chicken bug or a red herring ...
	     (mtcachef   (if (null? cachefiles)
			     #f
			     (car cachefiles))) ;; (and cachedir (conc cachedir "/" ".megatest.cfg-"  megatest-version "-" megatest-fossil-hash)))
	     (rccachef   (if (null? cachefiles)
			     #f
			     (cdr cachefiles)))) ;; (and cachedir (conc cachedir "/" ".runconfigs.cfg-"  megatest-version "-" megatest-fossil-hash)))
	;; (cancreate (and cachedir (common:file-exists? cachedir)(file-write-access? cachedir) (not (common:in-running-test?)))))
	(set! *toppath* toppath) ;; This is needed when we are running as a test using CMDINFO as a datasource
	;;(BB> "launch:setup-body -- cachefiles="cachefiles)
	(cond
	 ;; if mtcachef exists just read it, however we need to assume toppath is available in $MT_RUN_AREA_HOME
	 ((and (not force-reread)
	       mtcachef  rccachef
	       use-cache
	       (get-environment-variable "MT_RUN_AREA_HOME")
	       (common:file-exists? mtcachef)
	       (common:file-exists? rccachef))
	  ;;(BB> "launch:setup-body -- cond branch 1 - use-cache")
	  (set! *configdat*    (configf:read-alist mtcachef))
	  (set! *db-keys*      (common:get-fields *configdat*))
	  ;;(BB> "launch:setup-body -- 1 set! *configdat*="*configdat*)
	  (set! *runconfigdat* (configf:read-alist rccachef))
	  (set! *configinfo*   (list *configdat*  (get-environment-variable "MT_RUN_AREA_HOME")))
	  (set! *configstatus* 'fulldata)
	  (set! *toppath*      (get-environment-variable "MT_RUN_AREA_HOME"))
	  *toppath*)
	 ;; there are no existing cached configs, do full reads of the configs and cache them
	 ;; we have all the info needed to fully process runconfigs and megatest.config
	 ((and ;; (not force-reread) ;; force-reread is irrelevant in the AND, could however OR it?
	   mtcachef
	   rccachef) ;; BB- why are we doing this without asking if caching is desired?
	  ;;(BB> "launch:setup-body -- cond branch 2")
	  (let* ((first-pass    (find-and-read-config        ;; NB// sets MT_RUN_AREA_HOME as side effect
				 mtconfig
				 environ-patt: "env-override"
				 given-toppath: toppath
				 pathenvvar: "MT_RUN_AREA_HOME"))
		 (first-rundat  (let ((toppath (if toppath 
						   toppath
						   (car first-pass))))
				  (read-config ;; (conc toppath "/runconfigs.config") ;; this should be converted to runconfig:read but it is non-trivial, leaving it for now.
				   (conc (if (string? toppath)
					     toppath
					     (get-environment-variable "MT_RUN_AREA_HOME"))
					 "/runconfigs.config")
				   *runconfigdat* #t 
				   sections: sections))))
	    (set! *runconfigdat* first-rundat)
	    (if first-pass  ;; 
		(begin
		  ;;(BB> "launch:setup-body -- \"first-pass\"=first-pass")
		  (set! *configdat*  (car first-pass))
		  ;;(BB> "launch:setup-body -- 2 set! *configdat*="*configdat*)
		  (set! *configinfo* first-pass)
		  (set! *toppath*    (or toppath (cadr first-pass))) ;; use the gathered data unless already have it
		  (set! toppath      *toppath*)
		  (set! *db-keys*    (common:get-fields *configdat*))
		  (if (not *toppath*)
		      (begin
			(debug:print-error 0 *default-log-port* "you are not in a megatest area!")
			(exit 1)))
		  (setenv "MT_RUN_AREA_HOME" *toppath*)
		  ;; the seed read is done, now read runconfigs, cache it then read megatest.config one more time and cache it
		  (let* ((keys         (common:list-or-null *db-keys* ;; (common:get-fields (rmt:get-keys)
							    message: "Failed to retrieve keys in launch.scm. Please report this to the developers."))
			 (key-vals     (keys:target->keyval keys target))
			 (linktree     (common:get-linktree)) ;; (or (getenv "MT_LINKTREE")(if *configdat* (configf:lookup *configdat* "setup" "linktree") #f)))
					;     (if *configdat*
					; 	   (configf:lookup *configdat* "setup" "linktree")
					; 	   (conc *toppath* "/lt"))))
			 (second-pass  (find-and-read-config
					mtconfig
					environ-patt: "env-override"
					given-toppath: toppath
					pathenvvar: "MT_RUN_AREA_HOME"))
			 (runconfigdat (begin     ;; this read of the runconfigs will see any adjustments made by re-reading megatest.config
					 (for-each (lambda (kt)
						     (setenv (car kt) (cadr kt)))
						   key-vals)
					 (read-config (conc toppath "/runconfigs.config") *runconfigdat* #t ;; consider using runconfig:read some day ...
						      sections: sections)))
			 (cachefiles   (launch:get-cache-file-paths areapath toppath target mtconfig))
			 (mtcachef     (car cachefiles))
			 (rccachef     (cdr cachefiles)))
		    ;;  trap exception due to stale NFS handle -- Error: (open-output-file) cannot open file - Stale NFS file handle: "/p/fdk/gwa/lefkowit/mtTesting/qa/primbeqa/links/p1222/11/PDK_r1.1.1/prim/clean/pcell_testgen/.runconfigs.cfg-1.6427-7d1e789cb3f62f9cde719a4865bb51b3c17ea853" - ticket 220546342
		    ;; TODO - consider 1) using simple-lock to bracket cache write
		    ;;                 2) cache in hash on server, since need to do rmt: anyway to lock.

		    (if rccachef
			(common:fail-safe
			 (lambda ()
			   (configf:write-alist runconfigdat rccachef))
			 (conc "Could not write cache file - "rccachef)))
		    (if mtcachef
			(common:fail-safe
			 (lambda ()
			   (configf:write-alist *configdat* mtcachef))
			 (conc "Could not write cache file - "mtcachef)))
		    (set! *runconfigdat* runconfigdat)
		    (if (and rccachef mtcachef) (set! *configstatus* 'fulldata))))
		;; no configs found? should not happen but let's try to recover gracefully, return an empty hash-table
		(set! *configdat* (make-hash-table))
		)))

	 ;; else read what you can and set the flag accordingly
	 ;; here we don't have either mtconfig or rccachef
	 (else
	  ;;(BB> "launch:setup-body -- cond branch 3 - else")
	  (let* ((cfgdat   (find-and-read-config 
			    (or (args:get-arg "-config") "megatest.config")
			    environ-patt: "env-override"
			    given-toppath: (get-environment-variable "MT_RUN_AREA_HOME")
			    pathenvvar: "MT_RUN_AREA_HOME")))

	    (if (and cfgdat (list? cfgdat) (> (length cfgdat) 0) (hash-table? (car cfgdat)))
		(let* ((toppath  (or (get-environment-variable "MT_RUN_AREA_HOME")(cadr cfgdat)))
		       (rdat     (read-config (conc toppath  ;; convert this to use runconfig:read!
						    "/runconfigs.config") *runconfigdat* #t sections: sections)))
		  (set! *configinfo*   cfgdat)
		  (set! *configdat*    (car cfgdat))
		  (set! *db-keys*    (common:get-fields *configdat*))
		  (set! *runconfigdat* rdat)
		  (set! *toppath*      toppath)
		  (set! *configstatus* 'partial))
		(begin
		  (debug:print-error 0 *default-log-port* "No " mtconfig " file found. Giving up.")
		  (exit 2))))))
	;; COND ends here.
	
	;; additional house keeping
	(let* ((linktree (or (common:get-linktree)
			     (conc *toppath* "/lt"))))
	  (if linktree
	      (begin
		(if (not (common:file-exists? linktree))
		    (begin
		      (handle-exceptions
		       exn
		       (begin
			 (debug:print-error 0 *default-log-port* "Something went wrong when trying to create linktree dir at " linktree)
			 (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn)
			 (exit 1))
		       (create-directory linktree #t))))
		(handle-exceptions
		 exn
		 (begin
		   (debug:print-error 0 *default-log-port* "Something went wrong when trying to create link to linktree at " *toppath*)
		   (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn))
		 (let ((tlink (conc *toppath* "/lt")))
		   (if (not (common:file-exists? tlink))
		       (create-symbolic-link linktree tlink)))))
	      (begin
		(debug:print-error 0 *default-log-port* "linktree not defined in [setup] section of megatest.config")
		)))
	(if (and *toppath*
		 (directory-exists? *toppath*))
	    (begin
	      (setenv "MT_RUN_AREA_HOME" *toppath*)
	      (setenv "MT_TESTSUITENAME" (common:get-testsuite-name)))
	    (begin
	      (debug:print-error 0 *default-log-port* "failed to find the top path to your Megatest area.")
	      (set! *toppath* #f) ;; force it to be false so we return #f
	      #f))
	
	;; one more attempt to cache the configs for future reading
	(let* ((cachefiles   (launch:get-cache-file-paths areapath toppath target mtconfig))
	       (mtcachef     (car cachefiles))
	       (rccachef     (cdr cachefiles)))

	  ;; trap exception due to stale NFS handle -- Error: (open-output-file) cannot open file - Stale NFS file handle: "...somepath.../.runconfigs.cfg-1.6427-7d1e789cb3f62f9cde719a4865bb51b3c17ea853" - ticket 220546342
	  ;; TODO - consider 1) using simple-lock to bracket cache write
	  ;;                 2) cache in hash on server, since need to do rmt: anyway to lock.
	  (if (and rccachef *runconfigdat* (not (common:file-exists? rccachef)))
	      (common:fail-safe
	       (lambda ()
		 (configf:write-alist *runconfigdat* rccachef))
	       (conc "Could not write cache file - "rccachef))
	      )
	  (if (and mtcachef *configdat*    (not (common:file-exists? mtcachef)))
	      (common:fail-safe
	       (lambda ()
		 (configf:write-alist *configdat* mtcachef))
	       (conc "Could not write cache file - "mtcachef))
	      )
	  (if (and rccachef mtcachef *runconfigdat* *configdat*)
	      (set! *configstatus* 'fulldata)))

	;; if have -append-config then read and append here
	(let ((cfname (args:get-arg "-append-config")))
	  (if (and cfname
		   (file-read-access? cfname))
	      (read-config cfname *configdat* #t))) ;; values are added to the hash, no need to do anything special.
	*toppath*)))

;; return paths depending on what info is available.
;;
(define (launch:get-cache-file-paths areapath toppath target mtconfig)
  (let* ((use-cache (common:use-cache?))
         (runname  (common:args-get-runname))
         (linktree (common:get-linktree))
         (testname (common:get-full-test-name))
         (rundir   (if (and runname target linktree)
                       (common:directory-writable? (conc linktree "/" target "/" runname))
                       #f))
         (testdir  (if (and rundir testname)
                       (common:directory-writable? (conc rundir "/" testname))
                       #f))
         (cachedir (or testdir rundir))
         (mtcachef (and cachedir (conc cachedir "/" ".megatest.cfg-"  megatest-version "-" megatest-fossil-hash)))
         (rccachef (and cachedir (conc cachedir "/" ".runconfigs.cfg-"  megatest-version "-" megatest-fossil-hash))))
    (debug:print-info 6 *default-log-port* 
                      "runname=" runname 
                      "\n  linktree=" linktree
                      "\n  testname=" testname
                      "\n  rundir=" rundir 
                      "\n  testdir=" testdir 
                      "\n  cachedir=" cachedir
                      "\n  mtcachef=" mtcachef
                      "\n  rccachef=" rccachef)
    (cons mtcachef rccachef)))


;;======================================================================the end

)