Megatest

servermod.scm at [9f479c2454]
Login

File servermod.scm artifact ed8ceb5dcd part of check-in 9f479c2454


;; Copyright 2006-2023, Matthew Welland.
;; 
;; This file is part of Megatest.
;; 
;;     Megatest is free software: you can redistribute it and/or modify
;;     it under the terms of the GNU General Public License as published by
;;     the Free Software Foundation, either version 3 of the License, or
;;     (at your option) any later version.
;; 
;;     Megatest is distributed in the hope that it will be useful,
;;     but WITHOUT ANY WARRANTY; without even the implied warranty of
;;     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;;     GNU General Public License for more details.
;; 
;;     You should have received a copy of the GNU General Public License
;;     along with Megatest.  If not, see <http://www.gnu.org/licenses/>.
;;======================================================================

(declare (unit servermod))
(declare (uses artifacts))

(use md5 message-digest posix typed-records extras)

(module servermod
*

(import scheme
	chicken

	extras
	md5
	message-digest
	ports
	posix
	srfi-18

	typed-records
	data-structures

	artifacts
	)

(defstruct srv
  (areapath #f)
  (host     #f)
  (pid      #f)
  (type     #f)
  (sdir     #f) ;; .server directory
  (hdir     #f) ;; .server/host.pid directory
  (incoming #f)
  (dbstruct #f)
  (handler  #f)
  (obj-to-str #f)
  (str-to-obj #f)
  )

;; nearly every process in Megatest (if write access) starts a server so it
;; can receive messages to exit on request
;; servers have a type, mtserve, dboard, runner, execute? TOO COMPLICATED.

;; one server per run db file would be ideal.

;; mtrah/.servers/<host>.<pid>/incoming/*.artifact
;;                            |        `attic
;;                            |
;;                            `outgoing/<clienthost>.<clientpid>/*.artifact
;;                            |                                 `attic
;;                            `<tcp|http|nmsg|?>.host:port

;; on exit processes clean up. only mtserv or dboard clean up abandoned records?

;; IDEA: All requests could go into one directory instead of server specific directory - need locking
;;       don't get multiple processing of arfs

;; server:setup          - setup the directory
;; server:launch         - start a new mtserve process, possibly
;;                         using a launcher
;; server:run            - run the long running thread that monitors
;;                         the .server area
;; server:exit           - shutdown the server and exit
;; server:handle-request - take incoming request, process it, send response
;;                         back via best or fastest available transport

;; call this with handler that takes dbstruct cmd and params after doing server:setup
;; and before starting server:run
;;
(define (server:set-handler srvdat handler)
  (srv-handler-set! srvdat handler))

;; set up the server area and return a server struct
;; NOTE: This will need to be gated by write-access
;;
(define (server:setup areapath)
  (let* ((srvdat (make-srv
		  areapath: areapath
		  host:     (get-host-name) ;; likely need to replace with ip address
		  pid:      (current-process-id)
		  sdir:     (conc areapath"/.server") ;; put server artifacts here
		  ))
	 (hdir   (conc (srv-sdir srvdat)"/"(get-host.pid srvdat))))
    (srv-hdir-set! srvdat hdir)
    (srv-incoming-set! srvdat (conc hdir"/incoming"))
    (create-directory hdir #t)
    (for-each (lambda (d)
		(create-directory (conc hdir"/"d)))
	      '("incoming" "responses"))
    srvdat))

(define *server-keep-running* #f)

;; to cleanly shut the server down set *server-keep-running* to #f
;;
(define (server:run srvdat)
  ;; create server arf
  ;; put arf in srvdat-dir
  ;; forever
  ;;    scan incoming dir
  ;;    foreach arf
  ;;       bundle into with-transaction, no-transaction
  ;;    foreach bundle
  ;;       process the request
  ;;       create results arf and write it to clients dir
  ;;       remove in-arf from incoming
  (let* ((areapath  (srv-areapath srvdat))
	 (sdir      (srv-sdir srvdat))
	 (hdir      (srv-hdir srvdat))
	 (myarf     `((h . ,(srv-host srvdat))
		      (i . ,(srv-pid  srvdat))
		      (d . ,hdir)))
	 (myuuid    (write-alist->artifact sdir myarf ptype: 'S))
	 (arf-fname (get-artifact-fname sdir myuuid))
	 (dbstruct  (srv-dbstruct srvdat)))
    (set! *server-keep-running* #t)
    (let loop ((last-access (current-seconds)))
      (let* ((start (current-milliseconds))
	     (res   (server:process-incoming srvdat))
	     (delta (- (current-milliseconds) start))
	     (timed-out (> (- (current-seconds) last-access)
			   60))) ;; accessed in last 60 seconds
	(if timed-out
	    (begin
	      (print "INFO: server has not been accessed in 60 seconds, exiting shortly.")
	      (set! *server-keep-running* #f))
	    (thread-sleep! (if (> delta 500)
			       0.1
			       0.9)))
	(if (or (> res 0) ;; res is the number of requests that were found and processed
		*server-keep-running*)
	    (loop (if (> res 0)
		      (current-seconds)
		      last-access)
		  ))))
    (delete-file arf-fname)
    ))

;; read arfs from incoming, process them and put result arfs in proper dirs
;; return number requests found and processed
;;
(define	(server:process-incoming srvdat)
  (let* ((sdir   (srv-sdir srvdat))
	 (hdir   (srv-hdir srvdat))
	 (indir  (srv-incoming srvdat))
	 (arfs   (glob (conc indir"/*.artifacts")))
	 (handler (srv-handler srvdat))
	 (obj->string (srv-obj-to-str srvdat))
	 (dbstruct (srv-dbstruct srvdat)))
    (let loop ((rem arfs))
      (if (not (null? arfs))
	  (let* ((arf  (car rem))
		 (dat  (read-artifact->alist arf))
		 (ruuid (alist-ref 'Z dat))
		 (host (alist-ref 'h dat))
		 (pid  (alist-ref 'i dat))
		 (dest (conc sdir"/"host"."pid"/responses")) ;; the calling host area
		 (cmd  (alist-ref 'c dat))
		 (params (alist-ref 'p dat))
		 (res  (handler dbstruct cmd params))
		 (narf `((r . ,(obj->string res))
			 (P . ,ruuid))))
	    (delete-file arf) ;; add ability to save in bundles in archive area
	    (write-alist->artifact dest narf ptype: 'Q)
	    (loop (cdr rem)))))
    (length arfs)))
	  
;; start a server process (NOT start server in this process)
;;
;; maybe check load before calling this?
(define (server:launch areapath)
  (let* ((logd (conc areapath"/logs"))
	 (logf (conc logd"/from-"(get-host.pid #f)".log")))
    (if (not (file-exists? logd))(create-directory logd #t))
    (setenv "NBFAKE_LOG" logf)
    (system (conc "nbfake mtserve -start-dir "areapath))))



;;======================================================================
;; OLD SERVER STUFF BELOW HERE
;;======================================================================

;; ;; servers start by setting up fs transport
;; ;; and put a flag file for that ASAP.
;; ;; they then set up tcp and put a flag file for
;; ;; that 
;; ;;
;; (define *client-server-id* #f)
;; 
;; ;; oldest server alive determines host then choose random of youngest
;; ;; five servers on that host
;; ;;
;; ;; mode:
;; ;;   best - get best server (random of newest five)
;; ;;   home - get home host based on oldest server
;; ;;   info - print info
;; (define (server:choose-server areapath #!optional (mode 'best))
;;   ;; age is current-starttime
;;   ;; find oldest alive
;;   ;;   1. sort by age ascending and ping until good
;;   ;; find alive rand from youngest
;;   ;;   1. sort by age descending
;;   ;;   2. take five
;;   ;;   3. check alive, discard if not and repeat
;;   ;; first we clean up old server files
;;   (server:clean-up-old areapath)
;; ;;  (let* ((since-last (- (current-seconds) server-last-start))
;; ;;         (server-start-delay 10))     
;; ;;    (if ( < (- (current-seconds) server-last-start) 10 )
;; ;;	(begin
;; ;;          (debug:print 2 *default-log-port* "server:choose-server: seconds since last server start: " (- (current-seconds) server-last-start))
;; ;;          (debug:print 2 *default-log-port* "server:choose-server: last server start less than " server-start-delay " seconds ago. Sleeping " server-start-delay " seconds")
;; ;;          (thread-sleep! server-start-delay)
;; ;;	  )
;; ;;	(debug:print 2 *default-log-port* "server:choose-server: seconds since last server start: " (- (current-seconds) server-last-start))
;; ;;	)
;;   (let* ((serversdat  (server:get-servers-info areapath))
;; 	 (servkeys    (hash-table-keys serversdat))
;; 	 (by-time-asc (if (not (null? servkeys)) ;; NOTE: Oldest is last
;; 			  (sort servkeys ;; list of "host:port"
;; 				(lambda (a b)
;; 				  (>= (list-ref (hash-table-ref serversdat a) 2)
;; 				      (list-ref (hash-table-ref serversdat b) 2))))
;; 			  '())))
;;     (debug:print 2 *default-log-port* "server:choose-server: serversdat: " serversdat)
;;     (debug:print 2 *default-log-port* "server:choose-server: servkeys: " servkeys)
;;     (if (not (null? by-time-asc))
;; 	(let* ((oldest     (last by-time-asc))
;; 	       (oldest-dat (hash-table-ref serversdat oldest))
;; 	       (host       (list-ref oldest-dat 0))
;; 	       (all-valid  (filter (lambda (x)
;; 				     (equal? host (list-ref (hash-table-ref serversdat x) 0)))
;; 				   by-time-asc))
;; 	       (best-ten  (lambda ()
;; 			    (if (> (length all-valid) 11)
;; 				(take (drop-right all-valid 1) 10) ;; remove the oldest from consideration so it can age out
;; 				(if (> (length all-valid) 8)
;; 				    (drop-right all-valid 1)
;; 				    all-valid))))
;; 	       (names->dats (lambda (names)
;; 			      (map (lambda (x)
;; 				     (hash-table-ref serversdat x))
;; 				   names)))
;; 	       (am-home?    (lambda ()
;; 			      (let* ((currhost (get-host-name))
;; 				     (bestadrs (server:get-best-guess-address currhost)))
;; 				(or (equal? host currhost)
;; 				    (equal? host bestadrs))))))
;; 	  (case mode
;; 	    ((info)
;; 	     (print "oldest: "oldest-dat", selected host: "host", all-valid: "all-valid)
;; 	     (print "youngest: "(hash-table-ref serversdat (car all-valid))))
;; 	    ((home)     host)
;; 	    ((homehost) (cons host (am-home?))) ;; shut up old code
;; 	    ((home?)    (am-home?))
;; 	    ((best-ten)(names->dats (best-ten)))
;; 	    ((all-valid)(names->dats all-valid))
;; 	    ((best)     (let* ((best-ten (best-ten))
;; 			       (len       (length best-ten)))
;; 			  (hash-table-ref serversdat (list-ref best-ten (random len)))))
;; 	    ((count)(length all-valid))
;; 	    (else
;; 	     (debug:print 0 *default-log-port* "ERROR: invalid command "mode)
;; 	     #f)))
;; 	(begin
;; 	  (server:run areapath)
;;           (set! server-last-start (current-seconds))
;; 	  ;; (thread-sleep! 3)
;; 	  (case mode
;; 	    ((homehost) (cons #f #f))
;; 	    (else	#f))))))
 
;;======================================================================
;; S E R V E R   U T I L I T I E S 
;;======================================================================

(define (server:get-servinfo-dir areapath)
  (let* ((spath (conc areapath"/.servinfo")))
    (if (not (file-exists? spath))
	(create-directory spath #t))
    spath))

;; ;; Generate a unique signature for this server
;; (define (mk-signature)
;;   (message-digest-string (md5-primitive) 
;; 			 (with-output-to-string
;; 			   (lambda ()
;; 			     (write (list (current-directory)
;;                                           (current-process-id)
;; 					  (argv)))))))
;; 
;; (define (server:clean-up-old areapath)
;;   ;; any server file that has not been touched in ten minutes is effectively dead
;;   (let* ((sfiles (glob (conc (server:get-servinfo-dir areapath)"/*"))))
;;     (for-each
;;      (lambda (sfile)
;;        (let* ((modtime (handle-exceptions
;; 			   exn
;; 			 (begin
;; 			   (debug:print 0 *default-log-port* "WARNING: failed to get modification file for "sfile)
;; 			   (current-seconds))
;; 			 (file-modification-time sfile))))
;; 	 (if (and (number? modtime)
;; 		  (> (- (current-seconds) modtime)
;; 		     600))
;; 	     (begin
;; 	       (debug:print 0 *default-log-port* "WARNING: found old server info file "sfile", removing it.")
;; 	       (handle-exceptions
;; 		   exn
;; 		 (debug:print 0 *default-log-port* "WARNING: failed to delete old server info file "sfile)
;; 		 (delete-file sfile))))))
;;      sfiles)))
;; 
;; (define (get-client-server-id)
;;   (if *client-server-id* *client-server-id*
;;       (let ((sig (mk-signature))) ;; clients re-use the server:mk-signature logic
;;         (set! *client-server-id* sig)
;;         *client-server-id*)))

;; if srvdat is #f calculate host.pid
(define (get-host.pid srvdat)
  (if srvdat
      (conc (srv-host srvdat)"."(srv-pid srvdat))
      (conc (get-host-name)"."(current-process-id))))

;; ;; ;; When using zmq this would send the message back (two step process)
;; ;; ;; with spiffy or rpc this simply returns the return data to be returned
;; ;; ;; 
;; ;; (define (server:reply return-addr query-sig success/fail result)
;; ;;   (debug:print-info 11 *default-log-port* "server:reply return-addr=" return-addr ", result=" result)
;; ;;   ;; (send-message pubsock target send-more: #t)
;; ;;   ;; (send-message pubsock 
;; ;;   (case (server:get-transport)
;; ;;     ((rpc)  (db:obj->string (vector success/fail query-sig result)))
;; ;;     ((http) (db:obj->string (vector success/fail query-sig result)))
;; ;;     ((fs)   result)
;; ;;     (else 
;; ;;      (debug:print-error 0 *default-log-port* "unrecognised transport type: " *transport-type*)
;; ;;      result)))
;; ;; 
;; ;; ;; Given an area path,  start a server process    ### NOTE ### > file 2>&1 
;; ;; ;; if the target-host is set 
;; ;; ;; try running on that host
;; ;; ;;   incidental: rotate logs in logs/ dir.
;; ;; ;;
;; ;; (define  (server:run areapath) ;; areapath is *toppath* for a given testsuite area
;; ;;   (let* ((testsuite   (common:get-testsuite-name))
;; ;; 	 (logfile     (conc areapath "/logs/server.log")) ;; -" curr-pid "-" target-host ".log"))
;; ;; 	 (profile-mode (or (configf:lookup *configdat* "misc" "profilesw")
;; ;; 			   ""))
;; ;; 	 (cmdln (conc (common:get-megatest-exe)
;; ;; 		      " -server - ";; (or target-host "-")
;; ;; 		      (if (equal? (configf:lookup *configdat* "server" "daemonize") "yes")
;; ;; 			  " -daemonize "
;; ;; 			  "")
;; ;; 		      ;; " -log " logfile
;; ;; 		      " -m testsuite:" testsuite
;; ;; 		      " " profile-mode
;; ;; 		      )) ;; (conc " >> " logfile " 2>&1 &")))))
;; ;; 	 (log-rotate  (make-thread common:rotate-logs  "server run, rotate logs thread")) ;; why are we rotating logs here? This is a sensitive location with a lot going on!?
;; ;;          (load-limit  (configf:lookup-number *configdat* "jobtools" "max-server-start-load" default: 3.0)))
;; ;;     ;; we want the remote server to start in *toppath* so push there
;; ;;     (push-directory areapath)
;; ;;     (debug:print 0 *default-log-port* "INFO: Trying to start server (" cmdln ") ...")
;; ;;     (thread-start! log-rotate)
;; ;;     
;; ;;     ;; host.domain.tld match host?
;; ;;     ;; (if (and target-host 
;; ;;     ;; 	     ;; look at target host, is it host.domain.tld or ip address and does it 
;; ;;     ;; 	     ;; match current ip or hostname
;; ;;     ;; 	     (not (string-match (conc "("curr-host "|" curr-host"\\..*)") target-host))
;; ;;     ;; 	     (not (equal? curr-ip target-host)))
;; ;;     ;; 	(begin
;; ;;     ;; 	  (debug:print-info 0 *default-log-port* "Starting server on " target-host ", logfile is " logfile)
;; ;;     ;; 	  (setenv "TARGETHOST" target-host)))
;; ;;     ;;   
;; ;;     (setenv "TARGETHOST_LOGF" logfile)
;; ;;     (thread-sleep! (/ (random 3000) 1000)) ;; add a random initial delay. It seems pretty common that many running tests request a server at the same time
;; ;;     (debug:print 0 *default-log-port* "INFO: starting server at " (common:human-time))
;; ;;     (system (conc "nbfake " cmdln))
;; ;;     (unsetenv "TARGETHOST_LOGF")
;; ;;     ;; (if (get-environment-variable "TARGETHOST")(unsetenv "TARGETHOST"))
;; ;;     (thread-join! log-rotate)
;; ;;     (pop-directory)))
;; ;; 
;; ;; ;; given a path to a server log return: host port startseconds server-id
;; ;; ;; any changes to number of elements returned by this fuction will dirctly affect server:record->url,server:record->id,server:kill,server:get-num-alive which use match let 
;; ;; ;; example of what it's looking for in the log file:
;; ;; ;;     SERVER STARTED: 10.38.175.67:50216 AT 1616502350.0 server-id: 4907e90fc55c7a09694e3f658c639cf4 
;; ;; 
;; ;; (define (server:logf-get-start-info logf)
;; ;;   (let ((server-rx    (regexp "^SERVER STARTED: (\\S+):(\\d+) AT ([\\d\\.]+) server-id: (\\S+) pid: (\\d+)")) ;; SERVER STARTED: host:port AT timesecs server id
;; ;;         (dbprep-rx    (regexp "^SERVER: dbprep"))
;; ;;         (dbprep-found 0)
;; ;; 	(bad-dat      (list #f #f #f #f #f)))
;; ;;     (handle-exceptions
;; ;;      exn
;; ;;      (begin
;; ;;        ;; WARNING: this is potentially dangerous to blanket ignore the errors
;; ;;        (if (file-exists? logf)
;; ;; 	   (debug:print-info 2 *default-log-port* "Unable to get server info from "logf", exn=" exn))
;; ;;        bad-dat) ;; no idea what went wrong, call it a bad server
;; ;;      (with-input-from-file
;; ;; 	 logf
;; ;;        (lambda ()
;; ;; 	 (let loop ((inl  (read-line))
;; ;; 		    (lnum 0))
;; ;; 	   (if (not (eof-object? inl))
;; ;; 	       (let ((mlst (string-match server-rx inl))
;; ;; 		     (dbprep (string-match dbprep-rx inl)))
;; ;; 		 (if dbprep (set! dbprep-found 1))
;; ;; 		 (if (not mlst)
;; ;; 		     (if (< lnum 500) ;; give up if more than 500 lines of server log read
;; ;; 			 (loop (read-line)(+ lnum 1))
;; ;; 			 (begin 
;; ;;                            (debug:print-info 0 *default-log-port* "Unable to get server info from first 500 lines of " logf )
;; ;;                            bad-dat))
;; ;; 		     (match mlst
;; ;; 			    ((_ host port start server-id pid)
;; ;; 			     (list host
;; ;; 				   (string->number port)
;; ;; 				   (string->number start)
;; ;; 				   server-id
;; ;; 				   (string->number pid)))
;; ;; 			    (else
;; ;; 			     (debug:print 0 *current-log-port* "ERROR: did not recognise SERVER line info "mlst)
;; ;; 			     bad-dat))))
;; ;; 	       (begin 
;; ;; 		 (if dbprep-found
;; ;; 		     (begin
;; ;; 		       (debug:print-info 2 *default-log-port* "Server is in dbprep at " (common:human-time))
;; ;; 		       (thread-sleep! 0.5)) ;; was 25 sec but that blocked things from starting?
;; ;; 		     (debug:print-info 0 *default-log-port* "Unable to get server info from " logf " at " (seconds->time-string (current-seconds))))
;; ;; 		 bad-dat))))))))
;; ;; 
;; ;; ;; ;; get a list of servers from the log files, with all relevant data
;; ;; ;; ;; ( mod-time host port start-time pid )
;; ;; ;; ;;
;; ;; ;; (define (server:get-list areapath #!key (limit #f))
;; ;; ;;   (let ((fname-rx    (regexp "^(|.*/)server-(\\d+)-(\\S+).log$"))
;; ;; ;; 	(day-seconds (* 24 60 60)))
;; ;; ;;     ;; if the directory exists continue to get the list
;; ;; ;;     ;; otherwise attempt to create the logs dir and then
;; ;; ;;     ;; continue
;; ;; ;;     (if (if (directory-exists? (conc areapath "/logs"))
;; ;; ;; 	    '()
;; ;; ;; 	    (if (file-write-access? areapath)
;; ;; ;; 		(begin
;; ;; ;; 		  (condition-case
;; ;; ;; 		   (create-directory (conc areapath "/logs") #t)
;; ;; ;; 		   (exn (i/o file)(debug:print 0 *default-log-port* "ERROR: Cannot create directory at " (conc areapath "/logs")))
;; ;; ;; 		   (exn ()(debug:print 0 *default-log-port* "ERROR: Unknown error attemtping to get server list. exn=" exn)))
;; ;; ;; 		  (directory-exists? (conc areapath "/logs")))
;; ;; ;; 		'()))
;; ;; ;; 
;; ;; ;;         ;; Get the list of server logs.
;; ;; ;; 	(let* (
;; ;; ;;                ;; For some reason, when I uncomment the below line, ext-tests sometimes starts 1000's of servers.
;; ;; ;;                ;; (exiting-servers (system (conc "bash -c 'rm -f `grep -il exiting " areapath "/logs/server-*-*.log 2> /dev/null`'")))
;; ;; ;;                (server-logs   (glob (conc areapath "/logs/server-*-*.log")))
;; ;; ;; 	       (num-serv-logs (length server-logs)))
;; ;; ;; 	  (if (or (null? server-logs) (= num-serv-logs 0))
;; ;; ;;               (let ()
;; ;; ;;                  (debug:print 2  *default-log-port* "There are no servers running at " (common:human-time))
;; ;; ;; 	         '()
;; ;; ;;               )
;; ;; ;; 	      (let loop ((hed  (string-chomp (car server-logs)))
;; ;; ;; 			 (tal  (cdr server-logs))
;; ;; ;; 			 (res '()))
;; ;; ;; 		(let* ((mod-time  (handle-exceptions
;; ;; ;; 				   exn
;; ;; ;; 				   (begin
;; ;; ;; 				     (debug:print 0 *default-log-port* "server:get-list: failed to get modification time on " hed ", exn=" exn)
;; ;; ;; 				     (current-seconds)) ;; 0
;; ;; ;; 				   (file-modification-time hed))) ;; default to *very* old so log gets ignored if deleted
;; ;; ;; 		       (down-time (- (current-seconds) mod-time))
;; ;; ;; 		       (serv-dat  (if (or (< num-serv-logs 10)
;; ;; ;; 				  	  (< down-time 900)) ;; day-seconds))
;; ;; ;; 				      (server:logf-get-start-info hed)
;; ;; ;; 				      '())) ;; don't waste time processing server files not touched in the 15 minutes if there are more than ten servers to look at
;; ;; ;; 		       (serv-rec (cons mod-time serv-dat))
;; ;; ;; 		       (fmatch   (string-match fname-rx hed))
;; ;; ;; 		       (pid      (if fmatch (string->number (list-ref fmatch 2)) #f))
;; ;; ;; 		       (new-res  (if (null? serv-dat)
;; ;; ;; 				     res
;; ;; ;; 				     (cons (append serv-rec (list pid)) res)))) ;; any changes to number of elements in new-res will dirctly affect server:record->url,server:record->id,server:kill,server:get-num-alive which uses match let 
;; ;; ;; 		  (if (null? tal)
;; ;; ;; 		      (if (and limit
;; ;; ;; 			       (> (length new-res) limit))
;; ;; ;; 			  new-res ;; (take new-res limit)  <= need intelligent sorting before this will work
;; ;; ;; 			  new-res)
;; ;; ;; 		      (loop (string-chomp (car tal)) (cdr tal) new-res)))))))))
;; ;; 
;; ;; #;(define (server:get-num-alive srvlst)
;; ;;   (let ((num-alive 0))
;; ;;     (for-each
;; ;;      (lambda (server)
;; ;;         (handle-exceptions
;; ;;           exn
;; ;;          (begin 
;; ;;           (debug:print-info 0 *default-log-port*  "Unable to get server start-time and/or mod-time from " server ", exn=" exn))
;; ;;        (match-let (((mod-time host port start-time server-id pid)
;; ;; 		    server))
;; ;; 	 (let* ((uptime  (- (current-seconds) mod-time))
;; ;; 		(runtime (if start-time
;; ;; 			     (- mod-time start-time)
;; ;; 			     0)))
;; ;; 	   (if (< uptime 5)(set! num-alive (+ num-alive 1)))))))
;; ;;      srvlst)
;; ;;     num-alive))
;; ;; 
;; ;; ;; ;; given a list of servers get a list of valid servers, i.e. at least
;; ;; ;; ;; 10 seconds old, has started and is less than 1 hour old and is
;; ;; ;; ;; active (i.e. mod-time < 10 seconds
;; ;; ;; ;;
;; ;; ;; ;; mod-time host port start-time pid
;; ;; ;; ;;
;; ;; ;; ;; sort by start-time descending. I.e. get the oldest first. Young servers will thus drop off
;; ;; ;; ;; and servers should stick around for about two hours or so.
;; ;; ;; ;;
;; ;; ;; (define (server:get-best srvlst)
;; ;; ;;   (let* ((nums (server:get-num-servers))
;; ;; ;; 	 (now  (current-seconds))
;; ;; ;; 	 (slst (sort
;; ;; ;; 		(filter (lambda (rec)
;; ;; ;; 			  (if (and (list? rec)
;; ;; ;; 				   (> (length rec) 2))
;; ;; ;; 			      (let ((start-time (list-ref rec 3))
;; ;; ;; 				    (mod-time   (list-ref rec 0)))
;; ;; ;; 				;; (print "start-time: " start-time " mod-time: " mod-time)
;; ;; ;; 				(and start-time mod-time
;; ;; ;; 				     (> (- now start-time) 0)    ;; been running at least 0 seconds
;; ;; ;; 				     (< (- now mod-time)   16)   ;; still alive - file touched in last 16 seconds
;; ;; ;; 				     (or (not (configf:lookup *configdat* "server" "runtime")) ;; skip if not set
;; ;; ;; 					 (< (- now start-time)       
;; ;; ;; 					    (+ (- (string->number (configf:lookup *configdat* "server" "runtime"))
;; ;; ;; 					       180)
;; ;; ;; 					    (random 360)))) ;; under one hour running time +/- 180
;; ;; ;; 				     ))
;; ;; ;; 			      #f))
;; ;; ;; 			srvlst)
;; ;; ;; 		(lambda (a b)
;; ;; ;; 		  (< (list-ref a 3)
;; ;; ;; 		     (list-ref b 3))))))
;; ;; ;;     (if (> (length slst) nums)
;; ;; ;; 	(take slst nums)
;; ;; ;; 	slst)))
;; ;; 
;; ;; ;; ;; switch from server:get-list to server:get-servers-info
;; ;; ;; ;;
;; ;; ;; (define (server:get-first-best areapath)
;; ;; ;;   (let ((srvrs (server:get-best (server:get-list areapath))))
;; ;; ;;     (if (and srvrs
;; ;; ;; 	     (not (null? srvrs)))
;; ;; ;; 	(car srvrs)
;; ;; ;; 	#f)))
;; ;; ;; 
;; ;; ;; (define (server:get-rand-best areapath)
;; ;; ;;   (let ((srvrs (server:get-best (server:get-list areapath))))
;; ;; ;;     (if (and (list? srvrs)
;; ;; ;; 	     (not (null? srvrs)))
;; ;; ;; 	(let* ((len (length srvrs))
;; ;; ;; 	       (idx (random len)))
;; ;; ;; 	  (list-ref srvrs idx))
;; ;; ;; 	#f)))
;; ;; 
;; ;; (define (server:record->id servr)
;; ;;   (handle-exceptions
;; ;;    exn
;; ;;    (begin 
;; ;;      (debug:print-info 0 *default-log-port*  "Unable to get server id from " servr ", exn=" exn)     
;; ;;    #f)
;; ;;   (match-let (((host port start-time server-id pid)
;; ;; 	       servr))
;; ;;     (if server-id
;; ;; 	server-id
;; ;; 	#f))))
;; ;; 
;; ;; (define (server:record->url servr)
;; ;;   (handle-exceptions
;; ;;    exn
;; ;;    (begin 
;; ;;      (debug:print-info 0 *default-log-port*  "Unable to get server url from " servr ", exn=" exn)     
;; ;;    #f)
;; ;;   (match-let (((host port start-time server-id pid)
;; ;; 	       servr))
;; ;;     (if (and host port)
;; ;; 	(conc host ":" port)
;; ;; 	#f))))
;; ;; 
;; ;; 
;; ;; ;; if server-start-last exists, and wasn't old enough, wait <idle time> + 1, then call this function recursively until it is old enough.
;; ;; ;; if it is old enough, overwrite it and wait 0.25 seconds.
;; ;; ;; if it then has the wrong server key, wait <idle time> + 1 and call this function recursively.
;; ;; ;;
;; ;; #;(define (server:wait-for-server-start-last-flag areapath)
;; ;;   (let* ((start-flag (conc areapath "/logs/server-start-last"))
;; ;; 	 ;;; THIS INTERACTS WITH [server] timeout. Suggest using 0.1 or above for timeout (6 seconds)
;; ;; 	 (idletime    (configf:lookup-number *configdat* "server" "idletime" default: 4))
;; ;; 	 (server-key (conc (get-host-name) "-" (current-process-id))))
;; ;;     (if (file-exists? start-flag)
;; ;; 	(let* ((fmodtime (file-modification-time start-flag))
;; ;; 	       (delta    (- (current-seconds) fmodtime))
;; ;; 	       (old-enough   (> delta idletime))
;; ;;                (new-server-key ""))
;; ;;           ;; write start-flag file, wait 0.25s, then if previously the start-flag file was older than <idletime> seconds, and the new file still has the same server key as you just wrote, return #t.
;; ;; 	  ;; the intention is to make sure nfs can read the file we just wrote, and make sure it was written by us, and not another process.
;; ;;            (if (and old-enough
;; ;; 		    (begin
;; ;;                       (debug:print-info 2 *default-log-port* "Writing " start-flag)
;; ;; 		      (with-output-to-file start-flag (lambda () (print server-key)))
;; ;; 		      (thread-sleep! 0.25)
;; ;; 		      (set! new-server-key (with-input-from-file start-flag (lambda () (read-line))))
;; ;; 		      (equal? server-key new-server-key)))
;; ;; 	       #t
;; ;;                ;; If either of the above conditions is not true, print a "Gating server start" message, wait <idle-time> + 1, then call this function recursively. 
;; ;; 	       (begin
;; ;; 		 (debug:print-info 0 *default-log-port* "Gating server start, last start: "
;; ;; 				   (seconds->time-string fmodtime) ", time since last start: " delta ", required idletime: " idletime ", gating reason:" (if old-enough "another job started a server" "too soon to start another server"))
;; ;; 		 
;; ;; 		 (thread-sleep! ( + 1 idletime))
;; ;; 		 (server:wait-for-server-start-last-flag areapath)))))))
;; ;; 
;; ;; ;; oldest server alive determines host then choose random of youngest
;; ;; ;; five servers on that host
;; ;; ;;
;; ;; (define (server:get-servers-info areapath)
;; ;;   ;; (assert *toppath* "FATAL: server:get-servers-info called before *toppath* has been set.")
;; ;;   (let* ((servinfodir (server:get-servinfo-dir areapath))) ;; (conc *toppath*"/.servinfo")))
;; ;;     (if (not (file-exists? servinfodir))
;; ;; 	(create-directory servinfodir))
;; ;;     (let* ((allfiles    (glob (conc servinfodir"/*")))
;; ;; 	   (res         (make-hash-table)))
;; ;;       (for-each
;; ;;        (lambda (f)
;; ;; 	 (let* ((hostport  (pathname-strip-directory f))
;; ;; 		(serverdat (server:logf-get-start-info f)))
;; ;; 	   (match serverdat
;; ;; 	     ((host port start server-id pid)
;; ;; 	      (if (and host port start server-id pid)
;; ;; 		  (hash-table-set! res hostport serverdat)
;; ;; 		  (debug:print-info 2 *default-log-port* "bad server info for "f": "serverdat)))
;; ;; 	     (else
;; ;; 	      (debug:print-info 2 *default-log-port* "bad server info for "f": "serverdat)))))
;; ;;        allfiles)
;; ;;       res)))
;; ;; 
;; ;; ;; check the .servinfo directory, are there other servers running on this
;; ;; ;; or another host?
;; ;; ;;
;; ;; ;; returns #t => ok to start another server
;; ;; ;;         #f => not ok to start another server
;; ;; ;;
;; ;; (define (server:minimal-check areapath)
;; ;;   (server:clean-up-old areapath)
;; ;;   (let* ((srvdir      (server:get-servinfo-dir areapath)) ;; (conc areapath"/.servinfo"))
;; ;; 	 (servrs      (glob (conc srvdir"/*")))
;; ;; 	 (thishostip  (server:get-best-guess-address (get-host-name)))
;; ;; 	 (thisservrs  (glob (conc srvdir"/"thishostip":*")))
;; ;; 	 (homehostinf (server:choose-server areapath 'homehost))
;; ;; 	 (havehome    (car homehostinf))
;; ;; 	 (wearehome   (cdr homehostinf)))
;; ;;     (debug:print-info 0 *default-log-port* thishostip", have homehost: "havehome", we are homehost: "wearehome
;; ;; 		      ", numservers: "(length thisservrs))
;; ;;     (cond
;; ;;      ((not havehome) #t) ;; no homehost yet, go for it
;; ;;      ((and havehome wearehome (< (length thisservrs) 20)) #t) ;; we are home and less than 20 servers, ok to start another
;; ;;      ((and havehome (not wearehome)) #f)     ;; we are not the home host
;; ;;      ((and havehome wearehome (>= (length thisservrs) 20)) #f) ;; have enough running
;; ;;      (else
;; ;;       (debug:print 0 *default-log-port* "WARNING: Unrecognised scenario, servrs="servrs", thishostip="thishostip", thisservrs="thisservrs)
;; ;;       #t))))
;; ;; 	 
;; ;; 
;; ;; (define server-last-start 0)
;; ;; 
;; ;; 
;; ;; ;; oldest server alive determines host then choose random of youngest
;; ;; ;; five servers on that host
;; ;; ;;
;; ;; ;; mode:
;; ;; ;;   best - get best server (random of newest five)
;; ;; ;;   home - get home host based on oldest server
;; ;; ;;   info - print info
;; ;; (define (server:choose-server areapath #!optional (mode 'best))
;; ;;   ;; age is current-starttime
;; ;;   ;; find oldest alive
;; ;;   ;;   1. sort by age ascending and ping until good
;; ;;   ;; find alive rand from youngest
;; ;;   ;;   1. sort by age descending
;; ;;   ;;   2. take five
;; ;;   ;;   3. check alive, discard if not and repeat
;; ;;   ;; first we clean up old server files
;; ;;   (server:clean-up-old areapath)
;; ;;   (let* ((since-last (- (current-seconds) server-last-start))
;; ;;         (server-start-delay 10))     
;; ;;     (if ( < (- (current-seconds) server-last-start) 10 )
;; ;;       (begin
;; ;;         (debug:print 2 *default-log-port* "server:choose-server: seconds since last server start: " (- (current-seconds) server-last-start))
;; ;;         (debug:print 2 *default-log-port* "server:choose-server: last server start less than " server-start-delay " seconds ago. Sleeping " server-start-delay " seconds")
;; ;;         (thread-sleep! server-start-delay)
;; ;;       )
;; ;;       (debug:print 2 *default-log-port* "server:choose-server: seconds since last server start: " (- (current-seconds) server-last-start))
;; ;;     )
;; ;;   )
;; ;;   (let* ((serversdat  (server:get-servers-info areapath))
;; ;; 	 (servkeys    (hash-table-keys serversdat))
;; ;; 	 (by-time-asc (if (not (null? servkeys)) ;; NOTE: Oldest is last
;; ;; 			  (sort servkeys ;; list of "host:port"
;; ;; 				(lambda (a b)
;; ;; 				  (>= (list-ref (hash-table-ref serversdat a) 2)
;; ;; 				      (list-ref (hash-table-ref serversdat b) 2))))
;; ;; 			  '())))
;; ;;     (debug:print 2 *default-log-port* "server:choose-server: serversdat: " serversdat)
;; ;;     (debug:print 2 *default-log-port* "server:choose-server: servkeys: " servkeys)
;; ;;     (if (not (null? by-time-asc))
;; ;; 	(let* ((oldest     (last by-time-asc))
;; ;; 	       (oldest-dat (hash-table-ref serversdat oldest))
;; ;; 	       (host       (list-ref oldest-dat 0))
;; ;; 	       (all-valid  (filter (lambda (x)
;; ;; 				     (equal? host (list-ref (hash-table-ref serversdat x) 0)))
;; ;; 				   by-time-asc))
;; ;; 	       (best-ten  (lambda ()
;; ;; 			     (if (> (length all-valid) 11)
;; ;; 				 (take (drop-right all-valid 1) 10) ;; remove the oldest from consideration so it can age out
;; ;; 				 (if (> (length all-valid) 8)
;; ;; 				     (drop-right all-valid 1)
;; ;; 				     all-valid))))
;; ;; 	       (names->dats (lambda (names)
;; ;; 			      (map (lambda (x)
;; ;; 				     (hash-table-ref serversdat x))
;; ;; 				   names)))
;; ;; 	       (am-home?    (lambda ()
;; ;; 			      (let* ((currhost (get-host-name))
;; ;; 				     (bestadrs (server:get-best-guess-address currhost)))
;; ;; 				(or (equal? host currhost)
;; ;; 				    (equal? host bestadrs))))))
;; ;; 	  (case mode
;; ;; 	    ((info)
;; ;; 	     (print "oldest: "oldest-dat", selected host: "host", all-valid: "all-valid)
;; ;; 	     (print "youngest: "(hash-table-ref serversdat (car all-valid))))
;; ;; 	    ((home)     host)
;; ;; 	    ((homehost) (cons host (am-home?))) ;; shut up old code
;; ;; 	    ((home?)    (am-home?))
;; ;; 	    ((best-ten)(names->dats (best-ten)))
;; ;; 	    ((all-valid)(names->dats all-valid))
;; ;; 	    ((best)     (let* ((best-ten (best-ten))
;; ;; 			       (len       (length best-ten)))
;; ;; 			  (hash-table-ref serversdat (list-ref best-ten (random len)))))
;; ;; 	    ((count)(length all-valid))
;; ;; 	    (else
;; ;; 	     (debug:print 0 *default-log-port* "ERROR: invalid command "mode)
;; ;; 	     #f)))
;; ;; 	(begin
;; ;; 	  (server:run areapath)
;; ;;           (set! server-last-start (current-seconds))
;; ;; 	  ;; (thread-sleep! 3)
;; ;; 	  (case mode
;; ;; 	    ((homehost) (cons #f #f))
;; ;; 	    (else	#f))))))
;; ;; 
;; ;; (define (server:get-servinfo-dir areapath)
;; ;;   (let* ((spath (conc areapath"/.servinfo")))
;; ;;     (if (not (file-exists? spath))
;; ;; 	(create-directory spath #t))
;; ;;     spath))
;; ;; 
;; ;; (define (server:clean-up-old areapath)
;; ;;   ;; any server file that has not been touched in ten minutes is effectively dead
;; ;;   (let* ((sfiles (glob (conc (server:get-servinfo-dir areapath)"/*"))))
;; ;;     (for-each
;; ;;      (lambda (sfile)
;; ;;        (let* ((modtime (handle-exceptions
;; ;; 			   exn
;; ;; 			 (begin
;; ;; 			   (debug:print 0 *default-log-port* "WARNING: failed to get modification file for "sfile)
;; ;; 			   (current-seconds))
;; ;; 			 (file-modification-time sfile))))
;; ;; 	 (if (and (number? modtime)
;; ;; 		  (> (- (current-seconds) modtime)
;; ;; 		     600))
;; ;; 	     (begin
;; ;; 	       (debug:print 0 *default-log-port* "WARNING: found old server info file "sfile", removing it.")
;; ;; 	       (handle-exceptions
;; ;; 		   exn
;; ;; 		 (debug:print 0 *default-log-port* "WARNING: failed to delete old server info file "sfile)
;; ;; 		 (delete-file sfile))))))
;; ;;      sfiles)))
;; ;; 
;; ;; ;; would like to eventually get rid of this
;; ;; ;;
;; ;; (define (common:on-homehost?)
;; ;;   (server:choose-server *toppath* 'home?))
;; ;; 
;; ;; ;; kind start up of server, wait before allowing another server for a given
;; ;; ;; area to be launched
;; ;; ;;
;; ;; (define (server:kind-run areapath)
;; ;;   ;; look for $MT_RUN_AREA_HOME/logs/server-start-last
;; ;;   ;; and wait for it to be at least <server idletime> seconds old
;; ;;   ;; (server:wait-for-server-start-last-flag areapath)
;; ;;   (let loop ()
;; ;;     (if (> (alist-ref 'adj-proc-load (common:get-normalized-cpu-load #f)) 2)
;; ;; 	(begin
;; ;; 	  (if (common:low-noise-print 30 "our-host-load")
;; ;; 	      (debug:print 0 *default-log-port* "WARNING: system load is high, waiting to start server."))
;; ;; 	  (loop))))
;; ;;   (if (< (server:choose-server areapath 'count) 20)
;; ;;       (server:run areapath))
;; ;;   #;(if (not (server:check-if-running areapath)) ;; why try if there is already a server running?
;; ;;       (let* ((lock-file    (conc areapath "/logs/server-start.lock")))
;; ;; 	(let* ((start-flag (conc areapath "/logs/server-start-last")))
;; ;; 	  (common:simple-file-lock-and-wait lock-file expire-time: 25)
;; ;; 	  (debug:print-info  2 *default-log-port* "server:kind-run: touching " start-flag)
;; ;; 	  (system (conc "touch " start-flag)) ;; lazy but safe
;; ;; 	  (server:run areapath)
;; ;; 	  (thread-sleep! 20) ;; don't release the lock for at least a few seconds. And allow time for the server startup to get to "SERVER STARTED".
;; ;; 	  (common:simple-file-release-lock lock-file)))
;; ;;       (debug:print-info 0 *default-log-port* "Found server already running. NOT trying to start another.")))
;; ;; 
;; ;; ;; this one seems to be the general entry point
;; ;; ;;
;; ;; (define (server:start-and-wait areapath #!key (timeout 60))
;; ;;   (let ((give-up-time (+ (current-seconds) timeout)))
;; ;;     (let loop ((server-info (server:check-if-running areapath))
;; ;; 	       (try-num    0))
;; ;;       (if (or server-info
;; ;; 	      (> (current-seconds) give-up-time)) ;; server-url will be #f if no server available.
;; ;; 	  (server:record->url server-info)
;; ;; 	  (let* ( (servers (server:choose-server areapath 'all-valid))
;; ;;                 (num-ok (if servers (length (server:choose-server areapath 'all-valid)) 0)))
;; ;; 	    (if (and (> try-num 0)  ;; first time through simply wait a little while then try again
;; ;; 		     (< num-ok 1))  ;; if there are no decent candidates for servers then try starting a new one
;; ;; 		(server:run areapath))
;; ;; 	    (thread-sleep! 5)
;; ;; 	    (loop (server:check-if-running areapath)
;; ;; 		  (+ try-num 1)))))))
;; ;; 
;; ;; (define (server:get-num-servers #!key (numservers 2))
;; ;;   (let ((ns (string->number
;; ;; 	     (or (configf:lookup *configdat* "server" "numservers") "notanumber"))))
;; ;;     (or ns numservers)))
;; ;; 
;; ;; ;; no longer care if multiple servers are started by accident. older servers will drop off in time.
;; ;; ;;
;; ;; (define (server:check-if-running areapath) ;;  #!key (numservers "2"))
;; ;;   (let* ((ns            (server:get-num-servers)) ;; get the setting the for maximum number of servers allowed
;; ;; 	 (servers       (server:choose-server areapath 'best-ten))) ;; (server:get-best (server:get-list areapath))))
;; ;;     (if (or (and servers
;; ;; 		 (null? servers))
;; ;; 	    (not servers))
;; ;; 	    ;; (and (list? servers)
;; ;; 	    ;;	 (< (length servers) (+ 1 (random ns))))) ;; somewhere between 1 and numservers
;; ;;         #f
;; ;;         (let loop ((hed (car servers))
;; ;;                    (tal (cdr servers)))
;; ;;           (let ((res (server:check-server hed)))
;; ;;             (if res
;; ;;                 hed
;; ;;                 (if (null? tal)
;; ;;                     #f
;; ;;                     (loop (car tal)(cdr tal)))))))))
;; ;; 
;; ;; ;; ping the given server
;; ;; ;;
;; ;; (define (server:check-server server-record)
;; ;;   (let* ((server-url (server:record->url server-record))
;; ;;          (server-id  (server:record->id server-record)) 
;; ;;          (res        (server:ping server-url server-id)))
;; ;;     (if res
;; ;;         server-url
;; ;; 	#f)))
;; ;; 
;; ;; (define (server:kill servr)
;; ;;   (handle-exceptions
;; ;;     exn
;; ;;     (begin 
;; ;;       (debug:print-info 0 *default-log-port*  "Unable to get host and/or port from " servr ", exn=" exn)     
;; ;;     #f)
;; ;;   (match-let (((mod-time hostname port start-time server-id pid)
;; ;; 	       servr))
;; ;;     (tasks:kill-server hostname pid))))
;; ;; 
;; ;; ;; called in megatest.scm, host-port is string hostname:port
;; ;; ;;
;; ;; ;; NOTE: This is NOT called directly from clients as not all transports support a client running
;; ;; ;;       in the same process as the server.
;; ;; ;;
;; ;; (define (server:ping host:port server-id #!key (do-exit #f))
;; ;;   (let* ((host-port (cond
;; ;; 		     ((string? host:port)
;; ;; 		      (let ((slst (string-split   host:port ":")))
;; ;; 			(if (eq? (length slst) 2)
;; ;; 			    (list (car slst)(string->number (cadr slst)))
;; ;; 			    #f)))
;; ;; 		     (else
;; ;; 		      #f))))
;; ;;     (cond
;; ;;      ((and (list? host-port)
;; ;; 	   (eq? (length host-port) 2))
;; ;;       (let* ((myrunremote (make-remote))
;; ;; 	     (iface       (car host-port))
;; ;; 	     (port        (cadr host-port))
;; ;; 	     (server-dat  (client:connect iface port server-id myrunremote))
;; ;; 	     (login-res   (rmt:login-no-auto-client-setup myrunremote)))
;; ;; 	(if (and (list? login-res)
;; ;; 		 (car login-res))
;; ;; 	    (begin
;; ;; 	      ;; (print "LOGIN_OK")
;; ;; 	      (if do-exit (exit 0))
;; ;; 	      #t)
;; ;; 	    (begin
;; ;; 	      ;; (print "LOGIN_FAILED")
;; ;; 	      (if do-exit (exit 1))
;; ;; 	      #f))))
;; ;;      (else 
;; ;;       (if host:port
;; ;; 	  (debug:print 0 *default-log-port*  "ERROR: bad host:port "host:port))
;; ;;       (if do-exit
;; ;; 	  (exit 1)
;; ;; 	  #f)))))
;; ;; 
;; ;; ;; run ping in separate process, safest way in some cases
;; ;; ;;
;; ;; (define (server:ping-server ifaceport)
;; ;;   (with-input-from-pipe 
;; ;;    (conc (common:get-megatest-exe) " -ping " ifaceport)
;; ;;    (lambda ()
;; ;;      (let loop ((inl (read-line))
;; ;; 		(res "NOREPLY"))
;; ;;        (if (eof-object? inl)
;; ;; 	   (case (string->symbol res)
;; ;; 	     ((NOREPLY)  #f)
;; ;; 	     ((LOGIN_OK) #t)
;; ;; 	     (else       #f))
;; ;; 	   (loop (read-line) inl))))))
;; ;; 
;; ;; ;; NOT USED (well, ok, reference in rpc-transport but otherwise not used).
;; ;; ;;
;; ;; (define (server:login toppath)
;; ;;   (lambda (toppath)
;; ;;     (set! *db-last-access* (current-seconds)) ;; might not be needed.
;; ;;     (if (equal? *toppath* toppath)
;; ;; 	#t
;; ;; 	#f)))
;; ;; 
;; ;; ;; timeout is hms string: 1h 5m 3s, default is 1 minute
;; ;; ;; This is currently broken. Just use the number of hours with no unit.
;; ;; ;; Default is 60 seconds.
;; ;; ;;
;; ;; (define (server:expiration-timeout)
;; ;;   (let ((tmo (configf:lookup *configdat* "server" "timeout")))
;; ;;     (if (and (string? tmo)
;; ;; 	     (common:hms-string->seconds tmo)) ;; BUG: hms-string->seconds is broken, if given "10" returns 0. Also, it doesn't belong in this logic unless the string->number is changed below
;; ;;         (* 3600 (string->number tmo))
;; ;; 	600)))
;; ;; 
;; ;; (define (server:get-best-guess-address hostname)
;; ;;   (let ((res #f))
;; ;;     (for-each 
;; ;;      (lambda (adr)
;; ;;        (if (not (eq? (u8vector-ref adr 0) 127))
;; ;; 	   (set! res adr)))
;; ;;      ;; NOTE: This can fail when there is no mention of the host in /etc/hosts. FIXME
;; ;;      (vector->list (hostinfo-addresses (hostname->hostinfo hostname))))
;; ;;     (string-intersperse 
;; ;;      (map number->string
;; ;; 	  (u8vector->list
;; ;; 	   (if res res (hostname->ip hostname)))) ".")))
;; ;; 
;; ;; ;; (define server:sync-lock-token "SERVER_SYNC_LOCK")
;; ;; ;; (define (server:release-sync-lock)
;; ;; ;;   (db:no-sync-del! *no-sync-db* server:sync-lock-token))
;; ;; ;; (define (server:have-sync-lock?)
;; ;; ;;   (let* ((have-lock-pair (db:no-sync-get-lock *no-sync-db* server:sync-lock-token))
;; ;; ;;          (have-lock?     (car have-lock-pair))
;; ;; ;;          (lock-time      (cdr have-lock-pair))
;; ;; ;;          (lock-age       (- (current-seconds) lock-time)))
;; ;; ;;     (cond
;; ;; ;;      (have-lock? #t)
;; ;; ;;      ((>lock-age
;; ;; ;;        (* 3 (configf:lookup-number *configdat* "server" "minimum-intersync-delay" default: 180)))
;; ;; ;;       (server:release-sync-lock)
;; ;; ;;       (server:have-sync-lock?))
;; ;; ;;      (else #f))))
;; ;; 
;; ;; ;; moving this here as it needs access to db and cannot be in common.
;; ;; ;;
;; ;; 
;; ;; (define (server:get-bruteforce-syncer dbstruct #!key (fork-to-background #f) (persist-until-sync #f))
;; ;;   (debug:print "WARNING: bruteforce-syncer is called but has been disabled!")
;; ;;   (lambda ()
;; ;;     (debug:print "WARNING: bruteforce-syncer is called but has been disabled!"))
;; ;;   #;(let* ((sqlite-exe   (or (get-environment-variable "MT_SQLITE3_EXE"))) ;; defined in cfg.sh
;; ;;          (sync-log     (or (args:get-arg "-sync-log") (conc *toppath* "/logs/sync-" (current-process-id) "-" (get-host-name) ".log")))
;; ;; 	 (tmp-area     (common:get-db-tmp-area))
;; ;; 	 (tmp-db       (conc tmp-area "/megatest.db"))
;; ;; 	 (staging-file (conc *toppath* "/.megatest.db"))
;; ;; 	 (mtdbfile     (conc *toppath* "/megatest.db"))
;; ;; 	 (lockfile     (common:get-sync-lock-filepath))
;; ;;          (sync-cmd-core     (conc sqlite-exe" " tmp-db " .dump | "sqlite-exe" " staging-file "&>"sync-log))
;; ;;          (sync-cmd     (if fork-to-background 
;; ;;                            (conc "/usr/bin/env NBFAKE_LOG="*toppath*"/logs/last-server-sync-"(current-process-id)".log nbfake \""sync-cmd-core" && /bin/mv -f " staging-file " " mtdbfile" \"")
;; ;;                            sync-cmd-core))
;; ;;          (default-min-intersync-delay 2)
;; ;; 	 (min-intersync-delay (configf:lookup-number *configdat* "server" "minimum-intersync-delay" default: default-min-intersync-delay))
;; ;;          (default-duty-cycle 0.1)
;; ;;          (duty-cycle   (configf:lookup-number *configdat* "server" "sync-duty-cycle" default: default-duty-cycle))
;; ;;          (last-sync-seconds 10) ;; we will adjust this to a measurement and delay last-sync-seconds * (1 - duty-cycle)
;; ;;          (calculate-off-time (lambda (work-duration duty-cycle)
;; ;;                                   (* (/ (- 1 duty-cycle) duty-cycle) last-sync-seconds)))
;; ;;          (off-time min-intersync-delay) ;; adjusted in closure below.
;; ;;          (do-a-sync
;; ;;           (lambda ()
;; ;;             (BB> "Start do-a-sync with fork-to-background="fork-to-background" persist-until-sync="persist-until-sync)
;; ;;             (let* ((finalres
;; ;;                     (let retry-loop ((num-tries 0))
;; ;;                          (if (common:simple-file-lock lockfile)
;; ;; 	                     (begin
;; ;;                                (cond
;; ;;                                 ((not (or fork-to-background persist-until-sync))
;; ;;                                  (debug:print 0 *default-log-port* "INFO: syncer thread sleeping for max of (server.minimum-intersync-delay="min-intersync-delay
;; ;;                                               " , off-time="off-time" seconds ]")
;; ;;                                  (thread-sleep! (max off-time min-intersync-delay)))
;; ;;                                 (else
;; ;;                                  (debug:print 0 *default-log-port* "INFO: syncer thread NOT sleeping ; maybe time-to-exit...")))
;; ;; 
;; ;;                                (if (not (configf:lookup *configdat* "server" "disable-db-snapshot"))
;; ;;                                    (common:snapshot-file mtdbfile subdir: ".db-snapshot"))
;; ;; 		               (delete-file* staging-file)
;; ;; 		               (let* ((start-time (current-milliseconds))
;; ;;                                       (res (system sync-cmd))
;; ;;                                       (dbbackupfile (conc mtdbfile ".backup"))
;; ;;                                       (res2 
;; ;;                                        (cond
;; ;;                                         ((eq? 0 res )
;; ;;                                          (handle-exceptions
;; ;;                                             exn
;; ;;                                             #f
;; ;;                                          (if (file-exists? dbbackupfile)
;; ;; 		                           (delete-file* dbbackupfile)
;; ;;                                          )
;; ;;                                          (if (eq? 0 (file-size sync-log))
;; ;;                                              (delete-file* sync-log))
;; ;; 		                         (system (conc "/bin/mv " staging-file " " mtdbfile))
;; ;;                                          
;; ;;                                          (set! last-sync-seconds (/ (- (current-milliseconds) start-time) 1000))
;; ;;                                          (set! off-time (calculate-off-time
;; ;;                                                          last-sync-seconds
;; ;;                                                          (cond
;; ;;                                                           ((and (number? duty-cycle) (> duty-cycle 0) (< duty-cycle 1))
;; ;;                                                            duty-cycle)
;; ;;                                                           (else
;; ;;                                                            (debug:print 0 *default-log-port* "WARNING: ["(common:human-time)"] server.sync-duty-cycle is invalid.  Should be a number between 0 and 1, but "duty-cycle" was specified.  Using default value: "default-duty-cycle)
;; ;;                                                            default-duty-cycle))))
;; ;;                                          
;; ;;                                          (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" SYNC took "last-sync-seconds" sec")
;; ;;                                          (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" SYNC took "last-sync-seconds" sec ; with duty-cycle of "duty-cycle" off time is now "off-time)
;; ;;                                          'sync-completed))
;; ;;                                         (else
;; ;;                                          (system (conc "/bin/cp "sync-log" "sync-log".fail"))
;; ;;                                          (debug:print 0 *default-log-port* "ERROR: ["(common:human-time)"] Sync failed. See log at "sync-log".fail")
;; ;;                                          (if (file-exists? (conc mtdbfile ".backup"))
;; ;;                                              (system (conc "/bin/cp "mtdbfile ".backup " mtdbfile)))
;; ;;                                          #f))))
;; ;;                                  (common:simple-file-release-lock lockfile)
;; ;;                                  (BB> "released lockfile: " lockfile)
;; ;;                                  (when (common:file-exists? lockfile)
;; ;;                                    (BB> "DID NOT ACTUALLY RELEASE LOCKFILE"))
;; ;;                                  res2) ;; end let
;; ;;                                );; end begin
;; ;;                              ;; else
;; ;;                              (cond
;; ;;                               (persist-until-sync
;; ;;                                (thread-sleep! 1)
;; ;;                                (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" other SYNC in progress; we're in a fork-to-background so we need to succeed.  Let's wait a jiffy and and try again. num-tries="num-tries" (waiting for lockfile="lockfile" to disappear)")
;; ;;                                (retry-loop (add1 num-tries)))
;; ;;                               (else
;; ;;                                (thread-sleep! (max off-time (+ last-sync-seconds min-intersync-delay)))
;; ;;                                (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" other SYNC in progress; not syncing.")
;; ;;                                'parallel-sync-in-progress))
;; ;;                              ) ;; end if got lockfile
;; ;;                          )
;; ;;                     ))
;; ;;               (BB> "End do-a-sync with fork-to-background="fork-to-background" persist-until-sync="persist-until-sync" and result="finalres)
;; ;;               finalres)
;; ;;             ) ;; end lambda
;; ;;           ))
;; ;;     do-a-sync))
;; ;; 
;; ;; 

)