;;======================================================================
;; Copyright 2017, Matthew Welland.
;;
;; This file is part of Megatest.
;;
;; Megatest is free software: you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.
;;
;; Megatest is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;; GNU General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with Megatest. If not, see <http://www.gnu.org/licenses/>.
;;======================================================================
(declare (unit servermod))
(declare (uses commonmod))
(declare (uses debugprint))
(declare (uses configfmod))
(declare (uses http-transportmod))
(declare (uses pkts))
(module servermod
*
(import scheme
chicken.base
chicken.string
chicken.process
chicken.io
chicken.time
chicken.condition
chicken.file
chicken.process-context
chicken.process-context.posix
chicken.random
chicken.file.posix
system-information
(prefix sqlite3 sqlite3:)
typed-records
regex
directory-utils
matchable
srfi-18
srfi-69
commonmod
debugprint
configfmod
http-transportmod
pkts
)
;;======================================================================
;; NEW SERVER METHOD
;;======================================================================
(define *srvpktspec*
`((server (host . h)
(port . p)
(servkey . k)
(pid . i)
(ipaddr . a)
(dbpath . d))))
(define (register-server pkts-dir pkt-spec host port servkey ipaddr dbpath)
(let* ((pkt-dat `((host . ,host)
(port . ,port)
(servkey . ,servkey)
(pid . ,(current-process-id))
(ipaddr . ,ipaddr)
(dbpath . ,dbpath))))
(write-alist->pkt
pkts-dir
pkt-dat
pktspec: pkt-spec
ptype: 'server)))
;; given a pkts dir read
;;
(define (get-all-server-pkts pktsdir-in pktspec)
(let* ((pktsdir (if (file-exists? pktsdir-in)
pktsdir-in
(begin
(create-directory pktsdir-in #t)
pktsdir-in)))
(all-pkt-files (glob (conc pktsdir "/*.pkt"))))
(map (lambda (pkt-file)
(read-pkt->alist pkt-file pktspec: pktspec))
all-pkt-files)))
(define (server-address srv-pkt)
(conc (alist-ref 'host srv-pkt) ":"
(alist-ref 'port srv-pkt)))
(define (server-ready? server-address)
;; ping the server and ask it
;; if it ready
#f)
;; from the pkts return servers associated with dbpath
;; NOTE: Only one can be alive - have to check on each
;; in the list of pkts returned
;;
(define (get-viable-servers serv-pkts dbpath)
(let loop ((tail serv-pkts)
(res '()))
(if (null? tail)
res ;; NOTE: sort by age so oldest is considered first
(let* ((spkt (car tail)))
(loop (cdr tail)
(if (equal? dbpath (alist-ref 'dbpath spkt))
(cons spkt res)
res))))))
;; from viable servers get one that is alive and ready
;;
(define (get-the-server serv-pkts dbpath)
(let loop ((tail serv-pkts))
(if (null? tail)
#f
(let* ((spkt (car tail))
(addr (server-address spkt)))
(if (server-ready? addr)
spkt
(loop (cdr tail)))))))
;;======================================================================
;; END NEW SERVER METHOD
;;======================================================================
(define (server:make-server-url hostport)
(if (not hostport)
#f
(conc "http://" (car hostport) ":" (cadr hostport))))
;;======================================================================
;; logic for getting homehost. Returns (host . at-home)
;; IF *toppath* is not set, wait up to five seconds trying every two seconds
;; (this is to accomodate the watchdog)
;;
(define (common:get-homehost #!key (trynum 5))
(assert *toppath* "ERROR: common:get-homehost called before launch:setup. This is fatal.")
;; called often especially at start up. use mutex to eliminate collisions
(mutex-lock! *homehost-mutex*)
(cond
(*home-host*
(mutex-unlock! *homehost-mutex*)
*home-host*)
((not *toppath*)
(mutex-unlock! *homehost-mutex*)
;; (launch:setup) ;; safely mutexed now
(if (> trynum 0)
(begin
(thread-sleep! 2)
(common:get-homehost trynum: (- trynum 1)))
#f))
(else
(let* ((currhost (get-host-name))
(bestadrs (server:get-best-guess-address currhost))
;; first look in config, then look in file .homehost, create it if not found
(homehost (or (configf:lookup *configdat* "server" "homehost" )
(handle-exceptions
exn
(if (> trynum 0)
(let ((delay-time (* (- 5 trynum) 5)))
(mutex-unlock! *homehost-mutex*)
(debug:print 0 *default-log-port* "ERROR: ["(common:human-time)"] Failed to read .homehost file, delaying "
delay-time " seconds and trying again, message: " ((condition-property-accessor 'exn 'message) exn)
", exn=" exn)
(thread-sleep! delay-time)
(common:get-homehost trynum: (- trynum 1)))
(begin
(mutex-unlock! *homehost-mutex*)
(debug:print 0 *default-log-port* "ERROR: ["(common:human-time)
"] Failed to read .homehost file after trying five times. Giving up and exiting, message: "
((condition-property-accessor 'exn 'message) exn))
(exit 1)))
(let ((hhf (conc *toppath* "/.homehost")))
(if (common:file-exists? hhf)
(with-input-from-file hhf read-line)
(if (file-writable? *toppath*)
(begin
(with-output-to-file hhf
(lambda ()
(print bestadrs)))
(begin
(mutex-unlock! *homehost-mutex*)
(car (common:get-homehost))))
#f))))))
(at-home (or (equal? homehost currhost)
(equal? homehost bestadrs))))
(set! *home-host* (cons homehost at-home))
(mutex-unlock! *homehost-mutex*)
*home-host*))))
;;======================================================================
;; am I on the homehost?
;;
(define (common:on-homehost?)
(let ((hh (common:get-homehost)))
(if hh
(cdr hh)
#f)))
(define (common:wait-for-homehost-load maxnormload msg)
(let* ((hh-dat (if (common:on-homehost?) ;; if we are on the homehost then pass in #f so the calls are local.
#f
(common:get-homehost)))
(hh (if hh-dat (car hh-dat) #f)))
(common:wait-for-normalized-load maxnormload msg hh)))
;; Given a run id start a server process ### NOTE ### > file 2>&1
;; if the run-id is zero and the target-host is set
;; try running on that host
;; incidental: rotate logs in logs/ dir.
;;
(define (server:run areapath) ;; areapath is *toppath* for a given testsuite area
(let* ((curr-host (get-host-name))
;; (attempt-in-progress (server:start-attempted? areapath))
;; (dot-server-url (server:check-if-running areapath))
(curr-ip (server:get-best-guess-address curr-host))
(curr-pid (current-process-id))
(homehost (common:get-homehost)) ;; configf:lookup *configdat* "server" "homehost" ))
(target-host (car homehost))
(testsuite (common:get-area-name))
(logfile (conc areapath "/logs/server.log")) ;; -" curr-pid "-" target-host ".log"))
(profile-mode (or (configf:lookup *configdat* "misc" "profilesw")
""))
(cmdln (conc (common:get-megatest-exe)
" -server " (or target-host "-") (if (equal? (configf:lookup *configdat* "server" "daemonize") "yes")
" -daemonize "
"")
;; " -log " logfile
" -m testsuite:" testsuite
" " profile-mode
)) ;; (conc " >> " logfile " 2>&1 &")))))
(log-rotate (make-thread common:rotate-logs "server run, rotate logs thread")) ;; why are we rotating logs here? This is a sensitive location with a lot going on!?
(load-limit (configf:lookup-number *configdat* "jobtools" "max-server-start-load" default: 3.0)))
;; we want the remote server to start in *toppath* so push there
(push-directory areapath)
(debug:print 0 *default-log-port* "INFO: Trying to start server (" cmdln ") ...")
(thread-start! log-rotate)
;; host.domain.tld match host?
(if (and target-host
;; look at target host, is it host.domain.tld or ip address and does it
;; match current ip or hostname
(not (string-match (conc "("curr-host "|" curr-host"\\..*)") target-host))
(not (equal? curr-ip target-host)))
(begin
(debug:print-info 0 *default-log-port* "Starting server on " target-host ", logfile is " logfile)
(setenv "TARGETHOST" target-host)))
(setenv "TARGETHOST_LOGF" logfile)
(thread-sleep! (/ (pseudo-random-integer 5000) 1000)) ;; add about a random (up to 5 seconds) initial delay. It seems pretty common that many running tests request a server at the same time
;; (common:wait-for-normalized-load load-limit " delaying server start due to load" target-host) ;; do not try starting servers on an already overloaded machine, just wait forever
#;(common:wait-for-homehost-load load-limit (conc " delaying server start due to load on homehost. limit is " load-limit))
(system (conc "nbfake " cmdln))
(unsetenv "TARGETHOST_LOGF")
(if (get-environment-variable "TARGETHOST")(unsetenv "TARGETHOST"))
(thread-join! log-rotate)
(pop-directory)))
(define (server:record->url servr)
(handle-exceptions
exn
(begin
(debug:print-info 0 *default-log-port* "Unable to get server url from " servr ", exn=" exn)
#f)
(match-let (((mod-time host port start-time server-id pid)
servr))
(if (and host port)
(conc host ":" port)
#f))))
;; wait for server=start-last to be three seconds old
;;
(define (server:wait-for-server-start-last-flag areapath)
(let* ((start-flag (conc areapath "/logs/server-start-last"))
;;; THIS INTERACTS WITH [server] timeout. Suggest using 0.1 or above for timeout (6 seconds)
(reftime (configf:lookup-number *configdat* "server" "idletime" default: 4))
(server-key (conc (get-host-name) "-" (current-process-id))))
(if (file-exists? start-flag)
(let* ((fmodtime (file-modification-time start-flag))
(delta (- (current-seconds) fmodtime))
(all-go (> delta reftime)))
(if (and all-go
(begin
(debug:print-info 0 *default-log-port* "Writing " start-flag)
(with-output-to-file start-flag
(lambda ()
(print server-key)))
(thread-sleep! 0.254)
(let ((res (with-input-from-file start-flag
(lambda ()
(read-line)))))
(equal? server-key res))))
#t ;; (system (conc "touch " start-flag)) ;; lazy but safe
(begin
(debug:print-info 0 *default-log-port* "Gating server start, last start: "
fmodtime ", delta: " delta ", reftime: " reftime ", all-go=" all-go)
(thread-sleep! reftime)
(server:wait-for-server-start-last-flag areapath)))))))
(define (server:kill servr)
(handle-exceptions
exn
(begin
(debug:print-info 0 *default-log-port* "Unable to get host and/or port from " servr ", exn=" exn)
#f)
(match-let (((mod-time hostname port start-time server-id pid)
servr))
(tasks:kill-server hostname pid))))
)