;;======================================================================
;; Copyright 2006-2012, Matthew Welland.
;;
;; This file is part of Megatest.
;;
;; Megatest is free software: you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.
;;
;; Megatest is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;; GNU General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with Megatest. If not, see <http://www.gnu.org/licenses/>.
;;======================================================================
;; (use srfi-1 data-structures posix regex-case (prefix base64 base64:)
;; format dot-locking csv-xml z3 udp ;; sql-de-lite
;; hostinfo md5 message-digest typed-records directory-utils stack
;; matchable regex posix (srfi 18) extras ;; tcp
;; (prefix nanomsg nmsg:)
;; (prefix sqlite3 sqlite3:)
;; pkts (prefix dbi dbi:)
;; )
;;
;; (declare (unit common))
;; ;; (declare (uses commonmod))
;; ;; (import commonmod)
;;
;; (include "common_records.scm")
;; (require-library margs)
;; (include "margs.scm")
;; (define old-exit exit)
;;
;; (define (exit . code)
;; (if (null? code)
;; (old-exit)
;; (old-exit code)))
(define (common:with-queue-db mtconf proc #!key (use-lt #f)(toppath-in #f))
(let* ((pktsdirs (common:get-pkts-dirs mtconf use-lt))
(pktsdir (if pktsdirs (car pktsdirs) #f))
(toppath (or (configf:lookup mtconf "scratchdat" "toppath")
toppath-in))
(pdbpath (or (configf:lookup mtconf "setup" "pdbpath") pktsdir)))
(cond
((not (and pktsdir toppath pdbpath))
(debug:print 0 *default-log-port* "ERROR: settings are missing in your megatest.config for area management.")
(debug:print 0 *default-log-port* " you need to have pktsdirs in the [setup] section."))
((not (common:file-exists? pktsdir))
(debug:print 0 *default-log-port* "ERROR: pkts directory not found " pktsdir))
((not (equal? (file-owner pktsdir)(current-effective-user-id)))
(debug:print 0 *default-log-port* "ERROR: directory " pktsdir " is not owned by " (current-effective-user-name)))
(else
(let* ((pdb (open-queue-db pdbpath "pkts.db"
schema: '("CREATE TABLE groups (id INTEGER PRIMARY KEY,groupname TEXT, CONSTRAINT group_constraint UNIQUE (groupname));"))))
(proc pktsdirs pktsdir pdb)
(dbi:close pdb))))))
(define (common:load-pkts-to-db mtconf #!key (use-lt #f))
(common:with-queue-db
mtconf
(lambda (pktsdirs pktsdir pdb)
(for-each
(lambda (pktsdir) ;; look at all
(cond
((not (common:file-exists? pktsdir))
(debug:print 0 *default-log-port* "ERROR: packets directory " pktsdir " does not exist."))
((not (directory? pktsdir))
(debug:print 0 *default-log-port* "ERROR: packets directory path " pktsdir " is not a directory."))
((not (file-readable? pktsdir))
(debug:print 0 *default-log-port* "ERROR: packets directory path " pktsdir " is not readable."))
(else
(debug:print-info 0 *default-log-port* "Loading packets found in " pktsdir)
(let ((pkts (glob (conc pktsdir "/*.pkt"))))
(for-each
(lambda (pkt)
(let* ((uuid (cadr (string-match ".*/([0-9a-f]+).pkt" pkt)))
(exists (lookup-by-uuid pdb uuid #f)))
(if (not exists)
(let* ((pktdat (string-intersperse
(with-input-from-file pkt read-lines)
"\n"))
(apkt (pkt->alist pktdat))
(ptype (alist-ref 'T apkt)))
(add-to-queue pdb pktdat uuid (or ptype 'cmd) #f 0)
(debug:print 4 *default-log-port* "Added " uuid " of type " ptype " to queue"))
(debug:print 4 *default-log-port* "pkt: " uuid " exists, skipping...")
)))
pkts)))))
pktsdirs))
use-lt: use-lt))
;;======================================================================
;; N A N O M S G C L I E N T
;;======================================================================
;;
;;
;;
;; (define (common:send-dboard-main-changed)
;; (let* ((dashboard-ips (mddb:get-dashboards)))
;; (for-each
;; (lambda (ipadr)
;; (let* ((soc (common:open-nm-req (conc "tcp://" ipadr)))
;; (msg (conc "main " *toppath*))
;; (res (common:nm-send-receive-timeout soc msg)))
;; (if (not res) ;; couldn't reach that dashboard - remove it from db
;; (print "ERROR: couldn't reach dashboard " ipadr))
;; res))
;; dashboard-ips)))
;;
;;
;; ;;======================================================================
;; ;; D A S H B O A R D D B
;; ;;======================================================================
;;
;; (define (mddb:open-db)
;; (let* ((db (open-database (conc (get-environment-variable "HOME") "/.dashboard.db"))))
;; (set-busy-handler! db (busy-timeout 10000))
;; (for-each
;; (lambda (qry)
;; (exec (sql db qry)))
;; (list
;; "CREATE TABLE IF NOT EXISTS vars (id INTEGER PRIMARY KEY,key TEXT, val TEXT, CONSTRAINT varsconstraint UNIQUE (key));"
;; "CREATE TABLE IF NOT EXISTS dashboards (
;; id INTEGER PRIMARY KEY,
;; pid INTEGER,
;; username TEXT,
;; hostname TEXT,
;; ipaddr TEXT,
;; portnum INTEGER,
;; start_time TIMESTAMP DEFAULT (strftime('%s','now')),
;; CONSTRAINT hostport UNIQUE (hostname,portnum)
;; );"
;; ))
;; db))
;;
;; ;; register a dashboard
;; ;;
;; (define (mddb:register-dashboard port)
;; (let* ((pid (current-process-id))
;; (hostname (get-host-name))
;; (ipaddr (server:get-best-guess-address hostname))
;; (username (current-user-name)) ;; (car userinfo)))
;; (db (mddb:open-db)))
;; (print "Register monitor, pid: " pid ", hostname: " hostname ", port: " port ", username: " username)
;; (exec (sql db "INSERT OR REPLACE INTO dashboards (pid,username,hostname,ipaddr,portnum) VALUES (?,?,?,?,?);")
;; pid username hostname ipaddr port)
;; (close-database db)))
;;
;; ;; unregister a monitor
;; ;;
;; (define (mddb:unregister-dashboard host port)
;; (let* ((db (mddb:open-db)))
;; (print "Register unregister monitor, host:port=" host ":" port)
;; (exec (sql db "DELETE FROM dashboards WHERE hostname=? AND portnum=?;") host port)
;; (close-database db)))
;;
;; ;; get registered dashboards
;; ;;
;; (define (mddb:get-dashboards)
;; (let ((db (mddb:open-db)))
;; (query fetch-column
;; (sql db "SELECT ipaddr || ':' || portnum FROM dashboards;"))))
;;======================================================================
;; T E S T L A U N C H I N G P E R I T E M W I T H H O S T T Y P E S
;;======================================================================
;;
;; [hosts]
;; arm cubie01 cubie02
;; x86_64 zeus xena myth01
;; allhosts #{g hosts arm} #{g hosts x86_64}
;;
;; [host-types]
;; general #MTLOWESTLOAD #{g hosts allhosts}
;; arm #MTLOWESTLOAD #{g hosts arm}
;; nbgeneral nbjob run JOBCOMMAND -log $MT_LINKTREE/$MT_TARGET/$MT_RUNNAME.$MT_TESTNAME-$MT_ITEM_PATH.lgo
;;
;; [host-rules]
;; # maxnload => max normalized load
;; # maxnjobs => max jobs per cpu
;; # maxjobrate => max jobs per second
;; general maxnload=1.1; maxnjobs=1.2; maxjobrate=0.1
;;
;; [launchers]
;; envsetup general
;; xor/%/n 4C16G
;; % nbgeneral
;;
;; [jobtools]
;; # if defined and not "no" flexi-launcher will bypass "launcher" unless no match.
;; flexi-launcher yes
;; launcher nbfake
;;
(define (common:get-launcher configdat testname itempath)
(let ((fallback-launcher (configf:lookup configdat "jobtools" "launcher")))
(if (and (configf:lookup configdat "jobtools" "flexi-launcher") ;; overrides launcher
(not (equal? (configf:lookup configdat "jobtools" "flexi-launcher") "no")))
(let* ((launchers (hash-table-ref/default configdat "launchers" '())))
(if (null? launchers)
fallback-launcher
(let loop ((hed (car launchers))
(tal (cdr launchers)))
(let ((patt (car hed))
(host-type (cadr hed)))
(if (tests:match patt testname itempath)
(begin
(debug:print-info 2 *default-log-port* "Have flexi-launcher match for " testname "/" itempath " = " host-type)
(let ((launcher (configf:lookup configdat "host-types" host-type)))
(if launcher
(let* ((launcher-parts (string-split launcher))
(launcher-exe (car launcher-parts)))
(if (equal? launcher-exe "#MTLOWESTLOAD") ;; this is our special case, we will find the lowest load and craft a nbfake commandline
(let host-loop ((targ-host (common:get-least-loaded-host (cdr launcher-parts) host-type configdat))
(count 100))
(if targ-host
(conc "remrun " targ-host)
(if (> count 0)
(begin
(debug:print 0 *default-log-port* "INFO: Waiting for a host for host-type " host-type)
(thread-sleep! (- 101 count))
(host-loop (common:get-least-loaded-host (cdr launcher-parts) host-type configdat)
(- count 1)))
(begin
(debug:print 0 *default-log-port* "FATAL: Failed to find a host from #MTLOWESTLOAD for host-type " host-type)
(exit)))))
launcher))
(begin
(debug:print-info 0 *default-log-port* "WARNING: no launcher found for host-type " host-type)
(if (null? tal)
fallback-launcher
(loop (car tal)(cdr tal)))))))
;; no match, try again
(if (null? tal)
fallback-launcher
(loop (car tal)(cdr tal))))))))
fallback-launcher)))
;;======================================================================
;; faux-lock is deprecated. Please use simple-lock below
;;
(define (common:faux-lock keyname #!key (wait-time 8)(allow-lock-steal #t))
(if (rmt:no-sync-get/default keyname #f) ;; do not be tempted to compare to pid. locking is a one-shot action, if already locked for this pid it doesn't actually count
(if (> wait-time 0)
(begin
(thread-sleep! 1)
(if (eq? wait-time 1) ;; only one second left, steal the lock
(begin
(debug:print-info 0 *default-log-port* "stealing lock for " keyname)
(common:faux-unlock keyname force: #t)))
(common:faux-lock keyname wait-time: (- wait-time 1)))
#f)
(begin
(rmt:no-sync-set keyname (conc (current-process-id)))
(equal? (conc (current-process-id)) (conc (rmt:no-sync-get/default keyname #f))))))
(define (common:faux-unlock keyname #!key (force #f))
(if (or force (equal? (conc (current-process-id)) (conc (rmt:no-sync-get/default keyname #f))))
(begin
(if (rmt:no-sync-get/default keyname #f) (rmt:no-sync-del! keyname))
#t)
#f))
;;======================================================================
;; simple lock. improve and converge on this one.
;;
(define (common:simple-lock keyname)
(rmt:no-sync-get-lock keyname))
(define (common:simple-unlock keyname #!key (force #f))
(rmt:no-sync-del! keyname))
;;======================================================================
;; ideally put all this info into the db, no need to preserve it across moving homehost
;;
;; return list of
;; ( reachable? cpuload update-time )
(define (common:get-host-info hostname)
(let* ((loadinfo (rmt:get-latest-host-load hostname)) ;; if this host happens to have been recently used by a test reuse the load data
(load (car loadinfo))
(load-sample-time (cdr loadinfo))
(load-sample-age (- (current-seconds) load-sample-time))
(loadinfo-timeout-seconds 6) ;; this was 20 seconds, seems way too lax. Switch to 6 seconds
(host-last-update-timeout-seconds 4)
(host-rec (hash-table-ref/default *host-loads* hostname #f))
)
(cond
((< load-sample-age loadinfo-timeout-seconds)
(list #t
load-sample-time
load))
((and host-rec
(< (current-seconds) (+ (host-last-update host-rec) host-last-update-timeout-seconds)))
(list #t
(host-last-update host-rec)
(host-last-cpuload host-rec )))
((common:unix-ping hostname)
(list #t
(current-seconds)
(alist-ref 'adj-core-load (common:get-normalized-cpu-load hostname)))) ;; this is cheaper than you might think. get-normalized-cpu-load is cached for up to 5 seconds
(else
(list #f 0 -1) ;; bad host, don't use!
))))
;;======================================================================
;; see defstruct host at top of file.
;; host: reachable last-update last-used last-cpuload
;;
(define (common:update-host-loads-table hosts-raw)
(let* ((hosts (filter (lambda (x)
(string-match (regexp "^\\S+$") x))
hosts-raw)))
(for-each
(lambda (hostname)
(let* ((rec (let ((h (hash-table-ref/default *host-loads* hostname #f)))
(if h
h
(let ((h (make-host)))
(hash-table-set! *host-loads* hostname h)
h))))
(host-info (common:get-host-info hostname))
(is-reachable (car host-info))
(last-reached-time (cadr host-info))
(load (caddr host-info)))
(host-reachable-set! rec is-reachable)
(host-last-update-set! rec last-reached-time)
(host-last-cpuload-set! rec load)))
hosts)))
;;======================================================================
;; go through the hosts from least recently used to most recently used, pick the first that meets the load criteral from the
;; [host-rules] section.
;;
(define (common:get-least-loaded-host hosts-raw host-type configdat)
(let* ((rdat (configf:lookup configdat "host-rules" host-type))
(rules (common:val->alist (or rdat "") convert: #t)) ;; maxnload, maxnjobs, maxjobrate
(maxnload (common:alist-ref/default 'maxnload rules 1.5)) ;; max normalized load
(maxnjobs (common:alist-ref/default 'maxnjobs rules 1.5)) ;; max normalized number of jobs
(maxjobrate (common:alist-ref/default 'maxjobrate rules (/ 1 6))) ;; max rate of submitting jobs to a given host in jobs/second
(hosts (filter (lambda (x)
(string-match (regexp "^\\S+$") x))
hosts-raw))
;; (best-host #f)
(get-rec (lambda (hostname)
;; (print "get-rec hostname=" hostname)
(let ((h (hash-table-ref/default *host-loads* hostname #f)))
(if h
h
(let ((h (make-host)))
(hash-table-set! *host-loads* hostname h)
h)))))
(best-load 99999)
(curr-time (current-seconds))
(get-hosts-sorted (lambda (hosts)
(sort hosts (lambda (a b)
(let ((a-rec (get-rec a))
(b-rec (get-rec b)))
;; (print "a=" a " a-rec=" a-rec " host-last-used=" (host-last-used a-rec))
;; (print "b=" b " b-rec=" b-rec " host-last-used=" (host-last-used b-rec))
(< (host-last-used a-rec)
(host-last-used b-rec))))))))
(debug:print 0 *default-log-port* "INFO: hosts-sorted=" (get-hosts-sorted hosts))
(if (null? hosts)
#f ;; no hosts to select from. All done and giving up now.
(let ((hosts-sorted (get-hosts-sorted hosts)))
(common:update-host-loads-table hosts)
(let loop ((hostname (car hosts-sorted))
(tal (cdr hosts-sorted))
(best-host #f))
(let* ((rec (get-rec hostname))
(reachable (host-reachable rec))
(load (host-last-cpuload rec))
(last-used (host-last-used rec))
(delta (- curr-time last-used))
(job-rate (if (> delta 0)
(/ 1 delta)
999)) ;; jobs per second
(new-best
(cond
((not reachable)
(debug:print 0 *default-log-port* "Skipping host " hostname " as it cannot be reached.")
best-host)
((and (< load maxnload) ;; load is acceptable
(< job-rate maxjobrate)) ;; job rate is acceptable
(set! best-load load)
hostname)
(else best-host))))
(debug:print 0 *default-log-port* "INFO: Trying host " hostname " with load " load ", last used " delta " seconds ago, with job-rate " job-rate " for running a test." )
(if new-best
(begin ;; found a host, return it
(debug:print 0 *default-log-port* "INFO: Found host: " new-best " load: " load " last-used: " delta " seconds ago, with job-rate: " job-rate)
(host-last-used-set! rec curr-time)
new-best)
(if (null? tal) #f (loop (car tal)(cdr tal) best-host)))))))))
(define (std-exit-procedure)
;;(common:telemetry-log-close)
(on-exit (lambda () 0))
;;(debug:print-info 13 *default-log-port* "std-exit-procedure called; *time-to-exit*="*time-to-exit*)
(let ((no-hurry (if *time-to-exit* ;; hurry up
#f
(begin
(set! *time-to-exit* #t)
#t))))
(debug:print-info 4 *default-log-port* "starting exit process, finalizing databases.")
(if (and no-hurry (debug:debug-mode 18))
(rmt:print-db-stats))
(let ((th1 (make-thread (lambda () ;; thread for cleaning up, give it five seconds
(if *dbstruct-db* (db:close-all *dbstruct-db*)) ;; one second allocated
(if *task-db*
(let ((db (cdr *task-db*)))
(if (sqlite3:database? db)
(begin
(sqlite3:interrupt! db)
(sqlite3:finalize! db #t)
;; (vector-set! *task-db* 0 #f)
(set! *task-db* #f)))))
(http-client#close-idle-connections!)
;; (if (and *runremote*
;; (remote-conndat *runremote*))
;; (begin
;; (http-client#close-all-connections!))) ;; for http-client
(if (not (eq? *default-log-port* (current-error-port)))
(close-output-port *default-log-port*))
(set! *default-log-port* (current-error-port))) "Cleanup db exit thread"))
(th2 (make-thread (lambda ()
(debug:print 4 *default-log-port* "Attempting clean exit. Please be patient and wait a few seconds...")
(if no-hurry
(begin
(thread-sleep! 5)) ;; give the clean up few seconds to do it's stuff
(begin
(thread-sleep! 2)))
(debug:print 4 *default-log-port* " ... done")
)
"clean exit")))
(thread-start! th1)
(thread-start! th2)
(thread-join! th1)
)
)
0)
;;======================================================================
;; TODO: for multiple areas, we will have multiple watchdogs; and multiple threads to manage
(define (common:watchdog)
(debug:print-info 13 *default-log-port* "common:watchdog entered.")
(if (launch:setup)
(if (common:on-homehost?)
(let ((dbstruct (db:setup #t)))
(debug:print-info 13 *default-log-port* "after db:setup with dbstruct=" dbstruct)
(cond
((dbr:dbstruct-read-only dbstruct)
(debug:print-info 13 *default-log-port* "loading read-only watchdog")
(common:readonly-watchdog dbstruct))
(else
(debug:print-info 13 *default-log-port* "loading writable-watchdog.")
(let* ((syncer (or (configf:lookup *configdat* "server" "sync-method") "brute-force-sync")))
(cond
((equal? syncer "brute-force-sync")
(server:writable-watchdog-bruteforce dbstruct))
((equal? syncer "delta-sync")
(server:writable-watchdog-deltasync dbstruct))
(else
(debug:print-error 0 *default-log-port* "Unknown server/sync-method specified ("syncer") - valid values are brute-force-sync and delta-sync.")
(exit 1)))
;;(debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] Syncer started (method="syncer")")
)))
(debug:print-info 13 *default-log-port* "watchdog done."))
(debug:print-info 13 *default-log-port* "no need for watchdog on non-homehost"))))
;;======================================================================
;; logic for getting homehost. Returns (host . at-home)
;; IF *toppath* is not set, wait up to five seconds trying every two seconds
;; (this is to accomodate the watchdog)
;;
(define (common:get-homehost #!key (trynum 5))
;; called often especially at start up. use mutex to eliminate collisions
(mutex-lock! *homehost-mutex*)
(cond
(*home-host*
(mutex-unlock! *homehost-mutex*)
*home-host*)
((not *toppath*)
(mutex-unlock! *homehost-mutex*)
(launch:setup) ;; safely mutexed now
(if (> trynum 0)
(begin
(thread-sleep! 2)
(common:get-homehost trynum: (- trynum 1)))
#f))
(else
(let* ((currhost (get-host-name))
(bestadrs (server:get-best-guess-address currhost))
;; first look in config, then look in file .homehost, create it if not found
(homehost (or (configf:lookup *configdat* "server" "homehost" )
(handle-exceptions
exn
(if (> trynum 0)
(let ((delay-time (* (- 5 trynum) 5)))
(mutex-unlock! *homehost-mutex*)
(debug:print 0 *default-log-port* "ERROR: ["(common:human-time)"] Failed to read .homehost file, delaying "
delay-time " seconds and trying again, message: " ((condition-property-accessor 'exn 'message) exn)
", exn=" exn)
(thread-sleep! delay-time)
(common:get-homehost trynum: (- trynum 1)))
(begin
(mutex-unlock! *homehost-mutex*)
(debug:print 0 *default-log-port* "ERROR: ["(common:human-time)
"] Failed to read .homehost file after trying five times. Giving up and exiting, message: "
((condition-property-accessor 'exn 'message) exn))
(exit 1)))
(let ((hhf (conc *toppath* "/.homehost")))
(if (common:file-exists? hhf)
(with-input-from-file hhf read-line)
(if (file-writable? *toppath*)
(begin
(with-output-to-file hhf
(lambda ()
(print bestadrs)))
(begin
(mutex-unlock! *homehost-mutex*)
(car (common:get-homehost))))
#f))))))
(at-home (or (equal? homehost currhost)
(equal? homehost bestadrs))))
(set! *home-host* (cons homehost at-home))
(mutex-unlock! *homehost-mutex*)
*home-host*))))
;;======================================================================
;; am I on the homehost?
;;
(define (common:on-homehost?)
(let ((hh (common:get-homehost)))
(if hh
(cdr hh)
#f)))
(define (common:wait-for-homehost-load maxnormload msg)
(let* ((hh-dat (if (common:on-homehost?) ;; if we are on the homehost then pass in #f so the calls are local.
#f
(common:get-homehost)))
(hh (if hh-dat (car hh-dat) #f)))
(common:wait-for-normalized-load maxnormload msg hh)))
(define (make-and-init-remote)
(make-remote hh-dat: (common:get-homehost)
server-info: (if *toppath* (server:check-if-running *toppath*) #f)
server-timeout: (server:expiration-timeout)))
;;======================================================================
;; currently the primary job of the watchdog is to run the sync back to megatest.db from the db in /tmp
;; if we are on the homehost and we are a server (by definition we are on the homehost if we are a server)
;;
(define (common:readonly-watchdog dbstruct)
(let ((just-testing 0.0501))
(thread-sleep! just-testing)) ;; (/ 1 20)) ;; 0.051) ;; delay for startup
(debug:print-info 13 *default-log-port* "common:readonly-watchdog entered.")
;; sync megatest.db to /tmp/.../megatst.db
(let* ((sync-cool-off-duration 3)
(golden-mtdb (dbr:dbstruct-mtdb dbstruct))
(golden-mtpath (db:dbdat-get-path golden-mtdb))
(tmp-mtdb (dbr:dbstruct-tmpdb dbstruct))
(tmp-mtpath (db:dbdat-get-path tmp-mtdb)))
(debug:print-info 0 *default-log-port* "Read-only periodic sync thread started.")
(let loop ((last-sync-time 0))
(debug:print-info 13 *default-log-port* "loop top tmp-mtpath="tmp-mtpath" golden-mtpath="golden-mtpath)
(let* ((duration-since-last-sync (- (current-seconds) last-sync-time)))
(debug:print-info 13 *default-log-port* "duration-since-last-sync="duration-since-last-sync)
(if (and (not *time-to-exit*)
(< duration-since-last-sync sync-cool-off-duration))
(thread-sleep! (- sync-cool-off-duration duration-since-last-sync)))
(if (not *time-to-exit*)
(let ((golden-mtdb-mtime (file-modification-time golden-mtpath))
(tmp-mtdb-mtime (file-modification-time tmp-mtpath)))
(if (> golden-mtdb-mtime tmp-mtdb-mtime)
(if (< golden-mtdb-mtime (- (current-seconds) 3)) ;; file has NOT been touched in past three seconds, this way multiple servers won't fight to sync back
(let ((res (db:multi-db-sync dbstruct 'old2new)))
(debug:print-info 13 *default-log-port* "rosync called, " res " records transferred."))))
(loop (current-seconds)))
#t)))
(debug:print-info 0 *default-log-port* "Exiting readonly-watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id)" mtpath="golden-mtpath)))
;;======================================================================
;; from metadat lookup MEGATEST_VERSION
;;
(define (common:get-last-run-version) ;; RADT => How does this work in send-receive function??; assume it is the value saved in some DB
(rmt:get-var "MEGATEST_VERSION"))
(define (common:get-last-run-version-number)
(string->number
(substring (common:get-last-run-version) 0 6)))
(define (common:set-last-run-version)
(rmt:set-var "MEGATEST_VERSION" (common:version-signature)))
;;======================================================================
;; postive number if megatest version > db version
;; negative number if megatest version < db version
(define (common:version-db-delta)
(- megatest-version (common:get-last-run-version-number)))
(define (common:version-changed?)
(not (equal? (common:get-last-run-version)
(common:version-signature))))
(define (common:api-changed?)
(not (equal? (substring (->string megatest-version) 0 4)
(substring (conc (common:get-last-run-version)) 0 4))))
;;======================================================================
;; Move me elsewhere ...
;; RADT => Why do we meed the version check here, this is called only if version misma
;;
(define (common:cleanup-db dbstruct #!key (full #f))
(apply db:multi-db-sync
dbstruct
'schema
;; 'new2old
'killservers
'adj-target
;; 'old2new
'new2old
;; (if full
'(dejunk)
;; '())
)
(if (common:api-changed?)
(common:set-last-run-version)))
;;======================================================================
;; Force a megatest cleanup-db if version is changed and skip-version-check not specified
;; Do NOT check if not on homehost!
;;
(define (common:exit-on-version-changed)
(if (common:on-homehost?)
(if (common:api-changed?)
(let* ((mtconf (conc (get-environment-variable "MT_RUN_AREA_HOME") "/megatest.config"))
(dbfile (conc (get-environment-variable "MT_RUN_AREA_HOME") "/megatest.db"))
(read-only (not (file-writable? dbfile)))
(dbstruct (db:setup #t)))
(debug:print 0 *default-log-port*
"WARNING: Version mismatch!\n"
" expected: " (common:version-signature) "\n"
" got: " (common:get-last-run-version))
(cond
((get-environment-variable "MT_SKIP_DB_MIGRATE") #t)
((and (common:file-exists? mtconf) (common:file-exists? dbfile) (not read-only)
(eq? (current-user-id)(file-owner mtconf))) ;; safe to run -cleanup-db
(debug:print 0 *default-log-port* " I see you are the owner of megatest.config, attempting to cleanup and reset to new version")
(handle-exceptions
exn
(begin
(debug:print 0 *default-log-port* "Failed to switch versions. exn=" exn)
(debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
(print-call-chain (current-error-port))
(exit 1))
(common:cleanup-db dbstruct)))
((not (common:file-exists? mtconf))
(debug:print 0 *default-log-port* " megatest.config does not exist in this area. Cannot proceed with megatest version migration.")
(exit 1))
((not (common:file-exists? dbfile))
(debug:print 0 *default-log-port* " megatest.db does not exist in this area. Cannot proceed with megatest version migration.")
(exit 1))
((not (eq? (current-user-id)(file-owner mtconf)))
(debug:print 0 *default-log-port* " You do not own megatest.db in this area. Cannot proceed with megatest version migration.")
(exit 1))
(read-only
(debug:print 0 *default-log-port* " You have read-only access to this area. Cannot proceed with megatest version migration.")
(exit 1))
(else
(debug:print 0 *default-log-port* " to switch versions you can run: \"megatest -cleanup-db\"")
(exit 1)))))))
;;======================================================================
;; (begin
;; (debug:print 0 *default-log-port* "ERROR: cannot migrate version unless on homehost. Exiting.")
;; (exit 1))))
(define (common:run-sync?)
(and (common:on-homehost?)
(args:get-arg "-server")))