;;======================================================================
;; Copyright 2006-2012, Matthew Welland.
;;
;; This file is part of Megatest.
;;
;; Megatest is free software: you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.
;;
;; Megatest is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;; GNU General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with Megatest. If not, see <http://www.gnu.org/licenses/>.
;;======================================================================
;; (use srfi-1 data-structures posix regex-case (prefix base64 base64:)
;; format dot-locking csv-xml z3 udp ;; sql-de-lite
;; hostinfo md5 message-digest typed-records directory-utils stack
;; matchable regex posix (srfi 18) extras ;; tcp
;; (prefix nanomsg nmsg:)
;; (prefix sqlite3 sqlite3:)
;; pkts (prefix dbi dbi:)
;; )
;;
;; (declare (unit common))
;; ;; (declare (uses commonmod))
;; ;; (import commonmod)
;;
;; (include "common_records.scm")
;; (require-library margs)
;; (include "margs.scm")
;; (define old-exit exit)
;;
;; (define (exit . code)
;; (if (null? code)
;; (old-exit)
;; (old-exit code)))
(define (common:with-queue-db mtconf proc #!key (use-lt #f)(toppath-in #f))
(let* ((pktsdirs (common:get-pkts-dirs mtconf use-lt))
(pktsdir (if pktsdirs (car pktsdirs) #f))
(toppath (or (configf:lookup mtconf "scratchdat" "toppath")
toppath-in))
(pdbpath (or (configf:lookup mtconf "setup" "pdbpath") pktsdir)))
(cond
((not (and pktsdir toppath pdbpath))
(debug:print 0 *default-log-port* "ERROR: settings are missing in your megatest.config for area management.")
(debug:print 0 *default-log-port* " you need to have pktsdirs in the [setup] section."))
((not (common:file-exists? pktsdir))
(debug:print 0 *default-log-port* "ERROR: pkts directory not found " pktsdir))
((not (equal? (file-owner pktsdir)(current-effective-user-id)))
(debug:print 0 *default-log-port* "ERROR: directory " pktsdir " is not owned by " (current-effective-user-name)))
(else
(let* ((pdb (open-queue-db pdbpath "pkts.db"
schema: '("CREATE TABLE groups (id INTEGER PRIMARY KEY,groupname TEXT, CONSTRAINT group_constraint UNIQUE (groupname));"))))
(proc pktsdirs pktsdir pdb)
(dbi:close pdb))))))
(define (common:load-pkts-to-db mtconf #!key (use-lt #f))
(common:with-queue-db
mtconf
(lambda (pktsdirs pktsdir pdb)
(for-each
(lambda (pktsdir) ;; look at all
(cond
((not (common:file-exists? pktsdir))
(debug:print 0 *default-log-port* "ERROR: packets directory " pktsdir " does not exist."))
((not (directory? pktsdir))
(debug:print 0 *default-log-port* "ERROR: packets directory path " pktsdir " is not a directory."))
((not (file-readable? pktsdir))
(debug:print 0 *default-log-port* "ERROR: packets directory path " pktsdir " is not readable."))
(else
(debug:print-info 0 *default-log-port* "Loading packets found in " pktsdir)
(let ((pkts (glob (conc pktsdir "/*.pkt"))))
(for-each
(lambda (pkt)
(let* ((uuid (cadr (string-match ".*/([0-9a-f]+).pkt" pkt)))
(exists (lookup-by-uuid pdb uuid #f)))
(if (not exists)
(let* ((pktdat (string-intersperse
(with-input-from-file pkt read-lines)
"\n"))
(apkt (pkt->alist pktdat))
(ptype (alist-ref 'T apkt)))
(add-to-queue pdb pktdat uuid (or ptype 'cmd) #f 0)
(debug:print 4 *default-log-port* "Added " uuid " of type " ptype " to queue"))
(debug:print 4 *default-log-port* "pkt: " uuid " exists, skipping...")
)))
pkts)))))
pktsdirs))
use-lt: use-lt))
;;======================================================================
;; N A N O M S G C L I E N T
;;======================================================================
;;
;;
;;
;; (define (common:send-dboard-main-changed)
;; (let* ((dashboard-ips (mddb:get-dashboards)))
;; (for-each
;; (lambda (ipadr)
;; (let* ((soc (common:open-nm-req (conc "tcp://" ipadr)))
;; (msg (conc "main " *toppath*))
;; (res (common:nm-send-receive-timeout soc msg)))
;; (if (not res) ;; couldn't reach that dashboard - remove it from db
;; (print "ERROR: couldn't reach dashboard " ipadr))
;; res))
;; dashboard-ips)))
;;
;;
;; ;;======================================================================
;; ;; D A S H B O A R D D B
;; ;;======================================================================
;;
;; (define (mddb:open-db)
;; (let* ((db (open-database (conc (get-environment-variable "HOME") "/.dashboard.db"))))
;; (set-busy-handler! db (busy-timeout 10000))
;; (for-each
;; (lambda (qry)
;; (exec (sql db qry)))
;; (list
;; "CREATE TABLE IF NOT EXISTS vars (id INTEGER PRIMARY KEY,key TEXT, val TEXT, CONSTRAINT varsconstraint UNIQUE (key));"
;; "CREATE TABLE IF NOT EXISTS dashboards (
;; id INTEGER PRIMARY KEY,
;; pid INTEGER,
;; username TEXT,
;; hostname TEXT,
;; ipaddr TEXT,
;; portnum INTEGER,
;; start_time TIMESTAMP DEFAULT (strftime('%s','now')),
;; CONSTRAINT hostport UNIQUE (hostname,portnum)
;; );"
;; ))
;; db))
;;
;; ;; register a dashboard
;; ;;
;; (define (mddb:register-dashboard port)
;; (let* ((pid (current-process-id))
;; (hostname (get-host-name))
;; (ipaddr (server:get-best-guess-address hostname))
;; (username (current-user-name)) ;; (car userinfo)))
;; (db (mddb:open-db)))
;; (print "Register monitor, pid: " pid ", hostname: " hostname ", port: " port ", username: " username)
;; (exec (sql db "INSERT OR REPLACE INTO dashboards (pid,username,hostname,ipaddr,portnum) VALUES (?,?,?,?,?);")
;; pid username hostname ipaddr port)
;; (close-database db)))
;;
;; ;; unregister a monitor
;; ;;
;; (define (mddb:unregister-dashboard host port)
;; (let* ((db (mddb:open-db)))
;; (print "Register unregister monitor, host:port=" host ":" port)
;; (exec (sql db "DELETE FROM dashboards WHERE hostname=? AND portnum=?;") host port)
;; (close-database db)))
;;
;; ;; get registered dashboards
;; ;;
;; (define (mddb:get-dashboards)
;; (let ((db (mddb:open-db)))
;; (query fetch-column
;; (sql db "SELECT ipaddr || ':' || portnum FROM dashboards;"))))
;;======================================================================
;; faux-lock is deprecated. Please use simple-lock below
;;
(define (common:faux-lock keyname #!key (wait-time 8)(allow-lock-steal #t))
(if (rmt:no-sync-get/default keyname #f) ;; do not be tempted to compare to pid. locking is a one-shot action, if already locked for this pid it doesn't actually count
(if (> wait-time 0)
(begin
(thread-sleep! 1)
(if (eq? wait-time 1) ;; only one second left, steal the lock
(begin
(debug:print-info 0 *default-log-port* "stealing lock for " keyname)
(common:faux-unlock keyname force: #t)))
(common:faux-lock keyname wait-time: (- wait-time 1)))
#f)
(begin
(rmt:no-sync-set keyname (conc (current-process-id)))
(equal? (conc (current-process-id)) (conc (rmt:no-sync-get/default keyname #f))))))
(define (common:faux-unlock keyname #!key (force #f))
(if (or force (equal? (conc (current-process-id)) (conc (rmt:no-sync-get/default keyname #f))))
(begin
(if (rmt:no-sync-get/default keyname #f) (rmt:no-sync-del! keyname))
#t)
#f))
(define (std-exit-procedure)
;;(common:telemetry-log-close)
(on-exit (lambda () 0))
;;(debug:print-info 13 *default-log-port* "std-exit-procedure called; *time-to-exit*="*time-to-exit*)
(let ((no-hurry (if (bdat-time-to-exit *bdat*) ;; hurry up
#f
(begin
(bdat-time-to-exit-set! *bdat* #t)
#t))))
(debug:print-info 4 *default-log-port* "starting exit process, finalizing databases.")
(if (and no-hurry (debug:debug-mode 18))
(rmt:print-db-stats))
(let ((th1 (make-thread (lambda () ;; thread for cleaning up, give it five seconds
(if *dbstruct-db* (db:close-all *dbstruct-db*)) ;; one second allocated
(if *task-db*
(let ((db (cdr *task-db*)))
(if (sqlite3:database? db)
(begin
(sqlite3:interrupt! db)
(sqlite3:finalize! db #t)
;; (vector-set! *task-db* 0 #f)
(set! *task-db* #f)))))
(http-client#close-idle-connections!)
;; (if (and *runremote*
;; (remote-conndat *runremote*))
;; (begin
;; (http-client#close-all-connections!))) ;; for http-client
(if (not (eq? *default-log-port* (current-error-port)))
(close-output-port *default-log-port*))
(set! *default-log-port* (current-error-port))) "Cleanup db exit thread"))
(th2 (make-thread (lambda ()
(debug:print 4 *default-log-port* "Attempting clean exit. Please be patient and wait a few seconds...")
(if no-hurry
(begin
(thread-sleep! 5)) ;; give the clean up few seconds to do it's stuff
(begin
(thread-sleep! 2)))
(debug:print 4 *default-log-port* " ... done")
)
"clean exit")))
(thread-start! th1)
(thread-start! th2)
(thread-join! th1)
)
)
0)
;; TODO: for multiple areas, we will have multiple watchdogs; and multiple threads to manage
(define (init-watchdog)
(set! (bdat-watchdog-set! *bdat*)
(make-thread
(lambda ()
(handle-exceptions
exn
(begin
(print-call-chain)
(print " message: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn))
(common:watchdog)))
"Watchdog thread"))
(start-watchdog))
(define (start-watchdog)
;;(if (not (args:get-arg "-server"))
;; (thread-start! *watchdog*)) ;; if starting a server; wait till we get to running state before kicking off watchdog
(let* ((no-watchdog-args
'("-list-runs"
"-testdata-csv"
"-list-servers"
"-server"
"-adjutant"
"-list-disks"
"-list-targets"
"-show-runconfig"
;;"-list-db-targets"
"-show-runconfig"
"-show-config"
"-show-cmdinfo"
"-cleanup-db"
))
(no-watchdog-argvals (list '("-archive" . "replicate-db")))
(start-watchdog-specail-arg-val (let loop ((hed (car no-watchdog-argvals))
(tail (cdr no-watchdog-argvals)))
;; (print "hed" hed " arg " (args:get-arg (car hed)) " val:" (cdr hed) " eql" (equal? (args:get-arg (car hed)) (cdr hed)))
(if (equal? (args:get-arg (car hed)) (cdr hed))
#f
(if (null? tail)
#t
(loop (car tail) (cdr tail))))))
(no-watchdog-args-vals (filter (lambda (x) x)
(map args:get-arg no-watchdog-args)))
(start-watchdog (and (null? no-watchdog-args-vals) start-watchdog-specail-arg-val)))
;(print "no-watchdog-args="no-watchdog-args "no-watchdog-args-vals="no-watchdog-args-vals " start-watchdog-specail-arg-val:" start-watchdog-specail-arg-val " start-watchdog:" start-watchdog)
(if start-watchdog
(thread-start! (bdat-watchdog *bdat*)))))
;;======================================================================
;; TODO: for multiple areas, we will have multiple watchdogs; and multiple threads to manage
(define (common:watchdog)
(debug:print-info 13 *default-log-port* "common:watchdog entered.")
(if (launch:setup)
(if (common:on-homehost?)
(let ((dbstruct (db:setup #t)))
(debug:print-info 13 *default-log-port* "after db:setup with dbstruct=" dbstruct)
(cond
((dbr:dbstruct-read-only dbstruct)
(debug:print-info 13 *default-log-port* "loading read-only watchdog")
(common:readonly-watchdog dbstruct))
(else
(debug:print-info 13 *default-log-port* "loading writable-watchdog.")
(let* ((syncer (or (configf:lookup *configdat* "server" "sync-method") "brute-force-sync")))
(cond
((equal? syncer "brute-force-sync")
(server:writable-watchdog-bruteforce dbstruct))
((equal? syncer "delta-sync")
(server:writable-watchdog-deltasync dbstruct))
(else
(debug:print-error 0 *default-log-port* "Unknown server/sync-method specified ("syncer") - valid values are brute-force-sync and delta-sync.")
(exit 1)))
;;(debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] Syncer started (method="syncer")")
)))
(debug:print-info 13 *default-log-port* "watchdog done."))
(debug:print-info 13 *default-log-port* "no need for watchdog on non-homehost"))))
;;======================================================================
;; currently the primary job of the watchdog is to run the sync back to megatest.db from the db in /tmp
;; if we are on the homehost and we are a server (by definition we are on the homehost if we are a server)
;;
(define (common:readonly-watchdog dbstruct)
(let ((just-testing 0.0501))
(thread-sleep! just-testing)) ;; (/ 1 20)) ;; 0.051) ;; delay for startup
(debug:print-info 13 *default-log-port* "common:readonly-watchdog entered.")
;; sync megatest.db to /tmp/.../megatst.db
(let* ((sync-cool-off-duration 3)
(golden-mtdb (dbr:dbstruct-mtdb dbstruct))
(golden-mtpath (db:dbdat-get-path golden-mtdb))
(tmp-mtdb (dbr:dbstruct-tmpdb dbstruct))
(tmp-mtpath (db:dbdat-get-path tmp-mtdb)))
(debug:print-info 0 *default-log-port* "Read-only periodic sync thread started.")
(let loop ((last-sync-time 0))
(debug:print-info 13 *default-log-port* "loop top tmp-mtpath="tmp-mtpath" golden-mtpath="golden-mtpath)
(let* ((duration-since-last-sync (- (current-seconds) last-sync-time)))
(debug:print-info 13 *default-log-port* "duration-since-last-sync="duration-since-last-sync)
(if (and (not (bdat-time-to-exit *bdat*))
(< duration-since-last-sync sync-cool-off-duration))
(thread-sleep! (- sync-cool-off-duration duration-since-last-sync)))
(if (not (bdat-time-to-exit *bdat*))
(let ((golden-mtdb-mtime (file-modification-time golden-mtpath))
(tmp-mtdb-mtime (file-modification-time tmp-mtpath)))
(if (> golden-mtdb-mtime tmp-mtdb-mtime)
(if (< golden-mtdb-mtime (- (current-seconds) 3)) ;; file has NOT been touched in past three seconds, this way multiple servers won't fight to sync back
(let ((res (db:multi-db-sync dbstruct 'old2new)))
(debug:print-info 13 *default-log-port* "rosync called, " res " records transferred."))))
(loop (current-seconds)))
#t)))
(debug:print-info 0 *default-log-port* "Exiting readonly-watchdog timer, (bdat-time-to-exit *bdat*) = " (bdat-time-to-exit *bdat*)" pid="(current-process-id)" mtpath="golden-mtpath)))
;;======================================================================
;; from metadat lookup MEGATEST_VERSION
;;
(define (common:get-last-run-version) ;; RADT => How does this work in send-receive function??; assume it is the value saved in some DB
(rmt:get-var "MEGATEST_VERSION"))
(define (common:get-last-run-version-number)
(string->number
(substring (common:get-last-run-version) 0 6)))
(define (common:set-last-run-version)
(rmt:set-var "MEGATEST_VERSION" (common:version-signature)))
;;======================================================================
;; postive number if megatest version > db version
;; negative number if megatest version < db version
(define (common:version-db-delta)
(- megatest-version (common:get-last-run-version-number)))
(define (common:version-changed?)
(not (equal? (common:get-last-run-version)
(common:version-signature))))
(define (common:api-changed?)
(not (equal? (substring (->string megatest-version) 0 4)
(substring (conc (common:get-last-run-version)) 0 4))))
;;======================================================================
;; Move me elsewhere ...
;; RADT => Why do we meed the version check here, this is called only if version misma
;;
(define (common:cleanup-db dbstruct #!key (full #f))
(apply db:multi-db-sync
dbstruct
'schema
;; 'new2old
'killservers
'adj-target
;; 'old2new
'new2old
;; (if full
'(dejunk)
;; '())
)
(if (common:api-changed?)
(common:set-last-run-version)))
;;======================================================================
;; Force a megatest cleanup-db if version is changed and skip-version-check not specified
;; Do NOT check if not on homehost!
;;
(define (common:exit-on-version-changed)
(if (common:on-homehost?)
(if (common:api-changed?)
(let* ((mtconf (conc (get-environment-variable "MT_RUN_AREA_HOME") "/megatest.config"))
(dbfile (conc (get-environment-variable "MT_RUN_AREA_HOME") "/megatest.db"))
(read-only (not (file-writable? dbfile)))
(dbstruct (db:setup #t)))
(debug:print 0 *default-log-port*
"WARNING: Version mismatch!\n"
" expected: " (common:version-signature) "\n"
" got: " (common:get-last-run-version))
(cond
((get-environment-variable "MT_SKIP_DB_MIGRATE") #t)
((and (common:file-exists? mtconf) (common:file-exists? dbfile) (not read-only)
(eq? (current-user-id)(file-owner mtconf))) ;; safe to run -cleanup-db
(debug:print 0 *default-log-port* " I see you are the owner of megatest.config, attempting to cleanup and reset to new version")
(handle-exceptions
exn
(begin
(debug:print 0 *default-log-port* "Failed to switch versions. exn=" exn)
(debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
(print-call-chain (current-error-port))
(exit 1))
(common:cleanup-db dbstruct)))
((not (common:file-exists? mtconf))
(debug:print 0 *default-log-port* " megatest.config does not exist in this area. Cannot proceed with megatest version migration.")
(exit 1))
((not (common:file-exists? dbfile))
(debug:print 0 *default-log-port* " megatest.db does not exist in this area. Cannot proceed with megatest version migration.")
(exit 1))
((not (eq? (current-user-id)(file-owner mtconf)))
(debug:print 0 *default-log-port* " You do not own megatest.db in this area. Cannot proceed with megatest version migration.")
(exit 1))
(read-only
(debug:print 0 *default-log-port* " You have read-only access to this area. Cannot proceed with megatest version migration.")
(exit 1))
(else
(debug:print 0 *default-log-port* " to switch versions you can run: \"megatest -cleanup-db\"")
(exit 1)))))))
;;======================================================================
;; (begin
;; (debug:print 0 *default-log-port* "ERROR: cannot migrate version unless on homehost. Exiting.")
;; (exit 1))))
(define (common:run-sync?)
(and (common:on-homehost?)
(args:get-arg "-server")))