;;======================================================================
;; Copyright 2017, Matthew Welland.
;;
;; This file is part of Megatest.
;;
;; Megatest is free software: you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.
;;
;; Megatest is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;; GNU General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with Megatest. If not, see <http://www.gnu.org/licenses/>.
;;======================================================================
;;======================================================================
;; Tasksmod:
;;
;; Put things here that don't fit anywhere else
;;======================================================================
(declare (unit tasksmod))
(declare (uses debugprint))
(declare (uses mtargs))
(declare (uses commonmod))
(declare (uses configfmod))
(declare (uses dbfile))
(declare (uses dbmod))
(declare (uses rmtmod))
(use srfi-69)
(module tasksmod
*
(import scheme)
(cond-expand
(chicken-4
(import chicken
ports
data-structures
extras
files
matchable
pathname-expand
posix
posix-extras
regex
regex-case
sparse-vectors
)
(use srfi-69))
(chicken-5
(import (prefix sqlite3 sqlite3:)
;; data-structures
;; extras
;; files
;; posix
;; posix-extras
chicken.base
chicken.condition
chicken.file
chicken.file.posix
chicken.io
chicken.pathname
chicken.port
chicken.process
chicken.process-context
chicken.process-context.posix
chicken.sort
chicken.string
chicken.time
chicken.time.posix
matchable
md5
message-digest
pathname-expand
regex
regex-case
system-information
)))
;; imports common to ck4 and ck5
(import srfi-1
srfi-13
srfi-18
srfi-69
typed-records
(prefix base64 base64:)
(prefix sqlite3 sqlite3:)
md5
message-digest
z3
debugprint
commonmod
configfmod
(prefix mtargs args:)
dbmod
dbfile
rmtmod
)
(include "task_records.scm")
(include "db_records.scm")
;;======================================================================
;; Tasks db
;;======================================================================
;; Return the directory used for the tasks/monitor db.
;;
;; The directory is taken from [setup] monitordir, then [setup] dbdir in
;; *configdat*, falling back to "<linktree>/.db". It is created (with
;; parents) when it does not exist; if creation raises, the error is
;; logged and the process exits with status 1.
(define (tasks:get-task-db-path)
  (let* ((setup-val (lambda (var) (configf:lookup *configdat* "setup" var)))
         (dbdir     (or (setup-val "monitordir")
                        (setup-val "dbdir")
                        (conc (common:get-linktree) "/.db"))))
    (handle-exceptions
        exn
        (begin
          (debug:print-error 0 *default-log-port* "Couldn't create path to " dbdir ", exn=" exn)
          (exit 1))
      (unless (directory? dbdir)
        (create-directory dbdir #t)))
    dbdir))
;; If file exists AND
;; file readable
;; ==> open it
;; If file exists AND
;; file NOT readable
;; ==> open in-mem version
;; If file NOT exists
;; ==> open in-mem version
;;
;; Open (once) the monitor db and return the cached (db-handle . dbpath) pair.
;;
;;   numretries - keyword arg; how many more times to retry after an
;;                exception while opening/initialising the db (default 4).
;;
;; Returns *task-db* (a pair of the sqlite3 handle and the db directory).
;; When every retry has failed the exception is logged and #f is returned.
;;
;; Side effects: sets *task-db*; may clear *db-write-access*; creates the
;; monitors, servers and clients tables if they do not exist.
;;
(define (tasks:open-db #!key (numretries 4))
  (if *task-db*
      *task-db*
      (handle-exceptions
          exn
          (begin
            (print-call-chain (current-error-port))
            (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
            (debug:print 5 *default-log-port* " exn=" (condition->list exn))
            (if (> numretries 0)
                (begin
                  (thread-sleep! 1)
                  ;; The count must be passed by keyword. Passing it
                  ;; positionally leaves numretries at its default on every
                  ;; call, so the retries would never run out.
                  (tasks:open-db numretries: (- numretries 1)))
                #f))
        (let* ((dbpath       (common:make-tmpdir-name *toppath* "")) ;; (tasks:get-task-db-path))
               (dbfile       (conc dbpath "/monitor.db"))
               ;; called for its effect only: wait up to about 10 seconds
               ;; for the journal to go away
               (avail        (tasks:wait-on-journal dbpath 10))
               (exists       (common:file-exists? dbpath))
               (write-access (file-write-access? dbpath))
               (mdb          (cond ;; what the hek is *toppath* doing here?
                              ((and (string? *toppath*) (file-write-access? *toppath*))
                               (sqlite3:open-database dbfile))
                              ((file-read-access? dbpath)
                               (sqlite3:open-database dbfile))
                              (else
                               (sqlite3:open-database ":memory:")))) ;; (never-give-up-open-db dbpath))
               (handler      (sqlite3:make-busy-timeout 36000)))
          ;; only unset so other db's also can use this control
          (if (and exists (not write-access))
              (set! *db-write-access* write-access))
          (sqlite3:set-busy-handler! mdb handler)
          (db:set-sync mdb) ;; (sqlite3:execute mdb (conc "PRAGMA synchronous = 0;"))
          ;;
          ;; TASKS QUEUE MOVED TO main.db - the tasks_queue table is no
          ;; longer created here.
          ;;
          (sqlite3:execute mdb "CREATE TABLE IF NOT EXISTS monitors (id INTEGER PRIMARY KEY,
pid INTEGER,
start_time TIMESTAMP,
last_update TIMESTAMP,
hostname TEXT,
username TEXT,
CONSTRAINT monitors_constraint UNIQUE (pid,hostname));")
          (sqlite3:execute mdb "CREATE TABLE IF NOT EXISTS servers (id INTEGER PRIMARY KEY,
pid INTEGER,
interface TEXT,
hostname TEXT,
port INTEGER,
pubport INTEGER,
start_time TIMESTAMP,
priority INTEGER,
state TEXT,
mt_version TEXT,
heartbeat TIMESTAMP,
transport TEXT,
run_id INTEGER);")
          ;; CONSTRAINT servers_constraint UNIQUE (pid,hostname,port));")
          (sqlite3:execute mdb "CREATE TABLE IF NOT EXISTS clients (id INTEGER PRIMARY KEY,
server_id INTEGER,
pid INTEGER,
hostname TEXT,
cmdline TEXT,
login_time TIMESTAMP,
logout_time TIMESTAMP DEFAULT -1,
CONSTRAINT clients_constraint UNIQUE (pid,hostname));")
          (set! *task-db* (cons mdb dbpath))
          *task-db*))))
;;======================================================================
;; Server and client management
;;======================================================================
;; make-vector-record tasks hostinfo id interface port pubport transport pid hostname
;; Field accessors for a hostinfo vector, laid out as:
;;   #(id interface port pubport transport pid hostname)
(define (tasks:hostinfo-get-id        hostinfo) (vector-ref hostinfo 0))
(define (tasks:hostinfo-get-interface hostinfo) (vector-ref hostinfo 1))
(define (tasks:hostinfo-get-port      hostinfo) (vector-ref hostinfo 2))
(define (tasks:hostinfo-get-pubport   hostinfo) (vector-ref hostinfo 3))
(define (tasks:hostinfo-get-transport hostinfo) (vector-ref hostinfo 4))
(define (tasks:hostinfo-get-pid       hostinfo) (vector-ref hostinfo 5))
(define (tasks:hostinfo-get-hostname  hostinfo) (vector-ref hostinfo 6))
;; #t when [server] required is set to "yes" in *configdat*, else #f.
;; run-id is accepted but not consulted.
(define (tasks:need-server run-id)
  (let ((required (configf:lookup *configdat* "server" "required")))
    (equal? required "yes")))
;; no elegance here ...
;;
;; Kill a server process via "nbfake kill" and compress its log file.
;;
;;   hostname    - host the server runs on (exported as TARGETHOST for nbfake)
;;   pid         - process id of the server
;;   kill-switch - keyword arg; extra switch text inserted into the kill
;;                 command (default "")
;;
;; The server log is looked for under "logs/" when that directory exists,
;; otherwise in the current directory, and is gzipped only if it is
;; actually present. TARGETHOST and TARGETHOST_LOGF are always unset
;; before returning.
;;
;; no elegance here ...
;;
(define (tasks:kill-server hostname pid #!key (kill-switch ""))
  (debug:print-info 0 *default-log-port* "Attempting to kill server process " pid " on host " hostname)
  (setenv "TARGETHOST" hostname)
  (let* ((logdir  (if (directory-exists? "logs") "logs/" ""))
         ;; build the log path from logdir so it is right with or without logs/
         (logfile (conc logdir "server-" pid "-" hostname ".log"))
         (gzfile  (conc logfile ".gz")))
    (setenv "TARGETHOST_LOGF" (conc logdir "server-kills.log"))
    (system (conc "nbfake kill " kill-switch " " pid))
    ;; give the process a moment to finish writing its log
    (thread-sleep! 0.5)
    (when (common:file-exists? logfile)
      ;; gzip refuses to overwrite an existing .gz, so remove it first
      (if (common:file-exists? gzfile) (delete-file gzfile))
      (system (conc "gzip " logfile)))
    (unsetenv "TARGETHOST_LOGF")
    (unsetenv "TARGETHOST")))
;;======================================================================
;; M O N I T O R S
;;======================================================================
;; Delete the monitors row belonging to this process (matched on the
;; current pid and host name).
(define (tasks:remove-monitor-record mdb)
  (let ((my-pid  (current-process-id))
        (my-host (get-host-name)))
    (sqlite3:execute mdb
                     "DELETE FROM monitors WHERE pid=? AND hostname=?;"
                     my-pid
                     my-host)))
;; Return all monitor rows as a list of vectors
;;   #(id pid start-time-text last-update-text hostname username)
;; ordered by last_update ascending. The two time columns are formatted
;; as local-time strings by the query itself.
(define (tasks:get-monitors mdb)
  (let ((rows '()))
    (sqlite3:for-each-row
     (lambda cols
       (set! rows (cons (list->vector cols) rows)))
     mdb
     "SELECT id,pid,strftime('%m/%d/%Y %H:%M',datetime(start_time,'unixepoch'),'localtime'),strftime('%m/%d/%Y %H:%M:%S',datetime(last_update,'unixepoch'),'localtime'),hostname,username FROM monitors ORDER BY last_update ASC;")
    ;; rows were consed in reverse; restore query order
    (reverse rows)))
;; Render a list of monitor records as a fixed-width text table: one
;; header line followed by one line per monitor, joined with newlines.
(define (tasks:monitors->text-table monitors)
  (let* ((fmtstr    "~4a~8a~20a~20a~10a~10a")
         (header    (format #f fmtstr "id" "pid" "start time" "last update" "hostname" "user"))
         (row->text (lambda (monitor)
                      (format #f fmtstr
                              (tasks:monitor-get-id monitor)
                              (tasks:monitor-get-pid monitor)
                              (tasks:monitor-get-start_time monitor)
                              (tasks:monitor-get-last_update monitor)
                              (tasks:monitor-get-hostname monitor)
                              (tasks:monitor-get-username monitor))))
         (body      (string-intersperse (map row->text monitors) "\n")))
    (conc header "\n" body)))
;; update the last_update field with the current time and
;; if any monitors appear dead, remove them
;; Heartbeat for this monitor plus cleanup of dead ones:
;;  1. stamp last_update with the current time on this process's row,
;;  2. log and collect every monitor not updated for more than 700 seconds,
;;  3. delete the collected rows.
(define (tasks:monitors-update mdb)
  (sqlite3:execute mdb "UPDATE monitors SET last_update=strftime('%s','now') WHERE pid=? AND hostname=?;"
                   (current-process-id)
                   (get-host-name))
  (let ((stale-ids '()))
    (sqlite3:for-each-row
     (lambda (id pid host last-update delta)
       (debug:print 0 *default-log-port* "Going to delete stale record for monitor with pid " pid " on host " host " last updated " delta " seconds ago")
       (set! stale-ids (cons id stale-ids)))
     mdb
     "SELECT id,pid,hostname,last_update,strftime('%s','now')-last_update AS delta FROM monitors WHERE delta > 700;")
    ;; the DELETE is issued even when nothing is stale (the id list is then '')
    (let ((quoted-ids (string-intersperse (map conc stale-ids) "','")))
      (sqlite3:execute mdb (conc "DELETE FROM monitors WHERE id IN ('" quoted-ids "');")))))
;; Insert a monitors row for this process (pid, host name, user name),
;; with start_time and last_update both set to now. port is only
;; included in the log message; it is not stored.
(define (tasks:register-monitor db port)
  (let ((pid      (current-process-id))
        (hostname (get-host-name))
        (username (car (user-information (current-user-id)))))
    (debug:print 0 *default-log-port* "Register monitor, pid: " pid ", hostname: " hostname ", port: " port ", username: " username)
    (sqlite3:execute
     db
     "INSERT INTO monitors (pid,start_time,last_update,hostname,username) VALUES (?,strftime('%s','now'),strftime('%s','now'),?,?);"
     pid
     hostname
     username)))
;; Count the monitors owned by the current user that are still alive,
;; i.e. whose last_update is within the last 300 seconds.
;;
;; The comparison must be `>`: a monitor is alive when its heartbeat is
;; newer than (now - 300). Using `<` would count only the stale ones.
;;
;; Returns the count as a number (0 when there are none).
(define (tasks:get-num-alive-monitors mdb)
  (let ((res 0))
    (sqlite3:for-each-row
     (lambda (count)
       (set! res count))
     mdb
     "SELECT count(id) FROM monitors WHERE last_update > (strftime('%s','now') - 300) AND username=?;"
     (car (user-information (current-user-id))))
    res))
;;
;; DISABLED: the leading `#;` datum comment makes the reader skip this whole
;; form, so tasks:start-monitor is not defined by this module.
;;
;; Intent, as written: unless more than two monitors are already alive,
;; register this process as a monitor and loop, processing the task queue
;; whenever megatest.db has been modified and refreshing this monitor's
;; last_update roughly every 240 seconds.
;;
;; NOTE(review): the body would not run as written if re-enabled:
;;  - it calls task:register-monitor with one argument, while this file
;;    defines tasks:register-monitor taking (db port);
;;  - it reads megatestdbpath, which is not bound here (the let* binds
;;    megatestdb);
;;  - tasks:process-queue appears only as commented-out code in the visible
;;    part of this file;
;;  - both branches of the final `if` call loop again, so the loop has no
;;    exit, and there is no sleep between iterations.
#;(define (tasks:start-monitor db mdb)
(if (> (tasks:get-num-alive-monitors mdb) 2) ;; have two running, no need for more
(debug:print-info 1 *default-log-port* "Not starting monitor, already have more than two running")
(let* ((megatestdb (conc *toppath* "/megatest.db"))
(monitordbf (conc (db:dbfile-path #f) "/monitor.db"))
(last-db-update 0)) ;; (file-modification-time megatestdb)))
(task:register-monitor mdb)
(let loop ((count 0)
(next-touch 0)) ;; next-touch is the time where we need to update last_update
;; if the db has been modified we'd best look at the task queue
(let ((modtime (file-modification-time megatestdbpath )))
(if (> modtime last-db-update)
(tasks:process-queue db)) ;; BROKEN. mdb last-db-update megatestdb next-touch))
;; WARNING: Possible race condition here!!
;; should this update be immediately after the task-get-action call above?
(if (> (current-seconds) next-touch)
(begin
(tasks:monitors-update mdb)
(loop (+ count 1)(+ (current-seconds) 240)))
(loop (+ count 1) next-touch)))))))
;;======================================================================
;; T A S K S Q U E U E
;;
;; NOTE:: These operate on tasks_queue which is in main.db
;;
;;======================================================================
;; NOTE: It might be good to add one more layer of checking to ensure
;; that no task gets run in parallel.
;; id INTEGER PRIMARY KEY,
;; action TEXT DEFAULT '',
;; owner TEXT,
;; state TEXT DEFAULT 'new',
;; target TEXT DEFAULT '',
;; name TEXT DEFAULT '',
;; testpatt TEXT DEFAULT '',
;; keylock TEXT,
;; params TEXT,
;; creation_time TIMESTAMP DEFAULT (strftime('%s','now')),
;; execution_time TIMESTAMP);
;; Build a target string from a list of key records and a hash table of
;; key values.
;;
;;   keys       - non-empty list of vectors; element 0 of each is the key name
;;   key-params - hash table mapping key name -> value
;;
;; The values are looked up in key order (missing keys yield "") and joined
;; with "/". With a single key the raw looked-up value is returned as is.
(define (keys:key-vals-hash->target keys key-params)
  (let ((value-of (lambda (key)
                    (hash-table-ref/default key-params (vector-ref key 0) ""))))
    (let loop ((target    (value-of (car keys)))
               (remaining (cdr keys)))
      (if (null? remaining)
          target
          (loop (conc target "/" (value-of (car remaining)))
                (cdr remaining))))))
;; for use from the gui, not ported
;;
;; (define (tasks:add-from-params mdb action keys key-params var-params)
;; (let ((target (keys:key-vals-hash->target keys key-params))
;; (owner (car (user-information (current-user-id))))
;; (runname (hash-table-ref/default var-params "runname" #f))
;; (testpatts (hash-table-ref/default var-params "testpatts" "%"))
;; (params (hash-table-ref/default var-params "params" "")))
;; (tasks:add mdb action owner target runname testpatts params)))
;; return one task from those who are 'new' OR 'waiting' AND more than 10sec old
;;
;; Claim one task from tasks_queue for this process and return it.
;;
;; A task is eligible when its state is 'new', 'reset', or 'waiting' with
;; an execution_time more than 10 seconds old. One eligible task, picked
;; at random, is stamped with this process's keylock
;; ("<pid>-<hostname>-<username>"). The oldest task carrying that keylock
;; is then read back, marked 'inprogress' with execution_time = now, and
;; returned as a vector. Returns #f when there is nothing to do.
;;
;; NOTE(review): the read-back SELECT names columns test and item, while
;; the schema comment earlier in this file lists testpatt - confirm
;; against the real tasks_queue definition in main.db.
(define (tasks:snag-a-task dbstruct)
  (let ((task    #f)
        (lock-id (conc (current-process-id) "-" (get-host-name) "-" (car (user-information (current-user-id))))))
    (db:with-db
     dbstruct #f #t
     (lambda (dat db)
       ;; step 1: tag one randomly chosen eligible task with our keylock
       (sqlite3:execute
        db
        "UPDATE tasks_queue SET keylock=? WHERE id IN
(SELECT id FROM tasks_queue
WHERE state='new' OR
(state='waiting' AND (strftime('%s','now')-execution_time) > 10) OR
state='reset'
ORDER BY RANDOM() LIMIT 1);" lock-id)
       ;; step 2: fetch the oldest task that carries our keylock
       (sqlite3:for-each-row
        (lambda cols
          (set! task (list->vector cols)))
        db
        "SELECT id,action,owner,state,target,name,test,item,params,creation_time,execution_time FROM tasks_queue WHERE keylock=? ORDER BY execution_time ASC LIMIT 1;" lock-id)
       ;; step 3: yep, have work to be done - mark it and hand it back
       (and task
            (begin
              (sqlite3:execute db "UPDATE tasks_queue SET state='inprogress',execution_time=strftime('%s','now') WHERE id=?;"
                               (tasks:task-get-id task))
              task))))))
;; Put tasks that look stuck back into the queue: at most two tasks that
;; have been 'inprogress' for more than 700 seconds (longest-running
;; first) get their state set to 'reset'.
(define (tasks:reset-stuck-tasks dbstruct)
  (let ((stuck-ids '()))
    (db:with-db
     dbstruct #f #t
     (lambda (dat db)
       (sqlite3:for-each-row
        (lambda (id delta)
          (set! stuck-ids (cons id stuck-ids)))
        db
        "SELECT id,strftime('%s','now')-execution_time AS delta FROM tasks_queue WHERE state='inprogress' AND delta>700 ORDER BY delta DESC LIMIT 2;")
       ;; the UPDATE is issued even when no ids were found (list is then '')
       (let ((quoted-ids (string-intersperse (map conc stuck-ids) "','")))
         (sqlite3:execute
          db
          (conc "UPDATE tasks_queue SET state='reset' WHERE id IN ('" quoted-ids "');")))))))
;; return all tasks in the tasks_queue table
;;
;; Return every row of tasks_queue as a list of vectors.
;;
;; types and states are accepted but currently unused: the filtering
;; they were meant for is commented out in the query. Rows are selected
;; newest first (creation_time DESC) and consed onto the result, so the
;; returned list runs oldest first.
(define (tasks:get-tasks dbstruct types states)
  (let ((tasks '()))
    (db:with-db
     dbstruct #f #f
     (lambda (dbdat db)
       (sqlite3:for-each-row
        (lambda cols
          (set! tasks (cons (list->vector cols) tasks)))
        db
        (conc "SELECT id,action,owner,state,target,name,test,item,params,creation_time,execution_time
FROM tasks_queue "
              ;; WHERE
              ;; state IN " statesstr " AND
              ;; action IN " actionsstr
              " ORDER BY creation_time DESC;"))
       tasks))))
;; remove tasks given by a string of numbers comma separated
;; Delete tasks from tasks_queue.
;;
;;   task-ids - string of comma-separated numeric ids. It is spliced
;;              verbatim into the SQL, so it must come from a trusted source.
(define (tasks:remove-queue-entries dbstruct task-ids)
  (let ((delete-sql (lambda ()
                      (conc "DELETE FROM tasks_queue WHERE id IN (" task-ids ");"))))
    (db:with-db
     dbstruct #f #t
     (lambda (dbdat db)
       (sqlite3:execute db (delete-sql))))))
;; (define (tasks:process-queue dbstruct)
;; (let* ((task (tasks:snag-a-task dbstruct))
;; (action (if task (tasks:task-get-action task) #f)))
;; (if action (print "tasks:process-queue task: " task))
;; (if action
;; (case (string->symbol action)
;; ((run) (tasks:start-run dbstruct task))
;; ((remove) (tasks:remove-runs dbstruct task))
;; ((lock) (tasks:lock-runs dbstruct task))
;; ;; ((monitor) (tasks:start-monitor db task))
;; #;((rollup) (tasks:rollup-runs dbstruct task))
;; ((updatemeta)(tasks:update-meta dbstruct task))
;; #;((kill) (tasks:kill-monitors dbstruct task))))))
;; Render a list of task records as a fixed-width text table: a header
;; line followed by one line per task, joined with newlines.
(define (tasks:tasks->text tasks)
  (let* ((fmtstr     "~10a~10a~10a~12a~20a~12a~12a~10a")
         (header     (format #f fmtstr "id" "action" "owner" "state" "target" "runname" "testpatts" "params"))
         (task->text (lambda (task)
                       (format #f fmtstr
                               (tasks:task-get-id task)
                               (tasks:task-get-action task)
                               (tasks:task-get-owner task)
                               (tasks:task-get-state task)
                               (tasks:task-get-target task)
                               (tasks:task-get-name task)
                               (tasks:task-get-testpatt task)
                               ;; (tasks:task-get-item task)
                               (tasks:task-get-params task))))
         (body       (string-intersperse (map task->text tasks) "\n")))
    (conc header "\n" body)))
;; Set the state column of the tasks_queue row with the given id.
(define (tasks:set-state dbstruct task-id state)
  (let ((update-state
         (lambda (dbdat db)
           (sqlite3:execute db "UPDATE tasks_queue SET state=? WHERE id=?;"
                            state
                            task-id))))
    (db:with-db dbstruct #f #t update-state)))
;;======================================================================
;; Access using task key (stored in params: (hash-table->alist flags) hostname pid)
;;======================================================================
;; Look up the id of the first tasks_queue row whose params column
;; matches task-params (used as a LIKE pattern). Returns #f when the
;; query raises, which includes the no-row case of first-result.
(define (tasks:param-key->id dbstruct task-params)
  (let ((lookup-id
         (lambda (dbdat db)
           (handle-exceptions
               exn
               #f
             (sqlite3:first-result db "SELECT id FROM tasks_queue WHERE params LIKE ?;"
                                   task-params)))))
    (db:with-db dbstruct #f #f lookup-id)))
;; Fetch the first tasks_queue row matching all four LIKE patterns.
;;
;;   param-key   - LIKE pattern for the params column
;;   state-patt  - LIKE pattern for the state column
;;   action-patt - LIKE pattern for the action column
;;   test-patt   - LIKE pattern for the testpatt column
;;
;; Returns the row as a list
;;   (id action owner state target name testpatt keylock params)
;; or '() when the query raises (including when no row matches).
;;
;; The query needs its FROM clause: without it the statement never
;; prepares, so the handler would fire every time and always yield '().
(define (tasks:get-records-given-param-key dbstruct param-key state-patt action-patt test-patt)
  (db:with-db
   dbstruct #f #f
   (lambda (dbdat db)
     (handle-exceptions
         exn
         '()
       (sqlite3:first-row db "SELECT id,action,owner,state,target,name,testpatt,keylock,params FROM tasks_queue WHERE
params LIKE ? AND state LIKE ? AND action LIKE ? AND testpatt LIKE ?;"
                          param-key state-patt action-patt test-patt)))))
;; kill any runner processes (i.e. processes handling -runtests) that match target/runname
;;
;; do a remote call to get the task queue info but do the killing as self here.
;;
;; Kill the runner processes (those handling -runtests) recorded in the
;; task queue for target/run-name/testpatt.
;;
;; The queue records are fetched with a remote call; the killing is done
;; from this process. For each record the host and pid are parsed from the
;; end of the params string (element 8 of the record):
;;  - same host: SIGINT, wait 5 seconds, then SIGKILL if still alive;
;;  - other host: "nbfake kill <pid>" with TARGETHOST set to that host.
;;
;; Any TARGETHOST value that was set on entry is restored afterwards. It
;; must be restored *instead of* being unset; unsetting after the restore
;; would throw the caller's value away.
;;
;; NOTE(review): records are read with list-ref, so each is presumably a
;; list - confirm against rmt:tasks-find-task-queue-records.
(define (tasks:kill-runner target run-name testpatt)
  (let ((records    (rmt:tasks-find-task-queue-records target run-name testpatt "running" "run-tests"))
        (hostpid-rx (regexp "\\s+(\\w+)\\s+(\\d+)$"))) ;; host pid is at end of param string
    (if (null? records)
        (debug:print 0 *default-log-port* "No run launching processes found for " target " / " run-name " with testpatt " (or testpatt "* no testpatt specified! *"))
        (debug:print 0 *default-log-port* "Found " (length records) " run(s) to kill."))
    (for-each
     (lambda (record)
       (let* ((param-key (list-ref record 8))
              (match-dat (string-search hostpid-rx param-key)))
         (if match-dat
             (let ((hostname (cadr match-dat))
                   (pid      (string->number (caddr match-dat))))
               (debug:print 0 *default-log-port* "Sending SIGINT to process " pid " on host " hostname)
               (if (equal? (get-host-name) hostname)
                   ;; local process: signal it directly
                   (if (process:alive? pid)
                       (handle-exceptions
                           exn
                           (begin
                             (debug:print 0 *default-log-port* "Kill of process " pid " on host " hostname " failed.")
                             (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
                             #t)
                         (process-signal pid signal/int)
                         (thread-sleep! 5)
                         (if (process:alive? pid)
                             (process-signal pid signal/kill))))
                   ;; remote process: let nbfake run the kill on the target host
                   (let ((old-targethost (getenv "TARGETHOST")))
                     (setenv "TARGETHOST" hostname)
                     (setenv "TARGETHOST_LOGF" "server-kills.log")
                     (system (conc "nbfake kill " pid))
                     (if old-targethost
                         (setenv "TARGETHOST" old-targethost)
                         (unsetenv "TARGETHOST"))
                     (unsetenv "TARGETHOST_LOGF"))))
             (debug:print-error 0 *default-log-port* "no record or improper record for " target "/" run-name " in tasks_queue in main.db"))))
     records)))
;; (define (tasks:start-run dbstruct mdb task)
;; (let ((flags (make-hash-table)))
;; (hash-table-set! flags "-rerun" "NOT_STARTED")
;; (if (not (string=? (tasks:task-get-params task) ""))
;; (hash-table-set! flags "-setvars" (tasks:task-get-params task)))
;; (print "Starting run " task)
;; ;; sillyness, just call the damn routine with the task vector and be done with it. FIXME SOMEDAY
;; (runs:run-tests db
;; (tasks:task-get-target task)
;; (tasks:task-get-name task)
;; (tasks:task-get-test task)
;; (tasks:task-get-item task)
;; (tasks:task-get-owner task)
;; flags)
;; (tasks:set-state mdb (tasks:task-get-id task) "waiting")))
;;
;; (define (tasks:rollup-runs db mdb task)
;; (let* ((flags (make-hash-table))
;; (keys (db:get-keys db))
;; (keyvals (keys:target-keyval keys (tasks:task-get-target task))))
;; ;; (hash-table-set! flags "-rerun" "NOT_STARTED")
;; (print "Starting rollup " task)
;; ;; sillyness, just call the damn routine with the task vector and be done with it. FIXME SOMEDAY
;; (runs:rollup-run db
;; keys
;; keyvals
;; (tasks:task-get-name task)
;; (tasks:task-get-owner task))
;; (tasks:set-state mdb (tasks:task-get-id task) "waiting")))
;;======================================================================
;; S Y N C T O P O S T G R E S Q L
;;======================================================================
;; In the spirit of "dump your junk in the tasks module" I'll put the
;; sync to postgres here for now.
;; attempt to automatically set up an area. call only if get area by path
;; returns naught of interest
;;
;; Register this area in the postgres db, trying alternative names when
;; adding fails.
;;
;;   dbh       - pgdb handle
;;   configdat - config used to look up [setup] area-name
;;   toppath   - keyword arg; area path to register (defaults to *toppath*
;;               at call time when #f)
;;
;; Names are tried in this order:
;;   1. the configured / derived area name
;;   2. "<user>_<name from step 1>"
;;   3. "<first 4 chars of the area path signature><name from step 2>"
;; Returns the true value from pgdb:add-area for the first name that
;; works, or #f when all three attempts fail. An exception from
;; pgdb:add-area is logged and counts as failure.
;;
;; could I safely put *toppath* in for the default for toppath? when would it be evaluated?
(define (tasks:set-area dbh configdat #!key (toppath #f))
  (let try-name ((area-name (or (configf:lookup configdat "setup" "area-name")
                                (common:get-area-name)))
                 (modifier  'none))
    (let ((added (handle-exceptions
                     exn
                     (begin
                       (debug:print 0 *default-log-port* "ERROR: cannot create area entry, " ((condition-property-accessor 'exn 'message) exn))
                       #f) ;; FIXME: I don't care for now but I should look at *why* there was an exception
                   (pgdb:add-area dbh area-name (or toppath *toppath*)))))
      (cond
       (added added)
       ((eq? modifier 'none)
        (try-name (conc (current-user-name) "_" area-name) 'user))
       ((eq? modifier 'user)
        (try-name (conc (substring (common:get-area-path-signature) 0 4)
                        area-name)
                  'areasig))
       (else #f))))) ;; give up
;; Print one line per run: target, run name and run time, separated by
;; saperator.
;;
;;   run-times - list of vectors #(run-name run-time target)
;;   saperator - separator placed between the three fields
(define (task:print-runtime run-times saperator)
  (for-each
   (lambda (info)
     (print (vector-ref info 2) saperator
            (vector-ref info 0) saperator
            (vector-ref info 1)))
   run-times))
;; Print all run times on one line as a bracketed, comma-separated list
;; of {target:..., run-name:..., run-time:...} objects.
;;
;;   run-times - NON-EMPTY list of vectors #(run-name run-time target);
;;               an empty list is an error (car of '()).
;;
;; Keys and values are emitted unquoted, and the last object is written
;; as ",run-name:" with no space after the comma, unlike the earlier ones.
(define (task:print-runtime-as-json run-times)
  (let loop ((entry   (car run-times))
             (pending (cdr run-times))
             (acc     ""))
    (let ((run-name (vector-ref entry 0))
          (run-time (vector-ref entry 1))
          (target   (vector-ref entry 2))
          ;; everything emitted so far, plus a separating comma if non-empty
          (prefix   (if (equal? acc "") acc (conc acc ","))))
      (cond
       ((null? pending)
        (print "[" prefix "{target:" target ",run-name:" run-name ", run-time:" run-time "}]"))
       (else
        (loop (car pending)
              (cdr pending)
              (conc prefix "{target:" target ", run-name:" run-name ", run-time:" run-time "}")))))))
;; Fetch run times matching -run-patt / -target-patt (each defaulting to
;; "%") and print them in the format chosen by -dumpmode: "json", "csv"
;; (comma separated) or anything else (space separated).
;; Logs "Data not found!!" and exits when nothing matches.
(define (task:get-run-times)
  (let* ((arg-or-any  (lambda (flag) (or (args:get-arg flag) "%")))
         (run-patt    (arg-or-any "-run-patt"))
         (target-patt (arg-or-any "-target-patt"))
         (run-times   (rmt:get-run-times run-patt target-patt)))
    (when (= 0 (length run-times))
      (debug:print 0 *default-log-port* "Data not found!!")
      (exit))
    (let ((dumpmode (args:get-arg "-dumpmode")))
      (cond
       ((equal? dumpmode "json") (task:print-runtime-as-json run-times))
       ((equal? dumpmode "csv")  (task:print-runtime run-times ","))
       (else                     (task:print-runtime run-times " "))))))
;; Print one line per test: test name, item path and test time,
;; separated by saperator. An empty item path is shown as "N/A".
;;
;;   test-times - list of vectors #(test-name item-path test-time)
;;   saperator  - separator placed between the three fields
(define (task:print-testtime test-times saperator)
  (for-each
   (lambda (info)
     (let* ((item-path (vector-ref info 1))
            (shown     (if (zero? (string-length item-path)) "N/A" item-path)))
       (print (vector-ref info 0) saperator shown saperator (vector-ref info 2))))
   test-times))
;; Print all test times on one line as a bracketed, comma-separated list
;; of {test-name:..., item-path:..., test-time:...} objects.
;;
;;   test-times - NON-EMPTY list of vectors #(test-name item-path test-time);
;;                an empty list is an error (car of '()).
;;
;; Keys and values are emitted unquoted.
(define (task:print-testtime-as-json test-times)
  (let loop ((entry   (car test-times))
             (pending (cdr test-times))
             (acc     ""))
    (let* ((test-name (vector-ref entry 0))
           (test-time (vector-ref entry 2))
           (item      (vector-ref entry 1))
           ;; everything emitted so far, plus a separating comma if non-empty
           (prefix    (if (equal? acc "") acc (conc acc ",")))
           (object    (lambda ()
                        (conc "{test-name:" test-name ", item-path:" item ", test-time:" test-time "}"))))
      (if (null? pending)
          (print "[" prefix (object) "]")
          (loop (car pending) (cdr pending) (conc prefix (object)))))))
;; Fetch and print the test times for one run.
;;
;; Requires -runname and -target on the command line; neither may contain
;; the "%" wildcard. The arguments are validated BEFORE the remote query
;; is issued, so a missing or invalid argument yields its error message
;; rather than a remote call made with #f.
;;
;; Output format follows -dumpmode: "json", "csv" (comma separated) or
;; anything else (space separated). Each failed check logs a message and
;; exits; an empty result logs "Data not found!!" and exits.
(define (task:get-test-times)
  (let ((runname (args:get-arg "-runname"))
        (target  (args:get-arg "-target")))
    ;; argument validation - every failing branch exits
    (if (not runname)
        (begin
          (debug:print 0 *default-log-port* "Error: Missing argument -runname")
          (exit)))
    (if (string-contains runname "%")
        (begin
          (debug:print 0 *default-log-port* "Error: Invalid runname, '%' not allowed (" runname ") ")
          (exit)))
    (if (not target)
        (begin
          (debug:print 0 *default-log-port* "Error: Missing argument -target")
          (exit)))
    (if (string-contains target "%")
        (begin
          (debug:print 0 *default-log-port* "Error: Invalid target, '%' not allowed (" target ") ")
          (exit)))
    ;; arguments are sane - now it is safe to query
    (let ((test-times (rmt:get-test-times runname target)))
      (if (eq? (length test-times) 0)
          (begin
            (debug:print 0 *default-log-port* "Data not found!!")
            (exit)))
      (let ((dumpmode (args:get-arg "-dumpmode")))
        (cond
         ((equal? dumpmode "json") (task:print-testtime-as-json test-times))
         ((equal? dumpmode "csv")  (task:print-testtime test-times ","))
         (else                     (task:print-testtime test-times " ")))))))
;; gets mtpg-run-id and syncs the record if different
;;
;; Map a local run-id to the id of the matching run in the postgres db
;; (pgdb), refreshing or inserting the pgdb run record as needed.
;;
;;   dbh         - pgdb handle, passed through to the pgdb:* calls
;;   cached-info - hash table; its 'runs entry is a hash table mapping
;;                 local run-id -> pgdb run id
;;   run-id      - local run id
;;   area-info   - vector; element 0 is used as the area id
;;   smallest-last-update-time - hash table; its "smallest-time" entry is
;;                 lowered to this run's last_update when the run is newer
;;                 than the pgdb copy (or is newly inserted) and that time
;;                 is smaller than the current entry
;;
;; Returns the pgdb run id (from the cache, from a lookup, or via one
;; recursive call after a successful insert). Returns #f when no pgdb run
;; is found and either the local run has no state / is "deleted", or the
;; insert fails.
(define (tasks:run-id->mtpg-run-id dbh cached-info run-id area-info smallest-last-update-time)
(let* ((runs-ht (hash-table-ref cached-info 'runs))
(runinf (hash-table-ref/default runs-ht run-id #f))
(area-id (vector-ref area-info 0)))
(if runinf
runinf ;; already cached
(let* ((run-dat (rmt:get-run-info run-id)) ;; NOTE: get-run-info returns a vector < row header >
(run-name (rmt:get-run-name-from-id run-id))
(row (db:get-rows run-dat)) ;; yes, this returns a single row
(header (db:get-header run-dat))
(state (db:get-value-by-header row header "state"))
(status (db:get-value-by-header row header "status"))
(owner (db:get-value-by-header row header "owner"))
(event-time (db:get-value-by-header row header "event_time"))
(comment (db:get-value-by-header row header "comment"))
(fail-count (db:get-value-by-header row header "fail_count"))
(pass-count (db:get-value-by-header row header "pass_count"))
(db-contour (db:get-value-by-header row header "contour"))
;; contour: the run's own non-empty contour string if it has one, else the
;; -contour argument. Only computed when -prepend-contour is given; the
;; outer `if` has no else branch, so it is unspecified otherwise.
(contour (if (args:get-arg "-prepend-contour")
(if (and db-contour (not (equal? db-contour "")) (string? db-contour ))
(begin
(debug:print-info 10 *default-log-port* "db-contour" db-contour)
db-contour)
(args:get-arg "-contour"))))
(run-tag (if (args:get-arg "-run-tag")
(args:get-arg "-run-tag")
""))
(last-update (db:get-value-by-header row header "last_update"))
;; keytarg: the key names joined with "/", prefixed with
;; "MT_CONTOUR/MT_AREA/" when either prefixing option is in effect
(keytarg (if (or (args:get-arg "-prepend-contour") (args:get-arg "-prefix-target"))
(conc "MT_CONTOUR/MT_AREA/" (string-intersperse (rmt:get-keys) "/")) (string-intersperse (rmt:get-keys) "/"))) ;; e.g. version/iteration/platform
(base-target (rmt:get-target run-id))
;; target: base-target, prefixed with the -prefix-target value or with
;; "<contour>/<area-name>/" when either prefixing option is in effect
(target (if (or (args:get-arg "-prepend-contour") (args:get-arg "-prefix-target"))
(conc (or (args:get-arg "-prefix-target") (conc contour "/" (common:get-area-name) "/")) base-target) base-target)) ;; e.g. v1.63/a3e1/ubuntu
(spec-id (pgdb:get-ttype dbh keytarg))
(publish-time (if (args:get-arg "-cp-eventtime-to-publishtime")
event-time
(current-seconds)))
;; only look in pgdb when both the run name and the target are known
(new-run-id (if (and run-name base-target) (pgdb:get-run-id dbh spec-id target run-name area-id) #f)))
(if new-run-id
;; case 1: the run already exists in pgdb - cache the id and refresh the record
(begin ;; let ((run-record (pgdb:get-run-info dbh new-run-id))
(hash-table-set! runs-ht run-id new-run-id)
;; ensure key fields are up to date
;; if last_update == pgdb_last_update do not update smallest-last-update-time
(let* ((pgdb-last-update (pgdb:get-run-last-update dbh new-run-id))
(smallest-time (hash-table-ref/default smallest-last-update-time "smallest-time" #f)))
(if (and (> last-update pgdb-last-update) (or (not smallest-time) (< last-update smallest-time)))
(hash-table-set! smallest-last-update-time "smallest-time" last-update)))
(pgdb:refresh-run-info
dbh
new-run-id
state status owner event-time comment fail-count pass-count area-id last-update publish-time)
(debug:print-info 4 *default-log-port* (conc "Working on run-id " run-id " pgdb-id " new-run-id))
(if (not (equal? run-tag ""))
(task:add-run-tag dbh new-run-id run-tag))
new-run-id)
;; case 2: not in pgdb, and the local run has no state or is deleted - nothing to sync
(if (or (not state) (equal? state "deleted"))
(begin
(debug:print-info 1 *default-log-port* "Warning: Run with id " run-id " was created after previous sync and deleted before the sync") #f)
;; case 3: not in pgdb yet - insert it; an exception during the insert is
;; logged and treated as failure (#f)
(if (handle-exceptions
exn
(begin (print-call-chain)
(debug:print 0 *default-log-port* ((condition-property-accessor 'exn 'message) exn))
#f)
(pgdb:insert-run
dbh
spec-id target run-name state status owner event-time comment fail-count pass-count area-id last-update publish-time))
(let* ((smallest-time (hash-table-ref/default smallest-last-update-time "smallest-time" #f)))
(if (or (not smallest-time) (< last-update smallest-time))
(hash-table-set! smallest-last-update-time "smallest-time" last-update))
;; re-enter: the lookup should now find the new record (case 1), which
;; caches and returns its id
(tasks:run-id->mtpg-run-id dbh cached-info run-id area-info smallest-last-update-time))
#f)))))))
;; Attach tag to the pgdb run run-id, creating the tag first when it does
;; not exist yet.
;;
;; Exceptions from the pgdb calls are logged at info level 1 and turned
;; into #f. If the tag can neither be found nor created, the tagging
;; step fails on the missing tag record and is logged the same way.
(define (task:add-run-tag dbh run-id tag)
  (let* ((log-failure (lambda (exn)
                        (debug:print-info 1 *default-log-port* ((condition-property-accessor 'exn 'message) exn))
                        #f))
         (existing    (pgdb:get-tag-info-by-name dbh tag))
         (tag-info    (cond
                       (existing existing)
                       ;; tag unknown: create it, then read the new record back
                       ((handle-exceptions
                            exn
                            (log-failure exn)
                          (pgdb:insert-tag dbh tag))
                        (pgdb:get-tag-info-by-name dbh tag))
                       (else existing))))
    ;;add to area_tags
    (handle-exceptions
        exn
        (log-failure exn)
      (if (pgdb:is-run-taged-with-a-tag dbh (vector-ref tag-info 0) run-id)
          (void)
          (pgdb:insert-run-tag dbh (vector-ref tag-info 0) run-id)))))
;; Sync test step records to the postgres db (pgdb).
;;
;;   dbh           - pgdb handle, passed through to the pgdb:* calls
;;   cached-info   - hash table; 'tests maps local test id -> pgdb test id,
;;                   'steps receives local step id -> pgdb step id
;;   test-step-ids - list of pairs (step-id . run-id)
;;   smallest-last-update-time - hash table; its "smallest-time" entry is
;;                   lowered to a step's last_update when that step is newer
;;                   than the pgdb copy (or is newly inserted) and that time
;;                   is smaller than the current entry
;;
;; For each step whose test is already in the 'tests cache, the pgdb step
;; (looked up by pgdb test id, step name and state) is updated if it
;; exists and inserted otherwise; the resulting pgdb step id is stored in
;; the 'steps cache. Steps whose test is not cached are only logged.
(define (tasks:sync-test-steps dbh cached-info test-step-ids smallest-last-update-time)
; (print "Sync Steps " test-step-ids )
(let ((test-ht (hash-table-ref cached-info 'tests))
(step-ht (hash-table-ref cached-info 'steps))
(run-id-in #f)
)
(for-each
(lambda (test-step-id)
;; unpack the (step-id . run-id) pair; test-step-id is rebound to the bare id
(set! run-id-in (cdr test-step-id))
(set! test-step-id (car test-step-id))
(let* ((test-step-info (rmt:get-steps-info-by-id run-id-in test-step-id))
(step-id (tdb:step-get-id test-step-info))
(test-id (tdb:step-get-test_id test-step-info))
(stepname (tdb:step-get-stepname test-step-info))
(state (tdb:step-get-state test-step-info))
(status (tdb:step-get-status test-step-info))
(event_time (tdb:step-get-event_time test-step-info))
(comment (tdb:step-get-comment test-step-info))
(logfile (tdb:step-get-logfile test-step-info))
(last-update (tdb:step-get-last_update test-step-info))
(pgdb-test-id (hash-table-ref/default test-ht test-id #f))
(smallest-time (hash-table-ref/default smallest-last-update-time "smallest-time" #f))
;; the pgdb step is only looked up when the owning test is cached
(pgdb-step-id (if pgdb-test-id
(pgdb:get-test-step-id dbh pgdb-test-id stepname state)
#f)))
(if step-id
(begin
(if pgdb-test-id
(begin
(if pgdb-step-id
;; step already in pgdb: update it in place
(begin
(debug:print-info 4 *default-log-port* "Updating existing test-step with test-id: " test-id " and step-id " step-id " pgdb test id: " pgdb-test-id " pgdb step id " pgdb-step-id )
(let* ((pgdb-last-update (pgdb:get-test-step-last-update dbh pgdb-step-id)))
(if (and (> last-update pgdb-last-update) (or (not smallest-time) (< last-update smallest-time)))
(hash-table-set! smallest-last-update-time "smallest-time" last-update)))
(pgdb:update-test-step dbh pgdb-step-id pgdb-test-id stepname state status event_time comment logfile last-update))
;; step not in pgdb yet: insert it, then read back its new id
(begin
(debug:print-info 4 *default-log-port* "Inserting test-step with test-id: " test-id " and step-id " step-id " pgdb test id: " pgdb-test-id)
(if (or (not smallest-time) (< last-update smallest-time))
(hash-table-set! smallest-last-update-time "smallest-time" last-update))
(pgdb:insert-test-step dbh pgdb-test-id stepname state status event_time comment logfile last-update )
(set! pgdb-step-id (pgdb:get-test-step-id dbh pgdb-test-id stepname state))))
;; remember the local -> pgdb step id mapping
(hash-table-set! step-ht step-id pgdb-step-id ))
(debug:print-info 1 *default-log-port* "Error: Test not cashed")))
(debug:print-info 1 *default-log-port* "Error: Could not get test step info for step id " test-step-id )))) ;; this is a weird scenario, need to debug
test-step-ids)))
;; Sync test data records to the postgres db (pgdb).
;;
;;   dbh           - pgdb handle, passed through to the pgdb:* calls
;;   cached-info   - hash table; 'tests maps local test id -> pgdb test id,
;;                   'data receives local data id -> pgdb data id
;;   test-data-ids - list of pairs (data-id . run-id)
;;   smallest-last-update-time - hash table; its "smallest-time" entry is
;;                   lowered to a record's last_update when that record is
;;                   newer than the pgdb copy (or is newly inserted) and that
;;                   time is smaller than the current entry
;;
;; For each record whose test is already in the 'tests cache, the pgdb
;; record (looked up by pgdb test id, category and variable) is updated if
;; it exists and inserted otherwise; the resulting pgdb data id is stored
;; in the 'data cache. If the insert raises, the exception is logged and
;; the previous value of pgdb-data-id (#f on this branch) is what gets
;; cached. Records whose test is not cached are only logged.
(define (tasks:sync-test-gen-data dbh cached-info test-data-ids smallest-last-update-time)
(let ((test-ht (hash-table-ref cached-info 'tests))
(data-ht (hash-table-ref cached-info 'data))
(run-id-in #f)
)
(for-each
(lambda (test-data-id)
;; unpack the (data-id . run-id) pair; test-data-id is rebound to the bare id
(set! run-id-in (cdr test-data-id))
(set! test-data-id (car test-data-id))
(let* ((test-data-info (rmt:get-data-info-by-id run-id-in test-data-id))
(data-id (db:test-data-get-id test-data-info))
(test-id (db:test-data-get-test_id test-data-info))
(category (db:test-data-get-category test-data-info))
(variable (db:test-data-get-variable test-data-info))
(value (db:test-data-get-value test-data-info))
(expected (db:test-data-get-expected test-data-info))
(tol (db:test-data-get-tol test-data-info))
(units (db:test-data-get-units test-data-info))
(comment (db:test-data-get-comment test-data-info))
(status (db:test-data-get-status test-data-info))
(type (db:test-data-get-type test-data-info))
(last-update (db:test-data-get-last_update test-data-info))
(smallest-time (hash-table-ref/default smallest-last-update-time "smallest-time" #f))
(pgdb-test-id (hash-table-ref/default test-ht test-id #f))
;; the pgdb record is only looked up when the owning test is cached
(pgdb-data-id (if pgdb-test-id
(pgdb:get-test-data-id dbh pgdb-test-id category variable)
#f)))
(if data-id
(begin
(if pgdb-test-id
(begin
(if pgdb-data-id
;; record already in pgdb: update it in place
(begin
(debug:print-info 4 *default-log-port* "Updating existing test-data with test-id: " test-id " and data-id " data-id " pgdb test id: " pgdb-test-id " pgdb data id " pgdb-data-id)
(let* ((pgdb-last-update (pgdb:get-test-data-last-update dbh pgdb-data-id)))
(if (and (> last-update pgdb-last-update) (or (not smallest-time) (< last-update smallest-time)))
(hash-table-set! smallest-last-update-time "smallest-time" last-update)))
(pgdb:update-test-data dbh pgdb-data-id pgdb-test-id category variable value expected tol units comment status type last-update))
;; record not in pgdb yet: insert it; on success read back its new id
(begin
(debug:print-info 4 *default-log-port* "Inserting test-data with test-id: " test-id " and data-id " data-id " pgdb test id: " pgdb-test-id)
(if (handle-exceptions
exn
(begin (print-call-chain)
(debug:print 0 *default-log-port* ((condition-property-accessor 'exn 'message) exn))
#f)
(pgdb:insert-test-data dbh pgdb-test-id category variable value expected tol units comment status type last-update))
;(tasks:run-id->mtpg-run-id dbh cached-info run-id area-info)
(begin
;(pgdb:insert-test-data dbh pgdb-test-id category variable value expected tol units comment status type )
(if (or (not smallest-time) (< last-update smallest-time))
(hash-table-set! smallest-last-update-time "smallest-time" last-update))
(set! pgdb-data-id (pgdb:get-test-data-id dbh pgdb-test-id category variable)))
#f)))
;; remember the local -> pgdb data id mapping
(hash-table-set! data-ht data-id pgdb-data-id ))
(begin
(debug:print-info 1 *default-log-port* "Error: Test not in pgdb"))))
(debug:print-info 1 *default-log-port* "Error: Could not get test data info for data id " test-data-id )))) ;; this is a weird scenario, need to debug
test-data-ids)))
;; Sync test records of one run into the pgdb database reached through dbh.
;;
;;   dbh         - handle passed to every pgdb: call
;;   cached-info - hash table; its 'tests table is filled in here with
;;                 local test-id -> pgdb test-id
;;   test-ids    - list of local test ids to sync
;;   area-info   - passed through to tasks:run-id->mtpg-run-id
;;   smallest-last-update-time - hash table; key "smallest-time" is lowered to
;;                 the smallest last_update among the records synced
;;   main-run-id - run id used to fetch each test record
;;
;; Called for its side effects; the result is whatever for-each returns.
(define (tasks:sync-tests-data dbh cached-info test-ids area-info smallest-last-update-time main-run-id)
(let ((test-ht (hash-table-ref cached-info 'tests))
(run-id-in main-run-id))
(for-each
(lambda (test-id)
; (set! run-id-in (cdr test-id))
; (set! test-id (car test-id))
(debug:print 0 *default-log-port* "test-id: " test-id " run-id: " run-id-in)
(let* ((test-info (rmt:get-test-info-by-id run-id-in test-id))
(run-id (db:test-get-run_id test-info)) ;; look these up in db_records.scm
;; shadows the lambda parameter with the id stored in the fetched record
(test-id (db:test-get-id test-info))
(test-name (db:test-get-testname test-info))
(item-path (db:test-get-item-path test-info))
(state (db:test-get-state test-info))
(status (db:test-get-status test-info))
(host (db:test-get-host test-info))
(pid (db:test-get-process_id test-info))
(cpuload (db:test-get-cpuload test-info))
(diskfree (db:test-get-diskfree test-info))
(uname (db:test-get-uname test-info))
(run-dir (db:test-get-rundir test-info))
(log-file (db:test-get-final_logf test-info))
(run-duration (db:test-get-run_duration test-info))
(comment (db:test-get-comment test-info))
(event-time (db:test-get-event_time test-info))
(archived (db:test-get-archived test-info))
(last-update (db:test-get-last_update test-info))
;; a false pgdb-run-id means the run cannot be mapped; the test is then skipped below
(pgdb-run-id (tasks:run-id->mtpg-run-id dbh cached-info run-id area-info smallest-last-update-time))
(smallest-time (hash-table-ref/default smallest-last-update-time "smallest-time" #f))
;; an existing pgdb row is located by (pgdb-run-id, test-name, item-path)
(pgdb-test-id (if pgdb-run-id
(begin
;(print pgdb-run-id)
(pgdb:get-test-id dbh pgdb-run-id test-name item-path))
#f)))
;; "id" "run_id" "testname" "state" "status" "event_time"
;; "host" "cpuload" "diskfree" "uname" "rundir" "item_path"
;; "run_duration" "final_logf" "comment" "shortdir" "attemptnum" "archived"
;; announce progress only for records with no item path
(if (or (not item-path) (string-null? item-path))
(debug:print-info 0 *default-log-port* "Working on Run id : " run-id " and test name : " test-name))
(if pgdb-run-id
(begin
(if pgdb-test-id ;; have a record
(begin ;; let ((key-name (conc run-id "/" test-name "/" item-path)))
(debug:print-info 4 *default-log-port* "Updating existing test with run-id: " run-id " and test-id: " test-id " pgdb run id: " pgdb-run-id " pgdb-test-id " pgdb-test-id)
(let* ((pgdb-last-update (pgdb:get-test-last-update dbh pgdb-test-id)))
(if (and (> last-update pgdb-last-update) (or (not smallest-time) (< last-update smallest-time))) ;;if last-update is same as pgdb-last-update then it is safe to assume the records are identical and we can use a larger last update time.
(hash-table-set! smallest-last-update-time "smallest-time" last-update)))
(pgdb:update-test dbh pgdb-test-id pgdb-run-id test-name item-path state status host cpuload diskfree uname run-dir log-file run-duration comment event-time archived last-update pid))
(begin
(debug:print-info 4 *default-log-port* "Inserting test with run-id: " run-id " and test-id: " test-id " pgdb run id: " pgdb-run-id)
(pgdb:insert-test dbh pgdb-run-id test-name item-path state status host cpuload diskfree uname run-dir log-file run-duration comment event-time archived last-update pid)
(if (or (not smallest-time) (< last-update smallest-time))
(hash-table-set! smallest-last-update-time "smallest-time" last-update))
;; re-query to learn the id of the row just inserted
(set! pgdb-test-id (pgdb:get-test-id dbh pgdb-run-id test-name item-path))))
;; remember the mapping for later syncs that consult the 'tests cache
(hash-table-set! test-ht test-id pgdb-test-id))
(debug:print-info 1 *default-log-port* "WARNING: Skipping run with run-id:" run-id ". This run was created after privious sync and removed before this sync."))))
test-ids)))
;; Make sure the tag named `tag` exists in pgdb and that the area described
;; by area-info is associated with it.
;;
;; Element 0 of both the tag record and area-info is used as the id.
;; Exceptions raised while inserting the tag or the area/tag link are logged
;; at level 1 and yield #f instead of propagating.
(define (task:add-area-tag dbh area-info tag)
  (let* ((log-exn  (lambda (exn)
                     (debug:print-info 1 *default-log-port* ((condition-property-accessor 'exn 'message) exn))
                     #f))
         (existing (pgdb:get-tag-info-by-name dbh tag))
         ;; reuse the existing tag record, otherwise create it and look it up again;
         ;; stays #f when the insert fails
         (tag-info (cond
                    (existing existing)
                    ((handle-exceptions
                      exn
                      (log-exn exn)
                      (pgdb:insert-tag dbh tag))
                     (pgdb:get-tag-info-by-name dbh tag))
                    (else #f))))
    ;; add to area_tags; a #f tag-info makes vector-ref raise, which is
    ;; caught and logged here just like a database error
    (handle-exceptions
     exn
     (log-exn exn)
     (if (not (pgdb:is-area-taged-with-a-tag dbh (vector-ref tag-info 0) (vector-ref area-info 0)))
         (pgdb:insert-area-tag dbh (vector-ref tag-info 0) (vector-ref area-info 0))))))
;; Run every id in run-ids through tasks:run-id->mtpg-run-id so that each
;; run is looked up (and, presumably, created or refreshed) on the pgdb side.
;; Called for its side effects only.
(define (tasks:sync-run-data dbh cached-info run-ids area-info smallest-last-update-time)
  (let ((sync-one-run
         (lambda (run-id)
           (debug:print-info 4 *default-log-port* "Check if run with " run-id " needs to be synced" )
           (tasks:run-id->mtpg-run-id dbh cached-info run-id area-info smallest-last-update-time))))
    (for-each sync-one-run run-ids)))
;; get runs changed since last sync
;; (define (tasks:sync-test-data dbh cached-info area-info)
;; (let* ((
;; Sync runs and tests of the area at *toppath* into the pgdb database `dest`.
;;
;;   configdat - configuration passed to pgdb:open and tasks:set-area
;;   dest      - database name handed to pgdb:open
;;
;; Command line switches consulted through args:get-arg:
;;   -target / -runname : must be given together; restricts the sync to that
;;                        run and forces a sync-from-zero (the stored sync time
;;                        is then left untouched)
;;   -since             : numeric override for the last sync time
;;   -area-tag / -area  : tag to attach to the area
;;
;; When no area record exists yet one is created with tasks:set-area and the
;; sync is restarted; if that fails an error is logged and #f is returned.
(define (tasks:sync-to-postgres configdat dest)
  ;; (print "In sync")
  (let* ((dbh         (pgdb:open configdat dbname: dest))
         (area-info   (pgdb:get-area-by-path dbh *toppath*))
         (cached-info (make-hash-table))
         (start       (current-seconds))
         (target      (args:get-arg "-target"))
         (run-name    (args:get-arg "-runname")))
    (if (and target (not run-name))
        (begin
          (debug:print 0 *default-log-port* "Error: Provide runname")
          (exit 1)))
    (if (and (not target) run-name)
        (begin
          (debug:print 0 *default-log-port* "Error: Provide target")
          (exit 1)))
    ;; one id-mapping table per record type, shared by all the sync helpers
    (for-each (lambda (dtype)
                (hash-table-set! cached-info dtype (make-hash-table)))
              '(runs targets tests steps data))
    (hash-table-set! cached-info 'start start) ;; when done we'll set sync times to this
    (if area-info
        (let* ((single-run?    (and target run-name))
               (since-arg      (args:get-arg "-since"))
               ;; slot 3 of area-info is used as the stored last sync time
               (last-sync-time (cond
                                (single-run? 0)
                                (since-arg
                                 ;; a non-numeric -since used to become #f and break the queries below
                                 (or (string->number since-arg)
                                     (begin
                                       (debug:print 0 *default-log-port* "Error: -since requires a numeric value, got \"" since-arg "\"")
                                       (exit 1))))
                                (else (vector-ref area-info 3))))
               (smallest-last-update-time (make-hash-table))
               (run-ids         (if single-run?
                                    (rmt:get-run-record-ids target run-name (rmt:get-keys))
                                    (rmt:get-changed-record-run-ids last-sync-time)))
               (all-run-ids     (if single-run? '() (rmt:get-all-runids)))
               (changed-run-dbs (if single-run? '() (db:get-changed-run-ids last-sync-time)))
               ;; a run is considered changed when the db it maps to (run-id modulo num-run-dbs) changed
               (changed-run-ids (if single-run?
                                    run-ids
                                    (filter (lambda (run)
                                              (member (modulo run (num-run-dbs)) changed-run-dbs))
                                            all-run-ids)))
               (area-tag        (or (args:get-arg "-area-tag")
                                    (args:get-arg "-area")
                                    "")))
          ;; an area with no tag at all gets the default one
          (if (and (equal? area-tag "") (not (pgdb:is-area-taged dbh (vector-ref area-info 0))))
              (set! area-tag *default-area-tag*))
          (if (not (equal? area-tag ""))
              (task:add-area-tag dbh area-info area-tag))
          (if (not (null? run-ids))
              (begin
                (debug:print-info 0 *default-log-port* "syncing runs: " run-ids)
                (tasks:sync-run-data dbh cached-info run-ids area-info smallest-last-update-time)))
          (for-each
           (lambda (run-id)
             (let ((test-ids (rmt:get-changed-record-test-ids run-id last-sync-time)))
               ;; NOTE(review): raw stdout print kept as-is; looks like debugging residue
               (print test-ids)
               (if (not (null? test-ids))
                   (begin
                     (debug:print-info 0 *default-log-port* "syncing tests: " test-ids)
                     (tasks:sync-tests-data dbh cached-info test-ids area-info smallest-last-update-time run-id)))))
           changed-run-ids)
          (let* ((smallest-time (hash-table-ref/default smallest-last-update-time "smallest-time" (current-seconds))))
            ;; the log port argument was missing here, unlike every other debug:print-info call
            (debug:print-info 0 *default-log-port* "smallest-time :" smallest-time " last-sync-time " last-sync-time)
            (if (not single-run?)
                (if (or (and smallest-time (> smallest-time last-sync-time))
                        (and smallest-time (eq? last-sync-time 0)))
                    (pgdb:write-sync-time dbh area-info smallest-time))))) ;;this needs to be changed
        (if (tasks:set-area dbh configdat)
            (tasks:sync-to-postgres configdat dest)
            (begin
              (debug:print 0 *default-log-port* "ERROR: unable to create an area record")
              #f)))))
;;======================================================================
;; see defstruct host at top of file.
;; host: reachable last-update last-used last-cpuload
;;
;; Refresh the *host-loads* record of every usable name in hosts-raw.
;;
;; Entries that are not a single run of non-whitespace characters are ignored.
;; A host record is created (and stored) on first sight, then its reachable,
;; last-update and last-cpuload fields are overwritten with what
;; common:get-host-info reports.
(define (common:update-host-loads-table hosts-raw)
  (let ((usable-name? (lambda (name)
                        (string-match (regexp "^\\S+$") name)))
        (record-for   (lambda (name)
                        (or (hash-table-ref/default *host-loads* name #f)
                            (let ((fresh (make-host)))
                              (hash-table-set! *host-loads* name fresh)
                              fresh)))))
    (for-each
     (lambda (hostname)
       ;; the record must exist before common:get-host-info runs, hence let*
       (let* ((rec        (record-for hostname))
              (info       (common:get-host-info hostname))
              (reachable  (car info))
              (reached-at (cadr info))
              (cpuload    (caddr info)))
         (host-reachable-set! rec reachable)
         (host-last-update-set! rec reached-at)
         (host-last-cpuload-set! rec cpuload)))
     (filter usable-name? hosts-raw))))
;;======================================================================
;; ideally put all this info into the db, no need to preserve it across moving homehost
;;
;; return list of
;; ( reachable? update-time cpuload )
;; Report the state of hostname as a three element list:
;;   ( reachable? sample-time cpuload )
;;
;; Sources are tried from cheapest to most expensive:
;;   1. the latest load reported through rmt:get-latest-host-load, when it is
;;      less than 6 seconds old
;;   2. the cached *host-loads* record, when it was updated less than
;;      4 seconds ago
;;   3. a ping followed by common:get-normalized-cpu-load
;; If none applies the host is reported as (#f 0 -1) and must not be used.
(define (common:get-host-info hostname)
  (let* ((fresh-sample-window 6) ;; this was 20 seconds, seems way too lax. Switch to 6 seconds
         (fresh-record-window 4)
         (latest      (rmt:get-latest-host-load hostname)) ;; pair: (load . sample-time)
         (sample-load (car latest))
         (sampled-at  (cdr latest))
         (sample-age  (- (current-seconds) sampled-at))
         (known-rec   (hash-table-ref/default *host-loads* hostname #f)))
    (cond
     ;; a test on this host reported its load very recently, reuse that
     ((> fresh-sample-window sample-age)
      (list #t sampled-at sample-load))
     ;; our own record is still fresh enough
     ((and known-rec
           (> (+ (host-last-update known-rec) fresh-record-window) (current-seconds)))
      (list #t
            (host-last-update known-rec)
            (host-last-cpuload known-rec)))
     ;; ask the host itself; get-normalized-cpu-load is cached for up to 5 seconds
     ((common:unix-ping hostname)
      (list #t
            (current-seconds)
            (alist-ref 'adj-core-load (common:get-normalized-cpu-load hostname))))
     ;; bad host, don't use!
     (else
      (list #f 0 -1)))))
;;======================================================================
;; Force a megatest cleanup-db if version is changed and skip-version-check not specified
;; Do NOT check if not on homehost!
;;
;; When *toppath* is set, we are on the homehost and common:api-changed?
;; reports a version change, either migrate the area with common:cleanup-db
;; or exit with status 1.
;;
;; Migration is attempted only when megatest.config and .mtdb/main.db both
;; exist under $MT_RUN_AREA_HOME, the db is writable and the current user owns
;; megatest.config. Setting MT_SKIP_DB_MIGRATE skips the whole check.
(define (common:exit-on-version-changed)
  (if (and *toppath* ;; do nothing if *toppath* not yet provided
           (rmt:on-homehost?))
      (if (common:api-changed?)
          (let* ((area-home (get-environment-variable "MT_RUN_AREA_HOME"))
                 (mtconf    (conc area-home "/megatest.config"))
                 ;; the path separator was missing here, so the db file was never found
                 (dbfile    (conc area-home "/.mtdb/main.db"))
                 (read-only (not (file-write-access? dbfile)))
                 (dbstruct  (db:setup))) ;; (db:setup-db *dbstruct-dbs* *toppath* #f))) ;; #t)))
            (debug:print 0 *default-log-port*
                         "WARNING: Version mismatch!\n"
                         " expected: " (common:version-signature) "\n"
                         " got: " (common:get-last-run-version))
            (cond
             ((get-environment-variable "MT_SKIP_DB_MIGRATE") #t)
             ((and (common:file-exists? mtconf) (common:file-exists? dbfile) (not read-only)
                   (eq? (current-user-id)(file-owner mtconf))) ;; safe to run -cleanup-db
              (debug:print 0 *default-log-port* " I see you are the owner of megatest.config, attempting to cleanup and reset to new version")
              (handle-exceptions
               exn
               (begin
                 (debug:print 0 *default-log-port* "Failed to switch versions. exn=" exn)
                 (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
                 (print-call-chain (current-error-port))
                 (exit 1))
               (common:cleanup-db dbstruct)))
             ((not (common:file-exists? mtconf))
              (debug:print 0 *default-log-port* " megatest.config does not exist in this area. Cannot proceed with megatest version migration.")
              (exit 1))
             ((not (common:file-exists? dbfile))
              (debug:print 0 *default-log-port* " .mtdb/main.db does not exist in this area. Cannot proceed with megatest version migration.")
              (exit 1))
             ;; ownership is checked on megatest.config, so name that file in the message
             ((not (eq? (current-user-id)(file-owner mtconf)))
              (debug:print 0 *default-log-port* " You do not own megatest.config in this area. Cannot proceed with megatest version migration.")
              (exit 1))
             (read-only
              (debug:print 0 *default-log-port* " You have read-only access to this area. Cannot proceed with megatest version migration.")
              (exit 1))
             (else
              (debug:print 0 *default-log-port* " to switch versions you can run: \"megatest -cleanup-db\"")
              (exit 1)))))))
;;======================================================================
;; (begin
;; (debug:print 0 *default-log-port* "ERROR: cannot migrate version unless on homehost. Exiting.")
;; (exit 1))))
;; Block until the normalized load of the relevant host drops to maxnormload.
;;
;; While *toppath* is unset an error is logged and the call sleeps in
;; 30 second steps, giving up on the wait after roughly 300 seconds (this
;; guards against an early-setup race that is hard to reproduce).
;;
;; The host handed to common:wait-for-normalized-load is:
;;   - in http transport mode: #f when already on the homehost (local calls),
;;     otherwise the car of what server:choose-server returns (or #f)
;;   - in any other mode: the local host name
(define (common:wait-for-homehost-load maxnormload msg)
  (let ((began (current-seconds)))
    (let wait-for-toppath ()
      (cond
       ((not *toppath*)
        (debug:print 0 *default-log-port* "ERROR: common:wait-for-homehost-load called before *toppath* set.")
        (thread-sleep! 30)
        (if (> 300 (- (current-seconds) began))
            (wait-for-toppath))))))
  (common:wait-for-normalized-load
   maxnormload
   msg
   (if (eq? (rmt:transport-mode) 'http)
       ;; if we are on the homehost then pass in #f so the calls are local
       (and (not (rmt:on-homehost?))
            (let ((chosen (server:choose-server *toppath* 'homehost)))
              (and chosen (car chosen))))
       (get-host-name))))
;; Write config cdat to file fname in alist form and verify the result.
;;
;; Returns #t when the file exists afterwards and configf:read-alist can read
;; it back. If it cannot be read back the file is deleted (a failed delete is
;; only logged) and #f is returned; #f is also returned when the file is
;; missing after the write.
;;
;; The faux lock on fname is requested first; failing to get it is logged but
;; does not stop the write.
(define (configf:write-alist cdat fname)
  (or (common:faux-lock fname)
      (debug:print 0 *default-log-port* "INFO: Could not get lock on " fname))
  (let ((alist-form (configf:config->alist cdat)))
    ;; first write out the file
    (with-output-to-file fname
      (lambda ()
        (pp alist-form)))
    ;; now verify it is readable
    (let ((verified
           (cond
            ((not (common:file-exists? fname)) #f)
            ((configf:read-alist fname) #t) ;; data is good.
            (else
             (handle-exceptions
              exn
              (begin
                (debug:print 0 *default-log-port* "deleting " fname " failed, exn=" exn)
                #f)
              (debug:print 0 *default-log-port* "WARNING: content " alist-form " for cache " fname " is not readable. Deleting generated file.")
              (delete-file fname))
             #f))))
      (common:faux-unlock fname)
      verified)))
;;======================================================================
;; faux-lock is deprecated. Please use simple-lock below
;;
;; Try to take the lock named keyname in the no-sync store.
;;
;;   wait-time        - number of one-second retries while the lock is held
;;   allow-lock-steal - when true (the default) the lock is forcibly released
;;                      on the last retry so that it can be taken; when false
;;                      a held lock is never stolen and #f is returned once
;;                      the retries are used up
;;
;; Returns #t when the lock value read back matches this process id, else #f.
(define (common:faux-lock keyname #!key (wait-time 8)(allow-lock-steal #t))
  (if (rmt:no-sync-get/default keyname #f) ;; do not be tempted to compare to pid. locking is a one-shot action, if already locked for this pid it doesn't actually count
      (if (> wait-time 0)
          (begin
            (thread-sleep! 1)
            ;; only one second left, steal the lock - but only if the caller allows it
            ;; (the flag used to be ignored and was dropped on the recursive call)
            (if (and allow-lock-steal (<= wait-time 1))
                (begin
                  (debug:print-info 0 *default-log-port* "stealing lock for " keyname)
                  (common:faux-unlock keyname force: #t)))
            (common:faux-lock keyname
                              wait-time: (- wait-time 1)
                              allow-lock-steal: allow-lock-steal))
          #f)
      (begin
        (rmt:no-sync-set keyname (conc (current-process-id)))
        ;; read back to confirm that it is our pid that got stored
        (equal? (conc (current-process-id)) (conc (rmt:no-sync-get/default keyname #f))))))
;; Release the faux lock named keyname.
;;
;; The lock is released when it is held by this process (stored value equals
;; our pid) or unconditionally when force is true. Returns #t in those cases,
;; even if there was nothing to delete, and #f when the lock belongs to
;; someone else.
(define (common:faux-unlock keyname #!key (force #f))
  (cond
   ((or force
        (equal? (conc (current-process-id))
                (conc (rmt:no-sync-get/default keyname #f))))
    (if (rmt:no-sync-get/default keyname #f)
        (rmt:no-sync-del! keyname))
    #t)
   (else #f)))
;;======================================================================
;; simple lock. improve and converge on this one.
;;
;; Request the lock named keyname by delegating to rmt:no-sync-get-lock;
;; the result of that call is returned unchanged.
(define (common:simple-lock keyname)
(rmt:no-sync-get-lock keyname))
;; Release the lock named keyname by deleting its no-sync entry.
;; NOTE(review): the force keyword is accepted but not used; the entry is
;; deleted without checking who holds it.
(define (common:simple-unlock keyname #!key (force #f))
(rmt:no-sync-del! keyname))
;; returns waitons waitors tconfigdat
;;
;; Work out which tests test-name waits on.
;;
;;   test-name          - name of the test whose requirements are read
;;   all-tests-registry - hash table of known tests; names missing from it
;;                        are reported and dropped from the results
;;   global-waitons     - list of waitons applied to every test (test-name
;;                        itself is removed from it), or a non-list/empty
;;                        list for none
;;
;; Returns three values: the waitons list, the waitors list and the test
;; config. Exits with status 1 when test-name has no testconfig.
(define (tests:get-waitons test-name all-tests-registry global-waitons)
  (let* ((config (tests:get-testconfig test-name #f all-tests-registry 'return-procs)) ;; assuming no problems with immediate evaluation, this could be simplified ('return-procs -> #t)
         (instr  (if config
                     (configf:lookup config "requirements" "waiton")
                     (begin ;; No config means this is a non-existent test
                       (debug:print-error 0 *default-log-port* "non-existent required test \"" test-name "\"")
                       (exit 1))))
         (instr2 (if config
                     (configf:lookup config "requirements" "waitor")
                     ""))
         ;; Turn a requirement spec into a list of test names. The spec is a
         ;; string, a procedure producing the string, or anything else, which
         ;; means "nothing specified". kind is "waiton" or "waitor" and is
         ;; only used in messages.
         (spec->names
          (lambda (kind spec)
            (string-split
             (cond
              ((procedure? spec)
               (let ((res (spec)))
                 (debug:print-info 8 *default-log-port* kind " procedure results in string " res " for test " test-name)
                 res))
              ((string? spec) spec)
              (else "")))))
         ;; Keep only names present in all-tests-registry, reporting the rest.
         (registered-only
          (lambda (kind names)
            (filter (lambda (x)
                      (if (hash-table-ref/default all-tests-registry x #f)
                          #t
                          (begin
                            ;; the waitor case used to be reported as a "waiton"
                            (debug:print-error 0 *default-log-port* "test " test-name " has unrecognised " kind " testname " x)
                            #f)))
                    names))))
    (debug:print-info 8 *default-log-port* "waitons string is " instr ", waitors string is " instr2)
    (let* ((newwaitons-tmp (spec->names "waiton" instr))
           (newwaitors     (spec->names "waitor" instr2))
           (newwaitons     (if (and (list? global-waitons)
                                    (not (null? global-waitons)))
                               (begin
                                 (debug:print 0 *default-log-port* "Adding global waitons " global-waitons)
                                 (append newwaitons-tmp
                                         (filter (lambda (x) ;; remove self from global waitons
                                                   (not (equal? x test-name)))
                                                 global-waitons)))
                               newwaitons-tmp))
           (waitons        (registered-only "waiton" newwaitons))
           (waitors        (registered-only "waitor" newwaitors)))
      (values waitons waitors config))))
;; Check for waiver eligibility
;;
;; Decide whether the waiver of a previous test run may be applied to testdat.
;;
;;   testdat      - record of the current test
;;   prev-testdat - record of the previous test
;;
;; For every variable in the [waivers] section of the test's testconfig a shell
;; command is built from a rule template and run with `system` from inside the
;; current test's run directory. Returns #t only when every command exits with
;; status 0. Returns #f when the run directory is missing, when there are no
;; waivers, or as soon as one command fails.
(define (tests:check-waiver-eligibility testdat prev-testdat)
(let* ((test-registry (make-hash-table))
(testconfig (tests:get-testconfig (db:test-get-testname testdat) (db:test-get-item-path testdat) test-registry #f))
(test-rundir ;; (sdb:qry 'passstr
(db:test-get-rundir testdat)) ;; )
(prev-rundir ;; (sdb:qry 'passstr
(db:test-get-rundir prev-testdat)) ;; )
(waivers (if testconfig (configf:section-vars testconfig "waivers") '()))
;; a waiver value is "<rule-name> <glob>": first token, whitespace, then the rest
(waiver-rx (regexp "^(\\S+)\\s+(.*)$"))
;; built-in rule templates; %file1% is the current run's file, %file2% the previous run's
(diff-rule "diff %file1% %file2%")
(logpro-rule "diff %file1% %file2% | logpro %waivername%.logpro %waivername%.html"))
(if (not (common:file-exists? test-rundir))
(begin
(debug:print-error 0 *default-log-port* "test run directory is gone, cannot propagate waiver")
#f)
(begin
;; relative file names (the .logpro lookup, rule output) resolve against the test run dir
(push-directory test-rundir)
(let ((result (if (null? waivers)
#f
(let loop ((hed (car waivers))
(tal (cdr waivers)))
(debug:print 0 *default-log-port* "INFO: Applying waiver rule \"" hed "\"")
;; NOTE(review): when the waiver value does not match waiver-rx, waiver-rule and
;; waiver-glob are #f and are still used below - confirm this is intended
(let* ((waiver (configf:lookup testconfig "waivers" hed))
(wparts (if waiver (string-match waiver-rx waiver) #f))
(waiver-rule (if wparts (cadr wparts) #f))
(waiver-glob (if wparts (caddr wparts) #f))
(logpro-file (if waiver
(let ((fname (conc hed ".logpro")))
(if (common:file-exists? fname)
fname
(begin
(debug:print 0 *default-log-port* "INFO: No logpro file " fname " falling back to diff")
#f)))
#f))
;; if rule by name of waiver-rule is found in testconfig - use it
;; else if waivername.logpro exists use logpro-rule
;; else default to diff-rule
(rule-string (let ((rule (configf:lookup testconfig "waiver_rules" waiver-rule)))
(if rule
rule
(if logpro-file
logpro-rule
(begin
;; logpro-file is always #f on this path, so the message prints #f
(debug:print 0 *default-log-port* "INFO: No logpro file " logpro-file " found, using diff rule")
diff-rule)))))
;; (string-substitute "%file1%" "foofoo.txt" "This is %file1% and so is this %file1%." #t)
;; substitutions are applied innermost first: %waivername%, then %file2%, then %file1%
(processed-cmd (string-substitute
"%file1%" (conc test-rundir "/" waiver-glob)
(string-substitute
"%file2%" (conc prev-rundir "/" waiver-glob)
(string-substitute
"%waivername%" hed rule-string #t) #t) #t))
;; NOTE(review): res is never used
(res #f))
(debug:print 0 *default-log-port* "INFO: waiver command is \"" processed-cmd "\"")
;; exit status 0 means this waiver passes; continue with the next one, stop on first failure
(if (eq? (system processed-cmd) 0)
(if (null? tal)
#t
(loop (car tal)(cdr tal)))
#f))))))
(pop-directory)
result)))))
;; if .testconfig exists in test directory read and return it
;; else if have cached copy in *testconfigs* return it IFF there is a section "have fulldata"
;; else read the testconfig file
;; if have path to test directory save the config as .testconfig and return it
;;
;; Return the testconfig for test-name/item-path, or #f when none can be read.
;;
;;   test-name      - name of the test
;;   item-path      - item path; may be #f or "" for a test without items
;;   test-registry  - hash table of test name -> test directory, or #f to have
;;                    it built with tests:get-all
;;   system-allowed - passed to read-config; when true the environ-patt
;;                    "pre-launch-env-vars" is also used
;;   force-create   - when true the on-disk .testconfig cache is not read
;;   allow-write-cache - when false the .testconfig cache is never written
;;   wait-a-minute  - when true a missing testconfig file is retried every
;;                    10 seconds, up to 30 times
;;
;; Lookup order: on-disk cache (.testconfig in the test path taken from the
;; environment), then the in-memory *testconfigs* entry marked "have fulldata",
;; then the testconfig file itself.
(define (tests:get-testconfig test-name item-path test-registry system-allowed #!key (force-create #f)(allow-write-cache #t)(wait-a-minute #f))
(let* ((use-cache (common:use-cache?))
(cache-path (tests:get-test-path-from-environment))
(cache-file (and cache-path (conc cache-path "/.testconfig")))
(cache-exists (and cache-file
(not force-create) ;; if force-create then pretend there is no cache to read
(common:file-exists? cache-file)))
;; the on-disk cache is only consulted when common:use-cache? allows it
(cached-dat (if (and (not force-create)
cache-exists
use-cache)
(handle-exceptions
exn
(begin
(debug:print 0 *default-log-port* "failed to read " cache-file ", exn=" exn)
#f) ;; any issues, just give up with the cached version and re-read
(configf:read-alist cache-file))
#f))
;; key used in *testconfigs*: "test-name/item-path", or just test-name without items
(test-full-name (if (and item-path (not (string-null? item-path)))
(conc test-name "/" item-path)
test-name)))
(if cached-dat
cached-dat
(let ((dat (hash-table-ref/default *testconfigs* test-full-name #f)))
(if (and dat ;; have a locally cached version
(hash-table-ref/default dat "have fulldata" #f)) ;; marked as good data?
dat
;; no cached data available
(let* ((treg (or test-registry
(tests:get-all)))
;; test directory: registry entry, else the run directory under the link tree if it
;; holds a testconfig, else <toppath>/tests/<test-name>
(test-path (or (hash-table-ref/default treg test-name #f)
(let* ((local-tcdir (conc (getenv "MT_LINKTREE") "/"
(getenv "MT_TARGET") "/"
(getenv "MT_RUNNAME") "/"
test-name "/" item-path))
(local-tcfg (conc local-tcdir "/testconfig")))
(if (common:file-exists? local-tcfg)
local-tcdir
#f))
(conc *toppath* "/tests/" test-name)))
(test-configf (conc test-path "/testconfig"))
;; #t when the file exists and is readable; an unreadable file fails at once,
;; a missing file is retried only when wait-a-minute is set
(testexists (let loopa ((tries-left 30))
(cond
(
(and (common:file-exists? test-configf)(file-read-access? test-configf))
#t)
(
(common:file-exists? test-configf)
(debug:print 0 *default-log-port* "WARNING: Cannot read testconfig file: "test-configf)
#f)
(
(and wait-a-minute (> tries-left 0))
(thread-sleep! 10)
(debug:print 0 *default-log-port* "WARNING: testconfig file does not exist: "test-configf" will retry in 10 seconds. Tries left: "tries-left) ;; BB: this fires
(loopa (sub1 tries-left)))
(else
(debug:print 2 *default-log-port* "WARNING: testconfig file does not exist: "test-configf) ;; BB: this fires
#f))))
(tcfg (if testexists
(read-config test-configf #f system-allowed
environ-patt: (if system-allowed
"pre-launch-env-vars"
#f))
#f)))
(if (and tcfg cache-file) (hash-table-set! tcfg "have fulldata" #t)) ;; mark this as fully read data
(if tcfg (hash-table-set! *testconfigs* test-full-name tcfg))
;; write the on-disk cache only when the test directory is writable and caching is allowed;
;; the write itself is further skipped when running inside a test
(if (and testexists
cache-file
(file-write-access? cache-path)
allow-write-cache)
(let ((tpath (conc cache-path "/.testconfig")))
(debug:print-info 1 *default-log-port* "Caching testconfig for " test-name " in " tpath)
(if (and tcfg (not (common:in-running-test?)))
(configf:write-alist tcfg tpath))))
tcfg))))))
;;======================================================================
;; go through the hosts from least recently used to most recently used, pick the first that meets the load criteria from the
;; [host-rules] section.
;;
;; Pick a host for running a test.
;;
;;   hosts-raw - list of candidate host names; entries that are not a single
;;               run of non-whitespace characters are dropped
;;   host-type - key looked up in the [host-rules] section of configdat
;;   configdat - configuration holding [host-rules]
;;
;; Hosts are visited from least to most recently used. The first reachable
;; host whose load is below maxnload and whose job rate (1 / seconds since
;; last use) is below maxjobrate is returned, and its last-used time is set
;; to now. Returns #f when there are no hosts or none qualifies.
(define (common:get-least-loaded-host hosts-raw host-type configdat)
(let* ((rdat (configf:lookup configdat "host-rules" host-type))
(rules (common:val->alist (or rdat "") convert: #t)) ;; maxnload, maxnjobs, maxjobrate
(maxnload (common:alist-ref/default 'maxnload rules 1.5)) ;; max normalized load
;; NOTE(review): maxnjobs is read here but not used in the selection below
(maxnjobs (common:alist-ref/default 'maxnjobs rules 1.5)) ;; max normalized number of jobs
(maxjobrate (common:alist-ref/default 'maxjobrate rules (/ 1 6))) ;; max rate of submitting jobs to a given host in jobs/second
(hosts (filter (lambda (x)
(string-match (regexp "^\\S+$") x))
hosts-raw))
;; (best-host #f)
;; fetch the *host-loads* record for a host, creating and storing one on first use
(get-rec (lambda (hostname)
;; (print "get-rec hostname=" hostname)
(let ((h (hash-table-ref/default *host-loads* hostname #f)))
(if h
h
(let ((h (make-host)))
(hash-table-set! *host-loads* hostname h)
h)))))
;; NOTE(review): best-load is assigned below but never read
(best-load 99999)
(curr-time (current-seconds))
;; order hosts by ascending last-used time (least recently used first)
(get-hosts-sorted (lambda (hosts)
(sort hosts (lambda (a b)
(let ((a-rec (get-rec a))
(b-rec (get-rec b)))
;; (print "a=" a " a-rec=" a-rec " host-last-used=" (host-last-used a-rec))
;; (print "b=" b " b-rec=" b-rec " host-last-used=" (host-last-used b-rec))
(< (host-last-used a-rec)
(host-last-used b-rec))))))))
(debug:print 0 *default-log-port* "INFO: hosts-sorted=" (get-hosts-sorted hosts))
(if (null? hosts)
#f ;; no hosts to select from. All done and giving up now.
(let ((hosts-sorted (get-hosts-sorted hosts)))
;; refresh reachable/load data for all candidates before choosing
(common:update-host-loads-table hosts)
;; best-host is #f on every iteration: the loop returns as soon as a host qualifies
(let loop ((hostname (car hosts-sorted))
(tal (cdr hosts-sorted))
(best-host #f))
(let* ((rec (get-rec hostname))
(reachable (host-reachable rec))
(load (host-last-cpuload rec))
(last-used (host-last-used rec))
(delta (- curr-time last-used))
;; a host used within the current second gets a rate of 999 so that it is rejected
(job-rate (if (> delta 0)
(/ 1 delta)
999)) ;; jobs per second
(new-best
(cond
((not reachable)
(debug:print 0 *default-log-port* "Skipping host " hostname " as it cannot be reached.")
best-host)
((and (< load maxnload) ;; load is acceptable
(< job-rate maxjobrate)) ;; job rate is acceptable
(set! best-load load)
hostname)
(else best-host))))
(debug:print 0 *default-log-port* "INFO: Trying host " hostname " with load " load ", last used " delta " seconds ago, with job-rate " job-rate " for running a test." )
(if new-best
(begin ;; found a host, return it
(debug:print 0 *default-log-port* "INFO: Found host: " new-best " load: " load " last-used: " delta " seconds ago, with job-rate: " job-rate)
(host-last-used-set! rec curr-time)
new-best)
(if (null? tal) #f (loop (car tal)(cdr tal) best-host)))))))))
;;======================================================================
;; T E S T L A U N C H I N G P E R I T E M W I T H H O S T T Y P E S
;;======================================================================
;;
;; [hosts]
;; arm cubie01 cubie02
;; x86_64 zeus xena myth01
;; allhosts #{g hosts arm} #{g hosts x86_64}
;;
;; [host-types]
;; general #MTLOWESTLOAD #{g hosts allhosts}
;; arm #MTLOWESTLOAD #{g hosts arm}
;; nbgeneral nbjob run JOBCOMMAND -log $MT_LINKTREE/$MT_TARGET/$MT_RUNNAME.$MT_TESTNAME-$MT_ITEM_PATH.lgo
;;
;; [host-rules]
;; # maxnload => max normalized load
;; # maxnjobs => max jobs per cpu
;; # maxjobrate => max jobs per second
;; general maxnload=1.1; maxnjobs=1.2; maxjobrate=0.1
;;
;; [launchers]
;; envsetup general
;; xor/%/n 4C16G
;; % nbgeneral
;;
;; [jobtools]
;; # if defined and not "no" flexi-launcher will bypass "launcher" unless no match.
;; flexi-launcher yes
;; launcher nbfake
;;
;; Return the launcher command for testname/itempath.
;;
;; The [jobtools] launcher is the fallback. When [jobtools] flexi-launcher is
;; set to anything but "no", the [launchers] entries are tried in order; the
;; first pattern matching the test selects a host-type whose [host-types]
;; value is the launcher. A host-types value starting with #MTLOWESTLOAD
;; is special: the remaining words are host names, the least loaded one is
;; picked and "remrun <host>" is returned. Finding such a host is retried up
;; to 100 times with a growing delay; if none turns up the process exits with
;; status 1.
(define (common:get-launcher configdat testname itempath)
  (let ((fallback-launcher (configf:lookup configdat "jobtools" "launcher")))
    (if (and (configf:lookup configdat "jobtools" "flexi-launcher") ;; overrides launcher
             (not (equal? (configf:lookup configdat "jobtools" "flexi-launcher") "no")))
        (let* ((launchers (hash-table-ref/default configdat "launchers" '())))
          (if (null? launchers)
              fallback-launcher
              (let loop ((hed (car launchers))
                         (tal (cdr launchers)))
                (let ((patt      (car hed))
                      (host-type (cadr hed)))
                  (if (tests:match patt testname itempath)
                      (begin
                        (debug:print-info 2 *default-log-port* "Have flexi-launcher match for " testname "/" itempath " = " host-type)
                        (let* ((launcher       (configf:lookup configdat "host-types" host-type))
                               ;; an empty or blank launcher string is treated like a missing one
                               ;; (it used to fail on the car of an empty list)
                               (launcher-parts (if launcher (string-split launcher) '())))
                          (if (null? launcher-parts)
                              (begin
                                (debug:print-info 0 *default-log-port* "WARNING: no launcher found for host-type " host-type)
                                (if (null? tal)
                                    fallback-launcher
                                    (loop (car tal)(cdr tal))))
                              (if (equal? (car launcher-parts) "#MTLOWESTLOAD") ;; this is our special case, we will find the lowest load and craft a nbfake commandline
                                  (let host-loop ((targ-host (common:get-least-loaded-host (cdr launcher-parts) host-type configdat))
                                                  (count 100))
                                    (if targ-host
                                        (conc "remrun " targ-host)
                                        (if (> count 0)
                                            (begin
                                              (debug:print 0 *default-log-port* "INFO: Waiting for a host for host-type " host-type)
                                              ;; back off a little longer on every retry: 1, 2, ... 100 seconds
                                              (thread-sleep! (- 101 count))
                                              (host-loop (common:get-least-loaded-host (cdr launcher-parts) host-type configdat)
                                                         (- count 1)))
                                            (begin
                                              (debug:print 0 *default-log-port* "FATAL: Failed to find a host from #MTLOWESTLOAD for host-type " host-type)
                                              ;; a fatal error must not report success to the caller's shell
                                              (exit 1)))))
                                  launcher))))
                      ;; no match, try again
                      (if (null? tal)
                          fallback-launcher
                          (loop (car tal)(cdr tal))))))))
        fallback-launcher)))
;; Do not rpc this one, do the underlying calls!!!
;; Set state/status (and optionally a comment and one test-data row) for a test.
;;
;;   run-id, test-id - identify the test
;;   state, status   - new values; the primary record is only updated when
;;                     both are true
;;   comment         - comment string or #f
;;   dat             - hash table with optional ":category" ":variable" ":value"
;;                     ":expected" ":tol" ":units" ":type" ":comment" entries,
;;                     or #f for none
;;   work-area       - accepted but not used in this procedure
;;
;; A status of "FAIL" is turned into "WAIVED" when the previous run of the same
;; test was COMPLETED/WAIVED and tests:check-waiver-eligibility agrees.
(define (tests:test-set-status! run-id test-id state status comment dat #!key (work-area #f))
(let* ((real-status status)
(otherdat (if dat dat (make-hash-table)))
(testdat (rmt:get-test-info-by-id run-id test-id))
(test-name (db:test-get-testname testdat))
(item-path (db:test-get-item-path testdat))
;; before proceeding we must find out if the previous test (where all keys matched except runname)
;; was WAIVED if this test is FAIL
;; NOTES:
;; 1. Is the call to test:get-previous-run-record remotified?
;; 2. Add test for testconfig waiver propagation control here
;;
(prev-test (if (equal? status "FAIL")
(rmt:get-previous-test-run-record run-id test-name item-path)
#f))
;; waived is the comment to record (ours if given, else the previous one) or #f
(waived (if prev-test
(if prev-test ;; true if we found a previous test in this run series
(let ((prev-status (db:test-get-status prev-test))
(prev-state (db:test-get-state prev-test))
(prev-comment (db:test-get-comment prev-test)))
(debug:print 4 *default-log-port* "prev-status " prev-status ", prev-state " prev-state ", prev-comment " prev-comment)
(if (and (equal? prev-state "COMPLETED")
(equal? prev-status "WAIVED"))
(if comment
comment
prev-comment) ;; waived is either the comment or #f
#f))
#f)
#f)))
(if (and waived
(tests:check-waiver-eligibility testdat prev-test))
(set! real-status "WAIVED"))
(debug:print 4 *default-log-port* "real-status " real-status ", waived " waived ", status " status)
;; update the primary record IF state AND status are defined
(if (and state status)
(begin
(rmt:set-state-status-and-roll-up-items run-id test-id item-path state real-status (if waived waived comment))
;; (mt:process-triggers run-id test-id state real-status) ;; triggers are called in test-set-state-status
))
;; if status is "AUTO" then call rollup (note, this one modifies data in test
;; run area, it does remote calls under the hood.
;; (if (and test-id state status (equal? status "AUTO"))
;; (rmt:test-data-rollup run-id test-id status))
;; add metadata (need to do this way to avoid SQL injection issues)
;; :first_err
;; (let ((val (hash-table-ref/default otherdat ":first_err" #f)))
;; (if val
;; (sqlite3:execute db "UPDATE tests SET first_err=? WHERE run_id=? AND testname=? AND item_path=?;" val run-id test-name item-path)))
;;
;; ;; :first_warn
;; (let ((val (hash-table-ref/default otherdat ":first_warn" #f)))
;; (if val
;; (sqlite3:execute db "UPDATE tests SET first_warn=? WHERE run_id=? AND testname=? AND item_path=?;" val run-id test-name item-path)))
(let ((category (hash-table-ref/default otherdat ":category" ""))
(variable (hash-table-ref/default otherdat ":variable" ""))
(value (hash-table-ref/default otherdat ":value" #f))
(expected (hash-table-ref/default otherdat ":expected" "n/a"))
(tol (hash-table-ref/default otherdat ":tol" "n/a"))
(units (hash-table-ref/default otherdat ":units" ""))
(type (hash-table-ref/default otherdat ":type" ""))
(dcomment (hash-table-ref/default otherdat ":comment" "")))
(debug:print 4 *default-log-port*
"category: " category ", variable: " variable ", value: " value
", expected: " expected ", tol: " tol ", units: " units)
(if (and value) ;; require only value; BB was- all three required
;; the row is sent as one comma separated line:
;; category,variable,value,expected,tol,units,comment,<empty status>,type
(let ((dat (conc category ","
variable ","
value ","
expected ","
tol ","
units ","
dcomment ",," ;; extra comma for status
type )))
;; This was run remote, don't think that makes sense. Perhaps not, but that is the easiest path for the moment.
(rmt:csv->test-data run-id test-id
dat)
;; This was added in check-in a5adfa3f9a. Message was: "...added delay in set-values to allow for delayed write on server start"
;; I'm inserting an arbitrary rmt: call to force/ensure that the server is available to (hopefully) prevent a communication issue.
(rmt:get-var "MEGATEST_VERSION") ;; this does NOTHING but ensure the server is reachable. This is almost certainly NOT needed :)
;; BB - commenting out arbitrary 10 second wait (thread-sleep! 10) ;; add 10 second delay before quit in case rmt needs time to start a server.
)))
;; need to update the top test record if PASS or FAIL and this is a subtest
;;;;;; (if (not (equal? item-path ""))
;;;;;; (rmt:set-state-status-and-roll-up-items run-id test-name item-path state status #f) ;;;;;)
;; store the comment when it has any non-whitespace content, or the waiver comment when waived
(if (or (and (string? comment)
(string-match (regexp "\\S+") comment))
waived)
(let ((cmt (if waived waived comment)))
(rmt:general-call 'set-test-comment run-id cmt test-id)))))
)