Brought up to date with v1.64. Next up, speculatively create working directory for a test. Check space on actual test launch. If existing created directory is on best disk, use it. Else get new area on best disk and create pointer so Megatest can remove original when asked to remove the test.
Closed-Leaf
check-in: 3766f8e4ea user: matt tags: v1.64-area-fix
(debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
(vector #f (vector exn call-chain dat))) ;; return some stuff for debug if an exception happens
(cond
((not (vector? dat)) ;; it is an error to not receive a vector
(vector #f (vector #f "remote must be called with a vector")))
((> *api-process-request-count* 20) ;; 20)
(debug:print 0 *default-log-port* "WARNING: api:execute-requests received an overloaded message.")
(set! *server-overloaded* #t)
(vector #f (vector #f 'overloaded))) ;; the inner vector is what gets returned. nope, don't know why. please refactor!
(else
(let* ((cmd-in (vector-ref dat 0))
(cmd (if (symbol? cmd-in)
cmd-in
(string->symbol cmd-in)))
(params (vector-ref dat 1))
;; Copyright 2006-2014, Matthew Welland.
;;
;; This program is made available under the GNU GPL version 2.0 or
;; greater. See the accompanying file COPYING for details.
;;
;; This program is distributed WITHOUT ANY WARRANTY; without even the
;; implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
;; PURPOSE.
;; strftime('%m/%d/%Y %H:%M:%S','now','localtime')
(use sqlite3 srfi-1 posix regex regex-case srfi-69 dot-locking format md5 message-digest srfi-18)
(import (prefix sqlite3 sqlite3:))
(use (prefix sqlite3 sqlite3:) srfi-1 posix regex regex-case srfi-69 format md5 message-digest srfi-18)
(declare (unit archive))
(declare (uses db))
(declare (uses common))
(include "common_records.scm")
(include "db_records.scm")
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
-
+
(define (archive:main linktree target runname testname itempath options)
(let ((testdir (conc linktree "/" target "/" runname "/" testname "/" itempatt))
(flavor 'plain) ;; type of machine to run jobs on
(maxload 1.5) ;; max allowed load for this work
(adisks (archive:get-archive-disks)))
;; get testdir size
;; - hand off du to job mgr
(if (and (file-exists? testdir)
(if (and (common:file-exists? testdir)
(file-is-writable? testdir))
(let* ((dused (jobrunner:run-job
flavor ;; machine type
maxload ;; max allowed load
'() ;; prevars - environment vars to set for the job
common:get-disk-space-used ;; if a proc call it, if a string it is a unix command
(list testdir)))
(toplevel/children (and (db:test-get-is-toplevel test-dat)
(> (rmt:test-toplevel-num-items run-id test-name) 0)))
(test-partial-path (conc target "/" run-name "/" (db:test-make-full-name test-name item-path)))
;; note the trailing slash to get the dir inspite of it being a link
(test-path (conc linktree "/" test-partial-path))
;; if the old path was not deleted then prev-test-physical-path will end up pointing to a real directory
(mutex-lock! rp-mutex)
(prev-test-physical-path (if (file-exists? test-path)
(prev-test-physical-path (if (common:file-exists? test-path)
;; (read-symbolic-link test-path #t)
(common:real-path test-path)
#f))
(mutex-unlock! rp-mutex)
(new-test-physical-path (conc best-disk "/" test-partial-path))
(archive-block-id (db:test-get-archived test-dat))
(archive-block-info (rmt:test-get-archive-block-info archive-block-id))
(archive-path (if (vector? archive-block-info)
(vector-ref archive-block-info 2) ;; look in db.scm for test-get-archive-block-info for the vector record info
#f)) ;; no archive found?
(archive-internal-path (conc (common:get-testsuite-name) "-" run-id "/latest/" test-partial-path)))
;; some sanity checks, move an existing path out of the way - iif it is not a toplevel with children
;;
(if (and (not toplevel/children) ;; special handling needed for toplevel with children
prev-test-physical-path
(file-exists? prev-test-physical-path)) ;; what to do? abort or clean up or link it in?
(common:file-exists? prev-test-physical-path)) ;; what to do? abort or clean up or link it in?
(let* ((base (pathname-directory prev-test-physical-path))
(dirn (pathname-file prev-test-physical-path))
(newn (conc base "/." dirn)))
(debug:print-error 0 *default-log-port* "the old directory " prev-test-physical-path ", still exists! Moving it to " newn)
(rename-file prev-test-physical-path newn)))
(if (and archive-path ;; no point in proceeding if there is no actual archive
SUM(CASE WHEN t.status='PASS' THEN 1 ELSE 0 END) AS pass,
SUM(CASE WHEN t.status='FAIL' THEN 1 ELSE 0 END) AS fail,
SUM(CASE WHEN t.status IN ('PASS','FAIL') THEN 0 ELSE 1 END) AS other
FROM tests AS t INNER JOIN runs AS r ON t.run_id=r.id
WHERE t.state='COMPLETED' AND r.target LIKE ? GROUP BY r.target;"
target-patt))
(define (pgdb:get-latest-run-stats-given-target dbh ttype-id target-patt)
(define (pgdb:get-latest-run-stats-given-target dbh ttype-id target-patt limit offset)
(dbi:get-rows
dbh
;; "SELECT COUNT(t.id),t.status,r.target FROM tests AS t INNER JOIN runs AS r ON t.run_id=r.id
;; WHERE t.state='COMPLETED' AND ttype_id=? AND r.target LIKE ? GROUP BY r.target,t.status;"
"SELECT r.target,COUNT(*) AS total,
"SELECT r.target, r.event_time, COUNT(*) AS total,
SUM(CASE WHEN t.status='PASS' THEN 1 ELSE 0 END) AS pass,
SUM(CASE WHEN t.status='FAIL' THEN 1 ELSE 0 END) AS fail,
SUM(CASE WHEN t.status IN ('PASS','FAIL') THEN 0 ELSE 1 END) AS other, r.id
FROM tests AS t INNER JOIN runs AS r ON t.run_id=r.id
WHERE t.state like '%' AND ttype_id=? AND r.target LIKE ?
and r.id in
(SELECT DISTINCT on (target) id from runs where target like ? AND ttype_id=? order by target,event_time desc) GROUP BY r.target,r.id;"
ttype-id target-patt target-patt ttype-id))
(SELECT DISTINCT on (target) id from runs where target like ? AND ttype_id=? order by target,event_time desc)
GROUP BY r.target,r.id
order by r.event_time desc limit ? offset ? ;"
ttype-id target-patt target-patt ttype-id limit offset))
(define (pgdb:get-latest-run-stats-given-pattern dbh patt limit offset)
(dbi:get-rows
dbh
;; "SELECT COUNT(t.id),t.status,r.target FROM tests AS t INNER JOIN runs AS r ON t.run_id=r.id
;; WHERE t.state='COMPLETED' AND ttype_id=? AND r.target ILIKE ? GROUP BY r.target,t.status;"
"SELECT r.target, r.event_time, COUNT(*) AS total,
SUM(CASE WHEN t.status='PASS' THEN 1 ELSE 0 END) AS pass,
SUM(CASE WHEN t.status='FAIL' THEN 1 ELSE 0 END) AS fail,
SUM(CASE WHEN t.status IN ('PASS','FAIL') THEN 0 ELSE 1 END) AS other, r.id
FROM tests AS t INNER JOIN runs AS r ON t.run_id=r.id
WHERE t.state like '%' AND r.target ILIKE ?
and r.id in
(SELECT DISTINCT on (target) id from runs where target ilike ? order by target,event_time desc)
GROUP BY r.target,r.id
order by r.event_time desc limit ? offset ? ;"
patt patt limit offset))
(define (pgdb:get-count-data-stats-target-latest dbh ttype-id target-patt)
(dbi:get-rows
dbh
"SELECT count(*) from
(SELECT DISTINCT on (target) id
from runs where target like ? AND ttype_id = ?
order by target, event_time desc
) as x;"
target-patt ttype-id))
(define (pgdb:get-latest-run-cnt dbh ttype-id target-patt)
(let* ((cnt-result (pgdb:get-count-data-stats-target-latest dbh ttype-id target-patt))
;(cnt-row (car (cnt-result)))
(cnt 0)
)
(for-each
(lambda (row)
(set! cnt (vector-ref row 0 )))
cnt-result)
cnt))
(define (pgdb:get-count-data-stats-latest-pattern dbh patt)
(dbi:get-rows
dbh
"SELECT count(*) from
(SELECT DISTINCT on (target) id
from runs where target ilike ?
order by target, event_time desc
) as x;"
patt))
(define (pgdb:get-latest-run-cnt-by-pattern dbh target-patt)
(let* ((cnt-result (pgdb:get-count-data-stats-latest-pattern dbh target-patt))
;(cnt-row (car (cnt-result)))
(cnt 0)
)
(for-each
(lambda (row)
(set! cnt (vector-ref row 0 )))
cnt-result)
cnt))
(define (pgdb:get-run-stats-history-given-target dbh ttype-id target-patt)
(dbi:get-rows
dbh
;; "SELECT COUNT(t.id),t.status,r.target FROM tests AS t INNER JOIN runs AS r ON t.run_id=r.id
;; WHERE t.state='COMPLETED' AND ttype_id=? AND r.target LIKE ? GROUP BY r.target,t.status;"
"SELECT r.run_name,COUNT(*) AS total,
SUM(CASE WHEN t.status='PASS' THEN 1 ELSE 0 END) AS pass,
SUM(CASE WHEN t.status='FAIL' THEN 1 ELSE 0 END) AS fail,
SUM(CASE WHEN t.status IN ('PASS','FAIL') THEN 0 ELSE 1 END) AS other
FROM tests AS t INNER JOIN runs AS r ON t.run_id=r.id
WHERE t.state like '%' AND ttype_id=? AND r.target LIKE ?
GROUP BY r.run_name;"
ttype-id target-patt ))
(define (pgdb:get-all-run-stats-target-slice dbh target-patt)
(dbi:get-rows
dbh
"SELECT r.target, r.run_name,r.event_time, COUNT(*) AS total,
(define (pgdb:get-all-run-stats-target-slice dbh target-patt limit offset)
(dbi:get-rows
dbh
"SELECT r.target, r.run_name,r.event_time, COUNT(*) AS total,
SUM(CASE WHEN t.status='PASS' THEN 1 ELSE 0 END) AS pass,
SUM(CASE WHEN t.status='FAIL' THEN 1 ELSE 0 END) AS fail,
SUM(CASE WHEN t.status IN ('PASS','FAIL') THEN 0 ELSE 1 END) AS other
FROM tests AS t INNER JOIN runs AS r ON t.run_id=r.id
WHERE r.target LIKE ?
GROUP BY r.target,r.run_name, r.event_time
order by r.target,r.event_time desc limit ? offset ? ;"
target-patt limit offset))
GROUP BY r.target,r.run_name, r.event_time;"
(define (pgdb:get-count-data-stats-target-slice dbh target-patt)
(dbi:get-rows
dbh
"SELECT count(*) from (SELECT r.target, r.run_name,r.event_time, COUNT(*) AS total
FROM tests AS t INNER JOIN runs AS r ON t.run_id=r.id
WHERE r.target LIKE ?
GROUP BY r.target,r.run_name, r.event_time
) as x;"
target-patt))
(define (pgdb:get-slice-cnt dbh target-patt)
(let* ((cnt-result (pgdb:get-count-data-stats-target-slice dbh target-patt))
;(cnt-row (car (cnt-result)))
(cnt 0)
)
(for-each
(lambda (row)
(set! cnt (vector-ref row 0 )))
cnt-result)
cnt))
(define (pgdb:get-target-types dbh)
(dbi:get-rows dbh "SELECT id,target_spec FROM ttype;"))
(define (pgdb:get-distict-target-slice dbh)
(dbi:get-rows dbh " select distinct on (split_part (target, '/', 1)) (split_part (target, '/', 1)) from runs;"))
(define (pgdb:get-distict-target-slice3 dbh)
(dbi:get-rows dbh " select distinct on (split_part (target, '/', 3)) (split_part (target, '/', 3)) from runs;"))
;;
(define (pgdb:get-targets dbh target-patt)
(let ((ttypes (pgdb:get-target-types dbh)))
(map
(lambda (ttype-dat)
(let ((tt-id (vector-ref ttype-dat 0))
(ttype (vector-ref ttype-dat 1)))
;;======================================================================
;; Copyright 2017, Matthew Welland.
;;
;; This program is made available under the GNU GPL version 2.0 or
;; greater. See the accompanying file COPYING for details.
;;
;; This program is distributed WITHOUT ANY WARRANTY; without even the
;; implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
;; PURPOSE.
;;======================================================================
(use regex)
(load "models/pgdb.scm")
(include "pages/filter-defs.scm")
(include "pages/home_ctrl.scm")
(include "pages/home_view.scm")
;;======================================================================
;; a function <pagename>-action is called on POST
(define (home-action action)
(case (string->symbol action)
((filter)
(let ((target-type (s:get-input 'target-type))
(target-filter (s:get-input 'tfilter))
(target (s:get-input 'target))
(row-or-col (s:get-input 'row-or-col)))
(let ((dot (s:get-input 'dot))
(type (s:get-input 'kit-type))
(rel (s:get-input 'rel-num))
(bp (s:get-input 'bp)))
;;
;; s:set! is a page local var. Better than s:session-var-set! but still not a good idea.
;;
(s:set! "row-or-col" (if (list? row-or-col) (string-intersperse row-or-col ",") row-or-col))
(s:set! "target-type" target-type)
(s:set! "tfilter" target-filter)
(s:set! "target" target)
(s:set! "target-filter" target-filter))) (s:set! "dot" dot)
(s:set! "type" type)
(s:set! "bp" bp)
((filter2) (let ((tslice-select (s:get-input 'tslice-select)) (t-slice-filter (s:get-input 't-slice-filter))) ;; ;; s:set! is a page local var. Better than s:session-var-set! but still not a good idea. ;;
(s:set! "tslice" tslice-select)
(s:set! "rel" rel)))))
(s:set! "t-slice-patt" t-slice-filter)))))
;;======================================================================
;; Copyright 2017, Matthew Welland.
;;
;; This program is made available under the GNU GPL version 2.0 or
;; greater. See the accompanying file COPYING for details.
;;
;; This program is distributed WITHOUT ANY WARRANTY; without even the
;; implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
;; PURPOSE.
;;======================================================================
(use regex)
;; (load "models/pgdb.scm")
(include "pages/index_ctrl.scm")
(include "pages/index_view.scm")
(include "pages/index_ctrl.scm")
(include "pages/index_view.scm")
;;======================================================================
;; Copyright 2006-2012, Matthew Welland.
;;
;; This program is made available under the GNU GPL version 2.0 or
;; greater. See the accompanying file COPYING for details.
;;
;; This program is distributed WITHOUT ANY WARRANTY; without even the
;; implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
;; PURPOSE.
;;======================================================================
(use srfi-1 posix regex-case base64 format dot-locking csv-xml z3 sql-de-lite hostinfo md5 message-digest typed-records directory-utils stack
matchable) matchable regex posix srfi-18 extras
(require-extension regex posix) pkts (prefix dbi dbi:))
(require-extension (srfi 18) extras tcp rpc)
(import (prefix sqlite3 sqlite3:))
(import (prefix base64 base64:))
(declare (unit common))
(include "common_records.scm")
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
-
+
(debug:print-error 0 *default-log-port* "bad value for setenv, key=" key ", value=" val)
(setenv key val))
(debug:print-error 0 *default-log-port* "bad value for setenv, key=" key ", value=" val))))
(define home (getenv "HOME"))
(define user (getenv "USER"))
;; GLOBAL GLETCHES
;; GLOBALS
;; CONTEXTS
(defstruct cxt
(taskdb #f)
(cmutex (make-mutex)))
;; (define *contexts* (make-hash-table))
;; (define *context-mutex* (make-mutex))
(define *task-db* #f) ;; (vector db path-to-db)
(define *db-access-allowed* #t) ;; flag to allow access
(define *db-access-mutex* (make-mutex))
(define *db-transaction-mutex* (make-mutex))
(define *db-cache-path* #f)
(define *db-with-db-mutex* (make-mutex))
(define *db-api-call-time* (make-hash-table)) ;; hash of command => (list of times)
;; no sync db
(define *no-sync-db* #f)
;; SERVER
(define *my-client-signature* #f)
(define *transport-type* 'http) ;; override with [server] transport http|rpc|nmsg
(define *runremote* #f) ;; if set up for server communication this will hold <host port>
;; (define *max-cache-size* 0)
(define *logged-in-clients* (make-hash-table))
;; (define *server-id* #f)
(define *server-info* #f) ;; good candidate for easily convert to non-global
(define *time-to-exit* #f)
(define *server-run* #t)
(define *run-id* #f)
(define *server-kind-run* (make-hash-table))
(define *home-host* #f)
;; (define *total-non-write-delay* 0)
(define *heartbeat-mutex* (make-mutex))
(define *api-process-request-count* 0)
(define *max-api-process-requests* 0)
(define *server-overloaded* #f)
;; client
(define *rmt-mutex* (make-mutex)) ;; remote access calls mutex
;; RPC transport
(define *rpc:listener* #f)
;; KEY info
(define *target* (make-hash-table)) ;; cache the target here; target is keyval1/keyval2/.../keyvalN
(define *keys* (make-hash-table)) ;; cache the keys here
(define *keyvals* (make-hash-table))
(define *toptest-paths* (make-hash-table)) ;; cache toptest path settings here
(define *test-paths* (make-hash-table)) ;; cache test-id to test run paths here
(define *test-ids* (make-hash-table)) ;; cache run-id, testname, and item-path => test-id
(define *test-info* (make-hash-table)) ;; cache the test info records, update the state, status, run_duration etc. from testdat.db
(define *run-info-cache* (make-hash-table)) ;; run info is stable, no need to reget
(define *launch-setup-mutex* (make-mutex)) ;; need to be able to call launch:setup often so mutex it and re-call the real deal only if *toppath* not set
(define *homehost-mutex* (make-mutex))
;; Miscellaneous
(define *triggers-mutex* (make-mutex)) ;; block overlapping processing of triggers
(defstruct remote
(hh-dat (common:get-homehost)) ;; homehost record ( addr . hhflag )
(server-url (if *toppath* (server:check-if-running *toppath*))) ;; (server:check-if-running *toppath*) #f))
(last-server-check 0) ;; last time we checked to see if the server was alive
(conndat #f)
(transport *transport-type*)
(server-timeout (server:get-timeout)) ;; default from server:get-timeout
(define (common:version-changed?)
(not (equal? (common:get-last-run-version)
(common:version-signature))))
(define (common:api-changed?)
(not (equal? (substring (->string megatest-version) 0 4)
(substring (common:get-last-run-version) 0 4))))
(substring (conc (common:get-last-run-version)) 0 4))))
;; Move me elsewhere ...
;; RADT => Why do we meed the version check here, this is called only if version misma
;;
(define (common:cleanup-db dbstruct)
(db:multi-db-sync
(define (common:cleanup-db dbstruct #!key (full #f))
(apply db:multi-db-sync
dbstruct
'schema
;; 'new2old
'killservers
'dejunk 'adj-target
;; 'old2new
'new2old
(if full
'(dejunk)
)
'()))
(if (common:api-changed?)
(common:set-last-run-version)))
;; Rotate logs, logic:
;; if > 500k and older than 1 week:
;; remove previous compressed log and compress this log
;; WARNING: This proc operates assuming that it is in the directory above the
(dbstruct (db:setup #t)))
(debug:print 0 *default-log-port*
"WARNING: Version mismatch!\n"
" expected: " (common:version-signature) "\n"
" got: " (common:get-last-run-version))
(cond
((get-environment-variable "MT_SKIP_DB_MIGRATE") #t)
((and (file-exists? mtconf) (file-exists? dbfile) (not read-only)
((and (common:file-exists? mtconf) (common:file-exists? dbfile) (not read-only)
(eq? (current-user-id)(file-owner mtconf))) ;; safe to run -cleanup-db
(debug:print 0 *default-log-port* " I see you are the owner of megatest.config, attempting to cleanup and reset to new version")
(handle-exceptions
exn
(begin
(debug:print 0 *default-log-port* "Failed to switch versions.")
(debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
(print-call-chain (current-error-port))
(exit 1))
(common:cleanup-db dbstruct)))
((not (file-exists? mtconf))
((not (common:file-exists? mtconf))
(debug:print 0 *default-log-port* " megatest.config does not exist in this area. Cannot proceed with megatest version migration.")
(exit 1))
((not (file-exists? dbfile))
((not (common:file-exists? dbfile))
(debug:print 0 *default-log-port* " megatest.db does not exist in this area. Cannot proceed with megatest version migration.")
(exit 1))
((not (eq? (current-user-id)(file-owner mtconf)))
(debug:print 0 *default-log-port* " You do not own megatest.db in this area. Cannot proceed with megatest version migration.")
(exit 1))
(read-only
(debug:print 0 *default-log-port* " You have read-only access to this area. Cannot proceed with megatest version migration.")
(exit 1))
(else
(debug:print 0 *default-log-port* " to switch versions you can run: \"megatest -cleanup-db\"")
(exit 1)))))
(begin
(debug:print 0 *default-log-port* "ERROR: cannot migrate version unless on homehost. Exiting.")
(exit 1))))
(exit 1)))))))
;; (begin
;; (debug:print 0 *default-log-port* "ERROR: cannot migrate version unless on homehost. Exiting.")
;; (exit 1))))
;;======================================================================
;; S P A R S E A R R A Y S
;;======================================================================
(define (make-sparse-array)
(let ((a (make-sparse-vector)))
(and (common:on-homehost?)
(args:get-arg "-server")))
;; (let ((ohh (common:on-homehost?))
;; (srv (args:get-arg "-server")))
;; (and ohh srv)))
;; (debug:print-info 0 *default-log-port* "common:run-sync? ohh=" ohh ", srv=" srv)
;;;; run-ids;; if #f use *db-local-sync* : or 'local-sync-flags;; if #t use timestamps : or 'timestamps(define (common:sync-to-megatest.db dbstruct) (let ((start-time (current-seconds)) (res (db:multi-db-sync dbstruct 'new2old))) (let ((sync-time (- (current-seconds) start-time))) (debug:print-info 3 *default-log-port* "Sync of newdb to olddb completed in " sync-time " seconds pid="(current-process-id)) (if (common:low-noise-print 30 "sync new to old") (debug:print-info 0 *default-log-port* "Sync of newdb to olddb completed in " sync-time " seconds pid="(current-process-id)))) res))
(define *wdnum* 0)
(define *wdnum*mutex (make-mutex))
;; currently the primary job of the watchdog is to run the sync back to megatest.db from the db in /tmp
;; if we are on the homehost and we are a server (by definition we are on the homehost if we are a server)
(cdr hh)
#f)))
;; do we honor the caches of the config files?
;;
(define (common:use-cache?)
(let ((res #t)) ;; priority by order of evaluation
(if *configdat*
(if *configdat* ;; sillyness here. can't use setup/use-cache to know if we can use the cached files!
(if (equal? (configf:lookup *configdat* "setup" "use-cache") "no")
(set! res #f)
(if (equal? (configf:lookup *configdat* "setup" "use-cache") "yes")
(set! res #t))))
(if (args:get-arg "-no-cache")(set! res #f)) ;; overrides setting in "setup"
(if (getenv "MT_USE_CACHE")
(if (equal? (getenv "MT_USE_CACHE") "yes")
;; apply-wildcards: #t/#f - apply vars from targets with % wildcards to all matching sections
;;
(define (read-config path ht allow-system #!key (environ-patt #f) (curr-section #f)
(sections #f) (settings (make-hash-table)) (keep-filenames #f)
(post-section-procs '()) (apply-wildcards #t))
(debug:print 9 *default-log-port* "START: " path)
(if (and (not (port? path))
(not (file-exists? path))) ;; for case where we are handed a port
(not (common:file-exists? path))) ;; for case where we are handed a port
(begin
(debug:print-info 1 *default-log-port* "read-config - file not found " path " current path: " (current-directory))
;; WARNING: This is a risky change but really, we should not return an empty hash table if no file read?
#f) ;; (if (not ht)(make-hash-table) ht))
(let ((inp (if (string? path)
(open-input-file path)
path)) ;; we can be handed a port
;;======================================================================
;; The heavy lifting starts here
;;======================================================================
(define (main)
(let ((mtdb-path (conc *toppath* "/megatest.db"))) ;;
(if (and (file-exists? mtdb-path)
(if (and (common:file-exists? mtdb-path)
(file-write-access? mtdb-path))
(if (not (args:get-arg "-skip-version-check"))
(common:exit-on-version-changed)))
(let* ((commondat (dboard:commondat-make)))
;; Move this stuff to db.scm? I'm not sure that is the right thing to do...
(cond
((args:get-arg "-test") ;; run-id,test-id
;; This routine creates the db if not already present. It is only called if the db is not already opened
;;
(define (db:open-db dbstruct #!key (areapath #f)(do-sync #t)) ;; TODO: actually use areapath
(let ((tmpdb-stack (dbr:dbstruct-dbstack dbstruct))) ;; RA => Returns the first reference in dbstruct
(if (stack? tmpdb-stack)
(db:get-db tmpdb-stack) ;; get previously opened db (will create new db handle if all in the stack are already used
(let* ((dbpath (db:dbfile-path )) ;; path to tmp db area
(dbexists (file-exists? dbpath))
(dbexists (common:file-exists? dbpath))
(tmpdbfname (conc dbpath "/megatest.db"))
(dbfexists (file-exists? tmpdbfname)) ;; (conc dbpath "/megatest.db")))
(mtdbexists (file-exists? (conc *toppath* "/megatest.db")))
(dbfexists (common:file-exists? tmpdbfname)) ;; (conc dbpath "/megatest.db")))
(mtdbexists (common:file-exists? (conc *toppath* "/megatest.db")))
(mtdb (db:open-megatest-db))
(mtdbpath (db:dbdat-get-path mtdb))
(tmpdb (db:open-megatest-db path: dbpath)) ;; lock-create-open dbpath db:initialize-main-db))
(refndb (db:open-megatest-db path: dbpath name: "megatest_ref.db"))
(write-access (file-write-access? mtdbpath))
(mtdbmodtime (if mtdbexists (common:lazy-sqlite-db-modification-time mtdbpath) #f))
;; These are defunct tests, do not do all the overhead of set-state-status. Force them to INCOMPLETE.
;;
;; (db:delay-if-busy dbdat)
(let* (;; (min-incompleted (filter (lambda (x)
;; (let* ((testpath (cadr x))
;; (tdatpath (conc testpath "/testdat.db"))
;; (dbexists (file-exists? tdatpath)))
;; (dbexists (common:file-exists? tdatpath)))
;; (or (not dbexists) ;; if no file then something wrong - mark as incomplete
;; (> (- (current-seconds)(file-modification-time tdatpath)) 600)))) ;; no change in 10 minutes to testdat.db - she's dead Jim
;; incompleted))
(min-incompleted-ids (map car incompleted)) ;; do 'em all
(all-ids (append min-incompleted-ids (map car oldlaunched))))
(if (> (length all-ids) 0)
(begin
(lambda (db)
(sqlite3:execute db "INSERT OR REPLACE INTO metadat (var,val) VALUES (?,?);" var val))))
(define (db:del-var dbstruct var)
(db:with-db dbstruct #f #t
(lambda (db)
(sqlite3:execute db "DELETE FROM metadat WHERE var=?;" var))))
;;======================================================================
;; no-sync.db - small bits of data to be shared between servers
;;======================================================================
(define (db:open-no-sync-db)
(let* ((dbpath (db:dbfile-path))
(dbname (conc dbpath "/no-sync.db"))
(db (sqlite3:open-database dbname)))
(sqlite3:set-busy-handler! db (make-busy-timeout 136000))
(sqlite3:execute db "PRAGMA synchronous = 0;")
(sqlite3:execute db "CREATE TABLE IF NOT EXISTS no_sync_metadat (var TEXT,val TEXT, CONSTRAINT no_sync_metadat_constraint UNIQUE (var));")
db))
;; if we are not a server create a db handle. this is not finalized
;; so watch for problems. I'm still not clear if it is needed to manually
;; finalize sqlite3 dbs with the sqlite3 egg.
;;
(define (db:no-sync-db db-in)
(if db-in
db-in
(let ((db (db:open-no-sync-db)))
(set! *no-sync-db* db)
db)))
(define (db:no-sync-set db var val)
(sqlite3:execute (db:no-sync-db db) "INSERT OR REPLACE INTO no_sync_metadat (var,val) VALUES (?,?);" var val))
(define (db:no-sync-del! db var)
(sqlite3:execute (db:no-sync-db db) "DELETE FROM no_sync_metadat WHERE var=?;" var))
(define (db:no-sync-get/default db var default)
(let ((res default))
(sqlite3:for-each-row
(lambda (val)
(set! res val))
(db:no-sync-db db)
"SELECT val FROM no_sync_metadat WHERE var=?;"
var)
(if res
(let ((newres (if (string? res)
(string->number res)
#f)))
(if newres
newres
res))
res)))
(define (db:no-sync-close-db db)
(db:safely-close-sqlite3-db db))
;; use a global for some primitive caching, it is just silly to
;; re-read the db over and over again for the keys since they never
;; change
;; why get the keys from the db? why not get from the *configdat*
;; using keys:config-get-fields?
(lambda (db)
(sqlite3:for-each-row
(lambda (id test_id category variable value expected tol units comment status type)
(set! res (cons (vector id test_id category variable value expected tol units comment status type) res)))
db
"SELECT id,test_id,category,variable,value,expected,tol,units,comment,status,type FROM test_data WHERE test_id=? AND category LIKE ? ORDER BY category,variable;" test-id categorypatt)
(reverse res)))))
;; This routine moved from tdb.scm, tdb:read-test-data
;;
(define (db:read-test-data* dbstruct run-id test-id categorypatt varpatt)
(let* ((res '()))
(db:with-db
dbstruct #f #f
(lambda (db)
(sqlite3:for-each-row
(lambda (id test_id category variable value expected tol units comment status type)
(set! res (cons (vector id test_id category variable value expected tol units comment status type) res)))
db
"SELECT id,test_id,category,variable,value,expected,tol,units,comment,status,type FROM test_data WHERE test_id=? AND category LIKE ? AND variable LIKE ? ORDER BY category,variable;" test-id categorypatt varpatt)
(reverse res)))))
;;======================================================================
;; Misc. test related queries
;;======================================================================
(define (db:get-run-ids-matching-target dbstruct keynames target res runname testpatt statepatt statuspatt)
(db:with-db
(and (equal? newstate "NOT_STARTED")
(> num-non-completes 0)))
"STARTED"
(car all-curr-statuses))))
;; (print "bad-not-supported: " bad-not-support " all-curr-states: " all-curr-states " all-curr-statuses: " all-curr-states)
;; " newstate: " newstate " newstatus: " newstatus)
;; NB// Pass the db so it is part of the transaction
(if tl-test-id
(db:test-set-state-status db run-id tl-test-id newstate newstatus #f)))))))
(db:test-set-state-status db run-id tl-test-id newstate newstatus #f))))))))
(mutex-unlock! *db-transaction-mutex*)
(if (and test-id state status (equal? status "AUTO"))
(db:test-data-rollup dbstruct run-id test-id status))
tr-res)))))
;; BBnote: db:get-all-state-status-counts-for-test returns dbr:counts object aggregating state and status of items of a given test, *not including rollup state/status*
(define (db:get-all-state-status-counts-for-test dbstruct run-id test-name item-path)
(db:with-db
<div class="paragraph"><p>Attempt to rerun tests in "STUCK/DEAD", "n/a", "ZERO_ITEMS" states.</p></div>
<div class="listingblock">
<div class="title">In megatest.config</div>
<div class="content monospaced">
<pre>[setup]
reruns 5</pre>
</div></div>
<div class="paragraph"><p>Replace the default blacklisted environment variables with user supplied
list.</p></div>
<div class="paragraph"><p>Default list: USER HOME DISPLAY LS_COLORS XKEYSYMDB EDITOR MAKEFLAGS MAKEF MAKEOVERRIDES</p></div>
<div class="paragraph"><div class="title">Add a "bad" variable "PROMPT" to the variables that will be commented out</div><p>in the megatest.sh and megatest.csh files:</p></div>
<div class="listingblock">
<div class="content monospaced">
<pre>[setup]
blacklistvars USER HOME DISPLAY LS_COLORS XKEYSYMDB EDITOR MAKEFLAGS PROMPT</pre>
</div></div>
</div>
<div class="sect4">
<h5 id="_run_time_limit">Run time limit</h5>
<div class="listingblock">
<div class="content monospaced">
<pre>[setup]
# this will automatically kill the test if it runs for more than 1h 2m and 3s
<div class="listingblock">
<div class="content monospaced">
<pre>$MT_MEGATEST -env2file .ezsteps/${stepname}</pre>
</div></div>
</div>
<div class="sect2">
<h3 id="_triggers">Triggers</h3>
<div class="paragraph"><p>In your testconfig triggers can be specified</p></div>
<div class="paragraph"><p>In your testconfig or megatest.config triggers can be specified</p></div>
<div class="listingblock">
<div class="content monospaced">
<pre>[triggers]
# Call script running.sh when test goes to state=RUNNING, status=PASS
RUNNING/PASS running.sh
# Call script running.sh any time state goes to RUNNING
RUNNING/ running.sh
# Call script onpass.sh any time status goes to PASS
PASS/ onpass.sh</pre>
</div></div>
<div class="paragraph"><p>Scripts called will have; test-id test-rundir trigger, added to the commandline.</p></div>
<div class="paragraph"><p>Scripts called will have; test-id test-rundir trigger test-name item-path state status event-time, added to the commandline.</p></div>
<div class="paragraph"><p>HINT</p></div>
<div class="paragraph"><p>To start an xterm (useful for debugging), use a command line like the following:</p></div>
<div class="listingblock">
<div class="content monospaced">
<pre>[triggers]
COMPLETED/ xterm -e bash -s --</pre>
</div></div>
.In megatest.config
------------------
[setup]
reruns 5
------------------
Replace the default blacklisted environment variables with user supplied
list.
Default list: USER HOME DISPLAY LS_COLORS XKEYSYMDB EDITOR MAKEFLAGS MAKEF MAKEOVERRIDES
.Add a "bad" variable "PROMPT" to the variables that will be commented out
in the megatest.sh and megatest.csh files:
-----------------
[setup]
blacklistvars USER HOME DISPLAY LS_COLORS XKEYSYMDB EDITOR MAKEFLAGS PROMPT
-----------------
Run time limit
++++++++++++++
-----------------
[setup]
# this will automatically kill the test if it runs for more than 1h 2m and 3s
runtimelim 1h 2m 3s
----------------------------
$MT_MEGATEST -env2file .ezsteps/${stepname}
----------------------------
Triggers
~~~~~~~~
In your testconfig triggers can be specified
In your testconfig or megatest.config triggers can be specified
-----------------
[triggers]
# Call script running.sh when test goes to state=RUNNING, status=PASS
RUNNING/PASS running.sh
# Call script running.sh any time state goes to RUNNING
RUNNING/ running.sh
# Call script onpass.sh any time status goes to PASS
PASS/ onpass.sh
-----------------
Scripts called will have; test-id test-rundir trigger, added to the commandline.
Scripts called will have; test-id test-rundir trigger test-name item-path state status event-time, added to the commandline.
HINT
To start an xterm (useful for debugging), use a command line like the following:
-----------------
[triggers]
(include "common_records.scm")
(include "key_records.scm")
(include "db_records.scm")
(include "run_records.scm")
(include "test_records.scm")
(define (common:wait-for-cpuload maxload numcpus waitdelay #!key (count 1000) (msg #f)(remote-host #f))
(let* ((loadavg (common:get-cpu-load remote-host))
(first (car loadavg))
(next (cadr loadavg))
(adjload (* maxload numcpus))
(loadjmp (- first next)))
(cond
((and (> first adjload)
(> count 0))
(debug:print-info 0 *default-log-port* "waiting " waitdelay " seconds due to load " first " exceeding max of " adjload " " (if msg msg ""))
(thread-sleep! waitdelay)
(common:wait-for-cpuload maxload numcpus waitdelay count: (- count 1)))
((and (> loadjmp numcpus)
(> count 0))
(debug:print-info 0 *default-log-port* "waiting " waitdelay " seconds due to load jump " loadjmp " > numcpus " numcpus (if msg msg ""))
(thread-sleep! waitdelay)
(common:wait-for-cpuload maxload numcpus waitdelay count: (- count 1))))))
(define (common:wait-for-homehost-load maxload msg)
(let* ((hh-dat (if (common:on-homehost?) ;; if we are on the homehost then pass in #f so the calls are local.
#f
(common:get-homehost)))
(hh (if hh-dat (car hh-dat) #f))
(numcpus (common:get-num-cpus hh)))
(common:wait-for-normalized-load maxload msg: msg remote-host: hh)))
;; wait for normalized cpu load to drop below maxload
;;
(define (common:wait-for-normalized-load maxload #!key (msg #f)(remote-host #f))
(let ((num-cpus (common:get-num-cpus remote-host)))
(common:wait-for-cpuload maxload num-cpus 15 msg: msg remote-host: remote-host)))
;; hed tal reg reruns reglen regfull test-record runname test-name item-path jobgroup max-concurrent-jobs run-id waitons item-path testmode test-patts required-tests test-registry registry-mutex flags keyvals run-info newtal all-tests-registry itemmaps)
(define (runs:process-expanded-tests runsdat testdat)
;; unroll the contents of runsdat and testdat (due to ongoing refactoring).
(let* ((hed (runs:testdat-hed testdat))
(tal (runs:testdat-tal testdat))
(reg (runs:testdat-reg testdat))
(reruns (runs:testdat-reruns testdat))
(test-name (runs:testdat-test-name testdat))
(item-path (runs:testdat-item-path testdat))
(jobgroup (runs:testdat-jobgroup testdat))
(waitons (runs:testdat-waitons testdat))
(item-path (runs:testdat-item-path testdat))
(testmode (runs:testdat-testmode testdat))
(newtal (runs:testdat-newtal testdat))
(itemmaps (runs:testdat-itemmaps testdat))
(test-record (runs:testdat-test-record testdat))
(prereqs-not-met (runs:testdat-prereqs-not-met testdat))
(reglen (runs:dat-reglen runsdat))
(regfull (runs:dat-regfull runsdat))
(runname (runs:dat-runname runsdat))
(max-concurrent-jobs (runs:dat-max-concurrent-jobs runsdat))
(run-id (runs:dat-run-id runsdat))
(test-patts (runs:dat-test-patts runsdat))
(required-tests (runs:dat-required-tests runsdat))
(test-registry (runs:dat-test-registry runsdat))
(registry-mutex (runs:dat-registry-mutex runsdat))
(flags (runs:dat-flags runsdat))
(keyvals (runs:dat-keyvals runsdat))
(run-info (runs:dat-run-info runsdat))
(all-tests-registry (runs:dat-all-tests-registry runsdat))
(run-limits-info (runs:dat-can-run-more-tests runsdat))
;; (runs:can-run-more-tests run-id jobgroup max-concurrent-jobs)) ;; look at the test jobgroup and tot jobs running
(have-resources (car run-limits-info))
(num-running (list-ref run-limits-info 1))
(num-running-in-jobgroup(list-ref run-limits-info 2))
(max-concurrent-jobs (list-ref run-limits-info 3))
(job-group-limit (list-ref run-limits-info 4))
;; (prereqs-not-met (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps))
;; (prereqs-not-met (mt:lazy-get-prereqs-not-met run-id waitons item-path mode: testmode itemmap: itemmap))
(fails (if (list? prereqs-not-met)
(runs:calc-fails prereqs-not-met)
(begin
(debug:print-error 0 *default-log-port* "prereqs-not-met is not a list! " prereqs-not-met)
'())))
(non-completed (filter (lambda (x) ;; remove hed from not completed list, duh, of course it is not completed!
(not (equal? x hed)))
(runs:calc-not-completed prereqs-not-met)))
(loop-list (list hed tal reg reruns))
;; configure the load runner
(numcpus (common:get-num-cpus #f))
(maxload (string->number (or (configf:lookup *configdat* "jobtools" "maxload") "3.0"))) ;; use a non-number string to disable
(maxhomehostload (string->number (or (configf:lookup *configdat* "jobtools" "maxhomehostload") "1.2"))) ;; use a non-number string to disable
(waitdelay (string->number (or (configf:lookup *configdat* "jobtools" "waitdelay") "60"))))
(debug:print-info 4 *default-log-port* "have-resources: " have-resources " prereqs-not-met: ("
(string-intersperse
(map (lambda (t)
(if (vector? t)
(conc (db:test-get-state t) "/" (db:test-get-status t))
(conc " WARNING: t is not a vector=" t )))
prereqs-not-met)
", ") ") fails: " fails
"\nregistered? " (hash-table-ref/default test-registry (db:test-make-full-name test-name item-path) #f))
(if (and (not (null? prereqs-not-met))
(runs:lownoise (conc "waiting on tests " prereqs-not-met hed) 60))
(debug:print-info 2 *default-log-port* "waiting on tests; " (string-intersperse (runs:mixed-list-testname-and-testrec->list-of-strings prereqs-not-met) ", ")))
;; Don't know at this time if the test have been launched at some time in the past
;; i.e. is this a re-launch?
(debug:print-info 4 *default-log-port* "run-limits-info = " run-limits-info)
(cond
;; Check item path against item-patts,
;;
((not (tests:match test-patts (tests:testqueue-get-testname test-record) item-path required: required-tests)) ;; This test/itempath is not to be run
;; else the run is stuck, temporarily or permanently
;; but should check if it is due to lack of resources vs. prerequisites
(debug:print-info 1 *default-log-port* "Skipping " (tests:testqueue-get-testname test-record) " " item-path " as it doesn't match " test-patts)
(if (or (not (null? tal))(not (null? reg)))
(list (runs:queue-next-hed tal reg reglen regfull)
(runs:queue-next-tal tal reg reglen regfull)
(runs:queue-next-reg tal reg reglen regfull)
reruns)
#f))
;; Register tests
;;
((not (hash-table-ref/default test-registry (db:test-make-full-name test-name item-path) #f))
(debug:print-info 4 *default-log-port* "Pre-registering test " test-name "/" item-path " to create placeholder" )
;; always do firm registration now in v1.60 and greater ;; (eq? *transport-type* 'fs) ;; no point in parallel registration if use fs
(let register-loop ((numtries 15))
(rmt:register-test run-id test-name item-path)
(if (rmt:get-test-id run-id test-name item-path)
(hash-table-set! test-registry (db:test-make-full-name test-name item-path) 'done)
(if (> numtries 0)
(begin
(thread-sleep! 0.5)
(register-loop (- numtries 1)))
(debug:print-error 0 *default-log-port* "failed to register test " (db:test-make-full-name test-name item-path)))))
(if (not (eq? (hash-table-ref/default test-registry (db:test-make-full-name test-name "") #f) 'done))
(begin
(rmt:register-test run-id test-name "")
(if (rmt:get-test-id run-id test-name "")
(hash-table-set! test-registry (db:test-make-full-name test-name "") 'done))))
(runs:shrink-can-run-more-tests-count runsdat) ;; DELAY TWEAKER (still needed?)
(if (and (null? tal)(null? reg))
(list hed tal (append reg (list hed)) reruns)
(list (runs:queue-next-hed tal reg reglen regfull)
(runs:queue-next-tal tal reg reglen regfull)
;; NB// Here we are building reg as we register tests
;; if regfull we must pop the front item off reg
(if regfull
(append (cdr reg) (list hed))
(append reg (list hed)))
reruns)))
;; At this point hed test registration must be completed.
;;
((eq? (hash-table-ref/default test-registry (db:test-make-full-name test-name item-path) #f)
'start)
(debug:print-info 0 *default-log-port* "Waiting on test registration(s): "
(string-intersperse
(filter (lambda (x)
(eq? (hash-table-ref/default test-registry x #f) 'start))
(hash-table-keys test-registry))
", "))
(thread-sleep! 0.051)
(list hed tal reg reruns))
;; If no resources are available just kill time and loop again
;;
((not have-resources) ;; simply try again after waiting a second
(if (runs:lownoise "no resources" 60)
(debug:print-info 1 *default-log-port* "no resources to run new tests, waiting ..."))
;; Have gone back and forth on this but db starvation is an issue.
;; wait one second before looking again to run jobs.
(thread-sleep! 1)
;; could have done hed tal here but doing car/cdr of newtal to rotate tests
(list (car newtal)(cdr newtal) reg reruns))
;; This is the final stage, everything is in place so launch the test
;;
((and have-resources
(or (null? prereqs-not-met)
(and (member 'toplevel testmode) ;; 'toplevel)
(null? non-completed)
(not (member 'exclusive testmode)))))
;; (hash-table-delete! *max-tries-hash* (db:test-make-full-name test-name item-path))
;; we are going to reset all the counters for test retries by setting a new hash table
;; this means they will increment only when nothing can be run
(set! *max-tries-hash* (make-hash-table))
;; well, first lets see if cpu load throttling is enabled. If so wait around until the
;; average cpu load is under the threshold before continuing
(if maxload ;; only gate if maxload is specified
(common:wait-for-cpuload maxload numcpus waitdelay))
(if maxhomehostload
(common:wait-for-homehost-load maxhomehostload (conc "Waiting for homehost load to drop below normalized value of " maxhomehostload)))
(run:test run-id run-info keyvals runname test-record flags #f test-registry all-tests-registry)
(runs:incremental-print-results run-id)
(hash-table-set! test-registry (db:test-make-full-name test-name item-path) 'running)
(runs:shrink-can-run-more-tests-count runsdat) ;; DELAY TWEAKER (still needed?)
;; (thread-sleep! *global-delta*)
(if (or (not (null? tal))(not (null? reg)))
(list (runs:queue-next-hed tal reg reglen regfull)
(runs:queue-next-tal tal reg reglen regfull)
(runs:queue-next-reg tal reg reglen regfull)
reruns)
#f))
;; must be we have unmet prerequisites
;;
(else
(debug:print 4 *default-log-port* "FAILS: " fails)
;; If one or more of the prereqs-not-met are FAIL then we can issue
;; a message and drop hed from the items to be processed.
;; (runs:mixed-list-testname-and-testrec->list-of-strings prereqs-not-met)
(if (and (not (null? prereqs-not-met))
(runs:lownoise (conc "waiting on tests " prereqs-not-met hed) 60))
(debug:print-info 1 *default-log-port* "waiting on tests; " (string-intersperse
(runs:mixed-list-testname-and-testrec->list-of-strings
prereqs-not-met) ", ")))
(if (or (null? fails)
(member 'toplevel testmode))
(begin
;; couldn't run, take a breather
(if (runs:lownoise "Waiting for more work to do..." 60)
(debug:print-info 0 *default-log-port* "Waiting for more work to do..."))
(thread-sleep! 1)
(list (car newtal)(cdr newtal) reg reruns))
;; the waiton is FAIL so no point in trying to run hed ever again
(if (or (not (null? reg))(not (null? tal)))
(if (vector? hed)
(begin
(debug:print 1 *default-log-port* "WARNING: Dropping test " test-name "/" item-path
" from the launch list as it has prerequistes that are FAIL")
(let ((test-id (rmt:get-test-id run-id hed "")))
(if test-id (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_FAIL" "Failed to run due to failed prerequisites")))
(runs:shrink-can-run-more-tests-count runsdat) ;; DELAY TWEAKER (still needed?)
;; (thread-sleep! *global-delta*)
;; This next is for the items
(mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "BLOCKED" #f)
(hash-table-set! test-registry (db:test-make-full-name test-name item-path) 'removed)
(list (runs:queue-next-hed tal reg reglen regfull)
(runs:queue-next-tal tal reg reglen regfull)
(runs:queue-next-reg tal reg reglen regfull)
reruns ;; WAS: (cons hed reruns) ;; but that makes no sense?
))
(let ((nth-try (hash-table-ref/default test-registry hed 0)))
(cond
((member "RUNNING" (map db:test-get-state prereqs-not-met))
(if (runs:lownoise (conc "possible RUNNING prerequistes " hed) 60)
(debug:print 0 *default-log-port* "WARNING: test " hed " has possible RUNNING prerequisites, don't give up on it yet."))
(thread-sleep! 4)
(list (runs:queue-next-hed newtal reg reglen regfull)
(runs:queue-next-tal newtal reg reglen regfull)
(runs:queue-next-reg newtal reg reglen regfull)
reruns))
((or (not nth-try)
(and (number? nth-try)
(< nth-try 10)))
(hash-table-set! test-registry hed (if (number? nth-try)
(+ nth-try 1)
0))
(if (runs:lownoise (conc "not removing test " hed) 60)
(debug:print 1 *default-log-port* "WARNING: not removing test " hed " from queue although it may not be runnable due to FAILED prerequisites"))
;; may not have processed correctly. Could be a race condition in your test implementation? Dropping test " hed) ;; " as it has prerequistes that are FAIL. (NOTE: hed is not a vector)")
(runs:shrink-can-run-more-tests-count runsdat) ;; DELAY TWEAKER (still needed?)
;; (list hed tal reg reruns)
;; (list (car newtal)(cdr newtal) reg reruns)
;; (hash-table-set! test-registry hed 'removed)
(list (runs:queue-next-hed newtal reg reglen regfull)
(runs:queue-next-tal newtal reg reglen regfull)
(runs:queue-next-reg newtal reg reglen regfull)
reruns))
((symbol? nth-try)
(if (eq? nth-try 'removed) ;; removed is removed - drop it NOW
(if (null? tal)
#f ;; yes, really
(list (car tal)(cdr tal) reg reruns))
(begin
(if (runs:lownoise (conc "FAILED prerequisites or other issue" hed) 60)
(debug:print 0 *default-log-port* "WARNING: test " hed " has FAILED prerequisites or other issue. Internal state " nth-try " will be overridden and we'll retry."))
(mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "KEEP_TRYING" #f)
(hash-table-set! test-registry hed 0)
(list (runs:queue-next-hed newtal reg reglen regfull)
(runs:queue-next-tal newtal reg reglen regfull)
(runs:queue-next-reg newtal reg reglen regfull)
reruns))))
(else
(if (runs:lownoise (conc "FAILED prerequitests and we tried" hed) 60)
(debug:print 0 *default-log-port* "WARNING: test " hed " has FAILED prerequitests and we've tried at least 10 times to run it. Giving up now."))
;; (debug:print 0 *default-log-port* " prereqs: " prereqs-not-met)
(hash-table-set! test-registry hed 'removed)
(mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "TEN_STRIKES" #f)
;; I'm unclear on if this roll up is needed - it may be the root cause of the "all set to FAIL" bug.
(rmt:set-state-status-and-roll-up-items run-id test-name item-path #f "FAIL" #f) ;; treat as FAIL
(list (if (null? tal)(car newtal)(car tal))
tal
reg
reruns)))))
;; can't drop this - maybe running? Just keep trying
(let ((runable-tests (runs:runable-tests prereqs-not-met)))
(if (null? runable-tests)
#f ;; I think we are truly done here
(list (runs:queue-next-hed newtal reg reglen regfull)
(runs:queue-next-tal newtal reg reglen regfull)
(runs:queue-next-reg newtal reg reglen regfull)
reruns)))))))))
;; Copyright 2006-2012, Matthew Welland.
;;
;; This program is made available under the GNU GPL version 2.0 or
;; greater. See the accompanying file COPYING for details.
;;
;; This program is distributed WITHOUT ANY WARRANTY; without even the
;; implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
;; PURPOSE.
;; strftime('%m/%d/%Y %H:%M:%S','now','localtime')
(use sqlite3 srfi-1 posix regex regex-case srfi-69 dot-locking directory-utils)
(use srfi-1 posix regex srfi-69 directory-utils)
(import (prefix sqlite3 sqlite3:))
(declare (unit ezsteps))
(declare (uses db))
(declare (uses common))
(declare (uses items))
(declare (uses runconfig))
;; (declare (uses sdb))
(rollup-status 0)
(exit-info (vector #t #t #t))
(test-id (db:test-get-id testdat))
(run-id (db:test-get-run_id testdat))
(test-name (db:test-get-testname testdat))
(kill-job #f)) ;; for future use (on re-factoring with launch.scm code
(let loop ((count 5))
(if (file-exists? test-run-dir)
(if (common:file-exists? test-run-dir)
(push-directory test-run-dir)
(if (> count 0)
(begin
(debug:print 0 *default-log-port* "WARNING: ezsteps attempting to run but test run directory " test-run-dir " is not there. Waiting and trying again " count " more times")
(sleep 3)
(loop (- count 1))))))
(debug:print-info 0 *default-log-port* "Running in directory " test-run-dir)
(if (not (file-exists? ".ezsteps"))(create-directory ".ezsteps"))
(if (not (common:file-exists? ".ezsteps"))(create-directory ".ezsteps"))
;; if ezsteps was defined then we are sure to have at least one step but check anyway
(if (not (> (length ezstepslst) 0))
(message-window "ERROR: You can only re-run steps defined via ezsteps")
(begin
(let loop ((ezstep (car ezstepslst))
(tal (cdr ezstepslst))
(prevstep #f)
(if (not (directory? path))
(begin
(print "The path " path " does not exist or is not a directory. Attempting to create it now")
(create-directory path #t)))
;; First check that the directory is empty!
(if (and (file-exists? path)
(if (and (common:file-exists? path)
(not (null? (glob (conc path "/*")))))
(begin
(print "WARNING: directory " path " is not empty, are you sure you want to continue?")
(display "Enter y/n: ")
(if (equal? "y" (read-line))
(print "Using directory " path " for your Megatest area.")
(begin
(description #f)
(steps '())
(scripts '())
(items '())
(rel-path #f))
(cond
((file-exists? "megatest.config") (set! rel-path "./"))
((file-exists? "../megatest.config") (set! rel-path "../"))
((file-exists? "../../megatest.config") (set! rel-path "../../"))
((file-exists? "../../../megatest.config")(set! rel-path "../../../"))) ;; good enough dang it.
((common:file-exists? "megatest.config") (set! rel-path "./"))
((common:file-exists? "../megatest.config") (set! rel-path "../"))
((common:file-exists? "../../megatest.config") (set! rel-path "../../"))
((common:file-exists? "../../../megatest.config")(set! rel-path "../../../"))) ;; good enough dang it.
;; Don't gather data or continue if a) megatest.config can't be found or b) testconfig already exists
(if (not rel-path)
(begin
(print "ERROR: I could not find megatest.config, please run -create-test in the top dir of your megatest area")
(exit 1)))
(if (file-exists? (conc rel-path "tests/" testname "/testconfig"))
(if (common:file-exists? (conc rel-path "tests/" testname "/testconfig"))
(begin
(print "WARNING: You already have a testconfig in " rel-path "tests/" testname ", do you want to clobber your files?")
(display "Enter y/n: ")
(if (not (equal? "y" (read-line)))
(begin
(print "INFO: user abort of creation of test " testname)
(exit 1)))))
;; Copyright 2006-2012, Matthew Welland.
;;
;; This program is made available under the GNU GPL version 2.0 or
;; greater. See the accompanying file COPYING for details.
;;
;; This program is distributed WITHOUT ANY WARRANTY; without even the
;; implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
;; PURPOSE.
(require-extension (srfi 18) extras tcp s11n)
(use srfi-1 posix regex regex-case srfi-69 hostinfo md5 message-digest posix-extras) ;; sqlite3(use srfi-1 posix regex regex-case srfi-69 hostinfo md5 message-digest posix-extras)
;; (import (prefix sqlite3 sqlite3:))
(use spiffy uri-common intarweb http-client spiffy-request-vars intarweb spiffy-directory-listing)
;; Configurations for server
(tcp-buffer-size 2048)
(max-connections 2048)
(exit)))
;; all routes though here end in exit ...
;;
;; start_server?
;;
(define (http-transport:launch)
;; lets not even bother to start if there are already three or more server files ready to go
(let* ((num-alive (server:get-num-alive (server:get-list *toppath*))))
;; (if (args:get-arg"-daemonize")
;; (begin
(if (> num-alive 3)
(begin
;; (daemon:ize) ;; (if *alt-log-file* ;; we should re-connect to this port, I think daemon:ize disrupts it ;; (begin (debug:print 0 *default-log-port* "ERROR: Aborting server start because there are already " num-alive " possible servers either running or starting up")
;; (current-error-port *alt-log-file*) ;; (current-output-port *alt-log-file*)))))
(exit))))
(let* ((th2 (make-thread (lambda ()
(debug:print-info 0 *default-log-port* "Server run thread started")
(http-transport:run
(if (args:get-arg "-server")
(args:get-arg "-server")
"-")
)) "Server run"))
(defstruct launch:einf (pid #t)(exit-status #t)(exit-code #t)(rollup-status 0))
;; return (conc status ": " comment) from the final section so that
;; the comment can be set in the step record in launch.scm
;;
(define (launch:load-logpro-dat run-id test-id stepname)
(let ((cname (conc stepname ".dat")))
(if (file-exists? cname)
(if (common:file-exists? cname)
(let* ((dat (read-config cname #f #f))
(csvr (db:logpro-dat->csv dat stepname))
(csvt (let-values (((fmt-cell fmt-record fmt-csv) (make-format ",")))
(fmt-csv (map list->csv-record csvr))))
(status (configf:lookup dat "final" "exit-status"))
(msg (configf:lookup dat "final" "message")))
(if csvt ;; this if blocked stack dump caused by .dat file from logpro being 0-byte. fixed by upgrading logpro
(handle-exceptions
exn
(debug:print 0 *default-log-port* "Failed to create directory " logdir " expect problems, message: " ((condition-property-accessor 'exn 'message) exn))
(create-directory logdir #t)))))
;; NFS might not have propagated the directory meta data to the run host - give it time if needed
(let loop ((count 0))
(if (or (file-exists? top-path)
(if (or (common:file-exists? top-path)
(> count 10))
(change-directory top-path)
(begin
(debug:print 0 *default-log-port* "INFO: Not starting job yet - directory " top-path " not found")
(thread-sleep! 10)
(loop (+ count 1)))))
(launch:setup) ;; should be properly in the top-path now
) ;; (set-signal-handler! signal/stop sighand)
;; Do not run the test if it is REMOVING, RUNNING, KILLREQ or REMOTEHOSTSTART,
;; Mark the test as REMOTEHOSTSTART *IMMEDIATELY*
;;
(let* ((test-info (rmt:get-test-info-by-id run-id test-id))
(tconfig (tests:get-testconfig test-name item-path tconfigreg #t force-create: #t))
(test-host (if test-info
(test-host (db:test-get-host test-info))
(db:test-get-host test-info)
(begin
(debug:print 0 *default-log-port* "ERROR: failed to find a record for test-id " test-id ", exiting.")
(exit))))
(test-pid (db:test-get-process_id test-info)))
;; if work-area was pre-ordained, use it, else create and then use
(if (not (and work-area
(file-exists? work-area)
(file-is-directory? work-area)))
;; (if (configf:var-is? *configdat* "setup" "early-setup" "yes")
(let ((dat (create-work-area run-id runname keyvals test-id testpath #f test-name itemdat tconfig: tconfig)))
*toppath* ;; return toppath
(let* ((use-cache (common:use-cache?)) ;; BB- use-cache checks *configdat* for use-cache setting. We do not have *configdat*. Bootstrapping problem here.
(toppath (or *toppath* areapath (getenv "MT_RUN_AREA_HOME"))) ;; preserve toppath
(target (common:args-get-target))
(sections (if target (list "default" target) #f)) ;; for runconfigs
(mtconfig (or (args:get-arg "-config") "megatest.config")) ;; allow overriding megatest.config
(cachefiles (launch:get-cache-file-paths areapath toppath target mtconfig))
;; checking for null cachefiles should not be necessary, I was seeing error car of '(), might be a chicken bug or a red herring ...
(mtcachef (if (null? cachefiles)
#f
(mtcachef (car cachefiles)) ;; (and cachedir (conc cachedir "/" ".megatest.cfg-" megatest-version "-" megatest-fossil-hash)))
(rccachef (cdr cachefiles)) ;; (and cachedir (conc cachedir "/" ".runconfigs.cfg-" megatest-version "-" megatest-fossil-hash)))
) ;; (cancreate (and cachedir (common:file-exists? cachedir)(file-write-access? cachedir) (not (common:in-running-test?)))))
(car cachefiles))) ;; (and cachedir (conc cachedir "/" ".megatest.cfg-" megatest-version "-" megatest-fossil-hash)))
(rccachef (if (null? cachefiles)
#f
(cdr cachefiles)))) ;; (and cachedir (conc cachedir "/" ".runconfigs.cfg-" megatest-version "-" megatest-fossil-hash)))
;; (cancreate (and cachedir (common:file-exists? cachedir)(file-write-access? cachedir) (not (common:in-running-test?)))))
(set! *toppath* toppath) ;; This is needed when we are running as a test using CMDINFO as a datasource
;;(BB> "launch:setup-body -- cachefiles="cachefiles)
(cond
;; if mtcachef exists just read it, however we need to assume toppath is available in $MT_RUN_AREA_HOME
((and (not force-reread)
mtcachef rccachef
use-cache
((and (not force-reread) mtcachef (common:file-exists? mtcachef) (get-environment-variable "MT_RUN_AREA_HOME") use-cache)
(get-environment-variable "MT_RUN_AREA_HOME")
(common:file-exists? mtcachef)
(common:file-exists? rccachef))
;;(BB> "launch:setup-body -- cond branch 1 - use-cache")
(set! *configdat* (configf:read-alist mtcachef))
;;(BB> "launch:setup-body -- 1 set! *configdat*="*configdat*)
(set! *runconfigdat* (configf:read-alist rccachef))
(set! *configinfo* (list *configdat* (get-environment-variable "MT_RUN_AREA_HOME")))
(set! *configstatus* 'fulldata)
(set! *toppath* (get-environment-variable "MT_RUN_AREA_HOME"))
*toppath*)
;; there are no existing cached configs, do full reads of the configs and cache them
;; we have all the info needed to fully process runconfigs and megatest.config
((and ;; (not force-reread) ;; force-reread is irrelevant in the AND, could however OR it?
mtcachef
((and(not force-reread) mtcachef) ;; BB- why are we doing this without asking if caching is desired?
rccachef) ;; BB- why are we doing this without asking if caching is desired?
;;(BB> "launch:setup-body -- cond branch 2")
(let* ((first-pass (find-and-read-config ;; NB// sets MT_RUN_AREA_HOME as side effect
mtconfig
environ-patt: "env-override"
given-toppath: toppath
pathenvvar: "MT_RUN_AREA_HOME"))
(first-rundat (let ((toppath (if toppath
;; If there is already a symlink delete it and recreate it.
(handle-exceptions
exn
(begin
(debug:print-error 0 *default-log-port* " Failed to re-create link " lnktarget ((condition-property-accessor 'exn 'message) exn) ", exiting")
(exit))
(if (symbolic-link? lnktarget) (delete-file lnktarget))
(if (not (file-exists? lnktarget)) (create-symbolic-link test-path lnktarget)))))
(if (not (common:file-exists? lnktarget)) (create-symbolic-link test-path lnktarget)))))
(if (not (directory? test-path))
(create-directory test-path #t)) ;; this is a hack, I don't know why out of the blue this path does not exist sometimes
(if (and test-src-path (directory? test-path))
(begin
(let* ((ovrcmd (let ((cmd (config-lookup *configdat* "setup" "testcopycmd")))
(mutex-lock! *launch-setup-mutex*) ;; setting variables and processing the testconfig is NOT thread-safe, reuse the launch-setup mutex
(let* ((item-path (item-list->path itemdat))
(contour #f)) ;; NOT READY FOR THIS (args:get-arg "-contour")))
(let loop ((delta (- (current-seconds) *last-launch*))
(launch-delay (configf:lookup-number *configdat* "setup" "launch-delay" default: 1)))
(if (> launch-delay delta)
(begin
(if (common:low-noise-print 1200 "test launch delay") ;; every two hours or so remind the user about launch delay.
(debug:print-info 0 *default-log-port* "Delaying launch of " test-name " for " (- launch-delay delta) " seconds")
(debug:print-info 0 *default-log-port* "NOTE: test launches are delayed by " launch-delay " seconds. See megatest.config launch-delay setting to adjust.")) ;; launch of " test-name " for " (- launch-delay delta) " seconds"))
(thread-sleep! (- launch-delay delta))
(loop (- (current-seconds) *last-launch*) launch-delay))))
(change-directory *toppath*)
(alist->env-vars ;; consolidate this code with the code in megatest.scm for "-execute", *maybe* - the longer they are set the longer each launch takes (must be non-overlapping with the vars)
(append
(list
(list "MT_RUN_AREA_HOME" *toppath*)
(list 'set-vars (if params (hash-table-ref/default params "-setvars" #f)))
(list 'runname runname)
(list 'mt-bindir-path mt-bindir-path))))))))
;; clean out step records from previous run if they exist
;; (rmt:delete-test-step-records run-id test-id)
;; if the dir does not exist we may have a itempath where individual variables are a path, launch anyway
(if (and work-area (file-exists? work-area))
(if (and work-area (common:file-exists? work-area))
(change-directory work-area)) ;; so that log files from the launch process don't clutter the test dir
(cond
;; ((and launcher hosts) ;; must be using ssh hostname
;; (set! fullcmd (append launcher (car hosts)(list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param)))
;; (set! fullcmd (append launcher (car hosts)(list remote-megatest test-sig "-execute" cmdparms))))
(launcher
(set! fullcmd (append launcher (list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param)))
;; Copyright 2006-2013, Matthew Welland.
;;
;; This program is made available under the GNU GPL version 2.0 or
;; greater. See the accompanying file COPYING for details.
;;
;; This program is distributed WITHOUT ANY WARRANTY; without even the
;; implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
;; PURPOSE.
(use sqlite3 srfi-18)
(import (prefix sqlite3 sqlite3:))
(use (prefix sqlite3 sqlite3:) srfi-18)
(declare (unit lock-queue))
(declare (uses common))
(declare (uses tasks))
;;======================================================================
;; attempt to prevent overlapping updates of rollup files by queueing
(lock-queue:release-lock fname test-id count: (- count 1)))
(let ((journal (conc fname "-journal")))
;; If we've tried ten times and failed there is a serious problem
;; try to remove the lock db and allow it to be recreated
(handle-exceptions
exn
#f
(if (file-exists? journal)(delete-file journal))
(if (file-exists? fname) (delete-file fname))
(if (common:file-exists? journal)(delete-file journal))
(if (common:file-exists? fname) (delete-file fname))
#f))))
(sqlite3:execute (lock-queue:db-dat-get-db dbdat) "DELETE FROM runlocks WHERE test_id=?;" test-id)
(sqlite3:finalize! (lock-queue:db-dat-get-db dbdat)))))
(define (lock-queue:steal-lock dbdat test-id #!key (count 10))
(debug:print-info 0 *default-log-port* "Attempting to steal lock at " (lock-queue:db-dat-get-path dbdat))
(tasks:wait-on-journal (lock-queue:db-dat-get-path dbdat) 1200 "lock-queue:steal-lock; waiting on journal")
## commented out due to a bug in v1.6501 in mtutil
[fields]
a text
b text
c text
## [fields]
## a text
## b text
## c text
usercode .mtutil.scm
areafilter area-to-run
targtrans generic-target-translator
runtrans generic-runname-translator
[setup]
pktsdirs /tmp/pkts /some/other/source
pktsdirs /tmp/mt_pkts /some/other/source
[areas]
# path-to-area map-target-script(future, optional)
# someqa path=../megatestqa/someqa; targtrans=somefunc; areafilter=area-to-run
fullrun path=tests/fullrun
fullrun path=tests/fullrun;
# targtrans is name of scheme proc stored in .mtutil.scm, which lives in PWD where mtutil is run
# the target translator can return: a/target OR (list/of targets/to apply/run) OR #f i.e. run nothing
# ext-tests path=ext-tests; targtrans=prefix-contour;
ext-tests path=ext-tests
ext path=ext-tests
[contours]
# mode-patt/tag-expr
quick areas=ext; selector=/QUICKPATT
quick2 areafn=check-area; selector=/QUICKPATT
quick selector=QUICKPATT/quick
full areas=fullrun,ext-tests; selector=MAXPATT/
all areas=fullrun,ext-tests
snazy areas=%; selector=QUICKPATT/
# quick areas=fullrun,ext-tests; selector=QUICKPATT/quick
# full areas=fullrun,ext-tests; selector=MAXPATT/
# short areas=fullrun,ext-tests; selector=MAXPATT/
# all areas=fullrun,ext-tests
# snazy selector=QUICKPATT/
[nopurpose]
[access]
ext matt:admin mattw:owner
[accesstypes]
admin run rerun resume remove set-ss
owner run rerun resume remove
jerk set-ss
[setup]
maxload 1.2
-reqtarg key1/key2/... : run for key1, key2, etc. but key1/key2 must be in runconfigs
-testpatt patt1/patt2,patt3/... : % is wildcard
-runname : required, name for this particular test run
-state : Applies to runs, tests or steps depending on context
-status : Applies to runs, tests or steps depending on context
--modepatt key : load testpatt from <key> in runconfigs instead of default TESTPATT if -testpatt and -tagexpr are not specified
-tagexpr tag1,tag2%,.. : select tests with tags matching expression
Test helpers (for use inside tests)
-step stepname
-test-status : set the state and status of a test (use :state and :status)
-setlog logfname : set the path/filename to the final log relative to the test
directory. may be used with -test-status
-set-toplog logfname : set the overall log for a suite of sub-tests
-dumpmode MODE : dump in MODE format instead of sexpr, MODE=json,ini,sexp etc. (add -debug 0,9 to see which file contributes each line)
-show-cmdinfo : dump the command info for a test (run in test environment)
-section sectionName
-var varName : for config and runconfig lookup value for sectionName varName
-since N : get list of runs changed since time N (Unix seconds)
-fields fieldspec : fields to include in json dump; runs:id,runame+tests:testname+steps
-sort fieldname : in -list-runs sort tests by this field
-testdata-csv [categorypatt/]varpatt : dump testdata for given category
Misc
-start-dir path : switch to this directory before running megatest
-contour cname : add a level of hierarcy to the linktree and run paths
-rebuild-db : bring the database schema up to date
-cleanup-db : remove any orphan records, vacuum the db
-import-megatest.db : push data from megatest.db to cache db files in /tmp/$USER
(directory-exists? rundir)
(file-write-access? rundir))
(begin
(if (not (common:in-running-test?))
(configf:write-alist data cfgf))
;; force re-read of megatest.config - this resolves circular references between megatest.config
(launch:setup force-reread: #t)
;; (launch:cache-config) ;; there are two independent config cache locations, turning this one off for now. MRW.
(launch:cache-config))) ;; we can safely cache megatest.config since we have a valid runconfig
)) ;; we can safely cache megatest.config since we have a valid runconfig
data))))
(if (args:get-arg "-show-runconfig")
(let ((tl (launch:setup)))
(push-directory *toppath*)
(let ((data (full-runconfigs-read)))
;; keep this one local
;; MLAUNCH
;;
;; take jobs from the given queue and keep launching them keeping
;; the cpu load at the targeted level
;;
;;======================================================================
(use sqlite3 srfi-1 posix regex regex-case srfi-69 dot-locking format)
(use sqlite3 srfi-1 posix regex regex-case srfi-69 format)
(import (prefix sqlite3 sqlite3:))
(declare (unit mlaunch))
(declare (uses db))
(declare (uses common))
;; (include "common.scm")
;; (include "megatest-version.scm")
;; fake out readline usage of toplevel-command
(define (toplevel-command . a) #f)
(use srfi-1 posix srfi-69 readline ;; regex regex-case srfi-69 apropos json http-client directory-utils rpc typed-records;; (srfi 18) extras)
srfi-18 extras format pkts pkts regex regex-case
(prefix dbi dbi:)) ;; zmq extras) srfi-18 extras format pkts regex regex-case
(prefix dbi dbi:)
(prefix nanomsg nmsg:))
(declare (uses common))
(declare (uses megatest-version))
(declare (uses margs))
(declare (uses configf))
;; (declare (uses rmt))
(include "megatest-fossil-hash.scm")
(require-library stml)
;; stuff for the mapper and checker functions
;;
(define *target-mappers* (make-hash-table)) ;; '())
(define *runname-mappers* (make-hash-table)) ;; '())(define *target-mappers* (make-hash-table))
(define *runname-mappers* (make-hash-table))
(define *area-checkers* (make-hash-table))
;; helpers for mappers/checkers
(define (add-target-mapper name proc)
(hash-table-set! *target-mappers* name proc))
(define (add-runname-mapper name proc)
(hash-table-set! *runname-mappers* name proc))
(define (add-area-checker name proc)
(hash-table-set! *area-checkers* name proc))
;; given a runkey, xlatr-key and other info return one of the following:
;; list of targets, null list to skip processing
;;
(define (map-targets mtconf aval-alist runkey area contour #!key (xlatr-key-in #f))
(let* ((xlatr-key (or xlatr-key-in
(conf-get/default mtconf aval-alist 'targtrans)))
(proc (hash-table-ref/default *target-mappers* xlatr-key #f)))
(if proc
(begin
(print "Using target mapper: " xlatr-key)
(handle-exceptions
exn
(begin
(print "FAILED TO RUN TARGET MAPPER FOR " area ", called " xlatr-key)
(print " function is: " (hash-table-ref/default *target-mappers* xlatr-key #f ) )
(print " message: " ((condition-property-accessor 'exn 'message) exn))
runkey)
(proc runkey area contour)))
(begin
(if xlatr-key
(print "ERROR: Failed to find named target translator " xlatr-key ", using original target."))
`(,runkey))))) ;; no proc then use runkey
;; given mtconf and areaconf extract a translator/filter, first look at areaconf
;; then if not found look at default
;;
(define (conf-get/default mtconf areaconf keyname #!key (default #f))
(let ((res (or (alist-ref keyname areaconf)
(configf:lookup mtconf "default" (conc keyname))
default)))
(if res
(string->symbol res)
res)))
;; this needs some thought regarding security implications.
;;
;; i. Check that owner of the file and calling user are same?
;; ii. Check that we are in a legal megatest area?
;; iii. Have some form of authentication or record of the md5sum or similar of the file?
;; iv. Use compiled version in preference to .scm version. Thus there is a manual "blessing"
;; required to use .mtutil.scm.
;;
(if (file-exists? "megatest.config")
(if (file-exists? ".mtutil.so")
(if (common:file-exists? "megatest.config")
(if (common:file-exists? ".mtutil.so")
(load ".mtutil.so")
(if (file-exists? ".mtutil.scm")
(load ".mtutil.scm"))))
(if (common:file-exists? ".mtutil.scm")
(load ".mtutil.scm"))))
;; Disabled help items
;; -rollup : (currently disabled) fill run (set by :runname) with latest test(s)
;; from prior runs with same keys
;; Contour actions
;; import : import pkts
;; dispatch : dispatch queued run jobs from imported pkts
;; rungen : look at input sense list in [rungen] and generate run pkts
(define help (conc "
mtutil, part of the Megatest tool suite, documentation at http://www.kiatoa.com/fossils/megatest
version " megatest-version "
license GPL, Copyright Matt Welland 2006-2017
Usage: mtutil action [options]
-h : this help
-manual : show the Megatest user manual
-version : print megatest version (currently " megatest-version ")
Actions:
run : initiate runs
remove : remove runs
rerun : register action for processing
set-ss : set state/status
archive : compress and move test data to archive disk
kill : stop tests or entire runs
db : database utilities
-h : this help
-manual : show the Megatest user manual
-version : print megatest version (currently " megatest-version ")
Actions:
run : initiate runs
remove : remove runs
rerun : register action for processing
set-ss : set state/status
archive : compress and move test data to archive disk
kill : stop tests or entire runs
db : database utilities
areas, contours, setup : show areas, contours or setup section from megatest.config
Contour actions:
process : runs import, rungen and dispatch
Selectors
-immediate : apply this action immediately, default is to queue up actions
-area areapatt1,area2... : apply this action only to the specified areas
-target key1/key2/... : run for key1, key2, etc.
-test-patt p1/p2,p3/... : % is wildcard
-run-name : required, name for this particular test run
-contour contourname : run all targets for contourname, requires -run-name, -target
-state-status c/p,c/f : Specify a list of state and status patterns
-tag-expr tag1,tag2%,.. : select tests with tags matching expression
-mode-patt key : load testpatt from <key> in runconfigs instead of default TESTPATT
if -testpatt and -tagexpr are not specified
-new state/status : specify new state/status for set-ss
Misc
-start-dir path : switch to this directory before running mtutil
-set-vars V1=1,V2=2 : Add environment variables to a run NB// these are
overwritten by values set in config files.
-log logfile : send stdout and stderr to logfile
-repl : start a repl (useful for extending megatest)
-load file.scm : load and run file.scm
-debug N|N,M,O... : enable debug messages 0-N or N and M and O ...
Utility
db pgschema : emit postgresql schema; do \"mtutil db pgschema | psql -d mydb\"
process : runs import, rungen and dispatch
Selectors
-immediate : apply this action immediately, default is to queue up actions
-area areapatt1,area2... : apply this action only to the specified areas
-target key1/key2/... : run for key1, key2, etc.
-test-patt p1/p2,p3/... : % is wildcard
-run-name : required, name for this particular test run
-contour contourname : run all targets for contourname, requires -run-name, -target
-state-status c/p,c/f : Specify a list of state and status patterns
-tag-expr tag1,tag2%,.. : select tests with tags matching expression
-mode-patt key : load testpatt from <key> in runconfigs instead of default TESTPATT
if -testpatt and -tagexpr are not specified
-new state/status : specify new state/status for set-ss
Misc
-start-dir path : switch to this directory before running mtutil
-set-vars V1=1,V2=2 : Add environment variables to a run NB// these are
overwritten by values set in config files.
-log logfile : send stdout and stderr to logfile
-repl : start a repl (useful for extending megatest)
-load file.scm : load and run file.scm
-debug N|N,M,O... : enable debug messages 0-N or N and M and O ...
Utility
db pgschema : emit postgresql schema; do \"mtutil db pgschema | psql -d mydb\"
Examples:
# Start a megatest run in the area \"mytests\"
mtutil -area mytests -action run -target v1.63/aa3e -mode-patt MYPATT -tag-expr quick
mtutil run -area mytests -target v1.63/aa3e -mode-patt MYPATT -tag-expr quick
# Start a contour
mtutil run -contour quick -target v1.63/aa3e
Called as " (string-intersperse (argv) " ") "
Version " megatest-version ", built from " megatest-fossil-hash ))
;; args and pkt key specs
;;
(define *arg-keys*
;; used keys
;; a - action
'(
'(("-area" . G) ;; maps to group
("-target" . t)
("-run-name" . n)
("-state" . e)
("-status" . s)
("-contour" . c)
("-test-patt" . p) ;; idea, enhance margs ("-test-patt" "-testpatt") => yields one value in "-test-patt"
("-mode-patt" . o)
("-tag-expr" . x)
("-area" . G) ;; maps to group
("-contour" . c)
("-append-config" . d)
("-state" . e)
("-item-patt" . i)
("-sync-to" . k)
("-new" . l) ;; l (see below) is new-ss
("-run-name" . n)
("-mode-patt" . o)
("-test-patt" . p) ;; idea, enhance margs ("-test-patt" "-testpatt") => yields one value in "-test-patt"
("-status" . s)
("-target" . t)
("-tag-expr" . x)
("-item-patt" . i) ("-sync-to" . k) ("-append-config" . d) ;; misc
("-start-dir" . S) ("-msg" . M) ("-set-vars" . v)
("-debug" . #f) ;; for *verbosity* > 2
("-load" . #f) ;; load and exectute a scheme file
("-log" . #f)
("-debug" . #f) ;; for *verbosity* > 2
("-load" . #f) ;; load and exectute a scheme file
("-log" . #f)
("-msg" . M)
("-start-dir" . S)
("-set-vars" . v)
))
(define *switch-keys*
'(
'(("-h" . #f)
("-help" . #f)
("--help" . #f)
("-manual" . #f)
("-version" . #f)
;; misc
("-repl" . #f)
("-immediate" . I)
("-preclean" . r)
("-rerun-all" . u)
("-h" . #f)
("-help" . #f)
("--help" . #f)
("-manual" . #f)
("-version" . #f)
;; misc
("-repl" . #f)
("-immediate" . I)
("-preclean" . r)
("-rerun-all" . u)
("-prepend-contour" . w)
))
;; alist to map actions to old megatest commands
(define *action-keys*
'((run . "-run")
(sync . "")
(archive . "-archive")
(set-ss . "-set-state-status"))) (set-ss . "-set-state-status")
(remove . "-remove-runs")))
;; Card types:
;;
;; A action
;; U username (Unix)
;; D timestamp
;; T card type
;; utilitarian alist for standard cards
;;
(define *additional-cards*
'(
;; Standard Cards
(A . action )
(D . timestamp )
(T . cardtype )
(U . user ) ;; username
(Z . shar1sum )
;; Extras
(a . runkey ) ;; needed for matching up pkts with target derived from runkey
;; (l . new-ss ) ;; new state/status
))
;; inlst is an alternative input
;;
(define (lookup-param-by-key key #!key (inlst #f))
(fold (lambda (a res)
(if (eq? (cdr a) key)
(car a)
(else ;; have some unrecognised junk? spit out error message
(print "ERROR: fossil timeline returned unrecognisable junk \"" inl "\"")
(loop (get-line) date node time))))
(else ;; no more datat and last node on branch not found
(close-input-port timeline-port)
(values (common:date-time->seconds (conc date " " time)) node))))))
;;======================================================================
;; GLOBALS
;;======================================================================
;; Card types:;;;; a action;; u username (Unix);; D timestamp;; T card type;; process args
(define *action* (if (> (length (argv)) 1)
(cadr (argv))
#f))
(define remargs (args:get-args
(if *action* (cdr (argv)) (argv)) ;; args:get-args dumps first in argv list (the program name)
(map car *arg-keys*)
(ruletype (if (> len-key 1)(cadr keyparts) #f))
(action (if (> len-key 2)(caddr keyparts) #f))
(optional (if (> len-key 3)(cadddr keyparts) #f))
;; (val-list (string-split-fields ";\\s*" val #:infix)) ;; (string-split val)) ;; runname-rule params
(val-alist (val->alist val))
(runname (make-runname "" ""))
(runtrans (alist-ref 'runtrans val-alist))
;; these may or may not be defined and not all are used in each handler type in the case below
(run-name (alist-ref 'run-name val-alist))
(target (alist-ref 'target val-alist))
(crontab (alist-ref 'cron val-alist))
(areas (val-alist->areas val-alist)) ;; areas can be a single string (a reference to call an areas function), or a list of area names.
(dbdest (alist-ref 'dbdest val-alist))
(appendconf (alist-ref 'appendconf val-alist))
(file-globs (alist-ref 'glob val-alist))
(runstarts (find-pkts pdb '(runstart) `((o . ,contour)
(t . ,runkey))))
(rspkts (get-pkt-alists runstarts))
(rspkts (common:get-pkt-alists runstarts))
;; starttimes is for run start times and is used to know when the last run was launched
(starttimes (get-pkt-times rspkts)) ;; sort by age (youngest first) and delete duplicates by target
(last-run (if (null? starttimes) ;; if '() then it has never been run, else get the max
0
(apply max (map cdr starttimes))))
(starttimes (common:get-pkt-times rspkts)) ;; sort by age (youngest first) and delete duplicates by target
(last-run (if (null? starttimes) ;; if '() then it has never been run, else get the max
0
(apply max (map cdr starttimes))))
;; synctimes is for figuring out the last time a sync was done
(syncstarts (find-pkts pdb '(syncstart) '())) ;; no qualifiers, a sync does all tarets etc.
(sspkts (get-pkt-alists syncstarts))
(synctimes (get-pkt-times sspkts))
(last-sync (if (null? synctimes) ;; if '() then it has never been run, else get the max
0
(apply max (map cdr synctimes))))
(syncstarts (find-pkts pdb '(syncstart) '())) ;; no qualifiers, a sync does all tarets etc.
(sspkts (common:get-pkt-alists syncstarts))
(synctimes (common:get-pkt-times sspkts))
(last-sync (if (null? synctimes) ;; if '() then it has never been run, else get the max
0
(apply max (map cdr synctimes))))
)
(let ((delta (lambda (x)
(round (/ (- (current-seconds) x) 60)))))
(print "runkey: " runkey ", ruletype: " ruletype ", action: " action ", last-run: " last-run " time since; last-run: " (delta last-run) ", last-sync: " (delta last-sync)))
(print "val-alist=" val-alist " runtrans=" runtrans)
;; look in runstarts for matching runs by target and contour
;; get the timestamp for when that run started and pass it
;; to the rule logic here where "ruletype" will be applied
;; if it comes back "changed" then proceed to register the runs
(case (string->symbol (or ruletype "no-such-rule"))
((no-such-rule) (print "ERROR: no such rule for " sense))
;; Handle crontab like rules
;;
((scheduled)
(if (not (alist-ref 'cron val-alist)) ;; gotta have cron spec
(print "ERROR: bad sense spec \"" (string-intersperse sense " ") "\" params: " val-alist)
(let* ((run-name (alist-ref 'run-name val-alist)) (let* (
(target (alist-ref 'target val-alist)) (crontab (alist-ref 'cron val-alist)) (areas (val-alist->areas val-alist)) ;; (action (alist-ref 'action val-alist))
(cron-safe-string (string-translate (string-intersperse (string-split (alist-ref 'cron val-alist)) "-") "*" "X"))
(cron-safe-string (string-translate (string-intersperse (string-split crontab) "-") "*" "X"))
(runname std-runname)) ;; (conc "sched" (time->string (seconds->local-time (current-seconds)) "%M%H%d")))))
;; (print "last-run: " last-run " need-run: " need-run)
;; (if need-run
(case (string->symbol action)
((sync)
((sync sync-prepend)
(if (common:extended-cron crontab #f last-sync)
(push-run-spec torun contour runkey
`((message . ,(conc ruletype ":sync-" cron-safe-string))
(action . ,action)
(dbdest . ,(alist-ref 'dbdest val-alist))
(append . ,(alist-ref 'appendconf val-alist))))))
(dbdest . ,dbdest)
(append . ,appendconf)
(areas . ,areas)))))
((run)
(if (common:extended-cron crontab #f last-run)
(push-run-spec torun contour runkey
`((message . ,(conc ruletype ":" cron-safe-string))
(runname . ,runname)
`((message . ,(conc ruletype ":" cron-safe-string))
(runname . ,runname)
(runtrans . ,runtrans)
(action . ,action)
(target . ,target)))))
(action . ,action)
(areas . ,areas)
(target . ,target)))))
((remove)
(push-run-spec torun contour runkey
`((message . ,(conc ruletype ":" cron-safe-string))
(runname . ,runname)
`((message . ,(conc ruletype ":" cron-safe-string))
(runname . ,runname)
(runtrans . ,runtrans)
(action . ,action)
(target . ,target))))
(action . ,action)
(areas . ,areas)
(target . ,target))))
(else
(print "ERROR: action \"" action "\" has no scheduled handler")
)))))
;; script based sensors
;;
((script)
;; syntax is a little different here. It is a list of commands to run, "scriptname = extra_parameters;scriptname = ..."
;; where scriptname may be repeated multiple times. The script must return unix-epoch of last change, new-target-name and new-run-name
;; the script is called like this: scriptname contour runkey std-runname action extra_param1 extra_param2 ...
(for-each
(lambda (cmd)
(print "cmd: " cmd)
;; PURPOSE.
;;======================================================================
(use format typed-records) ;; RADT => purpose of json format??
(declare (unit rmt))
(declare (uses api))
(declare (uses tdb))(declare (uses http-transport))
;;(declare (uses nmsg-transport))(include "common_records.scm")
;;
;; THESE ARE ALL CALLED ON THE CLIENT SIDE!!!
;;
;; generate entries for ~/.megatestrc with the following
(define *send-receive-mutex* (make-mutex)) ;; should have separate mutex per run-id
;; RA => e.g. usage (rmt:send-receive 'get-var #f (list varname))
;;
(define (rmt:send-receive cmd rid params #!key (attemptnum 1)(area-dat #f)) ;; start attemptnum at 1 so the modulo below works as expected
;;DOT digraph megatest_state_status {
;;DOT ranksep=0;
;;DOT // rankdir=LR;
;;DOT node [shape="box"];
;;DOT "rmt:send-receive" -> MUTEXLOCK;
;;DOT { edge [style=invis];"case 1" -> "case 2" -> "case 3" -> "case 4" -> "case 5" -> "case 6" -> "case 7" -> "case 8" -> "case 9" -> "case 10" -> "case 11"; }
;; do all the prep locked under the rmt-mutex
(mutex-lock! *rmt-mutex*)
;; 1. check if server is started IFF cmd is a write OR if we are not on the homehost, store in runremote
;; 2. check the age of the connections. refresh the connection if it is older than timeout-20 seconds.
;; 3. do the query, if on homehost use local access
;;
(let* ((start-time (current-seconds)) ;; snapshot time so all use cases get same value
(areapath *toppath*);; TODO - resolve from dbstruct to be compatible with multiple areas
(runremote (or area-dat
*runremote*))
(readonly-mode (if (and runremote
(remote-ro-mode-checked runremote))
(remote-ro-mode runremote)
(let* ((dbfile (conc *toppath* "/megatest.db"))
(ro-mode (not (file-write-access? dbfile)))) ;; TODO: use dbstruct or runremote to figure this out in future
(if runremote
(begin
(remote-ro-mode-set! runremote ro-mode)
(remote-ro-mode-checked-set! runremote #t)
ro-mode)
ro-mode)))))
;; DOT INIT_RUNREMOTE; // leaving off - doesn't really add to the clarity
;; DOT MUTEXLOCK -> INIT_RUNREMOTE [label="no remote?"];
;; DOT INIT_RUNREMOTE -> MUTEXLOCK;
;; ensure we have a record for our connection for given area
(if (not runremote) ;; can remove this one. should never get here.
(begin
(set! *runremote* (make-remote))
(set! runremote *runremote*))) ;; new runremote will come from this on next iteration
;; ensure we have a homehost record
(if (not (pair? (remote-hh-dat runremote))) ;; not on homehost
(thread-sleep! 0.1) ;; since we shouldn't get here, delay a little
(remote-hh-dat-set! runremote (common:get-homehost)))
;;(print "BB> readonly-mode is "readonly-mode" dbfile is "dbfile)
;; ensure we have a record for our connection for given area
(if (not runremote) ;; can remove this one. should never get here.
(begin
(set! *runremote* (make-remote))
(set! runremote *runremote*))) ;; new runremote will come from this on next iteration
;; DOT SET_HOMEHOST; // leaving off - doesn't really add to the clarity
;; DOT MUTEXLOCK -> SET_HOMEHOST [label="no homehost?"];
;; DOT SET_HOMEHOST -> MUTEXLOCK;
;; ensure we have a homehost record
(if (not (pair? (remote-hh-dat runremote))) ;; not on homehost
(thread-sleep! 0.1) ;; since we shouldn't get here, delay a little
(remote-hh-dat-set! runremote (common:get-homehost)))
;;(print "BB> readonly-mode is "readonly-mode" dbfile is "dbfile)
(cond
;;DOT EXIT;
;;DOT MUTEXLOCK -> EXIT [label="> 15 attempts"]; {rank=same "case 1" "EXIT" }
;; give up if more than 15 attempts
((> attemptnum 15)
(debug:print 0 *default-log-port* "ERROR: 15 tries to start/connect to server. Giving up.")
(exit 1))
;;DOT CASE2 [label="local\nreadonly\nquery"];
;;DOT MUTEXLOCK -> CASE2; {rank=same "case 2" CASE2}
;;DOT CASE2 -> "rmt:open-qry-close-locally";
;; readonly mode, read request- handle it - case 20 ;; readonly mode, read request- handle it - case 2
((and readonly-mode
(member cmd api:read-only-queries))
(mutex-unlock! *rmt-mutex*)
(debug:print-info 12 *default-log-port* "rmt:send-receive, case 20")
(debug:print-info 12 *default-log-port* "rmt:send-receive, case 2")
(rmt:open-qry-close-locally cmd 0 params)
)
;;DOT CASE3 [label="write in\nread-only mode"];
;;DOT MUTEXLOCK -> CASE3 [label="readonly\nmode?"]; {rank=same "case 3" CASE3}
;;DOT CASE3 -> "#f";
;; readonly mode, write request. Do nothing, return #f
(readonly-mode
(mutex-unlock! *rmt-mutex*)
(debug:print-info 12 *default-log-port* "rmt:send-receive, case 21")
(debug:print-info 12 *default-log-port* "rmt:send-receive, case 3")
(debug:print 0 *default-log-port* "WARNING: write transaction requested on a readonly area. cmd="cmd" params="params)
#f
)
;; reset the connection if it has been unused too long
((and runremote
(remote-conndat runremote)
(let ((expire-time (+ (- start-time (remote-server-timeout runremote))(random 10)))) ;; Subtract or add the random value? Seems like it should be substract but Neither fixes the "WARNING: failure in with-input-from-request to #<request>.\n message: Server closed connection before sending response"
(< (http-transport:server-dat-get-last-access (remote-conndat runremote)) expire-time)))
(debug:print-info 0 *default-log-port* "Connection to " (remote-server-url runremote) " expired due to no accesses, forcing new connection.")
(remote-conndat-set! runremote #f) ;; invalidate the connection, thus forcing a new connection.
(mutex-unlock! *rmt-mutex*)
(rmt:send-receive cmd rid params attemptnum: attemptnum))
#f)
;; This block was for pre-emptively resetting the connection if there had been no communication for some time.
;; I don't think it adds any value. If the server is not there, just fail and start a new connection.
;; also, the expire-time calculation might not be correct. We want, time-since-last-server-access > (server:get-timeout)
;;
;; ;;DOT CASE4 [label="reset\nconnection"];
;; ;;DOT MUTEXLOCK -> CASE4 [label="have connection,\nlast_access > expire_time"]; {rank=same "case 4" CASE4}
;; ;;DOT CASE4 -> "rmt:send-receive";
;; ;; reset the connection if it has been unused too long
;; ((and runremote
;; (remote-conndat runremote)
;; (let ((expire-time (+ (- start-time (remote-server-timeout runremote))(random 10)))) ;; Subtract or add the random value? Seems like it should be substract but Neither fixes the "WARNING: failure in with-input-from-request to #<request>.\n message: Server closed connection before sending response"
;; (< (http-transport:server-dat-get-last-access (remote-conndat runremote)) expire-time)))
;; (debug:print-info 0 *default-log-port* "Connection to " (remote-server-url runremote) " expired due to no accesses, forcing new connection.")
;; (http-transport:close-connections area-dat: runremote)
;; (remote-conndat-set! runremote #f) ;; invalidate the connection, thus forcing a new connection.
;; (mutex-unlock! *rmt-mutex*)
;; (rmt:send-receive cmd rid params attemptnum: attemptnum))
;;DOT CASE5 [label="local\nread"];
;;DOT MUTEXLOCK -> CASE5 [label="server not required,\non homehost,\nread-only query"]; {rank=same "case 5" CASE5};
;;DOT CASE5 -> "rmt:open-qry-close-locally";
;; on homehost and this is a read
((and (not (remote-force-server runremote)) ;; honor forced use of server
(cdr (remote-hh-dat runremote)) ;; on homehost
(member cmd api:read-only-queries)) ;; this is a read
((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required
(cdr (remote-hh-dat runremote)) ;; on homehost
(member cmd api:read-only-queries)) ;; this is a read
(mutex-unlock! *rmt-mutex*)
(debug:print-info 12 *default-log-port* "rmt:send-receive, case 3")
(debug:print-info 12 *default-log-port* "rmt:send-receive, case 5")
(rmt:open-qry-close-locally cmd 0 params))
;;DOT CASE6 [label="init\nremote"];
;;DOT MUTEXLOCK -> CASE6 [label="on homehost,\nwrite query,\nhave server,\ncan't reach it"]; {rank=same "case 6" CASE6};
;;DOT CASE6 -> "rmt:send-receive";
;; on homehost and this is a write, we already have a server, but server has died
((and (cdr (remote-hh-dat runremote)) ;; on homehost
(not (member cmd api:read-only-queries)) ;; this is a write
(remote-server-url runremote) ;; have a server
(not (server:ping (remote-server-url runremote)))) ;; server has died. NOTE: this is not a cheap call! Need better approach.
(set! *runremote* (make-remote))
(remote-force-server-set! runremote (common:force-server?))
(mutex-unlock! *rmt-mutex*)
(debug:print-info 12 *default-log-port* "rmt:send-receive, case 4.1")
(debug:print-info 12 *default-log-port* "rmt:send-receive, case 6")
(rmt:send-receive cmd rid params attemptnum: attemptnum))
;;DOT CASE7 [label="homehost\nwrite"];
;;DOT MUTEXLOCK -> CASE7 [label="server not required,\non homehost,\na write,\nhave a server"]; {rank=same "case 7" CASE7};
;;DOT CASE7 -> "rmt:open-qry-close-locally";
;; on homehost and this is a write, we already have a server
((and (not (remote-force-server runremote)) ;; honor forced use of server
((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required
(cdr (remote-hh-dat runremote)) ;; on homehost
(not (member cmd api:read-only-queries)) ;; this is a write
(remote-server-url runremote)) ;; have a server
(mutex-unlock! *rmt-mutex*)
(debug:print-info 12 *default-log-port* "rmt:send-receive, case 4")
(debug:print-info 12 *default-log-port* "rmt:send-receive, case 4.1")
(rmt:open-qry-close-locally cmd 0 params))
;;DOT CASE8 [label="force\nserver"];
;;DOT MUTEXLOCK -> CASE8 [label="server not required,\nhave homehost info,\nno connection yet,\nnot a read-only query"]; {rank=same "case 8" CASE8};
;;DOT CASE8 -> "rmt:open-qry-close-locally";
;; on homehost, no server contact made and this is a write, passively start a server
((and (not (remote-force-server runremote)) ;; honor forced use of server
(cdr (remote-hh-dat runremote)) ;; new
(not (remote-server-url runremote))
(not (member cmd api:read-only-queries)))
(debug:print-info 12 *default-log-port* "rmt:send-receive, case 5")
((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required
(cdr (remote-hh-dat runremote)) ;; have homehost
(not (remote-server-url runremote)) ;; no connection yet
(not (member cmd api:read-only-queries))) ;; not a read-only query
(debug:print-info 12 *default-log-port* "rmt:send-receive, case 8")
(let ((server-url (server:check-if-running *toppath*))) ;; (server:read-dotserver->url *toppath*))) ;; (server:check-if-running *toppath*))) ;; Do NOT want to run server:check-if-running - very expensive to do for every write call
(if server-url
(remote-server-url-set! runremote server-url) ;; the string can be consumed by the client setup if needed
(if (common:force-server?)
(server:start-and-wait *toppath*)
(server:kind-run *toppath*))))
(remote-force-server-set! runremote (common:force-server?))
(mutex-unlock! *rmt-mutex*)
(debug:print-info 12 *default-log-port* "rmt:send-receive, case 5.1")
(debug:print-info 12 *default-log-port* "rmt:send-receive, case 8.1")
(rmt:open-qry-close-locally cmd 0 params))
;;DOT CASE9 [label="force server\nnot on homehost"];
;;DOT MUTEXLOCK -> CASE9 [label="no connection\nand either require server\nor not on homehost"]; {rank=same "case 9" CASE9};
;;DOT CASE9 -> "start\nserver" -> "rmt:send-receive";
((or (and (remote-force-server runremote) ;; we are forcing a server and don't yet have a connection to one
(not (remote-conndat runremote)))
(and (not (cdr (remote-hh-dat runremote))) ;; not on a homehost
(not (remote-conndat runremote)))) ;; and no connection
(debug:print-info 12 *default-log-port* "rmt:send-receive, case 6 hh-dat: " (remote-hh-dat runremote) " conndat: " (remote-conndat runremote))
(debug:print-info 12 *default-log-port* "rmt:send-receive, case 9, hh-dat: " (remote-hh-dat runremote) " conndat: " (remote-conndat runremote))
(mutex-unlock! *rmt-mutex*)
(if (not (server:check-if-running *toppath*)) ;; who knows, maybe one has started up?
(server:start-and-wait *toppath*))
(remote-force-server-set! runremote (common:force-server?)) (remote-conndat-set! runremote (rmt:get-connection-info *toppath*)) ;; calls client:setup which calls client:setup-http
(rmt:send-receive cmd rid params attemptnum: attemptnum)) ;; TODO: add back-off timeout as
;;DOT CASE10 [label="on homehost"];
;;DOT MUTEXLOCK -> CASE10 [label="server not required,\non homehost"]; {rank=same "case 10" CASE10};
;;DOT CASE10 -> "rmt:open-qry-close-locally";
;; all set up if get this far, dispatch the query
((and (not (remote-force-server runremote))
(cdr (remote-hh-dat runremote))) ;; we are on homehost
(mutex-unlock! *rmt-mutex*)
(debug:print-info 12 *default-log-port* "rmt:send-receive, case 7")
(debug:print-info 12 *default-log-port* "rmt:send-receive, case 10")
(rmt:open-qry-close-locally cmd (if rid rid 0) params))
;;DOT CASE11 [label="send_receive"];
;;DOT MUTEXLOCK -> CASE11 [label="else"]; {rank=same "case 11" CASE11};
;;DOT CASE11 -> "rmt:send-receive" [label="call failed"];
;;DOT CASE11 -> "RESULT" [label="call succeeded"];
;; not on homehost, do server query
(else
(mutex-unlock! *rmt-mutex*)
(debug:print-info 12 *default-log-port* "rmt:send-receive, case 9")
(mutex-lock! *rmt-mutex*)
(let* ((conninfo (remote-conndat runremote))
(dat (case (remote-transport runremote)
(if success ;; success only tells us that the transport was successful, have to examine the data to see if there was a detected issue at the other end
(if (and (vector? res)
(eq? (vector-length res) 2)
(eq? (vector-ref res 1) 'overloaded)) ;; since we are looking at the data to carry the error we'll use a fairly obtuse combo to minimise the chances of some sort of collision.
(let ((wait-delay (+ attemptnum (* attemptnum 10))))
(debug:print 0 *default-log-port* "WARNING: server is overloaded. Delaying " wait-delay " seconds and trying call again.")
(mutex-lock! *rmt-mutex*)
(http-transport:close-connections area-dat: runremote)
(set! *runremote* #f) ;; force starting over
(mutex-unlock! *rmt-mutex*)
(thread-sleep! wait-delay)
(rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1)))
res) ;; All good, return res
(begin
(debug:print 0 *default-log-port* "WARNING: communication failed. Trying again, try num: " attemptnum)
(remote-conndat-set! runremote #f)
(http-transport:close-connections area-dat: runremote)
(remote-server-url-set! runremote #f)
(debug:print-info 12 *default-log-port* "rmt:send-receive, case 9.1")
(if (not (server:check-if-running *toppath*))
(server:start-and-wait *toppath*))
;; (if (not (server:check-if-running *toppath*))
;; (server:start-and-wait *toppath*))
(rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1)))))))))
;;DOT }
;; (define (rmt:update-db-stats run-id rawcmd params duration)
;; (mutex-lock! *db-stats-mutex*)
;; (handle-exceptions
;; exn
;; (begin
;; (debug:print 0 *default-log-port* "WARNING: stats collection failed in update-db-stats")
;; (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
(rmt:send-receive 'tasks-add #f (list action owner target runname testpatt params)))
(define (rmt:tasks-set-state-given-param-key param-key new-state)
(rmt:send-receive 'tasks-set-state-given-param-key #f (list param-key new-state)))
(define (rmt:tasks-get-last target runname)
(rmt:send-receive 'tasks-get-last #f (list target runname)))
;;======================================================================
;; N O S Y N C D B
;;======================================================================
(define (rmt:no-sync-set var val)
(rmt:send-receive 'no-sync-set #f `(,var ,val)))
(define (rmt:no-sync-get/default var default)
(rmt:send-receive 'no-sync-get/default #f `(,var ,default)))
(define (rmt:no-sync-del! var)
(rmt:send-receive 'no-sync-del! #f `(,var)))
;;======================================================================
;; A R C H I V E S
;;======================================================================
(define (rmt:archive-get-allocations testname itempath dneeded)
(rmt:send-receive 'archive-get-allocations #f (list testname itempath dneeded)))
;; Copyright 2006-2016, Matthew Welland.
;;
;; This program is made available under the GNU GPL version 2.0 or
;; greater. See the accompanying file COPYING for details.
;;
;; This program is distributed WITHOUT ANY WARRANTY; without even the
;; implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
;; PURPOSE.
;; strftime('%m/%d/%Y %H:%M:%S','now','localtime')
(use sqlite3 srfi-1 posix regex regex-case srfi-69 dot-locking (srfi 18)
(use (prefix sqlite3 sqlite3:) srfi-1 posix regex regex-case srfi-69 (srfi 18)
posix-extras directory-utils pathname-expand typed-records format)
(import (prefix sqlite3 sqlite3:))
(declare (unit runs))
(declare (uses db))
(declare (uses common))
(declare (uses items))
(declare (uses runconfig))
(declare (uses tests))
(set-signal-handler! signal/term sighand))
;; force the starting of a server
(debug:print 0 *default-log-port* "waiting on server...")
(server:start-and-wait *toppath*)
(runs:set-megatest-env-vars run-id inkeys: keys inrunname: runname) ;; these may be needed by the launching process
(set! runconf (if (file-exists? runconfigf)
(set! runconf (if (common:file-exists? runconfigf)
(setup-env-defaults runconfigf run-id *already-seen-runconfig-info* keyvals target)
(begin
(debug:print 0 *default-log-port* "WARNING: You do not have a run config file: " runconfigf)
#f)))
(if (not test-patts) ;; first time in - adjust testpatt
(set! test-patts (common:args-get-testpatt runconf)))
(not (member 'exclusive testmode)))))
;; (hash-table-delete! *max-tries-hash* (db:test-make-full-name test-name item-path))
;; we are going to reset all the counters for test retries by setting a new hash table
;; this means they will increment only when nothing can be run
(set! *max-tries-hash* (make-hash-table))
;; well, first lets see if cpu load throttling is enabled. If so wait around until the
;; average cpu load is under the threshold before continuing
(if (configf:lookup *configdat* "jobtools" "maxload") ;; only gate if maxload is specified
(common:wait-for-cpuload maxload numcpus waitdelay))
(if maxload ;; only gate if maxload is specified
(common:wait-for-cpuload maxload numcpus waitdelay))
(if maxhomehostload
(common:wait-for-homehost-load maxhomehostload (conc "Waiting for homehost load to drop below normalized value of " maxhomehostload)))
(run:test run-id run-info keyvals runname test-record flags #f test-registry all-tests-registry)
(runs:incremental-print-results run-id)
(hash-table-set! test-registry (db:test-make-full-name test-name item-path) 'running)
(runs:shrink-can-run-more-tests-count runsdat) ;; DELAY TWEAKER (still needed?)
;; (thread-sleep! *global-delta*)
(if (or (not (null? tal))(not (null? reg)))
(list (runs:queue-next-hed tal reg reglen regfull)
(begin
(debug:print-info 0 *default-log-port* "WARNING: server is overloaded, trying again in one second")
(thread-sleep! 1)
(loop)))))
(if (not testdat) ;; should NOT happen
(debug:print-error 0 *default-log-port* "failed to get test record for test-id " test-id))
(set! test-id (db:test-get-id testdat))
(if (file-exists? test-path)
(if (common:file-exists? test-path)
(change-directory test-path)
(begin
(debug:print-error 0 *default-log-port* "test run path not created before attempting to run the test. Perhaps you are running -remove-runs at the same time?")
(change-directory *toppath*)))
(case (if force ;; (args:get-arg "-force")
'NOT_STARTED
(if testdat
(exit 3))
(else
(let (;; (db #f)
(keys #f))
(if (launch:setup)
(begin
(full-runconfigs-read) ;; cache the run config
;; (launch:cache-config) ;; there are two independent config cache locations, turning this one off for now. MRW.
(launch:cache-config)) ;; do not cache here - need to be sure runconfigs is processed
) ;; do not cache here - need to be sure runconfigs is processed
(begin
(debug:print 0 *default-log-port* "Failed to setup, exiting")
(exit 1)))
(set! keys (keys:config-get-fields *configdat*))
;; have enough to process -target or -reqtarg here
;; Copyright 2006-2012, Matthew Welland.
;; Copyright 2006-2017, Matthew Welland.
;;
;; This program is made available under the GNU GPL version 2.0 or
;; greater. See the accompanying file COPYING for details.
;;
;; This program is distributed WITHOUT ANY WARRANTY; without even the
;; implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
;; PURPOSE.
(require-extension (srfi 18) extras tcp s11n)
(use srfi-1 posix regex regex-case srfi-69 hostinfo md5 message-digest directory-utils posix-extras matchable)(use srfi-1 posix regex regex-case srfi-69 hostinfo md5 message-digest directory-utils posix-extras matchable
;; (use zmq) )
(use spiffy uri-common intarweb http-client spiffy-request-vars)
(declare (unit server))
(declare (uses common))
(declare (uses db))
(declare (uses tasks)) ;; tasks are where stuff is maintained about what is running.
;; (declare (uses synchash))
(declare (uses http-transport))
(declare (uses rpc-transport));;(declare (uses nmsg-transport))(declare (uses launch))
(declare (uses daemon))
(include "common_records.scm")
(include "db_records.scm")
(define (server:make-server-url hostport)
(if (not hostport)
#f
(conc "http://" (car hostport) ":" (cadr hostport))))
(define *server-loop-heart-beat* (current-seconds))
;;======================================================================
;; P K T S S T U F F
;;======================================================================
;; ???
;;======================================================================
;; S E R V E R
;;======================================================================
;; Call this to start the actual server
;;
res
(cons (append serv-rec (list pid)) res))))
(if (null? tal)
(if (and limit
(> (length new-res) limit))
new-res ;; (take new-res limit) <= need intelligent sorting before this will work
new-res)
(loop (car tal)(cdr tal) new-res)))))))))
(loop (car tal)(cdr tal) new-res)))))))))
(define (server:get-num-alive srvlst)
(let ((num-alive 0))
(for-each
(lambda (server)
(match-let (((mod-time host port start-time pid)
server))
(let* ((uptime (- (current-seconds) mod-time))
(runtime (if start-time
(- mod-time start-time)
0)))
(if (< uptime 5)(set! num-alive (+ num-alive 1))))))
srvlst)
num-alive))
;; given a list of servers get a list of valid servers, i.e. at least
;; 10 seconds old, has started and is less than 1 hour old and is
;; active (i.e. mod-time < 10 seconds
;;
;; mod-time host port start-time pid
;;
;;
(define (open-test-db work-area)
(debug:print-info 11 *default-log-port* "open-test-db " work-area)
(if (and work-area
(directory? work-area)
(file-read-access? work-area))
(let* ((dbpath (conc work-area "/testdat.db"))
(dbexists (file-exists? dbpath))
(dbexists (common:file-exists? dbpath))
(work-area-writeable (file-write-access? work-area))
(db (handle-exceptions ;; open the db if area writeable or db pre-existing. open in-mem otherwise. if exception, open in-mem
exn
(begin
(print-call-chain (current-error-port))
(debug:print 2 *default-log-port* "ERROR: problem accessing test db " work-area ", you probably should clean and re-run this test"
((condition-property-accessor 'exn 'message) exn))
;; (let ((val (hash-table-ref/default otherdat ":first_warn" #f)))
;; (if val
;; (sqlite3:execute db "UPDATE tests SET first_warn=? WHERE run_id=? AND testname=? AND item_path=?;" val run-id test-name item-path)))
(let ((category (hash-table-ref/default otherdat ":category" ""))
(variable (hash-table-ref/default otherdat ":variable" ""))
(value (hash-table-ref/default otherdat ":value" #f))
(expected (hash-table-ref/default otherdat ":expected" #f))
(tol (hash-table-ref/default otherdat ":tol" #f))
(expected (hash-table-ref/default otherdat ":expected" "n/a"))
(tol (hash-table-ref/default otherdat ":tol" "n/a"))
(units (hash-table-ref/default otherdat ":units" ""))
(type (hash-table-ref/default otherdat ":type" ""))
(dcomment (hash-table-ref/default otherdat ":comment" "")))
(debug:print 4 *default-log-port*
"category: " category ", variable: " variable ", value: " value
", expected: " expected ", tol: " tol ", units: " units)
(if (and value expected tol) ;; all three required
(if (and value) ;; require only value; BB was- all three required
(let ((dat (conc category ","
variable ","
value ","
expected ","
tol ","
units ","
dcomment ",," ;; extra comma for status
type )))
;; This was run remote, don't think that makes sense. Perhaps not, but that is the easiest path for the moment.
(rmt:csv->test-data run-id test-id
dat)))) dat)
(thread-sleep! 10) ;; add 10 second delay before quit incase rmt needs time to start a server.
)))
;; need to update the top test record if PASS or FAIL and this is a subtest
;;;;;; (if (not (equal? item-path ""))
;;;;;; (rmt:set-state-status-and-roll-up-items run-id test-name item-path state status #f) ;;;;;)
(if (or (and (string? comment)
(string-match (regexp "\\S+") comment))
;;
(define (tests:get-testconfig test-name item-path test-registry system-allowed #!key (force-create #f))
(let* ((use-cache (common:use-cache?))
(cache-path (tests:get-test-path-from-environment))
(cache-file (and cache-path (conc cache-path "/.testconfig")))
(cache-exists (and cache-file
(not force-create) ;; if force-create then pretend there is no cache to read
(file-exists? cache-file)))
(common:file-exists? cache-file)))
(cached-dat (if (and (not force-create)
cache-exists
use-cache)
(handle-exceptions
exn
#f ;; any issues, just give up with the cached version and re-read
(configf:read-alist cache-file))
[setup]
testcopycmd cp --remove-destination -rlv TEST_SRC_PATH/. TEST_TARG_PATH/. >> TEST_TARG_PATH/mt_launch.log 2>> TEST_TARG_PATH/mt_launch.log
# launchwait no
launch-delay 0
launch-delay 0.1
[server]
runtime 180
# runtime 180
# timeout is in hours, this is how long the server will stay alive when not being used.
timeout 0.1
# All these are overridden in ../fdk.config
# [jobtools]
# launcher nbfake
# launcher bsub -q priority -o $MT_TEST_RUN_DIR/openlava.log
[include ../fdk.config]
# Add additional steps here. Format is "stepname script"
[ezsteps]
step1 step1.sh
# Test requirements are specified here
[requirements]
waiton bigrun
priority 0
mode itemwait
itemmap .*/
# Iteration for your tests are controlled by the items section
[items]
NUMBER #{scheme (string-intersperse (map (lambda (x)(conc "blah/" x)) \
NUMBER #{scheme (string-intersperse (map (lambda (x)(conc (if (getenv "USEBLAH") "blah/" "") x)) \
(map number->string (sort (let loop ((a 0)(res '())) \
(if (<= a (or (any->number (get-environment-variable "NUMTESTS")) 2500)) \
(loop (+ a 1)(cons a res)) res)) <))) " ")}
# test_meta is a section for storing additional data on your test
[test_meta]
# this may save a few milliseconds on launching tests
# launchwait no
waivercommentpatt ^WW\d+ [a-z].*
incomplete-timeout 1
# wait 0.5 seconds between launching every process
#
launch-delay 0.5
# launch-delay 0.5
launch-delay 0
# wait for runs to completely complete. yes, anything else is no
run-wait yes
# If set to "default" the old code is used. Otherwise defaults to 200 or uses
# numeric value given.
#
[default]
SOMEVAR This should show up in SOMEVAR3
VARNOVAL
VARNOVAL_WITHSPACE
QUICK %QUICKPATT test_mt_vars,test2,priority_9
# target based getting of config file, look at afs.config and nfs.config
[include #{getenv fsname}.config]
[include #{getenv MT_RUN_AREA_HOME}/common_runconfigs.config]
# #{system echo 'VACKYVAR #{shell pwd}' > $MT_RUN_AREA_HOME/configs/$USER.config}