Cherrypick 0495fb 2a858 from v1.65-broken. Better support for utilizing MT_ vars to fill defaults when -target, -testpatt, etc. are not specified
check-in: 74a0e98868 user: mrwellan tags: v1.65
(archive-path (conc bdisk-path "/" archive-name))
(block-id (rmt:archive-register-block-name bdisk-id archive-path)))
;; (allocation-id (rmt:archive-allocate-testsuite/area-to-block block-id testsuite-name area-key)))
(if block-id ;; (and block-id allocation-id)
(let ((res (cons block-id archive-path)))
(hash-table-set! blockid-cache key res)
res)
(begin
(debug:print 0 *default-log-port* "WARNING: no disk found for " target ", " run-name ", " test-name ", archive-path=" archive-path)
#f))
#f)) ;; no best disk found
#f)))
(begin
(debug:print 0 *default-log-port* "WARNING: no disk found for " target ", " run-name ", " test-name ", block-id=" block-id)
#f)))))) ;; no best disk found
)))
;; archive - run bup
;;
;; 1. create the bup dir if not exists
;; 2. start the du of each directory
;; 3. gen index
;; 4. save
(mutex-unlock! rp-mutex)
(new-test-physical-path (conc best-disk "/" test-partial-path))
(archive-block-id (db:test-get-archived test-dat))
(archive-block-info (rmt:test-get-archive-block-info archive-block-id))
(archive-path (if (vector? archive-block-info)
(vector-ref archive-block-info 2) ;; look in db.scm for test-get-archive-block-info for the vector record info
#f)) ;; no archive found?
(archive-internal-path (conc (common:get-testsuite-name) "-" run-id "/latest/" test-partial-path))) (archive-internal-path (conc (common:get-testsuite-name) "-" run-id "/latest/" test-partial-path))
(include-paths (args:get-arg "-include"))
(exclude-pattern (args:get-arg "-exclude-rx"))
(exclude-file (args:get-arg "-exclude-rx-from")))
;; some sanity checks, move an existing path out of the way - iif it is not a toplevel with children
;;
(if (and (not toplevel/children) ;; special handling needed for toplevel with children
prev-test-physical-path
(common:file-exists? prev-test-physical-path)) ;; what to do? abort or clean up or link it in?
(let* ((base (pathname-directory prev-test-physical-path))
;; use bash to expand a glob. Does NOT handle paths with spaces!
;;
(define (common:bash-glob instr)
(string-split
(with-input-from-pipe
(conc "/bin/bash -c \"echo " instr "\"")
read-line)))
read-line)))
;;======================================================================
;; Some safety net stuff
;;======================================================================
;; return input if it is a list or return null
(define (common:list-or-null inlst #!key (ovrd #f)(message #f))
(if (list? inlst)
inlst
(begin
(if message (debug:print-error 0 *default-log-port* message))
(or ovrd '()))))
;;======================================================================
;; T A R G E T S , S T A T E , S T A T U S ,
;; R U N N A M E A N D T E S T P A T T
;;======================================================================
;; (map print (map car (hash-table->alist (read-config "runconfigs.config" #f #t))))
maxload-in
(max maxload-in 0.5))) ;; so maxload must be greater than 0.5 for now BUG - FIXME?
(first (car loadavg))
(next (cadr loadavg))
(adjload (* maxload (max 1 numcpus))) ;; possible bug where numcpus (or could be maxload) is zero, crude fallback is to at least use 1
(loadjmp (- first next))
(adjwait (min (+ 300 (random 10)) (abs (* (+ (random 10)(/ (- 1000 count) 10) waitdelay) (- first adjload) )) )));; add some randomness to the time to break any alignment where netbatch dumps many jobs to machines simultaneously
;; let's let the user know once in a long while that load checking is happening but not constantly report it
(if (> (random 100) 75) ;; about 25% of the time
(debug:print-info 1 *default-log-port* "Checking cpuload on " (or remote-host "localhost") ", maxload: " maxload
", load: " first ", adjload: " adjload ", loadjmp: " loadjmp)
(debug:print-info 1 *default-log-port* "Checking cpuload on " (or remote-host "localhost") ", maxload: " maxload
", load: " first ", adjload: " adjload ", loadjmp: " loadjmp))
(cond
((and (> first adjload)
(> count 0))
(debug:print-info 0 *default-log-port* "server start delayed " adjwait " seconds due to load " first " exceeding max of " adjload " on server " (or remote-host (get-host-name)) " (normalized load-limit: " maxload ") " (if msg msg ""))
(thread-sleep! adjwait)
(common:wait-for-cpuload maxload numcpus waitdelay count: (- count 1) msg: msg remote-host: remote-host))
((and (> loadjmp numcpus)
(sqlite3:for-each-row
(lambda (id)
(set! res id))
db
"SELECT id FROM archive_blocks WHERE archive_disk_id=? AND disk_path=?;"
bdisk-id archive-path)
(if res ;; record exists, update du if applicable and return res
(begin(if du (sqlite3:execute db "UPDATE archive_blocks SET last_du=?,last_du_time=(strftime('%s','now'))
(if du (sqlite3:execute db "UPDATE archive_blocks SET last_du=?,last_du_time=(strftime('%s','now'))
WHERE archive_disk_id=? AND disk_path=?;"
bdisk-id archive-path du))
bdisk-id archive-path du))
res) (begin
(sqlite3:execute db "INSERT OR REPLACE INTO archive_blocks (archive_disk_id,disk_path,last_du)
VALUES (?,?,?);"
bdisk-id archive-path (or du 0))
(db:archive-register-block-name dbstruct bdisk-id archive-path du: du)))
(set! res (db:archive-register-block-name dbstruct bdisk-id archive-path du: du))))
(stack-push! (dbr:dbstruct-dbstack dbstruct) dbdat)
res))
;; The "archived" field in tests is overloaded; 0 = not archived, > 0 archived in block with given id
;;
(define (db:test-set-archive-block-id dbstruct run-id test-id archive-block-id)
----------------
Hint: You can browse the archive using bup commands directly.
----------------
bup -d /path/to/bup/archive ftp
----------------
Pass Data from Test to Test
~~~~~~~~~~~~~~~~~~~~~~~~~~~
.To save the data call archive save within your test:
----------------
megatest -archive save
----------------
.To retrieve the data call archive get using patterns as needed
----------------
# Put the retrieved data into /tmp
DESTPATH=/tmp/$USER/$MT_TARGET/$MT_RUN_NAME/$MT_TESTNAME/$MT_ITEMPATH/my_data
mkdir -p $DESTPATH
megatest -archive get -runname % -dest $DESTPATH
----------------
Submit jobs to Host Types based on Test Name
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.In megatest.config
------------------------
[host-types]
<div class="paragraph"><p>Hint: You can browse the archive using bup commands directly.</p></div>
<div class="listingblock">
<div class="content monospaced">
<pre>bup -d /path/to/bup/archive ftp</pre>
</div></div>
</div>
</div>
</div>
<div class="sect2">
<h3 id="_pass_data_from_test_to_test">Pass Data from Test to Test</h3>
<div class="listingblock">
<div class="title">To save the data call archive save within your test:</div>
<div class="content monospaced">
<pre>megatest -archive save</pre>
</div></div>
<div class="listingblock">
<div class="title">To retrieve the data call archive get using patterns as needed</div>
<div class="content monospaced">
<pre># Put the retrieved data into /tmp
DESTPATH=/tmp/$USER/$MT_TARGET/$MT_RUN_NAME/$MT_TESTNAME/$MT_ITEMPATH/my_data
mkdir -p $DESTPATH
megatest -archive get -runname % -dest $DESTPATH</pre>
</div></div>
</div>
<div class="sect2">
<h3 id="_submit_jobs_to_host_types_based_on_test_name">Submit jobs to Host Types based on Test Name</h3>
<div class="listingblock">
<div class="title">In megatest.config</div>
<div class="content monospaced">
<pre>[host-types]
</div></div>
<div class="paragraph"><p>To transfer the environment to the next step you can do the following:</p></div>
<div class="listingblock">
<div class="title">Propagate environment to next step</div>
<div class="content monospaced">
<pre>$MT_MEGATEST -env2file .ezsteps/${stepname}</pre>
</div></div>
</div>
<div class="sect2">
<h3 id="_scripts">Scripts</h3>
<div class="listingblock">
<div class="title">Specifying scripts inline (best used for only simple scripts)</div>
<div class="content monospaced">
<pre>[scripts]
loaddb #!/bin/bash
sqlite3 $1 <<EOF
.mode tabs
.import $2 data
.q
EOF</pre>
</div></div>
<div class="paragraph"><p>The above snippet results in the creation of an executable script
called "loaddb" in the test directory. NOTE: every line in the script
must be prefixed with the exact same number of spaces. Lines beginning
with a # will not work as expected. Currently you cannot indent
intermediate lines.</p></div>
<div class="listingblock">
<div class="title">Full example with ezsteps, logpro rules, scripts etc.</div>
<div class="content monospaced">
<pre># You can include a common file
#
[include #{getenv MT_RUN_AREA_HOME}/global-testconfig.inc]
--------------
Complex mapping example
~~~~~~~~~~~~~~~~~~~~~~~
// image::itemmap.png[]
image::complex-itemmap.png[]
We accomplish this by configuring the testconfigs of our tests C D and E as follows:
.Testconfig for Test E has
----------------------
[requirements]
waiton C
. Test A has no waitons. All waitons of all tests in full list have been processed. Full list is finalized.
itemstable
~~~~~~~~~~
An alternative to defining items is the itemstable section. This lets you define the itempath in a table format rather than specifying components and relying on getting all permutations of those components.
Dynamic Flow Dependency Tree
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.Autogeneration waiton list for dynamic flow dependency trees
-------------------
[requirements]
To transfer the environment to the next step you can do the following:
.Propagate environment to next step
----------------------------
$MT_MEGATEST -env2file .ezsteps/${stepname}
----------------------------
Scripts
~~~~~~~
.Specifying scripts inline (best used for only simple scripts)
----------------------------
[scripts]
loaddb #!/bin/bash
sqlite3 $1 <<EOF
.mode tabs
.import $2 data
.q
EOF
----------------------------
The above snippet results in the creation of an executable script
called "loaddb" in the test directory. NOTE: every line in the script
must be prefixed with the exact same number of spaces. Lines beginning
with a # will not work as expected. Currently you cannot indent
intermediate lines.
.Full example with ezsteps, logpro rules, scripts etc.
-----------------
# You can include a common file
#
[include #{getenv MT_RUN_AREA_HOME}/global-testconfig.inc]
# Use "var" for a scratch pad
(print "[fields]")
(map (lambda (k)(print k " TEXT")) keys)
(print "")
(print "[setup]")
(print "# Adjust max_concurrent_jobs to limit how much you load your machines")
(print "max_concurrent_jobs 50\n")
(print "# This is your link path. Avoid moving it once set.")
(print "linktree " (common:real-path lntree))
(print "linktree " lntree) ;; (common:real-path lntree))
(print "\n# Job tools are more advanced ways to control how your jobs are launched")
(print "[jobtools]\nuseshell yes\nlauncher nbfake\nmaxload 1.5\n")
(print "# You can override environment variables for all your tests here")
(print "[env-override]\nEXAMPLE_VAR example value\n")
(print "# As you run more tests you may need to add additional disks, the names are arbitrary but must be unique")
(print "[disks]\ndisk0 " (common:real-path firstd))))
(print "[disks]\ndisk0 " firstd))) ;; (common:real-path firstd))))
(print
"==================
I'm now creating a runconfigs.config file for you with a default section.
You can use this file to set variables for your tests based on the \"target\" (the combination
of keys).
(print "# Override settings in ../runconfigs.config for user " (current-user-name) " here.")))
;; Now create a test and logpro file
(print
"==================
You now have the basic common files for your megatest setup. Next run
\"megatest -gen-test\" to create a test.
\"megatest -create-test <testname>\" to create a test.
Thank you for using Megatest.
You can edit your config files and create tests in the " path " directory
")))
(set! toppath *toppath*)
(if (not *toppath*)
(begin
(debug:print-error 0 *default-log-port* "you are not in a megatest area!")
(exit 1)))
(setenv "MT_RUN_AREA_HOME" *toppath*)
;; the seed read is done, now read runconfigs, cache it then read megatest.config one more time and cache it
(let* ((keys (rmt:get-keys))
(let* ((keys (common:list-or-null (rmt:get-keys)
message: "Failed to retrieve keys in launch.scm. Please report this to the developers."))
(key-vals (keys:target->keyval keys target))
(linktree (common:get-linktree)) ;; (or (getenv "MT_LINKTREE")(if *configdat* (configf:lookup *configdat* "setup" "linktree") #f)))
; (if *configdat*
; (configf:lookup *configdat* "setup" "linktree")
; (conc *toppath* "/lt"))))
(second-pass (find-and-read-config
mtconfig
-refdb2dat refdb : convert refdb to sexp or to format specified by s-dumpmode
formats: perl, ruby, sqlite3, csv (for csv the -o param
will substitute %s for the sheet name in generating
multiple sheets)
-o : output file for refdb2dat (defaults to stdout)
-archive cmd : archive runs specified by selectors to one of disks specified
in the [archive-disks] section.
cmd: keep-html, restore, save, save-remove
cmd: keep-html, restore, save, save-remove, get (use
-dest to set destination), -include path1,path2... to get or save specific files
-generate-html : create a simple html dashboard for browsing your runs
-generate-html-structure : create a top level html veiw to list targets/runs and a Run view within each run directory.
-list-run-time : list time requered to complete runs. It supports following switches
-run-patt <patt> -target-patt <patt> -dumpmode <csv,json,plain-text>
-list-test-time : list time requered to complete each test in a run. It following following arguments
-runname <patt> -target <patt> -dumpmode <csv,json,plain-text>
;;======================================================================
;; Remove old run(s)
;;======================================================================
;; since several actions can be specified on the command line the removal
;; is done first
(define (operate-on action #!key (mode #f)) ;; #f is "use default"
(define (operate-on action #!key (mode #f)(target-in #f)(runname-in #f)(keys-in #f)(keyvals-in #f)) ;; #f is "use default"
(let* ((runrec (runs:runrec-make-record))
(target (or target-in (common:args-get-target))) ;; eventually get rid of the call to common:args-get-target
(runname (or runname-in
(args:get-arg "-runname"))) ;; eventually get rid of the get-arg calls
(testpatt (or (args:get-arg "-testpatt")
(and (eq? action 'archive) ;; if it is an archive command fallback to MT_TEST_NAME and MT_ITEMPATH
(common:get-full-test-name))
(and (eq? action 'kill-runs)
"%/%") ;; I'm just guessing that this is correct :(
(target (common:args-get-target)))
(common:args-get-testpatt #f) ;; (args:get-arg "-testpatt")))
))) ;;
(cond
((not target)
(debug:print-error 0 *default-log-port* "Missing required parameter for " action ", you must specify -target or -reqtarg") (debug:print-error 0 *default-log-port* "Missing required parameter for "
action ", you must specify -target or -reqtarg")
(exit 1))
((not (or (args:get-arg ":runname") (args:get-arg "-runname")))
(debug:print-error 0 *default-log-port* "Missing required parameter for " action ", you must specify the run name pattern with -runname patt") ((not runname)
(debug:print-error 0 *default-log-port* "Missing required parameter for "
action ", you must specify the run name pattern with -runname patt")
(exit 2))
((not (or (args:get-arg "-testpatt") (eq? action 'kill-runs)))
(debug:print-error 0 *default-log-port* "Missing required parameter for " action ", you must specify the test pattern with -testpatt") ((not testpatt)
(debug:print-error 0 *default-log-port* "Missing required parameter for "
action ", you must specify the test pattern with -testpatt")
(exit 3))
(else
(if (not (car *configinfo*))
(begin
(debug:print-error 0 *default-log-port* "Attempted " action "on test(s) but run area config file not found")
(exit 1))
;; put test parameters into convenient variables
(begin
;; check for correct version, exit with message if not correct
(common:exit-on-version-changed)
(runs:operate-on action
target
(common:args-get-runname) ;; (or (args:get-arg "-runname")(args:get-arg ":runname"))(common:args-get-testpatt #f) ;; (args:get-arg "-testpatt")
state: (common:args-get-state)
runname
testpatt
state: (common:args-get-state)
status: (common:args-get-status)
new-state-status: (args:get-arg "-set-state-status")
mode: mode)))
(set! *didsomething* #t)))))
(if (args:get-arg "-kill-runs")
(general-run-call
(define (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs)
;; Take advantage of a good place to exit if running the one-pass methodology
(if (and (> (runs:dat-can-run-more-tests-count runsdat) 20)
(args:get-arg "-one-pass"))
(exit 0))
(thread-sleep! (cond ;; BB: check with Matt. Should this sleep move to cond clauses below where we determine we have too many jobs running rather than each time the and condition above is true (which seems like always)?
((> (runs:dat-can-run-more-tests-count runsdat) 20)
(thread-sleep! (cond ;; BB: check with Matt. Should this sleep move
;; to cond clauses below where we determine we
;; have too many jobs running rather than each
;; time the and condition above is true (which
;; seems like always)?
((> (runs:dat-can-run-more-tests-count runsdat) 20) ;; original intent was - save cycles, wait a long time
(if (runs:lownoise "waiting on tasks" 60)(debug:print-info 2 *default-log-port* "waiting for tasks to complete, sleeping briefly ..."))
(configf:lookup-number *configdat* "setup" "inter-test-delay" default: 0.1) ;; was 2);; obviously haven't had any work to do for a while
(else 0))) 10) ;; obviously haven't had any work to do for a while
(else
;; if have a number for inter-test-delay, use it, else don't delay much, maybe even zero?
(configf:lookup-number *configdat* "setup" "inter-test-delay" default: 0.01))))
(let* ((num-running (rmt:get-count-tests-running run-id))
(num-running-in-jobgroup (rmt:get-count-tests-running-in-jobgroup run-id jobgroup))
(job-group-limit (let ((jobg-count (configf:lookup *configdat* "jobgroups" jobgroup)))
(if (string? jobg-count)
(string->number jobg-count)
jobg-count))))
(header (vector-ref rundat 0))
(runs (vector-ref rundat 1))
(states (if state (string-split state ",") '()))
(statuses (if status (string-split status ",") '()))
(state-status (if (string? new-state-status) (string-split new-state-status ",") '(#f #f)))
(rp-mutex (make-mutex))
(bup-mutex (make-mutex))
(keep-records (args:get-arg "-keep-records"))) ;; used in conjunction with -remove-runs to keep the records, TODO: consolidate this with "mode".
(keep-records (args:get-arg "-keep-records")) ;; used in conjunction with -remove-runs to keep the records, TODO: consolidate this with "mode".
(test-records '())) ;; for tasks that we wish to operate on all tests in one fell swoop
(let* ((write-access-actions '(remove-runs set-state-status archive run-wait kill-runs))
(dbfile (conc *toppath* "/megatest.db"))
(readonly-mode (not (file-write-access? dbfile))))
(when (and readonly-mode
(member action write-access-actions))
(debug:print-error 0 *default-log-port* "megatest.db is readonly. Cannot proceed with action ["action"] in which write-access isrequired .")
((print-run)
(debug:print 1 *default-log-port* "Printing info for run " runkey ", run=" run ", tests=" tests ", header=" header)
action)
((run-wait)
(debug:print 1 *default-log-port* "Waiting for run " runkey ", run=" runnamepatt " to complete"))
((archive)
(debug:print 1 *default-log-port* "Archiving/restoring (" (args:get-arg "-archive") ") data for run: " runkey " " (db:get-value-by-header run header "runname"))
(let ((op (string->symbol (args:get-arg "-archive"))))
(set! worker-thread
(make-thread
(lambda ()
(case (string->symbol (args:get-arg "-archive")) ((save save-remove keep-html)
(archive:run-bup (args:get-arg "-archive") run-id run-name tests rp-mutex bup-mutex))
((restore)
(archive:bup-restore (args:get-arg "-archive") run-id run-name tests rp-mutex bup-mutex))
(else
(debug:print-error 0 *default-log-port* "unrecognised sub command to -archive. Run \"megatest\" to see help")
(exit))))
"archive-bup-thread"))
(thread-start! worker-thread))
(set! worker-thread
(make-thread
(lambda ()
(case op
((save save-remove keep-html)
(archive:run-bup op run-id run-name tests rp-mutex bup-mutex))
((restore)
(archive:bup-restore op run-id run-name tests rp-mutex bup-mutex))
((get) ;;; NOTE: This is a special case. We wish to operate on ALL tests in one go
(set! test-records (append tests test-records)))
(else
(debug:print-error 0 *default-log-port* "unrecognised sub command " op " for -archive. Run \"megatest\" to see help")
(exit))))
"archive-bup-thread"))
(thread-start! worker-thread)
(if (eq? op 'get)
(thread-join! worker-thread)) ;; we need the test-records set to not overlap
))
(else
(debug:print-info 0 *default-log-port* "action not recognised " action)))
;; actions that operate on one test at a time can be handled below
;;
(let ((sorted-tests (filter
vector?
(debug:print-info 0 *default-log-port* "Estimating disk space usage for " test-fulln ": " (common:get-disk-space-used ddir)))))))
(if (not (null? tal))
(loop (car tal)(cdr tal))))
)))
)
(if worker-thread (thread-join! worker-thread)))
(common:join-backgrounded-threads))))
;; remove the run if zero tests remain
(if (eq? action 'remove-runs)
(let* ((run-id (db:get-value-by-header run header "id")) ;; NB// masks run-id from above?
(remtests (mt:get-tests-for-run run-id #f '("DELETED") '("n/a") not-in: #t)))
(if (null? remtests) ;; no more tests remaining
;; Remove the last dir from the path.
;; And same for the link-resolved path
;;======================================================================
;; Routines for manipulating runs
;;======================================================================
;; Since many calls to a run require pretty much the same setup
;; this wrapper is used to reduce the replication of code
(define (general-run-call switchname action-desc proc)
(let ((runname (or (args:get-arg "-runname")(args:get-arg ":runname")))
(let ((runname (common:args-get-runname))
(target (common:args-get-target)))
(cond
((not target)
(debug:print-error 0 *default-log-port* "Missing required parameter for " switchname ", you must specify the target with -target")
(exit 3))
((not runname)
(debug:print-error 0 *default-log-port* "Missing required parameter for " switchname ", you must specify the run name with -runname runname")