Index: apimod.scm ================================================================== --- apimod.scm +++ apimod.scm @@ -1585,23 +1585,10 @@ (let* ((header (append keys remfields)) (keystr (conc (keys->keystr keys) "," (string-intersperse remfields ",")))) (list keystr header))) -;; make a query (fieldname like 'patt1' OR fieldname -(define (db:patt->like fieldname pattstr #!key (comparator " OR ")) - (let ((patts (if (string? pattstr) - (string-split pattstr ",") - '("%")))) - (string-intersperse (map (lambda (patt) - (let ((wildtype (if (substring-index "%" patt) "LIKE" "GLOB"))) - (conc fieldname " " wildtype " '" patt "'"))) - (if (null? patts) - '("") - patts)) - comparator))) - ;; register a test run with the db, this accesses the main.db and does NOT ;; use server api ;; (define (db:register-run dbstruct keyvals runname state status user contour-in) @@ -1634,53 +1621,10 @@ res))) (begin (debug:print-error 0 *default-log-port* "Called without all necessary keys") #f)))) -;; replace header and keystr with a call to runs:get-std-run-fields -;; -;; keypatts: ( (KEY1 "abc%def")(KEY2 "%") ) -;; runpatts: patt1,patt2 ... -;; -(define (db:get-runs dbstruct runpatt count offset keypatts) - (let* ((res '()) - (keys (db:get-keys dbstruct)) - (runpattstr (db:patt->like "runname" runpatt)) - (remfields (list "id" "runname" "state" "status" "owner" "event_time")) - (header (append keys remfields)) - (keystr (conc (keys->keystr keys) "," - (string-intersperse remfields ","))) - (qrystr (conc "SELECT " keystr " FROM runs WHERE (" runpattstr ") " ;; runname LIKE ? " - ;; Generate: " AND x LIKE 'keypatt' ..." - (if (null? keypatts) "" - (conc " AND " - (string-join - (map (lambda (keypatt) - (let ((key (car keypatt)) - (patt (cadr keypatt))) - (db:patt->like key patt))) - keypatts) - " AND "))) - " AND state != 'deleted' ORDER BY event_time DESC " - (if (number? count) - (conc " LIMIT " count) - "") - (if (number? 
offset) - (conc " OFFSET " offset) - "")))) - (debug:print-info 11 *default-log-port* "db:get-runs START qrystr: " qrystr " keypatts: " keypatts " offset: " offset " limit: " count) - (db:with-db dbstruct #f #f - (lambda (db) - (sqlite3:for-each-row - (lambda (a . x) - (set! res (cons (apply vector a x) res))) - db - qrystr - ))) - (debug:print-info 11 *default-log-port* "db:get-runs END qrystr: " qrystr " keypatts: " keypatts " offset: " offset " limit: " count) - (vector header res))) - (define-record simple-run target id runname state status owner event_time) (define-record-printer (simple-run x out) (fprintf out "#,(simple-run ~S ~S ~S ~S)" (simple-run-target x) (simple-run-id x) (simple-run-runname x) (time->string (seconds->local-time (simple-run-event_time x) )))) @@ -2703,29 +2647,10 @@ db (conc "SELECT " db:test-record-qry-selector " FROM tests WHERE id=?;") test-id) res)))) -;; Use db:test-get* to access -;; Get test data using test_ids. NB// Only works within a single run!! -;; -(define (db:get-test-info-by-ids dbstruct run-id test-ids) - (db:with-db - dbstruct - run-id - #f - (lambda (db) - (let ((res '())) - (sqlite3:for-each-row - (lambda (a . b) - ;; 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 - (set! 
res (cons (apply vector a b) res))) - db - (conc "SELECT " db:test-record-qry-selector " FROM tests WHERE id in (" - (string-intersperse (map conc test-ids) ",") ");")) - res)))) - (define (db:get-test-info dbstruct run-id test-name item-path) (db:with-db dbstruct run-id #f Index: archive.scm ================================================================== --- archive.scm +++ archive.scm @@ -28,367 +28,5 @@ (import commonmod) (include "common_records.scm") (include "db_records.scm") -;;====================================================================== -;; -;;====================================================================== - -;; NOT CURRENTLY USED -;; -(define (archive:main linktree target runname testname itempath options) - (let ((testdir (conc linktree "/" target "/" runname "/" testname "/" itempatt)) - (flavor 'plain) ;; type of machine to run jobs on - (maxload 1.5) ;; max allowed load for this work - (adisks (archive:get-archive-disks))) - ;; get testdir size - ;; - hand off du to job mgr - (if (and (common:file-exists? testdir) - (file-is-writable? 
testdir)) - (let* ((dused (jobrunner:run-job - flavor ;; machine type - maxload ;; max allowed load - '() ;; prevars - environment vars to set for the job - common:get-disk-space-used ;; if a proc call it, if a string it is a unix command - (list testdir))) - (apath (archive:get-archive testname itempath dused))) - (jobrunner:run-job - flavor - maxload - '() - archive:run-bup - (list testdir apath)))))) - -;; Get archive disks from megatest.config -;; -(define (archive:get-archive-disks) - (let ((section (configf:get-section *configdat* "archive-disks"))) - (if section - section - '()))) - -;; look for the best candidate archive area, else create new -;; area -;; -(define (archive:get-archive testname itempath dused) - ;; look up in archive_allocations if there is a pre-used archive - ;; with adequate diskspace - ;; - (let* ((existing-blocks (rmt:archive-get-allocations testname itempath dused)) - (candidate-disks (map (lambda (block) - (list - (vector-ref block 1) ;; archive-area-name - (vector-ref block 2))) ;; disk-path - existing-blocks))) - (or (common:get-disk-with-most-free-space candidate-disks dused) - (archive:allocate-new-archive-block #f #f #f)))) ;; BROKEN. testname itempath)))) - -;; allocate a new archive area -;; -(define (archive:allocate-new-archive-block blockid-cache run-area-home testsuite-name dneeded target run-name test-name) - (let ((key (conc testsuite-name "/" target "/" run-name "/" test-name))) - (if (hash-table-exists? 
blockid-cache key) - (hash-table-ref blockid-cache key) - (let* ((pscript (configf:lookup *configdat* "archive" "pathscript")) - (pscript-cmd (conc pscript " " testsuite-name " " target " " run-name " " test-name)) - (apath (if pscript - (handle-exceptions - exn - (begin - (debug:print 0 "ERROR: script \"" pscript-cmd "\" failed to run properly.") - (exit 1)) - (with-input-from-pipe - pscript-cmd - read-line)) - #f)) ;; this is the user-calculated archive path - (adisks (archive:get-archive-disks)) - (best-disk (common:get-disk-with-most-free-space adisks dneeded))) - (if best-disk - (let* ((bdisk-name (car best-disk)) - (bdisk-path (cdr best-disk)) - (area-key (substring (message-digest-string (md5-primitive) run-area-home) 0 5)) - (bdisk-id (rmt:archive-register-disk bdisk-name bdisk-path (get-df bdisk-path))) - (archive-name (if apath - apath - (let ((sec (current-seconds))) - (conc (time->string (seconds->local-time sec) "%Y") - "_q" (seconds->quarter sec) "/" - testsuite-name "_" area-key)))) - (archive-path (conc bdisk-path "/" archive-name)) - (block-id (rmt:archive-register-block-name bdisk-id archive-path))) - ;; (allocation-id (rmt:archive-allocate-testsuite/area-to-block block-id testsuite-name area-key))) - (if block-id ;; (and block-id allocation-id) - (let ((res (cons block-id archive-path))) - (hash-table-set! blockid-cache key res) - res) - #f)) - #f)) ;; no best disk found - ))) - -;; archive - run bup -;; -;; 1. create the bup dir if not exists -;; 2. start the du of each directory -;; 3. gen index -;; 4. 
save -;; -(define (archive:run-bup archive-command run-id run-name tests rp-mutex bup-mutex) - ;; move the getting of archive space down into the below block so that a single run can - ;; allocate as needed should a disk fill up - ;; - (let* ((blockid-cache (make-hash-table)) - (tsname (common:get-area-name *alldat*)) - (min-space (string->number (or (configf:lookup *configdat* "archive" "minspace") "1000"))) - (arch-groups (make-hash-table)) ;; archive groups, each corrosponds to a bup area - (disk-groups (make-hash-table)) ;; - (test-groups (make-hash-table)) ;; these two (disk and test groups) could be combined nicely - (test-dirs (make-hash-table)) - (bup-exe (or (configf:lookup *configdat* "archive" "bup") "bup")) - (compress (or (configf:lookup *configdat* "archive" "compress") "9")) - (linktree (common:get-linktree)) ;; (configf:lookup *configdat* "setup" "linktree"))) - (archiver (let ((s (configf:lookup *configdat* "archive" "archiver"))) - (if s (string->symbol s) 'bup))) - (archiver-cmd (case archiver - ((tar) "tar cfj ARCHIVE_NAME.tar.bz2 ") - ((7z) " 7z u -t7z -m0=lzma -mx=9 -mfb=64 -md=32m -ms=on ARCHIVE_NAME.7z ") - (else #f))) - (print-prefix "Running: ") ;; change to #f to turn off printing - (preclean-spec (configf:get-section *configdat* "archive-preclean"))) - - ;; (tests:match patt testname itempath) - - ;; from the test info bin the path to the test by stem - ;; - (for-each - (lambda (test-dat) - (let* ((item-path (db:test-get-item-path test-dat)) - (test-name (db:test-get-testname test-dat)) - (test-id (db:test-get-id test-dat)) - (run-id (db:test-get-run_id test-dat)) - (target (string-intersperse (map cadr (rmt:get-key-val-pairs run-id)) "/")) - - (toplevel/children (and (db:test-get-is-toplevel test-dat) - (> (rmt:test-toplevel-num-items run-id test-name) 0))) - (test-partial-path (conc target "/" run-name "/" (db:test-make-full-name test-name item-path))) - ;; note the trailing slash to get the dir inspite of it being a link - (test-path 
(conc linktree "/" test-partial-path)) - (mutex-lock! rp-mutex) - (test-physical-path (if (common:file-exists? test-path) - (common:real-path test-path) - #f)) - (mutex-unlock! rp-mutex) - (partial-path-index (if test-physical-path (substring-index test-partial-path test-physical-path) #f)) - (test-base (if (and partial-path-index - test-physical-path ) - (substring test-physical-path - 0 - partial-path-index) - #f)) - ;; we need our archive dir checked for every test to enable folks who want to store other ways. - (archive-info (archive:allocate-new-archive-block blockid-cache *toppath* tsname min-space target run-name test-name)) - (archive-dir (if archive-info (cdr archive-info) #f)) - (archive-id (if archive-info (car archive-info) -1)) - - ) - - (if (not archive-dir) ;; no archive disk found, this is fatal - (begin - (debug:print 0 *default-log-port* "FATAL: No archive disks found. Please add disks with at least " - min-space " MB space to the [archive-disks] section of megatest.config") - (debug:print 0 *default-log-port* " use [archive] minspace to specify minimum available space") - (debug:print 0 *default-log-port* " disks: " - (string-intersperse (map cadr (archive:get-archive-disks)) "\n ")) - (exit 1)) - (debug:print-info 0 *default-log-port* "Using path " archive-dir " for archiving test " test-path)) - - ;; preclean the test directory per the spec if provided - (if (not (null? preclean-spec)) ;; we've been asked to preclean before archiving - (let loop ((spec (car preclean-spec)) - (tail (cdr preclean-spec))) - (if (> (length spec) 1) - (let ((testspec (car spec)) - (rules (cadr spec))) - (if (tests:match testspec test-name item-path) - (begin - (debug:print 0 *default-log-port* "INFO: cleanup requested for " test-physical-path) - (common:dir-clean-up test-physical-path rules remove-empty: #t)) - (if (not (null? tail)) - (loop (car tail)(cdr tail))))) - (begin - (debug:print 0 *default-log-port* "ERROR: bad spec line in [archive-preclean] section. 
\"" spec "\"") - (if (not (null? tail))(loop (car tail)(cdr tail))))))) - (cond - (toplevel/children - (debug:print 0 *default-log-port* "WARNING: cannot archive " test-name " with id " test-id - " as it is a toplevel test with children")) - ((not (common:file-exists? test-path)) - (debug:print 0 *default-log-port* "WARNING: Cannot archive " test-name "/" item-path - " as path " test-path " does not exist")) - (else - (debug:print 0 *default-log-port* - "From test-dat=" test-dat " derived the following:\n" - "test-partial-path = " test-partial-path "\n" - "test-path = " test-path "\n" - "test-physical-path = " test-physical-path "\n" - "partial-path-index = " partial-path-index "\n" - "test-base = " test-base) - (hash-table-set! disk-groups test-base - (cons test-physical-path (hash-table-ref/default disk-groups test-base '()))) - (hash-table-set! test-groups test-base - (cons test-dat (hash-table-ref/default test-groups test-base '()))) - (hash-table-set! arch-groups test-base - (cons archive-info (hash-table-ref/default arch-groups test-base '()))) - (hash-table-set! test-dirs test-id test-path))))) - ;; test-path)))) - tests) - (debug:print 0 *default-log-port* "INFO: DISK GROUPS=" (hash-table->alist disk-groups)) - ;; for each disk-group, initialize the bup area if needed - (for-each - (lambda (test-base) - (let* ((disk-group (hash-table-ref disk-groups test-base)) - (arch-group (hash-table-ref arch-groups test-base)) - (arch-info (car arch-group)) ;; don't know yet how this will work, can I get more than one possibility? - (archive-id (car arch-info)) - (archive-dir (cdr arch-info))) - (debug:print 0 *default-log-port* "Processing disk-group " test-base) - (let* ((test-paths (hash-table-ref disk-groups test-base))) - (if (not (common:file-exists? 
archive-dir)) - (create-directory archive-dir #t)) - (case archiver - ((bup) ;; Archive using bup - (let* ((bup-init-params (list "-d" archive-dir "init")) - (bup-index-params (append (list "-d" archive-dir "index") test-paths)) - (bup-save-params (append (list "-d" archive-dir "save" ;; (conc "--strip-path=" linktree) - (conc "-" compress) ;; or (conc "--compress=" compress) - "-n" (conc (common:get-area-name *alldat*) "-" run-id) - (conc "--strip-path=" test-base) ;; if we push to the directory do we need this? - ) - test-paths))) - (if (not (common:file-exists? (conc archive-dir "/HEAD"))) - (begin - ;; replace this with jobrunner stuff enventually - (debug:print-info 0 *default-log-port* "Init bup in " archive-dir) - ;; (mutex-lock! bup-mutex) - (run-n-wait bup-exe params: bup-init-params print-cmd: print-prefix) - ;; (mutex-unlock! bup-mutex) - )) - (debug:print-info 0 *default-log-port* "Indexing data to be archived") - ;; (mutex-lock! bup-mutex) - (run-n-wait bup-exe params: bup-index-params print-cmd: print-prefix) - (debug:print-info 0 *default-log-port* "Archiving data with bup") - (run-n-wait bup-exe params: bup-save-params print-cmd: print-prefix))) - ((7z tar) - (for-each - (lambda (test-dat) - (let* ((test-id (db:test-get-id test-dat)) - (test-name (db:test-get-testname test-dat)) - (item-path (db:test-get-item-path test-dat)) - (test-full-name (db:test-make-full-name test-name item-path)) - (run-id (db:test-get-run_id test-dat)) - (target (string-intersperse (map cadr (rmt:get-key-val-pairs run-id)) "/")) - (run-name (rmt:get-run-name-from-id run-id)) - (source-dir (hash-table-ref test-dirs test-id)) ;; (conc test-base "/" test-name "/" item-path)) - (target-dir (string-substitute "/$" "" (conc archive-dir "/" target "/" run-name "/" test-full-name)))) - ;; create the test and item-path levels under archive-dir - (create-directory (pathname-directory target-dir) #t) - (run-n-wait - (conc - (string-substitute "ARCHIVE_NAME" target-dir archiver-cmd) " 
" - "." - ) - print-cmd: print-prefix - run-dir: source-dir))) - (hash-table-ref test-groups test-base)))) - ;; (mutex-unlock! bup-mutex) - (for-each - (lambda (test-dat) - (let ((test-id (db:test-get-id test-dat)) - (run-id (db:test-get-run_id test-dat))) - (rmt:test-set-archive-block-id run-id test-id archive-id) - (if (member archive-command '("save-remove")) - (runs:remove-test-directory test-dat 'archive-remove)))) - (hash-table-ref test-groups test-base))))) - (hash-table-keys disk-groups)) - #t)) - -(define (archive:bup-restore archive-command run-id run-name tests rp-mutex bup-mutex) ;; move the getting of archive space down into the below block so that a single run can - ;; allocate as needed should a disk fill up - ;; - (let* ((bup-exe (or (configf:lookup *configdat* "archive" "bup") "bup")) - (linktree (common:get-linktree))) ;; (configf:lookup *configdat* "setup" "linktree"))) - - ;; from the test info bin the path to the test by stem - ;; - (for-each - (lambda (test-dat) - ;; When restoring test-dat will initially contain an old and invalid path to the test - (let* ((best-disk (get-best-disk *configdat* #f)) ;; BUG: get the testconfig and use it here. Otherwise data pulled out of archive could end up on the wrong kind of disk. - (item-path (db:test-get-item-path test-dat)) - (test-name (db:test-get-testname test-dat)) - (test-id (db:test-get-id test-dat)) - (run-id (db:test-get-run_id test-dat)) - (keyvals (rmt:get-key-val-pairs run-id)) - (target (string-intersperse (map cadr keyvals) "/")) - - (toplevel/children (and (db:test-get-is-toplevel test-dat) - (> (rmt:test-toplevel-num-items run-id test-name) 0))) - (test-partial-path (conc target "/" run-name "/" (db:test-make-full-name test-name item-path))) - ;; note the trailing slash to get the dir inspite of it being a link - (test-path (conc linktree "/" test-partial-path)) - ;; if the old path was not deleted then prev-test-physical-path will end up pointing to a real directory - (mutex-lock! 
rp-mutex) - (prev-test-physical-path (if (common:file-exists? test-path) - ;; (read-symbolic-link test-path #t) - (common:real-path test-path) - #f)) - (mutex-unlock! rp-mutex) - (new-test-physical-path (conc best-disk "/" test-partial-path)) - (archive-block-id (db:test-get-archived test-dat)) - (archive-block-info (rmt:test-get-archive-block-info archive-block-id)) - (archive-path (if (vector? archive-block-info) - (vector-ref archive-block-info 2) ;; look in db.scm for test-get-archive-block-info for the vector record info - #f)) ;; no archive found? - (archive-internal-path (conc (common:get-area-name *alldat*) "-" run-id "/latest/" test-partial-path))) - - ;; some sanity checks, move an existing path out of the way - iif it is not a toplevel with children - ;; - (if (and (not toplevel/children) ;; special handling needed for toplevel with children - prev-test-physical-path - (common:file-exists? prev-test-physical-path)) ;; what to do? abort or clean up or link it in? - (let* ((base (pathname-directory prev-test-physical-path)) - (dirn (pathname-file prev-test-physical-path)) - (newn (conc base "/." dirn))) - (debug:print-error 0 *default-log-port* "the old directory " prev-test-physical-path ", still exists! Moving it to " newn) - (rename-file prev-test-physical-path newn))) - - (if (and archive-path ;; no point in proceeding if there is no actual archive - (not toplevel/children)) - (begin - ;; CREATE WORK AREA - ;; test-src-path == #f ==> don't copy in data from tests directory - ;; itemdat == string ==> use directly - (create-work-area run-id run-name keyvals test-id #f best-disk test-name item-path) ;; #!key (remtries 2)) - - ;; 1. Get the block id from the test info - ;; 2. Get the block data given the block id - ;; 3. Construct the paths etc. 
for the following command: - ;; - ;; bup -d /tmp/matt/adisk1/2015_q1/fullrun_e1a40/ restore -C /tmp/seeme fullrun-30/latest/ubuntu/nfs/none/w02.1.20.54_b/ - - ;; DO BUP RESTORE - (let* ((new-test-dat (rmt:get-test-info-by-id run-id test-id)) - (new-test-path (if (vector? new-test-dat ) - (db:test-get-rundir new-test-dat) - (begin - (debug:print-error 0 *default-log-port* "unable to get data for run-id=" run-id ", test-id=" test-id) - (exit 1)))) - ;; new-test-path won't work - must use best-disk instead? Nope, new-test-path but tack on /.. - (bup-restore-params (list "-d" archive-path "restore" "-C" (conc new-test-path "/..") archive-internal-path))) - (debug:print-info 0 *default-log-port* "Restoring archived data to " new-test-physical-path " from archive in " archive-path " ... " archive-internal-path) - ;; (mutex-lock! bup-mutex) - (run-n-wait bup-exe params: bup-restore-params print-cmd: #f) - ;; (mutex-unlock! bup-mutex) - (mt:test-set-state-status-by-id run-id test-id "COMPLETED" #f #f))) - (debug:print-error 0 *default-log-port* "No archive path in the record for run-id=" run-id " test-id=" test-id)))) - (filter vector? tests)))) - Index: client.scm ================================================================== --- client.scm +++ client.scm @@ -34,83 +34,5 @@ (include "common_records.scm") (include "db_records.scm") -;; client:get-signature -(define (client:get-signature) - (if *my-client-signature* *my-client-signature* - (let ((sig (conc (get-host-name) " " (current-process-id)))) - (set! *my-client-signature* sig) - *my-client-signature*))) - -;; Not currently used! But, I think it *should* be used!!! -(define (client:logout serverdat) - (let ((ok (and (socket? 
serverdat) - (cdb:logout serverdat *toppath* (client:get-signature))))) - ok)) - -(define (client:connect iface port) - (http:client-connect iface port)) - -(define (client:setup areapath #!key (remaining-tries 100) (failed-connects 0)) - (client:setup-http *alldat* areapath remaining-tries: remaining-tries failed-connects: failed-connects)) - -(set-fn 'client:setup client:setup) - - -;; Do all the connection work, look up the transport type and set up the -;; connection if required. -;; -;; There are two scenarios. -;; 1. We are a test manager and we received *transport-type* and *runremote* via cmdline -;; 2. We are a run tests, list runs or other interactive process and we must figure out -;; *transport-type* and *runremote* from the monitor.db -;; -;; client:setup -;; -;; lookup_server, need to remove *runremote* stuff -> replace with *alldat* for now -;; - -(define (client:setup-http runremote areapath #!key (remaining-tries 100) (failed-connects 0)) ;; (area-dat #f)) - (debug:print-info 2 *default-log-port* "client:setup remaining-tries=" remaining-tries) - (server:start-and-wait areapath) - (if (<= remaining-tries 0) - (begin - (debug:print-error 0 *default-log-port* "failed to start or connect to server") - (exit 1)) - ;; - ;; Alternatively here, we can get the list of candidate servers and work our way - ;; through them searching for a good one. 
- ;; - (let* ((server-dat (server:get-rand-best areapath))) ;; (server:get-first-best areapath)) - (if (not server-dat) ;; no server found - (client:setup-http runremote areapath remaining-tries: (- remaining-tries 1)) - (let ((host (cadr server-dat)) - (port (caddr server-dat))) - (debug:print-info 4 *default-log-port* "client:setup server-dat=" server-dat ", remaining-tries=" remaining-tries) - (if (and host port) - (let* ((start-res (case *transport-type* - ((http)(http-transport:client-connect host port)))) - (ping-res (case *transport-type* - ((http)(rmt:login-no-auto-client-setup start-res))))) - (if (and start-res - ping-res) - (begin - (alldat-conndat-set! runremote start-res) - (debug:print-info 2 *default-log-port* "connected to " (http-transport:server-dat-make-url start-res)) - start-res) - (begin ;; login failed but have a server record, clean out the record and try again - (debug:print-info 0 *default-log-port* "client:setup, login unsuccessful, will attempt to start server ... start-res=" start-res ", server-dat=" server-dat) ;; had runid. Fixes part of Randy;s ticket 1405717332 - (case *transport-type* - ((http)(http-transport:close-connections))) - (alldat-conndat-set! runremote #f) ;; (hash-table-delete! runremote run-id) - (thread-sleep! 1) - (client:setup-http runremote areapath remaining-tries: (- remaining-tries 1)) - ))) - (begin ;; no server registered - ;; (server:kind-run areapath) - (server:start-and-wait areapath) - (debug:print-info 0 *default-log-port* "client:setup, no server registered, remaining-tries=" remaining-tries) - (thread-sleep! 1) ;; (+ 5 (random (- 20 remaining-tries)))) ;; give server a little time to start up, randomize a little to avoid start storms. 
- (client:setup-http runremote areapath remaining-tries: (- remaining-tries 1))))))))) - Index: common.scm ================================================================== --- common.scm +++ common.scm @@ -31,121 +31,5 @@ (declare (uses commonmod)) (import commonmod) (include "common_records.scm") -;;====================================================================== -;; D A S H B O A R D U S E R V I E W S -;;====================================================================== - -;; first read ~/views.config if it exists, then read $MTRAH/views.config if it exists -;; -(define (common:load-views-config) - (let* ((view-cfgdat (make-hash-table)) - (home-cfgfile (conc (get-environment-variable "HOME") "/.mtviews.config")) - (mthome-cfgfile (conc *toppath* "/.mtviews.config"))) - (if (common:file-exists? mthome-cfgfile) - (read-config mthome-cfgfile view-cfgdat #t)) - ;; we load the home dir file AFTER the MTRAH file so the user can clobber settings when running the dashboard in read-only areas - (if (common:file-exists? home-cfgfile) - (read-config home-cfgfile view-cfgdat #t)) - view-cfgdat)) - - -;; (require-library margs) -;; (include "margs.scm") - -;; (define old-exit exit) -;; -;; (define (exit . code) -;; (if (null? code) -;; (old-exit) -;; (old-exit code))) - -;;====================================================================== -;; T A R G E T S , S T A T E , S T A T U S , -;; R U N N A M E A N D T E S T P A T T -;;====================================================================== - -;; (map print (map car (hash-table->alist (read-config "runconfigs.config" #f #t)))) -;; -(define (common:get-runconfig-targets #!key (configf #f)) - (let ((targs (sort (map car (hash-table->alist - (or configf ;; NOTE: There is no value in using runconfig:read here. 
- (read-config (conc *toppath* "/runconfigs.config") - #f #t) - (make-hash-table)))) - stringstring (u8vector->list (if res res (hostname->ip hostname)))) "."))) +;;====================================================================== +;; D A S H B O A R D U S E R V I E W S +;;====================================================================== + +;; first read ~/views.config if it exists, then read $MTRAH/views.config if it exists +;; +(define (common:load-views-config) + (let* ((view-cfgdat (make-hash-table)) + (home-cfgfile (conc (get-environment-variable "HOME") "/.mtviews.config")) + (mthome-cfgfile (conc *toppath* "/.mtviews.config"))) + (if (common:file-exists? mthome-cfgfile) + (read-config mthome-cfgfile view-cfgdat #t)) + ;; we load the home dir file AFTER the MTRAH file so the user can clobber settings when running the dashboard in read-only areas + (if (common:file-exists? home-cfgfile) + (read-config home-cfgfile view-cfgdat #t)) + view-cfgdat)) + + +;; (require-library margs) +;; (include "margs.scm") + +;; (define old-exit exit) +;; +;; (define (exit . code) +;; (if (null? code) +;; (old-exit) +;; (old-exit code))) + +;;====================================================================== +;; T A R G E T S , S T A T E , S T A T U S , +;; R U N N A M E A N D T E S T P A T T +;;====================================================================== + +;; (map print (map car (hash-table->alist (read-config "runconfigs.config" #f #t)))) +;; +(define (common:get-runconfig-targets #!key (configf #f)) + (let ((targs (sort (map car (hash-table->alist + (or configf ;; NOTE: There is no value in using runconfig:read here. 
+ (read-config (conc *toppath* "/runconfigs.config") + #f #t) + (make-hash-table)))) + stringstring (vector success/fail query-sig result))) ;; (send-message pubsock target send-more: #t) + +;; Given a run id start a server process ### NOTE ### > file 2>&1 +;; if the run-id is zero and the target-host is set +;; try running on that host +;; incidental: rotate logs in logs/ dir. +;; +(define (server:run areapath) ;; areapath is *toppath* for a given testsuite area + (let* ((curr-host (get-host-name)) + ;; (attempt-in-progress (server:start-attempted? areapath)) + ;; (dot-server-url (server:check-if-running areapath)) + (curr-ip (server:get-best-guess-address curr-host)) + (curr-pid (current-process-id)) + (homehost (common:get-homehost)) ;; configf:lookup *configdat* "server" "homehost" )) + (target-host (car homehost)) + (testsuite (common:get-area-name *alldat*)) + (logfile (conc areapath "/logs/server.log")) ;; -" curr-pid "-" target-host ".log")) + (cmdln (conc (common:get-megatest-exe) + " -server " (or target-host "-") (if (equal? (configf:lookup *configdat* "server" "daemonize") "yes") + " -daemonize " + "") + ;; " -log " logfile + " -m testsuite:" testsuite)) ;; (conc " >> " logfile " 2>&1 &"))))) + (log-rotate (make-thread common:rotate-logs "server run, rotate logs thread")) + (load-limit (configf:lookup-number *configdat* "jobtools" "max-server-start-load" default: 3.0))) + ;; we want the remote server to start in *toppath* so push there + (push-directory areapath) + (debug:print 0 *default-log-port* "INFO: Trying to start server (" cmdln ") ...") + (thread-start! log-rotate) + + ;; host.domain.tld match host? + (if (and target-host + ;; look at target host, is it host.domain.tld or ip address and does it + ;; match current ip or hostname + (not (string-match (conc "("curr-host "|" curr-host"\\..*)") target-host)) + (not (equal? 
curr-ip target-host))) + (begin + (debug:print-info 0 *default-log-port* "Starting server on " target-host ", logfile is " logfile) + (setenv "TARGETHOST" target-host))) + + (setenv "TARGETHOST_LOGF" logfile) + (thread-sleep! (/ (random 5000) 1000)) ;; add about a random (up to 5 seconds) initial delay. It seems pretty common that many running tests request a server at the same time + (common:wait-for-normalized-load load-limit " delaying server start due to load" target-host) ;; do not try starting servers on an already overloaded machine, just wait forever + (system (conc "nbfake " cmdln)) + (unsetenv "TARGETHOST_LOGF") + (if (get-environment-variable "TARGETHOST")(unsetenv "TARGETHOST")) + (thread-join! log-rotate) + (pop-directory))) + +;; given a path to a server log return: host port startseconds +;; +(define (server:logf-get-start-info logf) + (let ((rx (regexp "^SERVER STARTED: (\\S+):(\\d+) AT ([\\d\\.]+)"))) ;; SERVER STARTED: host:port AT timesecs + (handle-exceptions + exn + (list #f #f #f) ;; no idea what went wrong, call it a bad server + (with-input-from-file + logf + (lambda () + (let loop ((inl (read-line)) + (lnum 0)) + (if (not (eof-object? inl)) + (let ((mlst (string-match rx inl))) + (if (not mlst) + (if (< lnum 500) ;; give up if more than 500 lines of server log read + (loop (read-line)(+ lnum 1)) + (list #f #f #f)) + (let ((dat (cdr mlst))) + (list (car dat) ;; host + (string->number (cadr dat)) ;; port + (string->number (caddr dat)))))) + (list #f #f #f)))))))) + +;; get a list of servers with all relevant data +;; ( mod-time host port start-time pid ) +;; +(define (server:get-list areapath #!key (limit #f)) + (let ((fname-rx (regexp "^(|.*/)server-(\\d+)-(\\S+).log$")) + (day-seconds (* 24 60 60))) + ;; if the directory exists continue to get the list + ;; otherwise attempt to create the logs dir and then + ;; continue + (if (if (directory-exists? (conc areapath "/logs")) + '() + (if (file-write-access? 
areapath) + (begin + (condition-case + (create-directory (conc areapath "/logs") #t) + (exn (i/o file)(debug:print 0 *default-log-port* "ERROR: Cannot create directory at " (conc areapath "/logs"))) + (exn ()(debug:print 0 *default-log-port* "ERROR: Unknown error attemtping to get server list."))) + (directory-exists? (conc areapath "/logs"))) + '())) + (let* ((server-logs (glob (conc areapath "/logs/server-*.log"))) + (num-serv-logs (length server-logs))) + (if (null? server-logs) + '() + (let loop ((hed (car server-logs)) + (tal (cdr server-logs)) + (res '())) + (let* ((mod-time (handle-exceptions + exn + (current-seconds) ;; 0 + (file-modification-time hed))) ;; default to *very* old so log gets ignored if deleted + (down-time (- (current-seconds) mod-time)) + (serv-dat (if (or (< num-serv-logs 10) + (< down-time 900)) ;; day-seconds)) + (server:logf-get-start-info hed) + '())) ;; don't waste time processing server files not touched in the 15 minutes if there are more than ten servers to look at + (serv-rec (cons mod-time serv-dat)) + (fmatch (string-match fname-rx hed)) + (pid (if fmatch (string->number (list-ref fmatch 2)) #f)) + (new-res (if (null? serv-dat) + res + (cons (append serv-rec (list pid)) res)))) + (if (null? tal) + (if (and limit + (> (length new-res) limit)) + new-res ;; (take new-res limit) <= need intelligent sorting before this will work + new-res) + (loop (car tal)(cdr tal) new-res))))))))) + +(define (server:get-num-alive srvlst) + (let ((num-alive 0)) + (for-each + (lambda (server) + (match-let (((mod-time host port start-time pid) + server)) + (let* ((uptime (- (current-seconds) mod-time)) + (runtime (if start-time + (- mod-time start-time) + 0))) + (if (< uptime 5)(set! num-alive (+ num-alive 1)))))) + srvlst) + num-alive)) + +;; given a list of servers get a list of valid servers, i.e. at least +;; 10 seconds old, has started and is less than 1 hour old and is +;; active (i.e. 
;; mod-time < 10 seconds)
;;
;; NOTE: the thresholds actually used by the code below are: started more
;;       than 0 seconds ago, log touched within the last 16 seconds and
;;       running for less than [server] runtime (default 3600) -180 +(0..359)
;;       seconds.
;;
;; mod-time host port start-time pid
;;
;; sort by start-time ascending. I.e. get the oldest first. Young servers will thus drop off
;; and servers should stick around for about two hours or so.
;;
;; Returns at most (server:get-num-servers) records.
;;
(define (server:get-best srvlst)
  (let* ((nums        (server:get-num-servers))
         (now         (current-seconds))
         ;; looked up once instead of once per record; a malformed setting falls back to 3600
         (max-runtime (or (string->number (or (configf:lookup *configdat* "server" "runtime") "3600"))
                          3600))
         (slst        (sort
                       (filter (lambda (rec)
                                 (if (and (list? rec)
                                          (> (length rec) 3)) ;; index 3 (start-time) is read below
                                     (let ((start-time (list-ref rec 3))
                                           (mod-time   (list-ref rec 0)))
                                       ;; (print "start-time: " start-time " mod-time: " mod-time)
                                       (and start-time mod-time
                                            (> (- now start-time) 0)  ;; been running at least 0 seconds
                                            (< (- now mod-time)   16) ;; still alive - file touched in last 16 seconds
                                            (< (- now start-time)
                                               (+ (- max-runtime 180)
                                                  (random 360))))) ;; under one hour running time +/- 180
                                     #f))
                               srvlst)
                       (lambda (a b)
                         (< (list-ref a 3)
                            (list-ref b 3))))))
    (if (> (length slst) nums)
        (take slst nums)
        slst)))

;; Return the oldest valid server record for areapath or #f if there is none.
;;
(define (server:get-first-best areapath)
  (let ((srvrs (server:get-best (server:get-list areapath))))
    (if (and srvrs
             (not (null? srvrs)))
        (car srvrs)
        #f)))

;; Return a randomly chosen valid server record for areapath or #f if there
;; is none.
;;
(define (server:get-rand-best areapath)
  (let ((srvrs (server:get-best (server:get-list areapath))))
    (if (and (list? srvrs)
             (not (null? srvrs)))
        (let* ((len (length srvrs))
               (idx (random len)))
          (list-ref srvrs idx))
        #f)))


;; Convert a server record ( mod-time host port start-time pid ) to the
;; string "host:port", or #f if either host or port is missing.
;;
(define (server:record->url servr)
  (match-let (((mod-time host port start-time pid)
               servr))
    (if (and host port)
        (conc host ":" port)
        #f)))

;; Return the cached client signature, creating it with server:mk-signature
;; and caching it in *my-client-signature* on first use.
;;
(define (server:get-client-signature) ;; BB> why is this proc named "get-"?  it returns nothing -- set! has not return value.
  (if *my-client-signature* *my-client-signature*
      (let ((sig (server:mk-signature)))
        (set! *my-client-signature* sig)
        *my-client-signature*)))

;; kind start up of servers. Does nothing if a server is already running.
;; Otherwise the minimum wait since the previous attempt for this areapath
;; grows with the number of attempts: 0, 20, 300 then 600 seconds (plus 0-4
;; random seconds). The attempt counter and timestamp are updated on every
;; call in which no server was found, whether or not a launch was made.
;;
(define (server:kind-run areapath)
  (if (not (server:check-if-running areapath)) ;; why try if there is already a server running?
      (let* ((last-run-dat (hash-table-ref/default *server-kind-run* areapath '(0 0))) ;; callnum, whenrun
             (call-num     (car last-run-dat))
             (when-run     (cadr last-run-dat))
             (run-delay    (+ (case call-num
                                ((0)    0)
                                ((1)   20)
                                ((2)  300)
                                (else 600))
                              (random 5)))   ;; add a small random number just in case a lot of jobs hit the work hosts simultaneously
             (lock-file    (conc areapath "/logs/server-start.lock")))
        (if (> (- (current-seconds) when-run) run-delay)
            (begin
              (common:simple-file-lock-and-wait lock-file expire-time: 15)
              (server:run areapath)
              (thread-sleep! 2) ;; don't release the lock for at least a few seconds
              (common:simple-file-release-lock lock-file)))
        (hash-table-set! *server-kind-run* areapath (list (+ call-num 1)(current-seconds))))))

;; Poll every five seconds until a server answers for areapath or timeout
;; seconds have passed. From the second poll on, a new server is requested
;; (via server:kind-run) whenever there are no valid candidates.
;;
;; Returns the "host:port" url of a responding server or #f on timeout.
;;
(define (server:start-and-wait areapath #!key (timeout 60))
  (let ((give-up-time (+ (current-seconds) timeout)))
    (let loop ((server-url (server:check-if-running areapath))
               (try-num    0))
      (if (or server-url
              (> (current-seconds) give-up-time)) ;; server-url will be #f if no server available.
          server-url
          (let ((num-ok (length (server:get-best (server:get-list areapath)))))
            (if (and (> try-num 0)  ;; first time through simply wait a little while then try again
                     (< num-ok 1))  ;; if there are no decent candidates for servers then try starting a new one
                (server:kind-run areapath))
            (thread-sleep! 5)
            (loop (server:check-if-running areapath)
                  (+ try-num 1)))))))

(define server:try-running server:run) ;; there is no more per-run servers ;; REMOVE ME. BUG.

;; Number of servers wanted: [server] numservers from the config if it is a
;; number, otherwise the numservers keyword (default 2).
;;
(define (server:get-num-servers #!key (numservers 2))
  (let ((ns (string->number
             (or (configf:lookup *configdat* "server" "numservers") "notanumber"))))
    (or ns numservers)))

;; no longer care if multiple servers are started by accident. older servers will drop off in time.
;;
;; Returns the url of the first valid server that answers a ping, or #f.
;; #f is also returned - without pinging - when the number of valid servers
;; is below a random threshold in 0..numservers-1, which lets additional
;; servers get started until numservers is reached.
;;
(define (server:check-if-running areapath) ;;  #!key (numservers "2"))
  (let* ((ns      (server:get-num-servers))
         (servers (server:get-best (server:get-list areapath))))
    ;; (print "servers: " servers " ns: " ns)
    (if (or (and servers
                 (null? servers))
            (not servers)
            (and (list? servers)
                 (< (length servers) (random ns)))) ;; somewhere between 0 and numservers
        #f
        (let loop ((hed (car servers))
                   (tal (cdr servers)))
          (let ((res (server:check-server hed)))
            (if res
                res
                (if (null? tal)
                    #f
                    (loop (car tal)(cdr tal)))))))))

;; ping the given server
;;
;; Returns the server url if the ping succeeded, else #f.
;; NOTE(review): there is no else clause in the case, so for any transport
;; other than http the (unspecified) result is treated as true and the url
;; is returned without a ping - confirm that this is intended.
;;
(define (server:check-server server-record)
  (let* ((server-url (server:record->url server-record))
         (res        (case *transport-type*
                       ((http)(server:ping server-url))
                       ;; ((nmsg)(nmsg-transport:ping (tasks:hostinfo-get-interface server)
                       )))
    (if res
        server-url
        #f)))

;; Kill the server described by the record ( mod-time hostname port start-time pid )
;;
(define (server:kill servr)
  (match-let (((mod-time hostname port start-time pid)
               servr))
    (tasks:kill-server hostname pid)))

;; called in megatest.scm, host-port is string hostname:port
;;
;; NOTE: This is NOT called directly from clients as not all transports support a client running
;;       in the same process as the server.
;;
;; Connects to the server and attempts a login. Returns #t on success and #f
;; on failure or when host-port-in is missing or malformed. With do-exit the
;; process exits instead: 0 on success, 1 otherwise.
;;
;; A port that is not a number is now reported as a bad host:port rather
;; than being passed on to the transport as #f. (The long-dead lookup of a
;; server by numeric server-id has been dropped from this proc.)
;;
(define (server:ping host-port-in #!key (do-exit #f))
  (let* ((slst      (if host-port-in
                        (string-split host-port-in ":")
                        '()))
         (port-num  (and (= (length slst) 2)
                         (string->number (cadr slst))))
         (host-port (if port-num
                        (list (car slst) port-num)
                        #f)))
    ;; (print "host-port=" host-port)
    (if (not host-port)
        (begin
          (if host-port-in
              (debug:print 0 *default-log-port*  "ERROR: bad host:port"))
          (if do-exit (exit 1))
          #f)
        (let* ((iface      (car host-port))
               (port       (cadr host-port))
               (server-dat (http-transport:client-connect iface port))
               (login-res  (rmt:login-no-auto-client-setup server-dat)))
          (if (and (list? login-res)
                   (car login-res))
              (begin
                ;; (print "LOGIN_OK")
                (if do-exit (exit 0))
                #t)
              (begin
                ;; (print "LOGIN_FAILED")
                (if do-exit (exit 1))
                #f))))))

;; run ping in separate process, safest way in some cases
;;
;; Runs "<megatest exe> -ping <ifaceport>" and looks at the LAST line of its
;; output: #t if that line is LOGIN_OK, #f for anything else or no output.
;;
(define (server:ping-server ifaceport)
  (with-input-from-pipe
   (conc (common:get-megatest-exe) " -ping " ifaceport)
   (lambda ()
     (let loop ((inl (read-line))
                (res "NOREPLY"))
       (if (eof-object? inl)
           (case (string->symbol res)
             ((NOREPLY)  #f)
             ((LOGIN_OK) #t)
             (else       #f))
           (loop (read-line) inl))))))

;; NOT USED (well, ok, was referenced in rpc-transport but otherwise
;; not used).
;;
;; NOTE(review): as written this returns a one argument procedure rather
;; than a boolean; it is that procedure which touches *db-last-access* and
;; compares its argument with *toppath*. Left as is because the remaining
;; caller(s) are not visible from here.
;;
(define (server:login toppath)
  (lambda (toppath)
    (set! *db-last-access* (current-seconds)) ;; might not be needed.
    (if (equal? *toppath* toppath)
        #t
        #f)))

;; Server expiration timeout in seconds, from [server] timeout.
;;
;;   - a plain number is taken as HOURS (historic behaviour, kept)
;;   - any other string is handed to common:hms-string->seconds (1h 5m 3s)
;;     and its result is used if it is a positive number. This case used
;;     to crash in (* 3600 #f).
;;   - otherwise the default of 60 seconds (1 minute) is used
;;
(define (server:expiration-timeout)
  (let* ((tmo (configf:lookup *configdat* "server" "timeout"))
         (hms (and (string? tmo)
                   (common:hms-string->seconds tmo))) ;; BUG: hms-string->seconds is broken, if given "10" returns 0.
         (hrs (and hms (string->number tmo))))
    (cond
     (hrs                      (* 3600 hrs))
     ((and (number? hms)
           (> hms 0))          hms)
     (else                     60))))

;; (define server:sync-lock-token "SERVER_SYNC_LOCK")
;; (define (server:release-sync-lock)
;;   (db:no-sync-del! *no-sync-db* server:sync-lock-token))
;; (define (server:have-sync-lock?)
;;   (let* ((have-lock-pair (db:no-sync-get-lock *no-sync-db* server:sync-lock-token))
;;          (have-lock?     (car have-lock-pair))
;;          (lock-time      (cdr have-lock-pair))
;;          (lock-age       (- (current-seconds) lock-time)))
;;     (cond
;;      (have-lock? #t)
;;      ((>lock-age
;;        (* 3 (configf:lookup-number *configdat* "server" "minimum-intersync-delay" default: 180)))
;;       (server:release-sync-lock)
;;       (server:have-sync-lock?))
;;      (else #f))))

;; moving this here as it needs access to db and cannot be in common.
;;
;; Build and return a thunk which performs one "bruteforce" sync: the tmp
;; area megatest.db is dumped with the sqlite3 command line tool into the
;; staging file <toppath>/.megatest.db which is then moved over
;; <toppath>/megatest.db. The sync is guarded by the sync lock file.
;;
;; The thunk returns:
;;   'sync-completed            the dump and move succeeded
;;   #f                         the sync command failed (log copied to <sync-log>.fail)
;;   'parallel-sync-in-progress the lock was held elsewhere and persist-until-sync is #f
;;
;; fork-to-background: run the dump and move in the background via nbfake
;; persist-until-sync: keep retrying once a second until the lock is obtained
;; When neither is set the thunk first sleeps for max(off-time, minimum-intersync-delay)
;; seconds; off-time is recalculated after every successful sync from how
;; long that sync took and [server] sync-duty-cycle.
;;
;; The sqlite executable comes from MT_SQLITE3_EXE and now defaults to
;; "sqlite3" from the PATH; previously an unset variable produced a command
;; line starting with "#f".
;;
(define (server:get-bruteforce-syncer dbstruct #!key (fork-to-background #f) (persist-until-sync #f))
  (let* ((sqlite-exe (or (get-environment-variable "MT_SQLITE3_EXE") "sqlite3")) ;; defined in cfg.sh
         (sync-log (or (args:get-arg "-sync-log") (conc *toppath* "/logs/sync-" (current-process-id) "-" (get-host-name) ".log")))
         (tmp-area (common:get-db-tmp-area *alldat*))
         (tmp-db (conc tmp-area "/megatest.db"))
         (staging-file (conc *toppath* "/.megatest.db"))
         (mtdbfile (conc *toppath* "/megatest.db"))
         (lockfile (common:get-sync-lock-filepath *alldat*))
         (sync-cmd-core (conc sqlite-exe" " tmp-db " .dump | "sqlite-exe" " staging-file "&>"sync-log))
         (sync-cmd (if fork-to-background
                       (conc "/usr/bin/env NBFAKE_LOG="*toppath*"/logs/last-server-sync-"(current-process-id)".log nbfake \""sync-cmd-core" && /bin/mv -f " staging-file " " mtdbfile" \"")
                       sync-cmd-core))
         (default-min-intersync-delay 2)
         (min-intersync-delay (configf:lookup-number *configdat* "server" "minimum-intersync-delay" default: default-min-intersync-delay))
         (default-duty-cycle 0.1)
         (duty-cycle (configf:lookup-number *configdat* "server" "sync-duty-cycle" default: default-duty-cycle))
         (last-sync-seconds 10) ;; we will adjust this to a measurement and delay last-sync-seconds * (1 - duty-cycle)
         ;; time to stay idle so that a sync of work-duration seconds is duty-cycle of the total
         (calculate-off-time (lambda (work-duration duty-cycle)
                               (* (/ (- 1 duty-cycle) duty-cycle) work-duration)))
         (off-time min-intersync-delay) ;; adjusted in closure below.
         (do-a-sync
          (lambda ()
            (BB> "Start do-a-sync with fork-to-background="fork-to-background" persist-until-sync="persist-until-sync)
            (let* ((finalres
                    (let retry-loop ((num-tries 0))
                      (if (common:simple-file-lock lockfile)
                          (begin
                            (cond
                             ((not (or fork-to-background persist-until-sync))
                              (debug:print 0 *default-log-port* "INFO: syncer thread sleeping for max of (server.minimum-intersync-delay="min-intersync-delay
                                           " , off-time="off-time" seconds ]")
                              (thread-sleep! (max off-time min-intersync-delay)))
                             (else
                              (debug:print 0 *default-log-port* "INFO: syncer thread NOT sleeping ; maybe time-to-exit...")))

                            (if (not (configf:lookup *configdat* "server" "disable-db-snapshot"))
                                (common:snapshot-file mtdbfile subdir: ".db-snapshot"))
                            (delete-file* staging-file)
                            (let* ((start-time (current-milliseconds))
                                   (res (system sync-cmd))
                                   (res2
                                    (cond
                                     ((eq? 0 res)
                                      (delete-file* (conc mtdbfile ".backup"))
                                      (if (eq? 0 (file-size sync-log))
                                          (delete-file sync-log))
                                      (system (conc "/bin/mv " staging-file " " mtdbfile))

                                      (set! last-sync-seconds (/ (- (current-milliseconds) start-time) 1000))
                                      (set! off-time (calculate-off-time
                                                      last-sync-seconds
                                                      (cond
                                                       ((and (number? duty-cycle) (> duty-cycle 0) (< duty-cycle 1))
                                                        duty-cycle)
                                                       (else
                                                        (debug:print 0 *default-log-port* "WARNING: ["(common:human-time)"] server.sync-duty-cycle is invalid. Should be a number between 0 and 1, but "duty-cycle" was specified. Using default value: "default-duty-cycle)
                                                        default-duty-cycle))))

                                      (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" SYNC took "last-sync-seconds" sec")
                                      (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" SYNC took "last-sync-seconds" sec ; with duty-cycle of "duty-cycle" off time is now "off-time)
                                      'sync-completed)
                                     (else
                                      (system (conc "/bin/cp "sync-log" "sync-log".fail"))
                                      (debug:print 0 *default-log-port* "ERROR: ["(common:human-time)"] Sync failed. See log at "sync-log".fail")
                                      (if (file-exists? (conc mtdbfile ".backup"))
                                          (system (conc "/bin/cp "mtdbfile ".backup " mtdbfile)))
                                      #f))))
                              (common:simple-file-release-lock lockfile)
                              (BB> "released lockfile: " lockfile)
                              (when (common:file-exists? lockfile)
                                (BB> "DID NOT ACTUALLY RELEASE LOCKFILE"))
                              res2)) ;; end let, end begin
                          ;; else - did not get the lock file
                          (cond
                           (persist-until-sync
                            (thread-sleep! 1)
                            (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" other SYNC in progress; we're in a fork-to-background so we need to succeed. Let's wait a jiffy and and try again. num-tries="num-tries" (waiting for lockfile="lockfile" to disappear)")
                            (retry-loop (add1 num-tries)))
                           (else
                            (thread-sleep! (max off-time (+ last-sync-seconds min-intersync-delay)))
                            (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" other SYNC in progress; not syncing.")
                            'parallel-sync-in-progress)))))) ;; end if got lockfile, end retry-loop
              (BB> "End do-a-sync with fork-to-background="fork-to-background" persist-until-sync="persist-until-sync" and result="finalres)
              finalres)))) ;; end lambda
    do-a-sync))

;; Watchdog for the bruteforce sync. Only active in a -server process which
;; was not started with -sync-to-megatest.db. Syncs repeatedly until
;; *time-to-exit* is set and then does one last background sync which keeps
;; retrying until it gets the sync lock.
;;
(define (server:writable-watchdog-bruteforce dbstruct)
  (thread-sleep! 1) ;; delay for startup
  (let* ((do-a-sync  (server:get-bruteforce-syncer dbstruct))
         (final-sync (server:get-bruteforce-syncer dbstruct fork-to-background: #t persist-until-sync: #t)))
    (when (and (not (args:get-arg "-sync-to-megatest.db")) ;; conditions under which we do not run the sync
               (args:get-arg "-server"))

      (let loop ()
        (do-a-sync)
        (if (not *time-to-exit*) (loop))) ;; keep going unless time to exit

      ;; time to exit, do the final sync
      (final-sync)

      (if (common:low-noise-print 30)
          (debug:print-info 0 *default-log-port* "Exiting watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id)
                            )))))

;; Watchdog for the incremental ("delta") sync of the tmp area db back to
;; megatest.db. Opens the no-sync db and publishes it in *no-sync-db*, then,
;; if legacy sync is enabled, loops until *time-to-exit* is set:
;;
;;   1. under *db-multi-sync-mutex* decide if a sync is due (will-sync) and
;;      flag *db-sync-in-progress*
;;   2. touch the .start-sync file, call db:sync-to-megatest.db and, if any
;;      records were transferred, bump *db-last-access* to keep the server alive
;;   3. clear the in-progress flag, record *db-last-sync*, touch .end-sync
;;   4. sleep for up to six seconds in one second steps
;;
;; On exit the no-sync db is closed, exactly once. Previously the next
;; iteration was started from inside every frame of the delay loop, so the
;; calls nested ever deeper and the shutdown code ran once per iteration that
;; had ever been run; the iteration is now a proper tail call.
;;
(define (server:writable-watchdog-deltasync dbstruct)
  (thread-sleep! 0.05) ;; delay for startup
  (let ((legacy-sync (common:run-sync?))
        (sync-stale-seconds (configf:lookup-number *configdat* "server" "sync-stale-seconds" default: 300))
        (debug-mode  (debug:debug-mode 1))
        (last-time   (current-seconds))
        (no-sync-db  (db:open-no-sync-db))
        (sync-duration 0) ;; run time of the sync in milliseconds
        ;;(this-wd-num     (begin (mutex-lock! *wdnum*mutex) (let ((x *wdnum*)) (set! *wdnum* (add1 *wdnum*)) (mutex-unlock! *wdnum*mutex) x)))
        )
    (set! *no-sync-db* no-sync-db) ;; make the no sync db available to api calls
    (debug:print-info 2 *default-log-port* "Periodic sync thread started.")
    (debug:print-info 3 *default-log-port* "watchdog starting. legacy-sync is " legacy-sync" pid="(current-process-id) );; " this-wd-num="this-wd-num)
    (if (and legacy-sync (not *time-to-exit*))
        (let* (;;(dbstruct (db:setup))
               (mtdb       (dbr:dbstruct-mtdb dbstruct))
               (mtpath     (db:dbdat-get-path mtdb))
               (tmp-area   (common:get-db-tmp-area *alldat*))
               (start-file (conc tmp-area "/.start-sync"))
               (end-file   (conc tmp-area "/.end-sync")))
          (debug:print-info 0 *default-log-port* "Server running, periodic sync started.")
          (let loop ()
            ;; sync for filesystem local db writes
            ;;
            (mutex-lock! *db-multi-sync-mutex*)
            (let* ((need-sync        (>= *db-last-access* *db-last-sync*)) ;; no sync since last write
                   (sync-in-progress *db-sync-in-progress*)
                   (min-intersync-delay (configf:lookup-number *configdat* "server" "minimum-intersync-delay" default: 5))
                   (should-sync      (and (not *time-to-exit*)
                                          (> (- (current-seconds) *db-last-sync*) min-intersync-delay))) ;; sync every five seconds minimum, deprecated logic, can probably be removed
                   (start-time       (current-seconds))
                   (cpu-load-adj     (alist-ref 'adj-proc-load (common:get-normalized-cpu-load #f)))
                   (mt-mod-time      (file-modification-time mtpath))
                   (last-sync-start  (if (common:file-exists? start-file)
                                         (file-modification-time start-file)
                                         0))
                   (last-sync-end    (if (common:file-exists? end-file)
                                         (file-modification-time end-file)
                                         10))
                   (sync-period      (+ 3 (* cpu-load-adj 30))) ;; as adjusted load increases increase the sync period
                   (recently-synced  (and (< (- start-time mt-mod-time) sync-period) ;; not useful if sync didn't modify megatest.db!
                                          (< mt-mod-time last-sync-start)))
                   (sync-done        (<= last-sync-start last-sync-end))
                   (sync-stale       (> start-time (+ last-sync-start sync-stale-seconds)))
                   (will-sync        (and (not *time-to-exit*)       ;; do not start a sync if we are in the process of exiting
                                          (or need-sync should-sync)
                                          (or sync-done sync-stale)
                                          (not sync-in-progress)
                                          (not recently-synced))))
              (debug:print-info 13 *default-log-port* "WD writable-watchdog top of loop. need-sync="need-sync" sync-in-progress=" sync-in-progress
                                " should-sync="should-sync" start-time="start-time" mt-mod-time="mt-mod-time" recently-synced="recently-synced" will-sync="will-sync
                                " sync-done=" sync-done " sync-period=" sync-period)
              (if (and (> sync-period 5)
                       (common:low-noise-print 30 "sync-period"))
                  (debug:print-info 0 *default-log-port* "Increased sync period due to long sync times, sync took: " sync-period " seconds."))
              ;; (if recently-synced (debug:print-info 0 *default-log-port* "Skipping sync due to recently-synced flag=" recently-synced))
              ;; (debug:print-info 0 *default-log-port* "need-sync: " need-sync " sync-in-progress: " sync-in-progress " should-sync: " should-sync " will-sync: " will-sync)
              (if will-sync (set! *db-sync-in-progress* #t))
              (mutex-unlock! *db-multi-sync-mutex*)
              (if will-sync
                  (let (;; (max-sync-duration (configf:lookup-number *configdat* "server" "max-sync-duration")) ;; KEEPING THIS AVAILABLE BUT SHOULD NOT USE, I'M PRETTY SURE IT DOES NOT WORK!
                        (sync-start (current-milliseconds)))
                    (with-output-to-file start-file (lambda ()(print (current-process-id))))

                    ;; put lock here

                    ;; (if (or (not max-sync-duration)
                    ;;         (< sync-duration max-sync-duration)) ;; NOTE: db:sync-to-megatest.db keeps track of time of last sync and syncs incrementally
                    (let ((res (db:sync-to-megatest.db dbstruct no-sync-db: no-sync-db))) ;; did we sync any data? If so need to set the db touched flag to keep the server alive
                      (set! sync-duration (- (current-milliseconds) sync-start))
                      (if (> res 0) ;; some records were transferred, keep the db alive
                          (begin
                            (mutex-lock! *heartbeat-mutex*)
                            (set! *db-last-access* (current-seconds))
                            (mutex-unlock! *heartbeat-mutex*)
                            (debug:print-info 0 *default-log-port* "sync called, " res " records transferred."))
                          (debug:print-info 2 *default-log-port* "sync called but zero records transferred")))))
;;                  ;; TODO: factor this next routine out into a function
;;                  (with-input-from-pipe ;; this should not block other threads but need to verify this
;;                   (conc "megatest -sync-to-megatest.db -m testsuite:" (common:get-area-name) ":" *toppath*)
;;                   (lambda ()
;;                     (let loop ((inl (read-line))
;;                                (res #f))
;;                       (if (eof-object? inl)
;;                           (begin
;;                             (set! sync-duration (- (current-milliseconds) sync-start))
;;                             (cond
;;                              ((not res)
;;                               (debug:print 0 *default-log-port* "ERROR: sync from /tmp db to megatest.db appears to have failed. Recommended that you stop your runs and run \"megatest -cleanup-db\""))
;;                              ((> res 0)
;;                               (mutex-lock! *heartbeat-mutex*)
;;                               (set! *db-last-access* (current-seconds))
;;                               (mutex-unlock! *heartbeat-mutex*))))
;;                           (let ((num-synced (let ((matches (string-match "^Synced (\\d+).*$" inl)))
;;                                               (if matches
;;                                                   (string->number (cadr matches))
;;                                                   #f))))
;;                             (loop (read-line)
;;                                   (or num-synced res))))))))))
              (if will-sync
                  (begin
                    (mutex-lock! *db-multi-sync-mutex*)
                    (set! *db-sync-in-progress* #f)
                    (set! *db-last-sync* start-time)
                    (with-output-to-file end-file (lambda ()(print (current-process-id))))

                    ;; release lock here

                    (mutex-unlock! *db-multi-sync-mutex*)))
              (if (and debug-mode
                       (> (- start-time last-time) 60))
                  (begin
                    (set! last-time start-time)
                    (debug:print-info 4 *default-log-port* "timestamp -> " (seconds->time-string (current-seconds)) ", time since start -> " (seconds->hr-min-sec (- (current-seconds) *time-zero*))))))

            ;; wait for up to six seconds, checking for exit every second
            ;;
            (if (not *time-to-exit*)
                (let delay-loop ((count 0))
                  ;;(debug:print-info 13 *default-log-port* "delay-loop top; count="count" pid="(current-process-id)" this-wd-num="this-wd-num" *time-to-exit*="*time-to-exit*)
                  (if (and (not *time-to-exit*)
                           (< count 6)) ;; was 11, changing to 4.
                      (begin
                        (thread-sleep! 1)
                        (delay-loop (+ count 1))))))

            ;; keep going unless time to exit
            ;;
            (if (not *time-to-exit*)
                (loop)
                (begin
                  ;; time to exit, close the no-sync db here
                  (db:no-sync-close-db no-sync-db)
                  (if (common:low-noise-print 30)
                      (debug:print-info 0 *default-log-port* "Exiting watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id) )))))))))

;;======================================================================
;; archive
;;======================================================================

;; NOT CURRENTLY USED
;;
;; Measure the disk used by one test directory and hand the archiving of it
;; to the job runner. Does nothing unless the test directory exists and is
;; writable.
;;
;; NOTE(review): archive:run-bup is handed to the job runner with two
;; arguments but is defined with six - this proc cannot work as is.
;;
(define (archive:main linktree target runname testname itempath options)
  (let ((testdir (conc linktree "/" target "/" runname "/" testname "/" itempath)) ;; was the undefined "itempatt"
        (flavor  'plain) ;; type of machine to run jobs on
        (maxload 1.5))   ;; max allowed load for this work
    ;; get testdir size
    ;;   - hand off du to job mgr
    (if (and (common:file-exists? testdir)
             (file-is-writable? testdir))
        (let* ((dused  (jobrunner:run-job
                        flavor  ;; machine type
                        maxload ;; max allowed load
                        '()     ;; prevars - environment vars to set for the job
                        common:get-disk-space-used  ;; if a proc call it, if a string it is a unix command
                        (list testdir)))
               (apath  (archive:get-archive testname itempath dused)))
          (jobrunner:run-job
           flavor
           maxload
           '()
           archive:run-bup
           (list testdir apath))))))

;; Get archive disks from megatest.config
;;
;; Returns the [archive-disks] section or '() if there is no such section.
;;
(define (archive:get-archive-disks)
  (let ((section (configf:get-section *configdat* "archive-disks")))
    (if section
        section
        '())))

;; look for the best candidate archive area, else create new
;; area
;;
;; NOTE(review): BROKEN - the fallback calls archive:allocate-new-archive-block
;; with three arguments (all #f) while that proc requires seven, so reaching
;; the fallback raises an error. The values needed (run area, testsuite name,
;; target, run name) are not available here.
;;
(define (archive:get-archive testname itempath dused)
  ;; look up in archive_allocations if there is a pre-used archive
  ;; with adequate diskspace
  ;;
  (let* ((existing-blocks (rmt:archive-get-allocations testname itempath dused))
         (candidate-disks (map (lambda (block)
                                 (list
                                  (vector-ref block 1)   ;; archive-area-name
                                  (vector-ref block 2))) ;; disk-path
                               existing-blocks)))
    (or (common:get-disk-with-most-free-space candidate-disks dused)
        (archive:allocate-new-archive-block #f #f #f)))) ;; BROKEN. testname itempath))))

;; allocate a new archive area
;;
;; Results are cached in the hash table blockid-cache under the key
;; testsuite/target/run-name/test-name.
;;
;; The area is placed on the [archive-disks] disk with the most free space
;; (at least dneeded). Its name is the first line printed by the
;; [archive] pathscript, if one is configured, else
;; <year>_q<quarter>/<testsuite-name>_<first five chars of the md5 of run-area-home>.
;;
;; Returns the pair ( block-id . archive-path ), or #f if no disk qualifies
;; or the block could not be registered. Exits the process if the pathscript
;; fails to run.
;;
(define (archive:allocate-new-archive-block blockid-cache run-area-home testsuite-name dneeded target run-name test-name)
  (let ((key (conc testsuite-name "/" target "/" run-name "/" test-name)))
    (if (hash-table-exists? blockid-cache key)
        (hash-table-ref blockid-cache key)
        (let* ((pscript    (configf:lookup *configdat* "archive" "pathscript"))
               (apath      (if pscript
                               (let ((pscript-cmd (conc pscript " " testsuite-name " " target " " run-name " " test-name)))
                                 (handle-exceptions
                                  exn
                                  (begin
                                    ;; the log port was missing from this call
                                    (debug:print 0 *default-log-port* "ERROR: script \"" pscript-cmd "\" failed to run properly.")
                                    (exit 1))
                                  (with-input-from-pipe
                                   pscript-cmd
                                   read-line)))
                               #f)) ;; this is the user-calculated archive path
               (adisks     (archive:get-archive-disks))
               (best-disk  (common:get-disk-with-most-free-space adisks dneeded)))
          (if best-disk
              (let* ((bdisk-name    (car best-disk))
                     (bdisk-path    (cdr best-disk)) ;; NOTE(review): assumes best-disk is a ( name . path ) pair - confirm
                     (area-key      (substring (message-digest-string (md5-primitive) run-area-home) 0 5))
                     (bdisk-id      (rmt:archive-register-disk bdisk-name bdisk-path (get-df bdisk-path)))
                     (archive-name  (if apath
                                        apath
                                        (let ((sec (current-seconds)))
                                          (conc (time->string (seconds->local-time sec) "%Y")
                                                "_q" (seconds->quarter sec) "/"
                                                testsuite-name "_" area-key))))
                     (archive-path  (conc bdisk-path "/" archive-name))
                     (block-id      (rmt:archive-register-block-name bdisk-id archive-path)))
                ;;   (allocation-id (rmt:archive-allocate-testsuite/area-to-block block-id testsuite-name area-key)))
                (if block-id ;; (and block-id allocation-id)
                    (let ((res (cons block-id archive-path)))
                      (hash-table-set! blockid-cache key res)
                      res)
                    #f))
              #f))))) ;; no best disk found

;; archive - run bup
;;
;; 1. create the bup dir if not exists
;; 2. start the du of each directory
;; 3. gen index
;; 4.
;;    save
;;

;; Private helper: return the real (symlink free) path for path, or #f if
;; path does not exist. The lookup is done while holding rp-mutex when that
;; is a mutex; the mutex is released even if the lookup raises.
;;
;; In archive:run-bup and archive:bup-restore the lock and unlock calls were
;; written in let* binding position, which merely bound the names
;; mutex-lock! and mutex-unlock! to the mutex and never locked anything.
;; NOTE(review): callers must not already hold rp-mutex when archiving or
;; restoring - confirm.
;;
(define (archive:physical-path path rp-mutex)
  (let ((resolve (lambda ()
                   (if (common:file-exists? path)
                       (common:real-path path)
                       #f))))
    (if (mutex? rp-mutex)
        (dynamic-wind
            (lambda () (mutex-lock! rp-mutex))
            resolve
            (lambda () (mutex-unlock! rp-mutex)))
        (resolve))))

;; Archive the given tests with the archiver from [archive] archiver
;; (bup - the default, tar or 7z).
;;
;; archive-command: if "save-remove" the test directories are removed after archiving
;; run-id:          used in the name of the bup save set (<area name>-<run-id>)
;; run-name:        name of the run, part of every test path
;; tests:           list of test records (see db:test-get-*)
;; rp-mutex:        mutex held while resolving real paths
;; bup-mutex:       currently unused
;;
;; Pass one groups the tests by the physical disk area (test-base) they live
;; on, precleaning each test directory per [archive-preclean] on the way.
;; Toplevel tests with children and tests whose path is missing are skipped
;; with a warning. Pass two archives each group into the archive area
;; allocated for it, records the archive block id for every test in the
;; group and optionally removes the test directory.
;;
;; Exits the process if no archive disk can be found. Returns #t.
;;
(define (archive:run-bup archive-command run-id run-name tests rp-mutex bup-mutex)
  ;; move the getting of archive space down into the below block so that a single run can
  ;; allocate as needed should a disk fill up
  ;;
  (let* ((blockid-cache (make-hash-table))
         (tsname        (common:get-area-name *alldat*))
         (min-space     (string->number (or (configf:lookup *configdat* "archive" "minspace") "1000")))
         (arch-groups   (make-hash-table)) ;; archive groups, each corrosponds to a bup area
         (disk-groups   (make-hash-table)) ;;
         (test-groups   (make-hash-table)) ;; these two (disk and test groups) could be combined nicely
         (test-dirs     (make-hash-table))
         (bup-exe       (or (configf:lookup *configdat* "archive" "bup") "bup"))
         (compress      (or (configf:lookup *configdat* "archive" "compress") "9"))
         (linktree      (common:get-linktree)) ;; (configf:lookup *configdat* "setup" "linktree")))
         (archiver      (let ((s (configf:lookup *configdat* "archive" "archiver")))
                          (if s (string->symbol s) 'bup)))
         (archiver-cmd  (case archiver
                          ((tar) "tar cfj ARCHIVE_NAME.tar.bz2 ")
                          ((7z)  " 7z u -t7z -m0=lzma -mx=9 -mfb=64 -md=32m -ms=on ARCHIVE_NAME.7z ")
                          (else #f)))
         (print-prefix  "Running: ") ;; change to #f to turn off printing
         (preclean-spec (configf:get-section *configdat* "archive-preclean")))

    ;; (tests:match patt testname itempath)

    ;; from the test info bin the path to the test by stem
    ;;
    (for-each
     (lambda (test-dat)
       (let* ((item-path         (db:test-get-item-path test-dat))
              (test-name         (db:test-get-testname  test-dat))
              (test-id           (db:test-get-id        test-dat))
              (run-id            (db:test-get-run_id    test-dat))
              (target            (string-intersperse (map cadr (rmt:get-key-val-pairs run-id)) "/"))

              (toplevel/children (and (db:test-get-is-toplevel test-dat)
                                      (> (rmt:test-toplevel-num-items run-id test-name) 0)))
              (test-partial-path (conc target "/" run-name "/" (db:test-make-full-name test-name item-path)))
              ;; note the trailing slash to get the dir inspite of it being a link
              (test-path         (conc linktree "/" test-partial-path))
              (test-physical-path (archive:physical-path test-path rp-mutex))
              (partial-path-index (if test-physical-path (substring-index test-partial-path test-physical-path) #f))
              (test-base         (if (and partial-path-index
                                          test-physical-path )
                                     (substring test-physical-path
                                                0
                                                partial-path-index)
                                     #f))
              ;; we need our archive dir checked for every test to enable folks who want to store other ways.
              (archive-info (archive:allocate-new-archive-block blockid-cache *toppath* tsname min-space target run-name test-name))
              (archive-dir  (if archive-info (cdr archive-info) #f))
              (archive-id   (if archive-info (car archive-info) -1)))

         (if (not archive-dir) ;; no archive disk found, this is fatal
             (begin
               (debug:print 0 *default-log-port* "FATAL: No archive disks found. Please add disks with at least "
                            min-space " MB space to the [archive-disks] section of megatest.config")
               (debug:print 0 *default-log-port* " use [archive] minspace to specify minimum available space")
               (debug:print 0 *default-log-port* " disks: "
                            (string-intersperse (map cadr (archive:get-archive-disks)) "\n "))
               (exit 1))
             (debug:print-info 0 *default-log-port* "Using path " archive-dir " for archiving test " test-path))

         ;; preclean the test directory per the spec if provided. A missing
         ;; [archive-preclean] section (#f) or a test directory that does
         ;; not exist means there is nothing to clean.
         (if (and (pair? preclean-spec) ;; we've been asked to preclean before archiving
                  test-physical-path)
             (let loop ((spec (car preclean-spec))
                        (tail (cdr preclean-spec)))
               (if (> (length spec) 1)
                   (let ((testspec (car spec))
                         (rules    (cadr spec)))
                     (if (tests:match testspec test-name item-path)
                         (begin
                           (debug:print 0 *default-log-port* "INFO: cleanup requested for " test-physical-path)
                           (common:dir-clean-up test-physical-path rules remove-empty: #t))
                         (if (not (null? tail))
                             (loop (car tail)(cdr tail)))))
                   (begin
                     (debug:print 0 *default-log-port* "ERROR: bad spec line in [archive-preclean] section. \"" spec "\"")
                     (if (not (null? tail))(loop (car tail)(cdr tail)))))))
         (cond
          (toplevel/children
           (debug:print 0 *default-log-port* "WARNING: cannot archive " test-name " with id " test-id
                        " as it is a toplevel test with children"))
          ((not (common:file-exists? test-path))
           (debug:print 0 *default-log-port* "WARNING: Cannot archive " test-name "/" item-path
                        " as path " test-path " does not exist"))
          (else
           (debug:print 0 *default-log-port*
                        "From test-dat=" test-dat " derived the following:\n"
                        "test-partial-path = " test-partial-path "\n"
                        "test-path = " test-path "\n"
                        "test-physical-path = " test-physical-path "\n"
                        "partial-path-index = " partial-path-index "\n"
                        "test-base = " test-base)
           (hash-table-set! disk-groups test-base
                            (cons test-physical-path (hash-table-ref/default disk-groups test-base '())))
           (hash-table-set! test-groups test-base
                            (cons test-dat (hash-table-ref/default test-groups test-base '())))
           (hash-table-set! arch-groups test-base
                            (cons archive-info (hash-table-ref/default arch-groups test-base '())))
           (hash-table-set! test-dirs test-id test-path)))))
     ;; test-path))))
     tests)
    (debug:print 0 *default-log-port* "INFO: DISK GROUPS=" (hash-table->alist disk-groups))
    ;; for each disk-group, initialize the bup area if needed
    (for-each
     (lambda (test-base)
       (let* ((arch-group  (hash-table-ref arch-groups test-base))
              (arch-info   (car arch-group)) ;; don't know yet how this will work, can I get more than one possibility?
              (archive-id  (car arch-info))
              (archive-dir (cdr arch-info)))
         (debug:print 0 *default-log-port* "Processing disk-group " test-base)
         (let* ((test-paths (hash-table-ref disk-groups test-base)))
           (if (not (common:file-exists? archive-dir))
               (create-directory archive-dir #t))
           (case archiver
             ((bup) ;; Archive using bup
              (let* ((bup-init-params  (list "-d" archive-dir "init"))
                     (bup-index-params (append (list "-d" archive-dir "index") test-paths))
                     (bup-save-params  (append (list "-d" archive-dir "save" ;; (conc "--strip-path=" linktree)
                                                     (conc "-" compress) ;; or (conc "--compress=" compress)
                                                     "-n" (conc (common:get-area-name *alldat*) "-" run-id)
                                                     (conc "--strip-path=" test-base) ;; if we push to the directory do we need this?
                                                     )
                                               test-paths)))
                (if (not (common:file-exists? (conc archive-dir "/HEAD")))
                    (begin
                      ;; replace this with jobrunner stuff enventually
                      (debug:print-info 0 *default-log-port* "Init bup in " archive-dir)
                      ;; (mutex-lock! bup-mutex)
                      (run-n-wait bup-exe params: bup-init-params print-cmd: print-prefix)
                      ;; (mutex-unlock! bup-mutex)
                      ))
                (debug:print-info 0 *default-log-port* "Indexing data to be archived")
                ;; (mutex-lock! bup-mutex)
                (run-n-wait bup-exe params: bup-index-params print-cmd: print-prefix)
                (debug:print-info 0 *default-log-port* "Archiving data with bup")
                (run-n-wait bup-exe params: bup-save-params print-cmd: print-prefix)))
             ((7z tar)
              (for-each
               (lambda (test-dat)
                 (let* ((test-id           (db:test-get-id        test-dat))
                        (test-name         (db:test-get-testname  test-dat))
                        (item-path         (db:test-get-item-path test-dat))
                        (test-full-name    (db:test-make-full-name test-name item-path))
                        (run-id            (db:test-get-run_id    test-dat))
                        (target            (string-intersperse (map cadr (rmt:get-key-val-pairs run-id)) "/"))
                        (run-name          (rmt:get-run-name-from-id run-id))
                        (source-dir        (hash-table-ref test-dirs test-id)) ;; (conc test-base "/" test-name "/" item-path))
                        (target-dir        (string-substitute "/$" "" (conc archive-dir "/" target "/" run-name "/" test-full-name))))
                   ;; create the test and item-path levels under archive-dir
                   (create-directory (pathname-directory target-dir) #t)
                   (run-n-wait
                    (conc
                     (string-substitute "ARCHIVE_NAME" target-dir archiver-cmd) " "
                     "."
                     )
                    print-cmd: print-prefix
                    run-dir: source-dir)))
               (hash-table-ref test-groups test-base))))
           ;; (mutex-unlock! bup-mutex)
           (for-each
            (lambda (test-dat)
              (let ((test-id           (db:test-get-id        test-dat))
                    (run-id            (db:test-get-run_id    test-dat)))
                (rmt:test-set-archive-block-id run-id test-id archive-id)
                (if (member archive-command '("save-remove"))
                    (runs:remove-test-directory test-dat 'archive-remove))))
            (hash-table-ref test-groups test-base)))))
     (hash-table-keys disk-groups))
    #t))

;; Restore the given tests from their bup archives.
;;
;; Entries of tests which are not vectors are ignored. For every other test,
;; unless it is a toplevel test with children:
;;   - a still existing old test directory is moved aside to .<dirname>
;;   - a fresh work area is created on the best available disk
;;   - the data is restored with "bup restore" and the test is set to COMPLETED
;;
;; archive-command and bup-mutex are currently unused; rp-mutex is held while
;; resolving real paths. Exits the process if the test record cannot be read
;; back after creating the work area.
;;
(define (archive:bup-restore archive-command run-id run-name tests rp-mutex bup-mutex) ;; move the getting of archive space down into the below block so that a single run can
  ;; allocate as needed should a disk fill up
  ;;
  (let* ((bup-exe       (or (configf:lookup *configdat* "archive" "bup") "bup"))
         (linktree      (common:get-linktree))) ;; (configf:lookup *configdat* "setup" "linktree")))

    ;; from the test info bin the path to the test by stem
    ;;
    (for-each
     (lambda (test-dat)
       ;; When restoring test-dat will initially contain an old and invalid path to the test
       (let* ((best-disk         (get-best-disk *configdat* #f)) ;; BUG: get the testconfig and use it here. Otherwise data pulled out of archive could end up on the wrong kind of disk.
              (item-path         (db:test-get-item-path test-dat))
              (test-name         (db:test-get-testname  test-dat))
              (test-id           (db:test-get-id        test-dat))
              (run-id            (db:test-get-run_id    test-dat))
              (keyvals           (rmt:get-key-val-pairs run-id))
              (target            (string-intersperse (map cadr keyvals) "/"))

              (toplevel/children (and (db:test-get-is-toplevel test-dat)
                                      (> (rmt:test-toplevel-num-items run-id test-name) 0)))
              (test-partial-path (conc target "/" run-name "/" (db:test-make-full-name test-name item-path)))
              ;; note the trailing slash to get the dir inspite of it being a link
              (test-path         (conc linktree "/" test-partial-path))
              ;; if the old path was not deleted then prev-test-physical-path will end up pointing to a real directory
              (prev-test-physical-path (archive:physical-path test-path rp-mutex)) ;; was (read-symbolic-link test-path #t)
              (new-test-physical-path  (conc best-disk "/" test-partial-path))
              (archive-block-id        (db:test-get-archived test-dat))
              (archive-block-info      (rmt:test-get-archive-block-info archive-block-id))
              (archive-path            (if (vector? archive-block-info)
                                           (vector-ref archive-block-info 2) ;; look in db.scm for test-get-archive-block-info for the vector record info
                                           #f)) ;; no archive found?
              (archive-internal-path   (conc (common:get-area-name *alldat*) "-" run-id "/latest/" test-partial-path)))

         ;; some sanity checks, move an existing path out of the way - iif it is not a toplevel with children
         ;;
         (if (and (not toplevel/children)  ;; special handling needed for toplevel with children
                  prev-test-physical-path
                  (common:file-exists? prev-test-physical-path)) ;; what to do? abort or clean up or link it in?
             (let* ((base (pathname-directory prev-test-physical-path))
                    ;; the whole last path component: pathname-file would drop what
                    ;; follows the last "." of names such as w02.1.20.54_b
                    (dirn (pathname-strip-directory prev-test-physical-path))
                    (newn (conc base "/." dirn)))
               (debug:print-error 0 *default-log-port* "the old directory " prev-test-physical-path ", still exists! Moving it to " newn)
               (rename-file prev-test-physical-path newn)))

         (if (and archive-path ;; no point in proceeding if there is no actual archive
                  (not toplevel/children))
             (begin
               ;; CREATE WORK AREA
               ;; test-src-path == #f ==> don't copy in data from tests directory
               ;; itemdat == string ==> use directly
               (create-work-area run-id run-name keyvals test-id #f best-disk test-name item-path) ;; #!key (remtries 2))

               ;; 1. Get the block id from the test info
               ;; 2. Get the block data given the block id
               ;; 3. Construct the paths etc. for the following command:
               ;;
               ;; bup -d /tmp/matt/adisk1/2015_q1/fullrun_e1a40/ restore -C /tmp/seeme fullrun-30/latest/ubuntu/nfs/none/w02.1.20.54_b/

               ;; DO BUP RESTORE
               (let* ((new-test-dat        (rmt:get-test-info-by-id run-id test-id))
                      (new-test-path       (if (vector? new-test-dat )
                                               (db:test-get-rundir new-test-dat)
                                               (begin
                                                 (debug:print-error 0 *default-log-port* "unable to get data for run-id=" run-id ", test-id=" test-id)
                                                 (exit 1))))
                      ;; new-test-path won't work - must use best-disk instead? Nope, new-test-path but tack on /..
                      (bup-restore-params  (list "-d" archive-path "restore" "-C" (conc new-test-path "/..") archive-internal-path)))
                 (debug:print-info 0 *default-log-port* "Restoring archived data to " new-test-physical-path " from archive in " archive-path " ... " archive-internal-path)
                 ;; (mutex-lock! bup-mutex)
                 (run-n-wait bup-exe params: bup-restore-params print-cmd: #f)
                 ;; (mutex-unlock! bup-mutex)
                 (mt:test-set-state-status-by-id run-id test-id "COMPLETED" #f #f)))
             (debug:print-error 0 *default-log-port* "No archive path in the record for run-id=" run-id " test-id=" test-id))))
     (filter vector? tests))))

;;======================================================================
;; client stuff
;;======================================================================

;; client:get-signature
;;
;; Return the cached client signature, "<hostname> <pid>", creating and
;; caching it in *my-client-signature* on first use.
;;
(define (client:get-signature)
  (if *my-client-signature* *my-client-signature*
      (let ((sig (conc (get-host-name) " " (current-process-id))))
        (set! *my-client-signature* sig)
        *my-client-signature*)))

;; Not currently used! But, I think it *should* be used!!!
(define (client:logout serverdat)
  (let ((ok (and (socket? 
serverdat) + (cdb:logout serverdat *toppath* (client:get-signature))))) + ok)) + +(define (client:connect iface port) + (http:client-connect iface port)) + +(define (client:setup areapath #!key (remaining-tries 100) (failed-connects 0)) + (client:setup-http *alldat* areapath remaining-tries: remaining-tries failed-connects: failed-connects)) + +(set-fn 'client:setup client:setup) + + +;; Do all the connection work, look up the transport type and set up the +;; connection if required. +;; +;; There are two scenarios. +;; 1. We are a test manager and we received *transport-type* and *runremote* via cmdline +;; 2. We are a run tests, list runs or other interactive process and we must figure out +;; *transport-type* and *runremote* from the monitor.db +;; +;; client:setup +;; +;; lookup_server, need to remove *runremote* stuff -> replace with *alldat* for now +;; + +(define (client:setup-http runremote areapath #!key (remaining-tries 100) (failed-connects 0)) ;; (area-dat #f)) + (debug:print-info 2 *default-log-port* "client:setup remaining-tries=" remaining-tries) + (server:start-and-wait areapath) + (if (<= remaining-tries 0) + (begin + (debug:print-error 0 *default-log-port* "failed to start or connect to server") + (exit 1)) + ;; + ;; Alternatively here, we can get the list of candidate servers and work our way + ;; through them searching for a good one. 
+ ;; + (let* ((server-dat (server:get-rand-best areapath))) ;; (server:get-first-best areapath)) + (if (not server-dat) ;; no server found + (client:setup-http runremote areapath remaining-tries: (- remaining-tries 1)) + (let ((host (cadr server-dat)) + (port (caddr server-dat))) + (debug:print-info 4 *default-log-port* "client:setup server-dat=" server-dat ", remaining-tries=" remaining-tries) + (if (and host port) + (let* ((start-res (case *transport-type* + ((http)(http-transport:client-connect host port)))) + (ping-res (case *transport-type* + ((http)(rmt:login-no-auto-client-setup start-res))))) + (if (and start-res + ping-res) + (begin + (alldat-conndat-set! runremote start-res) + (debug:print-info 2 *default-log-port* "connected to " (http-transport:server-dat-make-url start-res)) + start-res) + (begin ;; login failed but have a server record, clean out the record and try again + (debug:print-info 0 *default-log-port* "client:setup, login unsuccessful, will attempt to start server ... start-res=" start-res ", server-dat=" server-dat) ;; had runid. Fixes part of Randy;s ticket 1405717332 + (case *transport-type* + ((http)(http-transport:close-connections))) + (alldat-conndat-set! runremote #f) ;; (hash-table-delete! runremote run-id) + (thread-sleep! 1) + (client:setup-http runremote areapath remaining-tries: (- remaining-tries 1)) + ))) + (begin ;; no server registered + ;; (server:kind-run areapath) + (server:start-and-wait areapath) + (debug:print-info 0 *default-log-port* "client:setup, no server registered, remaining-tries=" remaining-tries) + (thread-sleep! 1) ;; (+ 5 (random (- 20 remaining-tries)))) ;; give server a little time to start up, randomize a little to avoid start storms. 
+ (client:setup-http runremote areapath remaining-tries: (- remaining-tries 1))))))))) + +;;====================================================================== +;; configf +;;====================================================================== +;; return list (path fullpath configname) +(define (find-config configname #!key (toppath #f)) + (if toppath + (let ((cfname (conc toppath "/" configname))) + (if (common:file-exists? cfname) + (list toppath cfname configname) + (list #f #f #f))) + (let* ((cwd (string-split (current-directory) "/"))) + (let loop ((dir cwd)) + (let* ((path (conc "/" (string-intersperse dir "/"))) + (fullpath (conc path "/" configname))) + (if (common:file-exists? fullpath) + (list path fullpath configname) + (let ((remcwd (take dir (- (length dir) 1)))) + (if (null? remcwd) + (list #f #f #f) ;; #f #f) + (loop remcwd))))))))) + +(define (config:assoc-safe-add alist key val #!key (metadata #f)) + (let ((newalist (filter (lambda (x)(not (equal? key (car x)))) alist))) + (append newalist (list (if metadata + (list key val metadata) + (list key val)))))) + +(define (configf:section-var-set! cfgdat section-name var value #!key (metadata #f)) + (hash-table-set! cfgdat section-name + (config:assoc-safe-add + (hash-table-ref/default cfgdat section-name '()) + var value metadata: metadata))) + +(define (config:eval-string-in-environment str) + ;; (if (or (string-null? str) + ;; (equal? "!" (substring str 0 1))) ;; null string or starts with ! are preserved but NOT set in the environment + str + (handle-exceptions + exn + (begin + (debug:print-error 0 *default-log-port* "problem evaluating \"" str "\" in the shell environment") + #f) + (let ((cmdres (process:cmd-run->list (conc "echo " str)))) + (if (null? 
cmdres) "" + (caar cmdres))))) ;; ) + +;;====================================================================== +;; Make the regexp's needed globally available +;;====================================================================== + +(define configf:include-rx (regexp "^\\[include\\s+(.*)\\]\\s*$")) +(define configf:script-rx (regexp "^\\[scriptinc\\s+(\\S+)([^\\]]*)\\]\\s*$")) ;; include output from a script +(define configf:section-rx (regexp "^\\[(.*)\\]\\s*$")) +(define configf:blank-l-rx (regexp "^\\s*$")) +(define configf:key-sys-pr (regexp "^(\\S+)\\s+\\[system\\s+(\\S+.*)\\]\\s*$")) +(define configf:key-val-pr (regexp "^(\\S+)(\\s+(.*)|())$")) +(define configf:key-no-val (regexp "^(\\S+)(\\s*)$")) +(define configf:comment-rx (regexp "^\\s*#.*")) +(define configf:cont-ln-rx (regexp "^(\\s+)(\\S+.*)$")) +(define configf:settings (regexp "^\\[configf:settings\\s+(\\S+)\\s+(\\S+)]\\s*$")) + +;; read a line and process any #{ ... } constructs + +(define configf:var-expand-regex (regexp "^(.*)#\\{(scheme|system|shell|getenv|get|runconfigs-get|rget|scm|sh|rp|gv|g|mtrah)\\s+([^\\}\\{]*)\\}(.*)")) + +(define (configf:system ht cmd) + (system cmd) + ) + +(define (configf:process-line l ht allow-system #!key (linenum #f)) + (let loop ((res l)) + (if (string? 
res) + (let ((matchdat (string-search configf:var-expand-regex res))) + (if matchdat + (let* ((prestr (list-ref matchdat 1)) + (cmdtype (list-ref matchdat 2)) ;; eval, system, shell, getenv + (cmd (list-ref matchdat 3)) + (poststr (list-ref matchdat 4)) + (result #f) + (start-time (current-seconds)) + (cmdsym (string->symbol cmdtype)) + (fullcmd (case cmdsym + ((scheme scm) (conc "(lambda (ht)" cmd ")")) + ((system) (conc "(lambda (ht)(configf:system ht \"" cmd "\"))")) + ((shell sh) (conc "(lambda (ht)(string-translate (shell \"" cmd "\") \"\n\" \" \"))")) + ((realpath rp)(conc "(lambda (ht)(common:nice-path \"" cmd "\"))")) + ((getenv gv) (conc "(lambda (ht)(get-environment-variable \"" cmd "\"))")) + ((mtrah) (conc "(lambda (ht)" + " (let ((extra \"" cmd "\"))" + " (conc (or *toppath* (get-environment-variable \"MT_RUN_AREA_HOME\"))" + " (if (string-null? extra) \"\" \"/\")" + " extra)))")) + ((get g) + (let* ((parts (string-split cmd)) + (sect (car parts)) + (var (cadr parts))) + (conc "(lambda (ht)(config-lookup ht \"" sect "\" \"" var "\"))"))) + ((runconfigs-get rget) (conc "(lambda (ht)(runconfigs-get ht \"" cmd "\"))")) + ;; ((rget) (conc "(lambda (ht)(runconfigs-get ht \"" cmd "\"))")) + (else "(lambda (ht)(print \"ERROR\") \"ERROR\")")))) + ;; (print "fullcmd=" fullcmd) + (handle-exceptions + exn + (begin + (debug:print 0 *default-log-port* "WARNING: failed to process config input \"" l "\"") + (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) + ;; (print "exn=" (condition->list exn)) + (set! result (conc "#{( " cmdtype ") " cmd "}, full expansion: " fullcmd))) + (if (or allow-system + (not (member cmdtype '("system" "shell" "sh")))) + (with-input-from-string fullcmd + (lambda () + (set! result ((eval (read)) ht)))) + (set! 
result (conc "#{(" cmdtype ") " cmd "}")))) + (case cmdsym + ((system shell scheme) + (let ((delta (- (current-seconds) start-time))) + (if (> delta 2) + (debug:print-info 0 *default-log-port* "for line \"" l "\"\n command: " cmd " took " delta " seconds to run with output:\n " result) + (debug:print-info 9 *default-log-port* "for line \"" l "\"\n command: " cmd " took " delta " seconds to run with output:\n " result))))) + (loop (conc prestr result poststr))) + res)) + res))) + +;; Run a shell command and return the output as a string +(define (shell cmd) + (let* ((output (process:cmd-run->list cmd)) + (res (car output)) + (status (cadr output))) + (if (equal? status 0) + (let ((outres (string-intersperse + res + "\n"))) + (debug:print-info 4 *default-log-port* "shell result:\n" outres) + outres) + (begin + (with-output-to-port (current-error-port) + (lambda () + (print "ERROR: " cmd " returned bad exit code " status))) + "")))) + +;; this was inline but I'm pretty sure that is a hold over from when it was *very* simple ... +;; +(define (configf:read-line p ht allow-processing settings) + (let loop ((inl (read-line p))) + (let ((cont-line (and (string? inl) + (not (string-null? inl)) + (equal? "\\" (string-take-right inl 1))))) + (if cont-line ;; last character is \ + (let ((nextl (read-line p))) + (if (not (eof-object? nextl)) + (loop (string-append (if cont-line + (string-take inl (- (string-length inl) 1)) + inl) + nextl)))) + (let ((res (case allow-processing ;; if (and allow-processing + ;; (not (eq? allow-processing 'return-string))) + ((#t #f) + (configf:process-line inl ht allow-processing)) + ((return-string) + inl) + (else + (configf:process-line inl ht allow-processing))))) + (if (and (string? res) + (not (equal? 
(hash-table-ref/default settings "trim-trailing-spaces" "no") "no"))) + (string-substitute "\\s+$" "" res) + res)))))) + +(define (configf:cfgdat->env-alist section cfgdat-ht allow-system) + (filter + (lambda (pair) + (let* ((var (car pair)) + (val (cdr pair))) + (cons var + (cond + ((and allow-system (procedure? val)) ;; if we decided to use something other than #t or #f for allow-system ('return-procs or 'return-string) , this may become problematic + (val)) + ((procedure? val) #f) + ((string? val) val) + (else "#f"))))) + (append + (hash-table-ref/default cfgdat-ht "default" '()) + (if (equal? section "default") '() (hash-table-ref/default cfgdat-ht section '()))))) + +(define (calc-allow-system allow-system section sections) + (if sections + (and (or (equal? "default" section) + (member section sections)) + allow-system) ;; account for sections and return allow-system as it might be a symbol such as return-strings + allow-system)) + +;; given a config hash and a section name, apply that section to all matching sections (using wildcard % or regex if /..../) +;; remove the section when done so that there is no downstream clobbering +;; +(define (configf:apply-wildcards ht section-name) + (if (hash-table-exists? ht section-name) + (let* ((vars (hash-table-ref ht section-name)) + (rxstr (if (string-contains section-name "%") + (string-substitute (regexp "%") ".*" section-name) + (string-substitute (regexp "^/(.*)/$") "\\1" section-name))) + (rx (regexp rxstr))) + ;; (print "\nsection-name: " section-name " rxstr: " rxstr) + (for-each + (lambda (section) + (if section + (let ((same-section (string=? 
section-name section)) + (rx-match (string-match rx section))) + ;; (print "section: " section " vars: " vars " same-section: " same-section " rx-match: " rx-match) + (if (and (not same-section) rx-match) + (for-each + (lambda (bundle) + ;; (print "bundle: " bundle) + (let ((key (car bundle)) + (val (cadr bundle)) + (meta (if (> (length bundle) 2)(caddr bundle) #f))) + (hash-table-set! ht section (config:assoc-safe-add (hash-table-ref ht section) key val metadata: meta)))) + vars))))) + (hash-table-keys ht)))) + ht) + +;; read a config file, returns hash table of alists + +;; read a config file, returns hash table of alists +;; adds to ht if given (must be #f otherwise) +;; allow-system: +;; #f - do not evaluate [system +;; #t - immediately evaluate [system and store result as string +;; 'return-procs -- return a proc taking ht as an argument that may be evaulated at some future time +;; 'return-string -- return a string representing a proc taking ht as an argument that may be evaulated at some future time +;; envion-patt is a regex spec that identifies sections that will be eval'd +;; in the environment on the fly +;; sections: #f => get all, else list of sections to gather +;; post-section-procs alist of section-pattern => proc, where: (proc section-name next-section-name ht curr-path) +;; apply-wildcards: #t/#f - apply vars from targets with % wildcards to all matching sections +;; +(define (read-config path ht allow-system #!key (environ-patt #f) (curr-section #f) + (sections #f) (settings (make-hash-table)) (keep-filenames #f) + (post-section-procs '()) (apply-wildcards #t) ) + (debug:print 9 *default-log-port* "START: " path) +;; (if *configdat* +;; (common:save-pkt `((action . read-config) +;; (f . ,(cond ((string? path) path) +;; ((port? path) "port") +;; (else (conc path)))) +;; (T . configf)) +;; *configdat* #t add-only: #t)) + (if (and (not (port? path)) + (not (common:file-exists? 
path))) ;; for case where we are handed a port + (begin + (debug:print-info 1 *default-log-port* "read-config - file not found " path " current path: " (current-directory)) + ;; WARNING: This is a risky change but really, we should not return an empty hash table if no file read? + #f) ;; (if (not ht)(make-hash-table) ht)) + (let ((inp (if (string? path) + (open-input-file path) + path)) ;; we can be handed a port + (res (if (not ht)(make-hash-table) ht)) + (metapath (if (or (debug:debug-mode 9) + keep-filenames) + path #f)) + (process-wildcards (lambda (res curr-section-name) + (if (and apply-wildcards + (or (string-contains curr-section-name "%") ;; wildcard + (string-match "/.*/" curr-section-name))) ;; regex + (begin + (configf:apply-wildcards res curr-section-name) + (hash-table-delete! res curr-section-name)))))) ;; NOTE: if the section is a wild card it will be REMOVED from res + (let loop ((inl (configf:read-line inp res (calc-allow-system allow-system curr-section sections) settings)) ;; (read-line inp)) + (curr-section-name (if curr-section curr-section "default")) + (var-flag #f);; turn on for key-var-pr and cont-ln-rx, turn off elsewhere + (lead #f)) + (debug:print-info 8 *default-log-port* "curr-section-name: " curr-section-name " var-flag: " var-flag "\n inl: \"" inl "\"") + (if (eof-object? inl) + (begin + ;; process last section for wildcards + (process-wildcards res curr-section-name) + (if (string? path) ;; we received a path, not a port, thus we are responsible for closing it. + (close-input-port inp)) + (if (list? sections) ;; delete all sections except given when sections is provided + (for-each + (lambda (section) + (if (not (member section sections)) + (hash-table-delete! 
res section))) ;; we are using "" as a dumping ground and must remove it before returning the ht + (hash-table-keys res))) + (debug:print 9 *default-log-port* "END: " path) + res + ) ;; retval + (regex-case + inl + (configf:comment-rx _ (loop (configf:read-line inp res (calc-allow-system allow-system curr-section-name sections) settings) + curr-section-name #f #f)) + + (configf:blank-l-rx _ (loop (configf:read-line inp res (calc-allow-system allow-system curr-section-name sections) settings) + curr-section-name #f #f)) + (configf:settings ( x setting val ) + (begin + (hash-table-set! settings setting val) + (loop (configf:read-line inp res (calc-allow-system allow-system curr-section-name sections) settings) + curr-section-name #f #f))) + + (configf:include-rx ( x include-file ) + (let* ((curr-conf-dir (pathname-directory path)) + (full-conf (if (and (absolute-pathname? include-file) (file-exists? include-file)) + include-file + (common:nice-path + (conc (if curr-conf-dir + curr-conf-dir + ".") + "/" include-file))))) + (let ((all-matches (sort (handle-exceptions exn (list) (glob full-conf)) string<=?))) + (if (null? 
all-matches) + (begin + (debug:print '(2 9) #f "INFO: include file(s) matching " include-file " not found (called from " path ")") + (debug:print 2 *default-log-port* " " full-conf)) + (for-each + (lambda (fpath) + ;; (push-directory conf-dir) + (debug:print 9 *default-log-port* "Including: " full-conf) + (read-config fpath res allow-system environ-patt: environ-patt + curr-section: curr-section-name sections: sections settings: settings + keep-filenames: keep-filenames)) + all-matches)) + (loop (configf:read-line inp res (calc-allow-system allow-system curr-section-name sections) settings) + curr-section-name #f #f)))) + (configf:script-rx ( x include-script params);; handle-exceptions + ;; exn + ;; (begin + ;; (debug:print '(0 2 9) #f "INFO: include from script " include-script " failed.") + ;; (loop (configf:read-line inp res (calc-allow-system allow-system curr-section-name sections) settings) curr-section-name #f #f)) + (if (and (common:file-exists? include-script)(file-execute-access? include-script)) + (let* ((local-allow-system (calc-allow-system allow-system curr-section-name sections)) + (env-delta (configf:cfgdat->env-alist curr-section-name res local-allow-system)) + (new-inp-port + (common:with-env-vars + env-delta + (lambda () + (open-input-pipe (conc include-script " " params)))))) + (debug:print '(2 9) *default-log-port* "Including from script output: " include-script) + ;; (print "We got here, calling read-config next. 
Port is: " new-inp-port) + (read-config new-inp-port res allow-system environ-patt: environ-patt curr-section: curr-section-name sections: sections settings: settings keep-filenames: keep-filenames) + (close-input-port new-inp-port) + (loop (configf:read-line inp res (calc-allow-system allow-system curr-section-name sections) settings) curr-section-name #f #f)) + (begin + (debug:print 0 *default-log-port* "Script not found or not exectutable: " include-script) + (loop (configf:read-line inp res (calc-allow-system allow-system curr-section-name sections) settings) curr-section-name #f #f))) + ) ;; ) + (configf:section-rx ( x section-name ) + (begin + ;; call post-section-procs + (for-each + (lambda (dat) + (let ((patt (car dat)) + (proc (cdr dat))) + (if (string-match patt curr-section-name) + (proc curr-section-name section-name res path)))) + post-section-procs) + ;; after gathering the vars for a section and if apply-wildcards is true and if there is a wildcard in the section name process wildcards + ;; NOTE: we are processing the curr-section-name, NOT section-name. + (process-wildcards res curr-section-name) + (if (not (hash-table-ref/default res section-name #f))(hash-table-set! res section-name '())) ;; ensure that mere mention of a section is not lost + (loop (configf:read-line inp res (calc-allow-system allow-system curr-section-name sections) settings) + ;; if we have the sections list then force all settings into "" and delete it later? + ;; (if (or (not sections) + ;; (member section-name sections)) + ;; section-name "") ;; stick everything into "". NOPE: We need new strategy. Put stuff in correct sections and then delete all sections later. 
+ section-name + #f #f))) + (configf:key-sys-pr ( x key cmd ) + (if (calc-allow-system allow-system curr-section-name sections) + (let ((alist (hash-table-ref/default res curr-section-name '())) + (val-proc (lambda () + (let* ((start-time (current-seconds)) + (local-allow-system (calc-allow-system allow-system curr-section-name sections)) + (env-delta (configf:cfgdat->env-alist curr-section-name res local-allow-system)) + (cmdres (process:cmd-run->list cmd delta-env-alist-or-hash-table: env-delta)) ;; BB: here is where [system is exec'd. needs to have env from other vars! + (delta (- (current-seconds) start-time)) + (status (cadr cmdres)) + (res (car cmdres))) + (debug:print-info 4 *default-log-port* "" inl "\n => " (string-intersperse res "\n")) + (if (not (eq? status 0)) + (begin + (debug:print-error 0 *default-log-port* "problem with " inl ", return code " status + " output: " cmdres))) + (if (> delta 2) + (debug:print-info 0 *default-log-port* "for line \"" inl "\"\n command: " cmd " took " delta " seconds to run with output:\n " res) + (debug:print-info 9 *default-log-port* "for line \"" inl "\"\n command: " cmd " took " delta " seconds to run with output:\n " res)) + (if (null? res) + "" + (string-intersperse res " ")))))) + (hash-table-set! res curr-section-name + (config:assoc-safe-add alist + key + (case (calc-allow-system allow-system curr-section-name sections) + ((return-procs) val-proc) + ((return-string) cmd) + (else (val-proc))) + metadata: metapath)) + (loop (configf:read-line inp res (calc-allow-system allow-system curr-section-name sections) settings) curr-section-name #f #f)) + (loop (configf:read-line inp res + (calc-allow-system allow-system curr-section-name sections) + settings) + curr-section-name #f #f))) + + (configf:key-no-val ( x key val) + (let* ((alist (hash-table-ref/default res curr-section-name '())) + (fval (or (if (string? 
val) val #f) ""))) ;; fval should be either "" or " " (one or more spaces) + (debug:print 10 *default-log-port* " setting: [" curr-section-name "] " key " = #t") + (safe-setenv key fval) + (hash-table-set! res curr-section-name + (config:assoc-safe-add alist key fval metadata: metapath)) + (loop (configf:read-line inp res + (calc-allow-system allow-system curr-section-name sections) + settings) + curr-section-name key #f))) + + (configf:key-val-pr ( x key unk1 val unk2 ) + (let* ((alist (hash-table-ref/default res curr-section-name '())) + (envar (and environ-patt + (string-search (regexp environ-patt) curr-section-name) ;; does the section match the envionpatt? + (and (not (string-null? key)) + (not (equal? "!" (substring key 0 1)))) ;; ! as leading character is a signature to NOT export to the environment + ;; (string-match "^.*:.*:.*$" key) ;; ;; something:something:something reserved for triggers in runconfigs + )) + (realval (if envar + (config:eval-string-in-environment val) + val))) + (debug:print-info 6 *default-log-port* "read-config env setting, envar: " envar " realval: " realval " val: " val " key: " key " curr-section-name: " curr-section-name) + (if envar (safe-setenv key realval)) + (debug:print 10 *default-log-port* " setting: [" curr-section-name "] " key " = " val) + (hash-table-set! res curr-section-name + (config:assoc-safe-add alist key realval metadata: metapath)) + (loop (configf:read-line inp res + (calc-allow-system allow-system curr-section-name sections) settings) + curr-section-name key #f))) + ;; if a continued line + (configf:cont-ln-rx ( x whsp val ) + (let ((alist (hash-table-ref/default res curr-section-name '()))) + (if var-flag ;; if set to a string then we have a continued var + (let ((newval (conc + (config-lookup res curr-section-name var-flag) "\n" + ;; trim lead from the incoming whsp to support some indenting. 
+ (if lead + (string-substitute (regexp lead) "" whsp) + "") + val))) + ;; (print "val: " val "\nnewval: \"" newval "\"\nvarflag: " var-flag) + (hash-table-set! res curr-section-name + (config:assoc-safe-add alist var-flag newval metadata: metapath)) + (loop (configf:read-line inp res (calc-allow-system allow-system curr-section-name sections) settings) curr-section-name var-flag (if lead lead whsp))) + (loop (configf:read-line inp res (calc-allow-system allow-system curr-section-name sections) settings) curr-section-name #f #f)))) + (else (debug:print-error 0 *default-log-port* "problem parsing " path ",\n \"" inl "\"") + (set! var-flag #f) + (loop (configf:read-line inp res (calc-allow-system allow-system curr-section-name sections) settings) curr-section-name #f #f)))) + ) ;; end loop + ))) + +;; pathenvvar will set the named var to the path of the config +(define (find-and-read-config fname #!key (environ-patt #f)(given-toppath #f)(pathenvvar #f)) + (let* ((curr-dir (current-directory)) + (configinfo (find-config fname toppath: given-toppath)) + (toppath (car configinfo)) + (configfile (cadr configinfo)) + (set-fields (lambda (curr-section next-section ht path) + (let ((field-names (if ht (common:get-fields ht) '())) + (target (or (getenv "MT_TARGET")(args:get-arg "-reqtarg")(args:get-arg "-target")))) + (debug:print-info 9 *default-log-port* "set-fields with field-names=" field-names " target=" target " curr-section=" curr-section " next-section=" next-section " path=" path " ht=" ht) + (if (not (null? 
field-names))(keys:target-set-args field-names target #f)))))) + (if toppath (change-directory toppath)) + (if (and toppath pathenvvar)(setenv pathenvvar toppath)) + (let ((configdat (if configfile + (read-config configfile #f #t environ-patt: environ-patt post-section-procs: (list (cons "^fields$" set-fields)) #f)))) + (if toppath (change-directory curr-dir)) + (list configdat toppath configfile fname)))) + +(define (config-lookup cfgdat section var) + (if (hash-table? cfgdat) + (let ((sectdat (hash-table-ref/default cfgdat section '()))) + (if (null? sectdat) + #f + (let ((match (assoc var sectdat))) + (if match ;; (and match (list? match)(> (length match) 1)) + (cadr match) + #f)) + )) + #f)) + +;; use to have definitive setting: +;; [foo] +;; var yes +;; +;; (configf:var-is? cfgdat "foo" "var" "yes") => #t +;; +(define (configf:var-is? cfgdat section var expected-val) + (equal? (configf:lookup cfgdat section var) expected-val)) + +(define configf:lookup config-lookup) +(define configf:read-file read-config) + +(define (configf:section-vars cfgdat section) + (let ((sectdat (hash-table-ref/default cfgdat section '()))) + (if (null? sectdat) + '() + (map car sectdat)))) + +(define (configf:set-section-var cfgdat section var val) + (let ((sectdat (configf:get-section cfgdat section))) + (hash-table-set! cfgdat section + (config:assoc-safe-add sectdat var val)))) + + ;;(append (filter (lambda (x)(not (assoc var sectdat))) sectdat) + ;; (list var val)))) + +(define (setup) + (let* ((configf (find-config "megatest.config")) + (config (if configf (read-config configf #f #t) #f))) + (if config + (setenv "RUN_AREA_HOME" (pathname-directory configf))) + config)) + +;;====================================================================== +;; Non destructive writing of config file +;;====================================================================== + +(define (configf:compress-multi-lines fdat) + ;; step 1.5 - compress any continued lines + (if (null? 
fdat) fdat + (let loop ((hed (car fdat)) + (tal (cdr fdat)) + (cur "") + (led #f) + (res '())) + ;; ALL WHITESPACE LEADING LINES ARE TACKED ON!! + ;; 1. remove led whitespace + ;; 2. tack on to hed with "\n" + (let ((match (string-match configf:cont-ln-rx hed))) + (if match ;; blast! have to deal with a multiline + (let* ((lead (cadr match)) + (lval (caddr match)) + (newl (conc cur "\n" lval))) + (if (not led)(set! led lead)) + (if (null? tal) + (set! fdat (append fdat (list newl))) + (loop (car tal)(cdr tal) newl led res))) ;; NB// not tacking newl onto res + (let ((newres (if led + (append res (list cur hed)) + (append res (list hed))))) + ;; prev was a multiline + (if (null? tal) + newres + (loop (car tal)(cdr tal) "" #f newres)))))))) + +;; note: I'm cheating a little here. I merely replace "\n" with "\n " +(define (configf:expand-multi-lines fdat) + ;; step 1.5 - compress any continued lines + (if (null? fdat) fdat + (let loop ((hed (car fdat)) + (tal (cdr fdat)) + (res '())) + (let ((newres (append res (list (string-substitute (regexp "\n") "\n " hed #t))))) + (if (null? tal) + newres + (loop (car tal)(cdr tal) newres)))))) + +(define (configf:file->list fname) + (if (common:file-exists? fname) + (let ((inp (open-input-file fname))) + (let loop ((inl (read-line inp)) + (res '())) + (if (eof-object? inl) + (begin + (close-input-port inp) + (reverse res)) + (loop (read-line inp)(cons inl res))))) + '())) + +;;====================================================================== +;; Write a config +;; 0. Given a refererence data structure "indat" +;; 1. Open the output file and read it into a list +;; 2. Flatten any multiline entries +;; 3. Modify values per contents of "indat" and remove absent values +;; 4. Append new values to the section (immediately after last legit entry) +;; 5. 
Write out the new list
;;======================================================================

;; Rewrite the config file fname so that it agrees with indat.
;;
;;   indat             : hash table keyed by section name; values are read
;;                       only through config-lookup and configf:section-vars
;;   fname             : file to update; configf:file->list yields '() for a
;;                       missing file, so the file is then built from scratch
;;   required-sections : extra section names visited in step 4 in addition
;;                       to the sections present in indat
;;
;; Existing lines keep their order.  Comment, blank and section-header
;; lines are kept verbatim.  A key line is kept verbatim when its value
;; is unchanged, rewritten as "key value" when indat holds a different
;; value, and dropped when config-lookup returns #f for it.  Variables of
;; indat that were not seen in the file are appended at the end of the
;; file under a fresh "[section]" header.
;;
;; NOTE(review): entries that appear before the first section header are
;; looked up under "default", following the original comment on sechash;
;; confirm this matches what the config reader does.
;;
(define (configf:write-config indat fname #!key (required-sections '()))
  (let* (;; step 1: Open the output file and read it into a list
         (fdat    (configf:file->list fname))
         ;; section name => hash of the keys kept in the rewritten file
         (refdat  (make-hash-table))
         ;; hash for the section currently being scanned
         (sechash (make-hash-table))
         (secname "default")
         ;; line to emit for the current input line, #f => drop the line
         (new     #f))

    (hash-table-set! refdat secname sechash)

    ;; step 2: Flatten multiline entries
    (if (not (null? fdat))(set! fdat (configf:compress-multi-line fdat)))

    ;; step 3: Modify values per contents of "indat" and remove absent values
    (if (not (null? fdat))
        (let loop ((hed  (car fdat))
                   (tal  (cdr fdat))
                   (res  '())      ;; lines kept so far, newest first
                   (lnum 0))
          ;; start every line with a clean slate so that a value left over
          ;; from the previous line can never be emitted twice
          (set! new #f)
          (regex-case
           hed
           (configf:comment-rx _ (set! new hed))
           (configf:blank-l-rx _ (set! new hed))
           (configf:section-rx ( x section-name )
                               (let ((section-hash (hash-table-ref/default refdat section-name #f)))
                                 (if (not section-hash)
                                     (let ((newhash (make-hash-table)))
                                       (hash-table-set! refdat section-name newhash)
                                       (set! sechash newhash))
                                     (set! sechash section-hash))
                                 (set! new hed)
                                 (set! secname section-name)))
           ;; No need to process key cmd, let it fall though to key val
           (configf:key-val-pr ( x key val )
                               (let ((newval (config-lookup indat secname key)))
                                 (if newval
                                     (begin
                                       ;; remember the key even when it is unchanged,
                                       ;; otherwise step 4 would append it again
                                       (hash-table-set! sechash key newval)
                                       (set! new (if (equal? newval val)
                                                     hed
                                                     (conc key " " newval))))
                                     ;; newval == #f => key has been removed
                                     (set! new #f))))
           (else
            (begin
              (debug:print-error 0 *default-log-port* "Problem parsing line num " lnum " :\n " hed )
              ;; keep what we cannot parse rather than silently losing it
              (set! new hed))))
          (let ((newres (if new (cons new res) res)))
            (if (null? tal)
                ;; done: publish the result exactly once, in file order
                (set! fdat (reverse newres))
                (loop (car tal)(cdr tal) newres (+ lnum 1))))))

    ;; step 4: Append new values to the section
    (for-each
     (lambda (section)
       (let ((seen (hash-table-ref/default refdat section #f))
             (sdat '())) ;; append needed bits here
         (for-each
          (lambda (var)
            (let ((val (config-lookup indat section var)))
              (if (and val
                       (not (and seen (hash-table-exists? seen var)))) ;; this one is new
                  (begin
                    (if (null? sdat)(set! sdat (list (conc "[" section "]"))))
                    (set! sdat (append sdat (list (conc var " " val))))))))
          (configf:section-vars indat section))
         (set! fdat (append fdat sdat))))
     (delete-duplicates (append required-sections (hash-table-keys indat))))

    ;; step 5: Write out new file
    (with-output-to-file fname
      (lambda ()
        (for-each
         (lambda (line)
           (print line))
         (configf:expand-multi-lines fdat))))))

;;======================================================================
;; refdb
;;======================================================================

;; reads a refdb into an assoc array of assoc arrays
;;   returns (list dat msg)
(define (configf:read-refdb refdb-path)
  (let ((sheets-file (conc refdb-path "/sheet-names.cfg")))
    (if (not (common:file-exists? sheets-file))
        (list #f (conc "ERROR: no refdb found at " refdb-path))
        (if (not (file-read-access? sheets-file))
            (list #f (conc "ERROR: refdb file not readable at " refdb-path))
            (let* ((sheets (with-input-from-file sheets-file
                             (lambda ()
                               (let loop ((inl (read-line))
                                          (res '()))
                                 (if (eof-object?
inl) + (reverse res) + (loop (read-line)(cons inl res))))))) + (data '())) + (for-each + (lambda (sheet-name) + (let* ((dat-path (conc refdb-path "/" sheet-name ".dat")) + (ref-dat (configf:read-file dat-path #f #t)) + (ref-assoc (map (lambda (key) + (list key (hash-table-ref ref-dat key))) + (hash-table-keys ref-dat)))) + ;; (hash-table->alist ref-dat))) + ;; (set! data (append data (list (list sheet-name ref-assoc)))))) + (set! data (cons (list sheet-name ref-assoc) data)))) + sheets) + (list data "NO ERRORS")))))) + +;; map over all pairs in a three level hierarchial alist and apply a function to the keys/val +;; +(define (configf:map-all-hier-alist data proc #!key (initproc1 #f)(initproc2 #f)(initproc3 #f)) + (for-each + (lambda (sheetname) + (let* ((sheettmp (assoc sheetname data)) + (sheetdat (if sheettmp (cadr sheettmp) '()))) + (if initproc1 (initproc1 sheetname)) + (for-each + (lambda (sectionname) + (let* ((sectiontmp (assoc sectionname sheetdat)) + (sectiondat (if sectiontmp (cadr sectiontmp) '()))) + (if initproc2 (initproc2 sheetname sectionname)) + (for-each + (lambda (varname) + (let* ((valtmp (assoc varname sectiondat)) + (val (if valtmp (cadr valtmp) ""))) + (proc sheetname sectionname varname val))) + (map car sectiondat)))) + (map car sheetdat)))) + (map car data)) + data) + +;;====================================================================== +;; C O N F I G T O / F R O M A L I S T +;;====================================================================== + +(define (configf:config->alist cfgdat) + (hash-table->alist cfgdat)) + +(define (configf:alist->config adat) + (let ((ht (make-hash-table))) + (for-each + (lambda (section) + (hash-table-set! 
ht (car section)(cdr section))) + adat) + ht)) + +;; if +(define (configf:read-alist fname) + (handle-exceptions + exn + #f + (configf:alist->config + (with-input-from-file fname read)))) + +(define (configf:write-alist cdat fname) + (if (not (common:faux-lock fname)) + (debug:print 0 *default-log-port* "INFO: Could not get lock on " fname)) + (let* ((dat (configf:config->alist cdat)) + (res + (begin + (with-output-to-file fname ;; first write out the file + (lambda () + (pp dat))) + + (if (common:file-exists? fname) ;; now verify it is readable + (if (configf:read-alist fname) + #t ;; data is good. + (begin + (handle-exceptions + exn + #f + (debug:print 0 *default-log-port* "WARNING: content " dat " for cache " fname " is not readable. Deleting generated file.") + (delete-file fname)) + #f)) + #f)))) + (common:faux-unlock fname) + res)) + +;; convert hierarchial list to ini format +;; +(define (configf:config->ini data) + (map + (lambda (section) + (let ((section-name (car section)) + (section-dat (cdr section))) + (print "\n[" section-name "]") + (map (lambda (dat-pair) + (let* ((var (car dat-pair)) + (val (cadr dat-pair)) + (fname (if (> (length dat-pair) 2)(caddr dat-pair) #f))) + (if fname (print "# " var "=>" fname)) + (print var " " val))) + section-dat))) ;; (print "section-dat: " section-dat)) + (hash-table->alist data))) + +;;====================================================================== +;; dashboard-context-menus +;;====================================================================== + +(define (dboard:launch-testpanel run-id test-id) + (let* (;; (cfg-sh (conc *common:this-exe-dir* "/cfg.sh")) + ;; (cmd (conc + ;; (if (common:file-exists? 
cfg-sh) + ;; (conc "source "cfg-sh" && ") + ;; "") + ;; *common:this-exe-fullpath* + ;; " -test " run-id "," test-id + ;; " &")) + (cmd (conc *common:this-exe-dir*"/../dashboard " + "-test " run-id "," test-id + " &"))) + (system cmd))) + + +(define (dashboard:run-menu-items run-id test-id target runname test-name testpatt item-test-path test-info) + (list + (iup:menu-item + (conc "Rerun " testpatt) + #:action + (lambda (obj) + ;; (print " run-id: " run-id " test-id: " test-id " target: " target " runname: " runname " test-name: " test-name " testpatt: " testpatt "item-path : " item-path) + (common:run-a-command + (conc "megatest -run -target " target + " -runname " runname + " -testpatt " testpatt + " -preclean -clean-cache") + ))) + (iup:menu-item + "Rerun Complete Run" + #:action + (lambda (obj) + (common:run-a-command + (conc "megatest -set-state-status NOT_STARTED,n/a -run -target " target + " -runname " runname + " -testpatt % " + " -preclean -clean-cache")))) + (iup:menu-item + "Clean Complete Run" + #:action + (lambda (obj) + (common:run-a-command + (conc "megatest -remove-runs -target " target + " -runname " runname + " -testpatt % ")))) + (iup:menu-item + "Kill Complete Run" + #:action + (lambda (obj) + (common:run-a-command + (conc "megatest -set-state-status KILLREQ,n/a -target " target + " -runname " runname + " -testpatt % " + " -state RUNNING,REMOTEHOSTSTART,LAUNCHED,NOT_STARTED")))) + (iup:menu-item + "Delete Run Data" + #:action + (lambda (obj) + (common:run-a-command + (conc "megatest -remove-runs -target " target + " -runname " runname + " -testpatt % " + " -keep-records")))))) + +(define (dashboard:test-menu-items run-id test-id target runname test-name testpatt item-test-path test-info) + (list + (iup:menu-item + (conc "Rerun " item-test-path) + #:action + (lambda (obj) + (common:run-a-command + (conc "megatest -set-state-status NOT_STARTED,n/a -run -target " target + " -runname " runname + " -testpatt " item-test-path + " -preclean 
-clean-cache")))) + (iup:menu-item + (conc "Kill " item-test-path) + #:action + (lambda (obj) + ;; (rmt:test-set-state-status-by-id run-id test-id "KILLREQ" #f #f) + (common:run-a-command + (conc "megatest -set-state-status KILLREQ,n/a -target " target + " -runname " runname + " -testpatt " item-test-path + " -state RUNNING,REMOTEHOSTSTART,LAUNCHED")))) + (iup:menu-item + (conc "Delete data : " item-test-path) + #:action + (lambda (obj) + (common:run-a-command + (conc "megatest -remove-runs -target " target + " -runname " runname + " -testpatt " item-test-path + " -keep-records")))) + (iup:menu-item + (conc "Clean "item-test-path) + #:action + (lambda (obj) + (common:run-a-command + (conc "megatest -remove-runs -target " target + " -runname " runname + " -testpatt " item-test-path)))) + (iup:menu-item + "Start xterm" + #:action + (lambda (obj) + (dcommon:examine-xterm run-id test-id))) + ;;(let* ((cmd (conc (car (argv)) " -xterm " run-id "," test-id "&"))) + ;; (system cmd)))) + (iup:menu-item + "Edit testconfig" + #:action + (lambda (obj) + (let* ((all-tests (tests:get-all)) + (editor-rx (or (configf:lookup *configdat* "setup" "editor-regex") + "\\b(vim?|nano|pico)\\b")) + (editor (or (configf:lookup *configdat* "setup" "editor") + (get-environment-variable "VISUAL") + (get-environment-variable "EDITOR") "vi")) + (tconfig (conc (hash-table-ref all-tests test-name) "/testconfig")) + (cmd (conc (if (string-search editor-rx editor) + (conc "xterm -e " editor) + editor) + " " tconfig " &"))) + (system cmd)))))) + +(define (dashboard:step-logs-menu-item run-id test-id target runname test-name testpatt item-test-path test-info) + (let* ((steps (tests:get-compressed-steps run-id test-id)) ;; # + (rundir (db:test-get-rundir test-info))) + + (iup:menu-item + "Step logs" + (apply iup:menu + (map (lambda (step) + (let ((stepname (vector-ref step 0)) + (logfile (vector-ref step 5)) + (status (vector-ref step 3))) + (iup:menu-item + (conc stepname "/" (if (string=? 
logfile "") "no log!" logfile) " (" status ")") + #:action (lambda (obj) + (let ((fullfile (conc rundir "/" logfile))) + (if (common:file-exists? fullfile) + (dcommon:run-html-viewer fullfile) + (message-window (conc "file " fullfile " not found")))))))) + steps))))) + +(define (dashboard:toplevel-menu-items run-id test-id target runname test-name testpatt item-test-path test-info) + (list + + (iup:menu-item + "Test Control Panel" + #:action + (lambda (obj) + (dboard:launch-testpanel run-id test-id))) + + (dashboard:step-logs-menu-item run-id test-id target runname test-name testpatt item-test-path test-info) + + (iup:menu-item + (conc "Rerun " item-test-path) + #:action + (lambda (obj) + (common:run-a-command + (conc "megatest -set-state-status NOT_STARTED,n/a -run -target " target + " -runname " runname + " -testpatt " item-test-path + " -preclean -clean-cache")))) + + (iup:menu-item + "Start xterm" + #:action + (lambda (obj) + (dcommon:examine-xterm run-id test-id))) + + (iup:menu-item + (conc "Kill " item-test-path) + #:action + (lambda (obj) + ;; (rmt:test-set-state-status-by-id run-id test-id "KILLREQ" #f #f) + (common:run-a-command + (conc "megatest -set-state-status KILLREQ,n/a -target " target + " -runname " runname + " -testpatt " item-test-path + " -state RUNNING,REMOTEHOSTSTART,LAUNCHED,NOT_STARTED")))) + + (let* ((rundir (db:test-get-rundir test-info)) + (has-subrun (subrun:subrun-test-initialized? rundir))) + (if has-subrun + (iup:menu-item + "Launch subrun dashboard" + #:action + (lambda (obj) + (subrun:launch-dashboard rundir))) + (iup:vbox))) + + (iup:menu-item + (conc "View Log " item-test-path) + #:action + (lambda (obj) + (let* ((rundir (db:test-get-rundir test-info)) + (logf (db:test-get-final_logf test-info)) + (fullfile (conc rundir "/" logf))) + (if (common:file-exists? 
fullfile) + (dcommon:run-html-viewer fullfile) + (message-window (conc "file " fullfile " not found."))))) + ) + )) +;; example section for megatest.config: +;; +;; +;; [custom-context-menu-items] +;; # : +;; item1 custom show run-id (%run-id%):echo "%run-id%" +;; item2 custom show test-id (%test-id%):echo "%test-id%" +;; item3 custom show target (%target%):echo "%target%" +;; item4 custom show test-name (%test-name%):echo "%test-name%" +;; item5 custom show test-patt (%test-patt%):echo "%test-patt%" +;; item6 custom show test-run-dir (%test-run-dir%):echo "%test-run-dir%" +;; item7 custom show run-area-home (%run-area-home%):echo "%run-area-home%" +;; item8 custom show megatest root (%mt-root%):echo "%mt-root%" +;; item9 custom ls : ls -lrt +;; item10 custom see $MT_RUN_AREA_HOME (not yet implemented) : echo $MT_RUN_AREA_HOME + +(define (dashboard:custom-menu-items run-id test-id target run-name test-name testpatt item-test-path test-info) + (let* ((vars (configf:section-vars *configdat* "custom-context-menu-items")) + (item-path (db:test-get-item-path test-info)) + (mt-root (pathname-directory (pathname-directory *common:this-exe-dir* )))) + (filter-map + (lambda (var) + (let* ((val (configf:lookup *configdat* "custom-context-menu-items" var)) + (m (string-match "^\\s*([^:]+?)\\s*:\\s*(.*?)\\s*$" val))) + (if m + (let* ((menu-item-text-raw (list-ref m 1)) + (command-line-raw (list-ref m 2)) + (subst-alist ;; template vars + `(( "%run-id%" . ,run-id ) + ( "%test-id%" . ,test-id ) + ( "%target%" . ,target ) + ( "%test-name%" . ,test-name) + ( "%test-patt%" . ,testpatt) + ( "%test-run-dir%" . ,(db:test-get-rundir test-info)) + ( "%mt-root%" . ,mt-root) + ( "%run-name%" . ,run-name) + ( "%run-area-home%" . ,*toppath*) + ( "%item-path%" . ,item-path) + ( "%item-test-patt%" . 
,item-test-path ))) + (command-line ;; replace template vars + (foldr + (lambda (x i) + (string-substitute + (car x) + (->string (cdr x)) + i + #t)) + command-line-raw + subst-alist)) + (menu-item-text ;; replace template vars + (foldr + (lambda (x i) + (string-substitute + (car x) + (->string (cdr x)) + i + #t)) + menu-item-text-raw + subst-alist))) + (iup:menu-item + (conc "*"menu-item-text) + #:action + (lambda (obj) + + (let* ((scheme-match (string-match "^#(\\(.*)" command-line))) + ;;(BB> "cmdline is >"command-line"<") + (common:with-env-vars + ;; TODO: with-env-vars + ;; TODO: with-env-vars MT_* + (runs:get-mt-env-alist run-id run-name target test-name item-path) + + (lambda () + (if scheme-match + (begin + (handle-exceptions + exn + (print "error with custom menu scheme") + (begin + ;;(BB> "gonna eval it!") + (eval (with-input-from-string (cadr scheme-match) read))))) + (common:run-a-command command-line with-vars: #t)))))))) + #f))) + vars))) + +(define (dashboard:context-menu run-id test-id target runname test-name testpatt item-test-path test-info) + (let* ((run-menu-items + (dashboard:run-menu-items run-id test-id target runname test-name testpatt item-test-path test-info)) + (test-menu-items + (dashboard:test-menu-items run-id test-id target runname test-name testpatt item-test-path test-info)) + (custom-menu-items + (dashboard:custom-menu-items run-id test-id target runname test-name testpatt item-test-path test-info)) + (toplevel-menu-items + (dashboard:toplevel-menu-items run-id test-id target runname test-name testpatt item-test-path test-info)) + ) + (apply iup:menu + `(,@toplevel-menu-items + ,(iup:menu-item + "Run" + (apply iup:menu run-menu-items)) + ,(iup:menu-item + "Test" + (apply iup:menu test-menu-items)) + ,@custom-menu-items)))) +;;====================================================================== +;; dashboard-gui-monitor +;;====================================================================== +(define (control-panel db tdb keys) + 
(let* ((var-params (make-hash-table)) ;; register all the widgets here for querying on run, rollup, remove? + (key-params (make-hash-table)) + (monitordat '()) ;; list of monitor records + (keyentries (iup:frame + #:title "Keys" + (apply + iup:vbox + (map (lambda (key) + (iup:hbox (iup:label (vector-ref key 0) #:size "60x15") ; #:expand "HORIZONTAL") + (iup:textbox #:expand "HORIZONTAL" + #:action (lambda (obj a val) + (hash-table-set! key-params (vector-ref key 0) val))))) + keys)))) + (othervars (iup:frame + #:title "Run Vars" + (apply + iup:vbox + (map (lambda (var) + (iup:hbox (iup:label var #:size "60x15") + (iup:textbox #:expand "HORIZONTAL" + #:action (lambda (obj a val) + (hash-table-set! var-params var val))))) + (list "runname" "testpatts" "params"))))) + (controls (iup:frame + #:title "Controls" + (iup:hbox + (iup:frame + #:title "Runs" + (iup:hbox + (iup:button "Start" + #:expand "HORIZONTAL" + #:action (lambda (obj) + (tasks:add-from-params tdb "run" keys key-params var-params) + (print "Launch Run"))) + (iup:button "Remove" + #:expand "HORIZONTAL" + #:action (lambda (obj) + (print "Remove Run") + (tasks:add-from-params tdb "remove" keys key-params var-params) + )) + (iup:button "Rollup" + #:expand "HORIZONTAL" + #:action (lambda (obj) + (print "Rollup Run") + (tasks:add-from-params tdb "rollup" keys key-params var-params))))) + (iup:frame + #:title "Misc" + (iup:hbox + (iup:button "Quit" + #:expand "HORIZONTAL" + #:action (lambda (obj) + (sqlite3:finalize! db) + (sqlite3:finalize! 
tdb) + (exit)))))))) + (monitors (iup:textbox + #:expand "YES" ; HORIZONTAL" + ; #:size "x40" + #:multiline "YES" + #:font "Courier New, -10" + #:value "None...............................................")) + (actions (iup:textbox + #:expand "YES" + #:multiline "YES" + #:font "Courier New, -10" + #:value "None...............................................")) + (lastmodtime 0) + (next-touch 0) ;; the last time the "last_update" field was updated + (refreshdat (lambda () + (let* ((monitordbpath (conc *toppath* "/monitor.db")) + (megatestdbpath (conc *toppath* "/megatest.db")) + (modtime (max (file-modification-time megatestdbpath) + (file-modification-time monitordbpath)))) + ;; do stuff here when the db is updated by some other process + (if (> modtime lastmodtime) + (let ((tlst (tasks:get-tasks tdb '() '())) + (mlst (tasks:get-monitors tdb))) + (set! tasksdat tlst) + (set! monitorsdat mlst) + (iup:attribute-set! monitors "VALUE" (tasks:monitors->text-table mlst)) + (iup:attribute-set! actions "VALUE" (tasks:tasks->text tlst)) + (tasks:process-queue db tdb) + (set! lastmodtime (max (file-modification-time megatestdbpath) + (file-modification-time monitordbpath))) + (tasks:reset-stuck-tasks tdb))) + ;; stuff to do every 10 seconds + (if (> (current-seconds) next-touch) + (begin + ;; (tasks:process-queue db tdb monitordbpath) + (tasks:monitors-update tdb) + (tasks:reset-stuck-tasks tdb) + (set! monitorsdat (tasks:get-monitors tdb)) + (set! next-touch (+ (current-seconds) 10)) + ))))) + (topdialog #f)) + (set! 
topdialog (iup:dialog + #:close_cb (lambda (a)(exit)) + #:title "Run Controls" + (iup:vbox + (iup:hbox keyentries othervars) + controls + (let ((tabtop (iup:tabs + (iup:vbox + (let* ((tb (iup:textbox #:expand "HORIZONTAL")) + (bt (iup:button "Remove tasks by id" + #:action (lambda (obj) + (let ((val (iup:attribute tb "VALUE"))) + (tasks:remove-queue-entries tdb val))))) + (lb (iup:label "(comma separated)"))) + (iup:hbox bt tb lb)) + actions) + monitors + ))) + (iup:attribute-set! tabtop "TABTITLE0" "Actions") + (iup:attribute-set! tabtop "TABTITLE1" "Monitors") + tabtop) + ))) + ; (iup:frame + ; #:title "Monitors" + ; monitors) + ; (iup:frame + ; #:title "Actions" + ; actions)))) + + (iup:show topdialog) + (iup:callback-set! *tim* "ACTION_CB" + (lambda (x) + (refreshdat) + (if *exit-started* + (set! *exit-started* 'ok)))))) + +(define (main-window setuptab fsltab collateraltab toolstab) + (iup:show + (iup:dialog #:title "FSL Power Window" #:size "290x190" ; #:expand "YES" + (let ((tabtop (iup:tabs setuptab collateraltab fsltab toolstab))) + (iup:attribute-set! tabtop "TABTITLE0" "Setup") + (iup:attribute-set! tabtop "TABTITLE1" "Collateral") + (iup:attribute-set! tabtop "TABTITLE2" "Fossil") + (iup:attribute-set! tabtop "TABTITLE3" "Tools") + tabtop)))) + +;; BUG: Remember to re-instate this!!!! +;; (on-exit (lambda () +;; (let ((tdb (tasks:open-db))) +;; ;; (print "On-exit called") +;; (tasks:remove-monitor-record tdb) +;; (sqlite3:finalize! tdb)))) + +(define (gui-monitor db) + (let ((keys (db:get-keys db)) + (tdb (tasks:open-db))) + (tasks:register-monitor db tdb) ;;; let the other monitors know we are here + (control-panel db tdb keys) + ;(tasks:remove-monitor-record db) + ;(sqlite3:finalize! 
db)
    ))

;;======================================================================
;; dashboard-tests
;;======================================================================

;;======================================================================
;; C O M M O N
;;======================================================================

(define *dashboard-comment-share-slot* #f)

;; Text placed in front of a command launched from the dashboard.
;;
;; Returns the first non-false value of:
;;   1. [dashboard] pre-command from *configdat*
;;   2. the default-override keyword argument
;;   3. "viewscreen " when [dashboard] use-viewscreen is set, otherwise
;;      "export CMD='" (the quote is closed by the default post-command)
;;
(define (dtests:get-pre-command #!key (default-override #f))
  (let* ((orig-pre-command       "export CMD='")
         (viewscreen-pre-command "viewscreen ")
         (use-viewscreen         (configf:lookup *configdat* "dashboard" "use-viewscreen"))
         (default-pre-command    (if use-viewscreen viewscreen-pre-command orig-pre-command))
         (cfg-ovrd               (configf:lookup *configdat* "dashboard" "pre-command")))
    (or cfg-ovrd default-override default-pre-command))) ;; "xterm -geometry 180x20 -e \""))


;; Text placed after a command launched from the dashboard.
;;
;; Returns the first non-false value of:
;;   1. [dashboard] post-command from *configdat*
;;   2. the default-override keyword argument
;;   3. "" when [dashboard] use-viewscreen is set, otherwise a tail that
;;      closes the quote opened by "export CMD='" and starts an xterm which
;;      prints a START time stamp, echoes and runs $CMD, tees the output to
;;      a runlog-<stamp>.log file and waits for a key press
;;
(define (dtests:get-post-command #!key (default-override #f))
  (let* (;; the hour is written with the date conversion %H; the former "$H"
         ;; was expanded by the shell as a variable, so the hour went missing
         ;; from the START stamp (the log file name below already used %H)
         (orig-post-command       (conc "';xterm -geometry 180x20 -e \"(echo; echo -n START:;date +ww%U.%w-%H:%M:%S;echo;echo $CMD;echo;$CMD)|&"
                                        "tee -a runlog-`date +ww%U.%w-%H:%M`.log;echo Press any key to continue;bash -c 'read -n 1 -s'\" &"))
         (viewscreen-post-command "")
         (use-viewscreen          (configf:lookup *configdat* "dashboard" "use-viewscreen"))
         (default-post-command    (if use-viewscreen viewscreen-post-command orig-post-command))
         (cfg-ovrd                (configf:lookup *configdat* "dashboard" "post-command")))
    (or cfg-ovrd default-override default-post-command))) ;; ";echo Press any key to continue;bash -c 'read -n 1 -s'\" &")))

(define (common:run-a-command cmd #!key (with-vars #f) (with-orig-env #f))
  (let* ((pre-cmd (dtests:get-pre-command))
         (post-cmd (dtests:get-post-command))
         (fullcmd  (if (or pre-cmd post-cmd)
                       (conc pre-cmd cmd post-cmd)
                       (conc "viewscreen " cmd))))
    (debug:print-info 02 *default-log-port* "Running command: " fullcmd)
    (cond
     (with-vars (common:without-vars fullcmd))
     (with-orig-env (common:with-orig-env fullcmd))
     (else
(common:without-vars fullcmd "MT_.*"))))) + +(define (test-info-panel testdat store-label widgets) + (iup:frame + #:title "Test Info" ; #:expand "YES" + (iup:hbox ; #:expand "YES" + (apply iup:vbox ; #:expand "YES" + (append (map (lambda (val) + (iup:label val ; #:expand "HORIZONTAL" + )) + (list "Testname: " + "Item path: " + "Current state: " + "Current status: " + "Test comment: " + "Test id: " + "Test date: ")) + (list (iup:label "" #:expand "VERTICAL")))) + (apply iup:vbox ; #:expand "YES" + (list + (store-label "testname" + (iup:label (db:test-get-testname testdat) #:expand "HORIZONTAL") + (lambda (testdat)(db:test-get-testname testdat))) + (store-label "item-path" + (iup:label (db:test-get-item-path testdat) #:expand "HORIZONTAL") + (lambda (testdat)(db:test-get-item-path testdat))) + (store-label "teststate" + (iup:label (db:test-get-state testdat) #:expand "HORIZONTAL") + (lambda (testdat) + (db:test-get-state testdat))) + (let ((lbl (iup:label (db:test-get-status testdat) #:expand "HORIZONTAL"))) + (hash-table-set! widgets "teststatus" + (lambda (testdat) + (let ((newstatus (db:test-get-status testdat)) + (oldstatus (iup:attribute lbl "TITLE"))) + (if (not (equal? oldstatus newstatus)) + (begin + (iup:attribute-set! lbl "FGCOLOR" (car (gutils:get-color-for-state-status (db:test-get-state testdat) + (db:test-get-status testdat)))) + (iup:attribute-set! lbl "TITLE" (db:test-get-status testdat))))))) + lbl) + (store-label "testcomment" + (iup:label "TestComment " + #:expand "HORIZONTAL") + (lambda (testdat) + (let ((newcomment (db:test-get-comment testdat))) + (if *dashboard-comment-share-slot* + (if (not (equal? (iup:attribute *dashboard-comment-share-slot* "VALUE") + newcomment)) + (iup:attribute-set! 
*dashboard-comment-share-slot* + "VALUE" + newcomment))) + newcomment))) + (store-label "testid" + (iup:label "TestId " + #:expand "HORIZONTAL") + (lambda (testdat) + (db:test-get-id testdat))) + (store-label "testdate" + (iup:label "TestDate " + #:expand "HORIZONTAL") + (lambda (testdat) + (seconds->work-week/day-time (db:test-get-event_time testdat)))) + ))))) + +;;====================================================================== +;; Test meta panel +;;====================================================================== + +(define (test-meta-panel-get-description testmeta) + (fmt #f (with-width 40 (wrap-lines (db:testmeta-get-description testmeta))))) + +(define (test-meta-panel testmeta store-meta) + (iup:frame + #:title "Test Meta Data" ; #:expand "YES" + (iup:hbox ; #:expand "YES" + (apply iup:vbox ; #:expand "YES" + (append (map (lambda (val) + (iup:label val ; #:expand "HORIZONTAL" + )) + (list "Author: " + "Owner: " + "Reviewed: " + "Tags: " + "Description: ")) + (list (iup:label "" #:expand "VERTICAL")))) + (apply iup:vbox ; #:expand "YES" + (list + (store-meta "author" + (iup:label (db:testmeta-get-author testmeta) #:expand "HORIZONTAL") + (lambda (testmeta)(db:testmeta-get-author testmeta))) + (store-meta "owner" + (iup:label (db:testmeta-get-owner testmeta) #:expand "HORIZONTAL") + (lambda (testmeta)(db:testmeta-get-owner testmeta))) + (store-meta "reviewed" + (iup:label (db:testmeta-get-reviewed testmeta) #:expand "HORIZONTAL") + (lambda (testmeta)(db:testmeta-get-reviewed testmeta))) + (store-meta "tags" + (iup:label (db:testmeta-get-tags testmeta) #:expand "HORIZONTAL") + (lambda (testmeta)(db:testmeta-get-tags testmeta))) + (store-meta "description" + (iup:label (test-meta-panel-get-description testmeta) #:size "x50"); #:expand "HORIZONTAL") + (lambda (testmeta) + (test-meta-panel-get-description testmeta))) + ))))) + + +;;====================================================================== +;; Run info panel 
;;======================================================================

;; Build the "Megatest Run Info" frame: one column of captions and one
;; column of values.
;;
;;   db      : not used by this procedure
;;   keydat  : list of (key value ...) entries; the car of each entry is
;;             used as the caption and the cadr as the value
;;   testdat : test record; only its run id is read here
;;   runname : text shown next to the "runname" caption
;;
(define (run-info-panel db keydat testdat runname)
  (let* ((run-id     (db:test-get-run_id testdat))
         (rundat     (rmt:get-run-info run-id))
         (header     (db:get-header rundat))
         (event_time (db:get-value-by-header (db:get-rows rundat)
                                             header
                                             "event_time")))
    (iup:frame
     #:title "Megatest Run Info" ; #:expand "YES"
     (iup:hbox ; #:expand "YES"
      ;; left column: the captions
      (apply iup:vbox ; #:expand "YES"
             (append (map (lambda (keyval)
                            (iup:label (conc (car keyval) " ")))
                          keydat)
                     (list (iup:label "runname ")
                           (iup:label "run-id")
                           (iup:label "run-date"))))
      ;; right column: the values, closed by an expanding spacer label
      (apply iup:vbox
             (append (map (lambda (keyval)
                            (iup:label (cadr keyval) #:expand "HORIZONTAL"))
                          keydat)
                     (list (iup:label runname)
                           (iup:label (conc run-id))
                           (iup:label (seconds->year-work-week/day-time event_time))
                           (iup:label "" #:expand "VERTICAL"))))))))

;;======================================================================
;; Host info panel
;;======================================================================

;; Build the "Remote host and Test Run Info" frame.
;;
;;   testdat     : test record supplying the initial label texts
;;   store-label : called as (store-label name label-widget getter) for
;;                 every value label; getter takes a test record and
;;                 returns the text for that label.  The result of each
;;                 call is used as a child of the value column, so
;;                 store-label presumably returns the widget -- confirm
;;                 against its definition.
;;
(define (host-info-panel testdat store-label)
  (iup:frame
   #:title "Remote host and Test Run Info" ; #:expand "YES"
   (iup:hbox ; #:expand "YES"
    (apply iup:vbox ; #:expand "YES" ;; The heading labels
           (append (map (lambda (val)
                          (iup:label val ; #:expand "HORIZONTAL"
                                     ))
                        (list "Hostname: "
                              "Disk free: "
                              "CPU Load: "
                              "Run duration: "
                              "Logfile: "
                              "Top process id: "
                              "Uname -a: "))
                   ;; the spacer has to be wrapped in a list: a bare widget as
                   ;; the last argument of append gives an improper list, which
                   ;; is not a valid argument list for apply
                   (list (iup:label "" #:expand "VERTICAL"))))
    (apply iup:vbox ; #:expand "YES"
           (list
            ;; NOTE: Yes, the host can change!
            (store-label "HostName"
                         (iup:label ;; (sdb:qry 'getstr
                          (db:test-get-host testdat) ;; )
                          #:expand "HORIZONTAL")
                         (lambda (testdat)(db:test-get-host testdat)))
            (store-label "DiskFree"
                         (iup:label (conc (db:test-get-diskfree testdat)) #:expand "HORIZONTAL")
                         (lambda (testdat)(conc (db:test-get-diskfree testdat))))
            (store-label "CPULoad"
                         (iup:label (conc (db:test-get-cpuload testdat)) #:expand "HORIZONTAL")
                         (lambda (testdat)(conc (db:test-get-cpuload testdat))))
            (store-label "RunDuration"
                         (iup:label (conc (seconds->hr-min-sec (db:test-get-run_duration testdat))) #:expand "HORIZONTAL")
                         (lambda (testdat)(conc (seconds->hr-min-sec (db:test-get-run_duration testdat)))))
            (store-label "LogFile"
                         (iup:label (conc (db:test-get-final_logf testdat)) #:expand "HORIZONTAL")
                         (lambda (testdat)(conc (db:test-get-final_logf testdat))))
            (store-label "ProcessId"
                         (iup:label (conc (db:test-get-process_id testdat)) #:expand "HORIZONTAL")
                         (lambda (testdat)(conc (db:test-get-process_id testdat))))
            (store-label "Uname"
                         (iup:label " " #:expand "HORIZONTAL") ;; #:wordwrap "YES")
                         (lambda (testdat) ;; (sdb:qry 'getstr
                           (db:test-get-uname testdat))) ;; )
            )))))

;; if there is a submegatest create a button to launch dashboard in that area
;;
(define (submegatest-panel dbstruct keydat testdat runname testconfig)
  (let* ((test-run-dir (db:test-get-rundir testdat))
         (subarea      (subrun:get-runarea test-run-dir))
         (area-exists  (and subarea (common:file-exists?
subarea silent: #t)))) + (if subarea + (iup:frame + #:title "Megatest Run Info" ; #:expand "YES" + (iup:button + "Launch Dashboard" + #:action (lambda (obj) + (subrun:launch-dashboard test-run-dir)))) + (iup:vbox)))) + +;; use a global for setting the buttons colors +;; state status teststeps +(define *state-status* (vector #f #f #f)) +(define (update-state-status-buttons testdat) + (let* ((state (db:test-get-state testdat)) + (status (db:test-get-status testdat)) + (color (car (gutils:get-color-for-state-status state status)))) + ((vector-ref *state-status* 0) state color) + ((vector-ref *state-status* 1) status color))) + +(define *dashboard-test-db* #t) +(define *dashboard-comment-share-slot* #f) + +;;====================================================================== +;; Set fields +;;====================================================================== +(define (set-fields-panel dbstruct run-id test-id testdat #!key (db #f)) + (let ((newcomment #f) + (newstatus #f) + (newstate #f) + (wtxtbox #f)) + (iup:frame + #:title "Set fields" + (iup:vbox + (iup:hbox (iup:label "Comment:") + (let ((txtbox (iup:textbox #:action (lambda (val a b) + ;; (rmt:test-set-state-status-by-id run-id test-id #f #f b) + (rmt:test-set-state-status run-id test-id #f #f b) + ;; IDEA: Just set a variable with the proc to call? + ;; (rmt:test-set-state-status-by-id run-id test-id #f #f b) + (set! newcomment b)) + #:value (db:test-get-comment testdat) + #:expand "HORIZONTAL"))) + (set! wtxtbox txtbox) + txtbox)) + + (apply iup:hbox + (iup:label "STATE:" #:size "30x") + (let* ((btns (map (lambda (state) + (let ((btn (iup:button state + #:expand "HORIZONTAL" #:size "50x" #:font "Courier New, -10" + #:action (lambda (x) + ;; (rmt:test-set-state-status-by-id run-id test-id state #f #f) + (rmt:set-state-status-and-roll-up-items run-id test-id #f state #f #f) ;; test-name passed in as test-id is respected + (db:test-set-state! 
testdat state))))) + btn)) + (map cadr *common:std-states*)))) ;; (list "COMPLETED" "NOT_STARTED" "RUNNING" "REMOTEHOSTSTART" "LAUNCHED" "KILLED" "KILLREQ")))) + (vector-set! *state-status* 0 + (lambda (state color) + (for-each + (lambda (btn) + (let* ((name (iup:attribute btn "TITLE")) + (newcolor (if (equal? name state) color "192 192 192"))) + (if (not (colors-similar? newcolor (iup:attribute btn "BGCOLOR"))) + (iup:attribute-set! btn "BGCOLOR" newcolor)))) + btns))) + btns)) + (apply iup:hbox + (iup:label "STATUS:" #:size "30x") + (let* ((btns (map (lambda (status) + (let ((btn (iup:button status + #:expand "HORIZONTAL" #:size "50x" #:font "Courier New, -10" + #:action (lambda (x) + (let ((t (iup:attribute x "TITLE"))) + (if (equal? t "WAIVED") + (iup:show (dashboard-tests:waiver run-id testdat + (if wtxtbox (iup:attribute wtxtbox "VALUE") #f) + (lambda (c) + (set! newcomment c) + (if wtxtbox + (begin + (iup:attribute-set! wtxtbox "VALUE" c) + (if (not *dashboard-comment-share-slot*) + (set! *dashboard-comment-share-slot* wtxtbox))) + )))) + (begin + ;; (rmt:test-set-state-status-by-id run-id test-id #f status #f) + (rmt:set-state-status-and-roll-up-items run-id test-id #f #f status #f) ;; test-name passed in as test-id is respected + (db:test-set-status! testdat status)))))))) + btn)) + (map cadr *common:std-statuses*)))) ;; (list "PASS" "WARN" "FAIL" "CHECK" "n/a" "WAIVED" "SKIP")))) + (vector-set! *state-status* 1 + (lambda (status color) + (for-each + (lambda (btn) + (let* ((name (iup:attribute btn "TITLE")) + (newcolor (if (equal? name status) color "192 192 192"))) + (if (not (colors-similar? newcolor (iup:attribute btn "BGCOLOR"))) + (iup:attribute-set! 
btn "BGCOLOR" newcolor)))) + btns))) + btns)))))) + +(define (dashboard-tests:run-a-step info) + #t) + +;; (define (dashboard-tests:step-run-control testdat stepname testconfig) +;; (let* ((mutex (make-mutex))) +;; (letrec ((dlg +;; (iup:dialog ;; #:close_cb (lambda (a)(exit)) ; #:expand "YES" +;; #:title stepname +;; (iup:vbox ; #:expand "YES" +;; (iup:label (conc "Step: " stepname "\nNB// These buttons only run the test step\nfor the purpose of debugging.\nNot all database updates are done.")) +;; (iup:button "Re-run" +;; #:expand "HORIZONTAL" +;; #:action (lambda (obj) +;; (debug:catch-and-dump (lambda () +;; (thread-start! +;; (make-thread +;; (lambda () +;; (print "BB> started ezsteps:run-from") +;; (debug:catch-and-dump +;; (lambda () +;; (ezsteps:run-from testdat stepname #t)) +;; "dashboard-tests:step-run-control -> ezstep:run-from (1)") +;; (print "BB> done ezsteps:run-from") +;; 'foo) +;; (conc "ezstep run single step " stepname))) +;; ) +;; "step-run-control action"))) +;; (iup:button "Re-run and continue" +;; #:expand "HORIZONTAL" +;; #:action (lambda (obj) +;; (debug:catch-and-dump +;; (lambda () +;; (thread-start! +;; (make-thread (lambda () +;; (ezsteps:run-from testdat stepname #f)) +;; (conc "ezstep run from step " stepname)))) +;; "dashboard-tests:step-run-control -> ezstep:run-from (2)"))) +;; (iup:button "Close" +;; #:action (lambda (obj) +;; (iup:destroy! dlg))) +;; ;; (iup:button "Refresh test data" +;; ;; #:expand "HORIZONTAL" +;; ;; #:action (lambda (obj) +;; ;; (print "Refresh test data " stepname)) +;; )))) +;; dlg))) + +(define (dashboard-tests:waiver run-id testdat ovrdval cmtcmd) + (let* ((wpatt (configf:lookup *configdat* "setup" "waivercommentpatt")) + (wregx (if (string? wpatt)(regexp wpatt) #f)) + (wmesg (iup:label (if wpatt (conc "Comment must match pattern " wpatt) ""))) + (comnt (iup:textbox #:action (lambda (val a b) + (if wpatt + (if (string-match wregx b) + (iup:attribute-set! 
wmesg "TITLE" (conc "Comment matches " wpatt)) + (iup:attribute-set! wmesg "TITLE" (conc "Comment does not match " wpatt)) + ))) + #:value (if ovrdval ovrdval (db:test-get-comment testdat)) + #:expand "HORIZONTAL")) + (dlog #f)) + (set! dlog (iup:dialog ;; #:close_cb (lambda (a)(exit)) ; #:expand "YES" + #:title "SET WAIVER" + (iup:vbox ; #:expand "YES" + (iup:label (conc "Enter justification for waiving test " + (db:test-get-testname testdat) + (if (equal? (db:test-get-item-path testdat) "") + "" + (conc "/" (db:test-get-item-path testdat))))) + wmesg ;; the informational msg on whether it matches + comnt + (iup:hbox + (iup:button "Apply and Close " + #:expand "HORIZONTAL" + #:action (lambda (obj) + (let ((comment (iup:attribute comnt "VALUE")) + (test-id (db:test-get-id testdat))) + (if (or (not wpatt) + (string-match wregx comment)) + (begin + ;; (rmt:test-set-state-status-by-id run-id test-id #f "WAIVED" comment) + (rmt:test-set-state-status run-id test-id #f "WAIVED" comment) + (db:test-set-status! testdat "WAIVED") + (cmtcmd comment) + (iup:destroy! dlog)))))) + (iup:button "Cancel" + #:expand "HORIZONTAL" + #:action (lambda (obj) + (iup:destroy! 
dlog))))))) + dlog)) + + +;;====================================================================== +;; +;;====================================================================== +(define (dashboard-tests:examine-test run-id test-id) ;; run-id run-key origtest) + (let* ((db-path (common:get-db-tmp-area *alldat*)) + (dbstruct #f) ;; NOT ACTUALLY USED (db:setup)) + ;; local: #t)) + (testdat (rmt:get-test-info-by-id run-id test-id)) ;; (db:get-test-info-by-id dbstruct run-id test-id)) + (db-mod-time 0) ;; (file-modification-time db-path)) + (last-update 0) ;; (current-seconds)) + (request-update #t)) + (if (not testdat) + (begin + (debug:print 2 *default-log-port* "ERROR: No test data found for test " test-id ", exiting") + (exit 1)) + (let* (;; (run-id (if testdat (db:test-get-run_id testdat) #f)) + (test-registry (tests:get-all)) + (keydat (if testdat (rmt:get-key-val-pairs run-id) #f)) + (rundat (if testdat (rmt:get-run-info run-id) #f)) + (runname (if testdat (db:get-value-by-header (db:get-rows rundat) + (db:get-header rundat) + "runname") #f)) + ;; (tdb (tdb:open-test-db-by-test-id-local dbstruct run-id test-id)) + ;; These next two are intentional bad values to ensure errors if they should not + ;; get filled in properly. 
+ (logfile "/this/dir/better/not/exist") + (rundir (if testdat + (db:test-get-rundir testdat) + logfile)) + ;; (testdat-path (conc rundir "/testdat.db")) ;; this gets recalculated until found + (augment-teststeps (lambda (inlov) + (map + (lambda (invec) + (list->vector + `( + ,@(reverse (cdr (reverse (vector->list invec)))) + "rerun this step" "restart from here" ))) + inlov))) + (teststeps (if testdat (augment-teststeps (tests:get-compressed-steps run-id test-id)) '())) + (testfullname (if testdat (db:test-get-fullname testdat) "Gathering data ...")) + (testname (if testdat (db:test-get-testname testdat) "n/a")) + ;; (tests:get-testconfig testdat testname 'return-procs)) + (testmeta (if testdat + (let ((tm (rmt:testmeta-get-record testname))) + (if tm tm (make-db:testmeta))) + (make-db:testmeta))) + + (keystring (string-intersperse + (map (lambda (keyval) + ;; (conc ":" (car keyval) " " (cadr keyval))) + (cadr keyval)) + keydat) + "/")) + (item-path (db:test-get-item-path testdat)) + ;; this next block was added to fix a bug where variables were + ;; needed. Revisit this. + (runconfig (let ((runconfigf (conc *toppath* "/runconfigs.config"))) ;; no rush but it would be good to convert this call to use runconfig:read + (if (common:file-exists? runconfigf) + (handle-exceptions + exn + #f ;; do nothing, just keep on trucking .... + (setup-env-defaults runconfigf run-id (make-hash-table) keydat environ-patt: keystring)) + (make-hash-table)))) + (testconfig (begin + ;; (runs:set-megatest-env-vars run-id inrunname: runname testname: test-name itempath: item-path) + (runs:set-megatest-env-vars run-id inkeyvals: keydat inrunname: runname intarget: keystring testname: testname itempath: item-path) ;; these may be needed by the launching process + (handle-exceptions + exn ;; NOTE: I've no idea why this was written this way. Research, study and fix needed! 
+ (tests:get-testconfig (db:test-get-testname testdat) (db:test-get-item-path testdat) test-registry #f allow-write-cache: #f) + (tests:get-testconfig (db:test-get-testname testdat) item-path test-registry #t allow-write-cache: #f)))) + (viewlog (lambda (x) + (if (common:file-exists? logfile) + ;(system (conc "firefox " logfile "&")) + (dcommon:run-html-viewer logfile) + (message-window (conc "File " logfile " not found"))))) + (view-a-log (lambda (lfile) + (let ((lfilename (conc rundir "/" lfile))) + ;; (print "lfilename: " lfilename) + (if (common:file-exists? lfilename) + ;(system (conc "firefox " logfile "&")) + (dcommon:run-html-viewer lfilename) + (message-window (conc "File " lfilename " not found")))))) + (xterm (lambda (x) + (if (directory-exists? rundir) + (let ((shell (if (get-environment-variable "SHELL") + (conc "-e " (get-environment-variable "SHELL")) + ""))) + (common:without-vars + (conc "cd " rundir + ";mt_xterm -T \"" (string-translate testfullname "()" " ") "\" " shell "&") + "MT_.*")) + (message-window (conc "Directory " rundir " not found"))))) + (widgets (make-hash-table)) + (refreshdat (lambda () + (let* ((curr-mod-time (file-modification-time db-path)) + ;; (max ..... (if (common:file-exists? testdat-path) + ;; (file-modification-time testdat-path) + ;; (begin + ;; (set! 
testdat-path (conc rundir "/testdat.db")) + ;; 0)))) + (need-update (or (and (>= curr-mod-time db-mod-time) + (> (current-milliseconds)(+ last-update 250))) ;; every half seconds if db touched + (> (current-milliseconds)(+ last-update 10000)) ;; force update even 10 seconds + request-update)) + (newtestdat (if need-update + ;; NOTE: BUG HIDER, try to eliminate this exception handler + (handle-exceptions + exn + (debug:print-info 0 *default-log-port* "test db access issue in examine test for run-id " run-id ", test-id " test-id ": " ((condition-property-accessor 'exn 'message) exn)) + (rmt:get-test-info-by-id run-id test-id ))))) + ;; (print "INFO: need-update= " need-update " curr-mod-time = " curr-mod-time) + (cond + ((and need-update newtestdat) + (set! testdat newtestdat) + (set! teststeps (augment-teststeps (tests:get-compressed-steps run-id test-id))) + (set! logfile (conc (db:test-get-rundir testdat) "/" (db:test-get-final_logf testdat))) + (set! rundir ;; (filedb:get-path *fdb* + (db:test-get-rundir testdat)) ;; ) + (set! testfullname (db:test-get-fullname testdat)) + ;; (debug:print 0 *default-log-port* "INFO: teststeps=" (intersperse teststeps "\n ")) + + ;; I don't see why this was implemented this way. Please comment it ... + ;; (if (eq? curr-mod-time db-mod-time) ;; do only once if same + ;; (set! db-mod-time (+ curr-mod-time 1)) + ;; (set! db-mod-time curr-mod-time)) + + (if (not (eq? curr-mod-time db-mod-time)) + (set! db-mod-time curr-mod-time)) + (set! last-update (current-milliseconds)) + (set! request-update #f) ;; met the need ... + ) + (need-update ;; if this was true and yet there is no data .... + (db:test-set-testname! 
testdat "DEAD OR DELETED TEST")))
                         (if need-update
                             (begin
                               ;; update the gui elements here: every updater registered in
                               ;; `widgets` is called with the current test record
                               (for-each
                                (lambda (key)
                                  ;; (print "Updating " key)
                                  ((hash-table-ref widgets key) testdat))
                                (hash-table-keys widgets))
                               (update-state-status-buttons testdat)))
                         ;; (iup:refresh self)
                         )))
         ;; registry filled by store-meta below (name -> updater procedure)
         (meta-widgets (make-hash-table))
         (self         #f)
         ;; store-label: register under `name` in `widgets` an updater that, given a
         ;; test record, computes (cmd testdat) and writes it to the TITLE of `lbl`
         ;; only when it differs from the current TITLE.  Returns `lbl` so the call
         ;; can be used in place of the widget itself.
         (store-label  (lambda (name lbl cmd)
                         (hash-table-set! widgets name
                                          (lambda (testdat)
                                            (let ((newval (cmd testdat))
                                                  (oldval (iup:attribute lbl "TITLE")))
                                              (if (not (equal? newval oldval))
                                                  (begin
                                                    ;(mutex-lock! mx1)
                                                    (iup:attribute-set! lbl "TITLE" newval)
                                                    ;(mutex-unlock! mx1)
                                                    )))))
                         lbl))
         ;; store-meta: same as store-label, but registers in `meta-widgets` and the
         ;; updater receives a testmeta record.
         (store-meta   (lambda (name lbl cmd)
                         (hash-table-set! meta-widgets name
                                          (lambda (testmeta)
                                            (let ((newval (cmd testmeta))
                                                  (oldval (iup:attribute lbl "TITLE")))
                                              (if (not (equal? newval oldval))
                                                  (begin
                                                    ;(mutex-lock! mx1)
                                                    (iup:attribute-set! lbl "TITLE" newval)
                                                    ;(mutex-unlock! mx1)
                                                    )))))
                         lbl))
         (store-button store-label)
         ;; command-proc: run whatever text is currently in the given textbox
         (command-proc (lambda (command-text-box)
                         (let* ((cmd (iup:attribute command-text-box "VALUE")))
                           (common:run-a-command cmd with-orig-env: #t))))
         (command-text-box (iup:textbox
                            #:expand "HORIZONTAL"
                            #:font "Courier New, -10"
                            #:action (lambda (obj cnum val)
                                       ;; (print "cnum=" cnum)
                                       ;; 13 is the code received for the Enter key: run the command.
                                       ;; FIX: this used to call `command-prox`, which is not bound
                                       ;; anywhere; the helper defined just above is `command-proc`.
                                       (if (eq? cnum 13)
                                           (command-proc obj)))
                            ))
         (command-launch-button (iup:button "Execute!" #:action (lambda (x)
                                                                   (command-proc command-text-box))))
         ;; (lambda (x)
         ;; (let* ((cmd (iup:attribute command-text-box "VALUE"))
         ;; (fullcmd (conc (dtests:get-pre-command)
         ;; cmd
         ;; (dtests:get-post-command))))
         ;; (debug:print-info 02 *default-log-port* "Running command: " fullcmd)
         ;; (common:without-vars fullcmd "MT_.*")))))
         (kill-jobs (lambda (x)
                      (iup:attribute-set!
+ command-text-box "VALUE" + (conc "megatest -target " keystring " -runname " runname + " -set-state-status KILLREQ,n/a -testpatt %/% " + " -state RUNNING,REMOTEHOSTSTART,LAUNCHED")))) + (run-test (lambda (x) + (iup:attribute-set! + command-text-box "VALUE" + (conc "megatest -target " keystring " -runname " runname + " -run -testpatt " (conc testname "/" (if (equal? item-path "") + "%" + item-path)) + " -clean-cache" + )))) + (remove-test (lambda (x) + (iup:attribute-set! + command-text-box "VALUE" + (conc "megatest -remove-runs -target " keystring " -runname " runname + " -testpatt " (conc testname "/" (if (equal? item-path "") + "%" + item-path)) + " -v")))) + (clean-run-execute (lambda (x) + (let ((cmd (conc ;; "megatest -remove-runs -target " keystring " -runname " runname + "megatest -set-state-status NOT_STARTED,n/a -target " keystring " -runname " runname + " -testpatt " (conc testname "/" (if (equal? item-path "") + "%" + item-path)) + ";megatest -target " keystring " -runname " runname + " -run -preclean -testpatt " (conc testname "/" (if (equal? item-path "") + "%" + item-path)) + " -clean-cache" + ))) + (thread-start! (make-thread (lambda () + (common:run-a-command cmd)) + "clean-run-execute"))))) + (remove-test (lambda (x) + (iup:attribute-set! + command-text-box "VALUE" + (conc "megatest -remove-runs -target " keystring " -runname " runname + " -testpatt " (conc testname "/" (if (equal? item-path "") + "%" + item-path)) + " -v")))) + (archive-test (lambda (x) + (iup:attribute-set! + command-text-box "VALUE" + (conc "megatest -target " keystring " -runname " runname + " -archive save-remove -testpatt " (conc testname "/" (if (equal? item-path "") + "%" + item-path)) + ))))) + (cond + ((not testdat)(begin (print "ERROR: bad test info for " test-id)(exit 1))) + ((not rundat)(begin (print "ERROR: found test info but there is a problem with the run info for " run-id)(exit 1))) + (else + ;; (test-set-status! db run-id test-name state status itemdat) + (set! 
self ; + (iup:dialog #:close_cb (lambda (a)(exit)) ; #:expand "YES" + #:title testfullname + (iup:vbox ; #:expand "YES" + ;; The run and test info + (iup:hbox ; #:expand "YES" + (run-info-panel dbstruct keydat testdat runname) + (test-info-panel testdat store-label widgets) + (test-meta-panel testmeta store-meta)) + (iup:hbox + (host-info-panel testdat store-label) + (submegatest-panel dbstruct keydat testdat runname testconfig)) + ;; The controls + (iup:frame #:title "Actions" + (iup:vbox + (iup:hbox + (iup:button "View Log" #:action viewlog #:size "80x") + (iup:button "Start Xterm" #:action xterm #:size "80x") + (iup:button "Run Test" #:action run-test #:size "80x") + (iup:button "Clean Test" #:action remove-test #:size "80x") + (iup:button "CleanRunExecute!" #:action clean-run-execute #:size "80x") + (iup:button "Kill All Jobs" #:action kill-jobs #:size "80x") + (iup:button "Archive Test" #:action archive-test #:size "80x") + (iup:button "Close" #:action (lambda (x)(exit)) #:size "80x")) + (apply + iup:hbox + (list command-text-box command-launch-button)))) + (set-fields-panel dbstruct run-id test-id testdat) + (let ((tabs + (iup:tabs + ;; Replace here with matrix + (let ((steps-matrix (iup:matrix + #:font "Courier New, -8" + #:expand "YES" + #:scrollbar "YES" + #:numcol 9 + #:numlin 100 + #:numcol-visible 9 + #:numlin-visible 5 + #:click-cb (lambda (obj lin col status) + ;; (if (equal? col 6) + (let* ((mtrx-rc (conc lin ":" 6)) + (fname (iup:attribute obj mtrx-rc)) + (stepname (iup:attribute obj (conc lin ":" 1))) (comment (iup:attribute obj (conc lin ":" 7)))) + (case col + + ((7) (print "Comment from step "stepname": "comment)) + ((8) (ezsteps:spawn-run-from testdat stepname #t)) + ((9) (ezsteps:spawn-run-from testdat stepname #f)) + (else (view-a-log fname)))))))) + ;; (let loop ((count 0)) + ;; (iup:attribute-set! steps-matrix "FITTOTEXT" (conc "L" count)) + ;; (if (< count 30) + ;; (loop (+ count 1)))) + (iup:attribute-set! 
steps-matrix "0:1" "Step Name") + (iup:attribute-set! steps-matrix "0:2" "Start") + (iup:attribute-set! steps-matrix "0:3" "End") + (iup:attribute-set! steps-matrix "WIDTH3" "50") + (iup:attribute-set! steps-matrix "0:4" "Status") + (iup:attribute-set! steps-matrix "WIDTH4" "50") + (iup:attribute-set! steps-matrix "0:5" "Duration") + (iup:attribute-set! steps-matrix "0:6" "Log File") + (iup:attribute-set! steps-matrix "0:7" "Comment") + (iup:attribute-set! steps-matrix "0:8" "rerun only") + (iup:attribute-set! steps-matrix "BGCOLOR0:9" "149 208 252") + (iup:attribute-set! steps-matrix "BGCOLOR0:8" "149 208 252") + (iup:attribute-set! steps-matrix "BGCOLOR0:7" "149 208 252") + (iup:attribute-set! steps-matrix "0:9" "rerun & continue") + (iup:attribute-set! steps-matrix "ALIGNMENT1" "ALEFT") + ;; (iup:attribute-set! steps-matrix "FIXTOTEXT" "C1") + (iup:attribute-set! steps-matrix "RESIZEMATRIX" "YES") + (let ((proc + (lambda (testdat) + (dcommon:populate-steps teststeps steps-matrix run-id test-id)))) + (hash-table-set! widgets "StepsMatrix" proc) + (proc testdat)) + steps-matrix) + ;; populate the Test Data panel + (iup:frame + #:title "Test Data" + (let ((test-data + (iup:textbox ;; #:action (lambda (obj char val) + ;; #f) + #:expand "YES" + #:multiline "YES" + #:font "Courier New, -10" + #:size "100x100"))) + (hash-table-set! 
widgets "Test Data" + (lambda (testdat) ;; + (let* ((currval (iup:attribute test-data "VALUE")) ;; "TITLE")) + (fmtstr "~10a~10a~10a~10a~7a~7a~6a~7a~a") ;; category,variable,value,expected,tol,units,type,comment + (newval (string-intersperse + (append + (list + (format #f fmtstr "Category" "Variable" "Value" "Expected" "Tol" "Status" "Units" "Type" "Comment") + (format #f fmtstr "========" "========" "=====" "========" "===" "======" "=====" "====" "=======")) + (map (lambda (x) + (format #f fmtstr + (db:test-data-get-category x) + (db:test-data-get-variable x) + (db:test-data-get-value x) + (db:test-data-get-expected x) + (db:test-data-get-tol x) + (db:test-data-get-status x) + (db:test-data-get-units x) + (db:test-data-get-type x) + (db:test-data-get-comment x))) + (rmt:read-test-data run-id test-id "%"))) + "\n"))) + (if (not (equal? currval newval)) + (iup:attribute-set! test-data "VALUE" newval ))))) ;; "TITLE" newval))))) + test-data)) + ;;(dashboard:run-controls) + ))) + (iup:attribute-set! tabs "TABTITLE0" "Steps") + (iup:attribute-set! tabs "TABTITLE1" "Test Data") + tabs)))) + (iup:show self) + (iup:callback-set! *tim* "ACTION_CB" + (lambda (x) + ;; Now start keeping the gui updated from the db + (refreshdat) ;; update from the db here + ;(thread-suspend! other-thread) + (if *exit-started* + (set! 
*exit-started* 'ok))))))))))

;;======================================================================
;; dcommon
;;======================================================================

;; yes, this is non-ideal
;; Both start out as #f placeholders.
(define dashboard:update-summary-tab #f)
(define dashboard:update-servers-table #f)

;;======================================================================
;; C O M M O N   D A T A   S T R U C T U R E
;;======================================================================
;;

;;======================================================================
;; D O T F I L E
;;======================================================================

;; Pretty-print `dat` into the file `fname` (the file is written via
;; with-output-to-file, so `pp` output goes to that file).
;;
(define (dcommon:write-dotfile fname dat)
  (with-output-to-file fname
    (lambda ()
      (pp dat))))

;;======================================================================
;; TARGET AND PATTERN MANIPULATIONS
;;======================================================================

;; Convert to and from list of lines (for a text box)
;; "," => "\n"
;;
;; FIX: the mode argument #t ("substitute all matches") was missing here
;; although the inverse conversion below passes it, so only the first
;; comma of a multi-pattern string was turned into a newline.
;;
(define (dboard:test-patt->lines test-patt)
  (string-substitute (regexp ",") "\n" test-patt #t))

;; "\n" => ","
(define (dboard:lines->test-patt lines)
  (string-substitute (regexp "\n") "," lines #t))


;;======================================================================
;; P R O C E S S   R U N S
;;======================================================================

;; MOVE THIS INTO *data*
(define *cachedata* (make-hash-table))
(hash-table-set! *cachedata* "runid-to-col"    (make-hash-table))
(hash-table-set! *cachedata* "testname-to-row" (make-hash-table))

;; Modify a cell only if the data has changed.
;;
;;   mtrx         - widget whose attribute is read/written
;;   cell-name    - attribute name, e.g. "R:C"
;;   new-val      - value the attribute should have
;;   prev-changed - "changed" flag accumulated by earlier calls
;;
;; Returns #t when the attribute was written (a re-draw is needed),
;; otherwise returns prev-changed unchanged.
;;
;; FIX: the attribute was being set to `col-name`, which is not bound in
;; this procedure; the value to store is the `new-val` parameter.
;;
(define (dcommon:modifiy-if-different mtrx cell-name new-val prev-changed)
  (let ((curr-val (iup:attribute mtrx cell-name)))
    (if (equal? curr-val new-val)
        prev-changed
        (begin
          (iup:attribute-set! mtrx cell-name new-val)
          #t)))) ;; need a re-draw


;; TO-DO
;; 1. 
Make "data" hash-table hierarchial store of all displayed data +;; 2. Update synchash to understand "get-runs", "get-tests" etc. +;; 3. Add extraction of filters to synchash calls +;; +;; NOTE: Used in newdashboard +;; +;; Mode is 'full or 'incremental for full refresh or incremental refresh +;; (define (dcommon:run-update keys data runname keypatts testpatt states statuses mode window-id) +;; (let* (;; count and offset => #f so not used +;; ;; the synchash calls modify the "data" hash +;; (changed #f) +;; (get-runs-sig (conc (client:get-signature) " get-runs")) +;; (get-tests-sig (conc (client:get-signature) " get-tests")) +;; (get-details-sig (conc (client:get-signature) " get-test-details")) +;; +;; ;; test-ids to get and display are indexed on window-id in curr-test-ids hash +;; (test-ids (hash-table-values (dboard:tabdat-curr-test-ids data))) +;; ;; run-id is #f in next line to send the query to server 0 +;; (run-changes (synchash:client-get 'db:get-runs get-runs-sig (length keypatts) data #f runname #f #f keypatts)) +;; (tests-detail-changes (if (not (null? test-ids)) +;; (synchash:client-get 'db:get-test-info-by-ids get-details-sig 0 data #f test-ids) +;; '())) +;; +;; ;; Now can calculate the run-ids +;; (run-hash (hash-table-ref/default data get-runs-sig #f)) +;; (run-ids (if run-hash (filter number? (hash-table-keys run-hash)) '())) +;; +;; (all-test-changes (let ((res (make-hash-table))) +;; (for-each (lambda (run-id) +;; (if (> run-id 0) +;; (hash-table-set! res run-id (synchash:client-get 'db:get-tests-for-run-mindata get-tests-sig 0 data run-id 1 testpatt states statuses #f)))) +;; run-ids) +;; res)) +;; (runs-hash (hash-table-ref/default data get-runs-sig #f)) +;; (header (hash-table-ref/default runs-hash "header" #f)) +;; (run-ids (sort (filter number? 
(hash-table-keys runs-hash)) +;; (lambda (a b) +;; (let* ((record-a (hash-table-ref runs-hash a)) +;; (record-b (hash-table-ref runs-hash b)) +;; (time-a (db:get-value-by-header record-a header "event_time")) +;; (time-b (db:get-value-by-header record-b header "event_time"))) +;; (> time-a time-b))) +;; )) +;; (runid-to-col (hash-table-ref *cachedata* "runid-to-col")) +;; (testname-to-row (hash-table-ref *cachedata* "testname-to-row")) +;; (colnum 1) +;; (rownum 0) +;; (cellname (conc rownum ":" colnum))) ;; rownum = 0 is the header +;; ;; (debug:print 0 *default-log-port* "test-ids " test-ids ", tests-detail-changes " tests-detail-changes) +;; +;; ;; tests related stuff +;; ;; (all-testnames (delete-duplicates (map db:test-get-testname test-changes)))) +;; +;; ;; Given a run-id and testname/item_path calculate a cell R:C +;; +;; ;; NOTE: Also build the test tree browser and look up table +;; ;; +;; ;; Each run is unique on its keys and runname or run-id, store in hash on colnum +;; (for-each (lambda (run-id) +;; (let* ((run-record (hash-table-ref/default runs-hash run-id #f)) +;; (key-vals (map (lambda (key)(db:get-value-by-header run-record header key)) +;; keys)) +;; (run-name (db:get-value-by-header run-record header "runname")) +;; (col-name (conc (string-intersperse key-vals "\n") "\n" run-name)) +;; (run-path (append key-vals (list run-name)))) +;; (hash-table-set! (dboard:tabdat-run-keys data) run-id run-path) +;; ;; modify cell - but only if changed +;; (set! changed (dcommon:modifiy-if-different (dboard:tabdat-runs-matrix data) cellname col-name changed)) +;; (hash-table-set! runid-to-col run-id (list colnum run-record)) +;; ;; Here we update the tests treebox and tree keys +;; (tree:add-node (dboard:tabdat-tests-tree data) "Runs" (append key-vals (list run-name)) +;; userdata: (conc "run-id: " run-id)) +;; (set! 
colnum (+ colnum 1)))) +;; run-ids) +;; +;; ;; Scan all tests to be displayed and organise all the test names, respecting what is in the hash table +;; ;; Do this analysis in the order of the run-ids, the most recent run wins +;; (for-each (lambda (run-id) +;; (let* ((run-path (hash-table-ref (dboard:tabdat-run-keys data) run-id)) +;; (test-changes (hash-table-ref all-test-changes run-id)) +;; (new-test-dat (car test-changes)) +;; (removed-tests (cadr test-changes)) +;; (tests (sort (map cadr (filter (lambda (testrec) +;; (eq? run-id (db:mintest-get-run_id (cadr testrec)))) +;; new-test-dat)) +;; (lambda (a b) +;; (let ((time-a (db:mintest-get-event_time a)) +;; (time-b (db:mintest-get-event_time b))) +;; (> time-a time-b))))) +;; ;; test-changes is a list of (( id record ) ... ) +;; ;; Get list of test names sorted by time, remove tests +;; (test-names (delete-duplicates (map (lambda (t) +;; (let ((i (db:mintest-get-item_path t)) +;; (n (db:mintest-get-testname t))) +;; (if (string=? i "") +;; (conc " " i) +;; n))) +;; tests))) +;; (colnum (car (hash-table-ref runid-to-col run-id)))) +;; ;; for each test name get the slot if it exists and fill in the cell +;; ;; or take the next slot and fill in the cell, deal with items in the +;; ;; run view panel? The run view panel can have a tree selector for +;; ;; browsing the tests/items +;; +;; ;; SWITCH THIS TO USING CHANGED TESTS ONLY +;; (for-each (lambda (test) +;; (let* ((test-id (db:mintest-get-id test)) +;; (state (db:mintest-get-state test)) +;; (status (db:mintest-get-status test)) +;; (testname (db:mintest-get-testname test)) +;; (itempath (db:mintest-get-item_path test)) +;; (fullname (conc testname "/" itempath)) +;; (dispname (if (string=? itempath "") testname (conc " " itempath))) +;; (rownum (hash-table-ref/default testname-to-row fullname #f)) +;; (test-path (append run-path (if (equal? 
itempath "") +;; (list testname) +;; (list testname itempath)))) +;; (tb (dboard:tabdat-tests-tree data))) +;; (print "INFONOTE: run-path: " run-path) +;; (tree:add-node (dboard:tabdat-tests-tree data) "Runs" +;; test-path +;; userdata: (conc "test-id: " test-id)) +;; (let ((node-num (tree:find-node tb (cons "Runs" test-path))) +;; (color (car (gutils:get-color-for-state-status state status)))) +;; (debug:print 0 *default-log-port* "node-num: " node-num ", color: " color) +;; +;; (set! changed (dcommon:modifiy-if-different +;; tb +;; (conc "COLOR" node-num) +;; color changed)) +;; +;; ;; (iup:attribute-set! tb (conc "COLOR" node-num) color) +;; ) +;; (hash-table-set! (dboard:tabdat-path-test-ids data) test-path test-id) +;; (if (not rownum) +;; (let ((rownums (hash-table-values testname-to-row))) +;; (set! rownum (if (null? rownums) +;; 1 +;; (+ 1 (common:max rownums)))) +;; (hash-table-set! testname-to-row fullname rownum) +;; ;; create the label +;; (set! changed (dcommon:modifiy-if-different +;; (dboard:tabdat-runs-matrix data) +;; (conc rownum ":" 0) +;; dispname +;; changed)) +;; ;; (iup:attribute-set! (dboard:tabdat-runs-matrix data) +;; ;; (conc rownum ":" 0) dispname) +;; )) +;; ;; set the cell text and color +;; ;; (debug:print 2 *default-log-port* "rownum:colnum=" rownum ":" colnum ", state=" status) +;; (set! changed (dcommon:modifiy-if-different +;; (dboard:tabdat-runs-matrix data) +;; (conc rownum ":" colnum) +;; (if (member state '("ARCHIVED" "COMPLETED")) +;; status +;; state) +;; changed)) +;; ;; (iup:attribute-set! (dboard:tabdat-runs-matrix data) +;; ;; (conc rownum ":" colnum) +;; ;; (if (member state '("ARCHIVED" "COMPLETED")) +;; ;; status +;; ;; state)) +;; (set! changed (dcommon:modifiy-if-different +;; (dboard:tabdat-runs-matrix data) +;; (conc "BGCOLOR" rownum ":" colnum) +;; (car (gutils:get-color-for-state-status state status)) +;; changed)) +;; ;; (iup:attribute-set! 
(dboard:tabdat-runs-matrix data) +;; ;; (conc "BGCOLOR" rownum ":" colnum) +;; ;; (car (gutils:get-color-for-state-status state status))) +;; )) +;; tests))) +;; run-ids) +;; +;; (let ((updater (hash-table-ref/default (dboard:commondat-updaters commondat) window-id #f))) +;; (if updater (updater (hash-table-ref/default data get-details-sig #f)))) +;; +;; (if changed (iup:attribute-set! (dboard:tabdat-runs-matrix data) "REDRAW" "ALL")) +;; ;; (debug:print 2 *default-log-port* "run-changes: " run-changes) +;; ;; (debug:print 2 *default-log-port* "test-changes: " test-changes) +;; (list run-changes all-test-changes))) + +(define (dcommon:runsdat-get-col-num dat target runname force-set) + (let* ((runs-index (dboard:runsdat-runs-index dat)) + (col-name (conc target "/" runname)) + (res (hash-table-ref/default runs-index col-name #f))) + (if res + res + (if force-set + (let ((max-col-num (+ 1 (common:max (cons-1 (hash-table-values runs-index)))))) + (hash-table-set! runs-index col-name max-col-num) + max-col-num))))) + +(define (dcommon:runsdat-get-row-num dat testname itempath force-set) + (let* ((tests-index (dboard:runsdat-runs-index dat)) + (row-name (conc testname "/" itempath)) + (res (hash-table-ref/default runs-index row-name #f))) + (if res + res + (if force-set + (let ((max-row-num (+ 1 (common:max (cons -1 (hash-table-values tests-index)))))) + (hash-table-set! runs-index row-name max-row-num) + max-row-num))))) + +(define (dcommon:rundat-copy-tests-to-by-name rundat) + (let ((src-ht (dboard:rundat-tests rundat)) + (trg-ht (dboard:rundat-tests-by-name rundat))) + (if (and (hash-table? src-ht)(hash-table? trg-ht)) + (begin + (hash-table-clear! trg-ht) + (for-each + (lambda (testdat) + (hash-table-set! 
trg-ht (test:test-get-fullname testdat) testdat)) + (hash-table-values src-ht))) + (debug:print 0 *default-log-port* "WARNING: src-ht " src-ht " trg-ht " trg-ht)))) + + +;;====================================================================== +;; TESTS DATA +;;====================================================================== + +;; Produce a list of lists ready for common:sparse-list-generate-index +;; +(define (dcommon:minimize-test-data tests-dat) + (if (null? tests-dat) + '() + (let loop ((hed (car tests-dat)) + (tal (cdr tests-dat)) + (res '())) + (let* ((test-id (db:test-get-id hed)) ;; look at the tests-dat spec for locations + (test-name (db:test-get-testname hed)) + (item-path (db:test-get-item-path hed)) + (state (db:test-get-state hed)) + (status (db:test-get-status hed)) + (event-time (db:test-get-event_time hed)) + (newitem (list test-name item-path (list test-id state status event-time)))) + (if (null? tal) + (reverse (cons newitem res)) + (loop (car tal)(cdr tal)(cons newitem res))))))) + +(define (dcommon:tests-mindat->hash tests-mindat) + (let* ((res (make-hash-table))) + (for-each + (lambda (item) + (let* ((test-name+item-path (cons (list-ref item 0) (list-ref item 1))) + (value (list-ref item 2))) + (hash-table-set! 
res test-name+item-path value))) + tests-mindat) + res)) + +;; return 1 if status1 is better +;; return 0 if status1 and 2 are equally good +;; return -1 if status2 is better +(define (dcommon:status-compare3 status1 status2) + (let* + ((status-goodness-ranking (cdr ;; cdr to drop first item -- "n/a" + (append (map cadr *common:std-statuses*) + '(#f)) ;; algorithm requres last item to be #f + ) ) + (mem1 (member status1 status-goodness-ranking)) + (mem2 (member status2 status-goodness-ranking)) + ) + (cond + ((and (not mem1) (not mem2)) 0) + ((not mem1) -1) + ((not mem2) 1) + ((= (length mem1) (length mem2)) 0) + ((> (length mem1) (length mem2)) 1) + (else -1)))) + +(define (dcommon:xor-tests-mindat src-tests-mindat dest-tests-mindat #!key (hide-clean #f)) + (let* ((src-hash (dcommon:tests-mindat->hash src-tests-mindat)) + (dest-hash (dcommon:tests-mindat->hash dest-tests-mindat)) + (all-keys + (reverse (sort + (delete-duplicates + (append (hash-table-keys src-hash) (hash-table-keys dest-hash))) + + (lambda (a b) + (cond + ((< 0 (string-compare3 (car a) (car b))) #t) + ((> 0 (string-compare3 (car a) (car b))) #f) + ((< 0 (string-compare3 (cdr a) (cdr b))) #t) + (else #f))) + + )))) + (let ((res + (map ;; TODO: rename xor to delta globally in dcommon and dashboard + (lambda (key) + (let* ((test-name (car key)) + (item-path (cdr key)) + + (dest-value (hash-table-ref/default dest-hash key #f)) ;; (list test-id state status) + (dest-test-id (if dest-value (list-ref dest-value 0) #f)) + (dest-state (if dest-value (list-ref dest-value 1) #f)) + (dest-status (if dest-value (list-ref dest-value 2) #f)) + + (src-value (hash-table-ref/default src-hash key #f)) ;; (list test-id state status) + (src-test-id (if src-value (list-ref src-value 0) #f)) + (src-state (if src-value (list-ref src-value 1) #f)) + (src-status (if src-value (list-ref src-value 2) #f)) + + (incomplete-statuses '("DELETED" "INCOMPLETE" "STUCK/DEAD" "N/A")) ;; if any of these statuses apply, treat test as 
incomplete
+
+                     (dest-complete
+                      (and dest-value dest-state dest-status
+                           (equal? dest-state "COMPLETED")
+                           (not (member dest-status incomplete-statuses))))
+                     (src-complete
+                      (and src-value src-state src-status
+                           (equal? src-state "COMPLETED")
+                           (not (member src-status incomplete-statuses))))
+                     (status-compare-result (dcommon:status-compare3 src-status dest-status))
+                     (xor-new-item
+                      (cond
+                       ;; complete, for this case means: state=compelte AND status not in ( deleted uncomplete stuck/dead n/a )
+                       ;; neither complete -> bad
+
+                       ;; src !complete, dest complete -> better
+                       ((and (not dest-complete) (not src-complete))
+                        (list dest-test-id "BOTH-BAD" "BOTH-INCOMPLETE"))
+                       ((not dest-complete)
+                        (list src-test-id "DIFF-MISSING" "DEST-INCOMPLETE"))
+                       ((not src-complete)
+                        (list dest-test-id "DIFF-NEW" "SRC-INCOMPLETE"))
+                       ((and
+                         (equal? src-state dest-state)
+                         (equal? src-status dest-status))
+                        (list dest-test-id (conc "CLEAN") (conc "CLEAN-" dest-status) ))
+                       ;; better or worse: pass > warn > waived > skip > fail > abort
+                       ;; pass > warn > waived > skip > fail > abort
+
+                       ((= 1 status-compare-result) ;; src is better, dest is worse
+                        (list dest-test-id "DIRTY-WORSE" (conc src-status "->" dest-status)))
+                       (else
+                        (list dest-test-id "DIRTY-BETTER" (conc src-status "->" dest-status)))
+                       )))
+                (list test-name item-path xor-new-item)))
+            all-keys)))
+
+      (if hide-clean
+          (filter
+           (lambda (item)
+             ;;(print item)
+             (not
+              (equal?
+               "CLEAN"
+               (list-ref (list-ref item 2) 1))))
+           res)
+          res))))
+
+;; Launch mt_xterm in the run directory of a test.
+;;
+;; run-id, test-id - handed unchanged to rmt:get-test-info-by-id
+;;
+;; When no test record comes back, an error is logged and the process
+;; exits with status 1. When the run directory recorded for the test does
+;; not exist, a message window is shown instead of starting the terminal.
+;; If SHELL is set in the environment it is passed to mt_xterm with -e.
+;;
+(define (dcommon:examine-xterm run-id test-id)
+  (let* ((testdat (rmt:get-test-info-by-id run-id test-id)))
+    (if (not testdat)
+        (begin
+          ;; the log port must come right after the level, as at the other
+          ;; debug:print call sites; without it the message text is taken
+          ;; as the port
+          (debug:print 2 *default-log-port* "ERROR: No test data found for test " test-id ", exiting")
+          (exit 1))
+        ;; testdat is non-#f on this branch, so the record is read directly
+        (let* ((rundir       (db:test-get-rundir testdat))
+               (testfullname (db:test-get-fullname testdat))
+               (xterm (lambda ()
+                        (if (directory-exists? rundir)
+                            (let* ((shell (if (get-environment-variable "SHELL")
+                                              (conc "-e " (get-environment-variable "SHELL"))
+                                              ""))
+                                   ;; parentheses in the test name are replaced before
+                                   ;; it is used as the window title
+                                   (command (conc "cd " rundir
+                                                  ";mt_xterm -T \"" (string-translate testfullname "()" " ") "\" " shell "&")))
+                              (print "Command =" command)
+                              ;; NOTE(review): presumably runs the command with the
+                              ;; MT_.* variables removed from the environment - confirm
+                              (common:without-vars
+                               command
+                               "MT_.*"))
+                            (message-window (conc "Directory " rundir " not found"))))))
+          (xterm)
+          (print "Adding xterm code")))))
+
+;;======================================================================
+;; D A T A T A B L E S
+;;======================================================================
+
+;; Table of keys
+(define (dcommon:keys-matrix rawconfig)
+  (let* ((curr-row-num 1)
+         (key-vals (configf:section-vars rawconfig "fields"))
+         (keys-matrix (iup:matrix
+                       #:alignment1 "ALEFT"
+                       #:expand "YES" ;; "HORIZONTAL" ;; "VERTICAL"
+                       ;; #:scrollbar "YES"
+                       #:numcol 1
+                       #:numlin (length key-vals)
+                       #:numcol-visible 1
+                       #:numlin-visible (length key-vals)
+                       #:click-cb (lambda (obj lin col status)
+                                    (print "obj: " obj " lin: " lin " col: " col " status: " status)))))
+    ;; (iup:attribute-set! keys-matrix "0:0" "Run Keys")
+    (iup:attribute-set! keys-matrix "WIDTH0" 0)
+    (iup:attribute-set! keys-matrix "0:1" "Key Name")
+    ;; (iup:attribute-set! keys-matrix "WIDTH1" "100")
+    ;; fill in keys
+    (for-each
+     (lambda (var)
+       ;; (iup:attribute-set! keys-matrix "ADDLIN" (conc curr-row-num))
+       (iup:attribute-set! keys-matrix (conc curr-row-num ":0") curr-row-num)
+       (iup:attribute-set! keys-matrix (conc curr-row-num ":1") var)
+       (set! curr-row-num (+ 1 curr-row-num))) ;; (config-lookup *configdat* "fields" var)))
+     key-vals)
+    (iup:attribute-set! 
keys-matrix "WIDTHDEF" "40") + keys-matrix)) + +;; Section to table +(define (dcommon:section-matrix rawconfig sectionname varcolname valcolname #!key (title #f)) + (let* ((curr-row-num 1) + (key-vals (configf:section-vars rawconfig sectionname)) + (section-matrix (iup:matrix + #:alignment1 "ALEFT" + ;; #:expand "YES" ;; "HORIZONTAL" + #:numcol 1 + #:numlin (length key-vals) + #:numcol-visible 1 + #:numlin-visible (min 10 (length key-vals)) + #:scrollbar "YES"))) + (iup:attribute-set! section-matrix "0:0" varcolname) + (iup:attribute-set! section-matrix "0:1" valcolname) + (iup:attribute-set! section-matrix "WIDTH1" "200") + ;; fill in keys + (for-each + (lambda (var) + ;; (iup:attribute-set! keys-matrix "ADDLIN" (conc curr-row-num)) + (iup:attribute-set! section-matrix (conc curr-row-num ":0") var) + (iup:attribute-set! section-matrix (conc curr-row-num ":1") (configf:lookup rawconfig sectionname var)) + (set! curr-row-num (+ 1 curr-row-num))) ;; (config-lookup *configdat* "fields" var))) + key-vals) + (iup:vbox + (iup:label (if title title (conc "Settings from [" sectionname "]")) + ;; #:size "5x" + #:expand "HORIZONTAL" + ) + section-matrix))) + +;; General data +;; +(define (dcommon:general-info) + (let ((general-matrix (iup:matrix + #:alignment1 "ALEFT" + #:expand "YES" ;; "HORIZONTAL" + #:numcol 1 + #:numlin 2 + #:numcol-visible 1 + #:numlin-visible 2))) + (iup:attribute-set! general-matrix "WIDTH1" "150") + (iup:attribute-set! general-matrix "0:1" "About this Megatest area") + ;; User (this is not always obvious - it is common to run as a different user + (iup:attribute-set! general-matrix "1:0" "User") + (iup:attribute-set! general-matrix "1:1" (current-user-name)) + ;; Megatest area + ;; (iup:attribute-set! general-matrix "2:0" "Area") + ;; (iup:attribute-set! general-matrix "2:1" *toppath*) + ;; Megatest version + (iup:attribute-set! general-matrix "2:0" "Version") + (iup:attribute-set! 
general-matrix "2:1" (conc megatest-version "-" (substring megatest-fossil-hash 0 4))) + + general-matrix)) + +(define (dcommon:run-stats commondat tabdat #!key (tab-num #f)) + (let* ((stats-matrix (iup:matrix expand: "YES")) + (changed #f) + (stats-updater (lambda () + (if (dashboard:database-changed? commondat tabdat context-key: 'run-stats) + (let* ((run-stats (rmt:get-run-stats)) + (indices (common:sparse-list-generate-index run-stats)) ;; proc: set-cell)) + (row-indices (car indices)) + (col-indices (cadr indices)) + (max-row (if (null? row-indices) 1 (common:max (map cadr row-indices)))) + (max-col (if (null? col-indices) 1 + (common:max (map cadr col-indices)))) + (max-visible (max (- (dboard:tabdat-num-tests tabdat) 15) 3)) + (max-col-vis (if (> max-col 10) 10 max-col)) + (numrows 1) + (numcols 1)) + (iup:attribute-set! stats-matrix "CLEARVALUE" "CONTENTS") + (iup:attribute-set! stats-matrix "NUMCOL" max-col ) + (iup:attribute-set! stats-matrix "NUMLIN" (if (< max-row max-visible) max-visible max-row)) ;; min of 20 + (iup:attribute-set! stats-matrix "NUMCOL_VISIBLE" max-col-vis) + (iup:attribute-set! stats-matrix "NUMLIN_VISIBLE" (if (> max-row max-visible) max-visible max-row)) + + ;; Row labels + (for-each (lambda (ind) + (let* ((name (car ind)) + (num (cadr ind)) + (key (conc num ":0"))) + (if (not (equal? (iup:attribute stats-matrix key) name)) + (begin + (set! changed #t) + (iup:attribute-set! stats-matrix key name))))) + row-indices) + + ;; Col labels + (for-each (lambda (ind) + (let* ((name (car ind)) + (num (cadr ind)) + (key (conc "0:" num))) + (if (not (equal? (iup:attribute stats-matrix key) name)) + (begin + (set! changed #t) + (iup:attribute-set! 
stats-matrix key name))))) + col-indices) + + ;; Cell contents + (for-each (lambda (entry) + (let* ((row-name (car entry)) + (col-name (cadr entry)) + (value (caddr entry)) + (row-num (cadr (assoc row-name row-indices))) + (col-num (cadr (assoc col-name col-indices))) + (key (conc row-num ":" col-num))) + (if (not (equal? (iup:attribute stats-matrix key) value)) + (begin + (set! changed #t) + (iup:attribute-set! stats-matrix key value))))) + run-stats) + (if changed (iup:attribute-set! stats-matrix "REDRAW" "ALL"))) + )))) + ;; (dboard:commondat-please-update-set! commondat #t) ;; force redraw on first pass + ;; (mark-for-update tabdat) + ;; (stats-updater) + (dboard:commondat-add-updater commondat stats-updater tab-num: tab-num) + ;; (set! dashboard:update-summary-tab updater) + (iup:attribute-set! stats-matrix "WIDTHDEF" "40") + (iup:vbox + ;; (iup:label "Run statistics" #:expand "HORIZONTAL") + stats-matrix))) + +(define (dcommon:servers-table commondat tabdat) + (let* ((colnum 0) + (rownum 0) + (servers-matrix (iup:matrix #:expand "YES" + #:numcol 7 + #:numcol-visible 7 + #:numlin-visible 5 + )) + (colnames (list "Id" "MTver" "Pid" "Host" "Interface:OutPort" "RunTime" "State" "RunId")) + (updater (lambda () + (if (dashboard:monitor-changed? commondat tabdat) + (let ((servers (server:get-list *toppath* limit: 10))) + (iup:attribute-set! servers-matrix "NUMLIN" (length servers)) + ;; (set! colnum 0) + ;; (for-each (lambda (colname) + ;; ;; (print "colnum: " colnum " colname: " colname) + ;; (iup:attribute-set! servers-matrix (conc "0:" colnum) colname) + ;; (set! colnum (+ 1 colnum))) + ;; colnames) + (set! rownum 1) + (for-each + (lambda (server) + (set! 
colnum 0) + (match-let (((mod-time host port start-time pid) + server)) + (let* ((uptime (- (current-seconds) mod-time)) + (runtime (if start-time + (- mod-time start-time) + 0)) + (vals (list "-" ;; (vector-ref server 0) ;; Id + "-" ;; (vector-ref server 9) ;; MT-Ver + pid ;; (vector-ref server 1) ;; Pid + host ;; (vector-ref server 2) ;; Hostname + (conc host ":" port) ;; (conc (vector-ref server 3) ":" (vector-ref server 4)) ;; IP:Port + (seconds->hr-min-sec runtime) ;; (- (current-seconds) start-time)) ;; (vector-ref server 6))) + (cond + ((< uptime 5) "alive") + ((< uptime 16) "probably alive");; less than 15 seconds since mod, call it alive (vector-ref server 8) ;; State + (else "dead")) + "-" ;; (vector-ref server 12) ;; RunId + ))) + (for-each (lambda (val) + (let* ((row-col (conc rownum ":" colnum)) + (curr-val (iup:attribute servers-matrix row-col))) + (if (not (equal? (conc val) curr-val)) + (begin + (iup:attribute-set! servers-matrix row-col val) + (iup:attribute-set! servers-matrix "FITTOTEXT" (conc "C" colnum)))) + (set! colnum (+ 1 colnum)))) + vals) + (set! rownum (+ rownum 1))) + (iup:attribute-set! servers-matrix "REDRAW" "ALL"))) + (sort servers (lambda (a b)(> (car a)(car b)))))))))) + (set! colnum 0) + (for-each (lambda (colname) + (iup:attribute-set! servers-matrix (conc "0:" colnum) colname) + (iup:attribute-set! servers-matrix "FITTOTEXT" (conc "C" colnum)) + (set! colnum (+ colnum 1))) + colnames) + ;; (set! dashboard:update-servers-table updater) + (dboard:commondat-add-updater commondat updater) + ;; (iup:attribute-set! 
servers-matrix "WIDTHDEF" "40") + ;; (iup:hbox + ;; (iup:vbox + ;; (iup:button "Start" + ;; ;; #:size "50x" + ;; #:expand "YES" + ;; #:action (lambda (obj) + ;; (let ((cmd (conc ;; "xterm -geometry 180x20 -e \"" + ;; "megatest -server - &"))) + ;; ;; ";echo Press any key to continue;bash -c 'read -n 1 -s'\" &"))) + ;; (system cmd)))) + ;; (iup:button "Stop" + ;; #:expand "YES" + ;; ;; #:size "50x" + ;; #:action (lambda (obj) + ;; (let ((cmd (conc ;; "xterm -geometry 180x20 -e \"" + ;; "megatest -stop-server 0 &"))) + ;; ;; ";echo Press any key to continue;bash -c 'read -n 1 -s'\" &"))) + ;; (system cmd)))) + ;; (iup:button "Restart" + ;; #:expand "YES" + ;; ;; #:size "50x" + ;; #:action (lambda (obj) + ;; (let ((cmd (conc ;; "xterm -geometry 180x20 -e \"" + ;; "megatest -stop-server 0;megatest -server - &"))) + ;; ;; ";echo Press any key to continue;bash -c 'read -n 1 -s'\" &"))) + ;; (system cmd))))) + ;; servers-matrix + ;; ))) + servers-matrix + )) + +;; The main menu +(define (dcommon:main-menu) + (iup:menu ;; a menu is a special attribute to a dialog (think Gnome putting the menu at screen top) + (iup:menu-item "Files" (iup:menu ;; Note that you can use either #:action or action: for options + (iup:menu-item "Open" action: (lambda (obj) + (let* ((area-name (iup:textbox #:expand "HORIZONTAL")) + (fd (iup:file-dialog #:dialogtype "DIR")) + (top (iup:show fd #:modal? "YES"))) + (iup:attribute-set! source-tb "VALUE" + (iup:attribute fd "VALUE")) + (iup:destroy! fd)))) + ;; (lambda (obj) + ;; (iup:show (iup:file-dialog)) + ;; (print "File->open " obj))) + (iup:menu-item "Save" #:action (lambda (obj)(print "File->save " obj))) + (iup:menu-item "Exit" #:action (lambda (obj)(exit))))) + (iup:menu-item "Tools" (iup:menu + (iup:menu-item "Create new blah" #:action (lambda (obj)(print "Tools->new blah"))) + ;; (iup:menu-item "Show dialog" #:action (lambda (obj) + ;; (show message-window + ;; #:modal? 
#t + ;; ;; set positon using coordinates or center, start, top, left, end, bottom, right, parent-center, current + ;; ;; #:x 'mouse + ;; ;; #:y 'mouse + ;; ) + )))) + +;;====================================================================== +;; CANVAS STUFF FOR TESTS +;;====================================================================== + +(define (dcommon:draw-test cnv xoffset yoffset scalef x y w h name selected) + (let* ((llx (dcommon:x->canvas x scalef xoffset)) + (lly (dcommon:y->canvas y scalef yoffset)) + (urx (dcommon:x->canvas (+ x w) scalef xoffset)) + (ury (dcommon:y->canvas (+ y h) scalef yoffset))) + (canvas-text! cnv (+ llx 5)(+ lly 5) name) + (canvas-rectangle! cnv llx urx lly ury) + (if selected (canvas-box! cnv llx (+ llx 5) lly (+ lly 5))))) + +(define (dcommon:draw-arrow cnv test-box-center waiton-center) + (let* ((test-box-center-x (vector-ref test-box-center 0)) + (test-box-center-y (vector-ref test-box-center 1)) + (waiton-center-x (vector-ref waiton-center 0)) + (waiton-center-y (vector-ref waiton-center 1)) + (delta-y (- waiton-center-y test-box-center-y)) + (delta-x (- waiton-center-x test-box-center-x)) + (abs-delta-x (abs delta-x)) + (abs-delta-y (abs delta-y)) + (use-delta-x (> abs-delta-x abs-delta-y)) ;; use the larger one + (delta-ratio (if use-delta-x + (if (> abs-delta-x 0) + (/ abs-delta-y abs-delta-x) + 1) + (if (> abs-delta-y 0) + (/ abs-delta-x abs-delta-y) + 1))) + (x-adj (if use-delta-x + 8 + (* delta-ratio 8))) + (y-adj (if use-delta-x + (* x-adj delta-ratio) + 8)) + (new-waiton-x (inexact->exact + (round (if (> delta-x 0) ;; have positive x + (- waiton-center-x x-adj) + (+ waiton-center-x x-adj))))) + (new-waiton-y (inexact->exact + (round (if (> delta-y 0) + (- waiton-center-y y-adj) + (+ waiton-center-y y-adj)))))) + ;; (canvas-line-width-set! cnv 5) + (canvas-line! cnv + test-box-center-x + test-box-center-y + new-waiton-x + new-waiton-y + ) + (canvas-mark! 
cnv new-waiton-x new-waiton-y))) + +(define (dcommon:get-box-center box) + (let* ((llx (list-ref box 0)) + (lly (list-ref box 1)) + (boxw (list-ref box 4)) + (boxh (list-ref box 5))) + (vector (+ llx (/ boxw 2)) + (+ lly (/ boxh 2))))) + +(define-inline (num->int num) + (inexact->exact (round num))) + +(define (dcommon:draw-edges cnv xoffset yoffset scalef edges) + (for-each + (lambda (e) + (let loop ((x1 (car e)) + (y1 (cadr e)) + (x2 #f) + (y2 #f) + (tal (cddr e))) + (if (and x1 y1 x2 y2) + (canvas-line! + cnv + (num->int (dcommon:x->canvas x1 scalef xoffset)) + (num->int (dcommon:y->canvas y1 scalef yoffset)) + (num->int (dcommon:x->canvas x2 scalef xoffset)) + (num->int (dcommon:y->canvas y2 scalef yoffset)))) ;; (num->int x1)(num->int y1)(num->int x2)(num->int y2))) + (if (< (length tal) 2) + (canvas-mark! cnv + (num->int (dcommon:x->canvas x1 scalef xoffset)) + (num->int (dcommon:y->canvas y1 scalef yoffset))) ;; (num->int x1)(num->int y1)) + (loop (car tal)(cadr tal) x1 y1 (cddr tal))))) + ;; (map (lambda (e)(map (lambda (x)(num->int (* x scalef))) e)) edges))) + edges)) + + +(define (dcommon:draw-arrows cnv testname tests-hash test-records) + (let* ((test-box-info (hash-table-ref tests-hash testname)) + (test-box-center (dcommon:get-box-center test-box-info)) + (test-record (hash-table-ref test-records testname)) + (waitons (vector-ref test-record 2))) + (for-each + (lambda (waiton) + (let* ((waiton-box-info (hash-table-ref/default tests-hash waiton #f)) + (waiton-center (dcommon:get-box-center (or waiton-box-info test-box-info)))) + (dcommon:draw-arrow cnv test-box-center waiton-center))) + waitons) + ;; (debug:print 0 *default-log-port* "test-box-info=" test-box-info) + ;; (debug:print 0 *default-log-port* "test-record=" test-record) + )) + +(define (dcommon:estimate-scale sizex sizey originx originy nodes) + ;; (print "sizex: " sizex " sizey: " sizey " originx: " originx " originy: " originy " nodes: " nodes) + (let* ((maxx 1) + (maxy 1)) + (for-each + 
(lambda (node) + (if (equal? (car node) "node") + (let ((x (string->number (list-ref node 2))) + (y (string->number (list-ref node 3)))) + (if (and x (> x maxx))(set! maxx x)) + (if (and y (> y maxy))(set! maxy y))))) + nodes) + (let ((scalex (/ sizex maxx)) + (scaley (/ sizey maxy))) + ;; (print "maxx: " maxx " maxy: " maxy " scalex: " scalex " scaley: " scaley) + (min scalex scaley)))) + +(define (dcommon:get-xoffset tests-draw-state sizex-in xadj-in) + (let ((xadj (or xadj-in (hash-table-ref/default tests-draw-state 'xadj 0))) + (sizex (or sizex-in (hash-table-ref/default tests-draw-state 'sizex 500)))) + (hash-table-set! tests-draw-state 'xadj xadj) ;; for use in de-scaling when handling mouse clicks + (hash-table-set! tests-draw-state 'sizex sizex) + (* (/ sizex 2) (- 0.5 xadj)))) + +(define (dcommon:get-yoffset tests-draw-state sizey-in yadj-in) + (let ((yadj (or yadj-in (hash-table-ref/default tests-draw-state 'yadj 0))) + (sizey (or sizey-in (hash-table-ref/default tests-draw-state 'sizey 500)))) + (hash-table-set! tests-draw-state 'yadj yadj) ;; for use in de-scaling when handling mouse clicks + (hash-table-set! tests-draw-state 'sizey sizey) + (* (/ sizey 2) (- yadj 0.5)))) + +(define (dcommon:x->canvas x scalef xoffset) + (+ xoffset (* x scalef))) + +(define (dcommon:y->canvas y scalef yoffset) + (+ yoffset (* y scalef))) + +;; sizex, sizey - canvas size +;; originx, originy - canvas origin +;; +(define (dcommon:initial-draw-tests cnv xadj yadj sizex sizey sizexmm sizeymm originx originy tests-draw-state sorted-testnames test-records) + (let* ((dot-data ;; (map cdr (filter + ;; (lambda (x)(equal? 
"node" (car x))) + (map string-split (tests:lazy-dot test-records "plain" sizex sizey))) ;; (tests:easy-dot test-records "plain"))) + (xoffset (dcommon:get-xoffset tests-draw-state sizex xadj)) + (yoffset (dcommon:get-yoffset tests-draw-state sizey yadj)) + (no-dot (configf:lookup *configdat* "setup" "nodot")) + (boxh 15) + (boxw 10) + (margin 5) + (tests-info (hash-table-ref tests-draw-state 'tests-info)) + (selected-tests (hash-table-ref tests-draw-state 'selected-tests )) + (scalef (if no-dot + 1 + (dcommon:estimate-scale sizex sizey originx originy dot-data))) + (sorted-testnames (if no-dot + (sort sorted-testnames string>=?) + sorted-testnames)) + (curr-x 0) ;; NB// NOT screen units + (curr-y (/ (- sizey boxh margin) scalef)) ;; used when no-dot + (scaled-sizex (/ sizex scalef))) + + (hash-table-set! tests-draw-state 'scalef scalef) + + (let ((longest-str (if (null? sorted-testnames) " " (car (sort sorted-testnames (lambda (a b)(>= (string-length a)(string-length b)))))))) + (let-values (((x-max y-max) (canvas-text-size cnv longest-str))) + (if (> x-max boxw)(set! boxw (+ 10 x-max))))) + ;; (print "sizex: " sizex " sizey: " sizey " font: " (canvas-font cnv) " originx: " originx " originy: " originy " xtorig: " xtorig " ytorig: " ytorig " xadj: " xadj " yadj: " yadj) + (if (not (null? sorted-testnames)) + (let loop ((hed (car (reverse sorted-testnames))) + (tal (cdr (reverse sorted-testnames)))) + (let* ((nodedat (if no-dot + #f + (let ((tmpres (filter (lambda (x) + (if (and (not (null? x)) + (equal? (car x) "node")) + (equal? hed (cadr x)) + #f)) + dot-data))) + (if (null? tmpres) + ;; llx lly boxw boxh + (list "0" "1" "1" (conc (length tal)) "2" "0.5") ;; return some placeholder junk if no dat found + (car tmpres))))) + (edgedat (if no-dot + '() + (let ((edges (filter (lambda (x) ;; filter for edge + (if (and (not (null? x)) + (equal? (car x) "edge")) + (equal? 
hed (cadr x)) + #f)) + dot-data))) + (map (lambda (inlst) + (dcommon:process-polyline + (map (lambda (instr) + (string->number instr)) ;; convert to number and scale + (let ((il (cddddr inlst))) + (take il (- (length il) 2)))) + (lambda (x y) + (list (+ x 0) ;; xtorig) + (+ y 0))) ;; ytorig))) + #f #f)) ;; process polyline + edges)))) + (cx (if no-dot ;; this is the centerpoint! + curr-x + (string->number (list-ref nodedat 2)))) + (cy (if no-dot + curr-y + (string->number (list-ref nodedat 3)))) + (boxw (if no-dot + boxw + (string->number (list-ref nodedat 4)))) + (boxh (if no-dot + boxh + (string->number (list-ref nodedat 5)))) + (boxw/2 (/ boxw 2)) + (boxh/2 (/ boxh 2)) + (urx (+ cx boxw/2)) + (ury (+ cy boxh/2)) + (llx (- cx boxw/2)) + (lly (- cy boxh/2))) + + ;; if we are in no-dot mode then increment curr-x and curr-y as needed + (if no-dot + (begin + (cond + ((< curr-x (- scaled-sizex boxw boxw margin)) + (set! curr-x (+ curr-x boxw margin))) + ((> curr-x (- scaled-sizex boxw boxw margin)) + (set! curr-x 0) + (set! curr-y (- curr-y (+ boxh margin))))))) + ; (print "hed " hed " llx " llx " lly " lly " urx " urx " ury " ury) + (dcommon:draw-test cnv xoffset yoffset scalef llx lly boxw boxh hed (hash-table-ref/default selected-tests hed #f)) + ;; (dcommon:draw-arrows cnv testname tests-info test-records)) + (dcommon:draw-edges cnv xoffset yoffset scalef edgedat) + + ;; data used by mouse click calc. keep the wacky order for now. + (hash-table-set! tests-info hed (list llx lly urx ury boxw boxh edgedat)) + (if (not (null? 
tal))
+                  (loop (car tal)
+                        (cdr tal))))))
+    ))
+
+;; Walk a flat coordinate list (x1 y1 x2 y2 ...) one point at a time and
+;; return the concatenation of what per-point-proc returns for each point.
+;;
+;; line              - flat list of coordinates; fewer than two elements gives
+;;                     '() and a trailing unpaired coordinate is ignored
+;; per-point-proc    - required; called as (per-point-proc x y) for every
+;;                     point and must return a list
+;; per-segment-proc  - optional, #f to skip; called as
+;;                     (per-segment-proc x y prev-x prev-y) whenever all four
+;;                     values are non-#f, i.e. never for the first point
+;; last-segment-proc - optional, #f to skip; called once, on the final point,
+;;                     as (last-segment-proc x y prev-x prev-y); prev-x and
+;;                     prev-y are #f when the line holds a single point
+;;
+(define (dcommon:process-polyline line per-point-proc per-segment-proc last-segment-proc)
+  ;; emptiness checks replace (length ...) so no step re-walks the list
+  (if (or (null? line) (null? (cdr line)))
+      '()
+      ;; chunks holds the per-point results newest-first; they are put back
+      ;; in order and joined once at the end rather than re-appended per step
+      (let loop ((x1     (car line))
+                 (y1     (cadr line))
+                 (x2     #f)
+                 (y2     #f)
+                 (tal    (cddr line))
+                 (chunks '()))
+        (if (and x1 y1 x2 y2 per-segment-proc)
+            (per-segment-proc x1 y1 x2 y2))
+        (if (or (null? tal) (null? (cdr tal)))
+            (begin
+              (if last-segment-proc (last-segment-proc x1 y1 x2 y2))
+              (apply append (reverse (cons (per-point-proc x1 y1) chunks))))
+            (loop (car tal) (cadr tal) x1 y1 (cddr tal)
+                  (cons (per-point-proc x1 y1) chunks))))))
+
+(define (dcommon:redraw-tests cnv xadj yadj sizex sizey sizexmm sizeymm originx originy tests-draw-state sorted-testnames test-records)
+  (let* ((scalef (hash-table-ref tests-draw-state 'scalef))
+         (xoffset (dcommon:get-xoffset tests-draw-state sizex xadj))
+         (yoffset (dcommon:get-yoffset tests-draw-state sizey yadj))
+         (tests-info (hash-table-ref tests-draw-state 'tests-info))
+         (selected-tests (hash-table-ref tests-draw-state 'selected-tests )))
+    (if (not (null? sorted-testnames))
+        (let loop ((hed (car (reverse sorted-testnames)))
+                   (tal (cdr (reverse sorted-testnames))))
+          (let* ((tvals (hash-table-ref tests-info hed))
+                 (llx (list-ref tvals 0))
+                 (lly (list-ref tvals 1))
+                 (boxw (list-ref tvals 4))
+                 (boxh (list-ref tvals 5))
+                 (edges (map (lambda (pline)
+                               (dcommon:process-polyline pline
+                                                         (lambda (x1 y1)
+                                                           (list x1 y1))
+                                                         #f #f))
+                             (list-ref tvals 6)))
+                 (urx (+ llx boxw))
+                 (ury (+ lly boxh)))
+            (dcommon:draw-test cnv xoffset yoffset scalef llx lly boxw boxh hed (hash-table-ref/default selected-tests hed #f))
+            (dcommon:draw-edges cnv xoffset yoffset scalef edges)
+            (if (not (null? 
tal)) + ;; leave a column of space to the right to list items + (loop (car tal) + (cdr tal)))))))) + +;;====================================================================== +;; RUN CONTROLS +;;====================================================================== + +(define (dcommon:command-execution-control data) + ;; The command line display/exectution control + (iup:frame + #:title "Command to be exectuted" + (iup:hbox + (iup:label "Run on" #:size "40x") + (iup:radio + (iup:hbox + (iup:toggle "Local" #:size "40x") + (iup:toggle "Server" #:size "40x"))) + (let ((tb (iup:textbox + #:value "megatest " + #:expand "HORIZONTAL" + #:readonly "YES" + #:font "Courier New, -12" + ))) + (dboard:tabdat-command-tb-set! data tb) + tb) + (iup:button "Execute" #:size "50x" + #:action (lambda (obj) + ;; (let ((cmd (conc ;; "xterm -geometry 180x20 -e \"" + (common:run-a-command (iup:attribute (dboard:tabdat-command-tb data) "VALUE"))))))) + ;; ";echo Press any key to continue;bash -c 'read -n 1 -s'\" &"))) + ;; (system cmd))))))) + +(define (dcommon:command-action-selector commondat tabdat #!key (tab-num #f)) + (iup:frame + #:title "Set the action to take" + (iup:hbox + ;; (iup:label "Command to run" #:expand "HORIZONTAL" #:size "70x" #:alignment "LEFT:ACENTER") + (let* ((cmds-list '("run" "remove-runs")) ;; "set-state-status" "lock-runs" "unlock-runs")) + (lb (iup:listbox #:expand "HORIZONTAL" + #:dropdown "YES" + #:action (lambda (obj val index lbstate) + ;; (print obj " " val " " index " " lbstate) + (dboard:tabdat-command-set! tabdat val) + (dashboard:update-run-command tabdat)))) + (default-cmd (car cmds-list))) + (iuplistbox-fill-list lb cmds-list selected-item: default-cmd) + (dboard:tabdat-command-set! 
tabdat default-cmd) + lb)))) + +(define (dcommon:command-runname-selector commondat tabdat #!key (tab-num #f)) ;; alldat data) + (iup:frame + #:title "Runname" + (let* ((default-run-name (seconds->work-week/day (current-seconds))) + (tb (iup:textbox #:expand "HORIZONTAL" + #:action (lambda (obj val txt) + (debug:catch-and-dump + (lambda () + ;; (print "obj: " obj " val: " val " unk: " unk) + (dboard:tabdat-run-name-set! tabdat txt) ;; (iup:attribute obj "VALUE")) + (dashboard:update-run-command tabdat)) + "command-runname-selector tb action")) + #:value (or default-run-name (dboard:tabdat-run-name tabdat)))) + (lb (iup:listbox #:expand "HORIZONTAL" + #:dropdown "YES" + #:action (lambda (obj val index lbstate) + (debug:catch-and-dump + (lambda () + (if (not (equal? val "")) + (begin + (iup:attribute-set! tb "VALUE" val) + (dboard:tabdat-run-name-set! tabdat val) + (dashboard:update-run-command tabdat)))) + "command-runname-selector lb action")))) + (refresh-runs-list (lambda () + (if (dashboard:database-changed? commondat tabdat context-key: 'runname-selector-runs-list) + (let* (;; (target (dboard:tabdat-target-string tabdat)) + (runs-for-targ (rmt:get-runs-by-patt (dboard:tabdat-keys tabdat) "%" #f #f #f #f 0)) + (runs-header (vector-ref runs-for-targ 0)) + (runs-dat (vector-ref runs-for-targ 1)) + (run-names (cons default-run-name + (map (lambda (x) + (db:get-value-by-header x runs-header "runname")) + runs-dat)))) + ;; (print "DEBUGINFO: run-names=" run-names) + ;; (iup:attribute-set! lb "REMOVEITEM" "ALL") + (iuplistbox-fill-list lb run-names selected-item: default-run-name)))))) + ;; (dboard:tabdat-updater-for-runs-set! tabdat refresh-runs-list) + (dboard:commondat-add-updater commondat refresh-runs-list tab-num: tab-num) + ;; (refresh-runs-list) + (dboard:tabdat-run-name-set! 
tabdat default-run-name) + (iup:hbox + tb + lb)))) + +(define (dcommon:command-testname-selector commondat tabdat update-keyvals) ;; key-listboxes) + (iup:vbox + ;; Text box for test patterns + (iup:frame + #:title "Test patterns (one per line)" + (let ((tb (iup:textbox #:action (lambda (val a b) + (debug:catch-and-dump + (lambda () + (dboard:tabdat-test-patts-set!-use + tabdat + (dboard:lines->test-patt b)) + (dashboard:update-run-command tabdat)) + "command-testname-selector tb action")) + #:value (dboard:test-patt->lines + (dboard:tabdat-test-patts-use tabdat)) + #:expand "YES" + #:size "x30" ;; was 10x30 + #:multiline "YES"))) + (set! test-patterns-textbox tb) + (dboard:tabdat-test-patterns-textbox-set! tabdat tb) + tb)) +;; (iup:frame +;; #:title "Target" +;; ;; Target selectors +;; (apply iup:hbox +;; (let* ((dat (dashboard:update-target-selector tabdat action-proc: update-keyvals)) +;; (key-lb (car dat)) +;; (combos (cadr dat))) +;; combos))) + ;; (iup:hbox + ;; ;; Text box for STATES + ;; (iup:frame + ;; #:title "States" + ;; (dashboard:text-list-toggle-box + ;; ;; Move these definitions to common and find the other useages and replace! + ;; (map cadr *common:std-states*) ;; '("COMPLETED" "RUNNING" "STUCK" "INCOMPLETE" "LAUNCHED" "REMOTEHOSTSTART" "KILLED") + ;; (lambda (all) + ;; (dboard:tabdat-states-set! tabdat all) + ;; (dashboard:update-run-command tabdat)))) + ;; ;; Text box for STATES + ;; (iup:frame + ;; #:title "Statuses" + ;; (dashboard:text-list-toggle-box + ;; (map cadr *common:std-statuses*) ;; '("PASS" "FAIL" "n/a" "CHECK" "WAIVED" "SKIP" "DELETED" "STUCK/DEAD") + ;; (lambda (all) + ;; (dboard:tabdat-statuses-set! 
tabdat all) + ;; (dashboard:update-run-command tabdat))))) + )) + +(define (dcommon:command-tests-tasks-canvas tabdat test-records sorted-testnames tests-draw-state) + (iup:frame + #:title "Tests and Tasks" + (let* ((updater #f) + (last-xadj 0) + (last-yadj 0) + (the-cnv #f) + (canvas-obj + (iup:canvas #:action (make-canvas-action + (lambda (cnv xadj yadj) + (if (not updater) + (set! updater (lambda (xadj yadj) + ;; (print "cnv: " cnv " xadj: " xadj " yadj: " yadj) + (dashboard:draw-tests cnv xadj yadj tests-draw-state sorted-testnames test-records) + (set! last-xadj xadj) + (set! last-yadj yadj)))) + (updater xadj yadj) + (set! the-cnv cnv) + )) + ;; Following doesn't work + #:wheel-cb (lambda (obj step x y dir) ;; dir is 4 for up and 5 for down. I think. + (let ((scalef (hash-table-ref tests-draw-state 'scalef))) + (hash-table-set! tests-draw-state 'scalef (+ scalef + (if (> step 0) + (* scalef 0.01) + (* scalef -0.01)))) + (if the-cnv + (dashboard:draw-tests the-cnv last-xadj last-yadj tests-draw-state sorted-testnames test-records)) + )) + ;; #:size "250x250" + #:expand "YES" + #:scrollbar "YES" + #:posx "0.5" + #:posy "0.5" + #:button-cb (lambda (obj btn pressed x y status) + ;; (print "obj: " obj ", pressed " pressed ", status " status) + ; (print "canvas-origin: " (canvas-origin the-cnv)) + ;; (let-values (((xx yy)(canvas-origin the-cnv))) + ;; (canvas-transform-set! 
the-cnv #f) + ;; (print "canvas-origin: " xx " " yy " click at " x " " y)) + (let* ((tests-info (hash-table-ref tests-draw-state 'tests-info)) + (selected-tests (hash-table-ref tests-draw-state 'selected-tests)) + (scalef (hash-table-ref tests-draw-state 'scalef)) + (sizey (hash-table-ref tests-draw-state 'sizey)) + (xoffset (dcommon:get-xoffset tests-draw-state #f #f)) + (yoffset (dcommon:get-yoffset tests-draw-state #f #f)) + (new-y (- sizey y)) + (test-patterns-textbox (dboard:tabdat-test-patterns-textbox tabdat))) + ;; (print "xoffset=" xoffset ", yoffset=" yoffset) + ;; (print "\tx\ty\tllx\tlly\turx\tury") + (for-each (lambda (test-name) + (let* ((rec-coords (hash-table-ref tests-info test-name)) + (llx (dcommon:x->canvas (list-ref rec-coords 0) scalef xoffset)) + (lly (dcommon:y->canvas (list-ref rec-coords 1) scalef yoffset)) + (urx (dcommon:x->canvas (list-ref rec-coords 2) scalef xoffset)) + (ury (dcommon:y->canvas (list-ref rec-coords 3) scalef yoffset))) + ;; (if (eq? pressed 1) + ;; (print "\tx=" x "\ty=" y "\tnew-y=" new-y "\tllx=" llx "\tlly=" lly "\turx=" urx "\tury=" ury "\t" test-name " ")) + (if (and (eq? pressed 1) + (>= x llx) + (>= new-y lly) + (<= x urx) + (<= new-y ury)) + (let* ((box-patterns (string-split (iup:attribute test-patterns-textbox "VALUE"))) + (test-patts (string-split (or (dboard:tabdat-test-patts tabdat) + "") + ",")) + (patterns (delete-duplicates (append box-patterns test-patts)))) + (let* ((selected (not (member test-name patterns))) + (newpatt-list (if selected + (cons test-name patterns) + (delete test-name patterns))) + (newpatt (string-intersperse newpatt-list "\n"))) + (iup:attribute-set! test-patterns-textbox "VALUE" newpatt) + (iup:attribute-set! obj "REDRAW" "ALL") + (hash-table-set! 
selected-tests test-name selected) + (dboard:tabdat-test-patts-set!-use tabdat (dboard:lines->test-patt newpatt)) + (dashboard:update-run-command tabdat) + (if updater (updater last-xadj last-yadj))))))) + (hash-table-keys tests-info))))))) + canvas-obj))) + +;;====================================================================== +;; S T E P S +;;====================================================================== + +(define (dcommon:populate-steps teststeps steps-matrix run-id test-id) + (let* ((max-row 0) + (max-col 9) + (white "255 255 255") + + (testinfo (rmt:get-testinfo-state-status run-id test-id)) + (state (db:test-get-state testinfo)) + (status (db:test-get-status testinfo)) + (test-status-color (car (gutils:get-color-for-state-status state status))) + (running-color (car (gutils:get-color-for-state-status "RUNNING" "STARTED"))) + (failcolor (car (gutils:get-color-for-state-status "COMPLETED" "FAIL")))) + (if (null? teststeps) + (begin + (iup:attribute-set! steps-matrix "CLEARATTRIB" "CONTENTS") + (iup:attribute-set! steps-matrix "CLEARVALUE" "CONTENTS")) + (let loop ((hed (car teststeps)) + (tal (cdr teststeps)) + (rownum 1) + (colnum 1)) + (if (> rownum max-row)(set! max-row rownum)) + (let* ((status (vector-ref hed 3)) + (val (vector-ref hed (- colnum 1))) + (bgcolor (cond + ((member (conc status) '("" "-" "#")) + running-color) + + ((member (conc status) '("0" 0)) + white) + (else test-status-color))) + ; (else failcolor))) + (mtrx-rc (conc rownum ":" colnum))) + ;;(print "BB> status=>"status"< bgcolor="bgcolor) + (iup:attribute-set! steps-matrix mtrx-rc (if val (conc val) "")) + (if (< colnum 5) + (iup:attribute-set! steps-matrix (conc "BGCOLOR" mtrx-rc) bgcolor)) + (if (< colnum max-col) + (loop hed tal rownum (+ colnum 1)) + (if (not (null? 
tal)) + (loop (car tal) (cdr tal) (+ rownum 1) 1)))))) + (if (> max-row 0) + (begin + ;; we are going to speculatively clear rows until we find a row that is already cleared + (let loop ((rownum (+ max-row 1)) + (colnum 0) + (deleted #f)) + ;; (debug:print-info 0 *default-log-port* "cleaning " rownum ":" colnum) + (let* ((next-row (if (eq? colnum max-col) (+ rownum 1) rownum)) + (next-col (if (eq? colnum max-col) 1 (+ colnum 1))) + (mtrx-rc (conc rownum ":" colnum)) + (curr-val (iup:attribute steps-matrix mtrx-rc))) + ;; (debug:print-info 0 *default-log-port* "cleaning " rownum ":" colnum " currval= " curr-val) + (if (and (string? curr-val) + (not (equal? curr-val ""))) + (begin + (iup:attribute-set! steps-matrix mtrx-rc "") + (loop next-row next-col #t)) + (if (eq? colnum max-col) ;; not done, didn't get a full blank row + (if deleted (loop next-row next-col #f)) ;; exit on this not met + (loop next-row next-col deleted))))) + (iup:attribute-set! steps-matrix "REDRAW" "ALL"))))) + +;;====================================================================== +;; U T I L I T I E S +;;====================================================================== + +(define (dcommon:run-html-viewer lfilename) + (let ((htmlviewercmd (configf:lookup *configdat* "setup" "htmlviewercmd"))) + (if htmlviewercmd + (system (conc "(" htmlviewercmd " " lfilename " ) &")) + (iup:send-url lfilename)))) + +;;====================================================================== +;; diff-report +;;====================================================================== + +(define css "") + +(define (diff:tests-mindat->hash tests-mindat) + (let* ((res (make-hash-table))) + (for-each + (lambda (item) + (let* ((test-name+item-path (cons (list-ref item 0) (list-ref item 1))) + (value (list-ref item 2))) + (hash-table-set! 
res test-name+item-path value))) + tests-mindat) + res)) + +;; return 1 if status1 is better +;; return 0 if status1 and 2 are equally good +;; return -1 if status2 is better +(define (diff:status-compare3 status1 status2) + (let* + ((status-goodness-ranking (list "PASS" "WARN" "WAIVED" "SKIP" "FAIL" "ABORT" #f)) + (mem1 (member status1 status-goodness-ranking)) + (mem2 (member status2 status-goodness-ranking)) + ) + (cond + ((and (not mem1) (not mem2)) 0) + ((not mem1) -1) + ((not mem2) 1) + ((= (length mem1) (length mem2)) 0) + ((> (length mem1) (length mem2)) 1) + (else -1)))) + + +(define (diff:xor-tests-mindat src-tests-mindat dest-tests-mindat #!key (hide-clean #f) (consistent-fail-not-clean #f)) + (let* ((src-hash (diff:tests-mindat->hash src-tests-mindat)) + (dest-hash (diff:tests-mindat->hash dest-tests-mindat)) + (all-keys + (reverse (sort + (delete-duplicates + (append (hash-table-keys src-hash) (hash-table-keys dest-hash))) + + (lambda (a b) + (cond + ((< 0 (string-compare3 (car a) (car b))) #t) + ((> 0 (string-compare3 (car a) (car b))) #f) + ((< 0 (string-compare3 (cdr a) (cdr b))) #t) + (else #f))) + + )))) + (let ((res + (map ;; TODO: rename xor to delta globally in dcommon and dashboard + (lambda (key) + (let* ((test-name (car key)) + (item-path (cdr key)) + + (dest-value (hash-table-ref/default dest-hash key (list 0 "NULL" "NULL"))) ;; (list test-id state status) + (dest-test-id (list-ref dest-value 0)) + (dest-state (list-ref dest-value 1)) + (dest-status (list-ref dest-value 2)) + + (src-value (hash-table-ref/default src-hash key (list 0 "NULL" "NULL"))) ;; (list test-id state status) + (src-test-id (list-ref src-value 0)) + (src-state (list-ref src-value 1)) + (src-status (list-ref src-value 2)) + + (incomplete-statuses '("DELETED" "INCOMPLETE" "STUCK/DEAD" "N/A")) ;; if any of these statuses apply, treat test as incomplete + + (dest-complete + (and dest-value dest-state dest-status + (equal? 
dest-state "COMPLETED") + (not (member dest-status incomplete-statuses)))) + (src-complete + (and src-value src-state src-status + (equal? src-state "COMPLETED") + (not (member src-status incomplete-statuses)))) + (status-compare-result (diff:status-compare3 src-status dest-status)) + (xor-new-item + (cond + ;; complete, for this case means: state=complete AND status not in ( deleted uncomplete stuck/dead n/a ) + ;; every clause yields (test-id diff-state diff-status src-value dest-value); + ;; src-value and dest-value must be INSIDE the list, otherwise the clause returns dest-value alone + ;; neither complete -> bad + + ;; src !complete, dest complete -> better + ((and (not dest-complete) (not src-complete)) + (list dest-test-id "BOTH-BAD" "BOTH-INCOMPLETE" src-value dest-value)) + ((not dest-complete) + (list src-test-id "NOT-IN-DEST" "DEST-INCOMPLETE" src-value dest-value)) + ((not src-complete) + (list dest-test-id "NOT-IN-SRC" "SRC-INCOMPLETE" src-value dest-value)) + ((and + (equal? src-state dest-state) + (equal? src-status dest-status)) + (if (and consistent-fail-not-clean (not (member dest-status '("PASS" "SKIP" "WAIVED" "WARN")))) + (list dest-test-id (conc "BOTH-BAD") (conc "CLEAN-" dest-status) src-value dest-value) + (list dest-test-id (conc "CLEAN") (conc "CLEAN-" dest-status) src-value dest-value))) + ;; better or worse: pass > warn > waived > skip > fail > abort + ;; pass > warn > waived > skip > fail > abort + + ((= 1 status-compare-result) ;; src is better, dest is worse + (list dest-test-id "WORSE" (conc src-status "->" dest-status) src-value dest-value)) + (else + (list dest-test-id "BETTER" (conc src-status "->" dest-status) src-value dest-value))))) + (list test-name item-path xor-new-item))) + all-keys))) + + (if hide-clean + (filter + (lambda (item) + (not + (equal? + "CLEAN" + (list-ref (list-ref item 2) 1)))) + res) + res)))) + +(define (diff:run-name->run-id run-name) + (if (number? run-name) + run-name + (let* ((qry-res (rmt:get-runs run-name 1 0 '()))) + (if (eq? 
2 (vector-length qry-res)) + ;; an empty row list means nothing matched: return #f rather than taking car of '() + (let ((first-ent (vector-ref qry-res 1))) + (if (pair? first-ent) + (vector-ref (car first-ent) 1) + #f)) + #f)))) + +;; Query rmt:get-runs for run-name restricted by target (key values separated by "/"; +;; a #f target matches every key with "%"). Returns element 1 of the first row found, or #f. +;; NOTE(review): element 1 is presumed to be the run id - confirm against rmt:get-runs column order. +(define (diff:target+run-name->run-id target run-name) + (let* ((keys (rmt:get-keys)) + (target-parts (if target (string-split target "/") (map (lambda (x) "%") keys)))) + ;; the target must supply exactly one token per key field + (if (not (= (length keys) (length target-parts))) + (begin + (print "Error: Target ("target") item count does not match fields count target tokens="target-parts" fields="keys) + #f) + (let* ((target-map (zip keys target-parts)) + (qry-res (rmt:get-runs run-name 1 0 target-map))) + + (if (eq? 2 (vector-length qry-res)) + (let ((first-ent (vector-ref qry-res 1))) + (if (> (length first-ent) 0) + (vector-ref (car first-ent) 1) + #f)) + #f))))) + +;; Fetch the tests of run-id matching testpatt and reduce each row to +;; (test-name item-path (id state status)) +(define (diff:run-id->tests-mindat run-id #!key (testpatt "%/%")) + (let* ((states '()) + (statuses '()) + (offset #f) + (limit #f) + (not-in #t) + (sort-by #f) + (sort-order #f) + (qryvals "id,testname,item_path,state,status") + (last-update 0) + (mode #f) + ) + (map + ;; (lambda (row) + ;; (match row + ;; ((#(id test-name item-path state status) + ;; (list test-name item-path (list id state status)))) + ;; (else #f))) + (lambda (row) + (let* ((id (vector-ref row 0)) + (test-name (vector-ref row 1)) + (item-path (vector-ref row 2)) + (state (vector-ref row 3)) + (status (vector-ref row 4))) + (list test-name item-path (list id state status)))) + + (rmt:get-tests-for-run run-id + testpatt states statuses + offset limit + not-in sort-by sort-order + qryvals + last-update + mode)))) + + +(define (diff:diff-runs src-run-id dest-run-id) + (let* ((src-tests-mindat (diff:run-id->tests-mindat src-run-id)) + (dest-tests-mindat (diff:run-id->tests-mindat dest-run-id))) + (diff:xor-tests-mindat src-tests-mindat dest-tests-mindat consistent-fail-not-clean: #t))) + + +(define (diff:rundiff-find-by-state run-diff state) + (filter + (lambda (x) + (equal? 
(list-ref (caddr x) 1) state)) + run-diff)) + +(define (diff:rundiff-clean-breakdown run-diff) + (map + (lambda (run-diff-item) + (match run-diff-item + ((test-name item-path (junk-id diff-state diff-status (src-test-id src-state src-status) (dest-test-id dest-state dest-status))) + (list test-name item-path "CLEAN" src-status)) + (else ""))) + (diff:rundiff-find-by-state run-diff "CLEAN"))) + +(define (diff:summarize-run-diff run-diff) + + (let* ((diff-states (list "CLEAN" "BETTER" "WORSE" "BOTH-BAD" "NOT-IN-DEST" "NOT-IN-SRC" ))) + (map + (lambda (state) + (list state + (length (diff:rundiff-find-by-state run-diff state)))) + diff-states))) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Presentation code below, business logic above ;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define (diff:stml->string in-stml) + (with-output-to-string + (lambda () + (s:output-new + (current-output-port) + in-stml)))) + +(define (diff:state-status->bgcolor state status) + (match (list state status) + (("CLEAN" _) "#88ff88") + (("BETTER" _) "#33ff33") + (("WORSE" _) "#ff3333") + (("BOTH-BAD" _) "#ff3333") + ((_ "WARN") "#ffff88") + ((_ "FAIL") "#ff8888") + ((_ "ABORT") "#ff0000") + ((_ "PASS") "#88ff88") + ((_ "SKIP") "#ffff00") + (else "#ffffff"))) + +(define (diff:test-state-status->diff-report-cell state status) + (s:td 'bgcolor (diff:state-status->bgcolor state status) status)) + +(define (diff:diff-state-status->diff-report-cell state status) + (s:td state 'bgcolor (diff:state-status->bgcolor state status))) + + +(define (diff:megatest-html-logo) + + "
+___  ___                 _            _
+|  \\/  | ___  __ _  __ _| |_ ___  ___| |_
+| |\\/| |/ _ \\/ _` |/ _` | __/ _ \\/ __| __|
+| |  | |  __/ (_| | (_| | ||  __/\\__ \\ |_
+|_|  |_|\\___|\\__, |\\__,_|\\__\\___||___/\\__|
+             |___/
+
") + +(define (diff:megatest-html-diff-logo) + "
+___  ___                 _            _
+|  \\/  | ___  __ _  __ _| |_ ___  ___| |_  |  _ \\(_)/ _|/ _|
+| |\\/| |/ _ \\/ _` |/ _` | __/ _ \\/ __| __| | | | | | |_| |_
+| |  | |  __/ (_| | (_| | ||  __/\\__ \\ |_  | |_| | |  _|  _|
+|_|  |_|\\___|\\__, |\\__,_|\\__\\___||___/\\__| |____/|_|_| |_|
+             |___/
+
") + + +(define (diff:run-id->target+run-name+starttime run-id) + (let* ((target (rmt:get-target run-id)) + (runinfo (rmt:get-run-info run-id)) ; vector of header (list) and result (vector) + (info-hash (alist->hash-table + (map (lambda (x) (cons (car x) (cadr x))) ; make it a useful hash + (zip (vector-ref runinfo 0) (vector->list (vector-ref runinfo 1)))))) + (run-name (hash-table-ref/default info-hash "runname" "N/A")) + (start-time (hash-table-ref/default info-hash "event_time" 0))) + (list target run-name start-time))) + +;; Build the html diff report for two runs; optionally write it to html-output-file and/or +;; mail it to email-recipients-list. Returns the html body string. +(define (diff:deliver-diff-report src-run-id dest-run-id + #!key + (html-output-file #f) + (email-subject-prefix "[MEGATEST DIFF]") + (email-recipients-list '()) ) + (let* ((src-info (diff:run-id->target+run-name+starttime src-run-id)) + (src-target (car src-info)) + (src-run-name (cadr src-info)) + (src-start (conc (seconds->string (caddr src-info)) " " (local-timezone-abbreviation))) + (dest-info (diff:run-id->target+run-name+starttime dest-run-id)) + (dest-target (car dest-info)) + (dest-run-name (cadr dest-info)) + (dest-start (conc (seconds->string (caddr dest-info)) " " (local-timezone-abbreviation))) + + + (run-diff (diff:diff-runs src-run-id dest-run-id )) + (test-count (length run-diff)) + (summary-table + (apply s:table 'cellspacing "0" 'border "1" + (s:tr + (s:th "Diff type") + (s:th "% share") + (s:th "Count")) + + (map + (lambda (state-count) + (s:tr + (diff:diff-state-status->diff-report-cell (car state-count) #f) + (s:td 'align "right" (fmt #f + (decimal-align 3 + (fix 2 + (num/fit 6 + ;; an empty diff has test-count 0; report a 0% share instead of dividing by zero + (if (> test-count 0) + (* 100 (/ (cadr state-count) test-count)) + 0)))))) + (s:td 'align "right" (cadr state-count)))) + (diff:summarize-run-diff run-diff)))) + (meta-table + (s:table 'cellspacing "0" 'border "1" + + (s:tr + (s:td 'colspan "2" + (s:table 'cellspacing "0" 'border "1" + (s:tr + (s:th 'align "LEFT" "") (s:th "SOURCE RUN") (s:th "DESTINATION RUN")) + (s:tr + (s:th 'align "LEFT" "Started") (s:td src-start) (s:td dest-start)) + (s:tr + (s:th 'align "LEFT" 
"TARGET") (s:td src-target) (s:td dest-target)) + (s:tr + (s:th 'align "LEFT" "RUN NAME") (s:td src-run-name) (s:td dest-run-name))))))) + + (main-table + (apply s:table 'cellspacing "0" 'border "1" + (s:tr + (s:th "Test name") + (s:th "Item Path") + (s:th (conc "SOURCE")) + (s:th (conc "DEST")) + (s:th "Diff")) + (map + (lambda (run-diff-item) + (match run-diff-item + ((test-name item-path (junk-id diff-state diff-status (src-test-id src-state src-status) (dest-test-id dest-state dest-status))) + (s:tr + (s:td test-name) + (s:td item-path) + (diff:test-state-status->diff-report-cell src-state src-status) + (diff:test-state-status->diff-report-cell dest-state dest-status) + (diff:diff-state-status->diff-report-cell diff-state diff-status))) + (else ""))) + (filter (lambda (run-diff-item) + (match run-diff-item + ((test-name item-path (junk-id diff-state diff-status (src-test-id src-state src-status) (dest-test-id dest-state dest-status))) + (not (equal? diff-state "CLEAN"))) + (else #f))) + run-diff)))) + (email-subject (conc email-subject-prefix " " src-target "/" src-run-name" vs. 
"dest-target"/"dest-run-name)) + (html-body (diff:stml->string (s:body + (diff:megatest-html-diff-logo) + (s:h2 "Summary") + (s:table 'border "0" + (s:tr + (s:td "Diff calculated at") + (s:td (conc (seconds->string) " " (local-timezone-abbreviation)))) + (s:tr + (s:td "MT_RUN_AREA_HOME" ) (s:td *toppath*)) + (s:tr 'valign "TOP" + (s:td summary-table) + (s:td meta-table))) + (s:h2 "Diffs + consistently failing tests") + main-table))) + + ) + (if html-output-file + (with-output-to-file html-output-file (lambda () (print html-body)))) + (when (and email-recipients-list (> (length email-recipients-list) 0)) + (sendmail (string-join email-recipients-list ",") email-subject html-body use_html: #t)) + html-body)) + + + + + +;; (let* ((src-run-name "all57") +;; (dest-run-name "all60") +;; (src-run-id (diff:run-name->run-id src-run-name)) +;; (dest-run-id (diff:run-name->run-id dest-run-name)) +;; (to-list (list "bjbarcla"))) +;; (diff:deliver-diff-report src-run-id dest-run-id email-recipients-list: to-list html-output-file: "/tmp/bjbarcla/zippy.html") +;; ) + +(define (do-diff-report src-target src-runname dest-target dest-runname html-file to-list-raw) + (let* (;;(src-target "nope%") + ;;(src-runname "all57") + ;;(dest-target "%") + ;;(dest-runname "all60") + (src-run-id (diff:target+run-name->run-id src-target src-runname)) + (dest-run-id (diff:target+run-name->run-id dest-target dest-runname)) + ;(html-file "/tmp/bjbarcla/zippy.html") + (to-list (if (string? 
to-list-raw) (string-split to-list-raw ",:") #f)) + ) + + (cond + ((not src-run-id) ;; source lookup returned #f + (print "No match for source target/runname="src-target"/"src-runname) + (print "Cannot proceed.") + #f) + ((not dest-run-id) ;; destination lookup returned #f + (print "No match for destination target/runname="dest-target"/"dest-runname) + (print "Cannot proceed.") + #f) + (else + (diff:deliver-diff-report src-run-id dest-run-id email-recipients-list: to-list html-output-file: html-file))))) + + +;;====================================================================== +;; env +;;====================================================================== + +(define (env:open-db fname) + (let* ((db-exists (common:file-exists? fname)) + (db (open-database fname))) + (if (not db-exists) + (begin + (exec (sql db "CREATE TABLE envvars ( + id INTEGER PRIMARY KEY, + context TEXT NOT NULL, + var TEXT NOT NULL, + val TEXT NOT NULL, + CONSTRAINT envvars_constraint UNIQUE (context,var))")))) + (set-busy-handler! db (busy-timeout 10000)) + db)) + +;; save vars in given context, this is NOT incremental by default +;; +(define (env:save-env-vars db context #!key (incremental #f)(vardat #f)) + (with-transaction + db + (lambda () + ;; first clear out any vars for this context + (if (not incremental)(exec (sql db "DELETE FROM envvars WHERE context=?") context)) + (for-each + (lambda (varval) + (let ((var (car varval)) + (val (cdr varval))) + (if incremental (exec (sql db "DELETE FROM envvars WHERE context=? 
AND var=?") context var)) + (exec (sql db "INSERT INTO envvars (context,var,val) VALUES (?,?,?)") context var val))) + (if vardat + (hash-table->alist vardat) + (get-environment-variables)))))) + +;; merge contexts in the order given +;; - each context is applied in the given order +;; - variables in the paths list are split on the separator and the components +;; merged using simple delta addition +;; returns a hash of the merged vars +;; NOTE: basecontext is accepted but not referenced in the body +;; +(define (env:merge-contexts db basecontext contexts paths) + (let ((result (make-hash-table))) + (for-each + (lambda (context) + (query + (for-each-row + (lambda (row) + (let ((var (car row)) + (val (cadr row))) + (hash-table-set! result var + (if (and (hash-table-ref/default result var #f) + (assoc var paths)) ;; this var is a path and there is a previous path + (let ((sep (cadr (assoc var paths)))) + (env:merge-path-envvar sep (hash-table-ref result var) val)) + val))))) + (sql db "SELECT var,val FROM envvars WHERE context=?") + context)) + contexts) + result)) + +;; get list of removed variables between two contexts +;; +(define (env:get-removed db contexta contextb) + (let ((result (make-hash-table))) + (query + (for-each-row + (lambda (row) + (let ((var (car row)) + (val (cadr row))) + (hash-table-set! result var val)))) + (sql db "SELECT var,val FROM envvars WHERE context=? AND var NOT IN (SELECT var FROM envvars WHERE context=?)") + contexta contextb) + result)) + +;; get list of variables added to contextb from contexta +;; +(define (env:get-added db contexta contextb) + (let ((result (make-hash-table))) + (query + (for-each-row + (lambda (row) + (let ((var (car row)) + (val (cadr row))) + (hash-table-set! result var val)))) + (sql db "SELECT var,val FROM envvars WHERE context=? 
AND var NOT IN (SELECT var FROM envvars WHERE context=?)") + contextb contexta) + result)) + +;; get list of variables in both contexta and contexb that have been changed +;; +(define (env:get-changed db contexta contextb) + (let ((result (make-hash-table))) + (query + (for-each-row + (lambda (row) + (let ((var (car row)) + (val (cadr row))) + (hash-table-set! result var val)))) + (sql db "SELECT var,val FROM envvars AS a WHERE context=? AND val != (SELECT val FROM envvars WHERE var=a.var AND context=?)") + contextb contexta) + result)) + +;; +(define (env:blind-merge l1 l2) + (if (null? l1) l2 + (if (null? l2) l1 + (cons (car l1) (cons (car l2) (env:blind-merge (cdr l1) (cdr l2))))))) + +;; given a before and an after envvar calculate a new merged path +;; +(define (env:merge-path-envvar separator patha pathb) + (let* ((patha-parts (string-split patha separator)) + (pathb-parts (string-split pathb separator)) + (common-parts (lset-intersection equal? patha-parts pathb-parts)) + (final (delete-duplicates ;; env:blind-merge + (append pathb-parts common-parts patha-parts)))) +;; (print "BEFORE: " (string-intersperse patha-parts "\n ")) +;; (print "AFTER: " (string-intersperse pathb-parts "\n ")) +;; (print "COMMON: " (string-intersperse common-parts "\n ")) + (string-intersperse final separator))) + +(define (env:process-path-envvar varname separator patha pathb) + (let ((newpath (env:merge-path-envvar separator patha pathb))) + (setenv varname newpath))) + +(define (env:have-context db context) + (> (query fetch-value (sql db "SELECT count(id) FROM envvars WHERE context=?") context) + 0)) + +;; this is so the calling block does not need to import sql-de-lite +(define (env:close-database db) + (close-database db)) + +(define (env:lazy-hash-table->alist indat) + (if (hash-table? indat) + (let ((dat (hash-table->alist indat))) + (if (null? 
dat) + #f + dat)) + #f)) + +(define (env:inc-path path) + (print "PATH " + (conc "#{scheme (env:min-path \"" path "\" \"#{getenv PATH}\")}"))) +;; (conc +;; "#{scheme (string-intersperse " +;; "(delete-duplicates " +;; "(append (string-split \"" path "\" \":\") " +;; "(string-split \"#{getenv PATH}\" \":\")))" +;; " \":\")}"))) + +(define (env:min-path path1 path2) + (string-intersperse + (delete-duplicates + (append + (string-split path1 ":") + (string-split path2 ":"))) + ":")) + +;; inc path will set a PATH that is incrementally modified when read - config mode only +;; +(define (env:print added removed changed #!key (inc-path #t)) + (let ((a (env:lazy-hash-table->alist added)) + (r (env:lazy-hash-table->alist removed)) + (c (env:lazy-hash-table->alist changed))) + (case (if (args:get-arg "-dumpmode") + (string->symbol (args:get-arg "-dumpmode")) + 'bash) + ((bash) + (if a + (begin + (print "# Added vars") + (map (lambda (dat)(print "export " (car dat) "=" (cdr dat))) + (hash-table->alist added)))) + (if r + (begin + (print "# Removed vars") + (map (lambda (dat)(print "unset " (car dat))) + (hash-table->alist removed)))) + (if c + (begin + (print "# Changed vars") + (map (lambda (dat)(print "export " (car dat) "=" (cdr dat))) + (hash-table->alist changed))))) + ((csh) + (if a + (begin + (print "# Added vars") + (map (lambda (dat)(print "setenv " (car dat) " " (cdr dat))) + (hash-table->alist added)))) + (if r + (begin + (print "# Removed vars") + (map (lambda (dat)(print "unsetenv " (car dat))) + (hash-table->alist removed)))) + (if c + (begin + (print "# Changed vars") + (map (lambda (dat)(print "setenv " (car dat) " " (cdr dat))) + (hash-table->alist changed))))) + ((config ini) + (if a + (begin + (print "# Added vars") + (map (lambda (dat) + (let ((var (car dat)) + (val (cdr dat))) + (if (and inc-path + (equal? 
var "PATH")) + (env:inc-path val) + (print var " " val)))) + (hash-table->alist added)))) + (if r + (begin + (print "# Removed vars") + (map (lambda (dat)(print "#{scheme (unsetenv \"" (car dat) "\")}")) + (hash-table->alist removed)))) + (if c + (begin + (print "# Changed vars") + (map (lambda (dat) + (let ((var (car dat)) + (val (cdr dat))) + (if (and inc-path + (equal? var "PATH")) + (env:inc-path val) + (print var " " val)))) + (hash-table->alist changed))))) + (else + (debug:print-error 0 *default-log-port* "No dumpmode specified, use -dumpmode [bash|csh|config]"))))) +;;====================================================================== +;; ezsteps +;;====================================================================== + + +(define (ezsteps:run-from testdat start-step-name run-one) + ;;# TODO - recapture item variables, debug repeated step eval; regen logpro from test + (let* ((do-update-test-state-status #f) + (test-run-dir ;; (filedb:get-path *fdb* + (db:test-get-rundir testdat)) ;; ) + (testconfig (read-config (conc test-run-dir "/testconfig") #f #t environ-patt: "pre-launch-env-vars")) + (ezstepslst (hash-table-ref/default testconfig "ezsteps" '())) + (run-mutex (make-mutex)) + (rollup-status 0) + (rollup-status-string #f) + (rollup-status-sym #f) + (exit-info (vector #t #t #t)) + (test-id (db:test-get-id testdat)) + (run-id (db:test-get-run_id testdat)) + (test-name (db:test-get-testname testdat)) + (orig-test-state (db:test-get-state testdat)) + (orig-test-status (db:test-get-status testdat)) + (kill-job #f)) ;; for future use (on re-factoring with launch.scm code + + ;; keep trying till NFS deigns to populate test run dir on this host + (let loop ((count 5)) + (if (not (common:file-exists? test-run-dir)) + ;;(push-directory test-run-dir) + (if (> count 0) + (begin + (debug:print 0 *default-log-port* "WARNING: ezsteps attempting to run but test run directory " test-run-dir " is not there. 
Waiting and trying again " count " more times") + (sleep 3) + (loop (- count 1)))))) + + (debug:print-info 0 *default-log-port* "Running in directory " test-run-dir) + (if (not (common:file-exists? ".ezsteps"))(create-directory ".ezsteps")) + ;; if ezsteps was defined then we are sure to have at least one step but check anyway + + (if (not (> (length ezstepslst) 0)) + (message-window "ERROR: You can only re-run steps defined via ezsteps") + (begin + (let loop ((ezstep (car ezstepslst)) + (tal (cdr ezstepslst)) + (status-sym-so-far 'pass) + ;;(runflag #f) + (saw-start-step-name #f)) ;; flag used to skip steps when not starting at the beginning + (if (vector-ref exit-info 1) + (let* ((stepname (car ezstep)) ;; do stuff to run the step + (logpro-used (common:file-exists? (conc test-run-dir "/" stepname ".logpro"))) + (stepinfo (cadr ezstep)) + (stepparts (string-match (regexp "^(\\{([^\\}]*)\\}\\s*|)(.*)$") stepinfo)) + (stepparms (list-ref stepparts 2)) ;; for future use, {VAR=1,2,3}, run step for each + (stepcmd (list-ref stepparts 3)) + (script (conc "mt_ezstep '"test-run-dir"' '"stepname"' '"stepcmd"'")) ;; call the command using mt_ezstep + (saw-start-step-name-next (or saw-start-step-name (equal? stepname start-step-name))) + (proceed-with-this-step + (or (not start-step-name) + (equal? stepname start-step-name) + (and saw-start-step-name (not run-one)) + saw-start-step-name-next + (and start-step-name (equal? stepname start-step-name)))) + ) + (set! do-update-test-state-status (and proceed-with-this-step (null? tal))) + ;;(BB> "stepname="stepname" proceed-with-this-step="proceed-with-this-step " do-update-test-state-status="do-update-test-state-status " orig-test-state="orig-test-state" orig-test-status="orig-test-status) + (cond + ((and (not proceed-with-this-step) (null? 
tal)) + 'done) + ((not proceed-with-this-step) + (loop (car tal) + (cdr tal) + status-sym-so-far + saw-start-step-name-next)) + (else + (debug:print 4 *default-log-port* "ezsteps:\n stepname: " stepname " stepinfo: " stepinfo " stepparts: " stepparts + " stepparms: " stepparms " stepcmd: " stepcmd) + (debug:print 4 *default-log-port* "script: " script) + (rmt:teststep-set-status! run-id test-id stepname "start" "-" #f #f) + + ;; now launch the script + (let ((pid (process-run script))) + (let processloop ((i 0)) + (let-values (((pid-val exit-status exit-code)(process-wait pid #t))) + (mutex-lock! run-mutex) + (vector-set! exit-info 0 pid) + (vector-set! exit-info 1 exit-status) + (vector-set! exit-info 2 exit-code) + (mutex-unlock! run-mutex) + (if (eq? pid-val 0) + (begin + (thread-sleep! 1) + (processloop (+ i 1)))) + )) + (let ((exinfo (vector-ref exit-info 2)) + (logfna (if logpro-used (conc stepname ".html") ""))) + (rmt:teststep-set-status! run-id test-id stepname "end" exinfo #f logfna)) + + (if logpro-used + (rmt:test-set-log! run-id test-id (conc stepname ".html"))) + + ;; set the test final status + (let* ((this-step-status (cond + (logpro-used + (common:logpro-exit-code->status-sym (vector-ref exit-info 2))) + ((eq? (vector-ref exit-info 2) 0) + 'pass) + (else + 'fail))) + (overall-status-sym (common:worse-status-sym this-step-status status-sym-so-far)) + (overall-status-string (status-sym->string overall-status-sym))) + (debug:print 4 *default-log-port* "Exit value received: " (vector-ref exit-info 2) " logpro-used: " logpro-used + " this-step-status: " this-step-status " overall-status: " overall-status-sym) + ;;" next-status: " next-status " rollup-status: " rollup-status) + (set! rollup-status-string overall-status-string) + (set! rollup-status-sym overall-status-sym) + (tests:test-set-status! 
run-id test-id "RUNNING" overall-status-string #f #f))) + + (if (and + (not run-one) + (common:steps-can-proceed-given-status-sym rollup-status-sym) + (not (null? tal))) + (loop (car tal) + (cdr tal) + rollup-status-sym + saw-start-step-name-next))))) + (debug:print 4 *default-log-port* "WARNING: a prior step failed, stopping at " ezstep))) + + ;; Once done with step/steps update the test record + ;; + (let* ((item-path (db:test-get-item-path testdat)) ;; (item-list->path itemdat)) + (testinfo (rmt:get-testinfo-state-status run-id test-id))) ;; refresh the testdat, call it iteminfo in case need prev/curr + ;; Am I completed? + (if (equal? (db:test-get-state testinfo) "RUNNING") ;; (not (equal? (db:test-get-state testinfo) "COMPLETED")) + (let ((new-state (if kill-job "KILLED" "COMPLETED") ;; (if (eq? (vector-ref exit-info 2) 0) ;; exited with "good" status + ;; "COMPLETED" + ;; (db:test-get-state testinfo))) ;; else preseve the state as set within the test + ) + (new-status rollup-status-string) + ) ;; (db:test-get-status testinfo))) + (debug:print-info 2 *default-log-port* "Test NOT logged as COMPLETED, (state=" (db:test-get-state testinfo) "), updating result, rollup-status is " rollup-status) + (tests:test-set-status! run-id test-id + (if do-update-test-state-status new-state orig-test-state) + (if do-update-test-state-status new-status orig-test-status) + (args:get-arg "-m") #f) + ;; need to update the top test record if PASS or FAIL and this is a subtest + (if (and (not (equal? item-path "")) do-update-test-state-status) + (rmt:set-state-status-and-roll-up-items run-id test-name item-path new-state new-status #f)))) + ;; for automated creation of the rollup html file this is a good place... + (if (not (equal? item-path "")) + (tests:summarize-items run-id test-id test-name #f)) ;; don't force - just update if no + ))) + ;;(pop-directory) + rollup-status-string)) + +(define (ezsteps:spawn-run-from testdat start-step-name run-one) + (thread-start! 
+ (make-thread + (lambda () + (ezsteps:run-from testdat start-step-name run-one)) + (conc "ezstep run single step " start-step-name " run-one="run-one))) + ) + +;;====================================================================== +;; genexample +;;====================================================================== + +(define genexample:example-logpro +#< 0 "Put description here" #/put pattern here/) + ;; + ;; You may need ignores to suppress false error or warning hits from the later expects + ;; NOTE: Order is important here! + (expect:ignore in "LogFileBody" < 99 "Ignore the word error in comments" #/^\/\/.*error/) + (expect:warning in "LogFileBody" = 0 "Any warning" #/warn/) + (expect:error in "LogFileBody" = 0 "Any error" (list #/ERROR/ #/error/)) ;; but disallow any other errors +EOF +) + +(define genexample:example-script +#<number (string-split color1))) + (c2 (map string->number (string-split color2))) + (delta (map (lambda (a b)(abs (- a b))) c1 c2))) + (null? (filter (lambda (x)(> x 3)) delta)))) + +(define gutils:colors + '((PASS . "70 249 73") + (FAIL . "253 33 49") + (SKIP . 
"230 230 0")))

;; Look up the color spec stored under effective-state in gutils:colors.
;; Falls back to the entry stored under 'FAIL when there is no match.
(define (gutils:get-color-spec effective-state)
  (or (alist-ref effective-state gutils:colors)
      (alist-ref 'FAIL gutils:colors)))

;; BBnote - state status dashboard button color / text defined here
;;
;; state, status: strings (both are passed through string->symbol).
;; Returns a two element list (color label). The label is `status` for the
;; COMPLETED, ARCHIVED, CLEAN, DIRTY-BETTER, DIRTY-WORSE and BOTH-BAD states
;; and `state` for every other state.
(define (gutils:get-color-for-state-status state status) ;; #!key (get-label #f))
  ;; ((if get-label cadr car)
  (let ((skip-color (lambda () (gutils:get-color-spec 'SKIP))))
    (case (string->symbol state)
      ((COMPLETED) ;; ARCHIVED)
       (case (string->symbol status)
         ((PASS)                     (list "70 249 73" status))
         ((PREQ_FAIL PREQ_DISCARDED) (list "255 127 127" status))
         ((WARN WAIVED)              (list "255 172 13" status))
         ((SKIP)                     (list (skip-color) status))
         ((ABORT)                    (list "198 36 166" status))
         (else                       (list "253 33 49" status))))
      ((ARCHIVED)
       (case (string->symbol status)
         ((PASS)        (list "70 170 73" status))
         ((WARN WAIVED) (list "200 130 13" status))
         ((SKIP)        (list (skip-color) status))
         (else          (list "180 33 49" status))))
      ;; (if (equal? status "PASS")
      ;;     '("70 249 73" "PASS")
      ;;     (if (or (equal? status "WARN")
      ;;             (equal? status "WAIVED"))
      ;;         (list "255 172 13" status)
      ;;         (list "223 33 49" status)))) ;; greenish orangeish redish
      ((LAUNCHED)         (list "101 123 142" state))
      ((CHECK)            (list "255 100 50" state))
      ((REMOTEHOSTSTART)  (list "50 130 195" state))
      ((RUNNING STARTED)  (list "9 131 232" state))
      ((KILLREQ)          (list "39 82 206" state))
      ((KILLED)           (list "234 101 17" state))
      ((NOT_STARTED)
       (case (string->symbol status)
         ((CHECK STARTED) (list (skip-color) state))
         (else            (list "240 240 240" state))))
      ;; for xor mode below
      ;;
      ((CLEAN)
       (case (string->symbol status)
         ((CLEAN-FAIL CLEAN-CHECK CLEAN-ABORT) (list "200 130 13" status)) ;; orange requested for these
         (else                                 (list "60 235 63" status))))
      ((DIRTY-BETTER) (list "160 255 153" status))
      ((DIRTY-WORSE)  (list "165 42 42" status))
      ((BOTH-BAD)     (list "180 33 49" status))

      (else (list "192 192 192" state)))))

;;======================================================================
;; http-transport
;;======================================================================

;; hostport: a list (host port) or #f.
;; Returns "http://host:port", or #f when hostport is #f.
(define (http-transport:make-server-url hostport)
  (and hostport
       (conc "http://" (car hostport) ":" (cadr hostport))))

(define *server-loop-heart-beat* (current-seconds))

;;======================================================================
;; S E R V E R
;; ======================================================================

;; Call this to start the actual server
;;

(define *db:process-queue-mutex* (make-mutex))

;; Configure the web server (root path, directory handler, exception handler
;; and the vhost dispatch table), write the current pid to
;; <tmp-area>/.server-start and then hand over to
;; http-transport:try-start-server.
;;
;; hostn: host name/address to serve on; "-" means "use
;;        server:get-best-guess-address for this host".
(define (http-transport:run hostn)
  (debug:print 2 *default-log-port* "Attempting to start the server ...")
  ;; we don't want the server to be opening and closing the db unnecessarily,
  ;; so no db handle is opened here
  (let* ((hostname       (get-host-name))
         (ipaddrstr      (let ((ipstr (if (string=? "-" hostn)
                                          ;; (string-intersperse (map number->string (u8vector->list (hostname->ip hostname))) ".")
                                          (server:get-best-guess-address hostname)
                                          #f)))
                           (if ipstr ipstr hostn))) ;; hostname)))
         (start-port     (portlogger:open-run-close portlogger:find-port))
         (link-tree-path (common:get-linktree))
         (tmp-area       (common:get-db-tmp-area *alldat*))
         (start-file     (conc tmp-area "/.server-start")))
    (debug:print-info 0 *default-log-port* "portlogger recommended port: " start-port)
    ;; set some parameters for the server
    (root-path (if link-tree-path
                   link-tree-path
                   (current-directory))) ;; WARNING: SECURITY HOLE. FIX ASAP!
    (handle-directory spiffy-directory-listing)
    (handle-exception (lambda (exn chain)
                        (signal (make-composite-condition
                                 (make-property-condition
                                  'server
                                  'message "server error")))))

    ;; http-transport:handle-directory) ;; simple-directory-handler)
    ;; Setup the web server and a /ctrl interface
    ;;
    (vhost-map
     `(((* any)
        . ,(lambda (continue)
             ;; open the db on the first call
             ;; This is where we set up the database connections
             (let* (($    (request-vars source: 'both))
                    ;; the request path is fetched once and compared against each route
                    (path (uri-path (request-uri (current-request)))))
               (cond
                ((equal? path '(/ "api"))
                 (send-response body:    (api:process-request *dbstruct-db* $) ;; the $ is the request vars proc
                                headers: '((content-type text/plain)))
                 (mutex-lock! *heartbeat-mutex*)
                 (set! *db-last-access* (current-seconds))
                 (mutex-unlock! *heartbeat-mutex*))
                ((equal? path '(/ ""))
                 (send-response body: (http-transport:main-page)))
                ((equal? path '(/ "json_api"))
                 (send-response body: (http-transport:main-page)))
                ((equal? path '(/ "runs"))
                 (send-response body: (http-transport:main-page)))
                ((equal? path '(/ any))
                 (send-response body:    "hey there!\n"
                                headers: '((content-type text/plain))))
                ((equal? path '(/ "hey"))
                 (send-response body:    "hey there!\n"
                                headers: '((content-type text/plain))))
                ((equal? path '(/ "jquery3.1.0.js"))
                 (send-response body:    (http-transport:show-jquery)
                                headers: '((content-type application/javascript))))
                ((equal? path '(/ "test_log"))
                 (send-response body:    (http-transport:html-test-log $)
                                headers: '((content-type text/HTML))))
                ((equal? path '(/ "dashboard"))
                 (send-response body:    (http-transport:html-dboard $)
                                headers: '((content-type text/HTML))))
                (else (continue))))))))
    (with-output-to-file start-file (lambda () (print (current-process-id))))
    (http-transport:try-start-server ipaddrstr start-port)))

;; This is recursively run by http-transport:run until successful
;;
;; Records (ipaddrstr portnum) in *server-info* and starts the web server.
;; Any exception raised while doing so marks the port as failed and retries
;; on a freshly allocated port, as long as the failed port was below 64000.
(define (http-transport:try-start-server ipaddrstr portnum)
  (let ((config-hostname  (configf:lookup *configdat* "server" "hostname"))
        (config-use-proxy (equal? (configf:lookup *configdat* "client" "use-http_proxy") "yes")))
    (if (not config-use-proxy)
        (determine-proxy (constantly #f)))
    (debug:print-info 0 *default-log-port* "http-transport:try-start-server time=" (seconds->time-string (current-seconds)) " ipaddrsstr=" ipaddrstr " portnum=" portnum " config-hostname=" config-hostname)
    (handle-exceptions
     exn
     (begin
       (print-error-message exn)
       (if (< portnum 64000)
           (begin
             (debug:print 0 *default-log-port* "WARNING: attempt to start server failed. Trying again ...")
             (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
             (debug:print 5 *default-log-port* "exn=" (condition->list exn))
             (portlogger:open-run-close portlogger:set-failed portnum)
             (debug:print 0 *default-log-port* "WARNING: failed to start on portnum: " portnum ", trying next port")
             (thread-sleep! 0.1)

             ;; get_next_port goes here
             (http-transport:try-start-server ipaddrstr
                                              (portlogger:open-run-close portlogger:find-port)))
           (begin
             (print "ERROR: Tried and tried but could not start the server"))))
     ;; any error in following steps will result in a retry
     (set! *server-info* (list ipaddrstr portnum))
     (debug:print 0 *default-log-port* "INFO: Trying to start server on " ipaddrstr ":" portnum)
     ;; This starts the spiffy server
     ;; NEED WAY TO SET IP TO #f TO BIND ALL
     ;; (start-server bind-address: ipaddrstr port: portnum)
     (if config-hostname ;; this is a hint to bind directly
         (start-server port: portnum bind-address: (if (equal? config-hostname "-")
                                                       ipaddrstr
                                                       config-hostname))
         (start-server port: portnum))
     (portlogger:open-run-close portlogger:set-port portnum "released")
     (debug:print 1 *default-log-port* "INFO: server has been stopped"))))

;;======================================================================
;; S E R V E R   U T I L I T I E S
;;======================================================================

;;======================================================================
;; C L I E N T S
;;======================================================================

(define *http-mutex* (make-mutex))

;; NOTE: Large block of code from 32436b426188080f72fceb6894af541fbad9921e removed here
;;       I'm pretty sure it is defunct.

;; This next block all imported en masse from the api branch
(define *http-requests-in-progress* 0)
(define *http-connections-next-cleanup* (current-seconds))

;; Returns #t when the current time is past *http-connections-next-cleanup*.
;; The comparison is made while holding *http-mutex*.
(define (http-transport:get-time-to-cleanup)
  (mutex-lock! *http-mutex*)
  (let ((due (> (current-seconds) *http-connections-next-cleanup*)))
    (mutex-unlock! *http-mutex*)
    due))

;; Increment *http-requests-in-progress* under *http-mutex*.
;; NOTE(review): the one second throttle sleep happens while the mutex is
;; still held, so every other user of *http-mutex* waits as well - confirm
;; that this is the intended back-pressure before changing it.
(define (http-transport:inc-requests-count)
  (mutex-lock! *http-mutex*)
  (set! *http-requests-in-progress* (+ 1 *http-requests-in-progress*))
  ;; Use this opportunity to slow things down iff there are too many requests in flight
  (if (> *http-requests-in-progress* 5)
      (begin
        (debug:print-info 0 *default-log-port* "Whoa there buddy, ease up...")
        (thread-sleep! 1)))
  (mutex-unlock! *http-mutex*))

;; Call the thunk proc and then decrement *http-requests-in-progress*,
;; all while holding *http-mutex*.
(define (http-transport:dec-requests-count proc)
  (mutex-lock! *http-mutex*)
  (proc)
  (set! *http-requests-in-progress* (- *http-requests-in-progress* 1))
  (mutex-unlock! *http-mutex*))

;; Decrement the in-progress counter, wait up to five seconds for it to reach
;; zero and then close all http connections. Schedules the next cleanup ten
;; seconds out and unlocks *http-mutex*; it does not lock the mutex itself
;; (http-transport:inc-requests-and-prep-to-close-all-connections locks
;; without unlocking).
(define (http-transport:dec-requests-count-and-close-all-connections)
  (set! *http-requests-in-progress* (- *http-requests-in-progress* 1))
  (let loop ((etime (+ (current-seconds) 5))) ;; give up in five seconds
    (if (> *http-requests-in-progress* 0)
        (if (> etime (current-seconds))
            (begin
              (thread-sleep! 0.05)
              (loop etime))
            (debug:print-error 0 *default-log-port* "requests still in progress after 5 seconds of waiting. I'm going to pass on cleaning up http connections"))
        (close-all-connections!)))
  (set! *http-connections-next-cleanup* (+ (current-seconds) 10))
  (mutex-unlock! *http-mutex*))

;; Lock *http-mutex* and increment the in-progress counter. The mutex is left
;; locked on return.
(define (http-transport:inc-requests-and-prep-to-close-all-connections)
  (mutex-lock! *http-mutex*)
  (set! *http-requests-in-progress* (+ 1 *http-requests-in-progress*)))

;; Send "cmd" with json payload "params" to serverdat and receive result
;;
;; serverdat must be a vector (as built by http-transport:client-connect);
;; anything else is logged as fatal and the process exits with status 1.
;; Returns a three element vector #(success payload 0). success is #f when
;; the request raised an exception, in which case payload is the decoded
;; form of (db:obj->string #f).
;;
;; Changes relative to the previous revision:
;;  - the payload is computed before the result vector is built, so the
;;    success flag no longer depends on argument evaluation order;
;;  - the error handler no longer unlocks *http-mutex*; the single unlock at
;;    the end of the request thunk is the only one, so another thread cannot
;;    grab the mutex and then have it released (and its connections closed)
;;    underneath it;
;;  - the failure message is captured in errmsg rather than reading `exn`
;;    outside of any handler.
;;
;; NOTE(review): numretries is accepted but not used anywhere in this routine.
(define (http-transport:client-api-send-receive run-id serverdat cmd params #!key (numretries 3)(area-dat #f))
  (let* ((fullurl (if (vector? serverdat)
                      (http-transport:server-dat-get-api-req serverdat)
                      (begin
                        (debug:print 0 *default-log-port* "FATAL ERROR: http-transport:client-api-send-receive called with no server info")
                        (exit 1))))
         (res     (vector #f "uninitialized"))
         (success #t)
         (errmsg  #f) ;; message of the exception raised by the request, if any
         (sparams (db:obj->string params transport: 'http))
         (areadat (or area-dat *areadat*)))
    (debug:print-info 11 *default-log-port* "fullurl=" fullurl ", cmd=" cmd ", params=" params ", run-id=" run-id "\n")
    ;; set up the http-client here
    (max-retry-attempts 1)
    ;; never ask the http client to retry a request
    (retry-request? (lambda (request)
                      #f))
    ;; send the data and get the response
    ;; extract the needed info from the http data and
    ;; process and return it.
    (let* ((send-recieve
            (lambda ()
              (mutex-lock! *http-mutex*)
              ;; (condition-case (with-input-from-request "http://localhost"; #f read-lines)
              ;;                 ((exn http client-error) e (print e)))
              ;;; DON'T FORGET - THIS IS THE CLIENT SIDE! NOTE: consider moving this to client.scm since we are only supporting http transport at this time.
              (let ((payload
                     (db:string->obj
                      (handle-exceptions
                       exn
                       (let ((call-chain (get-call-chain))
                             (msg        ((condition-property-accessor 'exn 'message) exn)))
                         (set! success #f)
                         (set! errmsg msg)
                         (if (debug:debug-mode 1)
                             (debug:print-info 0 *default-log-port* "couldn't talk to server, trying again ...")
                             (begin
                               (debug:print 0 *default-log-port* "WARNING: failure in with-input-from-request to " fullurl ".")
                               (debug:print 0 *default-log-port* " message: " msg)
                               (debug:print 0 *default-log-port* " cmd: " cmd " params: " params)
                               (debug:print 0 *default-log-port* " call-chain: " call-chain)))
                         (if areadat
                             (areadat-conndat-set! areadat #f))
                         ;; Killing associated server to allow clean retry.")
                         ;; (tasks:kill-server-run-id run-id)  ;; better to kill the server in the logic that called this routine?
                         ;;; (signal (make-composite-condition
                         ;;;          (make-property-condition 'commfail 'message "failed to connect to server")))
                         ;;; "communications failed"
                         (db:obj->string #f))
                       (with-input-from-request ;; was dat
                        fullurl
                        (list (cons 'key    (or *server-id* "thekey"))
                              (cons 'cmd    cmd)
                              (cons 'params sparams))
                        read-string))
                      transport: 'http)))
                ;; success is read only after the request has completed
                (set! res (vector success payload 0))) ;; the trailing 0 was added speculatively
              ;; Shouldn't this be a call to the managed call-all-connections stuff above?
              (close-all-connections!)
              (mutex-unlock! *http-mutex*)))
           (time-out (lambda ()
                       (thread-sleep! 45)
                       #f))
           (th1 (make-thread send-recieve "with-input-from-request"))
           (th2 (make-thread time-out     "time out")))
      (thread-start! th1)
      (thread-start! th2)
      (thread-join! th1)
      (thread-terminate! th2)
      (debug:print-info 11 *default-log-port* "got res=" res)
      (if (vector? res)
          (if (vector-ref res 0) ;; this is the first flag or the second flag?
              res                ;; this is the *inner* vector? seriously? why?
              (if (debug:debug-mode 11)
                  (begin ;; note: this code also called in nmsg-transport - consider consolidating it
                    (print-call-chain (current-error-port))
                    (debug:print-error 11 *default-log-port* "error above occured at server, res=" res " message: " errmsg)
                    (debug:print 11 *default-log-port* " server call chain:")
                    (pp (vector-ref res 1) (current-error-port))
                    ;; NOTE(review): slot 0 is #f on this path, so this signals #f
                    ;; rather than a condition object - kept as is for callers.
                    (signal (vector-ref res 0)))
                  res))
          (signal (make-composite-condition
                   (make-property-condition
                    'timeout
                    'message "nmsg-transport:client-api-send-receive-raw timed out talking to server")))))))

;; careful closing of connections stored in *alldat*
;;
;; Returns #t after closing the connection for the stored api uri, #f when no
;; server-dat vector is available. A failure while closing is logged and not
;; re-raised.
(define (http-transport:close-connections #!key (all-dat #f))
  (let* ((alldat     (or all-dat *alldat*))
         (server-dat (and alldat
                          (alldat-conndat alldat)))) ;; (hash-table-ref/default *areadat* run-id #f)))
    (if (vector? server-dat)
        (let ((api-dat (http-transport:server-dat-get-api-uri server-dat)))
          (handle-exceptions
           exn
           (begin
             (print-call-chain *default-log-port*)
             (debug:print-error 0 *default-log-port* " closing connection failed with error: " ((condition-property-accessor 'exn 'message) exn)))
           (close-connection! api-dat)
           ;;(close-idle-connections!)
           #t))
        #f)))

;; http-transport:server-dat definition moved to common_records.scm

;;
;; connect
;;
;; Build the server-dat vector #(iface port api-uri api-url api-req created)
;; for the "/api" endpoint on iface:port; `created` is (current-seconds).
(define (http-transport:client-connect iface port)
  (let* ((api-url (conc "http://" iface ":" port "/api"))
         (api-uri (uri-reference api-url))
         (api-req (make-request method: 'POST uri: api-uri)))
    (vector iface port api-uri api-url api-req (current-seconds))))

;; run http-transport:keep-running in a parallel thread to monitor that the db is being
;; used and to shutdown after sometime if it is not.
;;
;; Changes relative to the previous revision:
;;  - the server info read in the monitor loop is bound locally instead of
;;    being assigned to an undeclared global `sdat`;
;;  - the "died" packet no longer takes car/cadr of #f when *server-info*
;;    was never set.
(define (http-transport:keep-running)
  ;; if none running or if > 20 seconds since
  ;; server last used then start shutdown
  ;; This thread waits for the server to come alive
  (debug:print-info 0 *default-log-port* "Starting the sync-back, keep alive thread in server")
  (let* ((tmp-area          (common:get-db-tmp-area *alldat*))
         (started-file      (conc tmp-area "/.server-started"))
         (server-start-time (current-seconds))
         (server-info
          (let loop ((start-time (current-seconds))
                     (changed    #t)
                     (last-sdat  "not this"))
            (thread-sleep! 0.01)
            (debug:print-info 0 *default-log-port* "Waiting for server alive signature")
            (mutex-lock! *heartbeat-mutex*)
            (let ((sdat *server-info*))
              (mutex-unlock! *heartbeat-mutex*)
              (if (and sdat
                       (not changed)
                       (> (- (current-seconds) start-time) 2))
                  (begin
                    (debug:print-info 0 *default-log-port* "Received server alive signature")
                    (common:save-pkt `((action . alive)
                                       (T      . server)
                                       (pid    . ,(current-process-id))
                                       (ipaddr . ,(car sdat))
                                       (port   . ,(cadr sdat)))
                                     *configdat* #t)
                    sdat)
                  (begin
                    (debug:print-info 0 *default-log-port* "Still waiting, last-sdat=" last-sdat)
                    (sleep 4)
                    (if (> (- (current-seconds) start-time) 120) ;; been waiting for two minutes
                        (begin
                          (debug:print-error 0 *default-log-port* "transport appears to have died, exiting server")
                          ;; sdat can still be #f here, so guard the accessors
                          (common:save-pkt `((action . died)
                                             (T      . server)
                                             (pid    . ,(current-process-id))
                                             (ipaddr . ,(and sdat (car sdat)))
                                             (port   . ,(and sdat (cadr sdat)))
                                             (msg    . "Transport died?"))
                                           *configdat* #t)
                          (exit))
                        (loop start-time
                              (equal? sdat last-sdat)
                              sdat)))))))
         (iface           (car server-info))
         (port            (cadr server-info))
         (last-access     0)
         (server-timeout  (server:expiration-timeout))
         (server-going    #f)
         (server-log-file (args:get-arg "-log"))) ;; always set when we are a server

    (with-output-to-file started-file (lambda () (print (current-process-id))))

    (let loop ((count          0)
               (server-state   'available)
               (bad-sync-count 0)
               (start-time     (current-milliseconds)))
      ;; Use this opportunity to sync the tmp db to megatest.db
      (if (not server-going) ;; *dbstruct-db*
          (begin
            (debug:print 0 *default-log-port* "SERVER: dbprep")
            (set! *dbstruct-db* (db:setup #t)) ;; run-id))
            (set! server-going #t)
            (debug:print 0 *default-log-port* "SERVER: running, megatest version: " (common:get-full-version)) ;; NOTE: the server is NOT yet marked as running in the log. We do that in the keep-running routine.
            (thread-start! *watchdog*)))

      ;; when things go wrong we don't want to be doing the various queries too often
      ;; so we strive to run this stuff only every four seconds or so.
      (let* ((sync-time (- (current-milliseconds) start-time))
             (rem-time  (quotient (- 4000 sync-time) 1000)))
        (if (and (<= rem-time 4)
                 (> rem-time 0))
            (thread-sleep! rem-time)))

      (if (< count 1) ;; 3x3 = 9 secs approx
          (loop (+ count 1) 'running bad-sync-count (current-milliseconds)))

      ;; Check that iface and port have not changed (can happen if server port collides)
      (mutex-lock! *heartbeat-mutex*)
      (let ((sdat *server-info*))
        (mutex-unlock! *heartbeat-mutex*)
        (if (not (equal? sdat (list iface port)))
            (let ((new-iface (car sdat))
                  (new-port  (cadr sdat)))
              (debug:print-info 0 *default-log-port* "WARNING: interface changed, refreshing iface and port info")
              (set! iface new-iface)
              (set! port new-port)
              (debug:print 0 *default-log-port* "SERVER STARTED: " iface ":" port " AT " (current-seconds))
              (flush-output *default-log-port*))))

      ;; Transfer *db-last-access* to last-access to use in checking that we are still alive
      (mutex-lock! *heartbeat-mutex*)
      (set! last-access *db-last-access*)
      (mutex-unlock! *heartbeat-mutex*)

      (if (common:low-noise-print 120 (conc "server running on " iface ":" port))
          (begin
            (debug:print 0 *default-log-port* "SERVER STARTED: " iface ":" port " AT " (current-seconds))
            (flush-output *default-log-port*)))
      (if (common:low-noise-print 60 "dbstats")
          (begin
            (debug:print 0 *default-log-port* "Server stats:")
            (db:print-current-query-stats)))
      (cond
       ((and *server-run*
             (> (+ last-access server-timeout)
                (current-seconds)))
        (if (common:low-noise-print 120 "server continuing")
            (debug:print-info 0 *default-log-port* "Server continuing, seconds since last db access: " (- (current-seconds) last-access))
            (let ((curr-time (current-seconds)))
              (handle-exceptions
               exn
               (debug:print 0 *default-log-port* "ERROR: Failed to change timestamp on log file " server-log-file ". Are you out of space on that disk?")
               (if (not *server-overloaded*)
                   (change-file-times server-log-file curr-time curr-time)))))
        (loop 0 server-state bad-sync-count (current-milliseconds)))
       (else
        (debug:print-info 0 *default-log-port* "Server timed out. seconds since last db access: " (- (current-seconds) last-access))
        (http-transport:server-shutdown port))))))

;; Release `port` in the port logger, print the query stats, save an "exit"
;; packet and exit the process. Does not return.
(define (http-transport:server-shutdown port)
  ;;(BB> "http-transport:server-shutdown called")
  (debug:print-info 0 *default-log-port* "Starting to shutdown the server. pid="(current-process-id))
  ;;
  ;; start_shutdown
  ;;
  (set! *time-to-exit* #t) ;; tell on-exit to be fast as we've already cleaned up
  (portlogger:open-run-close portlogger:set-port port "released")
  (thread-sleep! 1)

  ;; (debug:print-info 0 *default-log-port* "Max cached queries was    " *max-cache-size*)
  ;; (debug:print-info 0 *default-log-port* "Number of cached writes   " *number-of-writes*)
  ;; (debug:print-info 0 *default-log-port* "Average cached write time "
  ;;                   (if (eq? *number-of-writes* 0)
  ;;                       "n/a (no writes)"
  ;;                       (/ *writes-total-delay*
  ;;                          *number-of-writes*))
  ;;                   " ms")
  ;; (debug:print-info 0 *default-log-port* "Number non-cached queries " *number-non-write-queries*)
  ;; (debug:print-info 0 *default-log-port* "Average non-cached time   "
  ;;                   (if (eq? *number-non-write-queries* 0)
  ;;                       "n/a (no queries)"
  ;;                       (/ *total-non-write-delay*
  ;;                          *number-non-write-queries*))
  ;;                   " ms")

  (db:print-current-query-stats)
  (common:save-pkt `((action . exit)
                     (T      . server)
                     (pid    . ,(current-process-id)))
                   *configdat* #t)
  (debug:print-info 0 *default-log-port* "Server shutdown complete. Exiting")
  (exit))

;; all routes though here end in exit ...
;;
;; start_server?
;;
;; Start a server in this process unless another start looks to be under way
;; or more than three servers already appear to be alive. Every path through
;; this procedure ends in (exit).
;;
;; Changes relative to the previous revision:
;;  - the monitor thread now really gets the name "Keep running"; the string
;;    used to sit inside the thunk body, where it was only a discarded value;
;;  - the (exit) calls that followed cleanup-proc are gone: cleanup-proc
;;    itself always exits, so they could never run.
(define (http-transport:launch)
  ;; check that a server start is in progress, pause or exit if so
  (let* ((tmp-area        (common:get-db-tmp-area *alldat*))
         (server-start    (conc tmp-area "/.server-start"))
         (server-started  (conc tmp-area "/.server-started"))
         (start-time      (common:lazy-modification-time server-start))
         (started-time    (common:lazy-modification-time server-started))
         (server-starting (< start-time started-time)) ;; if start-time is less than started-time then a server is still starting
         (start-time-old  (> (- (current-seconds) start-time) 5))
         ;; log msg, rename this process's server log to defunct-<name> and exit
         (cleanup-proc    (lambda (msg)
                            (let* ((serv-fname      (conc "server-" (current-process-id) "-" (get-host-name) ".log"))
                                   (full-serv-fname (conc *toppath* "/logs/" serv-fname))
                                   (new-serv-fname  (conc *toppath* "/logs/" "defunct-" serv-fname)))
                              (debug:print 0 *default-log-port* msg)
                              (if (common:file-exists? full-serv-fname)
                                  (system (conc "sleep 1;mv -f " full-serv-fname " " new-serv-fname))
                                  (debug:print 0 *default-log-port* "INFO: cannot move " full-serv-fname " to " new-serv-fname))
                              (exit)))))
    (if (and (not start-time-old) ;; last server start try was less than five seconds ago
             (not server-starting))
        (cleanup-proc "NOT starting server, there is either a recently started server or a server in process of starting"))
    ;; lets not even bother to start if there are already three or more server files ready to go
    (let ((num-alive (server:get-num-alive (server:get-list *toppath*))))
      (if (> num-alive 3)
          (cleanup-proc (conc "ERROR: Aborting server start because there are already " num-alive " possible servers either running or starting up"))))
    (common:save-pkt `((action . start)
                       (T      . server)
                       (pid    . ,(current-process-id)))
                     *configdat* #t)
    (let* ((th2 (make-thread (lambda ()
                               (debug:print-info 0 *default-log-port* "Server run thread started")
                               (http-transport:run
                                (or (args:get-arg "-server")
                                    "-")))
                             "Server run"))
           (th3 (make-thread (lambda ()
                               (debug:print-info 0 *default-log-port* "Server monitor thread started")
                               (http-transport:keep-running))
                             "Keep running")))
      (thread-start! th2)
      (thread-sleep! 0.25) ;; give the server time to settle before starting the keep-running monitor.
      (thread-start! th3)
      (set! *didsomething* #t)
      (thread-join! th2)
      (exit))))

;; (define (http-transport:server-signal-handler signum)
;;   (signal-mask! signum)
;;   (handle-exceptions
;;    exn
;;    (debug:print 0 *default-log-port* " ... exiting ...")
;;    (let ((th1 (make-thread (lambda ()
;;                              (thread-sleep! 1))
;;                            "eat response"))
;;          (th2 (make-thread (lambda ()
;;                              (debug:print-error 0 *default-log-port* "Received ^C, attempting clean exit. Please be patient and wait a few seconds before hitting ^C again.")
;;                              (thread-sleep! 3) ;; give the flush three seconds to do it's stuff
;;                              (debug:print 0 *default-log-port* " Done.")
;;                              (exit 4))
;;                            "exit on ^C timer")))
;;      (thread-start! th2)
;;      (thread-start! th1)
;;      (thread-join! th2))))

;;===============================================
;; Java script
;;===============================================

;; Returns the lines read from *java-script-lib* joined with newlines.
(define (http-transport:show-jquery)
  (string-join (tests:readlines *java-script-lib*) "\n"))

;;======================================================================
;; web pages
;;======================================================================

;; $ is the request-vars accessor. Reads 'runid and 'testname, splits the
;; test name on ":" into test name and item name (item name is "" when there
;; is no ":" part) and returns the result of tests:get-test-log.
;; NOTE(review): a request without a usable 'testname value is not handled
;; here - string-split / car will raise; confirm what the handler should
;; answer in that case before adding a guard.
(define (http-transport:html-test-log $)
  (let* ((run-id    ($ 'runid))
         (test-item ($ 'testname))
         (parts     (string-split test-item ":"))
         (test-name (car parts))
         (item-name (if (equal? (length parts) 1)
                        ""
                        (cadr parts))))
    ;(print $)
    (tests:get-test-log run-id test-name item-name)))

;; The remainder of this definition (the string arguments to conc) lies on
;; the following lines and is left exactly as found.
(define (http-transport:html-dboard $)
  (let* ((page ($ 'page))
         (oup  (open-output-string))
         (bdy  "--------------------------")

         (ret  (tests:dynamic-dboard page)))
    (s:output-new oup ret)
    (close-output-port oup)

    (set! bdy (get-output-string oup))
    (conc "

Dashboard

" bdy "

" ))) + +(define (http-transport:main-page) + (let ((linkpath (root-path))) + (conc "

" (pathname-strip-directory *toppath*) "

" + "" + "Run area: " *toppath* + "

Server Stats

" + (http-transport:stats-table) + "
" + (http-transport:runs linkpath) + "
" + (http-transport:run-stats) + "" + ))) + +(define (http-transport:stats-table) + (mutex-lock! *heartbeat-mutex*) + (let ((res + (conc "" + ;; "" + "" + "" + "" + ;; "" + "" + "
Max cached queries " *max-cache-size* "
Number of cached writes " *number-of-writes* "
Average cached write time " (if (eq? *number-of-writes* 0) + "n/a (no writes)" + (/ *writes-total-delay* + *number-of-writes*)) + " ms
Number non-cached queries " *number-non-write-queries* "
Average non-cached time " (if (eq? *number-non-write-queries* 0) + ;; "n/a (no queries)" + ;; (/ *total-non-write-delay* + ;; *number-non-write-queries*)) + " ms
Last access" (seconds->time-string *db-last-access*) "
"))) + (mutex-unlock! *heartbeat-mutex*) + res)) + +(define (http-transport:runs linkpath) + (conc "

Runs

" + (string-intersperse + (let ((files (map pathname-strip-directory (glob (conc linkpath "/*"))))) + (map (lambda (p) + (conc "" p "
")) + files)) + " "))) + +(define (http-transport:run-stats) + (let ((stats (open-run-close db:get-running-stats #f))) + (conc "" + (string-intersperse + (map (lambda (stat) + (conc "")) + stats) + " ") + "
" (car stat) "" (cadr stat) "
")))

;; Configurations for server
(tcp-buffer-size 2048)
(max-connections 2048)

;;======================================================================
;;
;;======================================================================


;; Puts out all combinations
;;
;; itemlist:    list of (name (value ...)) entries
;; curritemkey: the (name value) pairs chosen so far
;; hierdepth:   number of entries in a complete key; #f means
;;              (length itemlist)
;; Returns a list of complete keys, each a list of (name value) pairs.
;; An empty itemlist now yields '() instead of failing on (car '()).
(define (process-itemlist hierdepth curritemkey itemlist)
  (if (null? itemlist)
      '()
      (let ((depth (or hierdepth (length itemlist)))
            (res   '()))
        (let loop ((hed (car itemlist))
                   (tal (cdr itemlist)))
          (if (null? tal)
              ;; last entry: emit a key only when all earlier entries are present
              (for-each (lambda (item)
                          (if (> (length curritemkey) (- depth 2))
                              (set! res (append res (list (append curritemkey (list (list (car hed) item))))))))
                        (cadr hed))
              (begin
                (for-each (lambda (item)
                            (set! res (append res (process-itemlist depth (append curritemkey (list (list (car hed) item))) tal))))
                          (cadr hed))
                (loop (car tal) (cdr tal)))))
        res)))

;; (item-assoc->item-list '(("ANIMAL" "Elephant Lion")("SEASON" "Spring Fall")))
;; => ((("ANIMAL" "Elephant") ("SEASON" "Spring"))
;;     (("ANIMAL" "Elephant") ("SEASON" "Fall"))
;;     (("ANIMAL" "Lion")     ("SEASON" "Spring"))
;;     (("ANIMAL" "Lion")     ("SEASON" "Fall")))
;;
;; Returns '() when itemsdat is #f or empty.
;; The "No items specified" error is now raised when the value list of an
;; entry is empty; the previous test looked at the enclosing two element
;; list, which is never empty.
(define (item-assoc->item-list itemsdat)
  (if (and itemsdat (not (null? itemsdat)))
      (let ((itemlst
             (filter list?
                     (map (lambda (x)
                            (debug:print 6 *default-log-port* "item-assoc->item-list x: " x)
                            (if (< (length x) 2)
                                (begin
                                  (debug:print-error 0 *default-log-port* "malformed items spec " (string-intersperse x " "))
                                  (list (car x) '()))
                                (let* ((name  (car x))
                                       (items (cadr x))
                                       (ilist (list name (if (string? items)
                                                             (string-split items)
                                                             '()))))
                                  (if (null? (cadr ilist))
                                      (debug:print-error 0 *default-log-port* "No items specified for " name))
                                  ilist)))
                          itemsdat))))
        (debug:print 5 *default-log-port* "item-assoc->item-list: itemsdat => itemlst ")
        (if (debug:debug-mode 5)
            (begin
              (pp itemsdat)
              (print " => ")
              (pp itemlst)))
        (if (> (length itemlst) 0)
            (process-itemlist #f '() itemlst)
            '()))
      '())) ;; return a list consisting on a single null list for non-item runs
            ;; Nope, not now, return null as of 6/6/2011

;; (item-table->item-list '(("ANIMAL" "Elephant Lion")("SEASON" "Spring Winter")))
;; => ((("ANIMAL" "Elephant")("SEASON" "Spring"))
;;     (("ANIMAL" "Lion")    ("SEASON" "Winter")))
;;
;; Rows are walked column by column; a row that has run out of values
;; contributes (name "-"). Walking stops at the first column for which no row
;; has a value.
;; A row without a value string now contributes its name, (car x), as the key;
;; previously the whole row list was used as the key name.
(define (item-table->item-list itemtable)
  (let ((newlst (map (lambda (x)
                       (if (> (length x) 1)
                           (list (car x)
                                 (string-split (cadr x)))
                           (list (if (pair? x) (car x) x) '())))
                     itemtable))
        (res    '())) ;; a list of items
    (let loop ((indx   0)
               (item   '()) ;; an item will be ((KEYNAME1 VAL1)(KEYNAME2 VAL2) ...)
               (elflag #f))
      (for-each (lambda (row)
                  (let ((rowname (car row))
                        (rowdat  (cadr row)))
                    (set! item (append item
                                       (list
                                        (if (< indx (length rowdat))
                                            (let ((new (list rowname (list-ref rowdat indx))))
                                              ;; (debug:print 0 *default-log-port* "New: " new)
                                              (set! elflag #t)
                                              new) ;; i.e. had at least on legit value to use
                                            (list rowname "-")))))))
                newlst)
      (if elflag
          (begin
            (set! res (append res (list item)))
            (loop (+ indx 1)
                  '()
                  #f)))
      res)))
;; Nope, not now, return null as of 6/6/2011

;; Returns item when it is listed (whitespace separated) under `class` in the
;; "validvalues" section of *configdat*, or when there is no such entry;
;; returns #f otherwise.
(define (items:check-valid-items class item)
  (let* ((spec         (config-lookup *configdat* "validvalues" class))
         (valid-values (and spec (string-split spec))))
    (cond
     ((not valid-values)         item)
     ((member item valid-values) item)
     (else                       #f))))

;; Build the item list for a test from the "items" and "itemstable" entries
;; of the tconfig hash table. Entries whose value is a procedure are replaced
;; by the result of calling it. Returns '(()) when neither entry has content.
;; The itemstable warning prefix now reads "WARNING:" (was "WARNNG:").
(define (items:get-items-from-config tconfig)
  (let* ((have-items  (hash-table-ref/default tconfig "items"      #f))
         (have-itable (hash-table-ref/default tconfig "itemstable" #f))
         (raw-items   (hash-table-ref/default tconfig "items"      '()))
         (raw-table   (hash-table-ref/default tconfig "itemstable" '()))
         ;; evaluate the proc for every entry that carries one
         (force-procs (lambda (entries)
                        (map (lambda (item)
                               (if (procedure? (cadr item))
                                   (list (car item) ((cadr item)))
                                   item))
                             entries))))
    (debug:print 5 *default-log-port* "items: " raw-items " itemstable: " raw-table)
    (let ((items      (force-procs raw-items))
          (itemstable (force-procs raw-table)))
      (if (and have-items (null? items))
          (debug:print 0 *default-log-port* "WARNING:[items] section in testconfig but no entries defined"))
      (if (and have-itable (null? itemstable))
          (debug:print 0 *default-log-port* "WARNING:[itemstable] section in testconfig but no entries defined"))
      (if (or (not (null? items)) (not (null? itemstable)))
          (append (item-assoc->item-list items)
                  (item-table->item-list itemstable))
          '(())))))

;; (pp (item-assoc->item-list itemdat))

;;======================================================================
;; keys
;;======================================================================


(define (args:usage . a) #f)

;;======================================================================
;; key <=> target routines
;;======================================================================

;; This invalidates using "/" in item names. Every key will be
;; available via args:get-arg as :keyfield.
;; Since this only needs to
;; be called once let's use it to set the environment vars
;;
;; The setting of :keyfield in args should be turned off ASAP
;;
;; Split target on "/" and, when the number of values matches the number of
;; keys, export each key=value with setenv and (when ht is given) store the
;; value in ht under ":<key>". Returns the list of values, also when the
;; counts do not match (an error is logged in that case).
;; The length comparison uses `=`; `eq?` is not specified for numbers.
(define (keys:target-set-args keys target ht)
  (if target
      (let ((vals (string-split target "/")))
        (if (= (length vals) (length keys))
            (for-each (lambda (key val)
                        (setenv key val)
                        (if ht (hash-table-set! ht (conc ":" key) val)))
                      keys
                      vals)
            (debug:print-error 0 *default-log-port* "wrong number of values in " target ", should match " keys))
        vals)
      (debug:print 4 *default-log-port* "ERROR: keys:target-set-args called with no target.")))

;; given the keys (a list of vectors or a list of keys) and a target return a keyval list
;; keyval list ( (key1 val1) (key2 val2) ...)
;; A target with fewer "/" separated parts than keys is padded with "".
(define (keys:target->keyval keys target)
  (let* ((targlist (string-split target "/"))
         (missing  (- (length keys) (length targlist)))
         (padded   (if (> missing 0)
                       (append targlist (make-list missing ""))
                       targlist)))
    (map list keys padded)))

;;======================================================================
;; launch
;;======================================================================

;;======================================================================
;; ezsteps
;;======================================================================

;; ezsteps were going to be coded as
;;  stepname[,predstep1,predstep2 ...] [{VAR1=first,second,third}] command to execute
;;  BUT
;;  now are
;;  stepname {VAR=first,second,third ...} command ...
;;  where the {VAR=first,second,third ...} is optional.

;; given an exit code and whether or not logpro was used calculate OK/BAD
;; return #t if we are ok, #f otherwise
;; eqv? is used so the numeric comparison is well defined while a non-numeric
;; exitcode still just yields #f.
(define (steprun-good? logpro exitcode)
  (or (eqv? exitcode 0)
      (and logpro (eqv? exitcode 2))))

;; if handed a string, process it, else look for MT_CMDINFO
;; Returns '() when neither is available.
(define (launch:get-cmdinfo-assoc-list #!key (encoded-cmd #f))
  (let ((enccmd (or encoded-cmd (getenv "MT_CMDINFO"))))
    (if enccmd
        (common:read-encoded-string enccmd)
        '())))

;;                       0        1                2              3
(defstruct launch:einf (pid #t)(exit-status #t)(exit-code #t)(rollup-status 0))

;; return (conc status ": " comment) from the final section so that
;; the comment can be set in the step record in launch.scm
;;
;; Reads <stepname>.dat, pushes its csv form to the test data via
;; rmt:csv->test-data and returns "PASS", "<status>: <message>", or #f when
;; the file or the final/exit-status entry is missing.
(define (launch:load-logpro-dat run-id test-id stepname)
  (let ((cname (conc stepname ".dat")))
    (if (common:file-exists? cname)
        (let* ((dat    (read-config cname #f #f))
               (csvr   (db:logpro-dat->csv dat stepname))
               (csvt   (let-values (((fmt-cell fmt-record fmt-csv) (make-format ",")))
                         (fmt-csv (map list->csv-record csvr))))
               (status (configf:lookup dat "final" "exit-status"))
               (msg    (configf:lookup dat "final" "message")))
          (if csvt ;; this if blocked stack dump caused by .dat file from logpro being 0-byte. fixed by upgrading logpro
              (rmt:csv->test-data run-id test-id csvt)
              (debug:print 0 *default-log-port* "ERROR: no csvdat exists for run-id: " run-id " test-id: " test-id " stepname: " stepname ", check that logpro version is 1.15 or newer"))
          ;;  (debug:print-info 13 *default-log-port* "Error: run-id/test-id/stepname="run-id"/"test-id"/"stepname" => bad csvr="csvr)
          ;;  )
          (cond
           ((equal? status "PASS") "PASS") ;; skip the message part if status is pass
           (status (conc status ": " (if msg msg "no message")))
           (else #f)))
        #f)))

(define (launch:runstep ezstep run-id test-id exit-info m tal testconfig) ;;; TODO: deprecate me in favor of ezsteps.scm
  (let* ((stepname (car ezstep)) ;; do stuff to run the step
         (stepinfo (cadr ezstep))
         ;; (let ((info (cadr ezstep)))
         ;; (if (proc? info) "" info)))
         ;; (stepproc (let ((info (cadr ezstep)))
         ;; (if (proc?
info) info #f))) + (stepparts (string-match (regexp "^(\\{([^\\}\\{]*)\\}\\s*|)(.*)$") stepinfo)) + (stepparams (list-ref stepparts 2)) ;; for future use, {VAR=1,2,3}, run step for each + (paramparts (if (string? stepparams) + (map (lambda (x)(string-split x "=")) (string-split-fields "[^;]*=[^;]*" stepparams)) + '())) + (subrun (alist-ref "subrun" paramparts equal?)) + (stepcmd (list-ref stepparts 3)) + (script "") ; "#!/bin/bash\n") ;; yep, we depend on bin/bash FIXME!!!\ + (logpro-file (conc stepname ".logpro")) + (html-file (conc stepname ".html")) + (dat-file (conc stepname ".dat")) + (tconfig-logpro (configf:lookup testconfig "logpro" stepname)) + (logpro-used (common:file-exists? logpro-file))) + + (debug:print 0 *default-log-port* "stepparts: " stepparts ", stepparams: " stepparams + ", paramparts: " paramparts ", subrun: " subrun ", stepcmd: " stepcmd) + + (if (and tconfig-logpro + (not logpro-used)) ;; no logpro file found but have a defn in the testconfig + (begin + (with-output-to-file logpro-file + (lambda () + (print ";; logpro file extracted from testconfig\n" + ";;") + (print tconfig-logpro))) + (set! logpro-used #t))) + + ;; NB// can safely assume we are in test-area directory + (debug:print 4 *default-log-port* "ezsteps:\n stepname: " stepname " stepinfo: " stepinfo " stepparts: " stepparts + " stepparams: " stepparams " stepcmd: " stepcmd) + + ;; ;; first source the previous environment + ;; (let ((prev-env (conc ".ezsteps/" prevstep (if (string-search (regexp "csh") + ;; (get-environment-variable "SHELL")) ".csh" ".sh")))) + ;; (if (and prevstep (common:file-exists? prev-env)) + ;; (set! script (conc script "source " prev-env)))) + + ;; call the command using mt_ezstep + ;; (set! script (conc "mt_ezstep " stepname " " (if prevstep prevstep "x") " " stepcmd)) + + (debug:print 4 *default-log-port* "script: " script) + (rmt:teststep-set-status! 
run-id test-id stepname "start" "-" #f #f) + ;; now launch the actual process + (call-with-environment-variables + (list (cons "PATH" (conc (get-environment-variable "PATH") ":."))) + (lambda () ;; (process-run "/bin/bash" "-c" "exec ls -l /tmp/foobar > /tmp/delme-more.log 2>&1") + (let* ((cmd (conc stepcmd " > " stepname ".log 2>&1")) ;; >outfile 2>&1 + (pid #f)) + (let ((proc (lambda () + (set! pid (process-run "/bin/bash" (list "-c" cmd)))))) + (if subrun + (begin + (debug:print-info 0 *default-log-port* "Running without MT_.* environment variables.") + (common:without-vars proc "^MT_.*")) + (proc))) + + (with-output-to-file "Makefile.ezsteps" + (lambda () + (print stepname ".log :") + (print "\t" cmd) + (if (common:file-exists? (conc stepname ".logpro")) + (print "\tlogpro " stepname ".logpro " stepname ".html < " stepname ".log")) + (print) + (print stepname " : " stepname ".log") + (print)) + #:append) + + (rmt:test-set-top-process-pid run-id test-id pid) + (let processloop ((i 0)) + (let-values (((pid-val exit-status exit-code)(process-wait pid #t))) + (mutex-lock! m) + (launch:einf-pid-set! exit-info pid) ;; (vector-set! exit-info 0 pid) + (launch:einf-exit-status-set! exit-info exit-status) ;; (vector-set! exit-info 1 exit-status) + (launch:einf-exit-code-set! exit-info exit-code) ;; (vector-set! exit-info 2 exit-code) + (mutex-unlock! m) + (if (eq? pid-val 0) + (begin + (thread-sleep! 2) + (processloop (+ i 1)))) + ))))) + (debug:print-info 0 *default-log-port* "step " stepname " completed with exit code " (launch:einf-exit-code exit-info)) ;; (vector-ref exit-info 2)) + ;; now run logpro if needed + (if logpro-used + (let* ((logpro-exe (or (getenv "LOGPRO_EXE") "logpro")) + (pid (process-run (conc "/bin/sh -c '"logpro-exe" "logpro-file " " (conc stepname ".html") " < " stepname ".log > /dev/null'")))) + (let processloop ((i 0)) + (let-values (((pid-val exit-status exit-code)(process-wait pid #t))) + (mutex-lock! 
m) + ;; (make-launch:einf pid: pid exit-status: exit-status exit-code: exit-code) + (launch:einf-pid-set! exit-info pid) ;; (vector-set! exit-info 0 pid) + (launch:einf-exit-status-set! exit-info exit-status) ;; (vector-set! exit-info 1 exit-status) + (launch:einf-exit-code-set! exit-info exit-code) ;; (vector-set! exit-info 2 exit-code) + (mutex-unlock! m) + (if (eq? pid-val 0) + (begin + (thread-sleep! 2) + (processloop (+ i 1))))) + (debug:print-info 0 *default-log-port* "logpro for step " stepname " exited with code " (launch:einf-exit-code exit-info))))) ;; (vector-ref exit-info 2))))) + + (let ((exinfo (launch:einf-exit-code exit-info)) ;; (vector-ref exit-info 2)) + (logfna (if logpro-used (conc stepname ".html") "")) + (comment #f)) + (if logpro-used + (let ((datfile (conc stepname ".dat"))) + ;; load the .dat file into the test_data table if it exists + (if (common:file-exists? datfile) + (set! comment (launch:load-logpro-dat run-id test-id stepname))) + (rmt:test-set-log! run-id test-id (conc stepname ".html")))) + (rmt:teststep-set-status! run-id test-id stepname "end" exinfo comment logfna)) + ;; set the test final status + (let* ((process-exit-status (launch:einf-exit-code exit-info)) ;; (vector-ref exit-info 2)) + (this-step-status (cond + ((and (eq? process-exit-status 2) logpro-used) 'warn) ;; logpro 2 = warnings + ((and (eq? process-exit-status 3) logpro-used) 'check) ;; logpro 3 = check + ((and (eq? process-exit-status 4) logpro-used) 'waived) ;; logpro 4 = waived + ((and (eq? process-exit-status 5) logpro-used) 'abort) ;; logpro 5 = abort + ((and (eq? process-exit-status 6) logpro-used) 'skip) ;; logpro 6 = skip + ((eq? process-exit-status 0) 'pass) ;; logpro 0 = pass + (else 'fail))) + (overall-status (cond + ((eq? (launch:einf-rollup-status exit-info) 2) 'warn) ;; rollup-status (vector-ref exit-info 3) + ((eq? (launch:einf-rollup-status exit-info) 0) 'pass) ;; (vector-ref exit-info 3) + (else 'fail))) + (next-status (cond + ((eq? 
overall-status 'pass) this-step-status) + ((eq? overall-status 'warn) + (if (eq? this-step-status 'fail) 'fail 'warn)) + ((eq? overall-status 'abort) 'abort) + (else 'fail))) + (next-state ;; "RUNNING") ;; WHY WAS THIS CHANGED TO NOT USE (null? tal) ?? + (cond + ((null? tal) ;; more to run? + "COMPLETED") + (else "RUNNING")))) + (debug:print 4 *default-log-port* "Exit value received: " (launch:einf-exit-code exit-info) " logpro-used: " logpro-used + " this-step-status: " this-step-status " overall-status: " overall-status + " next-status: " next-status " rollup-status: " (launch:einf-rollup-status exit-info)) ;; (vector-ref exit-info 3)) + (case next-status + ((warn) + (launch:einf-rollup-status-set! exit-info 2) ;; (vector-set! exit-info 3 2) ;; rollup-status + ;; NB// test-set-status! does rdb calls under the hood + (tests:test-set-status! run-id test-id next-state "WARN" + (if (eq? this-step-status 'warn) "Logpro warning found" #f) + #f)) + ((check) + (launch:einf-rollup-status-set! exit-info 3) ;; (vector-set! exit-info 3 3) ;; rollup-status + ;; NB// test-set-status! does rdb calls under the hood + (tests:test-set-status! run-id test-id next-state "CHECK" + (if (eq? this-step-status 'check) "Logpro check found" #f) + #f)) + ((waived) + (launch:einf-rollup-status-set! exit-info 4) ;; (vector-set! exit-info 3 3) ;; rollup-status + ;; NB// test-set-status! does rdb calls under the hood + (tests:test-set-status! run-id test-id next-state "WAIVED" + (if (eq? this-step-status 'check) "Logpro waived found" #f) + #f)) + ((abort) + (launch:einf-rollup-status-set! exit-info 5) ;; (vector-set! exit-info 3 4) ;; rollup-status + ;; NB// test-set-status! does rdb calls under the hood + (tests:test-set-status! run-id test-id next-state "ABORT" + (if (eq? this-step-status 'abort) "Logpro abort found" #f) + #f)) + ((skip) + (launch:einf-rollup-status-set! exit-info 6) ;; (vector-set! exit-info 3 4) ;; rollup-status + ;; NB// test-set-status! 
does rdb calls under the hood + (tests:test-set-status! run-id test-id next-state "SKIP" + (if (eq? this-step-status 'skip) "Logpro skip found" #f) + #f)) + ((pass) + (tests:test-set-status! run-id test-id next-state "PASS" #f #f)) + (else ;; 'fail + (launch:einf-rollup-status-set! exit-info 1) ;; (vector-set! exit-info 3 1) ;; force fail, this used to be next-state but that doesn't make sense. should always be "COMPLETED" + (tests:test-set-status! run-id test-id "COMPLETED" "FAIL" (conc "Failed at step " stepname) #f) + ))) + logpro-used)) + +(define (launch:manage-steps run-id test-id item-path fullrunscript ezsteps subrun test-name tconfigreg exit-info m) + ;; (let-values + ;; (((pid exit-status exit-code) + ;; (run-n-wait fullrunscript))) + ;; (tests:test-set-status! test-id "RUNNING" "n/a" #f #f) + ;; Since we should have a clean slate at this time there is no need to do + ;; any of the other stuff that tests:test-set-status! does. Let's just + ;; force RUNNING/n/a + + ;; (thread-sleep! 0.3) + ;; (tests:test-force-state-status! run-id test-id "RUNNING" "n/a") + (rmt:set-state-status-and-roll-up-items run-id test-name item-path "RUNNING" #f #f) + ;; (thread-sleep! 0.3) ;; NFS slowness has caused grief here + + ;; if there is a runscript do it first + (if fullrunscript + (let ((pid (process-run fullrunscript))) + (rmt:test-set-top-process-pid run-id test-id pid) + (let loop ((i 0)) + (let-values + (((pid-val exit-status exit-code) (process-wait pid #t))) + (mutex-lock! m) + (launch:einf-pid-set! exit-info pid) ;; (vector-set! exit-info 0 pid) + (launch:einf-exit-status-set! exit-info exit-status) ;; (vector-set! exit-info 1 exit-status) + (launch:einf-exit-code-set! exit-info exit-code) ;; (vector-set! exit-info 2 exit-code) + (launch:einf-rollup-status-set! exit-info exit-code) ;; (vector-set! exit-info 3 exit-code) ;; rollup status + (mutex-unlock! m) + (if (eq? pid-val 0) + (begin + (thread-sleep! 
2) + (loop (+ i 1))) + ))))) + ;; then, if runscript ran ok (or did not get called) + ;; do all the ezsteps (if any) + (if (or ezsteps subrun) + (let* ((test-run-dir (tests:get-test-path-from-environment)) + (testconfig ;; (read-config (conc work-area "/testconfig") #f #t environ-patt: "pre-launch-env-vars")) ;; FIXME??? is allow-system ok here? + ;; NOTE: it is tempting to turn off force-create of testconfig but dynamic + ;; ezstep names need a full re-eval here. + (tests:get-testconfig test-name item-path tconfigreg #t force-create: #t)) ;; 'return-procs))) + (ezstepslst (if (hash-table? testconfig) + (hash-table-ref/default testconfig "ezsteps" '()) + #f))) + (if testconfig + (hash-table-set! *testconfigs* test-name testconfig) ;; cached for lazy reads later ... + (begin + (launch:setup) + (debug:print 0 *default-log-port* "WARNING: no testconfig found for " test-name " in search path:\n " + (string-intersperse (tests:get-tests-search-path *configdat*) "\n ")))) + ;; after all that, still no testconfig? Time to abort + (if (not testconfig) + (begin + (debug:print-error 0 *default-log-port* "Failed to resolve megatest.config, runconfigs.config and testconfig issues. Giving up now") + (exit 1))) + + ;; create a proc for the subrun if requested, save that proc in the ezsteps table as the last entry + ;; 1. get section [runarun] + ;; 2. unset MT_* vars + ;; 3. fix target + ;; 4. fix runname + ;; 5. fix testpatt or calculate it from contour + ;; 6. launch the run + ;; 7. roll up the run result and or roll up the logpro processed result + (when (configf:lookup testconfig "subrun" "runwait") ;; we use runwait as the flag that a subrun is requested + (subrun:initialize-toprun-test testconfig test-run-dir) + (let* ((mt-cmd (subrun:launch-cmd test-run-dir))) + (debug:print-info 0 *default-log-port* "Subrun command is \"" mt-cmd "\"") + (set! ezsteps #t) ;; set the needed flag + (set! 
ezstepslst + (append (or ezstepslst '()) + (list (list "subrun" (conc "{subrun=true} " mt-cmd))))))) + + ;; process the ezsteps + (if ezsteps + (begin + (if (not (common:file-exists? ".ezsteps"))(create-directory ".ezsteps")) + ;; if ezsteps was defined then we are sure to have at least one step but check anyway + (if (not (> (length ezstepslst) 0)) + (debug:print-error 0 *default-log-port* "ezsteps defined but ezstepslst is zero length") + (let loop ((ezstep (car ezstepslst)) + (tal (cdr ezstepslst)) + (prevstep #f)) + (debug:print-info 0 *default-log-port* "Processing ezstep \"" (string-intersperse ezstep " ") "\"") + ;; check exit-info (vector-ref exit-info 1) + (if (launch:einf-exit-status exit-info) ;; (vector-ref exit-info 1) + (let ((logpro-used (launch:runstep ezstep run-id test-id exit-info m tal testconfig)) + (stepname (car ezstep))) + ;; if logpro-used read in the stepname.dat file + (if (and logpro-used (common:file-exists? (conc stepname ".dat"))) + (launch:load-logpro-dat run-id test-id stepname)) + (if (steprun-good? logpro-used (launch:einf-exit-code exit-info)) + (if (not (null? tal)) + (loop (car tal) (cdr tal) stepname)) + (debug:print 0 *default-log-port* "WARNING: step " (car ezstep) " failed. 
Stopping"))) + (debug:print 0 *default-log-port* "WARNING: a prior step failed, stopping at " ezstep))))))))) + +(define (launch:monitor-job run-id test-id item-path fullrunscript ezsteps test-name tconfigreg exit-info m work-area runtlim misc-flags) + (let* ((update-period (string->number (or (configf:lookup *configdat* "setup" "test-stats-update-period") "30"))) + (start-seconds (current-seconds)) + (calc-minutes (lambda () + (inexact->exact + (round + (- + (current-seconds) + start-seconds))))) + (kill-tries 0)) + ;; (tests:set-full-meta-info #f test-id run-id (calc-minutes) work-area) + ;; (tests:set-full-meta-info test-id run-id (calc-minutes) work-area) + (tests:set-full-meta-info #f test-id run-id (calc-minutes) work-area 10) + + (let loop ((minutes (calc-minutes)) + (cpu-load (alist-ref 'adj-core-load (common:get-normalized-cpu-load #f))) + (disk-free (get-df (current-directory))) + (last-sync (current-seconds))) + (common:telemetry-log "zombie" (conc "launch:monitor-job - top of loop encountered at "(current-seconds)" with last-sync="last-sync)) + (let* ((over-time (> (current-seconds) (+ last-sync update-period))) + (new-cpu-load (let* ((load (alist-ref 'adj-core-load (common:get-normalized-cpu-load #f))) + (delta (abs (- load cpu-load)))) + (if (> delta 0.1) ;; don't bother updating with small changes + load + #f))) + (new-disk-free (let* ((df (if over-time ;; only get df every 30 seconds + (get-df (current-directory)) + disk-free)) + (delta (abs (- df disk-free)))) + (if (and (> df 0) + (> (/ delta df) 0.1)) ;; (> delta 200) ;; ignore changes under 200 Meg + df + #f))) + (do-sync (or new-cpu-load new-disk-free over-time)) + + (test-info (rmt:get-test-info-by-id run-id test-id)) + (state (db:test-get-state test-info)) + (status (db:test-get-status test-info)) + (kill-reason "no kill reason specified") + (kill-job? 
#f)) + (common:telemetry-log "zombie" (conc "launch:monitor-job - decision time encountered at "(current-seconds)" with last-sync="last-sync" do-sync="do-sync" over-time="over-time" update-period="update-period)) + (cond + ((test-get-kill-request run-id test-id) + (set! kill-reason "KILLING TEST since received kill request (KILLREQ)") + (set! kill-job? #t)) + ((and runtlim (> (- (current-seconds) start-seconds) runtlim)) + (set! kill-reason (conc "KILLING TEST DUE TO TIME LIMIT EXCEEDED! Runtime=" (- (current-seconds) start-seconds) " seconds, limit=" runtlim)) + (set! kill-job? #t)) + ((equal? status "DEAD") + (tests:update-central-meta-info run-id test-id new-cpu-load new-disk-free (calc-minutes) #f #f) + (rmt:set-state-status-and-roll-up-items run-id test-id 'foo "RUNNING" "n/a" "was marked dead; really still running.") + ;;(set! kill-reason "KILLING TEST because it was marked as DEAD by launch:handle-zombie-tests (might indicate really overloaded server or else overzealous setup.deadtime)") ;; MARK RUNNING + (set! kill-job? #f))) + + (debug:print 4 *default-log-port* "cpu: " new-cpu-load " disk: " new-disk-free " last-sync: " last-sync " do-sync: " do-sync) + (launch:handle-zombie-tests run-id) + (when do-sync + ;;(with-output-to-file (conc (getenv "MT_TEST_RUN_DIR") "/last-loadinfo.log" #:append) + ;; (lambda () (pp (list (current-seconds) new-cpu-load new-disk-free (calc-minutes))))) + (common:telemetry-log "zombie" (conc "launch:monitor-job - dosync started at "(current-seconds))) + (tests:update-central-meta-info run-id test-id new-cpu-load new-disk-free (calc-minutes) #f #f) + (common:telemetry-log "zombie" (conc "launch:monitor-job - dosync finished at "(current-seconds)))) + + (if kill-job? + (begin + (debug:print-info 0 *default-log-port* "proceeding to kill test: "kill-reason) + (mutex-lock! m) + ;; NOTE: The pid can change as different steps are run. Do we need handshaking between this + ;; section and the runit section? 
Or add a loop that tries three times with a 1/4 second + ;; between tries? + (let* ((pid1 (launch:einf-pid exit-info)) ;; (vector-ref exit-info 0)) + (pid2 (rmt:test-get-top-process-pid run-id test-id)) + (pids (delete-duplicates (filter number? (list pid1 pid2))))) + (if (not (null? pids)) + (begin + (for-each + (lambda (pid) + (handle-exceptions + exn + (begin + (debug:print-info 0 *default-log-port* "Unable to kill process with pid " pid ", possibly already killed.") + (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))) + (debug:print 0 *default-log-port* "WARNING: Request received to kill job " pid) ;; " (attempt # " kill-tries ")") + (debug:print-info 0 *default-log-port* "Signal mask=" (signal-mask)) + ;; (if (process:alive? pid) + ;; (begin + (map (lambda (pid-num) + (process-signal pid-num signal/term)) + (process:get-sub-pids pid)) + (thread-sleep! 5) + ;; (if (process:process-alive? pid) + (map (lambda (pid-num) + (handle-exceptions + exn + #f + (process-signal pid-num signal/kill))) + (process:get-sub-pids pid)))) + ;; (debug:print-info 0 *default-log-port* "not killing process " pid " as it is not alive")))) + pids) + ;; BB: question to Matt -- does the tests:test-state-status! encompass rollup to toplevel? If not, should it? + (tests:test-set-status! run-id test-id "KILLED" "KILLED" (conc (args:get-arg "-m")" "kill-reason) #f)) ;; BB ADDED kill-reason -- confirm OK with Matt + (begin + (debug:print-error 0 *default-log-port* "Nothing to kill, pid1=" pid1 ", pid2=" pid2) + (tests:test-set-status! run-id test-id "KILLED" "FAILED TO KILL" (conc (args:get-arg "-m")" "kill-reason) #f) ;; BB ADDED kill-reason -- confirm OK with Matt + ))) + (mutex-unlock! m) + ;; no point in sticking around. Exit now. But run end of run before exiting? + (launch:end-of-run-check run-id) + (exit))) + (if (hash-table-ref/default misc-flags 'keep-going #f) + (begin + (thread-sleep! 
3) ;; (+ 3 (random 6))) ;; add some jitter to the call home time to spread out the db accesses + (if (hash-table-ref/default misc-flags 'keep-going #f) ;; keep originals for cpu-load and disk-free unless they change more than the allowed delta + (loop (calc-minutes) + (or new-cpu-load cpu-load) + (or new-disk-free disk-free) + (if do-sync (current-seconds) last-sync))))))) + (tests:update-central-meta-info run-id test-id (get-cpu-load) (get-df (current-directory))(calc-minutes) #f #f))) ;; NOTE: Checking twice for keep-going is intentional + + +(define (launch:execute encoded-cmd) + (let* ((cmdinfo (common:read-encoded-string encoded-cmd)) + (tconfigreg #f)) + (setenv "MT_CMDINFO" encoded-cmd) + ;;(bb-check-path msg: "launch:execute incoming") + (if (list? cmdinfo) ;; ((testpath /tmp/mrwellan/jazzmind/src/example_run/tests/sqlitespeed) + ;; (test-name sqlitespeed) (runscript runscript.rb) (db-host localhost) (run-id 1)) + (let* ((testpath (assoc/default 'testpath cmdinfo)) ;; testpath is the test spec area + (top-path (assoc/default 'toppath cmdinfo)) + (work-area (assoc/default 'work-area cmdinfo)) ;; work-area is the test run area + (test-name (assoc/default 'test-name cmdinfo)) + (runscript (assoc/default 'runscript cmdinfo)) + (ezsteps (assoc/default 'ezsteps cmdinfo)) + (subrun (assoc/default 'subrun cmdinfo)) + (serverurl (assoc/default 'serverurl cmdinfo)) + (homehost (assoc/default 'homehost cmdinfo)) + (run-id (assoc/default 'run-id cmdinfo)) + (test-id (assoc/default 'test-id cmdinfo)) + (target (assoc/default 'target cmdinfo)) + (areaname (assoc/default 'areaname cmdinfo)) + (itemdat (assoc/default 'itemdat cmdinfo)) + (env-ovrd (assoc/default 'env-ovrd cmdinfo)) + (set-vars (assoc/default 'set-vars cmdinfo)) ;; pre-overrides from -setvar + (runname (assoc/default 'runname cmdinfo)) + (megatest (assoc/default 'megatest cmdinfo)) + (runtlim (assoc/default 'runtlim cmdinfo)) + (contour (assoc/default 'contour cmdinfo)) + (item-path (item-list->path 
itemdat)) + (mt-bindir-path (assoc/default 'mt-bindir-path cmdinfo)) + (keys #f) + (keyvals #f) + (fullrunscript (if (not runscript) + #f + (if (substring-index "/" runscript) + runscript ;; use unadultered if contains slashes + (let ((fulln (conc work-area "/" runscript))) + (if (and (common:file-exists? fulln) + (file-execute-access? fulln)) + fulln + runscript))))) ;; assume it is on the path + (check-work-area (lambda () + ;; NFS might not have propagated the directory meta data to the run host - give it time if needed + (let loop ((count 0)) + (if (or (common:directory-exists? work-area) + (> count 10)) + (change-directory work-area) + (begin + (debug:print 0 *default-log-port* "INFO: Not starting job yet - directory " work-area " not found") + (thread-sleep! 10) + (loop (+ count 1))))) + + (if (not (string=? (common:real-path work-area)(common:real-path (current-directory)))) + (begin + (debug:print 0 *default-log-port* + "INFO: we are expecting to be in directory " work-area "\n" + " but we are actually in the directory " (current-directory) "\n" + " doing another change dir.") + (change-directory work-area))) + + ;; spot check that the files in testpath are available. Too often NFS delays cause problems here. + (let ((files (glob (conc testpath "/*"))) + (bad-files '())) + (for-each + (lambda (fullname) + (let* ((fname (pathname-strip-directory fullname)) + (targn (conc work-area "/" fname))) + (if (not (file-exists? targn)) + (set! bad-files (cons fname bad-files))))) + files) + (if (not (null? bad-files)) + (begin + (debug:print 0 *default-log-port* "INFO: test data from " testpath " not copied properly or filesystem problems causing data to not be found. 
Re-running the copy command.") + (debug:print 0 *default-log-port* "INFO: missing files from " work-area ": " (string-intersperse bad-files ", ")) + (launch:test-copy testpath work-area)))) + ;; one more time, change to the work-area directory + (change-directory work-area))) + ) ;; let* + + (if contour (setenv "MT_CONTOUR" contour)) + + ;; immediated set some key variables from CMDINFO data, yes, these will be set again below ... + ;; + (setenv "MT_TESTSUITENAME" areaname) + (setenv "MT_RUN_AREA_HOME" top-path) + (set! *toppath* top-path) + (change-directory *toppath*) ;; temporarily switch to the run area home + (setenv "MT_TEST_RUN_DIR" work-area) + + (launch:setup) ;; should be properly in the run area home now + + (if contour (setenv "MT_CONTOUR" contour)) + + ;; immediated set some key variables from CMDINFO data, yes, these will be set again below ... + ;; + (setenv "MT_TESTSUITENAME" areaname) + (setenv "MT_RUN_AREA_HOME" top-path) + (set! *toppath* top-path) + (change-directory *toppath*) ;; temporarily switch to the run area home + (setenv "MT_TEST_RUN_DIR" work-area) + + (launch:setup) ;; should be properly in the run area home now + + (set! tconfigreg (tests:get-all)) ;; mapping of testname => test source path + (let ((sighand (lambda (signum) + ;; (signal-mask! signum) ;; to mask or not? seems to cause issues in exiting + (if (eq? signum signal/stop) + (debug:print-error 0 *default-log-port* "attempt to STOP process. Exiting.")) + (set! *time-to-exit* #t) + (print "Received signal " signum ", cleaning up before exit (set this test to COMPLETED/ABORT) . Please wait...") + (let ((th1 (make-thread (lambda () + (print "set test to COMPLETED/ABORT begin.") + (rmt:test-set-state-status run-id test-id "COMPLETED" "ABORT" "received kill signal") + (print "set test to COMPLETED/ABORT complete.") + (print "Killed by signal " signum ". Exiting") + (exit 1)))) + (th2 (make-thread (lambda () + (thread-sleep! 
20) + (debug:print 0 *default-log-port* "Done") + (exit 4))))) + (thread-start! th2) + (thread-start! th1) + (thread-join! th2))))) + (set-signal-handler! signal/int sighand) + (set-signal-handler! signal/term sighand) + ) ;; (set-signal-handler! signal/stop sighand) + + ;; Do not run the test if it is REMOVING, RUNNING, KILLREQ or REMOTEHOSTSTART, + ;; Mark the test as REMOTEHOSTSTART *IMMEDIATELY* + ;; + (let* ((test-info (rmt:get-test-info-by-id run-id test-id)) + (test-host (if test-info + (db:test-get-host test-info) + (begin + (debug:print 0 *default-log-port* "ERROR: failed to find a record for test-id " test-id ", exiting.") + (exit)))) + (test-pid (db:test-get-process_id test-info))) + (cond + ;; -mrw- I'm removing KILLREQ from this list so that a test in KILLREQ state is treated as a "do not run" flag. + ((member (db:test-get-state test-info) '("INCOMPLETE" "KILLED" "UNKNOWN" "STUCK")) ;; prior run of this test didn't complete, go ahead and try to rerun + (debug:print 0 *default-log-port* "INFO: test is INCOMPLETE or KILLED, treat this execute call as a rerun request") + ;; (tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a") + + (rmt:general-call 'set-test-start-time #f test-id) + (rmt:test-set-state-status run-id test-id "REMOTEHOSTSTART" "n/a" #f) + ) ;; prime it for running + ((member (db:test-get-state test-info) '("RUNNING" "REMOTEHOSTSTART")) + (if (process:alive-on-host? test-host test-pid) + (debug:print-error 0 *default-log-port* "test state is " (db:test-get-state test-info) " and process " test-pid " is still running on host " test-host ", cannot proceed") + (exit))) + ((not (member (db:test-get-state test-info) '("REMOVING" "REMOTEHOSTSTART" "RUNNING" "KILLREQ"))) + ;; (tests:test-force-state-status! 
run-id test-id "REMOTEHOSTSTART" "n/a") + (rmt:general-call 'set-test-start-time #f test-id) + (rmt:test-set-state-status run-id test-id "REMOTEHOSTSTART" "n/a" #f) + ) + (else ;; (member (db:test-get-state test-info) '("REMOVING" "REMOTEHOSTSTART" "RUNNING" "KILLREQ")) + (debug:print-error 0 *default-log-port* "test state is " (db:test-get-state test-info) ", cannot proceed") + (exit)))) + + ;; cleanup prior execution's steps + (rmt:delete-steps-for-test! run-id test-id) + + (debug:print 2 *default-log-port* "Executing " test-name " (id: " test-id ") on " (get-host-name)) + (set! keys (rmt:get-keys)) + ;; (runs:set-megatest-env-vars run-id inkeys: keys inkeyvals: keyvals) ;; these may be needed by the launching process + ;; one of these is defunct/redundant ... + (if (not (launch:setup force-reread: #t)) + (begin + (debug:print 0 *default-log-port* "Failed to setup, exiting") + ;; (sqlite3:finalize! db) + ;; (sqlite3:finalize! tdb) + (exit 1))) + ;; validate that the test run area is available + (check-work-area) + + ;; still need to go back to run area home for next couple steps + (change-directory *toppath*) + + ;; NOTE: Current order is to process runconfigs *before* setting the MT_ vars. This + ;; seems non-ideal but could well break stuff + ;; BUG? BUG? BUG? + + (let ((rconfig (full-runconfigs-read)) ;; (read-config (conc *toppath* "/runconfigs.config") #f #t sections: (list "default" target)))) + (wconfig (read-config "waivers.config" #f #t sections: `( "default" ,target )))) ;; read the waivers config if it exists + ;; (setup-env-defaults (conc *toppath* "/runconfigs.config") run-id (make-hash-table) keyvals target) + ;; (set-run-config-vars run-id keyvals target) ;; (db:get-target db run-id)) + ;; Now have runconfigs data loaded, set environment vars + (for-each + (lambda (section) + (for-each + (lambda (varval) + (let ((var (car varval)) + (val (cadr varval))) + (if (and (string? var)(string? 
val)) + (begin + (safe-setenv var (config:eval-string-in-environment val))) ;; val) + (debug:print-error 0 *default-log-port* "bad variable spec, " var "=" val)))) + (configf:get-section rconfig section))) + (list "default" target))) + ;;(bb-check-path msg: "launch:execute post block 1") + + ;; NFS might not have propagated the directory meta data to the run host - give it time if needed + (let loop ((count 0)) + (if (or (common:file-exists? work-area) + (> count 10)) + (change-directory work-area) + (begin + (debug:print 0 *default-log-port* "INFO: Not starting job yet - directory " work-area " not found") + (thread-sleep! 10) + (loop (+ count 1))))) + + ;; now we can switch to the work-area? + (change-directory work-area) + ;;(bb-check-path msg: "launch:execute post block 1.5") + ;; (change-directory work-area) + (set! keyvals (keys:target->keyval keys target)) + ;; apply pre-overrides before other variables. The pre-override vars must not + ;; clobbers things from the official sources such as megatest.config and runconfigs.config + (if (string? set-vars) + (let ((varpairs (string-split set-vars ","))) + (debug:print 4 *default-log-port* "varpairs: " varpairs) + (map (lambda (varpair) + (let ((varval (string-split varpair "="))) + (if (eq? (length varval) 2) + (let ((var (car varval)) + (val (cadr varval))) + (debug:print 1 *default-log-port* "Adding pre-var/val " var " = " val " to the environment") + (setenv var val))))) + varpairs))) + ;;(bb-check-path msg: "launch:execute post block 2") + (for-each + (lambda (varval) + (let ((var (car varval)) + (val (cadr varval))) + (if val + (setenv var val) + (begin + (debug:print-error 0 *default-log-port* "required variable " var " does not have a valid value. 
Exiting") + (exit))))) + (list + (list "MT_TEST_RUN_DIR" work-area) + (list "MT_TEST_NAME" test-name) + (list "MT_ITEM_INFO" (conc itemdat)) + (list "MT_ITEMPATH" item-path) + (list "MT_RUNNAME" runname) + (list "MT_MEGATEST" megatest) + (list "MT_TARGET" target) + (list "MT_LINKTREE" (common:get-linktree)) ;; (configf:lookup *configdat* "setup" "linktree")) + (list "MT_TESTSUITENAME" (common:get-area-name *alldat*)))) + ;;(bb-check-path msg: "launch:execute post block 3") + + (if mt-bindir-path (setenv "PATH" (conc (getenv "PATH") ":" mt-bindir-path))) + ;;(bb-check-path msg: "launch:execute post block 4") + ;; (change-directory top-path) + ;; Can setup as client for server mode now + ;; (client:setup) + + + ;; environment overrides are done *before* the remaining critical envars. + (alist->env-vars env-ovrd) + ;;(bb-check-path msg: "launch:execute post block 41") + (runs:set-megatest-env-vars run-id inkeys: keys inkeyvals: keyvals) + ;;(bb-check-path msg: "launch:execute post block 42") + (set-item-env-vars itemdat) + ;;(bb-check-path msg: "launch:execute post block 43") + (let ((blacklist (configf:lookup *configdat* "setup" "blacklistvars"))) + (if blacklist + (save-environment-as-files "megatest" ignorevars: (string-split blacklist)) + (save-environment-as-files "megatest"))) + ;;(bb-check-path msg: "launch:execute post block 44") + ;; open-run-close not needed for test-set-meta-info + ;; (tests:set-full-meta-info #f test-id run-id 0 work-area) + ;; (tests:set-full-meta-info test-id run-id 0 work-area) + (tests:set-full-meta-info #f test-id run-id 0 work-area 10) + + ;; (thread-sleep! 0.3) ;; NFS slowness has caused grief here + + (if (args:get-arg "-xterm") + (set! fullrunscript "xterm") + (if (and fullrunscript + (common:file-exists? fullrunscript) + (not (file-execute-access? 
fullrunscript))) + (system (conc "chmod ug+x " fullrunscript)))) + + ;; We are about to actually kick off the test + ;; so this is a good place to remove the records for + ;; any previous runs + ;; (db:test-remove-steps db run-id testname itemdat) + ;; now is also a good time to write the .testconfig file + (let* ((tconfig-fname (conc work-area "/.testconfig")) + (tconfig-tmpfile (conc tconfig-fname ".tmp")) + (tconfig (tests:get-testconfig test-name item-path tconfigreg #t force-create: #t))) ;; 'return-procs))) + (configf:write-alist tconfig tconfig-tmpfile) + (file-move tconfig-tmpfile tconfig-fname #t)) + ;; + (let* ((m (make-mutex)) + (kill-job? #f) + (exit-info (make-launch:einf pid: #t exit-status: #t exit-code: #t rollup-status: 0)) ;; pid exit-status exit-code (i.e. process was successfully run) rollup-status + (job-thread #f) + ;; (keep-going #t) + (misc-flags (let ((ht (make-hash-table))) + (hash-table-set! ht 'keep-going #t) + ht)) + (runit (lambda () + (launch:manage-steps run-id test-id item-path fullrunscript ezsteps subrun test-name tconfigreg exit-info m))) + (monitorjob (lambda () + (launch:monitor-job run-id test-id item-path fullrunscript ezsteps test-name tconfigreg exit-info m work-area runtlim misc-flags))) + (th1 (make-thread monitorjob "monitor job")) + (th2 (make-thread runit "run job"))) + (set! job-thread th2) + (thread-start! th1) + (thread-start! th2) + (thread-join! th2) + (debug:print-info 0 *default-log-port* "Megatest exectute of test " test-name ", item path " item-path " complete. Notifying the db ...") + (hash-table-set! misc-flags 'keep-going #f) + (thread-join! th1) + (thread-sleep! 1) ;; givbe thread th1 a chance to be done TODO: Verify this is needed. At 0.1 I was getting fail to stop, increased to total of 1.1 sec. + (mutex-lock! m) + (let* ((item-path (item-list->path itemdat)) + ;; only state and status needed - use lazy routine + (testinfo (rmt:get-testinfo-state-status run-id test-id))) + ;; Am I completed? 
+ (if (member (db:test-get-state testinfo) '("REMOTEHOSTSTART" "RUNNING")) ;; NOTE: It should *not* be REMOTEHOSTSTART but for reasons I don't yet understand it sometimes gets stuck in that state ;; (not (equal? (db:test-get-state testinfo) "COMPLETED")) + (let ((new-state (if kill-job? "KILLED" "COMPLETED") ;; (if (eq? (vector-ref exit-info 2) 0) ;; exited with "good" status + ;; "COMPLETED" ;; (db:test-get-state testinfo))) ;; else preseve the state as set within the test + ) + (new-status (cond + ((not (launch:einf-exit-status exit-info)) "FAIL") ;; job failed to run ... (vector-ref exit-info 1) + ((eq? (launch:einf-rollup-status exit-info) 0) ;; (vector-ref exit-info 3) + ;; if the current status is AUTO then defer to the calculated value (i.e. leave this AUTO) + (if (equal? (db:test-get-status testinfo) "AUTO") "AUTO" "PASS")) + ((eq? (launch:einf-rollup-status exit-info) 1) "FAIL") ;; (vector-ref exit-info 3) + ((eq? (launch:einf-rollup-status exit-info) 2) ;; (vector-ref exit-info 3) + ;; if the current status is AUTO the defer to the calculated value but qualify (i.e. make this AUTO-WARN) + (if (equal? (db:test-get-status testinfo) "AUTO") "AUTO-WARN" "WARN")) + ((eq? (launch:einf-rollup-status exit-info) 3) "CHECK") + ((eq? (launch:einf-rollup-status exit-info) 4) "WAIVED") + ((eq? (launch:einf-rollup-status exit-info) 5) "ABORT") + ((eq? (launch:einf-rollup-status exit-info) 6) "SKIP") + (else "FAIL")))) ;; (db:test-get-status testinfo))) + (debug:print-info 1 *default-log-port* "Test exited in state=" (db:test-get-state testinfo) ", setting state/status based on exit code of " (launch:einf-exit-status exit-info) " and rollup-status of " (launch:einf-rollup-status exit-info)) + (tests:test-set-status! 
run-id + test-id + new-state + new-status + (args:get-arg "-m") #f) + ;; need to update the top test record if PASS or FAIL and this is a subtest + ;; NO NEED TO CALL set-state-status-and-roll-up-items HERE, THIS IS DONE IN set-state-status-and-roll-up-items called by tests:test-set-status! + )) + ;; for automated creation of the rollup html file this is a good place... + (if (not (equal? item-path "")) + (tests:summarize-items run-id test-id test-name #f)) + (tests:summarize-test run-id test-id) ;; don't force - just update if no + (rmt:update-run-stats run-id (rmt:get-raw-run-stats run-id))) + (mutex-unlock! m) + (launch:end-of-run-check run-id ) + (debug:print 2 *default-log-port* "Output from running " fullrunscript ", pid " (launch:einf-pid exit-info) " in work area " + work-area ":\n====\n exit code " (launch:einf-exit-code exit-info) "\n" "====\n") + (if (not (launch:einf-exit-status exit-info)) + (exit 4)))) + ))) + +;; Spec for End of test +;; At end of each test call, after marking self as COMPLETED do run-state-status-rollup +;; At transition to run COMPLETED/X do hooks +;; Definition: test_dead if event_time + duration + 1 minute? < current_time AND +;; we can prove the process is not alive (ssh host pstree -A pid) +;; if dead safe to mark the test as killed in the db +;; State/status table +;; new +;; 100% COMPLETED/ (PASS,FAIL,ABORT etc.) 
;;                ==> COMPLETED / X where X is same as itemized rollup
;; > 3 RUNNING with not test_dead do nothing (run should already be RUNNING/ na
;; > 0 RUNNING and test_dead then send KILLREQ ==> COMPLETED
;; 0 RUNNING ==> this is actually the first condition, should not get here

;; Roll up the run state/status for run-id and decide what to do next:
;;   - everything launched and nothing left incomplete => run the post hook
;;   - more than 3 tests running                       => just log
;;   - 1..3 tests running                              => mark dead ones KILLREQ,
;;                                                        re-check if all of them were dead
;;   - otherwise                                       => log each not-completed test
;;
;; NOTE(review): the variable name "lunch-complete-<run-id>" looks like a typo
;; for "launch-complete-" but it is a runtime key; it must match whatever code
;; sets the variable, so it is left untouched here.
;;
(define (launch:end-of-run-check run-id )
  (let* ((not-completed-cnt (rmt:get-not-completed-cnt run-id))
         (running-cnt       (rmt:get-count-tests-running-for-run-id run-id))
         (all-test-launched (rmt:get-var (conc "lunch-complete-" run-id)))
         (current-state     (rmt:get-run-state run-id))
         (current-status    (rmt:get-run-status run-id)))
    ;;get-vars run-id to query metadata table to check if all completed. if all-test-launched = yes then only not-completed-cnt = 0 means everyting is completed if no entry found in the table do nothing
    (debug:print 0 *default-log-port* "rollup run state/status")
    (rmt:set-state-status-and-roll-up-run run-id current-state current-status)

    (cond
     ((and all-test-launched (eq? not-completed-cnt 0) (equal? all-test-launched "yes" ))
      (debug:print 0 *default-log-port* "look for post hook.")
      (runs:run-post-hook run-id))
     ((> running-cnt 3)
      (debug:print 0 *default-log-port* "There are " running-cnt " tests running." ))
     ((> running-cnt 0)
      (debug:print 0 *default-log-port* "running cnt > 0 but <= 3 kill-running-tests-if-dead" )
      (let ((kill-cnt (launch:kill-tests-if-dead run-id)))
        ;; only re-evaluate when every running test was found dead
        (if (and all-test-launched (equal? all-test-launched "yes") (eq? kill-cnt running-cnt))
            (launch:end-of-run-check run-id)))) ;;todo
     (else
      (debug:print 0 *default-log-port* "Should it get here?? May be everything is not launched yet. Running test cnt:" running-cnt " Not completed test cnt:" not-completed-cnt)
      (let ((not-completed-tests (rmt:get-tests-for-run run-id "%" `("NOT_STARTED" "RUNNING" "LAUNCHED" "REMOTEHOSTSTART") `() #f #f #f #f #f #f #f #f)))
        ;; fields 2 and 11 of a test record are read as test name and item path
        (for-each
         (lambda (running-test)
           (let ((test-name (vector-ref running-test 2))
                 (item-path (vector-ref running-test 11)))
             (debug:print 0 *default-log-port* "test " test-name "/" item-path " not completed")))
         not-completed-tests))))))

;; Ask host (via ssh + pstree) whether process pid still exists.
;;
;; Returns #f only when the remote pstree produced no output at all; returns
;; #t when output was seen, and also when host or pid is missing or host is
;; "n/a" (i.e. when liveness cannot be checked the test is assumed alive).
;;
;; NOTE(review): host and pid are spliced into a shell command unquoted, and
;; the diagnostic below goes to stdout via print rather than the log port.
;;
(define (launch:is-test-alive host pid)
  (if (and host pid (not (equal? host "n/a")))
      (let* ((cmd    (conc "ssh " host " pstree -A " pid))
             (output (with-input-from-pipe cmd read-lines)))
        (print "cmd: " cmd "\n op: " output )
        (not (null? output)))
      #t))

;; Scan the RUNNING/LAUNCHED/REMOTEHOSTSTART tests of run-id and set any test
;; that looks dead to KILLREQ. A test is treated as dead when
;; event-time + duration + 600s is in the past AND launch:is-test-alive
;; reports that its top process is gone.
;;
;; Returns the number of tests that were marked KILLREQ (0 when there are no
;; candidate tests - previously an empty list raised an error on car).
;;
(define (launch:kill-tests-if-dead run-id)
  (let ((running-tests (rmt:get-tests-for-run run-id "%" `("RUNNING" "LAUNCHED" "REMOTEHOSTSTART") `() #f #f #f #f #f #f #f #f)))
    (if (null? running-tests)
        0
        (let loop ((running-test (car running-tests))
                   (tal          (cdr running-tests))
                   (kill-cnt     0))
          (let* ((test-name  (vector-ref running-test 2))
                 (item-path  (vector-ref running-test 11))
                 (test-id    (vector-ref running-test 0))
                 (host       (vector-ref running-test 6))
                 (pid        (rmt:test-get-top-process-pid run-id test-id))
                 (event-time (vector-ref running-test 5))
                 (duration   (vector-ref running-test 12))
                 (curr-time  (current-seconds))
                 ;;test has not updated duration in last 10 min then likely its not running but confirm before marking it as killed
                 (dead?      (and (< (+ event-time duration 600) curr-time)
                                  (not (launch:is-test-alive host pid))))
                 (new-cnt    (if dead? (+ kill-cnt 1) kill-cnt)))
            (if dead?
                (begin
                  (debug:print 0 *default-log-port* "test " test-name "/" item-path " needs to be killed")
                  (rmt:set-state-status-and-roll-up-items run-id test-name item-path "KILLREQ" "n/a" #f)))
            (if (null? tal)
                new-cnt
                (loop (car tal) (cdr tal) new-cnt)))))))

;; DO NOT USE - caching of configs is handled in launch:setup now.
;;
;; When *configdat* is loaded and one of -run / -runtests / -execute was given,
;; writes *configdat* to <linktree>/<target>/<runname>/.megatest.cfg.<seconds>
;; and symlinks it to .megatest.cfg-<version>-<hash>, but only once the
;; matching .runconfig.<version>-<hash> file already exists.
;;
(define (launch:cache-config)
  ;; if we have a linktree and -runtests and -target and the directory exists dump the config
  ;; to megatest-(current-seconds).cfg and symlink it to megatest.cfg
  (if (and *configdat*
           (or (args:get-arg "-run")
               (args:get-arg "-runtests")
               (args:get-arg "-execute")))
      (let* ((linktree (common:get-linktree)) ;; (get-environment-variable "MT_LINKTREE"))
             (target   (common:args-get-target exit-if-bad: #t))
             (runname  (or (args:get-arg "-runname")
                           (args:get-arg ":runname")
                           (getenv "MT_RUNNAME")))
             (fulldir  (conc linktree "/"
                             target "/"
                             runname)))
        (if (and linktree (common:file-exists? linktree)) ;; can't proceed without linktree
            (begin
              (debug:print-info 0 *default-log-port* "Have -run with target=" target ", runname=" runname ", fulldir=" fulldir ", testpatt=" (or (args:get-arg "-testpatt") "%"))
              (if (not (common:file-exists? fulldir))
                  (create-directory fulldir #t)) ;; need to protect with exception handler
              (if (and target
                       runname
                       (common:file-exists? fulldir))
                  (let ((tmpfile  (conc fulldir "/.megatest.cfg." (current-seconds)))
                        (targfile (conc fulldir "/.megatest.cfg-" megatest-version "-" megatest-fossil-hash))
                        (rconfig  (conc fulldir "/.runconfig." megatest-version "-" megatest-fossil-hash)))
                    (if (common:file-exists? rconfig) ;; only cache megatest.config AFTER runconfigs has been cached
                        (begin
                          (debug:print-info 0 *default-log-port* "Caching megatest.config in " tmpfile)
                          (if (not (common:in-running-test?))
                              (configf:write-alist *configdat* tmpfile))
                          (system (conc "ln -sf " tmpfile " " targfile))))
                    )))
            (debug:print-info 1 *default-log-port* "No linktree yet, no caching configs.")))))


;; gather available information, if legit read configs in this order:
;;
;; if have cache;
;; read it a return it
;; else
;; megatest.config (do not cache)
;; runconfigs.config (cache if all vars avail)
;; megatest.config (cache if all vars avail)
;; returns:
;; *toppath*
;; side effects:
;; sets; *configdat* (megatest.config info)
;; *runconfigdat* (runconfigs.config info)
;; *configstatus* (status of the read data)
;;
;; The whole operation runs while holding *launch-setup-mutex*. dynamic-wind
;; is used so the mutex is released even when launch:setup-body raises;
;; previously an exception left the mutex locked.
;;
(define (launch:setup #!key (force-reread #f) (areapath #f))
  (dynamic-wind
   (lambda () (mutex-lock! *launch-setup-mutex*))
   (lambda ()
     (if (and *toppath*
              (eq? *configstatus* 'fulldata) (not force-reread)) ;; got it all
         (begin
           (debug:print 2 *default-log-port* "NOTE: skipping launch:setup-body call since we have fulldata")
           *toppath*)
         (launch:setup-body force-reread: force-reread areapath: areapath)))
   (lambda () (mutex-unlock! *launch-setup-mutex*))))

;; return paths depending on what info is available.
;;
;; Returns a pair (mtcachef . rccachef): the cache file names for
;; megatest.config and runconfigs.config. Either element is #f when no cache
;; directory could be determined. The cache directory is the test directory
;; if common:directory-writable? accepts it, else the run directory.
;; The areapath, toppath and mtconfig arguments are not used in this body.
;;
(define (launch:get-cache-file-paths areapath toppath target mtconfig)
  (let* ((use-cache (common:use-cache?))
         (runname   (common:args-get-runname))
         (linktree  (common:get-linktree))
         (testname  (common:get-full-test-name))
         (rundir    (if (and runname target linktree)
                        (common:directory-writable? (conc linktree "/" target "/" runname))
                        #f))
         (testdir   (if (and rundir testname)
                        (common:directory-writable? (conc rundir "/" testname))
                        #f))
         (cachedir  (or testdir rundir))
         (mtcachef  (and cachedir (conc cachedir "/" ".megatest.cfg-" megatest-version "-" megatest-fossil-hash)))
         (rccachef  (and cachedir (conc cachedir "/" ".runconfigs.cfg-" megatest-version "-" megatest-fossil-hash))))
    (debug:print-info 6 *default-log-port*
                      "runname=" runname
                      "\n linktree=" linktree
                      "\n testname=" testname
                      "\n rundir=" rundir
                      "\n testdir=" testdir
                      "\n cachedir=" cachedir
                      "\n mtcachef=" mtcachef
                      "\n rccachef=" rccachef)
    (cons mtcachef rccachef)))

;; Worker for launch:setup (which holds *launch-setup-mutex* around this call).
;;
;; Three strategies, chosen by the cond below:
;;   1. cache files exist, use-cache is on and MT_RUN_AREA_HOME is set:
;;      read both cached alists, status becomes 'fulldata
;;   2. cache file *names* are available: read megatest.config, runconfigs,
;;      megatest.config again, then try to write both cache files
;;   3. otherwise: read what is possible, status becomes 'partial; exits
;;      with code 2 when no config can be read
;; Afterwards the linktree dir and the <toppath>/lt link are created if
;; missing, MT_RUN_AREA_HOME / MT_TESTSUITENAME are exported, one more cache
;; write is attempted, and an optional -append-config file is merged in.
;;
;; Returns *toppath* (set to #f when the top path is not an existing directory).
;;
(define (launch:setup-body #!key (force-reread #f) (areapath #f))
  (if (and (eq? *configstatus* 'fulldata)
           *toppath*
           (not force-reread)) ;; no need to reprocess
      *toppath* ;; return toppath
      (let* ((use-cache (common:use-cache?)) ;; BB- use-cache checks
                                             ;; *configdat* for
                                             ;; use-cache setting.
                                             ;; We do not have
                                             ;; *configdat*.
                                             ;; Bootstrapping problem
                                             ;; here.
             (toppath    (or *toppath* areapath (getenv "MT_RUN_AREA_HOME"))) ;; preserve toppath
             (target     (common:args-get-target))
             (sections   (if target (list "default" target) #f)) ;; for runconfigs
             (mtconfig   (or (args:get-arg "-config") "megatest.config")) ;; allow overriding megatest.config
             (cachefiles (launch:get-cache-file-paths areapath toppath target mtconfig))
             ;; checking for null cachefiles should not be necessary,
             ;; I was seeing error car of '(), might be a chicken bug
             ;; or a red herring ...
             (mtcachef   (if (null? cachefiles)
                             #f
                             (car cachefiles))) ;; (and cachedir (conc
                                                ;; cachedir "/"
                                                ;; ".megatest.cfg-"
                                                ;; megatest-version
                                                ;; "-"
                                                ;; megatest-fossil-hash)))
             (rccachef   (if (null? cachefiles)
                             #f
                             (cdr cachefiles)))) ;; (and cachedir
                                                 ;; (conc cachedir "/"
                                                 ;; ".runconfigs.cfg-"
                                                 ;; megatest-version
                                                 ;; "-"
                                                 ;; megatest-fossil-hash)))
        ;; (cancreate (and
        ;; cachedir
        ;; (common:file-exists?
        ;; cachedir)(file-write-access?
        ;; cachedir) (not
        ;; (common:in-running-test?)))))
        (set! *toppath* toppath) ;; This is needed when we are running
                                 ;; as a test using CMDINFO as a
                                 ;; datasource (BB> "launch:setup-body
                                 ;; -- cachefiles="cachefiles)
        (cond
         ;; if mtcachef exists just read it, however we need to assume
         ;; toppath is available in $MT_RUN_AREA_HOME
         ((and (not force-reread)
               mtcachef rccachef
               use-cache
               (get-environment-variable "MT_RUN_AREA_HOME")
               (common:file-exists? mtcachef)
               (common:file-exists? rccachef))
          ;;(BB> "launch:setup-body -- cond branch 1 - use-cache")
          (set! *configdat* (configf:read-alist mtcachef))
          ;;(BB> "launch:setup-body -- 1 set! *configdat*="*configdat*)
          (set! *runconfigdat* (configf:read-alist rccachef))
          (set! *configinfo* (list *configdat* (get-environment-variable "MT_RUN_AREA_HOME")))
          (set! *configstatus* 'fulldata)
          (set! *toppath* (get-environment-variable "MT_RUN_AREA_HOME"))
          *toppath*)
         ;; there are no existing cached configs, do full reads of the
         ;; configs and cache them we have all the info needed to
         ;; fully process runconfigs and megatest.config
         ((and ;; (not force-reread) ;; force-reread is irrelevant in the AND, could however OR it?
           mtcachef
           rccachef) ;; BB- why are we doing this without asking if caching is desired?
          ;;(BB> "launch:setup-body -- cond branch 2")
          (let* ((first-pass   (find-and-read-config ;; NB// sets MT_RUN_AREA_HOME as side effect
                                mtconfig
                                environ-patt: "env-override"
                                given-toppath: toppath
                                pathenvvar: "MT_RUN_AREA_HOME"))
                 ;; first-pass may be #f (see the check below), so guard the
                 ;; car; previously (car #f) raised before the graceful
                 ;; recovery branch could run.
                 ;; NOTE(review): (car first-pass) is used as the config data
                 ;; further down and (cadr first-pass) as the top path, so
                 ;; when the outer toppath is unset this value is not a
                 ;; string and the MT_RUN_AREA_HOME fallback below is taken.
                 (first-rundat (let ((toppath (if toppath
                                                  toppath
                                                  (and first-pass (car first-pass)))))
                                 (read-config ;; (conc toppath "/runconfigs.config") ;; this should be converted to runconfig:read but it is non-trivial, leaving it for now.
                                  (conc (if (string? toppath)
                                            toppath
                                            (get-environment-variable "MT_RUN_AREA_HOME"))
                                        "/runconfigs.config")
                                  *runconfigdat* #t
                                  sections: sections))))
            (set! *runconfigdat* first-rundat)
            (if first-pass ;;
                (begin
                  ;;(BB> "launch:setup-body -- \"first-pass\"=first-pass")
                  (set! *configdat* (car first-pass))
                  ;;(BB> "launch:setup-body -- 2 set! *configdat*="*configdat*)
                  (set! *configinfo* first-pass)
                  (set! *toppath* (or toppath (cadr first-pass))) ;; use the gathered data unless already have it
                  (set! toppath *toppath*)
                  (if (not *toppath*)
                      (begin
                        (debug:print-error 0 *default-log-port* "you are not in a megatest area!")
                        (exit 1)))
                  (setenv "MT_RUN_AREA_HOME" *toppath*)
                  ;; the seed read is done, now read runconfigs, cache it then read megatest.config one more time and cache it
                  (let* ((keys         (rmt:get-keys))
                         (key-vals     (keys:target->keyval keys target))
                         (linktree     (common:get-linktree)) ;; (or (getenv "MT_LINKTREE")(if *configdat* (configf:lookup *configdat* "setup" "linktree") #f)))
                         ; (if *configdat*
                         ; (configf:lookup *configdat* "setup" "linktree")
                         ; (conc *toppath* "/lt"))))
                         (second-pass  (find-and-read-config
                                        mtconfig
                                        environ-patt: "env-override"
                                        given-toppath: toppath
                                        pathenvvar: "MT_RUN_AREA_HOME"))
                         (runconfigdat (begin ;; this read of the runconfigs will see any adjustments made by re-reading megatest.config
                                         (for-each (lambda (kt)
                                                     (setenv (car kt) (cadr kt)))
                                                   key-vals)
                                         (read-config (conc toppath "/runconfigs.config") *runconfigdat* #t ;; consider using runconfig:read some day ...
                                                      sections: sections)))
                         (cachefiles   (launch:get-cache-file-paths areapath toppath target mtconfig))
                         (mtcachef     (car cachefiles))
                         (rccachef     (cdr cachefiles)))
                    ;; trap exception due to stale NFS handle -- Error: (open-output-file) cannot open file - Stale NFS file handle: "/p/fdk/gwa/lefkowit/mtTesting/qa/primbeqa/links/p1222/11/PDK_r1.1.1/prim/clean/pcell_testgen/.runconfigs.cfg-1.6427-7d1e789cb3f62f9cde719a4865bb51b3c17ea853" - ticket 220546342
                    ;; TODO - consider 1) using simple-lock to bracket cache write
                    ;; 2) cache in hash on server, since need to do rmt: anyway to lock.

                    (if rccachef
                        (common:fail-safe
                         (lambda ()
                           (configf:write-alist runconfigdat rccachef))
                         (conc "Could not write cache file - "rccachef)))
                    (if mtcachef
                        (common:fail-safe
                         (lambda ()
                           (configf:write-alist *configdat* mtcachef))
                         (conc "Could not write cache file - "mtcachef)))
                    (set! *runconfigdat* runconfigdat)
                    (if (and rccachef mtcachef) (set! *configstatus* 'fulldata))))
                ;; no configs found? should not happen but let's try to recover gracefully, return an empty hash-table
                (set! *configdat* (make-hash-table))
                )))

         ;; else read what you can and set the flag accordingly
         ;; here we don't have either mtconfig or rccachef
         (else
          ;;(BB> "launch:setup-body -- cond branch 3 - else")
          (let* ((cfgdat (find-and-read-config
                          (or (args:get-arg "-config") "megatest.config")
                          environ-patt: "env-override"
                          given-toppath: (get-environment-variable "MT_RUN_AREA_HOME")
                          pathenvvar: "MT_RUN_AREA_HOME")))

            (if (and cfgdat (list? cfgdat) (> (length cfgdat) 0) (hash-table? (car cfgdat)))
                (let* ((toppath (or (get-environment-variable "MT_RUN_AREA_HOME")(cadr cfgdat)))
                       (rdat    (read-config (conc toppath ;; convert this to use runconfig:read!
                                                   "/runconfigs.config") *runconfigdat* #t sections: sections)))
                  (set! *configinfo* cfgdat)
                  (set! *configdat* (car cfgdat))
                  (set! *runconfigdat* rdat)
                  (set! *toppath* toppath)
                  (set! *configstatus* 'partial)
                  ;; set up as many vars in *alldat* as possible here
                  (alldat-areapath-set! *alldat* toppath)
                  (alldat-log-port-set! *alldat* *default-log-port*)
                  (alldat-mtconfig-set! *alldat* *configdat*)

                  )
                (begin
                  (debug:print-error 0 *default-log-port* "No " mtconfig " file found. Giving up.")
                  (exit 2))))))
        ;; COND ends here.

        ;; additional house keeping
        (let* ((linktree (or (common:get-linktree)
                             (conc *toppath* "/lt"))))
          (if linktree
              (begin
                (if (not (common:file-exists? linktree))
                    (begin
                      (handle-exceptions
                       exn
                       (begin
                         (debug:print-error 0 *default-log-port* "Something went wrong when trying to create linktree dir at " linktree)
                         (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
                         (exit 1))
                       (create-directory linktree #t))))
                (handle-exceptions
                 exn
                 (begin
                   (debug:print-error 0 *default-log-port* "Something went wrong when trying to create link to linktree at " *toppath*)
                   (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)))
                 (let ((tlink (conc *toppath* "/lt")))
                   (if (not (common:file-exists? tlink))
                       (create-symbolic-link linktree tlink)))))
              (begin
                (debug:print-error 0 *default-log-port* "linktree not defined in [setup] section of megatest.config")
                )))
        (if (and *toppath*
                 (directory-exists? *toppath*))
            (begin
              (setenv "MT_RUN_AREA_HOME" *toppath*)
              (setenv "MT_TESTSUITENAME" (common:get-area-name *alldat*)))
            (begin
              (debug:print-error 0 *default-log-port* "failed to find the top path to your Megatest area.")
              (set! *toppath* #f) ;; force it to be false so we return #f
              #f))

        ;; one more attempt to cache the configs for future reading
        (let* ((cachefiles (launch:get-cache-file-paths areapath toppath target mtconfig))
               (mtcachef   (car cachefiles))
               (rccachef   (cdr cachefiles)))

          ;; trap exception due to stale NFS handle -- Error: (open-output-file) cannot open file - Stale NFS file handle: "...somepath.../.runconfigs.cfg-1.6427-7d1e789cb3f62f9cde719a4865bb51b3c17ea853" - ticket 220546342
          ;; TODO - consider 1) using simple-lock to bracket cache write
          ;; 2) cache in hash on server, since need to do rmt: anyway to lock.
          (if (and rccachef *runconfigdat* (not (common:file-exists? rccachef)))
              (common:fail-safe
               (lambda ()
                 (configf:write-alist *runconfigdat* rccachef))
               (conc "Could not write cache file - "rccachef))
              )
          (if (and mtcachef *configdat* (not (common:file-exists? mtcachef)))
              (common:fail-safe
               (lambda ()
                 (configf:write-alist *configdat* mtcachef))
               (conc "Could not write cache file - "mtcachef))
              )
          (if (and rccachef mtcachef *runconfigdat* *configdat*)
              (set! *configstatus* 'fulldata)))

        ;; if have -append-config then read and append here
        (let ((cfname (args:get-arg "-append-config")))
          (if (and cfname
                   (file-read-access? cfname))
              (read-config cfname *configdat* #t))) ;; values are added to the hash, no need to do anything special.
        *toppath*)))

;; Pick the disk to run a test on.
;;
;; disks come from the "disks" entry of testconfig if present, else of
;; confdat; minspace is [setup] minspace from confdat (default "10000").
;; When common:get-disk-with-most-free-space finds a disk the cdr of its
;; result is returned. Otherwise the configured disk paths are tried longest
;; first with create-directory, falling back to <*toppath*>/runs.
;;
;; NOTE(review): the fallback branches return different shapes - a
;; (1 . path) pair for the *toppath* fallback but the bare create-directory
;; result for a disk that could be created - and nothing is returned when no
;; disks are configured at all. Confirm what callers expect.
;;
(define (get-best-disk confdat testconfig)
  (let* ((disks    (or (and testconfig (hash-table-ref/default testconfig "disks" #f))
                       (hash-table-ref/default confdat "disks" #f)))
         (minspace (let ((m (configf:lookup confdat "setup" "minspace")))
                     (string->number (or m "10000")))))
    (if disks
        (let ((res (common:get-disk-with-most-free-space disks minspace))) ;; min size of 1000, seems tad dumb
          (if res
              (cdr res)
              (begin
                ;; (if (common:low-noise-print 20 "No valid disks or no disk with enough space")
                ;; (debug:print-error 0 *default-log-port* "No valid disks found in megatest.config. Please add some to your [disks] section and ensure the directory exists and has enough space!\n You can change minspace in the [setup] section of megatest.config. Current setting is: " minspace))
                ;;(exit 1)
                (if (null? disks)
                    (cons 1 (conc *toppath* "/runs"))
                    (let ((paths (sort disks (lambda (x y) (> (string-length (cadr x)) (string-length (cadr y)))))))
                      (let loop ((head (car paths)) (tail (cdr paths)))
                        (let ((result (handle-exceptions exn #f (create-directory (cadr head) #t))))
                          (if result
                              result
                              (if (null? tail)
                                  (cons 1 (conc *toppath* "/runs"))
                                  (loop (car tail) (cdr tail)))))))))))))) ;; the code creates the necessary directories if it does not exist and returns the path.
;; Copy the test source tree at test-src-path into test-path.
;;
;; Uses [setup] testcopycmd from *configdat* when set (with TEST_SRC_PATH and
;; TEST_TARG_PATH substituted), otherwise rsync with output appended to
;; <test-path>/mt_launch.log. A non-zero exit status is only logged; the
;; return value is that of the final if.
;;
(define (launch:test-copy test-src-path test-path)
  (let* ((ovrcmd (let ((cmd (config-lookup *configdat* "setup" "testcopycmd")))
                   (if cmd
                       ;; substitute the TEST_SRC_PATH and TEST_TARG_PATH
                       (string-substitute "TEST_TARG_PATH" test-path
                                          (string-substitute "TEST_SRC_PATH" test-src-path cmd #t) #t)
                       #f)))
         (cmd    (if ovrcmd
                     ovrcmd
                     (conc "rsync -av" (if (debug:debug-mode 1) "" "q") " " test-src-path "/ " test-path "/"
                           " >> " test-path "/mt_launch.log 2>> " test-path "/mt_launch.log")))
         (status (system cmd)))
    (if (not (eq? status 0))
        (debug:print 2 *default-log-port* "ERROR: problem with running \"" cmd "\""))))


;; Desired directory structure:
;;
;; - - -.
;; |
;; v
;; - - -|-
;;
;; dir stored in test is:
;;
;; - - [ - ]
;;
;; All log file links should be stored relative to the top of link path
;;
;; - [ - ]
;;
;; NOTE(review): the placeholder names in the diagram above appear to have
;; been lost (probably angle-bracketed words); restore from project history.
;;
;; Create the physical run directory for a test under disk-path and the
;; matching links under the linktree, record both in the test record, then
;; copy the test sources in.
;;
;;   run-info : a run name string, or a run record the runname is looked up in
;;   itemdat  : an item path string, or an item list converted with item-list->path
;;   remtries : remaining retries of the whole procedure when the work area
;;              directory does not exist at the end
;;
;; Returns (list lnkpathf lnkpath) on success, (list #f #f) when
;; test-src-path is false or all retries are used up. Several failure paths
;; terminate the process with (exit 1).
;;
(define (create-work-area run-id run-info keyvals test-id test-src-path disk-path testname itemdat #!key (remtries 2))
  (let* ((item-path    (if (string? itemdat) itemdat (item-list->path itemdat))) ;; if pass in string - just use it
         (runname      (if (string? run-info) ;; if we pass in a string as run-info use it as run-name.
                           run-info
                           (db:get-value-by-header (db:get-rows run-info)
                                                   (db:get-header run-info)
                                                   "runname")))
         (contour      #f) ;; NOT READY FOR THIS (args:get-arg "-contour"))
         ;; convert back to db: from rdb: - this is always run at server end
         (target       (string-intersperse (map cadr keyvals) "/"))

         (not-iterated (equal? "" item-path))

         ;; all tests are found at /test-base or /test-base
         (testtop-base (conc target "/" runname "/" testname))
         (test-base    (conc testtop-base (if not-iterated "" "/") item-path))

         ;; nb// if itempath is not "" then it is prefixed with "/"
         (toptest-path (conc disk-path (if contour (conc "/" contour) "") "/" testtop-base))
         (test-path    (conc disk-path (if contour (conc "/" contour) "") "/" test-base))

         ;; ensure this exists first as links to subtests must be created there
         (linktree     (common:get-linktree))
         ;; WAS: (let ((rd (config-lookup *configdat* "setup" "linktree")))
         ;; (if rd rd (conc *toppath* "/runs"))))
         ;; which seems wrong ...

         (lnkbase      (conc linktree (if contour (conc "/" contour) "") "/" target "/" runname))
         (lnkpath      (conc lnkbase "/" testname))
         (lnkpathf     (conc lnkpath (if not-iterated "" "/") item-path))
         (lnktarget    (conc lnkpath "/" item-path)))

    ;; Update the rundir path in the test record for all, rundir=physical, shortdir=logical
    ;; rundir shortdir
    (rmt:general-call 'test-set-rundir-shortdir run-id lnkpathf test-path testname item-path run-id)

    (debug:print 2 *default-log-port* "INFO:\n lnkbase=" lnkbase "\n lnkpath=" lnkpath "\n toptest-path=" toptest-path "\n test-path=" test-path)
    (if (not (common:file-exists? linktree))
        (begin
          (debug:print 0 *default-log-port* "WARNING: linktree did not exist! Creating it now at " linktree)
          (create-directory linktree #t))) ;; (system (conc "mkdir -p " linktree))))
    ;; create the directory for the tests dir links, this is needed no matter what... try up to three times
    ;; The success flag used to be inverted (#t on exception, #f after a
    ;; successful create) so a failed create was never retried.
    (let loop ((tries-left 3))
      (let ((success (or (common:directory-exists? lnkbase)
                         (common:file-exists? lnkbase)
                         (handle-exceptions
                          exn
                          (begin
                            (debug:print-error 0 *default-log-port* "Problem creating linktree base at " lnkbase)
                            (print-error-message exn (current-error-port))
                            #f)
                          (create-directory lnkbase #t)
                          #t))))
        (if (and (not success) (> tries-left 1))
            (loop (- tries-left 1)))))

    ;; update the toptest record with its location rundir, cache the path
    ;; This wass highly inefficient, one db write for every subtest, potentially
    ;; thousands of unnecessary updates, cache the fact it was set and don't set it
    ;; again.

    ;; Now create the link from the test path to the link tree, however
    ;; if the test is iterated it is necessary to create the parent path
    ;; to the iteration. use pathname-directory to trim the path by one
    ;; level
    (if (not not-iterated) ;; i.e. iterated
        (let ((iterated-parent (pathname-directory (conc lnkpath "/" item-path))))
          (debug:print-info 2 *default-log-port* "Creating iterated parent " iterated-parent)
          (handle-exceptions
           exn
           (begin
             (debug:print-error 0 *default-log-port* " Failed to create directory " iterated-parent ((condition-property-accessor 'exn 'message) exn) ", exiting")
             (exit 1))
           (create-directory iterated-parent #t))))

    (if (symbolic-link? lnkpath)
        (handle-exceptions
         exn
         (begin
           (debug:print-error 0 *default-log-port* " Failed to remove symlink " lnkpath ((condition-property-accessor 'exn 'message) exn) ", exiting")
           (exit 1))
         (delete-file lnkpath)))

    (if (not (or (common:file-exists? lnkpath)
                 (symbolic-link? lnkpath)))
        (handle-exceptions
         exn
         (begin
           (debug:print-error 0 *default-log-port* " Failed to create symlink " lnkpath ((condition-property-accessor 'exn 'message) exn) ", exiting")
           (exit 1))
         (create-symbolic-link toptest-path lnkpath)))

    ;; NB - This was not working right - some top tests are not getting the path set!!!
    ;;
    ;; Do the setting of this record after the paths are created so that the shortdir can
    ;; be set to the real directory location. This is safer for future clean up if the link
    ;; tree is damaged or lost.
    ;;
    (if (not (hash-table-ref/default *toptest-paths* testname #f))
        (let* ((testinfo       (rmt:get-test-info-by-id run-id test-id)) ;; run-id testname item-path))
               (curr-test-path (if testinfo ;; (filedb:get-path *fdb*
                                            ;; (db:get-path dbstruct
                                            ;; (rmt:sdb-qry 'getstr
                                   (db:test-get-rundir testinfo) ;; ) ;; )
                                   #f)))
          (hash-table-set! *toptest-paths* testname curr-test-path)
          ;; NB// Was this for the test or for the parent in an iterated test?
          (rmt:general-call 'test-set-rundir-shortdir run-id lnkpath
                            (if (common:file-exists? lnkpath)
                                ;; (resolve-pathname lnkpath)
                                (common:nice-path lnkpath)
                                lnkpath)
                            testname "" run-id)
          ;; (rmt:general-call 'test-set-rundir run-id lnkpath testname "") ;; toptest-path)
          (if (or (not curr-test-path)
                  (not (directory-exists? toptest-path)))
              (begin
                (debug:print-info 2 *default-log-port* "Creating " toptest-path " and link " lnkpath)
                (handle-exceptions
                 exn
                 #f ;; don't care to catch and deal with errors here for now.
                 (create-directory toptest-path #t))
                (hash-table-set! *toptest-paths* testname toptest-path)))))

    ;; The toptest path has been created, the link to the test in the linktree has
    ;; been created. Now, if this is an iterated test the real test dir must be created
    (if (not not-iterated) ;; this is an iterated test
        (begin ;; (let ((lnktarget (conc lnkpath "/" item-path)))
          (debug:print 2 *default-log-port* "Setting up sub test run area")
          (debug:print 2 *default-log-port* " - creating run area in " test-path)
          (handle-exceptions
           exn
           (begin
             (debug:print-error 0 *default-log-port* " Failed to create directory " test-path ((condition-property-accessor 'exn 'message) exn) ", exiting")
             (exit 1))
           (create-directory test-path #t))
          (debug:print 2 *default-log-port*
                       " - creating link from: " test-path "\n"
                       " to: " lnktarget)

          ;; If there is already a symlink delete it and recreate it.
          (handle-exceptions
           exn
           (begin
             (debug:print-error 0 *default-log-port* " Failed to re-create link " lnktarget ((condition-property-accessor 'exn 'message) exn) ", exiting")
             ;; was (exit), i.e. status 0, on a failure path; every other
             ;; handler in this procedure exits with status 1
             (exit 1))
           (if (symbolic-link? lnktarget) (delete-file lnktarget))
           (if (not (common:file-exists? lnktarget)) (create-symbolic-link test-path lnktarget)))))

    (if (not (directory? test-path))
        (create-directory test-path #t)) ;; this is a hack, I don't know why out of the blue this path does not exist sometimes

    (cond
     ((and test-src-path (directory? test-path))
      (launch:test-copy test-src-path test-path)
      (list lnkpathf lnkpath ))
     ((and test-src-path (> remtries 0))
      (debug:print-error 0 *default-log-port* "Failed to create work area at " test-path " with link at " lnktarget ", remaining attempts " remtries)
      ;;
      (create-work-area run-id run-info keyvals test-id test-src-path disk-path testname itemdat remtries: (- remtries 1)))
     (else
      (list #f #f)))))


;; Rate-limited zombie scan for run-id.
;;
;; The time of the last scan is kept in the variable "zombiescan-runid-<run-id>"
;; (read/written through rmt:get-var / rmt:set-var). A new scan is done when
;; no value is stored or the stored time is older than 2 x [setup] deadtime
;; seconds (deadtime defaults to 120); the scan itself is
;; rmt:find-and-mark-incomplete.
;;
(define (launch:handle-zombie-tests run-id)
  (let* ((key       (conc "zombiescan-runid-"run-id))
         (now       (current-seconds))
         (threshold (- now (* 2 (or (configf:lookup-number *configdat* "setup" "deadtime") 120))))
         (val       (rmt:get-var key))
         (do-scan?  (or (not val)
                        (< val threshold))))
    (when do-scan?
      (debug:print 1 *default-log-port* "INFO: search and mark zombie tests")
      (rmt:set-var key now)
      (rmt:find-and-mark-incomplete run-id #f))))





;; 1. look though disks list for disk with most space
;; 2. create run dir on disk, path name is meaningful
;; 3. create link from run dir to megatest runs area
;; 4. remotely run the test on allocated host
;; - could be ssh to host from hosts table (update regularly with load)
;; - could be netbatch
;; (launch-test db (cadr status) test-conf))
(define (launch-test test-id run-id run-info keyvals runname test-conf test-name test-path itemdat params)
  (mutex-lock!
*launch-setup-mutex*) ;; setting variables and processing the testconfig is NOT thread-safe, reuse the launch-setup mutex + (let* ( ;; (lock-key (conc "test-" test-id)) + ;; (got-lock (let loop ((lock (rmt:no-sync-get-lock lock-key)) + ;; (expire-time (+ (current-seconds) 15))) ;; give up on getting the lock and steal it after 15 seconds + ;; (if (car lock) + ;; #t + ;; (if (> (current-seconds) expire-time) + ;; (begin + ;; (debug:print-info 0 *default-log-port* "Timed out waiting for a lock to launch test " keyvals " " runname " " test-name " " test-path) + ;; (rmt:no-sync-del! lock-key) ;; destroy the lock + ;; (loop (rmt:no-sync-get-lock lock-key) expire-time)) ;; + ;; (begin + ;; (thread-sleep! 1) + ;; (loop (rmt:no-sync-get-lock lock-key) expire-time)))))) + (item-path (item-list->path itemdat)) + (contour #f)) ;; NOT READY FOR THIS (args:get-arg "-contour"))) + (let loop ((delta (- (current-seconds) *last-launch*)) + (launch-delay (configf:lookup-number *configdat* "setup" "launch-delay" default: 1))) + (if (> launch-delay delta) + (begin + (if (common:low-noise-print 1200 "test launch delay") ;; every two hours or so remind the user about launch delay. + (debug:print-info 0 *default-log-port* "NOTE: test launches are delayed by " launch-delay " seconds. See megatest.config launch-delay setting to adjust.")) ;; launch of " test-name " for " (- launch-delay delta) " seconds")) + (thread-sleep! 
(- launch-delay delta)) + (loop (- (current-seconds) *last-launch*) launch-delay)))) + (change-directory *toppath*) + (alist->env-vars ;; consolidate this code with the code in megatest.scm for "-execute", *maybe* - the longer they are set the longer each launch takes (must be non-overlapping with the vars) + (append + (list + (list "MT_RUN_AREA_HOME" *toppath*) + (list "MT_TEST_NAME" test-name) + (list "MT_RUNNAME" runname) + (list "MT_ITEMPATH" item-path) + (list "MT_CONTOUR" contour) + ) + itemdat)) + (let* ((tregistry (tests:get-all)) ;; third param (below) is system-allowed + ;; for tconfig, why do we allow fallback to test-conf? + (tconfig (or (tests:get-testconfig test-name item-path tregistry #t force-create: #t) + (begin + (debug:print 0 *default-log-port* "WARNING: falling back to pre-calculated testconfig. This is likely not desired.") + test-conf))) ;; force re-read now that all vars are set + (useshell (let ((ush (config-lookup *configdat* "jobtools" "useshell"))) + (if ush + (if (equal? ush "no") ;; must use "no" to NOT use shell + #f + ush) + #t))) ;; default is yes + (runscript (config-lookup tconfig "setup" "runscript")) + (ezsteps (> (length (hash-table-ref/default tconfig "ezsteps" '())) 0)) ;; don't send all the steps, could be big, just send a flag + (subrun (> (length (hash-table-ref/default tconfig "subrun" '())) 0)) ;; send a flag to process a subrun + ;; (diskspace (config-lookup tconfig "requirements" "diskspace")) + ;; (memory (config-lookup tconfig "requirements" "memory")) + ;; (hosts (config-lookup *configdat* "jobtools" "workhosts")) ;; I'm pretty sure this was never completed + (remote-megatest (config-lookup *configdat* "setup" "executable")) + (run-time-limit (or (configf:lookup tconfig "requirements" "runtimelim") + (configf:lookup *configdat* "setup" "runtimelim"))) + ;; FIXME SOMEDAY: not good how this is so obtuse, this hack is to + ;; allow running from dashboard. 
Extract the path + ;; from the called megatest and convert dashboard + ;; or dboard to megatest + (local-megatest (let* ((lm (car (argv))) + (dir (pathname-directory lm)) + (exe (pathname-strip-directory lm))) + (conc (if dir (conc dir "/") "") + (case (string->symbol exe) + ((dboard) "../megatest") + ((mtest) "../megatest") + ((dashboard) "megatest") + (else exe))))) + (launcher (common:get-launcher *configdat* test-name item-path)) ;; (config-lookup *configdat* "jobtools" "launcher")) + (test-sig (conc (common:get-area-name *alldat*) ":" test-name ":" item-path)) ;; (item-list->path itemdat))) ;; test-path is the full path including the item-path + (work-area #f) + (toptest-work-area #f) ;; for iterated tests the top test contains data relevant for all + (diskpath #f) + (cmdparms #f) + (fullcmd #f) ;; (define a (with-output-to-string (lambda ()(write x)))) + (mt-bindir-path #f) + (testinfo (rmt:get-test-info-by-id run-id test-id)) + (mt_target (string-intersperse (map cadr keyvals) "/")) + (debug-param (append (if (args:get-arg "-debug") (list "-debug" (args:get-arg "-debug")) '()) + (if (args:get-arg "-logging")(list "-logging") '())))) + ;; (if hosts (set! hosts (string-split hosts))) + ;; set the megatest to be called on the remote host + (if (not remote-megatest)(set! remote-megatest local-megatest)) ;; "megatest")) + (set! mt-bindir-path (pathname-directory remote-megatest)) + (if launcher (set! 
launcher (string-split launcher))) + ;; set up the run work area for this test + (if (and (args:get-arg "-preclean") ;; user has requested to preclean for this run + (not (member (db:test-get-rundir testinfo)(list "n/a" "/tmp/badname")))) ;; n/a is a placeholder and thus not a read dir + (begin + (debug:print-info 0 *default-log-port* "attempting to preclean directory " (db:test-get-rundir testinfo) " for test " test-name "/" item-path) + (runs:remove-test-directory testinfo 'remove-data-only))) ;; remove data only, do not perturb the record + + ;; prevent overlapping actions - set to LAUNCHED as early as possible + ;; + ;; the following call handles waiver propogation. cannot yet condense into roll-up-pass-fail + (tests:test-set-status! run-id test-id "LAUNCHED" "n/a" #f #f) ;; (if launch-results launch-results "FAILED")) + (rmt:set-state-status-and-roll-up-items run-id test-name item-path #f "LAUNCHED" #f) + ;; (pp (hash-table->alist tconfig)) + (set! diskpath (get-best-disk *configdat* tconfig)) + (if diskpath + (let ((dat (create-work-area run-id run-info keyvals test-id test-path diskpath test-name itemdat))) + (set! work-area (car dat)) + (set! toptest-work-area (cadr dat)) + (debug:print-info 2 *default-log-port* "Using work area " work-area)) + (begin + (set! work-area (conc test-path "/tmp_run")) + (create-directory work-area #t) + (debug:print 0 *default-log-port* "WARNING: No disk work area specified - running in the test directory under tmp_run"))) + (set! 
cmdparms (base64:base64-encode + (z3:encode-buffer + (with-output-to-string + (lambda () ;; (list 'hosts hosts) + (write (list (list 'testpath test-path) + ;; (list 'transport (conc *transport-type*)) + ;; (list 'serverinf *server-info*) + (list 'homehost (let* ((hhdat (common:get-homehost))) + (if hhdat + (car hhdat) + #f))) + (list 'serverurl (if *alldat* + (alldat-server-url *alldat*) + #f)) ;; + (list 'areaname (common:get-area-name *alldat*)) + (list 'toppath *toppath*) + (list 'work-area work-area) + (list 'test-name test-name) + (list 'runscript runscript) + (list 'run-id run-id ) + (list 'test-id test-id ) + ;; (list 'item-path item-path ) + (list 'itemdat itemdat ) + (list 'megatest remote-megatest) + (list 'ezsteps ezsteps) + (list 'subrun subrun) + (list 'target mt_target) + (list 'contour contour) + (list 'runtlim (if run-time-limit (common:hms-string->seconds run-time-limit) #f)) + (list 'env-ovrd (hash-table-ref/default *configdat* "env-override" '())) + (list 'set-vars (if params (hash-table-ref/default params "-setvars" #f))) + (list 'runname runname) + (list 'mt-bindir-path mt-bindir-path)))))))) + + ;; clean out step records from previous run if they exist + ;; (rmt:delete-test-step-records run-id test-id) + ;; if the dir does not exist we may have a itempath where individual variables are a path, launch anyway + (if (common:file-exists? work-area) + (change-directory work-area)) ;; so that log files from the launch process don't clutter the test dir + (cond + ;; ((and launcher hosts) ;; must be using ssh hostname + ;; (set! fullcmd (append launcher (car hosts)(list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param))) + ;; (set! fullcmd (append launcher (car hosts)(list remote-megatest test-sig "-execute" cmdparms)))) + (launcher + (set! fullcmd (append launcher (list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param))) + ;; (set! 
fullcmd (append launcher (list remote-megatest test-sig "-execute" cmdparms)))) + (else + (if (not useshell)(debug:print 0 *default-log-port* "WARNING: internal launching will not work well without \"useshell yes\" in your [jobtools] section")) + (set! fullcmd (append (list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param (list (if useshell "&" "")))))) + ;; (set! fullcmd (list remote-megatest test-sig "-execute" cmdparms (if useshell "&" ""))))) + (if (args:get-arg "-xterm")(set! fullcmd (append fullcmd (list "-xterm")))) + (debug:print 1 *default-log-port* "Launching " work-area) + ;; set pre-launch-env-vars before launching, keep the vars in prevvals and put the envionment back when done + (debug:print 4 *default-log-port* "fullcmd: " fullcmd) + (set! *last-launch* (current-seconds)) ;; all that junk above takes time, set this as late as possible. + (let* ((commonprevvals (alist->env-vars + (hash-table-ref/default *configdat* "env-override" '()))) + (miscprevvals (alist->env-vars ;; consolidate this code with the code in megatest.scm for "-execute" + (append (list (list "MT_TEST_RUN_DIR" work-area) + (list "MT_TEST_NAME" test-name) + (list "MT_ITEM_INFO" (conc itemdat)) + (list "MT_RUNNAME" runname) + (list "MT_TARGET" mt_target) + (list "MT_ITEMPATH" item-path) + ) + itemdat))) + (testprevvals (alist->env-vars + (hash-table-ref/default tconfig "pre-launch-env-overrides" '()))) + ;; Launchwait defaults to true, must override it to turn off wait + (launchwait (if (equal? (configf:lookup *configdat* "setup" "launchwait") "no") #f #t)) + (launch-results-prev (apply (if launchwait ;; BB: TODO: refactor this to examine return code of launcher, if nonzero, set state to launch failed. 
+ process:cmd-run-with-stderr-and-exitcode->list + process-run) + (if useshell + (let ((cmdstr (string-intersperse fullcmd " "))) + (if launchwait + cmdstr + (conc cmdstr " >> mt_launch.log 2>&1 &"))) + (car fullcmd)) + (if useshell + '() + (cdr fullcmd)))) + (success (if launchwait (equal? 0 (cadr launch-results-prev)) #t)) + (launch-results (if launchwait (car launch-results-prev) launch-results-prev))) + (if (not success) + (tests:test-set-status! run-id test-id "COMPLETED" "DEAD" "launcher failed; exited non-zero; check mt_launch.log" #f)) ;; (if launch-results launch-results "FAILED")) + (mutex-unlock! *launch-setup-mutex*) ;; yes, really should mutex all the way to here. Need to put this entire process into a fork. + ;; (rmt:no-sync-del! lock-key) ;; release the lock for starting this test + (if (not launchwait) ;; give the OS a little time to allow the process to start + (thread-sleep! 0.01)) + (with-output-to-file "mt_launch.log" + (lambda () + (print "LAUNCHCMD: " (string-intersperse fullcmd " ")) + (if (list? launch-results) + (apply print launch-results) + (print "NOTE: launched \"" fullcmd "\"\n but did not wait for it to proceed. Add the following to megatest.config \n[setup]\nlaunchwait yes\n if you have problems with this")) + #:append)) + (debug:print 2 *default-log-port* "Launching completed, updating db") + (debug:print 2 *default-log-port* "Launch results: " launch-results) + (if (not launch-results) + (begin + (print "ERROR: Failed to run " (string-intersperse fullcmd " ") ", exiting now") + ;; (sqlite3:finalize! db) + ;; good ole "exit" seems not to work + ;; (_exit 9) + ;; but this hack will work! Thanks go to Alan Post of the Chicken email list + ;; NB// Is this still needed? Should be safe to go back to "exit" now? 
+ (process-signal (current-process-id) signal/kill) + )) + (alist->env-vars miscprevvals) + (alist->env-vars testprevvals) + (alist->env-vars commonprevvals) + launch-results)) + (change-directory *toppath*))) + +;; recover a test where the top controlling mtest may have died +;; +(define (launch:recover-test run-id test-id) + ;; this function is called on the test run host via ssh + ;; + ;; 1. look at the process from pid + ;; - is it owned by calling user + ;; - it it's run directory correct for the test + ;; - is there a controlling mtest (maybe stuck) + ;; 2. if recovery is needed watch pid + ;; - when it exits take the exit code and do the needful + ;; + (let* ((pid (rmt:test-get-top-process-id run-id test-id)) + (psres (with-input-from-pipe + (conc "ps -F -u " (current-user-name) " | grep -E '" pid " ' | grep -v 'grep -E " pid "'") + (lambda () + (read-line)))) + (rundir (if (string? psres) ;; real process owned by user + (read-symbolic-link (conc "/proc/" pid "/cwd")) + #f))) + ;; now wait on that process if all is correct + ;; periodically update the db with runtime + ;; when the process exits look at the db, if still RUNNING after 10 seconds set + ;; state/status appropriately + (process-wait pid))) + +;;====================================================================== +;; lock-queue +;;====================================================================== + + +;;====================================================================== +;; attempt to prevent overlapping updates of rollup files by queueing +;; update requests in an sqlite db +;;====================================================================== + +;;====================================================================== +;; db record, +;;====================================================================== + +(define (make-lock-queue:db-dat)(make-vector 3)) +(define-inline (lock-queue:db-dat-get-db vec) (vector-ref vec 0)) +(define-inline (lock-queue:db-dat-get-path vec) (vector-ref 
vec 1)) +(define-inline (lock-queue:db-dat-set-db! vec val)(vector-set! vec 0 val)) +(define-inline (lock-queue:db-dat-set-path! vec val)(vector-set! vec 1 val)) + +(define (lock-queue:delete-lock-db dbdat) + (let ((fname (lock-queue:db-dat-get-path dbdat))) + (system (conc "rm -f " fname "*")))) + +(define (lock-queue:open-db fname #!key (count 10)) + (let* ((actualfname (conc fname ".lockdb")) + (dbexists (common:file-exists? actualfname)) + (db (sqlite3:open-database actualfname)) + (handler (make-busy-timeout 136000))) + (if dbexists + (vector db actualfname) + (begin + (handle-exceptions + exn + (begin + (thread-sleep! 10) + (if (> count 0) + (lock-queue:open-db fname count: (- count 1)) + (vector db actualfname))) + (sqlite3:with-transaction + db + (lambda () + (sqlite3:execute + db + "CREATE TABLE IF NOT EXISTS queue ( + id INTEGER PRIMARY KEY, + test_id INTEGER, + start_time INTEGER, + state TEXT, + CONSTRAINT queue_constraint UNIQUE (test_id));") + (sqlite3:execute + db + "CREATE TABLE IF NOT EXISTS runlocks ( + id INTEGER PRIMARY KEY, + test_id INTEGER, + run_lock TEXT, + CONSTRAINT runlock_constraint UNIQUE (run_lock));")))))) + (sqlite3:set-busy-handler! db handler) + (vector db actualfname))) + +(define (lock-queue:set-state dbdat test-id newstate #!key (remtries 10)) + (tasks:wait-on-journal (lock-queue:db-dat-get-path dbdat) 1200) + (handle-exceptions + exn + (if (> remtries 0) + (begin + (debug:print 0 *default-log-port* "WARNING: exception on lock-queue:set-state. Trying again in 30 seconds.") + (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) + (thread-sleep! 30) + (lock-queue:set-state dbdat test-id newstate remtries: (- remtries 1))) + (begin + (debug:print-error 0 *default-log-port* " Failed to set lock state for test with id " test-id ", error: " ((condition-property-accessor 'exn 'message) exn) ", giving up.") + #f)) + (sqlite3:execute (lock-queue:db-dat-get-db dbdat) "UPDATE queue SET state=? 
WHERE test_id=?;" + newstate + test-id))) + +(define (lock-queue:any-younger? dbdat mystart test-id #!key (remtries 10)) + ;; no need to wait on journal on read only queries + ;; (tasks:wait-on-journal (lock-queue:db-dat-get-path dbdat) 1200) + (handle-exceptions + exn + (if (> remtries 0) + (begin + (debug:print 0 *default-log-port* "WARNING: exception on lock-queue:any-younger. Removing lockdb and trying again in 5 seconds.") + (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) + (thread-sleep! 5) + (lock-queue:delete-lock-db dbdat) + (lock-queue:any-younger? dbdat mystart test-id remtries: (- remtries 1))) + (begin + (debug:print-error 0 *default-log-port* " Failed to find younger locks for test with id " test-id ", error: " ((condition-property-accessor 'exn 'message) exn) ", giving up.") + #f)) + (let ((res #f)) + (sqlite3:for-each-row + (lambda (tid) + ;; Actually this should not be needed as mystart cannot be simultaneously less than and test-id same as + (if (not (equal? tid test-id)) + (set! res tid))) + (lock-queue:db-dat-get-db dbdat) + "SELECT test_id FROM queue WHERE start_time > ?;" mystart) + res))) + +(define (lock-queue:get-lock dbdat test-id #!key (count 10)(waiting-msg #f)) + (tasks:wait-on-journal (lock-queue:db-dat-get-path dbdat) 1200 remove: #t waiting-msg: "lock-queue:get-lock, waiting on journal") + (let* ((res #f) + (db (lock-queue:db-dat-get-db dbdat)) + (lckqry (sqlite3:prepare db "SELECT test_id,run_lock FROM runlocks WHERE run_lock='locked';")) + (mklckqry (sqlite3:prepare db "INSERT INTO runlocks (test_id,run_lock) VALUES (?,'locked');"))) + (let ((result + (handle-exceptions + exn + (begin + (debug:print 0 *default-log-port* "WARNING: failed to get queue lock. Removing lock db and returning fail") ;; Will try again in a few seconds") + (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) + (thread-sleep! 
10)
+              ;; (if (> count 0)
+              ;;  #f ;; (lock-queue:get-lock dbdat test-id count: (- count 1)) - give up on retries
+              ;;  (begin ;; never recovered, remove the lock file and return #f, no lock obtained
+              (lock-queue:delete-lock-db dbdat)
+              #f)
+            (sqlite3:with-transaction
+             db
+             (lambda ()
+               (sqlite3:for-each-row (lambda (tid lockstate)
+                                       (set! res (list tid lockstate)))
+                                     lckqry)
+               (if res
+                   (if (equal? (car res) test-id)
+                       #t ;; already have the lock
+                       #f)
+                   (begin
+                     (sqlite3:execute mklckqry test-id)
+                     ;; if no error handled then return #t for got the lock
+                     #t)))))))
+      (sqlite3:finalize! lckqry)
+      (sqlite3:finalize! mklckqry)
+      result)))
+
+;; Release the run lock held for test-id.
+;;
+;; Opens the lock db for fname (lock-queue:open-db), deletes every runlocks
+;; row whose test_id matches, then finalizes the db handle.
+;;
+;; On an exception the handler sleeps count/10 seconds and, while count > 0,
+;; finalizes the handle and retries with count decremented. Once the retries
+;; are used up it makes a best-effort attempt to delete the lock db and its
+;; journal so that the db can be recreated, and returns #f.
+;;
+(define (lock-queue:release-lock fname test-id #!key (count 10))
+  (let* ((dbdat (lock-queue:open-db fname)))
+    (tasks:wait-on-journal (lock-queue:db-dat-get-path dbdat) 1200 "lock-queue:release-lock; waiting on journal")
+    (handle-exceptions
+     exn
+     (begin
+       (debug:print 0 *default-log-port* "WARNING: Failed to release queue lock. Will try again in few seconds")
+       (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
+       (thread-sleep! (/ count 10))
+       (if (> count 0)
+           (begin
+             (sqlite3:finalize! (lock-queue:db-dat-get-db dbdat))
+             (lock-queue:release-lock fname test-id count: (- count 1)))
+           ;; The db actually lives at the path recorded in dbdat (open-db appends
+           ;; ".lockdb" to fname), so clean up that file and its journal rather
+           ;; than the bare fname, which is never the database file.
+           (let* ((dbpath  (lock-queue:db-dat-get-path dbdat))
+                  (journal (conc dbpath "-journal")))
+             ;; If we've tried ten times and failed there is a serious problem
+             ;; try to remove the lock db and allow it to be recreated
+             (handle-exceptions
+              exn
+              #f
+              (if (common:file-exists? journal)(delete-file journal))
+              (if (common:file-exists? dbpath) (delete-file dbpath))
+              #f))))
+     (sqlite3:execute (lock-queue:db-dat-get-db dbdat) "DELETE FROM runlocks WHERE test_id=?;" test-id)
+     (sqlite3:finalize! 
(lock-queue:db-dat-get-db dbdat)))))
+
+;; Forcibly take the run lock for test-id.
+;;
+;; Deletes every runlocks row marked 'locked' and then asks for the lock via
+;; lock-queue:get-lock, whose result is returned. If the DELETE raises, the
+;; handler logs, sleeps 10 seconds and retries while count > 0.
+;;
+(define (lock-queue:steal-lock dbdat test-id #!key (count 10))
+  (debug:print-info 0 *default-log-port* "Attempting to steal lock at " (lock-queue:db-dat-get-path dbdat))
+  (tasks:wait-on-journal (lock-queue:db-dat-get-path dbdat) 1200 "lock-queue:steal-lock; waiting on journal")
+  (handle-exceptions
+   exn
+   (begin
+     (debug:print 0 *default-log-port* "WARNING: Failed to steal queue lock. Will try again in few seconds")
+     (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
+     (thread-sleep! 10)
+     (if (> count 0)
+         (lock-queue:steal-lock dbdat test-id count: (- count 1))
+         #f))
+   (sqlite3:execute (lock-queue:db-dat-get-db dbdat) "DELETE FROM runlocks WHERE run_lock='locked';"))
+  ;; was `test-it`, an unbound identifier, so every steal attempt raised here
+  (lock-queue:get-lock dbdat test-id))
+
+;; returns #f if ok to skip the task
+;; returns #t if ok to proceed with task
+;; otherwise waits
+;;
+(define (lock-queue:wait-turn fname test-id #!key (count 10)(waiting-msg #f))
+  (let* ((dbdat   (lock-queue:open-db fname))
+         (mystart (current-seconds))
+         (db      (lock-queue:db-dat-get-db dbdat)))
+    ;; (tasks:wait-on-journal (lock-queue:db-dat-get-path dbdat) 1200 waiting-msg: "lock-queue:wait-turn; waiting on journal file")
+    (handle-exceptions
+     exn
+     (begin
+       (debug:print 0 *default-log-port* "WARNING: Failed to find out if it is ok to skip the wait queue. Will try again in few seconds")
+       (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
+       (print-call-chain (current-error-port))
+       (thread-sleep! 10)
+       (if (> count 0)
+           (begin
+             (sqlite3:finalize! db)
+             (lock-queue:wait-turn fname test-id count: (- count 1)))
+           (begin
+             (debug:print 0 *default-log-port* "Giving up calls to lock-queue:wait-turn for test-id " test-id " at path " fname ", printing call chain")
+             (print-call-chain (current-error-port))
+             #f)))
+     ;; wait 10 seconds and then check to see if someone is already updating the html
+     (thread-sleep! 
10)
+     (if (not (lock-queue:any-younger? dbdat mystart test-id)) ;; no processing in flight, must try to start processing
+         (begin
+           (tasks:wait-on-journal (lock-queue:db-dat-get-path dbdat) 1200 waiting-msg: "lock-queue:wait-turn; waiting on journal file")
+           (sqlite3:execute
+            db
+            "INSERT OR REPLACE INTO queue (test_id,start_time,state) VALUES (?,?,'waiting');"
+            test-id mystart)
+           ;; (thread-sleep! 1) ;; give other tests a chance to register
+           (let ((result
+                  (let loop ((younger-waiting (lock-queue:any-younger? dbdat mystart test-id)))
+                    (if younger-waiting
+                        (begin
+                          ;; no need for us to wait. mark in the lock queue db as skipping
+                          ;; no point in marking anything in the queue - simply never register this
+                          ;; test as it is *covered* by a previously started update to the html file
+                          ;; (lock-queue:set-state dbdat test-id "skipping")
+                          #f) ;; let the calling process know that nothing needs to be done
+                        (if (lock-queue:get-lock dbdat test-id)
+                            #t
+                            (if (> (- (current-seconds) mystart) 36000) ;; waited too long, steal the lock
+                                (lock-queue:steal-lock dbdat test-id)
+                                (begin
+                                  (thread-sleep! 1)
+                                  (loop (lock-queue:any-younger? dbdat mystart test-id)))))))))
+             (sqlite3:finalize! db)
+             result))
+         ;; A younger request was already queued during the initial wait: this
+         ;; task can be skipped. Return an explicit #f (the one-armed `if` used to
+         ;; yield an unspecified, truthy value) and close the handle as the other
+         ;; exit path does.
+         (begin
+           (sqlite3:finalize! db)
+           #f)))))
+
+
+;; (use trace)
+;; (trace lock-queue:get-lock lock-queue:release-lock lock-queue:wait-turn lock-queue:any-younger? lock-queue:set-state)
+
+;;======================================================================
+;; margs
+;;======================================================================
+
+;; table of parsed command line arguments; filled by whoever passes it as the
+;; arg-hash parameter of args:get-args
+(define args:arg-hash (make-hash-table))
+
+;; Look up arg in args:arg-hash. Returns the stored value, or the optional
+;; second argument when arg is absent, or #f when no default was supplied.
+(define (args:get-arg arg . default)
+  (if (null? default)
+      (hash-table-ref/default args:arg-hash arg #f)
+      (hash-table-ref/default args:arg-hash arg (car default))))
+
+;; #t when at least one of the named args has a non-#f value in args:arg-hash
+(define (args:any? . args)
+  (not (null? (filter (lambda (x) x)
+                      (map args:get-arg args)))))
+
+(define (args:get-arg-from ht arg . default)
+  (if (null? 
default)
+      (hash-table-ref/default ht arg #f)
+      (hash-table-ref/default ht arg (car default))))
+
+;; Print an optional error message followed by the usage text, then exit.
+;;
+;; When any args are given they are printed after "ERROR: ". The usage text is
+;; the value of `help` when that is a string (`help` is not defined in this
+;; part of the file - presumably a global set by the application), otherwise a
+;; generic one-line usage built from (car (argv)).
+;; NOTE(review): exits with status 0 even when reporting an error.
+(define (args:usage . args)
+  (if (> (length args) 0)
+      (apply print "ERROR: " args))
+  (if (string? help)
+      (print help)
+      (print "Usage: " (car (argv)) " ... "))
+  (exit 0))
+
+;; one-of args defined
+;; returns #t when at least one of the names in param has a non-#f value
+;; according to args:get-arg
+(define (args:any-defined? . param)
+  (let ((res #f))
+    (for-each
+     (lambda (arg)
+       (if (args:get-arg arg)(set! res #t)))
+     param)
+    res))
+
+;; args: parse a command line.
+;;
+;;   args       - the full argument list; parsing starts at the second element
+;;   params     - option names that consume the following element as a value
+;;   switches   - option names that take no value; stored as #t
+;;   arg-hash   - hash table that receives the parsed options (mutated)
+;;   num-needed - minimum number of arguments, or #f for no minimum
+;;
+;; Returns the list of elements that were neither params nor switches, in
+;; their original order. When args is shorter than num-needed + 2 (or 2 when
+;; num-needed is #f), args:usage is called if num-needed is at least 1;
+;; otherwise '() is returned.
+(define (args:get-args args params switches arg-hash num-needed)
+  (let* ((numargs (length args))
+         (adj-num-needed (if num-needed (+ num-needed 2) #f)))
+    (if (< numargs (if adj-num-needed adj-num-needed 2))
+        ;; num-needed may be #f; guard before the numeric comparison
+        (if (and num-needed (>= num-needed 1))
+            (args:usage "No arguments provided")
+            '())
+        (let loop ((arg (cadr args))
+                   (tail (cddr args))
+                   (remargs '()))
+          (cond
+           ((member arg params) ;; args with params
+            (if (< (length tail) 1)
+                (args:usage "param given without argument " arg)
+                (let ((val     (car tail))
+                      (newtail (cdr tail)))
+                  (hash-table-set! arg-hash arg val)
+                  (if (null? newtail) remargs
+                      (loop (car newtail)(cdr newtail) remargs)))))
+           ((member arg switches) ;; args with no params (i.e. switches)
+            (hash-table-set! arg-hash arg #t)
+            (if (null? tail) remargs
+                (loop (car tail)(cdr tail) remargs)))
+           (else
+            (if (null? tail)(append remargs (list arg)) ;; return the non-used args
+                (loop (car tail)(cdr tail)(append remargs (list arg))))))))
+    ))
+
+;; print the leftover arguments and then every key/value pair held in arg-hash
+(define (args:print-args remargs arg-hash)
+  (print "ARGS: " remargs)
+  (for-each (lambda (arg)
+              (print " " arg " " (hash-table-ref/default arg-hash arg #f)))
+            (hash-table-keys arg-hash)))
+
+;;======================================================================
+;; mt
+;;======================================================================
+
+;; This is the Megatest API. All generally "useful" routines will be wrapped or extended
+;; here.
+
+;;======================================================================
+;; R U N S
+;;======================================================================
+
+;; mt:get-runs-by-patt
+;; get all runs matching runnamepatt and targpatt, fetching them from
+;; rmt:get-runs-by-patt in batches of 500 until a short batch is returned
+;;
+;; Returns a vector: #(header list-of-rows), header taken from the last batch.
+;;
+;; Use: (db-get-value-by-header (db:get-header runinfo)(db:get-rows runinfo))
+;;  to extract info from the structure returned
+;;
+(define (mt:get-runs-by-patt keys runnamepatt targpatt)
+  (let loop ((runsdat  (rmt:get-runs-by-patt keys runnamepatt targpatt 0 500 #f 0))
+             (res      '())
+             (offset   0)
+             (limit    500))
+    ;; (print "runsdat: " runsdat)
+    (let* ((header    (vector-ref runsdat 0))
+           (runslst   (vector-ref runsdat 1))
+           (full-list (append res runslst))
+           (have-more (= (length runslst) limit))) ;; a full batch means there may be more
+      ;; (debug:print 0 *default-log-port* "header: " header " runslst: " runslst " have-more: " have-more)
+      (if have-more
+          ;; let* so the next fetch can use new-offset; it previously reused
+          ;; `offset`, which re-read (and duplicated) the batch just processed
+          (let* ((new-offset (+ offset limit))
+                 (next-batch (rmt:get-runs-by-patt keys runnamepatt targpatt new-offset limit #f 0)))
+            (debug:print-info 4 *default-log-port* "More than " limit " runs, have " (length full-list) " runs so far.")
+            (debug:print-info 0 *default-log-port* "next-batch: " next-batch)
+            (loop next-batch
+                  full-list
+                  new-offset
+                  limit))
+          (vector header full-list)))))
+
+;;======================================================================
+;; T E S T S
+;;======================================================================
+
+(define (mt:get-tests-for-run run-id testpatt states status #!key (not-in #t) (sort-by 'event_time) (sort-order "ASC") (qryvals #f)(last-update #f))
+  (let loop ((testsdat (rmt:get-tests-for-run run-id testpatt states status 0 500 not-in sort-by sort-order qryvals last-update 'normal))
+             (res      '())
+             (offset   0)
+             (limit    500))
+    (let* ((full-list (append res testsdat))
+           (have-more (eq? 
(length testsdat) limit)))
+      (if have-more
+          (let ((new-offset (+ offset limit)))
+            (debug:print-info 4 *default-log-port* "More than " limit " tests, have " (length full-list) " tests so far.")
+            (loop (rmt:get-tests-for-run run-id testpatt states status new-offset limit not-in sort-by sort-order qryvals last-update 'normal)
+                  full-list
+                  new-offset
+                  limit))
+          full-list))))
+
+;; Cached front end for rmt:get-prereqs-not-met.
+;;
+;; Results are kept in *pre-reqs-met-cache* keyed on (run-id waitons
+;; ref-item-path mode) together with the time they were stored; an entry less
+;; than 5 seconds old is returned without calling rmt:get-prereqs-not-met.
+;; Note that itemmaps is not part of the cache key.
+(define (mt:lazy-get-prereqs-not-met run-id waitons ref-item-path #!key (mode '(normal))(itemmaps #f) )
+  (let* ((key    (list run-id waitons ref-item-path mode))
+         (res    (hash-table-ref/default *pre-reqs-met-cache* key #f))
+         (useres (let ((last-time (if (vector? res) (vector-ref res 0) #f)))
+                   (if last-time
+                       (< (current-seconds)(+ last-time 5))
+                       #f))))
+    (if useres
+        (let ((result (vector-ref res 1)))
+          (debug:print 4 *default-log-port* "Using lazy value res: " result)
+          result)
+        (let ((newres (rmt:get-prereqs-not-met run-id waitons ref-item-path mode: mode itemmaps: itemmaps)))
+          (hash-table-set! *pre-reqs-met-cache* key (vector (current-seconds) newres))
+          newres))))
+
+(define (mt:get-run-stats dbstruct run-id)
+;; Get run stats from local access, move this ... but where?
+  (db:get-run-stats dbstruct run-id))
+
+;; Return tests with every test that waits on failed-test removed.
+;;
+;;   tests        - list of test names
+;;   test-records - hash table: test name -> vector whose element 2 is the
+;;                  list of tests it waits on (tests missing from the table
+;;                  are treated as having no waitons)
+;;
+;; The relative order of the surviving tests is preserved. run-id is unused.
+(define (mt:discard-blocked-tests run-id failed-test tests test-records)
+  (if (null? tests)
+      tests
+      (begin
+        (debug:print-info 1 *default-log-port* "Discarding tests from " tests " that are waiting on " failed-test)
+        (let loop ((testn (car tests))
+                   (remt  (cdr tests))
+                   (res   '()))
+          (let* ((test-dat (hash-table-ref/default test-records testn (vector #f #f '())))
+                 (waitons  (vector-ref test-dat 2))
+                 ;; decide about the current test *before* checking for the end
+                 ;; of the list; previously the final test was dropped
+                 ;; unconditionally because the loop returned without looking at it
+                 (new-res  (if (member failed-test waitons)
+                               (begin
+                                 (debug:print 0 *default-log-port* "Discarding test " testn "(" test-dat ") due to " failed-test)
+                                 res)
+                               (cons testn res))))
+            ;; (print "mt:discard-blocked-tests run-id: " run-id " failed-test: " failed-test " testn: " testn " with waitons: " waitons)
+            (if (null? remt)
+                (reverse new-res)
+                (loop (car remt) (cdr remt) new-res)))))))
+
+;;======================================================================
+;; T R I G G E R S
+;;======================================================================
+
+(define (mt:run-trigger cmd test-id test-rundir trigger logname test-name item-path event-time actual-state actual-status)
+  ;; Putting the commandline into ( )'s means no control over the shell.
+  ;; stdout and stderr will be caught in the NBFAKE or mt_launch.log files
+  ;; or equivalent. No need to do this. Just run it?
+  (let* ((fullcmd (conc "nbfake "
+                        cmd " "
+                        test-id " "
+                        test-rundir " "
+                        trigger " "
+                        test-name " "
+                        item-path " " ;; has / prepended to deal with toplevel tests
+                        actual-state " "
+                        actual-status " "
+                        event-time
+                        ))
+         (prev-nbfake-log (get-environment-variable "NBFAKE_LOG")))
+    (setenv "NBFAKE_LOG" (conc (cond
+                                ((and (directory-exists? test-rundir)
+                                      (file-write-access? test-rundir))
+                                 test-rundir)
+                                ((and (directory-exists? *toppath*)
+                                      (file-write-access? *toppath*))
+                                 *toppath*)
+                                (else (conc "/tmp/" (current-user-name))))
+                               "/" logname))
+    (debug:print-info 0 *default-log-port* "TRIGGERED on " trigger ", running command " fullcmd " output at " (get-environment-variable "NBFAKE_LOG"))
+    ;; (call-with-environment-variables
+    ;;  `(("NBFAKE_LOG" . 
,(conc test-rundir "/" logname))) + ;; (lambda () + (process-run fullcmd) + (if prev-nbfake-log + (setenv "NBFAKE_LOG" prev-nbfake-log) + (unsetenv "NBFAKE_LOG")) + )) ;; )) + +(define (mt:process-triggers dbstruct run-id test-id newstate newstatus) + (if test-id + (let* ((test-dat (db:get-test-info-by-id dbstruct run-id test-id))) + (if test-dat + (let* ((test-rundir (db:test-get-rundir test-dat)) ;; ) ;; ) + (test-name (db:test-get-testname test-dat)) + (item-path (db:test-get-item-path test-dat)) + (duration (db:test-get-run_duration test-dat)) + (comment (db:test-get-comment test-dat)) + (event-time (db:test-get-event_time test-dat)) + (tconfig #f) + (state (if newstate newstate (db:test-get-state test-dat))) + (status (if newstatus newstatus (db:test-get-status test-dat)))) + ;; (mutex-lock! *triggers-mutex*) + (handle-exceptions + exn + (begin + (debug:print-error 0 *default-log-port* " Exception in mt:process-triggers for run-id="run-id" test-id="test-id" newstate="newstate" newstatus="newstatus + "\n error: " ((condition-property-accessor 'exn 'message) exn) + "\n test-rundir="test-rundir + "\n test-name="test-name + "\n item-path="item-path + "\n state="state + "\n status="status + "\n") + (print-call-chain (current-error-port)) + #f) + (if (and test-name + test-rundir) ;; #f means no dir set yet + ;; (common:file-exists? test-rundir) + ;; (directory? test-rundir)) + (call-with-environment-variables + (list (cons "MT_TEST_NAME" (or test-name "no such test")) + (cons "MT_TEST_RUN_DIR" (or test-rundir "no test directory yet")) + (cons "MT_ITEMPATH" (or item-path ""))) + (lambda () + (if (directory-exists? test-rundir) + (push-directory test-rundir) + (push-directory *toppath*)) + (set! 
tconfig (mt:lazy-read-test-config test-name))
+                (for-each (lambda (trigger)
+                            (let* ((munged-trigger (string-translate trigger "/ " "--"))
+                                   (logname        (conc "last-trigger-" munged-trigger ".log")))
+                              ;; first any triggers from the testconfig
+                              (let ((cmd (configf:lookup tconfig "triggers" trigger)))
+                                (if cmd (mt:run-trigger cmd test-id test-rundir trigger (conc "tconfig-" logname) test-name item-path event-time state status)))
+                              ;; next any triggers from megatest.config
+                              (let ((cmd (configf:lookup *configdat* "triggers" trigger)))
+                                (if cmd (mt:run-trigger cmd test-id test-rundir trigger (conc "mtconfig-" logname) test-name item-path event-time state status)))))
+                          (list
+                           (conc state "/" status)
+                           (conc state "/")
+                           (conc "/" status)))
+                (pop-directory))
+              )))
+        ;; (mutex-unlock! *triggers-mutex*)
+        )))))
+
+;;======================================================================
+;; S T A T E   A N D   S T A T U S   F O R   T E S T S
+;;======================================================================
+
+;; Set state/status/comment for a test identified by run-id and test-id.
+;;
+;; Returns #f (after logging the bad arguments and the call chain) when
+;; either run-id or test-id is false. Otherwise hands the update to
+;; rmt:set-state-status-and-roll-up-items, passing #f as the third
+;; argument (the slot the by-testname variant fills with item-path),
+;; and returns #t.
+;;
+;; The older per-field rmt:general-call dispatch and the trigger
+;; processing that used to live here are disabled.
+;;
+(define (mt:test-set-state-status-by-id run-id test-id newstate newstatus newcomment)
+  (if (not (and run-id test-id))
+      (begin
+        (debug:print-error 0 *default-log-port* "bad data handed to mt:test-set-state-status-by-id, run-id=" run-id ", test-id=" test-id ", newstate=" newstate)
+        (print-call-chain (current-error-port))
+        #f)
+      (begin
+        (rmt:set-state-status-and-roll-up-items run-id test-id #f newstate newstatus newcomment)
+        ;; (mt:process-triggers run-id test-id newstate newstatus)
+        #t)))
+
+
+;; Same update as mt:test-set-state-status-by-id, but skipped (returning
+;; #t) when element 3 of the vector from rmt:get-testinfo-state-status
+;; equals "COMPLETED".
+;;
+;; NOTE(review): the lookup result is indexed unconditionally; if
+;; rmt:get-testinfo-state-status can return #f for an unknown test this
+;; will raise - confirm against that procedure.
+;;
+(define (mt:test-set-state-status-by-id-unless-completed run-id test-id newstate newstatus newcomment)
+  (let* ((test-vec (rmt:get-testinfo-state-status run-id test-id))
+         (state    (vector-ref test-vec 3)))
+    (if (equal? state "COMPLETED")
+        #t
+        (rmt:set-state-status-and-roll-up-items run-id test-id #f newstate newstatus newcomment))))
+
+
+;; Set state/status/comment for a test identified by name and item-path.
+;; The name and item-path are passed straight through to
+;; rmt:set-state-status-and-roll-up-items; always returns #t.
+;;
+(define (mt:test-set-state-status-by-testname run-id test-name item-path new-state new-status new-comment)
+  (rmt:set-state-status-and-roll-up-items run-id test-name item-path new-state new-status new-comment)
+  ;; (mt:process-triggers run-id test-id new-state new-status)
+  #t)
+
+;; Resolve test-name/item-path to a test-id with rmt:get-test-id, then
+;; apply mt:test-set-state-status-by-id-unless-completed.
+;;
+(define (mt:test-set-state-status-by-testname-unless-completed run-id test-name item-path new-state new-status new-comment)
+  (let ((test-id (rmt:get-test-id run-id test-name item-path)))
+    (mt:test-set-state-status-by-id-unless-completed run-id test-id new-state new-status new-comment)))
+
+;; Return the parsed testconfig for test-name, reading it at most once.
+;;
+;; A hit in the *testconfigs* cache is returned directly. Otherwise each
+;; directory from (tests:get-tests-search-path *configdat*) is probed
+;; for <dir>/<test-name>/testconfig; the first file that exists and is
+;; readable is parsed with read-config, cached and returned. While it is
+;; read MT_LINKTREE is set to (common:get-linktree) when that is true;
+;; afterwards the previous value is restored, or the variable unset if
+;; there was none.
+;;
+;; Returns #f after logging an error when no readable testconfig is
+;; found. An empty search path is treated the same way (it previously
+;; crashed on the car of the empty list).
+;;
+(define (mt:lazy-read-test-config test-name)
+  (let ((tconf (hash-table-ref/default *testconfigs* test-name #f)))
+    (if tconf
+        tconf
+        (let loop ((dirs (tests:get-tests-search-path *configdat*)))
+          (if (null? dirs)
+              (begin
+                (debug:print-error 0 *default-log-port* "No readable testconfig found for " test-name)
+                #f)
+              ;; Setting MT_LINKTREE here is almost certainly unnecessary.
+              (let ((tconfig-file (conc (car dirs) "/" test-name "/testconfig")))
+                (if (and (common:file-exists? tconfig-file)
+                         (file-read-access? tconfig-file))
+                    (let ((link-tree-path (common:get-linktree)) ;; (configf:lookup *configdat* "setup" "linktree"))
+                          (old-link-tree  (get-environment-variable "MT_LINKTREE")))
+                      (if link-tree-path (setenv "MT_LINKTREE" link-tree-path))
+                      (let ((newtcfg (read-config tconfig-file #f #f))) ;; NOTE: Does NOT run [system ...]
+                        (hash-table-set! *testconfigs* test-name newtcfg)
+                        (if old-link-tree
+                            (setenv "MT_LINKTREE" old-link-tree)
+                            (unsetenv "MT_LINKTREE"))
+                        newtcfg))
+                    (loop (cdr dirs)))))))))
+
+;;======================================================================
+;; portlogger
+;;======================================================================
+
+
+
+;; Open (creating if needed) the portlogger sqlite3 database at fname
+;; and return the database handle.
+;;
+;; tasks:wait-on-journal is called first (arguments: fname, 5,
+;; remove: #t). If it returns false the file is removed with "rm -f"
+;; before opening. The handle gets a busy handler built with
+;; (make-busy-timeout 136000), db:set-sync is applied, and the ports
+;; table is created if it does not exist yet.
+;;
+(define (portlogger:open-db fname)
+  (let* ((avail   (tasks:wait-on-journal fname 5 remove: #t)) ;; wait for the journal to go away
+         (db      (if avail
+                      (sqlite3:open-database fname)
+                      (begin
+                        (system (conc "rm -f " fname))
+                        (sqlite3:open-database fname))))
+         (handler (make-busy-timeout 136000)))
+    (sqlite3:set-busy-handler! db handler)
+    (db:set-sync db) ;; (sqlite3:execute db "PRAGMA synchronous = 0;")
+    (sqlite3:execute
+     db
+     "CREATE TABLE IF NOT EXISTS ports (
+            port INTEGER PRIMARY KEY,
+            state TEXT DEFAULT 'not-used',
+            fail_count INTEGER DEFAULT 0,
+            update_time TIMESTAMP DEFAULT (strftime('%s','now')) );")
+    db))
+
+;; Open the per-user portlogger db (/tmp/.<user>-portlogger.db), call
+;; (proc db . params), finalize the handle and return proc's result.
+;;
+;; Any exception is logged, the database file is deleted if it exists,
+;; and the call chain is printed to the current error port; the handle
+;; is not finalized on that path.
+;;
+(define (portlogger:open-run-close proc . params)
+  (let* ((fname (conc "/tmp/." (current-user-name) "-portlogger.db"))
+         (avail (tasks:wait-on-journal fname 10))) ;; called for its wait; the result is not used
+    (handle-exceptions
+        exn
+        (begin
+          ;; (release-dot-lock fname)
+          (debug:print-error 0 *default-log-port* "portlogger:open-run-close failed. " proc " " params)
+          (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
+          (debug:print 5 *default-log-port* "exn=" (condition->list exn))
+          (if (common:file-exists? fname)(delete-file fname)) ;; brutally get rid of it
+          (print-call-chain (current-error-port)))
+      (let* (;; (lock   (obtain-dot-lock fname 2 9 10))
+             (db  (portlogger:open-db fname))
+             (res (apply proc db params)))
+        (sqlite3:finalize! db)
+        ;; (release-dot-lock fname)
+        res))))
+
+;; Try to claim portnum inside one transaction.
+;;
+;; The stored state for the port is read ("not-tried" when there is no
+;; row) and mapped to the result:
+;;   released  -> row updated to "taken", returns 'taken
+;;   not-tried -> row inserted as "taken", returns 'taken
+;;   taken     -> 'already-taken
+;;   failed    -> 'failed
+;;   otherwise -> 'error
+;; The three prepared statements are finalized before returning.
+;;
+(define (portlogger:take-port db portnum)
+  (let* ((qry1 (sqlite3:prepare db "INSERT INTO ports (port,state) VALUES (?,?);"))
+         (qry2 (sqlite3:prepare db "UPDATE ports SET state=?,update_time=strftime('%s','now') WHERE port=?;"))
+         (qry3 (sqlite3:prepare db "SELECT state FROM ports WHERE port=?;"))
+         (res  (sqlite3:with-transaction
+                db
+                (lambda ()
+                  ;; keep the second fold argument when it is true, else the first
+                  (let ((curr (sqlite3:fold-row
+                               (lambda (var curr)
+                                 (or curr var))
+                               "not-tried"
+                               qry3
+                               portnum)))
+                    (case (string->symbol curr)
+                      ((released)  (sqlite3:execute qry2 "taken" portnum) 'taken)
+                      ((not-tried) (sqlite3:execute qry1 portnum "taken") 'taken)
+                      ((taken)                                            'already-taken)
+                      ((failed)                                           'failed)
+                      (else                                               'error)))))))
+    (sqlite3:finalize! qry1)
+    (sqlite3:finalize! qry2)
+    (sqlite3:finalize! qry3)
+    res))
+
+;; Return a port whose state is 'released' (the query is LIMIT 1), or
+;; #f when there is none. Any exception is logged and #f is returned.
+;;
+(define (portlogger:get-prev-used-port db)
+  (handle-exceptions
+      exn
+      (begin
+        (debug:print 0 *default-log-port* "EXCEPTION: portlogger database probably overloaded or unreadable. If you see this message again remove /tmp/.$USER-portlogger.db")
+        (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
+        (debug:print 5 *default-log-port* "exn=" (condition->list exn))
+        (print-call-chain (current-error-port))
+        (debug:print 0 *default-log-port* "Continuing anyway.")
+        #f)
+    (sqlite3:fold-row
+     (lambda (var curr)
+       (or curr var))
+     #f
+     db
+     "SELECT (port) FROM ports WHERE state='released' LIMIT 1;")))
+
+;; Pick a port, try to record it as taken, and return the port number.
+;;
+;; A previously released port is preferred; otherwise a random port in
+;; [lowport, 64000) is chosen, where lowport is the numeric value of
+;; [server] lowport in *configdat* (default 32768). The random span is
+;; clamped to at least 1 so a lowport at or above 64000 no longer hands
+;; a non-positive argument to random.
+;;
+;; The result of portlogger:take-port is not inspected: the port number
+;; is returned even when the take reports 'already-taken / 'failed or
+;; raises (exceptions are logged and ignored).
+;;
+(define (portlogger:find-port db)
+  (let* ((lowport (or (let ((val (configf:lookup *configdat* "server" "lowport")))
+                        (and val (string->number val)))
+                      32768))
+         (span    (max 1 (- 64000 lowport)))
+         (portnum (or (portlogger:get-prev-used-port db)
+                      (+ lowport ;; top of registered ports is 49152 but lets use ports in the registered range
+                         (random span)))))
+    (handle-exceptions
+        exn
+        (begin
+          (debug:print 0 *default-log-port* "EXCEPTION: portlogger database probably overloaded or unreadable. If you see this message again remove /tmp/.$USER-portlogger.db")
+          (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
+          (debug:print 5 *default-log-port* "exn=" (condition->list exn))
+          (print-call-chain (current-error-port))
+          (debug:print 0 *default-log-port* "Continuing anyway."))
+      (portlogger:take-port db portnum))
+    portnum))
+
+;; set port to "released", "failed" etc.
+;;
+(define (portlogger:set-port db portnum value)
+  (sqlite3:execute db "UPDATE ports SET state=?,update_time=strftime('%s','now') WHERE port=?;" value portnum))
+
+;; set port to failed (attempted to take but got error); also
+;; increments fail_count for the port
+;;
+(define (portlogger:set-failed db portnum)
+  (sqlite3:execute db "UPDATE ports SET state='failed',fail_count=fail_count+1,update_time=strftime('%s','now') WHERE port=?;" portnum))
+
+;;======================================================================
+;; MAIN
+;;======================================================================
+
+;; Command dispatcher over the per-user portlogger db. args is a list
+;; of strings: a command followed by its parameters.
+;;
+;;   take PORT      -> result of portlogger:take-port
+;;   find           -> port number from portlogger:find-port
+;;   set PORT STATE -> STATE (PORT may be a number or a string)
+;;   failed PORT    -> 'failed
+;;
+;; Any exception while dispatching (including a missing command or
+;; parameter) is logged and yields #f; an unknown command is logged and
+;; yields #f. The database handle is finalized before returning.
+;;
+(define (portlogger:main . args)
+  (let* ((dbfname (conc "/tmp/." (current-user-name) "-portlogger.db"))
+         (db      (portlogger:open-db dbfname))
+         (result
+          (handle-exceptions
+              exn
+              (begin
+                (debug:print 0 *default-log-port* "EXCEPTION: portlogger database at " dbfname " probably overloaded or unreadable. Try removing it.")
+                (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
+                (debug:print 5 *default-log-port* "exn=" (condition->list exn))
+                ;; default of #f: conditions that are not of kind sqlite3 must not
+                ;; raise a second error from inside this handler
+                (debug:print 0 *default-log-port* " status: " ((condition-property-accessor 'sqlite3 'status #f) exn))
+                (print-call-chain (current-error-port))
+                #f)
+            (case (string->symbol (car args)) ;; commands with two or more params
+              ((take)(portlogger:take-port db (string->number (cadr args))))
+              ((find)(portlogger:find-port db))
+              ((set) (let ((port  (cadr args))
+                           (state (caddr args)))
+                       (portlogger:set-port db
+                                            (if (number? port) port (string->number port))
+                                            state)
+                       state))
+              ((failed)(portlogger:set-failed db (string->number (cadr args))) 'failed)
+              (else
+               (debug:print-error 0 *default-log-port* "portlogger:main: unknown command " (car args))
+               #f)))))
+    (sqlite3:finalize! db)
+    result))
+
+;; (print (apply portlogger:main (cdr (argv))))
+;;======================================================================
+;; runconfig
+;;======================================================================
+
+;; Read the run config file fname with read-config into a fresh hash
+;; table. When target is true the table is pre-seeded with an empty
+;; entry for it and reading is restricted to the "default" and target
+;; sections; otherwise no section filter is passed.
+;;
+(define (runconfig:read fname target environ-patt)
+  (let ((ht (make-hash-table)))
+    (if target (hash-table-set! ht target '()))
+    (read-config fname ht #t environ-patt: environ-patt sections: (if target (list "default" target) #f))))
+
+;; NB// to process a runconfig ensure to use environ-patt with target!
+;;
+;; Read runconfigs file fname for the current target and return the
+;; parsed config (the hash table from runconfig:read).
+;;
+;;   keyvals      list of (key value) lists, or #f. When given, the
+;;                target is the values joined with "/" ("-na-" stands
+;;                in for a false value). When #f the target falls back
+;;                to common:args-get-target, then MT_TARGET, then a
+;;                placeholder string (after logging an error).
+;;   already-seen when true, a per-section count of settings is printed
+;;                at debug level 2 and *already-seen-runconfig-info* is
+;;                set to #t.
+;;   environ-patt passed through to runconfig:read.
+;;   change-env   when true (default) the keyvals and every string
+;;                setting from the "default" and target sections are
+;;                exported with safe-setenv.
+;;
+;; Side effect: *target* is set to the computed target if it was false.
+;; run-id is accepted but not used here.
+;;
+(define (setup-env-defaults fname run-id already-seen keyvals #!key (environ-patt #f)(change-env #t))
+  (let* ((thekey    (if keyvals
+                        (string-intersperse (map (lambda (x)(if x x "-na-")) (map cadr keyvals)) "/")
+                        (or (common:args-get-target)
+                            (get-environment-variable "MT_TARGET")
+                            (begin
+                              (debug:print-error 0 *default-log-port* "setup-env-defaults called with no run-id or -target or -reqtarg")
+                              "nothing matches this I hope"))))
+         ;; Why was system disallowed in the reading of the runconfigs file?
+         ;; NOTE: Should be setting env vars based on (target|default)
+         (confdat   (runconfig:read fname thekey environ-patt))
+         (whatfound (make-hash-table))
+         (finaldat  (make-hash-table))
+         (sections  (list "default" thekey)))
+    (if (not *target*)(set! *target* thekey)) ;; may save a db access or two but repeats db:get-target code
+    (debug:print 4 *default-log-port* "Using key=\"" thekey "\"")
+
+    ;; keyvals may legitimately be #f (see the fallback above), so guard the walk
+    (if (and change-env keyvals)
+        (for-each ;; NB// This can be simplified with new content of keyvals having all that is needed.
+         (lambda (keyval)
+           (safe-setenv (car keyval)(cadr keyval)))
+         keyvals))
+
+    (for-each
+     (lambda (section)
+       (let ((section-dat (hash-table-ref/default confdat section #f)))
+         (if section-dat
+             (for-each
+              (lambda (envvar)
+                (let ((val (cadr (assoc envvar section-dat))))
+                  (hash-table-set! whatfound section (+ (hash-table-ref/default whatfound section 0) 1))
+                  (if (and (string? envvar)
+                           (string? val)
+                           change-env)
+                      (safe-setenv envvar val))
+                  (hash-table-set! finaldat envvar val)))
+              (map car section-dat)))))
+     sections)
+    (if already-seen
+        (begin
+          (debug:print 2 *default-log-port* "Key settings found in runconfigs.config:")
+          (for-each (lambda (fullkey)
+                      (debug:print 2 *default-log-port* (format #f "~20a ~a\n" fullkey (hash-table-ref/default whatfound fullkey 0))))
+                    sections)
+          (debug:print 2 *default-log-port* "---")
+          (set! *already-seen-runconfig-info* #t)))
+    ;; finaldat ;; was returning this "finaldat" which would be good but conflicts with other uses
+    confdat
+    ))
+
+;; Load <*toppath*>/runconfigs.config via setup-env-defaults, using an
+;; environ-patt of "(default|<target>)" (or "(default)" when no target
+;; is known). The target comes from common:args-get-target, then
+;; targ-from-db, then MT_TARGET. Logs a warning when the file does not
+;; exist.
+;;
+(define (set-run-config-vars run-id keyvals targ-from-db)
+  (push-directory *toppath*) ;; the push/pop doesn't appear to do anything ...
+  (let ((runconfigf (conc  *toppath* "/runconfigs.config"))
+        (targ       (or (common:args-get-target)
+                        targ-from-db
+                        (get-environment-variable "MT_TARGET"))))
+    (pop-directory)
+    (if (common:file-exists? runconfigf)
+        (setup-env-defaults runconfigf run-id #t keyvals
+                            environ-patt: (conc "(default"
+                                                (if targ
+                                                    (conc "|" targ ")")
+                                                    ")")))
+        (debug:print 0 *default-log-port* "WARNING: You do not have a run config file: " runconfigf))))
+
+;; given (a (b c) d) return ((a b d)(a c d))
+;; NOTE: this feels like it has been done before - perhaps with items handling?
+;;
+;; Each element of inlst is either an atom (appended to every partial
+;; result) or a list of alternatives (every partial result is extended
+;; once per alternative). An empty inlst yields the empty list (it
+;; previously crashed on the car of the empty list).
+;;
+(define (runconfig:combinations inlst)
+  (if (null? inlst)
+      '()
+      (let loop ((hed (car inlst))
+                 (tal (cdr inlst))
+                 (res '()))
+        ;; (print "res: " res " hed: " hed)
+        (if (list? hed)
+            (let ((newres (if (null? res) ;; first time through convert incoming items to list of items
+                              (map list hed)
+                              (apply append
+                                     (map (lambda (r) ;; iterate over items in res
+                                            (map (lambda (h) ;; iterate over items in hed
+                                                   (append r (list h)))
+                                                 hed))
+                                          res)))))
+              ;; (print "newres1: " newres)
+              (if (null? tal)
+                  newres
+                  (loop (car tal)(cdr tal) newres)))
+            (let ((newres (if (null? res)
+                              (list (list hed))
+                              (map (lambda (r)
+                                     (append r (list hed)))
+                                   res))))
+              ;; (print "newres2: " newres)
+              (if (null? tal)
+                  newres
+                  (loop (car tal)(cdr tal) newres)))))))
+
+;; multi-part expand
+;; Given a/b,c,d/e,f return a/b/e a/b/f a/c/e a/c/f a/d/e a/d/f
+;;
+(define (runconfig:expand target)
+  (let* ((parts (map (lambda (x)
+                       (string-split x ","))
+                     (string-split target "/"))))
+    (map (lambda (x)
+           (string-intersperse x "/"))
+         (runconfig:combinations parts))))
+
+;; multi-target expansion
+;; a/b/c/x,y,z a/b/d/x,y => a/b/c/x a/b/c/y a/b/c/z a/b/d/x a/b/d/y
+;;
+;; target-strs is split on spaces; duplicates are removed from the result.
+;;
+(define (runconfig:expand-target target-strs)
+  (delete-duplicates
+   (apply append (map runconfig:expand (string-split target-strs " ")))))
+;;======================================================================
+;; runs
+;;======================================================================
+;; (include "debugger.scm")
+
+;; use this struct to facilitate refactoring
+;;
+
+(defstruct runs:dat
+  reglen regfull
+  runname max-concurrent-jobs run-id
+  test-patts required-tests test-registry
+  registry-mutex flags keyvals run-info all-tests-registry
+  can-run-more-tests
+  ((can-run-more-tests-count 0) : fixnum))
+
+(defstruct runs:testdat
+  hed tal reg reruns test-record
+  test-name item-path jobgroup
+  waitons testmode newtal itemmaps prereqs-not-met)
+
+
+;; Build (without touching the process environment) an alist of the
+;; MT_* variables for one test: the fixed name/itempath/target/runname/
+;; area entries, MT_LINKTREE and MT_TEST_RUN_DIR when a linktree is
+;; configured, one entry per key of the target, and every variable of
+;; the [env-override] section. run-id is accepted but not used.
+;;
+(define (runs:get-mt-env-alist run-id runname target testname itempath)
+  ;;(bb-check-path msg: "runs:set-megatest-env-vars entry")
+  `(("MT_TEST_NAME"     . ,testname)
+
+    ("MT_ITEMPATH"      . ,itempath)
+
+    ("MT_TARGET"        . ,target)
+
+    ("MT_RUNNAME"       . ,runname)
+
+    ("MT_RUN_AREA_HOME" . ,*toppath*)
+
+    ,@(let* ((link-tree (common:get-linktree))) ;; (configf:lookup *configdat* "setup" "linktree")))
+        (if link-tree
+            (list (cons "MT_LINKTREE" link-tree)
+
+                  (cons "MT_TEST_RUN_DIR"
+                        (conc link-tree "/" target "/" runname "/" testname
+                              (if (and (string? itempath) (not (equal? itempath "")))
+                                  (conc "/" itempath)
+                                  "")))
+                  )
+            '()))
+
+    ,@(map
+       (lambda (key)
+         (cons (car key) (cadr key)))
+       (keys:target->keyval (rmt:get-keys) target))
+
+    ,@(map (lambda (var)
+             (let ((val (configf:lookup *configdat* "env-override" var)))
+               (cons var val)))
+           (configf:section-vars *configdat* "env-override"))))
+
+
+
+
+
+
+;; set up needed environment variables given a run-id and optionally a target, itempath etc.
+;;
+;; Order of work: MT_TEST_NAME / MT_ITEMPATH (when given), MT_LINKTREE,
+;; the key/value pairs for the run (cached per run-id in
+;; *env-vars-by-run-id*), MT_TARGET (only if not already set and a
+;; target is known), the [env-override] section of *configdat*,
+;; MT_RUNNAME, MT_RUN_AREA_HOME, MT_TEST_NAME / MT_ITEMPATH again, and
+;; finally MT_TEST_RUN_DIR when both a testname and a linktree exist.
+;;
+;; If applying [env-override] raises, launch:setup is re-run and the
+;; step retried up to 5 times; after that the process exits with
+;; status 1.
+;;
+(define (runs:set-megatest-env-vars run-id #!key (inkeys #f)(inrunname #f)(inkeyvals #f)(intarget #f)(testname #f)(itempath #f))
+  ;;(bb-check-path msg: "runs:set-megatest-env-vars entry")
+  (let* ((target    (or intarget
+                        (common:args-get-target)
+                        (get-environment-variable "MT_TARGET")))
+         (keys      (if inkeys    inkeys    (rmt:get-keys)))
+         (keyvals   (if inkeyvals inkeyvals (keys:target->keyval keys target)))
+         (vals      (hash-table-ref/default *env-vars-by-run-id* run-id #f))
+         (link-tree (common:get-linktree))) ;; (configf:lookup *configdat* "setup" "linktree")))
+    (if testname (setenv "MT_TEST_NAME" testname))
+    (if itempath (setenv "MT_ITEMPATH"  itempath))
+
+    ;; get the info from the db and put it in the cache
+    (if link-tree
+        (setenv "MT_LINKTREE" link-tree)
+        (debug:print-error 0 *default-log-port* "linktree not set, should be set in megatest.config in [setup] section."))
+    (if (not vals)
+        (let ((ht (make-hash-table)))
+          (hash-table-set! *env-vars-by-run-id* run-id ht)
+          (set! vals ht)
+          (for-each
+           (lambda (key)
+             (hash-table-set! vals (car key) (cadr key)))
+           keyvals)))
+    ;; from the cached data set the vars
+
+    (hash-table-for-each
+     vals
+     (lambda (key val)
+       (debug:print 2 *default-log-port* "setenv " key " " val)
+       (safe-setenv key val)))
+    ;;(bb-check-path msg: "runs:set-megatest-env-vars block 1")
+    ;;(BB> "*env-vars-by-run-id*/runid("run-id" vals="(hash-table->alist vals))
+
+    ;; target can be #f when no source supplied one; setenv needs a string
+    (if (and target
+             (not (get-environment-variable "MT_TARGET")))
+        (setenv "MT_TARGET" target))
+    ;; we had a case where there was an exception generated by the hash-table-ref
+    ;; due to *configdat* being #f Adding a handle and exit
+    (let fatal-loop ((count 0))
+      (handle-exceptions
+          exn
+          (let ((call-chain (get-call-chain))
+                (msg        ((condition-property-accessor 'exn 'message) exn)))
+            (if (< count 5)
+                (begin ;; this call is colliding, do some crude stuff to fix it.
+                  (debug:print 0 *default-log-port* "ERROR: *configdat* was inaccessible! This should never happen. Retry #" count)
+                  (launch:setup force-reread: #t)
+                  (fatal-loop (+ count 1)))
+                (begin
+                  (debug:print 0 *default-log-port* "FATAL: *configdat* was inaccessible! This should never happen. Retried " count " times. Message: " msg)
+                  (debug:print 0 *default-log-port* "Call chain:")
+                  (with-output-to-port *default-log-port*
+
+                    (lambda ()
+                      (print "*configdat* is >>"*configdat*"<<")
+                      (pp *configdat*)
+                      (pp call-chain)))
+
+                  (exit 1))))
+        ;;(bb-check-path msg: "runs:set-megatest-env-vars block 1.5")
+        (when (or (not *configdat*) (not (hash-table? *configdat*)))
+          (debug:print 0 *default-log-port* "WARNING: *configdat* was inaccessible! This should never happen. Brute force reread.")
+          ;;(BB> "ERROR: *configdat* was inaccessible! This should never happen. Brute force reread.")
+          (thread-sleep! 2) ;; assuming nfs lag.
+          (launch:setup force-reread: #t))
+        (alist->env-vars (hash-table-ref/default *configdat* "env-override" '())))) ;;;; environment is tainted HERE in this let block.
+    ;;(bb-check-path msg: "runs:set-megatest-env-vars block 2")
+    ;; Lets use this as an opportunity to put MT_RUNNAME in the environment
+    (let ((runname (if inrunname inrunname (rmt:get-run-name-from-id run-id))))
+      (if runname
+          (setenv "MT_RUNNAME" runname)
+          (debug:print-error 0 *default-log-port* "no value for runname for id " run-id)))
+    (setenv "MT_RUN_AREA_HOME" *toppath*)
+    ;; if a testname and itempath are available set the remaining appropriate variables
+    (if testname (setenv "MT_TEST_NAME" testname))
+    (if itempath (setenv "MT_ITEMPATH"  itempath))
+    ;;(bb-check-path msg: "runs:set-megatest-env-vars block 3")
+    (if (and testname link-tree)
+        (setenv "MT_TEST_RUN_DIR" (conc (getenv "MT_LINKTREE")  "/"
+                                        (getenv "MT_TARGET")    "/"
+                                        (getenv "MT_RUNNAME")   "/"
+                                        (getenv "MT_TEST_NAME")
+                                        (if (and itempath
+                                                 (not (equal? itempath "")))
+                                            (conc "/" itempath)
+                                            ""))))))
+
+;; Export each (name value) pair of itemdat into the environment,
+;; logging every assignment at debug level 2.
+;;
+(define (set-item-env-vars itemdat)
+  (for-each (lambda (item)
+              (debug:print 2 *default-log-port* "setenv " (car item) " " (cadr item))
+              (setenv (car item) (cadr item)))
+            itemdat))
+
+;; Every time can-run-more-tests is called increment the delay
+;;
+;; NOTE: We run this server-side!! Do not use this global except in the runs:can-run-more-tests routine
+;;
+(define *last-num-running-tests* 0)
+;; (define *runs:can-run-more-tests-count* 0)
+
+;; Reset the can-run-more-tests counter of runsdat to 0.
+(define (runs:shrink-can-run-more-tests-count runsdat)
+  (runs:dat-can-run-more-tests-count-set! runsdat 0))
+
+;; Add 1 to the can-run-more-tests counter of runsdat.
+(define (runs:inc-can-run-more-tests-count runsdat)
+  (runs:dat-can-run-more-tests-count-set!
+   runsdat
+   (+ (runs:dat-can-run-more-tests-count runsdat) 1)))
+
+;; (set! *runs:can-run-more-tests-count* 0)) ;; (/ *runs:can-run-more-tests-count* 2)))
+
+;; Temporary globals. Move these into the logic or into common
+;;
+(define *seen-cant-run-tests* (make-hash-table)) ;; use to track tests that we suspect cannot be run
+
+;; Bump the "could not run" counter for testname.
+(define (runs:inc-cant-run-tests testname)
+  (hash-table-set! *seen-cant-run-tests* testname
+                   (+ (hash-table-ref/default *seen-cant-run-tests* testname 0) 1)))
+
+;; #t while testname has been counted as "could not run" fewer than n times.
+(define (runs:can-keep-running? testname n)
+  (< (hash-table-ref/default *seen-cant-run-tests* testname 0) n))
+
+(define *runs:denoise* (make-hash-table)) ;; key => last-time-ran
+
+;; mechanism to limit printing info to the screen that is repetitive.
+;;
+;; Returns #t (and records the current time for key) when more than
+;; waitval seconds have passed since the last #t for that key, else #f.
+;;
+;; Example:
+;;     (if (runs:lownoise "waiting on tasks" 60)
+;;         (debug:print-info 2 *default-log-port* "waiting for tasks to complete, sleeping briefly ..."))
+;;
+(define (runs:lownoise key waitval)
+  (let ((lasttime (hash-table-ref/default *runs:denoise* key 0))
+        (currtime (current-seconds)))
+    (if (> (- currtime lasttime) waitval)
+        (begin
+          (hash-table-set! *runs:denoise* key currtime)
+          #t)
+        #f)))
+
+;; Decide whether another test may be launched.
+;;
+;; Returns a five element list:
+;;   (can-run? num-running num-running-in-jobgroup max-concurrent-jobs job-group-limit)
+;; can-run? is #f when *globalexitstatus* is not 0, when num-running has
+;; reached max-concurrent-jobs, or when the jobgroup has reached the
+;; limit configured for it under [jobgroups].
+;;
+;; Side effects: exits the process with status 0 when the counter in
+;; runsdat exceeds 20 and -one-pass was given; sleeps for the [setup]
+;; inter-test-delay (default 0.1) once the counter exceeds 20; bumps
+;; the counter whenever anything is running.
+;;
+(define (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs)
+
+  ;; Take advantage of a good place to exit if running the one-pass methodology
+  (if (and (> (runs:dat-can-run-more-tests-count runsdat) 20)
+           (args:get-arg "-one-pass"))
+      (exit 0))
+
+  (thread-sleep! (cond ;; BB: check with Matt. Should this sleep move to cond clauses below where we determine we have too many jobs running rather than each time the and condition above is true (which seems like always)?
+                  ((> (runs:dat-can-run-more-tests-count runsdat) 20)
+                   (if (runs:lownoise "waiting on tasks" 60)(debug:print-info 2 *default-log-port* "waiting for tasks to complete, sleeping briefly ..."))
+                   (configf:lookup-number *configdat* "setup" "inter-test-delay" default: 0.1) ;; was 2
+                   );; obviously haven't had any work to do for a while
+                  (else 0)))
+
+  (let* ((num-running             (rmt:get-count-tests-running run-id))
+         (num-running-in-jobgroup (rmt:get-count-tests-running-in-jobgroup run-id jobgroup))
+         (job-group-limit         (let ((jobg-count (config-lookup *configdat* "jobgroups" jobgroup)))
+                                    (if (string? jobg-count)
+                                        (string->number jobg-count)
+                                        jobg-count))))
+    (if (> (+ num-running num-running-in-jobgroup) 0)
+        (runs:inc-can-run-more-tests-count runsdat)) ;; (set! *runs:can-run-more-tests-count* (+ *runs:can-run-more-tests-count* 1)))
+    (if (not (eq? *last-num-running-tests* num-running))
+        (begin
+          (debug:print 2 *default-log-port* "max-concurrent-jobs: " max-concurrent-jobs ", num-running: " num-running)
+          (set! *last-num-running-tests* num-running)))
+    (if (not (eq? 0 *globalexitstatus*))
+        (list #f num-running num-running-in-jobgroup max-concurrent-jobs job-group-limit)
+        (let* ((can-not-run-more (cond
+                                  ;; if max-concurrent-jobs is set and the number running has
+                                  ;; reached it then cannot run more jobs
+                                  ((and max-concurrent-jobs (>= num-running max-concurrent-jobs))
+                                   (if (runs:lownoise "mcj msg" 60)
+                                       (debug:print 0 *default-log-port* "WARNING: Max running jobs exceeded, current number running: " num-running
+                                                    ", max_concurrent_jobs: " max-concurrent-jobs))
+                                   #t)
+                                  ;; if job-group-limit is set and number of jobs in the group has
+                                  ;; reached the limit then cannot run more jobs of this kind
+                                  ((and job-group-limit
+                                        (>= num-running-in-jobgroup job-group-limit))
+                                   (if (runs:lownoise (conc "maxjobgroup " jobgroup) 60)
+                                       (debug:print 1 *default-log-port* "WARNING: number of jobs " num-running-in-jobgroup
+                                                    " in jobgroup \"" jobgroup "\" exceeds limit of " job-group-limit))
+                                   #t)
+                                  (else #f))))
+          (list (not can-not-run-more) num-running num-running-in-jobgroup max-concurrent-jobs job-group-limit)))))
+
+;; Private helper shared by runs:run-pre-hook and runs:run-post-hook.
+;;
+;; Runs hook-cmd through the shell, appending stdout and stderr to
+;; <*toppath*>/logs/<kind>-hook-<target>-<runname>.log, where target is
+;; MT_TARGET with "/" replaced by "-". kind is "pre" or "post". If the
+;; logs directory is missing and cannot be created, the log is written
+;; under the bare file name instead. Placeholders stand in for unset
+;; MT_TARGET / MT_RUNNAME so that building the name cannot fail.
+;;
+;; Exceptions are logged, not propagated. The exit status returned by
+;; system is not inspected.
+;;
+(define (runs:run-hook-command kind hook-cmd)
+  (let* ((log-dir        (conc *toppath* "/logs"))
+         (log-file       (conc kind "-hook-"
+                               (string-translate (or (getenv "MT_TARGET") "no-target") "/" "-")
+                               "-"
+                               (or (getenv "MT_RUNNAME") "no-runname")
+                               ".log"))
+         (full-log-fname (conc log-dir "/" log-file))
+         (use-log-dir    (if (not (directory-exists? log-dir))
+                             (handle-exceptions
+                                 exn
+                                 (begin
+                                   (debug:print 0 *default-log-port* "WARNING: Failed to create " log-dir)
+                                   #f)
+                               (create-directory log-dir #t)
+                               #t)
+                             #t))
+         (start-time     (current-seconds))
+         (actual-logf    (if use-log-dir full-log-fname log-file)))
+    (handle-exceptions
+        exn
+        (begin
+          (print-call-chain *default-log-port*)
+          (debug:print 0 *default-log-port* "Message: " ((condition-property-accessor 'exn 'message) exn))
+          ;; report the log that was actually written to
+          (debug:print 0 *default-log-port* (conc "ERROR: failed to run " kind "-hook ") hook-cmd ", check the log " actual-logf))
+      (debug:print-info 0 *default-log-port* (conc "running run-" kind "-hook: \"") hook-cmd "\", log is " actual-logf)
+      (system (conc hook-cmd " >> " actual-logf " 2>&1"))
+      (debug:print-info 0 *default-log-port* (conc kind "-hook \"") hook-cmd "\" took " (- (current-seconds) start-time) " seconds to run."))))
+
+;; Run the [runs] pre-hook command from *configdat*, if one is
+;; configured, but only when the run has no tests yet; otherwise log
+;; that the hook is being skipped. Does nothing when no pre-hook is
+;; configured.
+;;
+(define (runs:run-pre-hook run-id)
+  (let ((run-pre-hook (configf:lookup *configdat* "runs" "pre-hook")))
+    (if run-pre-hook
+        (let ((existing-tests (rmt:get-tests-for-run run-id "%" '() '() ;; run-id testpatt states statuses
+                                                     #f #f ;; offset limit
+                                                     #f ;; not-in
+                                                     #f ;; sort-by
+                                                     #f ;; sort-order
+                                                     #f ;; get full data (not 'shortlist)
+                                                     0 ;; (runs:gendat-inc-results-last-update *runs:general-data*) ;; last update time
+                                                     'dashboard)))
+          (if (null? existing-tests)
+              (runs:run-hook-command "pre" run-pre-hook)
+              (debug:print 0 *default-log-port* "Skipping pre-hook call \"" run-pre-hook "\" as there are existing tests for this run."))))))
+
+;; Run the [runs] post-hook command from *configdat*, if one is
+;; configured. Unlike the pre-hook there is no "existing tests" gate,
+;; so no test query is made. run-id is accepted but not used.
+;;
+(define (runs:run-post-hook run-id)
+  (let ((run-post-hook (configf:lookup *configdat* "runs" "post-hook")))
+    (if run-post-hook
+        (runs:run-hook-command "post" run-post-hook))))
+
+;; return #t when all items in waitors-upon list are represented in test-patt, #f otherwise.
+(define (runs:testpatts-mention-waitors-upon? test-patt waitors-upon)
+  (null? (tests:filter-test-names-not-matched waitors-upon test-patt)))
+
+;;======================================================================
+;; runs:run-tests is called from megatest.scm and itself
+;;======================================================================
+;;
+;; test-names: Comma separated patterns same as test-patts but used in selection
+;;             of tests to run. The item portions are not respected.
+;; FIXME: error out if /patt specified +;; +(define (runs:run-tests target runname test-patts user flags #!key (run-count 1)) ;; test-names + (let* ((keys (keys:config-get-fields *configdat*)) + (keyvals (keys:target->keyval keys target)) + (run-id (rmt:register-run keyvals runname "new" "n/a" user (args:get-arg "-contour"))) ;; test-name))) + ;; (deferred '()) ;; delay running these since they have a waiton clause + (runconfigf (conc *toppath* "/runconfigs.config")) + (dbfile (conc *toppath* "/megatest.db")) + (readonly-mode (not (file-write-access? dbfile))) + (test-records (make-hash-table)) + ;; need to process runconfigs before generating these lists + (all-tests-registry #f) ;; (tests:get-all)) ;; (tests:get-valid-tests (make-hash-table) test-search-path)) ;; all valid tests to check waiton names + (all-test-names #f) ;; (hash-table-keys all-tests-registry)) + (test-names #f) ;; Generated by a call to (tests:filter-test-names all-test-names test-patts)) + (required-tests #f) ;; Put fully qualified test/testpath names in this list to be done + (waitors-upon (make-hash-table)) ;; given a test, return list of tests waiting upon this test. + (task-key (conc (hash-table->alist flags) " " (get-host-name) " " (current-process-id))) + ;; (tdbdat (tasks:open-db)) + (config-reruns (let ((x (configf:lookup *configdat* "setup" "reruns"))) + (if x (string->number x) #f))) + (allowed-tests #f)) + + ;; check if readonly + (when readonly-mode + (debug:print-error 0 *default-log-port* "megatest.db is readonly. Cannot proceed.") + (exit 1)) + + ;; per user request. If less than 100Meg space on dbdir partition, bail out with error + ;; this will reduce issues in database corruption + (common:check-db-dir-and-exit-if-insufficient) + + ;; override the number of reruns from the configs + ;; this needs to be done at the place where is first runs:run-tests called + ;(if (and config-reruns + ; (> run-count config-reruns)) + ;(set! 
run-count config-reruns)) + + ;; (if (tasks:need-server run-id)(tasks:start-and-wait-for-server tdbdat run-id 10)) + + (let ((sighand (lambda (signum) + ;; (signal-mask! signum) ;; to mask or not? seems to cause issues in exiting + (set! *time-to-exit* #t) + (print "Received signal " signum ", cleaning up before exit. Please wait...") + (let ((th1 (make-thread (lambda () + ;; (let ((tdbdat (tasks:open-db))) + (rmt:tasks-set-state-given-param-key task-key "killed") ;; ) + (print "Killed by signal " signum ". Exiting") + (thread-sleep! 3) + (exit)))) + (th2 (make-thread (lambda () + (thread-sleep! 5) + (debug:print 0 *default-log-port* "Done") + (exit 4))))) + (thread-start! th2) + (thread-start! th1) + (thread-join! th2))))) + (set-signal-handler! signal/int sighand) + (set-signal-handler! signal/term sighand)) + + ;; force the starting of a server -- removed BB 17ww28 - no longer needed. + ;;(debug:print 0 *default-log-port* "waiting on server...") + ;;(server:start-and-wait *toppath*) + + (runs:set-megatest-env-vars run-id inkeys: keys inrunname: runname) ;; these may be needed by the launching process + (set! runconf (if (common:file-exists? runconfigf) + (setup-env-defaults runconfigf run-id *already-seen-runconfig-info* keyvals target) + (begin + (debug:print 0 *default-log-port* "WARNING: You do not have a run config file: " runconfigf) + #f))) + + (if (not test-patts) ;; first time in - adjust testpatt + (set! test-patts (common:args-get-testpatt runconf))) + ;; if test-patts is #f at this point there is something wrong and we need to bail out + (if (not test-patts) + (begin + (debug:print 0 *default-log-port* "WARNING: there is no test pattern for this run. Exiting now.") + (exit 0))) + + (if (args:get-arg "-tagexpr") + (begin + (set! 
allowed-tests (string-join (runs:get-tests-matching-tags (args:get-arg "-tagexpr")) ",")) + (debug:print-info 0 *default-log-port* "filtering initial test list with tagexpr: " (args:get-arg "-tagexpr") " => " allowed-tests) + ));; tests will be ANDed with this list + + ;; register this run in monitor.db + (rmt:tasks-add "run-tests" user target runname test-patts task-key) ;; params) + (rmt:tasks-set-state-given-param-key task-key "running") + + (common:telemetry-log "run-tests" + payload: + `( (target . ,target) + (run-name . ,runname) + (test-patts . ,test-patts) ) ) + + + ;; Now generate all the tests lists + (set! all-tests-registry (tests:get-all)) ;; hash of testname => path-to-test + (set! all-test-names (hash-table-keys all-tests-registry)) + ;; filter first for allowed-tests (from -tagexpr) then for test-patts. + (set! test-names (tests:filter-test-names + (if allowed-tests + (tests:filter-test-names all-test-names allowed-tests) + all-test-names) + test-patts)) + + ;; I think seeding required-tests with all test-names makes sense but lack analysis to back that up. + + ;; NEW STRATEGY HERE: + ;; 1. fill required tests with test-patts + ;; 2. scan testconfigs and if waitons, itemwait, itempatt calc prior test test-patt + ;; 3. repeat until all deps propagated + + ;; any tests with direct mention in test-patts can be added to required + ;;(set! required-tests (lset-intersection equal? (string-split test-patts ",") all-test-names)) + (set! required-tests (tests:filter-test-names all-test-names test-patts)) + ;; + ;; (set! required-tests (lset-intersection equal? test-names all-test-names)) + + ;; look up all tests matching the comma separated list of globs in + ;; test-patts (using % as wildcard) + + ;; (set! 
test-names (delete-duplicates (tests:get-valid-tests *toppath* test-patts))) + (debug:print-info 0 *default-log-port* "tests search path: " (string-intersperse (tests:get-tests-search-path *configdat*) " ")) + (debug:print-info 0 *default-log-port* "all tests: " (string-intersperse (sort all-test-names string<) " ")) + (debug:print-info 0 *default-log-port* "test names: " (string-intersperse (sort test-names string<) " ")) + (debug:print-info 0 *default-log-port* "required tests: " (string-intersperse (sort required-tests string<) " ")) + + ;; on the first pass or call to run-tests set FAILS to NOT_STARTED if + ;; -keepgoing is specified + (if (eq? *passnum* 0) + (begin + ;; Is this still necessary? I think not. Unreachable tests are marked as such and + ;; should not cause problems here. + ;; + ;; have to delete test records where NOT_STARTED since they can cause -keepgoing to + ;; get stuck due to becoming inaccessible from a failed test. I.e. if test B depends + ;; on test A but test B reached the point on being registered as NOT_STARTED and test + ;; A failed for some reason then on re-run using -keepgoing the run can never complete. 
+ ;; + ;; (rmt:general-call 'delete-tests-in-state run-id "NOT_STARTED") + + ;; Now convert anything in allow-auto-rerun to NOT_STARTED + ;; + (for-each + (lambda (state-status) + (let* ((ss-lst (string-split-fields "/" state-status #:infix)) + (state (if (> (length ss-lst) 0)(car ss-lst) #f)) + (status (if (> (length ss-lst) 1)(cadr ss-lst) #f))) + (rmt:set-tests-state-status run-id test-names state status "NOT_STARTED" status))) + ;; list of state/status pairs separated by spaces + (string-split (or (configf:lookup *configdat* "setup" "allow-auto-rerun") ""))))) + + ;; Ensure all tests are registered in the test_meta table + (runs:update-all-test_meta #f) + + ;; run the run prehook if there are no tests yet run for this run: + ;; + (runs:run-pre-hook run-id) + ;; mark all test launced flag as false in the meta table + (rmt:set-var (conc "lunch-complete-" run-id) "no") + (rmt:set-run-state-status run-id "new" "n/a") + ;; now add non-directly referenced dependencies (i.e. waiton) + ;;====================================================================== + ;; refactoring this block into tests:get-full-data + ;; + ;; What happended, this code is now duplicated in tests!? + ;; + ;;====================================================================== + + (if (not (null? test-names)) ;; BEGIN test-names loop + (let loop ((hed (car test-names)) ;; NOTE: This is the main loop that iterates over the test-names + (tal (cdr test-names))) ;; 'return-procs tells the config reader to prep running system but return a proc + (debug:print-info 4 *default-log-port* "\n\ntestpatt elaboration loop => hed="hed " tal="tal" test-patts="test-patts" test-names="test-names) + (change-directory *toppath*) ;; PLEASE OPTIMIZE ME!!! I think this should be a no-op but there are several places where change-directories could be happening. 
+ (setenv "MT_TEST_NAME" hed) ;; + (let*-values (((waitons waitors config) (tests:get-waitons hed all-tests-registry)) + + ;; NOTE: Have the config - can extract [waitons] section + + ((hed-mode) + (let ((m (config-lookup config "requirements" "mode"))) + (if m (map string->symbol (string-split m)) '(normal)))) + ((hed-itemized-waiton) ;; are items in hed waiting on items of waiton? + (not (null? (lset-intersection eq? hed-mode '(itemmatch itemwait))))) + ) + (debug:print-info 8 *default-log-port* "waitons: " waitons) + ;; check for hed in waitons => this would be circular, remove it and issue an + ;; error + (if (or (member hed waitons) + (member hed waitors)) + (begin + (debug:print-error 0 *default-log-port* "test " hed " has listed itself as a waiton or waitor, please correct this!") + (set! waitons (filter (lambda (x)(not (equal? x hed))) waitons)) + (set! waitors (filter (lambda (x)(not (equal? x hed))) waitors)))) + + ;; (items (items:get-items-from-config config))) + (if (not (hash-table-ref/default test-records hed #f)) ;; waiton-tconfig below will be #f until that test is visted here at least once + (hash-table-set! test-records ;; BB: we are doing a manual make-tests:testqueue + hed (vector hed ;; 0 ;; testname + config ;; 1 + waitons ;; 2 + (config-lookup config "requirements" "priority") ;; priority 3 + (tests:get-items config) ;; 4 ;; expand the [items] and or [itemstable] into explict items + #f ;; itemsdat 5 + #f ;; spare - used for item-path + waitors ;; + ))) + ;; update waitors-upon here + (for-each + (lambda (waiton) + (let* ((current-waitors-upon (hash-table-ref/default waitors-upon waiton '()))) + (debug:print-info 8 *default-log-port* " current-waiters-upon["waiton"] is "current-waitors-upon ) + (when (not (member hed current-waitors-upon)) + (debug:print-info 8 *default-log-port* " current-waiters-upon["waiton"] << "hed ) + (hash-table-set! waitors-upon waiton (cons hed current-waitors-upon))))) + (if (list? 
waitons) waitons '())) + (debug:print-info 8 *default-log-port* " process waitons&waitors of "hed": "(delete-duplicates (append waitons waitors))) + (for-each + (lambda (waiton) + (if (and waiton (not (member waiton test-names))) + (let* ((waitors-in-testpatt (runs:testpatts-mention-waitors-upon? test-patts (hash-table-ref/default waitors-upon waiton '()))) + (waiton-record (hash-table-ref/default test-records waiton #f)) + (waiton-tconfig (if waiton-record (vector-ref waiton-record 1) #f)) + (waiton-itemized (and waiton-tconfig + (or (hash-table-ref/default waiton-tconfig "items" #f) + (hash-table-ref/default waiton-tconfig "itemstable" #f)))) + (itemmaps (tests:get-itemmaps config)) ;; (configf:lookup config "requirements" "itemmap")) + (new-test-patts (tests:extend-test-patts test-patts hed waiton itemmaps hed-itemized-waiton))) + (debug:print-info 0 *default-log-port* "Test " waiton " has " (if waiton-record "a" "no") " waiton-record and" (if waiton-itemized " " " no ") "items") + ;; need to account for test-patt here, if I am test "a", selected with a test-patt of "hed/b%" + ;; and we are waiting on "waiton" we need to add "waiton/,waiton/b%" to test-patt + ;; is this satisfied by merely appending "/" to the waiton name added to the list? + ;; + ;; This approach causes all of the items in an upstream test to be run + ;; if we have this waiton already processed once we can analzye it for extending + ;; tests to be run, since we can't properly process waitons unless they have been + ;; initially added we add them again to be processed on second round AND add the hed + ;; back in to also be processed on second round + (if waiton-tconfig ;; BB: waiter should be in test-patts as well as the waiton have a tconfig. + (if waiton-itemized + (if waitors-in-testpatt + (begin + (debug:print-info 0 *default-log-port* "New test patts: " new-test-patts ", prev test patts: " test-patts) + (set! 
test-names (cons waiton test-names)) ;; need to process this one, only add once the waiton tconfig read + (set! required-tests (cons (conc waiton "/") required-tests)) + (set! test-patts new-test-patts)) + (begin + (debug:print-info 0 *default-log-port* "Waitor(s) not yet on testpatt for " waiton ", setting up to re-process it") + (set! tal (append (cons waiton tal)(list hed))))) + (begin + (debug:print-info 0 *default-log-port* "Adding non-itemized test " waiton " to required-tests") + (set! required-tests (cons waiton required-tests)) + (set! test-patts new-test-patts))) + (begin + (debug:print-info 0 *default-log-port* "No testconfig info yet for " waiton ", setting up to re-process it") + (set! tal (append (cons waiton tal)(list hed))))) ;; (cons (conc waiton "/") required-tests)) + ;; NOPE: didn't work. required needs to be plain test names. Try tacking on to test-patts + ;; - doesn't work + ;; (set! test-patts (conc test-patts "," waiton "/")) + ;; (set! test-names (cons waiton test-names))))) ;; was an append, now a cons + ))) + (delete-duplicates (append waitons waitors))) + (let ((remtests (delete-duplicates (append waitons tal)))) + (debug:print-info 8 *default-log-port* " remtests are "remtests) + (if (not (null? remtests)) + (begin + ;; (debug:print-info 0 *default-log-port* "Preprocessing continues for " (string-intersperse remtests ", ")) + (loop (car remtests)(cdr remtests)))))))) ;; END test-names loop + + (if (not (null? required-tests)) + (debug:print-info 1 *default-log-port* "Adding \"" (string-intersperse required-tests " ") "\" to the run queue")) + ;; NOTE: these are all parent tests, items are not expanded yet. 
+ (debug:print-info 4 *default-log-port* "test-records=" (hash-table->alist test-records)) + (let ((reglen (configf:lookup *configdat* "setup" "runqueue"))) + (if (> (length (hash-table-keys test-records)) 0) + (let* ((keep-going #t) + (run-queue-retries 5) + #;(th1 (make-thread (lambda () + (handle-exceptions + exn + (begin + (print-call-chain) + (print " message: " ((condition-property-accessor 'exn 'message) exn))) + (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests + (any->number reglen) all-tests-registry))) + "runs:run-tests-queue")) + (th2 (make-thread (lambda () ;; BBQ: why are we visiting ALL runs here? + ;; (rmt:find-and-mark-incomplete-all-runs))))) CAN'T INTERRUPT IT ... + (let ((run-ids (rmt:get-all-run-ids))) + (for-each (lambda (run-id) + (if keep-going + (handle-exceptions + exn + (debug:print 0 *default-log-port* "error in calling find-and-mark-incomplete for run-id " run-id) + (rmt:find-and-mark-incomplete run-id #f)))) ;; ovr-deadtime))) ;; could be root of https://hsdes.intel.com/appstore/article/#/220546828/main -- Title: Megatest jobs show DEAD even though they are still running (1.64/27) + run-ids))) + "runs: mark-incompletes"))) + ;; (thread-start! th1) + (thread-start! th2) + ;; (thread-join! th1) + ;; just do the main stuff in the main thread + (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests + (any->number reglen) all-tests-registry) + (set! keep-going #f) + (thread-join! th2) + ;; if run-count > 0 call, set -preclean and -rerun STUCK/DEAD + (if (> run-count 0) ;; handle reruns + (begin + (if (not (hash-table-ref/default flags "-preclean" #f)) + (hash-table-set! flags "-preclean" #t)) + (if (not (hash-table-ref/default flags "-rerun" #f)) + (hash-table-set! 
flags "-rerun" "STUCK/DEAD,n/a,ZERO_ITEMS")) + ;; recursive call to self + (runs:run-tests target runname test-patts user flags run-count: (- run-count 1))))) + (debug:print-info 0 *default-log-port* "No tests to run"))) + (debug:print-info 4 *default-log-port* "All done by here") + ;; TODO: try putting post hook call here + ;(if (eq? run-count 0) + ; (begin + ; (debug:print-info 0 *default-log-port* "Calling Post Hook") + ; (debug:print-info 2 *default-log-port* " run-count " run-count) + ; (runs:run-post-hook run-id)) + ; (debug:print-info 2 *default-log-port* "Not calling post hook runcount = " run-count )) + (rmt:tasks-set-state-given-param-key task-key "done") + ;; (sqlite3:finalize! tasks-db) + )) + + +;; Loop logic. These helpers are used in runs:run-tests-queue to make it a bit more readable. +;; Each takes tal, reg, n and regfull; n is accepted but not referenced by any of the three bodies. +;; If reg not full and have items in tal then loop with (car tal)(cdr tal) reg reruns +;; If reg is full (i.e. length >= n) +;; loop with (car reg) tal (cdr reg) reruns +;; If tal is empty +;; but have items in reg; loop with (car reg)(cdr reg) '() reruns +;; If reg is empty => all done + +(define (runs:queue-next-hed tal reg n regfull) + (if regfull + (if (null? reg) #f (car reg)) + (if (null? tal) ;; tal is used up, pop from reg + (if (null? reg) #f (car reg)) + (car tal)))) +;; next tal: tal unchanged when regfull; otherwise (cdr tal), or (cdr reg) (or '() if reg is empty) once tal is empty +(define (runs:queue-next-tal tal reg n regfull) + (if regfull + tal + (if (null? tal) ;; must transfer from reg + (if (null? reg) '() (cdr reg)) + (cdr tal)))) +;; next reg: (cdr reg) when regfull ('() if reg is empty); otherwise '() once tal is empty, else reg unchanged +(define (runs:queue-next-reg tal reg n regfull) + (if regfull + (if (null? reg) '() (cdr reg)) ;; EXPLORE: reorder (cdr reg) such that looping is more efficient + (if (null? 
tal) ;; if tal is null and reg not full then '() as reg contents moved to tal + '() + reg))) + +;; Builds the list (hed tal reg reruns) of next values for the named loop "loop" near the top of runs:run-tests-queue (line numbers drift, so search for the loop by name). +;; reglen is forwarded as the n argument of the three runs:queue-next-* helpers; reruns is passed through unchanged. +(define (runs:loop-values tal reg reglen regfull reruns) + (list (runs:queue-next-hed tal reg reglen regfull) ;; hed + (runs:queue-next-tal tal reg reglen regfull) ;; tal + (runs:queue-next-reg tal reg reglen regfull) ;; reg + reruns)) ;; reruns + +;; objective - iterate thru tests +;; => want to prioritize tests we haven't seen before +;; => sometimes need to squeeze things in (added to reg) +;; => review of a previously seen test is higher priority than a never visited test +;; reg - list of previously visited tests +;; tal - list of never visited tests +;; prefer next hed to be from reg than tal. +;; counter incremented in runs:expand-items (ei-1 branch) when both tal and reg are empty; once it exceeds 2 that branch calls (exit 0) +(define runs:nothing-left-in-queue-count 0) + +;;====================================================================== +;; runs:expand-items is called by runs:run-tests-queue +;;====================================================================== +;; runs:expand-items returns #f (do not loop again) or a list (hed tal reg reruns). +;; That return value is passed back to runs:run-tests-queue and is fed to named loop with this signature: +;; (let loop ((hed (car sorted-test-names)) +;; (tal (cdr sorted-test-names)) +;; (reg '()) ;; registered, put these at the head of tal +;; (reruns '())) +(define (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry test-records itemmaps) + (let* ((loop-list (list hed tal reg reruns)) + (prereqs-not-met (let ((res (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps))) + (if (list? 
res) + res + (begin + (debug:print 0 *default-log-port* + "ERROR: rmt:get-prereqs-not-met returned non-list!\n" + " res=" res " run-id=" run-id " waitons=" waitons " hed=" hed " item-path=" item-path " testmode=" testmode " itemmaps=" itemmaps) + '())))) + (have-itemized (not (null? (lset-intersection eq? testmode '(itemmatch itemwait))))) + ;; (prereqs-not-met (mt:lazy-get-prereqs-not-met run-id waitons item-path mode: testmode itemmap: itemmap)) + (fails (runs:calc-fails prereqs-not-met)) + (prereq-fails (runs:calc-prereq-fail prereqs-not-met)) + (non-completed (runs:calc-not-completed prereqs-not-met)) + (runnables (runs:calc-runnable prereqs-not-met)) + (unexpanded-prereqs + (filter (lambda (testname) + (let* ((test-rec (hash-table-ref test-records testname)) + (items (tests:testqueue-get-items test-rec))) + ;;(BB> "HEY " testname "=>"items) + (or (procedure? items)(eq? items 'have-procedure)))) + waitons)) + ;; NOTE(review): loop-list, have-itemized and unexpanded-prereqs are bound here but not referenced in the rest of this procedure + + ) + (debug:print-info 4 *default-log-port* "START OF INNER COND #2 " + "\n can-run-more: " can-run-more + "\n testname: " hed + "\n prereqs-not-met: " (runs:pretty-string prereqs-not-met) + "\n non-completed: " (runs:pretty-string non-completed) + "\n prereq-fails: " (runs:pretty-string prereq-fails) + "\n fails: " (runs:pretty-string fails) + "\n testmode: " testmode + "\n (member 'toplevel testmode): " (member 'toplevel testmode) + "\n (null? non-completed): " (null? 
non-completed) + "\n reruns: " reruns + "\n items: " items + "\n can-run-more: " can-run-more) + + (cond + ;; ei-1: a non-toplevel test whose test-registry entry is DONOTRUN, removed or CANNOTRUN is dropped from the queue + ;; (the case "all prereqs met, or 'toplevel with no non-completed prereqs" is handled by the ei-2 branch below, which expands items) + + ((and (not (member 'toplevel testmode)) + (member (hash-table-ref/default test-registry (db:test-make-full-name hed item-path) 'n/a) + '(DONOTRUN removed CANNOTRUN))) ;; *common:cant-run-states-sym*) ;; '(COMPLETED KILLED WAIVED UNKNOWN INCOMPLETE)) ;; try to catch repeat processing of COMPLETED tests here + (debug:print-info 4 *default-log-port* "cond branch - " "ei-1") + (debug:print-info 1 *default-log-port* "Test " hed " set to \"" (hash-table-ref test-registry (db:test-make-full-name hed item-path)) "\". Removing it from the queue") + (if (or (not (null? tal)) + (not (null? reg))) + (runs:loop-values tal reg reglen regfull reruns) + (begin + (debug:print-info 0 *default-log-port* "Nothing left in the queue!") + ;; If we get here repeatedly (counter already above 2) then we know we've tried to expand all items + ;; since there must be a logic issue with the handling of loops in the + ;; items expand phase we will brute force an exit here. + (if (> runs:nothing-left-in-queue-count 2) + (begin + (debug:print 0 *default-log-port* "WARNING: this condition is triggered when there were no items to expand and nothing to run. Please check your run for completeness") + (exit 0)) + (set! runs:nothing-left-in-queue-count (+ runs:nothing-left-in-queue-count 1))) + #f))) + + ;; desired result of below cond branch: + ;; we want to expand items in our test of interest (hed) in the following cases: + ;; case 1 - mode is itemmatch or itemwait: + ;; - all prereq tests have been expanded + ;; - at least one prereq's items have completed + ;; case 2 - mode is toplevel + ;; - prereqs are completed. 
+ ;; - or no prereqs can complete + ;; case 3 - mode not specified + ;; - prereqs are completed and passed (we could consider removing "and passed" -- it would change behavior from current) + ((or (null? prereqs-not-met) + (and (member 'toplevel testmode) + (null? non-completed))) + (debug:print-info 4 *default-log-port* "cond branch - " "ei-2") + (debug:print-info 4 *default-log-port* "runs:expand-items: (or (null? prereqs-not-met) (and (member 'toplevel testmode)(null? non-completed)))") + (let ((test-name (tests:testqueue-get-testname test-record))) + (setenv "MT_TEST_NAME" test-name) ;; + (setenv "MT_RUNNAME" runname) + (runs:set-megatest-env-vars run-id inrunname: runname) ;; these may be needed by the launching process + (let ((items-list (items:get-items-from-config tconfig))) + (if (list? items-list) + (begin + (if (null? items-list) + (let ((test-id (rmt:get-test-id run-id test-name "")) + (num-items (rmt:test-toplevel-num-items run-id test-name))) + (if (and test-id + (not (> num-items 0))) + (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "ZERO_ITEMS" "Failed to run due to failed prerequisites")))) + (tests:testqueue-set-items! test-record items-list) + (list hed tal reg reruns)) + (begin + (debug:print-error 0 *default-log-port* "The proc from reading the items table did not yield a list - please report this") + (exit 1)))))) + + ((and (null? fails) + (null? prereq-fails) + (not (null? non-completed))) + (debug:print-info 4 *default-log-port* "cond branch - " "ei-3") + (let* ((allinqueue (map (lambda (x)(if (string? x) x (db:test-get-testname x))) + (append newtal reruns))) + ;; prereqstrs is a list of test names as strings that are prereqs for hed + (prereqstrs (delete-duplicates (map (lambda (x)(if (string? 
x) x (db:test-get-testname x))) + prereqs-not-met))) + ;; (disabled) a prereq that is not found in allinqueue would be put in the notinqueue list; + ;; with the binding below commented out, allinqueue is computed but not referenced in this branch + ;; (notinqueue (filter (lambda (x) + ;; (not (member x allinqueue))) + ;; prereqstrs)) + (give-up #f)) + + ;; We can get here when a prereq has not been run due to *it* having a prereq that failed. + ;; We need to use this to dequeue this item as CANNOTRUN + ;; + (if (member 'toplevel testmode) ;; only toplevel-mode tests scan their prereqs for CANNOTRUN and set give-up + (for-each (lambda (prereq) + (if (eq? (hash-table-ref/default test-registry prereq 'justfine) 'CANNOTRUN) + (set! give-up #t))) + prereqstrs)) + + (if (and give-up + (not (and (null? tal)(null? reg)))) + (let ((trimmed-tal (mt:discard-blocked-tests run-id hed tal test-records)) + (trimmed-reg (mt:discard-blocked-tests run-id hed reg test-records))) + (debug:print 1 *default-log-port* "WARNING: test " hed " has discarded prerequisites, removing it from the queue") + + (let ((test-id (rmt:get-test-id run-id hed ""))) + (if test-id (mt:test-set-state-status-by-id run-id test-id "COMPLETED" "PREQ_DISCARDED" "Failed to run due to discarded prerequisites"))) + + (if (and (null? trimmed-tal) + (null? trimmed-reg)) + #f + (runs:loop-values trimmed-tal trimmed-reg reglen regfull reruns) + )) + (list (car newtal)(append (cdr newtal) reg) '() reruns)))) + + ((and (null? fails) ;; have not-started tests, but unable to run them. everything looks completed with no prospect of unsticking something that is stuck. we should mark hed as moribund and exit or continue if there are more tests to consider + (null? prereq-fails) + (null? non-completed)) + (debug:print-info 4 *default-log-port* "cond branch - " "ei-4") + (if (runs:can-keep-running? hed 20) + (begin + (runs:inc-cant-run-tests hed) + (debug:print-info 0 *default-log-port* "no fails in prerequisites for " hed " but also none running, keeping " hed " for now. 
Try count: " (hash-table-ref/default *seen-cant-run-tests* hed 0)) ;; + ;; getting here likely means the system is way overloaded, kill a full minute before continuing + (thread-sleep! 60) ;; TODO: gate by normalized server load > 1.0 (maxload config thing) + ;; num-retries code was here + ;; we use this opportunity to move contents of reg to tal + (list (car newtal)(append (cdr newtal) reg) '() reruns)) ;; an issue with prereqs not yet met? + (begin + (debug:print-info 1 *default-log-port* "no fails in prerequisites for " hed " but nothing seen running in a while, dropping test " hed " from the run queue") + (let ((test-id (rmt:get-test-id run-id hed ""))) + (if test-id (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "TIMED_OUT" "Nothing seen running in a while."))) + (runs:loop-values tal reg reglen regfull reruns) + ))) + + ((and + (or (not (null? fails)) + (not (null? prereq-fails))) + (member 'normal testmode)) + (debug:print-info 4 *default-log-port* "cond branch - " "ei-5") + (debug:print-info 1 *default-log-port* "test " hed " (mode=" testmode ") has failed prerequisite(s); " + (string-intersperse (map (lambda (t)(conc (db:test-get-testname t) ":" (db:test-get-state t)"/"(db:test-get-status t))) fails) ", ") + ", removing it from to-do list") + (let ((test-id (rmt:get-test-id run-id hed ""))) + (if test-id + (if (not (null? prereq-fails)) + (mt:test-set-state-status-by-id-unless-completed run-id test-id "COMPLETED" "PREQ_DISCARDED" "Failed to run due to prior failed prerequisites") + (mt:test-set-state-status-by-id-unless-completed run-id test-id "COMPLETED" "PREQ_FAIL" "Failed to run due to failed prerequisites")))) + ;; (debug:print 4 *default-log-port*"BB> set PREQ_FAIL on "hed) + ;; (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_FAIL" "Failed to run due to failed prerequisites")))) ;; BB: this works, but equivalent for itemwait mode does not work. + (if (or (not (null? reg))(not (null? tal))) + (begin + (hash-table-set! 
test-registry hed 'CANNOTRUN) + (runs:loop-values tal reg reglen regfull (cons hed reruns)) + ) + #f)) ;; #f flags do not loop + + ((and (not (null? fails))(member 'toplevel testmode)) + (debug:print-info 4 *default-log-port* "cond branch - " "ei-6") + (if (or (not (null? reg))(not (null? tal))) + (list (car newtal)(append (cdr newtal) reg) '() reruns) + #f)) + ((null? runnables) + (debug:print-info 4 *default-log-port* "cond branch - " "ei-7") + #f) ;; if we get here and non-completed is null then it is all over. + (else + (debug:print-info 4 *default-log-port* "cond branch - " "ei-8") + (debug:print 0 *default-log-port* "WARNING: FAILS or incomplete tests maybe preventing completion of this run. Watch for issues with test " hed ", continuing for now") + (list (car newtal)(cdr newtal) reg reruns))))) +;; Map inlst to a list of strings: a vector (test record) becomes "testname[/item-path]:state/status", a string passes through, anything else goes through conc. +(define (runs:mixed-list-testname-and-testrec->list-of-strings inlst) + (if (null? inlst) + '() + (map (lambda (t) + (cond + ((vector? t) + (let ((test-name (db:test-get-testname t)) + (item-path (db:test-get-item-path t)) + (test-state (db:test-get-state t)) + (test-status (db:test-get-status t))) + (conc test-name (if (equal? item-path "") "" "/") item-path ":" test-state "/" test-status))) + ((string? t) + t) + (else + (conc t)))) + inlst))) + + +;; hed tal reg reruns reglen regfull test-record runname test-name item-path jobgroup max-concurrent-jobs run-id waitons item-path testmode test-patts required-tests test-registry registry-mutex flags keyvals run-info newtal all-tests-registry itemmaps) +(define (runs:process-expanded-tests runsdat testdat) + ;; unroll the contents of runsdat and testdat (due to ongoing refactoring). 
+ (debug:print 2 *default-log-port* "runs:process-expanded-tests; testdat:" ) + (debug:print 2 *default-log-port* (with-output-to-string + (lambda () (pp (runs:testdat->alist testdat) )))) + (let* ((hed (runs:testdat-hed testdat)) + (tal (runs:testdat-tal testdat)) + (reg (runs:testdat-reg testdat)) + (reruns (runs:testdat-reruns testdat)) + (test-name (runs:testdat-test-name testdat)) + (item-path (runs:testdat-item-path testdat)) + (jobgroup (runs:testdat-jobgroup testdat)) + (waitons (runs:testdat-waitons testdat)) + (item-path (runs:testdat-item-path testdat)) + (testmode (runs:testdat-testmode testdat)) + (newtal (runs:testdat-newtal testdat)) + (itemmaps (runs:testdat-itemmaps testdat)) + (test-record (runs:testdat-test-record testdat)) + (prereqs-not-met (runs:testdat-prereqs-not-met testdat)) + + (reglen (runs:dat-reglen runsdat)) + (regfull (runs:dat-regfull runsdat)) + (runname (runs:dat-runname runsdat)) + (max-concurrent-jobs (runs:dat-max-concurrent-jobs runsdat)) + (run-id (runs:dat-run-id runsdat)) + (test-patts (runs:dat-test-patts runsdat)) + (required-tests (runs:dat-required-tests runsdat)) + (test-registry (runs:dat-test-registry runsdat)) + (registry-mutex (runs:dat-registry-mutex runsdat)) + (flags (runs:dat-flags runsdat)) + (keyvals (runs:dat-keyvals runsdat)) + (run-info (runs:dat-run-info runsdat)) + (all-tests-registry (runs:dat-all-tests-registry runsdat)) + (run-limits-info (runs:dat-can-run-more-tests runsdat)) + ;; (runs:can-run-more-tests run-id jobgroup max-concurrent-jobs)) ;; look at the test jobgroup and tot jobs running + (have-resources (car run-limits-info)) + (num-running (list-ref run-limits-info 1)) + (num-running-in-jobgroup(list-ref run-limits-info 2)) + (max-concurrent-jobs (list-ref run-limits-info 3)) + (job-group-limit (list-ref run-limits-info 4)) + ;; (prereqs-not-met (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps)) + ;; (prereqs-not-met (mt:lazy-get-prereqs-not-met run-id 
waitons item-path mode: testmode itemmap: itemmap)) + (fails (if (list? prereqs-not-met) ;; TODO: rename fails to failed-prereqs + (runs:calc-fails prereqs-not-met) + (begin + (debug:print-error 0 *default-log-port* "prereqs-not-met is not a list! " prereqs-not-met) + '()))) + (non-completed (filter (lambda (x) ;; remove hed from not completed list, duh, of course it is not completed! + (not (equal? x hed))) + (runs:calc-not-completed prereqs-not-met))) + (loop-list (list hed tal reg reruns)) + ;; configure the load runner + (numcpus (common:get-num-cpus #f)) + (maxload (string->number (or (configf:lookup *configdat* "jobtools" "maxload") "3.0"))) ;; use a non-number string to disable + (maxhomehostload (string->number (or (configf:lookup *configdat* "jobtools" "maxhomehostload") "2.0"))) ;; use a non-number string to disable + (waitdelay (string->number (or (configf:lookup *configdat* "jobtools" "waitdelay") "60")))) + (debug:print-info 4 *default-log-port* "have-resources: " have-resources " prereqs-not-met: (" + (string-intersperse + (map (lambda (t) + (if (vector? t) + (conc (db:test-get-state t) "/" (db:test-get-status t)) + (conc " WARNING: t is not a vector=" t ))) + prereqs-not-met) + ", ") ") fails: " fails + "\nregistered? " (hash-table-ref/default test-registry (db:test-make-full-name test-name item-path) #f)) + + + + (if (and (not (null? prereqs-not-met)) + (runs:lownoise (conc "waiting on tests " prereqs-not-met hed) 60)) + (debug:print-info 2 *default-log-port* "waiting on tests; " (string-intersperse (runs:mixed-list-testname-and-testrec->list-of-strings prereqs-not-met) ", "))) + + ;; Don't know at this time if the test have been launched at some time in the past + ;; i.e. is this a re-launch? 
+ (debug:print-info 4 *default-log-port* "run-limits-info = " run-limits-info) + + (cond ; cond 894- 1067 + + ;; Check item path against item-patts, + ;; + ((not (tests:match test-patts (tests:testqueue-get-testname test-record) item-path required: required-tests)) ;; This test/itempath is not to be run + ;; else the run is stuck, temporarily or permanently + ;; but should check if it is due to lack of resources vs. prerequisites + (debug:print-info 1 *default-log-port* "Skipping " (tests:testqueue-get-testname test-record) " " item-path " as it doesn't match " test-patts) + (if (or (not (null? tal))(not (null? reg))) + (runs:loop-values tal reg reglen regfull reruns) + #f)) + + ;; Register tests + ;; + ((not (hash-table-ref/default test-registry (db:test-make-full-name test-name item-path) #f)) + (debug:print-info 4 *default-log-port* "Pre-registering test " test-name "/" item-path " to create placeholder" ) + ;; always do firm registration now in v1.60 and greater ;; (eq? *transport-type* 'fs) ;; no point in parallel registration if use fs + (let register-loop ((numtries 15)) + (rmt:register-test run-id test-name item-path) + (if (rmt:get-test-id run-id test-name item-path) + (hash-table-set! test-registry (db:test-make-full-name test-name item-path) 'done) + (if (> numtries 0) + (begin + (thread-sleep! 0.5) + (register-loop (- numtries 1))) + (debug:print-error 0 *default-log-port* "failed to register test " (db:test-make-full-name test-name item-path))))) + (if (not (eq? (hash-table-ref/default test-registry (db:test-make-full-name test-name "") #f) 'done)) + (begin + (rmt:register-test run-id test-name "") + (if (rmt:get-test-id run-id test-name "") + (hash-table-set! test-registry (db:test-make-full-name test-name "") 'done)))) + (runs:shrink-can-run-more-tests-count runsdat) ;; DELAY TWEAKER (still needed?) + (if (and (null? tal)(null? 
reg)) + (list hed tal (append reg (list hed)) reruns) + (list (runs:queue-next-hed tal reg reglen regfull) ;; cannot replace with a call to runs:loop-values as the logic is different for reg + (runs:queue-next-tal tal reg reglen regfull) + ;; NB// Here we are building reg as we register tests + ;; if regfull we must pop the front item off reg + (if regfull + (append (cdr reg) (list hed)) + (append reg (list hed))) + reruns))) + + ;; At this point hed test registration must be completed. + ;; + ((eq? (hash-table-ref/default test-registry (db:test-make-full-name test-name item-path) #f) + 'start) + (debug:print-info 0 *default-log-port* "Waiting on test registration(s): " + (string-intersperse + (filter (lambda (x) + (eq? (hash-table-ref/default test-registry x #f) 'start)) + (hash-table-keys test-registry)) + ", ")) + (thread-sleep! 0.051) + (list hed tal reg reruns)) + + ;; If no resources are available just kill time and loop again + ;; + ((not have-resources) ;; simply try again after waiting a second + (if (runs:lownoise "no resources" 60) + (debug:print-info 1 *default-log-port* "no resources to run new tests, waiting ...")) + ;; Have gone back and forth on this but db starvation is an issue. + ;; wait one second before looking again to run jobs. + (thread-sleep! 1) + ;; could have done hed tal here but doing car/cdr of newtal to rotate tests + (list (car newtal)(cdr newtal) reg reruns)) + + ;; This is the final stage, everything is in place so launch the test + ;; + ((and have-resources + (or (null? prereqs-not-met) + (and (member 'toplevel testmode) ;; 'toplevel) + (null? non-completed) + (not (member 'exclusive testmode))))) + ;; (hash-table-delete! *max-tries-hash* (db:test-make-full-name test-name item-path)) + ;; we are going to reset all the counters for test retries by setting a new hash table + ;; this means they will increment only when nothing can be run + (set! 
*max-tries-hash* (make-hash-table)) + ;; well, first lets see if cpu load throttling is enabled. If so wait around until the + ;; average cpu load is under the threshold before continuing + (if maxload ;; only gate if maxload is specified + (common:wait-for-cpuload maxload numcpus waitdelay)) + (if maxhomehostload + (common:wait-for-homehost-load maxhomehostload (conc "Waiting for homehost load to drop below normalized value of " maxhomehostload))) + + (run:test run-id run-info keyvals runname test-record flags #f test-registry all-tests-registry) + (runs:incremental-print-results run-id) + (hash-table-set! test-registry (db:test-make-full-name test-name item-path) 'running) + (runs:shrink-can-run-more-tests-count runsdat) ;; DELAY TWEAKER (still needed?) + ;; (thread-sleep! *global-delta*) + (if (or (not (null? tal))(not (null? reg))) + (runs:loop-values tal reg reglen regfull reruns) + #f)) + + ;; must be we have unmet prerequisites + ;; + (else + (debug:print 4 *default-log-port* "FAILS: " fails) + ;; If one or more of the prereqs-not-met are FAIL then we can issue + ;; a message and drop hed from the items to be processed. + ;; (runs:mixed-list-testname-and-testrec->list-of-strings prereqs-not-met) + (if (and (not (null? prereqs-not-met)) + (runs:lownoise (conc "waiting on tests " prereqs-not-met hed) 60)) + (debug:print-info 1 *default-log-port* "waiting on tests; " (string-intersperse + (runs:mixed-list-testname-and-testrec->list-of-strings + prereqs-not-met) ", "))) + (if (or (null? fails) + (member 'toplevel testmode)) + (begin + ;; couldn't run, take a breather + (if (runs:lownoise "Waiting for more work to do..." 60) + (debug:print-info 0 *default-log-port* "Waiting for more work to do...")) + (thread-sleep! 
1) + (list (car newtal)(cdr newtal) reg reruns)) + ;; the waiton is FAIL so no point in trying to run hed ever again + (begin + (let ((my-test-id (rmt:get-test-id run-id test-name item-path))) + (mt:test-set-state-status-by-id-unless-completed run-id my-test-id "COMPLETED" "PREQ_FAIL" "Failed to run due to failed prerequisites2")) + + + + (if (or (not (null? reg))(not (null? tal))) + (if (vector? hed) + (begin + (debug:print 1 *default-log-port* "WARNING: Dropping test " test-name "/" item-path + " from the launch list as it has prerequistes that are FAIL") + (let ((test-id (rmt:get-test-id run-id hed ""))) + (if test-id (mt:test-set-state-status-by-id-unless-completed run-id test-id "COMPLETED" "PREQ_FAIL" "Failed to run due to failed prerequisites"))) + (runs:shrink-can-run-more-tests-count runsdat) ;; DELAY TWEAKER (still needed?) + ;; (thread-sleep! *global-delta*) + ;; This next is for the items + + (if (not (null? fails)) + ;;(mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "PREQ_FAIL" #f) + (rmt:set-state-status-and-roll-up-items run-id test-name item-path "NOT_STARTED" "PREQ_FAIL" #f) + ;;(mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "BLOCKED" #f) + (rmt:set-state-status-and-roll-up-items run-id test-name item-path "NOT_STARTED" "BLOCKED" #f) ) + (hash-table-set! test-registry (db:test-make-full-name test-name item-path) 'removed) + (runs:loop-values tal reg reglen regfull reruns)) + (let ((nth-try (hash-table-ref/default test-registry hed 0))) ;; hed not a vector... + (debug:print 2 *default-log-port* "nth-try("hed")="nth-try) + (cond + ((member "RUNNING" (map db:test-get-state prereqs-not-met)) + (if (runs:lownoise (conc "possible RUNNING prerequistes " hed) 60) + (debug:print 0 *default-log-port* "WARNING: test " hed " has possible RUNNING prerequisites, don't give up on it yet.")) + (thread-sleep! 
0.1) + (runs:loop-values tal reg reglen regfull reruns)) + ((or (not nth-try) ;; BB: condition on subsequent tries, condition below fires on first try + (and (number? nth-try) + (< nth-try 2))) + (hash-table-set! test-registry hed (if (number? nth-try) + (+ nth-try 1) + 0)) + (if (runs:lownoise (conc "not removing test " hed) 60) + (debug:print 1 *default-log-port* "WARNING: not removing test " hed " from queue although it may not be runnable due to FAILED prerequisites")) + ;; may not have processed correctly. Could be a race condition in your test implementation? Dropping test " hed) ;; " as it has prerequistes that are FAIL. (NOTE: hed is not a vector)") + (runs:shrink-can-run-more-tests-count runsdat) ;; DELAY TWEAKER (still needed?) + (runs:loop-values newtal reg reglen regfull reruns)) + ((symbol? nth-try) ;; BB: 'done matches here in one case where prereq itemwait failed. This is first "try" + (if (eq? nth-try 'removed) ;; removed is removed - drop it NOW + (if (null? tal) + #f ;; yes, really + (list (car tal)(cdr tal) reg reruns)) + (begin + (if (runs:lownoise (conc "FAILED prerequisites or other issue" hed) 60) + (debug:print 0 *default-log-port* "WARNING: test " hed " has FAILED prerequisites or other issue. Internal state >" nth-try "< will be overridden and we'll retry.")) + ;; was: (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "KEEP_TRYING" #f) + (mt:test-set-state-status-by-testname-unless-completed run-id test-name item-path "COMPLETED" "PREQ_FAIL" #f) + (hash-table-set! test-registry hed 'removed) ;; was 0 + (if (not (and (null? reg) (null? tal))) + (runs:loop-values tal reg reglen regfull reruns) + #f)))) + (else + (if (runs:lownoise (conc "FAILED prerequitests and we tried" hed) 60) + (debug:print 0 *default-log-port* "WARNING: test " hed " has FAILED prerequitests and we've tried at least 10 times to run it. Giving up now.")) + ;; (debug:print 0 *default-log-port* " prereqs: " prereqs-not-met) + (hash-table-set! 
test-registry hed 'removed) + (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "TEN_STRIKES" #f) + ;; I'm unclear on if this roll up is needed - it may be the root cause of the "all set to FAIL" bug. + (rmt:set-state-status-and-roll-up-items run-id test-name item-path #f "FAIL" #f) ;; treat as FAIL + (list (if (null? tal)(car newtal)(car tal)) + tal + reg + reruns))))) + ;; ELSE: can't drop this - maybe running? Just keep trying + + ;;(if (not (or (not (null? reg))(not (null? tal)))) ;; old experiment + (let ((runable-tests (runs:runable-tests prereqs-not-met))) ;; SUSPICIOUS: Should look at more than just prereqs-not-met? + (if (null? runable-tests) + #f ;; I think we are truly done here + (runs:loop-values newtal reg reglen regfull reruns))) + ;;) ;;from old experiment + ) ;; end if (or (not (null? reg))(not (null? tal))) + )))))) + +;; scan a list of tests looking to see if any are potentially runnable +;; +(define (runs:runable-tests tests) + (filter (lambda (t) + (if (not (vector? 
t)) + t + (let ((state (db:test-get-state t)) + (status (db:test-get-status t))) + (case (string->symbol state) + ((COMPLETED INCOMPLETE) #f) + ((NOT_STARTED) + (if (member status '("TEN_STRIKES" "BLOCKED" "PREQ_FAIL" "ZERO_ITEMS" "PREQ_DISCARDED" "TIMED_OUT" )) + #f + t)) + ((DELETED) #f) + (else t))))) + tests)) + +;; move all the miscellanea into this struct +;; +(defstruct runs:gendat inc-results inc-results-last-update inc-results-fmt run-info runname target) + +(define *runs:general-data* + (make-runs:gendat + inc-results: (make-hash-table) + inc-results-last-update: 0 + inc-results-fmt: "~12a~12a~20a~12a~40a\n" ;; state status time duration test-name item-path + run-info: #f + runname: #f + target: #f + ) + ) + +(define (runs:incremental-print-results run-id) + (let ((curr-sec (current-seconds))) + (if (> (- curr-sec (runs:gendat-inc-results-last-update *runs:general-data*)) 5) ;; at least five seconds since last update + (let* ((run-dat (or (runs:gendat-run-info *runs:general-data*)(rmt:get-run-info run-id))) + (runname (or (runs:gendat-runname *runs:general-data*) + (db:get-value-by-header (db:get-rows run-dat) + (db:get-header run-dat) "runname"))) + (target (or (runs:gendat-target *runs:general-data*)(rmt:get-target run-id))) + (testsdat (rmt:get-tests-for-run run-id "%" '() '() ;; run-id testpatt states statuses + #f #f ;; offset limit + #f ;; not-in + #f ;; sort-by + #f ;; sort-order + #f ;; get full data (not 'shortlist) + (runs:gendat-inc-results-last-update *runs:general-data*) ;; last update time + 'dashboard))) + (if (not (runs:gendat-run-info *runs:general-data*)) + (runs:gendat-run-info-set! *runs:general-data* run-dat)) + (if (not (runs:gendat-runname *runs:general-data*)) + (runs:gendat-runname-set! *runs:general-data* runname)) + (if (not (runs:gendat-target *runs:general-data*)) + (runs:gendat-target-set! 
*runs:general-data* target)) + (for-each + (lambda (testdat) + (let* ((test-id (db:test-get-id testdat)) + (prevdat (hash-table-ref/default (runs:gendat-inc-results *runs:general-data*) + (conc run-id "," test-id) #f)) + (test-name (db:test-get-testname testdat)) + (item-path (db:test-get-item-path testdat)) + (state (db:test-get-state testdat)) + (status (db:test-get-status testdat)) + (event-time (db:test-get-event_time testdat)) + (duration (db:test-get-run_duration testdat))) + (if (and (not (member state '("DELETED" "REMOTEHOSTSTART" "RUNNING" "LAUNCHED""NOT_STARTED"))) + (not (and prevdat + (equal? state (db:test-get-state prevdat)) + (equal? status (db:test-get-status prevdat))))) + (let ((fmt (runs:gendat-inc-results-fmt *runs:general-data*)) + (dtime (seconds->year-work-week/day-time event-time))) + (if (runs:lownoise "inc-print" 600) + (format #t fmt "State" "Status" "Start Time" "Duration" "Test path")) + ;; (debug:print 0 *default-log-port* "fmt: " fmt " state: " state " status: " status " test-name: " test-name " item-path: " item-path " dtime: " dtime) + ;; (debug:print 0 #f "event-time: " event-time " duration: " duration) + (format #t fmt + state + status + dtime + (seconds->hr-min-sec duration) + (conc "lt/" target "/" runname "/" test-name (if (string-null? item-path) "" (conc "/" item-path)))) + (hash-table-set! (runs:gendat-inc-results *runs:general-data*) (conc run-id "," test-id) testdat))))) + testsdat))) + (runs:gendat-inc-results-last-update-set! *runs:general-data* (- curr-sec 10)))) + +;; every time though the loop increment the test/itempatt val. 
+;; when the min is > max-allowed and none running then force exit +;; +(define *max-tries-hash* (make-hash-table)) + +;;====================================================================== +;; runs:run-tests-queue is called by runs:run-tests +;;====================================================================== +;; +;; test-records is a hash table testname:item_path => vector < testname testconfig waitons priority items-info ... > +(define (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests reglen-in all-tests-registry) + ;; At this point the list of parent tests is expanded + ;; NB// Should expand items here and then insert into the run queue. + (debug:print 5 *default-log-port* "test-records: " test-records ", flags: " (hash-table->alist flags)) + + ;; Do mark-and-find clean up of db before starting runing of quue + ;; + ;; (rmt:find-and-mark-incomplete) + + (let* ((run-info (rmt:get-run-info run-id)) + (tests-info (mt:get-tests-for-run run-id #f '() '())) ;; qryvals: "id,testname,item_path")) + (sorted-test-names (tests:sort-by-priority-and-waiton test-records)) + (test-registry (make-hash-table)) + (registry-mutex (make-mutex)) + (num-retries 0) + (max-retries (config-lookup *configdat* "setup" "maxretries")) + (max-concurrent-jobs (configf:lookup-number *configdat* "setup" "max_concurrent_jobs" default: 50)) + (reglen (if (number? 
reglen-in) reglen-in 1)) + (last-time-incomplete (- (current-seconds) 900)) ;; force at least one clean up cycle + (last-time-some-running (current-seconds)) + ;; (tdbdat (tasks:open-db)) + (runsdat (make-runs:dat + ;; hed: hed + ;; tal: tal + ;; reg: reg + ;; reruns: reruns + reglen: reglen + regfull: #f ;; regfull + ;; test-record: test-record + runname: runname + ;; test-name: test-name + ;; item-path: item-path + ;; jobgroup: jobgroup + max-concurrent-jobs: max-concurrent-jobs + run-id: run-id + ;; waitons: waitons + ;; testmode: testmode + test-patts: test-patts + required-tests: required-tests + test-registry: test-registry + registry-mutex: registry-mutex + flags: flags + keyvals: keyvals + run-info: run-info + ;; newtal: newtal + all-tests-registry: all-tests-registry + ;; itemmaps: itemmaps + ;; prereqs-not-met: (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps) + ;; can-run-more-tests: (runs:can-run-more-tests run-id jobgroup max-concurrent-jobs) ;; look at the test jobgroup and tot jobs running + ))) + + ;; Initialize the test-registery hash with tests that already have a record + ;; convert state to symbol and use that as the hash value + (for-each (lambda (trec) + (let ((id (db:test-get-id trec)) + (tn (db:test-get-testname trec)) + (ip (db:test-get-item-path trec)) + (st (db:test-get-state trec))) + (if (not (equal? st "DELETED")) + (hash-table-set! test-registry (db:test-make-full-name tn ip) (string->symbol st))))) + tests-info) + (set! max-retries (if (and max-retries (string->number max-retries))(string->number max-retries) 100)) + + (let loop ((hed (car sorted-test-names)) + (tal (cdr sorted-test-names)) + (reg '()) ;; registered, put these at the head of tal + (reruns '())) + + (runs:incremental-print-results run-id) + + (if (not (null? reruns))(debug:print-info 4 *default-log-port* "reruns=" reruns)) + + ;; Here we mark any old defunct tests as incomplete. 
Do this every fifteen minutes + ;; moving this to a parallel thread and just run it once. + ;; + (if (> (current-seconds)(+ last-time-incomplete 900)) + (begin + (set! last-time-incomplete (current-seconds)) + ;; (rmt:find-and-mark-incomplete-all-runs) + )) + + ;; (print "Top of loop, hed=" hed ", tal=" tal " ,reruns=" reruns) + (let* ((test-record (hash-table-ref test-records hed)) + (test-name (tests:testqueue-get-testname test-record)) + (tconfig (tests:testqueue-get-testconfig test-record)) + (jobgroup (config-lookup tconfig "test_meta" "jobgroup")) + (testmode (let ((m (config-lookup tconfig "requirements" "mode"))) + (if m (map string->symbol (string-split m)) '(normal)))) + (itemmaps (tests:get-itemmaps tconfig)) ;; (configf:lookup tconfig "requirements" "itemmap")) + (priority (tests:testqueue-get-priority test-record)) + (itemdat (tests:testqueue-get-itemdat test-record)) ;; itemdat can be a string, list or #f + (items (tests:testqueue-get-items test-record)) + (item-path (item-list->path itemdat)) + (tfullname (db:test-make-full-name test-name item-path)) + ;; these are hard coded item-item waits test/item-path => test/item-path2 ... + (extra-waits (let* ((section (configf:get-section (tests:testqueue-get-testconfig test-record) "waitons")) + (myextra (alist-ref tfullname section equal?))) + (if myextra + (let ((extras (string-split (car myextra)))) + (if (runs:lownoise (conc tfullname "extra-waitons" tfullname) 60) + (debug:print-info 0 *default-log-port* "HAVE EXTRA WAITONS for test " tfullname ": " myextra)) + (for-each + (lambda (extra) + ;; (debug:print 0 *default-log-port* "FYI: extra = " extra " reruns = " reruns) + (let ((basetestname (car (string-split extra "/")))) + #;(if (not (member extra tal)) + (set! reruns (append tal (list extra)))) + (if (not (member basetestname tal)) + (set! 
reruns (append tal (list basetestname)))) + )) + extras) + extras) + '()))) + (waitons (delete-duplicates (append (tests:testqueue-get-waitons test-record) extra-waits) equal?)) + (newtal (append tal (list hed))) + (regfull (>= (length reg) reglen)) + (num-running (rmt:get-count-tests-running-for-run-id run-id)) + (testdat (make-runs:testdat + hed: hed + tal: tal + reg: reg + reruns: reruns + test-record: test-record + test-name: test-name + item-path: item-path + jobgroup: jobgroup + waitons: waitons + testmode: testmode + newtal: newtal + itemmaps: itemmaps + ;; prereqs-not-met: prereqs-not-met + ))) + (runs:dat-regfull-set! runsdat regfull) + + (if (> num-running 0) + (set! last-time-some-running (current-seconds))) + + (if (> (current-seconds)(+ last-time-some-running (or (configf:lookup *configdat* "setup" "give-up-waiting") 36000))) + (hash-table-set! *max-tries-hash* tfullname (+ (hash-table-ref/default *max-tries-hash* tfullname 0) 1))) + ;; (debug:print 0 *default-log-port* "max-tries-hash: " (hash-table->alist *max-tries-hash*)) + + ;; Ensure all top level tests get registered. This way they show up as "NOT_STARTED" on the dashboard + ;; and it is clear they *should* have run but did not. + (if (not (hash-table-ref/default test-registry (db:test-make-full-name test-name "") #f)) + (begin + (rmt:register-test run-id test-name "") + (hash-table-set! test-registry (db:test-make-full-name test-name "") 'done))) + + ;; Fast skip of tests that are already "COMPLETED" - NO! Cannot do that as the items may not have been expanded yet :( + ;; + (if (member (hash-table-ref/default test-registry tfullname #f) + '(DONOTRUN removed)) ;; *common:cant-run-states-sym*) ;; '(COMPLETED KILLED WAIVED UNKNOWN INCOMPLETE)) + (begin + (if (runs:lownoise (conc "been marked do not run " tfullname) 60) + (debug:print-info 0 *default-log-port* "Skipping test " tfullname " as it has been marked do not run due to being completed or not runnable")) + (if (or (not (null? 
tal))(not (null? reg))) + (loop (runs:queue-next-hed tal reg reglen regfull) + (runs:queue-next-tal tal reg reglen regfull) + (runs:queue-next-reg tal reg reglen regfull) + reruns)))) + ;; (loop (car tal)(cdr tal) reg reruns)))) + + (runs:incremental-print-results run-id) + (debug:print 4 *default-log-port* "TOP OF LOOP => " + "test-name: " test-name + "\n hed: " hed + "\n tal: " tal + "\n reg: " reg + "\n test-record " test-record + "\n itemdat: " itemdat + "\n items: " items + "\n item-path: " item-path + "\n waitons: " waitons + "\n num-retries: " num-retries + "\n reruns: " reruns + "\n regfull: " regfull + "\n reglen: " reglen + "\n length reg: " (length reg) + ) + + ;; check for hed in waitons => this would be circular, remove it and issue an + ;; error + (if (member test-name waitons) + (begin + (debug:print-error 0 *default-log-port* "test " test-name " has listed itself as a waiton, please correct this!") + (set! waiton (filter (lambda (x)(not (equal? x hed))) waitons)))) + + (cond + + ;; We want to catch tests that have waitons that are NOT in the queue and discard them IFF + ;; they have been through the wringer 10 or more times + ((and (list? waitons) + (not (null? waitons)) + (> (hash-table-ref/default *max-tries-hash* tfullname 0) 10) + (not (null? (filter + number? + (map (lambda (waiton) + (if (and (not (member waiton tal)) ;; this waiton is not in the list to be tried to run + (not (member waiton reruns))) + 1 + #f)) + waitons))))) ;; could do this more elegantly with a marker.... + (debug:print-info 4 *default-log-port* "cond branch - " "rtq-1") + (debug:print 0 *default-log-port* "WARNING: Marking test " tfullname " as not runnable. It is waiting on tests that cannot be run. Giving up now.") + (hash-table-set! 
test-registry tfullname 'removed)) + + ;; items is #f then the test is ok to be handed off to launch (but not before) + ;; + ((not items) + (debug:print-info 4 *default-log-port* "cond branch - " "rtq-2") + (debug:print-info 4 *default-log-port* "OUTER COND: (not items)") + (if (and (not (tests:match test-patts (tests:testqueue-get-testname test-record) item-path required: required-tests)) + (not (null? tal))) + (loop (car tal)(cdr tal) reg reruns)) + (runs:testdat-prereqs-not-met-set! testdat (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps)) + (runs:dat-can-run-more-tests-set! runsdat (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs)) ;; look at the test jobgroup and tot jobs running + (let ((loop-list (runs:process-expanded-tests runsdat testdat))) + (if loop-list (apply loop loop-list)))) + + ;; items processed into a list but not came in as a list been processed + ;; + ((and (list? items) ;; thus we know our items are already calculated + (not itemdat)) ;; and not yet expanded into the list of things to be done + (debug:print-info 4 *default-log-port* "cond branch - " "rtq-3") + (debug:print-info 4 *default-log-port* "OUTER COND: (and (list? items)(not itemdat))") + ;; Must determine if the items list is valid. Discard the test if it is not. + (if (and (list? items) + (> (length items) 0) + (and (list? (car items)) + (> (length (car items)) 0)) + (debug:debug-mode 1)) + (debug:print 2 *default-log-port* (map (lambda (row) + (conc (string-intersperse + (map (lambda (varval) + (string-intersperse varval "=")) + row) + " ") + "\n")) + items))) + + (let* ((items-in-testpatt + (filter + (lambda (my-itemdat) + (tests:match test-patts hed (item-list->path my-itemdat) )) + ;; was: (tests:match test-patts hed (item-list->path my-itemdat) required: required-tests)) + items) )) + (if (null? 
items-in-testpatt) + (let ((test-id (rmt:get-test-id run-id test-name ""))) + (debug:print-info 0 *default-log-port* "Test " (tests:testqueue-get-testname test-record) " is itemized but has no items matching test pattern -- marking status ZERO_ITEMS") + (if test-id + (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "ZERO_ITEMS" "This test has no items which match test pattern."))) + + (for-each (lambda (my-itemdat) + (let* ((new-test-record (let ((newrec (make-tests:testqueue))) + (vector-copy! test-record newrec) + newrec)) + (my-item-path (item-list->path my-itemdat)) + + (newtestname (db:test-make-full-name hed my-item-path))) ;; test names are unique on testname/item-path + (tests:testqueue-set-items! new-test-record #f) + (tests:testqueue-set-itemdat! new-test-record my-itemdat) + (tests:testqueue-set-item_path! new-test-record my-item-path) + (hash-table-set! test-records newtestname new-test-record) + (set! tal (append tal (list newtestname))))) ;; since these are itemized create new test names testname/itempath + items-in-testpatt))) + + + + ;; At this point we have possibly added items to tal but all must be handed off to + ;; INNER COND logic. I think loop without rotating the queue + ;; (loop hed tal reg reruns)) + ;; (let ((newtal (append tal (list hed)))) ;; We should discard hed as it has been expanded into it's items? Yes, but only if this *is* an itemized test + ;; (loop (car newtal)(cdr newtal) reg reruns) + (if (null? tal) + #f + (loop (car tal)(cdr tal) reg reruns))) + + ;; if items is a proc then need to run items:get-items-from-config, get the list and loop + ;; - but only do that if resources exist to kick off the job + ;; EXPAND ITEMS + ((or (procedure? items)(eq? items 'have-procedure)) + (debug:print-info 4 *default-log-port* "cond branch - " "rtq-4") + (let ((can-run-more (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs))) + (if (and (list? 
can-run-more) + (car can-run-more)) + (let ((loop-list (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry test-records itemmaps))) ;; itemized test expanded here + (if loop-list + (apply loop loop-list) + (debug:print-info 4 *default-log-port* " -- Can't expand hed="hed) + ) + ) + ;; if can't run more just loop with next possible test + (loop (car newtal)(cdr newtal) reg reruns)))) + + ;; this case should not happen, added to help catch any bugs + ((and (list? items) itemdat) + (debug:print-info 4 *default-log-port* "cond branch - " "rtq-5") + (debug:print-error 0 *default-log-port* "Should not have a list of items in a test and the itemspath set - please report this") + (exit 1)) + ((not (null? reruns)) + (debug:print-info 4 *default-log-port* "cond branch - " "rtq-6") + (let* ((newlst (tests:filter-non-runnable run-id tal test-records)) ;; i.e. not FAIL, WAIVED, INCOMPLETE, PASS, KILLED, + (junked (lset-difference equal? tal newlst))) + (debug:print-info 4 *default-log-port* "full drop through, if reruns is less than 100 we will force retry them, reruns=" reruns ", tal=" tal) + (if (< num-retries max-retries) + (set! newlst (append reruns newlst))) + (set! num-retries (+ num-retries 1)) + ;; (thread-sleep! (+ 1 *global-delta*)) + (if (not (null? newlst)) + ;; since reruns have been tacked on to newlst create new reruns from junked + (loop (car newlst)(cdr newlst) reg (delete-duplicates junked))))) + ((not (null? tal)) + (debug:print-info 4 *default-log-port* "cond branch - " "rtq-7") + (debug:print-info 4 *default-log-port* "I'm pretty sure I shouldn't get here.")) + ((not (null? reg)) ;; could we get here with leftovers? 
+ (debug:print-info 4 *default-log-port* "cond branch - " "rtq-8") + (debug:print-info 0 *default-log-port* "Have leftovers!") + (loop (car reg)(cdr reg) '() reruns)) + (else + (debug:print-info 4 *default-log-port* "cond branch - " "rtq-9") + (debug:print-info 4 *default-log-port* "Exiting loop with...\n hed=" hed "\n tal=" tal "\n reruns=" reruns)) + ))) ;; end loop on sorted test names + ;; this is the point where everything is launced and now you can mark the run in metadata table as all launced + (rmt:set-var (conc "lunch-complete-" run-id) "yes") + + ;; now *if* -run-wait we wait for all tests to be done + ;; Now wait for any RUNNING tests to complete (if in run-wait mode) + (thread-sleep! 10) ;; I think there is a race condition here. Let states/statuses settle + (let wait-loop ((num-running (rmt:get-count-tests-running-for-run-id run-id)) + (prev-num-running 0)) + ;; (debug:print-info 13 *default-log-port* "num-running=" num-running ", prev-num-running=" prev-num-running) + (if (and (or (args:get-arg "-run-wait") + (equal? (configf:lookup *configdat* "setup" "run-wait") "yes")) + (> num-running 0)) + (begin + ;; Here we mark any old defunct tests as incomplete. Do this every fifteen minutes + ;; (debug:print 0 *default-log-port* "Got here eh! num-running=" num-running " (> num-running 0) " (> num-running 0)) + (if (> (current-seconds)(+ last-time-incomplete 900)) + (begin + (debug:print-info 0 *default-log-port* "Marking stuck tests as INCOMPLETE while waiting for run " run-id ". Running as pid " (current-process-id) " on " (get-host-name)) + (set! last-time-incomplete (current-seconds)) + (rmt:find-and-mark-incomplete run-id #f))) + (if (not (eq? num-running prev-num-running)) + (debug:print-info 0 *default-log-port* "run-wait specified, waiting on " num-running " tests in RUNNING, REMOTEHOSTSTART or LAUNCHED state at " (time->string (seconds->local-time (current-seconds))))) + (thread-sleep! 
5) + ;; (wait-loop (rmt:get-count-tests-running-for-run-id run-id) num-running)))) + (wait-loop (rmt:get-count-tests-running-for-run-id run-id) num-running)))) + ;; LET* ((test-record + ;; we get here on "drop through". All done! + ;; this is moved to runs:run-testes since this function is getting called twice to ensure everthing is completed. + ;; (debug:print-info 0 *default-log-port* "Calling Post Hook") + ;; (runs:run-post-hook run-id) + (debug:print-info 1 *default-log-port* "All tests launched"))) + +(define (runs:calc-fails prereqs-not-met) + (filter (lambda (test) + (and (vector? test) ;; not (string? test)) + (member (db:test-get-state test) '("INCOMPLETE" "COMPLETED")) ;; TODO: pull from *common:stuff...* + (not (member (db:test-get-status test) + '("PASS" "WARN" "CHECK" "WAIVED" "SKIP"))))) + prereqs-not-met)) + +(define (runs:calc-prereq-fail prereqs-not-met) ;; REMOVEME since NOT_STARTED/PREQ_FAIL is now COMPLETED/PREQ_FAIL + (filter (lambda (test) + (and (vector? test) ;; not (string? test)) + (equal? (db:test-get-state test) "NOT_STARTED") + (not (member (db:test-get-status test) + '("n/a" "KEEP_TRYING"))))) + prereqs-not-met)) + +(define (runs:calc-not-completed prereqs-not-met) + (filter + (lambda (t) + (or (not (vector? t)) + (not (member (db:test-get-state t) '("INCOMPLETE" "COMPLETED"))))) + prereqs-not-met)) + +;; (define (runs:calc-not-completed prereqs-not-met) +;; (filter +;; (lambda (t) +;; (or (not (vector? t)) +;; (not (equal? "COMPLETED" (db:test-get-state t))))) +;; prereqs-not-met)) + +(define (runs:calc-runnable prereqs-not-met) + (filter + (lambda (t) + (or (not (vector? t)) + (and (equal? "NOT_STARTED" (db:test-get-state t)) + (member (db:test-get-status t) + '("n/a" "KEEP_TRYING"))) + (and (equal? "RUNNING" (db:test-get-state t))))) ;; account for a test that is running + prereqs-not-met)) + +(define (runs:pretty-string lst) + (map (lambda (t) + (if (not (vector? 
t)) + (conc t) + (conc (db:test-get-testname t) ":" (db:test-get-state t) "/" (db:test-get-status t)))) + lst)) + +;; parent-test is there as a placeholder for when parent-tests can be run as a setup step +;; +(define (run:test run-id run-info keyvals runname test-record flags parent-test test-registry all-tests-registry) + ;; All these vars might be referenced by the testconfig file reader + (let* ((test-name (tests:testqueue-get-testname test-record)) + (test-waitons (tests:testqueue-get-waitons test-record)) + (test-conf (tests:testqueue-get-testconfig test-record)) + (itemdat (tests:testqueue-get-itemdat test-record)) + (test-path (hash-table-ref all-tests-registry test-name)) ;; (conc *toppath* "/tests/" test-name)) ;; could use tests:get-testconfig here ... + (force (hash-table-ref/default flags "-force" #f)) + (rerun (hash-table-ref/default flags "-rerun" #f)) + (keepgoing (hash-table-ref/default flags "-keepgoing" #f)) + (incomplete-timeout (string->number (or (configf:lookup *configdat* "setup" "incomplete-timeout") "x"))) + (item-path "") + (db #f) + (full-test-name #f)) + + ;; setting itemdat to a list if it is #f + (if (not itemdat)(set! itemdat '())) + (set! item-path (item-list->path itemdat)) + (set! full-test-name (db:test-make-full-name test-name item-path)) + (debug:print-info 4 *default-log-port* + "\nTESTNAME: " full-test-name + "\n test-config: " (hash-table->alist test-conf) + "\n itemdat: " itemdat + ) + (debug:print 2 *default-log-port* "Attempting to launch test " full-test-name) + ;; (setenv "MT_TEST_NAME" test-name) ;; + ;; (setenv "MT_ITEMPATH" item-path) + ;; (setenv "MT_RUNNAME" runname) + (runs:set-megatest-env-vars run-id inrunname: runname testname: test-name itempath: item-path) ;; these may be needed by the launching process + (change-directory *toppath*) + + ;; Here is where the test_meta table is best updated + ;; Yes, another use of a global for caching. Need a better way? 
+ ;; + ;; There is now a single call to runs:update-all-test_meta and this + ;; per-test call is not needed. Given the delicacy of the move to + ;; v1.55 this code is being left in place for the time being. + ;; + (if (not (hash-table-ref/default *test-meta-updated* test-name #f)) + (begin + (hash-table-set! *test-meta-updated* test-name #t) + (runs:update-test_meta test-name test-conf))) + + ;; itemdat => ((ripeness "overripe") (temperature "cool") (season "summer")) + (let* ((new-test-path (string-intersperse (cons test-path (map cadr itemdat)) "/")) + (test-id (rmt:get-test-id run-id test-name item-path)) + (testdat (if test-id (rmt:get-test-info-by-id run-id test-id) #f))) + (if (not testdat) + (let loop () + ;; ensure that the path exists before registering the test + ;; NOPE: Cannot! Don't know yet which disk area will be assigned.... + ;; (system (conc "mkdir -p " new-test-path)) + ;; + ;; (open-run-close tests:register-test db run-id test-name item-path) + ;; + ;; NB// for the above line. I want the test to be registered long before this routine gets called! + ;; + (if (not test-id)(set! test-id (rmt:get-test-id run-id test-name item-path))) + (if (not test-id) + (begin + (debug:print 2 *default-log-port* "WARN: Test not pre-created? test-name=" test-name ", item-path=" item-path ", run-id=" run-id) + (rmt:register-test run-id test-name item-path) + (set! test-id (rmt:get-test-id run-id test-name item-path)))) + (debug:print-info 4 *default-log-port* "test-id=" test-id ", run-id=" run-id ", test-name=" test-name ", item-path=\"" item-path "\"") + (set! testdat (rmt:get-test-info-by-id run-id test-id)) + (if (not testdat) + (begin + (debug:print-info 0 *default-log-port* "WARNING: server is overloaded, trying again in one second") + (thread-sleep! 1) + (loop))))) + (if (not testdat) ;; should NOT happen + (debug:print-error 0 *default-log-port* "failed to get test record for test-id " test-id)) + (set! 
test-id (db:test-get-id testdat)) + (if (common:file-exists? test-path) + (change-directory test-path) + (begin + (debug:print-error 0 *default-log-port* "test run path not created before attempting to run the test. Perhaps you are running -remove-runs at the same time?") + (change-directory *toppath*))) + (case (if force ;; (args:get-arg "-force") + 'NOT_STARTED + (if testdat + (string->symbol (test:get-state testdat)) + 'failed-to-insert)) + ((failed-to-insert) + (debug:print-error 0 *default-log-port* "Failed to insert the record into the db")) + ((NOT_STARTED COMPLETED DELETED INCOMPLETE) + (let ((runflag #f)) + (cond + ;; -force, run no matter what + (force (set! runflag #t)) + ;; NOT_STARTED, run no matter what + ((member (test:get-state testdat) '("DELETED" "NOT_STARTED" "INCOMPLETE"))(set! runflag #t)) + ;; not -rerun and PASS, WARN or CHECK, do no run + ((and (or (not rerun) + keepgoing) + ;; Require to force re-run for COMPLETED or *anything* + PASS,WARN or CHECK + (or (member (test:get-status testdat) '("PASS" "WARN" "CHECK" "SKIP" "WAIVED")) + (member (test:get-state testdat) '("COMPLETED")))) + (debug:print-info 2 *default-log-port* "running test " test-name "/" item-path " suppressed as it is " (test:get-state testdat) " and " (test:get-status testdat)) + (hash-table-set! test-registry full-test-name 'DONOTRUN) ;; COMPLETED) + (set! runflag #f)) + ;; -rerun and status is one of the specifed, run it + ((and rerun + (let* ((rerunlst (string-split rerun ",")) + (must-rerun (member (test:get-status testdat) rerunlst))) + (debug:print-info 3 *default-log-port* "-rerun list: " rerun ", test-status: " (test:get-status testdat)", must-rerun: " must-rerun) + must-rerun)) + (debug:print-info 2 *default-log-port* "Rerun forced for test " test-name "/" item-path) + (set! runflag #t)) + ;; -keepgoing, do not rerun FAIL + ((and keepgoing + (member (test:get-status testdat) '("FAIL"))) + (set! 
runflag #f)) + ((and (not rerun) + (member (test:get-status testdat) '("FAIL" "n/a"))) + (set! runflag #t)) + (else (set! runflag #f))) + (debug:print 4 *default-log-port* "RUNNING => runflag: " runflag " STATE: " (test:get-state testdat) " STATUS: " (test:get-status testdat)) + (if (not runflag) + (if (not parent-test) + (if (runs:lownoise (conc "not starting test" full-test-name) 60) + (debug:print 1 *default-log-port* "NOTE: Not starting test " full-test-name " as it is state \"" (test:get-state testdat) + "\" and status \"" (test:get-status testdat) "\", use -rerun \"" (test:get-status testdat) + "\" or -force to override"))) + ;; NOTE: No longer be checking prerequisites here! Will never get here unless prereqs are + ;; already met. + ;; This would be a great place to do the process-fork + ;; + (let ((skip-test #f) + (skip-check (configf:get-section test-conf "skip"))) + (cond + ;; Have to check for skip conditions. This one skips if there are same-named tests + ;; currently running + ((and skip-check + (configf:lookup test-conf "skip" "prevrunning")) + ;; run-ids = #f means *all* runs + (let ((running-tests (rmt:get-tests-for-runs-mindata #f full-test-name '("RUNNING" "REMOTEHOSTSTART" "LAUNCHED") '() #f))) + (if (not (null? running-tests)) ;; have to skip + (set! skip-test "Skipping due to previous tests running")))) + ((and skip-check + (configf:lookup test-conf "skip" "fileexists")) + (if (common:file-exists? (configf:lookup test-conf "skip" "fileexists")) + (set! 
skip-test (conc "Skipping due to existance of file " (configf:lookup test-conf "skip" "fileexists"))))) + ((and skip-check + (configf:lookup test-conf "skip" "rundelay")) + ;; run-ids = #f means *all* runs + (let* ((numseconds (common:hms-string->seconds (configf:lookup test-conf "skip" "rundelay"))) + (running-tests (rmt:get-tests-for-runs-mindata #f full-test-name '("RUNNING" "REMOTEHOSTSTART" "LAUNCHED") '() #f)) + (completed-tests (rmt:get-tests-for-runs-mindata #f full-test-name '("COMPLETED" "INCOMPLETE") '("PASS" "FAIL" "ABORT") #f)) ;; ironically INCOMPLETE is same as COMPLETED in this contex + (last-run-times (map db:mintest-get-event_time completed-tests)) + (time-since-last (- (current-seconds) (if (null? last-run-times) 0 (common:max last-run-times))))) + (if (or (not (null? running-tests)) ;; have to skip if test is running + (> numseconds time-since-last)) + (set! skip-test (conc "Skipping due to previous test run less than " (configf:lookup test-conf "skip" "rundelay") " ago")))))) + + (if skip-test + (begin + (mt:test-set-state-status-by-id run-id test-id "COMPLETED" "SKIP" skip-test) + (debug:print-info 1 *default-log-port* "SKIPPING Test " full-test-name " due to " skip-test)) + ;; + ;; Here the test is handed off to launch.scm for launch-test to complete the launch process + ;; + (if (not (launch-test test-id run-id run-info keyvals runname test-conf test-name test-path itemdat flags)) + (begin + (print "ERROR: Failed to launch the test. Exiting as soon as possible") + (set! *globalexitstatus* 1) ;; + (process-signal (current-process-id) signal/kill)))))))) + ((KILLED) + (debug:print 1 *default-log-port* "NOTE: " full-test-name " is already running or was explictly killed, use -force to launch it.") + (hash-table-set! 
test-registry (db:test-make-full-name test-name test-path) 'DONOTRUN)) ;; KILLED))
+        ((LAUNCHED REMOTEHOSTSTART RUNNING)
+         (debug:print 2 *default-log-port* "NOTE: " test-name " is already running"))
+        ;; (if (> (- (current-seconds)(+ (db:test-get-event_time testdat)
+        ;;                               (db:test-get-run_duration testdat)))
+        ;;        (or incomplete-timeout
+        ;;            6000)) ;; i.e. no update for more than 6000 seconds
+        ;;     (begin
+        ;;       (debug:print 0 *default-log-port* "WARNING: Test " test-name " appears to be dead. Forcing it to state INCOMPLETE and status STUCK/DEAD")
+        ;;       (tests:test-set-status! run-id test-id "INCOMPLETE" "STUCK/DEAD" "" #f))
+        ;;       ;; (tests:test-set-status! test-id "INCOMPLETE" "STUCK/DEAD" "" #f))
+        ;;     (debug:print 2 *default-log-port* "NOTE: " test-name " is already running")))
+        (else
+         (debug:print-error 0 *default-log-port* "Failed to launch test " full-test-name ". Unrecognised state " (test:get-state testdat))
+         (case (string->symbol (test:get-state testdat))
+           ((COMPLETED INCOMPLETE)
+            (hash-table-set! test-registry (db:test-make-full-name test-name test-path) 'DONOTRUN))
+           (else
+            (hash-table-set! test-registry (db:test-make-full-name test-name test-path) 'DONOTRUN))))))))
+
+;;======================================================================
+;; END OF NEW STUFF
+;;======================================================================
+
+;; Return dir with its last N path components dropped (N is the optional
+;; second argument, default 1). dir is split on "/" and the kept parts are
+;; re-joined behind a leading "/", so the result is always written as an
+;; absolute path.
+;;
+;; NOTE(review): N larger than the number of components hands `take' a
+;; negative count - presumably an error; confirm callers never do this.
+;;
+(define (get-dir-up-n dir . params)
+  (let ((dparts  (string-split dir "/"))
+        (count   (if (null? params) 1 (car params))))
+    (conc "/" (string-intersperse
+               (take dparts (- (length dparts) count))
+               "/"))))
+
+;; Quote str so that /bin/sh sees it as exactly one literal word: wrap it
+;; in single quotes and rewrite every embedded ' as '\''
+;;
+(define (runs:shell-quote str)
+  (conc "'" (string-translate* (->string str) '(("'" . "'\\''"))) "'"))
+
+;; Recursively remove real-dir with "rm -rf". If that fails, open up the
+;; permissions with "chmod -R a+rwx" and try exactly one more time; a
+;; second failure is logged and otherwise ignored.
+;;
+;; The path is shell-quoted and preceded by "--" so that spaces, shell
+;; metacharacters or a leading "-" in the name cannot change what gets
+;; deleted. Any non-zero status from `system' is treated as a failure.
+;;
+(define (runs:recursive-delete-with-error-msg real-dir)
+  (let* ((qdir   (runs:shell-quote real-dir))
+         (rm-cmd (conc "rm -rf -- " qdir)))
+    (if (not (zero? (system rm-cmd)))
+        (begin
+          ;; FAILED, possibly due to permissions, do chmod a+rwx then try one more time
+          (system (conc "chmod -R a+rwx -- " qdir))
+          (if (not (zero? (system rm-cmd)))
+              (debug:print-error 0 *default-log-port* "There was a problem removing " real-dir " with rm -f"))))))
+
+;; Remove a test directory in three passes so that files matching
+;; testdat.db are the last thing to go:
+;;   1. every sub-directory of real-dir
+;;   2. every remaining entry whose name does not match "testdat.db"
+;;   3. real-dir itself
+;;
+(define (runs:safe-delete-test-dir real-dir)
+  ;; compiled once instead of once per directory entry; the pattern is
+  ;; unchanged (the "." is unescaped and so matches any character)
+  (let ((testdat-rx (regexp "testdat.db")))
+    ;; first delete all sub-directories
+    (directory-fold
+     (lambda (f x)
+       (let ((fullname (conc real-dir "/" f)))
+         (if (directory? fullname)(runs:recursive-delete-with-error-msg fullname)))
+       (+ 1 x))
+     0 real-dir)
+    ;; then files other than *testdat.db*
+    (directory-fold
+     (lambda (f x)
+       (let ((fullname (conc real-dir "/" f)))
+         (if (not (string-search testdat-rx f))
+             (runs:recursive-delete-with-error-msg fullname)))
+       (+ 1 x))
+     0 real-dir)
+    ;; then the entire directory
+    (runs:recursive-delete-with-error-msg real-dir)))
+
+;; cleanup often needs to remove all but the last N runs per target
+;;
+;; target-patts a1/b1/c1,a2/b2/c2 ...
+;;
+;; This will fail if called with empty target or a bad target (i.e. missing or extra fields)
+;;
+;; Returns a hash table: target -> ( runrecord1 runrecord2 ... ), filled
+;; from one rmt:simple-get-runs call per comma separated target pattern.
+;; Runs are consed onto the front of each target's list.
+;;
+(define (runs:get-hash-by-target target-patts runpatt)
+  (let* ((targets (string-split target-patts ","))
+         ;; NOTE(review): keys is never used below. The call is kept because
+         ;; rmt:get-keys is defined elsewhere and may have side effects.
+         (keys    (rmt:get-keys))
+         (res-ht  (make-hash-table))) ;; target -> ( runrecord1 runrecord2 ... )
+    (for-each
+     (lambda (target-patt)
+       (let ((runs (rmt:simple-get-runs runpatt #f #f target-patt)))
+         (for-each
+          (lambda (run)
+            (let ((target (simple-run-target run)))
+              (hash-table-set! res-ht target (cons run (hash-table-ref/default res-ht target '())))))
+          runs)))
+     targets)
+    res-ht))
+
+;; delete runs older than X (weeks, days, months years etc.)
+;; delete redundant runs within a target - N is the input
+;; delete redundant runs within a target IFF older than given date/time AND keep at least N
+;;
+;;   target-patts : comma separated target patterns (see runs:get-hash-by-target)
+;;   runpatt      : run name pattern (see runs:get-hash-by-target)
+;;   num-to-keep  : number of newest runs, by event_time, kept per target
+;;   actions:     : list of symbols applied in order to every run of every
+;;                  target; handled here: print remove-runs archive kill-runs
+;;
+;; Command line switches read here:
+;;   -age    : runs with an event_time newer than now minus this age are
+;;             skipped entirely (no action is applied to them)
+;;   -precmd : text placed in front of every generated megatest command
+;;
+;; For each target the runs are sorted oldest first and everything except
+;; the last num-to-keep is marked for removal. The print action lists every
+;; run; the other actions shell out to megatest for marked runs only.
+;;
+;; Returns the hash table, with each target now mapped to the list of runs
+;; that were marked for removal.
+;;
+(define (runs:remove-all-but-last-n-runs-per-target target-patts runpatt num-to-keep #!key (actions '(print)))
+  (let* ((runs-ht   (runs:get-hash-by-target target-patts runpatt))
+         (age-arg   (args:get-arg "-age"))
+         (age       (if age-arg (common:hms-string->seconds age-arg) #f))
+         (age-mark  (if age (- (current-seconds) age) (+ (current-seconds) 86400)))
+         (precmd    (or (args:get-arg "-precmd") ""))
+         ;; if kill-runs is specified then set -kill-wait to 0
+         (kill-wait (if (member 'kill-runs actions) " -kill-wait 0" ""))
+         ;; quote a value as one literal /bin/sh word so that a target or
+         ;; run name containing spaces or metacharacters cannot split or
+         ;; extend the command
+         (shq       (lambda (s)
+                      (conc "'" (string-translate* (->string s) '(("'" . "'\\''"))) "'")))
+         ;; run "<precmd> megatest <switch> -target T -runname R -testpatt %<extra>"
+         (megatest  (lambda (switch target run extra)
+                      (system (conc precmd " megatest " switch
+                                    " -target " (shq target)
+                                    " -runname " (shq (simple-run-runname run))
+                                    " -testpatt %" extra)))))
+    (print "Actions: " actions)
+    (for-each
+     (lambda (target)
+       (let* ((runs      (hash-table-ref runs-ht target))
+              (sorted    (sort runs (lambda (a b)(< (simple-run-event_time a)(simple-run-event_time b)))))
+              (to-remove (let* ((len      (length sorted))
+                                (trim-amt (- len num-to-keep)))
+                           (if (> trim-amt 0)
+                               (take sorted trim-amt)
+                               '()))))
+         (hash-table-set! runs-ht target to-remove)
+         (print target ":")
+         (for-each
+          (lambda (run)
+            ;; eqv? rather than eq?: eq? is unspecified on numbers
+            (let ((remove? (member run to-remove (lambda (a b)
+                                                   (eqv? (simple-run-id a)
+                                                         (simple-run-id b))))))
+              (if (and age (> (simple-run-event_time run) age-mark))
+                  (print "Skipping handling of " target "/" (simple-run-runname run) " as it is younger than " age-arg)
+                  (for-each
+                   (lambda (action)
+                     (case action
+                       ((print)
+                        (print " " (simple-run-runname run)
+                               " " (time->string (seconds->local-time (simple-run-event_time run)) "WW%V.%u %H:%M:%S")
+                               " " (if remove? "REMOVE" "")))
+                       ((remove-runs)
+                        (if remove? (megatest "-remove-runs" target run kill-wait)))
+                       ((archive)
+                        (if remove? (megatest "-archive save-remove" target run "")))
+                       ((kill-runs)
+                        (if remove? (megatest "-kill-runs" target run "")))))
+                   actions))))
+          sorted)))
+     ;; (print "Sorted: " (map simple-run-event_time sorted))
+     ;; (print "Remove: " (map simple-run-event_time to-remove))))
+     (hash-table-keys runs-ht))
+    runs-ht))
+
+;; (define (runs:remove-all-but-last-n-runs-per-target target-patts runpatt num-to-keep)
+;;   (let ((data (runs:get-all-but-most-recent-n-per-target target-patts runpatt num-to-keep)))
+;;     (for-each
+;;      (lambda (target)
+;;        (let ((runs-to-remove (hash-table-ref data target )))
+;;          (for-each
+;;           (lambda (run)
+;;             (print "megatest -remove-runs -target " target " -runname " (simple-run-runname run) " -testpatt %"))
+;;           runs-to-remove)))
+;;      (hash-table-keys data))))
+
+;; Remove runs
+;; fields are passing in through
+;; action:
+;;    'remove-runs
+;;    'set-state-status
+;;
+;; NB// should pass in keys?
+;;
+;; runs:operate-on applies action to every run returned by
+;; mt:get-runs-by-patt for runnamepatt/target and, within each run that is
+;; not in state "locked", to every test returned by mt:get-tests-for-run
+;; for testpatt and the comma separated state:/status: filters.
+;;
+;; Actions handled below: kill-runs remove-runs set-state-status print-run
+;; run-wait archive. Anything else is logged as "action not recognised".
+;;
+;;   new-state-status: : "STATE,STATUS" string, used by set-state-status
+;;   mode:             : handed to runs:remove-test-directory (remove-runs)
+;;   options:          : accepted but not used in this procedure
+;;
+;; Exits the process with status 1 when the action needs write access and
+;; megatest.db is not writable, or when new-state-status: has fewer than
+;; two comma separated fields. Returns #t otherwise.
+;;
+(define (runs:operate-on action target runnamepatt testpatt #!key (state #f)(status #f)(new-state-status #f)(mode #f)(options '()))
+  (common:clear-caches) ;; clear all caches
+  (let* (;; (tdbdat          (tasks:open-db))
+         (keys            (rmt:get-keys))
+         (rundat          (mt:get-runs-by-patt keys runnamepatt target))
+         (header          (vector-ref rundat 0))
+         (runs            (vector-ref rundat 1))
+         (states          (if state  (string-split state  ",") '()))
+         (statuses        (if status (string-split status ",") '()))
+         (state-status    (if (string? new-state-status) (string-split new-state-status ",") '(#f #f)))
+         (rp-mutex        (make-mutex))
+         (bup-mutex       (make-mutex))
+         (keep-records    (args:get-arg "-keep-records"))) ;; used in conjunction with -remove-runs to keep the records, TODO: consolidate this with "mode".
+
+    (let* ((write-access-actions '(remove-runs set-state-status archive run-wait kill-runs))
+           (dbfile             (conc *toppath* "/megatest.db"))
+           (readonly-mode      (not (file-write-access? dbfile))))
+      (when (and readonly-mode
+                 (member action write-access-actions))
+        (debug:print-error 0 *default-log-port* "megatest.db is readonly. Cannot proceed with action ["action"] in which write-access isrequired .")
+        (exit 1)))
+
+    (debug:print-info 4 *default-log-port* "runs:operate-on => Header: " header " action: " action " new-state-status: " new-state-status)
+    (if (> 2 (length state-status))
+        (begin
+          (debug:print-error 0 *default-log-port* "the parameter to -set-state-status is a comma delimited string. E.g. COMPLETED,FAIL")
+          ;; this is an error path, so report failure (a bare (exit) exits with status 0)
+          (exit 1)))
+    (for-each
+     (lambda (run)
+       (let ((runkey (string-intersperse (map (lambda (k)
+                                                (db:get-value-by-header run header k)) keys) "/"))
+             (dirs-to-remove (make-hash-table))
+             (proc-get-tests (lambda (run-id)
+                               (mt:get-tests-for-run run-id
+                                                     testpatt states statuses
+                                                     not-in:  #f
+                                                     sort-by: (case action
+                                                                ((remove-runs) 'rundir)
+                                                                (else          'event_time))))))
+         (let* ((run-id    (db:get-value-by-header run header "id"))
+                (run-state (db:get-value-by-header run header "state"))
+                (run-name  (db:get-value-by-header run header "runname"))
+                (tests     (if (not (equal? run-state "locked"))
+                               (proc-get-tests run-id)
+                               '()))
+                (lasttpath "/does/not/exist/I/hope")
+                (worker-thread #f))
+           (debug:print-info 4 *default-log-port* "runs:operate-on run=" run ", header=" header)
+           (if (not (null? tests))
+               (begin
+                 ;; once-per-run part of each action
+                 (case action
+                   ((kill-runs)
+                    (tasks:kill-runner target run-name "%")
+                    (debug:print 1 *default-log-port* "Killing tests for run: " runkey " " (db:get-value-by-header run header "runname"))
+                    )
+                   ((remove-runs)
+                    ;; (if (tasks:need-server run-id)(tasks:start-and-wait-for-server tdbdat run-id 10))
+                    ;; seek and kill in flight -runtests with % as testpatt here
+                    ;; (if (equal? testpatt "%")
+                    (tasks:kill-runner target run-name testpatt)
+                    ;; (debug:print 0 *default-log-port* "not attempting to kill any run launcher processes as testpatt is " testpatt))
+                    (debug:print 1 *default-log-port* "Removing tests for run: " runkey " " (db:get-value-by-header run header "runname")))
+                   ((set-state-status)
+                    ;; (if (tasks:need-server run-id)(tasks:start-and-wait-for-server tdbdat run-id 10))
+                    (debug:print 1 *default-log-port* "Modifying state and staus for tests for run: " runkey " " (db:get-value-by-header run header "runname")))
+                   ((print-run)
+                    (debug:print 1 *default-log-port* "Printing info for run " runkey ", run=" run ", tests=" tests ", header=" header)
+                    action)
+                   ((run-wait)
+                    (debug:print 1 *default-log-port* "Waiting for run " runkey ", run=" runnamepatt " to complete"))
+                   ((archive)
+                    (debug:print 1 *default-log-port* "Archiving/restoring (" (args:get-arg "-archive") ") data for run: " runkey " " (db:get-value-by-header run header "runname"))
+                    (set! worker-thread
+                          (make-thread
+                           (lambda ()
+                             (case (string->symbol (args:get-arg "-archive"))
+                               ((save save-remove keep-html)
+                                (archive:run-bup (args:get-arg "-archive") run-id run-name tests rp-mutex bup-mutex))
+                               ((restore)
+                                (archive:bup-restore (args:get-arg "-archive") run-id run-name tests rp-mutex bup-mutex))
+                               (else
+                                (debug:print-error 0 *default-log-port* "unrecognised sub command to -archive. Run \"megatest\" to see help")
+                                ;; error path: exit with a failure status
+                                (exit 1))))
+                           "archive-bup-thread"))
+                    (thread-start! worker-thread))
+                   (else
+                    (debug:print-info 0 *default-log-port* "action not recognised " action)))
+
+                 ;; actions that operate on one test at a time can be handled below
+                 ;;
+                 ;; sorted-tests: longest rundir string first, non-vector entries dropped
+                 (let ((sorted-tests     (filter
+                                          vector?
+                                          (sort tests (lambda (a b)
+                                                        (let ((dira (db:test-get-rundir a))
+                                                              (dirb (db:test-get-rundir b)))
+                                                          (if (and (string? dira)(string? dirb))
+                                                              (> (string-length dira)(string-length dirb))
+                                                              #f))))))
+                       (toplevel-retries    (make-hash-table)) ;; try three times to loop through and remove top level tests
+                       (test-retry-time     (make-hash-table))
+                       (backgrounded-remove-status (make-hash-table))
+                       (backgrounded-remove-last-visit (make-hash-table))
+                       (backgrounded-remove-result (make-hash-table))
+                       (allow-run-time      (string->number (or (args:get-arg "-kill-wait") "10")))) ;; seconds to allow for killing tests before just brutally killing 'em
+                   (if (null? sorted-tests)
+                       ;; the vector? filter left nothing to walk; (car '()) would be an
+                       ;; error, so only wait for an archive worker if one was started
+                       (if worker-thread (thread-join! worker-thread))
+                       ;; test is the entry being handled, tal the queue of entries still to visit
+                       (let loop ((test (car sorted-tests))
+                                  (tal  (cdr sorted-tests)))
+                         (let* ((test-id       (db:test-get-id test))
+                                (new-test-dat  (rmt:get-test-info-by-id run-id test-id)))
+                           (if (not new-test-dat)
+                               (begin
+                                 (debug:print-error 0 *default-log-port* "We have a test-id of " test-id " but no record was found. NOTE: No locking of records is done between processes, do not simultaneously remove the same run from two processes!")
+                                 (if (not (null? tal))
+                                     (loop (car tal)(cdr tal))))
+                               (let* ((item-path     (db:test-get-item-path new-test-dat))
+                                      (test-name     (db:test-get-testname new-test-dat))
+                                      (run-dir       ;;(filedb:get-path *fdb*
+                                       ;; (rmt:sdb-qry 'getid
+                                       (db:test-get-rundir new-test-dat)) ;; ) ;; run dir is from the link tree
+                                      (has-subrun    (and (subrun:subrun-test-initialized? run-dir)
+                                                          (not (subrun:subrun-removed? run-dir))))
+                                      (test-state    (db:test-get-state new-test-dat))
+                                      (test-status   (db:test-get-status new-test-dat))
+                                      (test-fulln    (db:test-get-fullname new-test-dat))
+                                      (uname         (db:test-get-uname new-test-dat))
+                                      (toplevel-with-children (and (db:test-get-is-toplevel test)
+                                                                   (> (rmt:test-toplevel-num-items run-id test-name) 0))))
+
+                                 (case action
+                                   ((remove-runs)
+                                    ;; if the test is a toplevel-with-children issue an error and do not remove
+                                    (cond
+                                     (toplevel-with-children
+                                      (debug:print 0 *default-log-port* "WARNING: skipping removal of " test-fulln " with run-id " run-id " as it has sub tests")
+                                      (hash-table-set! toplevel-retries test-fulln (+ (hash-table-ref/default toplevel-retries test-fulln 0) 1))
+                                      (if (> (hash-table-ref toplevel-retries test-fulln) 3)
+                                          (if (not (null? tal))
+                                              (loop (car tal)(cdr tal))) ;; no else clause - drop it if no more in queue and > 3 tries
+                                          (let ((newtal (append tal (list test))))
+                                            (loop (car newtal)(cdr newtal))))) ;; loop with test still in queue
+                                     (has-subrun
+                                      ;;
+                                      (let ((last-visit (hash-table-ref/default backgrounded-remove-last-visit test-fulln 0))
+                                            (now (current-seconds))
+                                            (rem-status (hash-table-ref/default backgrounded-remove-status test-fulln 'not-started)))
+                                        (case rem-status
+                                          ((not-started)
+                                           (debug:print 0 *default-log-port* "WARNING: postponing removal of " test-fulln " with run-id " run-id " as it has a subrun")
+                                           (hash-table-set! backgrounded-remove-status test-fulln 'started)
+                                           (hash-table-set! backgrounded-remove-last-visit test-fulln (current-seconds))
+                                           (common:send-thunk-to-background-thread
+                                            (lambda ()
+                                              (let* ((subrun-remove-succeeded
+                                                      (subrun:remove-subrun run-dir keep-records)))
+                                                (hash-table-set! backgrounded-remove-result test-fulln subrun-remove-succeeded)
+                                                (hash-table-set! backgrounded-remove-status test-fulln 'done)))
+                                            name: (conc "remove-subrun:"test-fulln))
+
+                                           ;; send to back of line, loop
+                                           (let ((newtal (append tal (list test))))
+                                             (loop (car newtal)(cdr newtal)))
+                                           )
+                                          ((started)
+                                           ;; if last visit was within last second, sleep 1 second
+                                           (if (< (- now last-visit) 1.0)
+                                               (thread-sleep! 1.0))
+                                           (hash-table-set! backgrounded-remove-last-visit test-fulln (current-seconds))
+                                           ;; send to back of line, loop
+                                           (let ((newtal (append tal (list test))))
+                                             (loop (car newtal)(cdr newtal)))
+                                           )
+                                          ((done)
+                                           ;; drop this one; if remaining, loop, else finish
+                                           (hash-table-set! backgrounded-remove-last-visit test-fulln (current-seconds))
+                                           (let ((subrun-remove-succeeded (hash-table-ref/default backgrounded-remove-result test-fulln 'exception)))
+                                             (cond
+                                              ((eq? subrun-remove-succeeded 'exception)
+                                               (let* ((logfile (subrun:get-log-path run-dir "remove")))
+                                                 (debug:print 0 *default-log-port* "ERROR: removing subrun of of " test-fulln " with run-id " run-id " ; see logfile @ "logfile))
+                                               (if (not (null? tal))
+                                                   (loop (car tal)(cdr tal))))
+                                              (subrun-remove-succeeded
+                                               (debug:print 0 *default-log-port* "Now removing of " test-fulln " with run-id " run-id " since subrun was removed.")
+                                               ;;(runs:remove-test-directory new-test-dat mode) ;; let normal case handle this. it will go thru loop again as non-subrun
+                                               (let ((newtal (append tal (list test))))
+                                                 (loop (car newtal)(cdr newtal))))
+                                              (else
+                                               (let* ((logfile (subrun:get-log-path run-dir "remove")))
+                                                 (debug:print 0 *default-log-port* "WARNING: removal of subrun failed. Please check "logfile" for details."))
+                                               ;; send to back of line, loop (will not match has-subrun next time through)
+                                               (if (not (null? tal))
+                                                   (loop (car tal)(cdr tal))))))
+                                           )
+                                          ) ; end case rem-status
+                                        ) ; end let
+                                      ); end cond has-subrun
+
+                                     (else
+                                      ;; BB - TODO - consider backgrounding to threads to delete tests (work below)
+                                      (debug:print-info 0 *default-log-port* "test: " test-name " itest-state: " test-state)
+                                      (if (member test-state (list "RUNNING" "LAUNCHED" "REMOTEHOSTSTART" "KILLREQ"))
+                                          (begin
+                                            (if (not (hash-table-ref/default test-retry-time test-fulln #f))
+                                                (begin
+                                                  ;; want to set to REMOVING BUT CANNOT do it here?
+                                                  (hash-table-set! test-retry-time test-fulln (current-seconds))))
+                                            (if (> (- (current-seconds)(hash-table-ref test-retry-time test-fulln)) allow-run-time)
+                                                ;; This test is not in a correct state for cleaning up. Let's try some graceful shutdown steps first
+                                                ;; Set the test to "KILLREQ" and wait five seconds then try again. Repeat up to five times then give
+                                                ;; up and blow it away.
+                                                (begin
+                                                  (debug:print 0 *default-log-port* "WARNING: could not gracefully remove test " test-fulln ", tried to kill it to no avail. Forcing state to FAILEDKILL and continuing")
+                                                  (mt:test-set-state-status-by-id run-id (db:test-get-id test) "FAILEDKILL" "n/a" #f)
+                                                  (thread-sleep! 1))
+                                                (begin
+                                                  (mt:test-set-state-status-by-id run-id (db:test-get-id test) "KILLREQ" "n/a" #f)
+                                                  (thread-sleep! 1)))
+                                            ;; NOTE: This is suboptimal as the testdata will be used later and the state/status may have changed ...
+                                            ;; Re-queue this test at the back. The new head must be taken
+                                            ;; out of the queue (cdr tal), otherwise it is visited twice.
+                                            (if (null? tal)
+                                                (loop new-test-dat tal)
+                                                (loop (car tal)(append (cdr tal) (list new-test-dat)))))
+                                          (begin
+                                            (runs:remove-test-directory new-test-dat mode) ;; 'remove-all)
+                                            (if (not (null? tal))
+                                                (loop (car tal)(cdr tal)))))))
+                                    (rmt:update-run-stats run-id (rmt:get-raw-run-stats run-id)))
+                                   ((kill-runs)
+                                    ;; RUNNING -> KILLREQ
+                                    ;; LAUNCHED,RUNNING,REMOTEHOSTSTART -> NOT STARTED
+                                    (cond
+                                     ((and has-subrun (member test-state (list "RUNNING" "LAUNCHED" "REMOTEHOSTSTART" "KILLREQ")))
+                                      (common:send-thunk-to-background-thread
+                                       (lambda ()
+                                         (let* ((subrun-remove-succeeded
+                                                 (subrun:kill-subrun run-dir keep-records)))
+                                           #t)))
+                                      (if (not (null? tal))
+                                          (loop (car tal)(cdr tal)))
+                                      )
+                                     ((member test-state (list "RUNNING" "LAUNCHED" "REMOTEHOSTSTART" "KILLREQ"))
+                                      (debug:print 1 *default-log-port* "INFO: issuing killreq to test "test-fulln)
+                                      (mt:test-set-state-status-by-id run-id (db:test-get-id test) "KILLREQ" "n/a" #f)
+                                      (if (not (null? tal))
+                                          (loop (car tal)(cdr tal))))
+                                     ((and (member test-status '("PREQ_FAIL" "PREQ_DISCARDED" "BLOCKED" "ZERO_ITEMS" "KEEP_TRYING" "TEN_STRIKES" "TIMED_OUT")))
+                                      (rmt:set-state-status-and-roll-up-items run-id (db:test-get-id test) 'foo "NOT_STARTED" "n/a" (conc "kill-run moved from "test-state":"test-status" to NOT_STARTED:n/a"))
+                                      ;;(mt:test-set-state-status-by-id run-id (db:test-get-id test) "NOT_STARTED" "n/a" (conc "kill-run moved from "test-state":"test-status" to NOT_STARTED:n/a"))
+                                      (if (not (null? tal))
+                                          (loop (car tal)(cdr tal)))
+                                      )
+                                     (else
+                                      (if (not (null? tal))
+                                          (loop (car tal)(cdr tal)))
+                                      )))
+                                   ((set-state-status)
+                                    (let* ((new-state (car state-status))
+                                           (new-status (cadr state-status))
+                                           (test-id (db:test-get-id test))
+                                           (test-run-dir (db:test-get-rundir new-test-dat))
+                                           (has-subrun    (and (subrun:subrun-test-initialized? test-run-dir)
+                                                               (not (subrun:subrun-removed? test-run-dir)))))
+                                      (when has-subrun
+                                        (common:send-thunk-to-background-thread
+                                         (lambda ()
+                                           (subrun:set-state-status test-run-dir state status new-state-status)
+                                           )
+                                         )
+                                        )
+                                      (debug:print-info 2 *default-log-port* "new state " new-state ", new status " new-status )
+                                      (mt:test-set-state-status-by-id run-id test-id new-state new-status #f))
+                                    (if (not (null? tal))
+                                        (loop (car tal)(cdr tal))))
+                                   ((run-wait)
+                                    ;; BB TODO - manage has-subrun case
+                                    (debug:print-info 2 *default-log-port* "still waiting, " (length tests) " tests still running")
+                                    (thread-sleep! 10)
+                                    (let ((new-tests (proc-get-tests run-id)))
+                                      (if (null? new-tests)
+                                          (debug:print-info 1 *default-log-port* "Run completed according to zero tests matching provided criteria.")
+                                          (loop (car new-tests)(cdr new-tests)))))
+                                   ((archive)
+                                    ;; BB TODO - manage has-subrun case
+                                    (if (and run-dir (not toplevel-with-children))
+                                        (let ((ddir (conc run-dir "/")))
+                                          (case (string->symbol (args:get-arg "-archive"))
+                                            ((save save-remove keep-html)
+                                             (if (common:file-exists? ddir)
+                                                 (debug:print-info 0 *default-log-port* "Estimating disk space usage for " test-fulln ": " (common:get-disk-space-used ddir)))))))
+                                    (if (not (null? tal))
+                                        (loop (car tal)(cdr tal))))
+                                   )))
+                           )
+                         (if worker-thread (thread-join! worker-thread))))
+                   (common:join-backgrounded-threads))))
+           ;; remove the run if zero tests remain
+           (if (eq? action 'remove-runs)
+               (let* ((run-id   (db:get-value-by-header run header "id")) ;; NB// masks run-id from above?
+                      (remtests (mt:get-tests-for-run run-id #f '("DELETED") '("n/a") not-in: #t)))
+                 (if (null? remtests) ;; no more tests remaining
+                     (let* ((dparts  (string-split lasttpath "/"))
+                            (runpath (conc "/" (string-intersperse
+                                                (take dparts (- (length dparts) 1))
+                                                "/"))))
+                       (debug:print 1 *default-log-port* "Removing run: " runkey " " (db:get-value-by-header run header "runname") " and related record")
+                       (if (not keep-records)
+                           (begin
+                             (rmt:delete-run run-id)
+                             (rmt:delete-old-deleted-test-records)))
+                       ;; (rmt:set-var "DELETED_TESTS" (current-seconds))
+                       ;; need to figure out the path to the run dir and remove it if empty
+                       ;;    (if (null? (glob (conc runpath "/*")))
+                       ;;        (begin
+                       ;;          (debug:print 1 *default-log-port* "Removing run dir " runpath)
+                       ;;          (system (conc "rmdir -p " runpath))))
+                       )))))
+         ))
+     runs)
+    ;; (sqlite3:finalize! (db:delay-if-busy tdbdat))
+    )
+  #t)
+
+;; Remove the on-disk data for one test record and then update or delete
+;; its database record.
+;;
+;;   test : test record, read with the db:test-get-* accessors
+;;   mode : remove-data-only, archive-remove or remove-all; #f means remove-all
+;;
+;; Order of operations:
+;;   1. set a transitional state (CLEANING, REMOVING or ARCHIVE_REMOVING)
+;;   2. delete the real directory behind the rundir link, but only when
+;;      its path is longer than 5 characters
+;;   3. remove the rundir symlink, or the rundir itself if it is an
+;;      empty directory
+;;   4. remove-data-only restores the state/status held in test,
+;;      archive-remove sets ARCHIVED, anything else deletes the record
+;;
+(define (runs:remove-test-directory test mode) ;; remove-data-only)
+  (let* ((run-dir       (db:test-get-rundir test))    ;; run dir is from the link tree
+         (real-dir      (if (common:file-exists? run-dir)
+                            ;; (resolve-pathname run-dir)
+                            (common:nice-path run-dir)
+                            #f))
+         (clean-mode    (or mode 'remove-all))
+         (test-id       (db:test-get-id test))
+         ;; (lock-key      (conc "test-" test-id))
+         ;; (got-lock      (let loop ((lock        (rmt:no-sync-get-lock lock-key))
+         ;;                           (expire-time (+ (current-seconds) 30))) ;; give up on getting the lock and steal it after 15 seconds
+         ;;                  (if (car lock)
+         ;;                      #t
+         ;;                      (if (> (current-seconds) expire-time)
+         ;;                          (begin
+         ;;                            (debug:print-info 0 *default-log-port* "Timed out waiting for a lock to clean test with id " test-id)
+         ;;                            (rmt:no-sync-del! lock-key) ;; destroy the lock
+         ;;                            (loop (rmt:no-sync-get-lock lock-key) expire-time)) ;;
+         ;;                          (begin
+         ;;                            (thread-sleep! 1)
+         ;;                            (loop (rmt:no-sync-get-lock lock-key) expire-time)))))))
+         )
+    (case clean-mode
+      ((remove-data-only)(mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "CLEANING" "LOCKED" #f))
+      ((remove-all)      (mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "REMOVING" "LOCKED" #f))
+      ((archive-remove)  (mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "ARCHIVE_REMOVING" #f #f)))
+    (debug:print-info 1 *default-log-port* "Attempting to remove " (if real-dir (conc " dir " real-dir " and ") "") " link " run-dir)
+    (if (and real-dir
+             (> (string-length real-dir) 5)
+             (common:file-exists? real-dir)) ;; bad heuristic but should prevent /tmp /home etc.
+        (begin ;; let* ((realpath (resolve-pathname run-dir)))
+          (debug:print-info 1 *default-log-port* "Recursively removing " real-dir)
+          (if (common:file-exists? real-dir)
+              (runs:safe-delete-test-dir real-dir)
+              (debug:print 0 *default-log-port* "WARNING: test dir " real-dir " appears to not exist or is not readable")))
+        (if real-dir
+            (debug:print 0 *default-log-port* "WARNING: directory " real-dir " does not exist")
+            (debug:print 0 *default-log-port* "WARNING: no real directory corrosponding to link " run-dir ", nothing done")))
+    (if (symbolic-link? run-dir)
+        (begin
+          (debug:print-info 1 *default-log-port* "Removing symlink " run-dir)
+          (handle-exceptions
+           exn
+           (debug:print-error 0 *default-log-port* " Failed to remove symlink " run-dir ((condition-property-accessor 'exn 'message) exn) ", attempting to continue")
+           (delete-file run-dir)))
+        (if (directory? run-dir)
+            (if (> (directory-fold (lambda (f x)(+ 1 x)) 0 run-dir) 0)
+                (debug:print 0 *default-log-port* "WARNING: refusing to remove " run-dir " as it is not empty")
+                (handle-exceptions
+                 exn
+                 (debug:print-error 0 *default-log-port* " Failed to remove directory " run-dir ((condition-property-accessor 'exn 'message) exn) ", attempting to continue")
+                 (delete-directory run-dir)))
+            (if (and run-dir
+                     (not (member run-dir (list "n/a" "/tmp/badname"))))
+                (debug:print 0 *default-log-port* "WARNING: not removing " run-dir " as it either doesn't exist or is not a symlink")
+                (debug:print 0 *default-log-port* "NOTE: the run dir for this test is undefined. Test may have already been deleted."))
+            ))
+    ;; Only delete the records *after* removing the directory. If things fail we have a record
+    (case clean-mode
+      ((remove-data-only)(mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) (db:test-get-state test)(db:test-get-status test) #f))
+      ((archive-remove)  (mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "ARCHIVED" #f #f))
+      (else (rmt:delete-test-records (db:test-get-run_id test) (db:test-get-id test))))
+    ;; (rmt:no-sync-del! lock-key)
+    ))
+
+;;======================================================================
+;; Routines for manipulating runs
+;;======================================================================
+
+;; Since many calls to a run require pretty much the same setup
+;; this wrapper is used to reduce the replication of code
+;;
+;;   switchname  : name of the command line switch, used in error messages
+;;   action-desc : description of the action, used in error messages
+;;   proc        : called as (proc target runname keys keyvals)
+;;
+;; Exits with status 3 when the target or the run name is missing and with
+;; status 1 when launch:setup fails, the -reqtarg section is not found in
+;; runconfigs.config, or (car *configinfo*) is false. Sets *didsomething*
+;; to #t after proc returns.
+;;
+(define (general-run-call switchname action-desc proc)
+  (let ((runname (or (args:get-arg "-runname")(args:get-arg ":runname")))
+        (target  (common:args-get-target)))
+    (cond
+     ((not target)
+      (debug:print-error 0 *default-log-port* "Missing required parameter for " switchname ", you must specify the target with -target")
+      (exit 3))
+     ((not runname)
+      (debug:print-error 0 *default-log-port* "Missing required parameter for " switchname ", you must specify the run name with -runname runname")
+      (exit 3))
+     (else
+      (let (;; (db   #f)
+            (keys #f))
+        (if (launch:setup)
+            (begin
+              (full-runconfigs-read) ;; cache the run config
+              ;; (launch:cache-config) ;; there are two independent config cache locations, turning this one off for now. MRW.
+              ) ;; do not cache here - need to be sure runconfigs is processed
+            (begin
+              (debug:print 0 *default-log-port* "Failed to setup, exiting")
+              (exit 1)))
+
+
+        (set! keys (keys:config-get-fields *configdat*))
+        ;; have enough to process -target or -reqtarg here
+        (if (args:get-arg "-reqtarg")
+            (let* ((runconfigf (conc *toppath* "/runconfigs.config")) ;; DO NOT EVALUATE ALL
+                   (runconfig  (read-config runconfigf #f #t environ-patt: #f)))
+              (if (hash-table-ref/default runconfig (args:get-arg "-reqtarg") #f)
+                  (keys:target-set-args keys (args:get-arg "-reqtarg") args:arg-hash)
+
+                  (begin
+                    (debug:print-error 0 *default-log-port* "[" (args:get-arg "-reqtarg") "] not found in " runconfigf)
+                    ;; (if db (sqlite3:finalize! db))
+                    (exit 1)
+                    )))
+            ;; NOTE(review): unlike the -reqtarg branch, args:get-arg is given a
+            ;; second argument (args:arg-hash) here - looks like a misplaced
+            ;; paren; confirm against the definition of args:get-arg.
+            (if (args:get-arg "-target")
+                (keys:target-set-args keys (args:get-arg "-target" args:arg-hash) args:arg-hash)))
+        (if (not (car *configinfo*))
+            (begin
+              (debug:print-error 0 *default-log-port* "Attempted to " action-desc " but run area config file not found")
+              (exit 1))
+            ;; Extract out stuff needed in most or many calls
+            ;; here then call proc
+            (let* ((keyvals    (keys:target->keyval keys target)))
+              (proc target runname keys keyvals)))
+        ;; (if db (sqlite3:finalize! db))
+        (set! *didsomething* #t))))))
+
+;;======================================================================
+;; Lock/unlock runs
+;;======================================================================
+
+;; Lock or unlock every run returned by mt:get-runs-by-patt for
+;; runname/target. Locking is done without asking. Unlocking is done
+;; only with -force or after the user answers "y" at the prompt;
+;; otherwise the run is skipped.
+;;
+(define (runs:handle-locking target keys runname lock unlock user)
+  (let* ((rundat   (mt:get-runs-by-patt keys runname target))
+         (header   (vector-ref rundat 0))
+         (runs     (vector-ref rundat 1)))
+    (for-each (lambda (run)
+                (let ((run-id (db:get-value-by-header run header "id"))
+                      (str    (if lock
+                                  "lock"
+                                  "unlock")))
+                  (if (or lock
+                          (and unlock
+                               (or (args:get-arg "-force")
+                                   (begin
+                                     (print "Do you really wish to unlock run " run-id "?\n y/n: ")
+                                     (equal? "y" (read-line))))))
+                      (begin
+                        (rmt:lock/unlock-run run-id lock unlock user)
+                        (debug:print-info 0 *default-log-port* "Done " str " on run id " run-id))
+                      (debug:print-info 0 *default-log-port* "Skipping lock/unlock on " run-id))))
+              runs)))
+;;======================================================================
+;; Rollup runs
+;;======================================================================
+
+;; Update the test_meta table for this test
+;;
+;; A record is added first if none exists. Each of the fields author,
+;; owner, description, reviewed, tags and jobgroup is then read from the
+;; [test_meta] section of test-conf and written only when it has a value
+;; that differs from the stored one. The number paired with each field
+;; name is its index in the record vector.
+;;
+(define (runs:update-test_meta test-name test-conf)
+  (let ((currrecord (rmt:testmeta-get-record test-name)))
+    (if (not currrecord)
+        (begin
+          (set! currrecord (make-vector 11 #f))
+          (rmt:testmeta-add-record test-name)))
+    (for-each
+     (lambda (key)
+       (let* ((idx (cadr key))
+              (fld (car  key))
+              (val (config-lookup test-conf "test_meta" fld)))
+         ;; (debug:print 5 *default-log-port* "idx: " idx " fld: " fld " val: " val)
+         (if (and val (not (equal? (vector-ref currrecord idx) val)))
+             (begin
+               (print "Updating " test-name " " fld " to " val)
+               (rmt:testmeta-update-field test-name fld val)))))
+     '(("author" 2)("owner" 3)("description" 4)("reviewed" 5)("tags" 9)("jobgroup" 10)))))
+
+;; find tests with matching tags, tagpatt is a string "tagpatt1,tagpatt2%, ..."
+;;
+;; Each row from rmt:get-tests-tags is (tag . tests). The tests of every
+;; row whose tag matches are appended to the result, so a test listed
+;; under several matching tags appears more than once.
+;;
+(define (runs:get-tests-matching-tags tagpatt)
+  (let* ((tagdata (rmt:get-tests-tags))
+         (res     '())) ;; list of tests that match one or more tags
+    (for-each
+     (lambda (row)
+       (let* ((tag   (car row))
+              (tests (cdr row)))
+         (if (patt-list-match tag tagpatt)
+             (set! res (append tests res)))))
+     tagdata)
+    res))
+
+
+;; Update test_meta for all tests
+;;
+;; Tests whose config cannot be read (mt:lazy-read-test-config returns #f)
+;; are skipped. The db parameter is not used here.
+;;
+(define (runs:update-all-test_meta db)
+  (let ((test-names (tests:get-all))) ;; (tests:get-valid-tests)))
+    (for-each
+     (lambda (test-name)
+       (let* ((test-conf    (mt:lazy-read-test-config test-name)))
+         (if test-conf (runs:update-test_meta test-name test-conf))))
+     (hash-table-keys test-names))))
+
+;; This could probably be refactored into one complex query ...
;; NOT PORTED - DO NOT USE YET
;;
;; NOTE(review): disabled with #; - the body still refers to a raw `db` that is
;; bound to #f and to `new-testdat`, which is never bound (the let* binds
;; `new-test-record`). Both need attention before this is re-enabled.
;;
#;(define (runs:rollup-run keys runname user keyvals)
  (debug:print 4 *default-log-port* "runs:rollup-run, keys: " keys " -runname " runname " user: " user)
  (let* ((db #f)
         ;; register run operates on the main db
         (new-run-id (rmt:register-run keyvals runname "new" "n/a" user (args:get-arg "-contour")))
         (prev-tests (rmt:get-matching-previous-test-run-records new-run-id "%" "%"))
         (curr-tests (mt:get-tests-for-run new-run-id "%/%" '() '()))
         (curr-tests-hash (make-hash-table)))
    (rmt:update-run-event_time new-run-id)
    ;; index the already saved tests by testname and itemdat in curr-tests-hash
    (for-each
     (lambda (testdat)
       (let* ((testname (db:test-get-testname testdat))
              (item-path (db:test-get-item-path testdat))
              (full-name (conc testname "/" item-path)))
         (hash-table-set! curr-tests-hash full-name testdat)))
     curr-tests)
    ;; NOPE: Non-optimal approach. Try this instead.
    ;; 1. tests are received in a list, most recent first
    ;; 2. replace the rollup test with the new *always*
    (for-each
     (lambda (testdat)
       (let* ((testname (db:test-get-testname testdat))
              (item-path (db:test-get-item-path testdat))
              (full-name (conc testname "/" item-path))
              (prev-test-dat (hash-table-ref/default curr-tests-hash full-name #f))
              (test-steps (rmt:get-steps-for-test (db:test-get-id testdat)))
              (new-test-record #f))
         ;; replace these with insert ... select
         (apply sqlite3:execute
                db
                (conc "INSERT OR REPLACE INTO tests (run_id,testname,state,status,event_time,host,cpuload,diskfree,uname,rundir,item_path,run_duration,final_logf,comment) "
                      "VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?);")
                new-run-id (cddr (vector->list testdat)))
         (set! new-testdat (car (mt:get-tests-for-run new-run-id (conc testname "/" item-path) '() '())))
         (hash-table-set! curr-tests-hash full-name new-testdat) ;; this could be confusing, which record should go into the lookup table?
         ;; Now duplicate the test steps
         (debug:print 4 *default-log-port* "Copying records in test_steps from test_id=" (db:test-get-id testdat) " to " (db:test-get-id new-testdat))
         (cdb:remote-run ;; to be replaced, note: this routine is not used currently
          (lambda ()
            (sqlite3:execute
             db
             (conc "INSERT OR REPLACE INTO test_steps (test_id,stepname,state,status,event_time,comment) "
                   "SELECT " (db:test-get-id new-testdat) ",stepname,state,status,event_time,comment FROM test_steps WHERE test_id=?;")
             (db:test-get-id testdat))
            ;; Now duplicate the test data
            (debug:print 4 *default-log-port* "Copying records in test_data from test_id=" (db:test-get-id testdat) " to " (db:test-get-id new-testdat))
            (sqlite3:execute
             db
             (conc "INSERT OR REPLACE INTO test_data (test_id,category,variable,value,expected,tol,units,comment) "
                   "SELECT " (db:test-get-id new-testdat) ",category,variable,value,expected,tol,units,comment FROM test_data WHERE test_id=?;")
             (db:test-get-id testdat))))
         ))
     prev-tests)))


;; clean cache files
;;
;; Removes the .megatest* and .runconfig* files found under
;; <linktree>/<target>/<runname>. Both target and runname are required; an
;; error is logged when either is missing. toppath is accepted but not used.
;; A file that cannot be deleted is reported with a warning and skipped.
;;
(define (runs:clean-cache target runname toppath)
  (if target
      (if runname
          (let* ((linktree (common:get-linktree)) ;; (if toppath (configf:lookup *configdat* "setup" "linktree")))
                 (runtop   (conc linktree "/" target "/" runname))
                 (files    (if (common:file-exists? runtop)
                               (append (glob (conc runtop "/.megatest*"))
                                       (glob (conc runtop "/.runconfig*")))
                               '())))
            (if (null? files)
                (debug:print-info 0 *default-log-port* "No cached megatest or runconfigs files found. None removed.")
                (begin
                  (debug:print-info 0 *default-log-port* "Removing cached files:\n " (string-intersperse files "\n "))
                  (for-each
                   (lambda (f)
                     (handle-exceptions
                      exn
                      (debug:print 0 *default-log-port* "WARNING: Failed to remove file " f)
                      (delete-file f)))
                   files))))
          (debug:print-error 0 *default-log-port* "-clean-cache requires -runname."))
      (debug:print-error 0 *default-log-port* "-clean-cache requires -target or -reqtarg")))

;;======================================================================
;; strings db sdb
;;======================================================================

;; Open (creating if needed) the strings database stored in file fname.
;; Installs a busy-timeout handler, creates the schema when the file did not
;; exist beforehand, sets "PRAGMA synchronous = 1" and returns the db handle.
;;
(define (sdb:open fname)
  (let* ((dbpath   (pathname-directory fname))
         (dbexists (let ((fe (common:file-exists? fname)))
                     (if fe
                         fe
                         (begin
                           ;; pathname-directory yields #f for a bare file
                           ;; name; there is no directory to create then
                           (if dbpath
                               (create-directory dbpath #t))
                           #f))))
         (sdb      (sqlite3:open-database fname))
         (handler  (make-busy-timeout 136000)))
    (sqlite3:set-busy-handler! sdb handler)
    (if (not dbexists)
        (sdb:initialize sdb))
    (sqlite3:execute sdb "PRAGMA synchronous = 1;")
    sdb))

;; Create the strs table (id, unique str) and its index if they are missing.
;;
(define (sdb:initialize sdb)
  (sqlite3:execute sdb "CREATE TABLE IF NOT EXISTS strs
                           (id INTEGER PRIMARY KEY,
                            str TEXT,
                            CONSTRAINT str UNIQUE (str));")
  (sqlite3:execute sdb "CREATE INDEX IF NOT EXISTS strindx ON strs (str);"))

;; (define sumup (let ((a 0))(lambda (x)(set! a (+ x a)) a)))

;; Insert str into strs; an already present string is silently ignored.
;;
(define (sdb:register-string sdb str)
  (sqlite3:execute sdb "INSERT OR IGNORE INTO strs (str) VALUES (?);" str))

;; Return the id stored for str, or #f when str is not in the table.
;; str-cache is a hash table (str => id) consulted first and filled on a hit
;; in the db.
;;
(define (sdb:string->id sdb str-cache str)
  (let ((id (hash-table-ref/default str-cache str #f)))
    (if (not id)
        (sqlite3:for-each-row
         (lambda (sid)
           (set! id sid)
           (hash-table-set! str-cache str id))
         sdb
         "SELECT id FROM strs WHERE str=?;" str))
    id))

;; Return the string stored under id, or #f when there is no such row.
;; id-cache is a hash table (id => str) consulted first and filled on a hit
;; in the db.
;;
(define (sdb:id->string sdb id-cache id)
  (let ((str (hash-table-ref/default id-cache id #f)))
    (if (not str)
        (sqlite3:for-each-row
         (lambda (istr)
           (set! str istr)
           (hash-table-set! id-cache id str))
         sdb
         "SELECT str FROM strs WHERE id=?;" id))
    str))

;; Numbers get passed though in both directions
;;
;; Returns a closure (lambda (cmd var) ...) that owns one sdb handle plus the
;; two lookup caches. Commands:
;;   setup    - open the db (var, or fname when var is #f) unless already open
;;   setdb    - use var as the db handle
;;   getdb    - return the current handle
;;   finalize - close the handle, if any
;;   getid    - numbers/numeric strings pass through, other strings are
;;              mapped to their id (registered first when unknown)
;;   getstr   - numbers/numeric strings are looked up as ids, other values
;;              pass through
;;   passid, passstr - return var unchanged
;; Any other command yields #f.
;;
(define (make-sdb:qry fname)
  (let ((sdb    #f)
        (scache (make-hash-table))
        (icache (make-hash-table)))
    (lambda (cmd var)
      (case cmd
        ;; only assign when nothing is open yet; assigning the value of a
        ;; one-armed `if` would replace an open handle with an unspecified value
        ((setup)    (if (not sdb)
                        (set! sdb (sdb:open (if var var fname)))))
        ((setdb)    (set! sdb var))
        ((getdb)    sdb)
        ((finalize) (if sdb
                        (begin
                          (sqlite3:finalize! sdb)
                          (set! sdb #f))))
        ((getid)    (let ((id (if (or (number? var)
                                      (string->number var))
                                  var
                                  (sdb:string->id sdb scache var))))
                      (if id
                          id
                          (begin
                            (sdb:register-string sdb var)
                            (sdb:string->id sdb scache var)))))
        ((getstr)   (if (or (number? var)
                            (string->number var))
                        (sdb:id->string sdb icache var)
                        var))
        ((passid)   var)
        ((passstr)  var)
        (else #f)))))

;;======================================================================
;; subrun
;;======================================================================

;; #t when test-run-dir holds both the subrun-area link and testconfig.subrun
;;
(define (subrun:subrun-test-initialized? test-run-dir)
  (if (and (common:file-exists? (conc test-run-dir "/subrun-area") )
           (common:file-exists? (conc test-run-dir "/testconfig.subrun") ))
      #t
      #f))

;; Start a dashboard in the background inside the subrun's run area, when
;; that area can be determined and exists.
;;
(define (subrun:launch-dashboard test-run-dir)
  (if (subrun:subrun-test-initialized? test-run-dir)
      (let* ((subarea (subrun:get-runarea test-run-dir)))
        (if (and subarea (common:file-exists? subarea))
            (system (conc "cd " subarea ";env -i PATH=$PATH DISPLAY=$DISPLAY HOME=$HOME USER=$USER dashboard &"))))))

;; #t when the subrun.removed flag file exists, and also when the test was
;; never initialized as a subrun.
;;
(define (subrun:subrun-removed? test-run-dir)
  (if (subrun:subrun-test-initialized? test-run-dir)
      (let ((flagfile (conc test-run-dir "/subrun.removed")))
        (if (common:file-exists? flagfile)
            #t
            #f))
      #t))

;; Write the subrun.removed flag file (containing the current seconds) for an
;; initialized subrun that is not flagged yet.
;;
(define (subrun:set-subrun-removed test-run-dir)
  (let ((flagfile (conc test-run-dir "/subrun.removed")))
    (if (and (subrun:subrun-test-initialized? test-run-dir) (not (common:file-exists? flagfile)))
        (with-output-to-file flagfile
          (lambda () (print (current-seconds)))))))

;; Delete the subrun.removed flag file of an initialized subrun, if present.
;;
(define (subrun:unset-subrun-removed test-run-dir)
  (let ((flagfile (conc test-run-dir "/subrun.removed")))
    (if (and (subrun:subrun-test-initialized? test-run-dir) (common:file-exists? flagfile))
        (delete-file flagfile))))


(define (subrun:testconfig-defines-subrun? testconfig)
  (configf:lookup testconfig "subrun" "runwait")) ;; we use runwait as the flag that a subrun is requested

;; Prepare test-run-dir for a subrun: make sure [subrun] run-area is set,
;; copy the subrun logpro rules into the logpro section, (re)create the
;; subrun-area symlink and write the config out as "testconfig.subrun".
;;
;; NOTE(review): the output file name is relative, so this presumably relies
;; on the current directory being test-run-dir - confirm against the caller.
;;
(define (subrun:initialize-toprun-test testconfig test-run-dir)
  (let ((ra             (configf:lookup testconfig "subrun" "run-area"))
        (logpro         (configf:lookup testconfig "subrun" "logpro"))
        (symlink-target (conc test-run-dir "/subrun-area"))
        )
    (if (not ra) ;; when runarea is not set we default to *toppath*. However
        (let ((fallback-run-area (or *toppath* (conc test-run-dir "/subrun"))))
          ;; we need to force the setting in the testconfig so it will
          ;; be preserved in the testconfig.subrun file
          (configf:set-section-var testconfig "subrun" "run-area" fallback-run-area)
          (set! ra fallback-run-area)))
    (configf:set-section-var testconfig "logpro" "subrun" logpro) ;; append the logpro rules to the logpro section as stepname subrun
    (if (common:file-exists? symlink-target)
        (delete-file symlink-target))
    (create-symbolic-link ra symlink-target)
    (configf:write-alist testconfig "testconfig.subrun")))

;; Run "megatest -set-state-status ..." in the subrun area. state and status
;; are optional selectors (#f to omit). Returns the result of
;; subrun:exec-sub-megatest; nothing is run for a removed or uninitialized
;; subrun.
;;
(define (subrun:set-state-status test-run-dir state status new-state-status)
  (if (and (not (subrun:subrun-removed? test-run-dir)) (subrun:subrun-test-initialized? test-run-dir))
      (let* ((action-switches-str
              (conc "-set-state-status "new-state-status
                    (if state (conc " -state "state) "")
                    (if status (conc " -status "status) "")))
             (log-prefix
              (subrun:sanitize-path
               (conc "set-state-status="new-state-status
                     (if state (conc ":state="state) "")
                     (if status (conc "+status="status) ""))))
             (submt-result
              (subrun:exec-sub-megatest test-run-dir action-switches-str log-prefix)))
        submt-result)))

;; Run "megatest -remove-runs" (plus -keep-records when keep-records is true)
;; in the subrun area. On success the subrun is flagged as removed and #t is
;; returned, on failure #f. A removed or uninitialized subrun yields #t
;; without running anything.
;;
(define (subrun:remove-subrun test-run-dir keep-records )
  (if (and (not (subrun:subrun-removed? test-run-dir)) (subrun:subrun-test-initialized? test-run-dir))
      (let* ((action-switches-str
              ;; the separating blank is required, without it the two
              ;; switches fuse into "-remove-runs-keep-records"
              (conc "-remove-runs"
                    (if keep-records " -keep-records" "")
                    ))
             (remove-result
              (subrun:exec-sub-megatest test-run-dir action-switches-str "remove")))
        (if remove-result
            (begin
              (subrun:set-subrun-removed test-run-dir)
              #t)
            #f))
      #t))

;; Run "megatest -kill-runs" in the subrun area and return its result;
;; a removed or uninitialized subrun yields #t without running anything.
;;
(define (subrun:kill-subrun test-run-dir )
  (if (and (not (subrun:subrun-removed? test-run-dir)) (subrun:subrun-test-initialized? test-run-dir))
      (let* ((action-switches-str
              (conc "-kill-runs" ))
             (kill-result
              (subrun:exec-sub-megatest test-run-dir action-switches-str "kill")))
        kill-result)
      #t))

;; Build (but do not execute) the megatest command line that launches the
;; subrun. Clears the "removed" flag first since the subrun is about to exist
;; again.
;;
(define (subrun:launch-cmd test-run-dir #!optional (sub-cmd "-run")) ;; BUG: "-run" should be changed to "-rerun-clean" but current doesn't work
  (if (subrun:subrun-removed? test-run-dir)
      (subrun:unset-subrun-removed test-run-dir))

  (let* ((log-prefix "run")
         (switches   (subrun:selector+log-switches test-run-dir log-prefix))
         (run-wait   #t)
         (cmd        (conc "megatest " sub-cmd " " switches" "
                           (if run-wait "-run-wait " ""))))
    cmd))


;; Replace every character of inpath that is not a letter, digit, "_" or "-"
;; with "_". (The character class used to contain a stray "[", which let
;; "[" through unchanged.)
;;
(define (subrun:sanitize-path inpath)
  (let* ((insane-pattern (irregex "[^a-zA-Z0-9_\\-]")))
    (regex#string-substitute insane-pattern "_" inpath #t)))

;; Return the value of the "-start-dir" switch for the subrun of
;; test-run-dir, or #f when the test is not an initialized subrun.
;;
(define (subrun:get-runarea test-run-dir)
  (if (subrun:subrun-test-initialized? test-run-dir)
      (let* ((info-alist (subrun:selector+log-alist
                          test-run-dir
                          "foo"))
             (run-area   (if (list? info-alist)
                             (alist-ref "-start-dir" info-alist equal? #f)
                             #f)))
        run-area)
      #f))

;; Read <test-run-dir>/testconfig.subrun and return an alist
;; ( (switch . value) ... ) of megatest selector switches for the subrun,
;; with a ("-log" . logfile) entry consed on the front. The log file name is
;; built from log-prefix, target, runname and the test selector.
;;
(define (subrun:selector+log-alist test-run-dir log-prefix)
  (let* ((switch-def-alist (common:get-param-mapping flavor: 'config))
         (subrunfile       (conc test-run-dir "/testconfig.subrun" ))
         (subrundata       (with-input-from-file subrunfile read))
         (subrunconfig     (configf:alist->config subrundata))
         (run-area         (configf:lookup subrunconfig "subrun" "run-area"))
         (defvals          `(("start-dir" . ,(or run-area ;; default values if not specified in subrun section of tconf
                                                 (get-environment-variable "MT_RUN_AREA_HOME")
                                                 "/no/rundir/found"))
                             ("run-name"  . ,(or (get-environment-variable "MT_RUNNAME") "NO-RUNNAME"))
                             ("target"    . ,(or (get-environment-variable "MT_TARGET") "NO-TARGET"))))
         (switch-alist-pre (filter-map (lambda (item)
                                         (let* ((config-key (car item))
                                                (switch     (cdr item))
                                                (defval     (alist-ref config-key defvals equal? #f))
                                                (val        (or (configf:lookup subrunconfig "subrun" config-key)
                                                                defval)))
                                           (if val
                                               (cons switch val)
                                               #f)))
                                       switch-def-alist))

         ;; testpatt may be modified if all three of mode-patt, tag-expr, and testpatt are null
         (mode-patt        (alist-ref "-modepatt" switch-alist-pre equal? #f))
         (tag-expr         (alist-ref "-tagexpr" switch-alist-pre equal? #f))
         (testpatt         (alist-ref "-testpatt" switch-alist-pre equal?
                                      (if (not (or mode-patt tag-expr)) "%" #f))) ;; testpatt is % if not
                                                                                   ;; otherwise specified

         ;; define compact-stem for logfile
         (target           (alist-ref "-target" switch-alist-pre equal? #f)) ;; want data-structures alist-ref, not alist-lib alist-ref
         (runname          (alist-ref "-runname" switch-alist-pre equal? #f))


         (compact-stem     (subrun:sanitize-path
                            (conc
                             target
                             "-"
                             runname
                             "-" (or testpatt mode-patt tag-expr "NO-TESTPATT"))))
         (logfile          (conc
                            test-run-dir "/"
                            (if log-prefix
                                (conc (subrun:sanitize-path log-prefix) "-")
                                "")
                            compact-stem
                            ".log"))
         ;; swap out testpatt with modified test-patt and add -log
         (switch-alist     (cons
                            (cons "-log" logfile)
                            (map (lambda (item)
                                   (if (equal? (car item) "-testpatt")
                                       (cons "-testpatt" testpatt)
                                       item))
                                 switch-alist-pre))))
    switch-alist))
    ;; note - get precmd from subrun section
    ;;   apply to submegatest commands

;; Return the log file path subrun:selector+log-alist computes for log-prefix.
;;
(define (subrun:get-log-path test-run-dir log-prefix)
  (let* ((alist (subrun:selector+log-alist test-run-dir log-prefix))
         (res   (alist-ref "-log" alist equal? #f)))
    res))

;; Flatten the selector/log alist into a single "switch value switch value ..."
;; string.
;;
(define (subrun:selector+log-switches test-run-dir log-prefix)
  (let* ((switch-alist (subrun:selector+log-alist test-run-dir log-prefix))
         (res
          (string-intersperse
           (apply
            append
            (map
             (lambda (x)
               (list (car x) (cdr x)))
             switch-alist))
           " ")))
    res))

;; Run "megatest <selector switches> <action-switches-str>" through /bin/bash
;; with "." appended to PATH and the variables matching ^MT_.* hidden, then
;; poll every 2 seconds until the child finishes.
;; Returns #t when the exit code is 0 and #f otherwise.
;;
(define (subrun:exec-sub-megatest test-run-dir action-switches-str log-prefix)
  (let* ((selector-switches (subrun:selector+log-switches test-run-dir log-prefix))
         (cmd               (conc "megatest " selector-switches " " action-switches-str ))
         (pid               #f)
         (proc              (lambda ()
                              (debug:print-info 0 *default-log-port* "Running sub megatest command: "cmd)
                              ;;(set! pid (process-run "/usr/bin/xterm" (list ))))))
                              (set! pid (process-run "/bin/bash" (list "-c" cmd))))))
    (call-with-environment-variables
     (list (cons "PATH" (conc (get-environment-variable "PATH") ":.")))
     (lambda ()
       (common:without-vars proc "^MT_.*")))
    (let processloop ((i 0))
      ;; non-blocking wait; a pid of 0 means the child is still running
      (let-values (((pid-val exit-status exit-code)(process-wait pid #t)))
        (if (eq? pid-val 0)
            (begin
              (thread-sleep! 2)
              (processloop (+ i 1)))
            (begin
              (debug:print-info 0 *default-log-port* "sub megatest " action-switches-str " completed with exit code " exit-code)
              (if (eq? 0 exit-code)
                  (begin
                    #t)
                  (begin
                    #f))))))))



;; (subrun:exec-sub-megatest "/nfs/pdx/disks/icf_env_disk001/bjbarcla/gwa/issues/mtdev/165/megatest/ext-tests/tests/subrun-usecases/toparea/links/SYSTEM_val/RELEASE_val/go/toptest" "-foo" "foo")

;;======================================================================
;; synchash
;;======================================================================

#;(define (synchash:make)
   (make-hash-table))

;; given an alist of objects '((id obj) ...)
;;   1. remove unchanged objects from the list
;;   2. create a list of removed objects by id
;;   3. remove removed objects from synchash
;;   4. replace or add new or changed objects to synchash
;;
#;(define (synchash:get-delta indat synchash)
  (let ((deleted '())
        (changed '())
        (found '())
        (orig-keys (hash-table-keys synchash)))
    (for-each
     (lambda (item)
       (let* ((id (car item))
              (dat (cadr item))
              (ref (hash-table-ref/default synchash id #f)))
         (if (not (equal? dat ref)) ;; item changed or new
             (begin
               (set! changed (cons item changed))
               (hash-table-set! synchash id dat)))
         (set! found (cons id found))))
     indat)
    (for-each
     (lambda (id)
       (if (not (member id found))
           (begin
             (set! deleted (cons id deleted))
             (hash-table-delete! synchash id))))
     orig-keys)
    (list changed deleted)
    ;; (list indat '()) ;; just for debugging
    ))

;; keynum => the field to use as the unique key (usually 0 but can be other field)
;;
#;(define (synchash:client-get proc synckey keynum synchash run-id . params)
  (let* ((data (rmt:synchash-get run-id proc synckey keynum params))
         (newdat (car data))
         (removs (cadr data))
         (myhash (hash-table-ref/default synchash synckey #f)))
    (if (not myhash)
        (begin
          (set! myhash (make-hash-table))
          (hash-table-set! synchash synckey myhash)))
    (for-each
     (lambda (item)
       (let ((id (car item))
             (dat (cadr item)))
         ;; (debug:print-info 2 *default-log-port* "Processing item: " item)
         (hash-table-set! myhash id dat)))
     newdat)
    (for-each
     (lambda (id)
       (hash-table-delete! myhash id))
     removs)
    ;; WHICH ONE!?
    ;; data)) ;; return the changed and deleted list
    (list newdat removs))) ;; synchash))

#;(define *synchashes* (make-hash-table))

#;(define (synchash:server-get dbstruct run-id proc synckey keynum params)
  ;; (debug:print-info 2 *default-log-port* "synckey: " synckey ", keynum: " keynum ", params: " params)
  (let* ((dbdat (db:get-db dbstruct run-id))
         (db (db:dbdat-get-db dbdat))
         (synchash (hash-table-ref/default *synchashes* synckey #f))
         (newdat (apply (case proc
                          ((db:get-runs) db:get-runs)
                          ((db:get-tests-for-run-mindata) db:get-tests-for-run-mindata)
                          ((db:get-test-info-by-ids) db:get-test-info-by-ids)
                          (else
                           (print "ERROR: sync for hash " proc " not setup! Edits needed in synchash.scm")
                           print))
                        db params))
         (postdat #f)
         (make-indexed (lambda (x)
                         (list (vector-ref x keynum) x))))
    ;; Now process newdat based on the query type
    (set! postdat (case proc
                    ((db:get-runs)
                     ;; (debug:print-info 2 *default-log-port* "Get runs call")
                     (let ((header (vector-ref newdat 0))
                           (data (vector-ref newdat 1)))
                       ;; (debug:print-info 2 *default-log-port* "header: " header ", data: " data)
                       (cons (list "header" header) ;; add the header keyed by the word "header"
                             (map make-indexed data)))) ;; add each element keyed by the keynum'th val
                    (else
                     ;; (debug:print-info 2 *default-log-port* "Non-get runs call")
                     (map make-indexed newdat))))
    ;; (debug:print-info 2 *default-log-port* "postdat: " postdat)
    ;; (if (not indb)(sqlite3:finalize! db))
    (if (not synchash)
        (begin
          (set! synchash (make-hash-table))
          (hash-table-set! *synchashes* synckey synchash)))
    (synchash:get-delta postdat synchash)))

;;======================================================================
;; tree
;;======================================================================

;;======================================================================
;; T R E E S T U F F
;;======================================================================

;; path is a list of nodes, each the child of the previous
;; this routine returns the id so another node can be added
;; either as a leaf or as a branch
;;
;; Returns #f for an empty path or when no node matches.
;;
;; BUG: This needs a stop sensor for when a branch is exhausted
;;
(define (tree:find-node obj path)
  ;; start at the base of the tree
  (if (null? path)
      #f ;; or 0 ????
      (let loop ((hed     (car path))
                 (tal     (cdr path))
                 (depth   0)
                 (nodenum 0))
        ;; nodes in iup tree are 100% sequential so iterate over nodenum
        (if (iup:attribute obj (conc "DEPTH" nodenum)) ;; end when no more nodes
            (let ((node-depth (string->number (iup:attribute obj (conc "DEPTH" nodenum))))
                  (node-title (iup:attribute obj (conc "TITLE" nodenum))))
              (if (and (equal? depth node-depth)
                       (equal? hed node-title)) ;; yep, this is the one!
                  (if (null? tal) ;; end of the line
                      nodenum
                      (loop (car tal)(cdr tal)(+ depth 1)(+ 1 nodenum)))
                  ;; this is the case where we found part of the hierarchy but not
                  ;; all of it, i.e. the node-depth went from deep to less deep
                  (if (> depth node-depth) ;; (+ 1 node-depth))
                      #f
                      (loop hed tal depth (+ nodenum 1)))))
            #f))))

;; top is the top node name zeroeth node VALUE=0
;;
;; Adds the branch described by nodelst (a list of titles, outermost first)
;; under top, creating whatever intermediate nodes are missing. When the
;; userdata keyword is given it is stored as USERDATA each time a node is
;; created.
;;
(define (tree:add-node obj top nodelst #!key (userdata #f))
  (let ((curr-top (iup:attribute obj "TITLE0")))
    (if (or (not (string? curr-top))
            (string-null? curr-top)
            (string-match "^\\s*$" curr-top))
        (iup:attribute-set! obj "ADDBRANCH0" top))



    (cond
     ((not (equal? top (iup:attribute obj "TITLE0")))
      (print "ERROR: top name " top " doesn't match " (iup:attribute obj "TITLE0")))
     ((null? nodelst))
     (else
      (let loop ((hed   (car nodelst))
                 (tal   (cdr nodelst))
                 (depth 1)
                 (pathl (list top)))
        ;; Because the tree dialog changes node numbers when
        ;; nodes are added or removed we must look up nodes
        ;; each and every time. 0 is the top node so default
        ;; to that.
        (let* ((newpath    (append pathl (list hed)))
               (parentnode (tree:find-node obj pathl))
               (nodenum    (tree:find-node obj newpath)))
          ;; Add the branch under lastnode if not found
          (if (not nodenum)
              (begin
                (iup:attribute-set! obj (conc "ADDBRANCH" parentnode) hed)
                ;; ERROR? ADDING DATA TO PARENT, DONT WE WANT IT ON CREATED NODE?
                (if userdata
                    (iup:attribute-set! obj (conc "USERDATA" parentnode) userdata))
                (if (null? tal)
                    #t
                    ;; reset to top
                    (loop (car nodelst)(cdr nodelst) 1 (list top))))
              (if (null? tal) ;; if null here then this path has already been added
                  #t
                  (loop (car tal)(cdr tal)(+ depth 1) newpath)))))))))

;; Return the list of titles leading from the top node down to node number
;; nodenum, by walking nodes 0..nodenum and trimming the path whenever the
;; depth decreases.
;;
(define (tree:node->path obj nodenum)
  (let loop ((currnode 0)
             (path     '()))
    (let* ((node-depth (string->number (iup:attribute obj (conc "DEPTH" currnode))))
           (node-title (iup:attribute obj (conc "TITLE" currnode)))
           (trimpath   (if (and (not (null? path))
                                (> (length path) node-depth))
                           (take path node-depth)
                           path))
           (newpath    (append trimpath (list node-title))))
      (if (>= currnode nodenum)
          newpath
          (loop (+ currnode 1)
                newpath)))))

;; Delete the node addressed by top followed by node-path. When no such node
;; exists nothing is deleted (tree:find-node returns #f in that case, which
;; must not be turned into a "DELNODE#f" attribute).
;;
(define (tree:delete-node obj top node-path) ;; node-path is a list of strings
  (let ((id (tree:find-node obj (cons top node-path))))
    (if id
        (begin
          (print "Found node to remove " id " for path " top " " node-path)
          (iup:attribute-set! obj (conc "DELNODE" id) "SELECTED"))
        (print "No node found to remove for path " top " " node-path))))

#|

  (let* ((tb (iup:treebox
              #:value 0
              #:name "Runs"
              #:expand "YES"
              #:addexpanded "NO"
              #:selection-cb
              (lambda (obj id state)
                ;; (print "obj: " obj ", id: " id ", state: " state)
                (let* ((run-path (tree:node->path obj id))
                       (run-id (tree-path->run-id (cdr run-path))))
                  (if run-id
                      (begin
                        (dboard:data-curr-run-id-set! data run-id)
                        (dashboard:update-run-summary-tab)))
                  ;; (print "path: " (tree:node->path obj id) " run-id: " run-id)
                  ))))
|#

;;======================================================================
;; db
;;======================================================================

;; Use db:test-get* to access
;; Get test data using test_ids. NB// Only works within a single run!!
;;
;; Returns a list of vectors, one per matching row, with the columns of
;; db:test-record-qry-selector.
;;
(define (db:get-test-info-by-ids dbstruct run-id test-ids)
  (db:with-db
   dbstruct
   run-id
   #f
   (lambda (db)
     (let ((res '()))
       (sqlite3:for-each-row
        (lambda (a . b)
          ;;                 0    1       2      3      4        5       6      7        8     9     10      11          12          13       14
          (set! res (cons (apply vector a b) res)))
        db
        (conc "SELECT " db:test-record-qry-selector " FROM tests WHERE id in ("
              (string-intersperse (map conc test-ids) ",") ");"))
       res))))

;; (db:with-db alldat run-id sqlite3:exec "select blah from blaz;")
;; r/w is a flag to indicate if the db is modified by this query #t = yes, #f = no
;;
;; Calls (proc db . params) and returns its result. alldat is normally an
;; alldat struct, in which case a db handle is taken via db:get-db and pushed
;; back on the struct's dbstack afterwards; otherwise alldat itself is used as
;; the db. The alldat-* accessors (request counters, mutex, log port) are only
;; applied when alldat really is a struct; for a raw db no throttling is done
;; and *default-log-port* is used for messages.
;; Any exception raised by proc is logged and the process exits with code 50.
;;
(define (db:with-db alldat run-id r/w proc . params)
  (let* ((have-struct      (alldat? alldat))
         (dbdat            (if have-struct
                               (db:get-db alldat)
                               #f))
         (db               (if have-struct
                               (db:dbdat-get-db dbdat)
                               alldat))
         (use-mutex        (and have-struct
                                (> (alldat-api-process-request-count alldat) 25)))
         (db-with-db-mutex (and have-struct
                                (alldat-db-with-db-mutex alldat)))
         (log-port         (if have-struct
                               (alldat-log-port alldat)
                               *default-log-port*)))
    (if (and use-mutex
             (common:low-noise-print 120 "over-50-parallel-api-requests"))
        (debug:print-info 0 log-port (alldat-api-process-request-count alldat) " parallel api requests being processed in process " (current-process-id) ", throttling access"))
    (if (and have-struct
             (common:low-noise-print 600 (conc "parallel-api-requests" (alldat-max-api-process-requests alldat))))
        (debug:print-info 2 log-port "Parallel api request count: " (alldat-api-process-request-count alldat) " max parallel requests: " (alldat-max-api-process-requests alldat)))
    (handle-exceptions
     exn
     (begin
       (print-call-chain (current-error-port))
       (debug:print-error 0 log-port "sqlite3 issue in db:with-db, alldat=" alldat ", run-id=" run-id ", proc=" proc ", params=" params " error: " ((condition-property-accessor 'exn 'message) exn))
       ;; there is no recovering at this time. exit
       (exit 50))
     (if use-mutex (mutex-lock! db-with-db-mutex))
     (let ((res (apply proc db params)))
       (if use-mutex (mutex-unlock! db-with-db-mutex))
       (if dbdat (stack-push! (alldat-dbstack alldat) dbdat))
       res))))


;; NOTE(review): db:test-record-fields is defined further down in this file;
;; this initialiser reads it when the definition is evaluated, so the fields
;; list probably has to be defined first - confirm the load order.
(define db:test-record-qry-selector (string-intersperse db:test-record-fields ","))

;; get a useful subset of the tests data (used in dashboard
;; use db:mintest-get-{id ,run_id,testname ...}
;;
(define (db:get-tests-for-run-mindata dbstruct run-id testpatt states statuses not-in)
  (db:get-tests-for-run dbstruct run-id testpatt states statuses #f #f not-in #f #f "id,run_id,testname,state,status,event_time,item_path" 0 #f))

;; states and statuses are lists, turn them into ("PASS","FAIL"...) and use NOT IN
;; i.e. these lists define what to NOT show.
+;; states and statuses are required to be lists, empty is ok +;; not-in #t = above behaviour, #f = must match +;; mode: +;; 'dashboard - use state = 'COMPLETED' AND status in ( statuses ) OR state in ( states ) +;; +(define (db:get-tests-for-run dbstruct run-id testpatt states statuses offset limit not-in sort-by sort-order qryvals last-update mode) + (let* ((qryvalstr (case qryvals + ((shortlist) "id,run_id,testname,item_path,state,status") + ((#f) db:test-record-qry-selector) ;; "id,run_id,testname,state,status,event_time,host,cpuload,diskfree,uname,rundir,item_path,run_duration,final_logf,comment") + (else qryvals))) + (res '()) + ;; if states or statuses are null then assume match all when not-in is false + (states-qry (if (null? states) + #f + (conc " state " + (if (eq? mode 'dashboard) + " IN ('" + (if not-in + " NOT IN ('" + " IN ('")) + (string-intersperse states "','") + "')"))) + (statuses-qry (if (null? statuses) + #f + (conc " status " + (if (eq? mode 'dashboard) + " IN ('" + (if not-in + " NOT IN ('" + " IN ('") ) + (string-intersperse statuses "','") + "')"))) + (interim-qry (conc " AND " (if not-in "NOT " "") "( state='COMPLETED' " (if statuses-qry (conc " AND " statuses-qry " ) ") " ) ") + (if states-qry + (conc (if not-in " AND " " OR ") states-qry ) ;; " ) ") + ""))) + (states-statuses-qry + (cond + ((and states-qry statuses-qry) + (case mode + ((dashboard) + (if not-in + (conc " AND (state='COMPLETED' AND status NOT IN ('" (string-intersperse statuses "','") "')) " + " OR (state != 'COMPLETED' AND state NOT IN ('" (string-intersperse states "','") "')) ") + (conc " AND (state='COMPLETED' AND status IN ('" (string-intersperse statuses "','") "')) " + " OR (state NOT IN ('COMPLETED','DELETED') AND state IN ('" (string-intersperse states "','") "')) "))) + (else (conc " AND ( " states-qry " AND " statuses-qry " ) ")))) + (states-qry + (case mode + ((dashboard) (conc " AND " (if not-in "NOT " "") " state IN ('" (string-intersperse states "','") "') 
")) ;; interim-qry) + (else (conc " AND " states-qry)))) + (statuses-qry + (case mode + ((dashboard) (conc " AND " (if not-in "NOT " "") " status IN ('" (string-intersperse statuses "','") "') ")) ;; interim-qry) + (else (conc " AND " statuses-qry)))) + (else ""))) + (tests-match-qry (tests:match->sqlqry testpatt)) + (qry (conc "SELECT " qryvalstr + (if run-id + " FROM tests WHERE run_id=? " + " FROM tests WHERE ? > 0 ") ;; should work? + (if last-update " " " AND state != 'DELETED' ") ;; if using last-update we want deleted tests? + states-statuses-qry + (if tests-match-qry (conc " AND (" tests-match-qry ") ") "") + (if last-update (conc " AND last_update >= " last-update " ") "") + (case sort-by + ((rundir) " ORDER BY length(rundir) ") + ((testname) (conc " ORDER BY testname " (if sort-order (conc sort-order ",") "") " item_path ")) + ((statestatus) (conc " ORDER BY state " (if sort-order (conc sort-order ",") "") " status ")) + ((event_time) " ORDER BY event_time ") + (else (if (string? sort-by) + (conc " ORDER BY " sort-by " ") + " "))) + (if sort-order sort-order " ") + (if limit (conc " LIMIT " limit) " ") + (if offset (conc " OFFSET " offset) " ") + ";" + ))) + (debug:print-info 8 *default-log-port* "db:get-tests-for-run run-id=" run-id ", qry=" qry) + (db:with-db dbstruct run-id #f + (lambda (db) + (sqlite3:for-each-row + (lambda (a . b) ;; id run-id testname state status event-time host cpuload diskfree uname rundir item-path run-duration final-logf comment) + (set! 
res (cons (apply vector a b) res))) ;; id run-id testname state status event-time host cpuload diskfree uname rundir item-path run-duration final-logf comment) res))) + db + qry + (or run-id 1) ;; 1 > 0 , for the case where we are seeking tests matching criteral for all runs + ))) + (case qryvals + ((shortlist)(map db:test-short-record->norm res)) + ((#f) res) + (else res)))) + +(define db:test-record-fields '("id" "run_id" "testname" "state" "status" "event_time" + "host" "cpuload" "diskfree" "uname" "rundir" "item_path" + "run_duration" "final_logf" "comment" "shortdir" "attemptnum" "archived" "last_update")) + +(define (db:test-short-record->norm inrec) + ;; "id,run_id,testname,item_path,state,status" + ;; "id,run_id,testname,state,status,event_time,host,cpuload,diskfree,uname,rundir,item_path,run_duration,final_logf,comment + (vector (vector-ref inrec 0) ;; id + (vector-ref inrec 1) ;; run_id + (vector-ref inrec 2) ;; testname + (vector-ref inrec 4) ;; state + (vector-ref inrec 5) ;; status + -1 "" -1 -1 "" "-" + (vector-ref inrec 3) ;; item-path + -1 "-" "-")) + +;; if itempath is #f then look only at the testname part +;; +(define (tests:match->sqlqry patterns) + (if (string? patterns) + (let ((patts (string-split patterns ","))) + (if (null? patts) ;;; no pattern(s) means no match, we will do no query + #f + (let loop ((patt (car patts)) + (tal (cdr patts)) + (res '())) + ;; (print "loop: patt: " patt ", tal " tal) + (let* ((patt-parts (string-match (regexp "^([^\\/]*)(\\/(.*)|)$") patt)) + (test-patt (cadr patt-parts)) + (item-patt (cadddr patt-parts)) + (test-qry (db:patt->like "testname" test-patt)) + (item-qry (db:patt->like "item_path" item-patt)) + (qry (conc "(" test-qry " AND " item-qry ")"))) + ;; (print "tests:match => patt-parts: " patt-parts ", test-patt: " test-patt ", item-patt: " item-patt) + (if (null? 
tal) + (string-intersperse (append (reverse res)(list qry)) " OR ") + (loop (car tal)(cdr tal)(cons qry res))))))) + #f)) + +;; make a query (fieldname like 'patt1' OR fieldname +(define (db:patt->like fieldname pattstr #!key (comparator " OR ")) + (let ((patts (if (string? pattstr) + (string-split pattstr ",") + '("%")))) + (string-intersperse (map (lambda (patt) + (let ((wildtype (if (substring-index "%" patt) "LIKE" "GLOB"))) + (conc fieldname " " wildtype " '" patt "'"))) + (if (null? patts) + '("") + patts)) + comparator))) + +;; replace header and keystr with a call to runs:get-std-run-fields +;; +;; keypatts: ( (KEY1 "abc%def")(KEY2 "%") ) +;; runpatts: patt1,patt2 ... +;; +(define (db:get-runs dbstruct runpatt count offset keypatts) + (let* ((res '()) + (keys (db:get-keys dbstruct)) + (runpattstr (db:patt->like "runname" runpatt)) + (remfields (list "id" "runname" "state" "status" "owner" "event_time")) + (header (append keys remfields)) + (keystr (conc (keys->keystr keys) "," + (string-intersperse remfields ","))) + (qrystr (conc "SELECT " keystr " FROM runs WHERE (" runpattstr ") " ;; runname LIKE ? " + ;; Generate: " AND x LIKE 'keypatt' ..." + (if (null? keypatts) "" + (conc " AND " + (string-join + (map (lambda (keypatt) + (let ((key (car keypatt)) + (patt (cadr keypatt))) + (db:patt->like key patt))) + keypatts) + " AND "))) + " AND state != 'deleted' ORDER BY event_time DESC " + (if (number? count) + (conc " LIMIT " count) + "") + (if (number? offset) + (conc " OFFSET " offset) + "")))) + (debug:print-info 11 *default-log-port* "db:get-runs START qrystr: " qrystr " keypatts: " keypatts " offset: " offset " limit: " count) + (db:with-db dbstruct #f #f + (lambda (db) + (sqlite3:for-each-row + (lambda (a . x) + (set! 
res (cons (apply vector a x) res))) + db + qrystr + ))) + (debug:print-info 11 *default-log-port* "db:get-runs END qrystr: " qrystr " keypatts: " keypatts " offset: " offset " limit: " count) + (vector header res))) + +(define (db:dbdat-get-path dbdat) + (if (pair? dbdat) + (cdr dbdat) + #f)) + +(define (keys->keystr keys) ;; => key1,key2,key3,additiona1, ... + (string-intersperse keys ",")) + +;; ;; legacy handling of structure for managing db's. Refactor this into dbr:? +(define (db:dbdat-get-db dbdat) + (if (pair? dbdat) + (car dbdat) + dbdat)) + +;; Get/open a database +;; if run-id => get run specific db +;; if #f => get main db +;; if db already open - return inmem +;; if db not open, open inmem, rundb and sync then return inmem +;; inuse gets set automatically for rundb's +;; +(define (db:get-db alldat) ;; run-id) + (if (stack? (alldat-dbstack alldat)) + (if (stack-empty? (alldat-dbstack alldat)) + (let ((newdb (db:open-megatest-db path: (common:get-db-tmp-area alldat)))) + ;; (stack-push! (alldat-dbstack alldat) newdb) + newdb) + (stack-pop! (alldat-dbstack alldat))) + (db:open-db alldat))) + +;; This routine creates the db if not already present. It is only called if the db is not already opened +;; +(define (db:open-db alldat #!key (areapath #f)(do-sync #t)) ;; TODO: actually use areapath + (let ((toppath (alldat-areapath alldat)) + (configdat (alldat-mtconfig alldat)) + (log-port (alldat-log-port alldat)) + (tmpdb-stack (alldat-dbstack alldat))) ;; RA => Returns the first reference in alldat + (if (stack? tmpdb-stack) + (db:get-db tmpdb-stack) ;; get previously opened db (will create new db handle if all in the stack are already used + (let* ((max-stale-tmp (configf:lookup-number configdat "server" "filling-db-max-stale-seconds" default: 10)) + (dbpath (common:get-db-tmp-area alldat)) ;; path to tmp db area + (dbexists (file-exists? dbpath)) + (tmpdbfname (conc dbpath "/megatest.db")) + (dbfexists (file-exists? 
tmpdbfname)) ;; (conc dbpath "/megatest.db"))) + (mtdbexists (file-exists? (conc toppath "/megatest.db"))) + + (mtdbmodtime (if mtdbexists (common:lazy-sqlite-db-modification-time (conc toppath "/megatest.db")) #f)) + (tmpdbmodtime (if dbfexists (common:lazy-sqlite-db-modification-time tmpdbfname) #f)) + (mtdb (db:open-megatest-db)) + (mtdbpath (db:dbdat-get-path mtdb)) + (tmpdb (db:open-megatest-db path: dbpath)) ;; lock-create-open dbpath db:initialize-main-db)) + (refndb (db:open-megatest-db path: dbpath name: "megatest_ref.db")) + (write-access (file-write-access? mtdbpath)) + + ;;(mtdbmodtime (if mtdbexists + ;;(common:lazy-sqlite-db-modification-time mtdbpath) + ;;#f)) ; moving this before db:open-megatest-db is + ;;called. if wal mode is on -WAL and -shm file get + ;;created with causing the tmpdbmodtime timestamp + ;;always greater than mtdbmodtime (tmpdbmodtime (if + ;;dbfexists (common:lazy-sqlite-db-modification-time + ;;tmpdbfname) #f)) + + ;;if wal mode is on -WAL and -shm file get created when + ;;db:open-megatest-db is called. modtimedelta will + ;;always be < 10 so db in tmp not get synced + ;;(tmpdbmodtime (if dbfexists (db:get-last-update-time + ;;(car tmpdb)) #f)) (fmt (file-modification-time + ;;tmpdbfname)) + + (modtimedelta (and mtdbmodtime tmpdbmodtime (- mtdbmodtime tmpdbmodtime)))) + + (handle-exceptions + exn + (let ((call-chain (get-call-chain)) + (msg ((condition-property-accessor 'exn 'message) exn))) + (debug:print 0 log-port "ERROR: attempted to drop triggers on MTRA/megatest.db but failed. Error is " msg) + (set! 
write-access #f)) ;; if we failed to drop the triggers then we probably don't have write access + (when write-access + (sqlite3:execute (car mtdb) "drop trigger if exists update_tests_trigger") + (sqlite3:execute (car mtdb) "drop trigger if exists update_runs_trigger"))) + + ;;(print "mtdbmodtime " mtdbmodtime " tmpdbmodtime " + ;;tmpdbmodtime " mtdbpath " mtdbpath " " (conc *toppath* + ;;"/megatest.db")) (debug:print-info 13 log-port + ;;"db:open-db>> mtdbpath="mtdbpath" mtdbexists="mtdbexists" + ;;and write-access="write-access) + (if (and dbexists (not write-access)) + (begin + (set! *db-write-access* #f) + (alldat-read-only-set! alldat #t))) + (alldat-mtdb-set! alldat mtdb) + (alldat-tmpdb-set! alldat tmpdb) + (alldat-dbstack-set! alldat (make-stack)) ;; why a stack? + (stack-push! (alldat-dbstack alldat) tmpdb) ;; olddb is already a (cons db path) + (alldat-refndb-set! alldat refndb) + ;; (mutex-unlock! *rundb-mutex*) + (if (and (or (not dbfexists) + (and modtimedelta + (> modtimedelta max-stale-tmp))) ;; if db in tmp is over ten seconds older than the file in MTRA then do a sync back + do-sync) + (begin + (debug:print 1 log-port "filling db " (db:dbdat-get-path tmpdb) " with data \n from " (db:dbdat-get-path mtdb) " mod time delta: " modtimedelta) + (db:sync-tables (db:sync-all-tables-list alldat) #f mtdb refndb tmpdb) + ;touch tmp db to avoid wal mode wierdness + (set! 
(file-modification-time tmpdbfname) (current-seconds)) + (debug:print-info 13 log-port "db:sync-all-tables-list done.") + ) + (debug:print 4 log-port " db, " (db:dbdat-get-path tmpdb) " already exists or fresh enough, not propogating data from\n " (db:dbdat-get-path mtdb) " mod time delta: " modtimedelta) ) + ;; (db:multi-db-sync alldat 'old2new)) ;; migrate data from megatest.db automatically + tmpdb)))) ) Index: configf.scm ================================================================== --- configf.scm +++ configf.scm @@ -28,780 +28,5 @@ (declare (uses env)) (declare (uses keys)) (include "common_records.scm") -;; return list (path fullpath configname) -(define (find-config configname #!key (toppath #f)) - (if toppath - (let ((cfname (conc toppath "/" configname))) - (if (common:file-exists? cfname) - (list toppath cfname configname) - (list #f #f #f))) - (let* ((cwd (string-split (current-directory) "/"))) - (let loop ((dir cwd)) - (let* ((path (conc "/" (string-intersperse dir "/"))) - (fullpath (conc path "/" configname))) - (if (common:file-exists? fullpath) - (list path fullpath configname) - (let ((remcwd (take dir (- (length dir) 1)))) - (if (null? remcwd) - (list #f #f #f) ;; #f #f) - (loop remcwd))))))))) - -(define (config:assoc-safe-add alist key val #!key (metadata #f)) - (let ((newalist (filter (lambda (x)(not (equal? key (car x)))) alist))) - (append newalist (list (if metadata - (list key val metadata) - (list key val)))))) - -(define (configf:section-var-set! cfgdat section-name var value #!key (metadata #f)) - (hash-table-set! cfgdat section-name - (config:assoc-safe-add - (hash-table-ref/default cfgdat section-name '()) - var value metadata: metadata))) - -(define (config:eval-string-in-environment str) - ;; (if (or (string-null? str) - ;; (equal? "!" (substring str 0 1))) ;; null string or starts with ! 
are preserved but NOT set in the environment - str - (handle-exceptions - exn - (begin - (debug:print-error 0 *default-log-port* "problem evaluating \"" str "\" in the shell environment") - #f) - (let ((cmdres (process:cmd-run->list (conc "echo " str)))) - (if (null? cmdres) "" - (caar cmdres))))) ;; ) - -;;====================================================================== -;; Make the regexp's needed globally available -;;====================================================================== - -(define configf:include-rx (regexp "^\\[include\\s+(.*)\\]\\s*$")) -(define configf:script-rx (regexp "^\\[scriptinc\\s+(\\S+)([^\\]]*)\\]\\s*$")) ;; include output from a script -(define configf:section-rx (regexp "^\\[(.*)\\]\\s*$")) -(define configf:blank-l-rx (regexp "^\\s*$")) -(define configf:key-sys-pr (regexp "^(\\S+)\\s+\\[system\\s+(\\S+.*)\\]\\s*$")) -(define configf:key-val-pr (regexp "^(\\S+)(\\s+(.*)|())$")) -(define configf:key-no-val (regexp "^(\\S+)(\\s*)$")) -(define configf:comment-rx (regexp "^\\s*#.*")) -(define configf:cont-ln-rx (regexp "^(\\s+)(\\S+.*)$")) -(define configf:settings (regexp "^\\[configf:settings\\s+(\\S+)\\s+(\\S+)]\\s*$")) - -;; read a line and process any #{ ... } constructs - -(define configf:var-expand-regex (regexp "^(.*)#\\{(scheme|system|shell|getenv|get|runconfigs-get|rget|scm|sh|rp|gv|g|mtrah)\\s+([^\\}\\{]*)\\}(.*)")) - -(define (configf:system ht cmd) - (system cmd) - ) - -(define (configf:process-line l ht allow-system #!key (linenum #f)) - (let loop ((res l)) - (if (string? 
res) - (let ((matchdat (string-search configf:var-expand-regex res))) - (if matchdat - (let* ((prestr (list-ref matchdat 1)) - (cmdtype (list-ref matchdat 2)) ;; eval, system, shell, getenv - (cmd (list-ref matchdat 3)) - (poststr (list-ref matchdat 4)) - (result #f) - (start-time (current-seconds)) - (cmdsym (string->symbol cmdtype)) - (fullcmd (case cmdsym - ((scheme scm) (conc "(lambda (ht)" cmd ")")) - ((system) (conc "(lambda (ht)(configf:system ht \"" cmd "\"))")) - ((shell sh) (conc "(lambda (ht)(string-translate (shell \"" cmd "\") \"\n\" \" \"))")) - ((realpath rp)(conc "(lambda (ht)(common:nice-path \"" cmd "\"))")) - ((getenv gv) (conc "(lambda (ht)(get-environment-variable \"" cmd "\"))")) - ((mtrah) (conc "(lambda (ht)" - " (let ((extra \"" cmd "\"))" - " (conc (or *toppath* (get-environment-variable \"MT_RUN_AREA_HOME\"))" - " (if (string-null? extra) \"\" \"/\")" - " extra)))")) - ((get g) - (let* ((parts (string-split cmd)) - (sect (car parts)) - (var (cadr parts))) - (conc "(lambda (ht)(config-lookup ht \"" sect "\" \"" var "\"))"))) - ((runconfigs-get rget) (conc "(lambda (ht)(runconfigs-get ht \"" cmd "\"))")) - ;; ((rget) (conc "(lambda (ht)(runconfigs-get ht \"" cmd "\"))")) - (else "(lambda (ht)(print \"ERROR\") \"ERROR\")")))) - ;; (print "fullcmd=" fullcmd) - (handle-exceptions - exn - (begin - (debug:print 0 *default-log-port* "WARNING: failed to process config input \"" l "\"") - (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) - ;; (print "exn=" (condition->list exn)) - (set! result (conc "#{( " cmdtype ") " cmd "}, full expansion: " fullcmd))) - (if (or allow-system - (not (member cmdtype '("system" "shell" "sh")))) - (with-input-from-string fullcmd - (lambda () - (set! result ((eval (read)) ht)))) - (set! 
result (conc "#{(" cmdtype ") " cmd "}")))) - (case cmdsym - ((system shell scheme) - (let ((delta (- (current-seconds) start-time))) - (if (> delta 2) - (debug:print-info 0 *default-log-port* "for line \"" l "\"\n command: " cmd " took " delta " seconds to run with output:\n " result) - (debug:print-info 9 *default-log-port* "for line \"" l "\"\n command: " cmd " took " delta " seconds to run with output:\n " result))))) - (loop (conc prestr result poststr))) - res)) - res))) - -;; Run a shell command and return the output as a string -(define (shell cmd) - (let* ((output (process:cmd-run->list cmd)) - (res (car output)) - (status (cadr output))) - (if (equal? status 0) - (let ((outres (string-intersperse - res - "\n"))) - (debug:print-info 4 *default-log-port* "shell result:\n" outres) - outres) - (begin - (with-output-to-port (current-error-port) - (lambda () - (print "ERROR: " cmd " returned bad exit code " status))) - "")))) - -;; this was inline but I'm pretty sure that is a hold over from when it was *very* simple ... -;; -(define (configf:read-line p ht allow-processing settings) - (let loop ((inl (read-line p))) - (let ((cont-line (and (string? inl) - (not (string-null? inl)) - (equal? "\\" (string-take-right inl 1))))) - (if cont-line ;; last character is \ - (let ((nextl (read-line p))) - (if (not (eof-object? nextl)) - (loop (string-append (if cont-line - (string-take inl (- (string-length inl) 1)) - inl) - nextl)))) - (let ((res (case allow-processing ;; if (and allow-processing - ;; (not (eq? allow-processing 'return-string))) - ((#t #f) - (configf:process-line inl ht allow-processing)) - ((return-string) - inl) - (else - (configf:process-line inl ht allow-processing))))) - (if (and (string? res) - (not (equal? 
(hash-table-ref/default settings "trim-trailing-spaces" "no") "no"))) - (string-substitute "\\s+$" "" res) - res)))))) - -(define (configf:cfgdat->env-alist section cfgdat-ht allow-system) - (filter - (lambda (pair) - (let* ((var (car pair)) - (val (cdr pair))) - (cons var - (cond - ((and allow-system (procedure? val)) ;; if we decided to use something other than #t or #f for allow-system ('return-procs or 'return-string) , this may become problematic - (val)) - ((procedure? val) #f) - ((string? val) val) - (else "#f"))))) - (append - (hash-table-ref/default cfgdat-ht "default" '()) - (if (equal? section "default") '() (hash-table-ref/default cfgdat-ht section '()))))) - -(define (calc-allow-system allow-system section sections) - (if sections - (and (or (equal? "default" section) - (member section sections)) - allow-system) ;; account for sections and return allow-system as it might be a symbol such as return-strings - allow-system)) - -;; given a config hash and a section name, apply that section to all matching sections (using wildcard % or regex if /..../) -;; remove the section when done so that there is no downstream clobbering -;; -(define (configf:apply-wildcards ht section-name) - (if (hash-table-exists? ht section-name) - (let* ((vars (hash-table-ref ht section-name)) - (rxstr (if (string-contains section-name "%") - (string-substitute (regexp "%") ".*" section-name) - (string-substitute (regexp "^/(.*)/$") "\\1" section-name))) - (rx (regexp rxstr))) - ;; (print "\nsection-name: " section-name " rxstr: " rxstr) - (for-each - (lambda (section) - (if section - (let ((same-section (string=? 
section-name section)) - (rx-match (string-match rx section))) - ;; (print "section: " section " vars: " vars " same-section: " same-section " rx-match: " rx-match) - (if (and (not same-section) rx-match) - (for-each - (lambda (bundle) - ;; (print "bundle: " bundle) - (let ((key (car bundle)) - (val (cadr bundle)) - (meta (if (> (length bundle) 2)(caddr bundle) #f))) - (hash-table-set! ht section (config:assoc-safe-add (hash-table-ref ht section) key val metadata: meta)))) - vars))))) - (hash-table-keys ht)))) - ht) - -;; read a config file, returns hash table of alists - -;; read a config file, returns hash table of alists -;; adds to ht if given (must be #f otherwise) -;; allow-system: -;; #f - do not evaluate [system -;; #t - immediately evaluate [system and store result as string -;; 'return-procs -- return a proc taking ht as an argument that may be evaulated at some future time -;; 'return-string -- return a string representing a proc taking ht as an argument that may be evaulated at some future time -;; envion-patt is a regex spec that identifies sections that will be eval'd -;; in the environment on the fly -;; sections: #f => get all, else list of sections to gather -;; post-section-procs alist of section-pattern => proc, where: (proc section-name next-section-name ht curr-path) -;; apply-wildcards: #t/#f - apply vars from targets with % wildcards to all matching sections -;; -(define (read-config path ht allow-system #!key (environ-patt #f) (curr-section #f) - (sections #f) (settings (make-hash-table)) (keep-filenames #f) - (post-section-procs '()) (apply-wildcards #t) ) - (debug:print 9 *default-log-port* "START: " path) -;; (if *configdat* -;; (common:save-pkt `((action . read-config) -;; (f . ,(cond ((string? path) path) -;; ((port? path) "port") -;; (else (conc path)))) -;; (T . configf)) -;; *configdat* #t add-only: #t)) - (if (and (not (port? path)) - (not (common:file-exists? 
path))) ;; for case where we are handed a port - (begin - (debug:print-info 1 *default-log-port* "read-config - file not found " path " current path: " (current-directory)) - ;; WARNING: This is a risky change but really, we should not return an empty hash table if no file read? - #f) ;; (if (not ht)(make-hash-table) ht)) - (let ((inp (if (string? path) - (open-input-file path) - path)) ;; we can be handed a port - (res (if (not ht)(make-hash-table) ht)) - (metapath (if (or (debug:debug-mode 9) - keep-filenames) - path #f)) - (process-wildcards (lambda (res curr-section-name) - (if (and apply-wildcards - (or (string-contains curr-section-name "%") ;; wildcard - (string-match "/.*/" curr-section-name))) ;; regex - (begin - (configf:apply-wildcards res curr-section-name) - (hash-table-delete! res curr-section-name)))))) ;; NOTE: if the section is a wild card it will be REMOVED from res - (let loop ((inl (configf:read-line inp res (calc-allow-system allow-system curr-section sections) settings)) ;; (read-line inp)) - (curr-section-name (if curr-section curr-section "default")) - (var-flag #f);; turn on for key-var-pr and cont-ln-rx, turn off elsewhere - (lead #f)) - (debug:print-info 8 *default-log-port* "curr-section-name: " curr-section-name " var-flag: " var-flag "\n inl: \"" inl "\"") - (if (eof-object? inl) - (begin - ;; process last section for wildcards - (process-wildcards res curr-section-name) - (if (string? path) ;; we received a path, not a port, thus we are responsible for closing it. - (close-input-port inp)) - (if (list? sections) ;; delete all sections except given when sections is provided - (for-each - (lambda (section) - (if (not (member section sections)) - (hash-table-delete! 
res section))) ;; we are using "" as a dumping ground and must remove it before returning the ht - (hash-table-keys res))) - (debug:print 9 *default-log-port* "END: " path) - res - ) ;; retval - (regex-case - inl - (configf:comment-rx _ (loop (configf:read-line inp res (calc-allow-system allow-system curr-section-name sections) settings) - curr-section-name #f #f)) - - (configf:blank-l-rx _ (loop (configf:read-line inp res (calc-allow-system allow-system curr-section-name sections) settings) - curr-section-name #f #f)) - (configf:settings ( x setting val ) - (begin - (hash-table-set! settings setting val) - (loop (configf:read-line inp res (calc-allow-system allow-system curr-section-name sections) settings) - curr-section-name #f #f))) - - (configf:include-rx ( x include-file ) - (let* ((curr-conf-dir (pathname-directory path)) - (full-conf (if (and (absolute-pathname? include-file) (file-exists? include-file)) - include-file - (common:nice-path - (conc (if curr-conf-dir - curr-conf-dir - ".") - "/" include-file))))) - (let ((all-matches (sort (handle-exceptions exn (list) (glob full-conf)) string<=?))) - (if (null? 
all-matches) - (begin - (debug:print '(2 9) #f "INFO: include file(s) matching " include-file " not found (called from " path ")") - (debug:print 2 *default-log-port* " " full-conf)) - (for-each - (lambda (fpath) - ;; (push-directory conf-dir) - (debug:print 9 *default-log-port* "Including: " full-conf) - (read-config fpath res allow-system environ-patt: environ-patt - curr-section: curr-section-name sections: sections settings: settings - keep-filenames: keep-filenames)) - all-matches)) - (loop (configf:read-line inp res (calc-allow-system allow-system curr-section-name sections) settings) - curr-section-name #f #f)))) - (configf:script-rx ( x include-script params);; handle-exceptions - ;; exn - ;; (begin - ;; (debug:print '(0 2 9) #f "INFO: include from script " include-script " failed.") - ;; (loop (configf:read-line inp res (calc-allow-system allow-system curr-section-name sections) settings) curr-section-name #f #f)) - (if (and (common:file-exists? include-script)(file-execute-access? include-script)) - (let* ((local-allow-system (calc-allow-system allow-system curr-section-name sections)) - (env-delta (configf:cfgdat->env-alist curr-section-name res local-allow-system)) - (new-inp-port - (common:with-env-vars - env-delta - (lambda () - (open-input-pipe (conc include-script " " params)))))) - (debug:print '(2 9) *default-log-port* "Including from script output: " include-script) - ;; (print "We got here, calling read-config next. 
Port is: " new-inp-port) - (read-config new-inp-port res allow-system environ-patt: environ-patt curr-section: curr-section-name sections: sections settings: settings keep-filenames: keep-filenames) - (close-input-port new-inp-port) - (loop (configf:read-line inp res (calc-allow-system allow-system curr-section-name sections) settings) curr-section-name #f #f)) - (begin - (debug:print 0 *default-log-port* "Script not found or not exectutable: " include-script) - (loop (configf:read-line inp res (calc-allow-system allow-system curr-section-name sections) settings) curr-section-name #f #f))) - ) ;; ) - (configf:section-rx ( x section-name ) - (begin - ;; call post-section-procs - (for-each - (lambda (dat) - (let ((patt (car dat)) - (proc (cdr dat))) - (if (string-match patt curr-section-name) - (proc curr-section-name section-name res path)))) - post-section-procs) - ;; after gathering the vars for a section and if apply-wildcards is true and if there is a wildcard in the section name process wildcards - ;; NOTE: we are processing the curr-section-name, NOT section-name. - (process-wildcards res curr-section-name) - (if (not (hash-table-ref/default res section-name #f))(hash-table-set! res section-name '())) ;; ensure that mere mention of a section is not lost - (loop (configf:read-line inp res (calc-allow-system allow-system curr-section-name sections) settings) - ;; if we have the sections list then force all settings into "" and delete it later? - ;; (if (or (not sections) - ;; (member section-name sections)) - ;; section-name "") ;; stick everything into "". NOPE: We need new strategy. Put stuff in correct sections and then delete all sections later. 
- section-name - #f #f))) - (configf:key-sys-pr ( x key cmd ) - (if (calc-allow-system allow-system curr-section-name sections) - (let ((alist (hash-table-ref/default res curr-section-name '())) - (val-proc (lambda () - (let* ((start-time (current-seconds)) - (local-allow-system (calc-allow-system allow-system curr-section-name sections)) - (env-delta (configf:cfgdat->env-alist curr-section-name res local-allow-system)) - (cmdres (process:cmd-run->list cmd delta-env-alist-or-hash-table: env-delta)) ;; BB: here is where [system is exec'd. needs to have env from other vars! - (delta (- (current-seconds) start-time)) - (status (cadr cmdres)) - (res (car cmdres))) - (debug:print-info 4 *default-log-port* "" inl "\n => " (string-intersperse res "\n")) - (if (not (eq? status 0)) - (begin - (debug:print-error 0 *default-log-port* "problem with " inl ", return code " status - " output: " cmdres))) - (if (> delta 2) - (debug:print-info 0 *default-log-port* "for line \"" inl "\"\n command: " cmd " took " delta " seconds to run with output:\n " res) - (debug:print-info 9 *default-log-port* "for line \"" inl "\"\n command: " cmd " took " delta " seconds to run with output:\n " res)) - (if (null? res) - "" - (string-intersperse res " ")))))) - (hash-table-set! res curr-section-name - (config:assoc-safe-add alist - key - (case (calc-allow-system allow-system curr-section-name sections) - ((return-procs) val-proc) - ((return-string) cmd) - (else (val-proc))) - metadata: metapath)) - (loop (configf:read-line inp res (calc-allow-system allow-system curr-section-name sections) settings) curr-section-name #f #f)) - (loop (configf:read-line inp res - (calc-allow-system allow-system curr-section-name sections) - settings) - curr-section-name #f #f))) - - (configf:key-no-val ( x key val) - (let* ((alist (hash-table-ref/default res curr-section-name '())) - (fval (or (if (string? 
val) val #f) ""))) ;; fval should be either "" or " " (one or more spaces) - (debug:print 10 *default-log-port* " setting: [" curr-section-name "] " key " = #t") - (safe-setenv key fval) - (hash-table-set! res curr-section-name - (config:assoc-safe-add alist key fval metadata: metapath)) - (loop (configf:read-line inp res - (calc-allow-system allow-system curr-section-name sections) - settings) - curr-section-name key #f))) - - (configf:key-val-pr ( x key unk1 val unk2 ) - (let* ((alist (hash-table-ref/default res curr-section-name '())) - (envar (and environ-patt - (string-search (regexp environ-patt) curr-section-name) ;; does the section match the envionpatt? - (and (not (string-null? key)) - (not (equal? "!" (substring key 0 1)))) ;; ! as leading character is a signature to NOT export to the environment - ;; (string-match "^.*:.*:.*$" key) ;; ;; something:something:something reserved for triggers in runconfigs - )) - (realval (if envar - (config:eval-string-in-environment val) - val))) - (debug:print-info 6 *default-log-port* "read-config env setting, envar: " envar " realval: " realval " val: " val " key: " key " curr-section-name: " curr-section-name) - (if envar (safe-setenv key realval)) - (debug:print 10 *default-log-port* " setting: [" curr-section-name "] " key " = " val) - (hash-table-set! res curr-section-name - (config:assoc-safe-add alist key realval metadata: metapath)) - (loop (configf:read-line inp res - (calc-allow-system allow-system curr-section-name sections) settings) - curr-section-name key #f))) - ;; if a continued line - (configf:cont-ln-rx ( x whsp val ) - (let ((alist (hash-table-ref/default res curr-section-name '()))) - (if var-flag ;; if set to a string then we have a continued var - (let ((newval (conc - (config-lookup res curr-section-name var-flag) "\n" - ;; trim lead from the incoming whsp to support some indenting. 
- (if lead - (string-substitute (regexp lead) "" whsp) - "") - val))) - ;; (print "val: " val "\nnewval: \"" newval "\"\nvarflag: " var-flag) - (hash-table-set! res curr-section-name - (config:assoc-safe-add alist var-flag newval metadata: metapath)) - (loop (configf:read-line inp res (calc-allow-system allow-system curr-section-name sections) settings) curr-section-name var-flag (if lead lead whsp))) - (loop (configf:read-line inp res (calc-allow-system allow-system curr-section-name sections) settings) curr-section-name #f #f)))) - (else (debug:print-error 0 *default-log-port* "problem parsing " path ",\n \"" inl "\"") - (set! var-flag #f) - (loop (configf:read-line inp res (calc-allow-system allow-system curr-section-name sections) settings) curr-section-name #f #f)))) - ) ;; end loop - ))) - -;; pathenvvar will set the named var to the path of the config -(define (find-and-read-config fname #!key (environ-patt #f)(given-toppath #f)(pathenvvar #f)) - (let* ((curr-dir (current-directory)) - (configinfo (find-config fname toppath: given-toppath)) - (toppath (car configinfo)) - (configfile (cadr configinfo)) - (set-fields (lambda (curr-section next-section ht path) - (let ((field-names (if ht (common:get-fields ht) '())) - (target (or (getenv "MT_TARGET")(args:get-arg "-reqtarg")(args:get-arg "-target")))) - (debug:print-info 9 *default-log-port* "set-fields with field-names=" field-names " target=" target " curr-section=" curr-section " next-section=" next-section " path=" path " ht=" ht) - (if (not (null? 
field-names))(keys:target-set-args field-names target #f)))))) - (if toppath (change-directory toppath)) - (if (and toppath pathenvvar)(setenv pathenvvar toppath)) - (let ((configdat (if configfile - (read-config configfile #f #t environ-patt: environ-patt post-section-procs: (list (cons "^fields$" set-fields)) #f)))) - (if toppath (change-directory curr-dir)) - (list configdat toppath configfile fname)))) - -(define (config-lookup cfgdat section var) - (if (hash-table? cfgdat) - (let ((sectdat (hash-table-ref/default cfgdat section '()))) - (if (null? sectdat) - #f - (let ((match (assoc var sectdat))) - (if match ;; (and match (list? match)(> (length match) 1)) - (cadr match) - #f)) - )) - #f)) - -;; use to have definitive setting: -;; [foo] -;; var yes -;; -;; (configf:var-is? cfgdat "foo" "var" "yes") => #t -;; -(define (configf:var-is? cfgdat section var expected-val) - (equal? (configf:lookup cfgdat section var) expected-val)) - -(define configf:lookup config-lookup) -(define configf:read-file read-config) - -(define (configf:section-vars cfgdat section) - (let ((sectdat (hash-table-ref/default cfgdat section '()))) - (if (null? sectdat) - '() - (map car sectdat)))) - -(define (configf:set-section-var cfgdat section var val) - (let ((sectdat (configf:get-section cfgdat section))) - (hash-table-set! cfgdat section - (config:assoc-safe-add sectdat var val)))) - - ;;(append (filter (lambda (x)(not (assoc var sectdat))) sectdat) - ;; (list var val)))) - -(define (setup) - (let* ((configf (find-config "megatest.config")) - (config (if configf (read-config configf #f #t) #f))) - (if config - (setenv "RUN_AREA_HOME" (pathname-directory configf))) - config)) - -;;====================================================================== -;; Non destructive writing of config file -;;====================================================================== - -(define (configf:compress-multi-lines fdat) - ;; step 1.5 - compress any continued lines - (if (null? 
fdat) fdat - (let loop ((hed (car fdat)) - (tal (cdr fdat)) - (cur "") - (led #f) - (res '())) - ;; ALL WHITESPACE LEADING LINES ARE TACKED ON!! - ;; 1. remove led whitespace - ;; 2. tack on to hed with "\n" - (let ((match (string-match configf:cont-ln-rx hed))) - (if match ;; blast! have to deal with a multiline - (let* ((lead (cadr match)) - (lval (caddr match)) - (newl (conc cur "\n" lval))) - (if (not led)(set! led lead)) - (if (null? tal) - (set! fdat (append fdat (list newl))) - (loop (car tal)(cdr tal) newl led res))) ;; NB// not tacking newl onto res - (let ((newres (if led - (append res (list cur hed)) - (append res (list hed))))) - ;; prev was a multiline - (if (null? tal) - newres - (loop (car tal)(cdr tal) "" #f newres)))))))) - -;; note: I'm cheating a little here. I merely replace "\n" with "\n " -(define (configf:expand-multi-lines fdat) - ;; step 1.5 - compress any continued lines - (if (null? fdat) fdat - (let loop ((hed (car fdat)) - (tal (cdr fdat)) - (res '())) - (let ((newres (append res (list (string-substitute (regexp "\n") "\n " hed #t))))) - (if (null? tal) - newres - (loop (car tal)(cdr tal) newres)))))) - -(define (configf:file->list fname) - (if (common:file-exists? fname) - (let ((inp (open-input-file fname))) - (let loop ((inl (read-line inp)) - (res '())) - (if (eof-object? inl) - (begin - (close-input-port inp) - (reverse res)) - (loop (read-line inp)(cons inl res))))) - '())) - -;;====================================================================== -;; Write a config -;; 0. Given a refererence data structure "indat" -;; 1. Open the output file and read it into a list -;; 2. Flatten any multiline entries -;; 3. Modify values per contents of "indat" and remove absent values -;; 4. Append new values to the section (immediately after last legit entry) -;; 5. 
Write out the new list -;;====================================================================== - -(define (configf:write-config indat fname #!key (required-sections '())) - (let* (;; step 1: Open the output file and read it into a list - (fdat (configf:file->list fname)) - (refdat (make-hash-table)) - (sechash (make-hash-table)) ;; current section hash, init with hash for "default" section - (new #f) ;; put the line to be used in new, if it is to be deleted the set new to #f - (secname #f)) - - ;; step 2: Flatten multiline entries - (if (not (null? fdat))(set! fdat (configf:compress-multi-line fdat))) - - ;; step 3: Modify values per contents of "indat" and remove absent values - (if (not (null? fdat)) - (let loop ((hed (car fdat)) - (tal (cadr fdat)) - (res '()) - (lnum 0)) - (regex-case - hed - (configf:comment-rx _ (set! res (append res (list hed)))) ;; (loop (read-line inp) curr-section-name #f #f)) - (configf:blank-l-rx _ (set! res (append res (list hed)))) ;; (loop (read-line inp) curr-section-name #f #f)) - (configf:section-rx ( x section-name ) (let ((section-hash (hash-table-ref/default refdat section-name #f))) - (if (not section-hash) - (let ((newhash (make-hash-table))) - (hash-table-set! refhash section-name newhash) - (set! sechash newhash)) - (set! sechash section-hash)) - (set! new hed) ;; will append this at the bottom of the loop - (set! secname section-name) - )) - ;; No need to process key cmd, let it fall though to key val - (configf:key-val-pr ( x key val ) - (let ((newval (config-lookup indat sec key))) - ;; can handle newval == #f here => that means key is removed - (cond - ((equal? newval val) - (set! res (append res (list hed)))) - ((not newval) ;; key has been removed - (set! new #f)) - ((not (equal? newval val)) - (hash-table-set! sechash key newval) - (set! 
new (conc key " " newval))) - (else - (debug:print-error 0 *default-log-port* "problem parsing line number " lnum "\"" hed "\""))))) - (else - (debug:print-error 0 *default-log-port* "Problem parsing line num " lnum " :\n " hed ))) - (if (not (null? tal)) - (loop (car tal)(cdr tal)(if new (append res (list new)) res)(+ lnum 1))) - ;; drop to here when done processing, res contains modified list of lines - (set! fdat res))) - - ;; step 4: Append new values to the section - (for-each - (lambda (section) - (let ((sdat '()) ;; append needed bits here - (svars (configf:section-vars indat section))) - (for-each - (lambda (var) - (let ((val (config-lookup refdat section var))) - (if (not val) ;; this one is new - (begin - (if (null? sdat)(set! sdat (list (conc "[" section "]")))) - (set! sdat (append sdat (list (conc var " " val)))))))) - svars) - (set! fdat (append fdat sdat)))) - (delete-duplicates (append require-sections (hash-table-keys indat)))) - - ;; step 5: Write out new file - (with-output-to-file fname - (lambda () - (for-each - (lambda (line) - (print line)) - (configf:expand-multi-lines fdat)))))) - -;;====================================================================== -;; refdb -;;====================================================================== - -;; reads a refdb into an assoc array of assoc arrays -;; returns (list dat msg) -(define (configf:read-refdb refdb-path) - (let ((sheets-file (conc refdb-path "/sheet-names.cfg"))) - (if (not (common:file-exists? sheets-file)) - (list #f (conc "ERROR: no refdb found at " refdb-path)) - (if (not (file-read-access? sheets-file)) - (list #f (conc "ERROR: refdb file not readable at " refdb-path)) - (let* ((sheets (with-input-from-file sheets-file - (lambda () - (let loop ((inl (read-line)) - (res '())) - (if (eof-object? 
inl) - (reverse res) - (loop (read-line)(cons inl res))))))) - (data '())) - (for-each - (lambda (sheet-name) - (let* ((dat-path (conc refdb-path "/" sheet-name ".dat")) - (ref-dat (configf:read-file dat-path #f #t)) - (ref-assoc (map (lambda (key) - (list key (hash-table-ref ref-dat key))) - (hash-table-keys ref-dat)))) - ;; (hash-table->alist ref-dat))) - ;; (set! data (append data (list (list sheet-name ref-assoc)))))) - (set! data (cons (list sheet-name ref-assoc) data)))) - sheets) - (list data "NO ERRORS")))))) - -;; map over all pairs in a three level hierarchial alist and apply a function to the keys/val -;; -(define (configf:map-all-hier-alist data proc #!key (initproc1 #f)(initproc2 #f)(initproc3 #f)) - (for-each - (lambda (sheetname) - (let* ((sheettmp (assoc sheetname data)) - (sheetdat (if sheettmp (cadr sheettmp) '()))) - (if initproc1 (initproc1 sheetname)) - (for-each - (lambda (sectionname) - (let* ((sectiontmp (assoc sectionname sheetdat)) - (sectiondat (if sectiontmp (cadr sectiontmp) '()))) - (if initproc2 (initproc2 sheetname sectionname)) - (for-each - (lambda (varname) - (let* ((valtmp (assoc varname sectiondat)) - (val (if valtmp (cadr valtmp) ""))) - (proc sheetname sectionname varname val))) - (map car sectiondat)))) - (map car sheetdat)))) - (map car data)) - data) - -;;====================================================================== -;; C O N F I G T O / F R O M A L I S T -;;====================================================================== - -(define (configf:config->alist cfgdat) - (hash-table->alist cfgdat)) - -(define (configf:alist->config adat) - (let ((ht (make-hash-table))) - (for-each - (lambda (section) - (hash-table-set! 
ht (car section)(cdr section))) - adat) - ht)) - -;; if -(define (configf:read-alist fname) - (handle-exceptions - exn - #f - (configf:alist->config - (with-input-from-file fname read)))) - -(define (configf:write-alist cdat fname) - (if (not (common:faux-lock fname)) - (debug:print 0 *default-log-port* "INFO: Could not get lock on " fname)) - (let* ((dat (configf:config->alist cdat)) - (res - (begin - (with-output-to-file fname ;; first write out the file - (lambda () - (pp dat))) - - (if (common:file-exists? fname) ;; now verify it is readable - (if (configf:read-alist fname) - #t ;; data is good. - (begin - (handle-exceptions - exn - #f - (debug:print 0 *default-log-port* "WARNING: content " dat " for cache " fname " is not readable. Deleting generated file.") - (delete-file fname)) - #f)) - #f)))) - (common:faux-unlock fname) - res)) - -;; convert hierarchial list to ini format -;; -(define (configf:config->ini data) - (map - (lambda (section) - (let ((section-name (car section)) - (section-dat (cdr section))) - (print "\n[" section-name "]") - (map (lambda (dat-pair) - (let* ((var (car dat-pair)) - (val (cadr dat-pair)) - (fname (if (> (length dat-pair) 2)(caddr dat-pair) #f))) - (if fname (print "# " var "=>" fname)) - (print var " " val))) - section-dat))) ;; (print "section-dat: " section-dat)) - (hash-table->alist data))) Index: dashboard-context-menu.scm ================================================================== --- dashboard-context-menu.scm +++ dashboard-context-menu.scm @@ -47,314 +47,5 @@ (include "common_records.scm") (include "db_records.scm") (include "run_records.scm") -(define (dboard:launch-testpanel run-id test-id) - (let* (;; (cfg-sh (conc *common:this-exe-dir* "/cfg.sh")) - ;; (cmd (conc - ;; (if (common:file-exists? 
cfg-sh) - ;; (conc "source "cfg-sh" && ") - ;; "") - ;; *common:this-exe-fullpath* - ;; " -test " run-id "," test-id - ;; " &")) - (cmd (conc *common:this-exe-dir*"/../dashboard " - "-test " run-id "," test-id - " &"))) - (system cmd))) - - -(define (dashboard:run-menu-items run-id test-id target runname test-name testpatt item-test-path test-info) - (list - (iup:menu-item - (conc "Rerun " testpatt) - #:action - (lambda (obj) - ;; (print " run-id: " run-id " test-id: " test-id " target: " target " runname: " runname " test-name: " test-name " testpatt: " testpatt "item-path : " item-path) - (common:run-a-command - (conc "megatest -run -target " target - " -runname " runname - " -testpatt " testpatt - " -preclean -clean-cache") - ))) - (iup:menu-item - "Rerun Complete Run" - #:action - (lambda (obj) - (common:run-a-command - (conc "megatest -set-state-status NOT_STARTED,n/a -run -target " target - " -runname " runname - " -testpatt % " - " -preclean -clean-cache")))) - (iup:menu-item - "Clean Complete Run" - #:action - (lambda (obj) - (common:run-a-command - (conc "megatest -remove-runs -target " target - " -runname " runname - " -testpatt % ")))) - (iup:menu-item - "Kill Complete Run" - #:action - (lambda (obj) - (common:run-a-command - (conc "megatest -set-state-status KILLREQ,n/a -target " target - " -runname " runname - " -testpatt % " - " -state RUNNING,REMOTEHOSTSTART,LAUNCHED,NOT_STARTED")))) - (iup:menu-item - "Delete Run Data" - #:action - (lambda (obj) - (common:run-a-command - (conc "megatest -remove-runs -target " target - " -runname " runname - " -testpatt % " - " -keep-records")))))) - -(define (dashboard:test-menu-items run-id test-id target runname test-name testpatt item-test-path test-info) - (list - (iup:menu-item - (conc "Rerun " item-test-path) - #:action - (lambda (obj) - (common:run-a-command - (conc "megatest -set-state-status NOT_STARTED,n/a -run -target " target - " -runname " runname - " -testpatt " item-test-path - " -preclean 
-clean-cache")))) - (iup:menu-item - (conc "Kill " item-test-path) - #:action - (lambda (obj) - ;; (rmt:test-set-state-status-by-id run-id test-id "KILLREQ" #f #f) - (common:run-a-command - (conc "megatest -set-state-status KILLREQ,n/a -target " target - " -runname " runname - " -testpatt " item-test-path - " -state RUNNING,REMOTEHOSTSTART,LAUNCHED")))) - (iup:menu-item - (conc "Delete data : " item-test-path) - #:action - (lambda (obj) - (common:run-a-command - (conc "megatest -remove-runs -target " target - " -runname " runname - " -testpatt " item-test-path - " -keep-records")))) - (iup:menu-item - (conc "Clean "item-test-path) - #:action - (lambda (obj) - (common:run-a-command - (conc "megatest -remove-runs -target " target - " -runname " runname - " -testpatt " item-test-path)))) - (iup:menu-item - "Start xterm" - #:action - (lambda (obj) - (dcommon:examine-xterm run-id test-id))) - ;;(let* ((cmd (conc (car (argv)) " -xterm " run-id "," test-id "&"))) - ;; (system cmd)))) - (iup:menu-item - "Edit testconfig" - #:action - (lambda (obj) - (let* ((all-tests (tests:get-all)) - (editor-rx (or (configf:lookup *configdat* "setup" "editor-regex") - "\\b(vim?|nano|pico)\\b")) - (editor (or (configf:lookup *configdat* "setup" "editor") - (get-environment-variable "VISUAL") - (get-environment-variable "EDITOR") "vi")) - (tconfig (conc (hash-table-ref all-tests test-name) "/testconfig")) - (cmd (conc (if (string-search editor-rx editor) - (conc "xterm -e " editor) - editor) - " " tconfig " &"))) - (system cmd)))))) - -(define (dashboard:step-logs-menu-item run-id test-id target runname test-name testpatt item-test-path test-info) - (let* ((steps (tests:get-compressed-steps run-id test-id)) ;; # - (rundir (db:test-get-rundir test-info))) - - (iup:menu-item - "Step logs" - (apply iup:menu - (map (lambda (step) - (let ((stepname (vector-ref step 0)) - (logfile (vector-ref step 5)) - (status (vector-ref step 3))) - (iup:menu-item - (conc stepname "/" (if (string=? 
logfile "") "no log!" logfile) " (" status ")") - #:action (lambda (obj) - (let ((fullfile (conc rundir "/" logfile))) - (if (common:file-exists? fullfile) - (dcommon:run-html-viewer fullfile) - (message-window (conc "file " fullfile " not found")))))))) - steps))))) - -(define (dashboard:toplevel-menu-items run-id test-id target runname test-name testpatt item-test-path test-info) - (list - - (iup:menu-item - "Test Control Panel" - #:action - (lambda (obj) - (dboard:launch-testpanel run-id test-id))) - - (dashboard:step-logs-menu-item run-id test-id target runname test-name testpatt item-test-path test-info) - - (iup:menu-item - (conc "Rerun " item-test-path) - #:action - (lambda (obj) - (common:run-a-command - (conc "megatest -set-state-status NOT_STARTED,n/a -run -target " target - " -runname " runname - " -testpatt " item-test-path - " -preclean -clean-cache")))) - - (iup:menu-item - "Start xterm" - #:action - (lambda (obj) - (dcommon:examine-xterm run-id test-id))) - - (iup:menu-item - (conc "Kill " item-test-path) - #:action - (lambda (obj) - ;; (rmt:test-set-state-status-by-id run-id test-id "KILLREQ" #f #f) - (common:run-a-command - (conc "megatest -set-state-status KILLREQ,n/a -target " target - " -runname " runname - " -testpatt " item-test-path - " -state RUNNING,REMOTEHOSTSTART,LAUNCHED,NOT_STARTED")))) - - (let* ((rundir (db:test-get-rundir test-info)) - (has-subrun (subrun:subrun-test-initialized? rundir))) - (if has-subrun - (iup:menu-item - "Launch subrun dashboard" - #:action - (lambda (obj) - (subrun:launch-dashboard rundir))) - (iup:vbox))) - - (iup:menu-item - (conc "View Log " item-test-path) - #:action - (lambda (obj) - (let* ((rundir (db:test-get-rundir test-info)) - (logf (db:test-get-final_logf test-info)) - (fullfile (conc rundir "/" logf))) - (if (common:file-exists? 
fullfile) - (dcommon:run-html-viewer fullfile) - (message-window (conc "file " fullfile " not found."))))) - ) - )) -;; example section for megatest.config: -;; -;; -;; [custom-context-menu-items] -;; # : -;; item1 custom show run-id (%run-id%):echo "%run-id%" -;; item2 custom show test-id (%test-id%):echo "%test-id%" -;; item3 custom show target (%target%):echo "%target%" -;; item4 custom show test-name (%test-name%):echo "%test-name%" -;; item5 custom show test-patt (%test-patt%):echo "%test-patt%" -;; item6 custom show test-run-dir (%test-run-dir%):echo "%test-run-dir%" -;; item7 custom show run-area-home (%run-area-home%):echo "%run-area-home%" -;; item8 custom show megatest root (%mt-root%):echo "%mt-root%" -;; item9 custom ls : ls -lrt -;; item10 custom see $MT_RUN_AREA_HOME (not yet implemented) : echo $MT_RUN_AREA_HOME - -(define (dashboard:custom-menu-items run-id test-id target run-name test-name testpatt item-test-path test-info) - (let* ((vars (configf:section-vars *configdat* "custom-context-menu-items")) - (item-path (db:test-get-item-path test-info)) - (mt-root (pathname-directory (pathname-directory *common:this-exe-dir* )))) - (filter-map - (lambda (var) - (let* ((val (configf:lookup *configdat* "custom-context-menu-items" var)) - (m (string-match "^\\s*([^:]+?)\\s*:\\s*(.*?)\\s*$" val))) - (if m - (let* ((menu-item-text-raw (list-ref m 1)) - (command-line-raw (list-ref m 2)) - (subst-alist ;; template vars - `(( "%run-id%" . ,run-id ) - ( "%test-id%" . ,test-id ) - ( "%target%" . ,target ) - ( "%test-name%" . ,test-name) - ( "%test-patt%" . ,testpatt) - ( "%test-run-dir%" . ,(db:test-get-rundir test-info)) - ( "%mt-root%" . ,mt-root) - ( "%run-name%" . ,run-name) - ( "%run-area-home%" . ,*toppath*) - ( "%item-path%" . ,item-path) - ( "%item-test-patt%" . 
,item-test-path ))) - (command-line ;; replace template vars - (foldr - (lambda (x i) - (string-substitute - (car x) - (->string (cdr x)) - i - #t)) - command-line-raw - subst-alist)) - (menu-item-text ;; replace template vars - (foldr - (lambda (x i) - (string-substitute - (car x) - (->string (cdr x)) - i - #t)) - menu-item-text-raw - subst-alist))) - (iup:menu-item - (conc "*"menu-item-text) - #:action - (lambda (obj) - - (let* ((scheme-match (string-match "^#(\\(.*)" command-line))) - ;;(BB> "cmdline is >"command-line"<") - (common:with-env-vars - ;; TODO: with-env-vars - ;; TODO: with-env-vars MT_* - (runs:get-mt-env-alist run-id run-name target test-name item-path) - - (lambda () - (if scheme-match - (begin - (handle-exceptions - exn - (print "error with custom menu scheme") - (begin - ;;(BB> "gonna eval it!") - (eval (with-input-from-string (cadr scheme-match) read))))) - (common:run-a-command command-line with-vars: #t)))))))) - #f))) - vars))) - -(define (dashboard:context-menu run-id test-id target runname test-name testpatt item-test-path test-info) - (let* ((run-menu-items - (dashboard:run-menu-items run-id test-id target runname test-name testpatt item-test-path test-info)) - (test-menu-items - (dashboard:test-menu-items run-id test-id target runname test-name testpatt item-test-path test-info)) - (custom-menu-items - (dashboard:custom-menu-items run-id test-id target runname test-name testpatt item-test-path test-info)) - (toplevel-menu-items - (dashboard:toplevel-menu-items run-id test-id target runname test-name testpatt item-test-path test-info)) - ) - (apply iup:menu - `(,@toplevel-menu-items - ,(iup:menu-item - "Run" - (apply iup:menu run-menu-items)) - ,(iup:menu-item - "Test" - (apply iup:menu test-menu-items)) - ,@custom-menu-items)))) Index: dashboard-guimonitor.scm ================================================================== --- dashboard-guimonitor.scm +++ dashboard-guimonitor.scm @@ -43,162 +43,5 @@ (include "common_records.scm") 
(include "db_records.scm") (include "run_records.scm") (include "task_records.scm") -(define (control-panel db tdb keys) - (let* ((var-params (make-hash-table)) ;; register all the widgets here for querying on run, rollup, remove? - (key-params (make-hash-table)) - (monitordat '()) ;; list of monitor records - (keyentries (iup:frame - #:title "Keys" - (apply - iup:vbox - (map (lambda (key) - (iup:hbox (iup:label (vector-ref key 0) #:size "60x15") ; #:expand "HORIZONTAL") - (iup:textbox #:expand "HORIZONTAL" - #:action (lambda (obj a val) - (hash-table-set! key-params (vector-ref key 0) val))))) - keys)))) - (othervars (iup:frame - #:title "Run Vars" - (apply - iup:vbox - (map (lambda (var) - (iup:hbox (iup:label var #:size "60x15") - (iup:textbox #:expand "HORIZONTAL" - #:action (lambda (obj a val) - (hash-table-set! var-params var val))))) - (list "runname" "testpatts" "params"))))) - (controls (iup:frame - #:title "Controls" - (iup:hbox - (iup:frame - #:title "Runs" - (iup:hbox - (iup:button "Start" - #:expand "HORIZONTAL" - #:action (lambda (obj) - (tasks:add-from-params tdb "run" keys key-params var-params) - (print "Launch Run"))) - (iup:button "Remove" - #:expand "HORIZONTAL" - #:action (lambda (obj) - (print "Remove Run") - (tasks:add-from-params tdb "remove" keys key-params var-params) - )) - (iup:button "Rollup" - #:expand "HORIZONTAL" - #:action (lambda (obj) - (print "Rollup Run") - (tasks:add-from-params tdb "rollup" keys key-params var-params))))) - (iup:frame - #:title "Misc" - (iup:hbox - (iup:button "Quit" - #:expand "HORIZONTAL" - #:action (lambda (obj) - (sqlite3:finalize! db) - (sqlite3:finalize! 
tdb) - (exit)))))))) - (monitors (iup:textbox - #:expand "YES" ; HORIZONTAL" - ; #:size "x40" - #:multiline "YES" - #:font "Courier New, -10" - #:value "None...............................................")) - (actions (iup:textbox - #:expand "YES" - #:multiline "YES" - #:font "Courier New, -10" - #:value "None...............................................")) - (lastmodtime 0) - (next-touch 0) ;; the last time the "last_update" field was updated - (refreshdat (lambda () - (let* ((monitordbpath (conc *toppath* "/monitor.db")) - (megatestdbpath (conc *toppath* "/megatest.db")) - (modtime (max (file-modification-time megatestdbpath) - (file-modification-time monitordbpath)))) - ;; do stuff here when the db is updated by some other process - (if (> modtime lastmodtime) - (let ((tlst (tasks:get-tasks tdb '() '())) - (mlst (tasks:get-monitors tdb))) - (set! tasksdat tlst) - (set! monitorsdat mlst) - (iup:attribute-set! monitors "VALUE" (tasks:monitors->text-table mlst)) - (iup:attribute-set! actions "VALUE" (tasks:tasks->text tlst)) - (tasks:process-queue db tdb) - (set! lastmodtime (max (file-modification-time megatestdbpath) - (file-modification-time monitordbpath))) - (tasks:reset-stuck-tasks tdb))) - ;; stuff to do every 10 seconds - (if (> (current-seconds) next-touch) - (begin - ;; (tasks:process-queue db tdb monitordbpath) - (tasks:monitors-update tdb) - (tasks:reset-stuck-tasks tdb) - (set! monitorsdat (tasks:get-monitors tdb)) - (set! next-touch (+ (current-seconds) 10)) - ))))) - (topdialog #f)) - (set! 
topdialog (iup:dialog - #:close_cb (lambda (a)(exit)) - #:title "Run Controls" - (iup:vbox - (iup:hbox keyentries othervars) - controls - (let ((tabtop (iup:tabs - (iup:vbox - (let* ((tb (iup:textbox #:expand "HORIZONTAL")) - (bt (iup:button "Remove tasks by id" - #:action (lambda (obj) - (let ((val (iup:attribute tb "VALUE"))) - (tasks:remove-queue-entries tdb val))))) - (lb (iup:label "(comma separated)"))) - (iup:hbox bt tb lb)) - actions) - monitors - ))) - (iup:attribute-set! tabtop "TABTITLE0" "Actions") - (iup:attribute-set! tabtop "TABTITLE1" "Monitors") - tabtop) - ))) - ; (iup:frame - ; #:title "Monitors" - ; monitors) - ; (iup:frame - ; #:title "Actions" - ; actions)))) - - (iup:show topdialog) - (iup:callback-set! *tim* "ACTION_CB" - (lambda (x) - (refreshdat) - (if *exit-started* - (set! *exit-started* 'ok)))))) - -(define (main-window setuptab fsltab collateraltab toolstab) - (iup:show - (iup:dialog #:title "FSL Power Window" #:size "290x190" ; #:expand "YES" - (let ((tabtop (iup:tabs setuptab collateraltab fsltab toolstab))) - (iup:attribute-set! tabtop "TABTITLE0" "Setup") - (iup:attribute-set! tabtop "TABTITLE1" "Collateral") - (iup:attribute-set! tabtop "TABTITLE2" "Fossil") - (iup:attribute-set! tabtop "TABTITLE3" "Tools") - tabtop)))) - -;; BUG: Remember to re-instate this!!!! -;; (on-exit (lambda () -;; (let ((tdb (tasks:open-db))) -;; ;; (print "On-exit called") -;; (tasks:remove-monitor-record tdb) -;; (sqlite3:finalize! tdb)))) - -(define (gui-monitor db) - (let ((keys (db:get-keys db)) - (tdb (tasks:open-db))) - (tasks:register-monitor db tdb) ;;; let the other monitors know we are here - (control-panel db tdb keys) - ;(tasks:remove-monitor-record db) - ;(sqlite3:finalize! 
db) - )) - Index: dashboard-tests.scm ================================================================== --- dashboard-tests.scm +++ dashboard-tests.scm @@ -46,797 +46,5 @@ (include "common_records.scm") (include "db_records.scm") (include "run_records.scm") -;;====================================================================== -;; C O M M O N -;;====================================================================== - -(define *dashboard-comment-share-slot* #f) - -(define (dtests:get-pre-command #!key (default-override #f)) - (let* ((orig-pre-command "export CMD='") - (viewscreen-pre-command "viewscreen ") - (use-viewscreen (configf:lookup *configdat* "dashboard" "use-viewscreen")) - (default-pre-command (if use-viewscreen viewscreen-pre-command orig-pre-command)) - (cfg-ovrd (configf:lookup *configdat* "dashboard" "pre-command"))) - (or cfg-ovrd default-override default-pre-command))) ;; "xterm -geometry 180x20 -e \"")) - - -(define (dtests:get-post-command #!key (default-override #f)) - (let* ((orig-post-command (conc "';xterm -geometry 180x20 -e \"(echo; echo -n START:;date +ww%U.%w-$H:%M:%S;echo;echo $CMD;echo;$CMD)|&" - "tee -a runlog-`date +ww%U.%w-%H:%M`.log;echo Press any key to continue;bash -c 'read -n 1 -s'\" &")) - (viewscreen-post-command "") - (use-viewscreen (configf:lookup *configdat* "dashboard" "use-viewscreen")) - (default-post-command (if use-viewscreen viewscreen-post-command orig-post-command)) - (cfg-ovrd (configf:lookup *configdat* "dashboard" "post-command"))) - (or cfg-ovrd default-override default-post-command))) ;; ";echo Press any key to continue;bash -c 'read -n 1 -s'\" &"))) - -(define (common:run-a-command cmd #!key (with-vars #f) (with-orig-env #f)) - (let* ((pre-cmd (dtests:get-pre-command)) - (post-cmd (dtests:get-post-command)) - (fullcmd (if (or pre-cmd post-cmd) - (conc pre-cmd cmd post-cmd) - (conc "viewscreen " cmd)))) - (debug:print-info 02 *default-log-port* "Running command: " fullcmd) - (cond - (with-vars 
(common:without-vars fullcmd)) - (with-orig-env (common:with-orig-env fullcmd)) - (else (common:without-vars fullcmd "MT_.*"))))) - -(define (test-info-panel testdat store-label widgets) - (iup:frame - #:title "Test Info" ; #:expand "YES" - (iup:hbox ; #:expand "YES" - (apply iup:vbox ; #:expand "YES" - (append (map (lambda (val) - (iup:label val ; #:expand "HORIZONTAL" - )) - (list "Testname: " - "Item path: " - "Current state: " - "Current status: " - "Test comment: " - "Test id: " - "Test date: ")) - (list (iup:label "" #:expand "VERTICAL")))) - (apply iup:vbox ; #:expand "YES" - (list - (store-label "testname" - (iup:label (db:test-get-testname testdat) #:expand "HORIZONTAL") - (lambda (testdat)(db:test-get-testname testdat))) - (store-label "item-path" - (iup:label (db:test-get-item-path testdat) #:expand "HORIZONTAL") - (lambda (testdat)(db:test-get-item-path testdat))) - (store-label "teststate" - (iup:label (db:test-get-state testdat) #:expand "HORIZONTAL") - (lambda (testdat) - (db:test-get-state testdat))) - (let ((lbl (iup:label (db:test-get-status testdat) #:expand "HORIZONTAL"))) - (hash-table-set! widgets "teststatus" - (lambda (testdat) - (let ((newstatus (db:test-get-status testdat)) - (oldstatus (iup:attribute lbl "TITLE"))) - (if (not (equal? oldstatus newstatus)) - (begin - (iup:attribute-set! lbl "FGCOLOR" (car (gutils:get-color-for-state-status (db:test-get-state testdat) - (db:test-get-status testdat)))) - (iup:attribute-set! lbl "TITLE" (db:test-get-status testdat))))))) - lbl) - (store-label "testcomment" - (iup:label "TestComment " - #:expand "HORIZONTAL") - (lambda (testdat) - (let ((newcomment (db:test-get-comment testdat))) - (if *dashboard-comment-share-slot* - (if (not (equal? (iup:attribute *dashboard-comment-share-slot* "VALUE") - newcomment)) - (iup:attribute-set! 
*dashboard-comment-share-slot* - "VALUE" - newcomment))) - newcomment))) - (store-label "testid" - (iup:label "TestId " - #:expand "HORIZONTAL") - (lambda (testdat) - (db:test-get-id testdat))) - (store-label "testdate" - (iup:label "TestDate " - #:expand "HORIZONTAL") - (lambda (testdat) - (seconds->work-week/day-time (db:test-get-event_time testdat)))) - ))))) - -;;====================================================================== -;; Test meta panel -;;====================================================================== - -(define (test-meta-panel-get-description testmeta) - (fmt #f (with-width 40 (wrap-lines (db:testmeta-get-description testmeta))))) - -(define (test-meta-panel testmeta store-meta) - (iup:frame - #:title "Test Meta Data" ; #:expand "YES" - (iup:hbox ; #:expand "YES" - (apply iup:vbox ; #:expand "YES" - (append (map (lambda (val) - (iup:label val ; #:expand "HORIZONTAL" - )) - (list "Author: " - "Owner: " - "Reviewed: " - "Tags: " - "Description: ")) - (list (iup:label "" #:expand "VERTICAL")))) - (apply iup:vbox ; #:expand "YES" - (list - (store-meta "author" - (iup:label (db:testmeta-get-author testmeta) #:expand "HORIZONTAL") - (lambda (testmeta)(db:testmeta-get-author testmeta))) - (store-meta "owner" - (iup:label (db:testmeta-get-owner testmeta) #:expand "HORIZONTAL") - (lambda (testmeta)(db:testmeta-get-owner testmeta))) - (store-meta "reviewed" - (iup:label (db:testmeta-get-reviewed testmeta) #:expand "HORIZONTAL") - (lambda (testmeta)(db:testmeta-get-reviewed testmeta))) - (store-meta "tags" - (iup:label (db:testmeta-get-tags testmeta) #:expand "HORIZONTAL") - (lambda (testmeta)(db:testmeta-get-tags testmeta))) - (store-meta "description" - (iup:label (test-meta-panel-get-description testmeta) #:size "x50"); #:expand "HORIZONTAL") - (lambda (testmeta) - (test-meta-panel-get-description testmeta))) - ))))) - - -;;====================================================================== -;; Run info panel 
-;;====================================================================== -(define (run-info-panel db keydat testdat runname) - (let* ((run-id (db:test-get-run_id testdat)) - (rundat (rmt:get-run-info run-id)) - (header (db:get-header rundat)) - (event_time (db:get-value-by-header (db:get-rows rundat) - (db:get-header rundat) - "event_time"))) - (iup:frame - #:title "Megatest Run Info" ; #:expand "YES" - (iup:hbox ; #:expand "YES" - (apply iup:vbox ; #:expand "YES" - (append (map (lambda (keyval) - (iup:label (conc (car keyval) " "))) - keydat) - (list (iup:label "runname ") - (iup:label "run-id") - (iup:label "run-date")))) - (apply iup:vbox - (append (map (lambda (keyval) - (iup:label (cadr keyval) #:expand "HORIZONTAL")) - keydat) - (list (iup:label runname) - (iup:label (conc run-id)) - (iup:label (seconds->year-work-week/day-time event_time)) - (iup:label "" #:expand "VERTICAL")))))))) - -;;====================================================================== -;; Host info panel -;;====================================================================== -(define (host-info-panel testdat store-label) - (iup:frame - #:title "Remote host and Test Run Info" ; #:expand "YES" - (iup:hbox ; #:expand "YES" - (apply iup:vbox ; #:expand "YES" ;; The heading labels - (append (map (lambda (val) - (iup:label val ; #:expand "HORIZONTAL" - )) - (list "Hostname: " - "Disk free: " - "CPU Load: " - "Run duration: " - "Logfile: " - "Top process id: " - "Uname -a: ")) - (iup:label "" #:expand "VERTICAL"))) - (apply iup:vbox ; #:expand "YES" - (list - ;; NOTE: Yes, the host can change! 
- (store-label "HostName" - (iup:label ;; (sdb:qry 'getstr - (db:test-get-host testdat) ;; ) - #:expand "HORIZONTAL") - (lambda (testdat)(db:test-get-host testdat))) - (store-label "DiskFree" - (iup:label (conc (db:test-get-diskfree testdat)) #:expand "HORIZONTAL") - (lambda (testdat)(conc (db:test-get-diskfree testdat)))) - (store-label "CPULoad" - (iup:label (conc (db:test-get-cpuload testdat)) #:expand "HORIZONTAL") - (lambda (testdat)(conc (db:test-get-cpuload testdat)))) - (store-label "RunDuration" - (iup:label (conc (seconds->hr-min-sec (db:test-get-run_duration testdat))) #:expand "HORIZONTAL") - (lambda (testdat)(conc (seconds->hr-min-sec (db:test-get-run_duration testdat))))) - (store-label "LogFile" - (iup:label (conc (db:test-get-final_logf testdat)) #:expand "HORIZONTAL") - (lambda (testdat)(conc (db:test-get-final_logf testdat)))) - (store-label "ProcessId" - (iup:label (conc (db:test-get-process_id testdat)) #:expand "HORIZONTAL") - (lambda (testdat)(conc (db:test-get-process_id testdat)))) - (store-label "Uname" - (iup:label " " #:expand "HORIZONTAL") ;; #:wordwrap "YES") - (lambda (testdat) ;; (sdb:qry 'getstr - (db:test-get-uname testdat))) ;; ) - ))))) - -;; if there is a submegatest create a button to launch dashboard in that area -;; -(define (submegatest-panel dbstruct keydat testdat runname testconfig) - (let* ((test-run-dir (db:test-get-rundir testdat)) - (subarea (subrun:get-runarea test-run-dir)) - (area-exists (and subarea (common:file-exists? 
subarea silent: #t)))) - (if subarea - (iup:frame - #:title "Megatest Run Info" ; #:expand "YES" - (iup:button - "Launch Dashboard" - #:action (lambda (obj) - (subrun:launch-dashboard test-run-dir)))) - (iup:vbox)))) - -;; use a global for setting the buttons colors -;; state status teststeps -(define *state-status* (vector #f #f #f)) -(define (update-state-status-buttons testdat) - (let* ((state (db:test-get-state testdat)) - (status (db:test-get-status testdat)) - (color (car (gutils:get-color-for-state-status state status)))) - ((vector-ref *state-status* 0) state color) - ((vector-ref *state-status* 1) status color))) - -(define *dashboard-test-db* #t) -(define *dashboard-comment-share-slot* #f) - -;;====================================================================== -;; Set fields -;;====================================================================== -(define (set-fields-panel dbstruct run-id test-id testdat #!key (db #f)) - (let ((newcomment #f) - (newstatus #f) - (newstate #f) - (wtxtbox #f)) - (iup:frame - #:title "Set fields" - (iup:vbox - (iup:hbox (iup:label "Comment:") - (let ((txtbox (iup:textbox #:action (lambda (val a b) - ;; (rmt:test-set-state-status-by-id run-id test-id #f #f b) - (rmt:test-set-state-status run-id test-id #f #f b) - ;; IDEA: Just set a variable with the proc to call? - ;; (rmt:test-set-state-status-by-id run-id test-id #f #f b) - (set! newcomment b)) - #:value (db:test-get-comment testdat) - #:expand "HORIZONTAL"))) - (set! wtxtbox txtbox) - txtbox)) - - (apply iup:hbox - (iup:label "STATE:" #:size "30x") - (let* ((btns (map (lambda (state) - (let ((btn (iup:button state - #:expand "HORIZONTAL" #:size "50x" #:font "Courier New, -10" - #:action (lambda (x) - ;; (rmt:test-set-state-status-by-id run-id test-id state #f #f) - (rmt:set-state-status-and-roll-up-items run-id test-id #f state #f #f) ;; test-name passed in as test-id is respected - (db:test-set-state! 
testdat state))))) - btn)) - (map cadr *common:std-states*)))) ;; (list "COMPLETED" "NOT_STARTED" "RUNNING" "REMOTEHOSTSTART" "LAUNCHED" "KILLED" "KILLREQ")))) - (vector-set! *state-status* 0 - (lambda (state color) - (for-each - (lambda (btn) - (let* ((name (iup:attribute btn "TITLE")) - (newcolor (if (equal? name state) color "192 192 192"))) - (if (not (colors-similar? newcolor (iup:attribute btn "BGCOLOR"))) - (iup:attribute-set! btn "BGCOLOR" newcolor)))) - btns))) - btns)) - (apply iup:hbox - (iup:label "STATUS:" #:size "30x") - (let* ((btns (map (lambda (status) - (let ((btn (iup:button status - #:expand "HORIZONTAL" #:size "50x" #:font "Courier New, -10" - #:action (lambda (x) - (let ((t (iup:attribute x "TITLE"))) - (if (equal? t "WAIVED") - (iup:show (dashboard-tests:waiver run-id testdat - (if wtxtbox (iup:attribute wtxtbox "VALUE") #f) - (lambda (c) - (set! newcomment c) - (if wtxtbox - (begin - (iup:attribute-set! wtxtbox "VALUE" c) - (if (not *dashboard-comment-share-slot*) - (set! *dashboard-comment-share-slot* wtxtbox))) - )))) - (begin - ;; (rmt:test-set-state-status-by-id run-id test-id #f status #f) - (rmt:set-state-status-and-roll-up-items run-id test-id #f #f status #f) ;; test-name passed in as test-id is respected - (db:test-set-status! testdat status)))))))) - btn)) - (map cadr *common:std-statuses*)))) ;; (list "PASS" "WARN" "FAIL" "CHECK" "n/a" "WAIVED" "SKIP")))) - (vector-set! *state-status* 1 - (lambda (status color) - (for-each - (lambda (btn) - (let* ((name (iup:attribute btn "TITLE")) - (newcolor (if (equal? name status) color "192 192 192"))) - (if (not (colors-similar? newcolor (iup:attribute btn "BGCOLOR"))) - (iup:attribute-set! 
btn "BGCOLOR" newcolor)))) - btns))) - btns)))))) - -(define (dashboard-tests:run-a-step info) - #t) - -;; (define (dashboard-tests:step-run-control testdat stepname testconfig) -;; (let* ((mutex (make-mutex))) -;; (letrec ((dlg -;; (iup:dialog ;; #:close_cb (lambda (a)(exit)) ; #:expand "YES" -;; #:title stepname -;; (iup:vbox ; #:expand "YES" -;; (iup:label (conc "Step: " stepname "\nNB// These buttons only run the test step\nfor the purpose of debugging.\nNot all database updates are done.")) -;; (iup:button "Re-run" -;; #:expand "HORIZONTAL" -;; #:action (lambda (obj) -;; (debug:catch-and-dump (lambda () -;; (thread-start! -;; (make-thread -;; (lambda () -;; (print "BB> started ezsteps:run-from") -;; (debug:catch-and-dump -;; (lambda () -;; (ezsteps:run-from testdat stepname #t)) -;; "dashboard-tests:step-run-control -> ezstep:run-from (1)") -;; (print "BB> done ezsteps:run-from") -;; 'foo) -;; (conc "ezstep run single step " stepname))) -;; ) -;; "step-run-control action"))) -;; (iup:button "Re-run and continue" -;; #:expand "HORIZONTAL" -;; #:action (lambda (obj) -;; (debug:catch-and-dump -;; (lambda () -;; (thread-start! -;; (make-thread (lambda () -;; (ezsteps:run-from testdat stepname #f)) -;; (conc "ezstep run from step " stepname)))) -;; "dashboard-tests:step-run-control -> ezstep:run-from (2)"))) -;; (iup:button "Close" -;; #:action (lambda (obj) -;; (iup:destroy! dlg))) -;; ;; (iup:button "Refresh test data" -;; ;; #:expand "HORIZONTAL" -;; ;; #:action (lambda (obj) -;; ;; (print "Refresh test data " stepname)) -;; )))) -;; dlg))) - -(define (dashboard-tests:waiver run-id testdat ovrdval cmtcmd) - (let* ((wpatt (configf:lookup *configdat* "setup" "waivercommentpatt")) - (wregx (if (string? wpatt)(regexp wpatt) #f)) - (wmesg (iup:label (if wpatt (conc "Comment must match pattern " wpatt) ""))) - (comnt (iup:textbox #:action (lambda (val a b) - (if wpatt - (if (string-match wregx b) - (iup:attribute-set! 
wmesg "TITLE" (conc "Comment matches " wpatt)) - (iup:attribute-set! wmesg "TITLE" (conc "Comment does not match " wpatt)) - ))) - #:value (if ovrdval ovrdval (db:test-get-comment testdat)) - #:expand "HORIZONTAL")) - (dlog #f)) - (set! dlog (iup:dialog ;; #:close_cb (lambda (a)(exit)) ; #:expand "YES" - #:title "SET WAIVER" - (iup:vbox ; #:expand "YES" - (iup:label (conc "Enter justification for waiving test " - (db:test-get-testname testdat) - (if (equal? (db:test-get-item-path testdat) "") - "" - (conc "/" (db:test-get-item-path testdat))))) - wmesg ;; the informational msg on whether it matches - comnt - (iup:hbox - (iup:button "Apply and Close " - #:expand "HORIZONTAL" - #:action (lambda (obj) - (let ((comment (iup:attribute comnt "VALUE")) - (test-id (db:test-get-id testdat))) - (if (or (not wpatt) - (string-match wregx comment)) - (begin - ;; (rmt:test-set-state-status-by-id run-id test-id #f "WAIVED" comment) - (rmt:test-set-state-status run-id test-id #f "WAIVED" comment) - (db:test-set-status! testdat "WAIVED") - (cmtcmd comment) - (iup:destroy! dlog)))))) - (iup:button "Cancel" - #:expand "HORIZONTAL" - #:action (lambda (obj) - (iup:destroy! 
dlog))))))) - dlog)) - - -;;====================================================================== -;; -;;====================================================================== -(define (dashboard-tests:examine-test run-id test-id) ;; run-id run-key origtest) - (let* ((db-path (common:get-db-tmp-area *alldat*)) - (dbstruct #f) ;; NOT ACTUALLY USED (db:setup)) - ;; local: #t)) - (testdat (rmt:get-test-info-by-id run-id test-id)) ;; (db:get-test-info-by-id dbstruct run-id test-id)) - (db-mod-time 0) ;; (file-modification-time db-path)) - (last-update 0) ;; (current-seconds)) - (request-update #t)) - (if (not testdat) - (begin - (debug:print 2 *default-log-port* "ERROR: No test data found for test " test-id ", exiting") - (exit 1)) - (let* (;; (run-id (if testdat (db:test-get-run_id testdat) #f)) - (test-registry (tests:get-all)) - (keydat (if testdat (rmt:get-key-val-pairs run-id) #f)) - (rundat (if testdat (rmt:get-run-info run-id) #f)) - (runname (if testdat (db:get-value-by-header (db:get-rows rundat) - (db:get-header rundat) - "runname") #f)) - ;; (tdb (tdb:open-test-db-by-test-id-local dbstruct run-id test-id)) - ;; These next two are intentional bad values to ensure errors if they should not - ;; get filled in properly. 
- (logfile "/this/dir/better/not/exist") - (rundir (if testdat - (db:test-get-rundir testdat) - logfile)) - ;; (testdat-path (conc rundir "/testdat.db")) ;; this gets recalculated until found - (augment-teststeps (lambda (inlov) - (map - (lambda (invec) - (list->vector - `( - ,@(reverse (cdr (reverse (vector->list invec)))) - "rerun this step" "restart from here" ))) - inlov))) - (teststeps (if testdat (augment-teststeps (tests:get-compressed-steps run-id test-id)) '())) - (testfullname (if testdat (db:test-get-fullname testdat) "Gathering data ...")) - (testname (if testdat (db:test-get-testname testdat) "n/a")) - ;; (tests:get-testconfig testdat testname 'return-procs)) - (testmeta (if testdat - (let ((tm (rmt:testmeta-get-record testname))) - (if tm tm (make-db:testmeta))) - (make-db:testmeta))) - - (keystring (string-intersperse - (map (lambda (keyval) - ;; (conc ":" (car keyval) " " (cadr keyval))) - (cadr keyval)) - keydat) - "/")) - (item-path (db:test-get-item-path testdat)) - ;; this next block was added to fix a bug where variables were - ;; needed. Revisit this. - (runconfig (let ((runconfigf (conc *toppath* "/runconfigs.config"))) ;; no rush but it would be good to convert this call to use runconfig:read - (if (common:file-exists? runconfigf) - (handle-exceptions - exn - #f ;; do nothing, just keep on trucking .... - (setup-env-defaults runconfigf run-id (make-hash-table) keydat environ-patt: keystring)) - (make-hash-table)))) - (testconfig (begin - ;; (runs:set-megatest-env-vars run-id inrunname: runname testname: test-name itempath: item-path) - (runs:set-megatest-env-vars run-id inkeyvals: keydat inrunname: runname intarget: keystring testname: testname itempath: item-path) ;; these may be needed by the launching process - (handle-exceptions - exn ;; NOTE: I've no idea why this was written this way. Research, study and fix needed! 
- (tests:get-testconfig (db:test-get-testname testdat) (db:test-get-item-path testdat) test-registry #f allow-write-cache: #f) - (tests:get-testconfig (db:test-get-testname testdat) item-path test-registry #t allow-write-cache: #f)))) - (viewlog (lambda (x) - (if (common:file-exists? logfile) - ;(system (conc "firefox " logfile "&")) - (dcommon:run-html-viewer logfile) - (message-window (conc "File " logfile " not found"))))) - (view-a-log (lambda (lfile) - (let ((lfilename (conc rundir "/" lfile))) - ;; (print "lfilename: " lfilename) - (if (common:file-exists? lfilename) - ;(system (conc "firefox " logfile "&")) - (dcommon:run-html-viewer lfilename) - (message-window (conc "File " lfilename " not found")))))) - (xterm (lambda (x) - (if (directory-exists? rundir) - (let ((shell (if (get-environment-variable "SHELL") - (conc "-e " (get-environment-variable "SHELL")) - ""))) - (common:without-vars - (conc "cd " rundir - ";mt_xterm -T \"" (string-translate testfullname "()" " ") "\" " shell "&") - "MT_.*")) - (message-window (conc "Directory " rundir " not found"))))) - (widgets (make-hash-table)) - (refreshdat (lambda () - (let* ((curr-mod-time (file-modification-time db-path)) - ;; (max ..... (if (common:file-exists? testdat-path) - ;; (file-modification-time testdat-path) - ;; (begin - ;; (set! 
testdat-path (conc rundir "/testdat.db")) - ;; 0)))) - (need-update (or (and (>= curr-mod-time db-mod-time) - (> (current-milliseconds)(+ last-update 250))) ;; every half seconds if db touched - (> (current-milliseconds)(+ last-update 10000)) ;; force update even 10 seconds - request-update)) - (newtestdat (if need-update - ;; NOTE: BUG HIDER, try to eliminate this exception handler - (handle-exceptions - exn - (debug:print-info 0 *default-log-port* "test db access issue in examine test for run-id " run-id ", test-id " test-id ": " ((condition-property-accessor 'exn 'message) exn)) - (rmt:get-test-info-by-id run-id test-id ))))) - ;; (print "INFO: need-update= " need-update " curr-mod-time = " curr-mod-time) - (cond - ((and need-update newtestdat) - (set! testdat newtestdat) - (set! teststeps (augment-teststeps (tests:get-compressed-steps run-id test-id))) - (set! logfile (conc (db:test-get-rundir testdat) "/" (db:test-get-final_logf testdat))) - (set! rundir ;; (filedb:get-path *fdb* - (db:test-get-rundir testdat)) ;; ) - (set! testfullname (db:test-get-fullname testdat)) - ;; (debug:print 0 *default-log-port* "INFO: teststeps=" (intersperse teststeps "\n ")) - - ;; I don't see why this was implemented this way. Please comment it ... - ;; (if (eq? curr-mod-time db-mod-time) ;; do only once if same - ;; (set! db-mod-time (+ curr-mod-time 1)) - ;; (set! db-mod-time curr-mod-time)) - - (if (not (eq? curr-mod-time db-mod-time)) - (set! db-mod-time curr-mod-time)) - (set! last-update (current-milliseconds)) - (set! request-update #f) ;; met the need ... - ) - (need-update ;; if this was true and yet there is no data .... - (db:test-set-testname! 
testdat "DEAD OR DELETED TEST"))) - (if need-update - (begin - ;; update the gui elements here - (for-each - (lambda (key) - ;; (print "Updating " key) - ((hash-table-ref widgets key) testdat)) - (hash-table-keys widgets)) - (update-state-status-buttons testdat))) - ;; (iup:refresh self) - ))) - (meta-widgets (make-hash-table)) - (self #f) - (store-label (lambda (name lbl cmd) - (hash-table-set! widgets name - (lambda (testdat) - (let ((newval (cmd testdat)) - (oldval (iup:attribute lbl "TITLE"))) - (if (not (equal? newval oldval)) - (begin - ;(mutex-lock! mx1) - (iup:attribute-set! lbl "TITLE" newval) - ;(mutex-unlock! mx1) - ))))) - lbl)) - (store-meta (lambda (name lbl cmd) - (hash-table-set! meta-widgets name - (lambda (testmeta) - (let ((newval (cmd testmeta)) - (oldval (iup:attribute lbl "TITLE"))) - (if (not (equal? newval oldval)) - (begin - ;(mutex-lock! mx1) - (iup:attribute-set! lbl "TITLE" newval) - ;(mutex-unlock! mx1) - ))))) - lbl)) - (store-button store-label) - (command-proc (lambda (command-text-box) - (let* ((cmd (iup:attribute command-text-box "VALUE"))) - (common:run-a-command cmd with-orig-env: #t)))) - (command-text-box (iup:textbox - #:expand "HORIZONTAL" - #:font "Courier New, -10" - #:action (lambda (obj cnum val) - ;; (print "cnum=" cnum) - (if (eq? cnum 13) - (command-prox obj))) - )) - (command-launch-button (iup:button "Execute!" #:action (lambda (x) - (command-proc command-text-box)))) - ;; (lambda (x) - ;; (let* ((cmd (iup:attribute command-text-box "VALUE")) - ;; (fullcmd (conc (dtests:get-pre-command) - ;; cmd - ;; (dtests:get-post-command)))) - ;; (debug:print-info 02 *default-log-port* "Running command: " fullcmd) - ;; (common:without-vars fullcmd "MT_.*"))))) - (kill-jobs (lambda (x) - (iup:attribute-set! 
- command-text-box "VALUE" - (conc "megatest -target " keystring " -runname " runname - " -set-state-status KILLREQ,n/a -testpatt %/% " - " -state RUNNING,REMOTEHOSTSTART,LAUNCHED")))) - (run-test (lambda (x) - (iup:attribute-set! - command-text-box "VALUE" - (conc "megatest -target " keystring " -runname " runname - " -run -testpatt " (conc testname "/" (if (equal? item-path "") - "%" - item-path)) - " -clean-cache" - )))) - (remove-test (lambda (x) - (iup:attribute-set! - command-text-box "VALUE" - (conc "megatest -remove-runs -target " keystring " -runname " runname - " -testpatt " (conc testname "/" (if (equal? item-path "") - "%" - item-path)) - " -v")))) - (clean-run-execute (lambda (x) - (let ((cmd (conc ;; "megatest -remove-runs -target " keystring " -runname " runname - "megatest -set-state-status NOT_STARTED,n/a -target " keystring " -runname " runname - " -testpatt " (conc testname "/" (if (equal? item-path "") - "%" - item-path)) - ";megatest -target " keystring " -runname " runname - " -run -preclean -testpatt " (conc testname "/" (if (equal? item-path "") - "%" - item-path)) - " -clean-cache" - ))) - (thread-start! (make-thread (lambda () - (common:run-a-command cmd)) - "clean-run-execute"))))) - (remove-test (lambda (x) - (iup:attribute-set! - command-text-box "VALUE" - (conc "megatest -remove-runs -target " keystring " -runname " runname - " -testpatt " (conc testname "/" (if (equal? item-path "") - "%" - item-path)) - " -v")))) - (archive-test (lambda (x) - (iup:attribute-set! - command-text-box "VALUE" - (conc "megatest -target " keystring " -runname " runname - " -archive save-remove -testpatt " (conc testname "/" (if (equal? item-path "") - "%" - item-path)) - ))))) - (cond - ((not testdat)(begin (print "ERROR: bad test info for " test-id)(exit 1))) - ((not rundat)(begin (print "ERROR: found test info but there is a problem with the run info for " run-id)(exit 1))) - (else - ;; (test-set-status! db run-id test-name state status itemdat) - (set! 
self ; - (iup:dialog #:close_cb (lambda (a)(exit)) ; #:expand "YES" - #:title testfullname - (iup:vbox ; #:expand "YES" - ;; The run and test info - (iup:hbox ; #:expand "YES" - (run-info-panel dbstruct keydat testdat runname) - (test-info-panel testdat store-label widgets) - (test-meta-panel testmeta store-meta)) - (iup:hbox - (host-info-panel testdat store-label) - (submegatest-panel dbstruct keydat testdat runname testconfig)) - ;; The controls - (iup:frame #:title "Actions" - (iup:vbox - (iup:hbox - (iup:button "View Log" #:action viewlog #:size "80x") - (iup:button "Start Xterm" #:action xterm #:size "80x") - (iup:button "Run Test" #:action run-test #:size "80x") - (iup:button "Clean Test" #:action remove-test #:size "80x") - (iup:button "CleanRunExecute!" #:action clean-run-execute #:size "80x") - (iup:button "Kill All Jobs" #:action kill-jobs #:size "80x") - (iup:button "Archive Test" #:action archive-test #:size "80x") - (iup:button "Close" #:action (lambda (x)(exit)) #:size "80x")) - (apply - iup:hbox - (list command-text-box command-launch-button)))) - (set-fields-panel dbstruct run-id test-id testdat) - (let ((tabs - (iup:tabs - ;; Replace here with matrix - (let ((steps-matrix (iup:matrix - #:font "Courier New, -8" - #:expand "YES" - #:scrollbar "YES" - #:numcol 9 - #:numlin 100 - #:numcol-visible 9 - #:numlin-visible 5 - #:click-cb (lambda (obj lin col status) - ;; (if (equal? col 6) - (let* ((mtrx-rc (conc lin ":" 6)) - (fname (iup:attribute obj mtrx-rc)) - (stepname (iup:attribute obj (conc lin ":" 1))) (comment (iup:attribute obj (conc lin ":" 7)))) - (case col - - ((7) (print "Comment from step "stepname": "comment)) - ((8) (ezsteps:spawn-run-from testdat stepname #t)) - ((9) (ezsteps:spawn-run-from testdat stepname #f)) - (else (view-a-log fname)))))))) - ;; (let loop ((count 0)) - ;; (iup:attribute-set! steps-matrix "FITTOTEXT" (conc "L" count)) - ;; (if (< count 30) - ;; (loop (+ count 1)))) - (iup:attribute-set! 
steps-matrix "0:1" "Step Name") - (iup:attribute-set! steps-matrix "0:2" "Start") - (iup:attribute-set! steps-matrix "0:3" "End") - (iup:attribute-set! steps-matrix "WIDTH3" "50") - (iup:attribute-set! steps-matrix "0:4" "Status") - (iup:attribute-set! steps-matrix "WIDTH4" "50") - (iup:attribute-set! steps-matrix "0:5" "Duration") - (iup:attribute-set! steps-matrix "0:6" "Log File") - (iup:attribute-set! steps-matrix "0:7" "Comment") - (iup:attribute-set! steps-matrix "0:8" "rerun only") - (iup:attribute-set! steps-matrix "BGCOLOR0:9" "149 208 252") - (iup:attribute-set! steps-matrix "BGCOLOR0:8" "149 208 252") - (iup:attribute-set! steps-matrix "BGCOLOR0:7" "149 208 252") - (iup:attribute-set! steps-matrix "0:9" "rerun & continue") - (iup:attribute-set! steps-matrix "ALIGNMENT1" "ALEFT") - ;; (iup:attribute-set! steps-matrix "FIXTOTEXT" "C1") - (iup:attribute-set! steps-matrix "RESIZEMATRIX" "YES") - (let ((proc - (lambda (testdat) - (dcommon:populate-steps teststeps steps-matrix run-id test-id)))) - (hash-table-set! widgets "StepsMatrix" proc) - (proc testdat)) - steps-matrix) - ;; populate the Test Data panel - (iup:frame - #:title "Test Data" - (let ((test-data - (iup:textbox ;; #:action (lambda (obj char val) - ;; #f) - #:expand "YES" - #:multiline "YES" - #:font "Courier New, -10" - #:size "100x100"))) - (hash-table-set! 
widgets "Test Data" - (lambda (testdat) ;; - (let* ((currval (iup:attribute test-data "VALUE")) ;; "TITLE")) - (fmtstr "~10a~10a~10a~10a~7a~7a~6a~7a~a") ;; category,variable,value,expected,tol,units,type,comment - (newval (string-intersperse - (append - (list - (format #f fmtstr "Category" "Variable" "Value" "Expected" "Tol" "Status" "Units" "Type" "Comment") - (format #f fmtstr "========" "========" "=====" "========" "===" "======" "=====" "====" "=======")) - (map (lambda (x) - (format #f fmtstr - (db:test-data-get-category x) - (db:test-data-get-variable x) - (db:test-data-get-value x) - (db:test-data-get-expected x) - (db:test-data-get-tol x) - (db:test-data-get-status x) - (db:test-data-get-units x) - (db:test-data-get-type x) - (db:test-data-get-comment x))) - (rmt:read-test-data run-id test-id "%"))) - "\n"))) - (if (not (equal? currval newval)) - (iup:attribute-set! test-data "VALUE" newval ))))) ;; "TITLE" newval))))) - test-data)) - ;;(dashboard:run-controls) - ))) - (iup:attribute-set! tabs "TABTITLE0" "Steps") - (iup:attribute-set! tabs "TABTITLE1" "Test Data") - tabs)))) - (iup:show self) - (iup:callback-set! *tim* "ACTION_CB" - (lambda (x) - ;; Now start keeping the gui updated from the db - (refreshdat) ;; update from the db here - ;(thread-suspend! other-thread) - (if *exit-started* - (set! 
*exit-started* 'ok)))))))))) - Index: db.scm ================================================================== --- db.scm +++ db.scm @@ -50,9 +50,6 @@ (define *rundb-mutex* (make-mutex)) ;; prevent problems opening/closing rundb's (define *number-of-writes* 0) (define *number-non-write-queries* 0) - -;; tiresome setup for rmtmod (and other mods) goes here -;; (set-fn 'db:dbfile-path common:get-db-tmp-area) (set-fn 'db:setup dbmod#db:setup) Index: dbmod.scm ================================================================== --- dbmod.scm +++ dbmod.scm @@ -29,11 +29,11 @@ * (import scheme chicken data-structures extras) (import (prefix sqlite3 sqlite3:) posix typed-records srfi-18 srfi-69 format ports srfi-1 matchable stack regex - srfi-13) + srfi-13 stack) (import commonmod) (import configfmod) (import keysmod) (import files) @@ -46,14 +46,10 @@ ;;====================================================================== ;; Some utility stuff moved from common.scm ;;====================================================================== -(define (db:dbdat-get-path dbdat) - (if (pair? dbdat) - (cdr dbdat) - #f)) (define (common:get-area-name alldat #!optional (areapath-in #f)) (let* ((configdat (alldat-mtconfig alldat)) (areapath (or (alldat-areapath alldat) (get-environment-variable "MT_RUN_AREA_HOME") @@ -112,16 +108,10 @@ (set! dbdir dbpath) (alldat-tmppath-set! alldat dbpath) dbpath)) #f)))) -;; ;; legacy handling of structure for managing db's. Refactor this into dbr:? -(define (db:dbdat-get-db dbdat) - (if (pair? dbdat) - (car dbdat) - dbdat)) - ;; Make the dbstruct, setup up auxillary db's and call for main db at least once ;; ;; called in http-transport and replicated in rmt.scm for *local* access. ;; (define (db:setup do-sync alldat #!key (areapath #f)) @@ -137,107 +127,10 @@ (db:open-db alldat areapath: areapath do-sync: do-sync) (debug:print-info 13 log-port "Done db:open-db") ;; (set! 
*dbstruct-db* dbstruct) alldat)))) -;; This routine creates the db if not already present. It is only called if the db is not already opened -;; -(define (db:open-db alldat #!key (areapath #f)(do-sync #t)) ;; TODO: actually use areapath - (let ((toppath (alldat-areapath alldat)) - (configdat (alldat-mtconfig alldat)) - (log-port (alldat-log-port alldat)) - (tmpdb-stack (alldat-dbstack alldat))) ;; RA => Returns the first reference in alldat - (if (stack? tmpdb-stack) - (db:get-db tmpdb-stack) ;; get previously opened db (will create new db handle if all in the stack are already used - (let* ((max-stale-tmp (configf:lookup-number configdat "server" "filling-db-max-stale-seconds" default: 10)) - (dbpath (common:get-db-tmp-area alldat)) ;; path to tmp db area - (dbexists (file-exists? dbpath)) - (tmpdbfname (conc dbpath "/megatest.db")) - (dbfexists (file-exists? tmpdbfname)) ;; (conc dbpath "/megatest.db"))) - (mtdbexists (file-exists? (conc toppath "/megatest.db"))) - - (mtdbmodtime (if mtdbexists (common:lazy-sqlite-db-modification-time (conc toppath "/megatest.db")) #f)) - (tmpdbmodtime (if dbfexists (common:lazy-sqlite-db-modification-time tmpdbfname) #f)) - (mtdb (db:open-megatest-db)) - (mtdbpath (db:dbdat-get-path mtdb)) - (tmpdb (db:open-megatest-db path: dbpath)) ;; lock-create-open dbpath db:initialize-main-db)) - (refndb (db:open-megatest-db path: dbpath name: "megatest_ref.db")) - (write-access (file-write-access? mtdbpath)) - - ;;(mtdbmodtime (if mtdbexists - ;;(common:lazy-sqlite-db-modification-time mtdbpath) - ;;#f)) ; moving this before db:open-megatest-db is - ;;called. if wal mode is on -WAL and -shm file get - ;;created with causing the tmpdbmodtime timestamp - ;;always greater than mtdbmodtime (tmpdbmodtime (if - ;;dbfexists (common:lazy-sqlite-db-modification-time - ;;tmpdbfname) #f)) - - ;;if wal mode is on -WAL and -shm file get created when - ;;db:open-megatest-db is called. 
modtimedelta will - ;;always be < 10 so db in tmp not get synced - ;;(tmpdbmodtime (if dbfexists (db:get-last-update-time - ;;(car tmpdb)) #f)) (fmt (file-modification-time - ;;tmpdbfname)) - - (modtimedelta (and mtdbmodtime tmpdbmodtime (- mtdbmodtime tmpdbmodtime)))) - - (handle-exceptions - exn - (let ((call-chain (get-call-chain)) - (msg ((condition-property-accessor 'exn 'message) exn))) - (debug:print 0 log-port "ERROR: attempted to drop triggers on MTRA/megatest.db but failed. Error is " msg) - (set! write-access #f)) ;; if we failed to drop the triggers then we probably don't have write access - (when write-access - (sqlite3:execute (car mtdb) "drop trigger if exists update_tests_trigger") - (sqlite3:execute (car mtdb) "drop trigger if exists update_runs_trigger"))) - - ;;(print "mtdbmodtime " mtdbmodtime " tmpdbmodtime " - ;;tmpdbmodtime " mtdbpath " mtdbpath " " (conc *toppath* - ;;"/megatest.db")) (debug:print-info 13 log-port - ;;"db:open-db>> mtdbpath="mtdbpath" mtdbexists="mtdbexists" - ;;and write-access="write-access) - (if (and dbexists (not write-access)) - (begin - (set! *db-write-access* #f) - (alldat-read-only-set! alldat #t))) - (alldat-mtdb-set! alldat mtdb) - (alldat-tmpdb-set! alldat tmpdb) - (alldat-dbstack-set! alldat (make-stack)) ;; why a stack? - (stack-push! (alldat-dbstack alldat) tmpdb) ;; olddb is already a (cons db path) - (alldat-refndb-set! alldat refndb) - ;; (mutex-unlock! *rundb-mutex*) - (if (and (or (not dbfexists) - (and modtimedelta - (> modtimedelta max-stale-tmp))) ;; if db in tmp is over ten seconds older than the file in MTRA then do a sync back - do-sync) - (begin - (debug:print 1 log-port "filling db " (db:dbdat-get-path tmpdb) " with data \n from " (db:dbdat-get-path mtdb) " mod time delta: " modtimedelta) - (db:sync-tables (db:sync-all-tables-list alldat) #f mtdb refndb tmpdb) - ;touch tmp db to avoid wal mode wierdness - (set! 
(file-modification-time tmpdbfname) (current-seconds)) - (debug:print-info 13 log-port "db:sync-all-tables-list done.") - ) - (debug:print 4 log-port " db, " (db:dbdat-get-path tmpdb) " already exists or fresh enough, not propogating data from\n " (db:dbdat-get-path mtdb) " mod time delta: " modtimedelta) ) - ;; (db:multi-db-sync alldat 'old2new)) ;; migrate data from megatest.db automatically - tmpdb)))) - -;; Get/open a database -;; if run-id => get run specific db -;; if #f => get main db -;; if db already open - return inmem -;; if db not open, open inmem, rundb and sync then return inmem -;; inuse gets set automatically for rundb's -;; -(define (db:get-db alldat) ;; run-id) - (if (stack? (alldat-dbstack alldat)) - (if (stack-empty? (alldat-dbstack alldat)) - (let ((newdb (db:open-megatest-db path: (common:get-db-tmp-area alldat)))) - ;; (stack-push! (alldat-dbstack alldat) newdb) - newdb) - (stack-pop! (alldat-dbstack alldat))) - (db:open-db alldat))) (define (db:sync-all-tables-list alldat) (append (db:sync-main-list alldat) db:sync-tests-only)) @@ -336,42 +229,10 @@ db "SELECT fieldname FROM keys ORDER BY id DESC;"))) (alldat-db-keys-set! alldat res) res))) -;; (db:with-db alldat run-id sqlite3:exec "select blah fgrom blaz;") -;; r/w is a flag to indicate if the db is modified by this query #t = yes, #f = no -;; -(define (db:with-db alldat run-id r/w proc . params) - (let* ((have-struct (alldat? 
alldat)) - (dbdat (if have-struct - (db:get-db alldat) - #f)) - (db (if have-struct - (db:dbdat-get-db dbdat) - alldat)) - (use-mutex (> (alldat-api-process-request-count alldat) 25)) - (db-with-db-mutex (alldat-db-with-db-mutex alldat)) - (log-port (alldat-log-port alldat))) - (if (and use-mutex - (common:low-noise-print 120 "over-50-parallel-api-requests")) - (debug:print-info 0 log-port (alldat-api-process-request-count alldat) " parallel api requests being processed in process " (current-process-id) ", throttling access")) - (if (common:low-noise-print 600 (conc "parallel-api-requests" (alldat-max-api-process-requests alldat))) - (debug:print-info 2 log-port "Parallel api request count: " (alldat-api-process-request-count alldat) " max parallel requests: " (alldat-max-api-process-requests alldat))) - (handle-exceptions - exn - (begin - (print-call-chain (current-error-port)) - (debug:print-error 0 log-port "sqlite3 issue in db:with-db, alldat=" alldat ", run-id=" run-id ", proc=" proc ", params=" params " error: " ((condition-property-accessor 'exn 'message) exn)) - ;; there is no recovering at this time. exit - (exit 50)) - (if use-mutex (mutex-lock! db-with-db-mutex)) - (let ((res (apply proc db params))) - (if use-mutex (mutex-unlock! db-with-db-mutex)) - (if dbdat (stack-push! (alldat-dbstack alldat) dbdat)) - res)))) - ;; tbls is ( ("tablename" ( "field1" [#f|proc1] ) ( "field2" [#f|proc2] ) .... ) ) ;; db's are dbdat's ;; ;; if last-update specified ("field-name" . time-in-seconds) ;; then sync only records where field-name >= time-in-seconds @@ -2195,23 +2056,10 @@ (let* ((header (append keys remfields)) (keystr (conc (keys->keystr keys) "," (string-intersperse remfields ",")))) (list keystr header))) -;; make a query (fieldname like 'patt1' OR fieldname -(define (db:patt->like fieldname pattstr #!key (comparator " OR ")) - (let ((patts (if (string? 
pattstr) - (string-split pattstr ",") - '("%")))) - (string-intersperse (map (lambda (patt) - (let ((wildtype (if (substring-index "%" patt) "LIKE" "GLOB"))) - (conc fieldname " " wildtype " '" patt "'"))) - (if (null? patts) - '("") - patts)) - comparator))) - ;; register a test run with the db, this accesses the main.db and does NOT ;; use server api ;; (define (db:register-run dbstruct keyvals runname state status user contour-in) @@ -2244,53 +2092,10 @@ res))) (begin (debug:print-error 0 *default-log-port* "Called without all necessary keys") #f)))) -;; replace header and keystr with a call to runs:get-std-run-fields -;; -;; keypatts: ( (KEY1 "abc%def")(KEY2 "%") ) -;; runpatts: patt1,patt2 ... -;; -(define (db:get-runs dbstruct runpatt count offset keypatts) - (let* ((res '()) - (keys (db:get-keys dbstruct)) - (runpattstr (db:patt->like "runname" runpatt)) - (remfields (list "id" "runname" "state" "status" "owner" "event_time")) - (header (append keys remfields)) - (keystr (conc (keys->keystr keys) "," - (string-intersperse remfields ","))) - (qrystr (conc "SELECT " keystr " FROM runs WHERE (" runpattstr ") " ;; runname LIKE ? " - ;; Generate: " AND x LIKE 'keypatt' ..." - (if (null? keypatts) "" - (conc " AND " - (string-join - (map (lambda (keypatt) - (let ((key (car keypatt)) - (patt (cadr keypatt))) - (db:patt->like key patt))) - keypatts) - " AND "))) - " AND state != 'deleted' ORDER BY event_time DESC " - (if (number? count) - (conc " LIMIT " count) - "") - (if (number? offset) - (conc " OFFSET " offset) - "")))) - (debug:print-info 11 *default-log-port* "db:get-runs START qrystr: " qrystr " keypatts: " keypatts " offset: " offset " limit: " count) - (db:with-db dbstruct #f #f - (lambda (db) - (sqlite3:for-each-row - (lambda (a . x) - (set! 
res (cons (apply vector a x) res))) - db - qrystr - ))) - (debug:print-info 11 *default-log-port* "db:get-runs END qrystr: " qrystr " keypatts: " keypatts " offset: " offset " limit: " count) - (vector header res))) - (define-record simple-run target id runname state status owner event_time) (define-record-printer (simple-run x out) (fprintf out "#,(simple-run ~S ~S ~S ~S)" (simple-run-target x) (simple-run-id x) (simple-run-runname x) (time->string (seconds->local-time (simple-run-event_time x) )))) @@ -2802,117 +2607,10 @@ ;;====================================================================== ;; T E S T S ;;====================================================================== -;; states and statuses are lists, turn them into ("PASS","FAIL"...) and use NOT IN -;; i.e. these lists define what to NOT show. -;; states and statuses are required to be lists, empty is ok -;; not-in #t = above behaviour, #f = must match -;; mode: -;; 'dashboard - use state = 'COMPLETED' AND status in ( statuses ) OR state in ( states ) -;; -(define (db:get-tests-for-run dbstruct run-id testpatt states statuses offset limit not-in sort-by sort-order qryvals last-update mode) - (let* ((qryvalstr (case qryvals - ((shortlist) "id,run_id,testname,item_path,state,status") - ((#f) db:test-record-qry-selector) ;; "id,run_id,testname,state,status,event_time,host,cpuload,diskfree,uname,rundir,item_path,run_duration,final_logf,comment") - (else qryvals))) - (res '()) - ;; if states or statuses are null then assume match all when not-in is false - (states-qry (if (null? states) - #f - (conc " state " - (if (eq? mode 'dashboard) - " IN ('" - (if not-in - " NOT IN ('" - " IN ('")) - (string-intersperse states "','") - "')"))) - (statuses-qry (if (null? statuses) - #f - (conc " status " - (if (eq? 
mode 'dashboard) - " IN ('" - (if not-in - " NOT IN ('" - " IN ('") ) - (string-intersperse statuses "','") - "')"))) - (interim-qry (conc " AND " (if not-in "NOT " "") "( state='COMPLETED' " (if statuses-qry (conc " AND " statuses-qry " ) ") " ) ") - (if states-qry - (conc (if not-in " AND " " OR ") states-qry ) ;; " ) ") - ""))) - (states-statuses-qry - (cond - ((and states-qry statuses-qry) - (case mode - ((dashboard) - (if not-in - (conc " AND (state='COMPLETED' AND status NOT IN ('" (string-intersperse statuses "','") "')) " - " OR (state != 'COMPLETED' AND state NOT IN ('" (string-intersperse states "','") "')) ") - (conc " AND (state='COMPLETED' AND status IN ('" (string-intersperse statuses "','") "')) " - " OR (state NOT IN ('COMPLETED','DELETED') AND state IN ('" (string-intersperse states "','") "')) "))) - (else (conc " AND ( " states-qry " AND " statuses-qry " ) ")))) - (states-qry - (case mode - ((dashboard) (conc " AND " (if not-in "NOT " "") " state IN ('" (string-intersperse states "','") "') ")) ;; interim-qry) - (else (conc " AND " states-qry)))) - (statuses-qry - (case mode - ((dashboard) (conc " AND " (if not-in "NOT " "") " status IN ('" (string-intersperse statuses "','") "') ")) ;; interim-qry) - (else (conc " AND " statuses-qry)))) - (else ""))) - (tests-match-qry (tests:match->sqlqry testpatt)) - (qry (conc "SELECT " qryvalstr - (if run-id - " FROM tests WHERE run_id=? " - " FROM tests WHERE ? > 0 ") ;; should work? - (if last-update " " " AND state != 'DELETED' ") ;; if using last-update we want deleted tests? 
- states-statuses-qry - (if tests-match-qry (conc " AND (" tests-match-qry ") ") "") - (if last-update (conc " AND last_update >= " last-update " ") "") - (case sort-by - ((rundir) " ORDER BY length(rundir) ") - ((testname) (conc " ORDER BY testname " (if sort-order (conc sort-order ",") "") " item_path ")) - ((statestatus) (conc " ORDER BY state " (if sort-order (conc sort-order ",") "") " status ")) - ((event_time) " ORDER BY event_time ") - (else (if (string? sort-by) - (conc " ORDER BY " sort-by " ") - " "))) - (if sort-order sort-order " ") - (if limit (conc " LIMIT " limit) " ") - (if offset (conc " OFFSET " offset) " ") - ";" - ))) - (debug:print-info 8 *default-log-port* "db:get-tests-for-run run-id=" run-id ", qry=" qry) - (db:with-db dbstruct run-id #f - (lambda (db) - (sqlite3:for-each-row - (lambda (a . b) ;; id run-id testname state status event-time host cpuload diskfree uname rundir item-path run-duration final-logf comment) - (set! res (cons (apply vector a b) res))) ;; id run-id testname state status event-time host cpuload diskfree uname rundir item-path run-duration final-logf comment) res))) - db - qry - (or run-id 1) ;; 1 > 0 , for the case where we are seeking tests matching criteral for all runs - ))) - (case qryvals - ((shortlist)(map db:test-short-record->norm res)) - ((#f) res) - (else res)))) - -(define (db:test-short-record->norm inrec) - ;; "id,run_id,testname,item_path,state,status" - ;; "id,run_id,testname,state,status,event_time,host,cpuload,diskfree,uname,rundir,item_path,run_duration,final_logf,comment - (vector (vector-ref inrec 0) ;; id - (vector-ref inrec 1) ;; run_id - (vector-ref inrec 2) ;; testname - (vector-ref inrec 4) ;; state - (vector-ref inrec 5) ;; status - -1 "" -1 -1 "" "-" - (vector-ref inrec 3) ;; item-path - -1 "-" "-")) - (define (db:get-tests-for-run-state-status dbstruct run-id testpatt) (let* ((res '()) (tests-match-qry (tests:match->sqlqry testpatt)) (qry (conc "SELECT id,testname,item_path,state,status FROM 
tests WHERE run_id=? " (if tests-match-qry (conc " AND (" tests-match-qry ") ") "")))) @@ -2939,16 +2637,10 @@ db "SELECT run_id,testname,item_path,state,status FROM tests WHERE id=?;" test-id))) res)) -;; get a useful subset of the tests data (used in dashboard -;; use db:mintest-get-{id ,run_id,testname ...} -;; -(define (db:get-tests-for-run-mindata dbstruct run-id testpatt states statuses not-in) - (db:get-tests-for-run dbstruct run-id testpatt states statuses #f #f not-in #f #f "id,run_id,testname,state,status,event_time,item_path" 0 #f)) - ;; do not use. ;; (define (db:get-tests-for-runs dbstruct run-ids testpatt states statuses #!key (not-in #f)(qryvals #f)) ;; (db:delay-if-busy) (let ((res '())) @@ -3198,14 +2890,10 @@ db "SELECT attemptnum FROM tests WHERE id=?;" #f test-id)))) -(define db:test-record-fields '("id" "run_id" "testname" "state" "status" "event_time" - "host" "cpuload" "diskfree" "uname" "rundir" "item_path" - "run_duration" "final_logf" "comment" "shortdir" "attemptnum" "archived" "last_update")) - ;; fields *must* be a non-empty list ;; (define (db:field->number fieldname fields) (if (null? fields) #f @@ -3216,12 +2904,10 @@ indx (if (null? tal) #f (loop (car tal)(cdr tal)(+ indx 1))))))) -(define db:test-record-qry-selector (string-intersperse db:test-record-fields ",")) - ;; NOTE: Use db:test-get* to access records ;; NOTE: This needs rundir decoding? Decide, decode here or where used? For the moment decode where used. (define (db:get-all-tests-info-by-run-id dbstruct run-id) (let* ((res '())) @@ -4798,9 +4484,11 @@ ;; brutal clean up (stack-push! 
(dbr:dbstruct-dbstack dbstruct) dbdat) (system "rm -rf tempdir"))) ;; (db:extract-ods-file db "outputfile.ods" '(("sysname" "%")("fsname" "%")("datapath" "%")) "%") +;; tiresome setup for rmtmod (and other mods) goes here +;; (set-fn 'db:dbfile-path common:get-db-tmp-area) ) Index: dcommon.scm ================================================================== --- dcommon.scm +++ dcommon.scm @@ -38,1301 +38,5 @@ (include "common_records.scm") (include "db_records.scm") (include "key_records.scm") (include "run_records.scm") -;; yes, this is non-ideal -(define dashboard:update-summary-tab #f) -(define dashboard:update-servers-table #f) - -;;====================================================================== -;; C O M M O N D A T A S T R U C T U R E -;;====================================================================== -;; - -;;====================================================================== -;; D O T F I L E -;;====================================================================== - -(define (dcommon:write-dotfile fname dat) - (with-output-to-file fname - (lambda () - (pp dat)))) - -;;====================================================================== -;; TARGET AND PATTERN MANIPULATIONS -;;====================================================================== - -;; Convert to and from list of lines (for a text box) -;; "," => "\n" -(define (dboard:test-patt->lines test-patt) - (string-substitute (regexp ",") "\n" test-patt)) - -(define (dboard:lines->test-patt lines) - (string-substitute (regexp "\n") "," lines #t)) - - -;;====================================================================== -;; P R O C E S S R U N S -;;====================================================================== - -;; MOVE THIS INTO *data* -(define *cachedata* (make-hash-table)) -(hash-table-set! *cachedata* "runid-to-col" (make-hash-table)) -(hash-table-set! 
*cachedata* "testname-to-row" (make-hash-table)) - -;; modify a cell if the data is changed, return #t or-ed with previous if modified, #f elsewise -;; -(define (dcommon:modifiy-if-different mtrx cell-name new-val prev-changed) - (let ((curr-val (iup:attribute mtrx cell-name))) - (if (not (equal? curr-val new-val)) - (begin - (iup:attribute-set! mtrx cell-name col-name) - #t) ;; need a re-draw - prev-changed))) - - -;; TO-DO -;; 1. Make "data" hash-table hierarchial store of all displayed data -;; 2. Update synchash to understand "get-runs", "get-tests" etc. -;; 3. Add extraction of filters to synchash calls -;; -;; NOTE: Used in newdashboard -;; -;; Mode is 'full or 'incremental for full refresh or incremental refresh -;; (define (dcommon:run-update keys data runname keypatts testpatt states statuses mode window-id) -;; (let* (;; count and offset => #f so not used -;; ;; the synchash calls modify the "data" hash -;; (changed #f) -;; (get-runs-sig (conc (client:get-signature) " get-runs")) -;; (get-tests-sig (conc (client:get-signature) " get-tests")) -;; (get-details-sig (conc (client:get-signature) " get-test-details")) -;; -;; ;; test-ids to get and display are indexed on window-id in curr-test-ids hash -;; (test-ids (hash-table-values (dboard:tabdat-curr-test-ids data))) -;; ;; run-id is #f in next line to send the query to server 0 -;; (run-changes (synchash:client-get 'db:get-runs get-runs-sig (length keypatts) data #f runname #f #f keypatts)) -;; (tests-detail-changes (if (not (null? test-ids)) -;; (synchash:client-get 'db:get-test-info-by-ids get-details-sig 0 data #f test-ids) -;; '())) -;; -;; ;; Now can calculate the run-ids -;; (run-hash (hash-table-ref/default data get-runs-sig #f)) -;; (run-ids (if run-hash (filter number? (hash-table-keys run-hash)) '())) -;; -;; (all-test-changes (let ((res (make-hash-table))) -;; (for-each (lambda (run-id) -;; (if (> run-id 0) -;; (hash-table-set! 
res run-id (synchash:client-get 'db:get-tests-for-run-mindata get-tests-sig 0 data run-id 1 testpatt states statuses #f)))) -;; run-ids) -;; res)) -;; (runs-hash (hash-table-ref/default data get-runs-sig #f)) -;; (header (hash-table-ref/default runs-hash "header" #f)) -;; (run-ids (sort (filter number? (hash-table-keys runs-hash)) -;; (lambda (a b) -;; (let* ((record-a (hash-table-ref runs-hash a)) -;; (record-b (hash-table-ref runs-hash b)) -;; (time-a (db:get-value-by-header record-a header "event_time")) -;; (time-b (db:get-value-by-header record-b header "event_time"))) -;; (> time-a time-b))) -;; )) -;; (runid-to-col (hash-table-ref *cachedata* "runid-to-col")) -;; (testname-to-row (hash-table-ref *cachedata* "testname-to-row")) -;; (colnum 1) -;; (rownum 0) -;; (cellname (conc rownum ":" colnum))) ;; rownum = 0 is the header -;; ;; (debug:print 0 *default-log-port* "test-ids " test-ids ", tests-detail-changes " tests-detail-changes) -;; -;; ;; tests related stuff -;; ;; (all-testnames (delete-duplicates (map db:test-get-testname test-changes)))) -;; -;; ;; Given a run-id and testname/item_path calculate a cell R:C -;; -;; ;; NOTE: Also build the test tree browser and look up table -;; ;; -;; ;; Each run is unique on its keys and runname or run-id, store in hash on colnum -;; (for-each (lambda (run-id) -;; (let* ((run-record (hash-table-ref/default runs-hash run-id #f)) -;; (key-vals (map (lambda (key)(db:get-value-by-header run-record header key)) -;; keys)) -;; (run-name (db:get-value-by-header run-record header "runname")) -;; (col-name (conc (string-intersperse key-vals "\n") "\n" run-name)) -;; (run-path (append key-vals (list run-name)))) -;; (hash-table-set! (dboard:tabdat-run-keys data) run-id run-path) -;; ;; modify cell - but only if changed -;; (set! changed (dcommon:modifiy-if-different (dboard:tabdat-runs-matrix data) cellname col-name changed)) -;; (hash-table-set! 
runid-to-col run-id (list colnum run-record)) -;; ;; Here we update the tests treebox and tree keys -;; (tree:add-node (dboard:tabdat-tests-tree data) "Runs" (append key-vals (list run-name)) -;; userdata: (conc "run-id: " run-id)) -;; (set! colnum (+ colnum 1)))) -;; run-ids) -;; -;; ;; Scan all tests to be displayed and organise all the test names, respecting what is in the hash table -;; ;; Do this analysis in the order of the run-ids, the most recent run wins -;; (for-each (lambda (run-id) -;; (let* ((run-path (hash-table-ref (dboard:tabdat-run-keys data) run-id)) -;; (test-changes (hash-table-ref all-test-changes run-id)) -;; (new-test-dat (car test-changes)) -;; (removed-tests (cadr test-changes)) -;; (tests (sort (map cadr (filter (lambda (testrec) -;; (eq? run-id (db:mintest-get-run_id (cadr testrec)))) -;; new-test-dat)) -;; (lambda (a b) -;; (let ((time-a (db:mintest-get-event_time a)) -;; (time-b (db:mintest-get-event_time b))) -;; (> time-a time-b))))) -;; ;; test-changes is a list of (( id record ) ... ) -;; ;; Get list of test names sorted by time, remove tests -;; (test-names (delete-duplicates (map (lambda (t) -;; (let ((i (db:mintest-get-item_path t)) -;; (n (db:mintest-get-testname t))) -;; (if (string=? i "") -;; (conc " " i) -;; n))) -;; tests))) -;; (colnum (car (hash-table-ref runid-to-col run-id)))) -;; ;; for each test name get the slot if it exists and fill in the cell -;; ;; or take the next slot and fill in the cell, deal with items in the -;; ;; run view panel? The run view panel can have a tree selector for -;; ;; browsing the tests/items -;; -;; ;; SWITCH THIS TO USING CHANGED TESTS ONLY -;; (for-each (lambda (test) -;; (let* ((test-id (db:mintest-get-id test)) -;; (state (db:mintest-get-state test)) -;; (status (db:mintest-get-status test)) -;; (testname (db:mintest-get-testname test)) -;; (itempath (db:mintest-get-item_path test)) -;; (fullname (conc testname "/" itempath)) -;; (dispname (if (string=? 
itempath "") testname (conc " " itempath))) -;; (rownum (hash-table-ref/default testname-to-row fullname #f)) -;; (test-path (append run-path (if (equal? itempath "") -;; (list testname) -;; (list testname itempath)))) -;; (tb (dboard:tabdat-tests-tree data))) -;; (print "INFONOTE: run-path: " run-path) -;; (tree:add-node (dboard:tabdat-tests-tree data) "Runs" -;; test-path -;; userdata: (conc "test-id: " test-id)) -;; (let ((node-num (tree:find-node tb (cons "Runs" test-path))) -;; (color (car (gutils:get-color-for-state-status state status)))) -;; (debug:print 0 *default-log-port* "node-num: " node-num ", color: " color) -;; -;; (set! changed (dcommon:modifiy-if-different -;; tb -;; (conc "COLOR" node-num) -;; color changed)) -;; -;; ;; (iup:attribute-set! tb (conc "COLOR" node-num) color) -;; ) -;; (hash-table-set! (dboard:tabdat-path-test-ids data) test-path test-id) -;; (if (not rownum) -;; (let ((rownums (hash-table-values testname-to-row))) -;; (set! rownum (if (null? rownums) -;; 1 -;; (+ 1 (common:max rownums)))) -;; (hash-table-set! testname-to-row fullname rownum) -;; ;; create the label -;; (set! changed (dcommon:modifiy-if-different -;; (dboard:tabdat-runs-matrix data) -;; (conc rownum ":" 0) -;; dispname -;; changed)) -;; ;; (iup:attribute-set! (dboard:tabdat-runs-matrix data) -;; ;; (conc rownum ":" 0) dispname) -;; )) -;; ;; set the cell text and color -;; ;; (debug:print 2 *default-log-port* "rownum:colnum=" rownum ":" colnum ", state=" status) -;; (set! changed (dcommon:modifiy-if-different -;; (dboard:tabdat-runs-matrix data) -;; (conc rownum ":" colnum) -;; (if (member state '("ARCHIVED" "COMPLETED")) -;; status -;; state) -;; changed)) -;; ;; (iup:attribute-set! (dboard:tabdat-runs-matrix data) -;; ;; (conc rownum ":" colnum) -;; ;; (if (member state '("ARCHIVED" "COMPLETED")) -;; ;; status -;; ;; state)) -;; (set! 
changed (dcommon:modifiy-if-different -;; (dboard:tabdat-runs-matrix data) -;; (conc "BGCOLOR" rownum ":" colnum) -;; (car (gutils:get-color-for-state-status state status)) -;; changed)) -;; ;; (iup:attribute-set! (dboard:tabdat-runs-matrix data) -;; ;; (conc "BGCOLOR" rownum ":" colnum) -;; ;; (car (gutils:get-color-for-state-status state status))) -;; )) -;; tests))) -;; run-ids) -;; -;; (let ((updater (hash-table-ref/default (dboard:commondat-updaters commondat) window-id #f))) -;; (if updater (updater (hash-table-ref/default data get-details-sig #f)))) -;; -;; (if changed (iup:attribute-set! (dboard:tabdat-runs-matrix data) "REDRAW" "ALL")) -;; ;; (debug:print 2 *default-log-port* "run-changes: " run-changes) -;; ;; (debug:print 2 *default-log-port* "test-changes: " test-changes) -;; (list run-changes all-test-changes))) - -(define (dcommon:runsdat-get-col-num dat target runname force-set) - (let* ((runs-index (dboard:runsdat-runs-index dat)) - (col-name (conc target "/" runname)) - (res (hash-table-ref/default runs-index col-name #f))) - (if res - res - (if force-set - (let ((max-col-num (+ 1 (common:max (cons-1 (hash-table-values runs-index)))))) - (hash-table-set! runs-index col-name max-col-num) - max-col-num))))) - -(define (dcommon:runsdat-get-row-num dat testname itempath force-set) - (let* ((tests-index (dboard:runsdat-runs-index dat)) - (row-name (conc testname "/" itempath)) - (res (hash-table-ref/default runs-index row-name #f))) - (if res - res - (if force-set - (let ((max-row-num (+ 1 (common:max (cons -1 (hash-table-values tests-index)))))) - (hash-table-set! runs-index row-name max-row-num) - max-row-num))))) - -(define (dcommon:rundat-copy-tests-to-by-name rundat) - (let ((src-ht (dboard:rundat-tests rundat)) - (trg-ht (dboard:rundat-tests-by-name rundat))) - (if (and (hash-table? src-ht)(hash-table? trg-ht)) - (begin - (hash-table-clear! trg-ht) - (for-each - (lambda (testdat) - (hash-table-set! 
trg-ht (test:test-get-fullname testdat) testdat)) - (hash-table-values src-ht))) - (debug:print 0 *default-log-port* "WARNING: src-ht " src-ht " trg-ht " trg-ht)))) - - -;;====================================================================== -;; TESTS DATA -;;====================================================================== - -;; Produce a list of lists ready for common:sparse-list-generate-index -;; -(define (dcommon:minimize-test-data tests-dat) - (if (null? tests-dat) - '() - (let loop ((hed (car tests-dat)) - (tal (cdr tests-dat)) - (res '())) - (let* ((test-id (db:test-get-id hed)) ;; look at the tests-dat spec for locations - (test-name (db:test-get-testname hed)) - (item-path (db:test-get-item-path hed)) - (state (db:test-get-state hed)) - (status (db:test-get-status hed)) - (event-time (db:test-get-event_time hed)) - (newitem (list test-name item-path (list test-id state status event-time)))) - (if (null? tal) - (reverse (cons newitem res)) - (loop (car tal)(cdr tal)(cons newitem res))))))) - -(define (dcommon:tests-mindat->hash tests-mindat) - (let* ((res (make-hash-table))) - (for-each - (lambda (item) - (let* ((test-name+item-path (cons (list-ref item 0) (list-ref item 1))) - (value (list-ref item 2))) - (hash-table-set! 
res test-name+item-path value))) - tests-mindat) - res)) - -;; return 1 if status1 is better -;; return 0 if status1 and 2 are equally good -;; return -1 if status2 is better -(define (dcommon:status-compare3 status1 status2) - (let* - ((status-goodness-ranking (cdr ;; cdr to drop first item -- "n/a" - (append (map cadr *common:std-statuses*) - '(#f)) ;; algorithm requres last item to be #f - ) ) - (mem1 (member status1 status-goodness-ranking)) - (mem2 (member status2 status-goodness-ranking)) - ) - (cond - ((and (not mem1) (not mem2)) 0) - ((not mem1) -1) - ((not mem2) 1) - ((= (length mem1) (length mem2)) 0) - ((> (length mem1) (length mem2)) 1) - (else -1)))) - -(define (dcommon:xor-tests-mindat src-tests-mindat dest-tests-mindat #!key (hide-clean #f)) - (let* ((src-hash (dcommon:tests-mindat->hash src-tests-mindat)) - (dest-hash (dcommon:tests-mindat->hash dest-tests-mindat)) - (all-keys - (reverse (sort - (delete-duplicates - (append (hash-table-keys src-hash) (hash-table-keys dest-hash))) - - (lambda (a b) - (cond - ((< 0 (string-compare3 (car a) (car b))) #t) - ((> 0 (string-compare3 (car a) (car b))) #f) - ((< 0 (string-compare3 (cdr a) (cdr b))) #t) - (else #f))) - - )))) - (let ((res - (map ;; TODO: rename xor to delta globally in dcommon and dashboard - (lambda (key) - (let* ((test-name (car key)) - (item-path (cdr key)) - - (dest-value (hash-table-ref/default dest-hash key #f)) ;; (list test-id state status) - (dest-test-id (if dest-value (list-ref dest-value 0) #f)) - (dest-state (if dest-value (list-ref dest-value 1) #f)) - (dest-status (if dest-value (list-ref dest-value 2) #f)) - - (src-value (hash-table-ref/default src-hash key #f)) ;; (list test-id state status) - (src-test-id (if src-value (list-ref src-value 0) #f)) - (src-state (if src-value (list-ref src-value 1) #f)) - (src-status (if src-value (list-ref src-value 2) #f)) - - (incomplete-statuses '("DELETED" "INCOMPLETE" "STUCK/DEAD" "N/A")) ;; if any of these statuses apply, treat test as 
incomplete - - (dest-complete - (and dest-value dest-state dest-status - (equal? dest-state "COMPLETED") - (not (member dest-status incomplete-statuses)))) - (src-complete - (and src-value src-state src-status - (equal? src-state "COMPLETED") - (not (member src-status incomplete-statuses)))) - (status-compare-result (dcommon:status-compare3 src-status dest-status)) - (xor-new-item - (cond - ;; complete, for this case means: state=compelte AND status not in ( deleted uncomplete stuck/dead n/a ) - ;; neither complete -> bad - - ;; src !complete, dest complete -> better - ((and (not dest-complete) (not src-complete)) - (list dest-test-id "BOTH-BAD" "BOTH-INCOMPLETE")) - ((not dest-complete) - (list src-test-id "DIFF-MISSING" "DEST-INCOMPLETE")) - ((not src-complete) - (list dest-test-id "DIFF-NEW" "SRC-INCOMPLETE")) - ((and - (equal? src-state dest-state) - (equal? src-status dest-status)) - (list dest-test-id (conc "CLEAN") (conc "CLEAN-" dest-status) )) - ;; better or worse: pass > warn > waived > skip > fail > abort - ;; pass > warn > waived > skip > fail > abort - - ((= 1 status-compare-result) ;; src is better, dest is worse - (list dest-test-id "DIRTY-WORSE" (conc src-status "->" dest-status))) - (else - (list dest-test-id "DIRTY-BETTER" (conc src-status "->" dest-status))) - ))) - (list test-name item-path xor-new-item))) - all-keys))) - - (if hide-clean - (filter - (lambda (item) - ;;(print item) - (not - (equal? - "CLEAN" - (list-ref (list-ref item 2) 1)))) - res) - res)))) - -(define (dcommon:examine-xterm run-id test-id) - (let* ((testdat (rmt:get-test-info-by-id run-id test-id))) - (if (not testdat) - (begin - (debug:print 2 "ERROR: No test data found for test " test-id ", exiting") - (exit 1)) - (let* - ((rundir (if testdat - (db:test-get-rundir testdat) - logfile)) - (testfullname (if testdat (db:test-get-fullname testdat) "Gathering data ...")) - (xterm (lambda () - (if (directory-exists? 
rundir) - (let* ((shell (if (get-environment-variable "SHELL") - (conc "-e " (get-environment-variable "SHELL")) - "")) - (command (conc "cd " rundir - ";mt_xterm -T \"" (string-translate testfullname "()" " ") "\" " shell "&"))) - (print "Command =" command) - (common:without-vars - command - "MT_.*")) - (message-window (conc "Directory " rundir " not found")))))) - (xterm) - (print "Adding xterm code"))))) - -;;====================================================================== -;; D A T A T A B L E S -;;====================================================================== - -;; Table of keys -(define (dcommon:keys-matrix rawconfig) - (let* ((curr-row-num 1) - (key-vals (configf:section-vars rawconfig "fields")) - (keys-matrix (iup:matrix - #:alignment1 "ALEFT" - #:expand "YES" ;; "HORIZONTAL" ;; "VERTICAL" - ;; #:scrollbar "YES" - #:numcol 1 - #:numlin (length key-vals) - #:numcol-visible 1 - #:numlin-visible (length key-vals) - #:click-cb (lambda (obj lin col status) - (print "obj: " obj " lin: " lin " col: " col " status: " status))))) - ;; (iup:attribute-set! keys-matrix "0:0" "Run Keys") - (iup:attribute-set! keys-matrix "WIDTH0" 0) - (iup:attribute-set! keys-matrix "0:1" "Key Name") - ;; (iup:attribute-set! keys-matrix "WIDTH1" "100") - ;; fill in keys - (for-each - (lambda (var) - ;; (iup:attribute-set! keys-matrix "ADDLIN" (conc curr-row-num)) - (iup:attribute-set! keys-matrix (conc curr-row-num ":0") curr-row-num) - (iup:attribute-set! keys-matrix (conc curr-row-num ":1") var) - (set! curr-row-num (+ 1 curr-row-num))) ;; (config-lookup *configdat* "fields" var))) - key-vals) - (iup:attribute-set! 
keys-matrix "WIDTHDEF" "40") - keys-matrix)) - -;; Section to table -(define (dcommon:section-matrix rawconfig sectionname varcolname valcolname #!key (title #f)) - (let* ((curr-row-num 1) - (key-vals (configf:section-vars rawconfig sectionname)) - (section-matrix (iup:matrix - #:alignment1 "ALEFT" - ;; #:expand "YES" ;; "HORIZONTAL" - #:numcol 1 - #:numlin (length key-vals) - #:numcol-visible 1 - #:numlin-visible (min 10 (length key-vals)) - #:scrollbar "YES"))) - (iup:attribute-set! section-matrix "0:0" varcolname) - (iup:attribute-set! section-matrix "0:1" valcolname) - (iup:attribute-set! section-matrix "WIDTH1" "200") - ;; fill in keys - (for-each - (lambda (var) - ;; (iup:attribute-set! keys-matrix "ADDLIN" (conc curr-row-num)) - (iup:attribute-set! section-matrix (conc curr-row-num ":0") var) - (iup:attribute-set! section-matrix (conc curr-row-num ":1") (configf:lookup rawconfig sectionname var)) - (set! curr-row-num (+ 1 curr-row-num))) ;; (config-lookup *configdat* "fields" var))) - key-vals) - (iup:vbox - (iup:label (if title title (conc "Settings from [" sectionname "]")) - ;; #:size "5x" - #:expand "HORIZONTAL" - ) - section-matrix))) - -;; General data -;; -(define (dcommon:general-info) - (let ((general-matrix (iup:matrix - #:alignment1 "ALEFT" - #:expand "YES" ;; "HORIZONTAL" - #:numcol 1 - #:numlin 2 - #:numcol-visible 1 - #:numlin-visible 2))) - (iup:attribute-set! general-matrix "WIDTH1" "150") - (iup:attribute-set! general-matrix "0:1" "About this Megatest area") - ;; User (this is not always obvious - it is common to run as a different user - (iup:attribute-set! general-matrix "1:0" "User") - (iup:attribute-set! general-matrix "1:1" (current-user-name)) - ;; Megatest area - ;; (iup:attribute-set! general-matrix "2:0" "Area") - ;; (iup:attribute-set! general-matrix "2:1" *toppath*) - ;; Megatest version - (iup:attribute-set! general-matrix "2:0" "Version") - (iup:attribute-set! 
general-matrix "2:1" (conc megatest-version "-" (substring megatest-fossil-hash 0 4))) - - general-matrix)) - -(define (dcommon:run-stats commondat tabdat #!key (tab-num #f)) - (let* ((stats-matrix (iup:matrix expand: "YES")) - (changed #f) - (stats-updater (lambda () - (if (dashboard:database-changed? commondat tabdat context-key: 'run-stats) - (let* ((run-stats (rmt:get-run-stats)) - (indices (common:sparse-list-generate-index run-stats)) ;; proc: set-cell)) - (row-indices (car indices)) - (col-indices (cadr indices)) - (max-row (if (null? row-indices) 1 (common:max (map cadr row-indices)))) - (max-col (if (null? col-indices) 1 - (common:max (map cadr col-indices)))) - (max-visible (max (- (dboard:tabdat-num-tests tabdat) 15) 3)) - (max-col-vis (if (> max-col 10) 10 max-col)) - (numrows 1) - (numcols 1)) - (iup:attribute-set! stats-matrix "CLEARVALUE" "CONTENTS") - (iup:attribute-set! stats-matrix "NUMCOL" max-col ) - (iup:attribute-set! stats-matrix "NUMLIN" (if (< max-row max-visible) max-visible max-row)) ;; min of 20 - (iup:attribute-set! stats-matrix "NUMCOL_VISIBLE" max-col-vis) - (iup:attribute-set! stats-matrix "NUMLIN_VISIBLE" (if (> max-row max-visible) max-visible max-row)) - - ;; Row labels - (for-each (lambda (ind) - (let* ((name (car ind)) - (num (cadr ind)) - (key (conc num ":0"))) - (if (not (equal? (iup:attribute stats-matrix key) name)) - (begin - (set! changed #t) - (iup:attribute-set! stats-matrix key name))))) - row-indices) - - ;; Col labels - (for-each (lambda (ind) - (let* ((name (car ind)) - (num (cadr ind)) - (key (conc "0:" num))) - (if (not (equal? (iup:attribute stats-matrix key) name)) - (begin - (set! changed #t) - (iup:attribute-set! 
stats-matrix key name))))) - col-indices) - - ;; Cell contents - (for-each (lambda (entry) - (let* ((row-name (car entry)) - (col-name (cadr entry)) - (value (caddr entry)) - (row-num (cadr (assoc row-name row-indices))) - (col-num (cadr (assoc col-name col-indices))) - (key (conc row-num ":" col-num))) - (if (not (equal? (iup:attribute stats-matrix key) value)) - (begin - (set! changed #t) - (iup:attribute-set! stats-matrix key value))))) - run-stats) - (if changed (iup:attribute-set! stats-matrix "REDRAW" "ALL"))) - )))) - ;; (dboard:commondat-please-update-set! commondat #t) ;; force redraw on first pass - ;; (mark-for-update tabdat) - ;; (stats-updater) - (dboard:commondat-add-updater commondat stats-updater tab-num: tab-num) - ;; (set! dashboard:update-summary-tab updater) - (iup:attribute-set! stats-matrix "WIDTHDEF" "40") - (iup:vbox - ;; (iup:label "Run statistics" #:expand "HORIZONTAL") - stats-matrix))) - -(define (dcommon:servers-table commondat tabdat) - (let* ((colnum 0) - (rownum 0) - (servers-matrix (iup:matrix #:expand "YES" - #:numcol 7 - #:numcol-visible 7 - #:numlin-visible 5 - )) - (colnames (list "Id" "MTver" "Pid" "Host" "Interface:OutPort" "RunTime" "State" "RunId")) - (updater (lambda () - (if (dashboard:monitor-changed? commondat tabdat) - (let ((servers (server:get-list *toppath* limit: 10))) - (iup:attribute-set! servers-matrix "NUMLIN" (length servers)) - ;; (set! colnum 0) - ;; (for-each (lambda (colname) - ;; ;; (print "colnum: " colnum " colname: " colname) - ;; (iup:attribute-set! servers-matrix (conc "0:" colnum) colname) - ;; (set! colnum (+ 1 colnum))) - ;; colnames) - (set! rownum 1) - (for-each - (lambda (server) - (set! 
colnum 0) - (match-let (((mod-time host port start-time pid) - server)) - (let* ((uptime (- (current-seconds) mod-time)) - (runtime (if start-time - (- mod-time start-time) - 0)) - (vals (list "-" ;; (vector-ref server 0) ;; Id - "-" ;; (vector-ref server 9) ;; MT-Ver - pid ;; (vector-ref server 1) ;; Pid - host ;; (vector-ref server 2) ;; Hostname - (conc host ":" port) ;; (conc (vector-ref server 3) ":" (vector-ref server 4)) ;; IP:Port - (seconds->hr-min-sec runtime) ;; (- (current-seconds) start-time)) ;; (vector-ref server 6))) - (cond - ((< uptime 5) "alive") - ((< uptime 16) "probably alive");; less than 15 seconds since mod, call it alive (vector-ref server 8) ;; State - (else "dead")) - "-" ;; (vector-ref server 12) ;; RunId - ))) - (for-each (lambda (val) - (let* ((row-col (conc rownum ":" colnum)) - (curr-val (iup:attribute servers-matrix row-col))) - (if (not (equal? (conc val) curr-val)) - (begin - (iup:attribute-set! servers-matrix row-col val) - (iup:attribute-set! servers-matrix "FITTOTEXT" (conc "C" colnum)))) - (set! colnum (+ 1 colnum)))) - vals) - (set! rownum (+ rownum 1))) - (iup:attribute-set! servers-matrix "REDRAW" "ALL"))) - (sort servers (lambda (a b)(> (car a)(car b)))))))))) - (set! colnum 0) - (for-each (lambda (colname) - (iup:attribute-set! servers-matrix (conc "0:" colnum) colname) - (iup:attribute-set! servers-matrix "FITTOTEXT" (conc "C" colnum)) - (set! colnum (+ colnum 1))) - colnames) - ;; (set! dashboard:update-servers-table updater) - (dboard:commondat-add-updater commondat updater) - ;; (iup:attribute-set! 
servers-matrix "WIDTHDEF" "40") - ;; (iup:hbox - ;; (iup:vbox - ;; (iup:button "Start" - ;; ;; #:size "50x" - ;; #:expand "YES" - ;; #:action (lambda (obj) - ;; (let ((cmd (conc ;; "xterm -geometry 180x20 -e \"" - ;; "megatest -server - &"))) - ;; ;; ";echo Press any key to continue;bash -c 'read -n 1 -s'\" &"))) - ;; (system cmd)))) - ;; (iup:button "Stop" - ;; #:expand "YES" - ;; ;; #:size "50x" - ;; #:action (lambda (obj) - ;; (let ((cmd (conc ;; "xterm -geometry 180x20 -e \"" - ;; "megatest -stop-server 0 &"))) - ;; ;; ";echo Press any key to continue;bash -c 'read -n 1 -s'\" &"))) - ;; (system cmd)))) - ;; (iup:button "Restart" - ;; #:expand "YES" - ;; ;; #:size "50x" - ;; #:action (lambda (obj) - ;; (let ((cmd (conc ;; "xterm -geometry 180x20 -e \"" - ;; "megatest -stop-server 0;megatest -server - &"))) - ;; ;; ";echo Press any key to continue;bash -c 'read -n 1 -s'\" &"))) - ;; (system cmd))))) - ;; servers-matrix - ;; ))) - servers-matrix - )) - -;; The main menu -(define (dcommon:main-menu) - (iup:menu ;; a menu is a special attribute to a dialog (think Gnome putting the menu at screen top) - (iup:menu-item "Files" (iup:menu ;; Note that you can use either #:action or action: for options - (iup:menu-item "Open" action: (lambda (obj) - (let* ((area-name (iup:textbox #:expand "HORIZONTAL")) - (fd (iup:file-dialog #:dialogtype "DIR")) - (top (iup:show fd #:modal? "YES"))) - (iup:attribute-set! source-tb "VALUE" - (iup:attribute fd "VALUE")) - (iup:destroy! fd)))) - ;; (lambda (obj) - ;; (iup:show (iup:file-dialog)) - ;; (print "File->open " obj))) - (iup:menu-item "Save" #:action (lambda (obj)(print "File->save " obj))) - (iup:menu-item "Exit" #:action (lambda (obj)(exit))))) - (iup:menu-item "Tools" (iup:menu - (iup:menu-item "Create new blah" #:action (lambda (obj)(print "Tools->new blah"))) - ;; (iup:menu-item "Show dialog" #:action (lambda (obj) - ;; (show message-window - ;; #:modal? 
#t - ;; ;; set positon using coordinates or center, start, top, left, end, bottom, right, parent-center, current - ;; ;; #:x 'mouse - ;; ;; #:y 'mouse - ;; ) - )))) - -;;====================================================================== -;; CANVAS STUFF FOR TESTS -;;====================================================================== - -(define (dcommon:draw-test cnv xoffset yoffset scalef x y w h name selected) - (let* ((llx (dcommon:x->canvas x scalef xoffset)) - (lly (dcommon:y->canvas y scalef yoffset)) - (urx (dcommon:x->canvas (+ x w) scalef xoffset)) - (ury (dcommon:y->canvas (+ y h) scalef yoffset))) - (canvas-text! cnv (+ llx 5)(+ lly 5) name) - (canvas-rectangle! cnv llx urx lly ury) - (if selected (canvas-box! cnv llx (+ llx 5) lly (+ lly 5))))) - -(define (dcommon:draw-arrow cnv test-box-center waiton-center) - (let* ((test-box-center-x (vector-ref test-box-center 0)) - (test-box-center-y (vector-ref test-box-center 1)) - (waiton-center-x (vector-ref waiton-center 0)) - (waiton-center-y (vector-ref waiton-center 1)) - (delta-y (- waiton-center-y test-box-center-y)) - (delta-x (- waiton-center-x test-box-center-x)) - (abs-delta-x (abs delta-x)) - (abs-delta-y (abs delta-y)) - (use-delta-x (> abs-delta-x abs-delta-y)) ;; use the larger one - (delta-ratio (if use-delta-x - (if (> abs-delta-x 0) - (/ abs-delta-y abs-delta-x) - 1) - (if (> abs-delta-y 0) - (/ abs-delta-x abs-delta-y) - 1))) - (x-adj (if use-delta-x - 8 - (* delta-ratio 8))) - (y-adj (if use-delta-x - (* x-adj delta-ratio) - 8)) - (new-waiton-x (inexact->exact - (round (if (> delta-x 0) ;; have positive x - (- waiton-center-x x-adj) - (+ waiton-center-x x-adj))))) - (new-waiton-y (inexact->exact - (round (if (> delta-y 0) - (- waiton-center-y y-adj) - (+ waiton-center-y y-adj)))))) - ;; (canvas-line-width-set! cnv 5) - (canvas-line! cnv - test-box-center-x - test-box-center-y - new-waiton-x - new-waiton-y - ) - (canvas-mark! 
cnv new-waiton-x new-waiton-y))) - -(define (dcommon:get-box-center box) - (let* ((llx (list-ref box 0)) - (lly (list-ref box 1)) - (boxw (list-ref box 4)) - (boxh (list-ref box 5))) - (vector (+ llx (/ boxw 2)) - (+ lly (/ boxh 2))))) - -(define-inline (num->int num) - (inexact->exact (round num))) - -(define (dcommon:draw-edges cnv xoffset yoffset scalef edges) - (for-each - (lambda (e) - (let loop ((x1 (car e)) - (y1 (cadr e)) - (x2 #f) - (y2 #f) - (tal (cddr e))) - (if (and x1 y1 x2 y2) - (canvas-line! - cnv - (num->int (dcommon:x->canvas x1 scalef xoffset)) - (num->int (dcommon:y->canvas y1 scalef yoffset)) - (num->int (dcommon:x->canvas x2 scalef xoffset)) - (num->int (dcommon:y->canvas y2 scalef yoffset)))) ;; (num->int x1)(num->int y1)(num->int x2)(num->int y2))) - (if (< (length tal) 2) - (canvas-mark! cnv - (num->int (dcommon:x->canvas x1 scalef xoffset)) - (num->int (dcommon:y->canvas y1 scalef yoffset))) ;; (num->int x1)(num->int y1)) - (loop (car tal)(cadr tal) x1 y1 (cddr tal))))) - ;; (map (lambda (e)(map (lambda (x)(num->int (* x scalef))) e)) edges))) - edges)) - - -(define (dcommon:draw-arrows cnv testname tests-hash test-records) - (let* ((test-box-info (hash-table-ref tests-hash testname)) - (test-box-center (dcommon:get-box-center test-box-info)) - (test-record (hash-table-ref test-records testname)) - (waitons (vector-ref test-record 2))) - (for-each - (lambda (waiton) - (let* ((waiton-box-info (hash-table-ref/default tests-hash waiton #f)) - (waiton-center (dcommon:get-box-center (or waiton-box-info test-box-info)))) - (dcommon:draw-arrow cnv test-box-center waiton-center))) - waitons) - ;; (debug:print 0 *default-log-port* "test-box-info=" test-box-info) - ;; (debug:print 0 *default-log-port* "test-record=" test-record) - )) - -(define (dcommon:estimate-scale sizex sizey originx originy nodes) - ;; (print "sizex: " sizex " sizey: " sizey " originx: " originx " originy: " originy " nodes: " nodes) - (let* ((maxx 1) - (maxy 1)) - (for-each - 
(lambda (node) - (if (equal? (car node) "node") - (let ((x (string->number (list-ref node 2))) - (y (string->number (list-ref node 3)))) - (if (and x (> x maxx))(set! maxx x)) - (if (and y (> y maxy))(set! maxy y))))) - nodes) - (let ((scalex (/ sizex maxx)) - (scaley (/ sizey maxy))) - ;; (print "maxx: " maxx " maxy: " maxy " scalex: " scalex " scaley: " scaley) - (min scalex scaley)))) - -(define (dcommon:get-xoffset tests-draw-state sizex-in xadj-in) - (let ((xadj (or xadj-in (hash-table-ref/default tests-draw-state 'xadj 0))) - (sizex (or sizex-in (hash-table-ref/default tests-draw-state 'sizex 500)))) - (hash-table-set! tests-draw-state 'xadj xadj) ;; for use in de-scaling when handling mouse clicks - (hash-table-set! tests-draw-state 'sizex sizex) - (* (/ sizex 2) (- 0.5 xadj)))) - -(define (dcommon:get-yoffset tests-draw-state sizey-in yadj-in) - (let ((yadj (or yadj-in (hash-table-ref/default tests-draw-state 'yadj 0))) - (sizey (or sizey-in (hash-table-ref/default tests-draw-state 'sizey 500)))) - (hash-table-set! tests-draw-state 'yadj yadj) ;; for use in de-scaling when handling mouse clicks - (hash-table-set! tests-draw-state 'sizey sizey) - (* (/ sizey 2) (- yadj 0.5)))) - -(define (dcommon:x->canvas x scalef xoffset) - (+ xoffset (* x scalef))) - -(define (dcommon:y->canvas y scalef yoffset) - (+ yoffset (* y scalef))) - -;; sizex, sizey - canvas size -;; originx, originy - canvas origin -;; -(define (dcommon:initial-draw-tests cnv xadj yadj sizex sizey sizexmm sizeymm originx originy tests-draw-state sorted-testnames test-records) - (let* ((dot-data ;; (map cdr (filter - ;; (lambda (x)(equal? 
"node" (car x))) - (map string-split (tests:lazy-dot test-records "plain" sizex sizey))) ;; (tests:easy-dot test-records "plain"))) - (xoffset (dcommon:get-xoffset tests-draw-state sizex xadj)) - (yoffset (dcommon:get-yoffset tests-draw-state sizey yadj)) - (no-dot (configf:lookup *configdat* "setup" "nodot")) - (boxh 15) - (boxw 10) - (margin 5) - (tests-info (hash-table-ref tests-draw-state 'tests-info)) - (selected-tests (hash-table-ref tests-draw-state 'selected-tests )) - (scalef (if no-dot - 1 - (dcommon:estimate-scale sizex sizey originx originy dot-data))) - (sorted-testnames (if no-dot - (sort sorted-testnames string>=?) - sorted-testnames)) - (curr-x 0) ;; NB// NOT screen units - (curr-y (/ (- sizey boxh margin) scalef)) ;; used when no-dot - (scaled-sizex (/ sizex scalef))) - - (hash-table-set! tests-draw-state 'scalef scalef) - - (let ((longest-str (if (null? sorted-testnames) " " (car (sort sorted-testnames (lambda (a b)(>= (string-length a)(string-length b)))))))) - (let-values (((x-max y-max) (canvas-text-size cnv longest-str))) - (if (> x-max boxw)(set! boxw (+ 10 x-max))))) - ;; (print "sizex: " sizex " sizey: " sizey " font: " (canvas-font cnv) " originx: " originx " originy: " originy " xtorig: " xtorig " ytorig: " ytorig " xadj: " xadj " yadj: " yadj) - (if (not (null? sorted-testnames)) - (let loop ((hed (car (reverse sorted-testnames))) - (tal (cdr (reverse sorted-testnames)))) - (let* ((nodedat (if no-dot - #f - (let ((tmpres (filter (lambda (x) - (if (and (not (null? x)) - (equal? (car x) "node")) - (equal? hed (cadr x)) - #f)) - dot-data))) - (if (null? tmpres) - ;; llx lly boxw boxh - (list "0" "1" "1" (conc (length tal)) "2" "0.5") ;; return some placeholder junk if no dat found - (car tmpres))))) - (edgedat (if no-dot - '() - (let ((edges (filter (lambda (x) ;; filter for edge - (if (and (not (null? x)) - (equal? (car x) "edge")) - (equal? 
hed (cadr x)) - #f)) - dot-data))) - (map (lambda (inlst) - (dcommon:process-polyline - (map (lambda (instr) - (string->number instr)) ;; convert to number and scale - (let ((il (cddddr inlst))) - (take il (- (length il) 2)))) - (lambda (x y) - (list (+ x 0) ;; xtorig) - (+ y 0))) ;; ytorig))) - #f #f)) ;; process polyline - edges)))) - (cx (if no-dot ;; this is the centerpoint! - curr-x - (string->number (list-ref nodedat 2)))) - (cy (if no-dot - curr-y - (string->number (list-ref nodedat 3)))) - (boxw (if no-dot - boxw - (string->number (list-ref nodedat 4)))) - (boxh (if no-dot - boxh - (string->number (list-ref nodedat 5)))) - (boxw/2 (/ boxw 2)) - (boxh/2 (/ boxh 2)) - (urx (+ cx boxw/2)) - (ury (+ cy boxh/2)) - (llx (- cx boxw/2)) - (lly (- cy boxh/2))) - - ;; if we are in no-dot mode then increment curr-x and curr-y as needed - (if no-dot - (begin - (cond - ((< curr-x (- scaled-sizex boxw boxw margin)) - (set! curr-x (+ curr-x boxw margin))) - ((> curr-x (- scaled-sizex boxw boxw margin)) - (set! curr-x 0) - (set! curr-y (- curr-y (+ boxh margin))))))) - ; (print "hed " hed " llx " llx " lly " lly " urx " urx " ury " ury) - (dcommon:draw-test cnv xoffset yoffset scalef llx lly boxw boxh hed (hash-table-ref/default selected-tests hed #f)) - ;; (dcommon:draw-arrows cnv testname tests-info test-records)) - (dcommon:draw-edges cnv xoffset yoffset scalef edgedat) - - ;; data used by mouse click calc. keep the wacky order for now. - (hash-table-set! tests-info hed (list llx lly urx ury boxw boxh edgedat)) - (if (not (null? 
tal)) - (loop (car tal) - (cdr tal)))))) - )) - -;; per-point-proc required, remainder optional -;; -(define (dcommon:process-polyline line per-point-proc per-segment-proc last-segment-proc) - (if (< (length line) 2) - '() - (let loop ((x1 (car line)) - (y1 (cadr line)) - (x2 #f) - (y2 #f) - (tal (cddr line)) - (res '())) - (if (and x1 y1 x2 y2 per-segment-proc) - (per-segment-proc x1 y1 x2 y2)) - (if (< (length tal) 2) - (begin - (if last-segment-proc (last-segment-proc x1 y1 x2 y2)) - (append res (per-point-proc x1 y1))) - (loop (car tal)(cadr tal) x1 y1 (cddr tal) (append res (per-point-proc x1 y1))))))) - -(define (dcommon:redraw-tests cnv xadj yadj sizex sizey sizexmm sizeymm originx originy tests-draw-state sorted-testnames test-records) - (let* ((scalef (hash-table-ref tests-draw-state 'scalef)) - (xoffset (dcommon:get-xoffset tests-draw-state sizex xadj)) - (yoffset (dcommon:get-yoffset tests-draw-state sizey yadj)) - (tests-info (hash-table-ref tests-draw-state 'tests-info)) - (selected-tests (hash-table-ref tests-draw-state 'selected-tests ))) - (if (not (null? sorted-testnames)) - (let loop ((hed (car (reverse sorted-testnames))) - (tal (cdr (reverse sorted-testnames)))) - (let* ((tvals (hash-table-ref tests-info hed)) - (llx (list-ref tvals 0)) - (lly (list-ref tvals 1)) - (boxw (list-ref tvals 4)) - (boxh (list-ref tvals 5)) - (edges (map (lambda (pline) - (dcommon:process-polyline pline - (lambda (x1 y1) - (list x1 y1)) - #f #f)) - (list-ref tvals 6))) - (urx (+ llx boxw)) - (ury (+ lly boxh))) - (dcommon:draw-test cnv xoffset yoffset scalef llx lly boxw boxh hed (hash-table-ref/default selected-tests hed #f)) - (dcommon:draw-edges cnv xoffset yoffset scalef edges) - (if (not (null? 
tal)) - ;; leave a column of space to the right to list items - (loop (car tal) - (cdr tal)))))))) - -;;====================================================================== -;; RUN CONTROLS -;;====================================================================== - -(define (dcommon:command-execution-control data) - ;; The command line display/exectution control - (iup:frame - #:title "Command to be exectuted" - (iup:hbox - (iup:label "Run on" #:size "40x") - (iup:radio - (iup:hbox - (iup:toggle "Local" #:size "40x") - (iup:toggle "Server" #:size "40x"))) - (let ((tb (iup:textbox - #:value "megatest " - #:expand "HORIZONTAL" - #:readonly "YES" - #:font "Courier New, -12" - ))) - (dboard:tabdat-command-tb-set! data tb) - tb) - (iup:button "Execute" #:size "50x" - #:action (lambda (obj) - ;; (let ((cmd (conc ;; "xterm -geometry 180x20 -e \"" - (common:run-a-command (iup:attribute (dboard:tabdat-command-tb data) "VALUE"))))))) - ;; ";echo Press any key to continue;bash -c 'read -n 1 -s'\" &"))) - ;; (system cmd))))))) - -(define (dcommon:command-action-selector commondat tabdat #!key (tab-num #f)) - (iup:frame - #:title "Set the action to take" - (iup:hbox - ;; (iup:label "Command to run" #:expand "HORIZONTAL" #:size "70x" #:alignment "LEFT:ACENTER") - (let* ((cmds-list '("run" "remove-runs")) ;; "set-state-status" "lock-runs" "unlock-runs")) - (lb (iup:listbox #:expand "HORIZONTAL" - #:dropdown "YES" - #:action (lambda (obj val index lbstate) - ;; (print obj " " val " " index " " lbstate) - (dboard:tabdat-command-set! tabdat val) - (dashboard:update-run-command tabdat)))) - (default-cmd (car cmds-list))) - (iuplistbox-fill-list lb cmds-list selected-item: default-cmd) - (dboard:tabdat-command-set! 
tabdat default-cmd) - lb)))) - -(define (dcommon:command-runname-selector commondat tabdat #!key (tab-num #f)) ;; alldat data) - (iup:frame - #:title "Runname" - (let* ((default-run-name (seconds->work-week/day (current-seconds))) - (tb (iup:textbox #:expand "HORIZONTAL" - #:action (lambda (obj val txt) - (debug:catch-and-dump - (lambda () - ;; (print "obj: " obj " val: " val " unk: " unk) - (dboard:tabdat-run-name-set! tabdat txt) ;; (iup:attribute obj "VALUE")) - (dashboard:update-run-command tabdat)) - "command-runname-selector tb action")) - #:value (or default-run-name (dboard:tabdat-run-name tabdat)))) - (lb (iup:listbox #:expand "HORIZONTAL" - #:dropdown "YES" - #:action (lambda (obj val index lbstate) - (debug:catch-and-dump - (lambda () - (if (not (equal? val "")) - (begin - (iup:attribute-set! tb "VALUE" val) - (dboard:tabdat-run-name-set! tabdat val) - (dashboard:update-run-command tabdat)))) - "command-runname-selector lb action")))) - (refresh-runs-list (lambda () - (if (dashboard:database-changed? commondat tabdat context-key: 'runname-selector-runs-list) - (let* (;; (target (dboard:tabdat-target-string tabdat)) - (runs-for-targ (rmt:get-runs-by-patt (dboard:tabdat-keys tabdat) "%" #f #f #f #f 0)) - (runs-header (vector-ref runs-for-targ 0)) - (runs-dat (vector-ref runs-for-targ 1)) - (run-names (cons default-run-name - (map (lambda (x) - (db:get-value-by-header x runs-header "runname")) - runs-dat)))) - ;; (print "DEBUGINFO: run-names=" run-names) - ;; (iup:attribute-set! lb "REMOVEITEM" "ALL") - (iuplistbox-fill-list lb run-names selected-item: default-run-name)))))) - ;; (dboard:tabdat-updater-for-runs-set! tabdat refresh-runs-list) - (dboard:commondat-add-updater commondat refresh-runs-list tab-num: tab-num) - ;; (refresh-runs-list) - (dboard:tabdat-run-name-set! 
tabdat default-run-name) - (iup:hbox - tb - lb)))) - -(define (dcommon:command-testname-selector commondat tabdat update-keyvals) ;; key-listboxes) - (iup:vbox - ;; Text box for test patterns - (iup:frame - #:title "Test patterns (one per line)" - (let ((tb (iup:textbox #:action (lambda (val a b) - (debug:catch-and-dump - (lambda () - (dboard:tabdat-test-patts-set!-use - tabdat - (dboard:lines->test-patt b)) - (dashboard:update-run-command tabdat)) - "command-testname-selector tb action")) - #:value (dboard:test-patt->lines - (dboard:tabdat-test-patts-use tabdat)) - #:expand "YES" - #:size "x30" ;; was 10x30 - #:multiline "YES"))) - (set! test-patterns-textbox tb) - (dboard:tabdat-test-patterns-textbox-set! tabdat tb) - tb)) -;; (iup:frame -;; #:title "Target" -;; ;; Target selectors -;; (apply iup:hbox -;; (let* ((dat (dashboard:update-target-selector tabdat action-proc: update-keyvals)) -;; (key-lb (car dat)) -;; (combos (cadr dat))) -;; combos))) - ;; (iup:hbox - ;; ;; Text box for STATES - ;; (iup:frame - ;; #:title "States" - ;; (dashboard:text-list-toggle-box - ;; ;; Move these definitions to common and find the other useages and replace! - ;; (map cadr *common:std-states*) ;; '("COMPLETED" "RUNNING" "STUCK" "INCOMPLETE" "LAUNCHED" "REMOTEHOSTSTART" "KILLED") - ;; (lambda (all) - ;; (dboard:tabdat-states-set! tabdat all) - ;; (dashboard:update-run-command tabdat)))) - ;; ;; Text box for STATES - ;; (iup:frame - ;; #:title "Statuses" - ;; (dashboard:text-list-toggle-box - ;; (map cadr *common:std-statuses*) ;; '("PASS" "FAIL" "n/a" "CHECK" "WAIVED" "SKIP" "DELETED" "STUCK/DEAD") - ;; (lambda (all) - ;; (dboard:tabdat-statuses-set! 
tabdat all) - ;; (dashboard:update-run-command tabdat))))) - )) - -(define (dcommon:command-tests-tasks-canvas tabdat test-records sorted-testnames tests-draw-state) - (iup:frame - #:title "Tests and Tasks" - (let* ((updater #f) - (last-xadj 0) - (last-yadj 0) - (the-cnv #f) - (canvas-obj - (iup:canvas #:action (make-canvas-action - (lambda (cnv xadj yadj) - (if (not updater) - (set! updater (lambda (xadj yadj) - ;; (print "cnv: " cnv " xadj: " xadj " yadj: " yadj) - (dashboard:draw-tests cnv xadj yadj tests-draw-state sorted-testnames test-records) - (set! last-xadj xadj) - (set! last-yadj yadj)))) - (updater xadj yadj) - (set! the-cnv cnv) - )) - ;; Following doesn't work - #:wheel-cb (lambda (obj step x y dir) ;; dir is 4 for up and 5 for down. I think. - (let ((scalef (hash-table-ref tests-draw-state 'scalef))) - (hash-table-set! tests-draw-state 'scalef (+ scalef - (if (> step 0) - (* scalef 0.01) - (* scalef -0.01)))) - (if the-cnv - (dashboard:draw-tests the-cnv last-xadj last-yadj tests-draw-state sorted-testnames test-records)) - )) - ;; #:size "250x250" - #:expand "YES" - #:scrollbar "YES" - #:posx "0.5" - #:posy "0.5" - #:button-cb (lambda (obj btn pressed x y status) - ;; (print "obj: " obj ", pressed " pressed ", status " status) - ; (print "canvas-origin: " (canvas-origin the-cnv)) - ;; (let-values (((xx yy)(canvas-origin the-cnv))) - ;; (canvas-transform-set! 
the-cnv #f) - ;; (print "canvas-origin: " xx " " yy " click at " x " " y)) - (let* ((tests-info (hash-table-ref tests-draw-state 'tests-info)) - (selected-tests (hash-table-ref tests-draw-state 'selected-tests)) - (scalef (hash-table-ref tests-draw-state 'scalef)) - (sizey (hash-table-ref tests-draw-state 'sizey)) - (xoffset (dcommon:get-xoffset tests-draw-state #f #f)) - (yoffset (dcommon:get-yoffset tests-draw-state #f #f)) - (new-y (- sizey y)) - (test-patterns-textbox (dboard:tabdat-test-patterns-textbox tabdat))) - ;; (print "xoffset=" xoffset ", yoffset=" yoffset) - ;; (print "\tx\ty\tllx\tlly\turx\tury") - (for-each (lambda (test-name) - (let* ((rec-coords (hash-table-ref tests-info test-name)) - (llx (dcommon:x->canvas (list-ref rec-coords 0) scalef xoffset)) - (lly (dcommon:y->canvas (list-ref rec-coords 1) scalef yoffset)) - (urx (dcommon:x->canvas (list-ref rec-coords 2) scalef xoffset)) - (ury (dcommon:y->canvas (list-ref rec-coords 3) scalef yoffset))) - ;; (if (eq? pressed 1) - ;; (print "\tx=" x "\ty=" y "\tnew-y=" new-y "\tllx=" llx "\tlly=" lly "\turx=" urx "\tury=" ury "\t" test-name " ")) - (if (and (eq? pressed 1) - (>= x llx) - (>= new-y lly) - (<= x urx) - (<= new-y ury)) - (let* ((box-patterns (string-split (iup:attribute test-patterns-textbox "VALUE"))) - (test-patts (string-split (or (dboard:tabdat-test-patts tabdat) - "") - ",")) - (patterns (delete-duplicates (append box-patterns test-patts)))) - (let* ((selected (not (member test-name patterns))) - (newpatt-list (if selected - (cons test-name patterns) - (delete test-name patterns))) - (newpatt (string-intersperse newpatt-list "\n"))) - (iup:attribute-set! test-patterns-textbox "VALUE" newpatt) - (iup:attribute-set! obj "REDRAW" "ALL") - (hash-table-set! 
selected-tests test-name selected) - (dboard:tabdat-test-patts-set!-use tabdat (dboard:lines->test-patt newpatt)) - (dashboard:update-run-command tabdat) - (if updater (updater last-xadj last-yadj))))))) - (hash-table-keys tests-info))))))) - canvas-obj))) - -;;====================================================================== -;; S T E P S -;;====================================================================== - -(define (dcommon:populate-steps teststeps steps-matrix run-id test-id) - (let* ((max-row 0) - (max-col 9) - (white "255 255 255") - - (testinfo (rmt:get-testinfo-state-status run-id test-id)) - (state (db:test-get-state testinfo)) - (status (db:test-get-status testinfo)) - (test-status-color (car (gutils:get-color-for-state-status state status))) - (running-color (car (gutils:get-color-for-state-status "RUNNING" "STARTED"))) - (failcolor (car (gutils:get-color-for-state-status "COMPLETED" "FAIL")))) - (if (null? teststeps) - (begin - (iup:attribute-set! steps-matrix "CLEARATTRIB" "CONTENTS") - (iup:attribute-set! steps-matrix "CLEARVALUE" "CONTENTS")) - (let loop ((hed (car teststeps)) - (tal (cdr teststeps)) - (rownum 1) - (colnum 1)) - (if (> rownum max-row)(set! max-row rownum)) - (let* ((status (vector-ref hed 3)) - (val (vector-ref hed (- colnum 1))) - (bgcolor (cond - ((member (conc status) '("" "-" "#")) - running-color) - - ((member (conc status) '("0" 0)) - white) - (else test-status-color))) - ; (else failcolor))) - (mtrx-rc (conc rownum ":" colnum))) - ;;(print "BB> status=>"status"< bgcolor="bgcolor) - (iup:attribute-set! steps-matrix mtrx-rc (if val (conc val) "")) - (if (< colnum 5) - (iup:attribute-set! steps-matrix (conc "BGCOLOR" mtrx-rc) bgcolor)) - (if (< colnum max-col) - (loop hed tal rownum (+ colnum 1)) - (if (not (null? 
tal)) - (loop (car tal) (cdr tal) (+ rownum 1) 1)))))) - (if (> max-row 0) - (begin - ;; we are going to speculatively clear rows until we find a row that is already cleared - (let loop ((rownum (+ max-row 1)) - (colnum 0) - (deleted #f)) - ;; (debug:print-info 0 *default-log-port* "cleaning " rownum ":" colnum) - (let* ((next-row (if (eq? colnum max-col) (+ rownum 1) rownum)) - (next-col (if (eq? colnum max-col) 1 (+ colnum 1))) - (mtrx-rc (conc rownum ":" colnum)) - (curr-val (iup:attribute steps-matrix mtrx-rc))) - ;; (debug:print-info 0 *default-log-port* "cleaning " rownum ":" colnum " currval= " curr-val) - (if (and (string? curr-val) - (not (equal? curr-val ""))) - (begin - (iup:attribute-set! steps-matrix mtrx-rc "") - (loop next-row next-col #t)) - (if (eq? colnum max-col) ;; not done, didn't get a full blank row - (if deleted (loop next-row next-col #f)) ;; exit on this not met - (loop next-row next-col deleted))))) - (iup:attribute-set! steps-matrix "REDRAW" "ALL"))))) - -;;====================================================================== -;; U T I L I T I E S -;;====================================================================== - -(define (dcommon:run-html-viewer lfilename) - (let ((htmlviewercmd (configf:lookup *configdat* "setup" "htmlviewercmd"))) - (if htmlviewercmd - (system (conc "(" htmlviewercmd " " lfilename " ) &")) - (iup:send-url lfilename)))) - Index: diff-report.scm ================================================================== --- diff-report.scm +++ diff-report.scm @@ -22,404 +22,5 @@ (include "common_records.scm") (use matchable) (use fmt) (use ducttape-lib) -(define css "") - -(define (diff:tests-mindat->hash tests-mindat) - (let* ((res (make-hash-table))) - (for-each - (lambda (item) - (let* ((test-name+item-path (cons (list-ref item 0) (list-ref item 1))) - (value (list-ref item 2))) - (hash-table-set! 
res test-name+item-path value))) - tests-mindat) - res)) - -;; return 1 if status1 is better -;; return 0 if status1 and 2 are equally good -;; return -1 if status2 is better -(define (diff:status-compare3 status1 status2) - (let* - ((status-goodness-ranking (list "PASS" "WARN" "WAIVED" "SKIP" "FAIL" "ABORT" #f)) - (mem1 (member status1 status-goodness-ranking)) - (mem2 (member status2 status-goodness-ranking)) - ) - (cond - ((and (not mem1) (not mem2)) 0) - ((not mem1) -1) - ((not mem2) 1) - ((= (length mem1) (length mem2)) 0) - ((> (length mem1) (length mem2)) 1) - (else -1)))) - - -(define (diff:xor-tests-mindat src-tests-mindat dest-tests-mindat #!key (hide-clean #f) (consistent-fail-not-clean #f)) - (let* ((src-hash (diff:tests-mindat->hash src-tests-mindat)) - (dest-hash (diff:tests-mindat->hash dest-tests-mindat)) - (all-keys - (reverse (sort - (delete-duplicates - (append (hash-table-keys src-hash) (hash-table-keys dest-hash))) - - (lambda (a b) - (cond - ((< 0 (string-compare3 (car a) (car b))) #t) - ((> 0 (string-compare3 (car a) (car b))) #f) - ((< 0 (string-compare3 (cdr a) (cdr b))) #t) - (else #f))) - - )))) - (let ((res - (map ;; TODO: rename xor to delta globally in dcommon and dashboard - (lambda (key) - (let* ((test-name (car key)) - (item-path (cdr key)) - - (dest-value (hash-table-ref/default dest-hash key (list 0 "NULL" "NULL"))) ;; (list test-id state status) - (dest-test-id (list-ref dest-value 0)) - (dest-state (list-ref dest-value 1)) - (dest-status (list-ref dest-value 2)) - - (src-value (hash-table-ref/default src-hash key (list 0 "NULL" "NULL"))) ;; (list test-id state status) - (src-test-id (list-ref src-value 0)) - (src-state (list-ref src-value 1)) - (src-status (list-ref src-value 2)) - - (incomplete-statuses '("DELETED" "INCOMPLETE" "STUCK/DEAD" "N/A")) ;; if any of these statuses apply, treat test as incomplete - - (dest-complete - (and dest-value dest-state dest-status - (equal? 
dest-state "COMPLETED") - (not (member dest-status incomplete-statuses)))) - (src-complete - (and src-value src-state src-status - (equal? src-state "COMPLETED") - (not (member src-status incomplete-statuses)))) - (status-compare-result (diff:status-compare3 src-status dest-status)) - (xor-new-item - (cond - ;; complete, for this case means: state=compelte AND status not in ( deleted uncomplete stuck/dead n/a ) - ;; neither complete -> bad - - ;; src !complete, dest complete -> better - ((and (not dest-complete) (not src-complete)) - (list dest-test-id "BOTH-BAD" "BOTH-INCOMPLETE") src-value dest-value) - ((not dest-complete) - (list src-test-id "NOT-IN-DEST" "DEST-INCOMPLETE") src-value dest-value) - ((not src-complete) - (list dest-test-id "NOT-IN-SRC" "SRC-INCOMPLETE") src-value dest-value) - ((and - (equal? src-state dest-state) - (equal? src-status dest-status)) - (if (and consistent-fail-not-clean (not (member dest-status '("PASS" "SKIP" "WAIVED" "WARN")))) - (list dest-test-id (conc "BOTH-BAD") (conc "CLEAN-" dest-status) src-value dest-value) - (list dest-test-id (conc "CLEAN") (conc "CLEAN-" dest-status) src-value dest-value))) - ;; better or worse: pass > warn > waived > skip > fail > abort - ;; pass > warn > waived > skip > fail > abort - - ((= 1 status-compare-result) ;; src is better, dest is worse - (list dest-test-id "WORSE" (conc src-status "->" dest-status) src-value dest-value)) - (else - (list dest-test-id "BETTER" (conc src-status "->" dest-status) src-value dest-value))))) - (list test-name item-path xor-new-item))) - all-keys))) - - (if hide-clean - (filter - (lambda (item) - (not - (equal? - "CLEAN" - (list-ref (list-ref item 2) 1)))) - res) - res)))) - -(define (diff:run-name->run-id run-name) - (if (number? run-name) - run-name - (let* ((qry-res (rmt:get-runs run-name 1 0 '()))) - (if (eq? 
2 (vector-length qry-res)) - (vector-ref (car (vector-ref qry-res 1)) 1) - #f)))) - -(define (diff:target+run-name->run-id target run-name) - (let* ((keys (rmt:get-keys)) - (target-parts (if target (string-split target "/") (map (lambda (x) "%") keys)))) - (if (not (eq? (length keys) (length keys))) - (begin - (print "Error: Target ("target") item count does not match fields count target tokens="target-parts" fields="keys) - #f) - (let* ((target-map (zip keys target-parts)) - (qry-res (rmt:get-runs run-name 1 0 target-map))) - - (if (eq? 2 (vector-length qry-res)) - (let ((first-ent (vector-ref qry-res 1))) - (if (> (length first-ent) 0) - (vector-ref (car first-ent) 1) - #f)) - #f))))) - -(define (diff:run-id->tests-mindat run-id #!key (testpatt "%/%")) - (let* ((states '()) - (statuses '()) - (offset #f) - (limit #f) - (not-in #t) - (sort-by #f) - (sort-order #f) - (qryvals "id,testname,item_path,state,status") - (qryvals "id,testname,item_path,state,status") - (last-update 0) - (mode #f) - ) - (map - ;; (lambda (row) - ;; (match row - ;; ((#(id test-name item-path state status) - ;; (list test-name item-path (list id state status)))) - ;; (else #f))) - (lambda (row) - (let* ((id (vector-ref row 0)) - (test-name (vector-ref row 1)) - (item-path (vector-ref row 2)) - (state (vector-ref row 3)) - (status (vector-ref row 4))) - (list test-name item-path (list id state status)))) - - (rmt:get-tests-for-run run-id - testpatt states statuses - offset limit - not-in sort-by sort-order - qryvals - last-update - mode)))) - - -(define (diff:diff-runs src-run-id dest-run-id) - (let* ((src-tests-mindat (diff:run-id->tests-mindat src-run-id)) - (dest-tests-mindat (diff:run-id->tests-mindat dest-run-id))) - (diff:xor-tests-mindat src-tests-mindat dest-tests-mindat consistent-fail-not-clean: #t))) - - -(define (diff:rundiff-find-by-state run-diff state) - (filter - (lambda (x) - (equal? 
(list-ref (caddr x) 1) state)) - run-diff)) - -(define (diff:rundiff-clean-breakdown run-diff) - (map - (lambda (run-diff-item) - (match run-diff-item - ((test-name item-path (junk-id diff-state diff-status (src-test-id src-state src-status) (dest-test-id dest-state dest-status))) - (list test-name item-path "CLEAN" src-status)) - (else ""))) - (diff:rundiff-find-by-state run-diff "CLEAN"))) - -(define (diff:summarize-run-diff run-diff) - - (let* ((diff-states (list "CLEAN" "BETTER" "WORSE" "BOTH-BAD" "NOT-IN-DEST" "NOT-IN-SRC" ))) - (map - (lambda (state) - (list state - (length (diff:rundiff-find-by-state run-diff state)))) - diff-states))) - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; Presentation code below, business logic above ;; -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(define (diff:stml->string in-stml) - (with-output-to-string - (lambda () - (s:output-new - (current-output-port) - in-stml)))) - -(define (diff:state-status->bgcolor state status) - (match (list state status) - (("CLEAN" _) "#88ff88") - (("BETTER" _) "#33ff33") - (("WORSE" _) "#ff3333") - (("BOTH-BAD" _) "#ff3333") - ((_ "WARN") "#ffff88") - ((_ "FAIL") "#ff8888") - ((_ "ABORT") "#ff0000") - ((_ "PASS") "#88ff88") - ((_ "SKIP") "#ffff00") - (else "#ffffff"))) - -(define (diff:test-state-status->diff-report-cell state status) - (s:td 'bgcolor (diff:state-status->bgcolor state status) status)) - -(define (diff:diff-state-status->diff-report-cell state status) - (s:td state 'bgcolor (diff:state-status->bgcolor state status))) - - -(define (diff:megatest-html-logo) - - "
-___  ___                 _            _
-|  \\/  | ___  __ _  __ _| |_ ___  ___| |_
-| |\\/| |/ _ \\/ _` |/ _` | __/ _ \\/ __| __|
-| |  | |  __/ (_| | (_| | ||  __/\\__ \\ |_
-|_|  |_|\\___|\\__, |\\__,_|\\__\\___||___/\\__|
-             |___/
-
") - -(define (diff:megatest-html-diff-logo) - "
-___  ___                 _            _
-|  \\/  | ___  __ _  __ _| |_ ___  ___| |_  |  _ \\(_)/ _|/ _|
-| |\\/| |/ _ \\/ _` |/ _` | __/ _ \\/ __| __| | | | | | |_| |_
-| |  | |  __/ (_| | (_| | ||  __/\\__ \\ |_  | |_| | |  _|  _|
-|_|  |_|\\___|\\__, |\\__,_|\\__\\___||___/\\__| |____/|_|_| |_|
-             |___/
-
") - - -(define (diff:run-id->target+run-name+starttime run-id) - (let* ((target (rmt:get-target run-id)) - (runinfo (rmt:get-run-info run-id)) ; vector of header (list) and result (vector) - (info-hash (alist->hash-table - (map (lambda (x) (cons (car x) (cadr x))) ; make it a useful hash - (zip (vector-ref runinfo 0) (vector->list (vector-ref runinfo 1)))))) - (run-name (hash-table-ref/default info-hash "runname" "N/A")) - (start-time (hash-table-ref/default info-hash "event_time" 0))) - (list target run-name start-time))) - -(define (diff:deliver-diff-report src-run-id dest-run-id - #!key - (html-output-file #f) - (email-subject-prefix "[MEGATEST DIFF]") - (email-recipients-list '()) ) - (let* ((src-info (diff:run-id->target+run-name+starttime src-run-id)) - (src-target (car src-info)) - (src-run-name (cadr src-info)) - (src-start (conc (seconds->string (caddr src-info)) " " (local-timezone-abbreviation))) - (dest-info (diff:run-id->target+run-name+starttime dest-run-id)) - (dest-target (car dest-info)) - (dest-run-name (cadr dest-info)) - (dest-start (conc (seconds->string (caddr dest-info)) " " (local-timezone-abbreviation))) - - - (run-diff (diff:diff-runs src-run-id dest-run-id )) - (test-count (length run-diff)) - (summary-table - (apply s:table 'cellspacing "0" 'border "1" - (s:tr - (s:th "Diff type") - (s:th "% share") - (s:th "Count")) - - (map - (lambda (state-count) - (s:tr - (diff:diff-state-status->diff-report-cell (car state-count) #f) - (s:td 'align "right" (fmt #f - (decimal-align 3 - (fix 2 - (num/fit 6 - (* 100 (/ (cadr state-count) test-count))))))) - (s:td 'align "right" (cadr state-count)))) - (diff:summarize-run-diff run-diff)))) - (meta-table - (s:table 'cellspacing "0" 'border "1" - - (s:tr - (s:td 'colspan "2" - (s:table 'cellspacing "0" 'border "1" - (s:tr - (s:th 'align "LEFT" "") (s:th "SOURCE RUN") (s:th "DESTINATION RUN")) - (s:tr - (s:th 'align "LEFT" "Started") (s:td src-start) (s:td dest-start)) - (s:tr - (s:th 'align "LEFT" 
"TARGET") (s:td src-target) (s:td dest-target)) - (s:tr - (s:th 'align "LEFT" "RUN NAME") (s:td src-run-name) (s:td dest-run-name))))))) - - (main-table - (apply s:table 'cellspacing "0" 'border "1" - (s:tr - (s:th "Test name") - (s:th "Item Path") - (s:th (conc "SOURCE")) - (s:th (conc "DEST")) - (s:th "Diff")) - (map - (lambda (run-diff-item) - (match run-diff-item - ((test-name item-path (junk-id diff-state diff-status (src-test-id src-state src-status) (dest-test-id dest-state dest-status))) - (s:tr - (s:td test-name) - (s:td item-path) - (diff:test-state-status->diff-report-cell src-state src-status) - (diff:test-state-status->diff-report-cell dest-state dest-status) - (diff:diff-state-status->diff-report-cell diff-state diff-status))) - (else ""))) - (filter (lambda (run-diff-item) - (match run-diff-item - ((test-name item-path (junk-id diff-state diff-status (src-test-id src-state src-status) (dest-test-id dest-state dest-status))) - (not (equal? diff-state "CLEAN"))) - (else #f))) - run-diff)))) - (email-subject (conc email-subject-prefix " " src-target "/" src-run-name" vs. 
"dest-target"/"dest-run-name)) - (html-body (diff:stml->string (s:body - (diff:megatest-html-diff-logo) - (s:h2 "Summary") - (s:table 'border "0" - (s:tr - (s:td "Diff calculated at") - (s:td (conc (seconds->string) " " (local-timezone-abbreviation)))) - (s:tr - (s:td "MT_RUN_AREA_HOME" ) (s:td *toppath*)) - (s:tr 'valign "TOP" - (s:td summary-table) - (s:td meta-table))) - (s:h2 "Diffs + consistently failing tests") - main-table))) - - ) - (if html-output-file - (with-output-to-file html-output-file (lambda () (print html-body)))) - (when (and email-recipients-list (> (length email-recipients-list) 0)) - (sendmail (string-join email-recipients-list ",") email-subject html-body use_html: #t)) - html-body)) - - - - - -;; (let* ((src-run-name "all57") -;; (dest-run-name "all60") -;; (src-run-id (diff:run-name->run-id src-run-name)) -;; (dest-run-id (diff:run-name->run-id dest-run-name)) -;; (to-list (list "bjbarcla"))) -;; (diff:deliver-diff-report src-run-id dest-run-id email-recipients-list: to-list html-output-file: "/tmp/bjbarcla/zippy.html") -;; ) - -(define (do-diff-report src-target src-runname dest-target dest-runname html-file to-list-raw) - (let* (;;(src-target "nope%") - ;;(src-runname "all57") - ;;(dest-target "%") - ;;(dest-runname "all60") - (src-run-id (diff:target+run-name->run-id src-target src-runname)) - (dest-run-id (diff:target+run-name->run-id dest-target dest-runname)) - ;(html-file "/tmp/bjbarcla/zippy.html") - (to-list (if (string? 
to-list-raw) (string-split to-list-raw ",:") #f)) - ) - - (cond - ((not src-run-id) - (print "No match for source target/runname="src-target"/"src-runname) - (print "Cannot proceed.") - #f) - ((not dest-run-id) - (print "No match for source target/runname="dest-target"/"dest-runname) - (print "Cannot proceed.") - #f) - (else - (diff:deliver-diff-report src-run-id dest-run-id email-recipients-list: to-list html-output-file: html-file))))) - - ADDED docs/code/Makefile Index: docs/code/Makefile ================================================================== --- /dev/null +++ docs/code/Makefile @@ -0,0 +1,3 @@ +module-hierarchy.pdf : module-hierarchy.dot + dot -Tpdf module-hierarchy.dot -o module-hierarchy.pdf + ADDED docs/code/module-hierarchy.dot Index: docs/code/module-hierarchy.dot ================================================================== --- /dev/null +++ docs/code/module-hierarchy.dot @@ -0,0 +1,84 @@ +// Copyright 2006-2017, Matthew Welland. +// +// This file is part of Megatest. +// +// Megatest is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// Megatest is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with Megatest. If not, see . 
+// +digraph megatest_code_hierarchy { + ranksep=0.05; + // rankdir=LR + + node [shape=box,style=filled]; + + "common" [label="common module"]; + + subgraph cluster_megatest { + label="megatest"; + "common" -> "api" -> "rmt"; + "common" -> "rmt"; + } + +// subgraph cluster_code_hierarchy { +// label="Packets"; +// "SPKTS" [ label = "Sensor Packets" ]; +// "run code-hierarchy"; +// "work code-hierarchy"; +// "user request code-hierarchy"; +// } +} + +// "SENSORS" [ label = "{ Sensor Processing | { file | git | fossil | script }}" +// shape = "record"; ]; +// +// "RUNS" [ label = "{ Runs Processing | { launch | clean | re-run | archive } | { dispatcher }}"; +// shape = "record"; ]; +// +// "WORK" [ label = "{ Work Items | { start task | task competed }}"; +// shape = "record"; ]; +// +// "USERREQ" [ label = "{ User Requests (Unix and Web) | { launch | clean | re-run | archive }}"; +// shape = "record"; ]; +// +// "MTAREA1" [ label = "{ Megatest Area 1 | { parallel job\nmanagement | test\nmanagement | data\nrollup }}"; +// shape = "record"; ]; +// +// "MTAREA2" [ label = "{ Megatest Area 2 | { parallel job\nmanagement | test\nmanagement | data\nrollup }}"; +// shape = "record"; ]; +// +// "MTAREA3" [ label = "More Megatest Areas ... 
"; +// shape = "record"; ]; +// +// "PGDB" [ label = "postgres database"; +// shape = "cylinder"; ]; +// +// "WEBAPP" [ label = "{ Web View | { Runs | Contours | Control | Time View }}"; +// shape = "record"; ]; +// +// // "WEBCTRL" [ label = "{ Web View \n(control) }"; +// // shape = "record"; ]; +// +// "SENSORS" -> "SPKTS"; +// "RUNS" -> "run code-hierarchy"; +// "run code-hierarchy" -> "RUNS"; +// "WORK" -> "work code-hierarchy"; +// "work code-hierarchy" -> "RUNS"; +// "USERREQ" -> "user request code-hierarchy"; +// "SPKTS" -> "RUNS"; +// "user request code-hierarchy" -> "RUNS"; +// "RUNS" -> "MTAREA1" -> "PGDB"; +// "RUNS" -> "MTAREA2" -> "PGDB"; +// "RUNS" -> "MTAREA3" -> "PGDB"; +// "PGDB" -> "WEBAPP"; +// // "WEBCTRL" -> "run code-hierarchy"; + Index: env.scm ================================================================== --- env.scm +++ env.scm @@ -20,233 +20,5 @@ (declare (unit env)) (use sql-de-lite) ;; srfi-1 posix regex regex-case srfi-69 srfi-18 call-with-environment-variables) -(define (env:open-db fname) - (let* ((db-exists (common:file-exists? fname)) - (db (open-database fname))) - (if (not db-exists) - (begin - (exec (sql db "CREATE TABLE envvars ( - id INTEGER PRIMARY KEY, - context TEXT NOT NULL, - var TEXT NOT NULL, - val TEXT NOT NULL, - CONSTRAINT envvars_constraint UNIQUE (context,var))")))) - (set-busy-handler! db (busy-timeout 10000)) - db)) - -;; save vars in given context, this is NOT incremental by default -;; -(define (env:save-env-vars db context #!key (incremental #f)(vardat #f)) - (with-transaction - db - (lambda () - ;; first clear out any vars for this context - (if (not incremental)(exec (sql db "DELETE FROM envvars WHERE context=?") context)) - (for-each - (lambda (varval) - (let ((var (car varval)) - (val (cdr varval))) - (if incremental (exec (sql db "DELETE FROM envvars WHERE context=? 
AND var=?") context var)) - (exec (sql db "INSERT INTO envvars (context,var,val) VALUES (?,?,?)") context var val))) - (if vardat - (hash-table->alist vardat) - (get-environment-variables)))))) - -;; merge contexts in the order given -;; - each context is applied in the given order -;; - variables in the paths list are split on the separator and the components -;; merged using simple delta addition -;; returns a hash of the merged vars -;; -(define (env:merge-contexts db basecontext contexts paths) - (let ((result (make-hash-table))) - (for-each - (lambda (context) - (query - (for-each-row - (lambda (row) - (let ((var (car row)) - (val (cadr row))) - (hash-table-set! result var - (if (and (hash-table-ref/default results var #f) - (assoc var paths)) ;; this var is a path and there is a previous path - (let ((sep (cadr (assoc var paths)))) - (env:merge-path-envvar sep (hash-table-ref results var) valb)) - valb))))) - (sql db "SELECT var,val FROM envvars WHERE context=?") - context)) - contexts) - result)) - -;; get list of removed variables between two contexts -;; -(define (env:get-removed db contexta contextb) - (let ((result (make-hash-table))) - (query - (for-each-row - (lambda (row) - (let ((var (car row)) - (val (cadr row))) - (hash-table-set! result var val)))) - (sql db "SELECT var,val FROM envvars WHERE context=? AND var NOT IN (SELECT var FROM envvars WHERE context=?)") - contexta contextb) - result)) - -;; get list of variables added to contextb from contexta -;; -(define (env:get-added db contexta contextb) - (let ((result (make-hash-table))) - (query - (for-each-row - (lambda (row) - (let ((var (car row)) - (val (cadr row))) - (hash-table-set! result var val)))) - (sql db "SELECT var,val FROM envvars WHERE context=? 
AND var NOT IN (SELECT var FROM envvars WHERE context=?)") - contextb contexta) - result)) - -;; get list of variables in both contexta and contexb that have been changed -;; -(define (env:get-changed db contexta contextb) - (let ((result (make-hash-table))) - (query - (for-each-row - (lambda (row) - (let ((var (car row)) - (val (cadr row))) - (hash-table-set! result var val)))) - (sql db "SELECT var,val FROM envvars AS a WHERE context=? AND val != (SELECT val FROM envvars WHERE var=a.var AND context=?)") - contextb contexta) - result)) - -;; -(define (env:blind-merge l1 l2) - (if (null? l1) l2 - (if (null? l2) l1 - (cons (car l1) (cons (car l2) (env:blind-merge (cdr l1) (cdr l2))))))) - -;; given a before and an after envvar calculate a new merged path -;; -(define (env:merge-path-envvar separator patha pathb) - (let* ((patha-parts (string-split patha separator)) - (pathb-parts (string-split pathb separator)) - (common-parts (lset-intersection equal? patha-parts pathb-parts)) - (final (delete-duplicates ;; env:blind-merge - (append pathb-parts common-parts patha-parts)))) -;; (print "BEFORE: " (string-intersperse patha-parts "\n ")) -;; (print "AFTER: " (string-intersperse pathb-parts "\n ")) -;; (print "COMMON: " (string-intersperse common-parts "\n ")) - (string-intersperse final separator))) - -(define (env:process-path-envvar varname separator patha pathb) - (let ((newpath (env:merge-path-envvar separator patha pathb))) - (setenv varname newpath))) - -(define (env:have-context db context) - (> (query fetch-value (sql db "SELECT count(id) FROM envvars WHERE context=?") context) - 0)) - -;; this is so the calling block does not need to import sql-de-lite -(define (env:close-database db) - (close-database db)) - -(define (env:lazy-hash-table->alist indat) - (if (hash-table? indat) - (let ((dat (hash-table->alist indat))) - (if (null? 
dat) - #f - dat)) - #f)) - -(define (env:inc-path path) - (print "PATH " - (conc "#{scheme (env:min-path \"" path "\" \"#{getenv PATH}\")}"))) -;; (conc -;; "#{scheme (string-intersperse " -;; "(delete-duplicates " -;; "(append (string-split \"" path "\" \":\") " -;; "(string-split \"#{getenv PATH}\" \":\")))" -;; " \":\")}"))) - -(define (env:min-path path1 path2) - (string-intersperse - (delete-duplicates - (append - (string-split path1 ":") - (string-split path2 ":"))) - ":")) - -;; inc path will set a PATH that is incrementally modified when read - config mode only -;; -(define (env:print added removed changed #!key (inc-path #t)) - (let ((a (env:lazy-hash-table->alist added)) - (r (env:lazy-hash-table->alist removed)) - (c (env:lazy-hash-table->alist changed))) - (case (if (args:get-arg "-dumpmode") - (string->symbol (args:get-arg "-dumpmode")) - 'bash) - ((bash) - (if a - (begin - (print "# Added vars") - (map (lambda (dat)(print "export " (car dat) "=" (cdr dat))) - (hash-table->alist added)))) - (if r - (begin - (print "# Removed vars") - (map (lambda (dat)(print "unset " (car dat))) - (hash-table->alist removed)))) - (if c - (begin - (print "# Changed vars") - (map (lambda (dat)(print "export " (car dat) "=" (cdr dat))) - (hash-table->alist changed))))) - ((csh) - (if a - (begin - (print "# Added vars") - (map (lambda (dat)(print "setenv " (car dat) " " (cdr dat))) - (hash-table->alist added)))) - (if r - (begin - (print "# Removed vars") - (map (lambda (dat)(print "unsetenv " (car dat))) - (hash-table->alist removed)))) - (if c - (begin - (print "# Changed vars") - (map (lambda (dat)(print "setenv " (car dat) " " (cdr dat))) - (hash-table->alist changed))))) - ((config ini) - (if a - (begin - (print "# Added vars") - (map (lambda (dat) - (let ((var (car dat)) - (val (cdr dat))) - (if (and inc-path - (equal? 
var "PATH")) - (env:inc-path val) - (print var " " val)))) - (hash-table->alist added)))) - (if r - (begin - (print "# Removed vars") - (map (lambda (dat)(print "#{scheme (unsetenv \"" (car dat) "\")}")) - (hash-table->alist removed)))) - (if c - (begin - (print "# Changed vars") - (map (lambda (dat) - (let ((var (car dat)) - (val (cdr dat))) - (if (and inc-path - (equal? var "PATH")) - (env:inc-path val) - (print var " " val)))) - (hash-table->alist changed))))) - (else - (debug:print-error 0 *default-log-port* "No dumpmode specified, use -dumpmode [bash|csh|config]"))))) Index: ezsteps.scm ================================================================== --- ezsteps.scm +++ ezsteps.scm @@ -39,162 +39,5 @@ ;;(rmt:get-test-info-by-id run-id test-id) -> testdat - -(define (ezsteps:run-from testdat start-step-name run-one) - ;;# TODO - recapture item variables, debug repeated step eval; regen logpro from test - (let* ((do-update-test-state-status #f) - (test-run-dir ;; (filedb:get-path *fdb* - (db:test-get-rundir testdat)) ;; ) - (testconfig (read-config (conc test-run-dir "/testconfig") #f #t environ-patt: "pre-launch-env-vars")) - (ezstepslst (hash-table-ref/default testconfig "ezsteps" '())) - (run-mutex (make-mutex)) - (rollup-status 0) - (rollup-status-string #f) - (rollup-status-sym #f) - (exit-info (vector #t #t #t)) - (test-id (db:test-get-id testdat)) - (run-id (db:test-get-run_id testdat)) - (test-name (db:test-get-testname testdat)) - (orig-test-state (db:test-get-state testdat)) - (orig-test-status (db:test-get-status testdat)) - (kill-job #f)) ;; for future use (on re-factoring with launch.scm code - - ;; keep trying till NFS deigns to populate test run dir on this host - (let loop ((count 5)) - (if (not (common:file-exists? test-run-dir)) - ;;(push-directory test-run-dir) - (if (> count 0) - (begin - (debug:print 0 *default-log-port* "WARNING: ezsteps attempting to run but test run directory " test-run-dir " is not there. 
Waiting and trying again " count " more times") - (sleep 3) - (loop (- count 1)))))) - - (debug:print-info 0 *default-log-port* "Running in directory " test-run-dir) - (if (not (common:file-exists? ".ezsteps"))(create-directory ".ezsteps")) - ;; if ezsteps was defined then we are sure to have at least one step but check anyway - - (if (not (> (length ezstepslst) 0)) - (message-window "ERROR: You can only re-run steps defined via ezsteps") - (begin - (let loop ((ezstep (car ezstepslst)) - (tal (cdr ezstepslst)) - (status-sym-so-far 'pass) - ;;(runflag #f) - (saw-start-step-name #f)) ;; flag used to skip steps when not starting at the beginning - (if (vector-ref exit-info 1) - (let* ((stepname (car ezstep)) ;; do stuff to run the step - (logpro-used (common:file-exists? (conc test-run-dir "/" stepname ".logpro"))) - (stepinfo (cadr ezstep)) - (stepparts (string-match (regexp "^(\\{([^\\}]*)\\}\\s*|)(.*)$") stepinfo)) - (stepparms (list-ref stepparts 2)) ;; for future use, {VAR=1,2,3}, run step for each - (stepcmd (list-ref stepparts 3)) - (script (conc "mt_ezstep '"test-run-dir"' '"stepname"' '"stepcmd"'")) ;; call the command using mt_ezstep - (saw-start-step-name-next (or saw-start-step-name (equal? stepname start-step-name))) - (proceed-with-this-step - (or (not start-step-name) - (equal? stepname start-step-name) - (and saw-start-step-name (not run-one)) - saw-start-step-name-next - (and start-step-name (equal? stepname start-step-name)))) - ) - (set! do-update-test-state-status (and proceed-with-this-step (null? tal))) - ;;(BB> "stepname="stepname" proceed-with-this-step="proceed-with-this-step " do-update-test-state-status="do-update-test-state-status " orig-test-state="orig-test-state" orig-test-status="orig-test-status) - (cond - ((and (not proceed-with-this-step) (null? 
tal)) - 'done) - ((not proceed-with-this-step) - (loop (car tal) - (cdr tal) - status-sym-so-far - saw-start-step-name-next)) - (else - (debug:print 4 *default-log-port* "ezsteps:\n stepname: " stepname " stepinfo: " stepinfo " stepparts: " stepparts - " stepparms: " stepparms " stepcmd: " stepcmd) - (debug:print 4 *default-log-port* "script: " script) - (rmt:teststep-set-status! run-id test-id stepname "start" "-" #f #f) - - ;; now launch the script - (let ((pid (process-run script))) - (let processloop ((i 0)) - (let-values (((pid-val exit-status exit-code)(process-wait pid #t))) - (mutex-lock! run-mutex) - (vector-set! exit-info 0 pid) - (vector-set! exit-info 1 exit-status) - (vector-set! exit-info 2 exit-code) - (mutex-unlock! run-mutex) - (if (eq? pid-val 0) - (begin - (thread-sleep! 1) - (processloop (+ i 1)))) - )) - (let ((exinfo (vector-ref exit-info 2)) - (logfna (if logpro-used (conc stepname ".html") ""))) - (rmt:teststep-set-status! run-id test-id stepname "end" exinfo #f logfna)) - - (if logpro-used - (rmt:test-set-log! run-id test-id (conc stepname ".html"))) - - ;; set the test final status - (let* ((this-step-status (cond - (logpro-used - (common:logpro-exit-code->status-sym (vector-ref exit-info 2))) - ((eq? (vector-ref exit-info 2) 0) - 'pass) - (else - 'fail))) - (overall-status-sym (common:worse-status-sym this-step-status status-sym-so-far)) - (overall-status-string (status-sym->string overall-status-sym))) - (debug:print 4 *default-log-port* "Exit value received: " (vector-ref exit-info 2) " logpro-used: " logpro-used - " this-step-status: " this-step-status " overall-status: " overall-status-sym) - ;;" next-status: " next-status " rollup-status: " rollup-status) - (set! rollup-status-string overall-status-string) - (set! rollup-status-sym overall-status-sym) - (tests:test-set-status! 
run-id test-id "RUNNING" overall-status-string #f #f))) - - (if (and - (not run-one) - (common:steps-can-proceed-given-status-sym rollup-status-sym) - (not (null? tal))) - (loop (car tal) - (cdr tal) - rollup-status-sym - saw-start-step-name-next))))) - (debug:print 4 *default-log-port* "WARNING: a prior step failed, stopping at " ezstep))) - - ;; Once done with step/steps update the test record - ;; - (let* ((item-path (db:test-get-item-path testdat)) ;; (item-list->path itemdat)) - (testinfo (rmt:get-testinfo-state-status run-id test-id))) ;; refresh the testdat, call it iteminfo in case need prev/curr - ;; Am I completed? - (if (equal? (db:test-get-state testinfo) "RUNNING") ;; (not (equal? (db:test-get-state testinfo) "COMPLETED")) - (let ((new-state (if kill-job "KILLED" "COMPLETED") ;; (if (eq? (vector-ref exit-info 2) 0) ;; exited with "good" status - ;; "COMPLETED" - ;; (db:test-get-state testinfo))) ;; else preseve the state as set within the test - ) - (new-status rollup-status-string) - ) ;; (db:test-get-status testinfo))) - (debug:print-info 2 *default-log-port* "Test NOT logged as COMPLETED, (state=" (db:test-get-state testinfo) "), updating result, rollup-status is " rollup-status) - (tests:test-set-status! run-id test-id - (if do-update-test-state-status new-state orig-test-state) - (if do-update-test-state-status new-status orig-test-status) - (args:get-arg "-m") #f) - ;; need to update the top test record if PASS or FAIL and this is a subtest - (if (and (not (equal? item-path "")) do-update-test-state-status) - (rmt:set-state-status-and-roll-up-items run-id test-name item-path new-state new-status #f)))) - ;; for automated creation of the rollup html file this is a good place... - (if (not (equal? item-path "")) - (tests:summarize-items run-id test-id test-name #f)) ;; don't force - just update if no - ))) - ;;(pop-directory) - rollup-status-string)) - -(define (ezsteps:spawn-run-from testdat start-step-name run-one) - (thread-start! 
- (make-thread - (lambda () - (ezsteps:run-from testdat start-step-name run-one)) - (conc "ezstep run single step " start-step-name " run-one="run-one))) - ) - Index: genexample.scm ================================================================== --- genexample.scm +++ genexample.scm @@ -19,324 +19,5 @@ ;;====================================================================== (declare (unit genexample)) (use posix regex) -(define genexample:example-logpro -#< 0 "Put description here" #/put pattern here/) - ;; - ;; You may need ignores to suppress false error or warning hits from the later expects - ;; NOTE: Order is important here! - (expect:ignore in "LogFileBody" < 99 "Ignore the word error in comments" #/^\/\/.*error/) - (expect:warning in "LogFileBody" = 0 "Any warning" #/warn/) - (expect:error in "LogFileBody" = 0 "Any error" (list #/ERROR/ #/error/)) ;; but disallow any other errors -EOF -) - -(define genexample:example-script -#<number (string-split color1))) - (c2 (map string->number (string-split color2))) - (delta (map (lambda (a b)(abs (- a b))) c1 c2))) - (null? (filter (lambda (x)(> x 3)) delta)))) - -(define gutils:colors - '((PASS . "70 249 73") - (FAIL . "253 33 49") - (SKIP . 
"230 230 0"))) - -(define (gutils:get-color-spec effective-state) - (or (alist-ref effective-state gutils:colors) - (alist-ref 'FAIL gutils:colors))) - -;; BBnote - state status dashboard button color / text defined here -(define (gutils:get-color-for-state-status state status);; #!key (get-label #f)) - ;; ((if get-label cadr car) - (case (string->symbol state) - ((COMPLETED) ;; ARCHIVED) - (case (string->symbol status) - ((PASS) (list "70 249 73" status)) - ((PREQ_FAIL PREQ_DISCARDED) (list "255 127 127" status)) - ((WARN WAIVED) (list "255 172 13" status)) - ((SKIP) (list (gutils:get-color-spec 'SKIP) status)) - ((ABORT) (list "198 36 166" status)) - (else (list "253 33 49" status)))) - ((ARCHIVED) - (case (string->symbol status) - ((PASS) (list "70 170 73" status)) - ((WARN WAIVED) (list "200 130 13" status)) - ((SKIP) (list (gutils:get-color-spec 'SKIP) status)) - (else (list "180 33 49" status)))) - ;; (if (equal? status "PASS") - ;; '("70 249 73" "PASS") - ;; (if (or (equal? status "WARN") - ;; (equal? 
status "WAIVED")) - ;; (list "255 172 13" status) - ;; (list "223 33 49" status)))) ;; greenish orangeish redish - ((LAUNCHED) (list "101 123 142" state)) - ((CHECK) (list "255 100 50" state)) - ((REMOTEHOSTSTART) (list "50 130 195" state)) - ((RUNNING STARTED) (list "9 131 232" state)) - ((KILLREQ) (list "39 82 206" state)) - ((KILLED) (list "234 101 17" state)) - ((NOT_STARTED) (case (string->symbol status) - ((CHECK STARTED)(list (gutils:get-color-spec 'SKIP) state)) - (else (list "240 240 240" state)))) - ;; for xor mode below - ;; - ((CLEAN) - (case (string->symbol status) - ((CLEAN-FAIL CLEAN-CHECK CLEAN-ABORT) (list "200 130 13" status)) ;; orange requested for these - (else (list "60 235 63" status)))) - ((DIRTY-BETTER) (list "160 255 153" status)) - ((DIRTY-WORSE) (list "165 42 42" status)) - ((BOTH-BAD) (list "180 33 49" status)) - - (else (list "192 192 192" state)))) - Index: http-transport.scm ================================================================== --- http-transport.scm +++ http-transport.scm @@ -21,14 +21,10 @@ (use srfi-1 posix regex regex-case srfi-69 hostinfo md5 message-digest posix-extras) (use spiffy uri-common intarweb http-client spiffy-request-vars intarweb spiffy-directory-listing) -;; Configurations for server -(tcp-buffer-size 2048) -(max-connections 2048) - (declare (unit http-transport)) (declare (uses common)) (declare (uses db)) (declare (uses tests)) @@ -43,638 +39,5 @@ (include "common_records.scm") (include "db_records.scm") (include "js-path.scm") (require-library stml) -(define (http-transport:make-server-url hostport) - (if (not hostport) - #f - (conc "http://" (car hostport) ":" (cadr hostport)))) - -(define *server-loop-heart-beat* (current-seconds)) - -;;====================================================================== -;; S E R V E R -;; ====================================================================== - -;; Call this to start the actual server -;; - -(define *db:process-queue-mutex* (make-mutex)) - 
-(define (http-transport:run hostn) - (debug:print 2 *default-log-port* "Attempting to start the server ...") - (let* ((db #f) ;; (open-db)) ;; we don't want the server to be opening and closing the db unnecesarily - (hostname (get-host-name)) - (ipaddrstr (let ((ipstr (if (string=? "-" hostn) - ;; (string-intersperse (map number->string (u8vector->list (hostname->ip hostname))) ".") - (server:get-best-guess-address hostname) - #f))) - (if ipstr ipstr hostn))) ;; hostname))) - (start-port (portlogger:open-run-close portlogger:find-port)) - (link-tree-path (common:get-linktree)) - (tmp-area (common:get-db-tmp-area *alldat*)) - (start-file (conc tmp-area "/.server-start"))) - (debug:print-info 0 *default-log-port* "portlogger recommended port: " start-port) - ;; set some parameters for the server - (root-path (if link-tree-path - link-tree-path - (current-directory))) ;; WARNING: SECURITY HOLE. FIX ASAP! - (handle-directory spiffy-directory-listing) - (handle-exception (lambda (exn chain) - (signal (make-composite-condition - (make-property-condition - 'server - 'message "server error"))))) - - ;; http-transport:handle-directory) ;; simple-directory-handler) - ;; Setup the web server and a /ctrl interface - ;; - (vhost-map `(((* any) . ,(lambda (continue) - ;; open the db on the first call - ;; This is were we set up the database connections - (let* (($ (request-vars source: 'both)) - (dat ($ 'dat)) - (res #f)) - (cond - ((equal? (uri-path (request-uri (current-request))) - '(/ "api")) - (send-response body: (api:process-request *dbstruct-db* $) ;; the $ is the request vars proc - headers: '((content-type text/plain))) - (mutex-lock! *heartbeat-mutex*) - (set! *db-last-access* (current-seconds)) - (mutex-unlock! *heartbeat-mutex*)) - ((equal? (uri-path (request-uri (current-request))) - '(/ "")) - (send-response body: (http-transport:main-page))) - ((equal? 
(uri-path (request-uri (current-request))) - '(/ "json_api")) - (send-response body: (http-transport:main-page))) - ((equal? (uri-path (request-uri (current-request))) - '(/ "runs")) - (send-response body: (http-transport:main-page))) - ((equal? (uri-path (request-uri (current-request))) - '(/ any)) - (send-response body: "hey there!\n" - headers: '((content-type text/plain)))) - ((equal? (uri-path (request-uri (current-request))) - '(/ "hey")) - (send-response body: "hey there!\n" - headers: '((content-type text/plain)))) - ((equal? (uri-path (request-uri (current-request))) - '(/ "jquery3.1.0.js")) - (send-response body: (http-transport:show-jquery) - headers: '((content-type application/javascript)))) - ((equal? (uri-path (request-uri (current-request))) - '(/ "test_log")) - (send-response body: (http-transport:html-test-log $) - headers: '((content-type text/HTML)))) - ((equal? (uri-path (request-uri (current-request))) - '(/ "dashboard")) - (send-response body: (http-transport:html-dboard $) - headers: '((content-type text/HTML)))) - (else (continue)))))))) - (with-output-to-file start-file (lambda ()(print (current-process-id)))) - (http-transport:try-start-server ipaddrstr start-port))) - -;; This is recursively run by http-transport:run until sucessful -;; -(define (http-transport:try-start-server ipaddrstr portnum) - (let ((config-hostname (configf:lookup *configdat* "server" "hostname")) - (config-use-proxy (equal? (configf:lookup *configdat* "client" "use-http_proxy") "yes"))) - (if (not config-use-proxy) - (determine-proxy (constantly #f))) - (debug:print-info 0 *default-log-port* "http-transport:try-start-server time=" (seconds->time-string (current-seconds)) " ipaddrsstr=" ipaddrstr " portnum=" portnum " config-hostname=" config-hostname) - (handle-exceptions - exn - (begin - (print-error-message exn) - (if (< portnum 64000) - (begin - (debug:print 0 *default-log-port* "WARNING: attempt to start server failed. 
Trying again ...") - (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) - (debug:print 5 *default-log-port* "exn=" (condition->list exn)) - (portlogger:open-run-close portlogger:set-failed portnum) - (debug:print 0 *default-log-port* "WARNING: failed to start on portnum: " portnum ", trying next port") - (thread-sleep! 0.1) - - ;; get_next_port goes here - (http-transport:try-start-server ipaddrstr - (portlogger:open-run-close portlogger:find-port))) - (begin - (print "ERROR: Tried and tried but could not start the server")))) - ;; any error in following steps will result in a retry - (set! *server-info* (list ipaddrstr portnum)) - (debug:print 0 *default-log-port* "INFO: Trying to start server on " ipaddrstr ":" portnum) - ;; This starts the spiffy server - ;; NEED WAY TO SET IP TO #f TO BIND ALL - ;; (start-server bind-address: ipaddrstr port: portnum) - (if config-hostname ;; this is a hint to bind directly - (start-server port: portnum bind-address: (if (equal? config-hostname "-") - ipaddrstr - config-hostname)) - (start-server port: portnum)) - (portlogger:open-run-close portlogger:set-port portnum "released") - (debug:print 1 *default-log-port* "INFO: server has been stopped")))) - -;;====================================================================== -;; S E R V E R U T I L I T I E S -;;====================================================================== - -;;====================================================================== -;; C L I E N T S -;;====================================================================== - -(define *http-mutex* (make-mutex)) - -;; NOTE: Large block of code from 32436b426188080f72fceb6894af541fbad9921e removed here -;; I'm pretty sure it is defunct. 
- -;; This next block all imported en-mass from the api branch -(define *http-requests-in-progress* 0) -(define *http-connections-next-cleanup* (current-seconds)) - -(define (http-transport:get-time-to-cleanup) - (let ((res #f)) - (mutex-lock! *http-mutex*) - (set! res (> (current-seconds) *http-connections-next-cleanup*)) - (mutex-unlock! *http-mutex*) - res)) - -(define (http-transport:inc-requests-count) - (mutex-lock! *http-mutex*) - (set! *http-requests-in-progress* (+ 1 *http-requests-in-progress*)) - ;; Use this opportunity to slow things down iff there are too many requests in flight - (if (> *http-requests-in-progress* 5) - (begin - (debug:print-info 0 *default-log-port* "Whoa there buddy, ease up...") - (thread-sleep! 1))) - (mutex-unlock! *http-mutex*)) - -(define (http-transport:dec-requests-count proc) - (mutex-lock! *http-mutex*) - (proc) - (set! *http-requests-in-progress* (- *http-requests-in-progress* 1)) - (mutex-unlock! *http-mutex*)) - -(define (http-transport:dec-requests-count-and-close-all-connections) - (set! *http-requests-in-progress* (- *http-requests-in-progress* 1)) - (let loop ((etime (+ (current-seconds) 5))) ;; give up in five seconds - (if (> *http-requests-in-progress* 0) - (if (> etime (current-seconds)) - (begin - (thread-sleep! 0.05) - (loop etime)) - (debug:print-error 0 *default-log-port* "requests still in progress after 5 seconds of waiting. I'm going to pass on cleaning up http connections")) - (close-all-connections!))) - (set! *http-connections-next-cleanup* (+ (current-seconds) 10)) - (mutex-unlock! *http-mutex*)) - -(define (http-transport:inc-requests-and-prep-to-close-all-connections) - (mutex-lock! *http-mutex*) - (set! *http-requests-in-progress* (+ 1 *http-requests-in-progress*))) - -;; Send "cmd" with json payload "params" to serverdat and receive result -;; -(define (http-transport:client-api-send-receive run-id serverdat cmd params #!key (numretries 3)(area-dat #f)) - (let* ((fullurl (if (vector? 
serverdat) - (http-transport:server-dat-get-api-req serverdat) - (begin - (debug:print 0 *default-log-port* "FATAL ERROR: http-transport:client-api-send-receive called with no server info") - (exit 1)))) - (res (vector #f "uninitialized")) - (success #t) - (sparams (db:obj->string params transport: 'http)) - (areadat (or area-dat *areadat*))) - (debug:print-info 11 *default-log-port* "fullurl=" fullurl ", cmd=" cmd ", params=" params ", run-id=" run-id "\n") - ;; set up the http-client here - (max-retry-attempts 1) - ;; consider all requests indempotent - (retry-request? (lambda (request) - #f)) - ;; send the data and get the response - ;; extract the needed info from the http data and - ;; process and return it. - (let* ((send-recieve (lambda () - (mutex-lock! *http-mutex*) - ;; (condition-case (with-input-from-request "http://localhost"; #f read-lines) - ;; ((exn http client-error) e (print e))) - (set! res (vector ;;; DON'T FORGET - THIS IS THE CLIENT SIDE! NOTE: consider moving this to client.scm since we are only supporting http transport at this time. - success - (db:string->obj - (handle-exceptions - exn - (let ((call-chain (get-call-chain)) - (msg ((condition-property-accessor 'exn 'message) exn))) - (set! success #f) - (if (debug:debug-mode 1) - (debug:print-info 0 *default-log-port* "couldn't talk to server, trying again ...") - (begin - (debug:print 0 *default-log-port* "WARNING: failure in with-input-from-request to " fullurl ".") - (debug:print 0 *default-log-port* " message: " msg) - (debug:print 0 *default-log-port* " cmd: " cmd " params: " params) - (debug:print 0 *default-log-port* " call-chain: " call-chain))) - (if areadat - (areadat-conndat-set! areadat #f)) - ;; Killing associated server to allow clean retry.") - ;; (tasks:kill-server-run-id run-id) ;; better to kill the server in the logic that called this routine? - (mutex-unlock! 
*http-mutex*) - ;;; (signal (make-composite-condition - ;;; (make-property-condition 'commfail 'message "failed to connect to server"))) - ;;; "communications failed" - (db:obj->string #f)) - (with-input-from-request ;; was dat - fullurl - (list (cons 'key (or *server-id* "thekey")) - (cons 'cmd cmd) - (cons 'params sparams)) - read-string)) - transport: 'http) - 0)) ;; added this speculatively - ;; Shouldn't this be a call to the managed call-all-connections stuff above? - (close-all-connections!) - (mutex-unlock! *http-mutex*) - )) - (time-out (lambda () - (thread-sleep! 45) - #f)) - (th1 (make-thread send-recieve "with-input-from-request")) - (th2 (make-thread time-out "time out"))) - (thread-start! th1) - (thread-start! th2) - (thread-join! th1) - (thread-terminate! th2) - (debug:print-info 11 *default-log-port* "got res=" res) - (if (vector? res) - (if (vector-ref res 0) ;; this is the first flag or the second flag? - res ;; this is the *inner* vector? seriously? why? - (if (debug:debug-mode 11) - (let ((call-chain (get-call-chain))) ;; note: this code also called in nmsg-transport - consider consolidating it - (print-call-chain (current-error-port)) - (debug:print-error 11 *default-log-port* "error above occured at server, res=" res " message: " ((condition-property-accessor 'exn 'message) exn)) - (debug:print 11 *default-log-port* " server call chain:") - (pp (vector-ref res 1) (current-error-port)) - (signal (vector-ref res 0))) - res)) - (signal (make-composite-condition - (make-property-condition - 'timeout - 'message "nmsg-transport:client-api-send-receive-raw timed out talking to server"))))))) - -;; careful closing of connections stored in *alldat* -;; -(define (http-transport:close-connections #!key (all-dat #f)) - (let* ((alldat (or all-dat *alldat*)) - (server-dat (if alldat - (alldat-conndat alldat) - #f))) ;; (hash-table-ref/default *areadat* run-id #f))) - (if (vector? 
server-dat) - (let ((api-dat (http-transport:server-dat-get-api-uri server-dat))) - (handle-exceptions - exn - (begin - (print-call-chain *default-log-port*) - (debug:print-error 0 *default-log-port* " closing connection failed with error: " ((condition-property-accessor 'exn 'message) exn))) - (close-connection! api-dat) - ;;(close-idle-connections!) - #t)) - #f))) - -;; http-transport:server-dat definition moved to common_records.scm - -;; -;; connect -;; -(define (http-transport:client-connect iface port) - (let* ((api-url (conc "http://" iface ":" port "/api")) - (api-uri (uri-reference (conc "http://" iface ":" port "/api"))) - (api-req (make-request method: 'POST uri: api-uri)) - (server-dat (vector iface port api-uri api-url api-req (current-seconds)))) - server-dat)) - -;; run http-transport:keep-running in a parallel thread to monitor that the db is being -;; used and to shutdown after sometime if it is not. -;; -(define (http-transport:keep-running) - ;; if none running or if > 20 seconds since - ;; server last used then start shutdown - ;; This thread waits for the server to come alive - (debug:print-info 0 *default-log-port* "Starting the sync-back, keep alive thread in server") - (let* ((tmp-area (common:get-db-tmp-area *alldat*)) - (started-file (conc tmp-area "/.server-started")) - (server-start-time (current-seconds)) - (server-info (let loop ((start-time (current-seconds)) - (changed #t) - (last-sdat "not this")) - (let ((sdat #f)) - (thread-sleep! 0.01) - (debug:print-info 0 *default-log-port* "Waiting for server alive signature") - (mutex-lock! *heartbeat-mutex*) - (set! sdat *server-info*) - (mutex-unlock! *heartbeat-mutex*) - (if (and sdat - (not changed) - (> (- (current-seconds) start-time) 2)) - (begin - (debug:print-info 0 *default-log-port* "Received server alive signature") - (common:save-pkt `((action . alive) - (T . server) - (pid . ,(current-process-id)) - (ipaddr . ,(car sdat)) - (port . 
,(cadr sdat))) - *configdat* #t) - sdat) - (begin - (debug:print-info 0 *default-log-port* "Still waiting, last-sdat=" last-sdat) - (sleep 4) - (if (> (- (current-seconds) start-time) 120) ;; been waiting for two minutes - (begin - (debug:print-error 0 *default-log-port* "transport appears to have died, exiting server") - (common:save-pkt `((action . died) - (T . server) - (pid . ,(current-process-id)) - (ipaddr . ,(car sdat)) - (port . ,(cadr sdat)) - (msg . "Transport died?")) - *configdat* #t) - (exit)) - (loop start-time - (equal? sdat last-sdat) - sdat))))))) - (iface (car server-info)) - (port (cadr server-info)) - (last-access 0) - (server-timeout (server:expiration-timeout)) - (server-going #f) - (server-log-file (args:get-arg "-log"))) ;; always set when we are a server - - (with-output-to-file started-file (lambda ()(print (current-process-id)))) - - (let loop ((count 0) - (server-state 'available) - (bad-sync-count 0) - (start-time (current-milliseconds))) - ;; Use this opportunity to sync the tmp db to megatest.db - (if (not server-going) ;; *dbstruct-db* - (begin - (debug:print 0 *default-log-port* "SERVER: dbprep") - (set! *dbstruct-db* (db:setup #t)) ;; run-id)) - (set! server-going #t) - (debug:print 0 *default-log-port* "SERVER: running, megatest version: " (common:get-full-version)) ;; NOTE: the server is NOT yet marked as running in the log. We do that in the keep-running routine. - (thread-start! *watchdog*))) - - ;; when things go wrong we don't want to be doing the various queries too often - ;; so we strive to run this stuff only every four seconds or so. - (let* ((sync-time (- (current-milliseconds) start-time)) - (rem-time (quotient (- 4000 sync-time) 1000))) - (if (and (<= rem-time 4) - (> rem-time 0)) - (thread-sleep! rem-time))) - - (if (< count 1) ;; 3x3 = 9 secs aprox - (loop (+ count 1) 'running bad-sync-count (current-milliseconds))) - - ;; Check that iface and port have not changed (can happen if server port collides) - (mutex-lock! 
*heartbeat-mutex*) - (set! sdat *server-info*) - (mutex-unlock! *heartbeat-mutex*) - - (if (not (equal? sdat (list iface port))) - (let ((new-iface (car sdat)) - (new-port (cadr sdat))) - (debug:print-info 0 *default-log-port* "WARNING: interface changed, refreshing iface and port info") - (set! iface new-iface) - (set! port new-port) - (debug:print 0 *default-log-port* "SERVER STARTED: " iface ":" port " AT " (current-seconds)) - (flush-output *default-log-port*))) - - ;; Transfer *db-last-access* to last-access to use in checking that we are still alive - (mutex-lock! *heartbeat-mutex*) - (set! last-access *db-last-access*) - (mutex-unlock! *heartbeat-mutex*) - - (if (common:low-noise-print 120 (conc "server running on " iface ":" port)) - (begin - (debug:print 0 *default-log-port* "SERVER STARTED: " iface ":" port " AT " (current-seconds)) - (flush-output *default-log-port*))) - (if (common:low-noise-print 60 "dbstats") - (begin - (debug:print 0 *default-log-port* "Server stats:") - (db:print-current-query-stats))) - (let* ((hrs-since-start (/ (- (current-seconds) server-start-time) 3600))) - (cond - ((and *server-run* - (> (+ last-access server-timeout) - (current-seconds))) - (if (common:low-noise-print 120 "server continuing") - (debug:print-info 0 *default-log-port* "Server continuing, seconds since last db access: " (- (current-seconds) last-access)) - (let ((curr-time (current-seconds))) - (handle-exceptions - exn - (debug:print 0 *default-log-port* "ERROR: Failed to change timestamp on log file " server-log-file ". Are you out of space on that disk?") - (if (not *server-overloaded*) - (change-file-times server-log-file curr-time curr-time))))) - (loop 0 server-state bad-sync-count (current-milliseconds))) - (else - (debug:print-info 0 *default-log-port* "Server timed out. 
seconds since last db access: " (- (current-seconds) last-access)) - (http-transport:server-shutdown port))))))) - -(define (http-transport:server-shutdown port) - (begin - ;;(BB> "http-transport:server-shutdown called") - (debug:print-info 0 *default-log-port* "Starting to shutdown the server. pid="(current-process-id)) - ;; - ;; start_shutdown - ;; - (set! *time-to-exit* #t) ;; tell on-exit to be fast as we've already cleaned up - (portlogger:open-run-close portlogger:set-port port "released") - (thread-sleep! 1) - - ;; (debug:print-info 0 *default-log-port* "Max cached queries was " *max-cache-size*) - ;; (debug:print-info 0 *default-log-port* "Number of cached writes " *number-of-writes*) - ;; (debug:print-info 0 *default-log-port* "Average cached write time " - ;; (if (eq? *number-of-writes* 0) - ;; "n/a (no writes)" - ;; (/ *writes-total-delay* - ;; *number-of-writes*)) - ;; " ms") - ;; (debug:print-info 0 *default-log-port* "Number non-cached queries " *number-non-write-queries*) - ;; (debug:print-info 0 *default-log-port* "Average non-cached time " - ;; (if (eq? *number-non-write-queries* 0) - ;; "n/a (no queries)" - ;; (/ *total-non-write-delay* - ;; *number-non-write-queries*)) - ;; " ms") - - (db:print-current-query-stats) - (common:save-pkt `((action . exit) - (T . server) - (pid . ,(current-process-id))) - *configdat* #t) - (debug:print-info 0 *default-log-port* "Server shutdown complete. Exiting") - (exit))) - -;; all routes though here end in exit ... -;; -;; start_server? 
-;; -(define (http-transport:launch) - ;; check that a server start is in progress, pause or exit if so - (let* ((tmp-area (common:get-db-tmp-area *alldat*)) - (server-start (conc tmp-area "/.server-start")) - (server-started (conc tmp-area "/.server-started")) - (start-time (common:lazy-modification-time server-start)) - (started-time (common:lazy-modification-time server-started)) - (server-starting (< start-time started-time)) ;; if start-time is less than started-time then a server is still starting - (start-time-old (> (- (current-seconds) start-time) 5)) - (cleanup-proc (lambda (msg) - (let* ((serv-fname (conc "server-" (current-process-id) "-" (get-host-name) ".log")) - (full-serv-fname (conc *toppath* "/logs/" serv-fname)) - (new-serv-fname (conc *toppath* "/logs/" "defunct-" serv-fname))) - (debug:print 0 *default-log-port* msg) - (if (common:file-exists? full-serv-fname) - (system (conc "sleep 1;mv -f " full-serv-fname " " new-serv-fname)) - (debug:print 0 *default-log-port* "INFO: cannot move " full-serv-fname " to " new-serv-fname)) - (exit))))) - (if (and (not start-time-old) ;; last server start try was less than five seconds ago - (not server-starting)) - (begin - (cleanup-proc "NOT starting server, there is either a recently started server or a server in process of starting") - (exit))) - ;; lets not even bother to start if there are already three or more server files ready to go - (let* ((num-alive (server:get-num-alive (server:get-list *toppath*)))) - (if (> num-alive 3) - (begin - (cleanup-proc (conc "ERROR: Aborting server start because there are already " num-alive " possible servers either running or starting up")) - (exit)))) - (common:save-pkt `((action . start) - (T . server) - (pid . 
,(current-process-id))) - *configdat* #t) - (let* ((th2 (make-thread (lambda () - (debug:print-info 0 *default-log-port* "Server run thread started") - (http-transport:run - (if (args:get-arg "-server") - (args:get-arg "-server") - "-") - )) "Server run")) - (th3 (make-thread (lambda () - (debug:print-info 0 *default-log-port* "Server monitor thread started") - (http-transport:keep-running) - "Keep running")))) - (thread-start! th2) - (thread-sleep! 0.25) ;; give the server time to settle before starting the keep-running monitor. - (thread-start! th3) - (set! *didsomething* #t) - (thread-join! th2) - (exit)))) - -;; (define (http-transport:server-signal-handler signum) -;; (signal-mask! signum) -;; (handle-exceptions -;; exn -;; (debug:print 0 *default-log-port* " ... exiting ...") -;; (let ((th1 (make-thread (lambda () -;; (thread-sleep! 1)) -;; "eat response")) -;; (th2 (make-thread (lambda () -;; (debug:print-error 0 *default-log-port* "Received ^C, attempting clean exit. Please be patient and wait a few seconds before hitting ^C again.") -;; (thread-sleep! 3) ;; give the flush three seconds to do it's stuff -;; (debug:print 0 *default-log-port* " Done.") -;; (exit 4)) -;; "exit on ^C timer"))) -;; (thread-start! th2) -;; (thread-start! th1) -;; (thread-join! th2)))) - -;;=============================================== -;; Java script -;;=============================================== -(define (http-transport:show-jquery) - (let* ((data (tests:readlines *java-script-lib*))) -(string-join data "\n"))) - - - -;;====================================================================== -;; web pages -;;====================================================================== - -(define (http-transport:html-test-log $) - (let* ((run-id ($ 'runid)) - (test-item ($ 'testname)) - (parts (string-split test-item ":")) - (test-name (car parts)) - - (item-name (if (equal? 
(length parts) 1) - "" - (cadr parts)))) - ;(print $) -(tests:get-test-log run-id test-name item-name))) - - -(define (http-transport:html-dboard $) - (let* ((page ($ 'page)) - (oup (open-output-string)) - (bdy "--------------------------") - - (ret (tests:dynamic-dboard page))) - (s:output-new oup ret) - (close-output-port oup) - - (set! bdy (get-output-string oup)) - (conc "

Dashboard

" bdy "

" ))) - -(define (http-transport:main-page) - (let ((linkpath (root-path))) - (conc "

" (pathname-strip-directory *toppath*) "

" - "" - "Run area: " *toppath* - "

Server Stats

" - (http-transport:stats-table) - "
" - (http-transport:runs linkpath) - "
" - (http-transport:run-stats) - "" - ))) - -(define (http-transport:stats-table) - (mutex-lock! *heartbeat-mutex*) - (let ((res - (conc "" - ;; "" - "" - "" - "" - ;; "" - "" - "
Max cached queries " *max-cache-size* "
Number of cached writes " *number-of-writes* "
Average cached write time " (if (eq? *number-of-writes* 0) - "n/a (no writes)" - (/ *writes-total-delay* - *number-of-writes*)) - " ms
Number non-cached queries " *number-non-write-queries* "
Average non-cached time " (if (eq? *number-non-write-queries* 0) - ;; "n/a (no queries)" - ;; (/ *total-non-write-delay* - ;; *number-non-write-queries*)) - " ms
Last access" (seconds->time-string *db-last-access*) "
"))) - (mutex-unlock! *heartbeat-mutex*) - res)) - -(define (http-transport:runs linkpath) - (conc "

Runs

" - (string-intersperse - (let ((files (map pathname-strip-directory (glob (conc linkpath "/*"))))) - (map (lambda (p) - (conc "" p "
")) - files)) - " "))) - -(define (http-transport:run-stats) - (let ((stats (open-run-close db:get-running-stats #f))) - (conc "" - (string-intersperse - (map (lambda (stat) - (conc "")) - stats) - " ") - "
" (car stat) "" (cadr stat) "
"))) Index: items.scm ================================================================== --- items.scm +++ items.scm @@ -26,133 +26,5 @@ (declare (uses commonmod)) (import commonmod) (include "common_records.scm") - -;; Puts out all combinations -(define (process-itemlist hierdepth curritemkey itemlist) - (let ((res '())) - (if (not hierdepth) - (set! hierdepth (length itemlist))) - (let loop ((hed (car itemlist)) - (tal (cdr itemlist))) - (if (null? tal) - (for-each (lambda (item) - (if (> (length curritemkey) (- hierdepth 2)) - (set! res (append res (list (append curritemkey (list (list (car hed) item)))))))) - (cadr hed)) - (begin - (for-each (lambda (item) - (set! res (append res (process-itemlist hierdepth (append curritemkey (list (list (car hed) item))) tal)))) - (cadr hed)) - (loop (car tal)(cdr tal))))) - res)) - -;; (item-assoc->item-list '(("ANIMAL" "Elephant Lion")("SEASON" "Spring Fall"))) -;; => ((("ANIMAL" "Elephant") ("SEASON" "Spring")) -;; (("ANIMAL" "Elephant") ("SEASON" "Fall")) -;; (("ANIMAL" "Lion") ("SEASON" "Spring")) -;; (("ANIMAL" "Lion") ("SEASON" "Fall"))) -(define (item-assoc->item-list itemsdat) - (if (and itemsdat (not (null? itemsdat))) - (let ((itemlst (filter (lambda (x) - (list? x)) - (map (lambda (x) - (debug:print 6 *default-log-port* "item-assoc->item-list x: " x) - (if (< (length x) 2) - (begin - (debug:print-error 0 *default-log-port* "malformed items spec " (string-intersperse x " ")) - (list (car x)'())) - (let* ((name (car x)) - (items (cadr x)) - (ilist (list name (if (string? items) - (string-split items) - '())))) - (if (null? 
ilist) - (debug:print-error 0 *default-log-port* "No items specified for " name)) - ilist))) - itemsdat)))) - (let ((debuglevel 5)) - (debug:print 5 *default-log-port* "item-assoc->item-list: itemsdat => itemlst ") - (if (debug:debug-mode 5) - (begin - (pp itemsdat) - (print " => ") - (pp itemlst)))) - (if (> (length itemlst) 0) - (process-itemlist #f '() itemlst) - '())) - '())) ;; return a list consisting on a single null list for non-item runs - ;; Nope, not now, return null as of 6/6/2011 - -;; (item-table->item-list '(("ANIMAL" "Elephant Lion")("SEASON" "Spring Winter"))) -;; => ((("ANIMAL" "Elephant")("SEASON" "Spring")) -;; (("ANIMAL" "Lion") ("SEASON" "Winter"))) -(define (item-table->item-list itemtable) - (let ((newlst (map (lambda (x) - (if (> (length x) 1) - (list (car x) - (string-split (cadr x))) - (list x '()))) - itemtable)) - (res '())) ;; a list of items - (let loop ((indx 0) - (item '()) ;; an item will be ((KEYNAME1 VAL1)(KEYNAME2 VAL2) ...) - (elflag #f)) - (for-each (lambda (row) - (let ((rowname (car row)) - (rowdat (cadr row))) - (set! item (append item - (list - (if (< indx (length rowdat)) - (let ((new (list rowname (list-ref rowdat indx)))) - ;; (debug:print 0 *default-log-port* "New: " new) - (set! elflag #t) - new - ) ;; i.e. had at least on legit value to use - (list rowname "-"))))))) - newlst) - (if elflag - (begin - (set! 
res (append res (list item))) - (loop (+ indx 1) - '() - #f))) - res))) - ;; Nope, not now, return null as of 6/6/2011 - -(define (items:check-valid-items class item) - (let ((valid-values (let ((s (config-lookup *configdat* "validvalues" class))) - (if s (string-split s) #f)))) - (if valid-values - (if (member item valid-values) - item #f) - item))) - -(define (items:get-items-from-config tconfig) - (let* ((have-items (hash-table-ref/default tconfig "items" #f)) - (have-itable (hash-table-ref/default tconfig "itemstable" #f)) - (items (hash-table-ref/default tconfig "items" '())) - (itemstable (hash-table-ref/default tconfig "itemstable" '()))) - (debug:print 5 *default-log-port* "items: " items " itemstable: " itemstable) - (set! items (map (lambda (item) - (if (procedure? (cadr item)) - (list (car item)((cadr item))) ;; evaluate the proc - item)) - items)) - (set! itemstable (map (lambda (item) - (if (procedure? (cadr item)) - (list (car item)((cadr item))) ;; evaluate the proc - item)) - itemstable)) - (if (and have-items (null? items)) (debug:print 0 *default-log-port* "WARNING:[items] section in testconfig but no entries defined")) - (if (and have-itable (null? itemstable))(debug:print 0 *default-log-port* "WARNNG:[itemstable] section in testconfig but no entries defined")) - (if (or (not (null? items))(not (null? itemstable))) - (append (item-assoc->item-list items) - (item-table->item-list itemstable)) - '(())))) - -;; (pp (item-assoc->item-list itemdat)) - - - Index: keys.scm ================================================================== --- keys.scm +++ keys.scm @@ -30,49 +30,10 @@ (import commonmod) (include "key_records.scm") (include "common_records.scm") - -(define (args:usage . a) #f) - -;;====================================================================== -;; key <=> target routines -;;====================================================================== - -;; This invalidates using "/" in item names. 
Every key will be -;; available via args:get-arg as :keyfield. Since this only needs to -;; be called once let's use it to set the environment vars -;; -;; The setting of :keyfield in args should be turned off ASAP -;; -(define (keys:target-set-args keys target ht) - (if target - (let ((vals (string-split target "/"))) - (if (eq? (length vals)(length keys)) - (for-each (lambda (key val) - (setenv key val) - (if ht (hash-table-set! ht (conc ":" key) val))) - keys - vals) - (debug:print-error 0 *default-log-port* "wrong number of values in " target ", should match " keys)) - vals) - (debug:print 4 *default-log-port* "ERROR: keys:target-set-args called with no target."))) - -;; given the keys (a list of vectors or a list of keys) and a target return a keyval list -;; keyval list ( (key1 val1) (key2 val2) ...) -(define (keys:target->keyval keys target) - (let* ((targlist (string-split target "/")) - (numkeys (length keys)) - (numtarg (length targlist)) - (targtweaked (if (> numkeys numtarg) - (append targlist (make-list (- numkeys numtarg) "")) - targlist))) - (map (lambda (key targ) - (list key targ)) - keys targtweaked))) - ;;====================================================================== ;; config file related routines ;;====================================================================== ;; (define keys:config-get-fields common:get-fields) Index: keysmod.scm ================================================================== --- keysmod.scm +++ keysmod.scm @@ -39,10 +39,7 @@ (string-join (map (lambda (field)(conc (car field) " " (cadr field))) fields) ","))) -(define (keys->keystr keys) ;; => key1,key2,key3,additiona1, ... 
- (string-intersperse keys ",")) - ) Index: launch.scm ================================================================== --- launch.scm +++ launch.scm @@ -38,1718 +38,5 @@ (include "common_records.scm") (include "key_records.scm") (include "db_records.scm") -;;====================================================================== -;; ezsteps -;;====================================================================== - -;; ezsteps were going to be coded as -;; stepname[,predstep1,predstep2 ...] [{VAR1=first,second,third}] command to execute -;; BUT -;; now are -;; stepname {VAR=first,second,third ...} command ... -;; where the {VAR=first,second,third ...} is optional. - -;; given an exit code and whether or not logpro was used calculate OK/BAD -;; return #t if we are ok, #f otherwise -(define (steprun-good? logpro exitcode) - (or (eq? exitcode 0) - (and logpro (eq? exitcode 2)))) - -;; if handed a string, process it, else look for MT_CMDINFO -(define (launch:get-cmdinfo-assoc-list #!key (encoded-cmd #f)) - (let ((enccmd (if encoded-cmd encoded-cmd (getenv "MT_CMDINFO")))) - (if enccmd - (common:read-encoded-string enccmd) - '()))) - -;; 0 1 2 3 -(defstruct launch:einf (pid #t)(exit-status #t)(exit-code #t)(rollup-status 0)) - -;; return (conc status ": " comment) from the final section so that -;; the comment can be set in the step record in launch.scm -;; -(define (launch:load-logpro-dat run-id test-id stepname) - (let ((cname (conc stepname ".dat"))) - (if (common:file-exists? cname) - (let* ((dat (read-config cname #f #f)) - (csvr (db:logpro-dat->csv dat stepname)) - (csvt (let-values (((fmt-cell fmt-record fmt-csv) (make-format ","))) - (fmt-csv (map list->csv-record csvr)))) - (status (configf:lookup dat "final" "exit-status")) - (msg (configf:lookup dat "final" "message"))) - (if csvt ;; this if blocked stack dump caused by .dat file from logpro being 0-byte. 
fixed by upgrading logpro - (rmt:csv->test-data run-id test-id csvt) - (debug:print 0 *default-log-port* "ERROR: no csvdat exists for run-id: " run-id " test-id: " test-id " stepname: " stepname ", check that logpro version is 1.15 or newer")) - ;; (debug:print-info 13 *default-log-port* "Error: run-id/test-id/stepname="run-id"/"test-id"/"stepname" => bad csvr="csvr) - ;; ) - (cond - ((equal? status "PASS") "PASS") ;; skip the message part if status is pass - (status (conc (configf:lookup dat "final" "exit-status") ": " (if msg msg "no message"))) - (else #f))) - #f))) - -(define (launch:runstep ezstep run-id test-id exit-info m tal testconfig) ;;; TODO: deprecate me in favor of ezsteps.scm - (let* ((stepname (car ezstep)) ;; do stuff to run the step - (stepinfo (cadr ezstep)) - ;; (let ((info (cadr ezstep))) - ;; (if (proc? info) "" info))) - ;; (stepproc (let ((info (cadr ezstep))) - ;; (if (proc? info) info #f))) - (stepparts (string-match (regexp "^(\\{([^\\}\\{]*)\\}\\s*|)(.*)$") stepinfo)) - (stepparams (list-ref stepparts 2)) ;; for future use, {VAR=1,2,3}, run step for each - (paramparts (if (string? stepparams) - (map (lambda (x)(string-split x "=")) (string-split-fields "[^;]*=[^;]*" stepparams)) - '())) - (subrun (alist-ref "subrun" paramparts equal?)) - (stepcmd (list-ref stepparts 3)) - (script "") ; "#!/bin/bash\n") ;; yep, we depend on bin/bash FIXME!!!\ - (logpro-file (conc stepname ".logpro")) - (html-file (conc stepname ".html")) - (dat-file (conc stepname ".dat")) - (tconfig-logpro (configf:lookup testconfig "logpro" stepname)) - (logpro-used (common:file-exists? 
logpro-file))) - - (debug:print 0 *default-log-port* "stepparts: " stepparts ", stepparams: " stepparams - ", paramparts: " paramparts ", subrun: " subrun ", stepcmd: " stepcmd) - - (if (and tconfig-logpro - (not logpro-used)) ;; no logpro file found but have a defn in the testconfig - (begin - (with-output-to-file logpro-file - (lambda () - (print ";; logpro file extracted from testconfig\n" - ";;") - (print tconfig-logpro))) - (set! logpro-used #t))) - - ;; NB// can safely assume we are in test-area directory - (debug:print 4 *default-log-port* "ezsteps:\n stepname: " stepname " stepinfo: " stepinfo " stepparts: " stepparts - " stepparams: " stepparams " stepcmd: " stepcmd) - - ;; ;; first source the previous environment - ;; (let ((prev-env (conc ".ezsteps/" prevstep (if (string-search (regexp "csh") - ;; (get-environment-variable "SHELL")) ".csh" ".sh")))) - ;; (if (and prevstep (common:file-exists? prev-env)) - ;; (set! script (conc script "source " prev-env)))) - - ;; call the command using mt_ezstep - ;; (set! script (conc "mt_ezstep " stepname " " (if prevstep prevstep "x") " " stepcmd)) - - (debug:print 4 *default-log-port* "script: " script) - (rmt:teststep-set-status! run-id test-id stepname "start" "-" #f #f) - ;; now launch the actual process - (call-with-environment-variables - (list (cons "PATH" (conc (get-environment-variable "PATH") ":."))) - (lambda () ;; (process-run "/bin/bash" "-c" "exec ls -l /tmp/foobar > /tmp/delme-more.log 2>&1") - (let* ((cmd (conc stepcmd " > " stepname ".log 2>&1")) ;; >outfile 2>&1 - (pid #f)) - (let ((proc (lambda () - (set! pid (process-run "/bin/bash" (list "-c" cmd)))))) - (if subrun - (begin - (debug:print-info 0 *default-log-port* "Running without MT_.* environment variables.") - (common:without-vars proc "^MT_.*")) - (proc))) - - (with-output-to-file "Makefile.ezsteps" - (lambda () - (print stepname ".log :") - (print "\t" cmd) - (if (common:file-exists? 
(conc stepname ".logpro")) - (print "\tlogpro " stepname ".logpro " stepname ".html < " stepname ".log")) - (print) - (print stepname " : " stepname ".log") - (print)) - #:append) - - (rmt:test-set-top-process-pid run-id test-id pid) - (let processloop ((i 0)) - (let-values (((pid-val exit-status exit-code)(process-wait pid #t))) - (mutex-lock! m) - (launch:einf-pid-set! exit-info pid) ;; (vector-set! exit-info 0 pid) - (launch:einf-exit-status-set! exit-info exit-status) ;; (vector-set! exit-info 1 exit-status) - (launch:einf-exit-code-set! exit-info exit-code) ;; (vector-set! exit-info 2 exit-code) - (mutex-unlock! m) - (if (eq? pid-val 0) - (begin - (thread-sleep! 2) - (processloop (+ i 1)))) - ))))) - (debug:print-info 0 *default-log-port* "step " stepname " completed with exit code " (launch:einf-exit-code exit-info)) ;; (vector-ref exit-info 2)) - ;; now run logpro if needed - (if logpro-used - (let* ((logpro-exe (or (getenv "LOGPRO_EXE") "logpro")) - (pid (process-run (conc "/bin/sh -c '"logpro-exe" "logpro-file " " (conc stepname ".html") " < " stepname ".log > /dev/null'")))) - (let processloop ((i 0)) - (let-values (((pid-val exit-status exit-code)(process-wait pid #t))) - (mutex-lock! m) - ;; (make-launch:einf pid: pid exit-status: exit-status exit-code: exit-code) - (launch:einf-pid-set! exit-info pid) ;; (vector-set! exit-info 0 pid) - (launch:einf-exit-status-set! exit-info exit-status) ;; (vector-set! exit-info 1 exit-status) - (launch:einf-exit-code-set! exit-info exit-code) ;; (vector-set! exit-info 2 exit-code) - (mutex-unlock! m) - (if (eq? pid-val 0) - (begin - (thread-sleep! 
2) - (processloop (+ i 1))))) - (debug:print-info 0 *default-log-port* "logpro for step " stepname " exited with code " (launch:einf-exit-code exit-info))))) ;; (vector-ref exit-info 2))))) - - (let ((exinfo (launch:einf-exit-code exit-info)) ;; (vector-ref exit-info 2)) - (logfna (if logpro-used (conc stepname ".html") "")) - (comment #f)) - (if logpro-used - (let ((datfile (conc stepname ".dat"))) - ;; load the .dat file into the test_data table if it exists - (if (common:file-exists? datfile) - (set! comment (launch:load-logpro-dat run-id test-id stepname))) - (rmt:test-set-log! run-id test-id (conc stepname ".html")))) - (rmt:teststep-set-status! run-id test-id stepname "end" exinfo comment logfna)) - ;; set the test final status - (let* ((process-exit-status (launch:einf-exit-code exit-info)) ;; (vector-ref exit-info 2)) - (this-step-status (cond - ((and (eq? process-exit-status 2) logpro-used) 'warn) ;; logpro 2 = warnings - ((and (eq? process-exit-status 3) logpro-used) 'check) ;; logpro 3 = check - ((and (eq? process-exit-status 4) logpro-used) 'waived) ;; logpro 4 = waived - ((and (eq? process-exit-status 5) logpro-used) 'abort) ;; logpro 5 = abort - ((and (eq? process-exit-status 6) logpro-used) 'skip) ;; logpro 6 = skip - ((eq? process-exit-status 0) 'pass) ;; logpro 0 = pass - (else 'fail))) - (overall-status (cond - ((eq? (launch:einf-rollup-status exit-info) 2) 'warn) ;; rollup-status (vector-ref exit-info 3) - ((eq? (launch:einf-rollup-status exit-info) 0) 'pass) ;; (vector-ref exit-info 3) - (else 'fail))) - (next-status (cond - ((eq? overall-status 'pass) this-step-status) - ((eq? overall-status 'warn) - (if (eq? this-step-status 'fail) 'fail 'warn)) - ((eq? overall-status 'abort) 'abort) - (else 'fail))) - (next-state ;; "RUNNING") ;; WHY WAS THIS CHANGED TO NOT USE (null? tal) ?? - (cond - ((null? tal) ;; more to run? 
- "COMPLETED") - (else "RUNNING")))) - (debug:print 4 *default-log-port* "Exit value received: " (launch:einf-exit-code exit-info) " logpro-used: " logpro-used - " this-step-status: " this-step-status " overall-status: " overall-status - " next-status: " next-status " rollup-status: " (launch:einf-rollup-status exit-info)) ;; (vector-ref exit-info 3)) - (case next-status - ((warn) - (launch:einf-rollup-status-set! exit-info 2) ;; (vector-set! exit-info 3 2) ;; rollup-status - ;; NB// test-set-status! does rdb calls under the hood - (tests:test-set-status! run-id test-id next-state "WARN" - (if (eq? this-step-status 'warn) "Logpro warning found" #f) - #f)) - ((check) - (launch:einf-rollup-status-set! exit-info 3) ;; (vector-set! exit-info 3 3) ;; rollup-status - ;; NB// test-set-status! does rdb calls under the hood - (tests:test-set-status! run-id test-id next-state "CHECK" - (if (eq? this-step-status 'check) "Logpro check found" #f) - #f)) - ((waived) - (launch:einf-rollup-status-set! exit-info 4) ;; (vector-set! exit-info 3 3) ;; rollup-status - ;; NB// test-set-status! does rdb calls under the hood - (tests:test-set-status! run-id test-id next-state "WAIVED" - (if (eq? this-step-status 'check) "Logpro waived found" #f) - #f)) - ((abort) - (launch:einf-rollup-status-set! exit-info 5) ;; (vector-set! exit-info 3 4) ;; rollup-status - ;; NB// test-set-status! does rdb calls under the hood - (tests:test-set-status! run-id test-id next-state "ABORT" - (if (eq? this-step-status 'abort) "Logpro abort found" #f) - #f)) - ((skip) - (launch:einf-rollup-status-set! exit-info 6) ;; (vector-set! exit-info 3 4) ;; rollup-status - ;; NB// test-set-status! does rdb calls under the hood - (tests:test-set-status! run-id test-id next-state "SKIP" - (if (eq? this-step-status 'skip) "Logpro skip found" #f) - #f)) - ((pass) - (tests:test-set-status! run-id test-id next-state "PASS" #f #f)) - (else ;; 'fail - (launch:einf-rollup-status-set! exit-info 1) ;; (vector-set! 
exit-info 3 1) ;; force fail, this used to be next-state but that doesn't make sense. should always be "COMPLETED" - (tests:test-set-status! run-id test-id "COMPLETED" "FAIL" (conc "Failed at step " stepname) #f) - ))) - logpro-used)) - -(define (launch:manage-steps run-id test-id item-path fullrunscript ezsteps subrun test-name tconfigreg exit-info m) - ;; (let-values - ;; (((pid exit-status exit-code) - ;; (run-n-wait fullrunscript))) - ;; (tests:test-set-status! test-id "RUNNING" "n/a" #f #f) - ;; Since we should have a clean slate at this time there is no need to do - ;; any of the other stuff that tests:test-set-status! does. Let's just - ;; force RUNNING/n/a - - ;; (thread-sleep! 0.3) - ;; (tests:test-force-state-status! run-id test-id "RUNNING" "n/a") - (rmt:set-state-status-and-roll-up-items run-id test-name item-path "RUNNING" #f #f) - ;; (thread-sleep! 0.3) ;; NFS slowness has caused grief here - - ;; if there is a runscript do it first - (if fullrunscript - (let ((pid (process-run fullrunscript))) - (rmt:test-set-top-process-pid run-id test-id pid) - (let loop ((i 0)) - (let-values - (((pid-val exit-status exit-code) (process-wait pid #t))) - (mutex-lock! m) - (launch:einf-pid-set! exit-info pid) ;; (vector-set! exit-info 0 pid) - (launch:einf-exit-status-set! exit-info exit-status) ;; (vector-set! exit-info 1 exit-status) - (launch:einf-exit-code-set! exit-info exit-code) ;; (vector-set! exit-info 2 exit-code) - (launch:einf-rollup-status-set! exit-info exit-code) ;; (vector-set! exit-info 3 exit-code) ;; rollup status - (mutex-unlock! m) - (if (eq? pid-val 0) - (begin - (thread-sleep! 2) - (loop (+ i 1))) - ))))) - ;; then, if runscript ran ok (or did not get called) - ;; do all the ezsteps (if any) - (if (or ezsteps subrun) - (let* ((test-run-dir (tests:get-test-path-from-environment)) - (testconfig ;; (read-config (conc work-area "/testconfig") #f #t environ-patt: "pre-launch-env-vars")) ;; FIXME??? is allow-system ok here? 
- ;; NOTE: it is tempting to turn off force-create of testconfig but dynamic - ;; ezstep names need a full re-eval here. - (tests:get-testconfig test-name item-path tconfigreg #t force-create: #t)) ;; 'return-procs))) - (ezstepslst (if (hash-table? testconfig) - (hash-table-ref/default testconfig "ezsteps" '()) - #f))) - (if testconfig - (hash-table-set! *testconfigs* test-name testconfig) ;; cached for lazy reads later ... - (begin - (launch:setup) - (debug:print 0 *default-log-port* "WARNING: no testconfig found for " test-name " in search path:\n " - (string-intersperse (tests:get-tests-search-path *configdat*) "\n ")))) - ;; after all that, still no testconfig? Time to abort - (if (not testconfig) - (begin - (debug:print-error 0 *default-log-port* "Failed to resolve megatest.config, runconfigs.config and testconfig issues. Giving up now") - (exit 1))) - - ;; create a proc for the subrun if requested, save that proc in the ezsteps table as the last entry - ;; 1. get section [runarun] - ;; 2. unset MT_* vars - ;; 3. fix target - ;; 4. fix runname - ;; 5. fix testpatt or calculate it from contour - ;; 6. launch the run - ;; 7. roll up the run result and or roll up the logpro processed result - (when (configf:lookup testconfig "subrun" "runwait") ;; we use runwait as the flag that a subrun is requested - (subrun:initialize-toprun-test testconfig test-run-dir) - (let* ((mt-cmd (subrun:launch-cmd test-run-dir))) - (debug:print-info 0 *default-log-port* "Subrun command is \"" mt-cmd "\"") - (set! ezsteps #t) ;; set the needed flag - (set! ezstepslst - (append (or ezstepslst '()) - (list (list "subrun" (conc "{subrun=true} " mt-cmd))))))) - - ;; process the ezsteps - (if ezsteps - (begin - (if (not (common:file-exists? 
".ezsteps"))(create-directory ".ezsteps")) - ;; if ezsteps was defined then we are sure to have at least one step but check anyway - (if (not (> (length ezstepslst) 0)) - (debug:print-error 0 *default-log-port* "ezsteps defined but ezstepslst is zero length") - (let loop ((ezstep (car ezstepslst)) - (tal (cdr ezstepslst)) - (prevstep #f)) - (debug:print-info 0 *default-log-port* "Processing ezstep \"" (string-intersperse ezstep " ") "\"") - ;; check exit-info (vector-ref exit-info 1) - (if (launch:einf-exit-status exit-info) ;; (vector-ref exit-info 1) - (let ((logpro-used (launch:runstep ezstep run-id test-id exit-info m tal testconfig)) - (stepname (car ezstep))) - ;; if logpro-used read in the stepname.dat file - (if (and logpro-used (common:file-exists? (conc stepname ".dat"))) - (launch:load-logpro-dat run-id test-id stepname)) - (if (steprun-good? logpro-used (launch:einf-exit-code exit-info)) - (if (not (null? tal)) - (loop (car tal) (cdr tal) stepname)) - (debug:print 0 *default-log-port* "WARNING: step " (car ezstep) " failed. 
Stopping"))) - (debug:print 0 *default-log-port* "WARNING: a prior step failed, stopping at " ezstep))))))))) - -(define (launch:monitor-job run-id test-id item-path fullrunscript ezsteps test-name tconfigreg exit-info m work-area runtlim misc-flags) - (let* ((update-period (string->number (or (configf:lookup *configdat* "setup" "test-stats-update-period") "30"))) - (start-seconds (current-seconds)) - (calc-minutes (lambda () - (inexact->exact - (round - (- - (current-seconds) - start-seconds))))) - (kill-tries 0)) - ;; (tests:set-full-meta-info #f test-id run-id (calc-minutes) work-area) - ;; (tests:set-full-meta-info test-id run-id (calc-minutes) work-area) - (tests:set-full-meta-info #f test-id run-id (calc-minutes) work-area 10) - - (let loop ((minutes (calc-minutes)) - (cpu-load (alist-ref 'adj-core-load (common:get-normalized-cpu-load #f))) - (disk-free (get-df (current-directory))) - (last-sync (current-seconds))) - (common:telemetry-log "zombie" (conc "launch:monitor-job - top of loop encountered at "(current-seconds)" with last-sync="last-sync)) - (let* ((over-time (> (current-seconds) (+ last-sync update-period))) - (new-cpu-load (let* ((load (alist-ref 'adj-core-load (common:get-normalized-cpu-load #f))) - (delta (abs (- load cpu-load)))) - (if (> delta 0.1) ;; don't bother updating with small changes - load - #f))) - (new-disk-free (let* ((df (if over-time ;; only get df every 30 seconds - (get-df (current-directory)) - disk-free)) - (delta (abs (- df disk-free)))) - (if (and (> df 0) - (> (/ delta df) 0.1)) ;; (> delta 200) ;; ignore changes under 200 Meg - df - #f))) - (do-sync (or new-cpu-load new-disk-free over-time)) - - (test-info (rmt:get-test-info-by-id run-id test-id)) - (state (db:test-get-state test-info)) - (status (db:test-get-status test-info)) - (kill-reason "no kill reason specified") - (kill-job? 
#f)) - (common:telemetry-log "zombie" (conc "launch:monitor-job - decision time encountered at "(current-seconds)" with last-sync="last-sync" do-sync="do-sync" over-time="over-time" update-period="update-period)) - (cond - ((test-get-kill-request run-id test-id) - (set! kill-reason "KILLING TEST since received kill request (KILLREQ)") - (set! kill-job? #t)) - ((and runtlim (> (- (current-seconds) start-seconds) runtlim)) - (set! kill-reason (conc "KILLING TEST DUE TO TIME LIMIT EXCEEDED! Runtime=" (- (current-seconds) start-seconds) " seconds, limit=" runtlim)) - (set! kill-job? #t)) - ((equal? status "DEAD") - (tests:update-central-meta-info run-id test-id new-cpu-load new-disk-free (calc-minutes) #f #f) - (rmt:set-state-status-and-roll-up-items run-id test-id 'foo "RUNNING" "n/a" "was marked dead; really still running.") - ;;(set! kill-reason "KILLING TEST because it was marked as DEAD by launch:handle-zombie-tests (might indicate really overloaded server or else overzealous setup.deadtime)") ;; MARK RUNNING - (set! kill-job? #f))) - - (debug:print 4 *default-log-port* "cpu: " new-cpu-load " disk: " new-disk-free " last-sync: " last-sync " do-sync: " do-sync) - (launch:handle-zombie-tests run-id) - (when do-sync - ;;(with-output-to-file (conc (getenv "MT_TEST_RUN_DIR") "/last-loadinfo.log" #:append) - ;; (lambda () (pp (list (current-seconds) new-cpu-load new-disk-free (calc-minutes))))) - (common:telemetry-log "zombie" (conc "launch:monitor-job - dosync started at "(current-seconds))) - (tests:update-central-meta-info run-id test-id new-cpu-load new-disk-free (calc-minutes) #f #f) - (common:telemetry-log "zombie" (conc "launch:monitor-job - dosync finished at "(current-seconds)))) - - (if kill-job? - (begin - (debug:print-info 0 *default-log-port* "proceeding to kill test: "kill-reason) - (mutex-lock! m) - ;; NOTE: The pid can change as different steps are run. Do we need handshaking between this - ;; section and the runit section? 
Or add a loop that tries three times with a 1/4 second - ;; between tries? - (let* ((pid1 (launch:einf-pid exit-info)) ;; (vector-ref exit-info 0)) - (pid2 (rmt:test-get-top-process-pid run-id test-id)) - (pids (delete-duplicates (filter number? (list pid1 pid2))))) - (if (not (null? pids)) - (begin - (for-each - (lambda (pid) - (handle-exceptions - exn - (begin - (debug:print-info 0 *default-log-port* "Unable to kill process with pid " pid ", possibly already killed.") - (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))) - (debug:print 0 *default-log-port* "WARNING: Request received to kill job " pid) ;; " (attempt # " kill-tries ")") - (debug:print-info 0 *default-log-port* "Signal mask=" (signal-mask)) - ;; (if (process:alive? pid) - ;; (begin - (map (lambda (pid-num) - (process-signal pid-num signal/term)) - (process:get-sub-pids pid)) - (thread-sleep! 5) - ;; (if (process:process-alive? pid) - (map (lambda (pid-num) - (handle-exceptions - exn - #f - (process-signal pid-num signal/kill))) - (process:get-sub-pids pid)))) - ;; (debug:print-info 0 *default-log-port* "not killing process " pid " as it is not alive")))) - pids) - ;; BB: question to Matt -- does the tests:test-state-status! encompass rollup to toplevel? If not, should it? - (tests:test-set-status! run-id test-id "KILLED" "KILLED" (conc (args:get-arg "-m")" "kill-reason) #f)) ;; BB ADDED kill-reason -- confirm OK with Matt - (begin - (debug:print-error 0 *default-log-port* "Nothing to kill, pid1=" pid1 ", pid2=" pid2) - (tests:test-set-status! run-id test-id "KILLED" "FAILED TO KILL" (conc (args:get-arg "-m")" "kill-reason) #f) ;; BB ADDED kill-reason -- confirm OK with Matt - ))) - (mutex-unlock! m) - ;; no point in sticking around. Exit now. But run end of run before exiting? - (launch:end-of-run-check run-id) - (exit))) - (if (hash-table-ref/default misc-flags 'keep-going #f) - (begin - (thread-sleep! 
3) ;; (+ 3 (random 6))) ;; add some jitter to the call home time to spread out the db accesses - (if (hash-table-ref/default misc-flags 'keep-going #f) ;; keep originals for cpu-load and disk-free unless they change more than the allowed delta - (loop (calc-minutes) - (or new-cpu-load cpu-load) - (or new-disk-free disk-free) - (if do-sync (current-seconds) last-sync))))))) - (tests:update-central-meta-info run-id test-id (get-cpu-load) (get-df (current-directory))(calc-minutes) #f #f))) ;; NOTE: Checking twice for keep-going is intentional - - -(define (launch:execute encoded-cmd) - (let* ((cmdinfo (common:read-encoded-string encoded-cmd)) - (tconfigreg #f)) - (setenv "MT_CMDINFO" encoded-cmd) - ;;(bb-check-path msg: "launch:execute incoming") - (if (list? cmdinfo) ;; ((testpath /tmp/mrwellan/jazzmind/src/example_run/tests/sqlitespeed) - ;; (test-name sqlitespeed) (runscript runscript.rb) (db-host localhost) (run-id 1)) - (let* ((testpath (assoc/default 'testpath cmdinfo)) ;; testpath is the test spec area - (top-path (assoc/default 'toppath cmdinfo)) - (work-area (assoc/default 'work-area cmdinfo)) ;; work-area is the test run area - (test-name (assoc/default 'test-name cmdinfo)) - (runscript (assoc/default 'runscript cmdinfo)) - (ezsteps (assoc/default 'ezsteps cmdinfo)) - (subrun (assoc/default 'subrun cmdinfo)) - (serverurl (assoc/default 'serverurl cmdinfo)) - (homehost (assoc/default 'homehost cmdinfo)) - (run-id (assoc/default 'run-id cmdinfo)) - (test-id (assoc/default 'test-id cmdinfo)) - (target (assoc/default 'target cmdinfo)) - (areaname (assoc/default 'areaname cmdinfo)) - (itemdat (assoc/default 'itemdat cmdinfo)) - (env-ovrd (assoc/default 'env-ovrd cmdinfo)) - (set-vars (assoc/default 'set-vars cmdinfo)) ;; pre-overrides from -setvar - (runname (assoc/default 'runname cmdinfo)) - (megatest (assoc/default 'megatest cmdinfo)) - (runtlim (assoc/default 'runtlim cmdinfo)) - (contour (assoc/default 'contour cmdinfo)) - (item-path (item-list->path 
itemdat)) - (mt-bindir-path (assoc/default 'mt-bindir-path cmdinfo)) - (keys #f) - (keyvals #f) - (fullrunscript (if (not runscript) - #f - (if (substring-index "/" runscript) - runscript ;; use unadultered if contains slashes - (let ((fulln (conc work-area "/" runscript))) - (if (and (common:file-exists? fulln) - (file-execute-access? fulln)) - fulln - runscript))))) ;; assume it is on the path - (check-work-area (lambda () - ;; NFS might not have propagated the directory meta data to the run host - give it time if needed - (let loop ((count 0)) - (if (or (common:directory-exists? work-area) - (> count 10)) - (change-directory work-area) - (begin - (debug:print 0 *default-log-port* "INFO: Not starting job yet - directory " work-area " not found") - (thread-sleep! 10) - (loop (+ count 1))))) - - (if (not (string=? (common:real-path work-area)(common:real-path (current-directory)))) - (begin - (debug:print 0 *default-log-port* - "INFO: we are expecting to be in directory " work-area "\n" - " but we are actually in the directory " (current-directory) "\n" - " doing another change dir.") - (change-directory work-area))) - - ;; spot check that the files in testpath are available. Too often NFS delays cause problems here. - (let ((files (glob (conc testpath "/*"))) - (bad-files '())) - (for-each - (lambda (fullname) - (let* ((fname (pathname-strip-directory fullname)) - (targn (conc work-area "/" fname))) - (if (not (file-exists? targn)) - (set! bad-files (cons fname bad-files))))) - files) - (if (not (null? bad-files)) - (begin - (debug:print 0 *default-log-port* "INFO: test data from " testpath " not copied properly or filesystem problems causing data to not be found. 
Re-running the copy command.") - (debug:print 0 *default-log-port* "INFO: missing files from " work-area ": " (string-intersperse bad-files ", ")) - (launch:test-copy testpath work-area)))) - ;; one more time, change to the work-area directory - (change-directory work-area))) - ) ;; let* - - (if contour (setenv "MT_CONTOUR" contour)) - - ;; immediated set some key variables from CMDINFO data, yes, these will be set again below ... - ;; - (setenv "MT_TESTSUITENAME" areaname) - (setenv "MT_RUN_AREA_HOME" top-path) - (set! *toppath* top-path) - (change-directory *toppath*) ;; temporarily switch to the run area home - (setenv "MT_TEST_RUN_DIR" work-area) - - (launch:setup) ;; should be properly in the run area home now - - (if contour (setenv "MT_CONTOUR" contour)) - - ;; immediated set some key variables from CMDINFO data, yes, these will be set again below ... - ;; - (setenv "MT_TESTSUITENAME" areaname) - (setenv "MT_RUN_AREA_HOME" top-path) - (set! *toppath* top-path) - (change-directory *toppath*) ;; temporarily switch to the run area home - (setenv "MT_TEST_RUN_DIR" work-area) - - (launch:setup) ;; should be properly in the run area home now - - (set! tconfigreg (tests:get-all)) ;; mapping of testname => test source path - (let ((sighand (lambda (signum) - ;; (signal-mask! signum) ;; to mask or not? seems to cause issues in exiting - (if (eq? signum signal/stop) - (debug:print-error 0 *default-log-port* "attempt to STOP process. Exiting.")) - (set! *time-to-exit* #t) - (print "Received signal " signum ", cleaning up before exit (set this test to COMPLETED/ABORT) . Please wait...") - (let ((th1 (make-thread (lambda () - (print "set test to COMPLETED/ABORT begin.") - (rmt:test-set-state-status run-id test-id "COMPLETED" "ABORT" "received kill signal") - (print "set test to COMPLETED/ABORT complete.") - (print "Killed by signal " signum ". Exiting") - (exit 1)))) - (th2 (make-thread (lambda () - (thread-sleep! 
20) - (debug:print 0 *default-log-port* "Done") - (exit 4))))) - (thread-start! th2) - (thread-start! th1) - (thread-join! th2))))) - (set-signal-handler! signal/int sighand) - (set-signal-handler! signal/term sighand) - ) ;; (set-signal-handler! signal/stop sighand) - - ;; Do not run the test if it is REMOVING, RUNNING, KILLREQ or REMOTEHOSTSTART, - ;; Mark the test as REMOTEHOSTSTART *IMMEDIATELY* - ;; - (let* ((test-info (rmt:get-test-info-by-id run-id test-id)) - (test-host (if test-info - (db:test-get-host test-info) - (begin - (debug:print 0 *default-log-port* "ERROR: failed to find a record for test-id " test-id ", exiting.") - (exit)))) - (test-pid (db:test-get-process_id test-info))) - (cond - ;; -mrw- I'm removing KILLREQ from this list so that a test in KILLREQ state is treated as a "do not run" flag. - ((member (db:test-get-state test-info) '("INCOMPLETE" "KILLED" "UNKNOWN" "STUCK")) ;; prior run of this test didn't complete, go ahead and try to rerun - (debug:print 0 *default-log-port* "INFO: test is INCOMPLETE or KILLED, treat this execute call as a rerun request") - ;; (tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a") - - (rmt:general-call 'set-test-start-time #f test-id) - (rmt:test-set-state-status run-id test-id "REMOTEHOSTSTART" "n/a" #f) - ) ;; prime it for running - ((member (db:test-get-state test-info) '("RUNNING" "REMOTEHOSTSTART")) - (if (process:alive-on-host? test-host test-pid) - (debug:print-error 0 *default-log-port* "test state is " (db:test-get-state test-info) " and process " test-pid " is still running on host " test-host ", cannot proceed") - (exit))) - ((not (member (db:test-get-state test-info) '("REMOVING" "REMOTEHOSTSTART" "RUNNING" "KILLREQ"))) - ;; (tests:test-force-state-status! 
run-id test-id "REMOTEHOSTSTART" "n/a") - (rmt:general-call 'set-test-start-time #f test-id) - (rmt:test-set-state-status run-id test-id "REMOTEHOSTSTART" "n/a" #f) - ) - (else ;; (member (db:test-get-state test-info) '("REMOVING" "REMOTEHOSTSTART" "RUNNING" "KILLREQ")) - (debug:print-error 0 *default-log-port* "test state is " (db:test-get-state test-info) ", cannot proceed") - (exit)))) - - ;; cleanup prior execution's steps - (rmt:delete-steps-for-test! run-id test-id) - - (debug:print 2 *default-log-port* "Executing " test-name " (id: " test-id ") on " (get-host-name)) - (set! keys (rmt:get-keys)) - ;; (runs:set-megatest-env-vars run-id inkeys: keys inkeyvals: keyvals) ;; these may be needed by the launching process - ;; one of these is defunct/redundant ... - (if (not (launch:setup force-reread: #t)) - (begin - (debug:print 0 *default-log-port* "Failed to setup, exiting") - ;; (sqlite3:finalize! db) - ;; (sqlite3:finalize! tdb) - (exit 1))) - ;; validate that the test run area is available - (check-work-area) - - ;; still need to go back to run area home for next couple steps - (change-directory *toppath*) - - ;; NOTE: Current order is to process runconfigs *before* setting the MT_ vars. This - ;; seems non-ideal but could well break stuff - ;; BUG? BUG? BUG? - - (let ((rconfig (full-runconfigs-read)) ;; (read-config (conc *toppath* "/runconfigs.config") #f #t sections: (list "default" target)))) - (wconfig (read-config "waivers.config" #f #t sections: `( "default" ,target )))) ;; read the waivers config if it exists - ;; (setup-env-defaults (conc *toppath* "/runconfigs.config") run-id (make-hash-table) keyvals target) - ;; (set-run-config-vars run-id keyvals target) ;; (db:get-target db run-id)) - ;; Now have runconfigs data loaded, set environment vars - (for-each - (lambda (section) - (for-each - (lambda (varval) - (let ((var (car varval)) - (val (cadr varval))) - (if (and (string? var)(string? 
val)) - (begin - (safe-setenv var (config:eval-string-in-environment val))) ;; val) - (debug:print-error 0 *default-log-port* "bad variable spec, " var "=" val)))) - (configf:get-section rconfig section))) - (list "default" target))) - ;;(bb-check-path msg: "launch:execute post block 1") - - ;; NFS might not have propagated the directory meta data to the run host - give it time if needed - (let loop ((count 0)) - (if (or (common:file-exists? work-area) - (> count 10)) - (change-directory work-area) - (begin - (debug:print 0 *default-log-port* "INFO: Not starting job yet - directory " work-area " not found") - (thread-sleep! 10) - (loop (+ count 1))))) - - ;; now we can switch to the work-area? - (change-directory work-area) - ;;(bb-check-path msg: "launch:execute post block 1.5") - ;; (change-directory work-area) - (set! keyvals (keys:target->keyval keys target)) - ;; apply pre-overrides before other variables. The pre-override vars must not - ;; clobbers things from the official sources such as megatest.config and runconfigs.config - (if (string? set-vars) - (let ((varpairs (string-split set-vars ","))) - (debug:print 4 *default-log-port* "varpairs: " varpairs) - (map (lambda (varpair) - (let ((varval (string-split varpair "="))) - (if (eq? (length varval) 2) - (let ((var (car varval)) - (val (cadr varval))) - (debug:print 1 *default-log-port* "Adding pre-var/val " var " = " val " to the environment") - (setenv var val))))) - varpairs))) - ;;(bb-check-path msg: "launch:execute post block 2") - (for-each - (lambda (varval) - (let ((var (car varval)) - (val (cadr varval))) - (if val - (setenv var val) - (begin - (debug:print-error 0 *default-log-port* "required variable " var " does not have a valid value. 
Exiting") - (exit))))) - (list - (list "MT_TEST_RUN_DIR" work-area) - (list "MT_TEST_NAME" test-name) - (list "MT_ITEM_INFO" (conc itemdat)) - (list "MT_ITEMPATH" item-path) - (list "MT_RUNNAME" runname) - (list "MT_MEGATEST" megatest) - (list "MT_TARGET" target) - (list "MT_LINKTREE" (common:get-linktree)) ;; (configf:lookup *configdat* "setup" "linktree")) - (list "MT_TESTSUITENAME" (common:get-area-name *alldat*)))) - ;;(bb-check-path msg: "launch:execute post block 3") - - (if mt-bindir-path (setenv "PATH" (conc (getenv "PATH") ":" mt-bindir-path))) - ;;(bb-check-path msg: "launch:execute post block 4") - ;; (change-directory top-path) - ;; Can setup as client for server mode now - ;; (client:setup) - - - ;; environment overrides are done *before* the remaining critical envars. - (alist->env-vars env-ovrd) - ;;(bb-check-path msg: "launch:execute post block 41") - (runs:set-megatest-env-vars run-id inkeys: keys inkeyvals: keyvals) - ;;(bb-check-path msg: "launch:execute post block 42") - (set-item-env-vars itemdat) - ;;(bb-check-path msg: "launch:execute post block 43") - (let ((blacklist (configf:lookup *configdat* "setup" "blacklistvars"))) - (if blacklist - (save-environment-as-files "megatest" ignorevars: (string-split blacklist)) - (save-environment-as-files "megatest"))) - ;;(bb-check-path msg: "launch:execute post block 44") - ;; open-run-close not needed for test-set-meta-info - ;; (tests:set-full-meta-info #f test-id run-id 0 work-area) - ;; (tests:set-full-meta-info test-id run-id 0 work-area) - (tests:set-full-meta-info #f test-id run-id 0 work-area 10) - - ;; (thread-sleep! 0.3) ;; NFS slowness has caused grief here - - (if (args:get-arg "-xterm") - (set! fullrunscript "xterm") - (if (and fullrunscript - (common:file-exists? fullrunscript) - (not (file-execute-access? 
fullrunscript))) - (system (conc "chmod ug+x " fullrunscript)))) - - ;; We are about to actually kick off the test - ;; so this is a good place to remove the records for - ;; any previous runs - ;; (db:test-remove-steps db run-id testname itemdat) - ;; now is also a good time to write the .testconfig file - (let* ((tconfig-fname (conc work-area "/.testconfig")) - (tconfig-tmpfile (conc tconfig-fname ".tmp")) - (tconfig (tests:get-testconfig test-name item-path tconfigreg #t force-create: #t))) ;; 'return-procs))) - (configf:write-alist tconfig tconfig-tmpfile) - (file-move tconfig-tmpfile tconfig-fname #t)) - ;; - (let* ((m (make-mutex)) - (kill-job? #f) - (exit-info (make-launch:einf pid: #t exit-status: #t exit-code: #t rollup-status: 0)) ;; pid exit-status exit-code (i.e. process was successfully run) rollup-status - (job-thread #f) - ;; (keep-going #t) - (misc-flags (let ((ht (make-hash-table))) - (hash-table-set! ht 'keep-going #t) - ht)) - (runit (lambda () - (launch:manage-steps run-id test-id item-path fullrunscript ezsteps subrun test-name tconfigreg exit-info m))) - (monitorjob (lambda () - (launch:monitor-job run-id test-id item-path fullrunscript ezsteps test-name tconfigreg exit-info m work-area runtlim misc-flags))) - (th1 (make-thread monitorjob "monitor job")) - (th2 (make-thread runit "run job"))) - (set! job-thread th2) - (thread-start! th1) - (thread-start! th2) - (thread-join! th2) - (debug:print-info 0 *default-log-port* "Megatest exectute of test " test-name ", item path " item-path " complete. Notifying the db ...") - (hash-table-set! misc-flags 'keep-going #f) - (thread-join! th1) - (thread-sleep! 1) ;; givbe thread th1 a chance to be done TODO: Verify this is needed. At 0.1 I was getting fail to stop, increased to total of 1.1 sec. - (mutex-lock! m) - (let* ((item-path (item-list->path itemdat)) - ;; only state and status needed - use lazy routine - (testinfo (rmt:get-testinfo-state-status run-id test-id))) - ;; Am I completed? 
- (if (member (db:test-get-state testinfo) '("REMOTEHOSTSTART" "RUNNING")) ;; NOTE: It should *not* be REMOTEHOSTSTART but for reasons I don't yet understand it sometimes gets stuck in that state ;; (not (equal? (db:test-get-state testinfo) "COMPLETED")) - (let ((new-state (if kill-job? "KILLED" "COMPLETED") ;; (if (eq? (vector-ref exit-info 2) 0) ;; exited with "good" status - ;; "COMPLETED" ;; (db:test-get-state testinfo))) ;; else preseve the state as set within the test - ) - (new-status (cond - ((not (launch:einf-exit-status exit-info)) "FAIL") ;; job failed to run ... (vector-ref exit-info 1) - ((eq? (launch:einf-rollup-status exit-info) 0) ;; (vector-ref exit-info 3) - ;; if the current status is AUTO then defer to the calculated value (i.e. leave this AUTO) - (if (equal? (db:test-get-status testinfo) "AUTO") "AUTO" "PASS")) - ((eq? (launch:einf-rollup-status exit-info) 1) "FAIL") ;; (vector-ref exit-info 3) - ((eq? (launch:einf-rollup-status exit-info) 2) ;; (vector-ref exit-info 3) - ;; if the current status is AUTO the defer to the calculated value but qualify (i.e. make this AUTO-WARN) - (if (equal? (db:test-get-status testinfo) "AUTO") "AUTO-WARN" "WARN")) - ((eq? (launch:einf-rollup-status exit-info) 3) "CHECK") - ((eq? (launch:einf-rollup-status exit-info) 4) "WAIVED") - ((eq? (launch:einf-rollup-status exit-info) 5) "ABORT") - ((eq? (launch:einf-rollup-status exit-info) 6) "SKIP") - (else "FAIL")))) ;; (db:test-get-status testinfo))) - (debug:print-info 1 *default-log-port* "Test exited in state=" (db:test-get-state testinfo) ", setting state/status based on exit code of " (launch:einf-exit-status exit-info) " and rollup-status of " (launch:einf-rollup-status exit-info)) - (tests:test-set-status! 
run-id - test-id - new-state - new-status - (args:get-arg "-m") #f) - ;; need to update the top test record if PASS or FAIL and this is a subtest - ;; NO NEED TO CALL set-state-status-and-roll-up-items HERE, THIS IS DONE IN set-state-status-and-roll-up-items called by tests:test-set-status! - )) - ;; for automated creation of the rollup html file this is a good place... - (if (not (equal? item-path "")) - (tests:summarize-items run-id test-id test-name #f)) - (tests:summarize-test run-id test-id) ;; don't force - just update if no - (rmt:update-run-stats run-id (rmt:get-raw-run-stats run-id))) - (mutex-unlock! m) - (launch:end-of-run-check run-id ) - (debug:print 2 *default-log-port* "Output from running " fullrunscript ", pid " (launch:einf-pid exit-info) " in work area " - work-area ":\n====\n exit code " (launch:einf-exit-code exit-info) "\n" "====\n") - (if (not (launch:einf-exit-status exit-info)) - (exit 4)))) - ))) - -;; Spec for End of test -;; At end of each test call, after marking self as COMPLETED do run-state-status-rollup -;; At transition to run COMPLETED/X do hooks -;; Definition: test_dead if event_time + duration + 1 minute? < current_time AND -;; we can prove the process is not alive (ssh host pstree -A pid) -;; if dead safe to mark the test as killed in the db -;; State/status table -;; new -;; 100% COMPLETED/ (PASS,FAIL,ABORT etc.) 
==> COMPLETED / X where X is same as itemized rollup -;; > 3 RUNNING with not test_dead do nothing (run should already be RUNNING/ na -;; > 0 RUNNING and test_dead then send KILLREQ ==> COMPLETED -;; 0 RUNNING ==> this is actually the first condition, should not get here - -(define (launch:end-of-run-check run-id ) - (let* ((not-completed-cnt (rmt:get-not-completed-cnt run-id)) - (running-cnt (rmt:get-count-tests-running-for-run-id run-id)) - (all-test-launched (rmt:get-var (conc "lunch-complete-" run-id))) - (current-state (rmt:get-run-state run-id)) - (current-status (rmt:get-run-status run-id))) - ;;get-vars run-id to query metadata table to check if all completed. if all-test-launched = yes then only not-completed-cnt = 0 means everyting is completed if no entry found in the table do nothing - (debug:print 0 *default-log-port* "rollup run state/status") - (rmt:set-state-status-and-roll-up-run run-id current-state current-status) - - (cond - ((and all-test-launched (eq? not-completed-cnt 0) (equal? all-test-launched "yes" )) - (debug:print 0 *default-log-port* "look for post hook.") - (runs:run-post-hook run-id)) - ((> running-cnt 3) - (debug:print 0 *default-log-port* "There are " running-cnt " tests running." )) - ((> running-cnt 0) - (debug:print 0 *default-log-port* "running cnt > 0 but <= 3 kill-running-tests-if-dead" ) - (let ((kill-cnt (launch:kill-tests-if-dead run-id))) - (if (and all-test-launched (equal? all-test-launched "yes") (eq? kill-cnt running-cnt)) - (launch:end-of-run-check run-id)))) ;;todo - (else (debug:print 0 *default-log-port* "Should it get here?? May be everything is not launched yet. 
Running test cnt:" running-cnt " Not completed test cnt:" not-completed-cnt) - (let* ((not-completed-tests (rmt:get-tests-for-run run-id "%" `("NOT_STARTED" "RUNNING" "LAUNCHED" "REMOTEHOSTSTART") `() #f #f #f #f #f #f #f #f))) - (if (> (length not-completed-tests) 0) - (let loop ((running-test (car not-completed-tests)) - (tal (cdr not-completed-tests))) - (let* ((test-name (vector-ref running-test 2)) - (item-path (vector-ref running-test 11))) - (debug:print 0 *default-log-port* "test " test-name "/" item-path " not completed") - (if (not (null? tal)) - (loop (car tal) (cdr tal))))))))))) - -(define (launch:is-test-alive host pid) -(if (and host pid (not (equal? host "n/a"))) -(let* ((cmd (conc "ssh " host " pstree -A " pid)) - (output (with-input-from-pipe cmd read-lines))) - (print "cmd: " cmd "\n op: " output ) - (if(eq? (length output) 0) - #f - #t)) -#t)) - -(define (launch:kill-tests-if-dead run-id) - (let* ((running-tests (rmt:get-tests-for-run run-id "%" `("RUNNING" "LAUNCHED" "REMOTEHOSTSTART") `() #f #f #f #f #f #f #f #f))) - (let loop ((running-test (car running-tests)) - (tal (cdr running-tests)) - (kill-cnt 0)) - (let* ((test-name (vector-ref running-test 2)) - (item-path (vector-ref running-test 11)) - (test-id (vector-ref running-test 0)) - (host (vector-ref running-test 6)) - (pid (rmt:test-get-top-process-pid run-id test-id)) - (event-time (vector-ref running-test 5)) - (duration (vector-ref running-test 12)) - (flag 0) - (curr-time (current-seconds))) - (if (and (< (+ event-time duration 600) curr-time) (not (launch:is-test-alive host pid))) ;;test has not updated duration in last 10 min then likely its not running but confirm before marking it as killed - (begin - (debug:print 0 *default-log-port* "test " test-name "/" item-path " needs to be killed") - (set! flag 1) - (rmt:set-state-status-and-roll-up-items run-id test-name item-path "KILLREQ" "n/a" #f))) - (if (not (null? 
tal)) - (loop (car tal) (cdr tal) (+ kill-cnt flag)) - (+ kill-cnt flag)))))) - -;; DO NOT USE - caching of configs is handled in launch:setup now. -;; -(define (launch:cache-config) - ;; if we have a linktree and -runtests and -target and the directory exists dump the config - ;; to megatest-(current-seconds).cfg and symlink it to megatest.cfg - (if (and *configdat* - (or (args:get-arg "-run") - (args:get-arg "-runtests") - (args:get-arg "-execute"))) - (let* ((linktree (common:get-linktree)) ;; (get-environment-variable "MT_LINKTREE")) - (target (common:args-get-target exit-if-bad: #t)) - (runname (or (args:get-arg "-runname") - (args:get-arg ":runname") - (getenv "MT_RUNNAME"))) - (fulldir (conc linktree "/" - target "/" - runname))) - (if (and linktree (common:file-exists? linktree)) ;; can't proceed without linktree - (begin - (debug:print-info 0 *default-log-port* "Have -run with target=" target ", runname=" runname ", fulldir=" fulldir ", testpatt=" (or (args:get-arg "-testpatt") "%")) - (if (not (common:file-exists? fulldir)) - (create-directory fulldir #t)) ;; need to protect with exception handler - (if (and target - runname - (common:file-exists? fulldir)) - (let ((tmpfile (conc fulldir "/.megatest.cfg." (current-seconds))) - (targfile (conc fulldir "/.megatest.cfg-" megatest-version "-" megatest-fossil-hash)) - (rconfig (conc fulldir "/.runconfig." megatest-version "-" megatest-fossil-hash))) - (if (common:file-exists? 
rconfig) ;; only cache megatest.config AFTER runconfigs has been cached - (begin - (debug:print-info 0 *default-log-port* "Caching megatest.config in " tmpfile) - (if (not (common:in-running-test?)) - (configf:write-alist *configdat* tmpfile)) - (system (conc "ln -sf " tmpfile " " targfile)))) - ))) - (debug:print-info 1 *default-log-port* "No linktree yet, no caching configs."))))) - - -;; gather available information, if legit read configs in this order: -;; -;; if have cache; -;; read it a return it -;; else -;; megatest.config (do not cache) -;; runconfigs.config (cache if all vars avail) -;; megatest.config (cache if all vars avail) -;; returns: -;; *toppath* -;; side effects: -;; sets; *configdat* (megatest.config info) -;; *runconfigdat* (runconfigs.config info) -;; *configstatus* (status of the read data) -;; -(define (launch:setup #!key (force-reread #f) (areapath #f)) - (mutex-lock! *launch-setup-mutex*) - (if (and *toppath* - (eq? *configstatus* 'fulldata) (not force-reread)) ;; got it all - (begin - (debug:print 2 *default-log-port* "NOTE: skipping launch:setup-body call since we have fulldata") - (mutex-unlock! *launch-setup-mutex*) - *toppath*) - (let ((res (launch:setup-body force-reread: force-reread areapath: areapath))) - (mutex-unlock! *launch-setup-mutex*) - res))) - -;; return paths depending on what info is available. -;; -(define (launch:get-cache-file-paths areapath toppath target mtconfig) - (let* ((use-cache (common:use-cache?)) - (runname (common:args-get-runname)) - (linktree (common:get-linktree)) - (testname (common:get-full-test-name)) - (rundir (if (and runname target linktree) - (common:directory-writable? (conc linktree "/" target "/" runname)) - #f)) - (testdir (if (and rundir testname) - (common:directory-writable? 
(conc rundir "/" testname)) - #f)) - (cachedir (or testdir rundir)) - (mtcachef (and cachedir (conc cachedir "/" ".megatest.cfg-" megatest-version "-" megatest-fossil-hash))) - (rccachef (and cachedir (conc cachedir "/" ".runconfigs.cfg-" megatest-version "-" megatest-fossil-hash)))) - (debug:print-info 6 *default-log-port* - "runname=" runname - "\n linktree=" linktree - "\n testname=" testname - "\n rundir=" rundir - "\n testdir=" testdir - "\n cachedir=" cachedir - "\n mtcachef=" mtcachef - "\n rccachef=" rccachef) - (cons mtcachef rccachef))) - -(define (launch:setup-body #!key (force-reread #f) (areapath #f)) - (if (and (eq? *configstatus* 'fulldata) - *toppath* - (not force-reread)) ;; no need to reprocess - *toppath* ;; return toppath - (let* ((use-cache (common:use-cache?)) ;; BB- use-cache checks - ;; *configdat* for - ;; use-cache setting. - ;; We do not have - ;; *configdat*. - ;; Bootstrapping problem - ;; here. - (toppath (or *toppath* areapath (getenv "MT_RUN_AREA_HOME"))) ;; preserve toppath - (target (common:args-get-target)) - (sections (if target (list "default" target) #f)) ;; for runconfigs - (mtconfig (or (args:get-arg "-config") "megatest.config")) ;; allow overriding megatest.config - (cachefiles (launch:get-cache-file-paths areapath toppath target mtconfig)) - ;; checking for null cachefiles should not be necessary, - ;; I was seeing error car of '(), might be a chicken bug - ;; or a red herring ... - (mtcachef (if (null? cachefiles) - #f - (car cachefiles))) ;; (and cachedir (conc - ;; cachedir "/" - ;; ".megatest.cfg-" - ;; megatest-version - ;; "-" - ;; megatest-fossil-hash))) - (rccachef (if (null? cachefiles) - #f - (cdr cachefiles)))) ;; (and cachedir - ;; (conc cachedir "/" - ;; ".runconfigs.cfg-" - ;; megatest-version - ;; "-" - ;; megatest-fossil-hash))) - ;; (cancreate (and - ;; cachedir - ;; (common:file-exists? - ;; cachedir)(file-write-access? - ;; cachedir) (not - ;; (common:in-running-test?))))) - (set! 
*toppath* toppath) ;; This is needed when we are running - ;; as a test using CMDINFO as a - ;; datasource (BB> "launch:setup-body - ;; -- cachefiles="cachefiles) - (cond - ;; if mtcachef exists just read it, however we need to assume - ;; toppath is available in $MT_RUN_AREA_HOME - ((and (not force-reread) - mtcachef rccachef - use-cache - (get-environment-variable "MT_RUN_AREA_HOME") - (common:file-exists? mtcachef) - (common:file-exists? rccachef)) - ;;(BB> "launch:setup-body -- cond branch 1 - use-cache") - (set! *configdat* (configf:read-alist mtcachef)) - ;;(BB> "launch:setup-body -- 1 set! *configdat*="*configdat*) - (set! *runconfigdat* (configf:read-alist rccachef)) - (set! *configinfo* (list *configdat* (get-environment-variable "MT_RUN_AREA_HOME"))) - (set! *configstatus* 'fulldata) - (set! *toppath* (get-environment-variable "MT_RUN_AREA_HOME")) - *toppath*) - ;; there are no existing cached configs, do full reads of the - ;; configs and cache them we have all the info needed to - ;; fully process runconfigs and megatest.config - ((and ;; (not force-reread) ;; force-reread is irrelevant in the AND, could however OR it? - mtcachef - rccachef) ;; BB- why are we doing this without asking if caching is desired? - ;;(BB> "launch:setup-body -- cond branch 2") - (let* ((first-pass (find-and-read-config ;; NB// sets MT_RUN_AREA_HOME as side effect - mtconfig - environ-patt: "env-override" - given-toppath: toppath - pathenvvar: "MT_RUN_AREA_HOME")) - (first-rundat (let ((toppath (if toppath - toppath - (car first-pass)))) - (read-config ;; (conc toppath "/runconfigs.config") ;; this should be converted to runconfig:read but it is non-trivial, leaving it for now. - (conc (if (string? toppath) - toppath - (get-environment-variable "MT_RUN_AREA_HOME")) - "/runconfigs.config") - *runconfigdat* #t - sections: sections)))) - (set! *runconfigdat* first-rundat) - (if first-pass ;; - (begin - ;;(BB> "launch:setup-body -- \"first-pass\"=first-pass") - (set! 
*configdat* (car first-pass)) - ;;(BB> "launch:setup-body -- 2 set! *configdat*="*configdat*) - (set! *configinfo* first-pass) - (set! *toppath* (or toppath (cadr first-pass))) ;; use the gathered data unless already have it - (set! toppath *toppath*) - (if (not *toppath*) - (begin - (debug:print-error 0 *default-log-port* "you are not in a megatest area!") - (exit 1))) - (setenv "MT_RUN_AREA_HOME" *toppath*) - ;; the seed read is done, now read runconfigs, cache it then read megatest.config one more time and cache it - (let* ((keys (rmt:get-keys)) - (key-vals (keys:target->keyval keys target)) - (linktree (common:get-linktree)) ;; (or (getenv "MT_LINKTREE")(if *configdat* (configf:lookup *configdat* "setup" "linktree") #f))) - ; (if *configdat* - ; (configf:lookup *configdat* "setup" "linktree") - ; (conc *toppath* "/lt")))) - (second-pass (find-and-read-config - mtconfig - environ-patt: "env-override" - given-toppath: toppath - pathenvvar: "MT_RUN_AREA_HOME")) - (runconfigdat (begin ;; this read of the runconfigs will see any adjustments made by re-reading megatest.config - (for-each (lambda (kt) - (setenv (car kt) (cadr kt))) - key-vals) - (read-config (conc toppath "/runconfigs.config") *runconfigdat* #t ;; consider using runconfig:read some day ... - sections: sections))) - (cachefiles (launch:get-cache-file-paths areapath toppath target mtconfig)) - (mtcachef (car cachefiles)) - (rccachef (cdr cachefiles))) - ;; trap exception due to stale NFS handle -- Error: (open-output-file) cannot open file - Stale NFS file handle: "/p/fdk/gwa/lefkowit/mtTesting/qa/primbeqa/links/p1222/11/PDK_r1.1.1/prim/clean/pcell_testgen/.runconfigs.cfg-1.6427-7d1e789cb3f62f9cde719a4865bb51b3c17ea853" - ticket 220546342 - ;; TODO - consider 1) using simple-lock to bracket cache write - ;; 2) cache in hash on server, since need to do rmt: anyway to lock. 
- - (if rccachef - (common:fail-safe - (lambda () - (configf:write-alist runconfigdat rccachef)) - (conc "Could not write cache file - "rccachef))) - (if mtcachef - (common:fail-safe - (lambda () - (configf:write-alist *configdat* mtcachef)) - (conc "Could not write cache file - "mtcachef))) - (set! *runconfigdat* runconfigdat) - (if (and rccachef mtcachef) (set! *configstatus* 'fulldata)))) - ;; no configs found? should not happen but let's try to recover gracefully, return an empty hash-table - (set! *configdat* (make-hash-table)) - ))) - - ;; else read what you can and set the flag accordingly - ;; here we don't have either mtconfig or rccachef - (else - ;;(BB> "launch:setup-body -- cond branch 3 - else") - (let* ((cfgdat (find-and-read-config - (or (args:get-arg "-config") "megatest.config") - environ-patt: "env-override" - given-toppath: (get-environment-variable "MT_RUN_AREA_HOME") - pathenvvar: "MT_RUN_AREA_HOME"))) - - (if (and cfgdat (list? cfgdat) (> (length cfgdat) 0) (hash-table? (car cfgdat))) - (let* ((toppath (or (get-environment-variable "MT_RUN_AREA_HOME")(cadr cfgdat))) - (rdat (read-config (conc toppath ;; convert this to use runconfig:read! - "/runconfigs.config") *runconfigdat* #t sections: sections))) - (set! *configinfo* cfgdat) - (set! *configdat* (car cfgdat)) - (set! *runconfigdat* rdat) - (set! *toppath* toppath) - (set! *configstatus* 'partial) - ;; set up as many vars in *alldat* as possible here - (alldat-areapath-set! *alldat* toppath) - (alldat-log-port-set! *alldat* *default-log-port*) - (alldat-mtconfig-set! *alldat* *configdat*) - - ) - (begin - (debug:print-error 0 *default-log-port* "No " mtconfig " file found. Giving up.") - (exit 2)))))) - ;; COND ends here. - - ;; additional house keeping - (let* ((linktree (or (common:get-linktree) - (conc *toppath* "/lt")))) - (if linktree - (begin - (if (not (common:file-exists? 
linktree)) - (begin - (handle-exceptions - exn - (begin - (debug:print-error 0 *default-log-port* "Something went wrong when trying to create linktree dir at " linktree) - (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) - (exit 1)) - (create-directory linktree #t)))) - (handle-exceptions - exn - (begin - (debug:print-error 0 *default-log-port* "Something went wrong when trying to create link to linktree at " *toppath*) - (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))) - (let ((tlink (conc *toppath* "/lt"))) - (if (not (common:file-exists? tlink)) - (create-symbolic-link linktree tlink))))) - (begin - (debug:print-error 0 *default-log-port* "linktree not defined in [setup] section of megatest.config") - ))) - (if (and *toppath* - (directory-exists? *toppath*)) - (begin - (setenv "MT_RUN_AREA_HOME" *toppath*) - (setenv "MT_TESTSUITENAME" (common:get-area-name *alldat*))) - (begin - (debug:print-error 0 *default-log-port* "failed to find the top path to your Megatest area.") - (set! *toppath* #f) ;; force it to be false so we return #f - #f)) - - ;; one more attempt to cache the configs for future reading - (let* ((cachefiles (launch:get-cache-file-paths areapath toppath target mtconfig)) - (mtcachef (car cachefiles)) - (rccachef (cdr cachefiles))) - - ;; trap exception due to stale NFS handle -- Error: (open-output-file) cannot open file - Stale NFS file handle: "...somepath.../.runconfigs.cfg-1.6427-7d1e789cb3f62f9cde719a4865bb51b3c17ea853" - ticket 220546342 - ;; TODO - consider 1) using simple-lock to bracket cache write - ;; 2) cache in hash on server, since need to do rmt: anyway to lock. - (if (and rccachef *runconfigdat* (not (common:file-exists? rccachef))) - (common:fail-safe - (lambda () - (configf:write-alist *runconfigdat* rccachef)) - (conc "Could not write cache file - "rccachef)) - ) - (if (and mtcachef *configdat* (not (common:file-exists? 
mtcachef))) - (common:fail-safe - (lambda () - (configf:write-alist *configdat* mtcachef)) - (conc "Could not write cache file - "mtcachef)) - ) - (if (and rccachef mtcachef *runconfigdat* *configdat*) - (set! *configstatus* 'fulldata))) - - ;; if have -append-config then read and append here - (let ((cfname (args:get-arg "-append-config"))) - (if (and cfname - (file-read-access? cfname)) - (read-config cfname *configdat* #t))) ;; values are added to the hash, no need to do anything special. - *toppath*))) - -(define (get-best-disk confdat testconfig) - (let* ((disks (or (and testconfig (hash-table-ref/default testconfig "disks" #f)) - (hash-table-ref/default confdat "disks" #f))) - (minspace (let ((m (configf:lookup confdat "setup" "minspace"))) - (string->number (or m "10000"))))) - (if disks - (let ((res (common:get-disk-with-most-free-space disks minspace))) ;; min size of 1000, seems tad dumb - (if res - (cdr res) - (begin -;; (if (common:low-noise-print 20 "No valid disks or no disk with enough space") -;; (debug:print-error 0 *default-log-port* "No valid disks found in megatest.config. Please add some to your [disks] section and ensure the directory exists and has enough space!\n You can change minspace in the [setup] section of megatest.config. Current setting is: " minspace)) - ;;(exit 1) - (if (null? disks) - (cons 1 (conc *toppath* "/runs")) - (let ((paths (sort disks (lambda (x y) (> (string-length (cadr x)) (string-length (cadr y))))))) - (let loop ((head (car paths)) (tail (cdr paths))) - (let ((result (handle-exceptions exn #f (create-directory (cadr head) #t)))) - (if result - result - (if (null? tail) - (cons 1 (conc *toppath* "/runs")) - (loop (car tail) (cdr tail)))))))))))))) ;; the code creates the necessary directories if it does not exist and returns the path. 
- - -(define (launch:test-copy test-src-path test-path) - (let* ((ovrcmd (let ((cmd (config-lookup *configdat* "setup" "testcopycmd"))) - (if cmd - ;; substitute the TEST_SRC_PATH and TEST_TARG_PATH - (string-substitute "TEST_TARG_PATH" test-path - (string-substitute "TEST_SRC_PATH" test-src-path cmd #t) #t) - #f))) - (cmd (if ovrcmd - ovrcmd - (conc "rsync -av" (if (debug:debug-mode 1) "" "q") " " test-src-path "/ " test-path "/" - " >> " test-path "/mt_launch.log 2>> " test-path "/mt_launch.log"))) - (status (system cmd))) - (if (not (eq? status 0)) - (debug:print 2 *default-log-port* "ERROR: problem with running \"" cmd "\"")))) - - -;; Desired directory structure: -;; -;; - - -. -;; | -;; v -;; - - -|- -;; -;; dir stored in test is: -;; -;; - - [ - ] -;; -;; All log file links should be stored relative to the top of link path -;; -;; - [ - ] -;; -(define (create-work-area run-id run-info keyvals test-id test-src-path disk-path testname itemdat #!key (remtries 2)) - (let* ((item-path (if (string? itemdat) itemdat (item-list->path itemdat))) ;; if pass in string - just use it - (runname (if (string? run-info) ;; if we pass in a string as run-info use it as run-name. - run-info - (db:get-value-by-header (db:get-rows run-info) - (db:get-header run-info) - "runname"))) - (contour #f) ;; NOT READY FOR THIS (args:get-arg "-contour")) - ;; convert back to db: from rdb: - this is always run at server end - (target (string-intersperse (map cadr keyvals) "/")) - - (not-iterated (equal? 
"" item-path)) - - ;; all tests are found at /test-base or /test-base - (testtop-base (conc target "/" runname "/" testname)) - (test-base (conc testtop-base (if not-iterated "" "/") item-path)) - - ;; nb// if itempath is not "" then it is prefixed with "/" - (toptest-path (conc disk-path (if contour (conc "/" contour) "") "/" testtop-base)) - (test-path (conc disk-path (if contour (conc "/" contour) "") "/" test-base)) - - ;; ensure this exists first as links to subtests must be created there - (linktree (common:get-linktree)) - ;; WAS: (let ((rd (config-lookup *configdat* "setup" "linktree"))) - ;; (if rd rd (conc *toppath* "/runs")))) - ;; which seems wrong ... - - (lnkbase (conc linktree (if contour (conc "/" contour) "") "/" target "/" runname)) - (lnkpath (conc lnkbase "/" testname)) - (lnkpathf (conc lnkpath (if not-iterated "" "/") item-path)) - (lnktarget (conc lnkpath "/" item-path))) - - ;; Update the rundir path in the test record for all, rundir=physical, shortdir=logical - ;; rundir shortdir - (rmt:general-call 'test-set-rundir-shortdir run-id lnkpathf test-path testname item-path run-id) - - (debug:print 2 *default-log-port* "INFO:\n lnkbase=" lnkbase "\n lnkpath=" lnkpath "\n toptest-path=" toptest-path "\n test-path=" test-path) - (if (not (common:file-exists? linktree)) - (begin - (debug:print 0 *default-log-port* "WARNING: linktree did not exist! Creating it now at " linktree) - (create-directory linktree #t))) ;; (system (conc "mkdir -p " linktree)))) - ;; create the directory for the tests dir links, this is needed no matter what... try up to three times - (let loop ((done 3)) - (let ((success (if (and (not (common:directory-exists? lnkbase)) - (not (common:file-exists? 
lnkbase))) - (handle-exceptions - exn - (begin - (debug:print-error 0 *default-log-port* "Problem creating linktree base at " lnkbase) - (print-error-message exn (current-error-port)) - #t) - (create-directory lnkbase #t) - #f)))) - (if (and (not success)(> done 0)) - (loop (- done 1))))) - - ;; update the toptest record with its location rundir, cache the path - ;; This wass highly inefficient, one db write for every subtest, potentially - ;; thousands of unnecessary updates, cache the fact it was set and don't set it - ;; again. - - ;; Now create the link from the test path to the link tree, however - ;; if the test is iterated it is necessary to create the parent path - ;; to the iteration. use pathname-directory to trim the path by one - ;; level - (if (not not-iterated) ;; i.e. iterated - (let ((iterated-parent (pathname-directory (conc lnkpath "/" item-path)))) - (debug:print-info 2 *default-log-port* "Creating iterated parent " iterated-parent) - (handle-exceptions - exn - (begin - (debug:print-error 0 *default-log-port* " Failed to create directory " iterated-parent ((condition-property-accessor 'exn 'message) exn) ", exiting") - (exit 1)) - (create-directory iterated-parent #t)))) - - (if (symbolic-link? lnkpath) - (handle-exceptions - exn - (begin - (debug:print-error 0 *default-log-port* " Failed to remove symlink " lnkpath ((condition-property-accessor 'exn 'message) exn) ", exiting") - (exit 1)) - (delete-file lnkpath))) - - (if (not (or (common:file-exists? lnkpath) - (symbolic-link? lnkpath))) - (handle-exceptions - exn - (begin - (debug:print-error 0 *default-log-port* " Failed to create symlink " lnkpath ((condition-property-accessor 'exn 'message) exn) ", exiting") - (exit 1)) - (create-symbolic-link toptest-path lnkpath))) - - ;; NB - This was not working right - some top tests are not getting the path set!!! - ;; - ;; Do the setting of this record after the paths are created so that the shortdir can - ;; be set to the real directory location. 
This is safer for future clean up if the link - ;; tree is damaged or lost. - ;; - (if (not (hash-table-ref/default *toptest-paths* testname #f)) - (let* ((testinfo (rmt:get-test-info-by-id run-id test-id)) ;; run-id testname item-path)) - (curr-test-path (if testinfo ;; (filedb:get-path *fdb* - ;; (db:get-path dbstruct - ;; (rmt:sdb-qry 'getstr - (db:test-get-rundir testinfo) ;; ) ;; ) - #f))) - (hash-table-set! *toptest-paths* testname curr-test-path) - ;; NB// Was this for the test or for the parent in an iterated test? - (rmt:general-call 'test-set-rundir-shortdir run-id lnkpath - (if (common:file-exists? lnkpath) - ;; (resolve-pathname lnkpath) - (common:nice-path lnkpath) - lnkpath) - testname "" run-id) - ;; (rmt:general-call 'test-set-rundir run-id lnkpath testname "") ;; toptest-path) - (if (or (not curr-test-path) - (not (directory-exists? toptest-path))) - (begin - (debug:print-info 2 *default-log-port* "Creating " toptest-path " and link " lnkpath) - (handle-exceptions - exn - #f ;; don't care to catch and deal with errors here for now. - (create-directory toptest-path #t)) - (hash-table-set! *toptest-paths* testname toptest-path))))) - - ;; The toptest path has been created, the link to the test in the linktree has - ;; been created. Now, if this is an iterated test the real test dir must be created - (if (not not-iterated) ;; this is an iterated test - (begin ;; (let ((lnktarget (conc lnkpath "/" item-path))) - (debug:print 2 *default-log-port* "Setting up sub test run area") - (debug:print 2 *default-log-port* " - creating run area in " test-path) - (handle-exceptions - exn - (begin - (debug:print-error 0 *default-log-port* " Failed to create directory " test-path ((condition-property-accessor 'exn 'message) exn) ", exiting") - (exit 1)) - (create-directory test-path #t)) - (debug:print 2 *default-log-port* - " - creating link from: " test-path "\n" - " to: " lnktarget) - - ;; If there is already a symlink delete it and recreate it. 
- (handle-exceptions - exn - (begin - (debug:print-error 0 *default-log-port* " Failed to re-create link " lnktarget ((condition-property-accessor 'exn 'message) exn) ", exiting") - (exit)) - (if (symbolic-link? lnktarget) (delete-file lnktarget)) - (if (not (common:file-exists? lnktarget)) (create-symbolic-link test-path lnktarget))))) - - (if (not (directory? test-path)) - (create-directory test-path #t)) ;; this is a hack, I don't know why out of the blue this path does not exist sometimes - - (if (and test-src-path (directory? test-path)) - (begin - (launch:test-copy test-src-path test-path) - (list lnkpathf lnkpath )) - (if (and test-src-path (> remtries 0)) - (begin - (debug:print-error 0 *default-log-port* "Failed to create work area at " test-path " with link at " lnktarget ", remaining attempts " remtries) - ;; - (create-work-area run-id run-info keyvals test-id test-src-path disk-path testname itemdat remtries: (- remtries 1))) - (list #f #f))))) - - -(define (launch:handle-zombie-tests run-id) - (let* ((key (conc "zombiescan-runid-"run-id)) - (now (current-seconds)) - (threshold (- (current-seconds) (* 2 (or (configf:lookup-number *configdat* "setup" "deadtime") 120)))) - (val (rmt:get-var key)) - (do-scan? - (cond - ((not val) - #t) - ((< val threshold) - #t) - (else #f)))) - (when do-scan? - (debug:print 1 *default-log-port* "INFO: search and mark zombie tests") - (rmt:set-var key (current-seconds)) - (rmt:find-and-mark-incomplete run-id #f)))) - - - - - -;; 1. look though disks list for disk with most space -;; 2. create run dir on disk, path name is meaningful -;; 3. create link from run dir to megatest runs area -;; 4. remotely run the test on allocated host -;; - could be ssh to host from hosts table (update regularly with load) -;; - could be netbatch -;; (launch-test db (cadr status) test-conf)) -(define (launch-test test-id run-id run-info keyvals runname test-conf test-name test-path itemdat params) - (mutex-lock! 
*launch-setup-mutex*) ;; setting variables and processing the testconfig is NOT thread-safe, reuse the launch-setup mutex - (let* ( ;; (lock-key (conc "test-" test-id)) - ;; (got-lock (let loop ((lock (rmt:no-sync-get-lock lock-key)) - ;; (expire-time (+ (current-seconds) 15))) ;; give up on getting the lock and steal it after 15 seconds - ;; (if (car lock) - ;; #t - ;; (if (> (current-seconds) expire-time) - ;; (begin - ;; (debug:print-info 0 *default-log-port* "Timed out waiting for a lock to launch test " keyvals " " runname " " test-name " " test-path) - ;; (rmt:no-sync-del! lock-key) ;; destroy the lock - ;; (loop (rmt:no-sync-get-lock lock-key) expire-time)) ;; - ;; (begin - ;; (thread-sleep! 1) - ;; (loop (rmt:no-sync-get-lock lock-key) expire-time)))))) - (item-path (item-list->path itemdat)) - (contour #f)) ;; NOT READY FOR THIS (args:get-arg "-contour"))) - (let loop ((delta (- (current-seconds) *last-launch*)) - (launch-delay (configf:lookup-number *configdat* "setup" "launch-delay" default: 1))) - (if (> launch-delay delta) - (begin - (if (common:low-noise-print 1200 "test launch delay") ;; every two hours or so remind the user about launch delay. - (debug:print-info 0 *default-log-port* "NOTE: test launches are delayed by " launch-delay " seconds. See megatest.config launch-delay setting to adjust.")) ;; launch of " test-name " for " (- launch-delay delta) " seconds")) - (thread-sleep! 
(- launch-delay delta)) - (loop (- (current-seconds) *last-launch*) launch-delay)))) - (change-directory *toppath*) - (alist->env-vars ;; consolidate this code with the code in megatest.scm for "-execute", *maybe* - the longer they are set the longer each launch takes (must be non-overlapping with the vars) - (append - (list - (list "MT_RUN_AREA_HOME" *toppath*) - (list "MT_TEST_NAME" test-name) - (list "MT_RUNNAME" runname) - (list "MT_ITEMPATH" item-path) - (list "MT_CONTOUR" contour) - ) - itemdat)) - (let* ((tregistry (tests:get-all)) ;; third param (below) is system-allowed - ;; for tconfig, why do we allow fallback to test-conf? - (tconfig (or (tests:get-testconfig test-name item-path tregistry #t force-create: #t) - (begin - (debug:print 0 *default-log-port* "WARNING: falling back to pre-calculated testconfig. This is likely not desired.") - test-conf))) ;; force re-read now that all vars are set - (useshell (let ((ush (config-lookup *configdat* "jobtools" "useshell"))) - (if ush - (if (equal? ush "no") ;; must use "no" to NOT use shell - #f - ush) - #t))) ;; default is yes - (runscript (config-lookup tconfig "setup" "runscript")) - (ezsteps (> (length (hash-table-ref/default tconfig "ezsteps" '())) 0)) ;; don't send all the steps, could be big, just send a flag - (subrun (> (length (hash-table-ref/default tconfig "subrun" '())) 0)) ;; send a flag to process a subrun - ;; (diskspace (config-lookup tconfig "requirements" "diskspace")) - ;; (memory (config-lookup tconfig "requirements" "memory")) - ;; (hosts (config-lookup *configdat* "jobtools" "workhosts")) ;; I'm pretty sure this was never completed - (remote-megatest (config-lookup *configdat* "setup" "executable")) - (run-time-limit (or (configf:lookup tconfig "requirements" "runtimelim") - (configf:lookup *configdat* "setup" "runtimelim"))) - ;; FIXME SOMEDAY: not good how this is so obtuse, this hack is to - ;; allow running from dashboard. 
Extract the path - ;; from the called megatest and convert dashboard - ;; or dboard to megatest - (local-megatest (let* ((lm (car (argv))) - (dir (pathname-directory lm)) - (exe (pathname-strip-directory lm))) - (conc (if dir (conc dir "/") "") - (case (string->symbol exe) - ((dboard) "../megatest") - ((mtest) "../megatest") - ((dashboard) "megatest") - (else exe))))) - (launcher (common:get-launcher *configdat* test-name item-path)) ;; (config-lookup *configdat* "jobtools" "launcher")) - (test-sig (conc (common:get-area-name *alldat*) ":" test-name ":" item-path)) ;; (item-list->path itemdat))) ;; test-path is the full path including the item-path - (work-area #f) - (toptest-work-area #f) ;; for iterated tests the top test contains data relevant for all - (diskpath #f) - (cmdparms #f) - (fullcmd #f) ;; (define a (with-output-to-string (lambda ()(write x)))) - (mt-bindir-path #f) - (testinfo (rmt:get-test-info-by-id run-id test-id)) - (mt_target (string-intersperse (map cadr keyvals) "/")) - (debug-param (append (if (args:get-arg "-debug") (list "-debug" (args:get-arg "-debug")) '()) - (if (args:get-arg "-logging")(list "-logging") '())))) - ;; (if hosts (set! hosts (string-split hosts))) - ;; set the megatest to be called on the remote host - (if (not remote-megatest)(set! remote-megatest local-megatest)) ;; "megatest")) - (set! mt-bindir-path (pathname-directory remote-megatest)) - (if launcher (set! 
launcher (string-split launcher))) - ;; set up the run work area for this test - (if (and (args:get-arg "-preclean") ;; user has requested to preclean for this run - (not (member (db:test-get-rundir testinfo)(list "n/a" "/tmp/badname")))) ;; n/a is a placeholder and thus not a read dir - (begin - (debug:print-info 0 *default-log-port* "attempting to preclean directory " (db:test-get-rundir testinfo) " for test " test-name "/" item-path) - (runs:remove-test-directory testinfo 'remove-data-only))) ;; remove data only, do not perturb the record - - ;; prevent overlapping actions - set to LAUNCHED as early as possible - ;; - ;; the following call handles waiver propogation. cannot yet condense into roll-up-pass-fail - (tests:test-set-status! run-id test-id "LAUNCHED" "n/a" #f #f) ;; (if launch-results launch-results "FAILED")) - (rmt:set-state-status-and-roll-up-items run-id test-name item-path #f "LAUNCHED" #f) - ;; (pp (hash-table->alist tconfig)) - (set! diskpath (get-best-disk *configdat* tconfig)) - (if diskpath - (let ((dat (create-work-area run-id run-info keyvals test-id test-path diskpath test-name itemdat))) - (set! work-area (car dat)) - (set! toptest-work-area (cadr dat)) - (debug:print-info 2 *default-log-port* "Using work area " work-area)) - (begin - (set! work-area (conc test-path "/tmp_run")) - (create-directory work-area #t) - (debug:print 0 *default-log-port* "WARNING: No disk work area specified - running in the test directory under tmp_run"))) - (set! 
cmdparms (base64:base64-encode - (z3:encode-buffer - (with-output-to-string - (lambda () ;; (list 'hosts hosts) - (write (list (list 'testpath test-path) - ;; (list 'transport (conc *transport-type*)) - ;; (list 'serverinf *server-info*) - (list 'homehost (let* ((hhdat (common:get-homehost))) - (if hhdat - (car hhdat) - #f))) - (list 'serverurl (if *alldat* - (alldat-server-url *alldat*) - #f)) ;; - (list 'areaname (common:get-area-name *alldat*)) - (list 'toppath *toppath*) - (list 'work-area work-area) - (list 'test-name test-name) - (list 'runscript runscript) - (list 'run-id run-id ) - (list 'test-id test-id ) - ;; (list 'item-path item-path ) - (list 'itemdat itemdat ) - (list 'megatest remote-megatest) - (list 'ezsteps ezsteps) - (list 'subrun subrun) - (list 'target mt_target) - (list 'contour contour) - (list 'runtlim (if run-time-limit (common:hms-string->seconds run-time-limit) #f)) - (list 'env-ovrd (hash-table-ref/default *configdat* "env-override" '())) - (list 'set-vars (if params (hash-table-ref/default params "-setvars" #f))) - (list 'runname runname) - (list 'mt-bindir-path mt-bindir-path)))))))) - - ;; clean out step records from previous run if they exist - ;; (rmt:delete-test-step-records run-id test-id) - ;; if the dir does not exist we may have a itempath where individual variables are a path, launch anyway - (if (common:file-exists? work-area) - (change-directory work-area)) ;; so that log files from the launch process don't clutter the test dir - (cond - ;; ((and launcher hosts) ;; must be using ssh hostname - ;; (set! fullcmd (append launcher (car hosts)(list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param))) - ;; (set! fullcmd (append launcher (car hosts)(list remote-megatest test-sig "-execute" cmdparms)))) - (launcher - (set! fullcmd (append launcher (list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param))) - ;; (set! 
fullcmd (append launcher (list remote-megatest test-sig "-execute" cmdparms)))) - (else - (if (not useshell)(debug:print 0 *default-log-port* "WARNING: internal launching will not work well without \"useshell yes\" in your [jobtools] section")) - (set! fullcmd (append (list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param (list (if useshell "&" "")))))) - ;; (set! fullcmd (list remote-megatest test-sig "-execute" cmdparms (if useshell "&" ""))))) - (if (args:get-arg "-xterm")(set! fullcmd (append fullcmd (list "-xterm")))) - (debug:print 1 *default-log-port* "Launching " work-area) - ;; set pre-launch-env-vars before launching, keep the vars in prevvals and put the envionment back when done - (debug:print 4 *default-log-port* "fullcmd: " fullcmd) - (set! *last-launch* (current-seconds)) ;; all that junk above takes time, set this as late as possible. - (let* ((commonprevvals (alist->env-vars - (hash-table-ref/default *configdat* "env-override" '()))) - (miscprevvals (alist->env-vars ;; consolidate this code with the code in megatest.scm for "-execute" - (append (list (list "MT_TEST_RUN_DIR" work-area) - (list "MT_TEST_NAME" test-name) - (list "MT_ITEM_INFO" (conc itemdat)) - (list "MT_RUNNAME" runname) - (list "MT_TARGET" mt_target) - (list "MT_ITEMPATH" item-path) - ) - itemdat))) - (testprevvals (alist->env-vars - (hash-table-ref/default tconfig "pre-launch-env-overrides" '()))) - ;; Launchwait defaults to true, must override it to turn off wait - (launchwait (if (equal? (configf:lookup *configdat* "setup" "launchwait") "no") #f #t)) - (launch-results-prev (apply (if launchwait ;; BB: TODO: refactor this to examine return code of launcher, if nonzero, set state to launch failed. 
- process:cmd-run-with-stderr-and-exitcode->list - process-run) - (if useshell - (let ((cmdstr (string-intersperse fullcmd " "))) - (if launchwait - cmdstr - (conc cmdstr " >> mt_launch.log 2>&1 &"))) - (car fullcmd)) - (if useshell - '() - (cdr fullcmd)))) - (success (if launchwait (equal? 0 (cadr launch-results-prev)) #t)) - (launch-results (if launchwait (car launch-results-prev) launch-results-prev))) - (if (not success) - (tests:test-set-status! run-id test-id "COMPLETED" "DEAD" "launcher failed; exited non-zero; check mt_launch.log" #f)) ;; (if launch-results launch-results "FAILED")) - (mutex-unlock! *launch-setup-mutex*) ;; yes, really should mutex all the way to here. Need to put this entire process into a fork. - ;; (rmt:no-sync-del! lock-key) ;; release the lock for starting this test - (if (not launchwait) ;; give the OS a little time to allow the process to start - (thread-sleep! 0.01)) - (with-output-to-file "mt_launch.log" - (lambda () - (print "LAUNCHCMD: " (string-intersperse fullcmd " ")) - (if (list? launch-results) - (apply print launch-results) - (print "NOTE: launched \"" fullcmd "\"\n but did not wait for it to proceed. Add the following to megatest.config \n[setup]\nlaunchwait yes\n if you have problems with this")) - #:append)) - (debug:print 2 *default-log-port* "Launching completed, updating db") - (debug:print 2 *default-log-port* "Launch results: " launch-results) - (if (not launch-results) - (begin - (print "ERROR: Failed to run " (string-intersperse fullcmd " ") ", exiting now") - ;; (sqlite3:finalize! db) - ;; good ole "exit" seems not to work - ;; (_exit 9) - ;; but this hack will work! Thanks go to Alan Post of the Chicken email list - ;; NB// Is this still needed? Should be safe to go back to "exit" now? 
- (process-signal (current-process-id) signal/kill) - )) - (alist->env-vars miscprevvals) - (alist->env-vars testprevvals) - (alist->env-vars commonprevvals) - launch-results)) - (change-directory *toppath*))) - -;; recover a test where the top controlling mtest may have died -;; -(define (launch:recover-test run-id test-id) - ;; this function is called on the test run host via ssh - ;; - ;; 1. look at the process from pid - ;; - is it owned by calling user - ;; - it it's run directory correct for the test - ;; - is there a controlling mtest (maybe stuck) - ;; 2. if recovery is needed watch pid - ;; - when it exits take the exit code and do the needful - ;; - (let* ((pid (rmt:test-get-top-process-id run-id test-id)) - (psres (with-input-from-pipe - (conc "ps -F -u " (current-user-name) " | grep -E '" pid " ' | grep -v 'grep -E " pid "'") - (lambda () - (read-line)))) - (rundir (if (string? psres) ;; real process owned by user - (read-symbolic-link (conc "/proc/" pid "/cwd")) - #f))) - ;; now wait on that process if all is correct - ;; periodically update the db with runtime - ;; when the process exits look at the db, if still RUNNING after 10 seconds set - ;; state/status appropriately - (process-wait pid))) Index: lock-queue.scm ================================================================== --- lock-queue.scm +++ lock-queue.scm @@ -22,235 +22,5 @@ (declare (uses common)) (declare (uses tasks)) (declare (uses commonmod)) (import commonmod) - -;;====================================================================== -;; attempt to prevent overlapping updates of rollup files by queueing -;; update requests in an sqlite db -;;====================================================================== - -;;====================================================================== -;; db record, -;;====================================================================== - -(define (make-lock-queue:db-dat)(make-vector 3)) -(define-inline (lock-queue:db-dat-get-db vec) 
(vector-ref vec 0)) -(define-inline (lock-queue:db-dat-get-path vec) (vector-ref vec 1)) -(define-inline (lock-queue:db-dat-set-db! vec val)(vector-set! vec 0 val)) -(define-inline (lock-queue:db-dat-set-path! vec val)(vector-set! vec 1 val)) - -(define (lock-queue:delete-lock-db dbdat) - (let ((fname (lock-queue:db-dat-get-path dbdat))) - (system (conc "rm -f " fname "*")))) - -(define (lock-queue:open-db fname #!key (count 10)) - (let* ((actualfname (conc fname ".lockdb")) - (dbexists (common:file-exists? actualfname)) - (db (sqlite3:open-database actualfname)) - (handler (make-busy-timeout 136000))) - (if dbexists - (vector db actualfname) - (begin - (handle-exceptions - exn - (begin - (thread-sleep! 10) - (if (> count 0) - (lock-queue:open-db fname count: (- count 1)) - (vector db actualfname))) - (sqlite3:with-transaction - db - (lambda () - (sqlite3:execute - db - "CREATE TABLE IF NOT EXISTS queue ( - id INTEGER PRIMARY KEY, - test_id INTEGER, - start_time INTEGER, - state TEXT, - CONSTRAINT queue_constraint UNIQUE (test_id));") - (sqlite3:execute - db - "CREATE TABLE IF NOT EXISTS runlocks ( - id INTEGER PRIMARY KEY, - test_id INTEGER, - run_lock TEXT, - CONSTRAINT runlock_constraint UNIQUE (run_lock));")))))) - (sqlite3:set-busy-handler! db handler) - (vector db actualfname))) - -(define (lock-queue:set-state dbdat test-id newstate #!key (remtries 10)) - (tasks:wait-on-journal (lock-queue:db-dat-get-path dbdat) 1200) - (handle-exceptions - exn - (if (> remtries 0) - (begin - (debug:print 0 *default-log-port* "WARNING: exception on lock-queue:set-state. Trying again in 30 seconds.") - (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) - (thread-sleep! 
30) - (lock-queue:set-state dbdat test-id newstate remtries: (- remtries 1))) - (begin - (debug:print-error 0 *default-log-port* " Failed to set lock state for test with id " test-id ", error: " ((condition-property-accessor 'exn 'message) exn) ", giving up.") - #f)) - (sqlite3:execute (lock-queue:db-dat-get-db dbdat) "UPDATE queue SET state=? WHERE test_id=?;" - newstate - test-id))) - -(define (lock-queue:any-younger? dbdat mystart test-id #!key (remtries 10)) - ;; no need to wait on journal on read only queries - ;; (tasks:wait-on-journal (lock-queue:db-dat-get-path dbdat) 1200) - (handle-exceptions - exn - (if (> remtries 0) - (begin - (debug:print 0 *default-log-port* "WARNING: exception on lock-queue:any-younger. Removing lockdb and trying again in 5 seconds.") - (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) - (thread-sleep! 5) - (lock-queue:delete-lock-db dbdat) - (lock-queue:any-younger? dbdat mystart test-id remtries: (- remtries 1))) - (begin - (debug:print-error 0 *default-log-port* " Failed to find younger locks for test with id " test-id ", error: " ((condition-property-accessor 'exn 'message) exn) ", giving up.") - #f)) - (let ((res #f)) - (sqlite3:for-each-row - (lambda (tid) - ;; Actually this should not be needed as mystart cannot be simultaneously less than and test-id same as - (if (not (equal? tid test-id)) - (set! 
res tid))) - (lock-queue:db-dat-get-db dbdat) - "SELECT test_id FROM queue WHERE start_time > ?;" mystart) - res))) - -(define (lock-queue:get-lock dbdat test-id #!key (count 10)(waiting-msg #f)) - (tasks:wait-on-journal (lock-queue:db-dat-get-path dbdat) 1200 remove: #t waiting-msg: "lock-queue:get-lock, waiting on journal") - (let* ((res #f) - (db (lock-queue:db-dat-get-db dbdat)) - (lckqry (sqlite3:prepare db "SELECT test_id,run_lock FROM runlocks WHERE run_lock='locked';")) - (mklckqry (sqlite3:prepare db "INSERT INTO runlocks (test_id,run_lock) VALUES (?,'locked');"))) - (let ((result - (handle-exceptions - exn - (begin - (debug:print 0 *default-log-port* "WARNING: failed to get queue lock. Removing lock db and returning fail") ;; Will try again in a few seconds") - (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) - (thread-sleep! 10) - ;; (if (> count 0) - ;; #f ;; (lock-queue:get-lock dbdat test-id count: (- count 1)) - give up on retries - ;; (begin ;; never recovered, remote the lock file and return #f, no lock obtained - (lock-queue:delete-lock-db dbdat) - #f) - (sqlite3:with-transaction - db - (lambda () - (sqlite3:for-each-row (lambda (tid lockstate) - (set! res (list tid lockstate))) - lckqry) - (if res - (if (equal? (car res) test-id) - #t ;; already have the lock - #f) - (begin - (sqlite3:execute mklckqry test-id) - ;; if no error handled then return #t for got the lock - #t))))))) - (sqlite3:finalize! lckqry) - (sqlite3:finalize! mklckqry) - result))) - -(define (lock-queue:release-lock fname test-id #!key (count 10)) - (let* ((dbdat (lock-queue:open-db fname))) - (tasks:wait-on-journal (lock-queue:db-dat-get-path dbdat) 1200 "lock-queue:release-lock; waiting on journal") - (handle-exceptions - exn - (begin - (debug:print 0 *default-log-port* "WARNING: Failed to release queue lock. 
Will try again in few seconds") - (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) - (thread-sleep! (/ count 10)) - (if (> count 0) - (begin - (sqlite3:finalize! (lock-queue:db-dat-get-db dbdat)) - (lock-queue:release-lock fname test-id count: (- count 1))) - (let ((journal (conc fname "-journal"))) - ;; If we've tried ten times and failed there is a serious problem - ;; try to remove the lock db and allow it to be recreated - (handle-exceptions - exn - #f - (if (common:file-exists? journal)(delete-file journal)) - (if (common:file-exists? fname) (delete-file fname)) - #f)))) - (sqlite3:execute (lock-queue:db-dat-get-db dbdat) "DELETE FROM runlocks WHERE test_id=?;" test-id) - (sqlite3:finalize! (lock-queue:db-dat-get-db dbdat))))) - -(define (lock-queue:steal-lock dbdat test-id #!key (count 10)) - (debug:print-info 0 *default-log-port* "Attempting to steal lock at " (lock-queue:db-dat-get-path dbdat)) - (tasks:wait-on-journal (lock-queue:db-dat-get-path dbdat) 1200 "lock-queue:steal-lock; waiting on journal") - (handle-exceptions - exn - (begin - (debug:print 0 *default-log-port* "WARNING: Failed to steal queue lock. Will try again in few seconds") - (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) - (thread-sleep! 
10) - (if (> count 0) - (lock-queue:steal-lock dbdat test-id count: (- count 1)) - #f)) - (sqlite3:execute (lock-queue:db-dat-get-db dbdat) "DELETE FROM runlocks WHERE run_lock='locked';")) - (lock-queue:get-lock dbdat test-it)) - -;; returns #f if ok to skip the task -;; returns #t if ok to proceed with task -;; otherwise waits -;; -(define (lock-queue:wait-turn fname test-id #!key (count 10)(waiting-msg #f)) - (let* ((dbdat (lock-queue:open-db fname)) - (mystart (current-seconds)) - (db (lock-queue:db-dat-get-db dbdat))) - ;; (tasks:wait-on-journal (lock-queue:db-dat-get-path dbdat) 1200 waiting-msg: "lock-queue:wait-turn; waiting on journal file") - (handle-exceptions - exn - (begin - (debug:print 0 *default-log-port* "WARNING: Failed to find out if it is ok to skip the wait queue. Will try again in few seconds") - (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) - (print-call-chain (current-error-port)) - (thread-sleep! 10) - (if (> count 0) - (begin - (sqlite3:finalize! db) - (lock-queue:wait-turn fname test-id count: (- count 1))) - (begin - (debug:print 0 *default-log-port* "Giving up calls to lock-queue:wait-turn for test-id " test-id " at path " fname ", printing call chain") - (print-call-chain (current-error-port)) - #f))) - ;; wait 10 seconds and then check to see if someone is already updating the html - (thread-sleep! 10) - (if (not (lock-queue:any-younger? dbdat mystart test-id)) ;; no processing in flight, must try to start processing - (begin - (tasks:wait-on-journal (lock-queue:db-dat-get-path dbdat) 1200 waiting-msg: "lock-queue:wait-turn; waiting on journal file") - (sqlite3:execute - db - "INSERT OR REPLACE INTO queue (test_id,start_time,state) VALUES (?,?,'waiting');" - test-id mystart) - ;; (thread-sleep! 1) ;; give other tests a chance to register - (let ((result - (let loop ((younger-waiting (lock-queue:any-younger? 
dbdat mystart test-id))) - (if younger-waiting - (begin - ;; no need for us to wait. mark in the lock queue db as skipping - ;; no point in marking anything in the queue - simply never register this - ;; test as it is *covered* by a previously started update to the html file - ;; (lock-queue:set-state dbdat test-id "skipping") - #f) ;; let the calling process know that nothing needs to be done - (if (lock-queue:get-lock dbdat test-id) - #t - (if (> (- (current-seconds) mystart) 36000) ;; waited too long, steal the lock - (lock-queue:steal-lock dbdat test-id) - (begin - (thread-sleep! 1) - (loop (lock-queue:any-younger? dbdat mystart test-id))))))))) - (sqlite3:finalize! db) - result)))))) - - -;; (use trace) -;; (trace lock-queue:get-lock lock-queue:release-lock lock-queue:wait-turn lock-queue:any-younger? lock-queue:set-state) Index: margs.scm ================================================================== --- margs.scm +++ margs.scm @@ -17,72 +17,5 @@ (declare (unit margs)) ;; (declare (uses common)) -(define args:arg-hash (make-hash-table)) - -(define (args:get-arg arg . default) - (if (null? default) - (hash-table-ref/default args:arg-hash arg #f) - (hash-table-ref/default args:arg-hash arg (car default)))) - -(define (args:any? . args) - (not (null? (filter (lambda (x) x) - (map args:get-arg args))))) - -(define (args:get-arg-from ht arg . default) - (if (null? default) - (hash-table-ref/default ht arg #f) - (hash-table-ref/default ht arg (car default)))) - -(define (args:usage . args) - (if (> (length args) 0) - (apply print "ERROR: " args)) - (if (string? help) - (print help) - (print "Usage: " (car (argv)) " ... ")) - (exit 0)) - - ;; one-of args defined -(define (args:any-defined? . param) - (let ((res #f)) - (for-each - (lambda (arg) - (if (args:get-arg arg)(set! 
res #t))) - param) - res)) - -;; args: -(define (args:get-args args params switches arg-hash num-needed) - (let* ((numargs (length args)) - (adj-num-needed (if num-needed (+ num-needed 2) #f))) - (if (< numargs (if adj-num-needed adj-num-needed 2)) - (if (>= num-needed 1) - (args:usage "No arguments provided") - '()) - (let loop ((arg (cadr args)) - (tail (cddr args)) - (remargs '())) - (cond - ((member arg params) ;; args with params - (if (< (length tail) 1) - (args:usage "param given without argument " arg) - (let ((val (car tail)) - (newtail (cdr tail))) - (hash-table-set! arg-hash arg val) - (if (null? newtail) remargs - (loop (car newtail)(cdr newtail) remargs))))) - ((member arg switches) ;; args with no params (i.e. switches) - (hash-table-set! arg-hash arg #t) - (if (null? tail) remargs - (loop (car tail)(cdr tail) remargs))) - (else - (if (null? tail)(append remargs (list arg)) ;; return the non-used args - (loop (car tail)(cdr tail)(append remargs (list arg)))))))) - )) - -(define (args:print-args remargs arg-hash) - (print "ARGS: " remargs) - (for-each (lambda (arg) - (print " " arg " " (hash-table-ref/default arg-hash arg #f))) - (hash-table-keys arg-hash))) Index: mt.scm ================================================================== --- mt.scm +++ mt.scm @@ -38,271 +38,5 @@ (include "key_records.scm") (include "db_records.scm") (include "run_records.scm") (include "test_records.scm") -;; This is the Megatest API. All generally "useful" routines will be wrapped or extended -;; here. 
- -;;====================================================================== -;; R U N S -;;====================================================================== - -;; runs:get-runs-by-patt -;; get runs by list of criteria -;; register a test run with the db -;; -;; Use: (db-get-value-by-header (db:get-header runinfo)(db:get-rows runinfo)) -;; to extract info from the structure returned -;; -(define (mt:get-runs-by-patt keys runnamepatt targpatt) - (let loop ((runsdat (rmt:get-runs-by-patt keys runnamepatt targpatt 0 500 #f 0)) - (res '()) - (offset 0) - (limit 500)) - ;; (print "runsdat: " runsdat) - (let* ((header (vector-ref runsdat 0)) - (runslst (vector-ref runsdat 1)) - (full-list (append res runslst)) - (have-more (eq? (length runslst) limit))) - ;; (debug:print 0 *default-log-port* "header: " header " runslst: " runslst " have-more: " have-more) - (if have-more - (let ((new-offset (+ offset limit)) - (next-batch (rmt:get-runs-by-patt keys runnamepatt targpatt offset limit #f 0))) - (debug:print-info 4 *default-log-port* "More than " limit " runs, have " (length full-list) " runs so far.") - (debug:print-info 0 *default-log-port* "next-batch: " next-batch) - (loop next-batch - full-list - new-offset - limit)) - (vector header full-list))))) - -;;====================================================================== -;; T E S T S -;;====================================================================== - -(define (mt:get-tests-for-run run-id testpatt states status #!key (not-in #t) (sort-by 'event_time) (sort-order "ASC") (qryvals #f)(last-update #f)) - (let loop ((testsdat (rmt:get-tests-for-run run-id testpatt states status 0 500 not-in sort-by sort-order qryvals last-update 'normal)) - (res '()) - (offset 0) - (limit 500)) - (let* ((full-list (append res testsdat)) - (have-more (eq? 
(length testsdat) limit))) - (if have-more - (let ((new-offset (+ offset limit))) - (debug:print-info 4 *default-log-port* "More than " limit " tests, have " (length full-list) " tests so far.") - (loop (rmt:get-tests-for-run run-id testpatt states status new-offset limit not-in sort-by sort-order qryvals last-update 'normal) - full-list - new-offset - limit)) - full-list)))) - -(define (mt:lazy-get-prereqs-not-met run-id waitons ref-item-path #!key (mode '(normal))(itemmaps #f) ) - (let* ((key (list run-id waitons ref-item-path mode)) - (res (hash-table-ref/default *pre-reqs-met-cache* key #f)) - (useres (let ((last-time (if (vector? res) (vector-ref res 0) #f))) - (if last-time - (< (current-seconds)(+ last-time 5)) - #f)))) - (if useres - (let ((result (vector-ref res 1))) - (debug:print 4 *default-log-port* "Using lazy value res: " result) - result) - (let ((newres (rmt:get-prereqs-not-met run-id waitons ref-item-path mode: mode itemmaps: itemmaps))) - (hash-table-set! *pre-reqs-met-cache* key (vector (current-seconds) newres)) - newres)))) - -(define (mt:get-run-stats dbstruct run-id) -;; Get run stats from local access, move this ... but where? - (db:get-run-stats dbstruct run-id)) - -(define (mt:discard-blocked-tests run-id failed-test tests test-records) - (if (null? tests) - tests - (begin - (debug:print-info 1 *default-log-port* "Discarding tests from " tests " that are waiting on " failed-test) - (let loop ((testn (car tests)) - (remt (cdr tests)) - (res '())) - (let* ((test-dat (hash-table-ref/default test-records testn (vector #f #f '()))) - (waitons (vector-ref test-dat 2))) - ;; (print "mt:discard-blocked-tests run-id: " run-id " failed-test: " failed-test " testn: " testn " with waitons: " waitons) - (if (null? 
remt) - (let ((new-res (reverse res))) - ;; (print " new-res: " new-res) - new-res) - (loop (car remt) - (cdr remt) - (if (member failed-test waitons) - (begin - (debug:print 0 *default-log-port* "Discarding test " testn "(" test-dat ") due to " failed-test) - res) - (cons testn res))))))))) - -;;====================================================================== -;; T R I G G E R S -;;====================================================================== - -(define (mt:run-trigger cmd test-id test-rundir trigger logname test-name item-path event-time actual-state actual-status) - ;; Putting the commandline into ( )'s means no control over the shell. - ;; stdout and stderr will be caught in the NBFAKE or mt_launch.log files - ;; or equivalent. No need to do this. Just run it? - (let* ((fullcmd (conc "nbfake " - cmd " " - test-id " " - test-rundir " " - trigger " " - test-name " " - item-path " " ;; has / prepended to deal with toplevel tests - actual-state " " - actual-status " " - event-time - )) - (prev-nbfake-log (get-environment-variable "NBFAKE_LOG"))) - (setenv "NBFAKE_LOG" (conc (cond - ((and (directory-exists? test-rundir) - (file-write-access? test-rundir)) - test-rundir) - ((and (directory-exists? *toppath*) - (file-write-access? *toppath*)) - *toppath*) - (else (conc "/tmp/" (current-user-name)))) - "/" logname)) - (debug:print-info 0 *default-log-port* "TRIGGERED on " trigger ", running command " fullcmd " output at " (get-environment-variable "NBFAKE_LOG")) - ;; (call-with-environment-variables - ;; `(("NBFAKE_LOG" . 
,(conc test-rundir "/" logname))) - ;; (lambda () - (process-run fullcmd) - (if prev-nbfake-log - (setenv "NBFAKE_LOG" prev-nbfake-log) - (unsetenv "NBFAKE_LOG")) - )) ;; )) - -(define (mt:process-triggers dbstruct run-id test-id newstate newstatus) - (if test-id - (let* ((test-dat (db:get-test-info-by-id dbstruct run-id test-id))) - (if test-dat - (let* ((test-rundir (db:test-get-rundir test-dat)) ;; ) ;; ) - (test-name (db:test-get-testname test-dat)) - (item-path (db:test-get-item-path test-dat)) - (duration (db:test-get-run_duration test-dat)) - (comment (db:test-get-comment test-dat)) - (event-time (db:test-get-event_time test-dat)) - (tconfig #f) - (state (if newstate newstate (db:test-get-state test-dat))) - (status (if newstatus newstatus (db:test-get-status test-dat)))) - ;; (mutex-lock! *triggers-mutex*) - (handle-exceptions - exn - (begin - (debug:print-error 0 *default-log-port* " Exception in mt:process-triggers for run-id="run-id" test-id="test-id" newstate="newstate" newstatus="newstatus - "\n error: " ((condition-property-accessor 'exn 'message) exn) - "\n test-rundir="test-rundir - "\n test-name="test-name - "\n item-path="item-path - "\n state="state - "\n status="status - "\n") - (print-call-chain (current-error-port)) - #f) - (if (and test-name - test-rundir) ;; #f means no dir set yet - ;; (common:file-exists? test-rundir) - ;; (directory? test-rundir)) - (call-with-environment-variables - (list (cons "MT_TEST_NAME" (or test-name "no such test")) - (cons "MT_TEST_RUN_DIR" (or test-rundir "no test directory yet")) - (cons "MT_ITEMPATH" (or item-path ""))) - (lambda () - (if (directory-exists? test-rundir) - (push-directory test-rundir) - (push-directory *toppath*)) - (set! 
tconfig (mt:lazy-read-test-config test-name)) - (for-each (lambda (trigger) - (let* ((munged-trigger (string-translate trigger "/ " "--")) - (logname (conc "last-trigger-" munged-trigger ".log"))) - ;; first any triggers from the testconfig - (let ((cmd (configf:lookup tconfig "triggers" trigger))) - (if cmd (mt:run-trigger cmd test-id test-rundir trigger (conc "tconfig-" logname) test-name item-path event-time state status))) - ;; next any triggers from megatest.config - (let ((cmd (configf:lookup *configdat* "triggers" trigger))) - (if cmd (mt:run-trigger cmd test-id test-rundir trigger (conc "mtconfig-" logname) test-name item-path event-time state status))))) - (list - (conc state "/" status) - (conc state "/") - (conc "/" status))) - (pop-directory)) - ))) - ;; (mutex-unlock! *triggers-mutex*) - ))))) - -;;====================================================================== -;; S T A T E A N D S T A T U S F O R T E S T S -;;====================================================================== - -;; speed up for common cases with a little logic -(define (mt:test-set-state-status-by-id run-id test-id newstate newstatus newcomment) - (if (not (and run-id test-id)) - (begin - (debug:print-error 0 *default-log-port* "bad data handed to mt:test-set-state-status-by-id, run-id=" run-id ", test-id=" test-id ", newstate=" newstate) - (print-call-chain (current-error-port)) - #f) - (begin - ;; cond - ;; ((and newstate newstatus newcomment) - ;; (rmt:general-call 'state-status-msg run-id newstate newstatus newcomment test-id)) - ;; ((and newstate newstatus) - ;; (rmt:general-call 'state-status run-id newstate newstatus test-id)) - ;; (else - ;; (if newstate (rmt:general-call 'set-test-state run-id newstate test-id)) - ;; (if newstatus (rmt:general-call 'set-test-status run-id newstatus test-id)) - ;; (if newcomment (rmt:general-call 'set-test-comment run-id newcomment test-id)))) - (rmt:set-state-status-and-roll-up-items run-id test-id #f newstate newstatus newcomment) 
- ;; (mt:process-triggers run-id test-id newstate newstatus) - #t))) - - -(define (mt:test-set-state-status-by-id-unless-completed run-id test-id newstate newstatus newcomment) - (let* ((test-vec (rmt:get-testinfo-state-status run-id test-id)) - (state (vector-ref test-vec 3))) - (if (equal? state "COMPLETED") - #t - (rmt:set-state-status-and-roll-up-items run-id test-id #f newstate newstatus newcomment)))) - - -(define (mt:test-set-state-status-by-testname run-id test-name item-path new-state new-status new-comment) - ;(let ((test-id (rmt:get-test-id run-id test-name item-path))) - (rmt:set-state-status-and-roll-up-items run-id test-name item-path new-state new-status new-comment) - ;; (mt:process-triggers run-id test-id new-state new-status) - #t);) - ;;(mt:test-set-state-status-by-id run-id test-id new-state new-status new-comment))) - -(define (mt:test-set-state-status-by-testname-unless-completed run-id test-name item-path new-state new-status new-comment) - (let ((test-id (rmt:get-test-id run-id test-name item-path))) - (mt:test-set-state-status-by-id-unless-completed run-id test-id new-state new-status new-comment))) - -(define (mt:lazy-read-test-config test-name) - (let ((tconf (hash-table-ref/default *testconfigs* test-name #f))) - (if tconf - tconf - (let ((test-dirs (tests:get-tests-search-path *configdat*))) - (let loop ((hed (car test-dirs)) - (tal (cdr test-dirs))) - ;; Setting MT_LINKTREE here is almost certainly unnecessary. - (let ((tconfig-file (conc hed "/" test-name "/testconfig"))) - (if (and (common:file-exists? tconfig-file) - (file-read-access? tconfig-file)) - (let ((link-tree-path (common:get-linktree)) ;; (configf:lookup *configdat* "setup" "linktree")) - (old-link-tree (get-environment-variable "MT_LINKTREE"))) - (if link-tree-path (setenv "MT_LINKTREE" link-tree-path)) - (let ((newtcfg (read-config tconfig-file #f #f))) ;; NOTE: Does NOT run [system ...] - (hash-table-set! 
*testconfigs* test-name newtcfg) - (if old-link-tree - (setenv "MT_LINKTREE" old-link-tree) - (unsetenv "MT_LINKTREE")) - newtcfg)) - (if (null? tal) - (begin - (debug:print-error 0 *default-log-port* "No readable testconfig found for " test-name) - #f) - (loop (car tal)(cdr tal)))))))))) - Index: portlogger.scm ================================================================== --- portlogger.scm +++ portlogger.scm @@ -24,166 +24,5 @@ (declare (unit portlogger)) (declare (uses db)) ;; lsof -i - - -(define (portlogger:open-db fname) - (let* ((avail (tasks:wait-on-journal fname 5 remove: #t)) ;; wait up to about 10 seconds for the journal to go away - (exists (common:file-exists? fname)) - (db (if avail - (sqlite3:open-database fname) - (begin - (system (conc "rm -f " fname)) - (sqlite3:open-database fname)))) - (handler (make-busy-timeout 136000)) - (canwrite (file-write-access? fname))) - ;; (db-init (lambda () - ;; (sqlite3:execute - ;; db - ;; "CREATE TABLE IF NOT EXISTS ports ( - ;; port INTEGER PRIMARY KEY, - ;; state TEXT DEFAULT 'not-used', - ;; fail_count INTEGER DEFAULT 0, - ;; update_time TIMESTAMP DEFAULT (strftime('%s','now')) );")))) - (sqlite3:set-busy-handler! db handler) - (db:set-sync db) ;; (sqlite3:execute db "PRAGMA synchronous = 0;") - ;; (if (not exists) ;; needed with IF NOT EXISTS? - (sqlite3:execute - db - "CREATE TABLE IF NOT EXISTS ports ( - port INTEGER PRIMARY KEY, - state TEXT DEFAULT 'not-used', - fail_count INTEGER DEFAULT 0, - update_time TIMESTAMP DEFAULT (strftime('%s','now')) );") - db)) - -(define (portlogger:open-run-close proc . params) - (let* ((fname (conc "/tmp/." (current-user-name) "-portlogger.db")) - (avail (tasks:wait-on-journal fname 10))) ;; wait up to about 10 seconds for the journal to go away - (handle-exceptions - exn - (begin - ;; (release-dot-lock fname) - (debug:print-error 0 *default-log-port* "portlogger:open-run-close failed. 
" proc " " params) - (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) - (debug:print 5 *default-log-port* "exn=" (condition->list exn)) - (if (common:file-exists? fname)(delete-file fname)) ;; brutally get rid of it - (print-call-chain (current-error-port))) - (let* (;; (lock (obtain-dot-lock fname 2 9 10)) - (db (portlogger:open-db fname)) - (res (apply proc db params))) - (sqlite3:finalize! db) - ;; (release-dot-lock fname) - res)))) - -;; (fold-row PROC INIT DATABASE SQL . PARAMETERS) -(define (portlogger:take-port db portnum) - (let* ((qry1 (sqlite3:prepare db "INSERT INTO ports (port,state) VALUES (?,?);")) - (qry2 (sqlite3:prepare db "UPDATE ports SET state=?,update_time=strftime('%s','now') WHERE port=?;")) - (qry3 (sqlite3:prepare db "SELECT state FROM ports WHERE port=?;")) - (res (sqlite3:with-transaction - db - (lambda () - ;; (fold-row (lambda (var curr) (or var curr)) #f db "SELECT var FROM foo WHERE id=100;") - (let* ((curr #f) - (res #f)) - (set! curr (sqlite3:fold-row - (lambda (var curr) - (or curr var curr)) - "not-tried" - qry3 - portnum)) - ;; (print "curr=" curr) - (set! res (case (string->symbol curr) - ((released) (sqlite3:execute qry2 "taken" portnum) 'taken) - ((not-tried) (sqlite3:execute qry1 portnum "taken") 'taken) - ((taken) 'already-taken) - ((failed) 'failed) - (else 'error))) - ;; (print "res=" res) - res))))) - (sqlite3:finalize! qry1) - (sqlite3:finalize! qry2) - (sqlite3:finalize! qry3) - res)) - -(define (portlogger:get-prev-used-port db) - (handle-exceptions - exn - (begin - (debug:print 0 *default-log-port* "EXCEPTION: portlogger database probably overloaded or unreadable. 
If you see this message again remove /tmp/.$USER-portlogger.db") - (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) - (debug:print 5 *default-log-port* "exn=" (condition->list exn)) - (print-call-chain (current-error-port)) - (debug:print 0 *default-log-port* "Continuing anyway.") - #f) - (sqlite3:fold-row - (lambda (var curr) - (or curr var curr)) - #f - db - "SELECT (port) FROM ports WHERE state='released' LIMIT 1;"))) - -(define (portlogger:find-port db) - (let* ((lowport (let ((val (configf:lookup *configdat* "server" "lowport"))) - (if (and val - (string->number val)) - (string->number val) - 32768))) - (portnum (or (portlogger:get-prev-used-port db) - (+ lowport ;; top of registered ports is 49152 but lets use ports in the registered range - (random (- 64000 lowport)))))) - (handle-exceptions - exn - (begin - (debug:print 0 *default-log-port* "EXCEPTION: portlogger database probably overloaded or unreadable. If you see this message again remove /tmp/.$USER-portlogger.db") - (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) - (debug:print 5 *default-log-port* "exn=" (condition->list exn)) - (print-call-chain (current-error-port)) - (debug:print 0 *default-log-port* "Continuing anyway.")) - (portlogger:take-port db portnum)) - portnum)) - -;; set port to "released", "failed" etc. -;; -(define (portlogger:set-port db portnum value) - (sqlite3:execute db "UPDATE ports SET state=?,update_time=strftime('%s','now') WHERE port=?;" value portnum)) - -;; set port to failed (attempted to take but got error) -;; -(define (portlogger:set-failed db portnum) - (sqlite3:execute db "UPDATE ports SET state='failed',fail_count=fail_count+1,update_time=strftime('%s','now') WHERE port=?;" portnum)) - -;;====================================================================== -;; MAIN -;;====================================================================== - -(define (portlogger:main . 
args) - (let* ((dbfname (conc "/tmp/." (current-user-name) "-portlogger.db")) - (db (portlogger:open-db dbfname)) - (numargs (length args)) - (result - (handle-exceptions - exn - (begin - (debug:print 0 *default-log-port* "EXCEPTION: portlogger database at " dbfname " probably overloaded or unreadable. Try removing it.") - (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) - (debug:print 5 *default-log-port* "exn=" (condition->list exn)) - (debug:print 0 *default-log-port* " status: " ((condition-property-accessor 'sqlite3 'status) exn)) - (print-call-chain (current-error-port)) - #f) - (case (string->symbol (car args)) ;; commands with two or more params - ((take)(portlogger:take-port db (string->number (cadr args)))) - ((find)(portlogger:find-port db)) - ((set) (let ((port (cadr args)) - (state (caddr args))) - (portlogger:set-port db - (if (number? port) port (string->number port)) - state) - state)) - ((failed)(portlogger:set-failed db (string->number (cadr args))) 'failed))))) - (sqlite3:finalize! db) - result)) - -;; (print (apply portlogger:main (cdr (argv)))) Index: runconfig.scm ================================================================== --- runconfig.scm +++ runconfig.scm @@ -25,131 +25,10 @@ (declare (unit runconfig)) (declare (uses common)) (include "common_records.scm") -(define (runconfig:read fname target environ-patt) - (let ((ht (make-hash-table))) - (if target (hash-table-set! ht target '())) - (read-config fname ht #t environ-patt: environ-patt sections: (if target (list "default" target) #f)))) - -;; NB// to process a runconfig ensure to use environ-patt with target! 
-;; -(define (setup-env-defaults fname run-id already-seen keyvals #!key (environ-patt #f)(change-env #t)) - (let* ((keys (map car keyvals)) - (thekey (if keyvals - (string-intersperse (map (lambda (x)(if x x "-na-")) (map cadr keyvals)) "/") - (or (common:args-get-target) - (get-environment-variable "MT_TARGET") - (begin - (debug:print-error 0 *default-log-port* "setup-env-defaults called with no run-id or -target or -reqtarg") - "nothing matches this I hope")))) - ;; Why was system disallowed in the reading of the runconfigs file? - ;; NOTE: Should be setting env vars based on (target|default) - (confdat (runconfig:read fname thekey environ-patt)) - (whatfound (make-hash-table)) - (finaldat (make-hash-table)) - (sections (list "default" thekey))) - (if (not *target*)(set! *target* thekey)) ;; may save a db access or two but repeats db:get-target code - (debug:print 4 *default-log-port* "Using key=\"" thekey "\"") - - (if change-env - (for-each ;; NB// This can be simplified with new content of keyvals having all that is needed. - (lambda (keyval) - (safe-setenv (car keyval)(cadr keyval))) - keyvals)) - - (for-each - (lambda (section) - (let ((section-dat (hash-table-ref/default confdat section #f))) - (if section-dat - (for-each - (lambda (envvar) - (let ((val (cadr (assoc envvar section-dat)))) - (hash-table-set! whatfound section (+ (hash-table-ref/default whatfound section 0) 1)) - (if (and (string? envvar) - (string? val) - change-env) - (safe-setenv envvar val)) - (hash-table-set! finaldat envvar val))) - (map car section-dat))))) - sections) - (if already-seen - (begin - (debug:print 2 *default-log-port* "Key settings found in runconfigs.config:") - (for-each (lambda (fullkey) - (debug:print 2 *default-log-port* (format #f "~20a ~a\n" fullkey (hash-table-ref/default whatfound fullkey 0)))) - sections) - (debug:print 2 *default-log-port* "---") - (set! 
*already-seen-runconfig-info* #t))) - ;; finaldat ;; was returning this "finaldat" which would be good but conflicts with other uses - confdat - )) - -(define (set-run-config-vars run-id keyvals targ-from-db) - (push-directory *toppath*) ;; the push/pop doesn't appear to do anything ... - (let ((runconfigf (conc *toppath* "/runconfigs.config")) - (targ (or (common:args-get-target) - targ-from-db - (get-environment-variable "MT_TARGET")))) - (pop-directory) - (if (common:file-exists? runconfigf) - (setup-env-defaults runconfigf run-id #t keyvals - environ-patt: (conc "(default" - (if targ - (conc "|" targ ")") - ")"))) - (debug:print 0 *default-log-port* "WARNING: You do not have a run config file: " runconfigf)))) - -;; given (a (b c) d) return ((a b d)(a c d)) -;; NOTE: this feels like it has been done before - perhaps with items handling? -;; -(define (runconfig:combinations inlst) - (let loop ((hed (car inlst)) - (tal (cdr inlst)) - (res '())) - ;; (print "res: " res " hed: " hed) - (if (list? hed) - (let ((newres (if (null? res) ;; first time through convert incoming items to list of items - (map list hed) - (apply append - (map (lambda (r) ;; iterate over items in res - (map (lambda (h) ;; iterate over items in hed - (append r (list h))) - hed)) - res))))) - ;; (print "newres1: " newres) - (if (null? tal) - newres - (loop (car tal)(cdr tal) newres))) - (let ((newres (if (null? res) - (list (list hed)) - (map (lambda (r) - (append r (list hed))) - res)))) - ;; (print "newres2: " newres) - (if (null? 
tal) - newres - (loop (car tal)(cdr tal) newres)))))) - -;; multi-part expand -;; Given a/b,c,d/e,f return a/b/e a/b/f a/c/e a/c/f a/d/e a/d/f -;; -(define (runconfig:expand target) - (let* ((parts (map (lambda (x) - (string-split x ",")) - (string-split target "/")))) - (map (lambda (x) - (string-intersperse x "/")) - (runconfig:combinations parts)))) - -;; multi-target expansion -;; a/b/c/x,y,z a/b/d/x,y => a/b/c/x a/b/c/y a/b/c/z a/b/d/x a/b/d/y -;; -(define (runconfig:expand-target target-strs) - (delete-duplicates - (apply append (map runconfig:expand (string-split target-strs " "))))) #| (if (null? target-strs) '() (let loop ((hed (car target-strs)) Index: runs.scm ================================================================== --- runs.scm +++ runs.scm @@ -38,2619 +38,5 @@ (include "key_records.scm") (include "db_records.scm") (include "run_records.scm") (include "test_records.scm") -;; (include "debugger.scm") - -;; use this struct to facilitate refactoring -;; - -(defstruct runs:dat - reglen regfull - runname max-concurrent-jobs run-id - test-patts required-tests test-registry - registry-mutex flags keyvals run-info all-tests-registry - can-run-more-tests - ((can-run-more-tests-count 0) : fixnum)) - -(defstruct runs:testdat - hed tal reg reruns test-record - test-name item-path jobgroup - waitons testmode newtal itemmaps prereqs-not-met) - - -(define (runs:get-mt-env-alist run-id runname target testname itempath) - ;;(bb-check-path msg: "runs:set-megatest-env-vars entry") - `(("MT_TEST_NAME" . ,testname) - - ("MT_ITEMPATH" . ,itempath) - - ("MT_TARGET" . ,target) - - ("MT_RUNNAME" . ,runname) - - ("MT_RUN_AREA_HOME" . ,*toppath*) - - ,@(let* ((link-tree (common:get-linktree))) ;; (configf:lookup *configdat* "setup" "linktree"))) - (if link-tree - (list (cons "MT_LINKTREE" link-tree) - - (cons "MT_TEST_RUN_DIR" - (conc link-tree "/" target "/" runname "/" testname - (if (and (string? itempath) (not (equal? 
itempath ""))) - (conc "/" itempath) - ""))) - ) - '())) - - ,@(map - (lambda (key) - (cons (car key) (cadr key))) - (keys:target->keyval (rmt:get-keys) target)) - - ,@(map (lambda (var) - (let ((val (configf:lookup *configdat* "env-override" var))) - (cons var val))) - (configf:section-vars *configdat* "env-override")))) - - - - - - -;; set up needed environment variables given a run-id and optionally a target, itempath etc. -;; -(define (runs:set-megatest-env-vars run-id #!key (inkeys #f)(inrunname #f)(inkeyvals #f)(intarget #f)(testname #f)(itempath #f)) - ;;(bb-check-path msg: "runs:set-megatest-env-vars entry") - (let* ((target (or intarget - (common:args-get-target) - (get-environment-variable "MT_TARGET"))) - (keys (if inkeys inkeys (rmt:get-keys))) - (keyvals (if inkeyvals inkeyvals (keys:target->keyval keys target))) - (vals (hash-table-ref/default *env-vars-by-run-id* run-id #f)) - (link-tree (common:get-linktree))) ;; (configf:lookup *configdat* "setup" "linktree"))) - (if testname (setenv "MT_TEST_NAME" testname)) - (if itempath (setenv "MT_ITEMPATH" itempath)) - - ;; get the info from the db and put it in the cache - (if link-tree - (setenv "MT_LINKTREE" link-tree) - (debug:print-error 0 *default-log-port* "linktree not set, should be set in megatest.config in [setup] section.")) - (if (not vals) - (let ((ht (make-hash-table))) - (hash-table-set! *env-vars-by-run-id* run-id ht) - (set! vals ht) - (for-each - (lambda (key) - (hash-table-set! 
vals (car key) (cadr key))) - keyvals))) - ;; from the cached data set the vars - - (hash-table-for-each - vals - (lambda (key val) - (debug:print 2 *default-log-port* "setenv " key " " val) - (safe-setenv key val))) - ;;(bb-check-path msg: "runs:set-megatest-env-vars block 1") - ;;(BB> "*env-vars-by-run-id*/runid("run-id" vals="(hash-table->alist vals)) - - (if (not (get-environment-variable "MT_TARGET"))(setenv "MT_TARGET" target)) - ;; we had a case where there was an exception generated by the hash-table-ref - ;; due to *configdat* being #f Adding a handle and exit - (let fatal-loop ((count 0)) - (handle-exceptions - exn - (let ((call-chain (get-call-chain)) - (msg ((condition-property-accessor 'exn 'message) exn))) - (if (< count 5) - (begin ;; this call is colliding, do some crude stuff to fix it. - (debug:print 0 *default-log-port* "ERROR: *configdat* was inaccessible! This should never happen. Retry #" count) - (launch:setup force-reread: #t) - (fatal-loop (+ count 1))) - (begin - (debug:print 0 *default-log-port* "FATAL: *configdat* was inaccessible! This should never happen. Retried " count " times. Message: " msg) - (debug:print 0 *default-log-port* "Call chain:") - (with-output-to-port *default-log-port* - - (lambda () - (print "*configdat* is >>"*configdat*"<<") - (pp *configdat*) - (pp call-chain))) - - (exit 1)))) - ;;(bb-check-path msg: "runs:set-megatest-env-vars block 1.5") - (when (or (not *configdat*) (not (hash-table? *configdat*))) - (debug:print 0 *default-log-port* "WARNING: *configdat* was inaccessible! This should never happen. Brute force reread.") - ;;(BB> "ERROR: *configdat* was inaccessible! This should never happen. Brute force reread.") - (thread-sleep! 2) ;; assuming nfs lag. - (launch:setup force-reread: #t)) - (alist->env-vars (hash-table-ref/default *configdat* "env-override" '())))) ;;;; environment is tainted HERE in this let block. 
- ;;(bb-check-path msg: "runs:set-megatest-env-vars block 2") - ;; Lets use this as an opportunity to put MT_RUNNAME in the environment - (let ((runname (if inrunname inrunname (rmt:get-run-name-from-id run-id)))) - (if runname - (setenv "MT_RUNNAME" runname) - (debug:print-error 0 *default-log-port* "no value for runname for id " run-id))) - (setenv "MT_RUN_AREA_HOME" *toppath*) - ;; if a testname and itempath are available set the remaining appropriate variables - (if testname (setenv "MT_TEST_NAME" testname)) - (if itempath (setenv "MT_ITEMPATH" itempath)) - ;;(bb-check-path msg: "runs:set-megatest-env-vars block 3") - (if (and testname link-tree) - (setenv "MT_TEST_RUN_DIR" (conc (getenv "MT_LINKTREE") "/" - (getenv "MT_TARGET") "/" - (getenv "MT_RUNNAME") "/" - (getenv "MT_TEST_NAME") - (if (and itempath - (not (equal? itempath ""))) - (conc "/" itempath) - "")))))) - -(define (set-item-env-vars itemdat) - (for-each (lambda (item) - (debug:print 2 *default-log-port* "setenv " (car item) " " (cadr item)) - (setenv (car item) (cadr item))) - itemdat)) - -;; Every time can-run-more-tests is called increment the delay -;; -;; NOTE: We run this server-side!! Do not use this global except in the runs:can-run-more-tests routine -;; -(define *last-num-running-tests* 0) -;; (define *runs:can-run-more-tests-count* 0) -(define (runs:shrink-can-run-more-tests-count runsdat) - (runs:dat-can-run-more-tests-count-set! runsdat 0)) - -(define (runs:inc-can-run-more-tests-count runsdat) - (runs:dat-can-run-more-tests-count-set! - runsdat - (+ (runs:dat-can-run-more-tests-count runsdat) 1))) - -;; (set! *runs:can-run-more-tests-count* 0)) ;; (/ *runs:can-run-more-tests-count* 2))) - -;; Temporary globals. Move these into the logic or into common -;; -(define *seen-cant-run-tests* (make-hash-table)) ;; use to track tests that we suspect cannot be run -(define (runs:inc-cant-run-tests testname) - (hash-table-set! 
*seen-cant-run-tests* testname - (+ (hash-table-ref/default *seen-cant-run-tests* testname 0) 1))) - -(define (runs:can-keep-running? testname n) - (< (hash-table-ref/default *seen-cant-run-tests* testname 0) n)) - -(define *runs:denoise* (make-hash-table)) ;; key => last-time-ran - -;; mechanism to limit printing info to the screen that is repetitive. -;; -;; Example: -;; (if (runs:lownoise "waiting on tasks" 60) -;; (debug:print-info 2 *default-log-port* "waiting for tasks to complete, sleeping briefly ...")) -;; -(define (runs:lownoise key waitval) - (let ((lasttime (hash-table-ref/default *runs:denoise* key 0)) - (currtime (current-seconds))) - (if (> (- currtime lasttime) waitval) - (begin - (hash-table-set! *runs:denoise* key currtime) - #t) - #f))) - -(define (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs) - - ;; Take advantage of a good place to exit if running the one-pass methodology - (if (and (> (runs:dat-can-run-more-tests-count runsdat) 20) - (args:get-arg "-one-pass")) - (exit 0)) - - (thread-sleep! (cond ;; BB: check with Matt. Should this sleep move to cond clauses below where we determine we have too many jobs running rather than each time the and condition above is true (which seems like always)? - ((> (runs:dat-can-run-more-tests-count runsdat) 20) - (if (runs:lownoise "waiting on tasks" 60)(debug:print-info 2 *default-log-port* "waiting for tasks to complete, sleeping briefly ...")) - (configf:lookup-number *configdat* "setup" "inter-test-delay" default: 0.1) ;; was 2 - );; obviously haven't had any work to do for a while - (else 0))) - - (let* ((num-running (rmt:get-count-tests-running run-id)) - (num-running-in-jobgroup (rmt:get-count-tests-running-in-jobgroup run-id jobgroup)) - (job-group-limit (let ((jobg-count (config-lookup *configdat* "jobgroups" jobgroup))) - (if (string? 
jobg-count) - (string->number jobg-count) - jobg-count)))) - (if (> (+ num-running num-running-in-jobgroup) 0) - (runs:inc-can-run-more-tests-count runsdat)) ;; (set! *runs:can-run-more-tests-count* (+ *runs:can-run-more-tests-count* 1))) - (if (not (eq? *last-num-running-tests* num-running)) - (begin - (debug:print 2 *default-log-port* "max-concurrent-jobs: " max-concurrent-jobs ", num-running: " num-running) - (set! *last-num-running-tests* num-running))) - (if (not (eq? 0 *globalexitstatus*)) - (list #f num-running num-running-in-jobgroup max-concurrent-jobs job-group-limit) - (let* ((can-not-run-more (cond - ;; if max-concurrent-jobs is set and the number running is greater - ;; than it then cannot run more jobs - ((and max-concurrent-jobs (>= num-running max-concurrent-jobs)) - (if (runs:lownoise "mcj msg" 60) - (debug:print 0 *default-log-port* "WARNING: Max running jobs exceeded, current number running: " num-running - ", max_concurrent_jobs: " max-concurrent-jobs)) - #t) - ;; if job-group-limit is set and number of jobs in the group is greater - ;; than the limit then cannot run more jobs of this kind - ((and job-group-limit - (>= num-running-in-jobgroup job-group-limit)) - (if (runs:lownoise (conc "maxjobgroup " jobgroup) 60) - (debug:print 1 *default-log-port* "WARNING: number of jobs " num-running-in-jobgroup - " in jobgroup \"" jobgroup "\" exceeds limit of " job-group-limit)) - #t) - (else #f)))) - (list (not can-not-run-more) num-running num-running-in-jobgroup max-concurrent-jobs job-group-limit))))) - -(define (runs:run-pre-hook run-id) - (let* ((run-pre-hook (configf:lookup *configdat* "runs" "pre-hook")) - (existing-tests (if run-pre-hook - (rmt:get-tests-for-run run-id "%" '() '() ;; run-id testpatt states statuses - #f #f ;; offset limit - #f ;; not-in - #f ;; sort-by - #f ;; sort-order - #f ;; get full data (not 'shortlist) - 0 ;; (runs:gendat-inc-results-last-update *runs:general-data*) ;; last update time - 'dashboard) - '())) - (log-dir 
(conc *toppath* "/logs")) - (log-file (conc "pre-hook-" (string-translate (getenv "MT_TARGET") "/" "-") "-" (getenv "MT_RUNNAME") ".log")) - (full-log-fname (conc log-dir "/" log-file))) - (if run-pre-hook - (if (null? existing-tests) - (let* ((use-log-dir (if (not (directory-exists? log-dir)) - (handle-exceptions - exn - (begin - (debug:print 0 *default-log-port* "WARNING: Failed to create " log-dir) - #f) - (create-directory log-dir #t) - #t) - #t)) - (start-time (current-seconds)) - (actual-logf (if use-log-dir full-log-fname log-file))) - (handle-exceptions - exn - (begin - (print-call-chain *default-log-port*) - (debug:print 0 *default-log-port* "Message: " ((condition-property-accessor 'exn 'message) exn)) - (debug:print 0 *default-log-port* "ERROR: failed to run pre-hook " run-pre-hook ", check the log " log-file)) - (debug:print-info 0 *default-log-port* "running run-pre-hook: \"" run-pre-hook "\", log is " actual-logf) - (system (conc run-pre-hook " >> " actual-logf " 2>&1")) - (debug:print-info 0 *default-log-port* "pre-hook \"" run-pre-hook "\" took " (- (current-seconds) start-time) " seconds to run."))) - (debug:print 0 *default-log-port* "Skipping pre-hook call \"" run-pre-hook "\" as there are existing tests for this run."))))) - -(define (runs:run-post-hook run-id) - (let* ((run-post-hook (configf:lookup *configdat* "runs" "post-hook")) - (existing-tests (if run-post-hook - (rmt:get-tests-for-run run-id "%" '() '() ;; run-id testpatt states statuses - #f #f ;; offset limit - #f ;; not-in - #f ;; sort-by - #f ;; sort-order - #f ;; get full data (not 'shortlist) - 0 ;; (runs:gendat-inc-results-last-update *runs:general-data*) ;; last update time - 'dashboard) - '())) - (log-dir (conc *toppath* "/logs")) - (log-file (conc "post-hook-" (string-translate (getenv "MT_TARGET") "/" "-") "-" (getenv "MT_RUNNAME") ".log")) - (full-log-fname (conc log-dir "/" log-file))) - (if run-post-hook - ;; (if (null? 
existing-tests) - ;; (debug:print 0 *default-log-port* "Skipping post-hook call \"" run-post-hook "\" as there are existing tests for this run."))))) - (let* ((use-log-dir (if (not (directory-exists? log-dir)) - (handle-exceptions - exn - (begin - (debug:print 0 *default-log-port* "WARNING: Failed to create " log-dir) - #f) - (create-directory log-dir #t) - #t) - #t)) - (start-time (current-seconds)) - (actual-logf (if use-log-dir full-log-fname log-file))) - (handle-exceptions - exn - (begin - (print-call-chain *default-log-port*) - (debug:print 0 *default-log-port* "Message: " ((condition-property-accessor 'exn 'message) exn)) - (debug:print 0 *default-log-port* "ERROR: failed to run post-hook " run-post-hook ", check the log " log-file)) - (debug:print-info 0 *default-log-port* "running run-post-hook: \"" run-post-hook "\", log is " actual-logf) - (system (conc run-post-hook " >> " actual-logf " 2>&1")) - (debug:print-info 0 *default-log-port* "post-hook \"" run-post-hook "\" took " (- (current-seconds) start-time) " seconds to run.")))))) - -;; return #t when all items in waitors-upon list are represented in test-patt, #f otherwise. -(define (runs:testpatts-mention-waitors-upon? test-patt waitors-upon) - (null? (tests:filter-test-names-not-matched waitors-upon test-patt))) - -;;====================================================================== -;; runs:run-tests is called from megatest.scm and itself -;;====================================================================== -;; -;; test-names: Comma separated patterns same as test-patts but used in selection -;; of tests to run. The item portions are not respected. 
-;; FIXME: error out if /patt specified -;; -(define (runs:run-tests target runname test-patts user flags #!key (run-count 1)) ;; test-names - (let* ((keys (keys:config-get-fields *configdat*)) - (keyvals (keys:target->keyval keys target)) - (run-id (rmt:register-run keyvals runname "new" "n/a" user (args:get-arg "-contour"))) ;; test-name))) - ;; (deferred '()) ;; delay running these since they have a waiton clause - (runconfigf (conc *toppath* "/runconfigs.config")) - (dbfile (conc *toppath* "/megatest.db")) - (readonly-mode (not (file-write-access? dbfile))) - (test-records (make-hash-table)) - ;; need to process runconfigs before generating these lists - (all-tests-registry #f) ;; (tests:get-all)) ;; (tests:get-valid-tests (make-hash-table) test-search-path)) ;; all valid tests to check waiton names - (all-test-names #f) ;; (hash-table-keys all-tests-registry)) - (test-names #f) ;; Generated by a call to (tests:filter-test-names all-test-names test-patts)) - (required-tests #f) ;; Put fully qualified test/testpath names in this list to be done - (waitors-upon (make-hash-table)) ;; given a test, return list of tests waiting upon this test. - (task-key (conc (hash-table->alist flags) " " (get-host-name) " " (current-process-id))) - ;; (tdbdat (tasks:open-db)) - (config-reruns (let ((x (configf:lookup *configdat* "setup" "reruns"))) - (if x (string->number x) #f))) - (allowed-tests #f)) - - ;; check if readonly - (when readonly-mode - (debug:print-error 0 *default-log-port* "megatest.db is readonly. Cannot proceed.") - (exit 1)) - - ;; per user request. If less than 100Meg space on dbdir partition, bail out with error - ;; this will reduce issues in database corruption - (common:check-db-dir-and-exit-if-insufficient) - - ;; override the number of reruns from the configs - ;; this needs to be done at the place where is first runs:run-tests called - ;(if (and config-reruns - ; (> run-count config-reruns)) - ;(set! 
run-count config-reruns)) - - ;; (if (tasks:need-server run-id)(tasks:start-and-wait-for-server tdbdat run-id 10)) - - (let ((sighand (lambda (signum) - ;; (signal-mask! signum) ;; to mask or not? seems to cause issues in exiting - (set! *time-to-exit* #t) - (print "Received signal " signum ", cleaning up before exit. Please wait...") - (let ((th1 (make-thread (lambda () - ;; (let ((tdbdat (tasks:open-db))) - (rmt:tasks-set-state-given-param-key task-key "killed") ;; ) - (print "Killed by signal " signum ". Exiting") - (thread-sleep! 3) - (exit)))) - (th2 (make-thread (lambda () - (thread-sleep! 5) - (debug:print 0 *default-log-port* "Done") - (exit 4))))) - (thread-start! th2) - (thread-start! th1) - (thread-join! th2))))) - (set-signal-handler! signal/int sighand) - (set-signal-handler! signal/term sighand)) - - ;; force the starting of a server -- removed BB 17ww28 - no longer needed. - ;;(debug:print 0 *default-log-port* "waiting on server...") - ;;(server:start-and-wait *toppath*) - - (runs:set-megatest-env-vars run-id inkeys: keys inrunname: runname) ;; these may be needed by the launching process - (set! runconf (if (common:file-exists? runconfigf) - (setup-env-defaults runconfigf run-id *already-seen-runconfig-info* keyvals target) - (begin - (debug:print 0 *default-log-port* "WARNING: You do not have a run config file: " runconfigf) - #f))) - - (if (not test-patts) ;; first time in - adjust testpatt - (set! test-patts (common:args-get-testpatt runconf))) - ;; if test-patts is #f at this point there is something wrong and we need to bail out - (if (not test-patts) - (begin - (debug:print 0 *default-log-port* "WARNING: there is no test pattern for this run. Exiting now.") - (exit 0))) - - (if (args:get-arg "-tagexpr") - (begin - (set! 
allowed-tests (string-join (runs:get-tests-matching-tags (args:get-arg "-tagexpr")) ",")) - (debug:print-info 0 *default-log-port* "filtering initial test list with tagexpr: " (args:get-arg "-tagexpr") " => " allowed-tests) - ));; tests will be ANDed with this list - - ;; register this run in monitor.db - (rmt:tasks-add "run-tests" user target runname test-patts task-key) ;; params) - (rmt:tasks-set-state-given-param-key task-key "running") - - (common:telemetry-log "run-tests" - payload: - `( (target . ,target) - (run-name . ,runname) - (test-patts . ,test-patts) ) ) - - - ;; Now generate all the tests lists - (set! all-tests-registry (tests:get-all)) ;; hash of testname => path-to-test - (set! all-test-names (hash-table-keys all-tests-registry)) - ;; filter first for allowed-tests (from -tagexpr) then for test-patts. - (set! test-names (tests:filter-test-names - (if allowed-tests - (tests:filter-test-names all-test-names allowed-tests) - all-test-names) - test-patts)) - - ;; I think seeding required-tests with all test-names makes sense but lack analysis to back that up. - - ;; NEW STRATEGY HERE: - ;; 1. fill required tests with test-patts - ;; 2. scan testconfigs and if waitons, itemwait, itempatt calc prior test test-patt - ;; 3. repeat until all deps propagated - - ;; any tests with direct mention in test-patts can be added to required - ;;(set! required-tests (lset-intersection equal? (string-split test-patts ",") all-test-names)) - (set! required-tests (tests:filter-test-names all-test-names test-patts)) - ;; - ;; (set! required-tests (lset-intersection equal? test-names all-test-names)) - - ;; look up all tests matching the comma separated list of globs in - ;; test-patts (using % as wildcard) - - ;; (set! 
test-names (delete-duplicates (tests:get-valid-tests *toppath* test-patts))) - (debug:print-info 0 *default-log-port* "tests search path: " (string-intersperse (tests:get-tests-search-path *configdat*) " ")) - (debug:print-info 0 *default-log-port* "all tests: " (string-intersperse (sort all-test-names string<) " ")) - (debug:print-info 0 *default-log-port* "test names: " (string-intersperse (sort test-names string<) " ")) - (debug:print-info 0 *default-log-port* "required tests: " (string-intersperse (sort required-tests string<) " ")) - - ;; on the first pass or call to run-tests set FAILS to NOT_STARTED if - ;; -keepgoing is specified - (if (eq? *passnum* 0) - (begin - ;; Is this still necessary? I think not. Unreachable tests are marked as such and - ;; should not cause problems here. - ;; - ;; have to delete test records where NOT_STARTED since they can cause -keepgoing to - ;; get stuck due to becoming inaccessible from a failed test. I.e. if test B depends - ;; on test A but test B reached the point on being registered as NOT_STARTED and test - ;; A failed for some reason then on re-run using -keepgoing the run can never complete. 
- ;; - ;; (rmt:general-call 'delete-tests-in-state run-id "NOT_STARTED") - - ;; Now convert anything in allow-auto-rerun to NOT_STARTED - ;; - (for-each - (lambda (state-status) - (let* ((ss-lst (string-split-fields "/" state-status #:infix)) - (state (if (> (length ss-lst) 0)(car ss-lst) #f)) - (status (if (> (length ss-lst) 1)(cadr ss-lst) #f))) - (rmt:set-tests-state-status run-id test-names state status "NOT_STARTED" status))) - ;; list of state/status pairs separated by spaces - (string-split (or (configf:lookup *configdat* "setup" "allow-auto-rerun") ""))))) - - ;; Ensure all tests are registered in the test_meta table - (runs:update-all-test_meta #f) - - ;; run the run prehook if there are no tests yet run for this run: - ;; - (runs:run-pre-hook run-id) - ;; mark all test launced flag as false in the meta table - (rmt:set-var (conc "lunch-complete-" run-id) "no") - (rmt:set-run-state-status run-id "new" "n/a") - ;; now add non-directly referenced dependencies (i.e. waiton) - ;;====================================================================== - ;; refactoring this block into tests:get-full-data - ;; - ;; What happended, this code is now duplicated in tests!? - ;; - ;;====================================================================== - - (if (not (null? test-names)) ;; BEGIN test-names loop - (let loop ((hed (car test-names)) ;; NOTE: This is the main loop that iterates over the test-names - (tal (cdr test-names))) ;; 'return-procs tells the config reader to prep running system but return a proc - (debug:print-info 4 *default-log-port* "\n\ntestpatt elaboration loop => hed="hed " tal="tal" test-patts="test-patts" test-names="test-names) - (change-directory *toppath*) ;; PLEASE OPTIMIZE ME!!! I think this should be a no-op but there are several places where change-directories could be happening. 
- (setenv "MT_TEST_NAME" hed) ;; - (let*-values (((waitons waitors config) (tests:get-waitons hed all-tests-registry)) - - ;; NOTE: Have the config - can extract [waitons] section - - ((hed-mode) - (let ((m (config-lookup config "requirements" "mode"))) - (if m (map string->symbol (string-split m)) '(normal)))) - ((hed-itemized-waiton) ;; are items in hed waiting on items of waiton? - (not (null? (lset-intersection eq? hed-mode '(itemmatch itemwait))))) - ) - (debug:print-info 8 *default-log-port* "waitons: " waitons) - ;; check for hed in waitons => this would be circular, remove it and issue an - ;; error - (if (or (member hed waitons) - (member hed waitors)) - (begin - (debug:print-error 0 *default-log-port* "test " hed " has listed itself as a waiton or waitor, please correct this!") - (set! waitons (filter (lambda (x)(not (equal? x hed))) waitons)) - (set! waitors (filter (lambda (x)(not (equal? x hed))) waitors)))) - - ;; (items (items:get-items-from-config config))) - (if (not (hash-table-ref/default test-records hed #f)) ;; waiton-tconfig below will be #f until that test is visted here at least once - (hash-table-set! test-records ;; BB: we are doing a manual make-tests:testqueue - hed (vector hed ;; 0 ;; testname - config ;; 1 - waitons ;; 2 - (config-lookup config "requirements" "priority") ;; priority 3 - (tests:get-items config) ;; 4 ;; expand the [items] and or [itemstable] into explict items - #f ;; itemsdat 5 - #f ;; spare - used for item-path - waitors ;; - ))) - ;; update waitors-upon here - (for-each - (lambda (waiton) - (let* ((current-waitors-upon (hash-table-ref/default waitors-upon waiton '()))) - (debug:print-info 8 *default-log-port* " current-waiters-upon["waiton"] is "current-waitors-upon ) - (when (not (member hed current-waitors-upon)) - (debug:print-info 8 *default-log-port* " current-waiters-upon["waiton"] << "hed ) - (hash-table-set! waitors-upon waiton (cons hed current-waitors-upon))))) - (if (list? 
waitons) waitons '())) - (debug:print-info 8 *default-log-port* " process waitons&waitors of "hed": "(delete-duplicates (append waitons waitors))) - (for-each - (lambda (waiton) - (if (and waiton (not (member waiton test-names))) - (let* ((waitors-in-testpatt (runs:testpatts-mention-waitors-upon? test-patts (hash-table-ref/default waitors-upon waiton '()))) - (waiton-record (hash-table-ref/default test-records waiton #f)) - (waiton-tconfig (if waiton-record (vector-ref waiton-record 1) #f)) - (waiton-itemized (and waiton-tconfig - (or (hash-table-ref/default waiton-tconfig "items" #f) - (hash-table-ref/default waiton-tconfig "itemstable" #f)))) - (itemmaps (tests:get-itemmaps config)) ;; (configf:lookup config "requirements" "itemmap")) - (new-test-patts (tests:extend-test-patts test-patts hed waiton itemmaps hed-itemized-waiton))) - (debug:print-info 0 *default-log-port* "Test " waiton " has " (if waiton-record "a" "no") " waiton-record and" (if waiton-itemized " " " no ") "items") - ;; need to account for test-patt here, if I am test "a", selected with a test-patt of "hed/b%" - ;; and we are waiting on "waiton" we need to add "waiton/,waiton/b%" to test-patt - ;; is this satisfied by merely appending "/" to the waiton name added to the list? - ;; - ;; This approach causes all of the items in an upstream test to be run - ;; if we have this waiton already processed once we can analzye it for extending - ;; tests to be run, since we can't properly process waitons unless they have been - ;; initially added we add them again to be processed on second round AND add the hed - ;; back in to also be processed on second round - (if waiton-tconfig ;; BB: waiter should be in test-patts as well as the waiton have a tconfig. - (if waiton-itemized - (if waitors-in-testpatt - (begin - (debug:print-info 0 *default-log-port* "New test patts: " new-test-patts ", prev test patts: " test-patts) - (set! 
test-names (cons waiton test-names)) ;; need to process this one, only add once the waiton tconfig read - (set! required-tests (cons (conc waiton "/") required-tests)) - (set! test-patts new-test-patts)) - (begin - (debug:print-info 0 *default-log-port* "Waitor(s) not yet on testpatt for " waiton ", setting up to re-process it") - (set! tal (append (cons waiton tal)(list hed))))) - (begin - (debug:print-info 0 *default-log-port* "Adding non-itemized test " waiton " to required-tests") - (set! required-tests (cons waiton required-tests)) - (set! test-patts new-test-patts))) - (begin - (debug:print-info 0 *default-log-port* "No testconfig info yet for " waiton ", setting up to re-process it") - (set! tal (append (cons waiton tal)(list hed))))) ;; (cons (conc waiton "/") required-tests)) - ;; NOPE: didn't work. required needs to be plain test names. Try tacking on to test-patts - ;; - doesn't work - ;; (set! test-patts (conc test-patts "," waiton "/")) - ;; (set! test-names (cons waiton test-names))))) ;; was an append, now a cons - ))) - (delete-duplicates (append waitons waitors))) - (let ((remtests (delete-duplicates (append waitons tal)))) - (debug:print-info 8 *default-log-port* " remtests are "remtests) - (if (not (null? remtests)) - (begin - ;; (debug:print-info 0 *default-log-port* "Preprocessing continues for " (string-intersperse remtests ", ")) - (loop (car remtests)(cdr remtests)))))))) ;; END test-names loop - - (if (not (null? required-tests)) - (debug:print-info 1 *default-log-port* "Adding \"" (string-intersperse required-tests " ") "\" to the run queue")) - ;; NOTE: these are all parent tests, items are not expanded yet. 
- (debug:print-info 4 *default-log-port* "test-records=" (hash-table->alist test-records)) - (let ((reglen (configf:lookup *configdat* "setup" "runqueue"))) - (if (> (length (hash-table-keys test-records)) 0) - (let* ((keep-going #t) - (run-queue-retries 5) - #;(th1 (make-thread (lambda () - (handle-exceptions - exn - (begin - (print-call-chain) - (print " message: " ((condition-property-accessor 'exn 'message) exn))) - (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests - (any->number reglen) all-tests-registry))) - "runs:run-tests-queue")) - (th2 (make-thread (lambda () ;; BBQ: why are we visiting ALL runs here? - ;; (rmt:find-and-mark-incomplete-all-runs))))) CAN'T INTERRUPT IT ... - (let ((run-ids (rmt:get-all-run-ids))) - (for-each (lambda (run-id) - (if keep-going - (handle-exceptions - exn - (debug:print 0 *default-log-port* "error in calling find-and-mark-incomplete for run-id " run-id) - (rmt:find-and-mark-incomplete run-id #f)))) ;; ovr-deadtime))) ;; could be root of https://hsdes.intel.com/appstore/article/#/220546828/main -- Title: Megatest jobs show DEAD even though they are still running (1.64/27) - run-ids))) - "runs: mark-incompletes"))) - ;; (thread-start! th1) - (thread-start! th2) - ;; (thread-join! th1) - ;; just do the main stuff in the main thread - (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests - (any->number reglen) all-tests-registry) - (set! keep-going #f) - (thread-join! th2) - ;; if run-count > 0 call, set -preclean and -rerun STUCK/DEAD - (if (> run-count 0) ;; handle reruns - (begin - (if (not (hash-table-ref/default flags "-preclean" #f)) - (hash-table-set! flags "-preclean" #t)) - (if (not (hash-table-ref/default flags "-rerun" #f)) - (hash-table-set! 
flags "-rerun" "STUCK/DEAD,n/a,ZERO_ITEMS")) - ;; recursive call to self - (runs:run-tests target runname test-patts user flags run-count: (- run-count 1))))) - (debug:print-info 0 *default-log-port* "No tests to run"))) - (debug:print-info 4 *default-log-port* "All done by here") - ;; TODO: try putting post hook call here - ;(if (eq? run-count 0) - ; (begin - ; (debug:print-info 0 *default-log-port* "Calling Post Hook") - ; (debug:print-info 2 *default-log-port* " run-count " run-count) - ; (runs:run-post-hook run-id)) - ; (debug:print-info 2 *default-log-port* "Not calling post hook runcount = " run-count )) - (rmt:tasks-set-state-given-param-key task-key "done") - ;; (sqlite3:finalize! tasks-db) - )) - - -;; loop logic. These are used in runs:run-tests-queue to make it a bit more readable. -;; -;; If reg not full and have items in tal then loop with (car tal)(cdr tal) reg reruns -;; If reg is full (i.e. length >= n -;; loop with (car reg) tal (cdr reg) reruns -;; If tal is empty -;; but have items in reg; loop with (car reg)(cdr reg) '() reruns -;; If reg is empty => all done - -(define (runs:queue-next-hed tal reg n regfull) - (if regfull - (if (null? reg) #f (car reg)) - (if (null? tal) ;; tal is used up, pop from reg - (if (null? reg) #f (car reg)) - (car tal)))) - -(define (runs:queue-next-tal tal reg n regfull) - (if regfull - tal - (if (null? tal) ;; must transfer from reg - (if (null? reg) '() (cdr reg)) - (cdr tal)))) - -(define (runs:queue-next-reg tal reg n regfull) - (if regfull - (if (null? reg) '() (cdr reg)) ;; EXPLORE: reorder (cdr reg) such that looping is more efficient - (if (null? 
tal) ;; if tal is null and reg not full then '() as reg contents moved to tal - '() - reg))) - -;; this is the list of parameters to the named loop "loop" near the top of runs:run-tests-queue, look around line 1216 -;; -(define (runs:loop-values tal reg reglen regfull reruns) - (list (runs:queue-next-hed tal reg reglen regfull) ;; hed - (runs:queue-next-tal tal reg reglen regfull) ;; tal - (runs:queue-next-reg tal reg reglen regfull) ;; reg - reruns)) ;; reruns - -;; objective - iterate thru tests -;; => want to prioritize tests we haven't seen before -;; => sometimes need to squeeze things in (added to reg) -;; => review of a previously seen test is higher priority of never visited test -;; reg - list of previously visited tests -;; tal - list of never visited tests -;; prefer next hed to be from reg than tal. - -(define runs:nothing-left-in-queue-count 0) - -;;====================================================================== -;; runs:expand-items is called by runs:run-tests-queue -;;====================================================================== -;; -;; return value of runs:expand-items is passed back to runs-tests-queue and is fed to named loop with this signature: -;; (let loop ((hed (car sorted-test-names)) -;; (tal (cdr sorted-test-names)) -;; (reg '()) ;; registered, put these at the head of tal -;; (reruns '())) -(define (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry test-records itemmaps) - (let* ((loop-list (list hed tal reg reruns)) - (prereqs-not-met (let ((res (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps))) - (if (list? 
res) - res - (begin - (debug:print 0 *default-log-port* - "ERROR: rmt:get-prereqs-not-met returned non-list!\n" - " res=" res " run-id=" run-id " waitons=" waitons " hed=" hed " item-path=" item-path " testmode=" testmode " itemmaps=" itemmaps) - '())))) - (have-itemized (not (null? (lset-intersection eq? testmode '(itemmatch itemwait))))) - ;; (prereqs-not-met (mt:lazy-get-prereqs-not-met run-id waitons item-path mode: testmode itemmap: itemmap)) - (fails (runs:calc-fails prereqs-not-met)) - (prereq-fails (runs:calc-prereq-fail prereqs-not-met)) - (non-completed (runs:calc-not-completed prereqs-not-met)) - (runnables (runs:calc-runnable prereqs-not-met)) - (unexpanded-prereqs - (filter (lambda (testname) - (let* ((test-rec (hash-table-ref test-records testname)) - (items (tests:testqueue-get-items test-rec))) - ;;(BB> "HEY " testname "=>"items) - (or (procedure? items)(eq? items 'have-procedure)))) - waitons)) - - - ) - (debug:print-info 4 *default-log-port* "START OF INNER COND #2 " - "\n can-run-more: " can-run-more - "\n testname: " hed - "\n prereqs-not-met: " (runs:pretty-string prereqs-not-met) - "\n non-completed: " (runs:pretty-string non-completed) - "\n prereq-fails: " (runs:pretty-string prereq-fails) - "\n fails: " (runs:pretty-string fails) - "\n testmode: " testmode - "\n (member 'toplevel testmode): " (member 'toplevel testmode) - "\n (null? non-completed): " (null? 
non-completed) - "\n reruns: " reruns - "\n items: " items - "\n can-run-more: " can-run-more) - - (cond - ;; all prereqs met, fire off the test - ;; or, if it is a 'toplevel test and all prereqs not met are COMPLETED then launch - - ((and (not (member 'toplevel testmode)) - (member (hash-table-ref/default test-registry (db:test-make-full-name hed item-path) 'n/a) - '(DONOTRUN removed CANNOTRUN))) ;; *common:cant-run-states-sym*) ;; '(COMPLETED KILLED WAIVED UNKNOWN INCOMPLETE)) ;; try to catch repeat processing of COMPLETED tests here - (debug:print-info 4 *default-log-port* "cond branch - " "ei-1") - (debug:print-info 1 *default-log-port* "Test " hed " set to \"" (hash-table-ref test-registry (db:test-make-full-name hed item-path)) "\". Removing it from the queue") - (if (or (not (null? tal)) - (not (null? reg))) - (runs:loop-values tal reg reglen regfull reruns) - (begin - (debug:print-info 0 *default-log-port* "Nothing left in the queue!") - ;; If get here twice then we know we've tried to expand all items - ;; since there must be a logic issue with the handling of loops in the - ;; items expand phase we will brute force an exit here. - (if (> runs:nothing-left-in-queue-count 2) - (begin - (debug:print 0 *default-log-port* "WARNING: this condition is triggered when there were no items to expand and nothing to run. Please check your run for completeness") - (exit 0)) - (set! runs:nothing-left-in-queue-count (+ runs:nothing-left-in-queue-count 1))) - #f))) - - ;; desired result of below cond branch: - ;; we want to expand items in our test of interest (hed) in the following cases: - ;; case 1 - mode is itemmatch or itemwait: - ;; - all prereq tests have been expanded - ;; - at least one prereq's items have completed - ;; case 2 - mode is toplevel - ;; - prereqs are completed. 
- ;; - or no prereqs can complete - ;; case 3 - mode not specified - ;; - prereqs are completed and passed (we could consider removing "and passed" -- it would change behavior from current) - ((or (null? prereqs-not-met) - (and (member 'toplevel testmode) - (null? non-completed))) - (debug:print-info 4 *default-log-port* "cond branch - " "ei-2") - (debug:print-info 4 *default-log-port* "runs:expand-items: (or (null? prereqs-not-met) (and (member 'toplevel testmode)(null? non-completed)))") - (let ((test-name (tests:testqueue-get-testname test-record))) - (setenv "MT_TEST_NAME" test-name) ;; - (setenv "MT_RUNNAME" runname) - (runs:set-megatest-env-vars run-id inrunname: runname) ;; these may be needed by the launching process - (let ((items-list (items:get-items-from-config tconfig))) - (if (list? items-list) - (begin - (if (null? items-list) - (let ((test-id (rmt:get-test-id run-id test-name "")) - (num-items (rmt:test-toplevel-num-items run-id test-name))) - (if (and test-id - (not (> num-items 0))) - (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "ZERO_ITEMS" "Failed to run due to failed prerequisites")))) - (tests:testqueue-set-items! test-record items-list) - (list hed tal reg reruns)) - (begin - (debug:print-error 0 *default-log-port* "The proc from reading the items table did not yield a list - please report this") - (exit 1)))))) - - ((and (null? fails) - (null? prereq-fails) - (not (null? non-completed))) - (debug:print-info 4 *default-log-port* "cond branch - " "ei-3") - (let* ((allinqueue (map (lambda (x)(if (string? x) x (db:test-get-testname x))) - (append newtal reruns))) - ;; prereqstrs is a list of test names as strings that are prereqs for hed - (prereqstrs (delete-duplicates (map (lambda (x)(if (string? 
x) x (db:test-get-testname x))) - prereqs-not-met))) - ;; a prereq that is not found in allinqueue will be put in the notinqueue list - ;; - ;; (notinqueue (filter (lambda (x) - ;; (not (member x allinqueue))) - ;; prereqstrs)) - (give-up #f)) - - ;; We can get here when a prereq has not been run due to *it* having a prereq that failed. - ;; We need to use this to dequeue this item as CANNOTRUN - ;; - (if (member 'toplevel testmode) ;; '(toplevel)) ;; NOTE: this probably should be (member 'toplevel testmode) - (for-each (lambda (prereq) - (if (eq? (hash-table-ref/default test-registry prereq 'justfine) 'CANNOTRUN) - (set! give-up #t))) - prereqstrs)) - - (if (and give-up - (not (and (null? tal)(null? reg)))) - (let ((trimmed-tal (mt:discard-blocked-tests run-id hed tal test-records)) - (trimmed-reg (mt:discard-blocked-tests run-id hed reg test-records))) - (debug:print 1 *default-log-port* "WARNING: test " hed " has discarded prerequisites, removing it from the queue") - - (let ((test-id (rmt:get-test-id run-id hed ""))) - (if test-id (mt:test-set-state-status-by-id run-id test-id "COMPLETED" "PREQ_DISCARDED" "Failed to run due to discarded prerequisites"))) - - (if (and (null? trimmed-tal) - (null? trimmed-reg)) - #f - (runs:loop-values trimmed-tal trimmed-reg reglen regfull reruns) - )) - (list (car newtal)(append (cdr newtal) reg) '() reruns)))) - - ((and (null? fails) ;; have not-started tests, but unable to run them. everything looks completed with no prospect of unsticking something that is stuck. we should mark hed as moribund and exit or continue if there are more tests to consider - (null? prereq-fails) - (null? non-completed)) - (debug:print-info 4 *default-log-port* "cond branch - " "ei-4") - (if (runs:can-keep-running? hed 20) - (begin - (runs:inc-cant-run-tests hed) - (debug:print-info 0 *default-log-port* "no fails in prerequisites for " hed " but also none running, keeping " hed " for now. 
Try count: " (hash-table-ref/default *seen-cant-run-tests* hed 0)) ;; - ;; getting here likely means the system is way overloaded, kill a full minute before continuing - (thread-sleep! 60) ;; TODO: gate by normalized server load > 1.0 (maxload config thing) - ;; num-retries code was here - ;; we use this opportunity to move contents of reg to tal - (list (car newtal)(append (cdr newtal) reg) '() reruns)) ;; an issue with prereqs not yet met? - (begin - (debug:print-info 1 *default-log-port* "no fails in prerequisites for " hed " but nothing seen running in a while, dropping test " hed " from the run queue") - (let ((test-id (rmt:get-test-id run-id hed ""))) - (if test-id (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "TIMED_OUT" "Nothing seen running in a while."))) - (runs:loop-values tal reg reglen regfull reruns) - ))) - - ((and - (or (not (null? fails)) - (not (null? prereq-fails))) - (member 'normal testmode)) - (debug:print-info 4 *default-log-port* "cond branch - " "ei-5") - (debug:print-info 1 *default-log-port* "test " hed " (mode=" testmode ") has failed prerequisite(s); " - (string-intersperse (map (lambda (t)(conc (db:test-get-testname t) ":" (db:test-get-state t)"/"(db:test-get-status t))) fails) ", ") - ", removing it from to-do list") - (let ((test-id (rmt:get-test-id run-id hed ""))) - (if test-id - (if (not (null? prereq-fails)) - (mt:test-set-state-status-by-id-unless-completed run-id test-id "COMPLETED" "PREQ_DISCARDED" "Failed to run due to prior failed prerequisites") - (mt:test-set-state-status-by-id-unless-completed run-id test-id "COMPLETED" "PREQ_FAIL" "Failed to run due to failed prerequisites")))) - ;; (debug:print 4 *default-log-port*"BB> set PREQ_FAIL on "hed) - ;; (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_FAIL" "Failed to run due to failed prerequisites")))) ;; BB: this works, btu equivalent for itemwait mode does not work. - (if (or (not (null? reg))(not (null? tal))) - (begin - (hash-table-set! 
test-registry hed 'CANNOTRUN) - (runs:loop-values tal reg reglen regfull (cons hed reruns)) - ) - #f)) ;; #f flags do not loop - - ((and (not (null? fails))(member 'toplevel testmode)) - (debug:print-info 4 *default-log-port* "cond branch - " "ei-6") - (if (or (not (null? reg))(not (null? tal))) - (list (car newtal)(append (cdr newtal) reg) '() reruns) - #f)) - ((null? runnables) - (debug:print-info 4 *default-log-port* "cond branch - " "ei-7") - #f) ;; if we get here and non-completed is null then it is all over. - (else - (debug:print-info 4 *default-log-port* "cond branch - " "ei-8") - (debug:print 0 *default-log-port* "WARNING: FAILS or incomplete tests maybe preventing completion of this run. Watch for issues with test " hed ", continuing for now") - (list (car newtal)(cdr newtal) reg reruns))))) - -(define (runs:mixed-list-testname-and-testrec->list-of-strings inlst) - (if (null? inlst) - '() - (map (lambda (t) - (cond - ((vector? t) - (let ((test-name (db:test-get-testname t)) - (item-path (db:test-get-item-path t)) - (test-state (db:test-get-state t)) - (test-status (db:test-get-status t))) - (conc test-name (if (equal? item-path "") "" "/") item-path ":" test-state "/" test-status))) - ((string? t) - t) - (else - (conc t)))) - inlst))) - - -;; hed tal reg reruns reglen regfull test-record runname test-name item-path jobgroup max-concurrent-jobs run-id waitons item-path testmode test-patts required-tests test-registry registry-mutex flags keyvals run-info newtal all-tests-registry itemmaps) -(define (runs:process-expanded-tests runsdat testdat) - ;; unroll the contents of runsdat and testdat (due to ongoing refactoring). 
- (debug:print 2 *default-log-port* "runs:process-expanded-tests; testdat:" ) - (debug:print 2 *default-log-port* (with-output-to-string - (lambda () (pp (runs:testdat->alist testdat) )))) - (let* ((hed (runs:testdat-hed testdat)) - (tal (runs:testdat-tal testdat)) - (reg (runs:testdat-reg testdat)) - (reruns (runs:testdat-reruns testdat)) - (test-name (runs:testdat-test-name testdat)) - (item-path (runs:testdat-item-path testdat)) - (jobgroup (runs:testdat-jobgroup testdat)) - (waitons (runs:testdat-waitons testdat)) - (item-path (runs:testdat-item-path testdat)) - (testmode (runs:testdat-testmode testdat)) - (newtal (runs:testdat-newtal testdat)) - (itemmaps (runs:testdat-itemmaps testdat)) - (test-record (runs:testdat-test-record testdat)) - (prereqs-not-met (runs:testdat-prereqs-not-met testdat)) - - (reglen (runs:dat-reglen runsdat)) - (regfull (runs:dat-regfull runsdat)) - (runname (runs:dat-runname runsdat)) - (max-concurrent-jobs (runs:dat-max-concurrent-jobs runsdat)) - (run-id (runs:dat-run-id runsdat)) - (test-patts (runs:dat-test-patts runsdat)) - (required-tests (runs:dat-required-tests runsdat)) - (test-registry (runs:dat-test-registry runsdat)) - (registry-mutex (runs:dat-registry-mutex runsdat)) - (flags (runs:dat-flags runsdat)) - (keyvals (runs:dat-keyvals runsdat)) - (run-info (runs:dat-run-info runsdat)) - (all-tests-registry (runs:dat-all-tests-registry runsdat)) - (run-limits-info (runs:dat-can-run-more-tests runsdat)) - ;; (runs:can-run-more-tests run-id jobgroup max-concurrent-jobs)) ;; look at the test jobgroup and tot jobs running - (have-resources (car run-limits-info)) - (num-running (list-ref run-limits-info 1)) - (num-running-in-jobgroup(list-ref run-limits-info 2)) - (max-concurrent-jobs (list-ref run-limits-info 3)) - (job-group-limit (list-ref run-limits-info 4)) - ;; (prereqs-not-met (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps)) - ;; (prereqs-not-met (mt:lazy-get-prereqs-not-met run-id 
waitons item-path mode: testmode itemmap: itemmap)) - (fails (if (list? prereqs-not-met) ;; TODO: rename fails to failed-prereqs - (runs:calc-fails prereqs-not-met) - (begin - (debug:print-error 0 *default-log-port* "prereqs-not-met is not a list! " prereqs-not-met) - '()))) - (non-completed (filter (lambda (x) ;; remove hed from not completed list, duh, of course it is not completed! - (not (equal? x hed))) - (runs:calc-not-completed prereqs-not-met))) - (loop-list (list hed tal reg reruns)) - ;; configure the load runner - (numcpus (common:get-num-cpus #f)) - (maxload (string->number (or (configf:lookup *configdat* "jobtools" "maxload") "3.0"))) ;; use a non-number string to disable - (maxhomehostload (string->number (or (configf:lookup *configdat* "jobtools" "maxhomehostload") "2.0"))) ;; use a non-number string to disable - (waitdelay (string->number (or (configf:lookup *configdat* "jobtools" "waitdelay") "60")))) - (debug:print-info 4 *default-log-port* "have-resources: " have-resources " prereqs-not-met: (" - (string-intersperse - (map (lambda (t) - (if (vector? t) - (conc (db:test-get-state t) "/" (db:test-get-status t)) - (conc " WARNING: t is not a vector=" t ))) - prereqs-not-met) - ", ") ") fails: " fails - "\nregistered? " (hash-table-ref/default test-registry (db:test-make-full-name test-name item-path) #f)) - - - - (if (and (not (null? prereqs-not-met)) - (runs:lownoise (conc "waiting on tests " prereqs-not-met hed) 60)) - (debug:print-info 2 *default-log-port* "waiting on tests; " (string-intersperse (runs:mixed-list-testname-and-testrec->list-of-strings prereqs-not-met) ", "))) - - ;; Don't know at this time if the test have been launched at some time in the past - ;; i.e. is this a re-launch? 
- (debug:print-info 4 *default-log-port* "run-limits-info = " run-limits-info) - - (cond ; cond 894- 1067 - - ;; Check item path against item-patts, - ;; - ((not (tests:match test-patts (tests:testqueue-get-testname test-record) item-path required: required-tests)) ;; This test/itempath is not to be run - ;; else the run is stuck, temporarily or permanently - ;; but should check if it is due to lack of resources vs. prerequisites - (debug:print-info 1 *default-log-port* "Skipping " (tests:testqueue-get-testname test-record) " " item-path " as it doesn't match " test-patts) - (if (or (not (null? tal))(not (null? reg))) - (runs:loop-values tal reg reglen regfull reruns) - #f)) - - ;; Register tests - ;; - ((not (hash-table-ref/default test-registry (db:test-make-full-name test-name item-path) #f)) - (debug:print-info 4 *default-log-port* "Pre-registering test " test-name "/" item-path " to create placeholder" ) - ;; always do firm registration now in v1.60 and greater ;; (eq? *transport-type* 'fs) ;; no point in parallel registration if use fs - (let register-loop ((numtries 15)) - (rmt:register-test run-id test-name item-path) - (if (rmt:get-test-id run-id test-name item-path) - (hash-table-set! test-registry (db:test-make-full-name test-name item-path) 'done) - (if (> numtries 0) - (begin - (thread-sleep! 0.5) - (register-loop (- numtries 1))) - (debug:print-error 0 *default-log-port* "failed to register test " (db:test-make-full-name test-name item-path))))) - (if (not (eq? (hash-table-ref/default test-registry (db:test-make-full-name test-name "") #f) 'done)) - (begin - (rmt:register-test run-id test-name "") - (if (rmt:get-test-id run-id test-name "") - (hash-table-set! test-registry (db:test-make-full-name test-name "") 'done)))) - (runs:shrink-can-run-more-tests-count runsdat) ;; DELAY TWEAKER (still needed?) - (if (and (null? tal)(null? 
reg)) - (list hed tal (append reg (list hed)) reruns) - (list (runs:queue-next-hed tal reg reglen regfull) ;; cannot replace with a call to runs:loop-values as the logic is different for reg - (runs:queue-next-tal tal reg reglen regfull) - ;; NB// Here we are building reg as we register tests - ;; if regfull we must pop the front item off reg - (if regfull - (append (cdr reg) (list hed)) - (append reg (list hed))) - reruns))) - - ;; At this point hed test registration must be completed. - ;; - ((eq? (hash-table-ref/default test-registry (db:test-make-full-name test-name item-path) #f) - 'start) - (debug:print-info 0 *default-log-port* "Waiting on test registration(s): " - (string-intersperse - (filter (lambda (x) - (eq? (hash-table-ref/default test-registry x #f) 'start)) - (hash-table-keys test-registry)) - ", ")) - (thread-sleep! 0.051) - (list hed tal reg reruns)) - - ;; If no resources are available just kill time and loop again - ;; - ((not have-resources) ;; simply try again after waiting a second - (if (runs:lownoise "no resources" 60) - (debug:print-info 1 *default-log-port* "no resources to run new tests, waiting ...")) - ;; Have gone back and forth on this but db starvation is an issue. - ;; wait one second before looking again to run jobs. - (thread-sleep! 1) - ;; could have done hed tal here but doing car/cdr of newtal to rotate tests - (list (car newtal)(cdr newtal) reg reruns)) - - ;; This is the final stage, everything is in place so launch the test - ;; - ((and have-resources - (or (null? prereqs-not-met) - (and (member 'toplevel testmode) ;; 'toplevel) - (null? non-completed) - (not (member 'exclusive testmode))))) - ;; (hash-table-delete! *max-tries-hash* (db:test-make-full-name test-name item-path)) - ;; we are going to reset all the counters for test retries by setting a new hash table - ;; this means they will increment only when nothing can be run - (set! 
*max-tries-hash* (make-hash-table)) - ;; well, first lets see if cpu load throttling is enabled. If so wait around until the - ;; average cpu load is under the threshold before continuing - (if maxload ;; only gate if maxload is specified - (common:wait-for-cpuload maxload numcpus waitdelay)) - (if maxhomehostload - (common:wait-for-homehost-load maxhomehostload (conc "Waiting for homehost load to drop below normalized value of " maxhomehostload))) - - (run:test run-id run-info keyvals runname test-record flags #f test-registry all-tests-registry) - (runs:incremental-print-results run-id) - (hash-table-set! test-registry (db:test-make-full-name test-name item-path) 'running) - (runs:shrink-can-run-more-tests-count runsdat) ;; DELAY TWEAKER (still needed?) - ;; (thread-sleep! *global-delta*) - (if (or (not (null? tal))(not (null? reg))) - (runs:loop-values tal reg reglen regfull reruns) - #f)) - - ;; must be we have unmet prerequisites - ;; - (else - (debug:print 4 *default-log-port* "FAILS: " fails) - ;; If one or more of the prereqs-not-met are FAIL then we can issue - ;; a message and drop hed from the items to be processed. - ;; (runs:mixed-list-testname-and-testrec->list-of-strings prereqs-not-met) - (if (and (not (null? prereqs-not-met)) - (runs:lownoise (conc "waiting on tests " prereqs-not-met hed) 60)) - (debug:print-info 1 *default-log-port* "waiting on tests; " (string-intersperse - (runs:mixed-list-testname-and-testrec->list-of-strings - prereqs-not-met) ", "))) - (if (or (null? fails) - (member 'toplevel testmode)) - (begin - ;; couldn't run, take a breather - (if (runs:lownoise "Waiting for more work to do..." 60) - (debug:print-info 0 *default-log-port* "Waiting for more work to do...")) - (thread-sleep! 
1) - (list (car newtal)(cdr newtal) reg reruns)) - ;; the waiton is FAIL so no point in trying to run hed ever again - (begin - (let ((my-test-id (rmt:get-test-id run-id test-name item-path))) - (mt:test-set-state-status-by-id-unless-completed run-id my-test-id "COMPLETED" "PREQ_FAIL" "Failed to run due to failed prerequisites2")) - - - - (if (or (not (null? reg))(not (null? tal))) - (if (vector? hed) - (begin - (debug:print 1 *default-log-port* "WARNING: Dropping test " test-name "/" item-path - " from the launch list as it has prerequistes that are FAIL") - (let ((test-id (rmt:get-test-id run-id hed ""))) - (if test-id (mt:test-set-state-status-by-id-unless-completed run-id test-id "COMPLETED" "PREQ_FAIL" "Failed to run due to failed prerequisites"))) - (runs:shrink-can-run-more-tests-count runsdat) ;; DELAY TWEAKER (still needed?) - ;; (thread-sleep! *global-delta*) - ;; This next is for the items - - (if (not (null? fails)) - ;;(mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "PREQ_FAIL" #f) - (rmt:set-state-status-and-roll-up-items run-id test-name item-path "NOT_STARTED" "PREQ_FAIL" #f) - ;;(mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "BLOCKED" #f) - (rmt:set-state-status-and-roll-up-items run-id test-name item-path "NOT_STARTED" "BLOCKED" #f) ) - (hash-table-set! test-registry (db:test-make-full-name test-name item-path) 'removed) - (runs:loop-values tal reg reglen regfull reruns)) - (let ((nth-try (hash-table-ref/default test-registry hed 0))) ;; hed not a vector... - (debug:print 2 *default-log-port* "nth-try("hed")="nth-try) - (cond - ((member "RUNNING" (map db:test-get-state prereqs-not-met)) - (if (runs:lownoise (conc "possible RUNNING prerequistes " hed) 60) - (debug:print 0 *default-log-port* "WARNING: test " hed " has possible RUNNING prerequisites, don't give up on it yet.")) - (thread-sleep! 
0.1) - (runs:loop-values tal reg reglen regfull reruns)) - ((or (not nth-try) ;; BB: condition on subsequent tries, condition below fires on first try - (and (number? nth-try) - (< nth-try 2))) - (hash-table-set! test-registry hed (if (number? nth-try) - (+ nth-try 1) - 0)) - (if (runs:lownoise (conc "not removing test " hed) 60) - (debug:print 1 *default-log-port* "WARNING: not removing test " hed " from queue although it may not be runnable due to FAILED prerequisites")) - ;; may not have processed correctly. Could be a race condition in your test implementation? Dropping test " hed) ;; " as it has prerequistes that are FAIL. (NOTE: hed is not a vector)") - (runs:shrink-can-run-more-tests-count runsdat) ;; DELAY TWEAKER (still needed?) - (runs:loop-values newtal reg reglen regfull reruns)) - ((symbol? nth-try) ;; BB: 'done matches here in one case where prereq itemwait failed. This is first "try" - (if (eq? nth-try 'removed) ;; removed is removed - drop it NOW - (if (null? tal) - #f ;; yes, really - (list (car tal)(cdr tal) reg reruns)) - (begin - (if (runs:lownoise (conc "FAILED prerequisites or other issue" hed) 60) - (debug:print 0 *default-log-port* "WARNING: test " hed " has FAILED prerequisites or other issue. Internal state >" nth-try "< will be overridden and we'll retry.")) - ;; was: (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "KEEP_TRYING" #f) - (mt:test-set-state-status-by-testname-unless-completed run-id test-name item-path "COMPLETED" "PREQ_FAIL" #f) - (hash-table-set! test-registry hed 'removed) ;; was 0 - (if (not (and (null? reg) (null? tal))) - (runs:loop-values tal reg reglen regfull reruns) - #f)))) - (else - (if (runs:lownoise (conc "FAILED prerequitests and we tried" hed) 60) - (debug:print 0 *default-log-port* "WARNING: test " hed " has FAILED prerequitests and we've tried at least 10 times to run it. Giving up now.")) - ;; (debug:print 0 *default-log-port* " prereqs: " prereqs-not-met) - (hash-table-set! 
test-registry hed 'removed) - (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "TEN_STRIKES" #f) - ;; I'm unclear on if this roll up is needed - it may be the root cause of the "all set to FAIL" bug. - (rmt:set-state-status-and-roll-up-items run-id test-name item-path #f "FAIL" #f) ;; treat as FAIL - (list (if (null? tal)(car newtal)(car tal)) - tal - reg - reruns))))) - ;; ELSE: can't drop this - maybe running? Just keep trying - - ;;(if (not (or (not (null? reg))(not (null? tal)))) ;; old experiment - (let ((runable-tests (runs:runable-tests prereqs-not-met))) ;; SUSPICIOUS: Should look at more than just prereqs-not-met? - (if (null? runable-tests) - #f ;; I think we are truly done here - (runs:loop-values newtal reg reglen regfull reruns))) - ;;) ;;from old experiment - ) ;; end if (or (not (null? reg))(not (null? tal))) - )))))) - -;; scan a list of tests looking to see if any are potentially runnable -;; -(define (runs:runable-tests tests) - (filter (lambda (t) - (if (not (vector? 
t)) - t - (let ((state (db:test-get-state t)) - (status (db:test-get-status t))) - (case (string->symbol state) - ((COMPLETED INCOMPLETE) #f) - ((NOT_STARTED) - (if (member status '("TEN_STRIKES" "BLOCKED" "PREQ_FAIL" "ZERO_ITEMS" "PREQ_DISCARDED" "TIMED_OUT" )) - #f - t)) - ((DELETED) #f) - (else t))))) - tests)) - -;; move all the miscellanea into this struct -;; -(defstruct runs:gendat inc-results inc-results-last-update inc-results-fmt run-info runname target) - -(define *runs:general-data* - (make-runs:gendat - inc-results: (make-hash-table) - inc-results-last-update: 0 - inc-results-fmt: "~12a~12a~20a~12a~40a\n" ;; state status time duration test-name item-path - run-info: #f - runname: #f - target: #f - ) - ) - -(define (runs:incremental-print-results run-id) - (let ((curr-sec (current-seconds))) - (if (> (- curr-sec (runs:gendat-inc-results-last-update *runs:general-data*)) 5) ;; at least five seconds since last update - (let* ((run-dat (or (runs:gendat-run-info *runs:general-data*)(rmt:get-run-info run-id))) - (runname (or (runs:gendat-runname *runs:general-data*) - (db:get-value-by-header (db:get-rows run-dat) - (db:get-header run-dat) "runname"))) - (target (or (runs:gendat-target *runs:general-data*)(rmt:get-target run-id))) - (testsdat (rmt:get-tests-for-run run-id "%" '() '() ;; run-id testpatt states statuses - #f #f ;; offset limit - #f ;; not-in - #f ;; sort-by - #f ;; sort-order - #f ;; get full data (not 'shortlist) - (runs:gendat-inc-results-last-update *runs:general-data*) ;; last update time - 'dashboard))) - (if (not (runs:gendat-run-info *runs:general-data*)) - (runs:gendat-run-info-set! *runs:general-data* run-dat)) - (if (not (runs:gendat-runname *runs:general-data*)) - (runs:gendat-runname-set! *runs:general-data* runname)) - (if (not (runs:gendat-target *runs:general-data*)) - (runs:gendat-target-set! 
*runs:general-data* target)) - (for-each - (lambda (testdat) - (let* ((test-id (db:test-get-id testdat)) - (prevdat (hash-table-ref/default (runs:gendat-inc-results *runs:general-data*) - (conc run-id "," test-id) #f)) - (test-name (db:test-get-testname testdat)) - (item-path (db:test-get-item-path testdat)) - (state (db:test-get-state testdat)) - (status (db:test-get-status testdat)) - (event-time (db:test-get-event_time testdat)) - (duration (db:test-get-run_duration testdat))) - (if (and (not (member state '("DELETED" "REMOTEHOSTSTART" "RUNNING" "LAUNCHED""NOT_STARTED"))) - (not (and prevdat - (equal? state (db:test-get-state prevdat)) - (equal? status (db:test-get-status prevdat))))) - (let ((fmt (runs:gendat-inc-results-fmt *runs:general-data*)) - (dtime (seconds->year-work-week/day-time event-time))) - (if (runs:lownoise "inc-print" 600) - (format #t fmt "State" "Status" "Start Time" "Duration" "Test path")) - ;; (debug:print 0 *default-log-port* "fmt: " fmt " state: " state " status: " status " test-name: " test-name " item-path: " item-path " dtime: " dtime) - ;; (debug:print 0 #f "event-time: " event-time " duration: " duration) - (format #t fmt - state - status - dtime - (seconds->hr-min-sec duration) - (conc "lt/" target "/" runname "/" test-name (if (string-null? item-path) "" (conc "/" item-path)))) - (hash-table-set! (runs:gendat-inc-results *runs:general-data*) (conc run-id "," test-id) testdat))))) - testsdat))) - (runs:gendat-inc-results-last-update-set! *runs:general-data* (- curr-sec 10)))) - -;; every time though the loop increment the test/itempatt val. 
-;; when the min is > max-allowed and none running then force exit -;; -(define *max-tries-hash* (make-hash-table)) - -;;====================================================================== -;; runs:run-tests-queue is called by runs:run-tests -;;====================================================================== -;; -;; test-records is a hash table testname:item_path => vector < testname testconfig waitons priority items-info ... > -(define (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests reglen-in all-tests-registry) - ;; At this point the list of parent tests is expanded - ;; NB// Should expand items here and then insert into the run queue. - (debug:print 5 *default-log-port* "test-records: " test-records ", flags: " (hash-table->alist flags)) - - ;; Do mark-and-find clean up of db before starting runing of quue - ;; - ;; (rmt:find-and-mark-incomplete) - - (let* ((run-info (rmt:get-run-info run-id)) - (tests-info (mt:get-tests-for-run run-id #f '() '())) ;; qryvals: "id,testname,item_path")) - (sorted-test-names (tests:sort-by-priority-and-waiton test-records)) - (test-registry (make-hash-table)) - (registry-mutex (make-mutex)) - (num-retries 0) - (max-retries (config-lookup *configdat* "setup" "maxretries")) - (max-concurrent-jobs (configf:lookup-number *configdat* "setup" "max_concurrent_jobs" default: 50)) - (reglen (if (number? 
reglen-in) reglen-in 1)) - (last-time-incomplete (- (current-seconds) 900)) ;; force at least one clean up cycle - (last-time-some-running (current-seconds)) - ;; (tdbdat (tasks:open-db)) - (runsdat (make-runs:dat - ;; hed: hed - ;; tal: tal - ;; reg: reg - ;; reruns: reruns - reglen: reglen - regfull: #f ;; regfull - ;; test-record: test-record - runname: runname - ;; test-name: test-name - ;; item-path: item-path - ;; jobgroup: jobgroup - max-concurrent-jobs: max-concurrent-jobs - run-id: run-id - ;; waitons: waitons - ;; testmode: testmode - test-patts: test-patts - required-tests: required-tests - test-registry: test-registry - registry-mutex: registry-mutex - flags: flags - keyvals: keyvals - run-info: run-info - ;; newtal: newtal - all-tests-registry: all-tests-registry - ;; itemmaps: itemmaps - ;; prereqs-not-met: (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps) - ;; can-run-more-tests: (runs:can-run-more-tests run-id jobgroup max-concurrent-jobs) ;; look at the test jobgroup and tot jobs running - ))) - - ;; Initialize the test-registery hash with tests that already have a record - ;; convert state to symbol and use that as the hash value - (for-each (lambda (trec) - (let ((id (db:test-get-id trec)) - (tn (db:test-get-testname trec)) - (ip (db:test-get-item-path trec)) - (st (db:test-get-state trec))) - (if (not (equal? st "DELETED")) - (hash-table-set! test-registry (db:test-make-full-name tn ip) (string->symbol st))))) - tests-info) - (set! max-retries (if (and max-retries (string->number max-retries))(string->number max-retries) 100)) - - (let loop ((hed (car sorted-test-names)) - (tal (cdr sorted-test-names)) - (reg '()) ;; registered, put these at the head of tal - (reruns '())) - - (runs:incremental-print-results run-id) - - (if (not (null? reruns))(debug:print-info 4 *default-log-port* "reruns=" reruns)) - - ;; Here we mark any old defunct tests as incomplete. 
Do this every fifteen minutes - ;; moving this to a parallel thread and just run it once. - ;; - (if (> (current-seconds)(+ last-time-incomplete 900)) - (begin - (set! last-time-incomplete (current-seconds)) - ;; (rmt:find-and-mark-incomplete-all-runs) - )) - - ;; (print "Top of loop, hed=" hed ", tal=" tal " ,reruns=" reruns) - (let* ((test-record (hash-table-ref test-records hed)) - (test-name (tests:testqueue-get-testname test-record)) - (tconfig (tests:testqueue-get-testconfig test-record)) - (jobgroup (config-lookup tconfig "test_meta" "jobgroup")) - (testmode (let ((m (config-lookup tconfig "requirements" "mode"))) - (if m (map string->symbol (string-split m)) '(normal)))) - (itemmaps (tests:get-itemmaps tconfig)) ;; (configf:lookup tconfig "requirements" "itemmap")) - (priority (tests:testqueue-get-priority test-record)) - (itemdat (tests:testqueue-get-itemdat test-record)) ;; itemdat can be a string, list or #f - (items (tests:testqueue-get-items test-record)) - (item-path (item-list->path itemdat)) - (tfullname (db:test-make-full-name test-name item-path)) - ;; these are hard coded item-item waits test/item-path => test/item-path2 ... - (extra-waits (let* ((section (configf:get-section (tests:testqueue-get-testconfig test-record) "waitons")) - (myextra (alist-ref tfullname section equal?))) - (if myextra - (let ((extras (string-split (car myextra)))) - (if (runs:lownoise (conc tfullname "extra-waitons" tfullname) 60) - (debug:print-info 0 *default-log-port* "HAVE EXTRA WAITONS for test " tfullname ": " myextra)) - (for-each - (lambda (extra) - ;; (debug:print 0 *default-log-port* "FYI: extra = " extra " reruns = " reruns) - (let ((basetestname (car (string-split extra "/")))) - #;(if (not (member extra tal)) - (set! reruns (append tal (list extra)))) - (if (not (member basetestname tal)) - (set! 
reruns (append tal (list basetestname)))) - )) - extras) - extras) - '()))) - (waitons (delete-duplicates (append (tests:testqueue-get-waitons test-record) extra-waits) equal?)) - (newtal (append tal (list hed))) - (regfull (>= (length reg) reglen)) - (num-running (rmt:get-count-tests-running-for-run-id run-id)) - (testdat (make-runs:testdat - hed: hed - tal: tal - reg: reg - reruns: reruns - test-record: test-record - test-name: test-name - item-path: item-path - jobgroup: jobgroup - waitons: waitons - testmode: testmode - newtal: newtal - itemmaps: itemmaps - ;; prereqs-not-met: prereqs-not-met - ))) - (runs:dat-regfull-set! runsdat regfull) - - (if (> num-running 0) - (set! last-time-some-running (current-seconds))) - - (if (> (current-seconds)(+ last-time-some-running (or (configf:lookup *configdat* "setup" "give-up-waiting") 36000))) - (hash-table-set! *max-tries-hash* tfullname (+ (hash-table-ref/default *max-tries-hash* tfullname 0) 1))) - ;; (debug:print 0 *default-log-port* "max-tries-hash: " (hash-table->alist *max-tries-hash*)) - - ;; Ensure all top level tests get registered. This way they show up as "NOT_STARTED" on the dashboard - ;; and it is clear they *should* have run but did not. - (if (not (hash-table-ref/default test-registry (db:test-make-full-name test-name "") #f)) - (begin - (rmt:register-test run-id test-name "") - (hash-table-set! test-registry (db:test-make-full-name test-name "") 'done))) - - ;; Fast skip of tests that are already "COMPLETED" - NO! Cannot do that as the items may not have been expanded yet :( - ;; - (if (member (hash-table-ref/default test-registry tfullname #f) - '(DONOTRUN removed)) ;; *common:cant-run-states-sym*) ;; '(COMPLETED KILLED WAIVED UNKNOWN INCOMPLETE)) - (begin - (if (runs:lownoise (conc "been marked do not run " tfullname) 60) - (debug:print-info 0 *default-log-port* "Skipping test " tfullname " as it has been marked do not run due to being completed or not runnable")) - (if (or (not (null? 
tal))(not (null? reg))) - (loop (runs:queue-next-hed tal reg reglen regfull) - (runs:queue-next-tal tal reg reglen regfull) - (runs:queue-next-reg tal reg reglen regfull) - reruns)))) - ;; (loop (car tal)(cdr tal) reg reruns)))) - - (runs:incremental-print-results run-id) - (debug:print 4 *default-log-port* "TOP OF LOOP => " - "test-name: " test-name - "\n hed: " hed - "\n tal: " tal - "\n reg: " reg - "\n test-record " test-record - "\n itemdat: " itemdat - "\n items: " items - "\n item-path: " item-path - "\n waitons: " waitons - "\n num-retries: " num-retries - "\n reruns: " reruns - "\n regfull: " regfull - "\n reglen: " reglen - "\n length reg: " (length reg) - ) - - ;; check for hed in waitons => this would be circular, remove it and issue an - ;; error - (if (member test-name waitons) - (begin - (debug:print-error 0 *default-log-port* "test " test-name " has listed itself as a waiton, please correct this!") - (set! waiton (filter (lambda (x)(not (equal? x hed))) waitons)))) - - (cond - - ;; We want to catch tests that have waitons that are NOT in the queue and discard them IFF - ;; they have been through the wringer 10 or more times - ((and (list? waitons) - (not (null? waitons)) - (> (hash-table-ref/default *max-tries-hash* tfullname 0) 10) - (not (null? (filter - number? - (map (lambda (waiton) - (if (and (not (member waiton tal)) ;; this waiton is not in the list to be tried to run - (not (member waiton reruns))) - 1 - #f)) - waitons))))) ;; could do this more elegantly with a marker.... - (debug:print-info 4 *default-log-port* "cond branch - " "rtq-1") - (debug:print 0 *default-log-port* "WARNING: Marking test " tfullname " as not runnable. It is waiting on tests that cannot be run. Giving up now.") - (hash-table-set! 
test-registry tfullname 'removed)) - - ;; items is #f then the test is ok to be handed off to launch (but not before) - ;; - ((not items) - (debug:print-info 4 *default-log-port* "cond branch - " "rtq-2") - (debug:print-info 4 *default-log-port* "OUTER COND: (not items)") - (if (and (not (tests:match test-patts (tests:testqueue-get-testname test-record) item-path required: required-tests)) - (not (null? tal))) - (loop (car tal)(cdr tal) reg reruns)) - (runs:testdat-prereqs-not-met-set! testdat (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps)) - (runs:dat-can-run-more-tests-set! runsdat (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs)) ;; look at the test jobgroup and tot jobs running - (let ((loop-list (runs:process-expanded-tests runsdat testdat))) - (if loop-list (apply loop loop-list)))) - - ;; items processed into a list but not came in as a list been processed - ;; - ((and (list? items) ;; thus we know our items are already calculated - (not itemdat)) ;; and not yet expanded into the list of things to be done - (debug:print-info 4 *default-log-port* "cond branch - " "rtq-3") - (debug:print-info 4 *default-log-port* "OUTER COND: (and (list? items)(not itemdat))") - ;; Must determine if the items list is valid. Discard the test if it is not. - (if (and (list? items) - (> (length items) 0) - (and (list? (car items)) - (> (length (car items)) 0)) - (debug:debug-mode 1)) - (debug:print 2 *default-log-port* (map (lambda (row) - (conc (string-intersperse - (map (lambda (varval) - (string-intersperse varval "=")) - row) - " ") - "\n")) - items))) - - (let* ((items-in-testpatt - (filter - (lambda (my-itemdat) - (tests:match test-patts hed (item-list->path my-itemdat) )) - ;; was: (tests:match test-patts hed (item-list->path my-itemdat) required: required-tests)) - items) )) - (if (null? 
items-in-testpatt) - (let ((test-id (rmt:get-test-id run-id test-name ""))) - (debug:print-info 0 *default-log-port* "Test " (tests:testqueue-get-testname test-record) " is itemized but has no items matching test pattern -- marking status ZERO_ITEMS") - (if test-id - (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "ZERO_ITEMS" "This test has no items which match test pattern."))) - - (for-each (lambda (my-itemdat) - (let* ((new-test-record (let ((newrec (make-tests:testqueue))) - (vector-copy! test-record newrec) - newrec)) - (my-item-path (item-list->path my-itemdat)) - - (newtestname (db:test-make-full-name hed my-item-path))) ;; test names are unique on testname/item-path - (tests:testqueue-set-items! new-test-record #f) - (tests:testqueue-set-itemdat! new-test-record my-itemdat) - (tests:testqueue-set-item_path! new-test-record my-item-path) - (hash-table-set! test-records newtestname new-test-record) - (set! tal (append tal (list newtestname))))) ;; since these are itemized create new test names testname/itempath - items-in-testpatt))) - - - - ;; At this point we have possibly added items to tal but all must be handed off to - ;; INNER COND logic. I think loop without rotating the queue - ;; (loop hed tal reg reruns)) - ;; (let ((newtal (append tal (list hed)))) ;; We should discard hed as it has been expanded into it's items? Yes, but only if this *is* an itemized test - ;; (loop (car newtal)(cdr newtal) reg reruns) - (if (null? tal) - #f - (loop (car tal)(cdr tal) reg reruns))) - - ;; if items is a proc then need to run items:get-items-from-config, get the list and loop - ;; - but only do that if resources exist to kick off the job - ;; EXPAND ITEMS - ((or (procedure? items)(eq? items 'have-procedure)) - (debug:print-info 4 *default-log-port* "cond branch - " "rtq-4") - (let ((can-run-more (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs))) - (if (and (list? 
can-run-more) - (car can-run-more)) - (let ((loop-list (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry test-records itemmaps))) ;; itemized test expanded here - (if loop-list - (apply loop loop-list) - (debug:print-info 4 *default-log-port* " -- Can't expand hed="hed) - ) - ) - ;; if can't run more just loop with next possible test - (loop (car newtal)(cdr newtal) reg reruns)))) - - ;; this case should not happen, added to help catch any bugs - ((and (list? items) itemdat) - (debug:print-info 4 *default-log-port* "cond branch - " "rtq-5") - (debug:print-error 0 *default-log-port* "Should not have a list of items in a test and the itemspath set - please report this") - (exit 1)) - ((not (null? reruns)) - (debug:print-info 4 *default-log-port* "cond branch - " "rtq-6") - (let* ((newlst (tests:filter-non-runnable run-id tal test-records)) ;; i.e. not FAIL, WAIVED, INCOMPLETE, PASS, KILLED, - (junked (lset-difference equal? tal newlst))) - (debug:print-info 4 *default-log-port* "full drop through, if reruns is less than 100 we will force retry them, reruns=" reruns ", tal=" tal) - (if (< num-retries max-retries) - (set! newlst (append reruns newlst))) - (set! num-retries (+ num-retries 1)) - ;; (thread-sleep! (+ 1 *global-delta*)) - (if (not (null? newlst)) - ;; since reruns have been tacked on to newlst create new reruns from junked - (loop (car newlst)(cdr newlst) reg (delete-duplicates junked))))) - ((not (null? tal)) - (debug:print-info 4 *default-log-port* "cond branch - " "rtq-7") - (debug:print-info 4 *default-log-port* "I'm pretty sure I shouldn't get here.")) - ((not (null? reg)) ;; could we get here with leftovers? 
- (debug:print-info 4 *default-log-port* "cond branch - " "rtq-8") - (debug:print-info 0 *default-log-port* "Have leftovers!") - (loop (car reg)(cdr reg) '() reruns)) - (else - (debug:print-info 4 *default-log-port* "cond branch - " "rtq-9") - (debug:print-info 4 *default-log-port* "Exiting loop with...\n hed=" hed "\n tal=" tal "\n reruns=" reruns)) - ))) ;; end loop on sorted test names - ;; this is the point where everything is launced and now you can mark the run in metadata table as all launced - (rmt:set-var (conc "lunch-complete-" run-id) "yes") - - ;; now *if* -run-wait we wait for all tests to be done - ;; Now wait for any RUNNING tests to complete (if in run-wait mode) - (thread-sleep! 10) ;; I think there is a race condition here. Let states/statuses settle - (let wait-loop ((num-running (rmt:get-count-tests-running-for-run-id run-id)) - (prev-num-running 0)) - ;; (debug:print-info 13 *default-log-port* "num-running=" num-running ", prev-num-running=" prev-num-running) - (if (and (or (args:get-arg "-run-wait") - (equal? (configf:lookup *configdat* "setup" "run-wait") "yes")) - (> num-running 0)) - (begin - ;; Here we mark any old defunct tests as incomplete. Do this every fifteen minutes - ;; (debug:print 0 *default-log-port* "Got here eh! num-running=" num-running " (> num-running 0) " (> num-running 0)) - (if (> (current-seconds)(+ last-time-incomplete 900)) - (begin - (debug:print-info 0 *default-log-port* "Marking stuck tests as INCOMPLETE while waiting for run " run-id ". Running as pid " (current-process-id) " on " (get-host-name)) - (set! last-time-incomplete (current-seconds)) - (rmt:find-and-mark-incomplete run-id #f))) - (if (not (eq? num-running prev-num-running)) - (debug:print-info 0 *default-log-port* "run-wait specified, waiting on " num-running " tests in RUNNING, REMOTEHOSTSTART or LAUNCHED state at " (time->string (seconds->local-time (current-seconds))))) - (thread-sleep! 
5) - ;; (wait-loop (rmt:get-count-tests-running-for-run-id run-id) num-running)))) - (wait-loop (rmt:get-count-tests-running-for-run-id run-id) num-running)))) - ;; LET* ((test-record - ;; we get here on "drop through". All done! - ;; this is moved to runs:run-testes since this function is getting called twice to ensure everthing is completed. - ;; (debug:print-info 0 *default-log-port* "Calling Post Hook") - ;; (runs:run-post-hook run-id) - (debug:print-info 1 *default-log-port* "All tests launched"))) - -(define (runs:calc-fails prereqs-not-met) - (filter (lambda (test) - (and (vector? test) ;; not (string? test)) - (member (db:test-get-state test) '("INCOMPLETE" "COMPLETED")) ;; TODO: pull from *common:stuff...* - (not (member (db:test-get-status test) - '("PASS" "WARN" "CHECK" "WAIVED" "SKIP"))))) - prereqs-not-met)) - -(define (runs:calc-prereq-fail prereqs-not-met) ;; REMOVEME since NOT_STARTED/PREQ_FAIL is now COMPLETED/PREQ_FAIL - (filter (lambda (test) - (and (vector? test) ;; not (string? test)) - (equal? (db:test-get-state test) "NOT_STARTED") - (not (member (db:test-get-status test) - '("n/a" "KEEP_TRYING"))))) - prereqs-not-met)) - -(define (runs:calc-not-completed prereqs-not-met) - (filter - (lambda (t) - (or (not (vector? t)) - (not (member (db:test-get-state t) '("INCOMPLETE" "COMPLETED"))))) - prereqs-not-met)) - -;; (define (runs:calc-not-completed prereqs-not-met) -;; (filter -;; (lambda (t) -;; (or (not (vector? t)) -;; (not (equal? "COMPLETED" (db:test-get-state t))))) -;; prereqs-not-met)) - -(define (runs:calc-runnable prereqs-not-met) - (filter - (lambda (t) - (or (not (vector? t)) - (and (equal? "NOT_STARTED" (db:test-get-state t)) - (member (db:test-get-status t) - '("n/a" "KEEP_TRYING"))) - (and (equal? "RUNNING" (db:test-get-state t))))) ;; account for a test that is running - prereqs-not-met)) - -(define (runs:pretty-string lst) - (map (lambda (t) - (if (not (vector? 
t)) - (conc t) - (conc (db:test-get-testname t) ":" (db:test-get-state t) "/" (db:test-get-status t)))) - lst)) - -;; parent-test is there as a placeholder for when parent-tests can be run as a setup step -;; -(define (run:test run-id run-info keyvals runname test-record flags parent-test test-registry all-tests-registry) - ;; All these vars might be referenced by the testconfig file reader - (let* ((test-name (tests:testqueue-get-testname test-record)) - (test-waitons (tests:testqueue-get-waitons test-record)) - (test-conf (tests:testqueue-get-testconfig test-record)) - (itemdat (tests:testqueue-get-itemdat test-record)) - (test-path (hash-table-ref all-tests-registry test-name)) ;; (conc *toppath* "/tests/" test-name)) ;; could use tests:get-testconfig here ... - (force (hash-table-ref/default flags "-force" #f)) - (rerun (hash-table-ref/default flags "-rerun" #f)) - (keepgoing (hash-table-ref/default flags "-keepgoing" #f)) - (incomplete-timeout (string->number (or (configf:lookup *configdat* "setup" "incomplete-timeout") "x"))) - (item-path "") - (db #f) - (full-test-name #f)) - - ;; setting itemdat to a list if it is #f - (if (not itemdat)(set! itemdat '())) - (set! item-path (item-list->path itemdat)) - (set! full-test-name (db:test-make-full-name test-name item-path)) - (debug:print-info 4 *default-log-port* - "\nTESTNAME: " full-test-name - "\n test-config: " (hash-table->alist test-conf) - "\n itemdat: " itemdat - ) - (debug:print 2 *default-log-port* "Attempting to launch test " full-test-name) - ;; (setenv "MT_TEST_NAME" test-name) ;; - ;; (setenv "MT_ITEMPATH" item-path) - ;; (setenv "MT_RUNNAME" runname) - (runs:set-megatest-env-vars run-id inrunname: runname testname: test-name itempath: item-path) ;; these may be needed by the launching process - (change-directory *toppath*) - - ;; Here is where the test_meta table is best updated - ;; Yes, another use of a global for caching. Need a better way? 
- ;; - ;; There is now a single call to runs:update-all-test_meta and this - ;; per-test call is not needed. Given the delicacy of the move to - ;; v1.55 this code is being left in place for the time being. - ;; - (if (not (hash-table-ref/default *test-meta-updated* test-name #f)) - (begin - (hash-table-set! *test-meta-updated* test-name #t) - (runs:update-test_meta test-name test-conf))) - - ;; itemdat => ((ripeness "overripe") (temperature "cool") (season "summer")) - (let* ((new-test-path (string-intersperse (cons test-path (map cadr itemdat)) "/")) - (test-id (rmt:get-test-id run-id test-name item-path)) - (testdat (if test-id (rmt:get-test-info-by-id run-id test-id) #f))) - (if (not testdat) - (let loop () - ;; ensure that the path exists before registering the test - ;; NOPE: Cannot! Don't know yet which disk area will be assigned.... - ;; (system (conc "mkdir -p " new-test-path)) - ;; - ;; (open-run-close tests:register-test db run-id test-name item-path) - ;; - ;; NB// for the above line. I want the test to be registered long before this routine gets called! - ;; - (if (not test-id)(set! test-id (rmt:get-test-id run-id test-name item-path))) - (if (not test-id) - (begin - (debug:print 2 *default-log-port* "WARN: Test not pre-created? test-name=" test-name ", item-path=" item-path ", run-id=" run-id) - (rmt:register-test run-id test-name item-path) - (set! test-id (rmt:get-test-id run-id test-name item-path)))) - (debug:print-info 4 *default-log-port* "test-id=" test-id ", run-id=" run-id ", test-name=" test-name ", item-path=\"" item-path "\"") - (set! testdat (rmt:get-test-info-by-id run-id test-id)) - (if (not testdat) - (begin - (debug:print-info 0 *default-log-port* "WARNING: server is overloaded, trying again in one second") - (thread-sleep! 1) - (loop))))) - (if (not testdat) ;; should NOT happen - (debug:print-error 0 *default-log-port* "failed to get test record for test-id " test-id)) - (set! 
test-id (db:test-get-id testdat)) - (if (common:file-exists? test-path) - (change-directory test-path) - (begin - (debug:print-error 0 *default-log-port* "test run path not created before attempting to run the test. Perhaps you are running -remove-runs at the same time?") - (change-directory *toppath*))) - (case (if force ;; (args:get-arg "-force") - 'NOT_STARTED - (if testdat - (string->symbol (test:get-state testdat)) - 'failed-to-insert)) - ((failed-to-insert) - (debug:print-error 0 *default-log-port* "Failed to insert the record into the db")) - ((NOT_STARTED COMPLETED DELETED INCOMPLETE) - (let ((runflag #f)) - (cond - ;; -force, run no matter what - (force (set! runflag #t)) - ;; NOT_STARTED, run no matter what - ((member (test:get-state testdat) '("DELETED" "NOT_STARTED" "INCOMPLETE"))(set! runflag #t)) - ;; not -rerun and PASS, WARN or CHECK, do no run - ((and (or (not rerun) - keepgoing) - ;; Require to force re-run for COMPLETED or *anything* + PASS,WARN or CHECK - (or (member (test:get-status testdat) '("PASS" "WARN" "CHECK" "SKIP" "WAIVED")) - (member (test:get-state testdat) '("COMPLETED")))) - (debug:print-info 2 *default-log-port* "running test " test-name "/" item-path " suppressed as it is " (test:get-state testdat) " and " (test:get-status testdat)) - (hash-table-set! test-registry full-test-name 'DONOTRUN) ;; COMPLETED) - (set! runflag #f)) - ;; -rerun and status is one of the specifed, run it - ((and rerun - (let* ((rerunlst (string-split rerun ",")) - (must-rerun (member (test:get-status testdat) rerunlst))) - (debug:print-info 3 *default-log-port* "-rerun list: " rerun ", test-status: " (test:get-status testdat)", must-rerun: " must-rerun) - must-rerun)) - (debug:print-info 2 *default-log-port* "Rerun forced for test " test-name "/" item-path) - (set! runflag #t)) - ;; -keepgoing, do not rerun FAIL - ((and keepgoing - (member (test:get-status testdat) '("FAIL"))) - (set! 
runflag #f)) - ((and (not rerun) - (member (test:get-status testdat) '("FAIL" "n/a"))) - (set! runflag #t)) - (else (set! runflag #f))) - (debug:print 4 *default-log-port* "RUNNING => runflag: " runflag " STATE: " (test:get-state testdat) " STATUS: " (test:get-status testdat)) - (if (not runflag) - (if (not parent-test) - (if (runs:lownoise (conc "not starting test" full-test-name) 60) - (debug:print 1 *default-log-port* "NOTE: Not starting test " full-test-name " as it is state \"" (test:get-state testdat) - "\" and status \"" (test:get-status testdat) "\", use -rerun \"" (test:get-status testdat) - "\" or -force to override"))) - ;; NOTE: No longer be checking prerequisites here! Will never get here unless prereqs are - ;; already met. - ;; This would be a great place to do the process-fork - ;; - (let ((skip-test #f) - (skip-check (configf:get-section test-conf "skip"))) - (cond - ;; Have to check for skip conditions. This one skips if there are same-named tests - ;; currently running - ((and skip-check - (configf:lookup test-conf "skip" "prevrunning")) - ;; run-ids = #f means *all* runs - (let ((running-tests (rmt:get-tests-for-runs-mindata #f full-test-name '("RUNNING" "REMOTEHOSTSTART" "LAUNCHED") '() #f))) - (if (not (null? running-tests)) ;; have to skip - (set! skip-test "Skipping due to previous tests running")))) - ((and skip-check - (configf:lookup test-conf "skip" "fileexists")) - (if (common:file-exists? (configf:lookup test-conf "skip" "fileexists")) - (set! 
skip-test (conc "Skipping due to existance of file " (configf:lookup test-conf "skip" "fileexists"))))) - ((and skip-check - (configf:lookup test-conf "skip" "rundelay")) - ;; run-ids = #f means *all* runs - (let* ((numseconds (common:hms-string->seconds (configf:lookup test-conf "skip" "rundelay"))) - (running-tests (rmt:get-tests-for-runs-mindata #f full-test-name '("RUNNING" "REMOTEHOSTSTART" "LAUNCHED") '() #f)) - (completed-tests (rmt:get-tests-for-runs-mindata #f full-test-name '("COMPLETED" "INCOMPLETE") '("PASS" "FAIL" "ABORT") #f)) ;; ironically INCOMPLETE is same as COMPLETED in this contex - (last-run-times (map db:mintest-get-event_time completed-tests)) - (time-since-last (- (current-seconds) (if (null? last-run-times) 0 (common:max last-run-times))))) - (if (or (not (null? running-tests)) ;; have to skip if test is running - (> numseconds time-since-last)) - (set! skip-test (conc "Skipping due to previous test run less than " (configf:lookup test-conf "skip" "rundelay") " ago")))))) - - (if skip-test - (begin - (mt:test-set-state-status-by-id run-id test-id "COMPLETED" "SKIP" skip-test) - (debug:print-info 1 *default-log-port* "SKIPPING Test " full-test-name " due to " skip-test)) - ;; - ;; Here the test is handed off to launch.scm for launch-test to complete the launch process - ;; - (if (not (launch-test test-id run-id run-info keyvals runname test-conf test-name test-path itemdat flags)) - (begin - (print "ERROR: Failed to launch the test. Exiting as soon as possible") - (set! *globalexitstatus* 1) ;; - (process-signal (current-process-id) signal/kill)))))))) - ((KILLED) - (debug:print 1 *default-log-port* "NOTE: " full-test-name " is already running or was explictly killed, use -force to launch it.") - (hash-table-set! 
test-registry (db:test-make-full-name test-name test-path) 'DONOTRUN)) ;; KILLED)) - ((LAUNCHED REMOTEHOSTSTART RUNNING) - (debug:print 2 *default-log-port* "NOTE: " test-name " is already running")) - ;; (if (> (- (current-seconds)(+ (db:test-get-event_time testdat) - ;; (db:test-get-run_duration testdat))) - ;; (or incomplete-timeout - ;; 6000)) ;; i.e. no update for more than 6000 seconds - ;; (begin - ;; (debug:print 0 *default-log-port* "WARNING: Test " test-name " appears to be dead. Forcing it to state INCOMPLETE and status STUCK/DEAD") - ;; (tests:test-set-status! run-id test-id "INCOMPLETE" "STUCK/DEAD" "" #f)) - ;; ;; (tests:test-set-status! test-id "INCOMPLETE" "STUCK/DEAD" "" #f)) - ;; (debug:print 2 *default-log-port* "NOTE: " test-name " is already running"))) - (else - (debug:print-error 0 *default-log-port* "Failed to launch test " full-test-name ". Unrecognised state " (test:get-state testdat)) - (case (string->symbol (test:get-state testdat)) - ((COMPLETED INCOMPLETE) - (hash-table-set! test-registry (db:test-make-full-name test-name test-path) 'DONOTRUN)) - (else - (hash-table-set! test-registry (db:test-make-full-name test-name test-path) 'DONOTRUN)))))))) - -;;====================================================================== -;; END OF NEW STUFF -;;====================================================================== - -(define (get-dir-up-n dir . params) - (let ((dparts (string-split dir "/")) - (count (if (null? 
params) 1 (car params)))) - (conc "/" (string-intersperse - (take dparts (- (length dparts) count)) - "/")))) - -(define (runs:recursive-delete-with-error-msg real-dir) - (if (> (system (conc "rm -rf " real-dir)) 0) - (begin - ;; FAILED, possibly due to permissions, do chmod a+rwx then try one more time - (system (conc "chmod -R a+rwx " real-dir)) - (if (> (system (conc "rm -rf " real-dir)) 0) - (debug:print-error 0 *default-log-port* "There was a problem removing " real-dir " with rm -f"))))) - -(define (runs:safe-delete-test-dir real-dir) - ;; first delete all sub-directories - (directory-fold - (lambda (f x) - (let ((fullname (conc real-dir "/" f))) - (if (directory? fullname)(runs:recursive-delete-with-error-msg fullname))) - (+ 1 x)) - 0 real-dir) - ;; then files other than *testdat.db* - (directory-fold - (lambda (f x) - (let ((fullname (conc real-dir "/" f))) - (if (not (string-search (regexp "testdat.db") f)) - (runs:recursive-delete-with-error-msg fullname))) - (+ 1 x)) - 0 real-dir) - ;; then the entire directory - (runs:recursive-delete-with-error-msg real-dir)) - -;; cleanup often needs to remove all but the last N runs per target -;; -;; target-patts a1/b1/c1,a2/b2/c2 ... -;; -;; This will fail if called with empty target or a bad target (i.e. missing or extra fields) -;; -(define (runs:get-hash-by-target target-patts runpatt) - (let* ((targets (string-split target-patts ",")) - (keys (rmt:get-keys)) - (res-ht (make-hash-table))) ;; target -> ( runrecord1 runrecord2 ... ) - (for-each - (lambda (target-patt) - (let ((runs (rmt:simple-get-runs runpatt #f #f target-patt))) - (for-each - (lambda (run) - (let ((target (simple-run-target run))) - (hash-table-set! res-ht target (cons run (hash-table-ref/default res-ht target '()))))) - runs))) - targets) - res-ht)) - -;; delete runs older than X (weeks, days, months years etc.) 
-;; delete redundant runs within a target - N is the input -;; delete redundant runs within a target IFF older than given date/time AND keep at least N -;; -(define (runs:remove-all-but-last-n-runs-per-target target-patts runpatt num-to-keep #!key (actions '(print))) - (let* ((runs-ht (runs:get-hash-by-target target-patts runpatt)) - (age (if (args:get-arg "-age")(common:hms-string->seconds (args:get-arg "-age")) #f)) - (age-mark (if age (- (current-seconds) age) (+ (current-seconds) 86400))) - (precmd (or (args:get-arg "-precmd") ""))) - (print "Actions: " actions) - (for-each - (lambda (target) - (let* ((runs (hash-table-ref runs-ht target)) - (sorted (sort runs (lambda (a b)(< (simple-run-event_time a)(simple-run-event_time b))))) - (to-remove (let* ((len (length sorted)) - (trim-amt (- len num-to-keep))) - (if (> trim-amt 0) - (take sorted trim-amt) - '())))) - (hash-table-set! runs-ht target to-remove) - (print target ":") - (for-each - (lambda (run) - (let ((remove (member run to-remove (lambda (a b) - (eq? 
(simple-run-id a) - (simple-run-id b)))))) - (if (and age (> (simple-run-event_time run) age-mark)) - (print "Skipping handling of " target "/" (simple-run-runname run) " as it is younger than " (args:get-arg "-age")) - (for-each - (lambda (action) - (case action - ((print) - (print " " (simple-run-runname run) - " " (time->string (seconds->local-time (simple-run-event_time run)) "WW%V.%u %H:%M:%S") - " " (if remove "REMOVE" ""))) - ((remove-runs) - (if remove (system (conc precmd " megatest -remove-runs -target " target " -runname " (simple-run-runname run) " -testpatt %" - (if (member 'kill-runs actions) ;; if kill-runs is specified then set -kill-wait to 0 - " -kill-wait 0" - ""))))) - ((archive) - (if remove (system (conc precmd " megatest -archive save-remove -target " target " -runname " (simple-run-runname run) " -testpatt %")))) - ((kill-runs) - (if remove (system (conc precmd " megatest -kill-runs -target " target " -runname " (simple-run-runname run) " -testpatt %")))) - )) - actions)))) - sorted))) - ;; (print "Sorted: " (map simple-run-event_time sorted)) - ;; (print "Remove: " (map simple-run-event_time to-remove)))) - (hash-table-keys runs-ht)) - runs-ht)) - -;; (define (runs:remove-all-but-last-n-runs-per-target target-patts runpatt num-to-keep) -;; (let ((data (runs:get-all-but-most-recent-n-per-target target-patts runpatt num-to-keep))) -;; (for-each -;; (lambda (target) -;; (let ((runs-to-remove (hash-table-ref data target ))) -;; (for-each -;; (lambda (run) -;; (print "megatest -remove-runs -target " target " -runname " (simple-run-runname run) " -testpatt %")) -;; runs-to-remove))) -;; (hash-table-keys data)))) - -;; Remove runs -;; fields are passing in through -;; action: -;; 'remove-runs -;; 'set-state-status -;; -;; NB// should pass in keys? 
-;; -(define (runs:operate-on action target runnamepatt testpatt #!key (state #f)(status #f)(new-state-status #f)(mode #f)(options '())) - (common:clear-caches) ;; clear all caches - (let* ((db #f) - ;; (tdbdat (tasks:open-db)) - (keys (rmt:get-keys)) - (rundat (mt:get-runs-by-patt keys runnamepatt target)) - (header (vector-ref rundat 0)) - (runs (vector-ref rundat 1)) - (states (if state (string-split state ",") '())) - (statuses (if status (string-split status ",") '())) - (state-status (if (string? new-state-status) (string-split new-state-status ",") '(#f #f))) - (rp-mutex (make-mutex)) - (bup-mutex (make-mutex)) - (keep-records (args:get-arg "-keep-records"))) ;; used in conjunction with -remove-runs to keep the records, TODO: consolidate this with "mode". - - (let* ((write-access-actions '(remove-runs set-state-status archive run-wait kill-runs)) - (dbfile (conc *toppath* "/megatest.db")) - (readonly-mode (not (file-write-access? dbfile)))) - (when (and readonly-mode - (member action write-access-actions)) - (debug:print-error 0 *default-log-port* "megatest.db is readonly. Cannot proceed with action ["action"] in which write-access isrequired .") - (exit 1))) - - (debug:print-info 4 *default-log-port* "runs:operate-on => Header: " header " action: " action " new-state-status: " new-state-status) - (if (> 2 (length state-status)) - (begin - (debug:print-error 0 *default-log-port* "the parameter to -set-state-status is a comma delimited string. E.g. 
COMPLETED,FAIL") - (exit))) - (for-each - (lambda (run) - (let ((runkey (string-intersperse (map (lambda (k) - (db:get-value-by-header run header k)) keys) "/")) - (dirs-to-remove (make-hash-table)) - (proc-get-tests (lambda (run-id) - (mt:get-tests-for-run run-id - testpatt states statuses - not-in: #f - sort-by: (case action - ((remove-runs) 'rundir) - (else 'event_time)))))) - (let* ((run-id (db:get-value-by-header run header "id")) - (run-state (db:get-value-by-header run header "state")) - (run-name (db:get-value-by-header run header "runname")) - (tests (if (not (equal? run-state "locked")) - (proc-get-tests run-id) - '())) - (lasttpath "/does/not/exist/I/hope") - (worker-thread #f)) - (debug:print-info 4 *default-log-port* "runs:operate-on run=" run ", header=" header) - (if (not (null? tests)) - (begin - (case action - ((kill-runs) - (tasks:kill-runner target run-name "%") - (debug:print 1 *default-log-port* "Killing tests for run: " runkey " " (db:get-value-by-header run header "runname")) - ) - ((remove-runs) - ;; (if (tasks:need-server run-id)(tasks:start-and-wait-for-server tdbdat run-id 10)) - ;; seek and kill in flight -runtests with % as testpatt here - ;; (if (equal? 
testpatt "%") - (tasks:kill-runner target run-name testpatt) - ;; (debug:print 0 *default-log-port* "not attempting to kill any run launcher processes as testpatt is " testpatt)) - (debug:print 1 *default-log-port* "Removing tests for run: " runkey " " (db:get-value-by-header run header "runname"))) - ((set-state-status) - ;; (if (tasks:need-server run-id)(tasks:start-and-wait-for-server tdbdat run-id 10)) - (debug:print 1 *default-log-port* "Modifying state and staus for tests for run: " runkey " " (db:get-value-by-header run header "runname"))) - ((print-run) - (debug:print 1 *default-log-port* "Printing info for run " runkey ", run=" run ", tests=" tests ", header=" header) - action) - ((run-wait) - (debug:print 1 *default-log-port* "Waiting for run " runkey ", run=" runnamepatt " to complete")) - ((archive) - (debug:print 1 *default-log-port* "Archiving/restoring (" (args:get-arg "-archive") ") data for run: " runkey " " (db:get-value-by-header run header "runname")) - (set! worker-thread - (make-thread - (lambda () - (case (string->symbol (args:get-arg "-archive")) - ((save save-remove keep-html) - (archive:run-bup (args:get-arg "-archive") run-id run-name tests rp-mutex bup-mutex)) - ((restore) - (archive:bup-restore (args:get-arg "-archive") run-id run-name tests rp-mutex bup-mutex)) - (else - (debug:print-error 0 *default-log-port* "unrecognised sub command to -archive. Run \"megatest\" to see help") - (exit)))) - "archive-bup-thread")) - (thread-start! worker-thread)) - (else - (debug:print-info 0 *default-log-port* "action not recognised " action))) - - ;; actions that operate on one test at a time can be handled below - ;; - (let ((sorted-tests (filter - vector? - (sort tests (lambda (a b)(let ((dira ;; (rmt:sdb-qry 'getstr - (db:test-get-rundir a)) ;; ) ;; (filedb:get-path *fdb* (db:test-get-rundir a))) - (dirb ;; (rmt:sdb-qry 'getstr - (db:test-get-rundir b))) ;; ) ;; ((filedb:get-path *fdb* (db:test-get-rundir b)))) - (if (and (string? dira)(string? 
dirb)) - (> (string-length dira)(string-length dirb)) - #f)))))) - (toplevel-retries (make-hash-table)) ;; try three times to loop through and remove top level tests - (test-retry-time (make-hash-table)) - (backgrounded-remove-status (make-hash-table)) - (backgrounded-remove-last-visit (make-hash-table)) - (backgrounded-remove-result (make-hash-table)) - (allow-run-time (string->number (or (args:get-arg "-kill-wait") "10")))) ;; seconds to allow for killing tests before just brutally killing 'em - (let loop ((test (car sorted-tests)) - (tal (cdr sorted-tests))) - (let* ((test-id (db:test-get-id test)) - (new-test-dat (rmt:get-test-info-by-id run-id test-id))) - (if (not new-test-dat) - (begin - (debug:print-error 0 *default-log-port* "We have a test-id of " test-id " but no record was found. NOTE: No locking of records is done between processes, do not simultaneously remove the same run from two processes!") - (if (not (null? tal)) - (loop (car tal)(cdr tal)))) - (let* ((item-path (db:test-get-item-path new-test-dat)) - (test-name (db:test-get-testname new-test-dat)) - (run-dir ;;(filedb:get-path *fdb* - ;; (rmt:sdb-qry 'getid - (db:test-get-rundir new-test-dat)) ;; ) ;; run dir is from the link tree - (has-subrun (and (subrun:subrun-test-initialized? run-dir) - (not (subrun:subrun-removed? run-dir)))) - (test-state (db:test-get-state new-test-dat)) - (test-status (db:test-get-status new-test-dat)) - (test-fulln (db:test-get-fullname new-test-dat)) - (uname (db:test-get-uname new-test-dat)) - (toplevel-with-children (and (db:test-get-is-toplevel test) - (> (rmt:test-toplevel-num-items run-id test-name) 0)))) - - (case action - ((remove-runs) - ;; if the test is a toplevel-with-children issue an error and do not remove - (cond - (toplevel-with-children - (debug:print 0 *default-log-port* "WARNING: skipping removal of " test-fulln " with run-id " run-id " as it has sub tests") - (hash-table-set! 
toplevel-retries test-fulln (+ (hash-table-ref/default toplevel-retries test-fulln 0) 1)) - (if (> (hash-table-ref toplevel-retries test-fulln) 3) - (if (not (null? tal)) - (loop (car tal)(cdr tal))) ;; no else clause - drop it if no more in queue and > 3 tries - (let ((newtal (append tal (list test)))) - (loop (car newtal)(cdr newtal))))) ;; loop with test still in queue - (has-subrun - ;; - (let ((last-visit (hash-table-ref/default backgrounded-remove-last-visit test-fulln 0)) - (now (current-seconds)) - (rem-status (hash-table-ref/default backgrounded-remove-status test-fulln 'not-started))) - (case rem-status - ((not-started) - (debug:print 0 *default-log-port* "WARNING: postponing removal of " test-fulln " with run-id " run-id " as it has a subrun") - (hash-table-set! backgrounded-remove-status test-fulln 'started) - (hash-table-set! backgrounded-remove-last-visit test-fulln (current-seconds)) - (common:send-thunk-to-background-thread - (lambda () - (let* ((subrun-remove-succeeded - (subrun:remove-subrun run-dir keep-records))) - (hash-table-set! backgrounded-remove-result test-fulln subrun-remove-succeeded) - (hash-table-set! backgrounded-remove-status test-fulln 'done))) - name: (conc "remove-subrun:"test-fulln)) - - ;; send to back of line, loop - (let ((newtal (append tal (list test)))) - (loop (car newtal)(cdr newtal))) - ) - ((started) - ;; if last visit was within last second, sleep 1 second - (if (< (- now last-visit) 1.0) - (thread-sleep! 1.0)) - (hash-table-set! backgrounded-remove-last-visit test-fulln (current-seconds)) - ;; send to back of line, loop - (let ((newtal (append tal (list test)))) - (loop (car newtal)(cdr newtal))) - ) - ((done) - ;; drop this one; if remaining, loop, else finish - (hash-table-set! backgrounded-remove-last-visit test-fulln (current-seconds)) - (let ((subrun-remove-succeeded (hash-table-ref/default backgrounded-remove-result test-fulln 'exception))) - (cond - ((eq? 
subrun-remove-succeeded 'exception) - (let* ((logfile (subrun:get-log-path run-dir "remove"))) - (debug:print 0 *default-log-port* "ERROR: removing subrun of of " test-fulln " with run-id " run-id " ; see logfile @ "logfile)) - (if (not (null? tal)) - (loop (car tal)(cdr tal)))) - (subrun-remove-succeeded - (debug:print 0 *default-log-port* "Now removing of " test-fulln " with run-id " run-id " since subrun was removed.") - ;;(runs:remove-test-directory new-test-dat mode) ;; let normal case handle this. it will go thru loop again as non-subrun - (let ((newtal (append tal (list test)))) - (loop (car newtal)(cdr newtal)))) - (else - (let* ((logfile (subrun:get-log-path run-dir "remove"))) - (debug:print 0 *default-log-port* "WARNING: removal of subrun failed. Please check "logfile" for details.")) - ;; send to back of line, loop (will not match has-subrun next time through) - (if (not (null? tal)) - (loop (car tal)(cdr tal)))))) - ) - ) ; end case rem-status - ) ; end let - ); end cond has-subrun - - (else - ;; BB - TODO - consider backgrounding to threads to delete tests (work below) - (debug:print-info 0 *default-log-port* "test: " test-name " itest-state: " test-state) - (if (member test-state (list "RUNNING" "LAUNCHED" "REMOTEHOSTSTART" "KILLREQ")) - (begin - (if (not (hash-table-ref/default test-retry-time test-fulln #f)) - (begin - ;; want to set to REMOVING BUT CANNOT do it here? - (hash-table-set! test-retry-time test-fulln (current-seconds)))) - (if (> (- (current-seconds)(hash-table-ref test-retry-time test-fulln)) allow-run-time) - ;; This test is not in a correct state for cleaning up. Let's try some graceful shutdown steps first - ;; Set the test to "KILLREQ" and wait five seconds then try again. Repeat up to five times then give - ;; up and blow it away. - (begin - (debug:print 0 *default-log-port* "WARNING: could not gracefully remove test " test-fulln ", tried to kill it to no avail. 
Forcing state to FAILEDKILL and continuing") - (mt:test-set-state-status-by-id run-id (db:test-get-id test) "FAILEDKILL" "n/a" #f) - (thread-sleep! 1)) - (begin - (mt:test-set-state-status-by-id run-id (db:test-get-id test) "KILLREQ" "n/a" #f) - (thread-sleep! 1))) - ;; NOTE: This is suboptimal as the testdata will be used later and the state/status may have changed ... - (if (null? tal) - (loop new-test-dat tal) - (loop (car tal)(append tal (list new-test-dat))))) - (begin - (runs:remove-test-directory new-test-dat mode) ;; 'remove-all) - (if (not (null? tal)) - (loop (car tal)(cdr tal))))))) - (rmt:update-run-stats run-id (rmt:get-raw-run-stats run-id))) - ((kill-runs) - ;; RUNNING -> KILLREQ - ;; LAUNCHED,RUNNING,REMOTEHOSTSTART -> NOT STARTED - (cond - ((and has-subrun (member test-state (list "RUNNING" "LAUNCHED" "REMOTEHOSTSTART" "KILLREQ"))) - (common:send-thunk-to-background-thread - (lambda () - (let* ((subrun-remove-succeeded - (subrun:kill-subrun run-dir keep-records))) - #t))) - (if (not (null? tal)) - (loop (car tal)(cdr tal))) - ) - ((member test-state (list "RUNNING" "LAUNCHED" "REMOTEHOSTSTART" "KILLREQ")) - (debug:print 1 *default-log-port* "INFO: issuing killreq to test "test-fulln) - (mt:test-set-state-status-by-id run-id (db:test-get-id test) "KILLREQ" "n/a" #f) - (if (not (null? tal)) - (loop (car tal)(cdr tal)))) - ((and (member test-status '("PREQ_FAIL" "PREQ_DISCARDED" "BLOCKED" "ZERO_ITEMS" "KEEP_TRYING" "TEN_STRIKES" "TIMED_OUT"))) - (rmt:set-state-status-and-roll-up-items run-id (db:test-get-id test) 'foo "NOT_STARTED" "n/a" (conc "kill-run moved from "test-state":"test-status" to NOT_STARTED:n/a")) - ;;(mt:test-set-state-status-by-id run-id (db:test-get-id test) "NOT_STARTED" "n/a" (conc "kill-run moved from "test-state":"test-status" to NOT_STARTED:n/a")) - (if (not (null? tal)) - (loop (car tal)(cdr tal))) - ) - (else - (if (not (null? 
tal)) - (loop (car tal)(cdr tal))) - ))) - ((set-state-status) - (let* ((new-state (car state-status)) - (new-status (cadr state-status)) - (test-id (db:test-get-id test)) - (test-run-dir (db:test-get-rundir new-test-dat)) - (has-subrun (and (subrun:subrun-test-initialized? test-run-dir) - (not (subrun:subrun-removed? test-run-dir))))) - (when has-subrun - (common:send-thunk-to-background-thread - (lambda () - (subrun:set-state-status test-run-dir state status new-state-status) - ) - ) - ) - (debug:print-info 2 *default-log-port* "new state " new-state ", new status " new-status ) - (mt:test-set-state-status-by-id run-id test-id new-state new-status #f)) - (if (not (null? tal)) - (loop (car tal)(cdr tal)))) - ((run-wait) - ;; BB TODO - manage has-subrun case - (debug:print-info 2 *default-log-port* "still waiting, " (length tests) " tests still running") - (thread-sleep! 10) - (let ((new-tests (proc-get-tests run-id))) - (if (null? new-tests) - (debug:print-info 1 *default-log-port* "Run completed according to zero tests matching provided criteria.") - (loop (car new-tests)(cdr new-tests))))) - ((archive) - ;; BB TODO - manage has-subrun case - (if (and run-dir (not toplevel-with-children)) - (let ((ddir (conc run-dir "/"))) - (case (string->symbol (args:get-arg "-archive")) - ((save save-remove keep-html) - (if (common:file-exists? ddir) - (debug:print-info 0 *default-log-port* "Estimating disk space usage for " test-fulln ": " (common:get-disk-space-used ddir))))))) - (if (not (null? tal)) - (loop (car tal)(cdr tal)))) - ))) - ) - (if worker-thread (thread-join! worker-thread))) - (common:join-backgrounded-threads)))) - ;; remove the run if zero tests remain - (if (eq? action 'remove-runs) - (let* ((run-id (db:get-value-by-header run header "id")) ;; NB// masks run-id from above? - (remtests (mt:get-tests-for-run run-id #f '("DELETED") '("n/a") not-in: #t))) - (if (null? 
remtests) ;; no more tests remaining - (let* ((dparts (string-split lasttpath "/")) - (runpath (conc "/" (string-intersperse - (take dparts (- (length dparts) 1)) - "/")))) - (debug:print 1 *default-log-port* "Removing run: " runkey " " (db:get-value-by-header run header "runname") " and related record") - (if (not keep-records) - (begin - (rmt:delete-run run-id) - (rmt:delete-old-deleted-test-records))) - ;; (rmt:set-var "DELETED_TESTS" (current-seconds)) - ;; need to figure out the path to the run dir and remove it if empty - ;; (if (null? (glob (conc runpath "/*"))) - ;; (begin - ;; (debug:print 1 *default-log-port* "Removing run dir " runpath) - ;; (system (conc "rmdir -p " runpath)))) - ))))) - )) - runs) - ;; (sqlite3:finalize! (db:delay-if-busy tdbdat)) - ) - #t) - -(define (runs:remove-test-directory test mode) ;; remove-data-only) - (let* ((run-dir (db:test-get-rundir test)) ;; run dir is from the link tree - (real-dir (if (common:file-exists? run-dir) - ;; (resolve-pathname run-dir) - (common:nice-path run-dir) - #f)) - (clean-mode (or mode 'remove-all)) - (test-id (db:test-get-id test)) - ;; (lock-key (conc "test-" test-id)) - ;; (got-lock (let loop ((lock (rmt:no-sync-get-lock lock-key)) - ;; (expire-time (+ (current-seconds) 30))) ;; give up on getting the lock and steal it after 15 seconds - ;; (if (car lock) - ;; #t - ;; (if (> (current-seconds) expire-time) - ;; (begin - ;; (debug:print-info 0 *default-log-port* "Timed out waiting for a lock to clean test with id " test-id) - ;; (rmt:no-sync-del! lock-key) ;; destroy the lock - ;; (loop (rmt:no-sync-get-lock lock-key) expire-time)) ;; - ;; (begin - ;; (thread-sleep! 
1) - ;; (loop (rmt:no-sync-get-lock lock-key) expire-time))))))) - ) - (case clean-mode - ((remove-data-only)(mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "CLEANING" "LOCKED" #f)) - ((remove-all) (mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "REMOVING" "LOCKED" #f)) - ((archive-remove) (mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "ARCHIVE_REMOVING" #f #f))) - (debug:print-info 1 *default-log-port* "Attempting to remove " (if real-dir (conc " dir " real-dir " and ") "") " link " run-dir) - (if (and real-dir - (> (string-length real-dir) 5) - (common:file-exists? real-dir)) ;; bad heuristic but should prevent /tmp /home etc. - (begin ;; let* ((realpath (resolve-pathname run-dir))) - (debug:print-info 1 *default-log-port* "Recursively removing " real-dir) - (if (common:file-exists? real-dir) - (runs:safe-delete-test-dir real-dir) - (debug:print 0 *default-log-port* "WARNING: test dir " real-dir " appears to not exist or is not readable"))) - (if real-dir - (debug:print 0 *default-log-port* "WARNING: directory " real-dir " does not exist") - (debug:print 0 *default-log-port* "WARNING: no real directory corrosponding to link " run-dir ", nothing done"))) - (if (symbolic-link? run-dir) - (begin - (debug:print-info 1 *default-log-port* "Removing symlink " run-dir) - (handle-exceptions - exn - (debug:print-error 0 *default-log-port* " Failed to remove symlink " run-dir ((condition-property-accessor 'exn 'message) exn) ", attempting to continue") - (delete-file run-dir))) - (if (directory? 
run-dir) - (if (> (directory-fold (lambda (f x)(+ 1 x)) 0 run-dir) 0) - (debug:print 0 *default-log-port* "WARNING: refusing to remove " run-dir " as it is not empty") - (handle-exceptions - exn - (debug:print-error 0 *default-log-port* " Failed to remove directory " run-dir ((condition-property-accessor 'exn 'message) exn) ", attempting to continue") - (delete-directory run-dir))) - (if (and run-dir - (not (member run-dir (list "n/a" "/tmp/badname")))) - (debug:print 0 *default-log-port* "WARNING: not removing " run-dir " as it either doesn't exist or is not a symlink") - (debug:print 0 *default-log-port* "NOTE: the run dir for this test is undefined. Test may have already been deleted.")) - )) - ;; Only delete the records *after* removing the directory. If things fail we have a record - (case clean-mode - ((remove-data-only)(mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) (db:test-get-state test)(db:test-get-status test) #f)) - ((archive-remove) (mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "ARCHIVED" #f #f)) - (else (rmt:delete-test-records (db:test-get-run_id test) (db:test-get-id test)))) - ;; (rmt:no-sync-del! 
lock-key) - )) - -;;====================================================================== -;; Routines for manipulating runs -;;====================================================================== - -;; Since many calls to a run require pretty much the same setup -;; this wrapper is used to reduce the replication of code -(define (general-run-call switchname action-desc proc) - (let ((runname (or (args:get-arg "-runname")(args:get-arg ":runname"))) - (target (common:args-get-target))) - (cond - ((not target) - (debug:print-error 0 *default-log-port* "Missing required parameter for " switchname ", you must specify the target with -target") - (exit 3)) - ((not runname) - (debug:print-error 0 *default-log-port* "Missing required parameter for " switchname ", you must specify the run name with -runname runname") - (exit 3)) - (else - (let (;; (db #f) - (keys #f)) - (if (launch:setup) - (begin - (full-runconfigs-read) ;; cache the run config - ;; (launch:cache-config) ;; there are two independent config cache locations, turning this one off for now. MRW. - ) ;; do not cache here - need to be sure runconfigs is processed - (begin - (debug:print 0 *default-log-port* "Failed to setup, exiting") - (exit 1))) - - - (set! keys (keys:config-get-fields *configdat*)) - ;; have enough to process -target or -reqtarg here - (if (args:get-arg "-reqtarg") - (let* ((runconfigf (conc *toppath* "/runconfigs.config")) ;; DO NOT EVALUATE ALL - (runconfig (read-config runconfigf #f #t environ-patt: #f))) - (if (hash-table-ref/default runconfig (args:get-arg "-reqtarg") #f) - (keys:target-set-args keys (args:get-arg "-reqtarg") args:arg-hash) - - (begin - (debug:print-error 0 *default-log-port* "[" (args:get-arg "-reqtarg") "] not found in " runconfigf) - ;; (if db (sqlite3:finalize! 
db)) - (exit 1) - ))) - (if (args:get-arg "-target") - (keys:target-set-args keys (args:get-arg "-target" args:arg-hash) args:arg-hash))) - (if (not (car *configinfo*)) - (begin - (debug:print-error 0 *default-log-port* "Attempted to " action-desc " but run area config file not found") - (exit 1)) - ;; Extract out stuff needed in most or many calls - ;; here then call proc - (let* ((keyvals (keys:target->keyval keys target))) - (proc target runname keys keyvals))) - ;; (if db (sqlite3:finalize! db)) - (set! *didsomething* #t)))))) - -;;====================================================================== -;; Lock/unlock runs -;;====================================================================== - -(define (runs:handle-locking target keys runname lock unlock user) - (let* ((db #f) - (rundat (mt:get-runs-by-patt keys runname target)) - (header (vector-ref rundat 0)) - (runs (vector-ref rundat 1))) - (for-each (lambda (run) - (let ((run-id (db:get-value-by-header run header "id")) - (str (if lock - "lock" - "unlock"))) - (if (or lock - (and unlock - (or (args:get-arg "-force") - (begin - (print "Do you really wish to unlock run " run-id "?\n y/n: ") - (equal? "y" (read-line)))))) - (begin - (rmt:lock/unlock-run run-id lock unlock user) - (debug:print-info 0 *default-log-port* "Done " str " on run id " run-id)) - (debug:print-info 0 *default-log-port* "Skipping lock/unlock on " run-id)))) - runs))) -;;====================================================================== -;; Rollup runs -;;====================================================================== - -;; Update the test_meta table for this test -(define (runs:update-test_meta test-name test-conf) - (let ((currrecord (rmt:testmeta-get-record test-name))) - (if (not currrecord) - (begin - (set! 
currrecord (make-vector 11 #f)) - (rmt:testmeta-add-record test-name))) - (for-each - (lambda (key) - (let* ((idx (cadr key)) - (fld (car key)) - (val (config-lookup test-conf "test_meta" fld))) - ;; (debug:print 5 *default-log-port* "idx: " idx " fld: " fld " val: " val) - (if (and val (not (equal? (vector-ref currrecord idx) val))) - (begin - (print "Updating " test-name " " fld " to " val) - (rmt:testmeta-update-field test-name fld val))))) - '(("author" 2)("owner" 3)("description" 4)("reviewed" 5)("tags" 9)("jobgroup" 10))))) - -;; find tests with matching tags, tagpatt is a string "tagpatt1,tagpatt2%, ..." -;; -(define (runs:get-tests-matching-tags tagpatt) - (let* ((tagdata (rmt:get-tests-tags)) - (res '())) ;; list of tests that match one or more tags - (for-each - (lambda (row) - (let* ((tag (car row)) - (tests (cdr row))) - (if (patt-list-match tag tagpatt) - (set! res (append tests res))))) - tagdata) - res)) - - -;; Update test_meta for all tests -(define (runs:update-all-test_meta db) - (let ((test-names (tests:get-all))) ;; (tests:get-valid-tests))) - (for-each - (lambda (test-name) - (let* ((test-conf (mt:lazy-read-test-config test-name))) - (if test-conf (runs:update-test_meta test-name test-conf)))) - (hash-table-keys test-names)))) - -;; This could probably be refactored into one complex query ... 
-;; NOT PORTED - DO NOT USE YET -;; -(define (runs:rollup-run keys runname user keyvals) - (debug:print 4 *default-log-port* "runs:rollup-run, keys: " keys " -runname " runname " user: " user) - (let* ((db #f) - ;; register run operates on the main db - (new-run-id (rmt:register-run keyvals runname "new" "n/a" user (args:get-arg "-contour"))) - (prev-tests (rmt:get-matching-previous-test-run-records new-run-id "%" "%")) - (curr-tests (mt:get-tests-for-run new-run-id "%/%" '() '())) - (curr-tests-hash (make-hash-table))) - (rmt:update-run-event_time new-run-id) - ;; index the already saved tests by testname and itemdat in curr-tests-hash - (for-each - (lambda (testdat) - (let* ((testname (db:test-get-testname testdat)) - (item-path (db:test-get-item-path testdat)) - (full-name (conc testname "/" item-path))) - (hash-table-set! curr-tests-hash full-name testdat))) - curr-tests) - ;; NOPE: Non-optimal approach. Try this instead. - ;; 1. tests are received in a list, most recent first - ;; 2. replace the rollup test with the new *always* - (for-each - (lambda (testdat) - (let* ((testname (db:test-get-testname testdat)) - (item-path (db:test-get-item-path testdat)) - (full-name (conc testname "/" item-path)) - (prev-test-dat (hash-table-ref/default curr-tests-hash full-name #f)) - (test-steps (rmt:get-steps-for-test (db:test-get-id testdat))) - (new-test-record #f)) - ;; replace these with insert ... select - (apply sqlite3:execute - db - (conc "INSERT OR REPLACE INTO tests (run_id,testname,state,status,event_time,host,cpuload,diskfree,uname,rundir,item_path,run_duration,final_logf,comment) " - "VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?);") - new-run-id (cddr (vector->list testdat))) - (set! new-testdat (car (mt:get-tests-for-run new-run-id (conc testname "/" item-path) '() '()))) - (hash-table-set! curr-tests-hash full-name new-testdat) ;; this could be confusing, which record should go into the lookup table? 
- ;; Now duplicate the test steps - (debug:print 4 *default-log-port* "Copying records in test_steps from test_id=" (db:test-get-id testdat) " to " (db:test-get-id new-testdat)) - (cdb:remote-run ;; to be replaced, note: this routine is not used currently - (lambda () - (sqlite3:execute - db - (conc "INSERT OR REPLACE INTO test_steps (test_id,stepname,state,status,event_time,comment) " - "SELECT " (db:test-get-id new-testdat) ",stepname,state,status,event_time,comment FROM test_steps WHERE test_id=?;") - (db:test-get-id testdat)) - ;; Now duplicate the test data - (debug:print 4 *default-log-port* "Copying records in test_data from test_id=" (db:test-get-id testdat) " to " (db:test-get-id new-testdat)) - (sqlite3:execute - db - (conc "INSERT OR REPLACE INTO test_data (test_id,category,variable,value,expected,tol,units,comment) " - "SELECT " (db:test-get-id new-testdat) ",category,variable,value,expected,tol,units,comment FROM test_data WHERE test_id=?;") - (db:test-get-id testdat)))) - )) - prev-tests))) - - -;; clean cache files -(define (runs:clean-cache target runname toppath) - (if target - (if runname - (let* ((linktree (common:get-linktree)) ;; (if toppath (configf:lookup *configdat* "setup" "linktree"))) - (runtop (conc linktree "/" target "/" runname)) - (files (if (common:file-exists? runtop) - (append (glob (conc runtop "/.megatest*")) - (glob (conc runtop "/.runconfig*"))) - '()))) - (if (null? files) - (debug:print-info 0 *default-log-port* "No cached megatest or runconfigs files found. 
None removed.") - (begin - (debug:print-info 0 *default-log-port* "Removing cached files:\n " (string-intersperse files "\n ")) - (for-each - (lambda (f) - (handle-exceptions - exn - (debug:print 0 *default-log-port* "WARNING: Failed to remove file " f) - (delete-file f))) - files)))) - (debug:print-error 0 *default-log-port* "-clean-cache requires -runname.")) - (debug:print-error 0 *default-log-port* "-clean-cache requires -target or -reqtarg"))) Index: sdb.scm ================================================================== --- sdb.scm +++ sdb.scm @@ -28,89 +28,5 @@ (import (prefix sqlite3 sqlite3:)) (import (prefix base64 base64:)) (declare (unit sdb)) -;; -(define (sdb:open fname) - (let* ((dbpath (pathname-directory fname)) - (dbexists (let ((fe (common:file-exists? fname))) - (if fe - fe - (begin - (create-directory dbpath #t) - #f)))) - (sdb (sqlite3:open-database fname)) - (handler (make-busy-timeout 136000))) - (sqlite3:set-busy-handler! sdb handler) - (if (not dbexists) - (sdb:initialize sdb)) - (sqlite3:execute sdb "PRAGMA synchronous = 1;") - sdb)) - -(define (sdb:initialize sdb) - (sqlite3:execute sdb "CREATE TABLE IF NOT EXISTS strs - (id INTEGER PRIMARY KEY, - str TEXT, - CONSTRAINT str UNIQUE (str));") - (sqlite3:execute sdb "CREATE INDEX IF NOT EXISTS strindx ON strs (str);")) - -;; (define sumup (let ((a 0))(lambda (x)(set! a (+ x a)) a))) - -(define (sdb:register-string sdb str) - (sqlite3:execute sdb "INSERT OR IGNORE INTO strs (str) VALUES (?);" str)) - -(define (sdb:string->id sdb str-cache str) - (let ((id (hash-table-ref/default str-cache str #f))) - (if (not id) - (sqlite3:for-each-row - (lambda (sid) - (set! id sid) - (hash-table-set! str-cache str id)) - sdb - "SELECT id FROM strs WHERE str=?;" str)) - id)) - -(define (sdb:id->string sdb id-cache id) - (let ((str (hash-table-ref/default id-cache id #f))) - (if (not str) - (sqlite3:for-each-row - (lambda (istr) - (set! str istr) - (hash-table-set! 
id-cache id str)) - sdb - "SELECT str FROM strs WHERE id=?;" id)) - str)) - -;; Numbers get passed though in both directions -;; -(define (make-sdb:qry fname) - (let ((sdb #f) - (scache (make-hash-table)) - (icache (make-hash-table))) - (lambda (cmd var) - (case cmd - ((setup) (set! sdb (if (not sdb) - (sdb:open (if var var fname))))) - ((setdb) (set! sdb var)) - ((getdb) sdb) - ((finalize) (if sdb - (begin - (sqlite3:finalize! sdb) - (set! sdb #f)))) - ((getid) (let ((id (if (or (number? var) - (string->number var)) - var - (sdb:string->id sdb scache var)))) - (if id - id - (begin - (sdb:register-string sdb var) - (sdb:string->id sdb scache var))))) - ((getstr) (if (or (number? var) - (string->number var)) - (sdb:id->string sdb icache var) - var)) - ((passid) var) - ((passstr) var) - (else #f))))) - Index: server.scm ================================================================== --- server.scm +++ server.scm @@ -36,680 +36,5 @@ (import commonmod) (include "common_records.scm") (include "db_records.scm") -(define (server:make-server-url hostport) - (if (not hostport) - #f - (conc "http://" (car hostport) ":" (cadr hostport)))) - -(define *server-loop-heart-beat* (current-seconds)) - -;;====================================================================== -;; P K T S S T U F F -;;====================================================================== - -;; ??? - -;;====================================================================== -;; P K T S S T U F F -;;====================================================================== - -;; ??? - -;;====================================================================== -;; S E R V E R -;;====================================================================== - -;; Call this to start the actual server -;; - -;; all routes though here end in exit ... 
-;; -;; start_server -;; -(define (server:launch run-id transport-type) - (http-transport:launch)) - -;;====================================================================== -;; S E R V E R U T I L I T I E S -;;====================================================================== - -;; Get the transport -(define (server:get-transport) 'http) - -;; Generate a unique signature for this server -(define (server:mk-signature) - (message-digest-string (md5-primitive) - (with-output-to-string - (lambda () - (write (list (current-directory) - (argv))))))) - -;; When using zmq this would send the message back (two step process) -;; with spiffy or rpc this simply returns the return data to be returned -;; -(define (server:reply return-addr query-sig success/fail result) - (debug:print-info 11 *default-log-port* "server:reply return-addr=" return-addr ", result=" result) - (db:obj->string (vector success/fail query-sig result))) ;; (send-message pubsock target send-more: #t) - -;; Given a run id start a server process ### NOTE ### > file 2>&1 -;; if the run-id is zero and the target-host is set -;; try running on that host -;; incidental: rotate logs in logs/ dir. -;; -(define (server:run areapath) ;; areapath is *toppath* for a given testsuite area - (let* ((curr-host (get-host-name)) - ;; (attempt-in-progress (server:start-attempted? areapath)) - ;; (dot-server-url (server:check-if-running areapath)) - (curr-ip (server:get-best-guess-address curr-host)) - (curr-pid (current-process-id)) - (homehost (common:get-homehost)) ;; configf:lookup *configdat* "server" "homehost" )) - (target-host (car homehost)) - (testsuite (common:get-area-name *alldat*)) - (logfile (conc areapath "/logs/server.log")) ;; -" curr-pid "-" target-host ".log")) - (cmdln (conc (common:get-megatest-exe) - " -server " (or target-host "-") (if (equal? 
(configf:lookup *configdat* "server" "daemonize") "yes") - " -daemonize " - "") - ;; " -log " logfile - " -m testsuite:" testsuite)) ;; (conc " >> " logfile " 2>&1 &"))))) - (log-rotate (make-thread common:rotate-logs "server run, rotate logs thread")) - (load-limit (configf:lookup-number *configdat* "jobtools" "max-server-start-load" default: 3.0))) - ;; we want the remote server to start in *toppath* so push there - (push-directory areapath) - (debug:print 0 *default-log-port* "INFO: Trying to start server (" cmdln ") ...") - (thread-start! log-rotate) - - ;; host.domain.tld match host? - (if (and target-host - ;; look at target host, is it host.domain.tld or ip address and does it - ;; match current ip or hostname - (not (string-match (conc "("curr-host "|" curr-host"\\..*)") target-host)) - (not (equal? curr-ip target-host))) - (begin - (debug:print-info 0 *default-log-port* "Starting server on " target-host ", logfile is " logfile) - (setenv "TARGETHOST" target-host))) - - (setenv "TARGETHOST_LOGF" logfile) - (thread-sleep! (/ (random 5000) 1000)) ;; add about a random (up to 5 seconds) initial delay. It seems pretty common that many running tests request a server at the same time - (common:wait-for-normalized-load load-limit " delaying server start due to load" target-host) ;; do not try starting servers on an already overloaded machine, just wait forever - (system (conc "nbfake " cmdln)) - (unsetenv "TARGETHOST_LOGF") - (if (get-environment-variable "TARGETHOST")(unsetenv "TARGETHOST")) - (thread-join! log-rotate) - (pop-directory))) - -;; given a path to a server log return: host port startseconds -;; -(define (server:logf-get-start-info logf) - (let ((rx (regexp "^SERVER STARTED: (\\S+):(\\d+) AT ([\\d\\.]+)"))) ;; SERVER STARTED: host:port AT timesecs - (handle-exceptions - exn - (list #f #f #f) ;; no idea what went wrong, call it a bad server - (with-input-from-file - logf - (lambda () - (let loop ((inl (read-line)) - (lnum 0)) - (if (not (eof-object? 
inl)) - (let ((mlst (string-match rx inl))) - (if (not mlst) - (if (< lnum 500) ;; give up if more than 500 lines of server log read - (loop (read-line)(+ lnum 1)) - (list #f #f #f)) - (let ((dat (cdr mlst))) - (list (car dat) ;; host - (string->number (cadr dat)) ;; port - (string->number (caddr dat)))))) - (list #f #f #f)))))))) - -;; get a list of servers with all relevant data -;; ( mod-time host port start-time pid ) -;; -(define (server:get-list areapath #!key (limit #f)) - (let ((fname-rx (regexp "^(|.*/)server-(\\d+)-(\\S+).log$")) - (day-seconds (* 24 60 60))) - ;; if the directory exists continue to get the list - ;; otherwise attempt to create the logs dir and then - ;; continue - (if (if (directory-exists? (conc areapath "/logs")) - '() - (if (file-write-access? areapath) - (begin - (condition-case - (create-directory (conc areapath "/logs") #t) - (exn (i/o file)(debug:print 0 *default-log-port* "ERROR: Cannot create directory at " (conc areapath "/logs"))) - (exn ()(debug:print 0 *default-log-port* "ERROR: Unknown error attemtping to get server list."))) - (directory-exists? (conc areapath "/logs"))) - '())) - (let* ((server-logs (glob (conc areapath "/logs/server-*.log"))) - (num-serv-logs (length server-logs))) - (if (null? server-logs) - '() - (let loop ((hed (car server-logs)) - (tal (cdr server-logs)) - (res '())) - (let* ((mod-time (handle-exceptions - exn - (current-seconds) ;; 0 - (file-modification-time hed))) ;; default to *very* old so log gets ignored if deleted - (down-time (- (current-seconds) mod-time)) - (serv-dat (if (or (< num-serv-logs 10) - (< down-time 900)) ;; day-seconds)) - (server:logf-get-start-info hed) - '())) ;; don't waste time processing server files not touched in the 15 minutes if there are more than ten servers to look at - (serv-rec (cons mod-time serv-dat)) - (fmatch (string-match fname-rx hed)) - (pid (if fmatch (string->number (list-ref fmatch 2)) #f)) - (new-res (if (null? 
serv-dat) - res - (cons (append serv-rec (list pid)) res)))) - (if (null? tal) - (if (and limit - (> (length new-res) limit)) - new-res ;; (take new-res limit) <= need intelligent sorting before this will work - new-res) - (loop (car tal)(cdr tal) new-res))))))))) - -(define (server:get-num-alive srvlst) - (let ((num-alive 0)) - (for-each - (lambda (server) - (match-let (((mod-time host port start-time pid) - server)) - (let* ((uptime (- (current-seconds) mod-time)) - (runtime (if start-time - (- mod-time start-time) - 0))) - (if (< uptime 5)(set! num-alive (+ num-alive 1)))))) - srvlst) - num-alive)) - -;; given a list of servers get a list of valid servers, i.e. at least -;; 10 seconds old, has started and is less than 1 hour old and is -;; active (i.e. mod-time < 10 seconds -;; -;; mod-time host port start-time pid -;; -;; sort by start-time descending. I.e. get the oldest first. Young servers will thus drop off -;; and servers should stick around for about two hours or so. -;; -(define (server:get-best srvlst) - (let* ((nums (server:get-num-servers)) - (now (current-seconds)) - (slst (sort - (filter (lambda (rec) - (if (and (list? rec) - (> (length rec) 2)) - (let ((start-time (list-ref rec 3)) - (mod-time (list-ref rec 0))) - ;; (print "start-time: " start-time " mod-time: " mod-time) - (and start-time mod-time - (> (- now start-time) 0) ;; been running at least 0 seconds - (< (- now mod-time) 16) ;; still alive - file touched in last 16 seconds - (< (- now start-time) - (+ (- (string->number (or (configf:lookup *configdat* "server" "runtime") "3600")) - 180) - (random 360))) ;; under one hour running time +/- 180 - )) - #f)) - srvlst) - (lambda (a b) - (< (list-ref a 3) - (list-ref b 3)))))) - (if (> (length slst) nums) - (take slst nums) - slst))) - -(define (server:get-first-best areapath) - (let ((srvrs (server:get-best (server:get-list areapath)))) - (if (and srvrs - (not (null? 
srvrs))) - (car srvrs) - #f))) - -(define (server:get-rand-best areapath) - (let ((srvrs (server:get-best (server:get-list areapath)))) - (if (and (list? srvrs) - (not (null? srvrs))) - (let* ((len (length srvrs)) - (idx (random len))) - (list-ref srvrs idx)) - #f))) - - -(define (server:record->url servr) - (match-let (((mod-time host port start-time pid) - servr)) - (if (and host port) - (conc host ":" port) - #f))) - -(define (server:get-client-signature) ;; BB> why is this proc named "get-"? it returns nothing -- set! has not return value. - (if *my-client-signature* *my-client-signature* - (let ((sig (server:mk-signature))) - (set! *my-client-signature* sig) - *my-client-signature*))) - -;; kind start up of servers, wait 40 seconds before allowing another server for a given -;; run-id to be launched -(define (server:kind-run areapath) - (if (not (server:check-if-running areapath)) ;; why try if there is already a server running? - (let* ((last-run-dat (hash-table-ref/default *server-kind-run* areapath '(0 0))) ;; callnum, whenrun - (call-num (car last-run-dat)) - (when-run (cadr last-run-dat)) - (run-delay (+ (case call-num - ((0) 0) - ((1) 20) - ((2) 300) - (else 600)) - (random 5))) ;; add a small random number just in case a lot of jobs hit the work hosts simultaneously - (lock-file (conc areapath "/logs/server-start.lock"))) - (if (> (- (current-seconds) when-run) run-delay) - (begin - (common:simple-file-lock-and-wait lock-file expire-time: 15) - (server:run areapath) - (thread-sleep! 2) ;; don't release the lock for at least a few seconds - (common:simple-file-release-lock lock-file))) - (hash-table-set! 
*server-kind-run* areapath (list (+ call-num 1)(current-seconds)))))) - -(define (server:start-and-wait areapath #!key (timeout 60)) - (let ((give-up-time (+ (current-seconds) timeout))) - (let loop ((server-url (server:check-if-running areapath)) - (try-num 0)) - (if (or server-url - (> (current-seconds) give-up-time)) ;; server-url will be #f if no server available. - server-url - (let ((num-ok (length (server:get-best (server:get-list areapath))))) - (if (and (> try-num 0) ;; first time through simply wait a little while then try again - (< num-ok 1)) ;; if there are no decent candidates for servers then try starting a new one - (server:kind-run areapath)) - (thread-sleep! 5) - (loop (server:check-if-running areapath) - (+ try-num 1))))))) - -(define server:try-running server:run) ;; there is no more per-run servers ;; REMOVE ME. BUG. - -(define (server:get-num-servers #!key (numservers 2)) - (let ((ns (string->number - (or (configf:lookup *configdat* "server" "numservers") "notanumber")))) - (or ns numservers))) - -;; no longer care if multiple servers are started by accident. older servers will drop off in time. -;; -(define (server:check-if-running areapath) ;; #!key (numservers "2")) - (let* ((ns (server:get-num-servers)) - (servers (server:get-best (server:get-list areapath)))) - ;; (print "servers: " servers " ns: " ns) - (if (or (and servers - (null? servers)) - (not servers) - (and (list? servers) - (< (length servers) (random ns)))) ;; somewhere between 0 and numservers - #f - (let loop ((hed (car servers)) - (tal (cdr servers))) - (let ((res (server:check-server hed))) - (if res - res - (if (null? 
tal) - #f - (loop (car tal)(cdr tal))))))))) - -;; ping the given server -;; -(define (server:check-server server-record) - (let* ((server-url (server:record->url server-record)) - (res (case *transport-type* - ((http)(server:ping server-url)) - ;; ((nmsg)(nmsg-transport:ping (tasks:hostinfo-get-interface server) - ))) - (if res - server-url - #f))) - -(define (server:kill servr) - (match-let (((mod-time hostname port start-time pid) - servr)) - (tasks:kill-server hostname pid))) - -;; called in megatest.scm, host-port is string hostname:port -;; -;; NOTE: This is NOT called directly from clients as not all transports support a client running -;; in the same process as the server. -;; -(define (server:ping host-port-in #!key (do-exit #f)) - (let ((host:port (if (not host-port-in) ;; use read-dotserver to find - #f ;; (server:check-if-running *toppath*) - ;; (if (number? host-port-in) ;; we were handed a server-id - ;; (let ((srec (tasks:get-server-by-id (db:delay-if-busy (tasks:open-db)) host-port-in))) - ;; ;; (print "srec: " srec " host-port-in: " host-port-in) - ;; (if srec - ;; (conc (vector-ref srec 3) ":" (vector-ref srec 4)) - ;; (conc "no such server-id " host-port-in))) - host-port-in))) ;; ) - (let* ((host-port (if host:port - (let ((slst (string-split host:port ":"))) - (if (eq? (length slst) 2) - (list (car slst)(string->number (cadr slst))) - #f)) - #f))) -;; (toppath (launch:setup))) - ;; (print "host-port=" host-port) - (if (not host-port) - (begin - (if host-port-in - (debug:print 0 *default-log-port* "ERROR: bad host:port")) - (if do-exit (exit 1)) - #f) - (let* ((iface (car host-port)) - (port (cadr host-port)) - (server-dat (http-transport:client-connect iface port)) - (login-res (rmt:login-no-auto-client-setup server-dat))) - (if (and (list? 
login-res) - (car login-res)) - (begin - ;; (print "LOGIN_OK") - (if do-exit (exit 0)) - #t) - (begin - ;; (print "LOGIN_FAILED") - (if do-exit (exit 1)) - #f))))))) - -;; run ping in separate process, safest way in some cases -;; -(define (server:ping-server ifaceport) - (with-input-from-pipe - (conc (common:get-megatest-exe) " -ping " ifaceport) - (lambda () - (let loop ((inl (read-line)) - (res "NOREPLY")) - (if (eof-object? inl) - (case (string->symbol res) - ((NOREPLY) #f) - ((LOGIN_OK) #t) - (else #f)) - (loop (read-line) inl)))))) - -;; NOT USED (well, ok, was referenced in rpc-transport but otherwise -;; not used). -;; -(define (server:login toppath) - (lambda (toppath) - (set! *db-last-access* (current-seconds)) ;; might not be needed. - (if (equal? *toppath* toppath) - #t - #f))) - -;; timeout is hms string: 1h 5m 3s, default is 1 minute -;; -(define (server:expiration-timeout) - (let ((tmo (configf:lookup *configdat* "server" "timeout"))) - (if (and (string? tmo) - (common:hms-string->seconds tmo)) ;; BUG: hms-string->seconds is broken, if given "10" returns 0. Also, it doesn't belong in this logic unless the string->number is changed below - (* 3600 (string->number tmo)) - 60))) - -;; (define server:sync-lock-token "SERVER_SYNC_LOCK") -;; (define (server:release-sync-lock) -;; (db:no-sync-del! *no-sync-db* server:sync-lock-token)) -;; (define (server:have-sync-lock?) -;; (let* ((have-lock-pair (db:no-sync-get-lock *no-sync-db* server:sync-lock-token)) -;; (have-lock? (car have-lock-pair)) -;; (lock-time (cdr have-lock-pair)) -;; (lock-age (- (current-seconds) lock-time))) -;; (cond -;; (have-lock? #t) -;; ((>lock-age -;; (* 3 (configf:lookup-number *configdat* "server" "minimum-intersync-delay" default: 180))) -;; (server:release-sync-lock) -;; (server:have-sync-lock?)) -;; (else #f)))) - -;; moving this here as it needs access to db and cannot be in common. 
-;; - -(define (server:get-bruteforce-syncer dbstruct #!key (fork-to-background #f) (persist-until-sync #f)) - (let* ((sqlite-exe (or (get-environment-variable "MT_SQLITE3_EXE"))) ;; defined in cfg.sh - (sync-log (or (args:get-arg "-sync-log") (conc *toppath* "/logs/sync-" (current-process-id) "-" (get-host-name) ".log"))) - (tmp-area (common:get-db-tmp-area *alldat*)) - (tmp-db (conc tmp-area "/megatest.db")) - (staging-file (conc *toppath* "/.megatest.db")) - (mtdbfile (conc *toppath* "/megatest.db")) - (lockfile (common:get-sync-lock-filepath *alldat*)) - (sync-cmd-core (conc sqlite-exe" " tmp-db " .dump | "sqlite-exe" " staging-file "&>"sync-log)) - (sync-cmd (if fork-to-background - (conc "/usr/bin/env NBFAKE_LOG="*toppath*"/logs/last-server-sync-"(current-process-id)".log nbfake \""sync-cmd-core" && /bin/mv -f " staging-file " " mtdbfile" \"") - sync-cmd-core)) - (default-min-intersync-delay 2) - (min-intersync-delay (configf:lookup-number *configdat* "server" "minimum-intersync-delay" default: default-min-intersync-delay)) - (default-duty-cycle 0.1) - (duty-cycle (configf:lookup-number *configdat* "server" "sync-duty-cycle" default: default-duty-cycle)) - (last-sync-seconds 10) ;; we will adjust this to a measurement and delay last-sync-seconds * (1 - duty-cycle) - (calculate-off-time (lambda (work-duration duty-cycle) - (* (/ (- 1 duty-cycle) duty-cycle) last-sync-seconds))) - (off-time min-intersync-delay) ;; adjusted in closure below. - (do-a-sync - (lambda () - (BB> "Start do-a-sync with fork-to-background="fork-to-background" persist-until-sync="persist-until-sync) - (let* ((finalres - (let retry-loop ((num-tries 0)) - (if (common:simple-file-lock lockfile) - (begin - (cond - ((not (or fork-to-background persist-until-sync)) - (debug:print 0 *default-log-port* "INFO: syncer thread sleeping for max of (server.minimum-intersync-delay="min-intersync-delay - " , off-time="off-time" seconds ]") - (thread-sleep! 
(max off-time min-intersync-delay))) - (else - (debug:print 0 *default-log-port* "INFO: syncer thread NOT sleeping ; maybe time-to-exit..."))) - - (if (not (configf:lookup *configdat* "server" "disable-db-snapshot")) - (common:snapshot-file mtdbfile subdir: ".db-snapshot")) - (delete-file* staging-file) - (let* ((start-time (current-milliseconds)) - (res (system sync-cmd)) - (res2 - (cond - ((eq? 0 res) - (delete-file* (conc mtdbfile ".backup")) - (if (eq? 0 (file-size sync-log)) - (delete-file sync-log)) - (system (conc "/bin/mv " staging-file " " mtdbfile)) - - (set! last-sync-seconds (/ (- (current-milliseconds) start-time) 1000)) - (set! off-time (calculate-off-time - last-sync-seconds - (cond - ((and (number? duty-cycle) (> duty-cycle 0) (< duty-cycle 1)) - duty-cycle) - (else - (debug:print 0 *default-log-port* "WARNING: ["(common:human-time)"] server.sync-duty-cycle is invalid. Should be a number between 0 and 1, but "duty-cycle" was specified. Using default value: "default-duty-cycle) - default-duty-cycle)))) - - (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" SYNC took "last-sync-seconds" sec") - (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" SYNC took "last-sync-seconds" sec ; with duty-cycle of "duty-cycle" off time is now "off-time) - 'sync-completed) - (else - (system (conc "/bin/cp "sync-log" "sync-log".fail")) - (debug:print 0 *default-log-port* "ERROR: ["(common:human-time)"] Sync failed. See log at "sync-log".fail") - (if (file-exists? (conc mtdbfile ".backup")) - (system (conc "/bin/cp "mtdbfile ".backup " mtdbfile))) - #f)))) - (common:simple-file-release-lock lockfile) - (BB> "released lockfile: " lockfile) - (when (common:file-exists? lockfile) - (BB> "DID NOT ACTUALLY RELEASE LOCKFILE")) - res2) ;; end let - );; end begin - ;; else - (cond - (persist-until-sync - (thread-sleep! 
1) - (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" other SYNC in progress; we're in a fork-to-background so we need to succeed. Let's wait a jiffy and and try again. num-tries="num-tries" (waiting for lockfile="lockfile" to disappear)") - (retry-loop (add1 num-tries))) - (else - (thread-sleep! (max off-time (+ last-sync-seconds min-intersync-delay))) - (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" other SYNC in progress; not syncing.") - 'parallel-sync-in-progress)) - ) ;; end if got lockfile - ) - )) - (BB> "End do-a-sync with fork-to-background="fork-to-background" persist-until-sync="persist-until-sync" and result="finalres) - finalres) - ) ;; end lambda - )) - do-a-sync)) - -(define (server:writable-watchdog-bruteforce dbstruct) - (thread-sleep! 1) ;; delay for startup - (let* ((do-a-sync (server:get-bruteforce-syncer dbstruct)) - (final-sync (server:get-bruteforce-syncer dbstruct fork-to-background: #t persist-until-sync: #t))) - (when (and (not (args:get-arg "-sync-to-megatest.db")) ;; conditions under which we do not run the sync - (args:get-arg "-server")) - - (let loop () - (do-a-sync) - (if (not *time-to-exit*) (loop))) ;; keep going unless time to exit - - ;; time to exit, close the no-sync db here - (final-sync) - - (if (common:low-noise-print 30) - (debug:print-info 0 *default-log-port* "Exiting watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id) - ))))) - -(define (server:writable-watchdog-deltasync dbstruct) - (thread-sleep! 0.05) ;; delay for startup - (let ((legacy-sync (common:run-sync?)) - (sync-stale-seconds (configf:lookup-number *configdat* "server" "sync-stale-seconds" default: 300)) - (debug-mode (debug:debug-mode 1)) - (last-time (current-seconds)) - (no-sync-db (db:open-no-sync-db)) - (sync-duration 0) ;; run time of the sync in milliseconds - ;;(this-wd-num (begin (mutex-lock! *wdnum*mutex) (let ((x *wdnum*)) (set! 
*wdnum* (add1 *wdnum*)) (mutex-unlock! *wdnum*mutex) x))) - ) - (set! *no-sync-db* no-sync-db) ;; make the no sync db available to api calls - (debug:print-info 2 *default-log-port* "Periodic sync thread started.") - (debug:print-info 3 *default-log-port* "watchdog starting. legacy-sync is " legacy-sync" pid="(current-process-id) );; " this-wd-num="this-wd-num) - (if (and legacy-sync (not *time-to-exit*)) - (let* (;;(dbstruct (db:setup)) - (mtdb (dbr:dbstruct-mtdb dbstruct)) - (mtpath (db:dbdat-get-path mtdb)) - (tmp-area (common:get-db-tmp-area *alldat*)) - (start-file (conc tmp-area "/.start-sync")) - (end-file (conc tmp-area "/.end-sync"))) - (debug:print-info 0 *default-log-port* "Server running, periodic sync started.") - (let loop () - ;; sync for filesystem local db writes - ;; - (mutex-lock! *db-multi-sync-mutex*) - (let* ((need-sync (>= *db-last-access* *db-last-sync*)) ;; no sync since last write - (sync-in-progress *db-sync-in-progress*) - (min-intersync-delay (configf:lookup-number *configdat* "server" "minimum-intersync-delay" default: 5)) - (should-sync (and (not *time-to-exit*) - (> (- (current-seconds) *db-last-sync*) min-intersync-delay))) ;; sync every five seconds minimum, deprecated logic, can probably be removed - (start-time (current-seconds)) - (cpu-load-adj (alist-ref 'adj-proc-load (common:get-normalized-cpu-load #f))) - (mt-mod-time (file-modification-time mtpath)) - (last-sync-start (if (common:file-exists? start-file) - (file-modification-time start-file) - 0)) - (last-sync-end (if (common:file-exists? end-file) - (file-modification-time end-file) - 10)) - (sync-period (+ 3 (* cpu-load-adj 30))) ;; as adjusted load increases increase the sync period - (recently-synced (and (< (- start-time mt-mod-time) sync-period) ;; not useful if sync didn't modify megatest.db! 
- (< mt-mod-time last-sync-start))) - (sync-done (<= last-sync-start last-sync-end)) - (sync-stale (> start-time (+ last-sync-start sync-stale-seconds))) - (will-sync (and (not *time-to-exit*) ;; do not start a sync if we are in the process of exiting - (or need-sync should-sync) - (or sync-done sync-stale) - (not sync-in-progress) - (not recently-synced)))) - (debug:print-info 13 *default-log-port* "WD writable-watchdog top of loop. need-sync="need-sync" sync-in-progress=" sync-in-progress - " should-sync="should-sync" start-time="start-time" mt-mod-time="mt-mod-time" recently-synced="recently-synced" will-sync="will-sync - " sync-done=" sync-done " sync-period=" sync-period) - (if (and (> sync-period 5) - (common:low-noise-print 30 "sync-period")) - (debug:print-info 0 *default-log-port* "Increased sync period due to long sync times, sync took: " sync-period " seconds.")) - ;; (if recently-synced (debug:print-info 0 *default-log-port* "Skipping sync due to recently-synced flag=" recently-synced)) - ;; (debug:print-info 0 *default-log-port* "need-sync: " need-sync " sync-in-progress: " sync-in-progress " should-sync: " should-sync " will-sync: " will-sync) - (if will-sync (set! *db-sync-in-progress* #t)) - (mutex-unlock! *db-multi-sync-mutex*) - (if will-sync - (let (;; (max-sync-duration (configf:lookup-number *configdat* "server" "max-sync-duration")) ;; KEEPING THIS AVAILABLE BUT SHOULD NOT USE, I'M PRETTY SURE IT DOES NOT WORK! - (sync-start (current-milliseconds))) - (with-output-to-file start-file (lambda ()(print (current-process-id)))) - - ;; put lock here - - ;; (if (or (not max-sync-duration) - ;; (< sync-duration max-sync-duration)) ;; NOTE: db:sync-to-megatest.db keeps track of time of last sync and syncs incrementally - (let ((res (db:sync-to-megatest.db dbstruct no-sync-db: no-sync-db))) ;; did we sync any data? If so need to set the db touched flag to keep the server alive - (set! 
sync-duration (- (current-milliseconds) sync-start)) - (if (> res 0) ;; some records were transferred, keep the db alive - (begin - (mutex-lock! *heartbeat-mutex*) - (set! *db-last-access* (current-seconds)) - (mutex-unlock! *heartbeat-mutex*) - (debug:print-info 0 *default-log-port* "sync called, " res " records transferred.")) - (debug:print-info 2 *default-log-port* "sync called but zero records transferred"))))) -;; ;; TODO: factor this next routine out into a function -;; (with-input-from-pipe ;; this should not block other threads but need to verify this -;; (conc "megatest -sync-to-megatest.db -m testsuite:" (common:get-area-name) ":" *toppath*) -;; (lambda () -;; (let loop ((inl (read-line)) -;; (res #f)) -;; (if (eof-object? inl) -;; (begin -;; (set! sync-duration (- (current-milliseconds) sync-start)) -;; (cond -;; ((not res) -;; (debug:print 0 *default-log-port* "ERROR: sync from /tmp db to megatest.db appears to have failed. Recommended that you stop your runs and run \"megatest -cleanup-db\"")) -;; ((> res 0) -;; (mutex-lock! *heartbeat-mutex*) -;; (set! *db-last-access* (current-seconds)) -;; (mutex-unlock! *heartbeat-mutex*)))) -;; (let ((num-synced (let ((matches (string-match "^Synced (\\d+).*$" inl))) -;; (if matches -;; (string->number (cadr matches)) -;; #f)))) -;; (loop (read-line) -;; (or num-synced res)))))))))) - (if will-sync - (begin - (mutex-lock! *db-multi-sync-mutex*) - (set! *db-sync-in-progress* #f) - (set! *db-last-sync* start-time) - (with-output-to-file end-file (lambda ()(print (current-process-id)))) - - ;; release lock here - - (mutex-unlock! *db-multi-sync-mutex*))) - (if (and debug-mode - (> (- start-time last-time) 60)) - (begin - (set! 
last-time start-time) - (debug:print-info 4 *default-log-port* "timestamp -> " (seconds->time-string (current-seconds)) ", time since start -> " (seconds->hr-min-sec (- (current-seconds) *time-zero*)))))) - - ;; keep going unless time to exit - ;; - (if (not *time-to-exit*) - (let delay-loop ((count 0)) - ;;(debug:print-info 13 *default-log-port* "delay-loop top; count="count" pid="(current-process-id)" this-wd-num="this-wd-num" *time-to-exit*="*time-to-exit*) - - (if (and (not *time-to-exit*) - (< count 6)) ;; was 11, changing to 4. - (begin - (thread-sleep! 1) - (delay-loop (+ count 1)))) - (if (not *time-to-exit*) (loop)))) - ;; time to exit, close the no-sync db here - (db:no-sync-close-db no-sync-db) - (if (common:low-noise-print 30) - (debug:print-info 0 *default-log-port* "Exiting watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id) ))))))) ;;" this-wd-num="this-wd-num))))))) - Index: subrun.scm ================================================================== --- subrun.scm +++ subrun.scm @@ -40,229 +40,5 @@ ;;(include "key_records.scm") (include "db_records.scm") ;; provides db:test-get-id ;;(include "run_records.scm") ;;(include "test_records.scm") -(define (subrun:subrun-test-initialized? test-run-dir) - (if (and (common:file-exists? (conc test-run-dir "/subrun-area") ) - (common:file-exists? (conc test-run-dir "/testconfig.subrun") )) - #t - #f)) - -(define (subrun:launch-dashboard test-run-dir) - (if (subrun:subrun-test-initialized? test-run-dir) - (let* ((subarea (subrun:get-runarea test-run-dir))) - (if (and subarea (common:file-exists? subarea)) - (system (conc "cd " subarea ";env -i PATH=$PATH DISPLAY=$DISPLAY HOME=$HOME USER=$USER dashboard &")))))) - -(define (subrun:subrun-removed? test-run-dir) - (if (subrun:subrun-test-initialized? test-run-dir) - (let ((flagfile (conc test-run-dir "/subrun.removed"))) - (if (common:file-exists? 
flagfile) - #t - #f)) - #t)) - -(define (subrun:set-subrun-removed test-run-dir) - (let ((flagfile (conc test-run-dir "/subrun.removed"))) - (if (and (subrun:subrun-test-initialized? test-run-dir) (not (common:file-exists? flagfile))) - (with-output-to-file flagfile - (lambda () (print (current-seconds))))))) - -(define (subrun:unset-subrun-removed test-run-dir) - (let ((flagfile (conc test-run-dir "/subrun.removed"))) - (if (and (subrun:subrun-test-initialized? test-run-dir) (common:file-exists? flagfile)) - (delete-file flagfile)))) - - -(define (subrun:testconfig-defines-subrun? testconfig) - (configf:lookup testconfig "subrun" "runwait")) ;; we use runwait as the flag that a subrun is requested - -(define (subrun:initialize-toprun-test testconfig test-run-dir) - (let ((ra (configf:lookup testconfig "subrun" "run-area")) - (logpro (configf:lookup testconfig "subrun" "logpro")) - (symlink-target (conc test-run-dir "/subrun-area")) - ) - (if (not ra) ;; when runarea is not set we default to *toppath*. However - (let ((fallback-run-area (or *toppath* (conc test-run-dir "/subrun")))) - ;; we need to force the setting in the testconfig so it will - ;; be preserved in the testconfig.subrun file - (configf:set-section-var testconfig "subrun" "run-area" fallback-run-area) - (set! ra fallback-run-area))) - (configf:set-section-var testconfig "logpro" "subrun" logpro) ;; append the logpro rules to the logpro section as stepname subrun - (if (common:file-exists? symlink-target) - (delete-file symlink-target)) - (create-symbolic-link ra symlink-target) - (configf:write-alist testconfig "testconfig.subrun"))) - -(define (subrun:set-state-status test-run-dir state status new-state-status) - (if (and (not (subrun:subrun-removed? test-run-dir)) (subrun:subrun-test-initialized? 
test-run-dir)) - (let* ((action-switches-str - (conc "-set-state-status "new-state-status - (if state (conc " -state "state) "") - (if status (conc " -status "status) ""))) - (log-prefix - (subrun:sanitize-path - (conc "set-state-status="new-state-status - (if state (conc ":state="state) "") - (if status (conc "+status="status) "")))) - (submt-result - (subrun:exec-sub-megatest test-run-dir action-switches-str log-prefix))) - submt-result))) - -(define (subrun:remove-subrun test-run-dir keep-records ) - (if (and (not (subrun:subrun-removed? test-run-dir)) (subrun:subrun-test-initialized? test-run-dir)) - (let* ((action-switches-str - (conc "-remove-runs" - (if keep-records "-keep-records " "") - )) - (remove-result - (subrun:exec-sub-megatest test-run-dir action-switches-str "remove"))) - (if remove-result - (begin - (subrun:set-subrun-removed test-run-dir) - #t) - #f)) - #t)) - -(define (subrun:kill-subrun test-run-dir ) - (if (and (not (subrun:subrun-removed? test-run-dir)) (subrun:subrun-test-initialized? test-run-dir)) - (let* ((action-switches-str - (conc "-kill-runs" )) - (kill-result - (subrun:exec-sub-megatest test-run-dir action-switches-str "kill"))) - kill-result) - #t)) - -(define (subrun:launch-cmd test-run-dir #!optional (sub-cmd "-run")) ;; BUG: "-run" should be changed to "-rerun-clean" but current doesn't work - (if (subrun:subrun-removed? test-run-dir) - (subrun:unset-subrun-removed test-run-dir)) - - (let* ((log-prefix "run") - (switches (subrun:selector+log-switches test-run-dir log-prefix)) - (run-wait #t) - (cmd (conc "megatest " sub-cmd " " switches" " - (if run-wait "-run-wait " "")))) - cmd)) - - -(define (subrun:sanitize-path inpath) - (let* ((insane-pattern (irregex "[^[a-zA-Z0-9_\\-]"))) - (regex#string-substitute insane-pattern "_" inpath #t))) - -(define (subrun:get-runarea test-run-dir) - (if (subrun:subrun-test-initialized? test-run-dir) - (let* ((info-alist (subrun:selector+log-alist - test-run-dir - "foo")) - (run-area (if (list? 
info-alist) - (alist-ref "-start-dir" info-alist equal? #f) - #f))) - run-area) - #f)) - -(define (subrun:selector+log-alist test-run-dir log-prefix) - (let* ((switch-def-alist (common:get-param-mapping flavor: 'config)) - (subrunfile (conc test-run-dir "/testconfig.subrun" )) - (subrundata (with-input-from-file subrunfile read)) - (subrunconfig (configf:alist->config subrundata)) - (run-area (configf:lookup subrunconfig "subrun" "run-area")) - (defvals `(("start-dir" . ,(or run-area ;; default values if not specified in subrun section of tconf - (get-environment-variable "MT_RUN_AREA_HOME") - "/no/rundir/found")) - ("run-name" . ,(or (get-environment-variable "MT_RUNNAME") "NO-RUNNAME")) - ("target" . ,(or (get-environment-variable "MT_TARGET") "NO-TARGET")))) - (switch-alist-pre (filter-map (lambda (item) - (let* ((config-key (car item)) - (switch (cdr item)) - (defval (alist-ref config-key defvals equal? #f)) - (val (or (configf:lookup subrunconfig "subrun" config-key) - defval))) - (if val - (cons switch val) - #f))) - switch-def-alist)) - - ;; testpatt may be modified if all three of mode-patt, tag-expr, and testpatt are null - (mode-patt (alist-ref "-modepatt" switch-alist-pre equal? #f)) - (tag-expr (alist-ref "-tagexpr" switch-alist-pre equal? #f)) - (testpatt (alist-ref "-testpatt" switch-alist-pre equal? - (if (not (or mode-patt tag-expr)) "%" #f))) ;; testpatt is % if not - ;; otherwise specified - - ;; define compact-stem for logfile - (target (alist-ref "-target" switch-alist-pre equal? #f)) ;; want data-structures alist-ref, not alist-lib alist-ref - (runname (alist-ref "-runname" switch-alist-pre equal? 
#f)) - - - (compact-stem (subrun:sanitize-path - (conc - target - "-" - runname - "-" (or testpatt mode-patt tag-expr "NO-TESTPATT")))) - (logfile (conc - test-run-dir "/" - (if log-prefix - (conc (subrun:sanitize-path log-prefix) "-") - "") - compact-stem - ".log")) - ;; swap out testpatt with modified test-patt and add -log - (switch-alist (cons - (cons "-log" logfile) - (map (lambda (item) - (if (equal? (car item) "-testpatt") - (cons "-testpatt" testpatt) - item)) - switch-alist-pre)))) - switch-alist)) - ;; note - get precmd from subrun section - ;; apply to submegatest commands - -(define (subrun:get-log-path test-run-dir log-prefix) - (let* ((alist (subrun:selector+log-alist test-run-dir log-prefix)) - (res (alist-ref "-log" alist equal? #f))) - res)) - -(define (subrun:selector+log-switches test-run-dir log-prefix) - (let* ((switch-alist (subrun:selector+log-alist test-run-dir log-prefix)) - (res - (string-intersperse - (apply - append - (map - (lambda (x) - (list (car x) (cdr x))) - switch-alist)) - " "))) - res)) - -(define (subrun:exec-sub-megatest test-run-dir action-switches-str log-prefix) - (let* ((selector-switches (subrun:selector+log-switches test-run-dir log-prefix)) - (cmd (conc "megatest " selector-switches " " action-switches-str )) - (pid #f) - (proc (lambda () - (debug:print-info 0 *default-log-port* "Running sub megatest command: "cmd) - ;;(set! pid (process-run "/usr/bin/xterm" (list )))))) - (set! pid (process-run "/bin/bash" (list "-c" cmd)))))) - (call-with-environment-variables - (list (cons "PATH" (conc (get-environment-variable "PATH") ":."))) - (lambda () - (common:without-vars proc "^MT_.*"))) - (let processloop ((i 0)) - (let-values (((pid-val exit-status exit-code)(process-wait pid #t))) - (if (eq? pid-val 0) - (begin - (thread-sleep! 2) - (processloop (+ i 1))) - (begin - (debug:print-info 0 *default-log-port* "sub megatest " action-switches-str " completed with exit code " exit-code) - (if (eq? 
0 exit-code) - (begin - #t) - (begin - #f)))))))) - - - -;; (subrun:exec-sub-megatest "/nfs/pdx/disks/icf_env_disk001/bjbarcla/gwa/issues/mtdev/165/megatest/ext-tests/tests/subrun-usecases/toparea/links/SYSTEM_val/RELEASE_val/go/toptest" "-foo" "foo") Index: synchash.scm ================================================================== --- synchash.scm +++ synchash.scm @@ -29,105 +29,5 @@ (declare (unit synchash)) (declare (uses db)) (declare (uses server)) (include "db_records.scm") -(define (synchash:make) - (make-hash-table)) - -;; given an alist of objects '((id obj) ...) -;; 1. remove unchanged objects from the list -;; 2. create a list of removed objects by id -;; 3. remove removed objects from synchash -;; 4. replace or add new or changed objects to synchash -;; -(define (synchash:get-delta indat synchash) - (let ((deleted '()) - (changed '()) - (found '()) - (orig-keys (hash-table-keys synchash))) - (for-each - (lambda (item) - (let* ((id (car item)) - (dat (cadr item)) - (ref (hash-table-ref/default synchash id #f))) - (if (not (equal? dat ref)) ;; item changed or new - (begin - (set! changed (cons item changed)) - (hash-table-set! synchash id dat))) - (set! found (cons id found)))) - indat) - (for-each - (lambda (id) - (if (not (member id found)) - (begin - (set! deleted (cons id deleted)) - (hash-table-delete! synchash id)))) - orig-keys) - (list changed deleted) - ;; (list indat '()) ;; just for debugging - )) - -;; keynum => the field to use as the unique key (usually 0 but can be other field) -;; -(define (synchash:client-get proc synckey keynum synchash run-id . params) - (let* ((data (rmt:synchash-get run-id proc synckey keynum params)) - (newdat (car data)) - (removs (cadr data)) - (myhash (hash-table-ref/default synchash synckey #f))) - (if (not myhash) - (begin - (set! myhash (make-hash-table)) - (hash-table-set! 
synchash synckey myhash))) - (for-each - (lambda (item) - (let ((id (car item)) - (dat (cadr item))) - ;; (debug:print-info 2 *default-log-port* "Processing item: " item) - (hash-table-set! myhash id dat))) - newdat) - (for-each - (lambda (id) - (hash-table-delete! myhash id)) - removs) - ;; WHICH ONE!? - ;; data)) ;; return the changed and deleted list - (list newdat removs))) ;; synchash)) - -(define *synchashes* (make-hash-table)) - -(define (synchash:server-get dbstruct run-id proc synckey keynum params) - ;; (debug:print-info 2 *default-log-port* "synckey: " synckey ", keynum: " keynum ", params: " params) - (let* ((dbdat (db:get-db dbstruct run-id)) - (db (db:dbdat-get-db dbdat)) - (synchash (hash-table-ref/default *synchashes* synckey #f)) - (newdat (apply (case proc - ((db:get-runs) db:get-runs) - ((db:get-tests-for-run-mindata) db:get-tests-for-run-mindata) - ((db:get-test-info-by-ids) db:get-test-info-by-ids) - (else - (print "ERROR: sync for hash " proc " not setup! Edits needed in synchash.scm") - print)) - db params)) - (postdat #f) - (make-indexed (lambda (x) - (list (vector-ref x keynum) x)))) - ;; Now process newdat based on the query type - (set! postdat (case proc - ((db:get-runs) - ;; (debug:print-info 2 *default-log-port* "Get runs call") - (let ((header (vector-ref newdat 0)) - (data (vector-ref newdat 1))) - ;; (debug:print-info 2 *default-log-port* "header: " header ", data: " data) - (cons (list "header" header) ;; add the header keyed by the word "header" - (map make-indexed data)))) ;; add each element keyed by the keynum'th val - (else - ;; (debug:print-info 2 *default-log-port* "Non-get runs call") - (map make-indexed newdat)))) - ;; (debug:print-info 2 *default-log-port* "postdat: " postdat) - ;; (if (not indb)(sqlite3:finalize! db)) - (if (not synchash) - (begin - (set! synchash (make-hash-table)) - (hash-table-set! 
*synchashes* synckey synchash))) - (synchash:get-delta postdat synchash))) - Index: testsmod.scm ================================================================== --- testsmod.scm +++ testsmod.scm @@ -308,33 +308,10 @@ #t (if (null? tal) #f (loop (car tal)(cdr tal))))))))))) -;; if itempath is #f then look only at the testname part -;; -(define (tests:match->sqlqry patterns) - (if (string? patterns) - (let ((patts (string-split patterns ","))) - (if (null? patts) ;;; no pattern(s) means no match, we will do no query - #f - (let loop ((patt (car patts)) - (tal (cdr patts)) - (res '())) - ;; (print "loop: patt: " patt ", tal " tal) - (let* ((patt-parts (string-match (regexp "^([^\\/]*)(\\/(.*)|)$") patt)) - (test-patt (cadr patt-parts)) - (item-patt (cadddr patt-parts)) - (test-qry (db:patt->like "testname" test-patt)) - (item-qry (db:patt->like "item_path" item-patt)) - (qry (conc "(" test-qry " AND " item-qry ")"))) - ;; (print "tests:match => patt-parts: " patt-parts ", test-patt: " test-patt ", item-patt: " item-patt) - (if (null? 
tal) - (string-intersperse (append (reverse res)(list qry)) " OR ") - (loop (car tal)(cdr tal)(cons qry res))))))) - #f)) - ;; Check for waiver eligibility ;; (define (tests:check-waiver-eligibility testdat prev-testdat) (let* ((test-registry (make-hash-table)) (testconfig (tests:get-testconfig (db:test-get-testname testdat) (db:test-get-item-path testdat) test-registry #f)) Index: tree.scm ================================================================== --- tree.scm +++ tree.scm @@ -38,119 +38,5 @@ (include "common_records.scm") (include "db_records.scm") (include "key_records.scm") -;;====================================================================== -;; T R E E S T U F F -;;====================================================================== - -;; path is a list of nodes, each the child of the previous -;; this routine returns the id so another node can be added -;; either as a leaf or as a branch -;; -;; BUG: This needs a stop sensor for when a branch is exhausted -;; -(define (tree:find-node obj path) - ;; start at the base of the tree - (if (null? path) - #f ;; or 0 ???? - (let loop ((hed (car path)) - (tal (cdr path)) - (depth 0) - (nodenum 0)) - ;; nodes in iup tree are 100% sequential so iterate over nodenum - (if (iup:attribute obj (conc "DEPTH" nodenum)) ;; end when no more nodes - (let ((node-depth (string->number (iup:attribute obj (conc "DEPTH" nodenum)))) - (node-title (iup:attribute obj (conc "TITLE" nodenum)))) - (if (and (equal? depth node-depth) - (equal? hed node-title)) ;; yep, this is the one! - (if (null? tal) ;; end of the line - nodenum - (loop (car tal)(cdr tal)(+ depth 1)(+ 1 nodenum))) - ;; this is the case where we found part of the hierarchy but not - ;; all of it, i.e. 
the node-depth went from deep to less deep - (if (> depth node-depth) ;; (+ 1 node-depth)) - #f - (loop hed tal depth (+ nodenum 1))))) - #f)))) - -;; top is the top node name zeroeth node VALUE=0 -(define (tree:add-node obj top nodelst #!key (userdata #f)) - (let ((curr-top (iup:attribute obj "TITLE0"))) - (if (or (not (string? curr-top)) - (string-null? curr-top) - (string-match "^\\s*$" curr-top)) - (iup:attribute-set! obj "ADDBRANCH0" top)) - - - - (cond - ((not (equal? top (iup:attribute obj "TITLE0"))) - (print "ERROR: top name " top " doesn't match " (iup:attribute obj "TITLE0"))) - ((null? nodelst)) - (else - (let loop ((hed (car nodelst)) - (tal (cdr nodelst)) - (depth 1) - (pathl (list top))) - ;; Because the tree dialog changes node numbers when - ;; nodes are added or removed we must look up nodes - ;; each and every time. 0 is the top node so default - ;; to that. - (let* ((newpath (append pathl (list hed))) - (parentnode (tree:find-node obj pathl)) - (nodenum (tree:find-node obj newpath))) - ;; Add the branch under lastnode if not found - (if (not nodenum) - (begin - (iup:attribute-set! obj (conc "ADDBRANCH" parentnode) hed) - ;; ERROR? ADDING DATA TO PARENT, DONT WE WANT IT ON CREATED NODE? - (if userdata - (iup:attribute-set! obj (conc "USERDATA" parentnode) userdata)) - (if (null? tal) - #t - ;; reset to top - (loop (car nodelst)(cdr nodelst) 1 (list top)))) - (if (null? tal) ;; if null here then this path has already been added - #t - (loop (car tal)(cdr tal)(+ depth 1) newpath))))))))) - -(define (tree:node->path obj nodenum) - (let loop ((currnode 0) - (path '())) - (let* ((node-depth (string->number (iup:attribute obj (conc "DEPTH" currnode)))) - (node-title (iup:attribute obj (conc "TITLE" currnode))) - (trimpath (if (and (not (null? 
path)) - (> (length path) node-depth)) - (take path node-depth) - path)) - (newpath (append trimpath (list node-title)))) - (if (>= currnode nodenum) - newpath - (loop (+ currnode 1) - newpath))))) - -(define (tree:delete-node obj top node-path) ;; node-path is a list of strings - (let ((id (tree:find-node obj (cons top node-path)))) - (print "Found node to remove " id " for path " top " " node-path) - (iup:attribute-set! obj (conc "DELNODE" id) "SELECTED"))) - -#| - - (let* ((tb (iup:treebox - #:value 0 - #:name "Runs" - #:expand "YES" - #:addexpanded "NO" - #:selection-cb - (lambda (obj id state) - ;; (print "obj: " obj ", id: " id ", state: " state) - (let* ((run-path (tree:node->path obj id)) - (run-id (tree-path->run-id (cdr run-path)))) - (if run-id - (begin - (dboard:data-curr-run-id-set! data run-id) - (dashboard:update-run-summary-tab))) - ;; (print "path: " (tree:node->path obj id) " run-id: " run-id) - )))) -|#