Index: Makefile ================================================================== --- Makefile +++ Makefile @@ -30,11 +30,11 @@ MSRCFILES = dbmod.scm rmtmod.scm commonmod.scm apimod.scm \ archivemod.scm clientmod.scm envmod.scm ezstepsmod.scm itemsmod.scm \ keysmod.scm launchmod.scm odsmod.scm processmod.scm runconfigmod.scm \ runsmod.scm servermod.scm subrunmod.scm tasksmod.scm testsmod.scm \ pkts.scm mtargs.scm mtconfigf.scm ducttape-lib.scm ulex.scm \ -stml2.scm cookie.scm megamod.scm mutils.scm +stml2.scm cookie.scm megamod.scm mutils.scm pgdbmod.scm mtmod.scm GMSRCFILES = dcommonmod.scm vgmod.scm treemod.scm gutilsmod.scm # Eggs to install (straightforward ones) @@ -64,11 +64,11 @@ # mofiles/ducttape-lib.o : ducttape-lib.scm ducttape/*scm # csc -I ducttape -J -c ducttape-lib.scm -o mofiles/ducttape-lib.o mofiles/%.o %.import.scm : %.scm - @[ -e mofiles ] && mkdir -p mofiles + @[ -e mofiles ] || mkdir -p mofiles csc $(CSCOPTS) -I $* -J -c $< -o mofiles/$*.o @touch $*.import.scm # ensure it is touched after the .o is made # a.import.o : a.import.scm a.o # csc -unit a.import -c a.import.scm -o $*.o @@ -164,29 +164,37 @@ mofiles/gutilsmod.o mofiles/stml2.o : mofiles/cookie.o # special include based modules -mofiles/pkts.o : pkts/pkts.scm -mofiles/mtargs.o : mtargs/mtargs.scm +mofiles/pkts.o : pkts/pkts.scm +mofiles/mtargs.o : mtargs/mtargs.scm mofiles/mtconfigf.o : mtconfigf/mtconfigf.scm -mofiles/ulex.o : ulex/ulex.scm -mofiles/mutils.o : mutils/mutils.scm -mofiles/cookie.o : stml2/cookie.scm -mofiles/stml2.o : stml2/stml2.scm +mofiles/ulex.o : ulex/ulex.scm +mofiles/mutils.o : mutils/mutils.scm +mofiles/cookie.o : stml2/cookie.scm +mofiles/stml2.o : stml2/stml2.scm # for the modularized stuff mofiles/commonmod.o : megatest-fossil-hash.scm mofiles/stml2.o \ - mofiles/mtargs.o mofiles/pkts.o mofiles/mtconfigf.o + mofiles/mtargs.o mofiles/pkts.o mofiles/mtconfigf.o \ + mofiles/processmod.o +mofiles/pgdbmod.o : mofiles/commonmod.o mofiles/dbmod.o : 
mofiles/commonmod.o mofiles/keysmod.o \ mofiles/tasksmod.o mofiles/odsmod.o -mofiles/commonmod.o : mofiles/processmod.o -mofiles/rmtmod.o : mofiles/dbmod.o mofiles/commonmod.o \ - mofiles/apimod.o mofiles/ulex.o -mofiles/apimod.o : mofiles/dbmod.o -mofiles/runsmod.o : mofiles/testsmod.o +mofiles/tasksmod.o : mofiles/commonmod.o mofiles/pgdbmod.o +mofiles/rmtmod.o : mofiles/commonmod.o \ + mofiles/apimod.o mofiles/ulex.o mofiles/itemsmod.o +mofiles/apimod.o : mofiles/dbmod.o mofiles/commonmod.o mofiles/servermod.o +mofiles/runsmod.o : mofiles/testsmod.o mofiles/mtmod.o +mofiles/mtmod.o : mofiles/mtconfigf.o mofiles/rmtmod.o mofiles/tasksmod.o \ + mofiles/dbmod.o mofiles/pgdbmod.o mofiles/launchmod.o \ + mofiles/subrunmod.o +mofiles/servermod.o : mofiles/commonmod.o mofiles/dbmod.o +mofiles/testsmod.o : mofiles/servermod.o mofiles/dbmod.o +mofiles/launchmod.o : mofiles/subrunmod.o mofiles/testsmod.o # Removed from megamod.o dep: mofiles/ftail.o mofiles/megamod.o : \ mofiles/rmtmod.o \ mofiles/commonmod.o \ @@ -354,19 +362,23 @@ $(PREFIX)/bin/.$(ARCHSTR)/dboard : dboard $(FILES) utils/mk_wrapper utils/mk_wrapper $(PREFIX) dboard $(PREFIX)/bin/dashboard chmod a+x $(PREFIX)/bin/dashboard $(INSTALL) dboard $(PREFIX)/bin/.$(ARCHSTR)/dboard -install : $(PREFIX)/bin/.$(ARCHSTR) $(PREFIX)/bin/.$(ARCHSTR)/mtest $(PREFIX)/bin/megatest \ - $(PREFIX)/bin/.$(ARCHSTR)/dboard $(PREFIX)/bin/dashboard $(HELPERS) $(PREFIX)/bin/nbfake \ +install : minstall dinstall + +minstall : $(PREFIX)/bin/.$(ARCHSTR) $(PREFIX)/bin/.$(ARCHSTR)/mtest $(PREFIX)/bin/megatest \ + $(HELPERS) $(PREFIX)/bin/nbfake \ $(PREFIX)/bin/.$(ARCHSTR)/mtexec $(PREFIX)/bin/mtexec $(PREFIX)/bin/serialize-env \ $(PREFIX)/bin/nbfind $(PREFIX)/bin/loadrunner $(PREFIX)/bin/viewscreen $(PREFIX)/bin/mt_xterm \ $(PREFIX)/share/docs/megatest_manual.html $(PREFIX)/bin/remrun \ $(PREFIX)/share/docs/megatest_manual.html $(PREFIX)/bin/remrun $(PREFIX)/bin/mtutil \ $(PREFIX)/bin/tcmt $(PREFIX)/share/db/mt-pg.sql \ 
$(PREFIX)/share/js/jquery-3.1.0.slim.min.js -# $(PREFIX)/bin/.$(ARCHSTR)/ndboard + +dinstall : $(PREFIX)/bin/.$(ARCHSTR) $(PREFIX)/bin/.$(ARCHSTR)/dboard $(PREFIX)/bin/dashboard +# $(PREFIX)/bin/.$(ARCHSTR)/ndboard # $(PREFIX)/bin/newdashboard $(PREFIX)/bin/.$(ARCHSTR) : mkdir -p $(PREFIX)/bin/.$(ARCHSTR) Index: api-inc.scm ================================================================== --- api-inc.scm +++ api-inc.scm @@ -126,263 +126,5 @@ ;; TASKS tasks-add tasks-set-state-given-param-key )) -;; These are called by the server on recipt of /api calls -;; - keep it simple, only return the actual result of the call, i.e. no meta info here -;; -;; - returns #( flag result ) -;; -(define (api:execute-requests dbstruct dat) - ;; (handle-exceptions - ;; exn - ;; (let ((call-chain (get-call-chain))) - ;; (debug:print 0 *default-log-port* "WARNING: api:execute-requests received an exception from peer, dat=" dat) - ;; (print-call-chain (current-error-port)) - ;; (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) - ;; (vector #f (vector exn call-chain dat))) ;; return some stuff for debug if an exception happens - (cond - ((not (vector? dat)) ;; it is an error to not receive a vector - (vector #f (vector #f "remote must be called with a vector"))) - ((> *api-process-request-count* 20) ;; 20) - (debug:print 0 *default-log-port* "WARNING: api:execute-requests received an overloaded message.") - (set! *server-overloaded* #t) - (vector #f (vector #f 'overloaded))) ;; the inner vector is what gets returned. nope, don't know why. please refactor! - (else - (let* ((cmd-in (vector-ref dat 0)) - (cmd (if (symbol? 
cmd-in) - cmd-in - (string->symbol cmd-in))) - (params (vector-ref dat 1)) - (start-t (current-milliseconds)) - (readonly-mode (dbr:dbstruct-read-only dbstruct)) - (readonly-command (member cmd api:read-only-queries)) - (writecmd-in-readonly-mode (and readonly-mode (not readonly-command))) - #;(foo (begin - (common:telemetry-log (conc "api-in:"(->string cmd)) - payload: `((params . ,params))) - - #t)) - (res - (if writecmd-in-readonly-mode - (conc "attempt to run write command "cmd" on a read-only database") - (case cmd - ;;=============================================== - ;; READ/WRITE QUERIES - ;;=============================================== - - ((get-keys-write) (db:get-keys dbstruct)) ;; force a dummy "write" query to force server; for debug in -repl - - ;; SERVERS - ((start-server) (apply server:kind-run params)) - ((kill-server) (set! *server-run* #f)) - - ;; TESTS - - ;;((test-set-state-status-by-id) (apply mt:test-set-state-status-by-id dbstruct params)) - ;;BB - commented out above because it was calling below, eventually, incorrectly (dbstruct passed to mt:test-set-state-status-by-id, which previosly did more, but now only passes thru to db:set-state-status-and-roll-up-items. 
- ((test-set-state-status-by-id) - - ;; (define (db:set-state-status-and-roll-up-items dbstruct run-id test-name item-path state status comment) - (db:set-state-status-and-roll-up-items - dbstruct - (list-ref params 0) ; run-id - (list-ref params 1) ; test-name - #f ; item-path - (list-ref params 2) ; state - (list-ref params 3) ; status - (list-ref params 4) ; comment - )) - - ((delete-test-records) (apply db:delete-test-records dbstruct params)) - ((delete-old-deleted-test-records) (apply db:delete-old-deleted-test-records dbstruct params)) - ((test-set-state-status) (apply db:test-set-state-status dbstruct params)) - ((test-set-top-process-pid) (apply db:test-set-top-process-pid dbstruct params)) - ((set-state-status-and-roll-up-items) (apply db:set-state-status-and-roll-up-items dbstruct params)) - ((set-state-status-and-roll-up-run) (apply db:set-state-status-and-roll-up-run dbstruct params)) - ((top-test-set-per-pf-counts) (apply db:top-test-set-per-pf-counts dbstruct params)) - ((test-set-archive-block-id) (apply db:test-set-archive-block-id dbstruct params)) - - ;; RUNS - ((register-run) (apply db:register-run dbstruct params)) - ((set-tests-state-status) (apply db:set-tests-state-status dbstruct params)) - ((delete-run) (apply db:delete-run dbstruct params)) - ((lock/unlock-run) (apply db:lock/unlock-run dbstruct params)) - ((update-run-event_time) (apply db:update-run-event_time dbstruct params)) - ((update-run-stats) (apply db:update-run-stats dbstruct params)) - ((set-var) (apply db:set-var dbstruct params)) - ((del-var) (apply db:del-var dbstruct params)) - - ;; STEPS - ((teststep-set-status!) (apply db:teststep-set-status! dbstruct params)) - ((delete-steps-for-test!) (apply db:delete-steps-for-test! 
dbstruct params)) - - ;; TEST DATA - ((test-data-rollup) (apply db:test-data-rollup dbstruct params)) - ((csv->test-data) (apply db:csv->test-data dbstruct params)) - - ;; MISC - ((sync-inmem->db) (let ((run-id (car params))) - (db:sync-touched dbstruct run-id force-sync: #t))) - ((mark-incomplete) (apply db:find-and-mark-incomplete dbstruct params)) - - ;; TESTMETA - ((testmeta-add-record) (apply db:testmeta-add-record dbstruct params)) - ((testmeta-update-field) (apply db:testmeta-update-field dbstruct params)) - ((get-tests-tags) (db:get-tests-tags dbstruct)) - - ;; TASKS - ((tasks-add) (apply tasks:add dbstruct params)) - ((tasks-set-state-given-param-key) (apply tasks:set-state-given-param-key dbstruct params)) - ((tasks-get-last) (apply tasks:get-last dbstruct params)) - - ;; NO SYNC DB - ((no-sync-set) (apply db:no-sync-set *no-sync-db* params)) - ((no-sync-get/default) (apply db:no-sync-get/default *no-sync-db* params)) - ((no-sync-del!) (apply db:no-sync-del! *no-sync-db* params)) - ((no-sync-get-lock) (apply db:no-sync-get-lock *no-sync-db* params)) - - ;; ARCHIVES - ;; ((archive-get-allocations) - ((archive-register-disk) (apply db:archive-register-disk dbstruct params)) - ((archive-register-block-name)(apply db:archive-register-block-name dbstruct params)) - ;; ((archive-allocate-testsuite/area-to-block)(apply db:archive-allocate-testsuite/area-to-block dbstruct block-id testsuite-name areakey)) - - ;;====================================================================== - ;; READ ONLY QUERIES - ;;====================================================================== - - ;; KEYS - ((get-key-val-pairs) (apply db:get-key-val-pairs dbstruct params)) - ((get-keys) (db:get-keys dbstruct)) - ((get-key-vals) (apply db:get-key-vals dbstruct params)) - ((get-target) (apply db:get-target dbstruct params)) - ((get-targets) (db:get-targets dbstruct)) - - ;; ARCHIVES - ((test-get-archive-block-info) (apply db:test-get-archive-block-info dbstruct params)) - - ;; 
TESTS - ((test-toplevel-num-items) (apply db:test-toplevel-num-items dbstruct params)) - ((get-test-info-by-id) (apply db:get-test-info-by-id dbstruct params)) - ((test-get-rundir-from-test-id) (apply db:test-get-rundir-from-test-id dbstruct params)) - ((get-count-tests-running-for-testname) (apply db:get-count-tests-running-for-testname dbstruct params)) - ((get-count-tests-running) (apply db:get-count-tests-running dbstruct params)) - ((get-count-tests-running-in-jobgroup) (apply db:get-count-tests-running-in-jobgroup dbstruct params)) - ;; ((delete-test-step-records) (apply db:delete-test-step-records dbstruct params)) - ;; ((get-previous-test-run-record) (apply db:get-previous-test-run-record dbstruct params)) - ((get-matching-previous-test-run-records)(apply db:get-matching-previous-test-run-records dbstruct params)) - ((test-get-logfile-info) (apply db:test-get-logfile-info dbstruct params)) - ((test-get-records-for-index-file) (apply db:test-get-records-for-index-file dbstruct params)) - ((get-testinfo-state-status) (apply db:get-testinfo-state-status dbstruct params)) - ((test-get-top-process-pid) (apply db:test-get-top-process-pid dbstruct params)) - ((test-get-paths-matching-keynames-target-new) (apply db:test-get-paths-matching-keynames-target-new dbstruct params)) - ((get-prereqs-not-met) (apply db:get-prereqs-not-met dbstruct params)) - ((get-count-tests-running-for-run-id) (apply db:get-count-tests-running-for-run-id dbstruct params)) - ((get-not-completed-cnt) (apply db:get-not-completed-cnt dbstruct params)) - ;; ((synchash-get) (apply synchash:server-get dbstruct params)) - ((get-raw-run-stats) (apply db:get-raw-run-stats dbstruct params)) - ((get-test-times) (apply db:get-test-times dbstruct params)) - - ;; RUNS - ((get-run-info) (apply db:get-run-info dbstruct params)) - ((get-run-status) (apply db:get-run-status dbstruct params)) - ((get-run-state) (apply db:get-run-state dbstruct params)) - ((set-run-status) (apply db:set-run-status dbstruct 
params)) - ((set-run-state-status) (apply db:set-run-state-status dbstruct params)) - ((get-tests-for-run) (apply db:get-tests-for-run dbstruct params)) - ((get-test-id) (apply db:get-test-id dbstruct params)) - ((get-tests-for-run-mindata) (apply db:get-tests-for-run-mindata dbstruct params)) - ;; ((get-tests-for-runs-mindata) (apply db:get-tests-for-runs-mindata dbstruct params)) - ((get-runs) (apply db:get-runs dbstruct params)) - ((simple-get-runs) (apply db:simple-get-runs dbstruct params)) - ((get-num-runs) (apply db:get-num-runs dbstruct params)) - ((get-runs-cnt-by-patt) (apply db:get-runs-cnt-by-patt dbstruct params)) - ((get-all-run-ids) (db:get-all-run-ids dbstruct)) - ((get-prev-run-ids) (apply db:get-prev-run-ids dbstruct params)) - ((get-run-ids-matching-target) (apply db:get-run-ids-matching-target dbstruct params)) - ((get-runs-by-patt) (apply db:get-runs-by-patt dbstruct params)) - ((get-run-name-from-id) (apply db:get-run-name-from-id dbstruct params)) - ((get-main-run-stats) (apply db:get-main-run-stats dbstruct params)) - ((get-var) (apply db:get-var dbstruct params)) - ((get-run-stats) (apply db:get-run-stats dbstruct params)) - ((get-run-times) (apply db:get-run-times dbstruct params)) - - ;; STEPS - ((get-steps-data) (apply db:get-steps-data dbstruct params)) - ((get-steps-for-test) (apply db:get-steps-for-test dbstruct params)) - ((get-steps-info-by-id) (apply db:get-steps-info-by-id dbstruct params)) - - ;; TEST DATA - ((read-test-data) (apply db:read-test-data dbstruct params)) - ((read-test-data*) (apply db:read-test-data* dbstruct params)) - ((get-data-info-by-id) (apply db:get-data-info-by-id dbstruct params)) - - ;; MISC - ((get-latest-host-load) (apply db:get-latest-host-load dbstruct params)) - ((have-incompletes?) (apply db:have-incompletes? 
dbstruct params)) - ((login) (apply db:login dbstruct params)) - ((general-call) (let ((stmtname (car params)) - (run-id (cadr params)) - (realparams (cddr params))) - (db:general-call dbstruct stmtname realparams))) - ((sdb-qry) (apply sdb:qry params)) - ((ping) (current-process-id)) - ((get-changed-record-ids) (apply db:get-changed-record-ids dbstruct params)) - ((get-run-record-ids) (apply db:get-run-record-ids dbstruct params)) - ;; TESTMETA - ((testmeta-get-record) (apply db:testmeta-get-record dbstruct params)) - - ;; TASKS - ((find-task-queue-records) (apply tasks:find-task-queue-records dbstruct params)) - (else - (debug:print 0 *default-log-port* "ERROR: bad api call " cmd) - (conc "ERROR: BAD api call " cmd)))))) - - - ;; save all stats - (let ((delta-t (- (current-milliseconds) - start-t))) - (hash-table-set! *db-api-call-time* cmd - (cons delta-t (hash-table-ref/default *db-api-call-time* cmd '())))) - (if writecmd-in-readonly-mode - (begin - #;(common:telemetry-log (conc "api-out:"(->string cmd)) - payload: `((params . ,params) - (ok-res . #t))) - (vector #f res)) - (begin - #;(common:telemetry-log (conc "api-out:"(->string cmd)) - payload: `((params . ,params) - (ok-res . #f))) - (vector #t res))))))) ;; ) - -;; http-server send-response -;; api:process-request -;; db:* -;; -;; NB// Runs on the server as part of the server loop -;; -(define (api:process-request dbstruct $) ;; the $ is the request vars proc - (set! *api-process-request-count* (+ *api-process-request-count* 1)) - (let* ((cmd ($ 'cmd)) - (paramsj ($ 'params)) - (params (db:string->obj paramsj transport: 'http)) ;; incoming data from the POST (or is it a GET?) - (resdat (api:execute-requests dbstruct (vector cmd params))) ;; process the request, resdat = #( flag result ) - (success (vector-ref resdat 0)) - (res (vector-ref resdat 1))) ;; (vector flag payload), get the payload, ignore the flag (why?) 
- (if (not success) - (debug:print 0 *default-log-port* "ERROR: success flag is #f for " cmd " with params " params)) - (if (> *api-process-request-count* *max-api-process-requests*) - (set! *max-api-process-requests* *api-process-request-count*)) - (set! *api-process-request-count* (- *api-process-request-count* 1)) - ;; This can be here but needs controls to ensure it doesn't run more than every 4 seconds - ;; (rmt:dat->json-str - ;; (if (or (string? res) - ;; (list? res) - ;; (number? res) - ;; (boolean? res)) - ;; res - ;; (list "ERROR, not string, list, number or boolean" 1 cmd params res))))) - (db:obj->string res transport: 'http))) - Index: apimod.scm ================================================================== --- apimod.scm +++ apimod.scm @@ -19,11 +19,11 @@ ;;====================================================================== (declare (unit apimod)) (declare (uses commonmod)) (declare (uses dbmod)) -;; (declare (uses megamod)) +(declare (uses servermod)) (module apimod * (import scheme chicken data-structures extras) @@ -30,12 +30,269 @@ (import (prefix sqlite3 sqlite3:) posix typed-records srfi-18 srfi-69 format ports srfi-1 matchable s11n z3 (prefix base64 base64:) regex stack srfi-13 irregex) (import commonmod) (import dbmod) -;; (import megamod) +(import servermod) + ;; (use (prefix ulex ulex:)) -(include "common_records.scm") +;; These are called by the server on recipt of /api calls +;; - keep it simple, only return the actual result of the call, i.e. 
no meta info here +;; +;; - returns #( flag result ) +;; +(define (api:execute-requests dbstruct dat) + ;; (handle-exceptions + ;; exn + ;; (let ((call-chain (get-call-chain))) + ;; (debug:print 0 *default-log-port* "WARNING: api:execute-requests received an exception from peer, dat=" dat) + ;; (print-call-chain (current-error-port)) + ;; (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) + ;; (vector #f (vector exn call-chain dat))) ;; return some stuff for debug if an exception happens + (cond + ((not (vector? dat)) ;; it is an error to not receive a vector + (vector #f (vector #f "remote must be called with a vector"))) + ((> *api-process-request-count* 20) ;; 20) + (debug:print 0 *default-log-port* "WARNING: api:execute-requests received an overloaded message.") + (set! *server-overloaded* #t) + (vector #f (vector #f 'overloaded))) ;; the inner vector is what gets returned. nope, don't know why. please refactor! + (else + (let* ((cmd-in (vector-ref dat 0)) + (cmd (if (symbol? cmd-in) + cmd-in + (string->symbol cmd-in))) + (params (vector-ref dat 1)) + (start-t (current-milliseconds)) + (readonly-mode (dbr:dbstruct-read-only dbstruct)) + (readonly-command (member cmd api:read-only-queries)) + (writecmd-in-readonly-mode (and readonly-mode (not readonly-command))) + #;(foo (begin + (common:telemetry-log (conc "api-in:"(->string cmd)) + payload: `((params . ,params))) + + #t)) + (res + (if writecmd-in-readonly-mode + (conc "attempt to run write command "cmd" on a read-only database") + (case cmd + ;;=============================================== + ;; READ/WRITE QUERIES + ;;=============================================== + + ((get-keys-write) (db:get-keys dbstruct)) ;; force a dummy "write" query to force server; for debug in -repl + + ;; SERVERS + ;; ((start-server) (apply server:kind-run params)) + ;; ((kill-server) (set! 
*server-run* #f)) + + ;; TESTS + + ;;((test-set-state-status-by-id) (apply mt:test-set-state-status-by-id dbstruct params)) + ;;BB - commented out above because it was calling below, eventually, incorrectly (dbstruct passed to mt:test-set-state-status-by-id, which previosly did more, but now only passes thru to db:set-state-status-and-roll-up-items. + ((test-set-state-status-by-id) + + ;; (define (db:set-state-status-and-roll-up-items dbstruct run-id test-name item-path state status comment) + (db:set-state-status-and-roll-up-items + dbstruct + (list-ref params 0) ; run-id + (list-ref params 1) ; test-name + #f ; item-path + (list-ref params 2) ; state + (list-ref params 3) ; status + (list-ref params 4) ; comment + )) + + ((delete-test-records) (apply db:delete-test-records dbstruct params)) + ((delete-old-deleted-test-records) (apply db:delete-old-deleted-test-records dbstruct params)) + ((test-set-state-status) (apply db:test-set-state-status dbstruct params)) + ((test-set-top-process-pid) (apply db:test-set-top-process-pid dbstruct params)) + ((set-state-status-and-roll-up-items) (apply db:set-state-status-and-roll-up-items dbstruct params)) + ((set-state-status-and-roll-up-run) (apply db:set-state-status-and-roll-up-run dbstruct params)) + ((top-test-set-per-pf-counts) (apply db:top-test-set-per-pf-counts dbstruct params)) + ((test-set-archive-block-id) (apply db:test-set-archive-block-id dbstruct params)) + + ;; RUNS + ((register-run) (apply db:register-run dbstruct params)) + ((set-tests-state-status) (apply db:set-tests-state-status dbstruct params)) + ((delete-run) (apply db:delete-run dbstruct params)) + ((lock/unlock-run) (apply db:lock/unlock-run dbstruct params)) + ((update-run-event_time) (apply db:update-run-event_time dbstruct params)) + ((update-run-stats) (apply db:update-run-stats dbstruct params)) + ((set-var) (apply db:set-var dbstruct params)) + ((del-var) (apply db:del-var dbstruct params)) + + ;; STEPS + ((teststep-set-status!) 
(apply db:teststep-set-status! dbstruct params)) + ((delete-steps-for-test!) (apply db:delete-steps-for-test! dbstruct params)) + + ;; TEST DATA + ((test-data-rollup) (apply db:test-data-rollup dbstruct params)) + ((csv->test-data) (apply db:csv->test-data dbstruct params)) + + ;; MISC + ((sync-inmem->db) (let ((run-id (car params))) + (db:sync-touched dbstruct run-id force-sync: #t))) + ((mark-incomplete) (apply db:find-and-mark-incomplete dbstruct params)) + + ;; TESTMETA + ((testmeta-add-record) (apply db:testmeta-add-record dbstruct params)) + ((testmeta-update-field) (apply db:testmeta-update-field dbstruct params)) + ((get-tests-tags) (db:get-tests-tags dbstruct)) + + ;; TASKS + ((tasks-add) (apply tasks:add dbstruct params)) + ((tasks-set-state-given-param-key) (apply tasks:set-state-given-param-key dbstruct params)) + ((tasks-get-last) (apply tasks:get-last dbstruct params)) + + ;; NO SYNC DB + ((no-sync-set) (apply db:no-sync-set *no-sync-db* params)) + ((no-sync-get/default) (apply db:no-sync-get/default *no-sync-db* params)) + ((no-sync-del!) (apply db:no-sync-del! 
*no-sync-db* params)) + ((no-sync-get-lock) (apply db:no-sync-get-lock *no-sync-db* params)) + + ;; ARCHIVES + ;; ((archive-get-allocations) + ((archive-register-disk) (apply db:archive-register-disk dbstruct params)) + ((archive-register-block-name)(apply db:archive-register-block-name dbstruct params)) + ;; ((archive-allocate-testsuite/area-to-block)(apply db:archive-allocate-testsuite/area-to-block dbstruct block-id testsuite-name areakey)) + + ;;====================================================================== + ;; READ ONLY QUERIES + ;;====================================================================== + + ;; KEYS + ((get-key-val-pairs) (apply db:get-key-val-pairs dbstruct params)) + ((get-keys) (db:get-keys dbstruct)) + ((get-key-vals) (apply db:get-key-vals dbstruct params)) + ((get-target) (apply db:get-target dbstruct params)) + ((get-targets) (db:get-targets dbstruct)) + + ;; ARCHIVES + ((test-get-archive-block-info) (apply db:test-get-archive-block-info dbstruct params)) + + ;; TESTS + ((test-toplevel-num-items) (apply db:test-toplevel-num-items dbstruct params)) + ((get-test-info-by-id) (apply db:get-test-info-by-id dbstruct params)) + ((test-get-rundir-from-test-id) (apply db:test-get-rundir-from-test-id dbstruct params)) + ((get-count-tests-running-for-testname) (apply db:get-count-tests-running-for-testname dbstruct params)) + ((get-count-tests-running) (apply db:get-count-tests-running dbstruct params)) + ((get-count-tests-running-in-jobgroup) (apply db:get-count-tests-running-in-jobgroup dbstruct params)) + ;; ((delete-test-step-records) (apply db:delete-test-step-records dbstruct params)) + ;; ((get-previous-test-run-record) (apply db:get-previous-test-run-record dbstruct params)) + ((get-matching-previous-test-run-records)(apply db:get-matching-previous-test-run-records dbstruct params)) + ((test-get-logfile-info) (apply db:test-get-logfile-info dbstruct params)) + ((test-get-records-for-index-file) (apply 
db:test-get-records-for-index-file dbstruct params)) + ((get-testinfo-state-status) (apply db:get-testinfo-state-status dbstruct params)) + ((test-get-top-process-pid) (apply db:test-get-top-process-pid dbstruct params)) + ((test-get-paths-matching-keynames-target-new) (apply db:test-get-paths-matching-keynames-target-new dbstruct params)) + ((get-prereqs-not-met) (apply db:get-prereqs-not-met dbstruct params)) + ((get-count-tests-running-for-run-id) (apply db:get-count-tests-running-for-run-id dbstruct params)) + ((get-not-completed-cnt) (apply db:get-not-completed-cnt dbstruct params)) + ;; ((synchash-get) (apply synchash:server-get dbstruct params)) + ((get-raw-run-stats) (apply db:get-raw-run-stats dbstruct params)) + ((get-test-times) (apply db:get-test-times dbstruct params)) + + ;; RUNS + ((get-run-info) (apply db:get-run-info dbstruct params)) + ((get-run-status) (apply db:get-run-status dbstruct params)) + ((get-run-state) (apply db:get-run-state dbstruct params)) + ((set-run-status) (apply db:set-run-status dbstruct params)) + ((set-run-state-status) (apply db:set-run-state-status dbstruct params)) + ((get-tests-for-run) (apply db:get-tests-for-run dbstruct params)) + ((get-test-id) (apply db:get-test-id dbstruct params)) + ((get-tests-for-run-mindata) (apply db:get-tests-for-run-mindata dbstruct params)) + ;; ((get-tests-for-runs-mindata) (apply db:get-tests-for-runs-mindata dbstruct params)) + ((get-runs) (apply db:get-runs dbstruct params)) + ((simple-get-runs) (apply db:simple-get-runs dbstruct params)) + ((get-num-runs) (apply db:get-num-runs dbstruct params)) + ((get-runs-cnt-by-patt) (apply db:get-runs-cnt-by-patt dbstruct params)) + ((get-all-run-ids) (db:get-all-run-ids dbstruct)) + ((get-prev-run-ids) (apply db:get-prev-run-ids dbstruct params)) + ((get-run-ids-matching-target) (apply db:get-run-ids-matching-target dbstruct params)) + ((get-runs-by-patt) (apply db:get-runs-by-patt dbstruct params)) + ((get-run-name-from-id) (apply 
db:get-run-name-from-id dbstruct params)) + ((get-main-run-stats) (apply db:get-main-run-stats dbstruct params)) + ((get-var) (apply db:get-var dbstruct params)) + ((get-run-stats) (apply db:get-run-stats dbstruct params)) + ((get-run-times) (apply db:get-run-times dbstruct params)) + + ;; STEPS + ((get-steps-data) (apply db:get-steps-data dbstruct params)) + ((get-steps-for-test) (apply db:get-steps-for-test dbstruct params)) + ((get-steps-info-by-id) (apply db:get-steps-info-by-id dbstruct params)) + + ;; TEST DATA + ((read-test-data) (apply db:read-test-data dbstruct params)) + ((read-test-data*) (apply db:read-test-data* dbstruct params)) + ((get-data-info-by-id) (apply db:get-data-info-by-id dbstruct params)) + + ;; MISC + ((get-latest-host-load) (apply db:get-latest-host-load dbstruct params)) + ((have-incompletes?) (apply db:have-incompletes? dbstruct params)) + ((login) (apply db:login dbstruct params)) + ((general-call) (let ((stmtname (car params)) + (run-id (cadr params)) + (realparams (cddr params))) + (db:general-call dbstruct stmtname realparams))) + ((sdb-qry) (apply sdb:qry params)) + ((ping) (current-process-id)) + ((get-changed-record-ids) (apply db:get-changed-record-ids dbstruct params)) + ((get-run-record-ids) (apply db:get-run-record-ids dbstruct params)) + ;; TESTMETA + ((testmeta-get-record) (apply db:testmeta-get-record dbstruct params)) + + ;; TASKS + ((find-task-queue-records) (apply tasks:find-task-queue-records dbstruct params)) + (else + (debug:print 0 *default-log-port* "ERROR: bad api call " cmd) + (conc "ERROR: BAD api call " cmd)))))) + + + ;; save all stats + (let ((delta-t (- (current-milliseconds) + start-t))) + (hash-table-set! *db-api-call-time* cmd + (cons delta-t (hash-table-ref/default *db-api-call-time* cmd '())))) + (if writecmd-in-readonly-mode + (begin + #;(common:telemetry-log (conc "api-out:"(->string cmd)) + payload: `((params . ,params) + (ok-res . 
#t))) + (vector #f res)) + (begin + #;(common:telemetry-log (conc "api-out:"(->string cmd)) + payload: `((params . ,params) + (ok-res . #f))) + (vector #t res))))))) ;; ) + +;; http-server send-response +;; api:process-request +;; db:* +;; +;; NB// Runs on the server as part of the server loop +;; +(define (api:process-request dbstruct $) ;; the $ is the request vars proc + (set! *api-process-request-count* (+ *api-process-request-count* 1)) + (let* ((cmd ($ 'cmd)) + (paramsj ($ 'params)) + (params (db:string->obj paramsj transport: 'http)) ;; incoming data from the POST (or is it a GET?) + (resdat (api:execute-requests dbstruct (vector cmd params))) ;; process the request, resdat = #( flag result ) + (success (vector-ref resdat 0)) + (res (vector-ref resdat 1))) ;; (vector flag payload), get the payload, ignore the flag (why?) + (if (not success) + (debug:print 0 *default-log-port* "ERROR: success flag is #f for " cmd " with params " params)) + (if (> *api-process-request-count* *max-api-process-requests*) + (set! *max-api-process-requests* *api-process-request-count*)) + (set! *api-process-request-count* (- *api-process-request-count* 1)) + ;; This can be here but needs controls to ensure it doesn't run more than every 4 seconds + ;; (rmt:dat->json-str + ;; (if (or (string? res) + ;; (list? res) + ;; (number? res) + ;; (boolean? res)) + ;; res + ;; (list "ERROR, not string, list, number or boolean" 1 cmd params res))))) + (db:obj->string res transport: 'http))) ) Index: archive-inc.scm ================================================================== --- archive-inc.scm +++ archive-inc.scm @@ -16,367 +16,5 @@ ;; along with Megatest. If not, see . 
;; ;; strftime('%m/%d/%Y %H:%M:%S','now','localtime') -;;====================================================================== -;; -;;====================================================================== - -;; NOT CURRENTLY USED - commented out as it has unresolved dependencies -;; -#;(define (archive:main linktree target runname testname itempath options) - (let ((testdir (conc linktree "/" target "/" runname "/" testname "/" itempatt)) - (flavor 'plain) ;; type of machine to run jobs on - (maxload 1.5) ;; max allowed load for this work - (adisks (archive:get-archive-disks))) - ;; get testdir size - ;; - hand off du to job mgr - (if (and (common:file-exists? testdir) - (file-is-writable? testdir)) - (let* ((dused (jobrunner:run-job - flavor ;; machine type - maxload ;; max allowed load - '() ;; prevars - environment vars to set for the job - common:get-disk-space-used ;; if a proc call it, if a string it is a unix command - (list testdir))) - (apath (archive:get-archive testname itempath dused))) - (jobrunner:run-job - flavor - maxload - '() - archive:run-bup - (list testdir apath)))))) - -;; Get archive disks from megatest.config -;; -(define (archive:get-archive-disks) - (let ((section (configf:get-section *configdat* "archive-disks"))) - (if section - section - '()))) - -;; look for the best candidate archive area, else create new -;; area -;; -(define (archive:get-archive testname itempath dused) - ;; look up in archive_allocations if there is a pre-used archive - ;; with adequate diskspace - ;; - (let* ((existing-blocks (rmt:archive-get-allocations testname itempath dused)) - (candidate-disks (map (lambda (block) - (list - (vector-ref block 1) ;; archive-area-name - (vector-ref block 2))) ;; disk-path - existing-blocks))) - (or (common:get-disk-with-most-free-space candidate-disks dused) - (archive:allocate-new-archive-block #f #f #f)))) ;; BROKEN. 
testname itempath)))) - -;; allocate a new archive area -;; -(define (archive:allocate-new-archive-block blockid-cache run-area-home testsuite-name dneeded target run-name test-name) - (let ((key (conc testsuite-name "/" target "/" run-name "/" test-name))) - (if (hash-table-exists? blockid-cache key) - (hash-table-ref blockid-cache key) - (let* ((pscript (configf:lookup *configdat* "archive" "pathscript")) - (pscript-cmd (conc pscript " " testsuite-name " " target " " run-name " " test-name)) - (apath (if pscript - (handle-exceptions - exn - (begin - (debug:print 0 "ERROR: script \"" pscript-cmd "\" failed to run properly.") - (exit 1)) - (with-input-from-pipe - pscript-cmd - read-line)) - #f)) ;; this is the user-calculated archive path - (adisks (archive:get-archive-disks)) - (best-disk (common:get-disk-with-most-free-space adisks dneeded))) - (if best-disk - (let* ((bdisk-name (car best-disk)) - (bdisk-path (cdr best-disk)) - (area-key (substring (message-digest-string (md5-primitive) run-area-home) 0 5)) - (bdisk-id (rmt:archive-register-disk bdisk-name bdisk-path (get-df bdisk-path))) - (archive-name (if apath - apath - (let ((sec (current-seconds))) - (conc (time->string (seconds->local-time sec) "%Y") - "_q" (seconds->quarter sec) "/" - testsuite-name "_" area-key)))) - (archive-path (conc bdisk-path "/" archive-name)) - (block-id (rmt:archive-register-block-name bdisk-id archive-path))) - ;; (allocation-id (rmt:archive-allocate-testsuite/area-to-block block-id testsuite-name area-key))) - (if block-id ;; (and block-id allocation-id) - (let ((res (cons block-id archive-path))) - (hash-table-set! blockid-cache key res) - res) - #f)) - #f)) ;; no best disk found - ))) - -;; archive - run bup -;; -;; 1. create the bup dir if not exists -;; 2. start the du of each directory -;; 3. gen index -;; 4. 
save -;; -(define (archive:run-bup archive-command run-id run-name tests rp-mutex bup-mutex) - ;; move the getting of archive space down into the below block so that a single run can - ;; allocate as needed should a disk fill up - ;; - (let* ((blockid-cache (make-hash-table)) - (tsname (common:get-testsuite-name)) - (min-space (string->number (or (configf:lookup *configdat* "archive" "minspace") "1000"))) - (arch-groups (make-hash-table)) ;; archive groups, each corrosponds to a bup area - (disk-groups (make-hash-table)) ;; - (test-groups (make-hash-table)) ;; these two (disk and test groups) could be combined nicely - (test-dirs (make-hash-table)) - (bup-exe (or (configf:lookup *configdat* "archive" "bup") "bup")) - (compress (or (configf:lookup *configdat* "archive" "compress") "9")) - (linktree (common:get-linktree)) ;; (configf:lookup *configdat* "setup" "linktree"))) - (archiver (let ((s (configf:lookup *configdat* "archive" "archiver"))) - (if s (string->symbol s) 'bup))) - (archiver-cmd (case archiver - ((tar) "tar cfj ARCHIVE_NAME.tar.bz2 ") - ((7z) " 7z u -t7z -m0=lzma -mx=9 -mfb=64 -md=32m -ms=on ARCHIVE_NAME.7z ") - (else #f))) - (print-prefix "Running: ") ;; change to #f to turn off printing - (preclean-spec (configf:get-section *configdat* "archive-preclean"))) - - ;; (tests:match patt testname itempath) - - ;; from the test info bin the path to the test by stem - ;; - (for-each - (lambda (test-dat) - (let* ((item-path (db:test-get-item-path test-dat)) - (test-name (db:test-get-testname test-dat)) - (test-id (db:test-get-id test-dat)) - (run-id (db:test-get-run_id test-dat)) - (target (string-intersperse (map cadr (rmt:get-key-val-pairs run-id)) "/")) - - (toplevel/children (and (db:test-get-is-toplevel test-dat) - (> (rmt:test-toplevel-num-items run-id test-name) 0))) - (test-partial-path (conc target "/" run-name "/" (db:test-make-full-name test-name item-path))) - ;; note the trailing slash to get the dir inspite of it being a link - (test-path 
(conc linktree "/" test-partial-path)) - (mutex-lock! rp-mutex) - (test-physical-path (if (common:file-exists? test-path) - (common:real-path test-path) - #f)) - (mutex-unlock! rp-mutex) - (partial-path-index (if test-physical-path (substring-index test-partial-path test-physical-path) #f)) - (test-base (if (and partial-path-index - test-physical-path ) - (substring test-physical-path - 0 - partial-path-index) - #f)) - ;; we need our archive dir checked for every test to enable folks who want to store other ways. - (archive-info (archive:allocate-new-archive-block blockid-cache *toppath* tsname min-space target run-name test-name)) - (archive-dir (if archive-info (cdr archive-info) #f)) - (archive-id (if archive-info (car archive-info) -1)) - - ) - - (if (not archive-dir) ;; no archive disk found, this is fatal - (begin - (debug:print 0 *default-log-port* "FATAL: No archive disks found. Please add disks with at least " - min-space " MB space to the [archive-disks] section of megatest.config") - (debug:print 0 *default-log-port* " use [archive] minspace to specify minimum available space") - (debug:print 0 *default-log-port* " disks: " - (string-intersperse (map cadr (archive:get-archive-disks)) "\n ")) - (exit 1)) - (debug:print-info 0 *default-log-port* "Using path " archive-dir " for archiving test " test-path)) - - ;; preclean the test directory per the spec if provided - (if (not (null? preclean-spec)) ;; we've been asked to preclean before archiving - (let loop ((spec (car preclean-spec)) - (tail (cdr preclean-spec))) - (if (> (length spec) 1) - (let ((testspec (car spec)) - (rules (cadr spec))) - (if (tests:match testspec test-name item-path) - (begin - (debug:print 0 *default-log-port* "INFO: cleanup requested for " test-physical-path) - (common:dir-clean-up test-physical-path rules remove-empty: #t)) - (if (not (null? tail)) - (loop (car tail)(cdr tail))))) - (begin - (debug:print 0 *default-log-port* "ERROR: bad spec line in [archive-preclean] section. 
\"" spec "\"") - (if (not (null? tail))(loop (car tail)(cdr tail))))))) - (cond - (toplevel/children - (debug:print 0 *default-log-port* "WARNING: cannot archive " test-name " with id " test-id - " as it is a toplevel test with children")) - ((not (common:file-exists? test-path)) - (debug:print 0 *default-log-port* "WARNING: Cannot archive " test-name "/" item-path - " as path " test-path " does not exist")) - (else - (debug:print 0 *default-log-port* - "From test-dat=" test-dat " derived the following:\n" - "test-partial-path = " test-partial-path "\n" - "test-path = " test-path "\n" - "test-physical-path = " test-physical-path "\n" - "partial-path-index = " partial-path-index "\n" - "test-base = " test-base) - (hash-table-set! disk-groups test-base - (cons test-physical-path (hash-table-ref/default disk-groups test-base '()))) - (hash-table-set! test-groups test-base - (cons test-dat (hash-table-ref/default test-groups test-base '()))) - (hash-table-set! arch-groups test-base - (cons archive-info (hash-table-ref/default arch-groups test-base '()))) - (hash-table-set! test-dirs test-id test-path))))) - ;; test-path)))) - tests) - (debug:print 0 *default-log-port* "INFO: DISK GROUPS=" (hash-table->alist disk-groups)) - ;; for each disk-group, initialize the bup area if needed - (for-each - (lambda (test-base) - (let* ((disk-group (hash-table-ref disk-groups test-base)) - (arch-group (hash-table-ref arch-groups test-base)) - (arch-info (car arch-group)) ;; don't know yet how this will work, can I get more than one possibility? - (archive-id (car arch-info)) - (archive-dir (cdr arch-info))) - (debug:print 0 *default-log-port* "Processing disk-group " test-base) - (let* ((test-paths (hash-table-ref disk-groups test-base))) - (if (not (common:file-exists? 
archive-dir)) - (create-directory archive-dir #t)) - (case archiver - ((bup) ;; Archive using bup - (let* ((bup-init-params (list "-d" archive-dir "init")) - (bup-index-params (append (list "-d" archive-dir "index") test-paths)) - (bup-save-params (append (list "-d" archive-dir "save" ;; (conc "--strip-path=" linktree) - (conc "-" compress) ;; or (conc "--compress=" compress) - "-n" (conc (common:get-testsuite-name) "-" run-id) - (conc "--strip-path=" test-base) ;; if we push to the directory do we need this? - ) - test-paths))) - (if (not (common:file-exists? (conc archive-dir "/HEAD"))) - (begin - ;; replace this with jobrunner stuff enventually - (debug:print-info 0 *default-log-port* "Init bup in " archive-dir) - ;; (mutex-lock! bup-mutex) - (run-n-wait bup-exe params: bup-init-params print-cmd: print-prefix) - ;; (mutex-unlock! bup-mutex) - )) - (debug:print-info 0 *default-log-port* "Indexing data to be archived") - ;; (mutex-lock! bup-mutex) - (run-n-wait bup-exe params: bup-index-params print-cmd: print-prefix) - (debug:print-info 0 *default-log-port* "Archiving data with bup") - (run-n-wait bup-exe params: bup-save-params print-cmd: print-prefix))) - ((7z tar) - (for-each - (lambda (test-dat) - (let* ((test-id (db:test-get-id test-dat)) - (test-name (db:test-get-testname test-dat)) - (item-path (db:test-get-item-path test-dat)) - (test-full-name (db:test-make-full-name test-name item-path)) - (run-id (db:test-get-run_id test-dat)) - (target (string-intersperse (map cadr (rmt:get-key-val-pairs run-id)) "/")) - (run-name (rmt:get-run-name-from-id run-id)) - (source-dir (hash-table-ref test-dirs test-id)) ;; (conc test-base "/" test-name "/" item-path)) - (target-dir (string-substitute "/$" "" (conc archive-dir "/" target "/" run-name "/" test-full-name)))) - ;; create the test and item-path levels under archive-dir - (create-directory (pathname-directory target-dir) #t) - (run-n-wait - (conc - (string-substitute "ARCHIVE_NAME" target-dir archiver-cmd) " " - 
"." - ) - print-cmd: print-prefix - run-dir: source-dir))) - (hash-table-ref test-groups test-base)))) - ;; (mutex-unlock! bup-mutex) - (for-each - (lambda (test-dat) - (let ((test-id (db:test-get-id test-dat)) - (run-id (db:test-get-run_id test-dat))) - (rmt:test-set-archive-block-id run-id test-id archive-id) - (if (member archive-command '("save-remove")) - (runs:remove-test-directory test-dat 'archive-remove)))) - (hash-table-ref test-groups test-base))))) - (hash-table-keys disk-groups)) - #t)) - -(define (archive:bup-restore archive-command run-id run-name tests rp-mutex bup-mutex) ;; move the getting of archive space down into the below block so that a single run can - ;; allocate as needed should a disk fill up - ;; - (let* ((bup-exe (or (configf:lookup *configdat* "archive" "bup") "bup")) - (linktree (common:get-linktree))) ;; (configf:lookup *configdat* "setup" "linktree"))) - - ;; from the test info bin the path to the test by stem - ;; - (for-each - (lambda (test-dat) - ;; When restoring test-dat will initially contain an old and invalid path to the test - (let* ((best-disk (get-best-disk *configdat* #f)) ;; BUG: get the testconfig and use it here. Otherwise data pulled out of archive could end up on the wrong kind of disk. - (item-path (db:test-get-item-path test-dat)) - (test-name (db:test-get-testname test-dat)) - (test-id (db:test-get-id test-dat)) - (run-id (db:test-get-run_id test-dat)) - (keyvals (rmt:get-key-val-pairs run-id)) - (target (string-intersperse (map cadr keyvals) "/")) - - (toplevel/children (and (db:test-get-is-toplevel test-dat) - (> (rmt:test-toplevel-num-items run-id test-name) 0))) - (test-partial-path (conc target "/" run-name "/" (db:test-make-full-name test-name item-path))) - ;; note the trailing slash to get the dir inspite of it being a link - (test-path (conc linktree "/" test-partial-path)) - ;; if the old path was not deleted then prev-test-physical-path will end up pointing to a real directory - (mutex-lock! 
rp-mutex) - (prev-test-physical-path (if (common:file-exists? test-path) - ;; (read-symbolic-link test-path #t) - (common:real-path test-path) - #f)) - (mutex-unlock! rp-mutex) - (new-test-physical-path (conc best-disk "/" test-partial-path)) - (archive-block-id (db:test-get-archived test-dat)) - (archive-block-info (rmt:test-get-archive-block-info archive-block-id)) - (archive-path (if (vector? archive-block-info) - (vector-ref archive-block-info 2) ;; look in db.scm for test-get-archive-block-info for the vector record info - #f)) ;; no archive found? - (archive-internal-path (conc (common:get-testsuite-name) "-" run-id "/latest/" test-partial-path))) - - ;; some sanity checks, move an existing path out of the way - iif it is not a toplevel with children - ;; - (if (and (not toplevel/children) ;; special handling needed for toplevel with children - prev-test-physical-path - (common:file-exists? prev-test-physical-path)) ;; what to do? abort or clean up or link it in? - (let* ((base (pathname-directory prev-test-physical-path)) - (dirn (pathname-file prev-test-physical-path)) - (newn (conc base "/." dirn))) - (debug:print-error 0 *default-log-port* "the old directory " prev-test-physical-path ", still exists! Moving it to " newn) - (rename-file prev-test-physical-path newn))) - - (if (and archive-path ;; no point in proceeding if there is no actual archive - (not toplevel/children)) - (begin - ;; CREATE WORK AREA - ;; test-src-path == #f ==> don't copy in data from tests directory - ;; itemdat == string ==> use directly - (create-work-area run-id run-name keyvals test-id #f best-disk test-name item-path) ;; #!key (remtries 2)) - - ;; 1. Get the block id from the test info - ;; 2. Get the block data given the block id - ;; 3. Construct the paths etc. 
for the following command: - ;; - ;; bup -d /tmp/matt/adisk1/2015_q1/fullrun_e1a40/ restore -C /tmp/seeme fullrun-30/latest/ubuntu/nfs/none/w02.1.20.54_b/ - - ;; DO BUP RESTORE - (let* ((new-test-dat (rmt:get-test-info-by-id run-id test-id)) - (new-test-path (if (vector? new-test-dat ) - (db:test-get-rundir new-test-dat) - (begin - (debug:print-error 0 *default-log-port* "unable to get data for run-id=" run-id ", test-id=" test-id) - (exit 1)))) - ;; new-test-path won't work - must use best-disk instead? Nope, new-test-path but tack on /.. - (bup-restore-params (list "-d" archive-path "restore" "-C" (conc new-test-path "/..") archive-internal-path))) - (debug:print-info 0 *default-log-port* "Restoring archived data to " new-test-physical-path " from archive in " archive-path " ... " archive-internal-path) - ;; (mutex-lock! bup-mutex) - (run-n-wait bup-exe params: bup-restore-params print-cmd: #f) - ;; (mutex-unlock! bup-mutex) - (mt:test-set-state-status-by-id run-id test-id "COMPLETED" #f #f))) - (debug:print-error 0 *default-log-port* "No archive path in the record for run-id=" run-id " test-id=" test-id)))) - (filter vector? tests)))) - ADDED attic/tdb-inc.scm Index: attic/tdb-inc.scm ================================================================== --- /dev/null +++ attic/tdb-inc.scm @@ -0,0 +1,396 @@ +;;====================================================================== +;; Copyright 2006-2013, Matthew Welland. +;; +;; This file is part of Megatest. +;; +;; Megatest is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. +;; +;; Megatest is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. 
+;; +;; You should have received a copy of the GNU General Public License +;; along with Megatest. If not, see . +;; +;;====================================================================== + +;;====================================================================== +;; Database access +;;====================================================================== + +;;====================================================================== +;; +;; T E S T D A T A B A S E S +;; +;;====================================================================== + +;;====================================================================== +;; T E S T S P E C I F I C D B +;;====================================================================== + +;; Create the sqlite db for the individual test(s) +;; +;; Moved these tables into .db +;; THIS CODE TO BE REMOVED +;; +(define (open-test-db work-area) + (debug:print-info 11 *default-log-port* "open-test-db " work-area) + (if (and work-area + (directory? work-area) + (file-read-access? work-area)) + (let* ((dbpath (conc work-area "/testdat.db")) + (dbexists (common:file-exists? dbpath)) + (work-area-writeable (file-write-access? work-area)) + (db (handle-exceptions ;; open the db if area writeable or db pre-existing. open in-mem otherwise. if exception, open in-mem + exn + (begin + (print-call-chain (current-error-port)) + (debug:print 2 *default-log-port* "ERROR: problem accessing test db " work-area ", you probably should clean and re-run this test" + ((condition-property-accessor 'exn 'message) exn)) + (set! dbexists #f) ;; must force re-creation of tables, more tom-foolery + (sqlite3:open-database ":memory:")) ;; open an in-memory db to allow readonly access + (if (or work-area-writeable + dbexists) + (sqlite3:open-database dbpath) + (sqlite3:open-database ":memory:")))) + (tdb-writeable (and (file-write-access? work-area) + (file-write-access? 
dbpath))) + (handler (sqlite3:make-busy-timeout (if (args:get-arg "-override-timeout") + (string->number (args:get-arg "-override-timeout")) + 136000)))) + + (if (and tdb-writeable + *db-write-access*) + (sqlite3:set-busy-handler! db handler)) + (if (not dbexists) + (begin + (db:set-sync db) ;; (sqlite3:execute db "PRAGMA synchronous = FULL;") + (debug:print-info 11 *default-log-port* "Initialized test database " dbpath) + (tdb:testdb-initialize db))) + ;; (sqlite3:execute db "PRAGMA synchronous = 0;") + (debug:print-info 11 *default-log-port* "open-test-db END (sucessful)" work-area) + ;; now let's test that everything is correct + (handle-exceptions + exn + (begin + (print-call-chain (current-error-port)) + (debug:print-error 0 *default-log-port* "problem accessing test db " work-area ", you probably should clean and re-run this test or remove the file " + dbpath ".\n " + ((condition-property-accessor 'exn 'message) exn)) + #f) + ;; Is there a cheaper single line operation that will check for existance of a table + ;; and raise an exception ? + (sqlite3:execute db "SELECT id FROM test_data LIMIT 1;")) + db) + ;; no work-area or not readable - create a placeholder to fake rest of world out + (let ((baddb (sqlite3:open-database ":memory:"))) + (debug:print-info 11 *default-log-port* "open-test-db END (unsucessful)" work-area) + ;; provide an in-mem db (this is dangerous!) 
+ (tdb:testdb-initialize baddb) + baddb))) + +;; find and open the testdat.db file for an existing test +(define (tdb:open-test-db-by-test-id test-id #!key (work-area #f)) + (let* ((test-path (if work-area + work-area + (rmt:test-get-rundir-from-test-id test-id)))) + (debug:print 3 *default-log-port* "TEST PATH: " test-path) + (open-test-db test-path))) + +;; find and open the testdat.db file for an existing test +(define (tdb:open-test-db-by-test-id-local dbstruct run-id test-id #!key (work-area #f)) + (let* ((test-path (if work-area + work-area + (db:test-get-rundir-from-test-id dbstruct run-id test-id)))) + (debug:print 3 *default-log-port* "TEST PATH: " test-path) + (open-test-db test-path))) + +;; find and open the testdat.db file for an existing test +(define (tdb:open-run-close-db-by-test-id-local dbstruct run-id test-id work-area proc . params) + (let* ((test-path (if work-area + work-area + (db:test-get-rundir-from-test-id dbstruct run-id test-id))) + (tdb (open-test-db test-path))) + (apply proc tdb params))) + +(define (tdb:testdb-initialize db) + (debug:print 11 *default-log-port* "db:testdb-initialize START") + (sqlite3:with-transaction + db + (lambda () + (for-each + (lambda (sqlcmd) + (sqlite3:execute db sqlcmd)) + (list "CREATE TABLE IF NOT EXISTS test_rundat ( + id INTEGER PRIMARY KEY, + update_time TIMESTAMP, + cpuload INTEGER DEFAULT -1, + diskfree INTEGER DEFAULT -1, + diskusage INTGER DEFAULT -1, + run_duration INTEGER DEFAULT 0);" + "CREATE TABLE IF NOT EXISTS test_data ( + id INTEGER PRIMARY KEY, + test_id INTEGER, + category TEXT DEFAULT '', + variable TEXT, + value REAL, + expected REAL, + tol REAL, + units TEXT, + comment TEXT DEFAULT '', + status TEXT DEFAULT 'n/a', + type TEXT DEFAULT '', + CONSTRAINT test_data_constraint UNIQUE (test_id,category,variable));" + "CREATE TABLE IF NOT EXISTS test_steps ( + id INTEGER PRIMARY KEY, + test_id INTEGER, + stepname TEXT, + state TEXT DEFAULT 'NOT_STARTED', + status TEXT DEFAULT 'n/a', + 
event_time TIMESTAMP, + comment TEXT DEFAULT '', + logfile TEXT DEFAULT '', + CONSTRAINT test_steps_constraint UNIQUE (test_id,stepname,state));" + ;; test_meta can be used for handing commands to the test + ;; e.g. KILLREQ + ;; the ackstate is set to 1 once the command has been completed + "CREATE TABLE IF NOT EXISTS test_meta ( + id INTEGER PRIMARY KEY, + var TEXT, + val TEXT, + ackstate INTEGER DEFAULT 0, + CONSTRAINT metadat_constraint UNIQUE (var));")))) + (debug:print 11 *default-log-port* "db:testdb-initialize END")) + +;; This routine moved to db:read-test-data +;; +(define (tdb:read-test-data tdb test-id categorypatt) + (let ((res '())) + (sqlite3:for-each-row + (lambda (id test_id category variable value expected tol units comment status type) + (set! res (cons (vector id test_id category variable value expected tol units comment status type) res))) + tdb + "SELECT id,test_id,category,variable,value,expected,tol,units,comment,status,type FROM test_data WHERE test_id=? AND category LIKE ? ORDER BY category,variable;" test-id categorypatt) + (sqlite3:finalize! tdb) + (reverse res))) + +;;====================================================================== +;; T E S T D A T A +;;====================================================================== + +;; ;; get a list of test_data records matching categorypatt +;; (define (tdb:read-test-data test-id categorypatt #!key (work-area #f)) +;; (let ((tdb (tdb:open-test-db-by-test-id test-id work-area: work-area))) +;; (if (sqlite3:database? tdb) +;; (let ((res '())) +;; (sqlite3:for-each-row +;; (lambda (id test_id category variable value expected tol units comment status type) +;; (set! res (cons (vector id test_id category variable value expected tol units comment status type) res))) +;; tdb +;; "SELECT id,test_id,category,variable,value,expected,tol,units,comment,status,type FROM test_data WHERE test_id=? AND category LIKE ? ORDER BY category,variable;" test-id categorypatt) +;; (sqlite3:finalize! 
tdb) +;; (reverse res)) +;; '()))) + +;; NOTE: Run this local with #f for db !!! +(define (tdb:load-test-data run-id test-id) + (let loop ((lin (read-line))) + (if (not (eof-object? lin)) + (begin + (debug:print 4 *default-log-port* lin) + ;;(when lin ;; this when blocked stack dump caused by .dat file from logpro being 0-byte. fixed by upgrading logpro + (rmt:csv->test-data run-id test-id lin) + ;;) + (loop (read-line))))) + ;; roll up the current results. + ;; FIXME: Add the status too + (rmt:test-data-rollup run-id test-id #f)) + +;; NOTE: Run this local with #f for db !!! +(define (tdb:load-logpro-data run-id test-id) + (let loop ((lin (read-line))) + (if (not (eof-object? lin)) + (begin + (debug:print 4 *default-log-port* lin) + ;;(when lin ;; this when blocked stack dump caused by .dat file from logpro being 0-byte. fixed by upgrading logpro + (rmt:csv->test-data run-id test-id lin) + ;;) + (loop (read-line))))) + ;; roll up the current results. + ;; FIXME: Add the status too + (rmt:test-data-rollup run-id test-id #f)) + +(define (tdb:get-prev-tol-for-test tdb test-id category variable) + ;; Finish me? 
+ (values #f #f #f)) + +;;====================================================================== +;; S T E P S +;;====================================================================== + +(define (tdb:step-get-time-as-string vec) + (seconds->time-string (tdb:step-get-event_time vec))) + +;; get a pretty table to summarize steps +;; +;; NOT USED, WILL BE REMOVED +;; +(define (tdb:get-steps-table steps);; organise the steps for better readability + (let ((res (make-hash-table))) + (for-each + (lambda (step) + (debug:print 6 *default-log-port* "step=" step) + (let ((record (hash-table-ref/default + res + (tdb:step-get-stepname step) + ;; stepname start end status Duration Logfile + (vector (tdb:step-get-stepname step) "" "" "" "" "")))) + (debug:print 6 *default-log-port* "record(before) = " record + "\nid: " (tdb:step-get-id step) + "\nstepname: " (tdb:step-get-stepname step) + "\nstate: " (tdb:step-get-state step) + "\nstatus: " (tdb:step-get-status step) + "\ntime: " (tdb:step-get-event_time step)) + (case (string->symbol (tdb:step-get-state step)) + ((start)(vector-set! record 1 (tdb:step-get-event_time step)) + (vector-set! record 3 (if (equal? (vector-ref record 3) "") + (tdb:step-get-status step))) + (if (> (string-length (tdb:step-get-logfile step)) + 0) + (vector-set! record 5 (tdb:step-get-logfile step)))) + ((end) + (vector-set! record 2 (any->number (tdb:step-get-event_time step))) + (vector-set! record 3 (tdb:step-get-status step)) + (vector-set! record 4 (let ((startt (any->number (vector-ref record 1))) + (endt (any->number (vector-ref record 2)))) + (debug:print 4 *default-log-port* "record[1]=" (vector-ref record 1) + ", startt=" startt ", endt=" endt + ", get-status: " (tdb:step-get-status step)) + (if (and (number? startt)(number? endt)) + (seconds->hr-min-sec (- endt startt)) "-1"))) + (if (> (string-length (tdb:step-get-logfile step)) + 0) + (vector-set! record 5 (tdb:step-get-logfile step)))) + (else + (vector-set! 
record 2 (tdb:step-get-state step)) + (vector-set! record 3 (tdb:step-get-status step)) + (vector-set! record 4 (tdb:step-get-event_time step)))) + (hash-table-set! res (tdb:step-get-stepname step) record) + (debug:print 6 *default-log-port* "record(after) = " record + "\nid: " (tdb:step-get-id step) + "\nstepname: " (tdb:step-get-stepname step) + "\nstate: " (tdb:step-get-state step) + "\nstatus: " (tdb:step-get-status step) + "\ntime: " (tdb:step-get-event_time step)))) + ;; (else (vector-set! record 1 (tdb:step-get-event_time step))) + (sort steps (lambda (a b) + (cond + ((< (tdb:step-get-event_time a)(tdb:step-get-event_time b)) #t) + ((eq? (tdb:step-get-event_time a)(tdb:step-get-event_time b)) + (< (tdb:step-get-id a) (tdb:step-get-id b))) + (else #f))))) + res)) + +;; Move this to steps.scm +;; +;; get a pretty table to summarize steps +;; +(define (tdb:get-steps-table-list steps) + ;; organise the steps for better readability + (let ((res (make-hash-table))) + (for-each + (lambda (step) + (debug:print 6 *default-log-port* "step=" step) + (let ((record (hash-table-ref/default + res + (tdb:step-get-stepname step) + ;; stepname start end status + (vector (tdb:step-get-stepname step) "" "" "" "" "")))) + (debug:print 6 *default-log-port* "record(before) = " record + "\nid: " (tdb:step-get-id step) + "\nstepname: " (tdb:step-get-stepname step) + "\nstate: " (tdb:step-get-state step) + "\nstatus: " (tdb:step-get-status step) + "\ntime: " (tdb:step-get-event_time step)) + (case (string->symbol (tdb:step-get-state step)) + ((start)(vector-set! record 1 (tdb:step-get-event_time step)) + (vector-set! record 3 (if (equal? (vector-ref record 3) "") + (tdb:step-get-status step))) + (if (> (string-length (tdb:step-get-logfile step)) + 0) + (vector-set! record 5 (tdb:step-get-logfile step)))) + ((end) + (vector-set! record 2 (any->number (tdb:step-get-event_time step))) + (vector-set! record 3 (tdb:step-get-status step)) + (vector-set! 
record 4 (let ((startt (any->number (vector-ref record 1))) + (endt (any->number (vector-ref record 2)))) + (debug:print 4 *default-log-port* "record[1]=" (vector-ref record 1) + ", startt=" startt ", endt=" endt + ", get-status: " (tdb:step-get-status step)) + (if (and (number? startt)(number? endt)) + (seconds->hr-min-sec (- endt startt)) "-1"))) + (if (> (string-length (tdb:step-get-logfile step)) + 0) + (vector-set! record 5 (tdb:step-get-logfile step)))) + (else + (vector-set! record 2 (tdb:step-get-state step)) + (vector-set! record 3 (tdb:step-get-status step)) + (vector-set! record 4 (tdb:step-get-event_time step)))) + (hash-table-set! res (tdb:step-get-stepname step) record) + (debug:print 6 *default-log-port* "record(after) = " record + "\nid: " (tdb:step-get-id step) + "\nstepname: " (tdb:step-get-stepname step) + "\nstate: " (tdb:step-get-state step) + "\nstatus: " (tdb:step-get-status step) + "\ntime: " (tdb:step-get-event_time step)))) + ;; (else (vector-set! record 1 (tdb:step-get-event_time step))) + (sort steps (lambda (a b) + (cond + ((< (tdb:step-get-event_time a)(tdb:step-get-event_time b)) #t) + ((eq? (tdb:step-get-event_time a)(tdb:step-get-event_time b)) + (< (tdb:step-get-id a) (tdb:step-get-id b))) + (else #f))))) + res)) + +;; +;; Move to steps.scm +;; +(define (tdb:get-compressed-steps comprsteps) ;; from tdb:get-steps-table + (map (lambda (x) + ;; take advantage of the \n on time->string + (vector + (vector-ref x 0) + (let ((s (vector-ref x 1))) + (if (number? s)(seconds->time-string s) s)) + (let ((s (vector-ref x 2))) + (if (number? s)(seconds->time-string s) s)) + (vector-ref x 3) ;; status + (vector-ref x 4) + (vector-ref x 5))) ;; time delta + (sort (hash-table-values comprsteps) + (lambda (a b) + (let ((time-a (vector-ref a 1)) + (time-b (vector-ref b 1))) + (if (and (number? time-a)(number? time-b)) + (if (< time-a time-b) + #t + (if (eq? 
time-a time-b) + (string 4.9) -(let-values (( (chicken-release-number chicken-major-version) - (apply values - (map string->number - (take - (string-split (chicken-version) ".") - 2))))) - (let ((resolve-pathname-broken? - (or (> chicken-release-number 4) - (and (eq? 4 chicken-release-number) (> chicken-major-version 9))))) - (if resolve-pathname-broken? - (define ##sys#expand-home-path pathname-expand)))) - -(define (realpath x) (resolve-pathname (pathname-expand (or x "/dev/null")) )) - -(define (common:get-this-exe-fullpath #!key (argv (argv))) - (let* ((this-script - (cond - ((and (> (length argv) 2) - (string-match "^(.*/csi|csi)$" (car argv)) - (string-match "^-(s|ss|sx|script)$" (cadr argv))) - (caddr argv)) - (else (car argv)))) - (fullpath (realpath this-script))) - fullpath)) -(define *common:this-exe-fullpath* (common:get-this-exe-fullpath)) -(define *common:this-exe-dir* (pathname-directory *common:this-exe-fullpath*)) -(define *common:this-exe-name* (pathname-strip-directory *common:this-exe-fullpath*)) - -;; Move me elsewhere ... -;; RADT => Why do we meed the version check here, this is called only if version misma -;; -(define (common:cleanup-db dbstruct #!key (full #f)) - (apply db:multi-db-sync - dbstruct - 'schema - ;; 'new2old - 'killservers - 'adj-target - ;; 'old2new - 'new2old - ;; (if full - '(dejunk) - ;; '()) - ) - (if (common:api-changed?) - (common:set-last-run-version))) - -(define (common:snapshot-file filepath #!key (subdir ".") ) - (if (file-exists? filepath) - (let* ((age-sec (lambda (file) - (if (file-exists? file) - (- (current-seconds) (file-modification-time file)) - 1000000000))) ;; return really old value if file doesn't exist. we want to clobber it if old or not exist. 
- (ok-flag #t) - (age-mins (lambda (file) (/ (age-sec file) 60))) - (age-hrs (lambda (file) (/ (age-mins file) 60))) - (age-days (lambda (file) (/ (age-hrs file) 24))) - (age-wks (lambda (file) (/ (age-days file) 7))) - (docmd (lambda (cmd) - (cond - (ok-flag - (let ((res (system cmd))) - (cond - ((eq? 0 res) - #t) - (else - (set! ok-flag #f) - (debug:print 0 *default-log-port* "ERROR: ["(common:human-time)"] Command failed with exit code " - (if (< res 0) - res - (/ res 8)) " ["cmd"]" ) - #f)))) - (else - (debug:print 0 *default-log-port* "ERROR: ["(common:human-time)"] Not runnining command due to prior error. ["cmd"]") - #f)))) - (copy (lambda (src dest) (docmd (conc "/bin/cp '"src"' '"dest"'")))) - (copy+zip (lambda (src dest) (docmd (conc "gzip -c - < '"src"' > '"dest"'")))) - (fullpath (realpath filepath)) - (basedir (pathname-directory fullpath)) - (basefile (pathname-strip-directory fullpath)) - ;;(prevfile (conc filepath ".prev.gz")) - (minsfile (conc basedir "/" subdir "/" basefile ".mins.gz")) - (hrsfile (conc basedir "/" subdir "/" basefile ".hrs.gz")) - (daysfile (conc basedir "/" subdir "/" basefile ".days.gz")) - (wksfile (conc basedir "/" subdir "/" basefile ".weeks.gz"))) - - ;; create subdir it not exists - (if (not (directory-exists? (conc basedir "/" subdir))) - (docmd (conc "/bin/mkdir -p '"(conc basedir "/" subdir)"'"))) - - ;; copy&zip to .mins if not exists - (if (not (file-exists? minsfile)) - (copy+zip filepath minsfile)) - ;; copy .mins to .hrs if not exists - (if (not (file-exists? hrsfile)) - (copy minsfile hrsfile)) - ;; copy .hrs to .days if not exists - (if (not (file-exists? daysfile)) - (copy hrsfile daysfile)) - ;; copy .days to .weeks if not exists - (if (not (file-exists? 
wksfile)) - (copy daysfile wksfile)) - - - ;; if age(.mins.gz) >= 1h: - ;; copy .mins.gz .hrs.gz - ;; copy .mins.gz - (when (>= (age-mins minsfile) 1) - (copy minsfile hrsfile) - (copy+zip filepath minsfile)) - - ;; if age(.hrs.gz) >= 1d: - ;; copy .hrs.gz .days.gz - ;; copy .mins.gz .hrs.gz - (when (>= (age-days hrsfile) 1) - (copy hrsfile daysfile) - (copy minsfile hrsfile)) - - ;; if age(.days.gz) >= 1w: - ;; copy .days.gz .weeks.gz - ;; copy .hrs.gz .days.gz - (when (>= (age-wks daysfile) 1) - (copy daysfile wksfile) - (copy hrsfile daysfile)) - #t) - #f)) - - - -;; Rotate logs, logic: -;; if > 500k and older than 1 week: -;; remove previous compressed log and compress this log -;; WARNING: This proc operates assuming that it is in the directory above the -;; logs directory you wish to log-rotate. -;; -(define (common:rotate-logs) - (let* ((all-files (make-hash-table)) - (stats (make-hash-table)) - (inc-stat (lambda (key) - (hash-table-set! stats key (+ (hash-table-ref/default stats key 0) 1)))) - (max-allowed (string->number (or (configf:lookup *configdat* "setup" "max-logfiles") "300")))) ;; name -> age - (if (not (directory-exists? "logs"))(create-directory "logs")) - (directory-fold - (lambda (file rem) - (handle-exceptions - exn - (begin - (debug:print-info 0 *default-log-port* "unable to rotate log " file ", probably handled by another process.") - (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) - (print-call-chain (current-error-port))) - (let* ((fullname (conc "logs/" file)) - (mod-time (file-modification-time fullname)) - (file-age (- (current-seconds) mod-time))) - (hash-table-set! all-files file mod-time) - (if (or (and (string-match "^.*.log" file) - (> (file-size fullname) 200000)) - (and (string-match "^server-.*.log" file) - (> (- (current-seconds) (file-modification-time fullname)) - (* 8 60 60)))) - (let ((gzfile (conc fullname ".gz"))) - (if (common:file-exists? 
gzfile) - (begin - (debug:print-info 0 *default-log-port* "removing " gzfile) - (delete-file* gzfile) - (hash-table-delete! all-files gzfile) ;; needed? - )) - (debug:print-info 0 *default-log-port* "compressing " file) - (system (conc "gzip " fullname)) - (inc-stat "gzipped") - (hash-table-set! all-files (conc file ".gz") file-age) ;; add the .gz file and remove the base file - (hash-table-delete! all-files file) - ) - (if (> file-age (* (string->number (or (configf:lookup *configdat* "setup" "log-expire-days") "30")) 24 3600)) - (handle-exceptions - exn - #f - (if (directory? fullname) - (begin - (debug:print-error 0 *default-log-port* fullname " in logs directory is a directory! Cannot rotate it, it is best to not put subdirectories in the logs dir.") - (inc-stat "directories")) - (begin - (delete-file* fullname) - (inc-stat "deleted"))) - (hash-table-delete! all-files file))))))) - '() - "logs") - (for-each - (lambda (category) - (let ((quant (hash-table-ref/default stats category 0))) - (if (> quant 0) - (debug:print-info 0 *default-log-port* category " log files: " quant)))) - `("deleted" "gzipped" "directories")) - (let ((num-logs (hash-table-size all-files))) - (if (> num-logs max-allowed) ;; because NFS => don't let number of logs exceed 300 - (let ((files (take (sort (hash-table-keys all-files) - (lambda (a b) - (< (hash-table-ref all-files a)(hash-table-ref all-files b)))) - (- num-logs max-allowed)))) - (for-each - (lambda (file) - (let* ((fullname (conc "logs/" file))) - (if (directory? fullname) - (debug:print-error 0 *default-log-port* fullname " in logs directory is a directory! 
Cannot rotate it, it is best to not put subdirectories in the logs dir.") - (handle-exceptions - exn - (debug:print-error 0 *default-log-port* "failed to remove " fullname) - (delete-file* fullname))))) - files) - (debug:print-info 0 *default-log-port* "Deleted " (length files) " files from logs, keeping " max-allowed " files.")))))) - -;; Force a megatest cleanup-db if version is changed and skip-version-check not specified -;; Do NOT check if not on homehost! -;; -(define (common:exit-on-version-changed) - (if (common:on-homehost?) - (if (common:api-changed?) - (let* ((mtconf (conc (get-environment-variable "MT_RUN_AREA_HOME") "/megatest.config")) - (dbfile (conc (get-environment-variable "MT_RUN_AREA_HOME") "/megatest.db")) - (read-only (not (file-write-access? dbfile))) - (dbstruct (db:setup #t))) - (debug:print 0 *default-log-port* - "WARNING: Version mismatch!\n" - " expected: " (common:version-signature) "\n" - " got: " (common:get-last-run-version)) - (cond - ((get-environment-variable "MT_SKIP_DB_MIGRATE") #t) - ((and (common:file-exists? mtconf) (common:file-exists? dbfile) (not read-only) - (eq? (current-user-id)(file-owner mtconf))) ;; safe to run -cleanup-db - (debug:print 0 *default-log-port* " I see you are the owner of megatest.config, attempting to cleanup and reset to new version") - (handle-exceptions - exn - (begin - (debug:print 0 *default-log-port* "Failed to switch versions.") - (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) - (print-call-chain (current-error-port)) - (exit 1)) - (common:cleanup-db dbstruct))) - ((not (common:file-exists? mtconf)) - (debug:print 0 *default-log-port* " megatest.config does not exist in this area. Cannot proceed with megatest version migration.") - (exit 1)) - ((not (common:file-exists? dbfile)) - (debug:print 0 *default-log-port* " megatest.db does not exist in this area. Cannot proceed with megatest version migration.") - (exit 1)) - ((not (eq? 
(current-user-id)(file-owner mtconf))) - (debug:print 0 *default-log-port* " You do not own megatest.db in this area. Cannot proceed with megatest version migration.") - (exit 1)) - (read-only - (debug:print 0 *default-log-port* " You have read-only access to this area. Cannot proceed with megatest version migration.") - (exit 1)) - (else - (debug:print 0 *default-log-port* " to switch versions you can run: \"megatest -cleanup-db\"") - (exit 1))))))) -;; (begin -;; (debug:print 0 *default-log-port* "ERROR: cannot migrate version unless on homehost. Exiting.") -;; (exit 1)))) - -;;====================================================================== -;; S P A R S E A R R A Y S -;;====================================================================== - -(define (make-sparse-array) - (let ((a (make-sparse-vector))) - (sparse-vector-set! a 0 (make-sparse-vector)) - a)) - -(define (sparse-array? a) - (and (sparse-vector? a) - (sparse-vector? (sparse-vector-ref a 0)))) - -(define (sparse-array-ref a x y) - (let ((row (sparse-vector-ref a x))) - (if row - (sparse-vector-ref row y) - #f))) - -(define (sparse-array-set! a x y val) - (let ((row (sparse-vector-ref a x))) - (if row - (sparse-vector-set! row y val) - (let ((new-row (make-sparse-vector))) - (sparse-vector-set! a x new-row) - (sparse-vector-set! new-row y val))))) - -;;====================================================================== -;; L O C K E R S A N D B L O C K E R S -;;====================================================================== - -;; block further accesses to databases. Call this before shutting db down -(define (common:db-block-further-queries) - (mutex-lock! *db-access-mutex*) - (set! *db-access-allowed* #f) - (mutex-unlock! *db-access-mutex*)) - -(define (common:db-access-allowed?) - (let ((val (begin - (mutex-lock! *db-access-mutex*) - *db-access-allowed* - (mutex-unlock! 
*db-access-mutex*)))) - val)) - -;;====================================================================== -;; U S E F U L S T U F F -;;====================================================================== - -;; dot-locking egg seems not to work, using this for now -;; if lock is older than expire-time then remove it and try again -;; to get the lock -;; -(define (common:simple-file-lock fname #!key (expire-time 300)) - (if (common:file-exists? fname) - (if (> (- (current-seconds)(file-modification-time fname)) expire-time) - (begin - (handle-exceptions exn #f (delete-file* fname)) - (common:simple-file-lock fname expire-time: expire-time)) - #f) - (let ((key-string (conc (get-host-name) "-" (current-process-id)))) - (with-output-to-file fname - (lambda () - (print key-string))) - (thread-sleep! 0.25) - (if (common:file-exists? fname) - (handle-exceptions exn - #f - (with-input-from-file fname - (lambda () - (equal? key-string (read-line))))) - #f)))) - -(define (common:simple-file-lock-and-wait fname #!key (expire-time 300)) - (let ((end-time (+ expire-time (current-seconds)))) - (let loop ((got-lock (common:simple-file-lock fname expire-time: expire-time))) - (if got-lock - #t - (if (> end-time (current-seconds)) - (begin - (thread-sleep! 
3) - (loop (common:simple-file-lock fname expire-time: expire-time))) - #f))))) - -(define (common:simple-file-release-lock fname) - (handle-exceptions - exn - #f ;; I don't really care why this failed (at least for now) - (delete-file* fname))) - -;;====================================================================== -;; D E B U G G I N G S T U F F -;;====================================================================== - -;; (define *verbosity* 1) -;; (define *logging* #f) - -(define (common:set-last-run-version) - (rmt:set-var "MEGATEST_VERSION" (common:version-signature))) - -;; postive number if megatest version > db version -;; negative number if megatest version < db version -(define (common:version-db-delta) - (- megatest-version (common:get-last-run-version-number))) - -(define (common:version-changed?) - (not (equal? (common:get-last-run-version) - (common:version-signature)))) - -;; from metadat lookup MEGATEST_VERSION -;; -(define (common:get-last-run-version) ;; RADT => How does this work in send-receive function??; assume it is the value saved in some DB - (rmt:get-var "MEGATEST_VERSION")) - -(define (common:get-last-run-version-number) - (string->number - (substring (common:get-last-run-version) 0 6))) - -(define (common:api-changed?) - (not (equal? (substring (->string megatest-version) 0 4) - (substring (conc (common:get-last-run-version)) 0 4)))) - - -;;====================================================================== -;; E X I T H A N D L I N G -;;====================================================================== - -(define (common:run-sync?) - (and (common:on-homehost?) - (args:get-arg "-server"))) - -;; (let ((ohh (common:on-homehost?)) -;; (srv (args:get-arg "-server"))) -;; (and ohh srv))) - ;; (debug:print-info 0 *default-log-port* "common:run-sync? 
ohh=" ohh ", srv=" srv) - -(define *watchdog* (make-thread - (lambda () - (handle-exceptions - exn - (begin - (print-call-chain) - (print " message: " ((condition-property-accessor 'exn 'message) exn))) - (common:watchdog))) - "Watchdog thread")) - -;; currently the primary job of the watchdog is to run the sync back to megatest.db from the db in /tmp -;; if we are on the homehost and we are a server (by definition we are on the homehost if we are a server) -;; -(define (common:readonly-watchdog dbstruct) - (thread-sleep! 0.05) ;; delay for startup - (debug:print-info 13 *default-log-port* "common:readonly-watchdog entered.") - ;; sync megatest.db to /tmp/.../megatst.db - (let* ((sync-cool-off-duration 3) - (golden-mtdb (dbr:dbstruct-mtdb dbstruct)) - (golden-mtpath (db:dbdat-get-path golden-mtdb)) - (tmp-mtdb (dbr:dbstruct-tmpdb dbstruct)) - (tmp-mtpath (db:dbdat-get-path tmp-mtdb))) - (debug:print-info 0 *default-log-port* "Read-only periodic sync thread started.") - (let loop ((last-sync-time 0)) - (debug:print-info 13 *default-log-port* "loop top tmp-mtpath="tmp-mtpath" golden-mtpath="golden-mtpath) - (let* ((duration-since-last-sync (- (current-seconds) last-sync-time))) - (debug:print-info 13 *default-log-port* "duration-since-last-sync="duration-since-last-sync) - (if (and (not *time-to-exit*) - (< duration-since-last-sync sync-cool-off-duration)) - (thread-sleep! 
(- sync-cool-off-duration duration-since-last-sync))) - (if (not *time-to-exit*) - (let ((golden-mtdb-mtime (file-modification-time golden-mtpath)) - (tmp-mtdb-mtime (file-modification-time tmp-mtpath))) - (if (> golden-mtdb-mtime tmp-mtdb-mtime) - (if (< golden-mtdb-mtime (- (current-seconds) 3)) ;; file has NOT been touched in past three seconds, this way multiple servers won't fight to sync back - (let ((res (db:multi-db-sync dbstruct 'old2new))) - (debug:print-info 13 *default-log-port* "rosync called, " res " records transferred.")))) - (loop (current-seconds))) - #t))) - (debug:print-info 0 *default-log-port* "Exiting readonly-watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id)" mtpath="golden-mtpath))) - -;; TODO: for multiple areas, we will have multiple watchdogs; and multiple threads to manage -(define (common:watchdog) - (debug:print-info 13 *default-log-port* "common:watchdog entered.") - (if (launch:setup) - (if (common:on-homehost?) - (let ((dbstruct (db:setup #t))) - (debug:print-info 13 *default-log-port* "after db:setup with dbstruct=" dbstruct) - (cond - ((dbr:dbstruct-read-only dbstruct) - (debug:print-info 13 *default-log-port* "loading read-only watchdog") - (common:readonly-watchdog dbstruct)) - (else - (debug:print-info 13 *default-log-port* "loading writable-watchdog.") - (let* ((syncer (or (configf:lookup *configdat* "server" "sync-method") "brute-force-sync"))) - (cond - ((equal? syncer "brute-force-sync") - (server:writable-watchdog-bruteforce dbstruct)) - ((equal? 
syncer "delta-sync") - (server:writable-watchdog-deltasync dbstruct)) - (else - (debug:print-error 0 *default-log-port* "Unknown server/sync-method specified ("syncer") - valid values are brute-force-sync and delta-sync.") - (exit 1))) - ;;(debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] Syncer started (method="syncer")") - ))) - (debug:print-info 13 *default-log-port* "watchdog done.")) - (debug:print-info 13 *default-log-port* "no need for watchdog on non-homehost")))) - - -(define (std-exit-procedure) - ;;(common:telemetry-log-close) - (on-exit (lambda () 0)) - ;;(debug:print-info 13 *default-log-port* "std-exit-procedure called; *time-to-exit*="*time-to-exit*) - (let ((no-hurry (if *time-to-exit* ;; hurry up - #f - (begin - (set! *time-to-exit* #t) - #t)))) - (debug:print-info 4 *default-log-port* "starting exit process, finalizing databases.") - (if (and no-hurry (debug:debug-mode 18)) - (rmt:print-db-stats)) - (let ((th1 (make-thread (lambda () ;; thread for cleaning up, give it five seconds - (if *dbstruct-db* (db:close-all *dbstruct-db*)) ;; one second allocated - (if *task-db* - (let ((db (cdr *task-db*))) - (if (sqlite3:database? db) - (begin - (sqlite3:interrupt! db) - (sqlite3:finalize! db #t) - ;; (vector-set! *task-db* 0 #f) - (set! *task-db* #f))))) - (http-client#close-all-connections!) - ;; (if (and *runremote* - ;; (remote-conndat *runremote*)) - ;; (begin - ;; (http-client#close-all-connections!))) ;; for http-client - (if (not (eq? *default-log-port* (current-error-port))) - (close-output-port *default-log-port*)) - (set! *default-log-port* (current-error-port))) "Cleanup db exit thread")) - (th2 (make-thread (lambda () - (debug:print 4 *default-log-port* "Attempting clean exit. Please be patient and wait a few seconds...") - (if no-hurry - (begin - (thread-sleep! 5)) ;; give the clean up few seconds to do it's stuff - (begin - (thread-sleep! 2))) - (debug:print 4 *default-log-port* " ... 
done") - ) - "clean exit"))) - (thread-start! th1) - (thread-start! th2) - (thread-join! th1) - ) - ) - - 0) - -(define (std-signal-handler signum) - ;; (signal-mask! signum) - (set! *time-to-exit* #t) - ;;(debug:print-info 13 *default-log-port* "got signal "signum) - (debug:print-error 0 *default-log-port* "Received signal " signum " aaa exiting promptly") - ;; (std-exit-procedure) ;; shouldn't need this since we are exiting and it will be called anyway - (exit)) - -(define (special-signal-handler signum) - ;; (signal-mask! signum) - (set! *time-to-exit* #t) - ;;(debug:print-info 13 *default-log-port* "got signal "signum) - (debug:print-error 0 *default-log-port* "Received signal " signum " sending email befor exiting!!") - ;;TODO send email to notify admin contact listed in the config that the lisner got killed - ;; (std-exit-procedure) ;; shouldn't need this since we are exiting and it will be called anyway - (exit)) - - -(set-signal-handler! signal/int std-signal-handler) ;; ^C -(set-signal-handler! signal/term std-signal-handler) - -;; (set-signal-handler! signal/stop std-signal-handler) ;; ^Z NO, do NOT handle ^Z! - -;; '(print (string-intersperse (map cadr (hash-table-ref/default (read-config "megatest.config" \#f \#t) "disks" '"'"'("none" ""))) "\n"))' -(define (common:get-disks #!key (configf #f)) - (hash-table-ref/default - (or configf (read-config "megatest.config" #f #t)) - "disks" '("none" ""))) - -;;====================================================================== -;; T A R G E T S , S T A T E , S T A T U S , -;; R U N N A M E A N D T E S T P A T T -;;====================================================================== - -;; (map print (map car (hash-table->alist (read-config "runconfigs.config" #f #t)))) -;; -(define (common:get-runconfig-targets #!key (configf #f)) - (let ((targs (sort (map car (hash-table->alist - (or configf ;; NOTE: There is no value in using runconfig:read here. 
- (read-config (conc *toppath* "/runconfigs.config") - #f #t) - (make-hash-table)))) - string trynum 0) - (begin - (thread-sleep! 2) - (common:get-homehost trynum: (- trynum 1))) - #f)) - (else - (let* ((currhost (get-host-name)) - (bestadrs (server:get-best-guess-address currhost)) - ;; first look in config, then look in file .homehost, create it if not found - (homehost (or (configf:lookup *configdat* "server" "homehost" ) - (handle-exceptions - exn - (if (> trynum 0) - (let ((delay-time (* (- 5 trynum) 5))) - (mutex-unlock! *homehost-mutex*) - (debug:print 0 *default-log-port* "ERROR: ["(common:human-time)"] Failed to read .homehost file, delaying " delay-time " seconds and trying again, message: " ((condition-property-accessor 'exn 'message) exn)) - (thread-sleep! delay-time) - (common:get-homehost trynum: (- trynum 1))) - (begin - (mutex-unlock! *homehost-mutex*) - (debug:print 0 *default-log-port* "ERROR: ["(common:human-time)"] Failed to read .homehost file after trying five times. Giving up and exiting, message: " ((condition-property-accessor 'exn 'message) exn)) - (exit 1))) - (let ((hhf (conc *toppath* "/.homehost"))) - (if (common:file-exists? hhf) - (with-input-from-file hhf read-line) - (if (file-write-access? *toppath*) - (begin - (with-output-to-file hhf - (lambda () - (print bestadrs))) - (begin - (mutex-unlock! *homehost-mutex*) - (car (common:get-homehost)))) - #f)))))) - (at-home (or (equal? homehost currhost) - (equal? homehost bestadrs)))) - (set! *home-host* (cons homehost at-home)) - (mutex-unlock! *homehost-mutex*) - *home-host*)))) - -;; am I on the homehost? -;; -(define (common:on-homehost?) - (let ((hh (common:get-homehost))) - (if hh - (cdr hh) - #f))) - -;; do we honor the caches of the config files? -;; -(define (common:use-cache?) - (let ((res #t)) ;; priority by order of evaluation - (if *configdat* ;; sillyness here. can't use setup/use-cache to know if we can use the cached files! - (if (equal? 
(configf:lookup *configdat* "setup" "use-cache") "no") - (set! res #f) - (if (equal? (configf:lookup *configdat* "setup" "use-cache") "yes") - (set! res #t)))) - (if (args:get-arg "-no-cache")(set! res #f)) ;; overrides setting in "setup" - (if (getenv "MT_USE_CACHE") - (if (equal? (getenv "MT_USE_CACHE") "yes") - (set! res #t) - (if (equal? (getenv "MT_USE_CACHE") "no") - (set! res #f)))) ;; overrides -no-cache switch - res)) - -;; force use of server? -;; -(define (common:force-server?) - (let* ((force-setting (configf:lookup *configdat* "server" "force")) - (force-type (if force-setting (string->symbol force-setting) #f)) - (force-result (case force-type - ((#f) #f) - ((always) #t) - ((test) (if (args:get-arg "-execute") ;; we are in a test - #t - #f)) - (else - (debug:print 0 *default-log-port* "ERROR: Bad server force setting " force-setting ", forcing server.") - #t)))) ;; default to requiring server - (if force-result - (begin - (debug:print-info 0 *default-log-port* "forcing use of server, force setting is \"" force-setting "\".") - #t) - #f))) - -;; moving common:htree->html to testsmod.scm to minimize deps on stml2 - -;; hash-table tree to alist tree -;; -(define (common:htree->atree ht) - (map (lambda (x) - (cons (car x) - (let ((y (cdr x))) - (if (hash-table? y) - (common:htree->atree y) - y)))) - (hash-table->alist ht))) - -;;====================================================================== -;; M U N G E D A T A I N T O N I C E F O R M S -;;====================================================================== - -;; Generate an index for a sparse list of key values -;; ( (rowname1 colname1 val1)(rowname2 colname2 val2) ) -;; -;; => -;; -;; ( (rowname1 0)(rowname2 1)) ;; rownames -> num -;; (colname1 0)(colname2 1)) ) ;; colnames -> num -;; -;; optional apply proc to rownum colnum value -(define (common:sparse-list-generate-index data #!key (proc #f)) - (if (null? 
data) - (list '() '()) - (let loop ((hed (car data)) - (tal (cdr data)) - (rownames '()) - (colnames '()) - (rownum 0) - (colnum 0)) - (let* ((rowkey (car hed)) - (colkey (cadr hed)) - (value (caddr hed)) - (existing-rowdat (assoc rowkey rownames)) - (existing-coldat (assoc colkey colnames)) - (curr-rownum (if existing-rowdat rownum (+ rownum 1))) - (curr-colnum (if existing-coldat colnum (+ colnum 1))) - (new-rownames (if existing-rowdat rownames (cons (list rowkey curr-rownum) rownames))) - (new-colnames (if existing-coldat colnames (cons (list colkey curr-colnum) colnames)))) - ;; (debug:print-info 0 *default-log-port* "Processing record: " hed ) - (if proc (proc curr-rownum curr-colnum rowkey colkey value)) - (if (null? tal) - (list new-rownames new-colnames) - (loop (car tal) - (cdr tal) - new-rownames - new-colnames - (if (> curr-rownum rownum) curr-rownum rownum) - (if (> curr-colnum colnum) curr-colnum colnum) - )))))) - -;; if it looks like a number -> convert it to a number, else return it -;; -(define (common:lazy-convert inval) - (let* ((as-num (if (string? inval)(string->number inval) #f))) - (or as-num inval))) - -;; convert string a=1; b=2; c=a silly thing; d= -;; to '((a . 1)(b . 2)(c . "a silly thing")(d . "")) -;; -(define (common:val->alist val #!key (convert #f)) - (let ((val-list (string-split-fields ";\\s*" val #:infix))) - (if val-list - (map (lambda (x) - (let ((f (string-split-fields "\\s*=\\s*" x #:infix))) - (case (length f) - ((0) `(,#f)) ;; null string case - ((1) `(,(string->symbol (car f)))) - ((2) `(,(string->symbol (car f)) . ,(let ((inval (cadr f))) - (if convert (common:lazy-convert inval) inval)))) - (else f)))) - val-list) - '()))) - -;;====================================================================== -;; S Y S T E M S T U F F -;;====================================================================== - -;; lazy-safe get file mod time. on any error (file not existing etc.) 
return 0 -;; -(define (common:lazy-modification-time fpath) - (handle-exceptions - exn - 0 - (file-modification-time fpath))) - -;; find timestamp of newest file associated with a sqlite db file -(define (common:lazy-sqlite-db-modification-time fpath) - (let* ((glob-list (handle-exceptions - exn - `(,(conc "/no/such/file, message: " ((condition-property-accessor 'exn 'message) exn))) - (glob (conc fpath "*")))) - (file-list (if (eq? 0 (length glob-list)) - '("/no/such/file") - glob-list))) - (apply max - (map - common:lazy-modification-time - file-list)))) - -;; return a nice clean pathname made absolute -(define (common:nice-path dir) - (let ((match (string-match "^(~[^\\/]*)(\\/.*|)$" dir))) - (if match ;; using ~ for home? - (common:nice-path (conc (common:read-link-f (cadr match)) "/" (caddr match))) - (normalize-pathname (if (absolute-pathname? dir) - dir - (conc (current-directory) "/" dir)))))) - -;; make "nice-path" available in config files and the repl -(define nice-path common:nice-path) - -(define (common:read-link-f path) - (handle-exceptions - exn - (begin - (debug:print-error 0 *default-log-port* "command \"/bin/readlink -f " path "\" failed.") - path) ;; just give up - (with-input-from-pipe - (conc "/bin/readlink -f " path) - (lambda () - (read-line))))) - -(define (get-cpu-load #!key (remote-host #f)) - (car (common:get-cpu-load remote-host))) -;; (let* ((load-res (process:cmd-run->list "uptime")) -;; (load-rx (regexp "load average:\\s+(\\d+)")) -;; (cpu-load #f)) -;; (for-each (lambda (l) -;; (let ((match (string-search load-rx l))) -;; (if match -;; (let ((newval (string->number (cadr match)))) -;; (if (number? newval) -;; (set! cpu-load newval)))))) -;; (car load-res)) -;; cpu-load)) - -;; get values from cached info from dropping file in logs dir -;; e.g. 
key is host and dtype is normalized-load -;; -(define (common:get-cached-info key dtype #!key (age 5)) - (let* ((fullpath (conc *toppath* "/logs/" key "-" dtype ".log"))) - (if (and (file-exists? fullpath) - (file-read-access? fullpath)) - (handle-exceptions - exn - #f - (debug:print 2 *default-log-port* "reading file " fullpath) - (let ((real-age (- (current-seconds)(file-change-time fullpath)))) - (if (< real-age age) - (with-input-from-file fullpath read) - (begin - (debug:print 2 *default-log-port* "file " fullpath " is too old (" real-age" seconds)to trust, skipping reading it") - #f)))) - (begin - (debug:print 2 *default-log-port* "not reading file " fullpath) - #f)))) - -(define (common:write-cached-info key dtype dat) - (let* ((fullpath (conc *toppath* "/logs/" key "-" dtype ".log"))) - (handle-exceptions - exn - #f - (with-output-to-file fullpath (lambda ()(pp dat)))))) - -;; get cpu load by reading from /proc/loadavg, return all three values -;; -(define (common:get-cpu-load remote-host) - (handle-exceptions - exn - '(99 99 99) - (let* ((actual-hostname (or remote-host (get-host-name) "localhost"))) - (or (common:get-cached-info actual-hostname "cpu-load") - (let ((result (if remote-host - (map (lambda (res) - (if (eof-object? res) 9e99 res)) - (with-input-from-pipe - (conc "ssh " remote-host " cat /proc/loadavg") - (lambda ()(list (read)(read)(read))))) - (with-input-from-file "/proc/loadavg" - (lambda ()(list (read)(read)(read))))))) - (common:write-cached-info actual-hostname "cpu-load" result) - result))))) - -;; get normalized cpu load by reading from /proc/loadavg and /proc/cpuinfo return all three values and the number of real cpus and the number of threads -;; returns alist '((adj-cpu-load . normalized-proc-load) ... etc. -;; keys: adj-proc-load, adj-core-load, 1m-load, 5m-load, 15m-load -;; -(define (common:get-normalized-cpu-load remote-host) - (let ((res (common:get-normalized-cpu-load-raw remote-host)) - (default `((adj-proc-load . 
2) ;; there is no right answer - (adj-core-load . 2) - (1m-load . 2) - (5m-load . 0) ;; causes a large delta - thus causing default of throttling if stuff goes wrong - (15m-load . 0) - (proc . 1) - (core . 1) - (phys . 1) - (error . #t)))) - (cond - ((and (list? res) - (> (length res) 2)) - res) - ((eq? res #f) default) ;; add messages? - ((eq? res #f) default) ;; this would be the #eof - (else default)))) - -(define (common:get-normalized-cpu-load-raw remote-host) - (let* ((actual-host (or remote-host (get-host-name)))) ;; #f is localhost - (or (common:get-cached-info actual-host "normalized-load") - (let ((data (if remote-host - (with-input-from-pipe - (conc "ssh " remote-host " cat /proc/loadavg;cat /proc/cpuinfo;echo end") - read-lines) - (append - (with-input-from-file "/proc/loadavg" - read-lines) - (with-input-from-file "/proc/cpuinfo" - read-lines) - (list "end")))) - (load-rx (regexp "^([\\d\\.]+)\\s+([\\d\\.]+)\\s+([\\d\\.]+)\\s+.*$")) - (proc-rx (regexp "^processor\\s+:\\s+(\\d+)\\s*$")) - (core-rx (regexp "^core id\\s+:\\s+(\\d+)\\s*$")) - (phys-rx (regexp "^physical id\\s+:\\s+(\\d+)\\s*$")) - (max-num (lambda (p n)(max (string->number p) n)))) - ;; (print "data=" data) - (if (null? data) ;; something went wrong - #f - (let loop ((hed (car data)) - (tal (cdr data)) - (loads #f) - (proc-num 0) ;; processor includes threads - (phys-num 0) ;; physical chip on motherboard - (core-num 0)) ;; core - ;; (print hed ", " loads ", " proc-num ", " phys-num ", " core-num) - (if (null? 
tal) ;; have all our data, calculate normalized load and return result - (let* ((act-proc (+ proc-num 1)) - (act-phys (+ phys-num 1)) - (act-core (+ core-num 1)) - (adj-proc-load (/ (car loads) act-proc)) - (adj-core-load (/ (car loads) act-core)) - (result - (append (list (cons 'adj-proc-load adj-proc-load) - (cons 'adj-core-load adj-core-load)) - (list (cons '1m-load (car loads)) - (cons '5m-load (cadr loads)) - (cons '15m-load (caddr loads))) - (list (cons 'proc act-proc) - (cons 'core act-core) - (cons 'phys act-phys))))) - (common:write-cached-info actual-host "normalized-load" result) - result) - (regex-case - hed - (load-rx ( x l1 l5 l15 ) (loop (car tal)(cdr tal)(map string->number (list l1 l5 l15)) proc-num phys-num core-num)) - (proc-rx ( x p ) (loop (car tal)(cdr tal) loads (max-num p proc-num) phys-num core-num)) - (phys-rx ( x p ) (loop (car tal)(cdr tal) loads proc-num (max-num p phys-num) core-num)) - (core-rx ( x c ) (loop (car tal)(cdr tal) loads proc-num phys-num (max-num c core-num))) - (else - (begin - ;; (print "NO MATCH: " hed) - (loop (car tal)(cdr tal) loads proc-num phys-num core-num))))))))))) - -(define (common:unix-ping hostname) - (let ((res (system (conc "ping -c 1 " hostname " > /dev/null")))) - (eq? res 0))) - -;; ideally put all this info into the db, no need to preserve it across moving homehost -;; -;; return list of -;; ( reachable? cpuload update-time ) -(define (common:get-host-info hostname) - (let* ((loadinfo (rmt:get-latest-host-load hostname)) ;; if this host happens to have been recently used by a test reuse the load data - (load (car loadinfo)) - (load-sample-time (cdr loadinfo)) - (load-sample-age (- (current-seconds) load-sample-time)) - (loadinfo-timeout-seconds 6) ;; this was 20 seconds, seems way too lax. 
Switch to 6 seconds - (host-last-update-timeout-seconds 4) - (host-rec (hash-table-ref/default *host-loads* hostname #f)) - ) - (cond - ((< load-sample-age loadinfo-timeout-seconds) - (list #t - load-sample-time - load)) - ((and host-rec - (< (current-seconds) (+ (host-last-update host-rec) host-last-update-timeout-seconds))) - (list #t - (host-last-update host-rec) - (host-last-cpuload host-rec ))) - ((common:unix-ping hostname) - (list #t - (current-seconds) - (alist-ref 'adj-core-load (common:get-normalized-cpu-load hostname)))) ;; this is cheaper than you might think. get-normalized-cpu-load is cached for up to 5 seconds - (else - (list #f 0 -1) ;; bad host, don't use! - )))) - -;; see defstruct host at top of file. -;; host: reachable last-update last-used last-cpuload -;; -(define (common:update-host-loads-table hosts-raw) - (let* ((hosts (filter (lambda (x) - (string-match (regexp "^\\S+$") x)) - hosts-raw))) - (for-each - (lambda (hostname) - (let* ((rec (let ((h (hash-table-ref/default *host-loads* hostname #f))) - (if h - h - (let ((h (make-host))) - (hash-table-set! *host-loads* hostname h) - h)))) - (host-info (common:get-host-info hostname)) - (is-reachable (car host-info)) - (last-reached-time (cadr host-info)) - (load (caddr host-info))) - (host-reachable-set! rec is-reachable) - (host-last-update-set! rec last-reached-time) - (host-last-cpuload-set! rec load))) - hosts))) - -;; go through the hosts from least recently used to most recently used, pick the first that meets the load criteral from the -;; [host-rules] section. 
-;; -(define (common:get-least-loaded-host hosts-raw host-type configdat) - (let* ((rdat (configf:lookup configdat "host-rules" host-type)) - (rules (common:val->alist (or rdat "") convert: #t)) ;; maxnload, maxnjobs, maxjobrate - (maxnload (common:alist-ref/default 'maxnload rules 1.5)) ;; max normalized load - (maxnjobs (common:alist-ref/default 'maxnjobs rules 1.5)) ;; max normalized number of jobs - (maxjobrate (common:alist-ref/default 'maxjobrate rules (/ 1 6))) ;; max rate of submitting jobs to a given host in jobs/second - (hosts (filter (lambda (x) - (string-match (regexp "^\\S+$") x)) - hosts-raw)) - ;; (best-host #f) - (get-rec (lambda (hostname) - ;; (print "get-rec hostname=" hostname) - (let ((h (hash-table-ref/default *host-loads* hostname #f))) - (if h - h - (let ((h (make-host))) - (hash-table-set! *host-loads* hostname h) - h))))) - (best-load 99999) - (curr-time (current-seconds)) - (get-hosts-sorted (lambda (hosts) - (sort hosts (lambda (a b) - (let ((a-rec (get-rec a)) - (b-rec (get-rec b))) - ;; (print "a=" a " a-rec=" a-rec " host-last-used=" (host-last-used a-rec)) - ;; (print "b=" b " b-rec=" b-rec " host-last-used=" (host-last-used b-rec)) - (< (host-last-used a-rec) - (host-last-used b-rec)))))))) - (debug:print 0 *default-log-port* "INFO: hosts-sorted=" (get-hosts-sorted hosts)) - (if (null? hosts) - #f ;; no hosts to select from. All done and giving up now. 
- (let ((hosts-sorted (get-hosts-sorted hosts))) - (common:update-host-loads-table hosts) - (let loop ((hostname (car hosts-sorted)) - (tal (cdr hosts-sorted)) - (best-host #f)) - (let* ((rec (get-rec hostname)) - (reachable (host-reachable rec)) - (load (host-last-cpuload rec)) - (last-used (host-last-used rec)) - (delta (- curr-time last-used)) - (job-rate (if (> delta 0) - (/ 1 delta) - 999)) ;; jobs per second - (new-best - (cond - ((not reachable) - (debug:print 0 *default-log-port* "Skipping host " hostname " as it cannot be reached.") - best-host) - ((and (< load maxnload) ;; load is acceptable - (< job-rate maxjobrate)) ;; job rate is acceptable - (set! best-load load) - hostname) - (else best-host)))) - (debug:print 0 *default-log-port* "INFO: Trying host " hostname " with load " load ", last used " delta " seconds ago, with job-rate " job-rate " for running a test." ) - (if new-best - (begin ;; found a host, return it - (debug:print 0 *default-log-port* "INFO: Found host: " new-best " load: " load " last-used: " delta " seconds ago, with job-rate: " job-rate) - (host-last-used-set! rec curr-time) - new-best) - (if (null? tal) #f (loop (car tal)(cdr tal) best-host))))))))) - -(define (common:wait-for-cpuload maxload-in numcpus-in waitdelay #!key (count 1000) (msg #f)(remote-host #f)(force-maxload #f)) - (let* ((loadavg (common:get-cpu-load remote-host)) - (numcpus (if (<= 1 numcpus-in) ;; not possible to have zero. If we get 1, it's possible that we got the previous default, and we should check again - (common:get-num-cpus remote-host) - numcpus-in)) - (maxload (if force-maxload - maxload-in - (max maxload-in 0.5))) ;; so maxload must be greater than 0.5 for now BUG - FIXME? 
- (first (car loadavg)) - (next (cadr loadavg)) - (adjload (* maxload (max 1 numcpus))) ;; possible bug where numcpus (or could be maxload) is zero, crude fallback is to at least use 1 - (loadjmp (- first next)) - (adjwait (min (+ 300 (random 10)) (abs (* (+ (random 10)(/ (- 1000 count) 10) waitdelay) (- first adjload) )) ))) ;; add some randomness to the time to break any alignment where netbatch dumps many jobs to machines simultaneously - (debug:print-info 1 *default-log-port* "Checking cpuload on " (or remote-host "localhost") ", maxload: " maxload - ", load: " first ", adjload: " adjload ", loadjmp: " loadjmp) - (cond - ((and (> first adjload) - (> count 0)) - (debug:print-info 0 *default-log-port* "server start delayed " adjwait " seconds due to load " first " exceeding max of " adjload " on server " (or remote-host (get-host-name)) " (normalized load-limit: " maxload ") " (if msg msg "")) - (thread-sleep! adjwait) - (common:wait-for-cpuload maxload numcpus waitdelay count: (- count 1) msg: msg remote-host: remote-host)) - ((and (> loadjmp numcpus) - (> count 0)) - (debug:print-info 0 *default-log-port* "waiting " adjwait " seconds due to load jump " loadjmp " > numcpus " numcpus (if msg msg "")) - (thread-sleep! adjwait) - (common:wait-for-cpuload maxload numcpus waitdelay count: (- count 1) msg: msg remote-host: remote-host))))) - -(define (common:wait-for-homehost-load maxload msg) - (let* ((hh-dat (if (common:on-homehost?) ;; if we are on the homehost then pass in #f so the calls are local. - #f - (common:get-homehost))) - (hh (if hh-dat (car hh-dat) #f)) - (numcpus (common:get-num-cpus hh))) - (common:wait-for-normalized-load maxload msg hh))) - -(define (common:get-num-cpus remote-host) - (let* ((actual-host (or remote-host (get-host-name)))) - (or (common:get-cached-info actual-host "num-cpus" age: 86400) ;; hosts had better not be changing the number of cpus too often! 
- (let* ((proc (lambda () - (let loop ((numcpu 0) - (inl (read-line))) - (if (eof-object? inl) - (begin - (common:write-cached-info remote-host "num-cpus" numcpu) - numcpu) - (loop (if (string-match "^processor\\s+:\\s+\\d+$" inl) - (+ numcpu 1) - numcpu) - (read-line)))))) - (result (if remote-host - (with-input-from-pipe - (conc "ssh " remote-host " cat /proc/cpuinfo") - proc) - (with-input-from-file "/proc/cpuinfo" proc)))) - (common:write-cached-info actual-host "num-cpus" result) - result)))) - -;; wait for normalized cpu load to drop below maxload -;; -(define (common:wait-for-normalized-load maxload msg remote-host) - (let ((num-cpus (common:get-num-cpus remote-host))) - (common:wait-for-cpuload maxload num-cpus 15 msg: msg remote-host: remote-host))) - -(define (get-uname . params) - (let* ((uname-res (process:cmd-run->list (conc "uname " (if (null? params) "-a" (car params))))) - (uname #f)) - (if (null? (car uname-res)) - "unknown" - (caar uname-res)))) - -;; for reasons I don't understand multiple calls to real-path in parallel threads -;; must be protected by mutexes -;; -(define (common:real-path inpath) - ;; (process:cmd-run-with-stderr->list "readlink" "-f" inpath)) ;; cmd . params) - ;; (let-values - ;; (((inp oup pid) (process "readlink" (list "-f" inpath)))) - ;; (with-input-from-port inp - ;; (let loop ((inl (read-line)) - ;; (res #f)) - ;; (print "inl=" inl) - ;; (if (eof-object? 
inl) - ;; (begin - ;; (close-input-port inp) - ;; (close-output-port oup) - ;; ;; (process-wait pid) - ;; res) - ;; (loop (read-line) inl)))))) - (with-input-from-pipe (conc "readlink -f " inpath) read-line)) - -;;====================================================================== -;; D I S K S P A C E -;;====================================================================== - -(define (common:get-disk-space-used fpath) - (with-input-from-pipe (conc "/usr/bin/du -s " fpath) read)) - -;; given path get free space, allows override in [setup] -;; with free-space-script /path/to/some/script.sh -;; -(define (get-df path) - (if (configf:lookup *configdat* "setup" "free-space-script") - (with-input-from-pipe - (conc (configf:lookup *configdat* "setup" "free-space-script") " " path) - (lambda () - (let ((res (read-line))) - (if (string? res) - (string->number res))))) - (get-unix-df path))) - -(define (get-free-inodes path) - (if (configf:lookup *configdat* "setup" "free-inodes-script") - (with-input-from-pipe - (conc (configf:lookup *configdat* "setup" "free-inodes-script") " " path) - (lambda () - (let ((res (read-line))) - (if (string? res) - (string->number res))))) - (get-unix-inodes path))) - -(define (get-unix-df path) - (let* ((df-results (process:cmd-run->list (conc "df " path))) - (space-rx (regexp "([0-9]+)\\s+([0-9]+)%")) - (freespc #f)) - ;; (write df-results) - (for-each (lambda (l) - (let ((match (string-search space-rx l))) - (if match - (let ((newval (string->number (cadr match)))) - (if (number? newval) - (set! freespc newval)))))) - (car df-results)) - freespc)) - -(define (get-unix-inodes path) - (let* ((df-results (process:cmd-run->list (conc "df -i " path))) - (space-rx (regexp "([0-9]+)\\s+([0-9]+)%")) - (freenodes 0)) ;; 0 is a better failsafe than #f here. - ;; (write df-results) - (for-each (lambda (l) - (let ((match (string-search space-rx l))) - (if match - (let ((newval (string->number (cadr match)))) - (if (number? newval) - (set! 
freenodes newval)))))) - (car df-results)) - freenodes)) - -(define (common:check-space-in-dir dirpath required) - (let* ((dbspace (if (directory? dirpath) - (get-df dirpath) - 0))) - (list (> dbspace required) - dbspace - required - dirpath))) - -;; check space in dbdir and in megatest dir -;; returns: ok/not dbspace required-space -;; -(define (common:check-db-dir-space) - (let* ((required (string->number - (or (configf:lookup *configdat* "setup" "dbdir-space-required") - "100000"))) - (dbdir (common:get-db-tmp-area)) ;; (db:get-dbdir)) - (tdbspace (common:check-space-in-dir dbdir required)) - (mdbspace (common:check-space-in-dir *toppath* required))) - (sort (list tdbspace mdbspace) (lambda (a b) - (< (cadr a)(cadr b)))))) - -;; check available space in dbdir, exit if insufficient -;; -(define (common:check-db-dir-and-exit-if-insufficient) - (let* ((spacedat (car (common:check-db-dir-space))) ;; look only at worst for now - (is-ok (car spacedat)) - (dbspace (cadr spacedat)) - (required (caddr spacedat)) - (dbdir (cadddr spacedat))) - (if (not is-ok) - (begin - (debug:print-error 0 *default-log-port* "Insufficient space in " dbdir ", require " required ", have " dbspace ", exiting now.") - (exit 1))))) - -;; paths is list of lists ((name path) ... ) -;; -(define (common:get-disk-with-most-free-space disks minsize) - (let ((best #f) - (bestsize 0) - (min-inodes (or (string->number (if (configf:lookup *configdat* "setup" "min_inodes") (configf:lookup *configdat* "setup" "min_inodes") "0")) 0))) - (for-each - (lambda (disk-num) - (let* ((dirpath (cadr (assoc disk-num disks))) - (freespc (cond - ((not (directory? dirpath)) - (if (common:low-noise-print 300 "disks not a dir " disk-num) - (debug:print 0 *default-log-port* "WARNING: disk " disk-num " at path \"" dirpath "\" is not a directory - ignoring it.")) - -1) - ((not (file-write-access? 
dirpath)) - (if (common:low-noise-print 300 "disks not writeable " disk-num) - (debug:print 0 *default-log-port* "WARNING: disk " disk-num " at path \"" dirpath "\" is not writeable - ignoring it.")) - -1) - ((not (eq? (string-ref dirpath 0) #\/)) - (if (common:low-noise-print 300 "disks not a proper path " disk-num) - (debug:print 0 *default-log-port* "WARNING: disk " disk-num " at path \"" dirpath "\" is not a fully qualified path - ignoring it.")) - -1) - (else - (get-df dirpath)))) - (free-inodes (cond - ((not (directory? dirpath)) - (if (common:low-noise-print 300 "disks not a dir " disk-num) - (debug:print 0 *default-log-port* "WARNING: disk " disk-num " at path \"" dirpath "\" is not a directory - ignoring it.")) - -1) - ((not (file-write-access? dirpath)) - (if (common:low-noise-print 300 "disks not writeable " disk-num) - (debug:print 0 *default-log-port* "WARNING: disk " disk-num " at path \"" dirpath "\" is not writeable - ignoring it.")) - -1) - ((not (eq? (string-ref dirpath 0) #\/)) - (if (common:low-noise-print 300 "disks not a proper path " disk-num) - (debug:print 0 *default-log-port* "WARNING: disk " disk-num " at path \"" dirpath "\" is not a fully qualified path - ignoring it.")) - -1) - (else - (get-free-inodes dirpath)))) - ;;(free-inodes (get-free-inodes dirpath)) - ) - (if (and (> freespc bestsize)(> free-inodes min-inodes )) - (begin - (set! best (cons disk-num dirpath)) - (set! 
bestsize freespc))) - ;;(print "Processing: " disk-num " bestsize: " bestsize " best: " best " freespc: " freespc " min-inodes: " min-inodes " free-inodes: " free-inodes) - )) - (map car disks)) - (if (and best (> bestsize minsize)) - best - #f))) ;; #f means no disk candidate found - -;; convert a spec string to a list of vectors #( rx action rx-string ) -(define (common:spec-string->list-of-specs spec-string actions) - (let ((spec-strings (string-split-fields "\\s*;\\s*" spec-string #:infix)) - (actions-regex (regexp (conc "^(.*)\\s+(" (string-intersperse (map conc actions) "|") ")")))) - (filter - (lambda (x) x) - (map (lambda (s) - (let ((m (string-match actions-regex s))) - (if m - (vector (regexp (cadr m))(string->symbol (caddr m))(cadr m)) - (begin - (debug:print 0 *default-log-port* "WARNING: Unrecognised rule \"" s "\" in clean-up specification.") - #f)))) - spec-strings)))) - -;; given a list of specs rx . rule and a file return the first matching rule -;; -(define (common:file-find-rule fname rules) ;; rule is vector #( rx action rx-string) - (let loop ((rule (car rules)) - (tail (cdr rules))) - (let ((rx (vector-ref rule 0)) - (rn (vector-ref rule 1))) ;; rule name - (if (string-match rx fname) - rule ;; return the whole rule so regex can be printed etc. - (if (null? tail) - #f - (loop (car tail)(cdr tail))))))) - -;; given a spec apply some rules to a directory -;; -;; WARNING: This function will REMOVE files - be sure your spec and path is correct! -;; -;; spec format: -;; file-regex1 action; file-regex2 action; ... -;; e.g. -;; .*\.log$ keep; .* remove -;; --> keep all .log files, remove everything else -;; limitations: -;; cannot have a rule with ; as part of the spec -;; not very flexible, would be nice to return binned file names? 
-;; supported rules: -;; keep - keep this file -;; remove - remove this file -;; compress - compress this file -;; -(define (common:dir-clean-up path spec-string #!key (compress "gzip")(actions '(keep remove compress))(remove-empty #f)) - (let* ((specs (common:spec-string->list-of-specs spec-string actions)) - (keepers (make-hash-table)) - (directories (make-hash-table))) - (find-files - path - action: (lambda (p res) - (let ((rule (common:file-find-rule p specs))) - (cond - ((directory? p)(hash-table-set! directories p #t)) - (else - (case (vector-ref rule 1) - ((keep)(hash-table-set! keepers p rule)) - ((remove) - (print "Removing file " p) - (delete-file p)) - ((compress) - (print "Compressing file " p) - (system (conc compress " " p))) - (else - (print "No match for file " p)))))))) - (if remove-empty - (for-each - (lambda (d) - (if (null? (glob (conc d "/.*")(conc d "/*"))) - (begin - (print "Removing empty directory " d) - (delete-directory d)))) - (sort (hash-table-keys directories) (lambda (a b)(> (string-length a)(string-length b)))))) - )) - -;;====================================================================== -;; E N V I R O N M E N T V A R S -;;====================================================================== -(define (bb-check-path #!key (msg "check-path: ")) - (let ((path (or (get-environment-variable "PATH") "none"))) - (debug:print-info 0 *default-log-port* (conc msg" : $PATH="path)) - (if (string-match "^.*/isoenv-core/.*" path) - (debug:print-error 0 *default-log-port* (conc msg" : !!ISOENV PRESENT!!")) ;; remove for prod - (debug:print-info 1 *default-log-port* (conc msg" : **no isoenv present**"))))) - - -(define (save-environment-as-files fname #!key (ignorevars (list "USER" "HOME" "DISPLAY" "LS_COLORS" "XKEYSYMDB" "EDITOR" "MAKEFLAGS" "MAKEF" "MAKEOVERRIDES"))) - ;;(bb-check-path msg: "save-environment-as-files entry") - (let ((envvars (get-environment-variables)) - (whitesp (regexp "[^a-zA-Z0-9_\\-:,\\.\\/%$]")) - (mungeval (lambda 
(val) - (cond - ((eq? val #t) "") ;; convert #t to empty string - ((eq? val #f) #f) ;; convert #f to itself (still thinking about this one - (else val))))) - (with-output-to-file (conc fname ".csh") - (lambda () - (for-each (lambda (keyval) - (let* ((key (car keyval)) - (val (cdr keyval)) - (delim (if (string-search whitesp val) - "\"" - ""))) - (print (if (or (member key ignorevars) - (string-search whitesp key)) - "# setenv " - "setenv ") - key " " delim (mungeval val) delim))) - envvars))) - (with-output-to-file (conc fname ".sh") - (lambda () - (for-each (lambda (keyval) - (let* ((key (car keyval)) - (val (cdr keyval)) - (delim (if (string-search whitesp val) - "\"" - ""))) - (print (if (or (member key ignorevars) - (string-search whitesp key) - (string-search ":" key)) ;; internal only values to be skipped. - "# export " - "export ") - key "=" delim (mungeval val) delim))) - envvars))))) - - -(define (common:get-param-mapping #!key (flavor #f)) - "returns alist mapping string keys in testconfig/subrun to megatest command line switches; if flavor is switch-symbol, maps tcmt symbolic switches to megatest switches" - (let ((default '(("tag-expr" . "-tagexpr") - ("mode-patt" . "-modepatt") - ("run-name" . "-runname") - ("contour" . "-contour") - ("target" . "-target") - ("test-patt" . "-testpatt") - ("msg" . "-m") - ("log" . "-log") - ("start-dir" . "-start-dir") - ("new" . "-set-state-status")))) - (if (eq? flavor 'switch-symbol) - (map (lambda (x) - (cons (string->symbol (conc "-" (car x))) (cdr x))) - default) - default))) - -;; set some env vars from an alist, return an alist with original values -;; (("VAR" "value") ...) -;; a value of #f means "unset this var" -;; -(define (alist->env-vars lst) - (if (list? lst) - (let ((res '())) - (for-each (lambda (p) - (let* ((var (car p)) - (val (cadr p)) - (prv (get-environment-variable var))) - (set! 
res (cons (list var prv) res)) - (if val - (safe-setenv var (->string val)) - (unsetenv var)))) - lst) - res) - '())) - - -;; clear vars matching pattern, run proc, set vars back -;; if proc is a string run that string as a command with -;; system. -;; -(define *common:orig-env* - (let ((envvars (get-environment-variables))) - (if (get-environment-variable "MT_ORIG_ENV") - (with-input-from-string - (z3:decode-buffer (base64:base64-decode (get-environment-variable "MT_ORIG_ENV"))) - read) - (filter-map (lambda (x) - (if (string-match "^MT_.*" (car x)) - #f - x)) - envvars)))) - -(define (common:with-orig-env proc) - (let ((current-env (get-environment-variables))) - (for-each (lambda (x) (unsetenv (car x))) current-env) - (for-each (lambda (x) (setenv (car x) (cdr x))) *common:orig-env*) - (let ((rv (cond - ((string? proc)(system proc)) - (proc (proc))))) - (for-each (lambda (x) (unsetenv (car x))) *common:orig-env*) - (for-each (lambda (x) (setenv (car x) (cdr x))) current-env) - rv))) - -(define (common:without-vars proc . var-patts) - (let ((vars (make-hash-table))) - (for-each - (lambda (vardat) ;; each env var - (for-each - (lambda (var-patt) - (if (string-match var-patt (car vardat)) - (let ((var (car vardat)) - (val (cdr vardat))) - (hash-table-set! vars var val) - (unsetenv var)))) - var-patts)) - (get-environment-variables)) - (cond - ((string? 
proc)(system proc)) - (proc (proc))) - (hash-table-for-each - vars - (lambda (var val) - (setenv var val))) - vars)) - - -;;====================================================================== -;; T I M E A N D D A T E -;;====================================================================== - -;; Convert strings like "5s 2h 3m" => 60x60x2 + 3x60 + 5 -(define (common:hms-string->seconds tstr) - (let ((parts (string-split-fields "\\w+" tstr)) - (time-secs 0) - ;; s=seconds, m=minutes, h=hours, d=days, M=months, y=years, w=weeks - (trx (regexp "(\\d+)([smhdMyw])"))) - (for-each (lambda (part) - (let ((match (string-match trx part))) - (if match - (let ((val (string->number (cadr match))) - (unt (caddr match))) - (if val - (set! time-secs (+ time-secs (* val - (case (string->symbol unt) - ((s) 1) - ((m) 60) ;; minutes - ((h) 3600) - ((d) 86400) - ((w) 604800) - ((M) 2628000) ;; aproximately one month - ((y) 31536000) - (else #f)))))))))) - parts) - time-secs)) - -(define (seconds->hr-min-sec secs) - (let* ((hrs (quotient secs 3600)) - (min (quotient (- secs (* hrs 3600)) 60)) - (sec (- secs (* hrs 3600)(* min 60)))) - (conc (if (> hrs 0)(conc hrs "hr ") "") - (if (> min 0)(conc min "m ") "") - sec "s"))) - -(define (seconds->time-string sec) - (time->string - (seconds->local-time sec) "%H:%M:%S")) - -(define (seconds->work-week/day-time sec) - (time->string - (seconds->local-time sec) "ww%V.%u %H:%M")) - -(define (seconds->work-week/day sec) - (time->string - (seconds->local-time sec) "ww%V.%u")) - -(define (seconds->year-work-week/day sec) - (time->string - (seconds->local-time sec) "%yww%V.%w")) - -(define (seconds->year-work-week/day-time sec) - (time->string - (seconds->local-time sec) "%Yww%V.%w %H:%M")) - -(define (seconds->year-week/day-time sec) - (time->string - (seconds->local-time sec) "%Yw%V.%w %H:%M")) - -(define (seconds->quarter sec) - (case (string->number - (time->string - (seconds->local-time sec) - "%m")) - ((1 2 3) 1) - ((4 5 6) 2) - ((7 8 9) 3) 
- ((10 11 12) 4) - (else #f))) - -;; basic ISO8601 format (e.g. "2017-02-28 06:02:54") date time => Unix epoch -;; -(define (common:date-time->seconds datetime) - (local-time->seconds (string->time datetime "%Y-%m-%d %H:%M:%S"))) - -;; given span of seconds tstart to tend -;; find start time to mark and mark delta -;; -(define (common:find-start-mark-and-mark-delta tstart tend) - (let* ((deltat (- (max tend (+ tend 10)) tstart)) ;; can't handle runs of less than 4 seconds. Pad it to 10 seconds ... - (result #f) - (min 60) - (hr (* 60 60)) - (day (* 24 hr)) - (yr (* 365 day)) ;; year - (mo (/ yr 12)) - (wk (* day 7))) - (for-each - (lambda (max-blks) - (for-each - (lambda (span) ;; 5 2 1 - (if (not result) - (for-each - (lambda (timeunit timesym) ;; year month day hr min sec - (if (not result) - (let* ((time-blk (* span timeunit)) - (num-blks (quotient deltat time-blk))) - (if (and (> num-blks 4)(< num-blks max-blks)) - (let ((first (* (quotient tstart time-blk) time-blk))) - (set! result (list span timeunit time-blk first timesym)) - ))))) - (list yr mo wk day hr min 1) - '( y mo w d h m s)))) - (list 8 6 5 2 1))) - '(5 10 15 20 30 40 50 500)) - (if values - (apply values result) - (values 0 day 1 0 'd)))) - -;; given x y lim return the cron expansion -;; -(define (common:expand-cron-slash x y lim) - (let loop ((curr x) - (res `())) - (if (< curr lim) - (loop (+ curr y) (cons curr res)) - (reverse res)))) - -;; expand a complex cron string to a list of cron strings -;; -;; x/y => x, x+y, x+2y, x+3y while x+Ny a, b ,c -;; -;; NOTE: with flatten a lot of the crud below can be factored down. -;; -(define (common:cron-expand cron-str) - (if (list? cron-str) - (flatten - (fold (lambda (x res) - (if (list? 
x) - (let ((newres (map common:cron-expand x))) - (append x newres)) - (cons x res))) - '() - cron-str)) ;; (map common:cron-expand cron-str)) - (let ((cron-items (string-split cron-str)) - (slash-rx (regexp "(\\d+)/(\\d+)")) - (comma-rx (regexp ".*,.*")) - (max-vals '((min . 60) - (hour . 24) - (dayofmonth . 28) ;;; BUG!!!! This will be a bug for some combinations - (month . 12) - (dayofweek . 7)))) - (if (< (length cron-items) 5) ;; bad spec - cron-str ;; `(,cron-str) ;; just return the string, something downstream will fix it - (let loop ((hed (car cron-items)) - (tal (cdr cron-items)) - (type 'min) - (type-tal '(hour dayofmonth month dayofweek)) - (res '())) - (regex-case - hed - (slash-rx ( _ base incr ) (let* ((basen (string->number base)) - (incrn (string->number incr)) - (expanded-vals (common:expand-cron-slash basen incrn (alist-ref type max-vals))) - (new-list-crons (fold (lambda (x myres) - (cons (conc (if (null? res) - "" - (conc (string-intersperse res " ") " ")) - x " " (string-intersperse tal " ")) - myres)) - '() expanded-vals))) - ;; (print "new-list-crons: " new-list-crons) - ;; (fold (lambda (x res) - ;; (if (list? x) - ;; (let ((newres (map common:cron-expand x))) - ;; (append x newres)) - ;; (cons x res))) - ;; '() - (flatten (map common:cron-expand new-list-crons)))) - ;; (map common:cron-expand (map common:cron-expand new-list-crons)))) - (else (if (null? tal) - cron-str - (loop (car tal)(cdr tal)(car type-tal)(cdr type-tal)(append res (list hed))))))))))) - - -;; given a cron string and the last time event was processed return #t to run or #f to not run -;; -;; min hour dayofmonth month dayofweek -;; 0-59 0-23 1-31 1-12 0-6 ### NOTE: dayofweek does not include 7 -;; -;; #t => yes, run the job -;; #f => no, do not run the job -;; -(define (common:cron-event cron-str now-seconds-in last-done) ;; ref-seconds = #f is NOW. 
- (let* ((cron-items (map string->number (string-split cron-str))) - (now-seconds (or now-seconds-in (current-seconds))) - (now-time (seconds->local-time now-seconds)) - (last-done-time (seconds->local-time last-done)) - (all-times (make-hash-table))) - ;; (print "cron-items: " cron-items "(length cron-items): " (length cron-items)) - (if (not (eq? (length cron-items) 5)) ;; don't even try to figure out junk strings - #f - (match-let ((( cmin chour cdayofmonth cmonth cdayofweek) - cron-items) - ;; 0 1 2 3 4 5 6 - ((nsec nmin nhour ndayofmonth nmonth nyr ndayofweek n7 n8 n9) - (vector->list now-time)) - ((lsec lmin lhour ldayofmonth lmonth lyr ldayofweek l7 l8 l9) - (vector->list last-done-time))) - ;; create all possible time slots - ;; remove invalid slots due to (for example) day of week - ;; get the start and end entries for the ref-seconds (current) time - ;; if last-done > ref-seconds => this is an ERROR! - ;; does the last-done time fall in the legit region? - ;; yes => #f do not run again this command - ;; no => #t ok to run the command - (for-each ;; month - (lambda (month) - (for-each ;; dayofmonth - (lambda (dom) - (for-each - (lambda (hr) ;; hour - (for-each - (lambda (minute) ;; minute - (let ((copy-now (apply vector (vector->list now-time)))) - (vector-set! copy-now 0 0) ;; force seconds to zero - (vector-set! copy-now 1 minute) - (vector-set! copy-now 2 hr) - (vector-set! copy-now 3 dom) ;; dom is already corrected for zero referenced - (vector-set! copy-now 4 month) - (let* ((copy-now-secs (local-time->seconds copy-now)) - (new-copy (seconds->local-time copy-now-secs))) ;; remake the time vector - (if (or (not cdayofweek) - (equal? (vector-ref new-copy 6) - cdayofweek)) ;; if the day is specified and a match OR if the day is NOT specified - (if (or (not cdayofmonth) - (equal? (vector-ref new-copy 3) - (+ 1 cdayofmonth))) ;; if the month is specified and a match OR if the month is NOT specified - (hash-table-set! 
all-times copy-now-secs new-copy)))))) - (if cmin - `(,cmin) ;; if given cmin, have to use it - (list (- nmin 1) nmin (+ nmin 1))))) ;; minute - (if chour - `(,chour) - (list (- nhour 1) nhour (+ nhour 1))))) ;; hour - (if cdayofmonth - `(,cdayofmonth) - (list (- ndayofmonth 1) ndayofmonth (+ ndayofmonth 1))))) - (if cmonth - `(,cmonth) - (list (- nmonth 1) nmonth (+ nmonth 1)))) - (let ((before #f) - (is-in #f)) - (for-each - (lambda (moment) - (if (and before - (<= before now-seconds) - (>= moment now-seconds)) - (begin - ;; (print) - ;; (print "Before: " (time->string (seconds->local-time before))) - ;; (print "Now: " (time->string (seconds->local-time now-seconds))) - ;; (print "After: " (time->string (seconds->local-time moment))) - ;; (print "Last: " (time->string (seconds->local-time last-done))) - (if (< last-done before) - (set! is-in before)) - )) - (set! before moment)) - (sort (hash-table-keys all-times) <)) - is-in))))) - -(define (common:extended-cron cron-str now-seconds-in last-done) - (let ((expanded-cron (common:cron-expand cron-str))) - (if (string? expanded-cron) - (common:cron-event expanded-cron now-seconds-in last-done) - (let loop ((hed (car expanded-cron)) - (tal (cdr expanded-cron))) - (if (common:cron-event hed now-seconds-in last-done) - #t - (if (null? 
tal) - #f - (loop (car tal)(cdr tal)))))))) - -;;====================================================================== -;; C O L O R S -;;====================================================================== - -(define (common:name->iup-color name) - (case (string->symbol (string-downcase name)) - ((red) "223 33 49") - ((grey) "192 192 192") - ((orange) "255 172 13") - ((purple) "This is unfinished ..."))) - -;; (define (common:get-color-for-state-status state status) -;; (case (string->symbol state) -;; ((COMPLETED) -;; (case (string->symbol status) -;; ((PASS) "70 249 73") -;; ((WARN WAIVED) "255 172 13") -;; ((SKIP) "230 230 0") -;; (else "223 33 49"))) -;; ((LAUNCHED) "101 123 142") -;; ((CHECK) "255 100 50") -;; ((REMOTEHOSTSTART) "50 130 195") -;; ((RUNNING) "9 131 232") -;; ((KILLREQ) "39 82 206") -;; ((KILLED) "234 101 17") -;; ((NOT_STARTED) "240 240 240") -;; (else "192 192 192"))) - -(define (common:iup-color->rgb-hex instr) - (string-intersperse - (map (lambda (x) - (number->string x 16)) - (map string->number - (string-split instr))) - "/")) - -;;====================================================================== -;; L O C K I N G M E C H A N I S M S -;;====================================================================== - -;; faux-lock is deprecated. Please use simple-lock below -;; -(define (common:faux-lock keyname #!key (wait-time 8)(allow-lock-steal #t)) - (if (rmt:no-sync-get/default keyname #f) ;; do not be tempted to compare to pid. locking is a one-shot action, if already locked for this pid it doesn't actually count - (if (> wait-time 0) - (begin - (thread-sleep! 1) - (if (eq? wait-time 1) ;; only one second left, steal the lock - (begin - (debug:print-info 0 *default-log-port* "stealing lock for " keyname) - (common:faux-unlock keyname force: #t))) - (common:faux-lock keyname wait-time: (- wait-time 1))) - #f) - (begin - (rmt:no-sync-set keyname (conc (current-process-id))) - (equal? 
(conc (current-process-id)) (conc (rmt:no-sync-get/default keyname #f)))))) - -(define (common:faux-unlock keyname #!key (force #f)) - (if (or force (equal? (conc (current-process-id)) (conc (rmt:no-sync-get/default keyname #f)))) - (begin - (if (rmt:no-sync-get/default keyname #f) (rmt:no-sync-del! keyname)) - #t) - #f)) - -;; simple lock. improve and converge on this one. -;; -(define (common:simple-lock keyname) - (rmt:no-sync-get-lock keyname)) - -(define (common:simple-unlock keyname #!key (force #f)) - (rmt:no-sync-del! keyname)) - - -;;====================================================================== -;; -;;====================================================================== - -(define (common:in-running-test?) - (and (args:get-arg "-execute") (get-environment-variable "MT_CMDINFO"))) - -(define (common:get-color-from-status status) - (cond - ((equal? status "PASS") "green") - ((equal? status "FAIL") "red") - ((equal? status "WARN") "orange") - ((equal? status "KILLED") "orange") - ((equal? status "KILLREQ") "purple") - ((equal? status "RUNNING") "blue") - ((equal? 
status "ABORT") "brown") - (else "black"))) - -;; ;;====================================================================== -;; ;; N A N O M S G C L I E N T -;; ;;====================================================================== -;; -;; -;; -;; (define (common:send-dboard-main-changed) -;; (let* ((dashboard-ips (mddb:get-dashboards))) -;; (for-each -;; (lambda (ipadr) -;; (let* ((soc (common:open-nm-req (conc "tcp://" ipadr))) -;; (msg (conc "main " *toppath*)) -;; (res (common:nm-send-receive-timeout soc msg))) -;; (if (not res) ;; couldn't reach that dashboard - remove it from db -;; (print "ERROR: couldn't reach dashboard " ipadr)) -;; res)) -;; dashboard-ips))) -;; -;; -;; ;;====================================================================== -;; ;; D A S H B O A R D D B -;; ;;====================================================================== -;; -;; (define (mddb:open-db) -;; (let* ((db (open-database (conc (get-environment-variable "HOME") "/.dashboard.db")))) -;; (set-busy-handler! 
db (busy-timeout 10000)) -;; (for-each -;; (lambda (qry) -;; (exec (sql db qry))) -;; (list -;; "CREATE TABLE IF NOT EXISTS vars (id INTEGER PRIMARY KEY,key TEXT, val TEXT, CONSTRAINT varsconstraint UNIQUE (key));" -;; "CREATE TABLE IF NOT EXISTS dashboards ( -;; id INTEGER PRIMARY KEY, -;; pid INTEGER, -;; username TEXT, -;; hostname TEXT, -;; ipaddr TEXT, -;; portnum INTEGER, -;; start_time TIMESTAMP DEFAULT (strftime('%s','now')), -;; CONSTRAINT hostport UNIQUE (hostname,portnum) -;; );" -;; )) -;; db)) -;; -;; ;; register a dashboard -;; ;; -;; (define (mddb:register-dashboard port) -;; (let* ((pid (current-process-id)) -;; (hostname (get-host-name)) -;; (ipaddr (server:get-best-guess-address hostname)) -;; (username (current-user-name)) ;; (car userinfo))) -;; (db (mddb:open-db))) -;; (print "Register monitor, pid: " pid ", hostname: " hostname ", port: " port ", username: " username) -;; (exec (sql db "INSERT OR REPLACE INTO dashboards (pid,username,hostname,ipaddr,portnum) VALUES (?,?,?,?,?);") -;; pid username hostname ipaddr port) -;; (close-database db))) -;; -;; ;; unregister a monitor -;; ;; -;; (define (mddb:unregister-dashboard host port) -;; (let* ((db (mddb:open-db))) -;; (print "Register unregister monitor, host:port=" host ":" port) -;; (exec (sql db "DELETE FROM dashboards WHERE hostname=? 
AND portnum=?;") host port) -;; (close-database db))) -;; -;; ;; get registered dashboards -;; ;; -;; (define (mddb:get-dashboards) -;; (let ((db (mddb:open-db))) -;; (query fetch-column -;; (sql db "SELECT ipaddr || ':' || portnum FROM dashboards;")))) - -;;====================================================================== -;; T E S T L A U N C H I N G P E R I T E M W I T H H O S T T Y P E S -;;====================================================================== -;; -;; [hosts] -;; arm cubie01 cubie02 -;; x86_64 zeus xena myth01 -;; allhosts #{g hosts arm} #{g hosts x86_64} -;; -;; [host-types] -;; general #MTLOWESTLOAD #{g hosts allhosts} -;; arm #MTLOWESTLOAD #{g hosts arm} -;; nbgeneral nbjob run JOBCOMMAND -log $MT_LINKTREE/$MT_TARGET/$MT_RUNNAME.$MT_TESTNAME-$MT_ITEM_PATH.lgo -;; -;; [host-rules] -;; # maxnload => max normalized load -;; # maxnjobs => max jobs per cpu -;; # maxjobrate => max jobs per second -;; general maxnload=1.1; maxnjobs=1.2; maxjobrate=0.1 -;; -;; [launchers] -;; envsetup general -;; xor/%/n 4C16G -;; % nbgeneral -;; -;; [jobtools] -;; # if defined and not "no" flexi-launcher will bypass "launcher" unless no match. -;; flexi-launcher yes -;; launcher nbfake -;; -(define (common:get-launcher configdat testname itempath) - (let ((fallback-launcher (configf:lookup configdat "jobtools" "launcher"))) - (if (and (configf:lookup configdat "jobtools" "flexi-launcher") ;; overrides launcher - (not (equal? (configf:lookup configdat "jobtools" "flexi-launcher") "no"))) - (let* ((launchers (hash-table-ref/default configdat "launchers" '()))) - (if (null? 
launchers) - fallback-launcher - (let loop ((hed (car launchers)) - (tal (cdr launchers))) - (let ((patt (car hed)) - (host-type (cadr hed))) - (if (tests:match patt testname itempath) - (begin - (debug:print-info 2 *default-log-port* "Have flexi-launcher match for " testname "/" itempath " = " host-type) - (let ((launcher (configf:lookup configdat "host-types" host-type))) - (if launcher - (let* ((launcher-parts (string-split launcher)) - (launcher-exe (car launcher-parts))) - (if (equal? launcher-exe "#MTLOWESTLOAD") ;; this is our special case, we will find the lowest load and craft a nbfake commandline - (let host-loop ((targ-host (common:get-least-loaded-host (cdr launcher-parts) host-type configdat)) - (count 100)) - (if targ-host - (conc "remrun " targ-host) - (if (> count 0) - (begin - (debug:print 0 *default-log-port* "INFO: Waiting for a host for host-type " host-type) - (thread-sleep! (- 101 count)) - (host-loop (common:get-least-loaded-host (cdr launcher-parts) host-type configdat) - (- count 1))) - (begin - (debug:print 0 *default-log-port* "FATAL: Failed to find a host from #MTLOWESTLOAD for host-type " host-type) - (exit))))) - launcher)) - (begin - (debug:print-info 0 *default-log-port* "WARNING: no launcher found for host-type " host-type) - (if (null? tal) - fallback-launcher - (loop (car tal)(cdr tal))))))) - ;; no match, try again - (if (null? tal) - fallback-launcher - (loop (car tal)(cdr tal)))))))) - fallback-launcher))) - -;;====================================================================== -;; NMSG AND NEW API -;;====================================================================== - -;; nm based server experiment, keep around for now. -;; -#;(define (nm:start-server dbconn #!key (given-host-name #f)) - (let* ((srvdat (start-raw-server given-host-name: given-host-name)) - (host-name (srvdat-host srvdat)) - (soc (srvdat-soc srvdat))) - - ;; start the queue processor (save for second round of development) - ;; - (thread-start! 
(make-thread! (lambda ()(queue-processor dbconn) "Queue processor"))) - ;; msg is an alist - ;; 'r host:port <== where to return the data - ;; 'p params <== data to apply the command to - ;; 'e j|s|l <== encoding of the params. default is s (sexp), if not specified is assumed to be default - ;; 'c command <== look up the function to call using this key - ;; - (let loop ((msg-in (nn-recv soc))) - (if (not (equal? msg-in "quit")) - (let* ((dat (decode msg-in)) - (host-port (alist-ref 'r dat)) ;; this is for the reverse req rep where the server is a client of the original client - (params (alist-ref 'p dat)) - (command (let ((c (alist-ref 'c dat)))(if c (string->symbol c) #f))) - (all-good (and host-port params command (hash-table-exists? *commands* command)))) - (if all-good - (let ((cmddat (make-qitem - command: command - host-port: host-port - params: params))) - (queue-push cmddat) ;; put request into the queue - (nn-send soc "queued")) ;; reply with "queued" - (print "ERROR: ["(common:human-time)"] BAD request " dat)) - (loop (nn-recv soc))))) - (nn-close soc))) - -;;====================================================================== -;; D A S H B O A R D U S E R V I E W S -;;====================================================================== - -;; first read ~/views.config if it exists, then read $MTRAH/views.config if it exists -;; -(define (common:load-views-config) - (let* ((view-cfgdat (make-hash-table)) - (home-cfgfile (conc (get-environment-variable "HOME") "/.mtviews.config")) - (mthome-cfgfile (conc *toppath* "/.mtviews.config"))) - (if (common:file-exists? mthome-cfgfile) - (read-config mthome-cfgfile view-cfgdat #t)) - ;; we load the home dir file AFTER the MTRAH file so the user can clobber settings when running the dashboard in read-only areas - (if (common:file-exists? 
home-cfgfile) - (read-config home-cfgfile view-cfgdat #t)) - view-cfgdat)) - -;;====================================================================== -;; H I E R A R C H I C A L H A S H T A B L E S -;;====================================================================== - -;; Every element including top element is a vector: -;; - -(define (hh:make-hh #!key (ht #f)(value #f)) - (vector (or ht (make-hash-table)) value)) - -;; used internally -(define-inline (hh:set-ht! hh ht) (vector-set! hh 0 ht)) -(define-inline (hh:get-ht hh) (vector-ref hh 0)) -(define-inline (hh:set-value! hh value) (vector-set! hh 1 value)) -(define-inline (hh:get-value hh value) (vector-ref hh 1)) - -;; given a hierarchial hash and some keys look up the value ... -;; -(define (hh:get hh . keys) - (if (null? keys) - (vector-ref hh 1) ;; we have reached the end of the line, return the value sought - (let ((sub-ht (hh:get-ht hh))) - (if sub-ht ;; yes, there is more hierarchy - (let ((sub-hh (hash-table-ref/default sub-ht (car keys) #f))) - (if sub-hh - (apply hh:get sub-hh (cdr keys)) - #f)) - #f)))) - -;; given a hierarchial hash, a value and some keys, add needed hierarcy and insert the value -;; -(define (hh:set! hh value . keys) - (if (null? keys) - (hh:set-value! hh value) ;; we have reached the end of the line, store the value - (let ((sub-ht (hh:get-ht hh))) - (if sub-ht ;; yes, there is more hierarchy - (let ((sub-hh (hash-table-ref/default sub-ht (car keys) #f))) - (if (not sub-hh) ;; we'll need to add the next level of hierarchy - (let ((new-sub-hh (hh:make-hh))) - (hash-table-set! sub-ht (car keys) new-sub-hh) - (apply hh:set! new-sub-hh value (cdr keys))) - (apply hh:set! sub-hh value (cdr keys)))) ;; call the sub-hierhash with remaining keys - (begin - (hh:set-ht! hh (make-hash-table)) - (apply hh:set! 
hh value keys)))))) - -;; Manage pkts, used in servers, tests and likely other contexts so put -;; in common -;;====================================================================== - -(define common:pkts-spec - '((default . ((parent . P) - (action . a) - (filename . f))) - (configf . ((parent . P) - (action . a) - (filename . f))) - (server . ((action . a) - (pid . d) - (ipaddr . i) - (port . p) - (parent . P))) - - (test . ((cpuuse . c) - (diskuse . d) - (item-path . i) - (runname . r) - (state . s) - (target . t) - (status . u) - (parent . P))))) - -(define (common:get-pkts-dirs mtconf use-lt) - (let* ((pktsdirs-str (or (configf:lookup mtconf "setup" "pktsdirs") - (and use-lt - (conc (or *toppath* - (current-directory)) - "/lt/.pkts")))) - (pktsdirs (if pktsdirs-str - (string-split pktsdirs-str " ") - #f))) - pktsdirs)) - -;; use-lt is use linktree "lt" link to find pkts dir -(define (common:save-pkt pktalist-in mtconf use-lt #!key (add-only #f)) ;; add-only saves the pkt only if there is a parent already - (if (or add-only - (hash-table-exists? *pkts-info* 'last-parent)) - (let* ((parent (hash-table-ref/default *pkts-info* 'last-parent #f)) - (pktalist (if parent - (cons `(parent . ,parent) - pktalist-in) - pktalist-in))) - (let-values (((uuid pkt) - (alist->pkt pktalist common:pkts-spec))) - (hash-table-set! *pkts-info* 'last-parent uuid) - (let ((pktsdir (or (hash-table-ref/default *pkts-info* 'pkts-dir #f) - (let* ((pktsdirs (common:get-pkts-dirs mtconf use-lt)) - (pktsdir (car pktsdirs))) ;; assume it is there - (hash-table-set! *pkts-info* 'pkts-dir pktsdir) - pktsdir)))) - (handle-exceptions - exn - (debug:print-info 0 "failed to write out packet to " pktsdir) ;; don't care if this failed for now but MUST FIX - BUG!! - (if (not (file-exists? 
pktsdir)) - (create-directory pktsdir #t)) - (with-output-to-file - (conc pktsdir "/" uuid ".pkt") - (lambda () - (print pkt))))))))) - -(define (common:with-queue-db mtconf proc #!key (use-lt #f)(toppath-in #f)) - (let* ((pktsdirs (common:get-pkts-dirs mtconf use-lt)) - (pktsdir (if pktsdirs (car pktsdirs) #f)) - (toppath (or (configf:lookup mtconf "scratchdat" "toppath") - toppath-in)) - (pdbpath (or (configf:lookup mtconf "setup" "pdbpath") pktsdir))) - (cond - ((not (and pktsdir toppath pdbpath)) - (debug:print 0 *default-log-port* "ERROR: settings are missing in your megatest.config for area management.") - (debug:print 0 *default-log-port* " you need to have pktsdirs in the [setup] section.")) - ((not (common:file-exists? pktsdir)) - (debug:print 0 *default-log-port* "ERROR: pkts directory not found " pktsdir)) - ((not (equal? (file-owner pktsdir)(current-effective-user-id))) - (debug:print 0 *default-log-port* "ERROR: directory " pktsdir " is not owned by " (current-effective-user-name))) - (else - (let* ((pdb (open-queue-db pdbpath "pkts.db" - schema: '("CREATE TABLE groups (id INTEGER PRIMARY KEY,groupname TEXT, CONSTRAINT group_constraint UNIQUE (groupname));")))) - (proc pktsdirs pktsdir pdb) - (dbi:close pdb)))))) - -(define (common:load-pkts-to-db mtconf #!key (use-lt #f)) - (common:with-queue-db - mtconf - (lambda (pktsdirs pktsdir pdb) - (for-each - (lambda (pktsdir) ;; look at all - (cond - ((not (common:file-exists? pktsdir)) - (debug:print 0 *default-log-port* "ERROR: packets directory " pktsdir " does not exist.")) - ((not (directory? pktsdir)) - (debug:print 0 *default-log-port* "ERROR: packets directory path " pktsdir " is not a directory.")) - ((not (file-read-access? 
pktsdir)) - (debug:print 0 *default-log-port* "ERROR: packets directory path " pktsdir " is not readable.")) - (else - (debug:print-info 0 *default-log-port* "Loading packets found in " pktsdir) - (let ((pkts (glob (conc pktsdir "/*.pkt")))) - (for-each - (lambda (pkt) - (let* ((uuid (cadr (string-match ".*/([0-9a-f]+).pkt" pkt))) - (exists (lookup-by-uuid pdb uuid #f))) - (if (not exists) - (let* ((pktdat (string-intersperse - (with-input-from-file pkt read-lines) - "\n")) - (apkt (pkt->alist pktdat)) - (ptype (alist-ref 'T apkt))) - (add-to-queue pdb pktdat uuid (or ptype 'cmd) #f 0) - (debug:print 4 *default-log-port* "Added " uuid " of type " ptype " to queue")) - (debug:print 4 *default-log-port* "pkt: " uuid " exists, skipping...") - ))) - pkts))))) - pktsdirs)) - use-lt: use-lt)) - -(define (common:get-pkt-alists pkts) - (map (lambda (x) - (alist-ref 'apkt x)) ;; 'pkta pulls out the alist from the read pkt - pkts)) - -;; given list of pkts (alist mode) return list of D cards as Unix epoch, sorted descending -;; also delete duplicates by target i.e. (car pkt) -;; -(define (common:get-pkt-times pkts) - (delete-duplicates - (sort - (map (lambda (x) - `(,(alist-ref 't x) . ,(string->number (alist-ref 'D x)))) - pkts) - (lambda (a b)(> (cdr a)(cdr b)))) ;; sort descending - (lambda (a b)(equal? (car a)(car b))))) ;; remove duplicates by target - - - -;; accept an alist or hash table containing envvar/env value pairs (value of #f causes unset) -;; execute thunk in context of environment modified as per this list -;; restore env to prior state then return value of eval'd thunk. -;; ** this is not thread safe ** -(define (common:with-env-vars delta-env-alist-or-hash-table thunk) - (let* ((delta-env-alist (if (hash-table? 
delta-env-alist-or-hash-table) - (hash-table->alist delta-env-alist-or-hash-table) - delta-env-alist-or-hash-table)) - (restore-thunks - (filter - identity - (map (lambda (env-pair) - (let* ((env-var (car env-pair)) - (new-val (let ((tmp (cdr env-pair))) - (if (list? tmp) (car tmp) tmp))) - (current-val (get-environment-variable env-var)) - (restore-thunk - (cond - ((not current-val) (lambda () (unsetenv env-var))) - ((not (string? new-val)) #f) - ((eq? current-val new-val) #f) - (else - (lambda () (setenv env-var current-val)))))) - ;;(when (not (string? new-val)) - ;; (debug:print 0 *default-log-port* " PROBLEM: not a string: "new-val"\n from env-alist:\n"delta-env-alist) - ;; (pp delta-env-alist) - ;; (exit 1)) - - - (cond - ((not new-val) ;; modify env here - (unsetenv env-var)) - ((string? new-val) - (setenv env-var new-val))) - restore-thunk)) - delta-env-alist)))) - (let ((rv (thunk))) - (for-each (lambda (x) (x)) restore-thunks) ;; restore env to original state - rv))) - -(define *common:thread-punchlist* (make-hash-table)) -(define (common:send-thunk-to-background-thread thunk #!key (name #f)) - ;;(BB> "launched thread " name) - - ;; we need a unique name for the thread. - (let* ((realname (if name - (if (not (hash-table-ref/default *common:thread-punchlist* name #f)) - name - (conc name"-" (symbol->string (gensym)))) - (conc "anonymous-"(symbol->string (gensym))))) - (realthunk (lambda () - (let ((res (thunk))) - (hash-table-delete! *common:thread-punchlist* realname) - res))) - (thread (make-thread realthunk realname))) - (hash-table-set! *common:thread-punchlist* realname thread) - (thread-start! thread) - )) - -(define (common:join-backgrounded-threads) - ;; may need to trap and ignore exceptions -- dunno how atomic threads are... 
- (for-each - (lambda (thread-name) - (let* ((thread (hash-table-ref/default *common:thread-punchlist* thread-name #f))) - (if thread - (handle-exceptions - exn - #t ;; just ignore it, it might have died in the meantime so joining it will throw an exception - (thread-join! thread)) - ))) - (hash-table-keys *common:thread-punchlist*))) - -;; (define *common:telemetry-log-state* 'startup) -;; (define *common:telemetry-log-socket* #f) -;; -;; (define (common:telemetry-log-open) -;; (if (eq? *common:telemetry-log-state* 'startup) -;; (let* ((serverhost (configf:lookup *configdat* "telemetry" "host")) -;; (serverport (configf:lookup-number *configdat* "telemetry" "port")) -;; (user (or (get-environment-variable "USER") "unknown")) -;; (host (or (get-environment-variable "HOST") "unknown"))) -;; (set! *common:telemetry-log-state* -;; (handle-exceptions -;; exn -;; (begin -;; (debug:print-info 0 *default-log-port* "common-telemetry-log open udp port failure") -;; 'broken) -;; (if (and serverhost serverport user host) -;; (let* ((s (udp-open-socket))) -;; ;;(udp-bind! s #f 0) -;; (udp-connect! s serverhost serverport) -;; (set! *common:telemetry-log-socket* s) -;; 'open) -;; 'not-needed)))))) -;; -;; (define (common:telemetry-log event #!key (payload '())) -;; (if (eq? *common:telemetry-log-state* 'startup) -;; (common:telemetry-log-open)) -;; -;; (if (eq? 'open *common:telemetry-log-state*) -;; (handle-exceptions -;; exn -;; (begin -;; (debug:print-info 0 *default-log-port* "common-telemetry-log comms failure ; disabled (no server?)") -;; ;;(define *common:telemetry-log-state* 'broken-or-no-server-preclose) -;; ;;(common:telemetry-log-close) -;; (define *common:telemetry-log-state* 'broken-or-no-server) -;; (set! 
*common:telemetry-log-socket* #f) -;; ) -;; (if (and *common:telemetry-log-socket* event) ;; TODO - filter on event against telemetry.want-events -;; (let* ((user (or (get-environment-variable "USER") "unknown")) -;; (host (or (get-environment-variable "HOST") "unknown")) -;; (start (conc "[megatest "event"]")) -;; (toppath (or *toppath* "/dev/null")) -;; (payload-serialized -;; (base64:base64-encode -;; (z3:encode-buffer -;; (with-output-to-string (lambda () (pp payload)))))) -;; (msg (conc user":"host":"start":"(current-process-id)":"(car (argv))":" -;; toppath":"payload-serialized))) -;; (udp-send *common:telemetry-log-socket* msg)))))) -;; -;; (define (common:telemetry-log-close) -;; (when (or (member *common:telemetry-log-state* '(broken-or-no-server-preclose open)) *common:telemetry-log-socket*) -;; (handle-exceptions -;; exn -;; (begin -;; (define *common:telemetry-log-state* 'closed-fail) -;; (debug:print-info 0 *default-log-port* "common-telemetry-log closure failure") -;; ) -;; (begin -;; (define *common:telemetry-log-state* 'closed) -;; (udp-close-socket *common:telemetry-log-socket*) -;; (set! 
*common:telemetry-log-socket* #f))))) - Index: commonmod.scm ================================================================== --- commonmod.scm +++ commonmod.scm @@ -20,38 +20,66 @@ (declare (unit commonmod)) (declare (uses mtargs)) ;; (declare (uses stml2)) (declare (uses mtconfigf)) - +(declare (uses ulex)) +(declare (uses pkts)) (module commonmod * (import scheme chicken data-structures extras) - -(import (prefix sqlite3 sqlite3:) posix typed-records srfi-18 - srfi-1 files format srfi-13 matchable - srfi-69 ports - regex-case regex hostinfo srfi-4 - pkts (prefix dbi dbi:) - stack - md5 - message-digest - (prefix mtconfigf configf:) - ;; stml2 - ;; (prefix margs args:) - z3 (prefix base64 base64:) - (prefix mtargs args:)) + +(use (prefix sqlite3 sqlite3:) posix typed-records srfi-18 + srfi-1 files format srfi-13 matchable + srfi-69 ports + (prefix base64 base64:) + regex-case regex hostinfo srfi-4 + (prefix dbi dbi:) + stack + md5 + message-digest + z3 + directory-utils + sparse-vectors) + +(import pkts) +(import ulex) +(import (prefix mtconfigf configf:)) +(import (prefix mtargs args:)) (include "common_records.scm") (include "megatest-fossil-hash.scm") (include "megatest-version.scm") ;; no need to export this (define *verbosity-cache* (make-hash-table)) (define *verbosity* 0) + +(defstruct remote + (hh-dat (common:get-homehost)) ;; homehost record ( addr . hhflag ) + (server-url #f) ;; (if *toppath* (server:check-if-running *toppath*))) ;; (server:check-if-running *toppath*) #f)) + (last-server-check 0) ;; last time we checked to see if the server was alive + (conndat #f) + (transport *transport-type*) + (server-timeout #f) ;; (server:expiration-timeout)) + (force-server #f) + (ro-mode #f) + (ro-mode-checked #f)) ;; flag that indicates we have checked for ro-mode + +;; launching and hosts +(defstruct host + (reachable #f) + (last-update 0) + (last-used 0) + (last-cpuload 1)) + +(define (common:run-sync?) + (and (common:on-homehost?) 
+ (args:get-arg "-server"))) + ;; GLOBALS ;; CONTEXTS @@ -78,11 +106,11 @@ ;; A hash table that can be accessed by #{scheme ...} calls in ;; config files. Allows communicating between confgs ;; (define *user-hash-data* (make-hash-table)) -(define *db-keys* #f) +;; (define *db-keys* #f) (define *pkts-info* (make-hash-table)) ;; store stuff like the last parent here (define *configinfo* #f) ;; raw results from setup, includes toppath and table from megatest.config (define *runconfigdat* #f) ;; run configs data (define *configdat* #f) ;; megatest.config data @@ -117,11 +145,11 @@ (define *db-api-call-time* (make-hash-table)) ;; hash of command => (list of times) ;; no sync db (define *no-sync-db* #f) ;; SERVER -(define *my-client-signature* #f) +;; (define *my-client-signature* #f) (define *transport-type* 'http) ;; override with [server] transport http|rpc|nmsg (define *runremote* #f) ;; if set up for server communication this will hold ;; (define *max-cache-size* 0) (define *logged-in-clients* (make-hash-table)) (define *server-id* #f) @@ -144,11 +172,11 @@ (define *rpc:listener* #f) ;; KEY info (define *target* (make-hash-table)) ;; cache the target here; target is keyval1/keyval2/.../keyvalN (define *keys* (make-hash-table)) ;; cache the keys here -(define *keyvals* (make-hash-table)) +;; (define *keyvals* (make-hash-table)) (define *toptest-paths* (make-hash-table)) ;; cache toptest path settings here (define *test-paths* (make-hash-table)) ;; cache test-id to test run paths here (define *test-ids* (make-hash-table)) ;; cache run-id, testname, and item-path => test-id (define *test-info* (make-hash-table)) ;; cache the test info records, update the state, status, run_duration etc. 
from testdat.db @@ -260,10 +288,24 @@ (let ((targ (common:args-get-target))) ;; (or (args:get-arg "-reqtarg")(args:get-arg "-target")(getenv "MT_TARGET")))) (if targ (or (configf:lookup config targ var) (configf:lookup config "default" var)) (configf:lookup config "default" var)))) + +;;====================================================================== +;; client stuff +;;====================================================================== + +;; client:get-signature +(define (common:client-signature) + ;; (if *my-client-signature* *my-client-signature* + ;; (let ((sig + (conc (get-host-name) " " (current-process-id))) + ;; )(set! *my-client-signature* sig) + ;; *my-client-signature*))) + +(define client:get-signature common:client-signature) (define (common:args-get-state) (or (args:get-arg "-state")(args:get-arg ":state"))) (define (common:args-get-status) @@ -603,19 +645,16 @@ (conc "/bin/bash -c \"echo " instr "\"") read-line))) (define (common:file-exists? path-string #!key (silent #f)) ;; this avoids stack dumps in the case where - ;;;; TODO: catch permission denied exceptions and emit appropriate warnings, eg: system error while trying to access file: "/nfs/pdx/disks/icf_env_disk001/bjbarcla/gwa/issues/mtdev/randy-slow/reproduce/q... (common:false-on-exception (lambda () (file-exists? path-string)) message: (if (not silent) (conc "Unable to access path: " path-string) #f) )) - - (define (common:false-on-exception thunk #!key (message #f)) (handle-exceptions exn (begin (if message @@ -638,10 +677,286 @@ #f (if (and (directory-exists? path-string) (file-write-access? 
path-string)) path-string #f))) +;;====================================================================== +;; T I M E A N D D A T E +;;====================================================================== + +;; Convert strings like "5s 2h 3m" => 60x60x2 + 3x60 + 5; parts not of the form <digits><unit> are skipped +(define (common:hms-string->seconds tstr) + (let ((parts (string-split-fields "\\w+" tstr)) + (time-secs 0) + ;; s=seconds, m=minutes, h=hours, d=days, M=months, y=years, w=weeks + (trx (regexp "(\\d+)([smhdMyw])"))) + (for-each (lambda (part) + (let ((match (string-match trx part))) + (if match + (let ((val (string->number (cadr match))) + (unt (caddr match))) + (if val + (set! time-secs (+ time-secs (* val + (case (string->symbol unt) + ((s) 1) + ((m) 60) ;; minutes + ((h) 3600) + ((d) 86400) + ((w) 604800) + ((M) 2628000) ;; approximately one month + ((y) 31536000) + (else #f)))))))))) ;; else is not expected to be reached: trx only admits the units listed above + parts) + time-secs)) +;; Format a count of seconds as "<h>hr <m>m <s>s"; the hr and m parts are omitted when they are zero +(define (seconds->hr-min-sec secs) + (let* ((hrs (quotient secs 3600)) + (min (quotient (- secs (* hrs 3600)) 60)) + (sec (- secs (* hrs 3600)(* min 60)))) + (conc (if (> hrs 0)(conc hrs "hr ") "") + (if (> min 0)(conc min "m ") "") + sec "s"))) +;; Local time of epoch seconds sec formatted as HH:MM:SS +(define (seconds->time-string sec) + (time->string + (seconds->local-time sec) "%H:%M:%S")) +;; Local time of sec formatted as "ww<week>.<weekday> HH:MM" using %V and %u +(define (seconds->work-week/day-time sec) + (time->string + (seconds->local-time sec) "ww%V.%u %H:%M")) +;; Local date of sec formatted as "ww<week>.<weekday>" using %V and %u +(define (seconds->work-week/day sec) + (time->string + (seconds->local-time sec) "ww%V.%u")) +;; Two-digit year, "ww", week and weekday; note this and the variants below use %w where the two above use %u +(define (seconds->year-work-week/day sec) + (time->string + (seconds->local-time sec) "%yww%V.%w")) +;; Four-digit year, "ww", week, weekday (%w) and HH:MM +(define (seconds->year-work-week/day-time sec) + (time->string + (seconds->local-time sec) "%Yww%V.%w %H:%M")) +;; Same layout as seconds->year-work-week/day-time but with a single "w" between year and week +(define (seconds->year-week/day-time sec) + (time->string + (seconds->local-time sec) "%Yw%V.%w %H:%M")) +;; Quarter (1-4) of the local-time month of sec; #f if the month does not parse to 1-12 +(define (seconds->quarter sec) + (case (string->number + (time->string + (seconds->local-time sec) + "%m")) + ((1 2 3) 1) + ((4 5 6) 2) + ((7 8 9) 3) + ((10 11 12) 4) + (else #f))) + +;; basic ISO8601 format (e.g. 
"2017-02-28 06:02:54") date time => Unix epoch +;; datetime is parsed with the pattern "%Y-%m-%d %H:%M:%S" and converted with local-time->seconds +(define (common:date-time->seconds datetime) + (local-time->seconds (string->time datetime "%Y-%m-%d %H:%M:%S"))) + +;; given span of seconds tstart to tend find a start time to mark and a mark delta +;; returns five values: span, timeunit (seconds), time-blk (= span * timeunit), first (tstart rounded +;; down to a multiple of time-blk) and the unit symbol; when no unit fits the defaults 0 day 1 0 'd are returned +(define (common:find-start-mark-and-mark-delta tstart tend) + (let* ((deltat (- (max tend (+ tend 10)) tstart)) ;; can't handle runs of less than 4 seconds. Pad it to 10 seconds ... NOTE(review): (max tend (+ tend 10)) is always (+ tend 10), so every span is padded by 10s - confirm intent + (result #f) + (min 60) + (hr (* 60 60)) + (day (* 24 hr)) + (yr (* 365 day)) ;; year + (mo (/ yr 12)) + (wk (* day 7))) + (for-each + (lambda (max-blks) + (for-each + (lambda (span) ;; 5 2 1 + (if (not result) + (for-each + (lambda (timeunit timesym) ;; year month day hr min sec + (if (not result) + (let* ((time-blk (* span timeunit)) + (num-blks (quotient deltat time-blk))) + (if (and (> num-blks 4)(< num-blks max-blks)) + (let ((first (* (quotient tstart time-blk) time-blk))) + (set! result (list span timeunit time-blk first timesym)) + ))))) + (list yr mo wk day hr min 1) + '( y mo w d h m s)))) + (list 8 6 5 2 1))) + '(5 10 15 20 30 40 50 500)) + (if result ;; test the search result itself; the procedure values is always true and would send #f to apply + (apply values result) + (values 0 day 1 0 'd)))) ;; nothing fit (e.g. tend far before tstart or an enormous span) + +;; given x y lim return the cron expansion: the list x, x+y, x+2y, ... of the values below lim +;; a step y <= 0 can never advance, so it yields just x (when x < lim) instead of looping forever +(define (common:expand-cron-slash x y lim) + (let loop ((curr x) + (res `())) + (if (and (> y 0) (< curr lim)) + (loop (+ curr y) (cons curr res)) + (reverse (if (< curr lim) (cons curr res) res))))) + +;; expand a complex cron string to a list of cron strings +;; +;; x/y => x, x+y, x+2y, x+3y while x+Ny a, b ,c +;; +;; NOTE: with flatten a lot of the crud below can be factored down. +;; +(define (common:cron-expand cron-str) + (if (list? cron-str) + (flatten + (fold (lambda (x res) + (if (list? x) + (let ((newres (map common:cron-expand x))) + (append x newres)) + (cons x res))) + '() + cron-str)) ;; (map common:cron-expand cron-str)) + (let ((cron-items (string-split cron-str)) + (slash-rx (regexp "(\\d+)/(\\d+)")) + (comma-rx (regexp ".*,.*")) + (max-vals '((min . 60) + (hour . 24) + (dayofmonth . 28) ;;; BUG!!!! 
This will be a bug for some combinations + (month . 12) + (dayofweek . 7)))) + (if (< (length cron-items) 5) ;; bad spec + cron-str ;; `(,cron-str) ;; just return the string, something downstream will fix it + (let loop ((hed (car cron-items)) + (tal (cdr cron-items)) + (type 'min) + (type-tal '(hour dayofmonth month dayofweek)) + (res '())) + (regex-case + hed + (slash-rx ( _ base incr ) (let* ((basen (string->number base)) + (incrn (string->number incr)) + (expanded-vals (common:expand-cron-slash basen incrn (alist-ref type max-vals))) + (new-list-crons (fold (lambda (x myres) + (cons (conc (if (null? res) + "" + (conc (string-intersperse res " ") " ")) + x " " (string-intersperse tal " ")) + myres)) + '() expanded-vals))) + ;; (print "new-list-crons: " new-list-crons) + ;; (fold (lambda (x res) + ;; (if (list? x) + ;; (let ((newres (map common:cron-expand x))) + ;; (append x newres)) + ;; (cons x res))) + ;; '() + (flatten (map common:cron-expand new-list-crons)))) ;; each rebuilt string is expanded again so slashes in later fields are handled too + ;; (map common:cron-expand (map common:cron-expand new-list-crons)))) + (else (if (null? tal) + cron-str + (loop (car tal)(cdr tal)(car type-tal)(cdr type-tal)(append res (list hed))))))))))) + + +;; given a cron string and the last time event was processed return a true value to run or #f to not run +;; +;; min hour dayofmonth month dayofweek +;; 0-59 0-23 1-31 1-12 0-6 ### NOTE: dayofweek does not include 7 +;; +;; true => yes, run the job (the value is the epoch seconds of the candidate slot just before now, not literally #t) +;; #f => no, do not run the job +;; a field that is not a plain number (e.g. "*") becomes #f via string->number and is treated as not specified +(define (common:cron-event cron-str now-seconds-in last-done) ;; ref-seconds = #f is NOW. + (let* ((cron-items (map string->number (string-split cron-str))) + (now-seconds (or now-seconds-in (current-seconds))) + (now-time (seconds->local-time now-seconds)) + (last-done-time (seconds->local-time last-done)) + (all-times (make-hash-table))) + ;; (print "cron-items: " cron-items "(length cron-items): " (length cron-items)) + (if (not (eq? 
(length cron-items) 5)) ;; don't even try to figure out junk strings + #f + (match-let ((( cmin chour cdayofmonth cmonth cdayofweek) + cron-items) + ;; 0 1 2 3 4 5 6 + ((nsec nmin nhour ndayofmonth nmonth nyr ndayofweek n7 n8 n9) + (vector->list now-time)) + ((lsec lmin lhour ldayofmonth lmonth lyr ldayofweek l7 l8 l9) + (vector->list last-done-time))) + ;; create all possible time slots + ;; remove invalid slots due to (for example) day of week + ;; get the start and end entries for the ref-seconds (current) time + ;; if last-done > ref-seconds => this is an ERROR! + ;; does the last-done time fall in the legit region? + ;; yes => #f do not run again this command + ;; no => #t ok to run the command + (for-each ;; month + (lambda (month) + (for-each ;; dayofmonth + (lambda (dom) + (for-each + (lambda (hr) ;; hour + (for-each + (lambda (minute) ;; minute + (let ((copy-now (apply vector (vector->list now-time)))) + (vector-set! copy-now 0 0) ;; force seconds to zero + (vector-set! copy-now 1 minute) + (vector-set! copy-now 2 hr) + (vector-set! copy-now 3 dom) ;; dom is already corrected for zero referenced + (vector-set! copy-now 4 month) ;; NOTE(review): a cron month of 1-12 is stored as-is; presumably the time vector month is 0-11 - confirm + (let* ((copy-now-secs (local-time->seconds copy-now)) + (new-copy (seconds->local-time copy-now-secs))) ;; remake the time vector + (if (or (not cdayofweek) + (equal? (vector-ref new-copy 6) + cdayofweek)) ;; if the day is specified and a match OR if the day is NOT specified + (if (or (not cdayofmonth) + (equal? (vector-ref new-copy 3) + (+ 1 cdayofmonth))) ;; if the day of month is specified and a match OR if it is NOT specified. NOTE(review): slot 3 was set from dom without the +1 used here - confirm + (hash-table-set! 
all-times copy-now-secs new-copy)))))) + (if cmin + `(,cmin) ;; if given cmin, have to use it + (list (- nmin 1) nmin (+ nmin 1))))) ;; minute + (if chour + `(,chour) + (list (- nhour 1) nhour (+ nhour 1))))) ;; hour + (if cdayofmonth + `(,cdayofmonth) + (list (- ndayofmonth 1) ndayofmonth (+ ndayofmonth 1))))) + (if cmonth + `(,cmonth) + (list (- nmonth 1) nmonth (+ nmonth 1)))) + (let ((before #f) + (is-in #f)) + (for-each + (lambda (moment) + (if (and before + (<= before now-seconds) + (>= moment now-seconds)) + (begin + ;; (print) + ;; (print "Before: " (time->string (seconds->local-time before))) + ;; (print "Now: " (time->string (seconds->local-time now-seconds))) + ;; (print "After: " (time->string (seconds->local-time moment))) + ;; (print "Last: " (time->string (seconds->local-time last-done))) + (if (< last-done before) + (set! is-in before)) + )) + (set! before moment)) + (sort (hash-table-keys all-times) <)) + is-in))))) + +(define (common:extended-cron cron-str now-seconds-in last-done) + (let ((expanded-cron (common:cron-expand cron-str))) + (if (string? expanded-cron) + (common:cron-event expanded-cron now-seconds-in last-done) + (let loop ((hed (car expanded-cron)) + (tal (cdr expanded-cron))) + (if (common:cron-event hed now-seconds-in last-done) + #t + (if (null? tal) + #f + (loop (car tal)(cdr tal)))))))) + ;;====================================================================== ;; M I S C L I S T S ;;====================================================================== @@ -849,11 +1164,11 @@ (status-sym->string (common:logpro-exit-code->status-sym exit-code))) (define (common:clear-caches) (set! *target* (make-hash-table)) (set! *keys* (make-hash-table)) - (set! *keyvals* (make-hash-table)) + ;; (set! *keyvals* (make-hash-table)) (set! *toptest-paths* (make-hash-table)) (set! *test-paths* (make-hash-table)) (set! *test-ids* (make-hash-table)) (set! *test-info* (make-hash-table)) (set! 
*run-info-cache* (make-hash-table)) @@ -1317,7 +1632,1821 @@ (define (common:get-area-path-signature) (message-digest-string (md5-primitive) *toppath*)) (define (common:get-signature str) (message-digest-string (md5-primitive) str)) + +;; NB// #f => return dbdir only +;; (was planned to be; zeroth db with name=main.db) +;; +;; If run-id is #f return to create and retrieve the path where the db will live. +;; +(define db:dbfile-path common:get-db-tmp-area) + +;; + + + + + + + + + + + + + + + + + + + + + +(use posix-extras pathname-expand files) + +;; this plugs a hole in posix-extras in recent chicken versions > 4.9) +(let-values (( (chicken-release-number chicken-major-version) + (apply values + (map string->number + (take + (string-split (chicken-version) ".") + 2))))) + (let ((resolve-pathname-broken? + (or (> chicken-release-number 4) + (and (eq? 4 chicken-release-number) (> chicken-major-version 9))))) + (if resolve-pathname-broken? + (define ##sys#expand-home-path pathname-expand)))) + +(define (realpath x) (resolve-pathname (pathname-expand (or x "/dev/null")) )) + +(define (common:get-this-exe-fullpath #!key (argv (argv))) + (let* ((this-script + (cond + ((and (> (length argv) 2) + (string-match "^(.*/csi|csi)$" (car argv)) + (string-match "^-(s|ss|sx|script)$" (cadr argv))) + (caddr argv)) + (else (car argv)))) + (fullpath (realpath this-script))) + fullpath)) +(define *common:this-exe-fullpath* (common:get-this-exe-fullpath)) +(define *common:this-exe-dir* (pathname-directory *common:this-exe-fullpath*)) +(define *common:this-exe-name* (pathname-strip-directory *common:this-exe-fullpath*)) + +(define (common:snapshot-file filepath #!key (subdir ".") ) + (if (file-exists? filepath) + (let* ((age-sec (lambda (file) + (if (file-exists? file) + (- (current-seconds) (file-modification-time file)) + 1000000000))) ;; return really old value if file doesn't exist. we want to clobber it if old or not exist. 
+ (ok-flag #t) + (age-mins (lambda (file) (/ (age-sec file) 60))) + (age-hrs (lambda (file) (/ (age-mins file) 60))) + (age-days (lambda (file) (/ (age-hrs file) 24))) + (age-wks (lambda (file) (/ (age-days file) 7))) + (docmd (lambda (cmd) + (cond + (ok-flag + (let ((res (system cmd))) + (cond + ((eq? 0 res) + #t) + (else + (set! ok-flag #f) + (debug:print 0 *default-log-port* "ERROR: ["(common:human-time)"] Command failed with exit code " + (if (< res 0) + res + (/ res 8)) " ["cmd"]" ) + #f)))) + (else + (debug:print 0 *default-log-port* "ERROR: ["(common:human-time)"] Not runnining command due to prior error. ["cmd"]") + #f)))) + (copy (lambda (src dest) (docmd (conc "/bin/cp '"src"' '"dest"'")))) + (copy+zip (lambda (src dest) (docmd (conc "gzip -c - < '"src"' > '"dest"'")))) + (fullpath (realpath filepath)) + (basedir (pathname-directory fullpath)) + (basefile (pathname-strip-directory fullpath)) + ;;(prevfile (conc filepath ".prev.gz")) + (minsfile (conc basedir "/" subdir "/" basefile ".mins.gz")) + (hrsfile (conc basedir "/" subdir "/" basefile ".hrs.gz")) + (daysfile (conc basedir "/" subdir "/" basefile ".days.gz")) + (wksfile (conc basedir "/" subdir "/" basefile ".weeks.gz"))) + + ;; create subdir it not exists + (if (not (directory-exists? (conc basedir "/" subdir))) + (docmd (conc "/bin/mkdir -p '"(conc basedir "/" subdir)"'"))) + + ;; copy&zip to .mins if not exists + (if (not (file-exists? minsfile)) + (copy+zip filepath minsfile)) + ;; copy .mins to .hrs if not exists + (if (not (file-exists? hrsfile)) + (copy minsfile hrsfile)) + ;; copy .hrs to .days if not exists + (if (not (file-exists? daysfile)) + (copy hrsfile daysfile)) + ;; copy .days to .weeks if not exists + (if (not (file-exists? 
wksfile)) + (copy daysfile wksfile)) + + + ;; if age(.mins.gz) >= 1h: + ;; copy .mins.gz .hrs.gz + ;; copy .mins.gz + (when (>= (age-mins minsfile) 1) + (copy minsfile hrsfile) + (copy+zip filepath minsfile)) + + ;; if age(.hrs.gz) >= 1d: + ;; copy .hrs.gz .days.gz + ;; copy .mins.gz .hrs.gz + (when (>= (age-days hrsfile) 1) + (copy hrsfile daysfile) + (copy minsfile hrsfile)) + + ;; if age(.days.gz) >= 1w: + ;; copy .days.gz .weeks.gz + ;; copy .hrs.gz .days.gz + (when (>= (age-wks daysfile) 1) + (copy daysfile wksfile) + (copy hrsfile daysfile)) + #t) + #f)) + + + +;; Rotate logs, logic: +;; if > 500k and older than 1 week: +;; remove previous compressed log and compress this log +;; WARNING: This proc operates assuming that it is in the directory above the +;; logs directory you wish to log-rotate. +;; +(define (common:rotate-logs) + (let* ((all-files (make-hash-table)) + (stats (make-hash-table)) + (inc-stat (lambda (key) + (hash-table-set! stats key (+ (hash-table-ref/default stats key 0) 1)))) + (max-allowed (string->number (or (configf:lookup *configdat* "setup" "max-logfiles") "300")))) ;; name -> age + (if (not (directory-exists? "logs"))(create-directory "logs")) + (directory-fold + (lambda (file rem) + (handle-exceptions + exn + (begin + (debug:print-info 0 *default-log-port* "unable to rotate log " file ", probably handled by another process.") + (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) + (print-call-chain (current-error-port))) + (let* ((fullname (conc "logs/" file)) + (mod-time (file-modification-time fullname)) + (file-age (- (current-seconds) mod-time))) + (hash-table-set! all-files file mod-time) + (if (or (and (string-match "^.*.log" file) + (> (file-size fullname) 200000)) + (and (string-match "^server-.*.log" file) + (> (- (current-seconds) (file-modification-time fullname)) + (* 8 60 60)))) + (let ((gzfile (conc fullname ".gz"))) + (if (common:file-exists? 
gzfile) + (begin + (debug:print-info 0 *default-log-port* "removing " gzfile) + (delete-file* gzfile) + (hash-table-delete! all-files gzfile) ;; needed? + )) + (debug:print-info 0 *default-log-port* "compressing " file) + (system (conc "gzip " fullname)) + (inc-stat "gzipped") + (hash-table-set! all-files (conc file ".gz") file-age) ;; add the .gz file and remove the base file + (hash-table-delete! all-files file) + ) + (if (> file-age (* (string->number (or (configf:lookup *configdat* "setup" "log-expire-days") "30")) 24 3600)) + (handle-exceptions + exn + #f + (if (directory? fullname) + (begin + (debug:print-error 0 *default-log-port* fullname " in logs directory is a directory! Cannot rotate it, it is best to not put subdirectories in the logs dir.") + (inc-stat "directories")) + (begin + (delete-file* fullname) + (inc-stat "deleted"))) + (hash-table-delete! all-files file))))))) + '() + "logs") + (for-each + (lambda (category) + (let ((quant (hash-table-ref/default stats category 0))) + (if (> quant 0) + (debug:print-info 0 *default-log-port* category " log files: " quant)))) + `("deleted" "gzipped" "directories")) + (let ((num-logs (hash-table-size all-files))) + (if (> num-logs max-allowed) ;; because NFS => don't let number of logs exceed 300 + (let ((files (take (sort (hash-table-keys all-files) + (lambda (a b) + (< (hash-table-ref all-files a)(hash-table-ref all-files b)))) + (- num-logs max-allowed)))) + (for-each + (lambda (file) + (let* ((fullname (conc "logs/" file))) + (if (directory? fullname) + (debug:print-error 0 *default-log-port* fullname " in logs directory is a directory! 
Cannot rotate it, it is best to not put subdirectories in the logs dir.")
+                    (handle-exceptions
+                     exn
+                     (debug:print-error 0 *default-log-port* "failed to remove " fullname)
+                     (delete-file* fullname)))))
+             files)
+            (debug:print-info 0 *default-log-port* "Deleted " (length files) " files from logs, keeping " max-allowed " files."))))))
+
+;;======================================================================
+;; S P A R S E A R R A Y S
+;;======================================================================
+
+;; a sparse array is a sparse vector of rows, each row being a sparse vector;
+;; row 0 is created up front so sparse-array? can recognise the structure
+(define (make-sparse-array)
+  (let ((a (make-sparse-vector)))
+    (sparse-vector-set! a 0 (make-sparse-vector))
+    a))
+
+(define (sparse-array? a)
+  (and (sparse-vector? a)
+       (sparse-vector? (sparse-vector-ref a 0))))
+
+;; return the value at row x, column y or #f if row x does not exist
+(define (sparse-array-ref a x y)
+  (let ((row (sparse-vector-ref a x)))
+    (if row
+        (sparse-vector-ref row y)
+        #f)))
+
+;; store val at row x, column y, creating row x on demand
+(define (sparse-array-set! a x y val)
+  (let ((row (sparse-vector-ref a x)))
+    (if row
+        (sparse-vector-set! row y val)
+        (let ((new-row (make-sparse-vector)))
+          (sparse-vector-set! a x new-row)
+          (sparse-vector-set! new-row y val)))))
+
+;;======================================================================
+;; L O C K E R S A N D B L O C K E R S
+;;======================================================================
+
+;; block further accesses to databases. Call this before shutting db down
+(define (common:db-block-further-queries)
+  (mutex-lock! *db-access-mutex*)
+  (set! *db-access-allowed* #f)
+  (mutex-unlock! *db-access-mutex*))
+
+;; return the current value of *db-access-allowed*, read while holding
+;; *db-access-mutex*
+;;
+;; The flag is captured in val BEFORE the mutex is released so that the
+;; flag itself is returned. Previously the value of the whole begin form,
+;; i.e. the result of mutex-unlock!, was returned and a block set by
+;; common:db-block-further-queries was never seen by callers.
+;;
+(define (common:db-access-allowed?)
+  (mutex-lock! *db-access-mutex*)
+  (let ((val *db-access-allowed*))
+    (mutex-unlock! *db-access-mutex*)
+    val))
+
+;;======================================================================
+;; U S E F U L S T U F F
+;;======================================================================
+
+;; dot-locking egg seems not to work, using this for now
+;; if lock is older than expire-time then remove it and try again
+;; to get the lock
+;;
+(define (common:simple-file-lock fname #!key (expire-time 300))
+  (if (common:file-exists? fname)
+      (if (> (- (current-seconds)(file-modification-time fname)) expire-time)
+          (begin
+            (handle-exceptions exn #f (delete-file* fname))
+            (common:simple-file-lock fname expire-time: expire-time))
+          #f)
+      (let ((key-string (conc (get-host-name) "-" (current-process-id))))
+        (with-output-to-file fname
+          (lambda ()
+            (print key-string)))
+        (thread-sleep! 0.25)
+        (if (common:file-exists? fname)
+            (handle-exceptions exn
+                #f
+                (with-input-from-file fname
+                  (lambda ()
+                    (equal? key-string (read-line)))))
+            #f))))
+
+(define (common:simple-file-lock-and-wait fname #!key (expire-time 300))
+  (let ((end-time (+ expire-time (current-seconds))))
+    (let loop ((got-lock (common:simple-file-lock fname expire-time: expire-time)))
+      (if got-lock
+          #t
+          (if (> end-time (current-seconds))
+              (begin
+                (thread-sleep!
3) + (loop (common:simple-file-lock fname expire-time: expire-time))) + #f))))) + +(define (common:simple-file-release-lock fname) + (handle-exceptions + exn + #f ;; I don't really care why this failed (at least for now) + (delete-file* fname))) + +;;====================================================================== +;; T A R G E T S , S T A T E , S T A T U S , +;; R U N N A M E A N D T E S T P A T T +;;====================================================================== + +;; (map print (map car (hash-table->alist (read-config "runconfigs.config" #f #t)))) +;; +(define (common:get-runconfig-targets #!key (configf #f)) + (let ((targs (sort (map car (hash-table->alist + (or configf ;; NOTE: There is no value in using runconfig:read here. + (configf:read-config (conc *toppath* "/runconfigs.config") + #f #t) + (make-hash-table)))) + string trynum 0) +;; (begin +;; (thread-sleep! 2) +;; (common:get-homehost trynum: (- trynum 1))) +;; #f)) + (else + (let* ((currhost (get-host-name)) + (bestadrs (server:get-best-guess-address currhost)) + ;; first look in config, then look in file .homehost, create it if not found + (homehost (or (configf:lookup *configdat* "server" "homehost" ) + (handle-exceptions + exn + (if (> trynum 0) + (let ((delay-time (* (- 5 trynum) 5))) + (mutex-unlock! *homehost-mutex*) + (debug:print 0 *default-log-port* "ERROR: ["(common:human-time)"] Failed to read .homehost file, delaying " delay-time " seconds and trying again, message: " ((condition-property-accessor 'exn 'message) exn)) + (thread-sleep! delay-time) + (common:get-homehost trynum: (- trynum 1))) + (begin + (mutex-unlock! *homehost-mutex*) + (debug:print 0 *default-log-port* "ERROR: ["(common:human-time)"] Failed to read .homehost file after trying five times. Giving up and exiting, message: " ((condition-property-accessor 'exn 'message) exn)) + (exit 1))) + (let ((hhf (conc *toppath* "/.homehost"))) + (if (common:file-exists? 
hhf) + (with-input-from-file hhf read-line) + (if (file-write-access? *toppath*) + (begin + (with-output-to-file hhf + (lambda () + (print bestadrs))) + (begin + (mutex-unlock! *homehost-mutex*) + (car (common:get-homehost)))) + #f)))))) + (at-home (or (equal? homehost currhost) + (equal? homehost bestadrs)))) + (set! *home-host* (cons homehost at-home)) + (mutex-unlock! *homehost-mutex*) + *home-host*)))) + +;; am I on the homehost? +;; +(define (common:on-homehost?) + (let ((hh (common:get-homehost))) + (if hh + (cdr hh) + #f))) + +;; do we honor the caches of the config files? +;; +(define (common:use-cache?) + (let ((res #t)) ;; priority by order of evaluation + (if *configdat* ;; sillyness here. can't use setup/use-cache to know if we can use the cached files! + (if (equal? (configf:lookup *configdat* "setup" "use-cache") "no") + (set! res #f) + (if (equal? (configf:lookup *configdat* "setup" "use-cache") "yes") + (set! res #t)))) + (if (args:get-arg "-no-cache")(set! res #f)) ;; overrides setting in "setup" + (if (getenv "MT_USE_CACHE") + (if (equal? (getenv "MT_USE_CACHE") "yes") + (set! res #t) + (if (equal? (getenv "MT_USE_CACHE") "no") + (set! res #f)))) ;; overrides -no-cache switch + res)) + +;; force use of server? +;; +(define (common:force-server?) 
+  (let* ((force-setting (configf:lookup *configdat* "server" "force"))
+         (force-type    (if force-setting (string->symbol force-setting) #f))
+         (force-result  (case force-type
+                          ((#f)     #f)
+                          ((always) #t)
+                          ((test)   (if (args:get-arg "-execute") ;; we are in a test
+                                        #t
+                                        #f))
+                          (else
+                           (debug:print 0 *default-log-port* "ERROR: Bad server force setting " force-setting ", forcing server.")
+                           #t)))) ;; default to requiring server
+    (if force-result
+        (begin
+          (debug:print-info 0 *default-log-port* "forcing use of server, force setting is \"" force-setting "\".")
+          #t)
+        #f)))
+
+;; moving common:htree->html to testsmod.scm to minimize deps on stml2
+
+;; hash-table tree to alist tree
+;;
+(define (common:htree->atree ht)
+  (map (lambda (x)
+         (cons (car x)
+               (let ((y (cdr x)))
+                 (if (hash-table? y)
+                     (common:htree->atree y)
+                     y))))
+       (hash-table->alist ht)))
+
+;;======================================================================
+;; M U N G E D A T A I N T O N I C E F O R M S
+;;======================================================================
+
+;; Generate an index for a sparse list of key values
+;; ( (rowname1 colname1 val1)(rowname2 colname2 val2) )
+;;
+;; =>
+;;
+;; ( (rowname1 0)(rowname2 1)) ;; rownames -> num
+;; (colname1 0)(colname2 1)) ) ;; colnames -> num
+;;
+;; optional apply proc to rownum colnum value
+;;
+;; Returns (list rownames colnames) where each entry is (key index).
+;; NOTE: as coded the first index handed out is 1 (rownum/colnum start at 0
+;;       and a new key gets the running maximum plus one), not 0 as in the
+;;       example above.
+;;
+;; A key that was seen before keeps the index recorded for it; proc, when
+;; given, is called as (proc rownum colnum rowkey colkey value) with those
+;; recorded indexes. Previously a repeated key was reported with the running
+;; maximum index, which is only right when all records of a row (column)
+;; are adjacent in data.
+;;
+(define (common:sparse-list-generate-index data #!key (proc #f))
+  (if (null? data)
+      (list '() '())
+      (let loop ((hed (car data))
+                 (tal (cdr data))
+                 (rownames '())
+                 (colnames '())
+                 (rownum   0)   ;; highest row index handed out so far
+                 (colnum   0))  ;; highest column index handed out so far
+        (let* ((rowkey          (car   hed))
+               (colkey          (cadr  hed))
+               (value           (caddr hed))
+               (existing-rowdat (assoc rowkey rownames))
+               (existing-coldat (assoc colkey colnames))
+               ;; reuse the index stored with a known key, else allocate the next one
+               (curr-rownum     (if existing-rowdat (cadr existing-rowdat) (+ rownum 1)))
+               (curr-colnum     (if existing-coldat (cadr existing-coldat) (+ colnum 1)))
+               (new-rownames    (if existing-rowdat rownames (cons (list rowkey curr-rownum) rownames)))
+               (new-colnames    (if existing-coldat colnames (cons (list colkey curr-colnum) colnames))))
+          ;; (debug:print-info 0 *default-log-port* "Processing record: " hed )
+          (if proc (proc curr-rownum curr-colnum rowkey colkey value))
+          (if (null? tal)
+              (list new-rownames new-colnames)
+              (loop (car tal)
+                    (cdr tal)
+                    new-rownames
+                    new-colnames
+                    (if (> curr-rownum rownum) curr-rownum rownum)
+                    (if (> curr-colnum colnum) curr-colnum colnum)
+                    ))))))
+
+;; if it looks like a number -> convert it to a number, else return it
+;;
+(define (common:lazy-convert inval)
+  (let* ((as-num (if (string? inval)(string->number inval) #f)))
+    (or as-num inval)))
+
+;; convert string a=1; b=2; c=a silly thing; d=
+;; to '((a . 1)(b . 2)(c . "a silly thing")(d . ""))
+;;
+(define (common:val->alist val #!key (convert #f))
+  (let ((val-list (string-split-fields ";\\s*" val #:infix)))
+    (if val-list
+        (map (lambda (x)
+               (let ((f (string-split-fields "\\s*=\\s*" x #:infix)))
+                 (case (length f)
+                   ((0) `(,#f)) ;; null string case
+                   ((1) `(,(string->symbol (car f))))
+                   ((2) `(,(string->symbol (car f)) . ,(let ((inval (cadr f)))
+                                                          (if convert (common:lazy-convert inval) inval))))
+                   (else f))))
+             val-list)
+        '())))
+
+;;======================================================================
+;; S Y S T E M S T U F F
+;;======================================================================
+
+;; lazy-safe get file mod time. on any error (file not existing etc.)
return 0 +;; +(define (common:lazy-modification-time fpath) + (handle-exceptions + exn + 0 + (file-modification-time fpath))) + +;; find timestamp of newest file associated with a sqlite db file +(define (common:lazy-sqlite-db-modification-time fpath) + (let* ((glob-list (handle-exceptions + exn + `(,(conc "/no/such/file, message: " ((condition-property-accessor 'exn 'message) exn))) + (glob (conc fpath "*")))) + (file-list (if (eq? 0 (length glob-list)) + '("/no/such/file") + glob-list))) + (apply max + (map + common:lazy-modification-time + file-list)))) + +;; return a nice clean pathname made absolute +(define (common:nice-path dir) + (let ((match (string-match "^(~[^\\/]*)(\\/.*|)$" dir))) + (if match ;; using ~ for home? + (common:nice-path (conc (common:read-link-f (cadr match)) "/" (caddr match))) + (normalize-pathname (if (absolute-pathname? dir) + dir + (conc (current-directory) "/" dir)))))) + +;; make "nice-path" available in config files and the repl +(define nice-path common:nice-path) + +(define (common:read-link-f path) + (handle-exceptions + exn + (begin + (debug:print-error 0 *default-log-port* "command \"/bin/readlink -f " path "\" failed.") + path) ;; just give up + (with-input-from-pipe + (conc "/bin/readlink -f " path) + (lambda () + (read-line))))) + +(define (get-cpu-load #!key (remote-host #f)) + (car (common:get-cpu-load remote-host))) +;; (let* ((load-res (process:cmd-run->list "uptime")) +;; (load-rx (regexp "load average:\\s+(\\d+)")) +;; (cpu-load #f)) +;; (for-each (lambda (l) +;; (let ((match (string-search load-rx l))) +;; (if match +;; (let ((newval (string->number (cadr match)))) +;; (if (number? newval) +;; (set! cpu-load newval)))))) +;; (car load-res)) +;; cpu-load)) + +;; get values from cached info from dropping file in logs dir +;; e.g. 
key is host and dtype is normalized-load
+;;
+(define (common:get-cached-info key dtype #!key (age 5))
+  (let* ((fullpath (conc *toppath* "/logs/" key "-" dtype ".log")))
+    (if (and (file-exists? fullpath)
+             (file-read-access? fullpath))
+        (handle-exceptions
+            exn
+            #f
+          (debug:print 2 *default-log-port* "reading file " fullpath)
+          (let ((real-age (- (current-seconds)(file-change-time fullpath))))
+            (if (< real-age age)
+                (with-input-from-file fullpath read)
+                (begin
+                  (debug:print 2 *default-log-port* "file " fullpath " is too old (" real-age" seconds)to trust, skipping reading it")
+                  #f))))
+        (begin
+          (debug:print 2 *default-log-port* "not reading file " fullpath)
+          #f))))
+
+(define (common:write-cached-info key dtype dat)
+  (let* ((fullpath (conc *toppath* "/logs/" key "-" dtype ".log")))
+    (handle-exceptions
+        exn
+        #f
+      (with-output-to-file fullpath (lambda ()(pp dat))))))
+
+;; get cpu load by reading from /proc/loadavg, return all three values
+;;
+(define (common:get-cpu-load remote-host)
+  (handle-exceptions
+      exn
+      '(99 99 99)
+    (let* ((actual-hostname (or remote-host (get-host-name) "localhost")))
+      (or (common:get-cached-info actual-hostname "cpu-load")
+          (let ((result (if remote-host
+                            (map (lambda (res)
+                                   (if (eof-object? res) 9e99 res))
+                                 (with-input-from-pipe
+                                     (conc "ssh " remote-host " cat /proc/loadavg")
+                                   (lambda ()(list (read)(read)(read)))))
+                            (with-input-from-file "/proc/loadavg"
+                              (lambda ()(list (read)(read)(read)))))))
+            (common:write-cached-info actual-hostname "cpu-load" result)
+            result)))))
+
+;; get normalized cpu load by reading from /proc/loadavg and /proc/cpuinfo return all three values and the number of real cpus and the number of threads
+;; returns alist '((adj-cpu-load . normalized-proc-load) ... etc.
+;; keys: adj-proc-load, adj-core-load, 1m-load, 5m-load, 15m-load
+;;
+;; Anything other than a list of more than two entries coming back from
+;; common:get-normalized-cpu-load-raw is replaced by the default alist below
+;; (which additionally carries proc, core, phys and error entries).
+;;
+(define (common:get-normalized-cpu-load remote-host)
+  (let ((res (common:get-normalized-cpu-load-raw remote-host))
+        (default `((adj-proc-load . 2) ;; there is no right answer
+                   (adj-core-load . 2)
+                   (1m-load . 2)
+                   (5m-load . 0) ;; causes a large delta - thus causing default of throttling if stuff goes wrong
+                   (15m-load . 0)
+                   (proc . 1)
+                   (core . 1)
+                   (phys . 1)
+                   (error . #t))))
+    (cond
+     ((and (list? res)
+           (> (length res) 2))
+      res)
+     ((eq? res #f) default) ;; add messages?
+     ((eof-object? res) default) ;; #!eof, e.g. an empty cache file was read back
+     (else default))))
+
+;; collect the lines of /proc/loadavg and /proc/cpuinfo (locally, or from
+;; remote-host over ssh) and reduce them to the alist described above
+;;
+(define (common:get-normalized-cpu-load-raw remote-host)
+  (let* ((actual-host (or remote-host (get-host-name)))) ;; #f is localhost
+    (or (common:get-cached-info actual-host "normalized-load")
+        (let ((data (if remote-host
+                        (with-input-from-pipe
+                            ;; the remote command must be quoted; unquoted, the local shell splits
+                            ;; at ";" and runs "cat /proc/cpuinfo" and "echo end" on this host
+                            (conc "ssh " remote-host " 'cat /proc/loadavg;cat /proc/cpuinfo;echo end'")
+                          read-lines)
+                        (append
+                         (with-input-from-file "/proc/loadavg"
+                           read-lines)
+                         (with-input-from-file "/proc/cpuinfo"
+                           read-lines)
+                         (list "end"))))
+              (load-rx (regexp "^([\\d\\.]+)\\s+([\\d\\.]+)\\s+([\\d\\.]+)\\s+.*$"))
+              (proc-rx (regexp "^processor\\s+:\\s+(\\d+)\\s*$"))
+              (core-rx (regexp "^core id\\s+:\\s+(\\d+)\\s*$"))
+              (phys-rx (regexp "^physical id\\s+:\\s+(\\d+)\\s*$"))
+              (max-num (lambda (p n)(max (string->number p) n))))
+          ;; (print "data=" data)
+          (if (null? data) ;; something went wrong
+              #f
+              (let loop ((hed (car data))
+                         (tal (cdr data))
+                         (loads #f)
+                         (proc-num 0) ;; processor includes threads
+                         (phys-num 0) ;; physical chip on motherboard
+                         (core-num 0)) ;; core
+                ;; (print hed ", " loads ", " proc-num ", " phys-num ", " core-num)
+                (if (null?
tal) ;; have all our data, calculate normalized load and return result + (let* ((act-proc (+ proc-num 1)) + (act-phys (+ phys-num 1)) + (act-core (+ core-num 1)) + (adj-proc-load (/ (car loads) act-proc)) + (adj-core-load (/ (car loads) act-core)) + (result + (append (list (cons 'adj-proc-load adj-proc-load) + (cons 'adj-core-load adj-core-load)) + (list (cons '1m-load (car loads)) + (cons '5m-load (cadr loads)) + (cons '15m-load (caddr loads))) + (list (cons 'proc act-proc) + (cons 'core act-core) + (cons 'phys act-phys))))) + (common:write-cached-info actual-host "normalized-load" result) + result) + (regex-case + hed + (load-rx ( x l1 l5 l15 ) (loop (car tal)(cdr tal)(map string->number (list l1 l5 l15)) proc-num phys-num core-num)) + (proc-rx ( x p ) (loop (car tal)(cdr tal) loads (max-num p proc-num) phys-num core-num)) + (phys-rx ( x p ) (loop (car tal)(cdr tal) loads proc-num (max-num p phys-num) core-num)) + (core-rx ( x c ) (loop (car tal)(cdr tal) loads proc-num phys-num (max-num c core-num))) + (else + (begin + ;; (print "NO MATCH: " hed) + (loop (car tal)(cdr tal) loads proc-num phys-num core-num))))))))))) + +(define (common:unix-ping hostname) + (let ((res (system (conc "ping -c 1 " hostname " > /dev/null")))) + (eq? res 0))) + +(define (get-uname . params) + (let* ((uname-res (process:cmd-run->list (conc "uname " (if (null? params) "-a" (car params))))) + (uname #f)) + (if (null? (car uname-res)) + "unknown" + (caar uname-res)))) + +;; for reasons I don't understand multiple calls to real-path in parallel threads +;; must be protected by mutexes +;; +(define (common:real-path inpath) + ;; (process:cmd-run-with-stderr->list "readlink" "-f" inpath)) ;; cmd . params) + ;; (let-values + ;; (((inp oup pid) (process "readlink" (list "-f" inpath)))) + ;; (with-input-from-port inp + ;; (let loop ((inl (read-line)) + ;; (res #f)) + ;; (print "inl=" inl) + ;; (if (eof-object? 
inl) + ;; (begin + ;; (close-input-port inp) + ;; (close-output-port oup) + ;; ;; (process-wait pid) + ;; res) + ;; (loop (read-line) inl)))))) + (with-input-from-pipe (conc "readlink -f " inpath) read-line)) + +(define (server:get-best-guess-address hostname) + (let ((res #f)) + (for-each + (lambda (adr) + (if (not (eq? (u8vector-ref adr 0) 127)) + (set! res adr))) + ;; NOTE: This can fail when there is no mention of the host in /etc/hosts. FIXME + (vector->list (hostinfo-addresses (hostname->hostinfo hostname)))) + (string-intersperse + (map number->string + (u8vector->list + (if res res (hostname->ip hostname)))) "."))) + +;;====================================================================== +;; D I S K S P A C E +;;====================================================================== + +(define (common:get-disk-space-used fpath) + (with-input-from-pipe (conc "/usr/bin/du -s " fpath) read)) + +;; given path get free space, allows override in [setup] +;; with free-space-script /path/to/some/script.sh +;; +(define (get-df path) + (if (configf:lookup *configdat* "setup" "free-space-script") + (with-input-from-pipe + (conc (configf:lookup *configdat* "setup" "free-space-script") " " path) + (lambda () + (let ((res (read-line))) + (if (string? res) + (string->number res))))) + (get-unix-df path))) + +(define (get-free-inodes path) + (if (configf:lookup *configdat* "setup" "free-inodes-script") + (with-input-from-pipe + (conc (configf:lookup *configdat* "setup" "free-inodes-script") " " path) + (lambda () + (let ((res (read-line))) + (if (string? res) + (string->number res))))) + (get-unix-inodes path))) + +(define (get-unix-df path) + (let* ((df-results (process:cmd-run->list (conc "df " path))) + (space-rx (regexp "([0-9]+)\\s+([0-9]+)%")) + (freespc #f)) + ;; (write df-results) + (for-each (lambda (l) + (let ((match (string-search space-rx l))) + (if match + (let ((newval (string->number (cadr match)))) + (if (number? newval) + (set! 
freespc newval)))))) + (car df-results)) + freespc)) + +(define (get-unix-inodes path) + (let* ((df-results (process:cmd-run->list (conc "df -i " path))) + (space-rx (regexp "([0-9]+)\\s+([0-9]+)%")) + (freenodes 0)) ;; 0 is a better failsafe than #f here. + ;; (write df-results) + (for-each (lambda (l) + (let ((match (string-search space-rx l))) + (if match + (let ((newval (string->number (cadr match)))) + (if (number? newval) + (set! freenodes newval)))))) + (car df-results)) + freenodes)) + +(define (common:check-space-in-dir dirpath required) + (let* ((dbspace (if (directory? dirpath) + (get-df dirpath) + 0))) + (list (> dbspace required) + dbspace + required + dirpath))) + +;; check space in dbdir and in megatest dir +;; returns: ok/not dbspace required-space +;; +(define (common:check-db-dir-space) + (let* ((required (string->number + (or (configf:lookup *configdat* "setup" "dbdir-space-required") + "100000"))) + (dbdir (common:get-db-tmp-area)) ;; (db:get-dbdir)) + (tdbspace (common:check-space-in-dir dbdir required)) + (mdbspace (common:check-space-in-dir *toppath* required))) + (sort (list tdbspace mdbspace) (lambda (a b) + (< (cadr a)(cadr b)))))) + +;; check available space in dbdir, exit if insufficient +;; +(define (common:check-db-dir-and-exit-if-insufficient) + (let* ((spacedat (car (common:check-db-dir-space))) ;; look only at worst for now + (is-ok (car spacedat)) + (dbspace (cadr spacedat)) + (required (caddr spacedat)) + (dbdir (cadddr spacedat))) + (if (not is-ok) + (begin + (debug:print-error 0 *default-log-port* "Insufficient space in " dbdir ", require " required ", have " dbspace ", exiting now.") + (exit 1))))) + +;; paths is list of lists ((name path) ... 
) +;; +(define (common:get-disk-with-most-free-space disks minsize) + (let ((best #f) + (bestsize 0) + (min-inodes (or (string->number (if (configf:lookup *configdat* "setup" "min_inodes") (configf:lookup *configdat* "setup" "min_inodes") "0")) 0))) + (for-each + (lambda (disk-num) + (let* ((dirpath (cadr (assoc disk-num disks))) + (freespc (cond + ((not (directory? dirpath)) + (if (common:low-noise-print 300 "disks not a dir " disk-num) + (debug:print 0 *default-log-port* "WARNING: disk " disk-num " at path \"" dirpath "\" is not a directory - ignoring it.")) + -1) + ((not (file-write-access? dirpath)) + (if (common:low-noise-print 300 "disks not writeable " disk-num) + (debug:print 0 *default-log-port* "WARNING: disk " disk-num " at path \"" dirpath "\" is not writeable - ignoring it.")) + -1) + ((not (eq? (string-ref dirpath 0) #\/)) + (if (common:low-noise-print 300 "disks not a proper path " disk-num) + (debug:print 0 *default-log-port* "WARNING: disk " disk-num " at path \"" dirpath "\" is not a fully qualified path - ignoring it.")) + -1) + (else + (get-df dirpath)))) + (free-inodes (cond + ((not (directory? dirpath)) + (if (common:low-noise-print 300 "disks not a dir " disk-num) + (debug:print 0 *default-log-port* "WARNING: disk " disk-num " at path \"" dirpath "\" is not a directory - ignoring it.")) + -1) + ((not (file-write-access? dirpath)) + (if (common:low-noise-print 300 "disks not writeable " disk-num) + (debug:print 0 *default-log-port* "WARNING: disk " disk-num " at path \"" dirpath "\" is not writeable - ignoring it.")) + -1) + ((not (eq? (string-ref dirpath 0) #\/)) + (if (common:low-noise-print 300 "disks not a proper path " disk-num) + (debug:print 0 *default-log-port* "WARNING: disk " disk-num " at path \"" dirpath "\" is not a fully qualified path - ignoring it.")) + -1) + (else + (get-free-inodes dirpath)))) + ;;(free-inodes (get-free-inodes dirpath)) + ) + (if (and (> freespc bestsize)(> free-inodes min-inodes )) + (begin + (set! 
best (cons disk-num dirpath))
+              (set! bestsize freespc)))
+        ;;(print "Processing: " disk-num " bestsize: " bestsize " best: " best " freespc: " freespc " min-inodes: " min-inodes " free-inodes: " free-inodes)
+        ))
+     (map car disks))
+    (if (and best (> bestsize minsize))
+        best
+        #f))) ;; #f means no disk candidate found
+
+;; convert a spec string to a list of vectors #( rx action rx-string )
+(define (common:spec-string->list-of-specs spec-string actions)
+  (let ((spec-strings  (string-split-fields "\\s*;\\s*" spec-string #:infix))
+        (actions-regex (regexp (conc "^(.*)\\s+(" (string-intersperse (map conc actions) "|") ")"))))
+    (filter
+     (lambda (x) x)
+     (map (lambda (s)
+            (let ((m (string-match actions-regex s)))
+              (if m
+                  (vector (regexp (cadr m))(string->symbol (caddr m))(cadr m))
+                  (begin
+                    (debug:print 0 *default-log-port* "WARNING: Unrecognised rule \"" s "\" in clean-up specification.")
+                    #f))))
+          spec-strings))))
+
+;; given a list of specs rx . rule and a file return the first matching rule
+;;
+;; returns the whole rule vector of the first rule whose regex matches fname
+;; or #f when nothing matches, including when rules is the empty list
+;; (common:spec-string->list-of-specs filters out unrecognised rules so an
+;; empty list is possible)
+;;
+(define (common:file-find-rule fname rules) ;; rule is vector #( rx action rx-string)
+  (if (null? rules)
+      #f ;; nothing to match against, do not take car of '()
+      (let loop ((rule (car rules))
+                 (tail (cdr rules)))
+        (if (string-match (vector-ref rule 0) fname)
+            rule ;; return the whole rule so regex can be printed etc.
+            (if (null? tail)
+                #f
+                (loop (car tail)(cdr tail)))))))
+
+;; given a spec apply some rules to a directory
+;;
+;; WARNING: This function will REMOVE files - be sure your spec and path is correct!
+;;
+;; spec format:
+;; file-regex1 action; file-regex2 action; ...
+;; e.g.
+;; .*\.log$ keep; .* remove
+;; --> keep all .log files, remove everything else
+;; limitations:
+;; cannot have a rule with ; as part of the spec
+;; not very flexible, would be nice to return binned file names?
+;; supported rules:
+;; keep - keep this file
+;; remove - remove this file
+;; compress - compress this file
+;;
+;; path         - directory tree walked with find-files
+;; spec-string  - rules as described above
+;; compress     - command run (via system) on files matching a compress rule
+;; actions      - action names accepted when parsing spec-string
+;; remove-empty - when true, directories found to be empty after the walk are deleted
+;;
+;; A file matched by no rule is left alone and reported with "No match for file".
+;;
+(define (common:dir-clean-up path spec-string #!key (compress "gzip")(actions '(keep remove compress))(remove-empty #f))
+  (let* ((specs       (common:spec-string->list-of-specs spec-string actions))
+         (keepers     (make-hash-table))
+         (directories (make-hash-table)))
+    (find-files
+     path
+     action: (lambda (p res)
+               (let* ((rule   (common:file-find-rule p specs))
+                      ;; rule is #f when no spec matched; #f then falls through to the else branch of the case
+                      (action (if rule (vector-ref rule 1) #f)))
+                 (cond
+                  ((directory? p)(hash-table-set! directories p #t))
+                  (else
+                   (case action
+                     ((keep)(hash-table-set! keepers p rule))
+                     ((remove)
+                      (print "Removing file " p)
+                      (delete-file p))
+                     ((compress)
+                      (print "Compressing file " p)
+                      (system (conc compress " " p)))
+                     (else
+                      (print "No match for file " p))))))))
+    (if remove-empty
+        (for-each
+         (lambda (d)
+           (if (null? (glob (conc d "/.*")(conc d "/*")))
+               (begin
+                 (print "Removing empty directory " d)
+                 (delete-directory d))))
+         ;; longest paths first
+         (sort (hash-table-keys directories) (lambda (a b)(> (string-length a)(string-length b))))))
+    ))
+
+;;======================================================================
+;; E N V I R O N M E N T V A R S
+;;======================================================================
+(define (bb-check-path #!key (msg "check-path: "))
+  (let ((path (or (get-environment-variable "PATH") "none")))
+    (debug:print-info 0 *default-log-port* (conc msg" : $PATH="path))
+    (if (string-match "^.*/isoenv-core/.*" path)
+        (debug:print-error 0 *default-log-port* (conc msg" : !!ISOENV PRESENT!!")) ;; remove for prod
+        (debug:print-info 1 *default-log-port* (conc msg" : **no isoenv present**")))))
+
+
+(define (save-environment-as-files fname #!key (ignorevars (list "USER" "HOME" "DISPLAY" "LS_COLORS" "XKEYSYMDB" "EDITOR" "MAKEFLAGS" "MAKEF" "MAKEOVERRIDES")))
+  ;;(bb-check-path msg: "save-environment-as-files entry")
+  (let ((envvars (get-environment-variables))
+        (whitesp (regexp "[^a-zA-Z0-9_\\-:,\\.\\/%$]"))
+        (mungeval (lambda
(val) + (cond + ((eq? val #t) "") ;; convert #t to empty string + ((eq? val #f) #f) ;; convert #f to itself (still thinking about this one + (else val))))) + (with-output-to-file (conc fname ".csh") + (lambda () + (for-each (lambda (keyval) + (let* ((key (car keyval)) + (val (cdr keyval)) + (delim (if (string-search whitesp val) + "\"" + ""))) + (print (if (or (member key ignorevars) + (string-search whitesp key)) + "# setenv " + "setenv ") + key " " delim (mungeval val) delim))) + envvars))) + (with-output-to-file (conc fname ".sh") + (lambda () + (for-each (lambda (keyval) + (let* ((key (car keyval)) + (val (cdr keyval)) + (delim (if (string-search whitesp val) + "\"" + ""))) + (print (if (or (member key ignorevars) + (string-search whitesp key) + (string-search ":" key)) ;; internal only values to be skipped. + "# export " + "export ") + key "=" delim (mungeval val) delim))) + envvars))))) + + +(define (common:get-param-mapping #!key (flavor #f)) + "returns alist mapping string keys in testconfig/subrun to megatest command line switches; if flavor is switch-symbol, maps tcmt symbolic switches to megatest switches" + (let ((default '(("tag-expr" . "-tagexpr") + ("mode-patt" . "-modepatt") + ("run-name" . "-runname") + ("contour" . "-contour") + ("target" . "-target") + ("test-patt" . "-testpatt") + ("msg" . "-m") + ("log" . "-log") + ("start-dir" . "-start-dir") + ("new" . "-set-state-status")))) + (if (eq? flavor 'switch-symbol) + (map (lambda (x) + (cons (string->symbol (conc "-" (car x))) (cdr x))) + default) + default))) + +;; set some env vars from an alist, return an alist with original values +;; (("VAR" "value") ...) +;; a value of #f means "unset this var" +;; +(define (alist->env-vars lst) + (if (list? lst) + (let ((res '())) + (for-each (lambda (p) + (let* ((var (car p)) + (val (cadr p)) + (prv (get-environment-variable var))) + (set! 
res (cons (list var prv) res)) + (if val + (safe-setenv var (->string val)) + (unsetenv var)))) + lst) + res) + '())) + + +;; clear vars matching pattern, run proc, set vars back +;; if proc is a string run that string as a command with +;; system. +;; +(define *common:orig-env* + (let ((envvars (get-environment-variables))) + (if (get-environment-variable "MT_ORIG_ENV") + (with-input-from-string + (z3:decode-buffer (base64:base64-decode (get-environment-variable "MT_ORIG_ENV"))) + read) + (filter-map (lambda (x) + (if (string-match "^MT_.*" (car x)) + #f + x)) + envvars)))) + +(define (common:with-orig-env proc) + (let ((current-env (get-environment-variables))) + (for-each (lambda (x) (unsetenv (car x))) current-env) + (for-each (lambda (x) (setenv (car x) (cdr x))) *common:orig-env*) + (let ((rv (cond + ((string? proc)(system proc)) + (proc (proc))))) + (for-each (lambda (x) (unsetenv (car x))) *common:orig-env*) + (for-each (lambda (x) (setenv (car x) (cdr x))) current-env) + rv))) + +(define (common:without-vars proc . var-patts) + (let ((vars (make-hash-table))) + (for-each + (lambda (vardat) ;; each env var + (for-each + (lambda (var-patt) + (if (string-match var-patt (car vardat)) + (let ((var (car vardat)) + (val (cdr vardat))) + (hash-table-set! vars var val) + (unsetenv var)))) + var-patts)) + (get-environment-variables)) + (cond + ((string? 
proc)(system proc)) + (proc (proc))) + (hash-table-for-each + vars + (lambda (var val) + (setenv var val))) + vars)) + + +;;====================================================================== +;; C O L O R S +;;====================================================================== + +(define (common:name->iup-color name) + (case (string->symbol (string-downcase name)) + ((red) "223 33 49") + ((grey) "192 192 192") + ((orange) "255 172 13") + ((purple) "This is unfinished ..."))) + +;; (define (common:get-color-for-state-status state status) +;; (case (string->symbol state) +;; ((COMPLETED) +;; (case (string->symbol status) +;; ((PASS) "70 249 73") +;; ((WARN WAIVED) "255 172 13") +;; ((SKIP) "230 230 0") +;; (else "223 33 49"))) +;; ((LAUNCHED) "101 123 142") +;; ((CHECK) "255 100 50") +;; ((REMOTEHOSTSTART) "50 130 195") +;; ((RUNNING) "9 131 232") +;; ((KILLREQ) "39 82 206") +;; ((KILLED) "234 101 17") +;; ((NOT_STARTED) "240 240 240") +;; (else "192 192 192"))) + +(define (common:iup-color->rgb-hex instr) + (string-intersperse + (map (lambda (x) + (number->string x 16)) + (map string->number + (string-split instr))) + "/")) + + +;;====================================================================== +;; +;;====================================================================== + +(define (common:in-running-test?) + (and (args:get-arg "-execute") (get-environment-variable "MT_CMDINFO"))) + +(define (common:get-color-from-status status) + (cond + ((equal? status "PASS") "green") + ((equal? status "FAIL") "red") + ((equal? status "WARN") "orange") + ((equal? status "KILLED") "orange") + ((equal? status "KILLREQ") "purple") + ((equal? status "RUNNING") "blue") + ((equal? 
status "ABORT") "brown") + (else "black"))) + +;; ;;====================================================================== +;; ;; N A N O M S G C L I E N T +;; ;;====================================================================== +;; +;; +;; +;; (define (common:send-dboard-main-changed) +;; (let* ((dashboard-ips (mddb:get-dashboards))) +;; (for-each +;; (lambda (ipadr) +;; (let* ((soc (common:open-nm-req (conc "tcp://" ipadr))) +;; (msg (conc "main " *toppath*)) +;; (res (common:nm-send-receive-timeout soc msg))) +;; (if (not res) ;; couldn't reach that dashboard - remove it from db +;; (print "ERROR: couldn't reach dashboard " ipadr)) +;; res)) +;; dashboard-ips))) +;; +;; +;; ;;====================================================================== +;; ;; D A S H B O A R D D B +;; ;;====================================================================== +;; +;; (define (mddb:open-db) +;; (let* ((db (open-database (conc (get-environment-variable "HOME") "/.dashboard.db")))) +;; (set-busy-handler! 
db (busy-timeout 10000)) +;; (for-each +;; (lambda (qry) +;; (exec (sql db qry))) +;; (list +;; "CREATE TABLE IF NOT EXISTS vars (id INTEGER PRIMARY KEY,key TEXT, val TEXT, CONSTRAINT varsconstraint UNIQUE (key));" +;; "CREATE TABLE IF NOT EXISTS dashboards ( +;; id INTEGER PRIMARY KEY, +;; pid INTEGER, +;; username TEXT, +;; hostname TEXT, +;; ipaddr TEXT, +;; portnum INTEGER, +;; start_time TIMESTAMP DEFAULT (strftime('%s','now')), +;; CONSTRAINT hostport UNIQUE (hostname,portnum) +;; );" +;; )) +;; db)) +;; +;; ;; register a dashboard +;; ;; +;; (define (mddb:register-dashboard port) +;; (let* ((pid (current-process-id)) +;; (hostname (get-host-name)) +;; (ipaddr (server:get-best-guess-address hostname)) +;; (username (current-user-name)) ;; (car userinfo))) +;; (db (mddb:open-db))) +;; (print "Register monitor, pid: " pid ", hostname: " hostname ", port: " port ", username: " username) +;; (exec (sql db "INSERT OR REPLACE INTO dashboards (pid,username,hostname,ipaddr,portnum) VALUES (?,?,?,?,?);") +;; pid username hostname ipaddr port) +;; (close-database db))) +;; +;; ;; unregister a monitor +;; ;; +;; (define (mddb:unregister-dashboard host port) +;; (let* ((db (mddb:open-db))) +;; (print "Register unregister monitor, host:port=" host ":" port) +;; (exec (sql db "DELETE FROM dashboards WHERE hostname=? AND portnum=?;") host port) +;; (close-database db))) +;; +;; ;; get registered dashboards +;; ;; +;; (define (mddb:get-dashboards) +;; (let ((db (mddb:open-db))) +;; (query fetch-column +;; (sql db "SELECT ipaddr || ':' || portnum FROM dashboards;")))) + +;;====================================================================== +;; NMSG AND NEW API +;;====================================================================== + +;; nm based server experiment, keep around for now. 
+;; +#;(define (nm:start-server dbconn #!key (given-host-name #f)) + (let* ((srvdat (start-raw-server given-host-name: given-host-name)) + (host-name (srvdat-host srvdat)) + (soc (srvdat-soc srvdat))) + + ;; start the queue processor (save for second round of development) + ;; + (thread-start! (make-thread! (lambda ()(queue-processor dbconn) "Queue processor"))) + ;; msg is an alist + ;; 'r host:port <== where to return the data + ;; 'p params <== data to apply the command to + ;; 'e j|s|l <== encoding of the params. default is s (sexp), if not specified is assumed to be default + ;; 'c command <== look up the function to call using this key + ;; + (let loop ((msg-in (nn-recv soc))) + (if (not (equal? msg-in "quit")) + (let* ((dat (decode msg-in)) + (host-port (alist-ref 'r dat)) ;; this is for the reverse req rep where the server is a client of the original client + (params (alist-ref 'p dat)) + (command (let ((c (alist-ref 'c dat)))(if c (string->symbol c) #f))) + (all-good (and host-port params command (hash-table-exists? *commands* command)))) + (if all-good + (let ((cmddat (make-qitem + command: command + host-port: host-port + params: params))) + (queue-push cmddat) ;; put request into the queue + (nn-send soc "queued")) ;; reply with "queued" + (print "ERROR: ["(common:human-time)"] BAD request " dat)) + (loop (nn-recv soc))))) + (nn-close soc))) + +;;====================================================================== +;; D A S H B O A R D U S E R V I E W S +;;====================================================================== + +;; first read ~/views.config if it exists, then read $MTRAH/views.config if it exists +;; +(define (common:load-views-config) + (let* ((view-cfgdat (make-hash-table)) + (home-cfgfile (conc (get-environment-variable "HOME") "/.mtviews.config")) + (mthome-cfgfile (conc *toppath* "/.mtviews.config"))) + (if (common:file-exists? 
mthome-cfgfile) + (configf:read-config mthome-cfgfile view-cfgdat #t)) + ;; we load the home dir file AFTER the MTRAH file so the user can clobber settings when running the dashboard in read-only areas + (if (common:file-exists? home-cfgfile) + (configf:read-config home-cfgfile view-cfgdat #t)) + view-cfgdat)) + +;;====================================================================== +;; H I E R A R C H I C A L H A S H T A B L E S +;;====================================================================== + +;; Every element including top element is a vector: +;; + +(define (hh:make-hh #!key (ht #f)(value #f)) + (vector (or ht (make-hash-table)) value)) + +;; used internally +(define-inline (hh:set-ht! hh ht) (vector-set! hh 0 ht)) +(define-inline (hh:get-ht hh) (vector-ref hh 0)) +(define-inline (hh:set-value! hh value) (vector-set! hh 1 value)) +(define-inline (hh:get-value hh value) (vector-ref hh 1)) + +;; given a hierarchial hash and some keys look up the value ... +;; +(define (hh:get hh . keys) + (if (null? keys) + (vector-ref hh 1) ;; we have reached the end of the line, return the value sought + (let ((sub-ht (hh:get-ht hh))) + (if sub-ht ;; yes, there is more hierarchy + (let ((sub-hh (hash-table-ref/default sub-ht (car keys) #f))) + (if sub-hh + (apply hh:get sub-hh (cdr keys)) + #f)) + #f)))) + +;; given a hierarchial hash, a value and some keys, add needed hierarcy and insert the value +;; +(define (hh:set! hh value . keys) + (if (null? keys) + (hh:set-value! hh value) ;; we have reached the end of the line, store the value + (let ((sub-ht (hh:get-ht hh))) + (if sub-ht ;; yes, there is more hierarchy + (let ((sub-hh (hash-table-ref/default sub-ht (car keys) #f))) + (if (not sub-hh) ;; we'll need to add the next level of hierarchy + (let ((new-sub-hh (hh:make-hh))) + (hash-table-set! sub-ht (car keys) new-sub-hh) + (apply hh:set! new-sub-hh value (cdr keys))) + (apply hh:set! 
sub-hh value (cdr keys)))) ;; call the sub-hierhash with remaining keys + (begin + (hh:set-ht! hh (make-hash-table)) + (apply hh:set! hh value keys)))))) + +;; Manage pkts, used in servers, tests and likely other contexts so put +;; in common +;;====================================================================== + +(define common:pkts-spec + '((default . ((parent . P) + (action . a) + (filename . f))) + (configf . ((parent . P) + (action . a) + (filename . f))) + (server . ((action . a) + (pid . d) + (ipaddr . i) + (port . p) + (parent . P))) + + (test . ((cpuuse . c) + (diskuse . d) + (item-path . i) + (runname . r) + (state . s) + (target . t) + (status . u) + (parent . P))))) + +(define (common:get-pkts-dirs mtconf use-lt) + (let* ((pktsdirs-str (or (configf:lookup mtconf "setup" "pktsdirs") + (and use-lt + (conc (or *toppath* + (current-directory)) + "/lt/.pkts")))) + (pktsdirs (if pktsdirs-str + (string-split pktsdirs-str " ") + #f))) + pktsdirs)) + +;; use-lt is use linktree "lt" link to find pkts dir +(define (common:save-pkt pktalist-in mtconf use-lt #!key (add-only #f)) ;; add-only saves the pkt only if there is a parent already + (if (or add-only + (hash-table-exists? *pkts-info* 'last-parent)) + (let* ((parent (hash-table-ref/default *pkts-info* 'last-parent #f)) + (pktalist (if parent + (cons `(parent . ,parent) + pktalist-in) + pktalist-in))) + (let-values (((uuid pkt) + (alist->pkt pktalist common:pkts-spec))) + (hash-table-set! *pkts-info* 'last-parent uuid) + (let ((pktsdir (or (hash-table-ref/default *pkts-info* 'pkts-dir #f) + (let* ((pktsdirs (common:get-pkts-dirs mtconf use-lt)) + (pktsdir (car pktsdirs))) ;; assume it is there + (hash-table-set! *pkts-info* 'pkts-dir pktsdir) + pktsdir)))) + (handle-exceptions + exn + (debug:print-info 0 "failed to write out packet to " pktsdir) ;; don't care if this failed for now but MUST FIX - BUG!! + (if (not (file-exists? 
pktsdir)) + (create-directory pktsdir #t)) + (with-output-to-file + (conc pktsdir "/" uuid ".pkt") + (lambda () + (print pkt))))))))) + +(define (common:with-queue-db mtconf proc #!key (use-lt #f)(toppath-in #f)) + (let* ((pktsdirs (common:get-pkts-dirs mtconf use-lt)) + (pktsdir (if pktsdirs (car pktsdirs) #f)) + (toppath (or (configf:lookup mtconf "scratchdat" "toppath") + toppath-in)) + (pdbpath (or (configf:lookup mtconf "setup" "pdbpath") pktsdir))) + (cond + ((not (and pktsdir toppath pdbpath)) + (debug:print 0 *default-log-port* "ERROR: settings are missing in your megatest.config for area management.") + (debug:print 0 *default-log-port* " you need to have pktsdirs in the [setup] section.")) + ((not (common:file-exists? pktsdir)) + (debug:print 0 *default-log-port* "ERROR: pkts directory not found " pktsdir)) + ((not (equal? (file-owner pktsdir)(current-effective-user-id))) + (debug:print 0 *default-log-port* "ERROR: directory " pktsdir " is not owned by " (current-effective-user-name))) + (else + (let* ((pdb (open-queue-db pdbpath "pkts.db" + schema: '("CREATE TABLE groups (id INTEGER PRIMARY KEY,groupname TEXT, CONSTRAINT group_constraint UNIQUE (groupname));")))) + (proc pktsdirs pktsdir pdb) + (dbi:close pdb)))))) + +(define (common:load-pkts-to-db mtconf #!key (use-lt #f)) + (common:with-queue-db + mtconf + (lambda (pktsdirs pktsdir pdb) + (for-each + (lambda (pktsdir) ;; look at all + (cond + ((not (common:file-exists? pktsdir)) + (debug:print 0 *default-log-port* "ERROR: packets directory " pktsdir " does not exist.")) + ((not (directory? pktsdir)) + (debug:print 0 *default-log-port* "ERROR: packets directory path " pktsdir " is not a directory.")) + ((not (file-read-access? 
pktsdir)) + (debug:print 0 *default-log-port* "ERROR: packets directory path " pktsdir " is not readable.")) + (else + (debug:print-info 0 *default-log-port* "Loading packets found in " pktsdir) + (let ((pkts (glob (conc pktsdir "/*.pkt")))) + (for-each + (lambda (pkt) + (let* ((uuid (cadr (string-match ".*/([0-9a-f]+).pkt" pkt))) + (exists (lookup-by-uuid pdb uuid #f))) + (if (not exists) + (let* ((pktdat (string-intersperse + (with-input-from-file pkt read-lines) + "\n")) + (apkt (pkt->alist pktdat)) + (ptype (alist-ref 'T apkt))) + (add-to-queue pdb pktdat uuid (or ptype 'cmd) #f 0) + (debug:print 4 *default-log-port* "Added " uuid " of type " ptype " to queue")) + (debug:print 4 *default-log-port* "pkt: " uuid " exists, skipping...") + ))) + pkts))))) + pktsdirs)) + use-lt: use-lt)) + +(define (common:get-pkt-alists pkts) + (map (lambda (x) + (alist-ref 'apkt x)) ;; 'pkta pulls out the alist from the read pkt + pkts)) + +;; given list of pkts (alist mode) return list of D cards as Unix epoch, sorted descending +;; also delete duplicates by target i.e. (car pkt) +;; +(define (common:get-pkt-times pkts) + (delete-duplicates + (sort + (map (lambda (x) + `(,(alist-ref 't x) . ,(string->number (alist-ref 'D x)))) + pkts) + (lambda (a b)(> (cdr a)(cdr b)))) ;; sort descending + (lambda (a b)(equal? (car a)(car b))))) ;; remove duplicates by target + +(define *common:thread-punchlist* (make-hash-table)) +(define (common:send-thunk-to-background-thread thunk #!key (name #f)) + ;;(BB> "launched thread " name) + + ;; we need a unique name for the thread. + (let* ((realname (if name + (if (not (hash-table-ref/default *common:thread-punchlist* name #f)) + name + (conc name"-" (symbol->string (gensym)))) + (conc "anonymous-"(symbol->string (gensym))))) + (realthunk (lambda () + (let ((res (thunk))) + (hash-table-delete! *common:thread-punchlist* realname) + res))) + (thread (make-thread realthunk realname))) + (hash-table-set! 
*common:thread-punchlist* realname thread) + (thread-start! thread) + )) + +(define (common:join-backgrounded-threads) + ;; may need to trap and ignore exceptions -- dunno how atomic threads are... + (for-each + (lambda (thread-name) + (let* ((thread (hash-table-ref/default *common:thread-punchlist* thread-name #f))) + (if thread + (handle-exceptions + exn + #t ;; just ignore it, it might have died in the meantime so joining it will throw an exception + (thread-join! thread)) + ))) + (hash-table-keys *common:thread-punchlist*))) + +;; (define *common:telemetry-log-state* 'startup) +;; (define *common:telemetry-log-socket* #f) +;; +;; (define (common:telemetry-log-open) +;; (if (eq? *common:telemetry-log-state* 'startup) +;; (let* ((serverhost (configf:lookup *configdat* "telemetry" "host")) +;; (serverport (configf:lookup-number *configdat* "telemetry" "port")) +;; (user (or (get-environment-variable "USER") "unknown")) +;; (host (or (get-environment-variable "HOST") "unknown"))) +;; (set! *common:telemetry-log-state* +;; (handle-exceptions +;; exn +;; (begin +;; (debug:print-info 0 *default-log-port* "common-telemetry-log open udp port failure") +;; 'broken) +;; (if (and serverhost serverport user host) +;; (let* ((s (udp-open-socket))) +;; ;;(udp-bind! s #f 0) +;; (udp-connect! s serverhost serverport) +;; (set! *common:telemetry-log-socket* s) +;; 'open) +;; 'not-needed)))))) +;; +;; (define (common:telemetry-log event #!key (payload '())) +;; (if (eq? *common:telemetry-log-state* 'startup) +;; (common:telemetry-log-open)) +;; +;; (if (eq? 'open *common:telemetry-log-state*) +;; (handle-exceptions +;; exn +;; (begin +;; (debug:print-info 0 *default-log-port* "common-telemetry-log comms failure ; disabled (no server?)") +;; ;;(define *common:telemetry-log-state* 'broken-or-no-server-preclose) +;; ;;(common:telemetry-log-close) +;; (define *common:telemetry-log-state* 'broken-or-no-server) +;; (set! 
*common:telemetry-log-socket* #f) +;; ) +;; (if (and *common:telemetry-log-socket* event) ;; TODO - filter on event against telemetry.want-events +;; (let* ((user (or (get-environment-variable "USER") "unknown")) +;; (host (or (get-environment-variable "HOST") "unknown")) +;; (start (conc "[megatest "event"]")) +;; (toppath (or *toppath* "/dev/null")) +;; (payload-serialized +;; (base64:base64-encode +;; (z3:encode-buffer +;; (with-output-to-string (lambda () (pp payload)))))) +;; (msg (conc user":"host":"start":"(current-process-id)":"(car (argv))":" +;; toppath":"payload-serialized))) +;; (udp-send *common:telemetry-log-socket* msg)))))) +;; +;; (define (common:telemetry-log-close) +;; (when (or (member *common:telemetry-log-state* '(broken-or-no-server-preclose open)) *common:telemetry-log-socket*) +;; (handle-exceptions +;; exn +;; (begin +;; (define *common:telemetry-log-state* 'closed-fail) +;; (debug:print-info 0 *default-log-port* "common-telemetry-log closure failure") +;; ) +;; (begin +;; (define *common:telemetry-log-state* 'closed) +;; (udp-close-socket *common:telemetry-log-socket*) +;; (set! *common:telemetry-log-socket* #f))))) + +;;====================================================================== +;; process related stuff +;;====================================================================== + +(define (process:alive? pid) + (handle-exceptions + exn + ;; possibly pid is a process not a child, look in /proc to see if it is running still + (common:file-exists? (conc "/proc/" pid)) + (let-values (((rpid exit-type exit-signal)(process-wait pid #t))) + (and (number? rpid) + (equal? rpid pid))))) + +(define (process:alive-on-host? host pid) + (let ((cmd (conc "ssh " host " ps -o pid= -p " pid))) + (handle-exceptions + exn + #f ;; anything goes wrong - assume the process in NOT running. + (with-input-from-pipe + cmd + (lambda () + (let loop ((inl (read-line))) + (if (eof-object? 
inl) + #f + (let* ((clean-str (string-substitute "^[^\\d]*([0-9]+)[^\\d]*$" "\\1" inl)) + (innum (string->number clean-str))) + (and innum + (eq? pid innum)))))))))) + +(define (process:get-sub-pids pid) + (with-input-from-pipe + (conc "pstree -A -p " pid) ;; | tr 'a-z\\-+`()\\.' ' ' " pid) + (lambda () + (let loop ((inl (read-line)) + (res '())) + (if (eof-object? inl) + (reverse res) + (let ((nums (map string->number + (string-split-fields "\\d+" inl)))) + (loop (read-line) + (append res nums)))))))) + +;; here is an example line where the shell is sh or bash +;; "find / -print 2&>1 > findall.log" +(define (run-n-wait cmdline #!key (params #f)(print-cmd #f)(run-dir #f)) + (if print-cmd + (debug:print 0 *default-log-port* + (if (string? print-cmd) + print-cmd + "") + (if run-dir (conc "Run in " run-dir ";") "") + cmdline + (if params + (conc " " (string-intersperse params " ")) + ""))) + (if (and run-dir + (directory-exists? run-dir)) + (push-directory run-dir)) + (let ((pid (if params + (process-run cmdline params) + (process-run cmdline)))) + (let loop ((i 0)) + (let-values (((pid-val exit-status exit-code) (process-wait pid #t))) + (if (eq? pid-val 0) + (begin + (thread-sleep! 2) + (loop (+ i 1))) + (begin + (if (and run-dir + (directory-exists? run-dir)) + (pop-directory)) + (values pid-val exit-status exit-code))))))) + + + +;; accept an alist or hash table containing envvar/env value pairs (value of #f causes unset) +;; execute thunk in context of environment modified as per this list +;; restore env to prior state then return value of eval'd thunk. +;; ** this is not thread safe ** +(define (common:with-env-vars delta-env-alist-or-hash-table thunk) + (let* ((delta-env-alist (if (hash-table? delta-env-alist-or-hash-table) + (hash-table->alist delta-env-alist-or-hash-table) + delta-env-alist-or-hash-table)) + (restore-thunks + (filter + identity + (map (lambda (env-pair) + (let* ((env-var (car env-pair)) + (new-val (let ((tmp (cdr env-pair))) + (if (list? 
tmp) (car tmp) tmp))) + (current-val (get-environment-variable env-var)) + (restore-thunk + (cond + ((not current-val) (lambda () (unsetenv env-var))) + ((not (string? new-val)) #f) + ((eq? current-val new-val) #f) + (else + (lambda () (setenv env-var current-val)))))) + ;;(when (not (string? new-val)) + ;; (debug:print 0 *default-log-port* " PROBLEM: not a string: "new-val"\n from env-alist:\n"delta-env-alist) + ;; (pp delta-env-alist) + ;; (exit 1)) + + + (cond + ((not new-val) ;; modify env here + (unsetenv env-var)) + ((string? new-val) + (setenv env-var new-val))) + restore-thunk)) + delta-env-alist)))) + (let ((rv (thunk))) + (for-each (lambda (x) (x)) restore-thunks) ;; restore env to original state + rv))) + +;;====================================================================== +;; Process convience utils +;;====================================================================== + +(define (process:conservative-read port) + (let loop ((res "")) + (if (not (eof-object? (peek-char port))) + (loop (conc res (read-char port))) + res))) + +(define (process:cmd-run-with-stderr->list cmd . params) + ;; (print "Called with cmd=" cmd ", proc=" proc ", params=" params) +;; (handle-exceptions +;; exn +;; (begin +;; (print "ERROR: Failed to run command: " cmd " " (string-intersperse params " ")) +;; (print " " ((condition-property-accessor 'exn 'message) exn)) +;; #f) + (let-values (((fh fho pid fhe) (if (null? params) + (process* cmd) + (process* cmd params)))) + (let loop ((curr (read-line fh)) + (result '())) + (let ((errstr (process:conservative-read fhe))) + (if (not (string=? errstr "")) + (set! result (append result (list errstr))))) + (if (not (eof-object? curr)) + (loop (read-line fh) + (append result (list curr))) + (begin + (close-input-port fh) + (close-input-port fhe) + (close-output-port fho) + result))))) ;; ) + +(define (process:cmd-run-with-stderr-and-exitcode->list cmd . 
params) + ;; (print "Called with cmd=" cmd ", proc=" proc ", params=" params) +;; (handle-exceptions +;; exn +;; (begin +;; (print "ERROR: Failed to run command: " cmd " " (string-intersperse params " ")) +;; (print " " ((condition-property-accessor 'exn 'message) exn)) +;; #f) + (let-values (((fh fho pid fhe) (if (null? params) + (process* cmd) + (process* cmd params)))) + (let loop ((curr (read-line fh)) + (result '())) + (let ((errstr (process:conservative-read fhe))) + (if (not (string=? errstr "")) + (set! result (append result (list errstr))))) + (if (not (eof-object? curr)) + (loop (read-line fh) + (append result (list curr))) + (begin + (let-values (((anotherpid normalexit? exitstatus) (process-wait pid))) + (close-input-port fh) + (close-input-port fhe) + (close-output-port fho) + (list result (if normalexit? exitstatus -1)))))))) + +#;(define (process:cmd-run-proc-each-line cmd proc . params) + ;; (print "Called with cmd=" cmd ", proc=" proc ", params=" params) + (handle-exceptions + exn + (begin + (print "ERROR: Failed to run command: " cmd " " (string-intersperse params " ")) + (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) + (debug:print 5 *default-log-port* "exn=" (condition->list exn)) + #f) + (let-values (((fh fho pid) (if (null? params) + (process cmd) + (process cmd params)))) + (let loop ((curr (read-line fh)) + (result '())) + (if (not (eof-object? curr)) + (loop (read-line fh) + (append result (list (proc curr)))) + (begin + (close-input-port fh) + ;;(close-input-port fhe) + (close-output-port fho) + result)))))) + +#;(define (process:cmd-run-proc-each-line-alt cmd proc) + (let* ((fh (open-input-pipe cmd)) + (res (port-proc->list fh proc)) + (status (close-input-pipe fh))) + (if (eq? 
status 0) res #f))) + +(define (process:cmd-run->list cmd #!key (delta-env-alist-or-hash-table '())) + (common:with-env-vars + delta-env-alist-or-hash-table + (lambda () + (let* ((fh (open-input-pipe cmd)) + (res (port->list fh)) + (status (close-input-pipe fh))) + (list res status))))) + +(define (port->list fh) + (if (eof-object? fh) #f + (let loop ((curr (read-line fh)) + (result '())) + (if (not (eof-object? curr)) + (loop (read-line fh) + (append result (list curr))) + result)))) + +(define (port-proc->list fh proc) + (if (eof-object? fh) #f + (let loop ((curr (proc (read-line fh))) + (result '())) + (if (not (eof-object? curr)) + (loop (let ((l (read-line fh))) + (if (eof-object? l) l (proc l))) + (append result (list curr))) + result)))) + +;;====================================================================== +;; MISC PROCESS RELATED STUFF +;;====================================================================== + +(define (process:children proc) + (with-input-from-pipe + (conc "ps h --ppid " (current-process-id) " -o pid") + (lambda () + (let loop ((inl (read-line)) + (res '())) + (if (eof-object? inl) + (reverse res) + (let ((pid (string->number inl))) + (if proc (proc pid)) + (loop (read-line) (cons pid res)))))))) + ) Index: dashboard.scm ================================================================== --- dashboard.scm +++ dashboard.scm @@ -31,22 +31,22 @@ #;(prefix mtconfigf configf:) ) (import (prefix sqlite3 sqlite3:)) (declare (uses mtargs)) -(import (prefix mtargs args:)) (declare (uses pkts)) -(import pkts) (declare (uses ulex)) -(import ulex) (declare (uses cookie)) +(declare (uses stml2)) +(declare (uses mtconfigf)) +(declare (uses ducttape-lib)) +(import (prefix mtargs args:)) +(import pkts) +(import ulex) (import cookie) -(declare (uses stml2)) (import stml2) -(declare (uses mtconfigf)) (import (prefix mtconfigf configf:)) -(declare (uses ducttape-lib)) (import ducttape-lib) ;; invoke the imports - ORDER IS IMPORTANT! 
(declare (uses mtargs.import)) (declare (uses pkts.import)) @@ -55,24 +55,26 @@ (declare (uses stml2.import)) (declare (uses ducttape-lib.import)) (declare (uses mtconfigf.import)) (declare (uses gutilsmod)) -(import gutilsmod) (declare (uses megamod)) -(import megamod) (declare (uses commonmod)) -(import commonmod) (declare (uses rmtmod)) -(import rmtmod) (declare (uses runsmod)) -(import runsmod) (declare (uses dbmod)) -(import dbmod) (declare (uses testsmod)) -(import testsmod) (declare (uses dcommonmod)) + + +(import gutilsmod) +(import megamod) +(import commonmod) +(import rmtmod) +(import runsmod) +(import dbmod) +(import testsmod) (import dcommonmod) (include "common_records.scm") (include "db_records.scm") (include "run_records.scm") Index: db-inc.scm ================================================================== --- db-inc.scm +++ db-inc.scm @@ -15,4639 +15,6 @@ ;; ;; You should have received a copy of the GNU General Public License ;; along with Megatest. If not, see . ;; ;;====================================================================== - -;;====================================================================== -;; Database access -;;====================================================================== - -;; dbstruct vector containing all the relevant dbs like main.db, megatest.db, run.db etc - -(define *rundb-mutex* (make-mutex)) ;; prevent problems opening/closing rundb's -(define *number-of-writes* 0) -(define *number-non-write-queries* 0) - -;;====================================================================== -;; R E C O R D S -;;====================================================================== - -;; each db entry is a pair ( db . 
dbfilepath ) -;; I propose this record evolves into the area record -;; -(defstruct dbr:dbstruct - (tmpdb #f) - (dbstack #f) ;; stack for tmp db handles, do not initialize with a stack - (mtdb #f) - (refndb #f) - (homehost #f) ;; not used yet - (on-homehost #f) ;; not used yet - (read-only #f) - ) ;; goal is to converge on one struct for an area but for now it is too confusing - - -;; record for keeping state,status and count for doing roll-ups in -;; iterated tests -;; -(defstruct dbr:counts - (state #f) - (status #f) - (count 0)) - -;;====================================================================== -;; SQLITE3 HELPERS -;;====================================================================== - -(define (db:general-sqlite-error-dump exn stmt . params) - (let ((err-status ((condition-property-accessor 'sqlite3 'status #f) exn))) ;; RADT ... how does this work? - ;; check for (exn sqlite3) ((condition-property-accessor 'exn 'message) exn) - (print "err-status: " err-status) - (debug:print-error 0 *default-log-port* " query " stmt " failed, params: " params ", error: " ((condition-property-accessor 'exn 'message) exn)) - (print-call-chain (current-error-port)))) - -;; convert to -inline -;; -(define (db:first-result-default db stmt default . params) - (handle-exceptions - exn - (let ((err-status ((condition-property-accessor 'sqlite3 'status #f) exn))) - ;; check for (exn sqlite3) ((condition-property-accessor 'exn 'message) exn) - (if (eq? 
err-status 'done) - default - (begin - (debug:print-error 0 *default-log-port* " query " stmt " failed, params: " params ", error: " ((condition-property-accessor 'exn 'message) exn)) - (print-call-chain (current-error-port)) - default))) - (apply sqlite3:first-result db stmt params))) - -;; Get/open a database -;; if run-id => get run specific db -;; if #f => get main db -;; if db already open - return inmem -;; if db not open, open inmem, rundb and sync then return inmem -;; inuse gets set automatically for rundb's -;; -(define (db:get-db dbstruct) ;; run-id) - (if (stack? (dbr:dbstruct-dbstack dbstruct)) - (if (stack-empty? (dbr:dbstruct-dbstack dbstruct)) - (let ((newdb (db:open-megatest-db path: (db:dbfile-path)))) - ;; (stack-push! (dbr:dbstruct-dbstack dbstruct) newdb) - newdb) - (stack-pop! (dbr:dbstruct-dbstack dbstruct))) - (db:open-db dbstruct))) - -;; ;; legacy handling of structure for managing db's. Refactor this into dbr:? -(define (db:dbdat-get-db dbdat) - (if (pair? dbdat) - (car dbdat) - dbdat)) - -(define (db:dbdat-get-path dbdat) - (if (pair? dbdat) - (cdr dbdat) - #f)) - -;; mod-read: -;; 'mod modified data -;; 'read read data -;; Locks the mutex and depending on 'mod or 'read passed, sets the last timestamp in dbstruct -;; -;; (define (db:done-with dbstruct run-id mod-read) -;; (if (not (sqlite3:database? dbstruct)) -;; (begin -;; (mutex-lock! *rundb-mutex*) -;; (if (eq? mod-read 'mod) -;; (dbr:dbstruct-mtime-set! dbstruct (current-milliseconds)) -;; (dbr:dbstruct-rtime-set! dbstruct (current-milliseconds))) -;; (dbr:dbstruct-inuse-set! dbstruct #f) -;; (mutex-unlock! *rundb-mutex*)))) - -;; (db:with-db dbstruct run-id sqlite3:exec "select blah fgrom blaz;") -;; r/w is a flag to indicate if the db is modified by this query #t = yes, #f = no -;; -(define (db:with-db dbstruct run-id r/w proc . params) - (let* ((have-struct (dbr:dbstruct? 
dbstruct)) - (dbdat (if have-struct - (db:get-db dbstruct) - #f)) - (db (if have-struct - (db:dbdat-get-db dbdat) - dbstruct)) - (use-mutex (> *api-process-request-count* 25))) - (if (and use-mutex - (common:low-noise-print 120 "over-50-parallel-api-requests")) - (debug:print-info 0 *default-log-port* *api-process-request-count* " parallel api requests being processed in process " (current-process-id) ", throttling access")) - (if (common:low-noise-print 600 (conc "parallel-api-requests" *max-api-process-requests*)) - (debug:print-info 2 *default-log-port* "Parallel api request count: " *api-process-request-count* " max parallel requests: " *max-api-process-requests*)) - (handle-exceptions - exn - (begin - (print-call-chain (current-error-port)) - (debug:print-error 0 *default-log-port* "sqlite3 issue in db:with-db, dbstruct=" dbstruct ", run-id=" run-id ", proc=" proc ", params=" params " error: " ((condition-property-accessor 'exn 'message) exn)) - ;; there is no recovering at this time. exit - (exit 50)) - (if use-mutex (mutex-lock! *db-with-db-mutex*)) - (let ((res (apply proc db params))) - (if use-mutex (mutex-unlock! *db-with-db-mutex*)) - ;; (if (vector? dbstruct)(db:done-with dbstruct run-id r/w)) - (if dbdat (stack-push! (dbr:dbstruct-dbstack dbstruct) dbdat)) - res)))) - -;;====================================================================== -;; K E E P F I L E D B I N dbstruct -;;====================================================================== - -;; (define (db:get-filedb dbstruct run-id) -;; (let ((db (vector-ref dbstruct 2))) -;; (if db -;; db -;; (let ((fdb (filedb:open-db (conc *toplevel* "/db/files.db")))) -;; (vector-set! dbstruct 2 fdb) -;; fdb)))) -;; -;; ;; Can also be used to save arbitrary strings -;; ;; -;; (define (db:save-path dbstruct path) -;; (let ((fdb (db:get-filedb dbstruct)))b -;; (filedb:register-path fdb path))) -;; -;; ;; Use to get a path. 
To get an arbitrary string see next define -;; ;; -;; (define (db:get-path dbstruct id) -;; (let ((fdb (db:get-filedb dbstruct))) -;; (filedb:get-path db id))) - -;; NB// #f => return dbdir only -;; (was planned to be; zeroth db with name=main.db) -;; -;; If run-id is #f return to create and retrieve the path where the db will live. -;; -(define db:dbfile-path common:get-db-tmp-area) - -(define (db:set-sync db) - (let ((syncprag (configf:lookup *configdat* "setup" "sychronous"))) - (sqlite3:execute db (conc "PRAGMA synchronous = " (or syncprag 0) ";")))) - -;; open an sql database inside a file lock -;; returns: db existed-prior-to-opening -;; RA => Returns a db handler; sets the lock if opened in writable mode -;; -;; (define *db-open-mutex* (make-mutex)) - -(define (db:lock-create-open fname initproc) - (let* ((parent-dir (or (pathname-directory fname)(current-directory))) ;; no parent? go local - (raw-fname (pathname-file fname)) - (dir-writable (file-write-access? parent-dir)) - (file-exists (common:file-exists? fname)) - (file-write (if file-exists - (file-write-access? fname) - dir-writable ))) - ;; (mutex-lock! *db-open-mutex*) ;; tried this mutex, not clear it helped. - (if file-write ;; dir-writable - (condition-case - (let* ((lockfname (conc fname ".lock")) - (readyfname (conc parent-dir "/.ready-" raw-fname)) - (readyexists (common:file-exists? readyfname))) - (if (not readyexists) - (common:simple-file-lock-and-wait lockfname)) - (let ((db (sqlite3:open-database fname))) - (sqlite3:set-busy-handler! 
db (sqlite3:make-busy-timeout 136000)) - (sqlite3:execute db "PRAGMA synchronous = 0;") - (if (and (configf:lookup *configdat* "setup" "tmp_mode") (string-match "^/tmp/.*" fname)) - (begin - ;;(print "DEBUG: Setting tmp_mode for " fname) - (sqlite3:execute db (configf:lookup *configdat* "setup" "tmp_mode")) - ) - ) - (if (and (configf:lookup *configdat* "setup" "nfs_mode") (not (string-match "^/tmp/.*" fname))) - (begin - ;;(print "DEBUG: Setting nfs_mode for " fname) - (sqlite3:execute db (configf:lookup *configdat* "setup" "nfs_mode")) - ) - ) - (if (and (not (or (configf:lookup *configdat* "setup" "tmp_mode") (configf:lookup *configdat* "setup" "nfs_mode"))) - (configf:lookup *configdat* "setup" "use-wal") - (string-match "^/tmp/.*" fname)) ;; this is a file in /tmp - (sqlite3:execute db "PRAGMA journal_mode=WAL;") - (debug:print 2 *default-log-port* "Creating " fname " in NON-WAL mode.")) - (if (not file-exists) - (initproc db)) - (if (not readyexists) - (begin - (common:simple-file-release-lock lockfname) - (with-output-to-file - readyfname - (lambda () - (print "Ready at " - (seconds->year-work-week/day-time - (current-seconds))))))) - db)) - (exn (io-error) (debug:print 0 *default-log-port* "ERROR: i/o error with " fname ". Check permissions, disk space etc. and try again.")) - (exn (corrupt) (debug:print 0 *default-log-port* "ERROR: database " fname " is corrupt. Repair it to proceed.")) - (exn (busy) (debug:print 0 *default-log-port* "ERROR: database " fname " is locked. 
Try copying to another location, remove original and copy back.")) - (exn (permission)(debug:print 0 *default-log-port* "ERROR: database " fname " has some permissions problem.")) - (exn () (debug:print 0 *default-log-port* "ERROR: Unknown error with database " fname " message: " ((condition-property-accessor 'exn 'message) exn)))) - - (condition-case - (begin - (debug:print 2 *default-log-port* "WARNING: opening db in non-writable dir " fname) - (let ((db (sqlite3:open-database fname))) - (sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 136000)) - (sqlite3:execute db "PRAGMA synchronous = 0;") - ;; (mutex-unlock! *db-open-mutex*) - db)) - (exn (io-error) (debug:print 0 *default-log-port* "ERROR: i/o error with " fname ". Check permissions, disk space etc. and try again.")) - (exn (corrupt) (debug:print 0 *default-log-port* "ERROR: database " fname " is corrupt. Repair it to proceed.")) - (exn (busy) (debug:print 0 *default-log-port* "ERROR: database " fname " is locked. Try copying to another location, remove original and copy back.")) - (exn (permission)(debug:print 0 *default-log-port* "ERROR: database " fname " has some permissions problem.")) - (exn () (debug:print 0 *default-log-port* "ERROR: Unknown error with database " fname " message: " ((condition-property-accessor 'exn 'message) exn)))) - ))) - - -;; This routine creates the db if not already present. It is only called if the db is not already opened -;; -(define (db:open-db dbstruct #!key (areapath #f)(do-sync #t)) ;; TODO: actually use areapath - (let ((tmpdb-stack (dbr:dbstruct-dbstack dbstruct))) ;; RA => Returns the first reference in dbstruct - (if (stack? tmpdb-stack) - (db:get-db tmpdb-stack) ;; get previously opened db (will create new db handle if all in the stack are already used - (let* ((max-stale-tmp (configf:lookup-number *configdat* "server" "filling-db-max-stale-seconds" default: 10)) - (dbpath (db:dbfile-path )) ;; path to tmp db area - (dbexists (common:file-exists? 
dbpath)) - (tmpdbfname (conc dbpath "/megatest.db")) - (dbfexists (common:file-exists? tmpdbfname)) ;; (conc dbpath "/megatest.db"))) - (mtdbexists (common:file-exists? (conc *toppath* "/megatest.db"))) - - (mtdbmodtime (if mtdbexists (common:lazy-sqlite-db-modification-time (conc *toppath* "/megatest.db")) #f)) - (tmpdbmodtime (if dbfexists (common:lazy-sqlite-db-modification-time tmpdbfname) #f)) - (mtdb (db:open-megatest-db)) - (mtdbpath (db:dbdat-get-path mtdb)) - (tmpdb (db:open-megatest-db path: dbpath)) ;; lock-create-open dbpath db:initialize-main-db)) - (refndb (db:open-megatest-db path: dbpath name: "megatest_ref.db")) - (write-access (file-write-access? mtdbpath)) - ;(mtdbmodtime (if mtdbexists (common:lazy-sqlite-db-modification-time mtdbpath) #f)) ; moving this before db:open-megatest-db is called. if wal mode is on -WAL and -shm file get created with causing the tmpdbmodtime timestamp always greater than mtdbmodtime - ;(tmpdbmodtime (if dbfexists (common:lazy-sqlite-db-modification-time tmpdbfname) #f)) - ;if wal mode is on -WAL and -shm file get created when db:open-megatest-db is called. modtimedelta will always be < 10 so db in tmp not get synced - ;(tmpdbmodtime (if dbfexists (db:get-last-update-time (car tmpdb)) #f)) - ;(fmt (file-modification-time tmpdbfname)) - (modtimedelta (and mtdbmodtime tmpdbmodtime (- mtdbmodtime tmpdbmodtime)))) - - (when write-access - (sqlite3:execute (car mtdb) "drop trigger if exists update_tests_trigger") - (sqlite3:execute (car mtdb) "drop trigger if exists update_runs_trigger")) - - ;(print "mtdbmodtime " mtdbmodtime " tmpdbmodtime " tmpdbmodtime " mtdbpath " mtdbpath " " (conc *toppath* "/megatest.db")) - ;;(debug:print-info 13 *default-log-port* "db:open-db>> mtdbpath="mtdbpath" mtdbexists="mtdbexists" and write-access="write-access) - (if (and dbexists (not write-access)) - (begin - (set! *db-write-access* #f) - (dbr:dbstruct-read-only-set! dbstruct #t))) - (dbr:dbstruct-mtdb-set! 
dbstruct mtdb) - (dbr:dbstruct-tmpdb-set! dbstruct tmpdb) - (dbr:dbstruct-dbstack-set! dbstruct (make-stack)) ;; BB: why a stack? Why would the number of db's be indeterminate? Is this a legacy of 1.db 2.db .. ? - (stack-push! (dbr:dbstruct-dbstack dbstruct) tmpdb) ;; olddb is already a (cons db path) - (dbr:dbstruct-refndb-set! dbstruct refndb) - ;; (mutex-unlock! *rundb-mutex*) - (if (and (or (not dbfexists) - (and modtimedelta - (> modtimedelta max-stale-tmp))) ;; if db in tmp is over ten seconds older than the file in MTRA then do a sync back - do-sync) - (begin - (debug:print 1 *default-log-port* "filling db " (db:dbdat-get-path tmpdb) " with data \n from " (db:dbdat-get-path mtdb) " mod time delta: " modtimedelta) - (db:sync-tables (db:sync-all-tables-list dbstruct) #f mtdb refndb tmpdb) - ;touch tmp db to avoid wal mode wierdness - (set! (file-modification-time tmpdbfname) (current-seconds)) - (debug:print-info 13 *default-log-port* "db:sync-all-tables-list done.") - ) - (debug:print 4 *default-log-port* " db, " (db:dbdat-get-path tmpdb) " already exists or fresh enough, not propogating data from\n " (db:dbdat-get-path mtdb) " mod time delta: " modtimedelta) ) - ;; (db:multi-db-sync dbstruct 'old2new)) ;; migrate data from megatest.db automatically - tmpdb)))) - - -(define (db:get-last-update-time db) -; (db:with-db -; dbstruct #f #f -; (lambda (db) - (let ((last-update-time #f)) - (sqlite3:for-each-row - (lambda (lup) - (set! last-update-time lup)) - db - "select max(lup) from ( select max(last_update) as lup from tests union select max(last_update) as lup from runs);") - last-update-time)) -;)) - -;; Make the dbstruct, setup up auxillary db's and call for main db at least once -;; -;; called in http-transport and replicated in rmt.scm for *local* access. 
-;; -(define (db:setup do-sync #!key (areapath #f)) - ;; - (cond - (*dbstruct-db* *dbstruct-db*);; TODO: when multiple areas are supported, this optimization will be a hazard - (else ;;(common:on-homehost?) - (debug:print-info 13 *default-log-port* "db:setup entered (first time, not cached.)") - (let* ((dbstruct (make-dbr:dbstruct))) - (when (not *toppath*) - (debug:print-info 13 *default-log-port* "in db:setup, *toppath* not set; calling launch:setup") - (launch:setup areapath: areapath)) - (debug:print-info 13 *default-log-port* "Begin db:open-db") - (db:open-db dbstruct areapath: areapath do-sync: do-sync) - (debug:print-info 13 *default-log-port* "Done db:open-db") - (set! *dbstruct-db* dbstruct) - ;;(debug:print-info 13 *default-log-port* "new dbstruct = "(dbr:dbstruct->alist dbstruct)) - dbstruct)))) - ;; (else - ;; (debug:print 0 *default-log-port* "ERROR: attempt to open database when not on homehost. Exiting. Homehost: " (common:get-homehost)) - ;; (exit 1)))) - -;; Open the classic megatest.db file (defaults to open in toppath) -;; -;; NOTE: returns a dbdat not a dbstruct! -;; - -;;(define (db:reopen-megatest-db - -(define (db:open-megatest-db #!key (path #f)(name #f)) - (let* ((dbdir (or path *toppath*)) - (dbpath (conc dbdir "/" (or name "megatest.db"))) - (dbexists (common:file-exists? dbpath)) - (db (db:lock-create-open dbpath - (lambda (db) - (db:initialize-main-db db) - ;;(db:initialize-run-id-db db) - ))) - (write-access (file-write-access? dbpath))) - (debug:print-info 13 *default-log-port* "db:open-megatest-db "dbpath) - (if (and dbexists (not write-access)) - (set! *db-write-access* #f)) - (cons db dbpath))) - -;; sync run to disk if touched -;; -(define (db:sync-touched dbstruct run-id #!key (force-sync #f)) - (let ((tmpdb (db:get-db dbstruct)) - (mtdb (dbr:dbstruct-mtdb dbstruct)) - (refndb (dbr:dbstruct-refndb dbstruct)) - (start-t (current-seconds))) - (debug:print-info 4 *default-log-port* "Syncing for run-id: " run-id) - (mutex-lock! 
*db-multi-sync-mutex*) - (let ((update_info (cons (if force-sync 0 *db-last-sync*) "last_update"))) - (mutex-unlock! *db-multi-sync-mutex*) - (db:sync-tables (db:sync-all-tables-list dbstruct) update_info tmpdb refndb mtdb)) - (mutex-lock! *db-multi-sync-mutex*) - (set! *db-last-sync* start-t) - (set! *db-last-access* start-t) - (mutex-unlock! *db-multi-sync-mutex*) - (stack-push! (dbr:dbstruct-dbstack dbstruct) tmpdb))) - -(define (db:safely-close-sqlite3-db db #!key (try-num 3)) - (if (<= try-num 0) - #f - (handle-exceptions - exn - (begin - (thread-sleep! 3) - (sqlite3:interrupt! db) - (db:safely-close-sqlite3-db db try-num: (- try-num 1))) - (if (sqlite3:database? db) - (begin - (sqlite3:finalize! db) - #t) - #f)))) - -;; close all opened run-id dbs -(define (db:close-all dbstruct) - (if (dbr:dbstruct? dbstruct) - (handle-exceptions - exn - (begin - (debug:print 0 *default-log-port* "WARNING: Finalizing failed, " ((condition-property-accessor 'exn 'message) exn)) - (print-call-chain *default-log-port*)) - ;; (db:sync-touched dbstruct 0 force-sync: #t) ;; NO. Do not do this here. Instead we rely on a server to be started when there are writes, even if the server itself is not going to be used as a server. - (let ((tdbs (map db:dbdat-get-db - (stack->list (dbr:dbstruct-dbstack dbstruct)))) - (mdb (db:dbdat-get-db (dbr:dbstruct-mtdb dbstruct))) - (rdb (db:dbdat-get-db (dbr:dbstruct-refndb dbstruct)))) - (map (lambda (db) - (db:safely-close-sqlite3-db db)) -;; (if (sqlite3:database? db) -;; (sqlite3:finalize! db))) - tdbs) - (db:safely-close-sqlite3-db mdb) ;; (if (sqlite3:database? mdb) (sqlite3:finalize! mdb)) - (db:safely-close-sqlite3-db rdb))))) ;; (if (sqlite3:database? rdb) (sqlite3:finalize! rdb)))))) - -;; (let ((locdbs (dbr:dbstruct-locdbs dbstruct))) -;; (if (hash-table? 
locdbs) -;; (for-each (lambda (run-id) -;; (db:close-run-db dbstruct run-id)) -;; (hash-table-keys locdbs))))) - -;; (define (db:open-inmem-db) -;; (let* ((db (sqlite3:open-database ":memory:")) -;; (handler (make-busy-timeout 3600))) -;; (sqlite3:set-busy-handler! db handler) -;; (db:initialize-run-id-db db) -;; (cons db #f))) - -;; just tests, test_steps and test_data tables -(define db:sync-tests-only - (list - ;; (list "strs" - ;; '("id" #f) - ;; '("str" #f)) - (list "tests" - '("id" #f) - '("run_id" #f) - '("testname" #f) - '("host" #f) - '("cpuload" #f) - '("diskfree" #f) - '("uname" #f) - '("rundir" #f) - '("shortdir" #f) - '("item_path" #f) - '("state" #f) - '("status" #f) - '("attemptnum" #f) - '("final_logf" #f) - '("logdat" #f) - '("run_duration" #f) - '("comment" #f) - '("event_time" #f) - '("fail_count" #f) - '("pass_count" #f) - '("archived" #f) - '("last_update" #f)) - (list "test_steps" - '("id" #f) - '("test_id" #f) - '("stepname" #f) - '("state" #f) - '("status" #f) - '("event_time" #f) - '("comment" #f) - '("logfile" #f) - '("last_update" #f)) - (list "test_data" - '("id" #f) - '("test_id" #f) - '("category" #f) - '("variable" #f) - '("value" #f) - '("expected" #f) - '("tol" #f) - '("units" #f) - '("comment" #f) - '("status" #f) - '("type" #f) - '("last_update" #f)))) - -;; needs db to get keys, this is for syncing all tables -;; -(define (db:sync-main-list dbstruct) - (let ((keys (db:get-keys dbstruct))) - (list - (list "keys" - '("id" #f) - '("fieldname" #f) - '("fieldtype" #f)) - (list "metadat" '("var" #f) '("val" #f)) - (append (list "runs" - '("id" #f)) - (map (lambda (k)(list k #f)) - (append keys - (list "runname" "state" "status" "owner" "event_time" "comment" "fail_count" "pass_count" "contour" "last_update")))) - (list "test_meta" - '("id" #f) - '("testname" #f) - '("owner" #f) - '("description" #f) - '("reviewed" #f) - '("iterated" #f) - '("avg_runtime" #f) - '("avg_disk" #f) - '("tags" #f) - '("jobgroup" #f))))) - -(define 
(db:sync-all-tables-list dbstruct) - (append (db:sync-main-list dbstruct) - db:sync-tests-only)) - -;; use bunch of Unix commands to try to break the lock and recreate the db -;; -(define (db:move-and-recreate-db dbdat) - (let* ((dbpath (db:dbdat-get-path dbdat)) - (dbdir (pathname-directory dbpath)) - (fname (pathname-strip-directory dbpath)) - (fnamejnl (conc fname "-journal")) - (tmpname (conc fname "." (current-process-id))) - (tmpjnl (conc fnamejnl "." (current-process-id)))) - (debug:print-error 0 *default-log-port* "" fname " appears corrupted. Making backup \"old/" fname "\"") - (system (conc "cd " dbdir ";mkdir -p old;cat " fname " > old/" tmpname)) - (system (conc "rm -f " dbpath)) - (if (common:file-exists? fnamejnl) - (begin - (debug:print-error 0 *default-log-port* "" fnamejnl " found, moving it to old dir as " tmpjnl) - (system (conc "cd " dbdir ";mkdir -p old;cat " fnamejnl " > old/" tmpjnl)) - (system (conc "rm -f " dbdir "/" fnamejnl)))) - ;; attempt to recreate database - (system (conc "cd " dbdir ";sqlite3 old/" tmpname " .dump | sqlite3 " fname)))) - -;; return #f to indicate the dbdat should be closed/reopened -;; else return dbdat -;; -(define (db:repair-db dbdat #!key (numtries 1)) - (let* ((dbpath (db:dbdat-get-path dbdat)) - (dbdir (pathname-directory dbpath)) - (fname (pathname-strip-directory dbpath))) - (debug:print-info 0 *default-log-port* "Checking db " dbpath " for errors.") - (cond - ((not (file-write-access? dbdir)) - (debug:print 0 *default-log-port* "WARNING: can't write to " dbdir ", can't fix " fname) - #f) - - ;; handle special cases, megatest.db and monitor.db - ;; - ;; NOPE: apply this same approach to all db files - ;; - (else ;; ((equal? 
fname "megatest.db") ;; this file can be regenerated if needed - (handle-exceptions - exn - (begin - ;; (db:move-and-recreate-db dbdat) - (if (> numtries 0) - (db:repair-db dbdat numtries: (- numtries 1)) - #f) - (debug:print 0 *default-log-port* "FATAL: file " dbpath " was found corrupted, an attempt to fix has been made but you must start over.") - (debug:print 0 *default-log-port* - " check the following:\n" - " 1. full directories, look in ~/ /tmp and " dbdir "\n" - " 2. write access to " dbdir "\n\n" - " if the automatic recovery failed you may be able to recover data by doing \"" - (if (member fname '("megatest.db" "monitor.db")) - "megatest -cleanup-db" - "megatest -import-megatest.db;megatest -cleanup-db") - "\"\n") - (exit) ;; we can not safely continue when a db was corrupted - even if fixed. - ) - ;; test read/write access to the database - (let ((db (sqlite3:open-database dbpath))) - (sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 136000)) - (sqlite3:execute db "PRAGMA synchronous = 0;") - (cond - ((equal? fname "megatest.db") - (sqlite3:execute db "DELETE FROM tests WHERE state='DELETED';")) - ((equal? fname "main.db") - (sqlite3:execute db "DELETE FROM runs WHERE state='deleted';")) - ((string-match "\\d.db" fname) - (sqlite3:execute db "UPDATE tests SET state='DELETED' WHERE state='DELETED';")) - ((equal? fname "monitor.db") - (sqlite3:execute "DELETE FROM servers WHERE state LIKE 'defunct%';")) - (else - (sqlite3:execute db "vacuum;"))) - - (sqlite3:finalize! db) - #t)))))) - -;; tbls is ( ("tablename" ( "field1" [#f|proc1] ) ( "field2" [#f|proc2] ) .... ) ) -;; db's are dbdat's -;; -;; if last-update specified ("field-name" . time-in-seconds) -;; then sync only records where field-name >= time-in-seconds -;; IFF field-name exists -;; -(define (db:sync-tables tbls last-update fromdb todb . 
slave-dbs) - (handle-exceptions - exn - (begin - (debug:print 0 *default-log-port* "EXCEPTION: database probably overloaded or unreadable in db:sync-tables.") - (print-call-chain (current-error-port)) - (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) - (debug:print 5 *default-log-port* "exn=" (condition->list exn)) - (debug:print 0 *default-log-port* " status: " ((condition-property-accessor 'sqlite3 'status) exn)) - (debug:print 0 *default-log-port* " src db: " (db:dbdat-get-path fromdb)) - (for-each (lambda (dbdat) - (let ((dbpath (db:dbdat-get-path dbdat))) - (debug:print 0 *default-log-port* " dbpath: " dbpath) - (if (not (db:repair-db dbdat)) - (begin - (debug:print-error 0 *default-log-port* "Failed to rebuild " dbpath ", exiting now.") - (exit))))) - (cons todb slave-dbs)) - - 0) - ;; this is the work to be done - (cond - ((not fromdb) (debug:print 3 *default-log-port* "WARNING: db:sync-tables called with fromdb missing") - -1) - ((not todb) (debug:print 3 *default-log-port* "WARNING: db:sync-tables called with todb missing") - -2) - ((not (sqlite3:database? (db:dbdat-get-db fromdb))) - (debug:print-error 0 *default-log-port* "db:sync-tables called with fromdb not a database " fromdb) - -3) - ((not (sqlite3:database? (db:dbdat-get-db todb))) - (debug:print-error 0 *default-log-port* "db:sync-tables called with todb not a database " todb) - -4) - - ((not (file-write-access? (db:dbdat-get-path todb))) - (debug:print-error 0 *default-log-port* "db:sync-tables called with todb not a read-only database " todb) - -5) - ((not (null? (let ((readonly-slave-dbs - (filter - (lambda (dbdat) - (not (file-write-access? 
(db:dbdat-get-path todb)))) - slave-dbs))) - (for-each - (lambda (bad-dbdat) - (debug:print-error - 0 *default-log-port* "db:sync-tables called with todb not a read-only database " bad-dbdat)) - readonly-slave-dbs) - readonly-slave-dbs))) -6) - (else - (let ((stmts (make-hash-table)) ;; table-field => stmt - (all-stmts '()) ;; ( ( stmt1 value1 ) ( stml2 value2 )) - (numrecs (make-hash-table)) - (start-time (current-milliseconds)) - (tot-count 0)) - (for-each ;; table - (lambda (tabledat) - (let* ((tablename (car tabledat)) - (fields (cdr tabledat)) - (has-last-update (member "last_update" fields)) - (use-last-update (cond - ((and has-last-update - (member "last_update" fields)) - #t) ;; if given a number, just use it for all fields - ((number? last-update) #f) ;; if not matched first entry then ignore last-update for this table - ((and (pair? last-update) - (member (car last-update) ;; last-update field name - (map car fields))) #t) - (last-update - (debug:print 0 *default-log-port* "ERROR: parameter last-update for db:sync-tables must be a pair or a number, received: " last-update) ;; found in fields - #f) - (else - #f))) - (last-update-value (if use-last-update ;; no need to check for has-last-update - it is already accounted for - (if (number? last-update) - last-update - (cdr last-update)) - #f)) - (last-update-field (if use-last-update - (if (number? 
last-update) - "last_update" - (car last-update)) - #f)) - (num-fields (length fields)) - (field->num (make-hash-table)) - (num->field (apply vector (map car fields))) ;; BBHERE - (full-sel (conc "SELECT " (string-intersperse (map car fields) ",") - " FROM " tablename (if use-last-update ;; apply last-update criteria - (conc " WHERE " last-update-field " >= " last-update-value) - "") - ";")) - (full-ins (conc "INSERT OR REPLACE INTO " tablename " ( " (string-intersperse (map car fields) ",") " ) " - " VALUES ( " (string-intersperse (make-list num-fields "?") ",") " );")) - (fromdat '()) - (fromdats '()) - (totrecords 0) - (batch-len (string->number (or (configf:lookup *configdat* "sync" "batchsize") "100"))) - (todat (make-hash-table)) - (count 0) - - (delay-handicap (string->number (or (configf:lookup *configdat* "sync" "delay-handicap") "0"))) - ) - - ;; set up the field->num table - (for-each - (lambda (field) - (hash-table-set! field->num field count) - (set! count (+ count 1))) - fields) - - ;; read the source table - (sqlite3:for-each-row - (lambda (a . b) - (set! fromdat (cons (apply vector a b) fromdat)) - (if (> (length fromdat) batch-len) - (begin - (set! fromdats (cons fromdat fromdats)) - (set! fromdat '()) - (set! totrecords (+ totrecords 1))))) - (db:dbdat-get-db fromdb) - full-sel) - - ;; tack on remaining records in fromdat - (if (not (null? fromdat)) - (set! fromdats (cons fromdat fromdats))) - - (if (common:low-noise-print 120 "sync-records") - (debug:print-info 4 *default-log-port* "found " totrecords " records to sync")) - - ;; read the target table; BBHERE - (sqlite3:for-each-row - (lambda (a . b) - (hash-table-set! todat a (apply vector a b))) - (db:dbdat-get-db todb) - full-sel) - - (when (and delay-handicap (> delay-handicap 0)) - (debug:print-info 0 *default-log-port* "imposing synthetic sync delay of "delay-handicap" seconds since sync/delay-handicap is configured") - (thread-sleep! 
delay-handicap) - (debug:print-info 0 *default-log-port* "synthetic sync delay of "delay-handicap" seconds completed") - ) - - ;; first pass implementation, just insert all changed rows - (for-each - (lambda (targdb) - (let* ((db (db:dbdat-get-db targdb)) - (stmth (sqlite3:prepare db full-ins))) - (db:delay-if-busy targdb) ;; NO WAITING - (for-each - (lambda (fromdat-lst) - (sqlite3:with-transaction - db - (lambda () - (for-each ;; - (lambda (fromrow) - (let* ((a (vector-ref fromrow 0)) - (curr (hash-table-ref/default todat a #f)) - (same #t)) - (let loop ((i 0)) - (if (or (not curr) - (not (equal? (vector-ref fromrow i)(vector-ref curr i)))) - (set! same #f)) - (if (and same - (< i (- num-fields 1))) - (loop (+ i 1)))) - (if (not same) - (begin - (apply sqlite3:execute stmth (vector->list fromrow)) - (hash-table-set! numrecs tablename (+ 1 (hash-table-ref/default numrecs tablename 0))))))) - fromdat-lst)) - )) - fromdats) - (sqlite3:finalize! stmth))) - (append (list todb) slave-dbs)))) - tbls) - (let* ((runtime (- (current-milliseconds) start-time)) - (should-print (or (debug:debug-mode 12) - (common:low-noise-print 120 "db sync" (> runtime 500))))) ;; low and high sync times treated as separate. - (if should-print (debug:print 3 *default-log-port* "INFO: db sync, total run time " runtime " ms")) - (for-each - (lambda (dat) - (let ((tblname (car dat)) - (count (cdr dat))) - (set! 
tot-count (+ tot-count count)) - (if (> count 0) - (if should-print (debug:print 0 *default-log-port* (format #f " ~10a ~5a" tblname count)))))) - (sort (hash-table->alist numrecs)(lambda (a b)(> (cdr a)(cdr b)))))) - tot-count))))) - -(define (db:patch-schema-rundb frundb) - ;; - ;; remove this some time after September 2016 (added in version v1.6031 - ;; - (for-each - (lambda (table-name) - (handle-exceptions - exn - (if (string-match ".*duplicate.*" ((condition-property-accessor 'exn 'message) exn)) - (debug:print 0 *default-log-port* "Column last_update already added to " table-name " table") - (db:general-sqlite-error-dump exn "alter table " table-name " ..." #f "none")) - (sqlite3:execute - frundb - (conc "ALTER TABLE " table-name " ADD COLUMN last_update INTEGER DEFAULT 0"))) - (sqlite3:execute - frundb - (conc "DROP TRIGGER IF EXISTS update_" table-name "_trigger;")) - (sqlite3:execute - frundb - (conc "CREATE TRIGGER IF NOT EXISTS update_" table-name "_trigger AFTER UPDATE ON " table-name " - FOR EACH ROW - BEGIN - UPDATE " table-name " SET last_update=(strftime('%s','now')) - WHERE id=old.id; - END;")) - ) - '("tests" "test_steps" "test_data"))) - -(define (db:patch-schema-maindb maindb) - ;; - ;; remove all these some time after september 2016 (added in v1.6031 - ;; - (for-each - (lambda (column type default) - (handle-exceptions - exn - (if (string-match ".*duplicate.*" ((condition-property-accessor 'exn 'message) exn)) - (debug:print 0 *default-log-port* "Column " column " already added to runs table") - (db:general-sqlite-error-dump exn "alter table runs ..." 
#f "none")) - (sqlite3:execute - maindb - (conc "ALTER TABLE runs ADD COLUMN " column " " type " DEFAULT " default)))) - (list "last_update" "contour") - (list "INTEGER" "TEXT" ) - (list "0" "''" )) - ;; these schema changes don't need exception handling - (sqlite3:execute - maindb - "CREATE TRIGGER IF NOT EXISTS update_runs_trigger AFTER UPDATE ON runs - FOR EACH ROW - BEGIN - UPDATE runs SET last_update=(strftime('%s','now')) - WHERE id=old.id; - END;") - (sqlite3:execute maindb "CREATE TABLE IF NOT EXISTS run_stats ( - id INTEGER PRIMARY KEY, - run_id INTEGER, - state TEXT, - status TEXT, - count INTEGER, - last_update INTEGER DEFAULT (strftime('%s','now')))") - (sqlite3:execute maindb "CREATE TRIGGER IF NOT EXISTS update_run_stats_trigger AFTER UPDATE ON run_stats - FOR EACH ROW - BEGIN - UPDATE run_stats SET last_update=(strftime('%s','now')) - WHERE id=old.id; - END;") - (sqlite3:execute maindb "CREATE TABLE IF NOT EXISTS test_rundat ( - id INTEGER PRIMARY KEY, - test_id INTEGER, - update_time TIMESTAMP, - cpuload INTEGER DEFAULT -1, - diskfree INTEGER DEFAULT -1, - diskusage INTGER DEFAULT -1, - run_duration INTEGER DEFAULT 0);")) - -(define (db:adj-target db) - (let ((fields (configf:get-section *configdat* "fields")) - (field-num 0)) - ;; because we will be refreshing the keys table it is best to clear it here - (sqlite3:execute db "DELETE FROM keys;") - (for-each - (lambda (field) - (let ((column (car field)) - (spec (cadr field))) - (handle-exceptions - exn - (if (string-match ".*duplicate.*" ((condition-property-accessor 'exn 'message) exn)) - (debug:print 0 *default-log-port* "Target field " column " already exists in the runs table") - (db:general-sqlite-error-dump exn "alter table runs ..." 
#f "none")) - ;; Add the column if needed - (sqlite3:execute - db - (conc "ALTER TABLE runs ADD COLUMN " column " " spec))) - ;; correct the entry in the keys column - (sqlite3:execute - db - "INSERT INTO keys (id,fieldname,fieldtype) VALUES (?,?,?);" - field-num column spec) - ;; fill in blanks (not allowed as it would be part of the path - (sqlite3:execute - db - (conc "UPDATE runs SET " column "='x' WHERE " column "='';")) - (set! field-num (+ field-num 1)))) - fields))) - -(define *global-db-store* (make-hash-table)) - -(define (db:get-access-mode) - (if (args:get-arg "-use-db-cache") 'cached 'rmt)) - -;; Add db direct -;; -(define (db:dispatch-query access-mode rmt-cmd db-cmd . params) - (if (eq? access-mode 'cached) - (debug:print 2 *default-log-port* "not doing cached calls right now")) -;; (apply db:call-with-cached-db db-cmd params) - (apply rmt-cmd params)) -;;) - -;; return the target db handle so it can be used -;; -(define (db:cache-for-read-only source target #!key (use-last-update #f)) - (if (and (hash-table-ref/default *global-db-store* target #f) - (>= (file-modification-time target)(file-modification-time source))) - (hash-table-ref *global-db-store* target) - (let* ((toppath (launch:setup)) - (targ-db-last-mod (if (common:file-exists? target) - (file-modification-time target) - 0)) - (cache-db (or (hash-table-ref/default *global-db-store* target #f) - (db:open-megatest-db path: target))) - (source-db (db:open-megatest-db path: source)) - (curr-time (current-seconds)) - (res '()) - (last-update (if use-last-update (cons "last_update" targ-db-last-mod) #f))) - (db:sync-tables (db:sync-main-list source-db) last-update source-db cache-db) - (db:sync-tables db:sync-tests-only last-update source-db cache-db) - (hash-table-set! *global-db-store* target cache-db) - cache-db))) - -;; ;; call a proc with a cached db -;; ;; -;; (define (db:call-with-cached-db proc . 
params) -;; ;; first cache the db in /tmp -;; (let* ((cname-part (conc "megatest_cache/" (common:get-testsuite-name))) -;; (fname (conc (common:get-area-path-signature) ".db")) -;; (cache-dir (common:get-create-writeable-dir -;; (list (conc "/tmp/" (current-user-name) "/" cname-part) -;; (conc "/tmp/" (current-user-name) "-" cname-part) -;; (conc "/tmp/" (current-user-name) "_" cname-part)))) -;; (megatest-db (conc *toppath* "/megatest.db"))) -;; ;; (debug:print-info 0 *default-log-port* "Using cache dir " cache-dir) -;; (if (not cache-dir) -;; (begin -;; (debug:print 0 *default-log-port* "ERROR: Failed to find an area to write the cache db") -;; (exit 1)) -;; (let* ((th1 (make-thread -;; (lambda () -;; (if (and (common:file-exists? megatest-db) -;; (file-write-access? megatest-db)) -;; (begin -;; (db:sync-to-megatest.db dbstruct 'timestamps) ;; internally mutexes on *db-local-sync* -;; (debug:print-info 2 *default-log-port* "Done syncing to megatest.db")))) -;; "call-with-cached-db sync-to-megatest.db")) -;; (cache-db (db:cache-for-read-only -;; megatest-db -;; (conc cache-dir "/" fname) -;; use-last-update: #t))) -;; (thread-start! th1) -;; (apply proc cache-db params) -;; )))) - -;; options: -;; -;; 'killservers - kills all servers -;; 'dejunk - removes junk records -;; 'adj-testids - move test-ids into correct ranges -;; 'old2new - sync megatest.db to /tmp/.../megatest.db and /tmp/.../megatest_ref.db -;; 'new2old - sync /tmp/.../megatest.db to megatest.db and /tmp/.../megatest_ref.db (and update data_synced) -;; 'closeall - close all opened dbs -;; 'schema - attempt to apply schema changes -;; run-ids: '(1 2 3 ...) or #f (for all) -;; -(define (db:multi-db-sync dbstruct . 
options) - ;; (if (not (launch:setup)) - ;; (debug:print 0 *default-log-port* "ERROR: not able to setup up for megatest.") - (let* ((mtdb (dbr:dbstruct-mtdb dbstruct)) - (tmpdb (db:get-db dbstruct)) - (refndb (dbr:dbstruct-refndb dbstruct)) - (allow-cleanup #t) ;; (if run-ids #f #t)) - (servers (server:get-list *toppath*)) ;; (tasks:get-all-servers (db:delay-if-busy tdbdat))) - (data-synced 0)) ;; count of changed records (I hope) - - (for-each - (lambda (option) - - (case option - ;; kill servers - ((killservers) - (for-each - (lambda (server) - (match-let (((mod-time host port start-time pid) server)) - (if (and host pid) - (tasks:kill-server host pid)))) - servers) - - ;; /tmp/bjbarcla/megatest_localdb/fullrun/.nfs.pdx.disks.icf_env_disk001.bjbarcla.gwa.issues.mtdev.matt-bisect.megatest.ext-tests.runs.all.v1.65.1.6524.dev.bb-24-justrollup-f8.rollup.fullrun/megatest.db.lock - (delete-file* (common:get-sync-lock-filepath)) - ) - - ;; clear out junk records - ;; - ((dejunk) - (db:delay-if-busy mtdb) ;; ok to delay on mtdb - (when (file-write-access? (db:dbdat-get-path mtdb)) (db:clean-up mtdb)) - (db:clean-up tmpdb) - (db:clean-up refndb)) - - ;; sync runs, test_meta etc. - ;; - ((old2new) - (set! data-synced - (+ (db:sync-tables (db:sync-all-tables-list dbstruct) #f mtdb tmpdb refndb) - data-synced))) - - ;; now ensure all newdb data are synced to megatest.db - ;; do not use the run-ids list passed in to the function - ;; - ((new2old) - (set! 
data-synced - (+ (db:sync-tables (db:sync-all-tables-list dbstruct) #f tmpdb refndb mtdb) - data-synced))) - - ((adj-target) - (db:adj-target (db:dbdat-get-db mtdb)) - (db:adj-target (db:dbdat-get-db tmpdb)) - (db:adj-target (db:dbdat-get-db refndb))) - - ((schema) - (db:patch-schema-maindb (db:dbdat-get-db mtdb)) - (db:patch-schema-maindb (db:dbdat-get-db tmpdb)) - (db:patch-schema-maindb (db:dbdat-get-db refndb)) - (db:patch-schema-rundb (db:dbdat-get-db mtdb)) - (db:patch-schema-rundb (db:dbdat-get-db tmpdb)) - (db:patch-schema-rundb (db:dbdat-get-db refndb)))) - - (stack-push! (dbr:dbstruct-dbstack dbstruct) tmpdb)) - options) - data-synced)) - -(define (db:tmp->megatest.db-sync dbstruct last-update) - (let* ((mtdb (dbr:dbstruct-mtdb dbstruct)) - (tmpdb (db:get-db dbstruct)) - (refndb (dbr:dbstruct-refndb dbstruct)) - (res (db:sync-tables (db:sync-all-tables-list dbstruct) last-update tmpdb refndb mtdb))) - (stack-push! (dbr:dbstruct-dbstack dbstruct) tmpdb) - res)) - -;;;; run-ids -;; if #f use *db-local-sync* : or 'local-sync-flags -;; if #t use timestamps : or 'timestamps -;; -;; NB// no-sync-db is the db handle, not a flag! 
-;; -(define (db:sync-to-megatest.db dbstruct #!key (no-sync-db #f)) - (let* ((start-time (current-seconds)) - (last-full-update (if no-sync-db - (db:no-sync-get/default no-sync-db "LAST_FULL_UPDATE" 0) - 0)) - (full-sync-needed (> (- start-time last-full-update) 3600)) ;; every hour do a full sync - (last-update (if full-sync-needed - 0 - (if no-sync-db - (db:no-sync-get/default no-sync-db "LAST_UPDATE" 0) - 0))) ;; (or (db:get-var dbstruct "LAST_UPDATE") 0)) - (sync-needed (> (- start-time last-update) 6)) - (res (if (or sync-needed ;; don't sync if a sync already occurred in the past 6 seconds - full-sync-needed) - (begin - (if no-sync-db - (begin - (if full-sync-needed (db:no-sync-set no-sync-db "LAST_FULL_UPDATE" start-time)) - (db:no-sync-set no-sync-db "LAST_UPDATE" start-time))) - (db:tmp->megatest.db-sync dbstruct last-update)) - 0)) - (sync-time (- (current-seconds) start-time))) - (debug:print-info 3 *default-log-port* "Sync of newdb to olddb completed in " sync-time " seconds pid="(current-process-id)) - (if (common:low-noise-print 30 "sync new to old") - (if sync-needed - (debug:print-info 0 *default-log-port* "Sync of " res " records from newdb to olddb completed in " sync-time " seconds pid="(current-process-id)) - (debug:print-info 0 *default-log-port* "No sync needed, last updated " (- start-time last-update) " seconds ago"))) - res)) - -;; keeping it around for debugging purposes only -#;(define (open-run-close-no-exception-handling proc idb . params) - (debug:print-info 11 *default-log-port* "open-run-close-no-exception-handling START given a db=" (if idb "yes " "no ") ", params=" params) - (print "I don't work anymore. open-run-close-no-exception-handling needs fixing or removing...") - (exit) - (if (or *db-write-access* - (not #t)) ;; was: (member proc * db:all-write-procs *))) - (let* ((db (cond - ((pair? idb) (db:dbdat-get-db idb)) - ((sqlite3:database? 
idb) idb) - ((not idb) (debug:print-error 0 *default-log-port* "cannot open-run-close with #f anymore")) - ((procedure? idb) (idb)) - (else (debug:print-error 0 *default-log-port* "cannot open-run-close with #f anymore")))) - (res #f)) - (set! res (apply proc db params)) - (if (not idb)(sqlite3:finalize! dbstruct)) - (debug:print-info 11 *default-log-port* "open-run-close-no-exception-handling END" ) - res) - #f)) - -#;(define (open-run-close-exception-handling proc idb . params) - (handle-exceptions - exn - (let ((sleep-time (random 30)) - (err-status ((condition-property-accessor 'sqlite3 'status #f) exn))) - (case err-status - ((busy) - (thread-sleep! sleep-time)) - (else - (debug:print 0 *default-log-port* "EXCEPTION: database probably overloaded or unreadable.") - (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) - (debug:print 5 *default-log-port* "exn=" (condition->list exn)) - (debug:print 0 *default-log-port* " status: " ((condition-property-accessor 'sqlite3 'status) exn)) - (print-call-chain (current-error-port)) - (thread-sleep! sleep-time) - (debug:print-info 0 *default-log-port* "trying db call one more time....this may never recover, if necessary kill process " (current-process-id) " on host " (get-host-name) " to clean up"))) - (apply open-run-close-exception-handling proc idb params)) - (apply open-run-close-no-exception-handling proc idb params))) - -;; (define open-run-close -#;(define open-run-close open-run-close-exception-handling) - ;; open-run-close-no-exception-handling -;; open-run-close-exception-handling) -;;) - -(define (db:initialize-main-db dbdat) - (when (not *configinfo*) - (launch:setup)) ;; added because Elena was getting stack dump because *configinfo* below was #f. - (let* ((configdat (car *configinfo*)) ;; tut tut, global warning... 
- (keys (keys:config-get-fields configdat)) - (havekeys (> (length keys) 0)) - (keystr (keys->keystr keys)) - (fieldstr (keys:make-key/field-string configdat)) - (db (db:dbdat-get-db dbdat))) - (for-each (lambda (key) - (let ((keyn key)) - (if (member (string-downcase keyn) - (list "runname" "state" "status" "owner" "event_time" "comment" "fail_count" - "pass_count" "contour")) - (begin - (print "ERROR: your key cannot be named " keyn " as this conflicts with the same named field in the runs table, you must remove your megatest.db and /.db before trying again.") - (exit 1))))) - keys) - (sqlite3:with-transaction - db - (lambda () - ;; handle-exceptions - ;; exn - ;; (begin - ;; (debug:print 0 "ERROR: Failed to create tables. Look at your [fields] section, should be: fieldname TEXT DEFAULT 'yourdefault'") - ;; (exit)) - (sqlite3:execute db "CREATE TABLE IF NOT EXISTS keys (id INTEGER PRIMARY KEY, fieldname TEXT, fieldtype TEXT, CONSTRAINT keyconstraint UNIQUE (fieldname));") - (for-each (lambda (key) - (sqlite3:execute db "INSERT OR REPLACE INTO keys (fieldname,fieldtype) VALUES (?,?);" key "TEXT")) - keys) - (sqlite3:execute db (conc - "CREATE TABLE IF NOT EXISTS runs (id INTEGER PRIMARY KEY, \n " - fieldstr (if havekeys "," "") " - runname TEXT DEFAULT 'norun', - contour TEXT DEFAULT '', - state TEXT DEFAULT '', - status TEXT DEFAULT '', - owner TEXT DEFAULT '', - event_time TIMESTAMP DEFAULT (strftime('%s','now')), - comment TEXT DEFAULT '', - fail_count INTEGER DEFAULT 0, - pass_count INTEGER DEFAULT 0, - last_update INTEGER DEFAULT (strftime('%s','now')), - CONSTRAINT runsconstraint UNIQUE (runname" (if havekeys "," "") keystr "));")) - (sqlite3:execute db "CREATE TRIGGER IF NOT EXISTS update_runs_trigger AFTER UPDATE ON runs - FOR EACH ROW - BEGIN - UPDATE runs SET last_update=(strftime('%s','now')) - WHERE id=old.id; - END;") - (sqlite3:execute db "CREATE TABLE IF NOT EXISTS run_stats ( - id INTEGER PRIMARY KEY, - run_id INTEGER, - state TEXT, - status TEXT, 
- count INTEGER, - last_update INTEGER DEFAULT (strftime('%s','now')))") - (sqlite3:execute db "CREATE TRIGGER IF NOT EXISTS update_run_stats_trigger AFTER UPDATE ON run_stats - FOR EACH ROW - BEGIN - UPDATE run_stats SET last_update=(strftime('%s','now')) - WHERE id=old.id; - END;") - (sqlite3:execute db "CREATE TABLE IF NOT EXISTS test_meta ( - id INTEGER PRIMARY KEY, - testname TEXT DEFAULT '', - author TEXT DEFAULT '', - owner TEXT DEFAULT '', - description TEXT DEFAULT '', - reviewed TIMESTAMP, - iterated TEXT DEFAULT '', - avg_runtime REAL, - avg_disk REAL, - tags TEXT DEFAULT '', - jobgroup TEXT DEFAULT 'default', - CONSTRAINT test_meta_constraint UNIQUE (testname));") - (sqlite3:execute db "CREATE TABLE IF NOT EXISTS tasks_queue (id INTEGER PRIMARY KEY, - action TEXT DEFAULT '', - owner TEXT, - state TEXT DEFAULT 'new', - target TEXT DEFAULT '', - name TEXT DEFAULT '', - testpatt TEXT DEFAULT '', - keylock TEXT, - params TEXT, - creation_time TIMESTAMP DEFAULT (strftime('%s','now')), - execution_time TIMESTAMP);") - ;; archive disk areas, cached info from [archive-disks] - (sqlite3:execute db "CREATE TABLE IF NOT EXISTS archive_disks ( - id INTEGER PRIMARY KEY, - archive_area_name TEXT, - disk_path TEXT, - last_df INTEGER DEFAULT -1, - last_df_time TIMESTAMP DEFAULT (strftime('%s','now')), - creation_time TIMESTAMP DEFAULT (strftime('%','now')));") - ;; individual bup (or tar) data chunks - (sqlite3:execute db "CREATE TABLE IF NOT EXISTS archive_blocks ( - id INTEGER PRIMARY KEY, - archive_disk_id INTEGER, - disk_path TEXT, - last_du INTEGER DEFAULT -1, - last_du_time TIMESTAMP DEFAULT (strftime('%s','now')), - creation_time TIMESTAMP DEFAULT (strftime('%','now')));") - ;; tests allocated to what chunks. reusing a chunk for a test/item_path is very efficient - ;; NB// the per run/test recording of where the archive is stored is done in the test - ;; record. 
- (sqlite3:execute db "CREATE TABLE IF NOT EXISTS archive_allocations ( - id INTEGER PRIMARY KEY, - archive_block_id INTEGER, - testname TEXT, - item_path TEXT, - creation_time TIMESTAMP DEFAULT (strftime('%','now')));") - ;; move this clean up call somewhere else - (sqlite3:execute db "DELETE FROM tasks_queue WHERE state='done' AND creation_time < ?;" (- (current-seconds)(* 24 60 60))) ;; remove older than 24 hrs - (sqlite3:execute db (conc "CREATE INDEX IF NOT EXISTS runs_index ON runs (runname" (if havekeys "," "") keystr ");")) - ;; (sqlite3:execute db "CREATE VIEW runs_tests AS SELECT * FROM runs INNER JOIN tests ON runs.id=tests.run_id;") - (sqlite3:execute db "CREATE TABLE IF NOT EXISTS extradat (id INTEGER PRIMARY KEY, run_id INTEGER, key TEXT, val TEXT);") - (sqlite3:execute db "CREATE TABLE IF NOT EXISTS metadat (id INTEGER PRIMARY KEY, var TEXT, val TEXT, - CONSTRAINT metadat_constraint UNIQUE (var));") - (sqlite3:execute db "CREATE TABLE IF NOT EXISTS access_log (id INTEGER PRIMARY KEY, user TEXT, accessed TIMESTAMP, args TEXT);") - ;; Must do this *after* running patch db !! No more. 
- ;; cannot use db:set-var since it will deadlock, hardwire the code here - (sqlite3:execute db "INSERT OR REPLACE INTO metadat (var,val) VALUES (?,?);" "MEGATEST_VERSION" (common:version-signature)) - (debug:print-info 11 *default-log-port* "db:initialize END") ;; )))) - - ;;====================================================================== - ;; R U N S P E C I F I C D B - ;;====================================================================== - - ;; (define (db:initialize-run-id-db db) - ;; (sqlite3:with-transaction - ;; db - ;; (lambda () - (sqlite3:execute db "CREATE TABLE IF NOT EXISTS tests - (id INTEGER PRIMARY KEY, - run_id INTEGER DEFAULT -1, - testname TEXT DEFAULT 'noname', - host TEXT DEFAULT 'n/a', - cpuload REAL DEFAULT -1, - diskfree INTEGER DEFAULT -1, - uname TEXT DEFAULT 'n/a', - rundir TEXT DEFAULT '/tmp/badname', - shortdir TEXT DEFAULT '/tmp/badname', - item_path TEXT DEFAULT '', - state TEXT DEFAULT 'NOT_STARTED', - status TEXT DEFAULT 'FAIL', - attemptnum INTEGER DEFAULT 0, - final_logf TEXT DEFAULT 'logs/final.log', - logdat TEXT DEFAULT '', - run_duration INTEGER DEFAULT 0, - comment TEXT DEFAULT '', - event_time TIMESTAMP DEFAULT (strftime('%s','now')), - fail_count INTEGER DEFAULT 0, - pass_count INTEGER DEFAULT 0, - archived INTEGER DEFAULT 0, -- 0=no, > 1=archive block id where test data can be found - last_update INTEGER DEFAULT (strftime('%s','now')), - CONSTRAINT testsconstraint UNIQUE (run_id, testname, item_path));") - ;; deprecated -- (sqlite3:execute db "CREATE INDEX IF NOT EXISTS tests_index ON tests (run_id, testname, item_path, uname);") - - (sqlite3:execute db "CREATE INDEX IF NOT EXISTS tests_run_id_index ON tests (run_id);") ;; new - (sqlite3:execute db "CREATE INDEX IF NOT EXISTS tests_testname_index ON tests (testname,item_path);") ;; new - (sqlite3:execute db "CREATE INDEX IF NOT EXISTS tests_state_status_index ON tests (state, status); ") ;; new - - (sqlite3:execute db "CREATE TRIGGER IF NOT EXISTS 
update_tests_trigger AFTER UPDATE ON tests - FOR EACH ROW - BEGIN - UPDATE tests SET last_update=(strftime('%s','now')) - WHERE id=old.id; - END;") - (sqlite3:execute db "CREATE TABLE IF NOT EXISTS test_steps - (id INTEGER PRIMARY KEY, - test_id INTEGER, - stepname TEXT, - state TEXT DEFAULT 'NOT_STARTED', - status TEXT DEFAULT 'n/a', - event_time TIMESTAMP, - comment TEXT DEFAULT '', - logfile TEXT DEFAULT '', - last_update INTEGER DEFAULT (strftime('%s','now')), - CONSTRAINT test_steps_constraint UNIQUE (test_id,stepname,state));") - (sqlite3:execute db "CREATE INDEX IF NOT EXISTS teststeps_index ON tests (run_id, testname, item_path);") - (sqlite3:execute db "CREATE TRIGGER IF NOT EXISTS update_teststeps_trigger AFTER UPDATE ON test_steps - FOR EACH ROW - BEGIN - UPDATE test_steps SET last_update=(strftime('%s','now')) - WHERE id=old.id; - END;") - (sqlite3:execute db "CREATE TABLE IF NOT EXISTS test_data (id INTEGER PRIMARY KEY, - test_id INTEGER, - category TEXT DEFAULT '', - variable TEXT, - value REAL, - expected REAL, - tol REAL, - units TEXT, - comment TEXT DEFAULT '', - status TEXT DEFAULT 'n/a', - type TEXT DEFAULT '', - last_update INTEGER DEFAULT (strftime('%s','now')), - CONSTRAINT test_data_constraint UNIQUE (test_id,category,variable));") - (sqlite3:execute db "CREATE INDEX IF NOT EXISTS test_data_index ON test_data (test_id);") - (sqlite3:execute db "CREATE TRIGGER IF NOT EXISTS update_test_data_trigger AFTER UPDATE ON test_data - FOR EACH ROW - BEGIN - UPDATE test_data SET last_update=(strftime('%s','now')) - WHERE id=old.id; - END;") - (sqlite3:execute db "CREATE TABLE IF NOT EXISTS test_rundat ( - id INTEGER PRIMARY KEY, - test_id INTEGER, - update_time TIMESTAMP, - cpuload INTEGER DEFAULT -1, - diskfree INTEGER DEFAULT -1, - diskusage INTGER DEFAULT -1, - run_duration INTEGER DEFAULT 0);") - (sqlite3:execute db "CREATE TABLE IF NOT EXISTS archives ( - id INTEGER PRIMARY KEY, - test_id INTEGER, - state TEXT DEFAULT 'new', - status TEXT DEFAULT 
'n/a', - archive_type TEXT DEFAULT 'bup', - du INTEGER, - archive_path TEXT);"))) - db)) ;; ) - -;;====================================================================== -;; A R C H I V E S -;;====================================================================== - -;; dneeded is minimum space needed, scan for existing archives that -;; are on disks with adequate space and already have this test/itempath -;; archived -;; -(define (db:archive-get-allocations dbstruct testname itempath dneeded) - (let* ((dbdat (db:get-db dbstruct)) ;; archive tables are in main.db - (db (db:dbdat-get-db dbdat)) - (res '()) - (blocks '())) ;; a block is an archive chunck that can be added too if there is space - (sqlite3:for-each-row - (lambda (id archive-disk-id disk-path last-du last-du-time) - (set! res (cons (vector id archive-disk-id disk-path last-du last-du-time) res))) - db - "SELECT b.id,b.archive_disk_id,b.disk_path,b.last_du,b.last_du_time FROM archive_blocks AS b - INNER JOIN archive_allocations AS a ON a.archive_block_id=b.id - WHERE a.testname=? AND a.item_path=?;" - testname itempath) - ;; Now res has list of candidate paths, look in archive_disks for candidate with potential free space - (if (null? res) - '() - (sqlite3:for-each-row - (lambda (id archive-area-name disk-path last-df last-df-time) - (set! blocks (cons (vector id archive-area-name disk-path last-df last-df-time) blocks))) - db - (conc - "SELECT d.id,d.archive_area_name,disk_path,last_df,last_df_time FROM archive_disks AS d - INNER JOIN archive_blocks AS b ON d.id=b.archive_disk_id - WHERE b.id IN (" (string-intersperse (map conc res) ",") ") AND - last_df > ?;") - dneeded)) - (stack-push! 
(dbr:dbstruct-dbstack dbstruct) dbdat) - blocks)) - -;; returns id of the record, register a disk allocated to archiving and record it's last known -;; available space -;; -(define (db:archive-register-disk dbstruct bdisk-name bdisk-path df) - (let* ((dbdat (db:get-db dbstruct)) ;; archive tables are in main.db - (db (db:dbdat-get-db dbdat)) - (res #f)) - (sqlite3:for-each-row - (lambda (id) - (set! res id)) - db - "SELECT id FROM archive_disks WHERE archive_area_name=? AND disk_path=?;" - bdisk-name bdisk-path) - (if res ;; record exists, update df and return id - (begin - (sqlite3:execute db "UPDATE archive_disks SET last_df=?,last_df_time=(strftime('%s','now')) - WHERE archive_area_name=? AND disk_path=?;" - df bdisk-name bdisk-path) - (stack-push! (dbr:dbstruct-dbstack dbstruct) dbdat) - res) - (begin - (sqlite3:execute - db - "INSERT OR REPLACE INTO archive_disks (archive_area_name,disk_path,last_df) - VALUES (?,?,?);" - bdisk-name bdisk-path df) - (stack-push! (dbr:dbstruct-dbstack dbstruct) dbdat) - (db:archive-register-disk dbstruct bdisk-name bdisk-path df))))) - -;; record an archive path created on a given archive disk (identified by it's bdisk-id) -;; if path starts with / then it is full, otherwise it is relative to the archive disk -;; preference is to store the relative path. -;; -(define (db:archive-register-block-name dbstruct bdisk-id archive-path #!key (du #f)) - (let* ((dbdat (db:get-db dbstruct)) ;; archive tables are in main.db - (db (db:dbdat-get-db dbdat)) - (res #f)) - ;; first look to see if this path is already registered - (sqlite3:for-each-row - (lambda (id) - (set! res id)) - db - "SELECT id FROM archive_blocks WHERE archive_disk_id=? AND disk_path=?;" - bdisk-id archive-path) - (if res ;; record exists, update du if applicable and return res - (begin - (if du (sqlite3:execute db "UPDATE archive_blocks SET last_du=?,last_du_time=(strftime('%s','now')) - WHERE archive_disk_id=? 
AND disk_path=?;" - bdisk-id archive-path du)) - res) - (begin - (sqlite3:execute db "INSERT OR REPLACE INTO archive_blocks (archive_disk_id,disk_path,last_du) - VALUES (?,?,?);" - bdisk-id archive-path (or du 0)) - (db:archive-register-block-name dbstruct bdisk-id archive-path du: du))) - (stack-push! (dbr:dbstruct-dbstack dbstruct) dbdat) - res)) - - -;; The "archived" field in tests is overloaded; 0 = not archived, > 0 archived in block with given id -;; -(define (db:test-set-archive-block-id dbstruct run-id test-id archive-block-id) - (db:with-db - dbstruct - run-id - #f - (lambda (db) - (sqlite3:execute db "UPDATE tests SET archived=? WHERE id=?;" - archive-block-id test-id)))) - -;; Look up the archive block info given a block-id -;; -(define (db:test-get-archive-block-info dbstruct archive-block-id) - (db:with-db - dbstruct - #f - #f - (lambda (db) - (let ((res #f)) - (sqlite3:for-each-row - ;; 0 1 2 3 4 5 - (lambda (id archive-disk-id disk-path last-du last-du-time creation-time) - (set! res (vector id archive-disk-id disk-path last-du last-du-time creation-time))) - db - "SELECT id,archive_disk_id,disk_path,last_du,last_du_time,creation_time FROM archive_blocks WHERE id=?;" - archive-block-id) - res)))) - -;; (define (db:archive-allocate-testsuite/area-to-block block-id testsuite-name areakey) -;; (let* ((dbdat (db:get-db dbstruct #f)) ;; archive tables are in main.db -;; (db (db:dbdat-get-db dbdat)) -;; (res '()) -;; (blocks '())) ;; a block is an archive chunck that can be added too if there is space -;; (sqlite3:for-each-row #f) - -;;====================================================================== -;; L O G G I N G D B -;;====================================================================== - -(define (open-logging-db) - (let* ((dbpath (conc (if *toppath* (conc *toppath* "/") "") "logging.db")) ;; fname) - (dbexists (common:file-exists? 
dbpath)) - (db (sqlite3:open-database dbpath)) - (handler (sqlite3:make-busy-timeout (if (args:get-arg "-override-timeout") - (string->number (args:get-arg "-override-timeout")) - 136000)))) ;; 136000))) - (sqlite3:set-busy-handler! db handler) - (if (not dbexists) - (begin - (sqlite3:execute db "CREATE TABLE IF NOT EXISTS log (id INTEGER PRIMARY KEY,event_time TIMESTAMP DEFAULT (strftime('%s','now')),logline TEXT,pwd TEXT,cmdline TEXT,pid INTEGER);") - (db:set-sync db) ;; (sqlite3:execute db (conc "PRAGMA synchronous = 0;")) - )) - db)) - -(define (db:log-local-event . loglst) - (let ((logline (apply conc loglst))) - (db:log-event logline))) - -(define (db:log-event logline) - (let ((db (open-logging-db))) - (sqlite3:execute db "INSERT INTO log (logline,pwd,cmdline,pid) VALUES (?,?,?,?);" - logline - (current-directory) - (string-intersperse (argv) " ") - (current-process-id)) - (sqlite3:finalize! db) - logline)) - -;;====================================================================== -;; D B U T I L S -;;====================================================================== - -;;====================================================================== -;; M A I N T E N A N C E -;;====================================================================== - -(define (db:have-incompletes? dbstruct run-id ovr-deadtime) - (let* ((incompleted '()) - (oldlaunched '()) - (toplevels '()) - (deadtime-str (configf:lookup *configdat* "setup" "deadtime")) - (deadtime (if (and deadtime-str - (string->number deadtime-str)) - (string->number deadtime-str) - 72000))) ;; twenty hours - (db:with-db - dbstruct #f #f - (lambda (db) - (if (number? ovr-deadtime)(set! 
deadtime ovr-deadtime)) - - ;; in RUNNING or REMOTEHOSTSTART for more than 10 minutes - ;; - ;; HOWEVER: this code in run:test seems to work fine - ;; (> (- (current-seconds)(+ (db:test-get-event_time testdat) - ;; (db:test-get-run_duration testdat))) - ;; 600) - ;; (db:delay-if-busy dbdat) - (sqlite3:for-each-row - (lambda (test-id run-dir uname testname item-path) - (if (and (equal? uname "n/a") - (equal? item-path "")) ;; this is a toplevel test - ;; what to do with toplevel? call rollup? - (begin - (set! toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels)) - (debug:print-info 0 *default-log-port* "Found old toplevel test in RUNNING state, test-id=" test-id)) - (set! incompleted (cons (list test-id run-dir uname testname item-path run-id) incompleted)))) - db - "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > (run_duration + ?) AND state IN ('RUNNING','REMOTEHOSTSTART');" - run-id deadtime) - - ;; in LAUNCHED for more than one day. Could be long due to job queues TODO/BUG: Need override for this in config - ;; - ;; (db:delay-if-busy dbdat) - (sqlite3:for-each-row - (lambda (test-id run-dir uname testname item-path) - (if (and (equal? uname "n/a") - (equal? item-path "")) ;; this is a toplevel test - ;; what to do with toplevel? call rollup? - (set! toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels)) - (set! oldlaunched (cons (list test-id run-dir uname testname item-path run-id) oldlaunched)))) - db - "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > 86400 AND state IN ('LAUNCHED');" - run-id) - - (debug:print-info 18 *default-log-port* "Found " (length oldlaunched) " old LAUNCHED items, " (length toplevels) " old LAUNCHED toplevel tests and " (length incompleted) " tests marked RUNNING but apparently dead.") - (if (and (null? incompleted) - (null? oldlaunched) - (null? 
toplevels)) - #f - #t))))) - -;; given a launch delay (minimum time from last launch) return amount of time to wait -;; -;; (define (db:launch-delay-left dbstruct run-id launch-delay) - - -;; select end_time-now from -;; (select testname,item_path,event_time+run_duration as -;; end_time,strftime('%s','now') as now from tests where state in -;; ('RUNNING','REMOTEHOSTSTART','LAUNCHED')); - -(define (db:find-and-mark-incomplete dbstruct run-id ovr-deadtime) - (let* ((incompleted '()) - (oldlaunched '()) - (toplevels '()) - (deadtime-trim (or ovr-deadtime (configf:lookup-number *configdat* "setup" "deadtime"))) - (server-start-allowance 200) - (server-overloaded-budget 200) - (launch-monitor-off-time (or (configf:lookup-number *configdat* "setup" "test-stats-update-period") 30)) - (launch-monitor-on-time-budget 30) - (launch-monitor-period (+ launch-monitor-off-time launch-monitor-on-time-budget server-overloaded-budget)) - (remotehoststart-deadtime-default (+ server-start-allowance server-overloaded-budget 30)) - (remotehoststart-deadtime (or deadtime-trim remotehoststart-deadtime-default)) - (running-deadtime-default (+ server-start-allowance (* 2 launch-monitor-period))) - (running-deadtime (or deadtime-trim running-deadtime-default)) ;; two minutes (30 seconds between updates, this leaves 3x grace period) - ) - (db:with-db - dbstruct #f #f - (lambda (db) - ;; in RUNNING or REMOTEHOSTSTART for more than 10 minutes - ;; - ;; HOWEVER: this code in run:test seems to work fine - ;; (> (- (current-seconds)(+ (db:test-get-event_time testdat) - ;; (db:test-get-run_duration testdat))) - ;; 600) - ;; (db:delay-if-busy dbdat) - (sqlite3:for-each-row - (lambda (test-id run-dir uname testname item-path event-time run-duration) - (if (and (equal? uname "n/a") - (equal? item-path "")) ;; this is a toplevel test - ;; what to do with toplevel? call rollup? - (begin - (set! 
toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels)) - (debug:print-info 0 *default-log-port* "Found old toplevel test in RUNNING state, test-id=" test-id)) - (begin - (set! incompleted (cons (list test-id run-dir uname testname item-path run-id) incompleted)) - (debug:print-info 0 *default-log-port* "Found old test in RUNNING state, test-id=" test-id" exceeded running-deadtime "running-deadtime" now="(current-seconds)" event-time="event-time" run-duration="run-duration)))) - db - "SELECT id,rundir,uname,testname,item_path,event_time,run_duration FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > (run_duration + ?) AND state IN ('RUNNING');" - run-id running-deadtime) - - - (sqlite3:for-each-row - (lambda (test-id run-dir uname testname item-path event-time run-duration) - (if (and (equal? uname "n/a") - (equal? item-path "")) ;; this is a toplevel test - ;; what to do with toplevel? call rollup? - (begin - (set! toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels)) - (debug:print-info 0 *default-log-port* "Found old toplevel test in RUNNING state, test-id=" test-id)) - (begin - (debug:print-info 0 *default-log-port* "Found old test in REMOTEHOSTSTART state, test-id=" test-id" exceeded running-deadtime "running-deadtime" now="(current-seconds)" event-time="event-time" run-duration="run-duration) - (set! incompleted (cons (list test-id run-dir uname testname item-path run-id) incompleted))))) - db - "SELECT id,rundir,uname,testname,item_path,event_time,run_duration FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > (run_duration + ?) AND state IN ('REMOTEHOSTSTART');" - run-id remotehoststart-deadtime) - - ;; in LAUNCHED for more than one day. Could be long due to job queues TODO/BUG: Need override for this in config - ;; - ;; (db:delay-if-busy dbdat) - (sqlite3:for-each-row - (lambda (test-id run-dir uname testname item-path) - (if (and (equal? uname "n/a") - (equal? 
item-path "")) ;; this is a toplevel test - ;; what to do with toplevel? call rollup? - (set! toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels)) - (begin - (debug:print-info 0 *default-log-port* "Found old test in LAUNCHED state, test-id=" test-id" 1 day since event_time marked") - (set! oldlaunched (cons (list test-id run-dir uname testname item-path run-id) oldlaunched))))) - db - "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > 86400 AND state IN ('LAUNCHED');" - run-id) - - (debug:print-info 18 *default-log-port* "Found " (length oldlaunched) " old LAUNCHED items, " (length toplevels) " old LAUNCHED toplevel tests and " (length incompleted) " tests marked RUNNING but apparently dead.") - - ;; These are defunct tests, do not do all the overhead of set-state-status. Force them to INCOMPLETE. - ;; - ;; (db:delay-if-busy dbdat) - (let* (;; (min-incompleted (filter (lambda (x) - ;; (let* ((testpath (cadr x)) - ;; (tdatpath (conc testpath "/testdat.db")) - ;; (dbexists (common:file-exists? 
tdatpath))) - ;; (or (not dbexists) ;; if no file then something wrong - mark as incomplete - ;; (> (- (current-seconds)(file-modification-time tdatpath)) 600)))) ;; no change in 10 minutes to testdat.db - she's dead Jim - ;; incompleted)) - (min-incompleted-ids (map car incompleted)) ;; do 'em all - (all-ids (append min-incompleted-ids (map car oldlaunched)))) - (if (> (length all-ids) 0) - (begin - (debug:print 0 *default-log-port* "WARNING: Marking test(s); " (string-intersperse (map conc all-ids) ", ") " as DEAD") - (for-each - (lambda (test-id) - (db:set-state-status-and-roll-up-items dbstruct run-id test-id 'foo "COMPLETED" "DEAD" "Test stopped responding while in RUNNING or REMOTEHOSTSTART; presumed dead.")) - ;;(db:test-set-state-status dbstruct run-id test-id "COMPLETED" "DEAD" "Test stopped responding while in RUNNING or REMOTEHOSTSTART; presumed dead.")) ;; fix for one aspect of Randy's ticket 1405717332 ;; TODO - fix problem where test goes to COMPLETED/DEAD while in progress, only later to go to COMPLETED/PASS. ref ticket 220546828 - all-ids)))))))) - -;; ALL REPLACED BY THE BLOCK ABOVE -;; -;; (sqlite3:execute -;; db -;; (conc "UPDATE tests SET state='INCOMPLETE' WHERE run_id=? 
AND id IN (" -;; (string-intersperse (map conc all-ids) ",") -;; ");") -;; run-id)))) -;; -;; ;; Now do rollups for the toplevel tests -;; ;; -;; ;; (db:delay-if-busy dbdat) -;; (for-each -;; (lambda (toptest) -;; (let ((test-name (list-ref toptest 3))) -;; ;; (run-id (list-ref toptest 5))) -;; (db:top-test-set-per-pf-counts dbstruct run-id test-name))) -;; toplevels))) - -;; BUG: Probably broken - does not explicitly use run-id in the query -;; -(define (db:top-test-set-per-pf-counts dbstruct run-id test-name) - (db:general-call dbstruct 'top-test-set-per-pf-counts (list test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name))) - -;; Clean out old junk and vacuum the database -;; -;; Ultimately do something like this: -;; -;; 1. Look at test records either deleted or part of deleted run: -;; a. If test dir exists, set the the test to state='UNKNOWN', Set the run to 'unknown' -;; b. If test dir gone, delete the test record -;; 2. Look at run records -;; a. If have tests that are not deleted, set state='unknown' -;; b. .... 
-;; -(define (db:clean-up dbdat) - ;; (debug:print 0 *default-log-port* "WARNING: db clean up not fully ported to v1.60, cleanup action will be on megatest.db") - (let* ((keep-record-age ( - (current-seconds) (common:hms-string->seconds (or (configf:lookup *configdat* "setup" "delete-record-age") "30d")))) - (db (db:dbdat-get-db dbdat)) - (count-stmt (sqlite3:prepare db "SELECT (SELECT count(id) FROM tests)+(SELECT count(id) FROM runs);")) - (statements - (map (lambda (stmt) - (sqlite3:prepare db stmt)) - (list - ;; delete all tests that belong to runs that are 'deleted' - (conc "DELETE FROM tests WHERE run_id in (SELECT id FROM runs WHERE state='deleted') and last_update < " keep-record-age ";") - ;; delete all tests that are 'DELETED' - (conc "DELETE FROM tests WHERE state='DELETED' and last_update < " keep-record-age " ;") - ;; delete all tests that have no run - (conc "DELETE FROM tests WHERE run_id NOT IN (SELECT DISTINCT id FROM runs) and last_update < " keep-record-age "; ") - ;; delete all runs that are state='deleted' - (conc "DELETE FROM runs WHERE state='deleted' and last_update < " keep-record-age ";") - ;; delete empty runs - (conc "DELETE FROM runs WHERE id NOT IN (SELECT DISTINCT r.id FROM runs AS r INNER JOIN tests AS t ON t.run_id=r.id) and last_update < " keep-record-age ";") - ;; remove orphaned test_rundat entries - (conc "DELETE FROM test_rundat where test_id NOT IN (SELECT id FROM tests);") - ;; remove orphaned test_steps entries - (conc "DELETE FROM test_steps WHERE test_id NOT IN (SELECT id FROM tests);") - ;; remove orphaned test_dat entries - (conc "DELETE FROM test_data WHERE test_id NOT IN (SELECT id FROM tests);") - - )))) - ;; (db:delay-if-busy dbdat) - ;(debug:print-info 0 *default-log-port* statements) - (sqlite3:with-transaction - db - (lambda () - (sqlite3:for-each-row (lambda (tot) - (debug:print-info 0 *default-log-port* "Records count before clean: " tot)) - count-stmt) - (map sqlite3:execute statements) - (sqlite3:for-each-row 
(lambda (tot) - (debug:print-info 0 *default-log-port* "Records count after clean: " tot)) - count-stmt))) - (map sqlite3:finalize! statements) - (sqlite3:finalize! count-stmt) - ;; (db:find-and-mark-incomplete db) - ;; (db:delay-if-busy dbdat) - (sqlite3:execute db "VACUUM;"))) - -;; Clean out old junk and vacuum the database -;; -;; Ultimately do something like this: -;; -;; 1. Look at test records either deleted or part of deleted run: -;; a. If test dir exists, set the the test to state='UNKNOWN', Set the run to 'unknown' -;; b. If test dir gone, delete the test record -;; 2. Look at run records -;; a. If have tests that are not deleted, set state='unknown' -;; b. .... -;; -(define (db:clean-up-rundb dbdat) - ;; (debug:print 0 *default-log-port* "WARNING: db clean up not fully ported to v1.60, cleanup action will be on megatest.db") - (let* ((db (db:dbdat-get-db dbdat)) - (count-stmt (sqlite3:prepare db "SELECT (SELECT count(id) FROM tests);")) - (statements - (map (lambda (stmt) - (sqlite3:prepare db stmt)) - (list - ;; delete all tests that belong to runs that are 'deleted' - ;; (conc "DELETE FROM tests WHERE run_id NOT IN (" (string-intersperse (map conc valid-runs) ",") ");") - ;; delete all tests that are 'DELETED' - "DELETE FROM tests WHERE state='DELETED';" - )))) - ;; (db:delay-if-busy dbdat) - (sqlite3:with-transaction - db - (lambda () - (sqlite3:for-each-row (lambda (tot) - (debug:print-info 0 *default-log-port* "Records count before clean: " tot)) - count-stmt) - (map sqlite3:execute statements) - (sqlite3:for-each-row (lambda (tot) - (debug:print-info 0 *default-log-port* "Records count after clean: " tot)) - count-stmt))) - (map sqlite3:finalize! statements) - (sqlite3:finalize! count-stmt) - ;; (db:find-and-mark-incomplete db) - ;; (db:delay-if-busy dbdat) - (sqlite3:execute db "VACUUM;"))) - -;; Clean out old junk and vacuum the database -;; -;; Ultimately do something like this: -;; -;; 1. 
Look at test records either deleted or part of deleted run: -;; a. If test dir exists, set the the test to state='UNKNOWN', Set the run to 'unknown' -;; b. If test dir gone, delete the test record -;; 2. Look at run records -;; a. If have tests that are not deleted, set state='unknown' -;; b. .... -;; -(define (db:clean-up-maindb dbdat) - ;; (debug:print 0 *default-log-port* "WARNING: db clean up not fully ported to v1.60, cleanup action will be on megatest.db") - (let* ((db (db:dbdat-get-db dbdat)) - (count-stmt (sqlite3:prepare db "SELECT (SELECT count(id) FROM runs);")) - (statements - (map (lambda (stmt) - (sqlite3:prepare db stmt)) - (list - ;; delete all tests that belong to runs that are 'deleted' - ;; (conc "DELETE FROM tests WHERE run_id NOT IN (" (string-intersperse (map conc valid-runs) ",") ");") - ;; delete all tests that are 'DELETED' - "DELETE FROM runs WHERE state='deleted';" - ))) - (dead-runs '())) - (sqlite3:for-each-row - (lambda (run-id) - (set! dead-runs (cons run-id dead-runs))) - db - "SELECT id FROM runs WHERE state='deleted';") - ;; (db:delay-if-busy dbdat) - (sqlite3:with-transaction - db - (lambda () - (sqlite3:for-each-row (lambda (tot) - (debug:print-info 0 *default-log-port* "Records count before clean: " tot)) - count-stmt) - (map sqlite3:execute statements) - (sqlite3:for-each-row (lambda (tot) - (debug:print-info 0 *default-log-port* "Records count after clean: " tot)) - count-stmt))) - (map sqlite3:finalize! statements) - (sqlite3:finalize! 
count-stmt) - ;; (db:find-and-mark-incomplete db) - ;; (db:delay-if-busy dbdat) - (sqlite3:execute db "VACUUM;") - dead-runs)) - -;;====================================================================== -;; M E T A G E T A N D S E T V A R S -;;====================================================================== - -;; returns number if string->number is successful, string otherwise -;; also updates *global-delta* -;; -(define (db:get-var dbstruct var) - (let* ((res #f)) - (db:with-db - dbstruct #f #f - (lambda (db) - (sqlite3:for-each-row - (lambda (val) - (set! res val)) - db - "SELECT val FROM metadat WHERE var=?;" var) - ;; convert to number if can - (if (string? res) - (let ((valnum (string->number res))) - (if valnum (set! res valnum)))) - res)))) - -;; This was part of db:get-var. It was used to estimate the load on -;; the database files. -;; -;; scale by 10, average with current value. -;; (set! *global-delta* (/ (+ *global-delta* (* (- (current-milliseconds) start-ms) -;; (if throttle throttle 0.01))) -;; 2)) -;; (if (> (abs (- *last-global-delta-printed* *global-delta*)) 0.08) ;; don't print all the time, only if it changes a bit -;; (begin -;; (debug:print-info 4 *default-log-port* "launch throttle factor=" *global-delta*) -;; (set! *last-global-delta-printed* *global-delta*))) - -(define (db:set-var dbstruct var val) - (db:with-db dbstruct #f #t - (lambda (db) - (sqlite3:execute db "INSERT OR REPLACE INTO metadat (var,val) VALUES (?,?);" var val)))) - -(define (db:del-var dbstruct var) - (db:with-db dbstruct #f #t - (lambda (db) - (sqlite3:execute db "DELETE FROM metadat WHERE var=?;" var)))) - -;;====================================================================== -;; no-sync.db - small bits of data to be shared between servers -;;====================================================================== - -(define (db:open-no-sync-db) - (let* ((dbpath (db:dbfile-path)) - (dbname (conc dbpath "/no-sync.db")) - (db-exists (common:file-exists? 
dbname)) - (db (sqlite3:open-database dbname))) - (sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 136000)) - (if (not db-exists) - (begin - (sqlite3:execute db "PRAGMA synchronous = 0;") - (sqlite3:execute db "CREATE TABLE IF NOT EXISTS no_sync_metadat (var TEXT,val TEXT, CONSTRAINT no_sync_metadat_constraint UNIQUE (var));") - (sqlite3:execute db "PRAGMA journal_mode=WAL;"))) - db)) - -;; if we are not a server create a db handle. this is not finalized -;; so watch for problems. I'm still not clear if it is needed to manually -;; finalize sqlite3 dbs with the sqlite3 egg. -;; -(define (db:no-sync-db db-in) - (mutex-lock! *db-access-mutex*) - (let ((res (if db-in - db-in - (let ((db (db:open-no-sync-db))) - (set! *no-sync-db* db) - db)))) - (mutex-unlock! *db-access-mutex*) - res)) - -(define (db:no-sync-set db var val) - (sqlite3:execute (db:no-sync-db db) "INSERT OR REPLACE INTO no_sync_metadat (var,val) VALUES (?,?);" var val)) - -(define (db:no-sync-del! db var) - (sqlite3:execute (db:no-sync-db db) "DELETE FROM no_sync_metadat WHERE var=?;" var)) - -(define (db:no-sync-get/default db var default) - (let ((res default)) - (sqlite3:for-each-row - (lambda (val) - (set! res val)) - (db:no-sync-db db) - "SELECT val FROM no_sync_metadat WHERE var=?;" - var) - (if res - (let ((newres (if (string? res) - (string->number res) - #f))) - (if newres - newres - res)) - res))) - -(define (db:no-sync-close-db db) - (db:safely-close-sqlite3-db db)) - -;; transaction protected lock aquisition -;; either: -;; fails returns (#f . lock-creation-time) -;; succeeds (returns (#t . lock-creation-time) -;; use (db:no-sync-del! db keyname) to release the lock -;; -(define (db:no-sync-get-lock db-in keyname) - (let ((db (db:no-sync-db db-in))) - (sqlite3:with-transaction - db - (lambda () - (handle-exceptions - exn - (let ((lock-time (current-seconds))) - (sqlite3:execute db "INSERT INTO no_sync_metadat (var,val) VALUES(?,?);" keyname lock-time) - `(#t . ,lock-time)) - `(#f . 
,(sqlite3:first-result db "SELECT val FROM no_sync_metadat WHERE var=?;" keyname))))))) - - - -;; use a global for some primitive caching, it is just silly to -;; re-read the db over and over again for the keys since they never -;; change - -;; why get the keys from the db? why not get from the *configdat* -;; using keys:config-get-fields? - -(define (db:get-keys dbstruct) - (if *db-keys* *db-keys* - (let ((res '())) - (db:with-db dbstruct #f #f - (lambda (db) - (sqlite3:for-each-row - (lambda (key) - (set! res (cons key res))) - db - "SELECT fieldname FROM keys ORDER BY id DESC;"))) - (set! *db-keys* res) - res))) - -;; look up values in a header/data structure -(define (db:get-value-by-header row header field) - (if (or (null? header) (not row)) - #f - (let loop ((hed (car header)) - (tal (cdr header)) - (n 0)) - (if (equal? hed field) - (handle-exceptions - exn - (begin - (debug:print 0 *default-log-port* "WARNING: attempt to read non-existant field, row=" row " header=" header " field=" field) - #f) - (vector-ref row n)) - (if (null? tal) #f (loop (car tal)(cdr tal)(+ n 1))))))) - -;; Accessors for the header/data structure -;; get rows and header from -(define (db:get-header vec)(vector-ref vec 0)) -(define (db:get-rows vec)(vector-ref vec 1)) - -;;====================================================================== -;; R U N S -;;====================================================================== - - - - - -(define (db:get-run-times dbstruct run-patt target-patt) -(let ((res `()) - (qry (conc "select runname, (max(end_time)-min(event_time))/60 as runtime, target from (select runname, run_id,tests.event_time,tests.event_time+run_duration AS end_time, " (string-join (db:get-keys dbstruct) " || '/' || ") " as target from tests inner join runs on tests.run_id = runs.id where runs.runname like ? and target like ?) 
group by run_id ;"))) -;(print qry) -(db:with-db - dbstruct - #f ;; this is for the main runs db - #f ;; does not modify db - (lambda (db) - (sqlite3:for-each-row - (lambda (runname runtime target ) - (set! res (cons (vector runname runtime target) res))) - db - qry - run-patt target-patt) - - res)))) - - - -(define (db:get-run-name-from-id dbstruct run-id) - (db:with-db - dbstruct - #f ;; this is for the main runs db - #f ;; does not modify db - (lambda (db) - (let ((res #f)) - (sqlite3:for-each-row - (lambda (runname) - (set! res runname)) - db - "SELECT runname FROM runs WHERE id=?;" - run-id) - res)))) - -(define (db:get-run-key-val dbstruct run-id key) - (db:with-db - dbstruct - #f - #f - (lambda (db) - (let ((res #f)) - (sqlite3:for-each-row - (lambda (val) - (set! res val)) - db - (conc "SELECT " key " FROM runs WHERE id=?;") - run-id) - res)))) - -;; register a test run with the db, this accesses the main.db and does NOT -;; use server api -;; -(define (db:register-run dbstruct keyvals runname state status user contour-in) - (let* ((keys (map car keyvals)) - (keystr (keys->keystr keys)) - (contour (or contour-in "")) ;; empty string to force no hierarcy and be backwards compatible. - (comma (if (> (length keys) 0) "," "")) - (andstr (if (> (length keys) 0) " AND " "")) - (valslots (keys->valslots keys)) ;; ?,?,? ... - (allvals (append (list runname state status user contour) (map cadr keyvals))) - (qryvals (append (list runname) (map cadr keyvals))) - (key=?str (string-intersperse (map (lambda (k)(conc k "=?")) keys) " AND "))) - (debug:print 3 *default-log-port* "keys: " keys " allvals: " allvals " keyvals: " keyvals " key=?str is " key=?str) - (debug:print 2 *default-log-port* "NOTE: using target " (string-intersperse (map cadr keyvals) "/") " for this run") - (if (and runname (null? 
(filter (lambda (x)(not x)) keyvals))) ;; there must be a better way to "apply and" - (db:with-db - dbstruct #f #f - (lambda (db) - (let ((res #f)) - (apply sqlite3:execute db (conc "INSERT OR IGNORE INTO runs (runname,state,status,owner,event_time,contour" comma keystr ") VALUES (?,?,?,?,strftime('%s','now'),?" comma valslots ");") - allvals) - (apply sqlite3:for-each-row - (lambda (id) - (set! res id)) - db - (let ((qry (conc "SELECT id FROM runs WHERE (runname=? " andstr key=?str ");"))) - qry) - qryvals) - (sqlite3:execute db "UPDATE runs SET state=?,status=?,event_time=strftime('%s','now') WHERE id=? AND state='deleted';" state status res) - res))) - (begin - (debug:print-error 0 *default-log-port* "Called without all necessary keys") - #f)))) - -;; replace header and keystr with a call to runs:get-std-run-fields -;; -;; keypatts: ( (KEY1 "abc%def")(KEY2 "%") ) -;; runpatts: patt1,patt2 ... -;; -(define (db:get-runs dbstruct runpatt count offset keypatts) - (let* ((res '()) - (keys (db:get-keys dbstruct)) - (runpattstr (db:patt->like "runname" runpatt)) - (remfields (list "id" "runname" "state" "status" "owner" "event_time")) - (header (append keys remfields)) - (keystr (conc (keys->keystr keys) "," - (string-intersperse remfields ","))) - (qrystr (conc "SELECT " keystr " FROM runs WHERE (" runpattstr ") " ;; runname LIKE ? " - ;; Generate: " AND x LIKE 'keypatt' ..." - (if (null? keypatts) "" - (conc " AND " - (string-join - (map (lambda (keypatt) - (let ((key (car keypatt)) - (patt (cadr keypatt))) - (db:patt->like key patt))) - keypatts) - " AND "))) - " AND state != 'deleted' ORDER BY event_time DESC " - (if (number? count) - (conc " LIMIT " count) - "") - (if (number? offset) - (conc " OFFSET " offset) - "")))) - (debug:print-info 11 *default-log-port* "db:get-runs START qrystr: " qrystr " keypatts: " keypatts " offset: " offset " limit: " count) - (db:with-db dbstruct #f #f - (lambda (db) - (sqlite3:for-each-row - (lambda (a . x) - (set! 
res (cons (apply vector a x) res))) - db - qrystr - ))) - (debug:print-info 11 *default-log-port* "db:get-runs END qrystr: " qrystr " keypatts: " keypatts " offset: " offset " limit: " count) - (vector header res))) - - -(define-record simple-run target id runname state status owner event_time) -(define-record-printer (simple-run x out) - (fprintf out "#,(simple-run ~S ~S ~S ~S)" - (simple-run-target x) (simple-run-id x) (simple-run-runname x) (time->string (seconds->local-time (simple-run-event_time x) )))) - -;; simple get-runs -;; -(define (db:simple-get-runs dbstruct runpatt count offset target) - (let* ((res '()) - (keys (db:get-keys dbstruct)) - (runpattstr (db:patt->like "runname" runpatt)) - (remfields (list "id" "runname" "state" "status" "owner" "event_time")) - (targstr (string-intersperse keys "||'/'||")) - (keystr (conc targstr " AS target," - (string-intersperse remfields ","))) - (qrystr (conc "SELECT " keystr " FROM runs WHERE (" runpattstr ") " ;; runname LIKE ? " - ;; Generate: " AND x LIKE 'keypatt' ..." - " AND target LIKE '" target "'" - " AND state != 'deleted' ORDER BY event_time DESC " - (if (number? count) - (conc " LIMIT " count) - "") - (if (number? offset) - (conc " OFFSET " offset) - "")))) - (debug:print-info 11 *default-log-port* "db:get-runs START qrystr: " qrystr " target: " target " offset: " offset " limit: " count) - (db:with-db dbstruct #f #f - (lambda (db) - (sqlite3:for-each-row - (lambda (target id runname state status owner event_time) - (set! res (cons (make-simple-run target id runname state status owner event_time) res))) - db - qrystr - ))) - (debug:print-info 11 *default-log-port* "db:get-runs END qrystr: " qrystr " target: " target " offset: " offset " limit: " count) - res)) - -;; TODO: Switch this to use max(update_time) from each run db? 
Then if using a server there is no disk traffic (using inmem db) -;; -(define (db:get-changed-run-ids since-time) - (let* ((dbdir (db:dbfile-path)) ;; (configf:lookup *configdat* "setup" "dbdir")) - (alldbs (glob (conc dbdir "/[0-9]*.db"))) - (changed (filter (lambda (dbfile) - (> (file-modification-time dbfile) since-time)) - alldbs))) - (delete-duplicates - (map (lambda (dbfile) - (let* ((res (string-match ".*\\/(\\d)*\\.db" dbfile))) - (if res - (string->number (cadr res)) - (begin - (debug:print 2 *default-log-port* "WARNING: Failed to process " dbfile " for run-id") - 0)))) - changed)))) - -;; Get all targets from the db -;; -(define (db:get-targets dbstruct) - (let* ((res '()) - (keys (db:get-keys dbstruct)) - (header keys) ;; (map key:get-fieldname keys)) - (keystr (keys->keystr keys)) - (qrystr (conc "SELECT " keystr " FROM runs WHERE state != 'deleted';")) - (seen (make-hash-table))) - (db:with-db - dbstruct - #f - #f - (lambda (db) - (sqlite3:for-each-row - (lambda (a . x) - (let ((targ (cons a x))) - (if (not (hash-table-ref/default seen targ #f)) - (begin - (hash-table-set! seen targ #t) - (set! res (cons (apply vector targ) res)))))) - db - qrystr) - (debug:print-info 11 *default-log-port* "db:get-targets END qrystr: " qrystr ) - (vector header res))))) - -;; just get count of runs -(define (db:get-num-runs dbstruct runpatt) - (db:with-db - dbstruct - #f - #f - (lambda (db) - (let ((numruns 0)) - (debug:print-info 11 *default-log-port* "db:get-num-runs START " runpatt) - (sqlite3:for-each-row - (lambda (count) - (set! numruns count)) - db - "SELECT COUNT(id) FROM runs WHERE runname LIKE ? 
AND state != 'deleted';" runpatt) - (debug:print-info 11 *default-log-port* "db:get-num-runs END " runpatt) - numruns)))) - -;; just get count of runs -(define (db:get-runs-cnt-by-patt dbstruct runpatt targetpatt keys) - (db:with-db - dbstruct - #f - #f - (lambda (db) - (let ((numruns 0) - (qry-str #f) - (key-patt "") - (keyvals (if targetpatt (keys:target->keyval keys targetpatt) '()))) - - (for-each (lambda (keyval) - (let* ((key (car keyval)) - (patt (cadr keyval)) - (fulkey (conc ":" key)) - (wildtype (if (substring-index "%" patt) "like" "glob"))) - - (if patt - (set! key-patt (conc key-patt " AND " key " " wildtype " '" patt "'")) - (begin - (debug:print-error 0 *default-log-port* "searching for runs with no pattern set for " fulkey) - (exit 6))))) - keyvals) - ;(print runpatt " -- " key-patt) - (set! qry-str (conc "SELECT COUNT(id) FROM runs WHERE state != 'deleted' AND runname like '" runpatt "'" key-patt)) - ;(print qry-str ) - - (sqlite3:for-each-row - (lambda (count) - (set! numruns count)) - db - qry-str) - (debug:print-info 11 *default-log-port* "db:get-num-runs END " runpatt) - numruns)))) - - -;; (sqlite3#fold-row proc3670 init3671 db-or-stmt3672 . params3673)> -;; -(define (db:get-raw-run-stats dbstruct run-id) - (db:with-db - dbstruct - run-id - #f - (lambda (db) - (sqlite3:fold-row - (lambda (res state status count) - (cons (list state status count) res)) - '() - db - "SELECT state,status,count(id) AS count FROM tests WHERE run_id=? AND NOT(uname='n/a' AND item_path='') GROUP BY state,status;" - run-id)))) - -;; Update run_stats for given run_id -;; input data is a list (state status count) -;; -(define (db:update-run-stats dbstruct run-id stats) - ;; (mutex-lock! *db-transaction-mutex*) - (db:with-db - dbstruct - #f - #f - - (lambda (db) - ;; remove previous data - - (let* ((stmt1 (sqlite3:prepare db "DELETE FROM run_stats WHERE run_id=? AND state=? 
AND status=?;")) - (stmt2 (sqlite3:prepare db "INSERT INTO run_stats (run_id,state,status,count) VALUES (?,?,?,?);")) - (res - (sqlite3:with-transaction - db - (lambda () - (for-each - (lambda (dat) - (sqlite3:execute stmt1 run-id (car dat)(cadr dat)) - (apply sqlite3:execute stmt2 run-id dat)) - stats))))) - (sqlite3:finalize! stmt1) - (sqlite3:finalize! stmt2) - ;; (mutex-unlock! *db-transaction-mutex*) - res)))) - -(define (db:get-main-run-stats dbstruct run-id) - (db:with-db - dbstruct - #f ;; this data comes from main - #f - (lambda (db) - (sqlite3:fold-row - (lambda (res state status count) - (cons (list state status count) res)) - '() - db - "SELECT state,status,count FROM run_stats WHERE run_id=? AND run_id IN (SELECT id FROM runs WHERE state NOT IN ('DELETED','deleted'));" - run-id)))) - -(define (db:print-current-query-stats) - ;; generate stats from *db-api-call-time* - (let ((ordered-keys (sort (hash-table-keys *db-api-call-time*) - (lambda (a b) - (let ((sum-a (common:sum (hash-table-ref *db-api-call-time* a))) - (sum-b (common:sum (hash-table-ref *db-api-call-time* b)))) - (> sum-a sum-b))))) - (total 0)) - (for-each - (lambda (cmd-key) - (let* ((dat (hash-table-ref *db-api-call-time* cmd-key)) - (num (length dat)) - (avg (if (> num 0) - (/ (common:sum dat)(length dat))))) - (set! total (+ total num)) - (debug:print-info 0 *default-log-port* cmd-key "\tavg: " avg " max: " (common:max dat) " min: " (common:min-max < dat) " num: " (length dat)))) - ordered-keys) - (debug:print-info 0 *default-log-port* "TOTAL: " total " api calls since start."))) - -(define (db:get-all-run-ids dbstruct) - (db:with-db - dbstruct - #f - #f - (lambda (db) - (let ((run-ids '())) - (sqlite3:for-each-row - (lambda (run-id) - (set! run-ids (cons run-id run-ids))) - db - "SELECT id FROM runs WHERE state != 'deleted' ORDER BY event_time DESC;") - (reverse run-ids))))) - -;; get some basic run stats -;; -;; data structure: -;; -;; ( (runname (( state count ) ... )) -;; ( ... 
-;; -(define (db:get-run-stats dbstruct) - (let* ((totals (make-hash-table)) - (curr (make-hash-table)) - (res '()) - (runs-info '())) - ;; First get all the runname/run-ids - (db:with-db - dbstruct #f #f - (lambda (db) - (sqlite3:for-each-row - (lambda (run-id runname) - (set! runs-info (cons (list run-id runname) runs-info))) - db - "SELECT id,runname FROM runs WHERE state != 'deleted' ORDER BY event_time DESC;"))) ;; If you change this to the more logical ASC please adjust calls to db:get-run-stats - ;; for each run get stats data - (for-each - (lambda (run-info) - ;; get the net state/status counts for this run - (let* ((run-id (car run-info)) - (run-name (cadr run-info))) - (db:with-db - dbstruct - run-id - #f - (lambda (db) - (sqlite3:for-each-row - (lambda (state status count) - (let ((netstate (if (equal? state "COMPLETED") status state))) - (if (string? netstate) - (begin - (hash-table-set! totals netstate (+ (hash-table-ref/default totals netstate 0) count)) - (hash-table-set! curr netstate (+ (hash-table-ref/default curr netstate 0) count)))))) - db - "SELECT state,status,count(id) FROM tests AS t WHERE run_id=? GROUP BY state,status ORDER BY state,status DESC;" - run-id) - ;; add the per run counts to res - (for-each (lambda (state) - (set! res (cons (list run-name state (hash-table-ref curr state)) res))) - (sort (hash-table-keys curr) string>=)) - (set! curr (make-hash-table)))))) - runs-info) - (for-each (lambda (state) - (set! 
res (cons (list "Totals" state (hash-table-ref totals state)) res))) - (sort (hash-table-keys totals) string>=)) - res)) - -;; db:get-runs-by-patt -;; get runs by list of criteria -;; register a test run with the db -;; -;; Use: (db:get-value-by-header (db:get-header runinfo)(db:get-rows runinfo)) -;; to extract info from the structure returned -;; -(define (db:get-runs-by-patt dbstruct keys runnamepatt targpatt offset limit fields last-update sort-order ) ;; test-name) - (let* ((tmp (runs:get-std-run-fields keys (or fields '("id" "runname" "state" "status" "owner" "event_time")))) - (keystr (car tmp)) - (header (cadr tmp)) - (key-patt "") - (runwildtype (if (substring-index "%" (or runnamepatt "%")) "like" "glob")) - (qry-str #f) - (keyvals (if targpatt (keys:target->keyval keys targpatt) '()))) - (for-each (lambda (keyval) - (let* ((key (car keyval)) - (patt (cadr keyval)) - (fulkey (conc ":" key)) - (wildtype (if (substring-index "%" patt) "like" "glob"))) - (if patt - (set! key-patt (conc key-patt " AND " key " " wildtype " '" patt "'")) - (begin - (debug:print-error 0 *default-log-port* "searching for runs with no pattern set for " fulkey) - (exit 6))))) - keyvals) - (set! qry-str (conc "SELECT " keystr " FROM runs WHERE state != 'deleted' AND runname " runwildtype " ? " key-patt - (if last-update - (conc " AND last_update >= " last-update " ") - " ") - " ORDER BY event_time " sort-order " " - (if limit (conc " LIMIT " limit) "") - (if offset (conc " OFFSET " offset) "") - ";")) - (debug:print-info 4 *default-log-port* "runs:get-runs-by-patt qry=" qry-str " " runnamepatt) - ;(print "runs:get-runs-by-patt qry=" qry-str " " runnamepatt) - - (vector header - (reverse - (db:with-db dbstruct #f #f ;; reads db, does not write to it. - (lambda (db) - (sqlite3:fold-row - (lambda (res . 
r) - (cons (list->vector r) res)) - '() - db - qry-str - runnamepatt))))))) - -;; use (get-value-by-header (db:get-header runinfo)(db:get-rows runinfo)) -;; NOTE: Does NOT return a list of rows (or one row) for the first slot of the vector -;; this is inconsistent with get-runs but it makes some sense. -;; -(define (db:get-run-info dbstruct run-id) - ;;(if (hash-table-ref/default *run-info-cache* run-id #f) - ;; (hash-table-ref *run-info-cache* run-id) - (let* ((res (vector #f #f #f #f)) - (keys (db:get-keys dbstruct)) - (remfields (list "id" "runname" "state" "status" "owner" "event_time" "comment" "fail_count" "pass_count" "contour" "last_update")) ;; "area_id")) - (header (append keys remfields)) - (keystr (conc (keys->keystr keys) "," - (string-intersperse remfields ",")))) - (debug:print-info 11 *default-log-port* "db:get-run-info run-id: " run-id " header: " header " keystr: " keystr) - - (db:with-db - dbstruct #f #f - (lambda (db) - (sqlite3:for-each-row - (lambda (a . x) - (set! res (apply vector a x))) - db - (conc "SELECT " keystr " FROM runs WHERE id=?;") - run-id))) - (debug:print-info 11 *default-log-port* "db:get-run-info run-id: " run-id " header: " header " keystr: " keystr) - (let ((finalres (vector header res))) - ;; (hash-table-set! *run-info-cache* run-id finalres) - finalres))) - -(define (db:set-comment-for-run dbstruct run-id comment) - (db:with-db - dbstruct #f #f - (lambda (db) - (sqlite3:execute db "UPDATE runs SET comment=? WHERE id=?;" comment ;; (sdb:qry 'getid comment) - run-id)))) - -;; does not (obviously!) removed dependent data. But why not!!? 
-(define (db:delete-run dbstruct run-id) - (db:with-db - dbstruct #f #f - (lambda (db) - (sqlite3:with-transaction - db - (lambda () - (sqlite3:execute db "DELETE FROM test_steps WHERE test_id IN (SELECT id FROM tests WHERE run_id=?);" run-id) - (sqlite3:execute db "DELETE FROM test_data WHERE test_id IN (SELECT id FROM tests WHERE run_id=?);" run-id) - (sqlite3:execute db "UPDATE tests SET state='DELETED',comment='' WHERE run_id=?;" run-id) - ;; (db:delay-if-busy dbdat) - (sqlite3:execute db "UPDATE runs SET state='deleted',comment='' WHERE id=?;" run-id)))))) - -(define (db:update-run-event_time dbstruct run-id) - (db:with-db - dbstruct #f #t - (lambda (db) - (sqlite3:execute db "UPDATE runs SET event_time=strftime('%s','now') WHERE id=?;" run-id)))) - -(define (db:lock/unlock-run dbstruct run-id lock unlock user) - (db:with-db - dbstruct #f #t - (lambda (db) - (let ((newlockval (if lock "locked" - (if unlock - "unlocked" - "locked")))) ;; semi-failsafe - (sqlite3:execute db "UPDATE runs SET state=? WHERE id=?;" newlockval run-id) - (sqlite3:execute db "INSERT INTO access_log (user,accessed,args) VALUES(?,strftime('%s','now'),?);" - user (conc newlockval " " run-id)) - (debug:print-info 1 *default-log-port* "" newlockval " run number " run-id))))) - -(define (db:set-run-status dbstruct run-id status msg) - (db:with-db - dbstruct #f #f - (lambda (db) - (if msg - (sqlite3:execute db "UPDATE runs SET status=?,comment=? WHERE id=?;" status msg run-id) - (sqlite3:execute db "UPDATE runs SET status=? WHERE id=?;" status run-id))))) - -(define (db:set-run-state-status dbstruct run-id state status ) - (db:with-db - dbstruct #f #f - (lambda (db) - (sqlite3:execute db "UPDATE runs SET status=?,state=? WHERE id=?;" status state run-id)))) - - - -(define (db:get-run-status dbstruct run-id) - (let ((res "n/a")) - (db:with-db - dbstruct #f #f - (lambda (db) - (sqlite3:for-each-row - (lambda (status) - (set! 
res status)) - db - "SELECT status FROM runs WHERE id=?;" - run-id) - res)))) - -(define (db:get-run-state dbstruct run-id) - (let ((res "n/a")) - (db:with-db - dbstruct #f #f - (lambda (db) - (sqlite3:for-each-row - (lambda (status) - (set! res status)) - db - "SELECT state FROM runs WHERE id=?;" - run-id) - res)))) - - -;;====================================================================== -;; K E Y S -;;====================================================================== - -;; get key val pairs for a given run-id -;; ( (FIELDNAME1 keyval1) (FIELDNAME2 keyval2) ... ) -(define (db:get-key-val-pairs dbstruct run-id) - (let* ((keys (db:get-keys dbstruct)) - (res '())) - (db:with-db - dbstruct #f #f - (lambda (db) - (for-each - (lambda (key) - (let ((qry (conc "SELECT " key " FROM runs WHERE id=?;"))) - (sqlite3:for-each-row - (lambda (key-val) - (set! res (cons (list key (if (string? key-val) key-val "")) res))) ;; replace non-string bad values with empty string to prevent crashes. This scenario can happen when Megatest is killed on updating the db - db qry run-id))) - keys))) - (reverse res))) - -;; get key vals for a given run-id -(define (db:get-key-vals dbstruct run-id) - (let* ((keys (db:get-keys dbstruct)) - (res '())) - (db:with-db - dbstruct #f #f - (lambda (db) - (for-each - (lambda (key) - (let ((qry (conc "SELECT " key " FROM runs WHERE id=?;"))) - ;; (db:delay-if-busy dbdat) - (sqlite3:for-each-row - (lambda (key-val) - (set! res (cons (if (string? key-val) key-val "") res))) ;; check that the key-val is a string for cases where a crash injected bad data in the megatest.db - db qry run-id))) - keys))) - (let ((final-res (reverse res))) - (hash-table-set! 
*keyvals* run-id final-res) - final-res))) - -;; The target is keyval1/keyval2..., cached in *target* as it is used often -(define (db:get-target dbstruct run-id) - (let* ((keyvals (db:get-key-vals dbstruct run-id)) - (thekey (string-intersperse (map (lambda (x)(if x x "-na-")) keyvals) "/"))) - thekey)) - -;; Get run-ids for runs with same target but different runnames and NOT run-id -;; -(define (db:get-prev-run-ids dbstruct run-id) - (let* ((keyvals (db:get-key-val-pairs dbstruct run-id)) - (kvalues (map cadr keyvals)) - (keys (rmt:get-keys)) - (qrystr (string-intersperse (map (lambda (x)(conc x "=?")) keys) " AND "))) - (let ((prev-run-ids '())) - (if (null? keyvals) - '() - (begin - (db:with-db dbstruct #f #f ;; #f means work with the zeroth db - i.e. the runs db - (lambda (db) - (apply sqlite3:for-each-row - (lambda (id) - (set! prev-run-ids (cons id prev-run-ids))) - db - (conc "SELECT id FROM runs WHERE " qrystr " AND state != 'deleted' AND id != ?;") - (append kvalues (list run-id))))) - prev-run-ids))))) - -;;====================================================================== -;; T E S T S -;;====================================================================== - -;; states and statuses are lists, turn them into ("PASS","FAIL"...) and use NOT IN -;; i.e. these lists define what to NOT show. 
-;; states and statuses are required to be lists, empty is ok -;; not-in #t = above behaviour, #f = must match -;; mode: -;; 'dashboard - use state = 'COMPLETED' AND status in ( statuses ) OR state in ( states ) -;; -(define (db:get-tests-for-run dbstruct run-id testpatt states statuses offset limit not-in sort-by sort-order qryvals last-update mode) - (let* ((qryvalstr (case qryvals - ((shortlist) "id,run_id,testname,item_path,state,status") - ((#f) db:test-record-qry-selector) ;; "id,run_id,testname,state,status,event_time,host,cpuload,diskfree,uname,rundir,item_path,run_duration,final_logf,comment") - (else qryvals))) - (res '()) - ;; if states or statuses are null then assume match all when not-in is false - (states-qry (if (null? states) - #f - (conc " state " - (if (eq? mode 'dashboard) - " IN ('" - (if not-in - " NOT IN ('" - " IN ('")) - (string-intersperse states "','") - "')"))) - (statuses-qry (if (null? statuses) - #f - (conc " status " - (if (eq? mode 'dashboard) - " IN ('" - (if not-in - " NOT IN ('" - " IN ('") ) - (string-intersperse statuses "','") - "')"))) - (interim-qry (conc " AND " (if not-in "NOT " "") "( state='COMPLETED' " (if statuses-qry (conc " AND " statuses-qry " ) ") " ) ") - (if states-qry - (conc (if not-in " AND " " OR ") states-qry ) ;; " ) ") - ""))) - (states-statuses-qry - (cond - ((and states-qry statuses-qry) - (case mode - ((dashboard) - (if not-in - (conc " AND (state='COMPLETED' AND status NOT IN ('" (string-intersperse statuses "','") "')) " - " OR (state != 'COMPLETED' AND state NOT IN ('" (string-intersperse states "','") "')) ") - (conc " AND (state='COMPLETED' AND status IN ('" (string-intersperse statuses "','") "')) " - " OR (state NOT IN ('COMPLETED','DELETED') AND state IN ('" (string-intersperse states "','") "')) "))) - (else (conc " AND ( " states-qry " AND " statuses-qry " ) ")))) - (states-qry - (case mode - ((dashboard) (conc " AND " (if not-in "NOT " "") " state IN ('" (string-intersperse states "','") "') 
")) ;; interim-qry) - (else (conc " AND " states-qry)))) - (statuses-qry - (case mode - ((dashboard) (conc " AND " (if not-in "NOT " "") " status IN ('" (string-intersperse statuses "','") "') ")) ;; interim-qry) - (else (conc " AND " statuses-qry)))) - (else ""))) - (tests-match-qry (tests:match->sqlqry testpatt)) - (qry (conc "SELECT " qryvalstr - (if run-id - " FROM tests WHERE run_id=? " - " FROM tests WHERE ? > 0 ") ;; should work? - (if last-update " " " AND state != 'DELETED' ") ;; if using last-update we want deleted tests? - states-statuses-qry - (if tests-match-qry (conc " AND (" tests-match-qry ") ") "") - (if last-update (conc " AND last_update >= " last-update " ") "") - (case sort-by - ((rundir) " ORDER BY length(rundir) ") - ((testname) (conc " ORDER BY testname " (if sort-order (conc sort-order ",") "") " item_path ")) - ((statestatus) (conc " ORDER BY state " (if sort-order (conc sort-order ",") "") " status ")) - ((event_time) " ORDER BY event_time ") - (else (if (string? sort-by) - (conc " ORDER BY " sort-by " ") - " "))) - (if sort-order sort-order " ") - (if limit (conc " LIMIT " limit) " ") - (if offset (conc " OFFSET " offset) " ") - ";" - ))) - (debug:print-info 8 *default-log-port* "db:get-tests-for-run run-id=" run-id ", qry=" qry) - (db:with-db dbstruct run-id #f - (lambda (db) - (sqlite3:for-each-row - (lambda (a . b) ;; id run-id testname state status event-time host cpuload diskfree uname rundir item-path run-duration final-logf comment) - (set! 
res (cons (apply vector a b) res))) ;; id run-id testname state status event-time host cpuload diskfree uname rundir item-path run-duration final-logf comment) res))) - db - qry - (or run-id 1) ;; 1 > 0 , for the case where we are seeking tests matching criteral for all runs - ))) - (case qryvals - ((shortlist)(map db:test-short-record->norm res)) - ((#f) res) - (else res)))) - -(define (db:test-short-record->norm inrec) - ;; "id,run_id,testname,item_path,state,status" - ;; "id,run_id,testname,state,status,event_time,host,cpuload,diskfree,uname,rundir,item_path,run_duration,final_logf,comment - (vector (vector-ref inrec 0) ;; id - (vector-ref inrec 1) ;; run_id - (vector-ref inrec 2) ;; testname - (vector-ref inrec 4) ;; state - (vector-ref inrec 5) ;; status - -1 "" -1 -1 "" "-" - (vector-ref inrec 3) ;; item-path - -1 "-" "-")) - -(define (db:get-tests-for-run-state-status dbstruct run-id testpatt) - (let* ((res '()) - (tests-match-qry (tests:match->sqlqry testpatt)) - (qry (conc "SELECT id,testname,item_path,state,status FROM tests WHERE run_id=? " - (if tests-match-qry (conc " AND (" tests-match-qry ") ") "")))) - (debug:print-info 8 *default-log-port* "db:get-tests-for-run qry=" qry) - (db:with-db dbstruct run-id #f - (lambda (db) - (sqlite3:for-each-row - (lambda (id testname item-path state status) - ;; id,run_id,testname,state,status,event_time,host,cpuload,diskfree,uname,rundir,item_path,run_duration,final_logf,comment - (set! res (cons (vector id run-id testname state status -1 "" -1 -1 "" "-" item-path -1 "-" "-") res))) - db - qry - run-id))) - res)) - -(define (db:get-testinfo-state-status dbstruct run-id test-id) - (let ((res #f)) - (db:with-db dbstruct run-id #f - (lambda (db) - (sqlite3:for-each-row - (lambda (run-id testname item-path state status) - ;; id,run_id,testname,state,status,event_time,host,cpuload,diskfree,uname,rundir,item_path,run_duration,final_logf,comment - (set! 
res (vector test-id run-id testname state status -1 "" -1 -1 "" "-" item-path -1 "-" "-"))) - db - "SELECT run_id,testname,item_path,state,status FROM tests WHERE id=?;" - test-id))) - res)) - -;; get a useful subset of the tests data (used in dashboard -;; use db:mintest-get-{id ,run_id,testname ...} -;; -(define (db:get-tests-for-run-mindata dbstruct run-id testpatt states statuses not-in) - (db:get-tests-for-run dbstruct run-id testpatt states statuses #f #f not-in #f #f "id,run_id,testname,state,status,event_time,item_path" 0 #f)) - -;; do not use. -;; -(define (db:get-tests-for-runs dbstruct run-ids testpatt states statuses #!key (not-in #f)(qryvals #f)) - ;; (db:delay-if-busy) - (let ((res '())) - (for-each - (lambda (run-id) - (set! res (append - res - (db:get-tests-for-run dbstruct run-id testpatt states statuses #f #f not-in #f #f qryvals #f 'normal)))) - (if run-ids - run-ids - (db:get-all-run-ids dbstruct))) - res)) - -;; Convert calling routines to get list of run-ids and loop, do not use the get-tests-for-runs -;; - -(define (db:delete-test-records dbstruct run-id test-id) - (db:general-call dbstruct 'delete-test-step-records (list test-id)) - (db:general-call dbstruct 'delete-test-data-records (list test-id)) - (db:with-db - dbstruct #f #f - (lambda (db) - (sqlite3:execute db "UPDATE tests SET state='DELETED',status='n/a',comment='' WHERE id=?;" test-id)))) - -;; -(define (db:delete-old-deleted-test-records dbstruct) - (let (;; (run-ids (db:get-all-run-ids dbstruct)) - (targtime (- (current-seconds)(* 30 24 60 60)))) ;; one month in the past - (db:with-db - dbstruct - 0 - #t - (lambda (db) - (sqlite3:with-transaction - db - (lambda () - (sqlite3:execute db "DELETE FROM test_steps WHERE test_id IN (SELECT id FROM tests WHERE state='DELETED' AND event_timenumber fieldname fields) - (if (null? fields) - #f - (let loop ((hed (car fields)) - (tal (cdr fields)) - (indx 0)) - (if (equal? fieldname hed) - indx - (if (null? 
tal) - #f - (loop (car tal)(cdr tal)(+ indx 1))))))) - -(define db:test-record-qry-selector (string-intersperse db:test-record-fields ",")) - - -;; NOTE: Use db:test-get* to access records -;; NOTE: This needs rundir decoding? Decide, decode here or where used? For the moment decode where used. -(define (db:get-all-tests-info-by-run-id dbstruct run-id) - (let* ((res '())) - (db:with-db - dbstruct #f #f - (lambda (db) - (sqlite3:for-each-row - (lambda (id run-id testname state status event-time host cpuload diskfree uname rundir item-path run-duration final-logf comment shortdir attemptnum archived) - ;; 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 - (set! res (cons (vector id run-id testname state status event-time host cpuload diskfree uname rundir item-path run-duration final-logf comment shortdir attemptnum archived) - res))) - db - (conc "SELECT " db:test-record-qry-selector " FROM tests WHERE state != 'DELETED' AND run_id=?;") - run-id))) - res)) - -(define (db:replace-test-records dbstruct run-id testrecs) - (db:with-db dbstruct run-id #t - (lambda (db) - (let* ((qmarks (string-intersperse (make-list (length db:test-record-fields) "?") ",")) - (qrystr (conc "INSERT OR REPLACE INTO tests (" db:test-record-qry-selector ") VALUES (" qmarks ") WHERE run_id=?;")) - (qry (sqlite3:prepare db qrystr))) - (debug:print 0 *default-log-port* "INFO: migrating test records for run with id " run-id) - (sqlite3:with-transaction - db - (lambda () - (for-each - (lambda (rec) - ;; (debug:print 0 *default-log-port* "INFO: Inserting values: " (string-intersperse (map conc (vector->list rec)) ",") "\n") - (apply sqlite3:execute qry (append (vector->list rec)(list run-id)))) - testrecs))) - (sqlite3:finalize! qry))))) - -;; map a test-id into the proper range -;; -(define (db:adj-test-id mtdb min-test-id test-id) - (if (>= test-id min-test-id) - test-id - (let loop ((new-id min-test-id)) - (let ((test-id-found #f)) - (sqlite3:for-each-row - (lambda (id) - (set! 
test-id-found id)) - (db:dbdat-get-db mtdb) - "SELECT id FROM tests WHERE id=?;" - new-id) - ;; if test-id-found then need to try again - (if test-id-found - (loop (+ new-id 1)) - (begin - (debug:print-info 0 *default-log-port* "New test id " new-id " selected for test with id " test-id) - (sqlite3:execute mtdb "UPDATE tests SET id=? WHERE id=?;" new-id test-id))))))) - -;; move test ids into the 30k * run_id range -;; -(define (db:prep-megatest.db-adj-test-ids mtdb run-id testrecs) - (debug:print-info 0 *default-log-port* "Adjusting test ids in megatest.db for run " run-id) - (let ((min-test-id (* run-id 30000))) - (for-each - (lambda (testrec) - (let* ((test-id (vector-ref testrec (db:field->number "id" db:test-record-fields)))) - (db:adj-test-id (db:dbdat-get-db mtdb) min-test-id test-id))) - testrecs))) - -;; 1. move test ids into the 30k * run_id range -;; 2. move step ids into the 30k * run_id range -;; -(define (db:prep-megatest.db-for-migration mtdb) - (let* ((run-ids (db:get-all-run-ids mtdb))) - (for-each - (lambda (run-id) - (let ((testrecs (db:get-all-tests-info-by-run-id mtdb run-id))) - (db:prep-megatest.db-adj-test-ids (db:dbdat-get-db mtdb) run-id testrecs))) - run-ids))) - -;; Get test data using test_id, run-id is not used -;; -(define (db:get-test-info-by-id dbstruct run-id test-id) - (db:with-db - dbstruct - #f ;; run-id - #f - (lambda (db) - (let ((res #f)) - (sqlite3:for-each-row ;; attemptnum added to hold pid of top process (not Megatest) controlling a test - (lambda (id run-id testname state status event-time host cpuload diskfree uname rundir-id item-path run_duration final-logf-id comment short-dir-id attemptnum archived last-update) - ;; 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 - (set! 
res (vector id run-id testname state status event-time host cpuload diskfree uname rundir-id item-path run_duration final-logf-id comment short-dir-id attemptnum archived last-update))) - db - (conc "SELECT " db:test-record-qry-selector " FROM tests WHERE id=?;") - test-id) - res)))) - -;; Use db:test-get* to access -;; Get test data using test_ids. NB// Only works within a single run!! -;; -(define (db:get-test-info-by-ids dbstruct run-id test-ids) - (db:with-db - dbstruct - run-id - #f - (lambda (db) - (let ((res '())) - (sqlite3:for-each-row - (lambda (a . b) - ;; 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 - (set! res (cons (apply vector a b) res))) - db - (conc "SELECT " db:test-record-qry-selector " FROM tests WHERE id in (" - (string-intersperse (map conc test-ids) ",") ");")) - res)))) - -(define (db:get-test-info dbstruct run-id test-name item-path) - (db:with-db - dbstruct - run-id - #f - (lambda (db) - (let ((res #f)) - (sqlite3:for-each-row - (lambda (a . b) - (set! res (apply vector a b))) - db - (conc "SELECT " db:test-record-qry-selector " FROM tests WHERE testname=? AND item_path=? AND run_id=?;") - test-name item-path run-id) - res)))) - -(define (db:test-get-rundir-from-test-id dbstruct run-id test-id) - (db:with-db - dbstruct - run-id - #f - (lambda (db) - (db:first-result-default - db - "SELECT rundir FROM tests WHERE id=?;" - #f ;; default result - test-id)))) - -(define (db:get-test-times dbstruct run-name target) - (let ((res `()) - (qry (conc "select testname, item_path, run_duration, " (string-join (db:get-keys dbstruct) " || '/' || ") " as target from tests inner join runs on tests.run_id = runs.id where runs.runname = ? and target = ? ;"))) - - (db:with-db - dbstruct - #f ;; this is for the main runs db - #f ;; does not modify db - (lambda (db) - (sqlite3:for-each-row - (lambda (test-name item-path test-time target ) - (set! 
res (cons (vector test-name item-path test-time) res))) - db - qry - run-name target) - res)))) - -(define (db:get-test-times dbstruct run-name target) - (let ((res `()) - (qry (conc "select testname, item_path, run_duration, " (string-join (db:get-keys dbstruct) " || '/' || ") " as target from tests inner join runs on tests.run_id = runs.id where runs.runname = ? and target = ? ;"))) - - (db:with-db - dbstruct - #f ;; this is for the main runs db - #f ;; does not modify db - (lambda (db) - (sqlite3:for-each-row - (lambda (test-name item-path test-time target ) - (set! res (cons (vector test-name item-path test-time) res))) - db - qry - run-name target) - res)))) - -;;====================================================================== -;; S T E P S -;;====================================================================== - -(define (db:teststep-set-status! dbstruct run-id test-id teststep-name state-in status-in comment logfile) - (db:with-db - dbstruct - run-id - #t - (lambda (db) - (sqlite3:execute - db - "INSERT OR REPLACE into test_steps (test_id,stepname,state,status,event_time,comment,logfile) VALUES(?,?,?,?,?,?,?);" - test-id teststep-name state-in status-in (current-seconds) - (if comment comment "") - (if logfile logfile ""))))) - - - -(define (db:delete-steps-for-test! dbstruct run-id test-id) - ;; TODO: figure out why status is the key field rather than state (note: CONSTRAINT test_steps_constraint UNIQUE (test_id,stepname,state) ) - (db:with-db - dbstruct - run-id - #t - (lambda (db) - (sqlite3:execute - db - "UPDATE test_steps set status='DELETED' where test_id=?" ;; and run_id=? !! - run_id not in table (bummer) TODO: get run_id into schema for test_steps - test-id)))) - - -;; db-get-test-steps-for-run -(define (db:get-steps-for-test dbstruct run-id test-id) - (db:with-db - dbstruct - run-id - #f - (lambda (db) - (let* ((res '())) - (sqlite3:for-each-row - (lambda (id test-id stepname state status event-time logfile comment) - (set! 
res (cons (vector id test-id stepname state status event-time (if (string? logfile) logfile "") comment) res))) - db - "SELECT id,test_id,stepname,state,status,event_time,logfile,comment FROM test_steps WHERE status != 'DELETED' AND test_id=? ORDER BY id ASC;" ;; event_time DESC,id ASC; - test-id) - (reverse res))))) - - (define (db:get-steps-info-by-id dbstruct test-step-id) - (db:with-db - dbstruct - #f - #f - (lambda (db) - (let* ((res (vector #f #f #f #f #f #f #f #f #f))) - (sqlite3:for-each-row - (lambda (id test-id stepname state status event-time logfile comment last-update) - (set! res (vector id test-id stepname state status event-time (if (string? logfile) logfile "") comment last-update))) - db - "SELECT id,test_id,stepname,state,status,event_time,logfile,comment,last_update FROM test_steps WHERE id=? ORDER BY id ASC;" ;; event_time DESC,id ASC; - test-step-id) - res)))) - -(define (db:get-steps-data dbstruct run-id test-id) - (db:with-db - dbstruct - run-id - #f - (lambda (db) - (let ((res '())) - (sqlite3:for-each-row - (lambda (id test-id stepname state status event-time logfile) - (set! res (cons (vector id test-id stepname state status event-time (if (string? logfile) logfile "")) res))) - db - "SELECT id,test_id,stepname,state,status,event_time,logfile FROM test_steps WHERE status != 'DELETED' AND test_id=? ORDER BY id ASC;" ;; event_time DESC,id ASC; - test-id) - (reverse res))))) - -;;====================================================================== -;; T E S T D A T A -;;====================================================================== - - (define (db:get-data-info-by-id dbstruct test-data-id) - (db:with-db - dbstruct - #f - #f - (lambda (db) - (let* ((res (vector #f #f #f #f #f #f #f #f #f #f #f #f))) - (sqlite3:for-each-row - (lambda (id test-id category variable value expected tol units comment status type last-update) - (set! 
res (vector id test-id category variable value expected tol units comment status type last-update))) - db - "SELECT id,test_id, category, variable, value, expected, tol, units, comment, status, type, last_update FROM test_data WHERE id=? ORDER BY id ASC;" ;; event_time DESC,id ASC; - test-data-id) - res)))) - - -;; WARNING: Do NOT call this for the parent test on an iterated test -;; Roll up test_data pass/fail results -;; look at the test_data status field, -;; if all are pass (any case) and the test status is PASS or NULL or '' then set test status to PASS. -;; if one or more are fail (any case) then set test status to PASS, non "pass" or "fail" are ignored -(define (db:test-data-rollup dbstruct run-id test-id status) - (let* ((fail-count 0) - (pass-count 0)) - (db:with-db - dbstruct #f #f - (lambda (db) - (sqlite3:for-each-row - (lambda (fcount pcount) - (set! fail-count fcount) - (set! pass-count pcount)) - db - "SELECT (SELECT count(id) FROM test_data WHERE test_id=? AND status like 'fail') AS fail_count, - (SELECT count(id) FROM test_data WHERE test_id=? AND status like 'pass') AS pass_count;" - test-id test-id) - ;; Now rollup the counts to the central megatest.db - (db:general-call dbstruct 'pass-fail-counts (list pass-count fail-count test-id)) - ;; if the test is not FAIL then set status based on the fail and pass counts. 
- (db:general-call dbstruct 'test_data-pf-rollup (list test-id test-id test-id test-id)))))) - -;; each section is a rule except "final" which is the final result -;; -;; [rule-5] -;; operator in -;; section LogFileBody -;; desc Output voltage -;; status OK -;; expected 1.9 -;; measured 1.8 -;; type +/- -;; tolerance 0.1 -;; pass 1 -;; fail 0 -;; -;; [final] -;; exit-code 6 -;; exit-status SKIP -;; message If flagged we are asking for this to exit with code 6 -;; -;; recorded in steps table: -;; category: stepname -;; variable: rule-N -;; value: measured -;; expected: expected -;; tol: tolerance -;; units: - -;; comment: desc or message -;; status: status -;; type: type -;; -(define (db:logpro-dat->csv dat stepname) - (let ((res '())) - (for-each - (lambda (entry-name) - (if (equal? entry-name "final") - (set! res (append - res - (list - (list stepname - entry-name - (configf:lookup dat entry-name "exit-code") ;; 0 ;; Value - 0 ;; 1 ;; Expected - 0 ;; 2 ;; Tolerance - "n/a" ;; 3 ;; Units - (configf:lookup dat entry-name "message") ;; 4 ;; Comment - (configf:lookup dat entry-name "exit-status") ;; 5 ;; Status - "logpro" ;; 6 ;; Type - )))) - (let* ((value (or (configf:lookup dat entry-name "measured") "n/a")) - (expected (or (configf:lookup dat entry-name "expected") 0.0)) - (tolerance (or (configf:lookup dat entry-name "tolerance") 0.0)) - (comment (or (configf:lookup dat entry-name "comment") - (configf:lookup dat entry-name "desc") "n/a")) - (status (or (configf:lookup dat entry-name "status") "n/a")) - (type (or (configf:lookup dat entry-name "expected") "n/a"))) - (set! 
res (append - res - (list (list stepname - entry-name - value ;; 0 - expected ;; 1 - tolerance ;; 2 - "n/a" ;; 3 Units - comment ;; 4 - status ;; 5 - type ;; 6 - ))))))) - (hash-table-keys dat)) - res)) - -;; $MT_MEGATEST -load-test-data << EOF -;; foo,bar, 1.2, 1.9, > -;; foo,rab, 1.0e9, 10e9, 1e9 -;; foo,bla, 1.2, 1.9, < -;; foo,bal, 1.2, 1.2, < , ,Check for overload -;; foo,alb, 1.2, 1.2, <= , Amps,This is the high power circuit test -;; foo,abl, 1.2, 1.3, 0.1 -;; foo,bra, 1.2, pass, silly stuff -;; faz,bar, 10, 8mA, , ,"this is a comment" -;; EOF - -(define (db:csv->test-data dbstruct run-id test-id csvdata) - (debug:print 4 *default-log-port* "test-id " test-id ", csvdata: " csvdata) - (db:with-db - dbstruct #f #f - (lambda (db) - (let* ((csvlist (csv->list (make-csv-reader - (open-input-string csvdata) - '((strip-leading-whitespace? #t) - (strip-trailing-whitespace? #t)))))) ;; (csv->list csvdata))) - (for-each - (lambda (csvrow) - (let* ((padded-row (take (append csvrow (list #f #f #f #f #f #f #f #f #f)) 9)) - (category (list-ref padded-row 0)) - (variable (list-ref padded-row 1)) - (value (any->number-if-possible (list-ref padded-row 2))) - (expected (any->number-if-possible (list-ref padded-row 3))) - (tol (any->number-if-possible (list-ref padded-row 4))) ;; >, <, >=, <=, or a number - (units (list-ref padded-row 5)) - (comment (list-ref padded-row 6)) - (status (let ((s (list-ref padded-row 7))) - (if (and (string? s)(or (string-match (regexp "^\\s*$") s) - (string-match (regexp "^n/a$") s))) - #f - s))) ;; if specified on the input then use, else calculate - (type (list-ref padded-row 8))) - ;; look up expected,tol,units from previous best fit test if they are all either #f or '' - (debug:print 4 *default-log-port* "BEFORE: category: " category " variable: " variable " value: " value - ", expected: " expected " tol: " tol " units: " units " status: " status " comment: " comment " type: " type) - - (if (and (or (not expected)(equal? 
expected "")) - (or (not tol) (equal? expected "")) - (or (not units) (equal? expected ""))) - (let-values (((new-expected new-tol new-units)(tdb:get-prev-tol-for-test #f test-id category variable))) - (set! expected new-expected) - (set! tol new-tol) - (set! units new-units))) - - (debug:print 4 *default-log-port* "AFTER: category: " category " variable: " variable " value: " value - ", expected: " expected " tol: " tol " units: " units " status: " status " comment: " comment) - ;; calculate status if NOT specified - (if (and (not status)(number? expected)(number? value)) ;; need expected and value to be numbers - (if (number? tol) ;; if tol is a number then we do the standard comparison - (let* ((max-val (+ expected tol)) - (min-val (- expected tol)) - (result (and (>= value min-val)(<= value max-val)))) - (debug:print 4 *default-log-port* "max-val: " max-val " min-val: " min-val " result: " result) - (set! status (if result "pass" "fail"))) - (set! status ;; NB// need to assess each one (i.e. not return operator since need to act if not valid op. 
- (case (string->symbol tol) ;; tol should be >, <, >=, <= - ((>) (if (> value expected) "pass" "fail")) - ((<) (if (< value expected) "pass" "fail")) - ((>=) (if (>= value expected) "pass" "fail")) - ((<=) (if (<= value expected) "pass" "fail")) - (else (conc "ERROR: bad tol comparator " tol)))))) - (debug:print 4 *default-log-port* "AFTER2: category: " category " variable: " variable " value: " value - ", expected: " expected " tol: " tol " units: " units " status: " status " comment: " comment) - ;; (db:delay-if-busy dbdat) - (sqlite3:execute db "INSERT OR REPLACE INTO test_data (test_id,category,variable,value,expected,tol,units,comment,status,type) VALUES (?,?,?,?,?,?,?,?,?,?);" - test-id category variable value expected tol units (if comment comment "") status type))) - csvlist))))) - -;; This routine moved from tdb.scm, tdb:read-test-data -;; -(define (db:read-test-data dbstruct run-id test-id categorypatt) - (let* ((res '())) - (db:with-db - dbstruct #f #f - (lambda (db) - (sqlite3:for-each-row - (lambda (id test_id category variable value expected tol units comment status type) - (set! res (cons (vector id test_id category variable value expected tol units comment status type) res))) - db - "SELECT id,test_id,category,variable,value,expected,tol,units,comment,status,type FROM test_data WHERE test_id=? AND category LIKE ? ORDER BY category,variable;" test-id categorypatt) - (reverse res))))) - -;; This routine moved from tdb.scm, :read-test-data -;; -(define (db:read-test-data* dbstruct run-id test-id categorypatt varpatt) - (let* ((res '())) - (db:with-db - dbstruct #f #f - (lambda (db) - (sqlite3:for-each-row - (lambda (id test_id category variable value expected tol units comment status type) - (set! res (cons (vector id test_id category variable value expected tol units comment status type) res))) - db - "SELECT id,test_id,category,variable,value,expected,tol,units,comment,status,type FROM test_data WHERE test_id=? AND category LIKE ? 
AND variable LIKE ? ORDER BY category,variable;" test-id categorypatt varpatt) - (reverse res))))) - - -;;====================================================================== -;; Misc. test related queries -;;====================================================================== - -(define (db:get-run-ids-matching-target dbstruct keynames target res runname testpatt statepatt statuspatt) - (db:with-db - dbstruct #f #f - (lambda (db) - (let* ((row-ids '()) - (keystr (string-intersperse - (map (lambda (key val) - (conc key " like '" val "'")) - keynames - (string-split target "/")) - " AND ")) - ;; (testqry (tests:match->sqlqry testpatt)) - (runsqry (sqlite3:prepare db (conc "SELECT id FROM runs WHERE " keystr " AND runname LIKE '" runname "';")))) - ;; (debug:print 8 *default-log-port* "db:test-get-paths-matching-keynames-target-new\n runsqry=" runsqry "\n tstsqry=" testqry) - (sqlite3:for-each-row - (lambda (rid) - (set! row-ids (cons rid row-ids))) - runsqry) - (sqlite3:finalize! runsqry) - row-ids)))) - -;; finds latest matching all patts for given run-id -;; -(define (db:test-get-paths-matching-keynames-target-new dbstruct run-id keynames target res testpatt statepatt statuspatt runname) - (let* ((testqry (tests:match->sqlqry testpatt)) - (tstsqry (conc "SELECT rundir FROM tests WHERE run_id=? AND " testqry " AND state LIKE '" statepatt "' AND status LIKE '" statuspatt "' ORDER BY event_time ASC;"))) - (db:with-db - dbstruct - run-id - #f - (lambda (db) - (sqlite3:for-each-row - (lambda (p) - (set! res (cons p res))) - db - tstsqry - run-id) - res)))) - -(define (db:test-toplevel-num-items dbstruct run-id testname) - (db:with-db - dbstruct - run-id - #f - (lambda (db) - (let ((res 0)) - (sqlite3:for-each-row - (lambda (num-items) - (set! res num-items)) - db - "SELECT count(id) FROM tests WHERE run_id=? AND testname=? 
AND item_path != '' AND state NOT IN ('DELETED');" - run-id - testname) - res)))) - -;;====================================================================== -;; QUEUE UP META, TEST STATUS AND STEPS REMOTE ACCESS -;;====================================================================== - -;; NOTE: Can remove the regex and base64 encoding for zmq -(define (db:obj->string obj #!key (transport 'http)) - (case transport - ;; ((fs) obj) - ((http fs) - (string-substitute - (regexp "=") "_" - (base64:base64-encode - (z3:encode-buffer - (with-output-to-string - (lambda ()(serialize obj))))) ;; BB: serialize - this is what causes problems between different builds of megatest communicating. serialize is sensitive to binary image of mtest. - #t)) - ((zmq nmsg)(with-output-to-string (lambda ()(serialize obj)))) - (else obj))) ;; rpc - -(define (db:string->obj msg #!key (transport 'http)) - (case transport - ;; ((fs) msg) - ((http fs) - (if (string? msg) - (with-input-from-string - (z3:decode-buffer - (base64:base64-decode - (string-substitute - (regexp "_") "=" msg #t))) - (lambda ()(deserialize))) - (begin - (debug:print-error 0 *default-log-port* "reception failed. 
Received \"" msg "\" but cannot translate it.") - (print-call-chain (current-error-port)) - msg))) ;; crude reply for when things go awry - ((zmq nmsg)(with-input-from-string msg (lambda ()(deserialize)))) - (else msg))) ;; rpc - -;; ; This is to be the big daddy call NOPE: Replaced by db:set-state-status-and-roll-up-items -;; ; -;; define (db:test-set-state-status dbstruct run-id test-id state status msg) -;; (let ((dbdat (db:get-db dbstruct run-id))) -;; (if (member state '("LAUNCHED" "REMOTEHOSTSTART")) -;; (db:general-call dbdat 'set-test-start-time (list test-id))) -;; ;; (if msg -;; ;; (db:general-call dbdat 'state-status-msg (list state status msg test-id)) -;; ;; (db:general-call dbdat 'state-status (list state status test-id))) -;; (db:set-state-status-and-roll-up-items dbstruct run-id test-id #f state status msg) -;; ;; process the test_data table -;; (if (and test-id state status (equal? status "AUTO")) -;; (db:test-data-rollup dbstruct run-id test-id status)) -;; (mt:process-triggers dbstruct run-id test-id state status))) - -;; state is the priority rollup of all states -;; status is the priority rollup of all completed statesfu -;; -;; if test-name is an integer work off that instead of test-name test-path -;; -(define (db:set-state-status-and-roll-up-items dbstruct run-id test-name item-path state status comment) - ;; establish info on incoming test followed by info on top level test - ;; BBnote - for mode itemwait, linkage between upstream test & matching item status is propagated to run queue in db:prereqs-not-met - (let* ((testdat (if (number? test-name) - (db:get-test-info-by-id dbstruct run-id test-name) ;; test-name is actually a test-id - (db:get-test-info dbstruct run-id test-name item-path))) - (test-id (db:test-get-id testdat)) - (test-name (if (number? 
test-name) - (db:test-get-testname testdat) - test-name)) - (item-path (db:test-get-item-path testdat)) - (tl-testdat (db:get-test-info dbstruct run-id test-name "")) - (tl-test-id (if tl-testdat - (db:test-get-id tl-testdat) - #f))) - (if (member state '("LAUNCHED" "REMOTEHOSTSTART")) - (db:general-call dbstruct 'set-test-start-time (list test-id))) - (mutex-lock! *db-transaction-mutex*) - (db:with-db - dbstruct #f #f - (lambda (db) - (let ((tr-res - (sqlite3:with-transaction - db - (lambda () - ;; NB// Pass the db so it is part fo the transaction - (db:test-set-state-status db run-id test-id state status comment) ;; this call sets the item state/status - (if (not (equal? item-path "")) ;; only roll up IF incoming test is an item - (let* ((state-status-counts (db:get-all-state-status-counts-for-test dbstruct run-id test-name item-path state status)) ;; item-path is used to exclude current state/status of THIS test - (state-stauses (db:roll-up-rules state-status-counts state status)) - (newstate (car state-stauses)) - (newstatus (cadr state-stauses))) - (debug:print 4 *default-log-port* "BB> tl-test-id="tl-test-id" ; "test-name":"item-path" newstate="newstate" newstatus="newstatus" len(sscs)="(length state-status-counts) " state-status-counts: " - (apply conc - (map (lambda (x) - (conc - (with-output-to-string (lambda () (pp (dbr:counts->alist x)))) " | ")) - state-status-counts))); end debug:print - - (if tl-test-id - (db:test-set-state-status db run-id tl-test-id newstate newstatus #f)) ;; we are still in the transaction - must access the db and not the dbstruct - )))))) - (mutex-unlock! *db-transaction-mutex*) - (if (and test-id state status (equal? 
status "AUTO")) - (db:test-data-rollup dbstruct run-id test-id status)) - tr-res))))) - -(define (db:roll-up-rules state-status-counts state status) - (let* ((running (length (filter (lambda (x) - (member (dbr:counts-state x) *common:running-states*)) - state-status-counts))) - (bad-not-started (length (filter (lambda (x) - (and (equal? (dbr:counts-state x) "NOT_STARTED") - (not (member (dbr:counts-status x) *common:not-started-ok-statuses*)))) - state-status-counts))) - (all-curr-states (common:special-sort ;; worst -> best (sort of) - (delete-duplicates - (if (and state (not (member state *common:dont-roll-up-states*))) - (cons state (map dbr:counts-state state-status-counts)) - (map dbr:counts-state state-status-counts))) - *common:std-states* >)) - (all-curr-statuses (common:special-sort ;; worst -> best - (delete-duplicates - (if (and state status (not (member state *common:dont-roll-up-states*))) - (cons status (map dbr:counts-status state-status-counts)) - (map dbr:counts-status state-status-counts))) - *common:std-statuses* >)) - (non-completes (filter (lambda (x) - (not (member x (cons "COMPLETED" *common:dont-roll-up-states*)))) - all-curr-states)) - (preq-fails (filter (lambda (x) - (equal? x "PREQ_FAIL")) - all-curr-statuses)) - (num-non-completes (length non-completes)) - (newstate (cond - ((> running 0) "RUNNING") ;; anything running, call the situation running - ((> (length preq-fails) 0) "NOT_STARTED") - ((> bad-not-started 0) "COMPLETED") ;; we have an ugly situation, it is completed in the sense we cannot do more. - ((> num-non-completes 0) (car non-completes)) ;; (remove (lambda (x)(equal? "COMPLETED" x)) all-curr-states))) ;; only rollup DELETED if all DELETED - (else (car all-curr-states)))) - (newstatus (cond - ((> (length preq-fails) 0) "PREQ_FAIL") - ((or (> bad-not-started 0) - (and (equal? 
newstate "NOT_STARTED") - (> num-non-completes 0))) - "STARTED") - (else (car all-curr-statuses))))) - (debug:print-info 2 *default-log-port* - "\n--> probe db:set-state-status-and-roll-up-items: " - "\n--> state-status-counts: "(map dbr:counts->alist state-status-counts) - "\n--> running: "running - "\n--> bad-not-started: "bad-not-started - "\n--> non-non-completes: "num-non-completes - "\n--> non-completes: "non-completes - "\n--> all-curr-states: "all-curr-states - "\n--> all-curr-statuses: "all-curr-statuses - "\n--> newstate "newstate - "\n--> newstatus "newstatus - "\n\n") - - ;; NB// Pass the db so it is part of the transaction - (list newstate newstatus))) - -(define (db:set-state-status-and-roll-up-run dbstruct run-id curr-state curr-status) - (mutex-lock! *db-transaction-mutex*) - (db:with-db - dbstruct #f #f - (lambda (db) - (let ((tr-res - (sqlite3:with-transaction - db - (lambda () - (let* ((state-status-counts (db:get-all-state-status-counts-for-run dbstruct run-id)) - (state-stauses (db:roll-up-rules state-status-counts #f #f )) - (newstate (car state-stauses)) - (newstatus (cadr state-stauses))) - (if (or (not (eq? newstate curr-state)) (not (eq? newstatus curr-status))) - (db:set-run-state-status dbstruct run-id newstate newstatus ))))))) - (mutex-unlock! *db-transaction-mutex*) - tr-res)))) - - -(define (db:get-all-state-status-counts-for-run dbstruct run-id) - (let* ((test-count-recs (db:with-db - dbstruct #f #f - (lambda (db) - (sqlite3:map-row - (lambda (state status count) - (make-dbr:counts state: state status: status count: count)) - db - "SELECT state,status,count(id) FROM tests WHERE run_id=? 
GROUP BY state,status;" - run-id ))))) - test-count-recs)) - - -;; BBnote: db:get-all-state-status-counts-for-test returns dbr:counts object aggregating state and status of items of a given test, *not including rollup state/status* -(define (db:get-all-state-status-counts-for-test dbstruct run-id test-name item-path item-state-in item-status-in) - (let* ((test-info (db:get-test-info dbstruct run-id test-name item-path)) - (item-state (or item-state-in (db:test-get-state test-info))) - (item-status (or item-status-in (db:test-get-status test-info))) - (other-items-count-recs (db:with-db - dbstruct #f #f - (lambda (db) - (sqlite3:map-row - (lambda (state status count) - (make-dbr:counts state: state status: status count: count)) - db - ;; ignore current item because we have changed its value in the current transation so this select will see the old value. - "SELECT state,status,count(id) FROM tests WHERE run_id=? AND testname=? AND item_path != '' AND item_path !=? GROUP BY state,status;" - run-id test-name item-path)))) - - ;; add current item to tally outside of sql query - (match-countrec-lambda (lambda (countrec) - (and (equal? (dbr:counts-state countrec) item-state) - (equal? (dbr:counts-status countrec) item-status)))) - - (already-have-count-rec-list - (filter match-countrec-lambda other-items-count-recs)) ;; will have either 0 or 1 count recs depending if another item shares this item's state/status - - (updated-count-rec (if (null? 
already-have-count-rec-list) - (make-dbr:counts state: item-state status: item-status count: 1) - (let* ((our-count-rec (car already-have-count-rec-list)) - (new-count (add1 (dbr:counts-count our-count-rec)))) - (make-dbr:counts state: item-state status: item-status count: new-count)))) - - (nonmatch-countrec-lambda (lambda (countrec) (not (match-countrec-lambda countrec)))) - - (unrelated-rec-list - (filter nonmatch-countrec-lambda other-items-count-recs))) - - (cons updated-count-rec unrelated-rec-list))) - -;; (define (db:get-all-item-states db run-id test-name) -;; (sqlite3:map-row -;; (lambda (a) a) -;; db -;; "SELECT DISTINCT state FROM tests WHERE item_path != '' AND state != 'DELETED' AND run_id=? AND testname=?" -;; run-id test-name)) -;; -;; (define (db:get-all-item-statuses db run-id test-name) -;; (sqlite3:map-row -;; (lambda (a) a) -;; db -;; "SELECT DISTINCT status FROM tests WHERE item_path != '' AND state != 'DELETED' AND state='COMPLETED' AND run_id=? AND testname=?" -;; run-id test-name)) - -(define (db:test-get-logfile-info dbstruct run-id test-name) - (db:with-db - dbstruct - run-id - #f - (lambda (db) - (let ((res #f)) - (sqlite3:for-each-row - (lambda (path final_logf) - ;; (let ((path (sdb:qry 'getstr path-id)) - ;; (final_logf (sdb:qry 'getstr final_logf-id))) - (set! logf final_logf) - (set! res (list path final_logf)) - (if (directory? path) - (debug:print 2 *default-log-port* "Found path: " path) - (debug:print 2 *default-log-port* "No such path: " path))) ;; ) - db - "SELECT rundir,final_logf FROM tests WHERE testname=? AND item_path='' AND run_id=?;" - test-name run-id) - res)))) - -;;====================================================================== -;; A G R E G A T E D T R A N S A C T I O N D B W R I T E S -;;====================================================================== - -(define db:queries - (list '(update-run-duration "UPDATE tests SET run_duration=? 
WHERE id=?;") - - ;; TESTS - '(register-test "INSERT OR IGNORE INTO tests (run_id,testname,event_time,item_path,state,status) VALUES (?,?,strftime('%s','now'),?,'NOT_STARTED','n/a');") - ;; Test state and status - '(set-test-state "UPDATE tests SET state=? WHERE id=?;") - '(set-test-status "UPDATE tests SET state=? WHERE id=?;") - '(state-status "UPDATE tests SET state=?,status=? WHERE id=?;") ;; D/ONE - '(state-status-msg "UPDATE tests SET state=?,status=?,comment=? WHERE id=?;") ;; DONE - ;; Test comment - '(set-test-comment "UPDATE tests SET comment=? WHERE id=?;") - '(set-test-start-time "UPDATE tests SET event_time=strftime('%s','now') WHERE id=?;") ;; DONE - '(pass-fail-counts "UPDATE tests SET pass_count=?,fail_count=? WHERE id=?;") - ;; test_data-pf-rollup is used to set a tests PASS/FAIL based on the pass/fail info from the steps - '(test_data-pf-rollup "UPDATE tests - SET status=CASE WHEN (SELECT fail_count FROM tests WHERE id=?) > 0 - THEN 'FAIL' - WHEN (SELECT pass_count FROM tests WHERE id=?) > 0 AND - (SELECT status FROM tests WHERE id=?) NOT IN ('WARN','FAIL') - THEN 'PASS' - ELSE status - END WHERE id=?;") ;; DONE - '(test-set-log "UPDATE tests SET final_logf=? WHERE id=?;") ;; DONE - ;; '(test-set-rundir-by-test-id "UPDATE tests SET rundir=? WHERE id=?") ;; DONE - ;; '(test-set-rundir "UPDATE tests SET rundir=? AND testname=? AND item_path=?;") ;; DONE - '(test-set-rundir-shortdir "UPDATE tests SET rundir=?,shortdir=? WHERE testname=? AND item_path=? AND run_id=?;") ;; BROKEN!!! NEEDS run-id - '(delete-tests-in-state ;; "DELETE FROM tests WHERE state=?;") ;; DONE - "UPDATE tests SET state='DELETED' WHERE state=?") - '(tests:test-set-toplog "UPDATE tests SET final_logf=? WHERE run_id=? AND testname=? AND item_path='';") - '(update-cpuload-diskfree "UPDATE tests SET cpuload=?,diskfree=? WHERE id=?;") ;; DONE - '(update-uname-host "UPDATE tests SET uname=?,host=? 
WHERE id=?;") ;; DONE - '(update-test-rundat "INSERT INTO test_rundat (test_id,update_time,cpuload,diskfree,diskusage,run_duration) VALUES (?,?,?,?,?,?);") - '(update-test-state "UPDATE tests SET state=? WHERE state=? AND run_id=? AND testname=? AND NOT (item_path='' AND testname IN (SELECT DISTINCT testname FROM tests WHERE testname=? AND item_path != ''));") - '(update-test-status "UPDATE tests SET status=? WHERE status like ? AND run_id=? AND testname=? AND NOT (item_path='' AND testname IN (SELECT DISTINCT testname FROM tests WHERE testname=? AND item_path != ''));") - ;; stuff for set-state-status-and-roll-up-items - '(update-pass-fail-counts "UPDATE tests - SET fail_count=(SELECT count(id) FROM tests WHERE testname=? AND item_path != '' AND status IN ('FAIL','CHECK','INCOMPLETE','ABORT')), - pass_count=(SELECT count(id) FROM tests WHERE testname=? AND item_path != '' AND status IN ('PASS','WARN','WAIVED')) - WHERE testname=? AND item_path='' AND run_id=?;") ;; DONE ;; BROKEN!!! NEEDS run-id - '(top-test-set-running "UPDATE tests SET state='RUNNING' WHERE testname=? AND item_path='' AND run_id=?;") ;; DONE ;; BROKEN!!! NEEDS run-id - - ;; NOT USED - ;; - ;; Might be the following top-test-set-per-pf-counts query could be better based off of something like this: - ;; - ;; select state,status,count(state) from tests where run_id=59 AND testname='runfirst' group by state,status; - ;; - '(top-test-set-per-pf-counts "UPDATE tests - SET state=CASE - WHEN (SELECT count(id) FROM tests - WHERE testname=? - AND item_path != '' - AND status NOT IN ('n/a') - AND state in ('NOT_STARTED')) > 0 THEN 'UNKNOWN' - WHEN (SELECT count(id) FROM tests - WHERE testname=? - AND item_path != '' - AND (status NOT IN ('TEN_STRIKES','BLOCKED') OR status IN ('INCOMPLETE')) - AND state in ('RUNNING','NOT_STARTED','LAUNCHED','REMOTEHOSTSTART')) > 0 THEN 'RUNNING' - WHEN (SELECT count(id) FROM tests - WHERE testname=? 
- AND item_path != '' - AND state NOT IN ('COMPLETED','DELETED')) = 0 THEN 'COMPLETED' - WHEN (SELECT count(id) FROM tests - WHERE testname=? - AND item_path != '' - AND state = 'NOT_STARTED') > 0 THEN 'NOT_STARTED' - ELSE 'UNKNOWN' END, - status=CASE - WHEN fail_count > 0 THEN 'FAIL' - WHEN (SELECT count(id) FROM tests - WHERE testname=? - AND item_path != '' - AND state IN ('BLOCKED','INCOMPLETE')) > 0 THEN 'FAIL' - WHEN (SELECT count(id) FROM tests - WHERE testname=? - AND item_path != '' - AND state NOT IN ('DELETED') - AND status IN ('INCOMPLETE','ABORT')) > 0 THEN 'ABORT' - WHEN (SELECT count(id) FROM tests - WHERE testname=? - AND item_path != '' - AND state NOT IN ('DELETED') - AND status = 'AUTO') > 0 THEN 'AUTO' - WHEN (SELECT count(id) FROM tests - WHERE testname=? - AND item_path != '' - AND state NOT IN ('DELETED') - AND status IN ('STUCK/INCOMPLETE', 'INCOMPLETE')) > 0 THEN 'INCOMPLETE' - WHEN (SELECT count(id) FROM tests - WHERE testname=? - AND item_path != '' - AND state IN ('COMPLETED','STUCK/INCOMPLETE','INCOMPLETE') - AND status = 'FAIL') > 0 THEN 'FAIL' - WHEN (SELECT count(id) FROM tests - WHERE testname=? - AND item_path != '' - AND state NOT IN ('DELETED') - AND status = 'CHECK') > 0 THEN 'CHECK' - WHEN (SELECT count(id) FROM tests - WHERE testname=? - AND item_path != '' - AND state NOT IN ('DELETED') - AND status = 'SKIP') > 0 THEN 'SKIP' - WHEN (SELECT count(id) FROM tests - WHERE testname=? - AND item_path != '' - AND state NOT IN ('DELETED') - AND status = 'WARN') > 0 THEN 'WARN' - WHEN (SELECT count(id) FROM tests - WHERE testname=? - AND item_path != '' - AND state NOT IN ('DELETED') - AND status = 'WAIVED') > 0 THEN 'WAIVED' - WHEN (SELECT count(id) FROM tests - WHERE testname=? - AND item_path != '' - AND state NOT IN ('DELETED') - AND status NOT IN ('PASS','FAIL','WARN','WAIVED')) > 0 THEN 'ABORT' - WHEN (SELECT count(id) FROM tests - WHERE testname=? 
- AND item_path != '' - AND state='NOT_STARTED') > 0 THEN 'n/a' - WHEN (SELECT count(id) FROM tests - WHERE testname=? - AND item_path != '' - AND state = 'COMPLETED' - AND status = 'PASS') > 0 THEN 'PASS' - WHEN pass_count > 0 AND fail_count=0 THEN 'PASS' - ELSE 'UNKNOWN' END - WHERE testname=? AND item_path='';") ;; DONE ;; BROKEN!!! NEEDS run-id - - ;; STEPS - '(delete-test-step-records "UPDATE test_steps SET status='DELETED' WHERE test_id=?;") - '(delete-test-data-records "UPDATE test_data SET status='DELETED' WHERE test_id=?;") ;; using status since no state field - )) - -(define (db:lookup-query qry-name) - (let ((q (alist-ref qry-name db:queries))) - (if q (car q) #f))) - -;; do not run these as part of the transaction -(define db:special-queries '(rollup-tests-pass-fail - ;; db:set-state-status-and-roll-up-items ;; WHY NOT!? - login - immediate - flush - sync - set-verbosity - killserver - )) - -(define (db:login dbstruct calling-path calling-version client-signature) - (cond - ((not (equal? calling-path *toppath*)) - (list #f "Login failed due to mismatch paths: " calling-path ", " *toppath*)) - ;; ((not (equal? *run-id* run-id)) - ;; (list #f "Login failed due to mismatch run-id: " run-id ", " *run-id*)) - ((not (equal? megatest-version calling-version)) - (list #t (conc "Login warning due to mismatch megatest version: " calling-version ", " megatest-version))) - (else - (hash-table-set! *logged-in-clients* client-signature (current-seconds)) - '(#t "successful login")))) - -(define (db:general-call dbstruct stmtname params) - (let ((query (let ((q (alist-ref (if (string? 
stmtname) - (string->symbol stmtname) - stmtname) - db:queries))) - (if q (car q) #f)))) - (db:with-db - dbstruct #f #f - (lambda (db) - (apply sqlite3:execute db query params) - #t)))) - -;; get a summary of state and status counts to calculate a rollup -;; -(define (db:get-state-status-summary dbstruct run-id testname) - (let ((res '())) - (db:with-db - dbstruct #f #f - (lambda (db) - (sqlite3:for-each-row - (lambda (state status count) - (set! res (cons (vector state status count) res))) - db - "SELECT state,status,count(state) FROM tests WHERE run_id=? AND testname=? AND item_path='' GROUP BY state,status;" - run-id testname) - res)))) - -(define (db:get-latest-host-load dbstruct raw-hostname) - (let* ((hostname (string-substitute "\\..*$" "" raw-hostname)) - (res (cons -1 0))) - (db:with-db - dbstruct #f #f - (lambda (db) - (sqlite3:for-each-row - (lambda (cpuload update-time) (set! res (cons cpuload update-time))) - db - "SELECT tr.cpuload, tr.update_time FROM test_rundat tr, tests t WHERE t.host=? AND tr.cpuload != -1 AND tr.test_id=t.id ORDER BY tr.update_time DESC LIMIT 1;" - hostname))) res )) - -(define (db:set-top-level-from-items dbstruct run-id testname) - (let* ((summ (db:get-state-status-summary dbstruct run-id testname)) - (find (lambda (state status) - (if (null? summ) - #f - (let loop ((hed (car summ)) - (tal (cdr summ))) - (if (and (string-match state (vector-ref hed 0)) - (string-match status (vector-ref hed 1))) - hed - (if (null? tal) - #f - (loop (car tal)(cdr tal))))))))) - - - ;;; E D I T M E ! ! - - - (cond - ((> (find "COMPLETED" ".*") 0) #f)))) - - - -;; get the previous records for when these tests were run where all keys match but runname -;; NB// Merge this with test:get-previous-test-run-records? This one looks for all matching tests -;; can use wildcards. Also can likely be factored in with get test paths? -;; -;; Run this remotely!! 
-;; -(define (db:get-matching-previous-test-run-records dbstruct run-id test-name item-path) - (let* ((keys (db:get-keys dbstruct)) - (selstr (string-intersperse keys ",")) - (qrystr (string-intersperse (map (lambda (x)(conc x "=?")) keys) " AND ")) - (keyvals #f) - (tests-hash (make-hash-table))) - ;; first look up the key values from the run selected by run-id - (db:with-db - dbstruct #f #f - (lambda (db) - (sqlite3:for-each-row - (lambda (a . b) - (set! keyvals (cons a b))) - db - (conc "SELECT " selstr " FROM runs WHERE id=? ORDER BY event_time DESC;") run-id))) - (if (not keyvals) - '() - (let ((prev-run-ids '())) - (db:with-db - dbstruct #f #f - (lambda (db) - (apply sqlite3:for-each-row - (lambda (id) - (set! prev-run-ids (cons id prev-run-ids))) - db - (conc "SELECT id FROM runs WHERE " qrystr " AND id != ?;") (append keyvals (list run-id))))) - ;; collect all matching tests for the runs then - ;; extract the most recent test and return that. - (debug:print 4 *default-log-port* "selstr: " selstr ", qrystr: " qrystr ", keyvals: " keyvals - ", previous run ids found: " prev-run-ids) - (if (null? prev-run-ids) '() ;; no previous runs? return null - (let loop ((hed (car prev-run-ids)) - (tal (cdr prev-run-ids))) - (let ((results (db:get-tests-for-run dbstruct hed (conc test-name "/" item-path) '() '() #f #f #f #f #f #f #f 'normal))) - (debug:print 4 *default-log-port* "Got tests for run-id " run-id ", test-name " test-name - ", item-path " item-path " results: " (intersperse results "\n")) - ;; Keep only the youngest of any test/item combination - (for-each - (lambda (testdat) - (let* ((full-testname (conc (db:test-get-testname testdat) "/" (db:test-get-item-path testdat))) - (stored-test (hash-table-ref/default tests-hash full-testname #f))) - (if (or (not stored-test) - (and stored-test - (> (db:test-get-event_time testdat)(db:test-get-event_time stored-test)))) - ;; this test is younger, store it in the hash - (hash-table-set! 
tests-hash full-testname testdat)))) - results) - (if (null? tal) - (map cdr (hash-table->alist tests-hash)) ;; return a list of the most recent tests - (loop (car tal)(cdr tal)))))))))) - -;; Function recursively checks if .journal exists; if yes means db busy; call itself after delayed interval -;; return the sqlite3 db handle if possible -;; -(define (db:delay-if-busy dbdat #!key (count 6)) - (if (not (configf:lookup *configdat* "server" "delay-on-busy")) - (and dbdat (db:dbdat-get-db dbdat)) - (if dbdat - (let* ((dbpath (db:dbdat-get-path dbdat)) - (db (db:dbdat-get-db dbdat)) ;; we'll return this so (db:delay--if-busy can be called inline - (dbfj (conc dbpath "-journal"))) - (if (handle-exceptions - exn - (begin - (debug:print-info 0 *default-log-port* "WARNING: failed to test for existance of " dbfj) - (thread-sleep! 1) - (db:delay-if-busy count (- count 1))) - (common:file-exists? dbfj)) - (case count - ((6) - (thread-sleep! 0.2) - (db:delay-if-busy count: 5)) - ((5) - (thread-sleep! 0.4) - (db:delay-if-busy count: 4)) - ((4) - (thread-sleep! 0.8) - (db:delay-if-busy count: 3)) - ((3) - (thread-sleep! 1.6) - (db:delay-if-busy count: 2)) - ((2) - (thread-sleep! 3.2) - (db:delay-if-busy count: 1)) - ((1) - (thread-sleep! 6.4) - (db:delay-if-busy count: 0)) - (else - (debug:print-info 0 *default-log-port* "delaying db access due to high database load.") - (thread-sleep! 12.8)))) - db) - "bogus result from db:delay-if-busy"))) - -(define (db:test-get-records-for-index-file dbstruct run-id test-name) - (let ((res '())) - (db:with-db - dbstruct - run-id - #f - (lambda (db) - (sqlite3:for-each-row - (lambda (id itempath state status run_duration logf comment) - (set! res (cons (vector id itempath state status run_duration logf comment) res))) - db - "SELECT id,item_path,state,status,run_duration,final_logf,comment FROM tests WHERE testname=? AND item_path != '' AND run_id=?;" ;; BUG! WHY NO run_id? 
- test-name - run-id) - res)))) - -;;====================================================================== -;; Tests meta data -;;====================================================================== - -;; returns a hash table of tags to tests -;; -(define (db:get-tests-tags dbstruct) - (db:with-db - dbstruct #f #f - (lambda (db) - (let* ((res (make-hash-table))) - (sqlite3:for-each-row - (lambda (testname tags-in) - (let ((tags (string-split tags-in ","))) - (for-each - (lambda (tag) - (hash-table-set! res tag - (delete-duplicates - (cons testname (hash-table-ref/default res tag '()))))) - tags))) - db - "SELECT testname,tags FROM test_meta") - (hash-table->alist res))))) - -;; read the record given a testname -(define (db:testmeta-get-record dbstruct testname) - (let ((res #f)) - (db:with-db - dbstruct - #f - #f - (lambda (db) - (sqlite3:for-each-row - (lambda (id testname author owner description reviewed iterated avg_runtime avg_disk tags jobgroup) - (set! res (vector id testname author owner description reviewed iterated avg_runtime avg_disk tags jobgroup))) - db - "SELECT id,testname,author,owner,description,reviewed,iterated,avg_runtime,avg_disk,tags,jobgroup FROM test_meta WHERE testname=?;" - testname) - res)))) - -;; create a new record for a given testname -(define (db:testmeta-add-record dbstruct testname) - (db:with-db dbstruct #f #f - (lambda (db) - (sqlite3:execute - db - "INSERT OR IGNORE INTO test_meta (testname,author,owner,description,reviewed,iterated,avg_runtime,avg_disk,tags) VALUES (?,'','','','','','','','');" testname)))) - -;; update one of the testmeta fields -(define (db:testmeta-update-field dbstruct testname field value) - (db:with-db dbstruct #f #f - (lambda (db) - (sqlite3:execute - db - (conc "UPDATE test_meta SET " field "=? WHERE testname=?;") value testname)))) - -(define (db:testmeta-get-all dbstruct) - (db:with-db dbstruct #f #f - (lambda (db) - (let ((res '())) - (sqlite3:for-each-row - (lambda (a . b) - (set! 
res (cons (apply vector a b) res))) - db - "SELECT id,testname,author,owner,description,reviewed,iterated,avg_runtime,avg_disk,tags,jobgroup FROM test_meta;") - res)))) - -;;====================================================================== -;; M I S C M A N A G E M E N T I T E M S -;;====================================================================== - - - -;; the new prereqs calculation, looks also at itempath if specified -;; all prereqs must be met -;; if prereq test with itempath='' is COMPLETED and PASS, WARN, CHECK, or WAIVED then prereq is met -;; if prereq test with itempath=ref-item-path and COMPLETED with PASS, WARN, CHECK, or WAIVED then prereq is met -;; -;; Note: mode 'normal means that tests must be COMPLETED and ok (i.e. PASS, WARN, CHECK, SKIP or WAIVED) -;; mode 'toplevel means that tests must be COMPLETED only -;; mode 'itemmatch or 'itemwait means that tests items must be COMPLETED and (PASS|WARN|WAIVED|CHECK) [[ NB// NOT IMPLEMENTED YET ]] -;; mode 'exclusive means this test/item cannot run if the same test/item is LAUNCHED,REMOTEHOSTSTART or RUNNING -;; -;; IDEA for consideration: -;; 1. collect all tests "upstream" -;; 2. any NOT completed and good? if yes => return those as prereqs not met, if no => return null list -;; -;; (define (db:get-prereqs-not-met dbstruct run-id waitons ref-item-path mode) -(define (db:get-prereqs-not-met dbstruct run-id waitons ref-test-name ref-item-path mode itemmaps) ;; #!key (mode '(normal))(itemmap #f)) - ;; BBnote - rollup of an itemized test's overall state/status done in db:set-state-status-and-roll-up-items - (append - (if (member 'exclusive mode) - (let ((running-tests (db:get-tests-for-run dbstruct - #f ;; run-id of #f means for all runs. - (if (string=? 
ref-item-path "") ;; testpatt - ref-test-name - (conc ref-test-name "/" ref-item-path)) - '("LAUNCHED" "REMOTEHOSTSTART" "RUNNING") ;; states - '() ;; statuses - #f ;; offset - #f ;; limit - #f ;; not-in - #f ;; sort by - #f ;; sort order - 'shortlist ;; query type - 0 ;; last update, beginning of time .... - #f ;; mode - ))) - ;;(map (lambda (testdat) - ;; (if (equal? (db:test-get-item-path testdat) "") - ;; (db:test-get-testname testdat) - ;; (conc (db:test-get-testname testdat) - ;; "/" - ;; (db:test-get-item-path testdat)))) - running-tests) ;; calling functions want the entire data - '()) - - ;; collection of: for each waiton - - ;; if this ref-test-name is an item in an itemized test and mode is itemwait/itemmatch: - ;; if waiton is not itemized - if waiton is not both completed and in ok status, add as unmet prerequisite - ;; if waiton is itemized: - ;; and waiton's items are not expanded, add as unmet prerequisite - ;; else if matching waiton item is not both completed and in an ok status, add as unmet prerequisite - ;; else - ;; if waiton toplevel is not in both completed and ok status, add as unmet prerequisite - - (if (or (not waitons) - (null? waitons)) - '() - (let* ((ref-test-itemized-mode (not (null? (lset-intersection eq? mode '(itemmatch itemwait))))) ;; how is this different from using member? - (ref-test-toplevel-mode (not (null? (lset-intersection eq? mode '(toplevel))))) - (ref-test-is-toplevel (equal? ref-item-path "")) - (ref-test-is-item (not ref-test-is-toplevel)) - (unmet-pre-reqs '()) - (result '()) - (unmet-prereq-items '()) - ) - (for-each ; waitons - (lambda (waitontest-name) - ;; by getting the tests with matching name we are looking only at the matching test - ;; and related sub items - ;; next should be using mt:get-tests-for-run? - - (let (;(waiton-is-itemized ...) - ;(waiton-items-are-expanded ...) 
- (waiton-tests (db:get-tests-for-run-state-status dbstruct run-id waitontest-name)) - (ever-seen #f) - (parent-waiton-met #f) - (item-waiton-met #f) - - ) - (for-each ; test expanded from waiton - (lambda (waiton-test) - (let* ((waiton-state (db:test-get-state waiton-test)) - (waiton-status (db:test-get-status waiton-test)) - (waiton-item-path (db:test-get-item-path waiton-test)) ;; BB- this is the upstream itempath - (waiton-test-name (db:test-get-testname waiton-test)) - (waiton-is-toplevel (equal? waiton-item-path "")) - (waiton-is-item (not waiton-is-toplevel)) - (waiton-is-completed (member waiton-state *common:ended-states*)) - (waiton-is-running (member waiton-state *common:running-states*)) - (waiton-is-killed (member waiton-state *common:badly-ended-states*)) - (waiton-is-ok (member waiton-status *common:well-ended-states*)) - ;; testname-b path-a path-b - (same-itempath (db:compare-itempaths ref-test-name waiton-item-path ref-item-path itemmaps)) ;; (equal? ref-item-path waiton-item-path))) - (real-ref-test-name (car (string-split ref-test-name "/"))) ;; I THINK ref-test-name SHOULD NEVER HAVE THE ITEM_PATH! - (test-and-ref-are-same (equal? real-ref-test-name waiton-test-name))) - (debug:print 4 *default-log-port* "waiton-test-name " waiton-test-name " ref-test-name: " ref-test-name " test-and-ref-are-same: " test-and-ref-are-same) - (set! ever-seen #t) - ;;(BB> "***consider waiton "waiton-test"/"waiton-item-path"***") - (cond - ;; case 0 - toplevel of an itemized test, at least one item in prereq has completed - ((and waiton-is-item ref-test-is-toplevel ref-test-itemized-mode waiton-is-completed) - (set! parent-waiton-met #t)) - - ;; case 1, non-item (parent test) is - ((and waiton-is-toplevel ;; this is the parent test of the waiton being examined - waiton-is-completed - ;;(BB> "cond1") - (or waiton-is-ok ref-test-toplevel-mode)) ;; itemmatch itemwait)))))) - (set! 
parent-waiton-met #t)) - ;; Special case for toplevel and KILLED - ((and waiton-is-toplevel ;; this is the parent test - waiton-is-killed - (member 'toplevel mode)) - ;;(BB> "cond2") - (set! parent-waiton-met #t)) - ;; For itemwait mode IFF the previous matching item is good the set parent-waiton-met - ((and ref-test-itemized-mode ref-test-is-item same-itempath) - ;;(BB> "cond3") - (if (and waiton-is-completed (or waiton-is-ok ref-test-toplevel-mode)) - (set! item-waiton-met #t) - (set! unmet-prereq-items (cons waiton-test unmet-prereq-items))) - (if (and waiton-is-toplevel ;; if upstream rollup test is completed, parent-waiton-met is set - (or waiton-is-completed waiton-is-running)) - (set! parent-waiton-met #t))) - ;; normal checking of parent items, any parent or parent item not ok blocks running - ((and waiton-is-completed - (or waiton-is-ok - (member 'toplevel mode)) ;; toplevel does not block on FAIL - (and waiton-is-ok (member 'itemmatch mode) ;; itemmatch blocks on not ok ;; TODO: THIS IS PROBABLY A BUG. ITEMMATCH AND ITEMWAIT ARE SYNONYMS!! WHAT HAPPENED OT ITEMWAIT??? - )) - ;;(BB> "cond4") - (set! item-waiton-met #t)) - ((and waiton-is-completed waiton-is-ok same-itempath) - ;;(BB> "cond5") - (set! item-waiton-met #t)) - ((and waiton-is-completed waiton-is-ok test-and-ref-are-same) ;; probably from [waitons] table - (set! item-waiton-met #t)) - (else - #t - ;;(BB> "condelse") - )))) - waiton-tests) - ;; both requirements, parent and item-waiton must be met to NOT add item to - ;; prereq's not met list - ;; (BB> - ;; "\n* waiton-tests "waiton-tests - ;; "\n* parent-waiton-met "parent-waiton-met - ;; "\n* item-waiton-met "item-waiton-met - ;; "\n* ever-seen "ever-seen - ;; "\n* ref-test-itemized-mode "ref-test-itemized-mode - ;; "\n* unmet-prereq-items "unmet-prereq-items - ;; "\n* result (pre) "result - ;; "\n* ever-seen "ever-seen - ;; "\n") - - (cond - ((and ref-test-itemized-mode ref-test-is-item (not (null? unmet-prereq-items))) - (set! 
result (append unmet-prereq-items result))) - ((not (or parent-waiton-met item-waiton-met)) - (set! result (append (if (null? waiton-tests) (list waitontest-name) waiton-tests) result))) ;; appends the string if the full record is not available - ;; if the test is not found then clearly the waiton is not met... - ;; (if (not ever-seen)(set! result (cons waitontest-name result))))) - ((not ever-seen) - (set! result (append (if (null? waiton-tests)(list waitontest-name) waiton-tests) result)))))) - waitons) - (delete-duplicates result))))) - -;;====================================================================== -;; To sync individual run -;;====================================================================== -(define (db:get-run-record-ids dbstruct target run keynames test-patt) -(let ((backcons (lambda (lst item)(cons item lst)))) - (db:with-db - dbstruct #f #f - (lambda (db) - (let* ((keystr (string-intersperse - (map (lambda (key val) - (conc key " like '" val "'")) - keynames - (string-split target "/")) - " AND ")) - (run-qry (conc "SELECT id FROM runs WHERE " keystr " and runname='" run"'")) - (test-qry (conc "SELECT id FROM tests WHERE run_id in (" run-qry ") and testname like '" test-patt "'"))) - (print run-qry) - (print test-qry) - `((runs . ,(sqlite3:fold-row backcons '() db run-qry)) - (tests . ,(sqlite3:fold-row backcons '() db test-qry)) - (test_steps . ,(sqlite3:fold-row backcons '() db (conc "SELECT id FROM test_steps WHERE test_id in (" test-qry ")"))) - (test_data . ,(sqlite3:fold-row backcons '() db (conc "SELECT id FROM test_data WHERE test_id in (" test-qry ")" ))) - )))))) - -;;====================================================================== -;; Just for sync, procedures to make sync easy -;;====================================================================== - -;; get an alist of record ids changed since time since-time -;; '((runs . (1 2 3 ...))(steps . (5 6 7 ...) 
...)) -;; -(define (db:get-changed-record-ids dbstruct since-time) - ;; no transaction, allow the db to be accessed between the big queries - (let ((backcons (lambda (lst item)(cons item lst)))) - (db:with-db - dbstruct #f #f - (lambda (db) - `((runs . ,(sqlite3:fold-row backcons '() db "SELECT id FROM runs WHERE last_update>=?" since-time)) - (tests . ,(sqlite3:fold-row backcons '() db "SELECT id FROM tests WHERE last_update>=?" since-time)) - (test_steps . ,(sqlite3:fold-row backcons '() db "SELECT id FROM test_steps WHERE last_update>=?" since-time)) - (test_data . ,(sqlite3:fold-row backcons '() db "SELECT id FROM test_data WHERE last_update>=?" since-time)) - ;; (test_meta . ,(fold-row backcons '() db "SELECT id FROM test_meta WHERE last_update>?" since-time)) - (run_stats . ,(sqlite3:fold-row backcons '() db "SELECT id FROM run_stats WHERE last_update>=?" since-time)) - ))))) - -;;====================================================================== -;; Extract ods file from the db -;;====================================================================== - -;; NOT REWRITTEN YET!!!!! - -;; runspatt is a comma delimited list of run patterns -;; keypatt-alist must contain *all* keys with an associated pattern: '( ("KEY1" "%") .. ) -(define (db:extract-ods-file dbstruct outputfile keypatt-alist runspatt pathmod) - (let* ((keysstr (string-intersperse (map car keypatt-alist) ",")) - (keyqry (string-intersperse (map (lambda (p)(conc (car p) " LIKE ? 
")) keypatt-alist) " AND ")) - (numkeys (length keypatt-alist)) - (test-ids '()) - (dbdat (db:get-db dbstruct)) - (db (db:dbdat-get-db dbdat)) - (windows (and pathmod (substring-index "\\" pathmod))) - (tempdir (conc "/tmp/" (current-user-name) "/" runspatt "_" (random 10000) "_" (current-process-id))) - (runsheader (append (list "Run Id" "Runname") ; 0 1 - (map car keypatt-alist) ; + N = length keypatt-alist - (list "Testname" ; 2 - "Item Path" ; 3 - "Description" ; 4 - "State" ; 5 - "Status" ; 6 - "Final Log" ; 7 - "Run Duration" ; 8 - "When Run" ; 9 - "Tags" ; 10 - "Run Owner" ; 11 - "Comment" ; 12 - "Author" ; 13 - "Test Owner" ; 14 - "Reviewed" ; 15 - "Diskfree" ; 16 - "Uname" ; 17 - "Rundir" ; 18 - "Host" ; 19 - "Cpu Load" ; 20 - ))) - (results (list runsheader)) - (testdata-header (list "Run Id" "Testname" "Item Path" "Category" "Variable" "Value" "Expected" "Tol" "Units" "Status" "Comment")) - (mainqry (conc "SELECT - t.testname,r.id,runname," keysstr ",t.testname, - t.item_path,tm.description,t.state,t.status, - final_logf,run_duration, - strftime('%m/%d/%Y %H:%M:%S',datetime(t.event_time,'unixepoch'),'localtime'), - tm.tags,r.owner,t.comment, - author, - tm.owner,reviewed, - diskfree,uname,rundir, - host,cpuload - FROM tests AS t JOIN runs AS r ON t.run_id=r.id JOIN test_meta AS tm ON tm.testname=t.testname - WHERE runname LIKE ? AND " keyqry ";"))) - (debug:print 2 *default-log-port* "Using " tempdir " for constructing the ods file. keyqry: " keyqry " keystr: " keysstr " with keys: " (map cadr keypatt-alist) - "\n mainqry: " mainqry) - ;; "Expected Value" - ;; "Value Found" - ;; "Tolerance" - (apply sqlite3:for-each-row - (lambda (test-id . b) - (set! test-ids (cons test-id test-ids)) ;; test-id is now testname - (set! 
results (append results ;; note, drop the test-id - (list - (if pathmod - (let* ((vb (apply vector b)) - (keyvals (let loop ((i 0) - (res '())) - (if (>= i numkeys) - res - (loop (+ i 1) - (append res (list (vector-ref vb (+ i 2)))))))) - (runname (vector-ref vb 1)) - (testname (vector-ref vb (+ 2 numkeys))) - (item-path (vector-ref vb (+ 3 numkeys))) - (final-log (vector-ref vb (+ 7 numkeys))) - (run-dir (vector-ref vb (+ 18 numkeys))) - (log-fpath (conc run-dir "/" final-log))) ;; (string-intersperse keyvals "/") "/" testname "/" item-path "/" - (debug:print 4 *default-log-port* "log: " log-fpath " exists: " (common:file-exists? log-fpath)) - (vector-set! vb (+ 7 numkeys) (if (common:file-exists? log-fpath) - (let ((newpath (conc pathmod "/" - (string-intersperse keyvals "/") - "/" runname "/" testname "/" - (if (string=? item-path "") "" (conc "/" item-path)) - final-log))) - ;; for now throw away newpath and use the log-fpath conc'd with pathmod - (set! newpath (conc pathmod log-fpath)) - (if windows (string-translate newpath "/" "\\") newpath)) - (if (debug:debug-mode 1) - (conc final-log " not-found") - ""))) - (vector->list vb)) - b))))) - db - mainqry - runspatt (map cadr keypatt-alist)) - (debug:print 2 *default-log-port* "Found " (length test-ids) " records") - (set! results (list (cons "Runs" results))) - ;; now, for each test, collect the test_data info and add a new sheet - (for-each - (lambda (test-id) - (let ((test-data (list testdata-header)) - (curr-test-name #f)) - (sqlite3:for-each-row - (lambda (run-id testname item-path category variable value expected tol units status comment) - (set! curr-test-name testname) - (set! 
test-data (append test-data (list (list run-id testname item-path category variable value expected tol units status comment))))) - db - ;; "SELECT run_id,testname,item_path,category,variable,td.value AS value,expected,tol,units,td.status AS status,td.comment AS comment FROM test_data AS td INNER JOIN tests ON tests.id=td.test_id WHERE test_id=?;" - "SELECT run_id,testname,item_path,category,variable,td.value AS value,td.expected,td.tol,td.units,td.status AS status,td.comment AS comment FROM test_data AS td INNER JOIN tests ON tests.id=td.test_id WHERE testname=?;" - test-id) - (if curr-test-name - (set! results (append results (list (cons curr-test-name test-data))))) - )) - (sort (delete-duplicates test-ids) string<=)) - (system (conc "mkdir -p " tempdir)) - ;; (pp results) - (ods:list->ods - tempdir - (if (string-match (regexp "^[/~]+.*") outputfile) ;; full path? - outputfile - (begin - (debug:print 0 *default-log-port* "WARNING: path given, " outputfile " is relative, prefixing with current directory") - (conc (current-directory) "/" outputfile))) - results) - ;; brutal clean up - (stack-push! (dbr:dbstruct-dbstack dbstruct) dbdat) - (system "rm -rf tempdir"))) - -;; (db:extract-ods-file db "outputfile.ods" '(("sysname" "%")("fsname" "%")("datapath" "%")) "%") - Index: dbmod.scm ================================================================== --- dbmod.scm +++ dbmod.scm @@ -17,30 +17,5134 @@ ;; along with Megatest. If not, see . 
;;====================================================================== (declare (unit dbmod)) + (declare (uses commonmod)) (declare (uses keysmod)) -(declare (uses tasksmod)) +(declare (uses mtargs)) +(declare (uses mtconfigf)) (declare (uses odsmod)) +(declare (uses tasksmod)) +;; (declare (uses servermod)) +;; (declare (uses testsmod)) (module dbmod * (import scheme chicken data-structures extras) (import (prefix sqlite3 sqlite3:) posix typed-records srfi-18 srfi-69 format ports srfi-1 matchable stack regex - srfi-13 stack) + srfi-13 stack s11n + (prefix base64 base64:) + z3 + csv csv-xml + directory-utils + call-with-environment-variables) (import commonmod) (import keysmod) (import files) (import tasksmod) (import odsmod) +;; (import testsmod) +(import (prefix mtargs args:)) +(import (prefix mtconfigf configf:)) +;; (import servermod) ;; (use (prefix ulex ulex:)) (include "common_records.scm") +(include "db_records.scm") +(include "key_records.scm") + +;;====================================================================== +;; Database access +;;====================================================================== + +;; dbstruct vector containing all the relevant dbs like main.db, megatest.db, run.db etc + +(define *rundb-mutex* (make-mutex)) ;; prevent problems opening/closing rundb's +(define *number-of-writes* 0) +(define *number-non-write-queries* 0) + +;;====================================================================== +;; R E C O R D S +;;====================================================================== + +;; each db entry is a pair ( db . 
;; dbfilepath )
;; I propose this record evolves into the area record
;;
;; dbr:dbstruct groups the database handles for one area. db:open-db fills
;; mtdb, tmpdb and refndb with the (db . path) pairs returned by
;; db:open-megatest-db, and replaces dbstack with a stack holding tmpdb.
;;
(defstruct dbr:dbstruct
  (tmpdb       #f)
  (dbstack     #f) ;; stack for tmp db handles, do not initialize with a stack
  (mtdb        #f)
  (refndb      #f)
  (homehost    #f) ;; not used yet
  (on-homehost #f) ;; not used yet
  (read-only   #f)
  ) ;; goal is to converge on one struct for an area but for now it is too confusing


;; record for keeping state,status and count for doing roll-ups in
;; iterated tests
;;
(defstruct dbr:counts
  (state  #f)
  (status #f)
  (count  0))

;; more crap.

;; Return the list of directories to search for tests: the second element
;; of every entry in the "tests-paths" section of cfgdat (when cfgdat and
;; the section exist) followed by *toppath*/tests. Directories that do not
;; exist are dropped; a warning is printed when common:low-noise-print
;; allows it.
;;
(define (tests:get-tests-search-path cfgdat)
  (let ((paths (let ((section (if cfgdat
                                  (configf:get-section cfgdat "tests-paths")
                                  #f)))
                 (if section
                     (map cadr section)
                     '()))))
    (filter (lambda (d)
              (if (directory-exists? d)
                  d
                  (begin
                    (if (common:low-noise-print 60 "tests:get-tests-search-path" d)
                        (debug:print 0 *default-log-port* "WARNING: problem with directory " d ", dropping it from tests path"))
                    #f)))
            (append paths (list (conc *toppath* "/tests"))))))

;; Return the testconfig for test-name, reading it on first use and caching
;; it in *testconfigs*. The search path comes from
;; tests:get-tests-search-path; the first readable
;; <dir>/<test-name>/testconfig wins. Returns #f (after logging an error)
;; when no readable testconfig exists.
;;
;; The search path may be empty (every candidate directory missing); that
;; case is handled by the same "not found" branch instead of taking the car
;; of an empty list.
;;
(define (mt:lazy-read-test-config test-name)
  (or (hash-table-ref/default *testconfigs* test-name #f)
      (let loop ((dirs (tests:get-tests-search-path *configdat*)))
        (if (null? dirs)
            (begin
              (debug:print-error 0 *default-log-port* "No readable testconfig found for " test-name)
              #f)
            (let ((tconfig-file (conc (car dirs) "/" test-name "/testconfig")))
              (if (and (common:file-exists? tconfig-file)
                       (file-read-access? tconfig-file))
                  ;; Setting MT_LINKTREE here is almost certainly unnecessary.
                  (let ((link-tree-path (common:get-linktree)) ;; (configf:lookup *configdat* "setup" "linktree"))
                        (old-link-tree  (get-environment-variable "MT_LINKTREE")))
                    (if link-tree-path (setenv "MT_LINKTREE" link-tree-path))
                    (let ((newtcfg (configf:read-config tconfig-file #f #f))) ;; NOTE: Does NOT run [system ...]
                      (hash-table-set! *testconfigs* test-name newtcfg)
                      ;; put MT_LINKTREE back the way it was found
                      (if old-link-tree
                          (setenv "MT_LINKTREE" old-link-tree)
                          (unsetenv "MT_LINKTREE"))
                      newtcfg))
                  (loop (cdr dirs))))))))

;; Convert a comma separated list of "testpatt/itempatt" patterns into an
;; SQL condition of the form
;;   (<testname cond> AND <item_path cond>) OR (...) ...
;; using db:patt->like for each half. Returns #f when patterns is not a
;; string or contains no patterns (no query should be done).
;;
(define (tests:match->sqlqry patterns)
  (if (string? patterns)
      (let ((patts (string-split patterns ",")))
        (if (null? patts) ;;; no pattern(s) means no match, we will do no query
            #f
            (let loop ((patt (car patts))
                       (tal  (cdr patts))
                       (res  '()))
              ;; (print "loop: patt: " patt ", tal " tal)
              (let* ((patt-parts (string-match (regexp "^([^\\/]*)(\\/(.*)|)$") patt))
                     (test-patt  (cadr patt-parts))
                     (item-patt  (cadddr patt-parts))
                     (test-qry   (db:patt->like "testname" test-patt))
                     (item-qry   (db:patt->like "item_path" item-patt))
                     (qry        (conc "(" test-qry " AND " item-qry ")")))
                ;; (print "tests:match => patt-parts: " patt-parts ", test-patt: " test-patt ", item-patt: " item-patt)
                (if (null? tal)
                    (string-intersperse (append (reverse res)(list qry)) " OR ")
                    (loop (car tal)(cdr tal)(cons qry res)))))))
      #f))

;; keys list to key1,key2,key3 ...
;;
;; Returns a two element list: the comma separated field string built from
;; keys followed by remfields, and the combined list of field names.
;;
(define (runs:get-std-run-fields keys remfields)
  (let* ((header (append keys remfields))
         (keystr (conc (keys->keystr keys) ","
                       (string-intersperse remfields ","))))
    (list keystr header)))

;; A routine to map itempaths using a itemmap
;; patha and pathb must be strings or this will fail
;;
;; path-b is waiting on path-a
;;
;; When an itemmap is found for test-b-name, path-b is rewritten with
;; db:multi-pattern-apply before being compared to path-a; otherwise the
;; two paths are compared directly.
;;
(define (db:compare-itempaths test-b-name path-a path-b itemmaps )
  (debug:print-info 6 *default-log-port* "ITEMMAPS: " itemmaps)
  (let* ((itemmap (tests:lookup-itemmap itemmaps test-b-name)))
    (if itemmap
        (let ((path-b-mapped (db:multi-pattern-apply path-b itemmap)))
          (debug:print-info 6 *default-log-port* "ITEMMAP is " itemmap ", path: " path-b ", mapped path: " path-b-mapped)
          (equal? path-a path-b-mapped))
        (equal? path-b path-a))))

;;======================================================================
;; Run keys, these are used to hierarchially organise tests and run areas
;;======================================================================

(define (keys->keystr keys) ;; => key1,key2,key3,additiona1, ...
  (string-intersperse keys ","))

;;======================================================================
;; config file related routines
;;======================================================================

(define keys:config-get-fields common:get-fields)

;; Build "name type,name type,..." from the entries of the "fields"
;; section of confdat (first and second element of each entry).
;;
(define (keys:make-key/field-string confdat)
  (let ((fields (configf:get-section confdat "fields")))
    (string-join
     (map (lambda (field)(conc (car field) " " (cadr field)))
          fields)
     ",")))

;; given the keys (a list of vectors or a list of keys) and a target return a keyval list
;; keyval list ( (key1 val1) (key2 val2) ...)
;;
;; The target is split on "/"; when it has fewer parts than there are keys
;; the missing values are filled in with "".
;;
(define (keys:target->keyval keys target)
  (let* ((targlist    (string-split target "/"))
         (numkeys     (length keys))
         (numtarg     (length targlist))
         (targtweaked (if (> numkeys numtarg)
                          (append targlist (make-list (- numkeys numtarg) ""))
                          targlist)))
    (map (lambda (key targ)
           (list key targ))
         keys targtweaked)))

;; patterns are:
;;     "rx1" "replacement1"\n
;;     "rx2" "replacement2"
;; etc.
;;
;; Apply every line of itemmap, in order, to item-path with
;; string-substitute and return the result. A line with only a pattern uses
;; "" as the replacement. A line whose substitution raises an exception is
;; reported and skipped. A whitespace-only line splits into no parts and is
;; skipped rather than crashing on the car of an empty list.
;;
(define (db:multi-pattern-apply item-path itemmap)
  (let loop ((patts (string-split itemmap "\n"))
             (res   item-path))
    (if (null? patts)
        res
        (let ((parts (string-split (car patts))))
          (loop (cdr patts)
                (if (null? parts)
                    res ;; nothing on this line to apply
                    (let ((patt (car parts))
                          (repl (if (> (length parts) 1) (cadr parts) "")))
                      (handle-exceptions
                       exn
                       (begin
                         (debug:print 0 *default-log-port*
                                      "WARNING: itemmap has problem \"" itemmap "\", patt: " patt ", repl: " repl)
                         res)
                       (string-substitute patt repl res)))))))))

;; given a list of itemmaps (testname . map), return the first match
;;
;; The entry is selected with tests:match on its car. The map is returned
;; when it can be found as a string: either the cdr of a pair or the second
;; element of a list. Any other shape yields #f (with a warning), because
;; the consumer, db:compare-itempaths, hands a non-#f result straight to
;; db:multi-pattern-apply, which requires a string.
;;
(define (tests:lookup-itemmap itemmaps testname)
  (let ((best-matches (filter (lambda (itemmap)
                                (tests:match (car itemmap) testname #f))
                              itemmaps)))
    (if (null? best-matches)
        #f
        (let ((res (car best-matches)))
          ;; (debug:print 0 *default-log-port* "res=" res)
          (cond
           ((string? res) res) ;;; FIX THE ROOT CAUSE HERE ....
           ((null? res)   #f)
           ((string? (cdr res)) (cdr res))  ;; it is a pair
           ((and (pair? (cdr res))
                 (string? (cadr res)))
            (cadr res))                      ;; it is a list
           (else
            (debug:print 0 *default-log-port* "WARNING: itemmap entry " res " for test " testname " has no usable map, ignoring it")
            #f))))))

;; make a query (fieldname like 'patt1' OR fieldname
;;
;; pattstr is a comma separated list of patterns; a non-string pattstr is
;; treated as the single pattern "%". Patterns containing "%" are compared
;; with LIKE, all others with GLOB. The conditions are joined with
;; comparator (default " OR ").
;;
;; NOTE(review): patterns are spliced into the SQL text unescaped; callers
;; presumably only pass trusted patterns - confirm.
;;
(define (db:patt->like fieldname pattstr #!key (comparator " OR "))
  (let ((patts (if (string? pattstr)
                   (string-split pattstr ",")
                   '("%"))))
    (string-intersperse (map (lambda (patt)
                               (let ((wildtype (if (substring-index "%" patt) "LIKE" "GLOB")))
                                 (conc fieldname " " wildtype " '" patt "'")))
                             (if (null? patts)
                                 '("")
                                 patts))
                        comparator)))

;; if itempath is #f then look only at the testname part
;;
;; patterns is a comma separated list of "testpatt" or "testpatt/itempatt"
;; entries; the patterns in required are appended to it. Returns #t as soon
;; as one pattern matches testname (and itempath when given). Returns #f
;; when there are no patterns, when an empty pattern is reached, or when
;; nothing matches.
;;
;; NOTE(review): when patterns is not a string the `if` has no alternative,
;; so the result is unspecified rather than #f - confirm callers never rely
;; on it.
;;
(define (tests:match patterns testname itempath #!key (required '()))
  (if (string? patterns)
      (let ((patts (append (string-split patterns ",") required)))
        (if (null? patts) ;;; no pattern(s) means no match
            #f
            (let loop ((patt (car patts))
                       (tal  (cdr patts)))
              ;; (print "loop: patt: " patt ", tal " tal)
              (if (string=? patt "")
                  #f ;; nothing ever matches empty string - policy
                  (let* ((patt-parts (string-match (regexp "^([^\\/]*)(\\/(.*)|)$") patt))
                         (test-patt  (cadr patt-parts))
                         (item-patt  (cadddr patt-parts)))
                    ;; special case: test vs. test/
                    ;;   test  => "test" "%"
                    ;;   test/ => "test" ""
                    (if (and (not (substring-index "/" patt)) ;; no slash in the original
                             (or (not item-patt)
                                 (equal? item-patt ""))) ;; should always be true that item-patt is ""
                        (set! item-patt "%"))
                    ;; (print "tests:match => patt-parts: " patt-parts ", test-patt: " test-patt ", item-patt: " item-patt)
                    (if (and (tests:glob-like-match test-patt testname)
                             (or (not itempath)
                                 (tests:glob-like-match (if item-patt item-patt "") itempath)))
                        #t
                        (if (null? tal)
                            #f
                            (loop (car tal)(cdr tal)))))))))))

;; tests:glob-like-match
;;
;; Match str against patt. A pattern containing "%" has its "%" replaced by
;; ".*" and the regexp is built with its flag argument set to #t; otherwise
;; "*" is replaced by ".*" and the flag is #f. A leading "~" negates the
;; result.
;;
(define (tests:glob-like-match patt str)
  (let ((like (substring-index "%" patt)))
    (let* ((notpatt (equal? (substring-index "~" patt) 0))
           (newpatt (if notpatt (substring patt 1) patt))
           (finpatt (if like
                        (string-substitute (regexp "%") ".*" newpatt #f)
                        (string-substitute (regexp "\\*") ".*" newpatt #f)))
           (res     #f))
      ;; (print "tests:glob-like-match => notpatt: " notpatt ", newpatt: " newpatt ", finpatt: " finpatt)
      (set! res (string-match (regexp finpatt (if like #t #f)) str))
      (if notpatt (not res) res))))


;;======================================================================
;; SQLITE3 HELPERS
;;======================================================================

;; Print the sqlite3 status of exn, log the failed statement with its
;; parameters and the exception message, then print the call chain to the
;; current error port.
;;
(define (db:general-sqlite-error-dump exn stmt . params)
  (let ((err-status ((condition-property-accessor 'sqlite3 'status #f) exn))) ;; RADT ... how does this work?
    ;; check for (exn sqlite3) ((condition-property-accessor 'exn 'message) exn)
    (print "err-status: " err-status)
    (debug:print-error 0 *default-log-port* " query " stmt " failed, params: " params ", error: " ((condition-property-accessor 'exn 'message) exn))
    (print-call-chain (current-error-port))))

;; convert to -inline
;;
;; Run stmt with params via sqlite3:first-result and return its value. Any
;; exception yields default; unless the sqlite3 status of the exception is
;; 'done, the failure is also logged with a call chain.
;;
(define (db:first-result-default db stmt default . params)
  (handle-exceptions
   exn
   (let ((err-status ((condition-property-accessor 'sqlite3 'status #f) exn)))
     ;; check for (exn sqlite3) ((condition-property-accessor 'exn 'message) exn)
     (if (eq? err-status 'done)
         default
         (begin
           (debug:print-error 0 *default-log-port* " query " stmt " failed, params: " params ", error: " ((condition-property-accessor 'exn 'message) exn))
           (print-call-chain (current-error-port))
           default)))
   (apply sqlite3:first-result db stmt params)))

;; Get/open a database
;;
;; When the dbstack slot of dbstruct already holds a stack, pop a handle
;; from it, or open a fresh handle with db:open-megatest-db on
;; (db:dbfile-path) if the stack is empty. Otherwise nothing has been
;; opened yet and db:open-db is called.
;;
(define (db:get-db dbstruct) ;;  run-id)
  (if (stack? (dbr:dbstruct-dbstack dbstruct))
      (if (stack-empty? (dbr:dbstruct-dbstack dbstruct))
          (let ((newdb (db:open-megatest-db path: (db:dbfile-path))))
            ;; (stack-push! (dbr:dbstruct-dbstack dbstruct) newdb)
            newdb)
          (stack-pop! (dbr:dbstruct-dbstack dbstruct)))
      (db:open-db dbstruct)))

;; ;; legacy handling of structure for managing db's. Refactor this into dbr:?
;;
;; A dbdat is normally a (db . path) pair; a bare db handle is accepted too.
;;
(define (db:dbdat-get-db dbdat)
  (if (pair? dbdat)
      (car dbdat)
      dbdat))

;; Path half of a dbdat pair, or #f when dbdat is not a pair.
;;
(define (db:dbdat-get-path dbdat)
  (if (pair? dbdat)
      (cdr dbdat)
      #f))

;; mod-read:
;;     'mod   modified data
;;     'read  read data
;; Locks the mutex and depending on 'mod or 'read passed, sets the last timestamp in dbstruct
;;
;; (define (db:done-with dbstruct run-id mod-read)
;;   (if (not (sqlite3:database? dbstruct))
;;       (begin
;;         (mutex-lock!
;;                      *rundb-mutex*)
;;         (if (eq? mod-read 'mod)
;;             (dbr:dbstruct-mtime-set! dbstruct (current-milliseconds))
;;             (dbr:dbstruct-rtime-set! dbstruct (current-milliseconds)))
;;         (dbr:dbstruct-inuse-set! dbstruct #f)
;;         (mutex-unlock! *rundb-mutex*))))

;; (db:with-db dbstruct run-id sqlite3:exec "select blah fgrom blaz;")
;; r/w is a flag to indicate if the db is modified by this query #t = yes, #f = no
;;
;; Call proc with a db handle followed by params and return its result.
;; dbstruct may be a dbr:dbstruct, in which case a handle is taken with
;; db:get-db and pushed back on the struct's dbstack afterwards, or a raw
;; db handle, which is used as is. When *api-process-request-count* is
;; above 25 the call is serialized with *db-with-db-mutex*. Any exception
;; is logged and the process exits with status 50.
;;
(define (db:with-db dbstruct run-id r/w proc . params)
  (let* ((have-struct (dbr:dbstruct? dbstruct))
         (dbdat       (if have-struct
                          (db:get-db dbstruct)
                          #f))
         (db          (if have-struct
                          (db:dbdat-get-db dbdat)
                          dbstruct))
         (use-mutex   (> *api-process-request-count* 25)))
    (if (and use-mutex
             (common:low-noise-print 120 "over-50-parallel-api-requests"))
        (debug:print-info 0 *default-log-port* *api-process-request-count* " parallel api requests being processed in process " (current-process-id) ", throttling access"))
    (if (common:low-noise-print 600 (conc "parallel-api-requests" *max-api-process-requests*))
        (debug:print-info 2 *default-log-port* "Parallel api request count: " *api-process-request-count* " max parallel requests: " *max-api-process-requests*))
    (handle-exceptions
     exn
     (begin
       (print-call-chain (current-error-port))
       (debug:print-error 0 *default-log-port* "sqlite3 issue in db:with-db, dbstruct=" dbstruct ", run-id=" run-id ", proc=" proc ", params=" params " error: " ((condition-property-accessor 'exn 'message) exn))
       ;; there is no recovering at this time. exit
       (exit 50))
     (if use-mutex (mutex-lock! *db-with-db-mutex*))
     (let ((res (apply proc db params)))
       (if use-mutex (mutex-unlock! *db-with-db-mutex*))
       ;; (if (vector? dbstruct)(db:done-with dbstruct run-id r/w))
       (if dbdat (stack-push! (dbr:dbstruct-dbstack dbstruct) dbdat))
       res))))

;;======================================================================
;; K E E P F I L E D B I N dbstruct
;;======================================================================

;; (define (db:get-filedb dbstruct run-id)
;;   (let ((db (vector-ref dbstruct 2)))
;;     (if db
;;         db
;;         (let ((fdb (filedb:open-db (conc *toplevel* "/db/files.db"))))
;;           (vector-set! dbstruct 2 fdb)
;;           fdb))))
;;
;; ;; Can also be used to save arbitrary strings
;; ;;
;; (define (db:save-path dbstruct path)
;;   (let ((fdb (db:get-filedb dbstruct)))b
;;     (filedb:register-path fdb path)))
;;
;; ;; Use to get a path. To get an arbitrary string see next define
;; ;;
;; (define (db:get-path dbstruct id)
;;   (let ((fdb (db:get-filedb dbstruct)))
;;     (filedb:get-path db id)))


;; open an sql database inside a file lock
;; returns: db existed-prior-to-opening
;; RA => Returns a db handler; sets the lock if opened in writable mode
;;
;; fname is the database file and initproc is called with the new handle
;; when the file did not exist before opening. In the writable case a
;; "<fname>.lock" file lock is taken unless a ".ready-<name>" marker
;; already exists in the parent directory; the marker is written after the
;; first successful open. Exceptions are caught and reported with
;; debug:print.
;;
;; NOTE(review): the exception handlers return whatever debug:print
;; returns, so callers do not get a db handle on failure, and the lock file
;; does not appear to be released on that path - confirm.
;;
;; (define *db-open-mutex* (make-mutex))

(define (db:lock-create-open fname initproc)
  (let* ((parent-dir   (or (pathname-directory fname)(current-directory))) ;; no parent? go local
         (raw-fname    (pathname-file fname))
         (dir-writable (file-write-access? parent-dir))
         (file-exists  (common:file-exists? fname))
         (file-write   (if file-exists
                           (file-write-access? fname)
                           dir-writable )))
    ;; (mutex-lock! *db-open-mutex*) ;; tried this mutex, not clear it helped.
    (if file-write ;; dir-writable
        (condition-case
         (let* ((lockfname   (conc fname ".lock"))
                (readyfname  (conc parent-dir "/.ready-" raw-fname))
                (readyexists (common:file-exists? readyfname)))
           (if (not readyexists)
               (common:simple-file-lock-and-wait lockfname))
           (let ((db (sqlite3:open-database fname)))
             (sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 136000))
             (sqlite3:execute db "PRAGMA synchronous = 0;")
             ;; [setup] tmp_mode holds a statement to run on dbs under /tmp
             (if (and (configf:lookup *configdat* "setup" "tmp_mode") (string-match "^/tmp/.*" fname))
                 (begin
                   ;;(print "DEBUG: Setting tmp_mode for " fname)
                   (sqlite3:execute db (configf:lookup *configdat* "setup" "tmp_mode"))
                   )
                 )
             ;; [setup] nfs_mode holds a statement to run on dbs outside /tmp
             (if (and (configf:lookup *configdat* "setup" "nfs_mode") (not (string-match "^/tmp/.*" fname)))
                 (begin
                   ;;(print "DEBUG: Setting nfs_mode for " fname)
                   (sqlite3:execute db (configf:lookup *configdat* "setup" "nfs_mode"))
                   )
                 )
             ;; WAL is only used when neither override is set, use-wal is on
             ;; and the file lives in /tmp
             (if (and (not (or (configf:lookup *configdat* "setup" "tmp_mode") (configf:lookup *configdat* "setup" "nfs_mode")))
                      (configf:lookup *configdat* "setup" "use-wal")
                      (string-match "^/tmp/.*" fname)) ;; this is a file in /tmp
                 (sqlite3:execute db "PRAGMA journal_mode=WAL;")
                 (debug:print 2 *default-log-port* "Creating " fname " in NON-WAL mode."))
             (if (not file-exists)
                 (initproc db))
             (if (not readyexists)
                 (begin
                   (common:simple-file-release-lock lockfname)
                   (with-output-to-file
                       readyfname
                     (lambda ()
                       (print "Ready at "
                              (seconds->year-work-week/day-time
                               (current-seconds)))))))
             db))
         (exn (io-error)  (debug:print 0 *default-log-port* "ERROR: i/o error with " fname ". Check permissions, disk space etc. and try again."))
         (exn (corrupt)   (debug:print 0 *default-log-port* "ERROR: database " fname " is corrupt. Repair it to proceed."))
         (exn (busy)      (debug:print 0 *default-log-port* "ERROR: database " fname " is locked. Try copying to another location, remove original and copy back."))
         (exn (permission)(debug:print 0 *default-log-port* "ERROR: database " fname " has some permissions problem."))
         (exn ()          (debug:print 0 *default-log-port* "ERROR: Unknown error with database " fname " message: " ((condition-property-accessor 'exn 'message) exn))))

        ;; not writable: open without locking, marker file or initialization
        (condition-case
         (begin
           (debug:print 2 *default-log-port* "WARNING: opening db in non-writable dir " fname)
           (let ((db (sqlite3:open-database fname)))
             (sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 136000))
             (sqlite3:execute db "PRAGMA synchronous = 0;")
             ;; (mutex-unlock! *db-open-mutex*)
             db))
         (exn (io-error)  (debug:print 0 *default-log-port* "ERROR: i/o error with " fname ". Check permissions, disk space etc. and try again."))
         (exn (corrupt)   (debug:print 0 *default-log-port* "ERROR: database " fname " is corrupt. Repair it to proceed."))
         (exn (busy)      (debug:print 0 *default-log-port* "ERROR: database " fname " is locked. Try copying to another location, remove original and copy back."))
         (exn (permission)(debug:print 0 *default-log-port* "ERROR: database " fname " has some permissions problem."))
         (exn ()          (debug:print 0 *default-log-port* "ERROR: Unknown error with database " fname " message: " ((condition-property-accessor 'exn 'message) exn))))
        )))


;; This routine creates the db if not already present. It is only called if the db is not already opened
;;
;; Opens megatest.db in *toppath* (mtdb), megatest.db in the tmp db area
;; given by db:dbfile-path (tmpdb) and megatest_ref.db in the same area
;; (refndb), stores them in dbstruct, creates the dbstack holding tmpdb and
;; returns tmpdb. When do-sync is true and the tmp db file did not exist, or
;; is older than the main db by more than filling-db-max-stale-seconds
;; (default 10), the tables are copied from mtdb to tmpdb with
;; db:sync-tables.
;;
;; When dbstruct already carries a dbstack a handle is fetched through
;; db:get-db. db:get-db takes the dbstruct, not the stack: it reads the
;; stack itself with dbr:dbstruct-dbstack.
;;
(define (db:open-db dbstruct #!key (areapath #f)(do-sync #t)) ;; TODO: actually use areapath
  (let ((tmpdb-stack (dbr:dbstruct-dbstack dbstruct))) ;; RA => Returns the first reference in dbstruct
    (if (stack? tmpdb-stack)
        (db:get-db dbstruct) ;; get previously opened db (will create new db handle if all in the stack are already used
        (let* ((max-stale-tmp (configf:lookup-number *configdat* "server" "filling-db-max-stale-seconds" default: 10))
               (dbpath        (db:dbfile-path))      ;; path to tmp db area
               (dbexists      (common:file-exists? dbpath))
               (tmpdbfname    (conc dbpath "/megatest.db"))
               (dbfexists     (common:file-exists? tmpdbfname))  ;; (conc dbpath "/megatest.db")))
               (mtdbexists    (common:file-exists? (conc *toppath* "/megatest.db")))

               ;; modification times are taken before the dbs are opened, see below
               (mtdbmodtime   (if mtdbexists (common:lazy-sqlite-db-modification-time (conc *toppath* "/megatest.db")) #f))
               (tmpdbmodtime  (if dbfexists  (common:lazy-sqlite-db-modification-time tmpdbfname) #f))
               (mtdb          (db:open-megatest-db))
               (mtdbpath      (db:dbdat-get-path mtdb))
               (tmpdb         (db:open-megatest-db path: dbpath)) ;; lock-create-open dbpath db:initialize-main-db))
               (refndb        (db:open-megatest-db path: dbpath name: "megatest_ref.db"))
               (write-access  (file-write-access? mtdbpath))
               ;(mtdbmodtime (if mtdbexists (common:lazy-sqlite-db-modification-time mtdbpath) #f)) ; moving this before db:open-megatest-db is called. if wal mode is on -WAL and -shm file get created with causing the tmpdbmodtime timestamp always greater than mtdbmodtime
               ;(tmpdbmodtime (if dbfexists (common:lazy-sqlite-db-modification-time tmpdbfname) #f))
               ;if wal mode is on -WAL and -shm file get created when db:open-megatest-db is called. modtimedelta will always be < 10 so db in tmp not get synced
               ;(tmpdbmodtime (if dbfexists (db:get-last-update-time (car tmpdb)) #f))
               ;(fmt (file-modification-time tmpdbfname))
               (modtimedelta  (and mtdbmodtime tmpdbmodtime (- mtdbmodtime tmpdbmodtime))))

          (when write-access
            (sqlite3:execute (car mtdb) "drop trigger if exists update_tests_trigger")
            (sqlite3:execute (car mtdb) "drop trigger if exists update_runs_trigger"))

          ;(print "mtdbmodtime " mtdbmodtime " tmpdbmodtime " tmpdbmodtime " mtdbpath " mtdbpath " " (conc *toppath* "/megatest.db"))
          ;;(debug:print-info 13 *default-log-port* "db:open-db>> mtdbpath="mtdbpath" mtdbexists="mtdbexists" and write-access="write-access)
          (if (and dbexists (not write-access))
              (begin
                (set! *db-write-access* #f)
                (dbr:dbstruct-read-only-set! dbstruct #t)))
          (dbr:dbstruct-mtdb-set!    dbstruct mtdb)
          (dbr:dbstruct-tmpdb-set!   dbstruct tmpdb)
          (dbr:dbstruct-dbstack-set! dbstruct (make-stack)) ;; BB: why a stack? Why would the number of db's be indeterminate? Is this a legacy of 1.db 2.db .. ?
          (stack-push! (dbr:dbstruct-dbstack dbstruct) tmpdb) ;; olddb is already a (cons db path)
          (dbr:dbstruct-refndb-set!  dbstruct refndb)
          ;; (mutex-unlock! *rundb-mutex*)
          (if (and (or (not dbfexists)
                       (and modtimedelta
                            (> modtimedelta max-stale-tmp))) ;; if db in tmp is over ten seconds older than the file in MTRA then do a sync back
                   do-sync)
              (begin
                (debug:print 1 *default-log-port* "filling db " (db:dbdat-get-path tmpdb) " with data \n from " (db:dbdat-get-path mtdb) " mod time delta: " modtimedelta)
                (db:sync-tables (db:sync-all-tables-list dbstruct) #f mtdb refndb tmpdb)
                ;touch tmp db to avoid wal mode wierdness
                (set! (file-modification-time tmpdbfname) (current-seconds))
                (debug:print-info 13 *default-log-port* "db:sync-all-tables-list done.")
                )
              (debug:print 4 *default-log-port* " db, " (db:dbdat-get-path tmpdb) " already exists or fresh enough, not propogating data from\n " (db:dbdat-get-path mtdb) " mod time delta: " modtimedelta) )
          ;; (db:multi-db-sync dbstruct 'old2new))  ;; migrate data from megatest.db automatically
          tmpdb))))


;; Return the value produced by the max(last_update) query over the tests
;; and runs tables of db (a raw sqlite3 handle), or #f when the query
;; yields no row.
;;
(define (db:get-last-update-time db)
;  (db:with-db
;   dbstruct #f #f
;   (lambda (db)
  (let ((last-update-time #f))
    (sqlite3:for-each-row
     (lambda (lup)
       (set! last-update-time lup))
     db
     "select max(lup) from ( select max(last_update) as lup from tests union select max(last_update) as lup from runs);")
    last-update-time))
;))

;; Make the dbstruct, setup up auxillary db's and call for main db at least once
;;
;; called in http-transport and replicated in rmt.scm for *local* access.
+;; +(define (db:setup do-sync #!key (areapath #f)) + ;; + (cond + (*dbstruct-db* *dbstruct-db*);; TODO: when multiple areas are supported, this optimization will be a hazard + (else ;;(common:on-homehost?) + (debug:print-info 13 *default-log-port* "db:setup entered (first time, not cached.)") + (let* ((dbstruct (make-dbr:dbstruct))) + (when (not *toppath*) + ;; (debug:print-info 13 *default-log-port* "in db:setup, *toppath* not set; calling launch:setup") + (debug:print-info 13 *default-log-port* "in db:setup, *toppath* not set; exiting") + ;; (launch:setup areapath: areapath) + (exit 1) + ) + (debug:print-info 13 *default-log-port* "Begin db:open-db") + (db:open-db dbstruct areapath: areapath do-sync: do-sync) + (debug:print-info 13 *default-log-port* "Done db:open-db") + (set! *dbstruct-db* dbstruct) + ;;(debug:print-info 13 *default-log-port* "new dbstruct = "(dbr:dbstruct->alist dbstruct)) + dbstruct)))) + ;; (else + ;; (debug:print 0 *default-log-port* "ERROR: attempt to open database when not on homehost. Exiting. Homehost: " (common:get-homehost)) + ;; (exit 1)))) + +;; Open the classic megatest.db file (defaults to open in toppath) +;; +;; NOTE: returns a dbdat not a dbstruct! +;; + +;;(define (db:reopen-megatest-db + +(define (db:open-megatest-db #!key (path #f)(name #f)) + (let* ((dbdir (or path *toppath*)) + (dbpath (conc dbdir "/" (or name "megatest.db"))) + (dbexists (common:file-exists? dbpath)) + (db (db:lock-create-open dbpath + (lambda (db) + (db:initialize-main-db db) + ;;(db:initialize-run-id-db db) + ))) + (write-access (file-write-access? dbpath))) + (debug:print-info 13 *default-log-port* "db:open-megatest-db "dbpath) + (if (and dbexists (not write-access)) + (set! 
*db-write-access* #f)) + (cons db dbpath))) + +;; sync run to disk if touched +;; +(define (db:sync-touched dbstruct run-id #!key (force-sync #f)) + (let ((tmpdb (db:get-db dbstruct)) + (mtdb (dbr:dbstruct-mtdb dbstruct)) + (refndb (dbr:dbstruct-refndb dbstruct)) + (start-t (current-seconds))) + (debug:print-info 4 *default-log-port* "Syncing for run-id: " run-id) + (mutex-lock! *db-multi-sync-mutex*) + (let ((update_info (cons (if force-sync 0 *db-last-sync*) "last_update"))) + (mutex-unlock! *db-multi-sync-mutex*) + (db:sync-tables (db:sync-all-tables-list dbstruct) update_info tmpdb refndb mtdb)) + (mutex-lock! *db-multi-sync-mutex*) + (set! *db-last-sync* start-t) + (set! *db-last-access* start-t) + (mutex-unlock! *db-multi-sync-mutex*) + (stack-push! (dbr:dbstruct-dbstack dbstruct) tmpdb))) + +(define (db:safely-close-sqlite3-db db #!key (try-num 3)) + (if (<= try-num 0) + #f + (handle-exceptions + exn + (begin + (thread-sleep! 3) + (sqlite3:interrupt! db) + (db:safely-close-sqlite3-db db try-num: (- try-num 1))) + (if (sqlite3:database? db) + (begin + (sqlite3:finalize! db) + #t) + #f)))) + +;; close all opened run-id dbs +(define (db:close-all dbstruct) + (if (dbr:dbstruct? dbstruct) + (handle-exceptions + exn + (begin + (debug:print 0 *default-log-port* "WARNING: Finalizing failed, " ((condition-property-accessor 'exn 'message) exn)) + (print-call-chain *default-log-port*)) + ;; (db:sync-touched dbstruct 0 force-sync: #t) ;; NO. Do not do this here. Instead we rely on a server to be started when there are writes, even if the server itself is not going to be used as a server. + (let ((tdbs (map db:dbdat-get-db + (stack->list (dbr:dbstruct-dbstack dbstruct)))) + (mdb (db:dbdat-get-db (dbr:dbstruct-mtdb dbstruct))) + (rdb (db:dbdat-get-db (dbr:dbstruct-refndb dbstruct)))) + (map (lambda (db) + (db:safely-close-sqlite3-db db)) +;; (if (sqlite3:database? db) +;; (sqlite3:finalize! db))) + tdbs) + (db:safely-close-sqlite3-db mdb) ;; (if (sqlite3:database? 
mdb) (sqlite3:finalize! mdb)) + (db:safely-close-sqlite3-db rdb))))) ;; (if (sqlite3:database? rdb) (sqlite3:finalize! rdb)))))) + +;; (let ((locdbs (dbr:dbstruct-locdbs dbstruct))) +;; (if (hash-table? locdbs) +;; (for-each (lambda (run-id) +;; (db:close-run-db dbstruct run-id)) +;; (hash-table-keys locdbs))))) + +;; (define (db:open-inmem-db) +;; (let* ((db (sqlite3:open-database ":memory:")) +;; (handler (make-busy-timeout 3600))) +;; (sqlite3:set-busy-handler! db handler) +;; (db:initialize-run-id-db db) +;; (cons db #f))) + +;; just tests, test_steps and test_data tables +(define db:sync-tests-only + (list + ;; (list "strs" + ;; '("id" #f) + ;; '("str" #f)) + (list "tests" + '("id" #f) + '("run_id" #f) + '("testname" #f) + '("host" #f) + '("cpuload" #f) + '("diskfree" #f) + '("uname" #f) + '("rundir" #f) + '("shortdir" #f) + '("item_path" #f) + '("state" #f) + '("status" #f) + '("attemptnum" #f) + '("final_logf" #f) + '("logdat" #f) + '("run_duration" #f) + '("comment" #f) + '("event_time" #f) + '("fail_count" #f) + '("pass_count" #f) + '("archived" #f) + '("last_update" #f)) + (list "test_steps" + '("id" #f) + '("test_id" #f) + '("stepname" #f) + '("state" #f) + '("status" #f) + '("event_time" #f) + '("comment" #f) + '("logfile" #f) + '("last_update" #f)) + (list "test_data" + '("id" #f) + '("test_id" #f) + '("category" #f) + '("variable" #f) + '("value" #f) + '("expected" #f) + '("tol" #f) + '("units" #f) + '("comment" #f) + '("status" #f) + '("type" #f) + '("last_update" #f)))) + +;; needs db to get keys, this is for syncing all tables +;; +(define (db:sync-main-list dbstruct) + (let ((keys (db:get-keys dbstruct))) + (list + (list "keys" + '("id" #f) + '("fieldname" #f) + '("fieldtype" #f)) + (list "metadat" '("var" #f) '("val" #f)) + (append (list "runs" + '("id" #f)) + (map (lambda (k)(list k #f)) + (append keys + (list "runname" "state" "status" "owner" "event_time" "comment" "fail_count" "pass_count" "contour" "last_update")))) + (list "test_meta" + 
'("id" #f) + '("testname" #f) + '("owner" #f) + '("description" #f) + '("reviewed" #f) + '("iterated" #f) + '("avg_runtime" #f) + '("avg_disk" #f) + '("tags" #f) + '("jobgroup" #f))))) + +(define (db:sync-all-tables-list dbstruct) + (append (db:sync-main-list dbstruct) + db:sync-tests-only)) + +;; use bunch of Unix commands to try to break the lock and recreate the db +;; +(define (db:move-and-recreate-db dbdat) + (let* ((dbpath (db:dbdat-get-path dbdat)) + (dbdir (pathname-directory dbpath)) + (fname (pathname-strip-directory dbpath)) + (fnamejnl (conc fname "-journal")) + (tmpname (conc fname "." (current-process-id))) + (tmpjnl (conc fnamejnl "." (current-process-id)))) + (debug:print-error 0 *default-log-port* "" fname " appears corrupted. Making backup \"old/" fname "\"") + (system (conc "cd " dbdir ";mkdir -p old;cat " fname " > old/" tmpname)) + (system (conc "rm -f " dbpath)) + (if (common:file-exists? fnamejnl) + (begin + (debug:print-error 0 *default-log-port* "" fnamejnl " found, moving it to old dir as " tmpjnl) + (system (conc "cd " dbdir ";mkdir -p old;cat " fnamejnl " > old/" tmpjnl)) + (system (conc "rm -f " dbdir "/" fnamejnl)))) + ;; attempt to recreate database + (system (conc "cd " dbdir ";sqlite3 old/" tmpname " .dump | sqlite3 " fname)))) + +;; return #f to indicate the dbdat should be closed/reopened +;; else return dbdat +;; +(define (db:repair-db dbdat #!key (numtries 1)) + (let* ((dbpath (db:dbdat-get-path dbdat)) + (dbdir (pathname-directory dbpath)) + (fname (pathname-strip-directory dbpath))) + (debug:print-info 0 *default-log-port* "Checking db " dbpath " for errors.") + (cond + ((not (file-write-access? dbdir)) + (debug:print 0 *default-log-port* "WARNING: can't write to " dbdir ", can't fix " fname) + #f) + + ;; handle special cases, megatest.db and monitor.db + ;; + ;; NOPE: apply this same approach to all db files + ;; + (else ;; ((equal? 
fname "megatest.db") ;; this file can be regenerated if needed
+      (handle-exceptions
+       exn
+       (begin
+         ;; (db:move-and-recreate-db dbdat)
+         ;; NOTE(review): the value of the retry below is not used; control
+         ;; always continues to the FATAL message and (exit).
+         (if (> numtries 0)
+             (db:repair-db dbdat numtries: (- numtries 1))
+             #f)
+         (debug:print 0 *default-log-port* "FATAL: file " dbpath " was found corrupted, an attempt to fix has been made but you must start over.")
+         (debug:print 0 *default-log-port*
+                      " check the following:\n"
+                      " 1. full directories, look in ~/ /tmp and " dbdir "\n"
+                      " 2. write access to " dbdir "\n\n"
+                      " if the automatic recovery failed you may be able to recover data by doing \""
+                      (if (member fname '("megatest.db" "monitor.db"))
+                          "megatest -cleanup-db"
+                          "megatest -import-megatest.db;megatest -cleanup-db")
+                      "\"\n")
+         (exit) ;; we can not safely continue when a db was corrupted - even if fixed.
+         )
+       ;; test read/write access to the database
+       (let ((db (sqlite3:open-database dbpath)))
+         (sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 136000))
+         (sqlite3:execute db "PRAGMA synchronous = 0;")
+         ;; run one write statement chosen by file name; any exception lands
+         ;; in the handler above
+         (cond
+          ((equal? fname "megatest.db")
+           (sqlite3:execute db "DELETE FROM tests WHERE state='DELETED';"))
+          ((equal? fname "main.db")
+           (sqlite3:execute db "DELETE FROM runs WHERE state='deleted';"))
+          ((string-match "\\d.db" fname)
+           (sqlite3:execute db "UPDATE tests SET state='DELETED' WHERE state='DELETED';"))
+          ((equal? fname "monitor.db")
+           ;; the db handle must be the first argument, as in the other branches
+           (sqlite3:execute db "DELETE FROM servers WHERE state LIKE 'defunct%';"))
+          (else
+           (sqlite3:execute db "vacuum;")))
+
+         (sqlite3:finalize! db)
+         #t))))))
+
+;; tbls is ( ("tablename" ( "field1" [#f|proc1] ) ( "field2" [#f|proc2] ) .... ) )
+;; db's are dbdat's
+;;
+;; if last-update specified ("field-name" . time-in-seconds)
+;; then sync only records where field-name >= time-in-seconds
+;; IFF field-name exists
+;;
+(define (db:sync-tables tbls last-update fromdb todb . 
slave-dbs) + (handle-exceptions + exn + (begin + (debug:print 0 *default-log-port* "EXCEPTION: database probably overloaded or unreadable in db:sync-tables.") + (print-call-chain (current-error-port)) + (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) + (debug:print 5 *default-log-port* "exn=" (condition->list exn)) + (debug:print 0 *default-log-port* " status: " ((condition-property-accessor 'sqlite3 'status) exn)) + (debug:print 0 *default-log-port* " src db: " (db:dbdat-get-path fromdb)) + (for-each (lambda (dbdat) + (let ((dbpath (db:dbdat-get-path dbdat))) + (debug:print 0 *default-log-port* " dbpath: " dbpath) + (if (not (db:repair-db dbdat)) + (begin + (debug:print-error 0 *default-log-port* "Failed to rebuild " dbpath ", exiting now.") + (exit))))) + (cons todb slave-dbs)) + + 0) + ;; this is the work to be done + (cond + ((not fromdb) (debug:print 3 *default-log-port* "WARNING: db:sync-tables called with fromdb missing") + -1) + ((not todb) (debug:print 3 *default-log-port* "WARNING: db:sync-tables called with todb missing") + -2) + ((not (sqlite3:database? (db:dbdat-get-db fromdb))) + (debug:print-error 0 *default-log-port* "db:sync-tables called with fromdb not a database " fromdb) + -3) + ((not (sqlite3:database? (db:dbdat-get-db todb))) + (debug:print-error 0 *default-log-port* "db:sync-tables called with todb not a database " todb) + -4) + + ((not (file-write-access? (db:dbdat-get-path todb))) + (debug:print-error 0 *default-log-port* "db:sync-tables called with todb not a read-only database " todb) + -5) + ((not (null? (let ((readonly-slave-dbs + (filter + (lambda (dbdat) + (not (file-write-access? 
(db:dbdat-get-path dbdat)))) ;; test each slave db itself, not todb
+                           slave-dbs)))
+                    (for-each
+                     (lambda (bad-dbdat)
+                       (debug:print-error
+                        0 *default-log-port* "db:sync-tables called with a read-only slave database " bad-dbdat))
+                     readonly-slave-dbs)
+                    readonly-slave-dbs))) -6) ;; -6: at least one slave db is not writable
+     (else
+      (let ((stmts       (make-hash-table)) ;; table-field => stmt
+            (all-stmts   '())               ;; ( ( stmt1 value1 ) ( stml2 value2 ))
+            (numrecs     (make-hash-table))
+            (start-time  (current-milliseconds))
+            (tot-count   0))
+        (for-each ;; table
+         (lambda (tabledat)
+           (let* ((tablename         (car tabledat))
+                  (fields            (cdr tabledat))
+                  ;; NOTE(review): the table lists defined in this file hold
+                  ;; ("name" #f) entries, so this member test on a bare string
+                  ;; looks like it can never match - confirm before relying on it
+                  (has-last-update   (member "last_update" fields))
+                  (use-last-update   (cond
+                                      ((and has-last-update
+                                            (member "last_update" fields))
+                                       #t) ;; if given a number, just use it for all fields
+                                      ((number? last-update) #f) ;; if not matched first entry then ignore last-update for this table
+                                      ((and (pair? last-update)
+                                            (member (car last-update) ;; last-update field name
+                                                    (map car fields))) #t)
+                                      (last-update
+                                       (debug:print 0 *default-log-port* "ERROR: parameter last-update for db:sync-tables must be a pair or a number, received: " last-update) ;; found in fields
+                                       #f)
+                                      (else
+                                       #f)))
+                  (last-update-value (if use-last-update ;; no need to check for has-last-update - it is already accounted for
+                                         (if (number? last-update)
+                                             last-update
+                                             (cdr last-update))
+                                         #f))
+                  (last-update-field (if use-last-update
+                                         (if (number? 
last-update) + "last_update" + (car last-update)) + #f)) + (num-fields (length fields)) + (field->num (make-hash-table)) + (num->field (apply vector (map car fields))) ;; BBHERE + (full-sel (conc "SELECT " (string-intersperse (map car fields) ",") + " FROM " tablename (if use-last-update ;; apply last-update criteria + (conc " WHERE " last-update-field " >= " last-update-value) + "") + ";")) + (full-ins (conc "INSERT OR REPLACE INTO " tablename " ( " (string-intersperse (map car fields) ",") " ) " + " VALUES ( " (string-intersperse (make-list num-fields "?") ",") " );")) + (fromdat '()) + (fromdats '()) + (totrecords 0) + (batch-len (string->number (or (configf:lookup *configdat* "sync" "batchsize") "100"))) + (todat (make-hash-table)) + (count 0) + + (delay-handicap (string->number (or (configf:lookup *configdat* "sync" "delay-handicap") "0"))) + ) + + ;; set up the field->num table + (for-each + (lambda (field) + (hash-table-set! field->num field count) + (set! count (+ count 1))) + fields) + + ;; read the source table + (sqlite3:for-each-row + (lambda (a . b) + (set! fromdat (cons (apply vector a b) fromdat)) + (if (> (length fromdat) batch-len) + (begin + (set! fromdats (cons fromdat fromdats)) + (set! fromdat '()) + (set! totrecords (+ totrecords 1))))) + (db:dbdat-get-db fromdb) + full-sel) + + ;; tack on remaining records in fromdat + (if (not (null? fromdat)) + (set! fromdats (cons fromdat fromdats))) + + (if (common:low-noise-print 120 "sync-records") + (debug:print-info 4 *default-log-port* "found " totrecords " records to sync")) + + ;; read the target table; BBHERE + (sqlite3:for-each-row + (lambda (a . b) + (hash-table-set! todat a (apply vector a b))) + (db:dbdat-get-db todb) + full-sel) + + (when (and delay-handicap (> delay-handicap 0)) + (debug:print-info 0 *default-log-port* "imposing synthetic sync delay of "delay-handicap" seconds since sync/delay-handicap is configured") + (thread-sleep! 
delay-handicap) + (debug:print-info 0 *default-log-port* "synthetic sync delay of "delay-handicap" seconds completed") + ) + + ;; first pass implementation, just insert all changed rows + (for-each + (lambda (targdb) + (let* ((db (db:dbdat-get-db targdb)) + (stmth (sqlite3:prepare db full-ins))) + (db:delay-if-busy targdb) ;; NO WAITING + (for-each + (lambda (fromdat-lst) + (sqlite3:with-transaction + db + (lambda () + (for-each ;; + (lambda (fromrow) + (let* ((a (vector-ref fromrow 0)) + (curr (hash-table-ref/default todat a #f)) + (same #t)) + (let loop ((i 0)) + (if (or (not curr) + (not (equal? (vector-ref fromrow i)(vector-ref curr i)))) + (set! same #f)) + (if (and same + (< i (- num-fields 1))) + (loop (+ i 1)))) + (if (not same) + (begin + (apply sqlite3:execute stmth (vector->list fromrow)) + (hash-table-set! numrecs tablename (+ 1 (hash-table-ref/default numrecs tablename 0))))))) + fromdat-lst)) + )) + fromdats) + (sqlite3:finalize! stmth))) + (append (list todb) slave-dbs)))) + tbls) + (let* ((runtime (- (current-milliseconds) start-time)) + (should-print (or (debug:debug-mode 12) + (common:low-noise-print 120 "db sync" (> runtime 500))))) ;; low and high sync times treated as separate. + (if should-print (debug:print 3 *default-log-port* "INFO: db sync, total run time " runtime " ms")) + (for-each + (lambda (dat) + (let ((tblname (car dat)) + (count (cdr dat))) + (set! 
tot-count (+ tot-count count)) + (if (> count 0) + (if should-print (debug:print 0 *default-log-port* (format #f " ~10a ~5a" tblname count)))))) + (sort (hash-table->alist numrecs)(lambda (a b)(> (cdr a)(cdr b)))))) + tot-count))))) + +(define (db:patch-schema-rundb frundb) + ;; + ;; remove this some time after September 2016 (added in version v1.6031 + ;; + (for-each + (lambda (table-name) + (handle-exceptions + exn + (if (string-match ".*duplicate.*" ((condition-property-accessor 'exn 'message) exn)) + (debug:print 0 *default-log-port* "Column last_update already added to " table-name " table") + (db:general-sqlite-error-dump exn "alter table " table-name " ..." #f "none")) + (sqlite3:execute + frundb + (conc "ALTER TABLE " table-name " ADD COLUMN last_update INTEGER DEFAULT 0"))) + (sqlite3:execute + frundb + (conc "DROP TRIGGER IF EXISTS update_" table-name "_trigger;")) + (sqlite3:execute + frundb + (conc "CREATE TRIGGER IF NOT EXISTS update_" table-name "_trigger AFTER UPDATE ON " table-name " + FOR EACH ROW + BEGIN + UPDATE " table-name " SET last_update=(strftime('%s','now')) + WHERE id=old.id; + END;")) + ) + '("tests" "test_steps" "test_data"))) + +(define (db:patch-schema-maindb maindb) + ;; + ;; remove all these some time after september 2016 (added in v1.6031 + ;; + (for-each + (lambda (column type default) + (handle-exceptions + exn + (if (string-match ".*duplicate.*" ((condition-property-accessor 'exn 'message) exn)) + (debug:print 0 *default-log-port* "Column " column " already added to runs table") + (db:general-sqlite-error-dump exn "alter table runs ..." 
#f "none")) + (sqlite3:execute + maindb + (conc "ALTER TABLE runs ADD COLUMN " column " " type " DEFAULT " default)))) + (list "last_update" "contour") + (list "INTEGER" "TEXT" ) + (list "0" "''" )) + ;; these schema changes don't need exception handling + (sqlite3:execute + maindb + "CREATE TRIGGER IF NOT EXISTS update_runs_trigger AFTER UPDATE ON runs + FOR EACH ROW + BEGIN + UPDATE runs SET last_update=(strftime('%s','now')) + WHERE id=old.id; + END;") + (sqlite3:execute maindb "CREATE TABLE IF NOT EXISTS run_stats ( + id INTEGER PRIMARY KEY, + run_id INTEGER, + state TEXT, + status TEXT, + count INTEGER, + last_update INTEGER DEFAULT (strftime('%s','now')))") + (sqlite3:execute maindb "CREATE TRIGGER IF NOT EXISTS update_run_stats_trigger AFTER UPDATE ON run_stats + FOR EACH ROW + BEGIN + UPDATE run_stats SET last_update=(strftime('%s','now')) + WHERE id=old.id; + END;") + (sqlite3:execute maindb "CREATE TABLE IF NOT EXISTS test_rundat ( + id INTEGER PRIMARY KEY, + test_id INTEGER, + update_time TIMESTAMP, + cpuload INTEGER DEFAULT -1, + diskfree INTEGER DEFAULT -1, + diskusage INTGER DEFAULT -1, + run_duration INTEGER DEFAULT 0);")) + +(define (db:adj-target db) + (let ((fields (configf:get-section *configdat* "fields")) + (field-num 0)) + ;; because we will be refreshing the keys table it is best to clear it here + (sqlite3:execute db "DELETE FROM keys;") + (for-each + (lambda (field) + (let ((column (car field)) + (spec (cadr field))) + (handle-exceptions + exn + (if (string-match ".*duplicate.*" ((condition-property-accessor 'exn 'message) exn)) + (debug:print 0 *default-log-port* "Target field " column " already exists in the runs table") + (db:general-sqlite-error-dump exn "alter table runs ..." 
#f "none")) + ;; Add the column if needed + (sqlite3:execute + db + (conc "ALTER TABLE runs ADD COLUMN " column " " spec))) + ;; correct the entry in the keys column + (sqlite3:execute + db + "INSERT INTO keys (id,fieldname,fieldtype) VALUES (?,?,?);" + field-num column spec) + ;; fill in blanks (not allowed as it would be part of the path + (sqlite3:execute + db + (conc "UPDATE runs SET " column "='x' WHERE " column "='';")) + (set! field-num (+ field-num 1)))) + fields))) + +(define *global-db-store* (make-hash-table)) + +(define (db:get-access-mode) + (if (args:get-arg "-use-db-cache") 'cached 'rmt)) + +;; Add db direct +;; +(define (db:dispatch-query access-mode rmt-cmd db-cmd . params) + (if (eq? access-mode 'cached) + (debug:print 2 *default-log-port* "not doing cached calls right now")) +;; (apply db:call-with-cached-db db-cmd params) + (apply rmt-cmd params)) +;;) + +;; return the target db handle so it can be used +;; +(define (db:cache-for-read-only source target #!key (use-last-update #f)) + (if (and (hash-table-ref/default *global-db-store* target #f) + (>= (file-modification-time target)(file-modification-time source))) + (hash-table-ref *global-db-store* target) + (let* ((toppath *toppath*) ;; (launch:setup)) + (targ-db-last-mod (if (common:file-exists? target) + (file-modification-time target) + 0)) + (cache-db (or (hash-table-ref/default *global-db-store* target #f) + (db:open-megatest-db path: target))) + (source-db (db:open-megatest-db path: source)) + (curr-time (current-seconds)) + (res '()) + (last-update (if use-last-update (cons "last_update" targ-db-last-mod) #f))) + (db:sync-tables (db:sync-main-list source-db) last-update source-db cache-db) + (db:sync-tables db:sync-tests-only last-update source-db cache-db) + (hash-table-set! *global-db-store* target cache-db) + cache-db))) + +;; ;; call a proc with a cached db +;; ;; +;; (define (db:call-with-cached-db proc . 
params) +;; ;; first cache the db in /tmp +;; (let* ((cname-part (conc "megatest_cache/" (common:get-testsuite-name))) +;; (fname (conc (common:get-area-path-signature) ".db")) +;; (cache-dir (common:get-create-writeable-dir +;; (list (conc "/tmp/" (current-user-name) "/" cname-part) +;; (conc "/tmp/" (current-user-name) "-" cname-part) +;; (conc "/tmp/" (current-user-name) "_" cname-part)))) +;; (megatest-db (conc *toppath* "/megatest.db"))) +;; ;; (debug:print-info 0 *default-log-port* "Using cache dir " cache-dir) +;; (if (not cache-dir) +;; (begin +;; (debug:print 0 *default-log-port* "ERROR: Failed to find an area to write the cache db") +;; (exit 1)) +;; (let* ((th1 (make-thread +;; (lambda () +;; (if (and (common:file-exists? megatest-db) +;; (file-write-access? megatest-db)) +;; (begin +;; (db:sync-to-megatest.db dbstruct 'timestamps) ;; internally mutexes on *db-local-sync* +;; (debug:print-info 2 *default-log-port* "Done syncing to megatest.db")))) +;; "call-with-cached-db sync-to-megatest.db")) +;; (cache-db (db:cache-for-read-only +;; megatest-db +;; (conc cache-dir "/" fname) +;; use-last-update: #t))) +;; (thread-start! th1) +;; (apply proc cache-db params) +;; )))) + + +;; keeping it around for debugging purposes only +#;(define (open-run-close-no-exception-handling proc idb . params) + (debug:print-info 11 *default-log-port* "open-run-close-no-exception-handling START given a db=" (if idb "yes " "no ") ", params=" params) + (print "I don't work anymore. open-run-close-no-exception-handling needs fixing or removing...") + (exit) + (if (or *db-write-access* + (not #t)) ;; was: (member proc * db:all-write-procs *))) + (let* ((db (cond + ((pair? idb) (db:dbdat-get-db idb)) + ((sqlite3:database? idb) idb) + ((not idb) (debug:print-error 0 *default-log-port* "cannot open-run-close with #f anymore")) + ((procedure? idb) (idb)) + (else (debug:print-error 0 *default-log-port* "cannot open-run-close with #f anymore")))) + (res #f)) + (set! 
res (apply proc db params)) + (if (not idb)(sqlite3:finalize! dbstruct)) + (debug:print-info 11 *default-log-port* "open-run-close-no-exception-handling END" ) + res) + #f)) + +#;(define (open-run-close-exception-handling proc idb . params) + (handle-exceptions + exn + (let ((sleep-time (random 30)) + (err-status ((condition-property-accessor 'sqlite3 'status #f) exn))) + (case err-status + ((busy) + (thread-sleep! sleep-time)) + (else + (debug:print 0 *default-log-port* "EXCEPTION: database probably overloaded or unreadable.") + (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) + (debug:print 5 *default-log-port* "exn=" (condition->list exn)) + (debug:print 0 *default-log-port* " status: " ((condition-property-accessor 'sqlite3 'status) exn)) + (print-call-chain (current-error-port)) + (thread-sleep! sleep-time) + (debug:print-info 0 *default-log-port* "trying db call one more time....this may never recover, if necessary kill process " (current-process-id) " on host " (get-host-name) " to clean up"))) + (apply open-run-close-exception-handling proc idb params)) + (apply open-run-close-no-exception-handling proc idb params))) + +;; (define open-run-close +#;(define open-run-close open-run-close-exception-handling) + ;; open-run-close-no-exception-handling +;; open-run-close-exception-handling) +;;) + +(define (db:initialize-main-db dbdat) + (when (not *configinfo*) + ;; (launch:setup) + (debug:print-error 0 *default-log-port* "db:initialize-main-db called but no *configinfo*. Exiting.") + (exit 1) + ) ;; added because Elena was getting stack dump because *configinfo* below was #f. + (let* ((configdat (car *configinfo*)) ;; tut tut, global warning... 
+ (keys (keys:config-get-fields configdat)) + (havekeys (> (length keys) 0)) + (keystr (keys->keystr keys)) + (fieldstr (keys:make-key/field-string configdat)) + (db (db:dbdat-get-db dbdat))) + (for-each (lambda (key) + (let ((keyn key)) + (if (member (string-downcase keyn) + (list "runname" "state" "status" "owner" "event_time" "comment" "fail_count" + "pass_count" "contour")) + (begin + (print "ERROR: your key cannot be named " keyn " as this conflicts with the same named field in the runs table, you must remove your megatest.db and /.db before trying again.") + (exit 1))))) + keys) + (sqlite3:with-transaction + db + (lambda () + ;; handle-exceptions + ;; exn + ;; (begin + ;; (debug:print 0 "ERROR: Failed to create tables. Look at your [fields] section, should be: fieldname TEXT DEFAULT 'yourdefault'") + ;; (exit)) + (sqlite3:execute db "CREATE TABLE IF NOT EXISTS keys (id INTEGER PRIMARY KEY, fieldname TEXT, fieldtype TEXT, CONSTRAINT keyconstraint UNIQUE (fieldname));") + (for-each (lambda (key) + (sqlite3:execute db "INSERT OR REPLACE INTO keys (fieldname,fieldtype) VALUES (?,?);" key "TEXT")) + keys) + (sqlite3:execute db (conc + "CREATE TABLE IF NOT EXISTS runs (id INTEGER PRIMARY KEY, \n " + fieldstr (if havekeys "," "") " + runname TEXT DEFAULT 'norun', + contour TEXT DEFAULT '', + state TEXT DEFAULT '', + status TEXT DEFAULT '', + owner TEXT DEFAULT '', + event_time TIMESTAMP DEFAULT (strftime('%s','now')), + comment TEXT DEFAULT '', + fail_count INTEGER DEFAULT 0, + pass_count INTEGER DEFAULT 0, + last_update INTEGER DEFAULT (strftime('%s','now')), + CONSTRAINT runsconstraint UNIQUE (runname" (if havekeys "," "") keystr "));")) + (sqlite3:execute db "CREATE TRIGGER IF NOT EXISTS update_runs_trigger AFTER UPDATE ON runs + FOR EACH ROW + BEGIN + UPDATE runs SET last_update=(strftime('%s','now')) + WHERE id=old.id; + END;") + (sqlite3:execute db "CREATE TABLE IF NOT EXISTS run_stats ( + id INTEGER PRIMARY KEY, + run_id INTEGER, + state TEXT, + status TEXT, 
+                              count INTEGER,
+                              last_update INTEGER DEFAULT (strftime('%s','now')))")
+       (sqlite3:execute db "CREATE TRIGGER IF NOT EXISTS update_run_stats_trigger AFTER UPDATE ON run_stats
+                              FOR EACH ROW
+                                BEGIN
+                                  UPDATE run_stats SET last_update=(strftime('%s','now'))
+                                    WHERE id=old.id;
+                                END;")
+       (sqlite3:execute db "CREATE TABLE IF NOT EXISTS test_meta (
+                              id INTEGER PRIMARY KEY,
+                              testname TEXT DEFAULT '',
+                              author TEXT DEFAULT '',
+                              owner TEXT DEFAULT '',
+                              description TEXT DEFAULT '',
+                              reviewed TIMESTAMP,
+                              iterated TEXT DEFAULT '',
+                              avg_runtime REAL,
+                              avg_disk REAL,
+                              tags TEXT DEFAULT '',
+                              jobgroup TEXT DEFAULT 'default',
+                              CONSTRAINT test_meta_constraint UNIQUE (testname));")
+       (sqlite3:execute db "CREATE TABLE IF NOT EXISTS tasks_queue (id INTEGER PRIMARY KEY,
+                              action TEXT DEFAULT '',
+                              owner TEXT,
+                              state TEXT DEFAULT 'new',
+                              target TEXT DEFAULT '',
+                              name TEXT DEFAULT '',
+                              testpatt TEXT DEFAULT '',
+                              keylock TEXT,
+                              params TEXT,
+                              creation_time TIMESTAMP DEFAULT (strftime('%s','now')),
+                              execution_time TIMESTAMP);")
+       ;; archive disk areas, cached info from [archive-disks]
+       ;; creation_time defaults use '%s' (seconds since epoch) like the other
+       ;; timestamp columns in this schema
+       (sqlite3:execute db "CREATE TABLE IF NOT EXISTS archive_disks (
+                              id INTEGER PRIMARY KEY,
+                              archive_area_name TEXT,
+                              disk_path TEXT,
+                              last_df INTEGER DEFAULT -1,
+                              last_df_time TIMESTAMP DEFAULT (strftime('%s','now')),
+                              creation_time TIMESTAMP DEFAULT (strftime('%s','now')));")
+       ;; individual bup (or tar) data chunks
+       (sqlite3:execute db "CREATE TABLE IF NOT EXISTS archive_blocks (
+                              id INTEGER PRIMARY KEY,
+                              archive_disk_id INTEGER,
+                              disk_path TEXT,
+                              last_du INTEGER DEFAULT -1,
+                              last_du_time TIMESTAMP DEFAULT (strftime('%s','now')),
+                              creation_time TIMESTAMP DEFAULT (strftime('%s','now')));")
+       ;; tests allocated to what chunks. reusing a chunk for a test/item_path is very efficient
+       ;; NB// the per run/test recording of where the archive is stored is done in the test
+       ;; record.
+       (sqlite3:execute db "CREATE TABLE IF NOT EXISTS archive_allocations (
+                              id INTEGER PRIMARY KEY,
+                              archive_block_id INTEGER,
+                              testname TEXT,
+                              item_path TEXT,
+                              creation_time TIMESTAMP DEFAULT (strftime('%s','now')));")
+       ;; move this clean up call somewhere else
+       (sqlite3:execute db "DELETE FROM tasks_queue WHERE state='done' AND creation_time < ?;" (- (current-seconds)(* 24 60 60))) ;; remove older than 24 hrs
+       (sqlite3:execute db (conc "CREATE INDEX IF NOT EXISTS runs_index ON runs (runname" (if havekeys "," "") keystr ");"))
+       ;; (sqlite3:execute db "CREATE VIEW runs_tests AS SELECT * FROM runs INNER JOIN tests ON runs.id=tests.run_id;")
+       (sqlite3:execute db "CREATE TABLE IF NOT EXISTS extradat (id INTEGER PRIMARY KEY, run_id INTEGER, key TEXT, val TEXT);")
+       (sqlite3:execute db "CREATE TABLE IF NOT EXISTS metadat (id INTEGER PRIMARY KEY, var TEXT, val TEXT,
+                              CONSTRAINT metadat_constraint UNIQUE (var));")
+       (sqlite3:execute db "CREATE TABLE IF NOT EXISTS access_log (id INTEGER PRIMARY KEY, user TEXT, accessed TIMESTAMP, args TEXT);")
+       ;; Must do this *after* running patch db !! No more. 
+ ;; cannot use db:set-var since it will deadlock, hardwire the code here + (sqlite3:execute db "INSERT OR REPLACE INTO metadat (var,val) VALUES (?,?);" "MEGATEST_VERSION" (common:version-signature)) + (debug:print-info 11 *default-log-port* "db:initialize END") ;; )))) + + ;;====================================================================== + ;; R U N S P E C I F I C D B + ;;====================================================================== + + ;; (define (db:initialize-run-id-db db) + ;; (sqlite3:with-transaction + ;; db + ;; (lambda () + (sqlite3:execute db "CREATE TABLE IF NOT EXISTS tests + (id INTEGER PRIMARY KEY, + run_id INTEGER DEFAULT -1, + testname TEXT DEFAULT 'noname', + host TEXT DEFAULT 'n/a', + cpuload REAL DEFAULT -1, + diskfree INTEGER DEFAULT -1, + uname TEXT DEFAULT 'n/a', + rundir TEXT DEFAULT '/tmp/badname', + shortdir TEXT DEFAULT '/tmp/badname', + item_path TEXT DEFAULT '', + state TEXT DEFAULT 'NOT_STARTED', + status TEXT DEFAULT 'FAIL', + attemptnum INTEGER DEFAULT 0, + final_logf TEXT DEFAULT 'logs/final.log', + logdat TEXT DEFAULT '', + run_duration INTEGER DEFAULT 0, + comment TEXT DEFAULT '', + event_time TIMESTAMP DEFAULT (strftime('%s','now')), + fail_count INTEGER DEFAULT 0, + pass_count INTEGER DEFAULT 0, + archived INTEGER DEFAULT 0, -- 0=no, > 1=archive block id where test data can be found + last_update INTEGER DEFAULT (strftime('%s','now')), + CONSTRAINT testsconstraint UNIQUE (run_id, testname, item_path));") + ;; deprecated -- (sqlite3:execute db "CREATE INDEX IF NOT EXISTS tests_index ON tests (run_id, testname, item_path, uname);") + + (sqlite3:execute db "CREATE INDEX IF NOT EXISTS tests_run_id_index ON tests (run_id);") ;; new + (sqlite3:execute db "CREATE INDEX IF NOT EXISTS tests_testname_index ON tests (testname,item_path);") ;; new + (sqlite3:execute db "CREATE INDEX IF NOT EXISTS tests_state_status_index ON tests (state, status); ") ;; new + + (sqlite3:execute db "CREATE TRIGGER IF NOT EXISTS 
update_tests_trigger AFTER UPDATE ON tests + FOR EACH ROW + BEGIN + UPDATE tests SET last_update=(strftime('%s','now')) + WHERE id=old.id; + END;") + (sqlite3:execute db "CREATE TABLE IF NOT EXISTS test_steps + (id INTEGER PRIMARY KEY, + test_id INTEGER, + stepname TEXT, + state TEXT DEFAULT 'NOT_STARTED', + status TEXT DEFAULT 'n/a', + event_time TIMESTAMP, + comment TEXT DEFAULT '', + logfile TEXT DEFAULT '', + last_update INTEGER DEFAULT (strftime('%s','now')), + CONSTRAINT test_steps_constraint UNIQUE (test_id,stepname,state));") + (sqlite3:execute db "CREATE INDEX IF NOT EXISTS teststeps_index ON tests (run_id, testname, item_path);") + (sqlite3:execute db "CREATE TRIGGER IF NOT EXISTS update_teststeps_trigger AFTER UPDATE ON test_steps + FOR EACH ROW + BEGIN + UPDATE test_steps SET last_update=(strftime('%s','now')) + WHERE id=old.id; + END;") + (sqlite3:execute db "CREATE TABLE IF NOT EXISTS test_data (id INTEGER PRIMARY KEY, + test_id INTEGER, + category TEXT DEFAULT '', + variable TEXT, + value REAL, + expected REAL, + tol REAL, + units TEXT, + comment TEXT DEFAULT '', + status TEXT DEFAULT 'n/a', + type TEXT DEFAULT '', + last_update INTEGER DEFAULT (strftime('%s','now')), + CONSTRAINT test_data_constraint UNIQUE (test_id,category,variable));") + (sqlite3:execute db "CREATE INDEX IF NOT EXISTS test_data_index ON test_data (test_id);") + (sqlite3:execute db "CREATE TRIGGER IF NOT EXISTS update_test_data_trigger AFTER UPDATE ON test_data + FOR EACH ROW + BEGIN + UPDATE test_data SET last_update=(strftime('%s','now')) + WHERE id=old.id; + END;") + (sqlite3:execute db "CREATE TABLE IF NOT EXISTS test_rundat ( + id INTEGER PRIMARY KEY, + test_id INTEGER, + update_time TIMESTAMP, + cpuload INTEGER DEFAULT -1, + diskfree INTEGER DEFAULT -1, + diskusage INTGER DEFAULT -1, + run_duration INTEGER DEFAULT 0);") + (sqlite3:execute db "CREATE TABLE IF NOT EXISTS archives ( + id INTEGER PRIMARY KEY, + test_id INTEGER, + state TEXT DEFAULT 'new', + status TEXT DEFAULT 
'n/a',
+                              archive_type TEXT DEFAULT 'bup',
+                              du INTEGER,
+                              archive_path TEXT);")))
+    db)) ;; )
+
+;;======================================================================
+;; A R C H I V E S
+;;======================================================================
+
+;; dneeded is minimum space needed, scan for existing archives that
+;; are on disks with adequate space and already have this test/itempath
+;; archived
+;;
+;; Returns a list of vectors #(id archive-area-name disk-path last-df last-df-time)
+;; taken from archive_disks, or '() when no block is allocated to testname/itempath.
+;;
+(define (db:archive-get-allocations dbstruct testname itempath dneeded)
+  (let* ((dbdat  (db:get-db dbstruct)) ;; archive tables are in main.db
+         (db     (db:dbdat-get-db dbdat))
+         (res    '())
+         (blocks '())) ;; a block is an archive chunck that can be added too if there is space
+    (sqlite3:for-each-row
+     (lambda (id archive-disk-id disk-path last-du last-du-time)
+       (set! res (cons (vector id archive-disk-id disk-path last-du last-du-time) res)))
+     db
+     "SELECT b.id,b.archive_disk_id,b.disk_path,b.last_du,b.last_du_time FROM archive_blocks AS b
+        INNER JOIN archive_allocations AS a ON a.archive_block_id=b.id
+        WHERE a.testname=? AND a.item_path=?;"
+     testname itempath)
+    ;; Now res has list of candidate paths, look in archive_disks for candidate with potential free space
+    (if (null? res)
+        '()
+        (sqlite3:for-each-row
+         (lambda (id archive-area-name disk-path last-df last-df-time)
+           (set! blocks (cons (vector id archive-area-name disk-path last-df last-df-time) blocks)))
+         db
+         ;; The IN list is built from the block ids (slot 0 of each res vector).
+         ;; Columns are qualified with d. because archive_disks and
+         ;; archive_blocks both have a disk_path column.
+         (conc
+          "SELECT d.id,d.archive_area_name,d.disk_path,d.last_df,d.last_df_time FROM archive_disks AS d
+             INNER JOIN archive_blocks AS b ON d.id=b.archive_disk_id
+             WHERE b.id IN (" (string-intersperse (map (lambda (blk) (conc (vector-ref blk 0))) res) ",") ") AND
+             d.last_df > ?;")
+         dneeded))
+    (stack-push! 
(dbr:dbstruct-dbstack dbstruct) dbdat) + blocks)) + +;; returns id of the record, register a disk allocated to archiving and record it's last known +;; available space +;; +(define (db:archive-register-disk dbstruct bdisk-name bdisk-path df) + (let* ((dbdat (db:get-db dbstruct)) ;; archive tables are in main.db + (db (db:dbdat-get-db dbdat)) + (res #f)) + (sqlite3:for-each-row + (lambda (id) + (set! res id)) + db + "SELECT id FROM archive_disks WHERE archive_area_name=? AND disk_path=?;" + bdisk-name bdisk-path) + (if res ;; record exists, update df and return id + (begin + (sqlite3:execute db "UPDATE archive_disks SET last_df=?,last_df_time=(strftime('%s','now')) + WHERE archive_area_name=? AND disk_path=?;" + df bdisk-name bdisk-path) + (stack-push! (dbr:dbstruct-dbstack dbstruct) dbdat) + res) + (begin + (sqlite3:execute + db + "INSERT OR REPLACE INTO archive_disks (archive_area_name,disk_path,last_df) + VALUES (?,?,?);" + bdisk-name bdisk-path df) + (stack-push! (dbr:dbstruct-dbstack dbstruct) dbdat) + (db:archive-register-disk dbstruct bdisk-name bdisk-path df))))) + +;; record an archive path created on a given archive disk (identified by it's bdisk-id) +;; if path starts with / then it is full, otherwise it is relative to the archive disk +;; preference is to store the relative path. +;; +(define (db:archive-register-block-name dbstruct bdisk-id archive-path #!key (du #f)) + (let* ((dbdat (db:get-db dbstruct)) ;; archive tables are in main.db + (db (db:dbdat-get-db dbdat)) + (res #f)) + ;; first look to see if this path is already registered + (sqlite3:for-each-row + (lambda (id) + (set! res id)) + db + "SELECT id FROM archive_blocks WHERE archive_disk_id=? AND disk_path=?;" + bdisk-id archive-path) + (if res ;; record exists, update du if applicable and return res + (begin + (if du (sqlite3:execute db "UPDATE archive_blocks SET last_du=?,last_du_time=(strftime('%s','now')) + WHERE archive_disk_id=? 
AND disk_path=?;" + bdisk-id archive-path du)) + res) + (begin + (sqlite3:execute db "INSERT OR REPLACE INTO archive_blocks (archive_disk_id,disk_path,last_du) + VALUES (?,?,?);" + bdisk-id archive-path (or du 0)) + (db:archive-register-block-name dbstruct bdisk-id archive-path du: du))) + (stack-push! (dbr:dbstruct-dbstack dbstruct) dbdat) + res)) + + +;; The "archived" field in tests is overloaded; 0 = not archived, > 0 archived in block with given id +;; +(define (db:test-set-archive-block-id dbstruct run-id test-id archive-block-id) + (db:with-db + dbstruct + run-id + #f + (lambda (db) + (sqlite3:execute db "UPDATE tests SET archived=? WHERE id=?;" + archive-block-id test-id)))) + +;; Look up the archive block info given a block-id +;; +(define (db:test-get-archive-block-info dbstruct archive-block-id) + (db:with-db + dbstruct + #f + #f + (lambda (db) + (let ((res #f)) + (sqlite3:for-each-row + ;; 0 1 2 3 4 5 + (lambda (id archive-disk-id disk-path last-du last-du-time creation-time) + (set! res (vector id archive-disk-id disk-path last-du last-du-time creation-time))) + db + "SELECT id,archive_disk_id,disk_path,last_du,last_du_time,creation_time FROM archive_blocks WHERE id=?;" + archive-block-id) + res)))) + +;; (define (db:archive-allocate-testsuite/area-to-block block-id testsuite-name areakey) +;; (let* ((dbdat (db:get-db dbstruct #f)) ;; archive tables are in main.db +;; (db (db:dbdat-get-db dbdat)) +;; (res '()) +;; (blocks '())) ;; a block is an archive chunck that can be added too if there is space +;; (sqlite3:for-each-row #f) + +;;====================================================================== +;; L O G G I N G D B +;;====================================================================== + +(define (open-logging-db) + (let* ((dbpath (conc (if *toppath* (conc *toppath* "/") "") "logging.db")) ;; fname) + (dbexists (common:file-exists? 
dbpath)) + (db (sqlite3:open-database dbpath)) + (handler (sqlite3:make-busy-timeout (if (args:get-arg "-override-timeout") + (string->number (args:get-arg "-override-timeout")) + 136000)))) ;; 136000))) + (sqlite3:set-busy-handler! db handler) + (if (not dbexists) + (begin + (sqlite3:execute db "CREATE TABLE IF NOT EXISTS log (id INTEGER PRIMARY KEY,event_time TIMESTAMP DEFAULT (strftime('%s','now')),logline TEXT,pwd TEXT,cmdline TEXT,pid INTEGER);") + (db:set-sync db) ;; (sqlite3:execute db (conc "PRAGMA synchronous = 0;")) + )) + db)) + +(define (db:log-local-event . loglst) + (let ((logline (apply conc loglst))) + (db:log-event logline))) + +(define (db:log-event logline) + (let ((db (open-logging-db))) + (sqlite3:execute db "INSERT INTO log (logline,pwd,cmdline,pid) VALUES (?,?,?,?);" + logline + (current-directory) + (string-intersperse (argv) " ") + (current-process-id)) + (sqlite3:finalize! db) + logline)) + +;;====================================================================== +;; D B U T I L S +;;====================================================================== + +;;====================================================================== +;; M A I N T E N A N C E +;;====================================================================== + +(define (db:have-incompletes? dbstruct run-id ovr-deadtime) + (let* ((incompleted '()) + (oldlaunched '()) + (toplevels '()) + (deadtime-str (configf:lookup *configdat* "setup" "deadtime")) + (deadtime (if (and deadtime-str + (string->number deadtime-str)) + (string->number deadtime-str) + 72000))) ;; twenty hours + (db:with-db + dbstruct #f #f + (lambda (db) + (if (number? ovr-deadtime)(set! 
deadtime ovr-deadtime)) + + ;; in RUNNING or REMOTEHOSTSTART for more than 10 minutes + ;; + ;; HOWEVER: this code in run:test seems to work fine + ;; (> (- (current-seconds)(+ (db:test-get-event_time testdat) + ;; (db:test-get-run_duration testdat))) + ;; 600) + ;; (db:delay-if-busy dbdat) + (sqlite3:for-each-row + (lambda (test-id run-dir uname testname item-path) + (if (and (equal? uname "n/a") + (equal? item-path "")) ;; this is a toplevel test + ;; what to do with toplevel? call rollup? + (begin + (set! toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels)) + (debug:print-info 0 *default-log-port* "Found old toplevel test in RUNNING state, test-id=" test-id)) + (set! incompleted (cons (list test-id run-dir uname testname item-path run-id) incompleted)))) + db + "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > (run_duration + ?) AND state IN ('RUNNING','REMOTEHOSTSTART');" + run-id deadtime) + + ;; in LAUNCHED for more than one day. Could be long due to job queues TODO/BUG: Need override for this in config + ;; + ;; (db:delay-if-busy dbdat) + (sqlite3:for-each-row + (lambda (test-id run-dir uname testname item-path) + (if (and (equal? uname "n/a") + (equal? item-path "")) ;; this is a toplevel test + ;; what to do with toplevel? call rollup? + (set! toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels)) + (set! oldlaunched (cons (list test-id run-dir uname testname item-path run-id) oldlaunched)))) + db + "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > 86400 AND state IN ('LAUNCHED');" + run-id) + + (debug:print-info 18 *default-log-port* "Found " (length oldlaunched) " old LAUNCHED items, " (length toplevels) " old LAUNCHED toplevel tests and " (length incompleted) " tests marked RUNNING but apparently dead.") + (if (and (null? incompleted) + (null? oldlaunched) + (null? 
toplevels)) + #f + #t))))) + +;; given a launch delay (minimum time from last launch) return amount of time to wait +;; +;; (define (db:launch-delay-left dbstruct run-id launch-delay) + + +;; select end_time-now from +;; (select testname,item_path,event_time+run_duration as +;; end_time,strftime('%s','now') as now from tests where state in +;; ('RUNNING','REMOTEHOSTSTART','LAUNCHED')); + +(define (db:find-and-mark-incomplete dbstruct run-id ovr-deadtime) + (let* ((incompleted '()) + (oldlaunched '()) + (toplevels '()) + (deadtime-trim (or ovr-deadtime (configf:lookup-number *configdat* "setup" "deadtime"))) + (server-start-allowance 200) + (server-overloaded-budget 200) + (launch-monitor-off-time (or (configf:lookup-number *configdat* "setup" "test-stats-update-period") 30)) + (launch-monitor-on-time-budget 30) + (launch-monitor-period (+ launch-monitor-off-time launch-monitor-on-time-budget server-overloaded-budget)) + (remotehoststart-deadtime-default (+ server-start-allowance server-overloaded-budget 30)) + (remotehoststart-deadtime (or deadtime-trim remotehoststart-deadtime-default)) + (running-deadtime-default (+ server-start-allowance (* 2 launch-monitor-period))) + (running-deadtime (or deadtime-trim running-deadtime-default)) ;; two minutes (30 seconds between updates, this leaves 3x grace period) + ) + (db:with-db + dbstruct #f #f + (lambda (db) + ;; in RUNNING or REMOTEHOSTSTART for more than 10 minutes + ;; + ;; HOWEVER: this code in run:test seems to work fine + ;; (> (- (current-seconds)(+ (db:test-get-event_time testdat) + ;; (db:test-get-run_duration testdat))) + ;; 600) + ;; (db:delay-if-busy dbdat) + (sqlite3:for-each-row + (lambda (test-id run-dir uname testname item-path event-time run-duration) + (if (and (equal? uname "n/a") + (equal? item-path "")) ;; this is a toplevel test + ;; what to do with toplevel? call rollup? + (begin + (set! 
toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels)) + (debug:print-info 0 *default-log-port* "Found old toplevel test in RUNNING state, test-id=" test-id)) + (begin + (set! incompleted (cons (list test-id run-dir uname testname item-path run-id) incompleted)) + (debug:print-info 0 *default-log-port* "Found old test in RUNNING state, test-id=" test-id" exceeded running-deadtime "running-deadtime" now="(current-seconds)" event-time="event-time" run-duration="run-duration)))) + db + "SELECT id,rundir,uname,testname,item_path,event_time,run_duration FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > (run_duration + ?) AND state IN ('RUNNING');" + run-id running-deadtime) + + + (sqlite3:for-each-row + (lambda (test-id run-dir uname testname item-path event-time run-duration) + (if (and (equal? uname "n/a") + (equal? item-path "")) ;; this is a toplevel test + ;; what to do with toplevel? call rollup? + (begin + (set! toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels)) + (debug:print-info 0 *default-log-port* "Found old toplevel test in RUNNING state, test-id=" test-id)) + (begin + (debug:print-info 0 *default-log-port* "Found old test in REMOTEHOSTSTART state, test-id=" test-id" exceeded running-deadtime "running-deadtime" now="(current-seconds)" event-time="event-time" run-duration="run-duration) + (set! incompleted (cons (list test-id run-dir uname testname item-path run-id) incompleted))))) + db + "SELECT id,rundir,uname,testname,item_path,event_time,run_duration FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > (run_duration + ?) AND state IN ('REMOTEHOSTSTART');" + run-id remotehoststart-deadtime) + + ;; in LAUNCHED for more than one day. Could be long due to job queues TODO/BUG: Need override for this in config + ;; + ;; (db:delay-if-busy dbdat) + (sqlite3:for-each-row + (lambda (test-id run-dir uname testname item-path) + (if (and (equal? uname "n/a") + (equal? 
item-path "")) ;; this is a toplevel test + ;; what to do with toplevel? call rollup? + (set! toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels)) + (begin + (debug:print-info 0 *default-log-port* "Found old test in LAUNCHED state, test-id=" test-id" 1 day since event_time marked") + (set! oldlaunched (cons (list test-id run-dir uname testname item-path run-id) oldlaunched))))) + db + "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > 86400 AND state IN ('LAUNCHED');" + run-id) + + (debug:print-info 18 *default-log-port* "Found " (length oldlaunched) " old LAUNCHED items, " (length toplevels) " old LAUNCHED toplevel tests and " (length incompleted) " tests marked RUNNING but apparently dead.") + + ;; These are defunct tests, do not do all the overhead of set-state-status. Force them to INCOMPLETE. + ;; + ;; (db:delay-if-busy dbdat) + (let* (;; (min-incompleted (filter (lambda (x) + ;; (let* ((testpath (cadr x)) + ;; (tdatpath (conc testpath "/testdat.db")) + ;; (dbexists (common:file-exists? 
tdatpath))) + ;; (or (not dbexists) ;; if no file then something wrong - mark as incomplete + ;; (> (- (current-seconds)(file-modification-time tdatpath)) 600)))) ;; no change in 10 minutes to testdat.db - she's dead Jim + ;; incompleted)) + (min-incompleted-ids (map car incompleted)) ;; do 'em all + (all-ids (append min-incompleted-ids (map car oldlaunched)))) + (if (> (length all-ids) 0) + (begin + (debug:print 0 *default-log-port* "WARNING: Marking test(s); " (string-intersperse (map conc all-ids) ", ") " as DEAD") + (for-each + (lambda (test-id) + (db:set-state-status-and-roll-up-items dbstruct run-id test-id 'foo "COMPLETED" "DEAD" "Test stopped responding while in RUNNING or REMOTEHOSTSTART; presumed dead.")) + ;;(db:test-set-state-status dbstruct run-id test-id "COMPLETED" "DEAD" "Test stopped responding while in RUNNING or REMOTEHOSTSTART; presumed dead.")) ;; fix for one aspect of Randy's ticket 1405717332 ;; TODO - fix problem where test goes to COMPLETED/DEAD while in progress, only later to go to COMPLETED/PASS. ref ticket 220546828 + all-ids)))))))) + +;; ALL REPLACED BY THE BLOCK ABOVE +;; +;; (sqlite3:execute +;; db +;; (conc "UPDATE tests SET state='INCOMPLETE' WHERE run_id=? 
AND id IN ("
+;;            (string-intersperse (map conc all-ids) ",")
+;;            ");")
+;;      run-id))))
+;;
+;; ;; Now do rollups for the toplevel tests
+;; ;;
+;; ;; (db:delay-if-busy dbdat)
+;; (for-each
+;;  (lambda (toptest)
+;;    (let ((test-name (list-ref toptest 3)))
+;;      ;;     (run-id    (list-ref toptest 5)))
+;;      (db:top-test-set-per-pf-counts dbstruct run-id test-name)))
+;;  toplevels)))
+
+;; Run the stored 'top-test-set-per-pf-counts query through db:general-call.
+;;
+;; dbstruct  - db handle structure, handed to db:general-call unchanged
+;; run-id    - accepted but never forwarded (see BUG note below)
+;; test-name - repeated once per entry of the parameter list
+;;
+;; Returns whatever db:general-call returns.
+;;
+;; BUG: Probably broken - does not explicitly use run-id in the query
+;;
+(define (db:top-test-set-per-pf-counts dbstruct run-id test-name)
+  ;; NOTE(review): the repeat count presumably has to match the number of
+  ;; slots in the stored query - confirm before changing it.
+  (let ((query-params (list test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name)))
+    (db:general-call dbstruct 'top-test-set-per-pf-counts query-params)))
+
+;; Clean out old junk and vacuum the database
+;;
+;; Ultimately do something like this:
+;;
+;; 1. Look at test records either deleted or part of deleted run:
+;;    a. If test dir exists, set the test to state='UNKNOWN', Set the run to 'unknown'
+;;    b. If test dir gone, delete the test record
+;; 2. Look at run records
+;;    a. If have tests that are not deleted, set state='unknown'
+;;    b. ....
+;;
+;; db:clean-up
+;;
+;; dbdat - db handle record; the sqlite3 handle is taken with db:dbdat-get-db
+;;
+;; Deletes aged-out tests/runs and orphaned test_rundat/test_steps/test_data
+;; rows inside one transaction, logs the combined tests+runs record count
+;; before and after, finalizes the prepared statements and then VACUUMs.
+;; The age cut-off is now minus [setup] delete-record-age (default "30d").
+;; Returns the result of the final VACUUM execute.
+;;
+(define (db:clean-up dbdat)
+  ;; (debug:print 0 *default-log-port* "WARNING: db clean up not fully ported to v1.60, cleanup action will be on megatest.db")
+  (let* ((keep-record-age (- (current-seconds) (common:hms-string->seconds (or (configf:lookup *configdat* "setup" "delete-record-age") "30d"))))
+         (db         (db:dbdat-get-db dbdat))
+         (count-stmt (sqlite3:prepare db "SELECT (SELECT count(id) FROM tests)+(SELECT count(id) FROM runs);"))
+         ;; keep-record-age is computed above (not caller supplied) and is
+         ;; spliced into the statement text by conc
+         (statements
+          (map (lambda (stmt)
+                 (sqlite3:prepare db stmt))
+               (list
+                ;; delete all tests that belong to runs that are 'deleted'
+                (conc "DELETE FROM tests WHERE run_id in (SELECT id FROM runs WHERE state='deleted') and last_update < " keep-record-age ";")
+                ;; delete all tests that are 'DELETED'
+                (conc "DELETE FROM tests WHERE state='DELETED' and last_update < " keep-record-age " ;")
+                ;; delete all tests that have no run
+                (conc "DELETE FROM tests WHERE run_id NOT IN (SELECT DISTINCT id FROM runs) and last_update < " keep-record-age "; ")
+                ;; delete all runs that are state='deleted'
+                (conc "DELETE FROM runs WHERE state='deleted' and last_update < " keep-record-age ";")
+                ;; delete empty runs
+                (conc "DELETE FROM runs WHERE id NOT IN (SELECT DISTINCT r.id FROM runs AS r INNER JOIN tests AS t ON t.run_id=r.id) and last_update < " keep-record-age ";")
+                ;; remove orphaned test_rundat entries
+                (conc "DELETE FROM test_rundat where test_id NOT IN (SELECT id FROM tests);")
+                ;; remove orphaned test_steps entries
+                (conc "DELETE FROM test_steps WHERE test_id NOT IN (SELECT id FROM tests);")
+                ;; remove orphaned test_dat entries
+                (conc "DELETE FROM test_data WHERE test_id NOT IN (SELECT id FROM tests);")
+
+                ))))
+    ;; (db:delay-if-busy dbdat)
+    ;(debug:print-info 0 *default-log-port* statements)
+    (sqlite3:with-transaction
+     db
+     (lambda ()
+       (sqlite3:for-each-row (lambda (tot)
+                               (debug:print-info 0 *default-log-port* "Records count before clean: " tot))
+                             count-stmt)
+       ;; for-each, not map: the deletes depend on running in list order
+       ;; (orphan clean-up must follow the test/run deletes) and map does
+       ;; not promise any order of application.
+       (for-each sqlite3:execute statements)
+       ;; the count query stays the last expression of the thunk so the
+       ;; value handed back to sqlite3:with-transaction is unchanged
+       (sqlite3:for-each-row (lambda (tot)
+                               (debug:print-info 0 *default-log-port* "Records count after clean: " tot))
+                             count-stmt)))
+    (for-each sqlite3:finalize! statements)
+    (sqlite3:finalize! count-stmt)
+    ;; (db:find-and-mark-incomplete db)
+    ;; (db:delay-if-busy dbdat)
+    (sqlite3:execute db "VACUUM;")))
+
+;; Clean out old junk and vacuum the database
+;;
+;; Ultimately do something like this:
+;;
+;; 1. Look at test records either deleted or part of deleted run:
+;;    a. If test dir exists, set the test to state='UNKNOWN', Set the run to 'unknown'
+;;    b. If test dir gone, delete the test record
+;; 2. Look at run records
+;;    a. If have tests that are not deleted, set state='unknown'
+;;    b. ....
+;;
+;; db:clean-up-rundb
+;;
+;; dbdat - db handle record; the sqlite3 handle is taken with db:dbdat-get-db
+;;
+;; Deletes every test in state 'DELETED' (no age limit) inside a
+;; transaction, logs the tests record count before and after, finalizes
+;; the prepared statements and VACUUMs. Returns the result of the VACUUM.
+;;
+(define (db:clean-up-rundb dbdat)
+  ;; (debug:print 0 *default-log-port* "WARNING: db clean up not fully ported to v1.60, cleanup action will be on megatest.db")
+  (let* ((db         (db:dbdat-get-db dbdat))
+         (count-stmt (sqlite3:prepare db "SELECT (SELECT count(id) FROM tests);"))
+         (statements
+          (map (lambda (stmt)
+                 (sqlite3:prepare db stmt))
+               (list
+                ;; delete all tests that belong to runs that are 'deleted'
+                ;; (conc "DELETE FROM tests WHERE run_id NOT IN (" (string-intersperse (map conc valid-runs) ",") ");")
+                ;; delete all tests that are 'DELETED'
+                "DELETE FROM tests WHERE state='DELETED';"
+                ))))
+    ;; (db:delay-if-busy dbdat)
+    (sqlite3:with-transaction
+     db
+     (lambda ()
+       (sqlite3:for-each-row (lambda (tot)
+                               (debug:print-info 0 *default-log-port* "Records count before clean: " tot))
+                             count-stmt)
+       ;; executed for side effect only, so for-each (ordered) rather than map
+       (for-each sqlite3:execute statements)
+       (sqlite3:for-each-row (lambda (tot)
+                               (debug:print-info 0 *default-log-port* "Records count after clean: " tot))
+                             count-stmt)))
+    (for-each sqlite3:finalize! statements)
+    (sqlite3:finalize! count-stmt)
+    ;; (db:find-and-mark-incomplete db)
+    ;; (db:delay-if-busy dbdat)
+    (sqlite3:execute db "VACUUM;")))
+
+;; Clean out old junk and vacuum the database
+;;
+;; Ultimately do something like this:
+;;
+;; 1.
Look at test records either deleted or part of deleted run:
+;;    a. If test dir exists, set the test to state='UNKNOWN', Set the run to 'unknown'
+;;    b. If test dir gone, delete the test record
+;; 2. Look at run records
+;;    a. If have tests that are not deleted, set state='unknown'
+;;    b. ....
+;;
+;; db:clean-up-maindb
+;;
+;; dbdat - db handle record; the sqlite3 handle is taken with db:dbdat-get-db
+;;
+;; Collects the ids of all runs in state 'deleted', deletes those runs in a
+;; transaction (logging the runs count before and after), finalizes the
+;; prepared statements, VACUUMs and returns the list of deleted run ids.
+;;
+(define (db:clean-up-maindb dbdat)
+  ;; (debug:print 0 *default-log-port* "WARNING: db clean up not fully ported to v1.60, cleanup action will be on megatest.db")
+  (let* ((db         (db:dbdat-get-db dbdat))
+         (count-stmt (sqlite3:prepare db "SELECT (SELECT count(id) FROM runs);"))
+         (statements
+          (map (lambda (stmt)
+                 (sqlite3:prepare db stmt))
+               (list
+                ;; delete all tests that belong to runs that are 'deleted'
+                ;; (conc "DELETE FROM tests WHERE run_id NOT IN (" (string-intersperse (map conc valid-runs) ",") ");")
+                ;; delete all tests that are 'DELETED'
+                "DELETE FROM runs WHERE state='deleted';"
+                )))
+         (dead-runs '()))
+    ;; remember which runs are about to go away; this list is the return value
+    (sqlite3:for-each-row
+     (lambda (run-id)
+       (set! dead-runs (cons run-id dead-runs)))
+     db
+     "SELECT id FROM runs WHERE state='deleted';")
+    ;; (db:delay-if-busy dbdat)
+    (sqlite3:with-transaction
+     db
+     (lambda ()
+       (sqlite3:for-each-row (lambda (tot)
+                               (debug:print-info 0 *default-log-port* "Records count before clean: " tot))
+                             count-stmt)
+       ;; executed for side effect only, so for-each (ordered) rather than map
+       (for-each sqlite3:execute statements)
+       (sqlite3:for-each-row (lambda (tot)
+                               (debug:print-info 0 *default-log-port* "Records count after clean: " tot))
+                             count-stmt)))
+    (for-each sqlite3:finalize! statements)
+    (sqlite3:finalize! count-stmt)
+    ;; (db:find-and-mark-incomplete db)
+    ;; (db:delay-if-busy dbdat)
+    (sqlite3:execute db "VACUUM;")
+    dead-runs))
+
+;;======================================================================
+;; M E T A   G E T   A N D   S E T   V A R S
+;;======================================================================
+
+;; Look up var in the metadat table.
+;; Returns a number if string->number succeeds on the stored value, the
+;; stored value itself otherwise, and #f when no row matches.
+;;
+(define (db:get-var dbstruct var)
+  (let* ((res #f))
+    (db:with-db
+     dbstruct #f #f
+     (lambda (db)
+       (sqlite3:for-each-row
+        (lambda (val)
+          (set! res val))
+        db
+        "SELECT val FROM metadat WHERE var=?;" var)
+       ;; convert to number if can
+       (if (string? res)
+           (let ((valnum (string->number res)))
+             (if valnum (set! res valnum))))
+       res))))
+
+;; This was part of db:get-var. It was used to estimate the load on
+;; the database files.
+;;
+;; scale by 10, average with current value.
+;;     (set! *global-delta* (/ (+ *global-delta* (* (- (current-milliseconds) start-ms)
+;;                                                  (if throttle throttle 0.01)))
+;;                             2))
+;;     (if (> (abs (- *last-global-delta-printed* *global-delta*)) 0.08) ;; don't print all the time, only if it changes a bit
+;;         (begin
+;;           (debug:print-info 4 *default-log-port* "launch throttle factor=" *global-delta*)
+;;           (set! *last-global-delta-printed* *global-delta*)))
+
+;; Insert or replace the metadat row for var with val.
+;;
+(define (db:set-var dbstruct var val)
+  (db:with-db dbstruct #f #t
+              (lambda (db)
+                (sqlite3:execute db "INSERT OR REPLACE INTO metadat (var,val) VALUES (?,?);" var val))))
+
+;; Delete the metadat row(s) for var.
+;;
+(define (db:del-var dbstruct var)
+  (db:with-db dbstruct #f #t
+              (lambda (db)
+                (sqlite3:execute db "DELETE FROM metadat WHERE var=?;" var))))
+
+;;======================================================================
+;; no-sync.db - small bits of data to be shared between servers
+;;======================================================================
+
+;; Open <db:dbfile-path>/no-sync.db and return the handle.
+;; When the file did not exist before the open, the pragmas and the
+;; no_sync_metadat table are set up as well.
+;;
+(define (db:open-no-sync-db)
+  (let* ((dbpath    (db:dbfile-path))
+         (dbname    (conc dbpath "/no-sync.db"))
+         (db-exists (common:file-exists? dbname)) ;; must be sampled before the open
+         (db        (sqlite3:open-database dbname)))
+    (sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 136000))
+    (if (not db-exists)
+        (begin
+          (sqlite3:execute db "PRAGMA synchronous = 0;")
+          (sqlite3:execute db "CREATE TABLE IF NOT EXISTS no_sync_metadat (var TEXT,val TEXT, CONSTRAINT no_sync_metadat_constraint UNIQUE (var));")
+          (sqlite3:execute db "PRAGMA journal_mode=WAL;")))
+    db))
+
+;; if we are not a server create a db handle. this is not finalized
+;; so watch for problems. I'm still not clear if it is needed to manually
+;; finalize sqlite3 dbs with the sqlite3 egg.
+;;
+;; Returns db-in when it is not #f; otherwise opens no-sync.db, stores the
+;; new handle in *no-sync-db* and returns it. Runs under *db-access-mutex*.
+;; NOTE(review): an existing *no-sync-db* is not reused when db-in is #f,
+;; a fresh handle is opened on every such call - confirm callers pass one.
+;;
+(define (db:no-sync-db db-in)
+  (mutex-lock! *db-access-mutex*)
+  (let ((res (if db-in
+                 db-in
+                 (let ((db (db:open-no-sync-db)))
+                   (set! *no-sync-db* db)
+                   db))))
+    (mutex-unlock! *db-access-mutex*)
+    res))
+
+;; Insert or replace var=val in no_sync_metadat.
+;;
+(define (db:no-sync-set db var val)
+  (sqlite3:execute (db:no-sync-db db) "INSERT OR REPLACE INTO no_sync_metadat (var,val) VALUES (?,?);" var val))
+
+;; Delete var from no_sync_metadat.
+;;
+(define (db:no-sync-del! db var)
+  (sqlite3:execute (db:no-sync-db db) "DELETE FROM no_sync_metadat WHERE var=?;" var))
+
+;; Fetch var from no_sync_metadat. Returns default when there is no row;
+;; a string value that parses with string->number is returned as a number.
+;;
+(define (db:no-sync-get/default db var default)
+  (let ((res default))
+    (sqlite3:for-each-row
+     (lambda (val)
+       (set! res val))
+     (db:no-sync-db db)
+     "SELECT val FROM no_sync_metadat WHERE var=?;"
+     var)
+    (if res
+        (let ((newres (if (string? res)
+                          (string->number res)
+                          #f)))
+          (if newres
+              newres
+              res))
+        res)))
+
+;; Close a no-sync db handle via db:safely-close-sqlite3-db.
+;;
+(define (db:no-sync-close-db db)
+  (db:safely-close-sqlite3-db db))
+
+;; transaction protected lock acquisition
+;; either:
+;;    fails     returns (#f . lock-creation-time)
+;;    succeeds  returns (#t . lock-creation-time)
+;; use (db:no-sync-del! db keyname) to release the lock
+;;
+;; Note the argument order of handle-exceptions: the SELECT is the body
+;; and the INSERT is the handler, so the lock is only created when the
+;; lookup raises. NOTE(review): presumably sqlite3:first-result raises
+;; when no row exists for keyname - confirm against the sqlite3 egg.
+;;
+(define (db:no-sync-get-lock db-in keyname)
+  (let ((db (db:no-sync-db db-in)))
+    (sqlite3:with-transaction
+     db
+     (lambda ()
+       (handle-exceptions
+        exn
+        (let ((lock-time (current-seconds)))
+          (sqlite3:execute db "INSERT INTO no_sync_metadat (var,val) VALUES(?,?);" keyname lock-time)
+          `(#t . ,lock-time))
+        `(#f . ,(sqlite3:first-result db "SELECT val FROM no_sync_metadat WHERE var=?;" keyname)))))))
+
+
+
+;; use a global for some primitive caching, it is just silly to
+;; re-read the db over and over again for the keys since they never
+;; change
+
+;; why get the keys from the db? why not get from the *configdat*
+;; using keys:config-get-fields?
+
+;; Returns the list of key field names from the keys table. Rows are read
+;; ORDER BY id DESC and consed up, so the result is in ascending id order.
+;; (The caching mentioned above is commented out.)
+;;
+(define (db:get-keys dbstruct)
+  ;; (if *db-keys* *db-keys*
+  (let ((res '()))
+    (db:with-db dbstruct #f #f
+                (lambda (db)
+                  (sqlite3:for-each-row
+                   (lambda (key)
+                     (set! res (cons key res)))
+                   db
+                   "SELECT fieldname FROM keys ORDER BY id DESC;")))
+    ;; (set! *db-keys* res)
+    res)) ;; )
+
+;; look up values in a header/data structure
+;;
+;; row    - vector of values (or #f)
+;; header - list of field names, positionally matching row
+;; field  - name to look up
+;;
+;; Returns the row element at the position of field in header; #f when
+;; header is empty, row is #f, field is not in header, or the vector-ref
+;; raises (row shorter than header; a warning is logged in that case).
+;;
+(define (db:get-value-by-header row header field)
+  (if (or (null? header) (not row))
+      #f
+      (let loop ((hed (car header))
+                 (tal (cdr header))
+                 (n   0))
+        (if (equal? hed field)
+            (handle-exceptions
+             exn
+             (begin
+               (debug:print 0 *default-log-port* "WARNING: attempt to read non-existant field, row=" row " header=" header " field=" field)
+               #f)
+             (vector-ref row n))
+            (if (null? tal) #f (loop (car tal)(cdr tal)(+ n 1)))))))
+
+;; Accessors for the header/data structure
+;; get rows and header from a (vector header rows) result
+(define (db:get-header vec)(vector-ref vec 0))
+(define (db:get-rows vec)(vector-ref vec 1))
+
+;;======================================================================
+;;  R U N S
+;;======================================================================
+
+
+
+
+
+;; Returns a list of #(runname runtime target) vectors, one per run whose
+;; runname matches run-patt and whose target matches target-patt (both SQL
+;; LIKE patterns, bound as parameters). runtime is
+;; (max(end_time)-min(event_time))/60 over the tests of the run.
+;;
+(define (db:get-run-times dbstruct run-patt target-patt)
+  (let ((res `())
+        (qry (conc "select runname, (max(end_time)-min(event_time))/60 as runtime, target from (select runname, run_id,tests.event_time,tests.event_time+run_duration AS end_time, " (string-join (db:get-keys dbstruct) " || '/' || ") " as target from tests inner join runs on tests.run_id = runs.id where runs.runname like ? and target like ?) group by run_id ;")))
+    ;(print qry)
+    (db:with-db
+     dbstruct
+     #f ;; this is for the main runs db
+     #f ;; does not modify db
+     (lambda (db)
+       (sqlite3:for-each-row
+        (lambda (runname runtime target )
+          (set! res (cons (vector runname runtime target) res)))
+        db
+        qry
+        run-patt target-patt)
+
+       res))))
+
+
+
+;; Returns the runname for run-id, or #f when there is no such run.
+;;
+(define (db:get-run-name-from-id dbstruct run-id)
+  (db:with-db
+   dbstruct
+   #f ;; this is for the main runs db
+   #f ;; does not modify db
+   (lambda (db)
+     (let ((res #f))
+       (sqlite3:for-each-row
+        (lambda (runname)
+          (set! res runname))
+        db
+        "SELECT runname FROM runs WHERE id=?;"
+        run-id)
+       res))))
+
+;; Returns the value of column key for run-id, or #f when there is no row.
+;; key is spliced into the SQL text as a column name, so it must be a
+;; trusted identifier.
+;;
+(define (db:get-run-key-val dbstruct run-id key)
+  (db:with-db
+   dbstruct
+   #f
+   #f
+   (lambda (db)
+     (let ((res #f))
+       (sqlite3:for-each-row
+        (lambda (val)
+          (set! res val))
+        db
+        (conc "SELECT " key " FROM runs WHERE id=?;")
+        run-id)
+       res))))
+
+;; register a test run with the db, this accesses the main.db and does NOT
+;; use server api
+;;
+;; keyvals    - list of (key value) lists
+;; runname    - run name; #f makes the call fail
+;; state, status, user - stored in the new row (user goes to owner)
+;; contour-in - contour or #f (stored as "")
+;;
+;; Inserts the run if it does not exist (INSERT OR IGNORE), looks up its id
+;; by runname and key values, revives it with the given state/status if it
+;; was in state 'deleted', and returns the id. Returns #f after logging an
+;; error when the argument check fails.
+;;
+(define (db:register-run dbstruct keyvals runname state status user contour-in)
+  (let* ((keys      (map car keyvals))
+         (keystr    (keys->keystr keys))
+         (contour   (or contour-in "")) ;; empty string to force no hierarchy and be backwards compatible.
+         (comma     (if (> (length keys) 0) "," ""))
+         (andstr    (if (> (length keys) 0) " AND " ""))
+         (valslots  (keys->valslots keys)) ;; ?,?,? ...
+         (allvals   (append (list runname state status user contour) (map cadr keyvals)))
+         (qryvals   (append (list runname) (map cadr keyvals)))
+         (key=?str  (string-intersperse (map (lambda (k)(conc k "=?")) keys) " AND ")))
+    (debug:print 3 *default-log-port* "keys: " keys " allvals: " allvals " keyvals: " keyvals " key=?str is " key=?str)
+    (debug:print 2 *default-log-port* "NOTE: using target " (string-intersperse (map cadr keyvals) "/") " for this run")
+    ;; NOTE(review): the filter tests the (key value) entries themselves,
+    ;; not the values, so a #f value inside an entry is not caught here -
+    ;; confirm whether (cadr x) was intended.
+    (if (and runname (null? (filter (lambda (x)(not x)) keyvals))) ;; there must be a better way to "apply and"
+        (db:with-db
+         dbstruct #f #f
+         (lambda (db)
+           (let ((res #f))
+             (apply sqlite3:execute db (conc "INSERT OR IGNORE INTO runs (runname,state,status,owner,event_time,contour" comma keystr ") VALUES (?,?,?,?,strftime('%s','now'),?" comma valslots ");")
+                    allvals)
+             (apply sqlite3:for-each-row
+                    (lambda (id)
+                      (set! res id))
+                    db
+                    (let ((qry (conc "SELECT id FROM runs WHERE (runname=? " andstr key=?str ");")))
+                      qry)
+                    qryvals)
+             ;; only touches the row when it was previously marked deleted
+             (sqlite3:execute db "UPDATE runs SET state=?,status=?,event_time=strftime('%s','now') WHERE id=? AND state='deleted';" state status res)
+             res)))
+        (begin
+          (debug:print-error 0 *default-log-port* "Called without all necessary keys")
+          #f))))
+
+;; replace header and keystr with a call to runs:get-std-run-fields
+;;
+;; keypatts: ( (KEY1 "abc%def")(KEY2 "%") )
+;; runpatts: patt1,patt2 ...
+;;
+;; Returns (vector header rows): header is the key names followed by
+;; id, runname, state, status, owner, event_time; rows is a list of
+;; vectors in that column order for runs not in state 'deleted'.
+;; count/offset are applied as LIMIT/OFFSET only when they are numbers.
+;; The WHERE conditions are produced as text by db:patt->like.
+;;
+(define (db:get-runs dbstruct runpatt count offset keypatts)
+  (let* ((res        '())
+         (keys       (db:get-keys dbstruct))
+         (runpattstr (db:patt->like "runname" runpatt))
+         (remfields  (list "id" "runname" "state" "status" "owner" "event_time"))
+         (header     (append keys remfields))
+         (keystr     (conc (keys->keystr keys) ","
+                           (string-intersperse remfields ",")))
+         (qrystr     (conc "SELECT " keystr " FROM runs WHERE (" runpattstr ") " ;; runname LIKE ? "
+                           ;; Generate: " AND x LIKE 'keypatt' ..."
+                           (if (null? keypatts) ""
+                               (conc " AND "
+                                     (string-join
+                                      (map (lambda (keypatt)
+                                             (let ((key  (car keypatt))
+                                                   (patt (cadr keypatt)))
+                                               (db:patt->like key patt)))
+                                           keypatts)
+                                      " AND ")))
+                           " AND state != 'deleted' ORDER BY event_time DESC "
+                           (if (number? count)
+                               (conc " LIMIT " count)
+                               "")
+                           (if (number? offset)
+                               (conc " OFFSET " offset)
+                               ""))))
+    (debug:print-info 11 *default-log-port* "db:get-runs START qrystr: " qrystr " keypatts: " keypatts " offset: " offset " limit: " count)
+    (db:with-db dbstruct #f #f
+                (lambda (db)
+                  (sqlite3:for-each-row
+                   (lambda (a . x)
+                     (set! res (cons (apply vector a x) res)))
+                   db
+                   qrystr
+                   )))
+    (debug:print-info 11 *default-log-port* "db:get-runs END qrystr: " qrystr " keypatts: " keypatts " offset: " offset " limit: " count)
+    (vector header res)))
+
+
+;; Record for one row returned by db:simple-get-runs.
+(define-record simple-run target id runname state status owner event_time)
+;; Printer shows target, id, runname and the event time as a local time string.
+(define-record-printer (simple-run x out)
+  (fprintf out "#,(simple-run ~S ~S ~S ~S)"
+           (simple-run-target x) (simple-run-id x) (simple-run-runname x) (time->string (seconds->local-time (simple-run-event_time x) ))))
+
+;; simple get-runs
+;;
+;; Returns a list of simple-run records for runs whose name matches runpatt
+;; and whose target (the key columns joined with '/') is LIKE target,
+;; excluding runs in state 'deleted'. count/offset are applied as
+;; LIMIT/OFFSET only when they are numbers.
+;;
+(define (db:simple-get-runs dbstruct runpatt count offset target)
+  (let* ((res        '())
+         (keys       (db:get-keys dbstruct))
+         (runpattstr (db:patt->like "runname" runpatt))
+         (remfields  (list "id" "runname" "state" "status" "owner" "event_time"))
+         (targstr    (string-intersperse keys "||'/'||"))
+         (keystr     (conc targstr " AS target,"
+                           (string-intersperse remfields ",")))
+         (qrystr     (conc "SELECT " keystr " FROM runs WHERE (" runpattstr ") " ;; runname LIKE ? "
+                           ;; the target pattern is bound as a parameter rather
+                           ;; than spliced between quotes, so a pattern with a
+                           ;; ' in it can no longer break or alter the statement
+                           " AND target LIKE ?"
+                           " AND state != 'deleted' ORDER BY event_time DESC "
+                           (if (number? count)
+                               (conc " LIMIT " count)
+                               "")
+                           (if (number? offset)
+                               (conc " OFFSET " offset)
+                               ""))))
+    (debug:print-info 11 *default-log-port* "db:get-runs START qrystr: " qrystr " target: " target " offset: " offset " limit: " count)
+    (db:with-db dbstruct #f #f
+                (lambda (db)
+                  (sqlite3:for-each-row
+                   (lambda (target id runname state status owner event_time)
+                     (set! res (cons (make-simple-run target id runname state status owner event_time) res)))
+                   db
+                   qrystr
+                   ;; conc keeps the old stringification of non-string targets
+                   (conc target)
+                   )))
+    (debug:print-info 11 *default-log-port* "db:get-runs END qrystr: " qrystr " target: " target " offset: " offset " limit: " count)
+    res))
+
+;; TODO: Switch this to use max(update_time) from each run db?
Then if using a server there is no disk traffic (using inmem db)
+;;
+;; since-time - modification time in seconds; only db files newer than
+;;              this are reported
+;;
+;; Returns the de-duplicated list of run ids taken from the names of the
+;; <dbdir>/<number>.db files modified after since-time. A file whose name
+;; cannot be parsed is logged and reported as run id 0.
+;;
+(define (db:get-changed-run-ids since-time)
+  (let* ((dbdir      (db:dbfile-path)) ;; (configf:lookup *configdat* "setup" "dbdir"))
+         (alldbs     (glob (conc dbdir "/[0-9]*.db")))
+         (changed    (filter (lambda (dbfile)
+                               (> (file-modification-time dbfile) since-time))
+                             alldbs)))
+    (delete-duplicates
+     (map (lambda (dbfile)
+            ;; capture the whole digit run; the previous (\d)* form only
+            ;; kept a single digit, so e.g. 12.db was reported as run 2
+            (let* ((res (string-match ".*\\/(\\d+)\\.db" dbfile)))
+              (if res
+                  (string->number (cadr res))
+                  (begin
+                    (debug:print 2 *default-log-port* "WARNING: Failed to process " dbfile " for run-id")
+                    0))))
+          changed))))
+
+;; Get all targets from the db
+;;
+;; Returns (vector header rows): header is the list of key names, rows is
+;; a list of vectors holding each distinct key-value combination found in
+;; runs that are not in state 'deleted'.
+;;
+(define (db:get-targets dbstruct)
+  (let* ((res       '())
+         (keys      (db:get-keys dbstruct))
+         (header    keys) ;; (map key:get-fieldname keys))
+         (keystr    (keys->keystr keys))
+         (qrystr    (conc "SELECT " keystr " FROM runs WHERE state != 'deleted';"))
+         (seen      (make-hash-table))) ;; targets already added to res
+    (db:with-db
+     dbstruct
+     #f
+     #f
+     (lambda (db)
+       (sqlite3:for-each-row
+        (lambda (a . x)
+          (let ((targ (cons a x)))
+            (if (not (hash-table-ref/default seen targ #f))
+                (begin
+                  (hash-table-set! seen targ #t)
+                  (set! res (cons (apply vector targ) res))))))
+        db
+        qrystr)
+       (debug:print-info 11 *default-log-port* "db:get-targets END qrystr: " qrystr )
+       (vector header res)))))
+
+;; just get count of runs
+;;
+;; Returns the number of runs whose runname is LIKE runpatt and that are
+;; not in state 'deleted'.
+;;
+(define (db:get-num-runs dbstruct runpatt)
+  (db:with-db
+   dbstruct
+   #f
+   #f
+   (lambda (db)
+     (let ((numruns 0))
+       (debug:print-info 11 *default-log-port* "db:get-num-runs START " runpatt)
+       (sqlite3:for-each-row
+        (lambda (count)
+          (set! numruns count))
+        db
+        "SELECT COUNT(id) FROM runs WHERE runname LIKE ? AND state != 'deleted';" runpatt)
+       (debug:print-info 11 *default-log-port* "db:get-num-runs END " runpatt)
+       numruns))))
+
+;; count of runs matching both a run name pattern and a target pattern
+;;
+;; runpatt    - LIKE pattern for runname
+;; targetpatt - target pattern or #f; split into per-key patterns with
+;;              keys:target->keyval
+;; keys       - key names used for that split
+;;
+;; A per-key pattern containing % is compared with LIKE, any other with
+;; GLOB. A key without a pattern is a fatal error (exit 6).
+;; NOTE(review): the patterns are spliced into the SQL text between single
+;; quotes; a pattern containing ' will break the statement.
+;;
+(define (db:get-runs-cnt-by-patt dbstruct runpatt targetpatt keys)
+  (db:with-db
+   dbstruct
+   #f
+   #f
+   (lambda (db)
+     (let ((numruns 0)
+           (qry-str #f)
+           (key-patt "")
+           (keyvals (if targetpatt (keys:target->keyval keys targetpatt) '())))
+
+       (for-each (lambda (keyval)
+                   (let* ((key      (car keyval))
+                          (patt     (cadr keyval))
+                          (fulkey   (conc ":" key))
+                          ;; guard on patt first: substring-index must not be
+                          ;; handed #f, otherwise the missing-pattern error
+                          ;; below could never be reached
+                          (wildtype (if (and patt (substring-index "%" patt)) "like" "glob")))
+
+                     (if patt
+                         (set! key-patt (conc key-patt " AND " key " " wildtype " '" patt "'"))
+                         (begin
+                           (debug:print-error 0 *default-log-port* "searching for runs with no pattern set for " fulkey)
+                           (exit 6)))))
+                 keyvals)
+       ;(print runpatt " -- " key-patt)
+       (set! qry-str (conc "SELECT COUNT(id) FROM runs WHERE state != 'deleted' AND runname like '" runpatt "'" key-patt))
+       ;(print qry-str )
+
+       (sqlite3:for-each-row
+        (lambda (count)
+          (set! numruns count))
+        db
+        qry-str)
+       (debug:print-info 11 *default-log-port* "db:get-num-runs END " runpatt)
+       numruns))))
+
+
+;; (sqlite3#fold-row proc3670 init3671 db-or-stmt3672 . params3673)>
+;;
+;; Returns a list of (state status count) for the tests of run-id, grouped
+;; by state and status; rows with uname='n/a' and an empty item_path are
+;; left out.
+;;
+(define (db:get-raw-run-stats dbstruct run-id)
+  (db:with-db
+   dbstruct
+   run-id
+   #f
+   (lambda (db)
+     (sqlite3:fold-row
+      (lambda (res state status count)
+        (cons (list state status count) res))
+      '()
+      db
+      "SELECT state,status,count(id) AS count FROM tests WHERE run_id=? AND NOT(uname='n/a' AND item_path='') GROUP BY state,status;"
+      run-id))))
+
+;; Update run_stats for given run_id
+;; input data is a list (state status count)
+;;
+;; For every entry the matching run_stats row is deleted and re-inserted,
+;; all inside one transaction. Returns the transaction result.
+;;
+(define (db:update-run-stats dbstruct run-id stats)
+  ;; (mutex-lock! *db-transaction-mutex*)
+  (db:with-db
+   dbstruct
+   #f
+   #f
+
+   (lambda (db)
+     ;; remove previous data
+
+     (let* ((stmt1 (sqlite3:prepare db "DELETE FROM run_stats WHERE run_id=? AND state=? AND status=?;"))
+            (stmt2 (sqlite3:prepare db "INSERT INTO run_stats (run_id,state,status,count) VALUES (?,?,?,?);"))
+            (res
+             (sqlite3:with-transaction
+              db
+              (lambda ()
+                (for-each
+                 (lambda (dat)
+                   (sqlite3:execute stmt1 run-id (car dat)(cadr dat))
+                   (apply sqlite3:execute stmt2 run-id dat))
+                 stats)))))
+       (sqlite3:finalize! stmt1)
+       (sqlite3:finalize! stmt2)
+       ;; (mutex-unlock! *db-transaction-mutex*)
+       res))))
+
+;; Returns a list of (state status count) from run_stats for run-id, or
+;; the empty list when the run is in state 'DELETED' or 'deleted'.
+;;
+(define (db:get-main-run-stats dbstruct run-id)
+  (db:with-db
+   dbstruct
+   #f ;; this data comes from main
+   #f
+   (lambda (db)
+     (sqlite3:fold-row
+      (lambda (res state status count)
+        (cons (list state status count) res))
+      '()
+      db
+      "SELECT state,status,count FROM run_stats WHERE run_id=? AND run_id IN (SELECT id FROM runs WHERE state NOT IN ('DELETED','deleted'));"
+      run-id))))
+
+;; Log avg/max/min/num of the values recorded per key in
+;; *db-api-call-time*, keys ordered by descending sum, followed by the
+;; total number of recorded calls.
+;;
+(define (db:print-current-query-stats)
+  ;; generate stats from *db-api-call-time*
+  (let ((ordered-keys (sort (hash-table-keys *db-api-call-time*)
+                            (lambda (a b)
+                              (let ((sum-a (common:sum (hash-table-ref *db-api-call-time* a)))
+                                    (sum-b (common:sum (hash-table-ref *db-api-call-time* b))))
+                                (> sum-a sum-b)))))
+        (total 0))
+    (for-each
+     (lambda (cmd-key)
+       (let* ((dat (hash-table-ref *db-api-call-time* cmd-key))
+              (num (length dat))
+              ;; explicit else arm: an empty list reports avg 0 instead of
+              ;; an unspecified value
+              (avg (if (> num 0)
+                       (/ (common:sum dat) num)
+                       0)))
+         (set! total (+ total num))
+         (debug:print-info 0 *default-log-port* cmd-key "\tavg: " avg " max: " (common:max dat) " min: " (common:min-max < dat) " num: " (length dat))))
+     ordered-keys)
+    (debug:print-info 0 *default-log-port* "TOTAL: " total " api calls since start.")))
+
+;; Returns the ids of all runs not in state 'deleted', newest event_time
+;; first.
+;;
+(define (db:get-all-run-ids dbstruct)
+  (db:with-db
+   dbstruct
+   #f
+   #f
+   (lambda (db)
+     (let ((run-ids '()))
+       (sqlite3:for-each-row
+        (lambda (run-id)
+          (set! run-ids (cons run-id run-ids)))
+        db
+        "SELECT id FROM runs WHERE state != 'deleted' ORDER BY event_time DESC;")
+       ;; rows were consed up in reverse, restore query order
+       (reverse run-ids)))))
+
+;; get some basic run stats
+;;
+;; data structure:
+;;
+;;  ( (runname (( state  count ) ... ))
+;;    (   ...
+;;
+;; The value built below is a flat list of (run-name state count) entries,
+;; one per run and net state, plus ("Totals" state count) entries summed over
+;; all runs. The net state of a test is its status when its state is
+;; "COMPLETED", otherwise its state.
+;;
+(define (db:get-run-stats dbstruct)
+  (let* ((totals    (make-hash-table)) ;; net state -> count over all runs
+         (curr      (make-hash-table)) ;; net state -> count for the run in hand
+         (res       '())
+         (runs-info '()))
+    ;; First get all the runname/run-ids
+    (db:with-db
+     dbstruct #f #f
+     (lambda (db)
+       (sqlite3:for-each-row
+        (lambda (run-id runname)
+          (set! runs-info (cons (list run-id runname) runs-info)))
+        db
+        "SELECT id,runname FROM runs WHERE state != 'deleted' ORDER BY event_time DESC;"))) ;; If you change this to the more logical ASC please adjust calls to db:get-run-stats
+    ;; for each run get stats data
+    (for-each
+     (lambda (run-info)
+       ;; get the net state/status counts for this run
+       (let* ((run-id   (car run-info))
+              (run-name (cadr run-info)))
+         (db:with-db
+          dbstruct
+          run-id
+          #f
+          (lambda (db)
+            (sqlite3:for-each-row
+             (lambda (state status count)
+               (let ((netstate (if (equal? state "COMPLETED") status state)))
+                 (if (string? netstate)
+                     (begin
+                       (hash-table-set! totals netstate (+ (hash-table-ref/default totals netstate 0) count))
+                       (hash-table-set! curr netstate (+ (hash-table-ref/default curr netstate 0) count))))))
+             db
+             "SELECT state,status,count(id) FROM tests AS t WHERE run_id=? GROUP BY state,status ORDER BY state,status DESC;"
+             run-id)
+            ;; add the per run counts to res
+            (for-each (lambda (state)
+                        (set! res (cons (list run-name state (hash-table-ref curr state)) res)))
+                      (sort (hash-table-keys curr) string>=))
+            ;; start the next run with empty per run counts
+            (set! curr (make-hash-table))))))
+     runs-info)
+    (for-each (lambda (state)
+                (set! res (cons (list "Totals" state (hash-table-ref totals state)) res)))
+              (sort (hash-table-keys totals) string>=))
+    res))
+
+;; db:get-runs-by-patt
+;; get runs by list of criteria
+;;
+;; Returns (vector header rows) where each row is a vector of the requested
+;; fields. Only runs whose state is not 'deleted' are considered.
+;;
+;; runnamepatt - pattern for runname, #f is treated as "%". A pattern that
+;; contains "%" is matched with LIKE, any other with GLOB.
+;; targpatt - target pattern, split into per-key patterns with
+;; keys:target->keyval; #f means no key filtering.
+;; offset,limit - optional OFFSET / LIMIT values
+;; fields - run fields to return, defaults to id, runname, state,
+;; status, owner and event_time
+;; last-update - when set only runs with last_update >= this value
+;; sort-order - placed verbatim after "ORDER BY event_time"
+;;
+;; Use: (db:get-value-by-header (db:get-header runinfo)(db:get-rows runinfo))
+;; to extract info from the structure returned
+;;
+(define (db:get-runs-by-patt dbstruct keys runnamepatt targpatt offset limit fields last-update sort-order ) ;; test-name)
+  (let* ((tmp         (runs:get-std-run-fields keys (or fields '("id" "runname" "state" "status" "owner" "event_time"))))
+         (keystr      (car tmp))
+         (header      (cadr tmp))
+         (key-patt    "")  ;; " AND <key> like|glob ?" clause per key
+         (key-params  '()) ;; the patterns for those placeholders, newest first
+         ;; the pattern that is bound must be the one the like/glob choice
+         ;; was based on, so default it once and use it for both
+         (runpatt     (or runnamepatt "%"))
+         (runwildtype (if (substring-index "%" runpatt) "like" "glob"))
+         (qry-str     #f)
+         (keyvals     (if targpatt (keys:target->keyval keys targpatt) '())))
+    (for-each (lambda (keyval)
+                (let* ((key    (car keyval))
+                       (patt   (cadr keyval))
+                       (fulkey (conc ":" key)))
+                  ;; test patt before handing it to substring-index, otherwise
+                  ;; a missing pattern fails there and the message is never seen
+                  (if patt
+                      (let ((wildtype (if (substring-index "%" patt) "like" "glob")))
+                        (set! key-patt   (conc key-patt " AND " key " " wildtype " ?"))
+                        (set! key-params (cons patt key-params)))
+                      (begin
+                        (debug:print-error 0 *default-log-port* "searching for runs with no pattern set for " fulkey)
+                        (exit 6)))))
+              keyvals)
+    (set! qry-str (conc "SELECT " keystr " FROM runs WHERE state != 'deleted' AND runname " runwildtype " ? " key-patt
+                        (if last-update
+                            (conc " AND last_update >= " last-update " ")
+                            " ")
+                        " ORDER BY event_time " sort-order " "
+                        (if limit (conc " LIMIT " limit) "")
+                        (if offset (conc " OFFSET " offset) "")
+                        ";"))
+    (debug:print-info 4 *default-log-port* "runs:get-runs-by-patt qry=" qry-str " " runnamepatt)
+
+    (vector header
+            (reverse
+             (db:with-db dbstruct #f #f ;; reads db, does not write to it.
+                         (lambda (db)
+                           ;; parameters follow the placeholder order: the run
+                           ;; name pattern first, then the key patterns
+                           (apply sqlite3:fold-row
+                                  (lambda (res . r)
+                                    (cons (list->vector r) res))
+                                  '()
+                                  db
+                                  qry-str
+                                  runpatt
+                                  (reverse key-params))))))))
+
+;; use (get-value-by-header (db:get-header runinfo)(db:get-rows runinfo))
+;; NOTE: Does NOT return a list of rows (or one row) for the first slot of the vector
+;; this is inconsistent with get-runs but it makes some sense.
+;;
+;; Returns (vector header row): header is the key names followed by the fixed
+;; run fields, row is a vector with the matching values for run-id. When no
+;; run has that id the row is (vector #f #f #f #f).
+;;
+(define (db:get-run-info dbstruct run-id)
+  ;;(if (hash-table-ref/default *run-info-cache* run-id #f)
+  ;;    (hash-table-ref *run-info-cache* run-id)
+  (let* ((res       (vector #f #f #f #f))
+         (keys      (db:get-keys dbstruct))
+         (remfields (list "id" "runname" "state" "status" "owner" "event_time" "comment" "fail_count" "pass_count" "contour" "last_update")) ;; "area_id"))
+         (header    (append keys remfields))
+         (keystr    (conc (keys->keystr keys) ","
+                          (string-intersperse remfields ","))))
+    (debug:print-info 11 *default-log-port* "db:get-run-info run-id: " run-id " header: " header " keystr: " keystr)
+
+    (db:with-db
+     dbstruct #f #f
+     (lambda (db)
+       (sqlite3:for-each-row
+        (lambda (a . x)
+          (set! res (apply vector a x)))
+        db
+        (conc "SELECT " keystr " FROM runs WHERE id=?;")
+        run-id)))
+    (debug:print-info 11 *default-log-port* "db:get-run-info run-id: " run-id " header: " header " keystr: " keystr)
+    (let ((finalres (vector header res)))
+      ;; (hash-table-set! *run-info-cache* run-id finalres)
+      finalres)))
+
+;; Store comment as the comment of run run-id.
+;;
+(define (db:set-comment-for-run dbstruct run-id comment)
+  (db:with-db
+   dbstruct #f #f
+   (lambda (db)
+     (sqlite3:execute db "UPDATE runs SET comment=? WHERE id=?;" comment ;; (sdb:qry 'getid comment)
+                      run-id))))
+
+;; Soft delete of a run: the test_steps and test_data rows of its tests are removed, the tests are marked 'DELETED' and the run 'deleted'.
+(define (db:delete-run dbstruct run-id)
+  ;; One transaction: drop the step and data rows that belong to the tests of
+  ;; the run, then flag those tests 'DELETED' and the run itself 'deleted'.
+  (db:with-db
+   dbstruct #f #f
+   (lambda (db)
+     ;; each statement takes the run id as its single parameter
+     (define (exec-for-run sql)
+       (sqlite3:execute db sql run-id))
+     (sqlite3:with-transaction
+      db
+      (lambda ()
+        (exec-for-run "DELETE FROM test_steps WHERE test_id IN (SELECT id FROM tests WHERE run_id=?);")
+        (exec-for-run "DELETE FROM test_data WHERE test_id IN (SELECT id FROM tests WHERE run_id=?);")
+        (exec-for-run "UPDATE tests SET state='DELETED',comment='' WHERE run_id=?;")
+        (exec-for-run "UPDATE runs SET state='deleted',comment='' WHERE id=?;"))))))
+
+(define (db:update-run-event_time dbstruct run-id)
+  ;; set event_time of the run to sqlite's idea of "now" in epoch seconds
+  (let ((sql "UPDATE runs SET event_time=strftime('%s','now') WHERE id=?;"))
+    (db:with-db
+     dbstruct #f #t
+     (lambda (db)
+       (sqlite3:execute db sql run-id)))))
+
+(define (db:lock/unlock-run dbstruct run-id lock unlock user)
+  ;; lock takes precedence over unlock; with neither given the run ends up
+  ;; locked (semi-failsafe). The change is also recorded in access_log.
+  (let ((newlockval (cond
+                     (lock   "locked")
+                     (unlock "unlocked")
+                     (else   "locked"))))
+    (db:with-db
+     dbstruct #f #t
+     (lambda (db)
+       (sqlite3:execute db "UPDATE runs SET state=? WHERE id=?;" newlockval run-id)
+       (sqlite3:execute db "INSERT INTO access_log (user,accessed,args) VALUES(?,strftime('%s','now'),?);"
+                        user (conc newlockval " " run-id))
+       (debug:print-info 1 *default-log-port* "" newlockval " run number " run-id)))))
+
+(define (db:set-run-status dbstruct run-id status msg)
+  ;; the comment column is only written when a msg is supplied
+  (db:with-db
+   dbstruct #f #f
+   (lambda (db)
+     (cond
+      (msg  (sqlite3:execute db "UPDATE runs SET status=?,comment=? WHERE id=?;" status msg run-id))
+      (else (sqlite3:execute db "UPDATE runs SET status=? WHERE id=?;" status run-id))))))
+
+(define (db:set-run-state-status dbstruct run-id state status )
+  ;; note the placeholder order in the statement: status first, then state
+  (let ((sql "UPDATE runs SET status=?,state=? WHERE id=?;"))
+    (db:with-db
+     dbstruct #f #f
+     (lambda (db)
+       (sqlite3:execute db sql status state run-id)))))
+
+
+
+(define (db:get-run-status dbstruct run-id)
+  ;; status of the run, "n/a" when no row has that id
+  (db:with-db
+   dbstruct #f #f
+   (lambda (db)
+     (sqlite3:fold-row
+      (lambda (previous status) status)
+      "n/a"
+      db
+      "SELECT status FROM runs WHERE id=?;"
+      run-id))))
+
+(define (db:get-run-state dbstruct run-id)
+  ;; state of the run, "n/a" when no row has that id
+  (db:with-db
+   dbstruct #f #f
+   (lambda (db)
+     (sqlite3:fold-row
+      (lambda (previous state) state)
+      "n/a"
+      db
+      "SELECT state FROM runs WHERE id=?;"
+      run-id))))
+
+
+;;======================================================================
+;; K E Y S
+;;======================================================================
+
+;; Key/value pairs of a run, in key order:
+;; ( (FIELDNAME1 keyval1) (FIELDNAME2 keyval2) ... )
+(define (db:get-key-val-pairs dbstruct run-id)
+  (let ((keys  (db:get-keys dbstruct))
+        (pairs '())) ;; collected newest first, reversed on the way out
+    (db:with-db
+     dbstruct #f #f
+     (lambda (db)
+       (for-each
+        (lambda (key)
+          (set! pairs
+                (sqlite3:fold-row
+                 (lambda (acc key-val)
+                   ;; a value that is not a string is stored as "" so that
+                   ;; bad data left behind by a killed Megatest cannot crash callers
+                   (cons (list key (if (string? key-val) key-val "")) acc))
+                 pairs
+                 db
+                 (conc "SELECT " key " FROM runs WHERE id=?;")
+                 run-id)))
+        keys)))
+    (reverse pairs)))
+
+;; Key values of a run, in key order (values only)
+(define (db:get-key-vals dbstruct run-id)
+  (let ((keys (db:get-keys dbstruct))
+        (vals '())) ;; collected newest first, reversed on the way out
+    (db:with-db
+     dbstruct #f #f
+     (lambda (db)
+       (for-each
+        (lambda (key)
+          (set! vals
+                (sqlite3:fold-row
+                 (lambda (acc key-val)
+                   ;; non-string values (e.g. injected by a crash) become ""
+                   (cons (if (string? key-val) key-val "") acc))
+                 vals
+                 db
+                 (conc "SELECT " key " FROM runs WHERE id=?;")
+                 run-id)))
+        keys)))
+    (reverse vals)))
+
+;; The target of a run is its key values joined as keyval1/keyval2...;
+;; a false key value shows up as -na-
+(define (db:get-target dbstruct run-id)
+  (string-intersperse
+   (map (lambda (val) (or val "-na-"))
+        (db:get-key-vals dbstruct run-id))
+   "/"))
+
+;; Ids of the non-deleted runs that have the same key values (target) as
+;; run-id, run-id itself excluded
+;;
+(define (db:get-prev-run-ids dbstruct run-id)
+  (let* ((keyvals  (db:get-key-val-pairs dbstruct run-id))
+         (kvalues  (map cadr keyvals))
+         (keys     (db:get-keys dbstruct))
+         (keymatch (string-intersperse (map (lambda (k) (conc k "=?")) keys) " AND "))
+         (sql      (conc "SELECT id FROM runs WHERE " keymatch " AND state != 'deleted' AND id != ?;"))
+         (found    '()))
+    (cond
+     ((null? keyvals) '())
+     (else
+      (db:with-db dbstruct #f #f ;; #f means work with the zeroth db - i.e. the runs db
+                  (lambda (db)
+                    (set! found
+                          (apply sqlite3:fold-row
+                                 (lambda (acc id) (cons id acc))
+                                 '()
+                                 db
+                                 sql
+                                 (append kvalues (list run-id))))))
+      found))))
+
+;;======================================================================
+;; T E S T S
+;;======================================================================
+
+;; states and statuses are lists, turn them into ("PASS","FAIL"...) and use NOT IN
+;; i.e. these lists define what to NOT show.
+;; states and statuses are required to be lists, empty is ok +;; not-in #t = above behaviour, #f = must match +;; mode: +;; 'dashboard - use state = 'COMPLETED' AND status in ( statuses ) OR state in ( states ) +;; +(define (db:get-tests-for-run dbstruct run-id testpatt states statuses offset limit not-in sort-by sort-order qryvals last-update mode) + (let* ((qryvalstr (case qryvals + ((shortlist) "id,run_id,testname,item_path,state,status") + ((#f) db:test-record-qry-selector) ;; "id,run_id,testname,state,status,event_time,host,cpuload,diskfree,uname,rundir,item_path,run_duration,final_logf,comment") + (else qryvals))) + (res '()) + ;; if states or statuses are null then assume match all when not-in is false + (states-qry (if (null? states) + #f + (conc " state " + (if (eq? mode 'dashboard) + " IN ('" + (if not-in + " NOT IN ('" + " IN ('")) + (string-intersperse states "','") + "')"))) + (statuses-qry (if (null? statuses) + #f + (conc " status " + (if (eq? mode 'dashboard) + " IN ('" + (if not-in + " NOT IN ('" + " IN ('") ) + (string-intersperse statuses "','") + "')"))) + (interim-qry (conc " AND " (if not-in "NOT " "") "( state='COMPLETED' " (if statuses-qry (conc " AND " statuses-qry " ) ") " ) ") + (if states-qry + (conc (if not-in " AND " " OR ") states-qry ) ;; " ) ") + ""))) + (states-statuses-qry + (cond + ((and states-qry statuses-qry) + (case mode + ((dashboard) + (if not-in + (conc " AND (state='COMPLETED' AND status NOT IN ('" (string-intersperse statuses "','") "')) " + " OR (state != 'COMPLETED' AND state NOT IN ('" (string-intersperse states "','") "')) ") + (conc " AND (state='COMPLETED' AND status IN ('" (string-intersperse statuses "','") "')) " + " OR (state NOT IN ('COMPLETED','DELETED') AND state IN ('" (string-intersperse states "','") "')) "))) + (else (conc " AND ( " states-qry " AND " statuses-qry " ) ")))) + (states-qry + (case mode + ((dashboard) (conc " AND " (if not-in "NOT " "") " state IN ('" (string-intersperse states "','") "') 
")) ;; interim-qry) + (else (conc " AND " states-qry)))) + (statuses-qry + (case mode + ((dashboard) (conc " AND " (if not-in "NOT " "") " status IN ('" (string-intersperse statuses "','") "') ")) ;; interim-qry) + (else (conc " AND " statuses-qry)))) + (else ""))) + (tests-match-qry (tests:match->sqlqry testpatt)) + (qry (conc "SELECT " qryvalstr + (if run-id + " FROM tests WHERE run_id=? " + " FROM tests WHERE ? > 0 ") ;; should work? + (if last-update " " " AND state != 'DELETED' ") ;; if using last-update we want deleted tests? + states-statuses-qry + (if tests-match-qry (conc " AND (" tests-match-qry ") ") "") + (if last-update (conc " AND last_update >= " last-update " ") "") + (case sort-by + ((rundir) " ORDER BY length(rundir) ") + ((testname) (conc " ORDER BY testname " (if sort-order (conc sort-order ",") "") " item_path ")) + ((statestatus) (conc " ORDER BY state " (if sort-order (conc sort-order ",") "") " status ")) + ((event_time) " ORDER BY event_time ") + (else (if (string? sort-by) + (conc " ORDER BY " sort-by " ") + " "))) + (if sort-order sort-order " ") + (if limit (conc " LIMIT " limit) " ") + (if offset (conc " OFFSET " offset) " ") + ";" + ))) + (debug:print-info 8 *default-log-port* "db:get-tests-for-run run-id=" run-id ", qry=" qry) + (db:with-db dbstruct run-id #f + (lambda (db) + (sqlite3:for-each-row + (lambda (a . b) ;; id run-id testname state status event-time host cpuload diskfree uname rundir item-path run-duration final-logf comment) + (set! 
res (cons (apply vector a b) res))) ;; id run-id testname state status event-time host cpuload diskfree uname rundir item-path run-duration final-logf comment) res))) + db + qry + (or run-id 1) ;; 1 > 0 , for the case where we are seeking tests matching criteral for all runs + ))) + (case qryvals + ((shortlist)(map db:test-short-record->norm res)) + ((#f) res) + (else res)))) + +(define (db:test-short-record->norm inrec) + ;; "id,run_id,testname,item_path,state,status" + ;; "id,run_id,testname,state,status,event_time,host,cpuload,diskfree,uname,rundir,item_path,run_duration,final_logf,comment + (vector (vector-ref inrec 0) ;; id + (vector-ref inrec 1) ;; run_id + (vector-ref inrec 2) ;; testname + (vector-ref inrec 4) ;; state + (vector-ref inrec 5) ;; status + -1 "" -1 -1 "" "-" + (vector-ref inrec 3) ;; item-path + -1 "-" "-")) + +(define (db:get-tests-for-run-state-status dbstruct run-id testpatt) + (let* ((res '()) + (tests-match-qry (tests:match->sqlqry testpatt)) + (qry (conc "SELECT id,testname,item_path,state,status FROM tests WHERE run_id=? " + (if tests-match-qry (conc " AND (" tests-match-qry ") ") "")))) + (debug:print-info 8 *default-log-port* "db:get-tests-for-run qry=" qry) + (db:with-db dbstruct run-id #f + (lambda (db) + (sqlite3:for-each-row + (lambda (id testname item-path state status) + ;; id,run_id,testname,state,status,event_time,host,cpuload,diskfree,uname,rundir,item_path,run_duration,final_logf,comment + (set! res (cons (vector id run-id testname state status -1 "" -1 -1 "" "-" item-path -1 "-" "-") res))) + db + qry + run-id))) + res)) + +(define (db:get-testinfo-state-status dbstruct run-id test-id) + (let ((res #f)) + (db:with-db dbstruct run-id #f + (lambda (db) + (sqlite3:for-each-row + (lambda (run-id testname item-path state status) + ;; id,run_id,testname,state,status,event_time,host,cpuload,diskfree,uname,rundir,item_path,run_duration,final_logf,comment + (set! 
res (vector test-id run-id testname state status -1 "" -1 -1 "" "-" item-path -1 "-" "-"))) + db + "SELECT run_id,testname,item_path,state,status FROM tests WHERE id=?;" + test-id))) + res)) + +;; get a useful subset of the tests data (used in dashboard +;; use db:mintest-get-{id ,run_id,testname ...} +;; +(define (db:get-tests-for-run-mindata dbstruct run-id testpatt states statuses not-in) + (db:get-tests-for-run dbstruct run-id testpatt states statuses #f #f not-in #f #f "id,run_id,testname,state,status,event_time,item_path" 0 #f)) + +;; do not use. +;; +(define (db:get-tests-for-runs dbstruct run-ids testpatt states statuses #!key (not-in #f)(qryvals #f)) + ;; (db:delay-if-busy) + (let ((res '())) + (for-each + (lambda (run-id) + (set! res (append + res + (db:get-tests-for-run dbstruct run-id testpatt states statuses #f #f not-in #f #f qryvals #f 'normal)))) + (if run-ids + run-ids + (db:get-all-run-ids dbstruct))) + res)) + +;; Convert calling routines to get list of run-ids and loop, do not use the get-tests-for-runs +;; + +(define (db:delete-test-records dbstruct run-id test-id) + (db:general-call dbstruct 'delete-test-step-records (list test-id)) + (db:general-call dbstruct 'delete-test-data-records (list test-id)) + (db:with-db + dbstruct #f #f + (lambda (db) + (sqlite3:execute db "UPDATE tests SET state='DELETED',status='n/a',comment='' WHERE id=?;" test-id)))) + +;; +(define (db:delete-old-deleted-test-records dbstruct) + (let (;; (run-ids (db:get-all-run-ids dbstruct)) + (targtime (- (current-seconds)(* 30 24 60 60)))) ;; one month in the past + (db:with-db + dbstruct + 0 + #t + (lambda (db) + (sqlite3:with-transaction + db + (lambda () + (sqlite3:execute db "DELETE FROM test_steps WHERE test_id IN (SELECT id FROM tests WHERE state='DELETED' AND event_timenumber fieldname fields) + (if (null? fields) + #f + (let loop ((hed (car fields)) + (tal (cdr fields)) + (indx 0)) + (if (equal? fieldname hed) + indx + (if (null? 
tal) + #f + (loop (car tal)(cdr tal)(+ indx 1))))))) + +(define db:test-record-qry-selector (string-intersperse db:test-record-fields ",")) + + +;; NOTE: Use db:test-get* to access records +;; NOTE: This needs rundir decoding? Decide, decode here or where used? For the moment decode where used. +(define (db:get-all-tests-info-by-run-id dbstruct run-id) + (let* ((res '())) + (db:with-db + dbstruct #f #f + (lambda (db) + (sqlite3:for-each-row + (lambda (id run-id testname state status event-time host cpuload diskfree uname rundir item-path run-duration final-logf comment shortdir attemptnum archived) + ;; 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 + (set! res (cons (vector id run-id testname state status event-time host cpuload diskfree uname rundir item-path run-duration final-logf comment shortdir attemptnum archived) + res))) + db + (conc "SELECT " db:test-record-qry-selector " FROM tests WHERE state != 'DELETED' AND run_id=?;") + run-id))) + res)) + +(define (db:replace-test-records dbstruct run-id testrecs) + (db:with-db dbstruct run-id #t + (lambda (db) + (let* ((qmarks (string-intersperse (make-list (length db:test-record-fields) "?") ",")) + (qrystr (conc "INSERT OR REPLACE INTO tests (" db:test-record-qry-selector ") VALUES (" qmarks ") WHERE run_id=?;")) + (qry (sqlite3:prepare db qrystr))) + (debug:print 0 *default-log-port* "INFO: migrating test records for run with id " run-id) + (sqlite3:with-transaction + db + (lambda () + (for-each + (lambda (rec) + ;; (debug:print 0 *default-log-port* "INFO: Inserting values: " (string-intersperse (map conc (vector->list rec)) ",") "\n") + (apply sqlite3:execute qry (append (vector->list rec)(list run-id)))) + testrecs))) + (sqlite3:finalize! qry))))) + +;; map a test-id into the proper range +;; +(define (db:adj-test-id mtdb min-test-id test-id) + (if (>= test-id min-test-id) + test-id + (let loop ((new-id min-test-id)) + (let ((test-id-found #f)) + (sqlite3:for-each-row + (lambda (id) + (set! 
test-id-found id)) + (db:dbdat-get-db mtdb) + "SELECT id FROM tests WHERE id=?;" + new-id) + ;; if test-id-found then need to try again + (if test-id-found + (loop (+ new-id 1)) + (begin + (debug:print-info 0 *default-log-port* "New test id " new-id " selected for test with id " test-id) + (sqlite3:execute mtdb "UPDATE tests SET id=? WHERE id=?;" new-id test-id))))))) + +;; move test ids into the 30k * run_id range +;; +(define (db:prep-megatest.db-adj-test-ids mtdb run-id testrecs) + (debug:print-info 0 *default-log-port* "Adjusting test ids in megatest.db for run " run-id) + (let ((min-test-id (* run-id 30000))) + (for-each + (lambda (testrec) + (let* ((test-id (vector-ref testrec (db:field->number "id" db:test-record-fields)))) + (db:adj-test-id (db:dbdat-get-db mtdb) min-test-id test-id))) + testrecs))) + +;; 1. move test ids into the 30k * run_id range +;; 2. move step ids into the 30k * run_id range +;; +(define (db:prep-megatest.db-for-migration mtdb) + (let* ((run-ids (db:get-all-run-ids mtdb))) + (for-each + (lambda (run-id) + (let ((testrecs (db:get-all-tests-info-by-run-id mtdb run-id))) + (db:prep-megatest.db-adj-test-ids (db:dbdat-get-db mtdb) run-id testrecs))) + run-ids))) + +;; Get test data using test_id, run-id is not used +;; +(define (db:get-test-info-by-id dbstruct run-id test-id) + (db:with-db + dbstruct + #f ;; run-id + #f + (lambda (db) + (let ((res #f)) + (sqlite3:for-each-row ;; attemptnum added to hold pid of top process (not Megatest) controlling a test + (lambda (id run-id testname state status event-time host cpuload diskfree uname rundir-id item-path run_duration final-logf-id comment short-dir-id attemptnum archived last-update) + ;; 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 + (set! 
res (vector id run-id testname state status event-time host cpuload diskfree uname rundir-id item-path run_duration final-logf-id comment short-dir-id attemptnum archived last-update))) + db + (conc "SELECT " db:test-record-qry-selector " FROM tests WHERE id=?;") + test-id) + res)))) + +;; Use db:test-get* to access +;; Get test data using test_ids. NB// Only works within a single run!! +;; +(define (db:get-test-info-by-ids dbstruct run-id test-ids) + (db:with-db + dbstruct + run-id + #f + (lambda (db) + (let ((res '())) + (sqlite3:for-each-row + (lambda (a . b) + ;; 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 + (set! res (cons (apply vector a b) res))) + db + (conc "SELECT " db:test-record-qry-selector " FROM tests WHERE id in (" + (string-intersperse (map conc test-ids) ",") ");")) + res)))) + +(define (db:get-test-info dbstruct run-id test-name item-path) + (db:with-db + dbstruct + run-id + #f + (lambda (db) + (let ((res #f)) + (sqlite3:for-each-row + (lambda (a . b) + (set! res (apply vector a b))) + db + (conc "SELECT " db:test-record-qry-selector " FROM tests WHERE testname=? AND item_path=? AND run_id=?;") + test-name item-path run-id) + res)))) + +(define (db:test-get-rundir-from-test-id dbstruct run-id test-id) + (db:with-db + dbstruct + run-id + #f + (lambda (db) + (db:first-result-default + db + "SELECT rundir FROM tests WHERE id=?;" + #f ;; default result + test-id)))) + +(define (db:get-test-times dbstruct run-name target) + (let ((res `()) + (qry (conc "select testname, item_path, run_duration, " (string-join (db:get-keys dbstruct) " || '/' || ") " as target from tests inner join runs on tests.run_id = runs.id where runs.runname = ? and target = ? ;"))) + + (db:with-db + dbstruct + #f ;; this is for the main runs db + #f ;; does not modify db + (lambda (db) + (sqlite3:for-each-row + (lambda (test-name item-path test-time target ) + (set! 
res (cons (vector test-name item-path test-time) res))) + db + qry + run-name target) + res)))) + +(define (db:get-test-times dbstruct run-name target) + (let ((res `()) + (qry (conc "select testname, item_path, run_duration, " (string-join (db:get-keys dbstruct) " || '/' || ") " as target from tests inner join runs on tests.run_id = runs.id where runs.runname = ? and target = ? ;"))) + + (db:with-db + dbstruct + #f ;; this is for the main runs db + #f ;; does not modify db + (lambda (db) + (sqlite3:for-each-row + (lambda (test-name item-path test-time target ) + (set! res (cons (vector test-name item-path test-time) res))) + db + qry + run-name target) + res)))) + +;;====================================================================== +;; S T E P S +;;====================================================================== + +(define (db:teststep-set-status! dbstruct run-id test-id teststep-name state-in status-in comment logfile) + (db:with-db + dbstruct + run-id + #t + (lambda (db) + (sqlite3:execute + db + "INSERT OR REPLACE into test_steps (test_id,stepname,state,status,event_time,comment,logfile) VALUES(?,?,?,?,?,?,?);" + test-id teststep-name state-in status-in (current-seconds) + (if comment comment "") + (if logfile logfile ""))))) + + + +(define (db:delete-steps-for-test! dbstruct run-id test-id) + ;; TODO: figure out why status is the key field rather than state (note: CONSTRAINT test_steps_constraint UNIQUE (test_id,stepname,state) ) + (db:with-db + dbstruct + run-id + #t + (lambda (db) + (sqlite3:execute + db + "UPDATE test_steps set status='DELETED' where test_id=?" ;; and run_id=? !! - run_id not in table (bummer) TODO: get run_id into schema for test_steps + test-id)))) + + +;; db-get-test-steps-for-run +(define (db:get-steps-for-test dbstruct run-id test-id) + (db:with-db + dbstruct + run-id + #f + (lambda (db) + (let* ((res '())) + (sqlite3:for-each-row + (lambda (id test-id stepname state status event-time logfile comment) + (set! 
res (cons (vector id test-id stepname state status event-time (if (string? logfile) logfile "") comment) res))) + db + "SELECT id,test_id,stepname,state,status,event_time,logfile,comment FROM test_steps WHERE status != 'DELETED' AND test_id=? ORDER BY id ASC;" ;; event_time DESC,id ASC; + test-id) + (reverse res))))) + + (define (db:get-steps-info-by-id dbstruct test-step-id) + (db:with-db + dbstruct + #f + #f + (lambda (db) + (let* ((res (vector #f #f #f #f #f #f #f #f #f))) + (sqlite3:for-each-row + (lambda (id test-id stepname state status event-time logfile comment last-update) + (set! res (vector id test-id stepname state status event-time (if (string? logfile) logfile "") comment last-update))) + db + "SELECT id,test_id,stepname,state,status,event_time,logfile,comment,last_update FROM test_steps WHERE id=? ORDER BY id ASC;" ;; event_time DESC,id ASC; + test-step-id) + res)))) + +(define (db:get-steps-data dbstruct run-id test-id) + (db:with-db + dbstruct + run-id + #f + (lambda (db) + (let ((res '())) + (sqlite3:for-each-row + (lambda (id test-id stepname state status event-time logfile) + (set! res (cons (vector id test-id stepname state status event-time (if (string? logfile) logfile "")) res))) + db + "SELECT id,test_id,stepname,state,status,event_time,logfile FROM test_steps WHERE status != 'DELETED' AND test_id=? ORDER BY id ASC;" ;; event_time DESC,id ASC; + test-id) + (reverse res))))) + +;;====================================================================== +;; T E S T D A T A +;;====================================================================== + + (define (db:get-data-info-by-id dbstruct test-data-id) + (db:with-db + dbstruct + #f + #f + (lambda (db) + (let* ((res (vector #f #f #f #f #f #f #f #f #f #f #f #f))) + (sqlite3:for-each-row + (lambda (id test-id category variable value expected tol units comment status type last-update) + (set! 
res (vector id test-id category variable value expected tol units comment status type last-update))) + db + "SELECT id,test_id, category, variable, value, expected, tol, units, comment, status, type, last_update FROM test_data WHERE id=? ORDER BY id ASC;" ;; event_time DESC,id ASC; + test-data-id) + res)))) + + +;; WARNING: Do NOT call this for the parent test on an iterated test +;; Roll up test_data pass/fail results +;; look at the test_data status field, +;; if all are pass (any case) and the test status is PASS or NULL or '' then set test status to PASS. +;; if one or more are fail (any case) then set test status to PASS, non "pass" or "fail" are ignored +(define (db:test-data-rollup dbstruct run-id test-id status) + (let* ((fail-count 0) + (pass-count 0)) + (db:with-db + dbstruct #f #f + (lambda (db) + (sqlite3:for-each-row + (lambda (fcount pcount) + (set! fail-count fcount) + (set! pass-count pcount)) + db + "SELECT (SELECT count(id) FROM test_data WHERE test_id=? AND status like 'fail') AS fail_count, + (SELECT count(id) FROM test_data WHERE test_id=? AND status like 'pass') AS pass_count;" + test-id test-id) + ;; Now rollup the counts to the central megatest.db + (db:general-call dbstruct 'pass-fail-counts (list pass-count fail-count test-id)) + ;; if the test is not FAIL then set status based on the fail and pass counts. 
+ (db:general-call dbstruct 'test_data-pf-rollup (list test-id test-id test-id test-id)))))) + +;; each section is a rule except "final" which is the final result +;; +;; [rule-5] +;; operator in +;; section LogFileBody +;; desc Output voltage +;; status OK +;; expected 1.9 +;; measured 1.8 +;; type +/- +;; tolerance 0.1 +;; pass 1 +;; fail 0 +;; +;; [final] +;; exit-code 6 +;; exit-status SKIP +;; message If flagged we are asking for this to exit with code 6 +;; +;; recorded in steps table: +;; category: stepname +;; variable: rule-N +;; value: measured +;; expected: expected +;; tol: tolerance +;; units: - +;; comment: desc or message +;; status: status +;; type: type +;; +(define (db:logpro-dat->csv dat stepname) + (let ((res '())) + (for-each + (lambda (entry-name) + (if (equal? entry-name "final") + (set! res (append + res + (list + (list stepname + entry-name + (configf:lookup dat entry-name "exit-code") ;; 0 ;; Value + 0 ;; 1 ;; Expected + 0 ;; 2 ;; Tolerance + "n/a" ;; 3 ;; Units + (configf:lookup dat entry-name "message") ;; 4 ;; Comment + (configf:lookup dat entry-name "exit-status") ;; 5 ;; Status + "logpro" ;; 6 ;; Type + )))) + (let* ((value (or (configf:lookup dat entry-name "measured") "n/a")) + (expected (or (configf:lookup dat entry-name "expected") 0.0)) + (tolerance (or (configf:lookup dat entry-name "tolerance") 0.0)) + (comment (or (configf:lookup dat entry-name "comment") + (configf:lookup dat entry-name "desc") "n/a")) + (status (or (configf:lookup dat entry-name "status") "n/a")) + (type (or (configf:lookup dat entry-name "expected") "n/a"))) + (set! 
res (append + res + (list (list stepname + entry-name + value ;; 0 + expected ;; 1 + tolerance ;; 2 + "n/a" ;; 3 Units + comment ;; 4 + status ;; 5 + type ;; 6 + ))))))) + (hash-table-keys dat)) + res)) + +;; $MT_MEGATEST -load-test-data << EOF +;; foo,bar, 1.2, 1.9, > +;; foo,rab, 1.0e9, 10e9, 1e9 +;; foo,bla, 1.2, 1.9, < +;; foo,bal, 1.2, 1.2, < , ,Check for overload +;; foo,alb, 1.2, 1.2, <= , Amps,This is the high power circuit test +;; foo,abl, 1.2, 1.3, 0.1 +;; foo,bra, 1.2, pass, silly stuff +;; faz,bar, 10, 8mA, , ,"this is a comment" +;; EOF + + +(define (tdb:get-prev-tol-for-test tdb test-id category variable) + ;; Finish me? + (values #f #f #f)) + + +(define (db:csv->test-data dbstruct run-id test-id csvdata) + (debug:print 4 *default-log-port* "test-id " test-id ", csvdata: " csvdata) + (db:with-db + dbstruct #f #f + (lambda (db) + (let* ((csvlist (csv->list (make-csv-reader + (open-input-string csvdata) + '((strip-leading-whitespace? #t) + (strip-trailing-whitespace? #t)))))) ;; (csv->list csvdata))) + (for-each + (lambda (csvrow) + (let* ((padded-row (take (append csvrow (list #f #f #f #f #f #f #f #f #f)) 9)) + (category (list-ref padded-row 0)) + (variable (list-ref padded-row 1)) + (value (any->number-if-possible (list-ref padded-row 2))) + (expected (any->number-if-possible (list-ref padded-row 3))) + (tol (any->number-if-possible (list-ref padded-row 4))) ;; >, <, >=, <=, or a number + (units (list-ref padded-row 5)) + (comment (list-ref padded-row 6)) + (status (let ((s (list-ref padded-row 7))) + (if (and (string? 
s)(or (string-match (regexp "^\\s*$") s)
+                                                 (string-match (regexp "^n/a$") s)))
+                                    #f
+                                    s))) ;; if specified on the input then use, else calculate
+                    (type       (list-ref padded-row 8)))
+               ;; look up expected,tol,units from previous best fit test if they are all either #f or ''
+               (debug:print 4 *default-log-port* "BEFORE: category: " category " variable: " variable " value: " value
+                            ", expected: " expected " tol: " tol " units: " units " status: " status " comment: " comment " type: " type)
+
+               ;; each of the three fields must itself be empty; testing only
+               ;; expected would discard a tol or units given on the input row
+               (if (and (or (not expected)(equal? expected ""))
+                        (or (not tol)     (equal? tol ""))
+                        (or (not units)   (equal? units "")))
+                   (let-values (((new-expected new-tol new-units)(tdb:get-prev-tol-for-test #f test-id category variable)))
+                     (set! expected new-expected)
+                     (set! tol      new-tol)
+                     (set! units    new-units)))
+
+               (debug:print 4 *default-log-port* "AFTER: category: " category " variable: " variable " value: " value
+                            ", expected: " expected " tol: " tol " units: " units " status: " status " comment: " comment)
+               ;; calculate status if NOT specified
+               (if (and (not status)(number? expected)(number? value)) ;; need expected and value to be numbers
+                   (if (number? tol) ;; if tol is a number then we do the standard comparison
+                       (let* ((max-val (+ expected tol))
+                              (min-val (- expected tol))
+                              (result  (and (>=  value min-val)(<= value max-val))))
+                         (debug:print 4 *default-log-port* "max-val: " max-val " min-val: " min-val " result: " result)
+                         (set! status (if result "pass" "fail")))
+                       (set! status ;; NB// need to assess each one (i.e. not return operator since need to act if not valid op.
+                             ;; string->symbol raises on a non-string, so a tol that is not a
+                             ;; string (presumably #f when the row has no tol column) is passed
+                             ;; through unchanged and lands in the else clause below
+                             (case (if (string? tol) (string->symbol tol) tol) ;; tol should be >, <, >=, <=
+                               ((>)  (if (>  value expected) "pass" "fail"))
+                               ((<)  (if (<  value expected) "pass" "fail"))
+                               ((>=) (if (>= value expected) "pass" "fail"))
+                               ((<=) (if (<= value expected) "pass" "fail"))
+                               (else (conc "ERROR: bad tol comparator " tol))))))
+               (debug:print 4 *default-log-port* "AFTER2: category: " category " variable: " variable " value: " value
+                            ", expected: " expected " tol: " tol " units: " units " status: " status " comment: " comment)
+               ;; (db:delay-if-busy dbdat)
+               (sqlite3:execute db "INSERT OR REPLACE INTO test_data (test_id,category,variable,value,expected,tol,units,comment,status,type) VALUES (?,?,?,?,?,?,?,?,?,?);"
+                                test-id category variable value expected tol units (if comment comment "") status type)))
+           csvlist)))))
+
+;; This routine moved from tdb.scm, tdb:read-test-data
+;;
+;; Collect the test_data rows for test-id whose category is LIKE categorypatt,
+;; ordered by category,variable. Each row becomes a vector
+;;   #(id test_id category variable value expected tol units comment status type)
+;; Rows are consed up and then reversed, so the list is in query order.
+;; run-id is accepted but not used in this body.
+;;
+(define (db:read-test-data dbstruct run-id test-id categorypatt)
+  (let* ((res '()))
+    (db:with-db
+     dbstruct #f #f
+     (lambda (db)
+       (sqlite3:for-each-row
+        (lambda (id test_id category variable value expected tol units comment status type)
+          (set! res (cons (vector id test_id category variable value expected tol units comment status type) res)))
+        db
+        "SELECT id,test_id,category,variable,value,expected,tol,units,comment,status,type FROM test_data WHERE test_id=? AND category LIKE ? ORDER BY category,variable;" test-id categorypatt)
+       (reverse res)))))
+
+;; This routine moved from tdb.scm, :read-test-data
+;;
+;; Same as db:read-test-data with an additional LIKE pattern (varpatt) applied
+;; to the variable column. run-id is accepted but not used in this body.
+;;
+(define (db:read-test-data* dbstruct run-id test-id categorypatt varpatt)
+  (let* ((res '()))
+    (db:with-db
+     dbstruct #f #f
+     (lambda (db)
+       (sqlite3:for-each-row
+        (lambda (id test_id category variable value expected tol units comment status type)
+          (set! res (cons (vector id test_id category variable value expected tol units comment status type) res)))
+        db
+        "SELECT id,test_id,category,variable,value,expected,tol,units,comment,status,type FROM test_data WHERE test_id=? AND category LIKE ?
AND variable LIKE ? ORDER BY category,variable;" test-id categorypatt varpatt) + (reverse res))))) + + +;;====================================================================== +;; Misc. test related queries +;;====================================================================== + +(define (db:get-run-ids-matching-target dbstruct keynames target res runname testpatt statepatt statuspatt) + (db:with-db + dbstruct #f #f + (lambda (db) + (let* ((row-ids '()) + (keystr (string-intersperse + (map (lambda (key val) + (conc key " like '" val "'")) + keynames + (string-split target "/")) + " AND ")) + ;; (testqry (tests:match->sqlqry testpatt)) + (runsqry (sqlite3:prepare db (conc "SELECT id FROM runs WHERE " keystr " AND runname LIKE '" runname "';")))) + ;; (debug:print 8 *default-log-port* "db:test-get-paths-matching-keynames-target-new\n runsqry=" runsqry "\n tstsqry=" testqry) + (sqlite3:for-each-row + (lambda (rid) + (set! row-ids (cons rid row-ids))) + runsqry) + (sqlite3:finalize! runsqry) + row-ids)))) + +;; finds latest matching all patts for given run-id +;; +(define (db:test-get-paths-matching-keynames-target-new dbstruct run-id keynames target res testpatt statepatt statuspatt runname) + (let* ((testqry (tests:match->sqlqry testpatt)) + (tstsqry (conc "SELECT rundir FROM tests WHERE run_id=? AND " testqry " AND state LIKE '" statepatt "' AND status LIKE '" statuspatt "' ORDER BY event_time ASC;"))) + (db:with-db + dbstruct + run-id + #f + (lambda (db) + (sqlite3:for-each-row + (lambda (p) + (set! res (cons p res))) + db + tstsqry + run-id) + res)))) + +(define (db:test-toplevel-num-items dbstruct run-id testname) + (db:with-db + dbstruct + run-id + #f + (lambda (db) + (let ((res 0)) + (sqlite3:for-each-row + (lambda (num-items) + (set! res num-items)) + db + "SELECT count(id) FROM tests WHERE run_id=? AND testname=? 
AND item_path != '' AND state NOT IN ('DELETED');" + run-id + testname) + res)))) + +;;====================================================================== +;; QUEUE UP META, TEST STATUS AND STEPS REMOTE ACCESS +;;====================================================================== + +;; NOTE: Can remove the regex and base64 encoding for zmq +(define (db:obj->string obj #!key (transport 'http)) + (case transport + ;; ((fs) obj) + ((http fs) + (string-substitute + (regexp "=") "_" + (base64:base64-encode + (z3:encode-buffer + (with-output-to-string + (lambda ()(serialize obj))))) ;; BB: serialize - this is what causes problems between different builds of megatest communicating. serialize is sensitive to binary image of mtest. + #t)) + ((zmq nmsg)(with-output-to-string (lambda ()(serialize obj)))) + (else obj))) ;; rpc + +(define (db:string->obj msg #!key (transport 'http)) + (case transport + ;; ((fs) msg) + ((http fs) + (if (string? msg) + (with-input-from-string + (z3:decode-buffer + (base64:base64-decode + (string-substitute + (regexp "_") "=" msg #t))) + (lambda ()(deserialize))) + (begin + (debug:print-error 0 *default-log-port* "reception failed. 
Received \"" msg "\" but cannot translate it.") + (print-call-chain (current-error-port)) + msg))) ;; crude reply for when things go awry + ((zmq nmsg)(with-input-from-string msg (lambda ()(deserialize)))) + (else msg))) ;; rpc + +;; ; This is to be the big daddy call NOPE: Replaced by db:set-state-status-and-roll-up-items +;; ; +;; define (db:test-set-state-status dbstruct run-id test-id state status msg) +;; (let ((dbdat (db:get-db dbstruct run-id))) +;; (if (member state '("LAUNCHED" "REMOTEHOSTSTART")) +;; (db:general-call dbdat 'set-test-start-time (list test-id))) +;; ;; (if msg +;; ;; (db:general-call dbdat 'state-status-msg (list state status msg test-id)) +;; ;; (db:general-call dbdat 'state-status (list state status test-id))) +;; (db:set-state-status-and-roll-up-items dbstruct run-id test-id #f state status msg) +;; ;; process the test_data table +;; (if (and test-id state status (equal? status "AUTO")) +;; (db:test-data-rollup dbstruct run-id test-id status)) +;; (mt:process-triggers dbstruct run-id test-id state status))) + +;; state is the priority rollup of all states +;; status is the priority rollup of all completed statesfu +;; +;; if test-name is an integer work off that instead of test-name test-path +;; +(define (db:set-state-status-and-roll-up-items dbstruct run-id test-name item-path state status comment) + ;; establish info on incoming test followed by info on top level test + ;; BBnote - for mode itemwait, linkage between upstream test & matching item status is propagated to run queue in db:prereqs-not-met + (let* ((testdat (if (number? test-name) + (db:get-test-info-by-id dbstruct run-id test-name) ;; test-name is actually a test-id + (db:get-test-info dbstruct run-id test-name item-path))) + (test-id (db:test-get-id testdat)) + (test-name (if (number? 
test-name) + (db:test-get-testname testdat) + test-name)) + (item-path (db:test-get-item-path testdat)) + (tl-testdat (db:get-test-info dbstruct run-id test-name "")) + (tl-test-id (if tl-testdat + (db:test-get-id tl-testdat) + #f))) + (if (member state '("LAUNCHED" "REMOTEHOSTSTART")) + (db:general-call dbstruct 'set-test-start-time (list test-id))) + (mutex-lock! *db-transaction-mutex*) + (db:with-db + dbstruct #f #f + (lambda (db) + (let ((tr-res + (sqlite3:with-transaction + db + (lambda () + ;; NB// Pass the db so it is part fo the transaction + (db:test-set-state-status db run-id test-id state status comment) ;; this call sets the item state/status + (if (not (equal? item-path "")) ;; only roll up IF incoming test is an item + (let* ((state-status-counts (db:get-all-state-status-counts-for-test dbstruct run-id test-name item-path state status)) ;; item-path is used to exclude current state/status of THIS test + (state-stauses (db:roll-up-rules state-status-counts state status)) + (newstate (car state-stauses)) + (newstatus (cadr state-stauses))) + (debug:print 4 *default-log-port* "BB> tl-test-id="tl-test-id" ; "test-name":"item-path" newstate="newstate" newstatus="newstatus" len(sscs)="(length state-status-counts) " state-status-counts: " + (apply conc + (map (lambda (x) + (conc + (with-output-to-string (lambda () (pp (dbr:counts->alist x)))) " | ")) + state-status-counts))); end debug:print + + (if tl-test-id + (db:test-set-state-status db run-id tl-test-id newstate newstatus #f)) ;; we are still in the transaction - must access the db and not the dbstruct + )))))) + (mutex-unlock! *db-transaction-mutex*) + (if (and test-id state status (equal? 
status "AUTO")) + (db:test-data-rollup dbstruct run-id test-id status)) + tr-res))))) + +(define (db:roll-up-rules state-status-counts state status) + (let* ((running (length (filter (lambda (x) + (member (dbr:counts-state x) *common:running-states*)) + state-status-counts))) + (bad-not-started (length (filter (lambda (x) + (and (equal? (dbr:counts-state x) "NOT_STARTED") + (not (member (dbr:counts-status x) *common:not-started-ok-statuses*)))) + state-status-counts))) + (all-curr-states (common:special-sort ;; worst -> best (sort of) + (delete-duplicates + (if (and state (not (member state *common:dont-roll-up-states*))) + (cons state (map dbr:counts-state state-status-counts)) + (map dbr:counts-state state-status-counts))) + *common:std-states* >)) + (all-curr-statuses (common:special-sort ;; worst -> best + (delete-duplicates + (if (and state status (not (member state *common:dont-roll-up-states*))) + (cons status (map dbr:counts-status state-status-counts)) + (map dbr:counts-status state-status-counts))) + *common:std-statuses* >)) + (non-completes (filter (lambda (x) + (not (member x (cons "COMPLETED" *common:dont-roll-up-states*)))) + all-curr-states)) + (preq-fails (filter (lambda (x) + (equal? x "PREQ_FAIL")) + all-curr-statuses)) + (num-non-completes (length non-completes)) + (newstate (cond + ((> running 0) "RUNNING") ;; anything running, call the situation running + ((> (length preq-fails) 0) "NOT_STARTED") + ((> bad-not-started 0) "COMPLETED") ;; we have an ugly situation, it is completed in the sense we cannot do more. + ((> num-non-completes 0) (car non-completes)) ;; (remove (lambda (x)(equal? "COMPLETED" x)) all-curr-states))) ;; only rollup DELETED if all DELETED + (else (car all-curr-states)))) + (newstatus (cond + ((> (length preq-fails) 0) "PREQ_FAIL") + ((or (> bad-not-started 0) + (and (equal? 
newstate "NOT_STARTED")
+                                     (> num-non-completes 0)))
+                            "STARTED")
+                           (else (car all-curr-statuses)))))
+    (debug:print-info 2 *default-log-port*
+                      "\n--> probe db:set-state-status-and-roll-up-items: "
+                      "\n--> state-status-counts: "(map dbr:counts->alist state-status-counts)
+                      "\n--> running: "running
+                      "\n--> bad-not-started: "bad-not-started
+                      "\n--> non-non-completes: "num-non-completes
+                      "\n--> non-completes: "non-completes
+                      "\n--> all-curr-states: "all-curr-states
+                      "\n--> all-curr-statuses: "all-curr-statuses
+                      "\n--> newstate "newstate
+                      "\n--> newstatus "newstatus
+                      "\n\n")
+
+    ;; NB// Pass the db so it is part of the transaction
+    (list newstate newstatus)))
+
+;; Recompute the rolled-up state/status of a whole run and store it when it
+;; differs from the values the caller currently has.
+;;
+;;   curr-state, curr-status - the run's present state and status
+;;
+;; Holds *db-transaction-mutex* while a transaction gathers the per
+;; (state,status) counts for the run, applies db:roll-up-rules with no incoming
+;; state/status, and calls db:set-run-state-status if either value changed.
+;; Returns the value of the transaction.
+;;
+;; NOTE(review): the mutex is unlocked only on the normal path; no handler is
+;; visible here that would release it if the transaction raises - confirm.
+;;
+(define (db:set-state-status-and-roll-up-run dbstruct run-id curr-state curr-status)
+  (mutex-lock! *db-transaction-mutex*)
+  (db:with-db
+   dbstruct #f #f
+   (lambda (db)
+     (let ((tr-res
+            (sqlite3:with-transaction
+             db
+             (lambda ()
+               (let* ((state-status-counts (db:get-all-state-status-counts-for-run dbstruct run-id))
+                      (state-stauses       (db:roll-up-rules state-status-counts #f #f ))
+                      (newstate            (car state-stauses))
+                      (newstatus           (cadr state-stauses)))
+                 ;; compare by content: eq? tests object identity, and two strings
+                 ;; with the same characters are not guaranteed to be eq?
+                 (if (or (not (equal? newstate curr-state)) (not (equal? newstatus curr-status)))
+                     (db:set-run-state-status dbstruct run-id newstate newstatus )))))))
+       (mutex-unlock! *db-transaction-mutex*)
+       tr-res))))
+
+
+;; Return one dbr:counts record (state, status, count) per distinct
+;; state/status pair among the tests of run-id.
+;;
+(define (db:get-all-state-status-counts-for-run dbstruct run-id)
+  (let* ((test-count-recs (db:with-db
+                           dbstruct #f #f
+                           (lambda (db)
+                             (sqlite3:map-row
+                              (lambda (state status count)
+                                (make-dbr:counts state: state status: status count: count))
+                              db
+                              "SELECT state,status,count(id) FROM tests WHERE run_id=?
GROUP BY state,status;" + run-id ))))) + test-count-recs)) + + +;; BBnote: db:get-all-state-status-counts-for-test returns dbr:counts object aggregating state and status of items of a given test, *not including rollup state/status* +(define (db:get-all-state-status-counts-for-test dbstruct run-id test-name item-path item-state-in item-status-in) + (let* ((test-info (db:get-test-info dbstruct run-id test-name item-path)) + (item-state (or item-state-in (db:test-get-state test-info))) + (item-status (or item-status-in (db:test-get-status test-info))) + (other-items-count-recs (db:with-db + dbstruct #f #f + (lambda (db) + (sqlite3:map-row + (lambda (state status count) + (make-dbr:counts state: state status: status count: count)) + db + ;; ignore current item because we have changed its value in the current transation so this select will see the old value. + "SELECT state,status,count(id) FROM tests WHERE run_id=? AND testname=? AND item_path != '' AND item_path !=? GROUP BY state,status;" + run-id test-name item-path)))) + + ;; add current item to tally outside of sql query + (match-countrec-lambda (lambda (countrec) + (and (equal? (dbr:counts-state countrec) item-state) + (equal? (dbr:counts-status countrec) item-status)))) + + (already-have-count-rec-list + (filter match-countrec-lambda other-items-count-recs)) ;; will have either 0 or 1 count recs depending if another item shares this item's state/status + + (updated-count-rec (if (null? 
already-have-count-rec-list) + (make-dbr:counts state: item-state status: item-status count: 1) + (let* ((our-count-rec (car already-have-count-rec-list)) + (new-count (add1 (dbr:counts-count our-count-rec)))) + (make-dbr:counts state: item-state status: item-status count: new-count)))) + + (nonmatch-countrec-lambda (lambda (countrec) (not (match-countrec-lambda countrec)))) + + (unrelated-rec-list + (filter nonmatch-countrec-lambda other-items-count-recs))) + + (cons updated-count-rec unrelated-rec-list))) + +;; (define (db:get-all-item-states db run-id test-name) +;; (sqlite3:map-row +;; (lambda (a) a) +;; db +;; "SELECT DISTINCT state FROM tests WHERE item_path != '' AND state != 'DELETED' AND run_id=? AND testname=?" +;; run-id test-name)) +;; +;; (define (db:get-all-item-statuses db run-id test-name) +;; (sqlite3:map-row +;; (lambda (a) a) +;; db +;; "SELECT DISTINCT status FROM tests WHERE item_path != '' AND state != 'DELETED' AND state='COMPLETED' AND run_id=? AND testname=?" +;; run-id test-name)) + +(define (db:test-get-logfile-info dbstruct run-id test-name) + (db:with-db + dbstruct + run-id + #f + (lambda (db) + (let ((res #f)) + (sqlite3:for-each-row + (lambda (path final_logf) + ;; (let ((path (sdb:qry 'getstr path-id)) + ;; (final_logf (sdb:qry 'getstr final_logf-id))) + (set! logf final_logf) + (set! res (list path final_logf)) + (if (directory? path) + (debug:print 2 *default-log-port* "Found path: " path) + (debug:print 2 *default-log-port* "No such path: " path))) ;; ) + db + "SELECT rundir,final_logf FROM tests WHERE testname=? AND item_path='' AND run_id=?;" + test-name run-id) + res)))) + +;;====================================================================== +;; A G R E G A T E D T R A N S A C T I O N D B W R I T E S +;;====================================================================== + +(define db:queries + (list '(update-run-duration "UPDATE tests SET run_duration=? 
WHERE id=?;")
+
+        ;; TESTS
+        '(register-test "INSERT OR IGNORE INTO tests (run_id,testname,event_time,item_path,state,status) VALUES (?,?,strftime('%s','now'),?,'NOT_STARTED','n/a');")
+        ;; Test state and status
+        '(set-test-state "UPDATE tests SET state=? WHERE id=?;")
+        ;; set-test-status writes the status column; state has its own query above
+        '(set-test-status "UPDATE tests SET status=? WHERE id=?;")
+        '(state-status "UPDATE tests SET state=?,status=? WHERE id=?;") ;; D/ONE
+        '(state-status-msg "UPDATE tests SET state=?,status=?,comment=? WHERE id=?;") ;; DONE
+        ;; Test comment
+        '(set-test-comment "UPDATE tests SET comment=? WHERE id=?;")
+        '(set-test-start-time "UPDATE tests SET event_time=strftime('%s','now') WHERE id=?;") ;; DONE
+        '(pass-fail-counts "UPDATE tests SET pass_count=?,fail_count=? WHERE id=?;")
+        ;; test_data-pf-rollup is used to set a tests PASS/FAIL based on the pass/fail info from the steps
+        ;; (takes the test id four times, once per placeholder)
+        '(test_data-pf-rollup "UPDATE tests
+ SET status=CASE WHEN (SELECT fail_count FROM tests WHERE id=?) > 0
+ THEN 'FAIL'
+ WHEN (SELECT pass_count FROM tests WHERE id=?) > 0 AND
+ (SELECT status FROM tests WHERE id=?) NOT IN ('WARN','FAIL')
+ THEN 'PASS'
+ ELSE status
+ END WHERE id=?;") ;; DONE
+        '(test-set-log "UPDATE tests SET final_logf=? WHERE id=?;") ;; DONE
+        ;; '(test-set-rundir-by-test-id "UPDATE tests SET rundir=? WHERE id=?") ;; DONE
+        ;; '(test-set-rundir "UPDATE tests SET rundir=? AND testname=? AND item_path=?;") ;; DONE
+        '(test-set-rundir-shortdir "UPDATE tests SET rundir=?,shortdir=? WHERE testname=? AND item_path=? AND run_id=?;") ;; BROKEN!!! NEEDS run-id
+        '(delete-tests-in-state ;; "DELETE FROM tests WHERE state=?;") ;; DONE
+          "UPDATE tests SET state='DELETED' WHERE state=?")
+        '(tests:test-set-toplog "UPDATE tests SET final_logf=? WHERE run_id=? AND testname=? AND item_path='';")
+        '(update-cpuload-diskfree "UPDATE tests SET cpuload=?,diskfree=? WHERE id=?;") ;; DONE
+        '(update-uname-host "UPDATE tests SET uname=?,host=?
WHERE id=?;") ;; DONE + '(update-test-rundat "INSERT INTO test_rundat (test_id,update_time,cpuload,diskfree,diskusage,run_duration) VALUES (?,?,?,?,?,?);") + '(update-test-state "UPDATE tests SET state=? WHERE state=? AND run_id=? AND testname=? AND NOT (item_path='' AND testname IN (SELECT DISTINCT testname FROM tests WHERE testname=? AND item_path != ''));") + '(update-test-status "UPDATE tests SET status=? WHERE status like ? AND run_id=? AND testname=? AND NOT (item_path='' AND testname IN (SELECT DISTINCT testname FROM tests WHERE testname=? AND item_path != ''));") + ;; stuff for set-state-status-and-roll-up-items + '(update-pass-fail-counts "UPDATE tests + SET fail_count=(SELECT count(id) FROM tests WHERE testname=? AND item_path != '' AND status IN ('FAIL','CHECK','INCOMPLETE','ABORT')), + pass_count=(SELECT count(id) FROM tests WHERE testname=? AND item_path != '' AND status IN ('PASS','WARN','WAIVED')) + WHERE testname=? AND item_path='' AND run_id=?;") ;; DONE ;; BROKEN!!! NEEDS run-id + '(top-test-set-running "UPDATE tests SET state='RUNNING' WHERE testname=? AND item_path='' AND run_id=?;") ;; DONE ;; BROKEN!!! NEEDS run-id + + ;; NOT USED + ;; + ;; Might be the following top-test-set-per-pf-counts query could be better based off of something like this: + ;; + ;; select state,status,count(state) from tests where run_id=59 AND testname='runfirst' group by state,status; + ;; + '(top-test-set-per-pf-counts "UPDATE tests + SET state=CASE + WHEN (SELECT count(id) FROM tests + WHERE testname=? + AND item_path != '' + AND status NOT IN ('n/a') + AND state in ('NOT_STARTED')) > 0 THEN 'UNKNOWN' + WHEN (SELECT count(id) FROM tests + WHERE testname=? + AND item_path != '' + AND (status NOT IN ('TEN_STRIKES','BLOCKED') OR status IN ('INCOMPLETE')) + AND state in ('RUNNING','NOT_STARTED','LAUNCHED','REMOTEHOSTSTART')) > 0 THEN 'RUNNING' + WHEN (SELECT count(id) FROM tests + WHERE testname=? 
+ AND item_path != '' + AND state NOT IN ('COMPLETED','DELETED')) = 0 THEN 'COMPLETED' + WHEN (SELECT count(id) FROM tests + WHERE testname=? + AND item_path != '' + AND state = 'NOT_STARTED') > 0 THEN 'NOT_STARTED' + ELSE 'UNKNOWN' END, + status=CASE + WHEN fail_count > 0 THEN 'FAIL' + WHEN (SELECT count(id) FROM tests + WHERE testname=? + AND item_path != '' + AND state IN ('BLOCKED','INCOMPLETE')) > 0 THEN 'FAIL' + WHEN (SELECT count(id) FROM tests + WHERE testname=? + AND item_path != '' + AND state NOT IN ('DELETED') + AND status IN ('INCOMPLETE','ABORT')) > 0 THEN 'ABORT' + WHEN (SELECT count(id) FROM tests + WHERE testname=? + AND item_path != '' + AND state NOT IN ('DELETED') + AND status = 'AUTO') > 0 THEN 'AUTO' + WHEN (SELECT count(id) FROM tests + WHERE testname=? + AND item_path != '' + AND state NOT IN ('DELETED') + AND status IN ('STUCK/INCOMPLETE', 'INCOMPLETE')) > 0 THEN 'INCOMPLETE' + WHEN (SELECT count(id) FROM tests + WHERE testname=? + AND item_path != '' + AND state IN ('COMPLETED','STUCK/INCOMPLETE','INCOMPLETE') + AND status = 'FAIL') > 0 THEN 'FAIL' + WHEN (SELECT count(id) FROM tests + WHERE testname=? + AND item_path != '' + AND state NOT IN ('DELETED') + AND status = 'CHECK') > 0 THEN 'CHECK' + WHEN (SELECT count(id) FROM tests + WHERE testname=? + AND item_path != '' + AND state NOT IN ('DELETED') + AND status = 'SKIP') > 0 THEN 'SKIP' + WHEN (SELECT count(id) FROM tests + WHERE testname=? + AND item_path != '' + AND state NOT IN ('DELETED') + AND status = 'WARN') > 0 THEN 'WARN' + WHEN (SELECT count(id) FROM tests + WHERE testname=? + AND item_path != '' + AND state NOT IN ('DELETED') + AND status = 'WAIVED') > 0 THEN 'WAIVED' + WHEN (SELECT count(id) FROM tests + WHERE testname=? + AND item_path != '' + AND state NOT IN ('DELETED') + AND status NOT IN ('PASS','FAIL','WARN','WAIVED')) > 0 THEN 'ABORT' + WHEN (SELECT count(id) FROM tests + WHERE testname=? 
+ AND item_path != '' + AND state='NOT_STARTED') > 0 THEN 'n/a' + WHEN (SELECT count(id) FROM tests + WHERE testname=? + AND item_path != '' + AND state = 'COMPLETED' + AND status = 'PASS') > 0 THEN 'PASS' + WHEN pass_count > 0 AND fail_count=0 THEN 'PASS' + ELSE 'UNKNOWN' END + WHERE testname=? AND item_path='';") ;; DONE ;; BROKEN!!! NEEDS run-id + + ;; STEPS + '(delete-test-step-records "UPDATE test_steps SET status='DELETED' WHERE test_id=?;") + '(delete-test-data-records "UPDATE test_data SET status='DELETED' WHERE test_id=?;") ;; using status since no state field + )) + +(define (db:lookup-query qry-name) + (let ((q (alist-ref qry-name db:queries))) + (if q (car q) #f))) + +;; do not run these as part of the transaction +(define db:special-queries '(rollup-tests-pass-fail + ;; db:set-state-status-and-roll-up-items ;; WHY NOT!? + login + immediate + flush + sync + set-verbosity + killserver + )) + +(define (db:login dbstruct calling-path calling-version client-signature) + (cond + ((not (equal? calling-path *toppath*)) + (list #f "Login failed due to mismatch paths: " calling-path ", " *toppath*)) + ;; ((not (equal? *run-id* run-id)) + ;; (list #f "Login failed due to mismatch run-id: " run-id ", " *run-id*)) + ((not (equal? megatest-version calling-version)) + (list #t (conc "Login warning due to mismatch megatest version: " calling-version ", " megatest-version))) + (else + (hash-table-set! *logged-in-clients* client-signature (current-seconds)) + '(#t "successful login")))) + +(define (db:general-call dbstruct stmtname params) + (let ((query (let ((q (alist-ref (if (string? 
stmtname) + (string->symbol stmtname) + stmtname) + db:queries))) + (if q (car q) #f)))) + (db:with-db + dbstruct #f #f + (lambda (db) + (apply sqlite3:execute db query params) + #t)))) + +;; get a summary of state and status counts to calculate a rollup +;; +(define (db:get-state-status-summary dbstruct run-id testname) + (let ((res '())) + (db:with-db + dbstruct #f #f + (lambda (db) + (sqlite3:for-each-row + (lambda (state status count) + (set! res (cons (vector state status count) res))) + db + "SELECT state,status,count(state) FROM tests WHERE run_id=? AND testname=? AND item_path='' GROUP BY state,status;" + run-id testname) + res)))) + +(define (db:get-latest-host-load dbstruct raw-hostname) + (let* ((hostname (string-substitute "\\..*$" "" raw-hostname)) + (res (cons -1 0))) + (db:with-db + dbstruct #f #f + (lambda (db) + (sqlite3:for-each-row + (lambda (cpuload update-time) (set! res (cons cpuload update-time))) + db + "SELECT tr.cpuload, tr.update_time FROM test_rundat tr, tests t WHERE t.host=? AND tr.cpuload != -1 AND tr.test_id=t.id ORDER BY tr.update_time DESC LIMIT 1;" + hostname))) res )) + +(define (db:set-top-level-from-items dbstruct run-id testname) + (let* ((summ (db:get-state-status-summary dbstruct run-id testname)) + (find (lambda (state status) + (if (null? summ) + #f + (let loop ((hed (car summ)) + (tal (cdr summ))) + (if (and (string-match state (vector-ref hed 0)) + (string-match status (vector-ref hed 1))) + hed + (if (null? tal) + #f + (loop (car tal)(cdr tal))))))))) + + + ;;; E D I T M E ! ! + + + (cond + ((> (find "COMPLETED" ".*") 0) #f)))) + + + +;; get the previous records for when these tests were run where all keys match but runname +;; NB// Merge this with test:get-previous-test-run-records? This one looks for all matching tests +;; can use wildcards. Also can likely be factored in with get test paths? +;; +;; Run this remotely!! 
+;; +(define (db:get-matching-previous-test-run-records dbstruct run-id test-name item-path) + (let* ((keys (db:get-keys dbstruct)) + (selstr (string-intersperse keys ",")) + (qrystr (string-intersperse (map (lambda (x)(conc x "=?")) keys) " AND ")) + (keyvals #f) + (tests-hash (make-hash-table))) + ;; first look up the key values from the run selected by run-id + (db:with-db + dbstruct #f #f + (lambda (db) + (sqlite3:for-each-row + (lambda (a . b) + (set! keyvals (cons a b))) + db + (conc "SELECT " selstr " FROM runs WHERE id=? ORDER BY event_time DESC;") run-id))) + (if (not keyvals) + '() + (let ((prev-run-ids '())) + (db:with-db + dbstruct #f #f + (lambda (db) + (apply sqlite3:for-each-row + (lambda (id) + (set! prev-run-ids (cons id prev-run-ids))) + db + (conc "SELECT id FROM runs WHERE " qrystr " AND id != ?;") (append keyvals (list run-id))))) + ;; collect all matching tests for the runs then + ;; extract the most recent test and return that. + (debug:print 4 *default-log-port* "selstr: " selstr ", qrystr: " qrystr ", keyvals: " keyvals + ", previous run ids found: " prev-run-ids) + (if (null? prev-run-ids) '() ;; no previous runs? return null + (let loop ((hed (car prev-run-ids)) + (tal (cdr prev-run-ids))) + (let ((results (db:get-tests-for-run dbstruct hed (conc test-name "/" item-path) '() '() #f #f #f #f #f #f #f 'normal))) + (debug:print 4 *default-log-port* "Got tests for run-id " run-id ", test-name " test-name + ", item-path " item-path " results: " (intersperse results "\n")) + ;; Keep only the youngest of any test/item combination + (for-each + (lambda (testdat) + (let* ((full-testname (conc (db:test-get-testname testdat) "/" (db:test-get-item-path testdat))) + (stored-test (hash-table-ref/default tests-hash full-testname #f))) + (if (or (not stored-test) + (and stored-test + (> (db:test-get-event_time testdat)(db:test-get-event_time stored-test)))) + ;; this test is younger, store it in the hash + (hash-table-set! 
tests-hash full-testname testdat))))
+                 results)
+                (if (null? tal)
+                    (map cdr (hash-table->alist tests-hash)) ;; return a list of the most recent tests
+                    (loop (car tal)(cdr tal))))))))))
+
+;; Function recursively checks if .journal exists; if yes means db busy; call itself after delayed interval
+;; return the sqlite3 db handle if possible
+;;
+;;   dbdat - record providing the db handle (db:dbdat-get-db) and its path
+;;           (db:dbdat-get-path); may be #f
+;;   count - remaining back-off steps, 6 by default. Each step sleeps
+;;           0.2, 0.4, 0.8, 1.6, 3.2 then 6.4 seconds before retrying with
+;;           count-1; once the steps are used up it logs and sleeps 12.8 s.
+;;
+;; Returns:
+;;   - (and dbdat <db handle>) straight away when "delay-on-busy" is not set
+;;     in the "server" section of *configdat*
+;;   - the db handle after any delays when dbdat is given
+;;   - the string "bogus result from db:delay-if-busy" when dbdat is #f
+;;
+(define (db:delay-if-busy dbdat #!key (count 6))
+  (if (not (configf:lookup *configdat* "server" "delay-on-busy"))
+      (and dbdat (db:dbdat-get-db dbdat))
+      (if dbdat
+          (let* ((dbpath (db:dbdat-get-path dbdat))
+                 (db     (db:dbdat-get-db   dbdat)) ;; we'll return this so (db:delay--if-busy can be called inline
+                 (dbfj   (conc dbpath "-journal")))
+            ;; every recursive call must hand dbdat on as the first argument;
+            ;; count is a keyword argument and cannot stand in for it
+            (if (handle-exceptions
+                 exn
+                 (begin
+                   (debug:print-info 0 *default-log-port* "WARNING: failed to test for existance of " dbfj)
+                   (thread-sleep! 1)
+                   (db:delay-if-busy dbdat count: (- count 1)))
+                 (common:file-exists? dbfj))
+                (case count
+                  ((6)
+                   (thread-sleep! 0.2)
+                   (db:delay-if-busy dbdat count: 5))
+                  ((5)
+                   (thread-sleep! 0.4)
+                   (db:delay-if-busy dbdat count: 4))
+                  ((4)
+                   (thread-sleep! 0.8)
+                   (db:delay-if-busy dbdat count: 3))
+                  ((3)
+                   (thread-sleep! 1.6)
+                   (db:delay-if-busy dbdat count: 2))
+                  ((2)
+                   (thread-sleep! 3.2)
+                   (db:delay-if-busy dbdat count: 1))
+                  ((1)
+                   (thread-sleep! 6.4)
+                   (db:delay-if-busy dbdat count: 0))
+                  (else
+                   (debug:print-info 0 *default-log-port* "delaying db access due to high database load.")
+                   (thread-sleep! 12.8))))
+            db)
+          "bogus result from db:delay-if-busy")))
+
+;; Collect #(id item_path state status run_duration final_logf comment) for
+;; every item (item_path != '') of test-name in run-id. Rows are consed, so
+;; the list is in reverse query order.
+;;
+(define (db:test-get-records-for-index-file dbstruct run-id test-name)
+  (let ((res '()))
+    (db:with-db
+     dbstruct
+     run-id
+     #f
+     (lambda (db)
+       (sqlite3:for-each-row
+        (lambda (id itempath state status run_duration logf comment)
+          (set! res (cons (vector id itempath state status run_duration logf comment) res)))
+        db
+        "SELECT id,item_path,state,status,run_duration,final_logf,comment FROM tests WHERE testname=? AND item_path != '' AND run_id=?;" ;; BUG! WHY NO run_id?
+ test-name + run-id) + res)))) + +;;====================================================================== +;; Tests meta data +;;====================================================================== + +;; returns a hash table of tags to tests +;; +(define (db:get-tests-tags dbstruct) + (db:with-db + dbstruct #f #f + (lambda (db) + (let* ((res (make-hash-table))) + (sqlite3:for-each-row + (lambda (testname tags-in) + (let ((tags (string-split tags-in ","))) + (for-each + (lambda (tag) + (hash-table-set! res tag + (delete-duplicates + (cons testname (hash-table-ref/default res tag '()))))) + tags))) + db + "SELECT testname,tags FROM test_meta") + (hash-table->alist res))))) + +;; read the record given a testname +(define (db:testmeta-get-record dbstruct testname) + (let ((res #f)) + (db:with-db + dbstruct + #f + #f + (lambda (db) + (sqlite3:for-each-row + (lambda (id testname author owner description reviewed iterated avg_runtime avg_disk tags jobgroup) + (set! res (vector id testname author owner description reviewed iterated avg_runtime avg_disk tags jobgroup))) + db + "SELECT id,testname,author,owner,description,reviewed,iterated,avg_runtime,avg_disk,tags,jobgroup FROM test_meta WHERE testname=?;" + testname) + res)))) + +;; create a new record for a given testname +(define (db:testmeta-add-record dbstruct testname) + (db:with-db dbstruct #f #f + (lambda (db) + (sqlite3:execute + db + "INSERT OR IGNORE INTO test_meta (testname,author,owner,description,reviewed,iterated,avg_runtime,avg_disk,tags) VALUES (?,'','','','','','','','');" testname)))) + +;; update one of the testmeta fields +(define (db:testmeta-update-field dbstruct testname field value) + (db:with-db dbstruct #f #f + (lambda (db) + (sqlite3:execute + db + (conc "UPDATE test_meta SET " field "=? WHERE testname=?;") value testname)))) + +(define (db:testmeta-get-all dbstruct) + (db:with-db dbstruct #f #f + (lambda (db) + (let ((res '())) + (sqlite3:for-each-row + (lambda (a . b) + (set! 
res (cons (apply vector a b) res))) + db + "SELECT id,testname,author,owner,description,reviewed,iterated,avg_runtime,avg_disk,tags,jobgroup FROM test_meta;") + res)))) + +;;====================================================================== +;; M I S C M A N A G E M E N T I T E M S +;;====================================================================== + + + +;; the new prereqs calculation, looks also at itempath if specified +;; all prereqs must be met +;; if prereq test with itempath='' is COMPLETED and PASS, WARN, CHECK, or WAIVED then prereq is met +;; if prereq test with itempath=ref-item-path and COMPLETED with PASS, WARN, CHECK, or WAIVED then prereq is met +;; +;; Note: mode 'normal means that tests must be COMPLETED and ok (i.e. PASS, WARN, CHECK, SKIP or WAIVED) +;; mode 'toplevel means that tests must be COMPLETED only +;; mode 'itemmatch or 'itemwait means that tests items must be COMPLETED and (PASS|WARN|WAIVED|CHECK) [[ NB// NOT IMPLEMENTED YET ]] +;; mode 'exclusive means this test/item cannot run if the same test/item is LAUNCHED,REMOTEHOSTSTART or RUNNING +;; +;; IDEA for consideration: +;; 1. collect all tests "upstream" +;; 2. any NOT completed and good? if yes => return those as prereqs not met, if no => return null list +;; +;; (define (db:get-prereqs-not-met dbstruct run-id waitons ref-item-path mode) +(define (db:get-prereqs-not-met dbstruct run-id waitons ref-test-name ref-item-path mode itemmaps) ;; #!key (mode '(normal))(itemmap #f)) + ;; BBnote - rollup of an itemized test's overall state/status done in db:set-state-status-and-roll-up-items + (append + (if (member 'exclusive mode) + (let ((running-tests (db:get-tests-for-run dbstruct + #f ;; run-id of #f means for all runs. + (if (string=? 
ref-item-path "") ;; testpatt + ref-test-name + (conc ref-test-name "/" ref-item-path)) + '("LAUNCHED" "REMOTEHOSTSTART" "RUNNING") ;; states + '() ;; statuses + #f ;; offset + #f ;; limit + #f ;; not-in + #f ;; sort by + #f ;; sort order + 'shortlist ;; query type + 0 ;; last update, beginning of time .... + #f ;; mode + ))) + ;;(map (lambda (testdat) + ;; (if (equal? (db:test-get-item-path testdat) "") + ;; (db:test-get-testname testdat) + ;; (conc (db:test-get-testname testdat) + ;; "/" + ;; (db:test-get-item-path testdat)))) + running-tests) ;; calling functions want the entire data + '()) + + ;; collection of: for each waiton - + ;; if this ref-test-name is an item in an itemized test and mode is itemwait/itemmatch: + ;; if waiton is not itemized - if waiton is not both completed and in ok status, add as unmet prerequisite + ;; if waiton is itemized: + ;; and waiton's items are not expanded, add as unmet prerequisite + ;; else if matching waiton item is not both completed and in an ok status, add as unmet prerequisite + ;; else + ;; if waiton toplevel is not in both completed and ok status, add as unmet prerequisite + + (if (or (not waitons) + (null? waitons)) + '() + (let* ((ref-test-itemized-mode (not (null? (lset-intersection eq? mode '(itemmatch itemwait))))) ;; how is this different from using member? + (ref-test-toplevel-mode (not (null? (lset-intersection eq? mode '(toplevel))))) + (ref-test-is-toplevel (equal? ref-item-path "")) + (ref-test-is-item (not ref-test-is-toplevel)) + (unmet-pre-reqs '()) + (result '()) + (unmet-prereq-items '()) + ) + (for-each ; waitons + (lambda (waitontest-name) + ;; by getting the tests with matching name we are looking only at the matching test + ;; and related sub items + ;; next should be using mt:get-tests-for-run? + + (let (;(waiton-is-itemized ...) + ;(waiton-items-are-expanded ...) 
+ (waiton-tests (db:get-tests-for-run-state-status dbstruct run-id waitontest-name)) + (ever-seen #f) + (parent-waiton-met #f) + (item-waiton-met #f) + + ) + (for-each ; test expanded from waiton + (lambda (waiton-test) + (let* ((waiton-state (db:test-get-state waiton-test)) + (waiton-status (db:test-get-status waiton-test)) + (waiton-item-path (db:test-get-item-path waiton-test)) ;; BB- this is the upstream itempath + (waiton-test-name (db:test-get-testname waiton-test)) + (waiton-is-toplevel (equal? waiton-item-path "")) + (waiton-is-item (not waiton-is-toplevel)) + (waiton-is-completed (member waiton-state *common:ended-states*)) + (waiton-is-running (member waiton-state *common:running-states*)) + (waiton-is-killed (member waiton-state *common:badly-ended-states*)) + (waiton-is-ok (member waiton-status *common:well-ended-states*)) + ;; testname-b path-a path-b + (same-itempath (db:compare-itempaths ref-test-name waiton-item-path ref-item-path itemmaps)) ;; (equal? ref-item-path waiton-item-path))) + (real-ref-test-name (car (string-split ref-test-name "/"))) ;; I THINK ref-test-name SHOULD NEVER HAVE THE ITEM_PATH! + (test-and-ref-are-same (equal? real-ref-test-name waiton-test-name))) + (debug:print 4 *default-log-port* "waiton-test-name " waiton-test-name " ref-test-name: " ref-test-name " test-and-ref-are-same: " test-and-ref-are-same) + (set! ever-seen #t) + ;;(BB> "***consider waiton "waiton-test"/"waiton-item-path"***") + (cond + ;; case 0 - toplevel of an itemized test, at least one item in prereq has completed + ((and waiton-is-item ref-test-is-toplevel ref-test-itemized-mode waiton-is-completed) + (set! parent-waiton-met #t)) + + ;; case 1, non-item (parent test) is + ((and waiton-is-toplevel ;; this is the parent test of the waiton being examined + waiton-is-completed + ;;(BB> "cond1") + (or waiton-is-ok ref-test-toplevel-mode)) ;; itemmatch itemwait)))))) + (set! 
parent-waiton-met #t)) + ;; Special case for toplevel and KILLED + ((and waiton-is-toplevel ;; this is the parent test + waiton-is-killed + (member 'toplevel mode)) + ;;(BB> "cond2") + (set! parent-waiton-met #t)) + ;; For itemwait mode IFF the previous matching item is good the set parent-waiton-met + ((and ref-test-itemized-mode ref-test-is-item same-itempath) + ;;(BB> "cond3") + (if (and waiton-is-completed (or waiton-is-ok ref-test-toplevel-mode)) + (set! item-waiton-met #t) + (set! unmet-prereq-items (cons waiton-test unmet-prereq-items))) + (if (and waiton-is-toplevel ;; if upstream rollup test is completed, parent-waiton-met is set + (or waiton-is-completed waiton-is-running)) + (set! parent-waiton-met #t))) + ;; normal checking of parent items, any parent or parent item not ok blocks running + ((and waiton-is-completed + (or waiton-is-ok + (member 'toplevel mode)) ;; toplevel does not block on FAIL + (and waiton-is-ok (member 'itemmatch mode) ;; itemmatch blocks on not ok ;; TODO: THIS IS PROBABLY A BUG. ITEMMATCH AND ITEMWAIT ARE SYNONYMS!! WHAT HAPPENED OT ITEMWAIT??? + )) + ;;(BB> "cond4") + (set! item-waiton-met #t)) + ((and waiton-is-completed waiton-is-ok same-itempath) + ;;(BB> "cond5") + (set! item-waiton-met #t)) + ((and waiton-is-completed waiton-is-ok test-and-ref-are-same) ;; probably from [waitons] table + (set! item-waiton-met #t)) + (else + #t + ;;(BB> "condelse") + )))) + waiton-tests) + ;; both requirements, parent and item-waiton must be met to NOT add item to + ;; prereq's not met list + ;; (BB> + ;; "\n* waiton-tests "waiton-tests + ;; "\n* parent-waiton-met "parent-waiton-met + ;; "\n* item-waiton-met "item-waiton-met + ;; "\n* ever-seen "ever-seen + ;; "\n* ref-test-itemized-mode "ref-test-itemized-mode + ;; "\n* unmet-prereq-items "unmet-prereq-items + ;; "\n* result (pre) "result + ;; "\n* ever-seen "ever-seen + ;; "\n") + + (cond + ((and ref-test-itemized-mode ref-test-is-item (not (null? unmet-prereq-items))) + (set! 
result (append unmet-prereq-items result))) + ((not (or parent-waiton-met item-waiton-met)) + (set! result (append (if (null? waiton-tests) (list waitontest-name) waiton-tests) result))) ;; appends the string if the full record is not available + ;; if the test is not found then clearly the waiton is not met... + ;; (if (not ever-seen)(set! result (cons waitontest-name result))))) + ((not ever-seen) + (set! result (append (if (null? waiton-tests)(list waitontest-name) waiton-tests) result)))))) + waitons) + (delete-duplicates result))))) + +;;====================================================================== +;; To sync individual run +;;====================================================================== +(define (db:get-run-record-ids dbstruct target run keynames test-patt) +(let ((backcons (lambda (lst item)(cons item lst)))) + (db:with-db + dbstruct #f #f + (lambda (db) + (let* ((keystr (string-intersperse + (map (lambda (key val) + (conc key " like '" val "'")) + keynames + (string-split target "/")) + " AND ")) + (run-qry (conc "SELECT id FROM runs WHERE " keystr " and runname='" run"'")) + (test-qry (conc "SELECT id FROM tests WHERE run_id in (" run-qry ") and testname like '" test-patt "'"))) + (print run-qry) + (print test-qry) + `((runs . ,(sqlite3:fold-row backcons '() db run-qry)) + (tests . ,(sqlite3:fold-row backcons '() db test-qry)) + (test_steps . ,(sqlite3:fold-row backcons '() db (conc "SELECT id FROM test_steps WHERE test_id in (" test-qry ")"))) + (test_data . ,(sqlite3:fold-row backcons '() db (conc "SELECT id FROM test_data WHERE test_id in (" test-qry ")" ))) + )))))) + +;;====================================================================== +;; Just for sync, procedures to make sync easy +;;====================================================================== + +;; get an alist of record ids changed since time since-time +;; '((runs . (1 2 3 ...))(steps . (5 6 7 ...) 
...)) +;; +(define (db:get-changed-record-ids dbstruct since-time) + ;; no transaction, allow the db to be accessed between the big queries + (let ((backcons (lambda (lst item)(cons item lst)))) + (db:with-db + dbstruct #f #f + (lambda (db) + `((runs . ,(sqlite3:fold-row backcons '() db "SELECT id FROM runs WHERE last_update>=?" since-time)) + (tests . ,(sqlite3:fold-row backcons '() db "SELECT id FROM tests WHERE last_update>=?" since-time)) + (test_steps . ,(sqlite3:fold-row backcons '() db "SELECT id FROM test_steps WHERE last_update>=?" since-time)) + (test_data . ,(sqlite3:fold-row backcons '() db "SELECT id FROM test_data WHERE last_update>=?" since-time)) + ;; (test_meta . ,(fold-row backcons '() db "SELECT id FROM test_meta WHERE last_update>?" since-time)) + (run_stats . ,(sqlite3:fold-row backcons '() db "SELECT id FROM run_stats WHERE last_update>=?" since-time)) + ))))) + +;;====================================================================== +;; Extract ods file from the db +;;====================================================================== + +;; NOT REWRITTEN YET!!!!! + +;; runspatt is a comma delimited list of run patterns +;; keypatt-alist must contain *all* keys with an associated pattern: '( ("KEY1" "%") .. ) +(define (db:extract-ods-file dbstruct outputfile keypatt-alist runspatt pathmod) + (let* ((keysstr (string-intersperse (map car keypatt-alist) ",")) + (keyqry (string-intersperse (map (lambda (p)(conc (car p) " LIKE ? 
")) keypatt-alist) " AND ")) + (numkeys (length keypatt-alist)) + (test-ids '()) + (dbdat (db:get-db dbstruct)) + (db (db:dbdat-get-db dbdat)) + (windows (and pathmod (substring-index "\\" pathmod))) + (tempdir (conc "/tmp/" (current-user-name) "/" runspatt "_" (random 10000) "_" (current-process-id))) + (runsheader (append (list "Run Id" "Runname") ; 0 1 + (map car keypatt-alist) ; + N = length keypatt-alist + (list "Testname" ; 2 + "Item Path" ; 3 + "Description" ; 4 + "State" ; 5 + "Status" ; 6 + "Final Log" ; 7 + "Run Duration" ; 8 + "When Run" ; 9 + "Tags" ; 10 + "Run Owner" ; 11 + "Comment" ; 12 + "Author" ; 13 + "Test Owner" ; 14 + "Reviewed" ; 15 + "Diskfree" ; 16 + "Uname" ; 17 + "Rundir" ; 18 + "Host" ; 19 + "Cpu Load" ; 20 + ))) + (results (list runsheader)) + (testdata-header (list "Run Id" "Testname" "Item Path" "Category" "Variable" "Value" "Expected" "Tol" "Units" "Status" "Comment")) + (mainqry (conc "SELECT + t.testname,r.id,runname," keysstr ",t.testname, + t.item_path,tm.description,t.state,t.status, + final_logf,run_duration, + strftime('%m/%d/%Y %H:%M:%S',datetime(t.event_time,'unixepoch'),'localtime'), + tm.tags,r.owner,t.comment, + author, + tm.owner,reviewed, + diskfree,uname,rundir, + host,cpuload + FROM tests AS t JOIN runs AS r ON t.run_id=r.id JOIN test_meta AS tm ON tm.testname=t.testname + WHERE runname LIKE ? AND " keyqry ";"))) + (debug:print 2 *default-log-port* "Using " tempdir " for constructing the ods file. keyqry: " keyqry " keystr: " keysstr " with keys: " (map cadr keypatt-alist) + "\n mainqry: " mainqry) + ;; "Expected Value" + ;; "Value Found" + ;; "Tolerance" + (apply sqlite3:for-each-row + (lambda (test-id . b) + (set! test-ids (cons test-id test-ids)) ;; test-id is now testname + (set! 
results (append results ;; note, drop the test-id + (list + (if pathmod + (let* ((vb (apply vector b)) + (keyvals (let loop ((i 0) + (res '())) + (if (>= i numkeys) + res + (loop (+ i 1) + (append res (list (vector-ref vb (+ i 2)))))))) + (runname (vector-ref vb 1)) + (testname (vector-ref vb (+ 2 numkeys))) + (item-path (vector-ref vb (+ 3 numkeys))) + (final-log (vector-ref vb (+ 7 numkeys))) + (run-dir (vector-ref vb (+ 18 numkeys))) + (log-fpath (conc run-dir "/" final-log))) ;; (string-intersperse keyvals "/") "/" testname "/" item-path "/" + (debug:print 4 *default-log-port* "log: " log-fpath " exists: " (common:file-exists? log-fpath)) + (vector-set! vb (+ 7 numkeys) (if (common:file-exists? log-fpath) + (let ((newpath (conc pathmod "/" + (string-intersperse keyvals "/") + "/" runname "/" testname "/" + (if (string=? item-path "") "" (conc "/" item-path)) + final-log))) + ;; for now throw away newpath and use the log-fpath conc'd with pathmod + (set! newpath (conc pathmod log-fpath)) + (if windows (string-translate newpath "/" "\\") newpath)) + (if (debug:debug-mode 1) + (conc final-log " not-found") + ""))) + (vector->list vb)) + b))))) + db + mainqry + runspatt (map cadr keypatt-alist)) + (debug:print 2 *default-log-port* "Found " (length test-ids) " records") + (set! results (list (cons "Runs" results))) + ;; now, for each test, collect the test_data info and add a new sheet + (for-each + (lambda (test-id) + (let ((test-data (list testdata-header)) + (curr-test-name #f)) + (sqlite3:for-each-row + (lambda (run-id testname item-path category variable value expected tol units status comment) + (set! curr-test-name testname) + (set! 
test-data (append test-data (list (list run-id testname item-path category variable value expected tol units status comment)))))
+            db
+            ;; "SELECT run_id,testname,item_path,category,variable,td.value AS value,expected,tol,units,td.status AS status,td.comment AS comment FROM test_data AS td INNER JOIN tests ON tests.id=td.test_id WHERE test_id=?;"
+            "SELECT run_id,testname,item_path,category,variable,td.value AS value,td.expected,td.tol,td.units,td.status AS status,td.comment AS comment FROM test_data AS td INNER JOIN tests ON tests.id=td.test_id WHERE testname=?;"
+            test-id)
+           (if curr-test-name
+               (set! results (append results (list (cons curr-test-name test-data)))))
+           ))
+       (sort (delete-duplicates test-ids) string<=))
+    ;; scratch directory for building the ods file; removed again at the end
+    (system (conc "mkdir -p " tempdir))
+    ;; (pp results)
+    (ods:list->ods
+     tempdir
+     (if (string-match (regexp "^[/~]+.*") outputfile) ;; full path?
+         outputfile
+         (begin
+           (debug:print 0 *default-log-port* "WARNING: path given, " outputfile " is relative, prefixing with current directory")
+           (conc (current-directory) "/" outputfile)))
+     results)
+    ;; brutal clean up
+    (stack-push! (dbr:dbstruct-dbstack dbstruct) dbdat)
+    ;; remove the directory named by the tempdir variable (the same path given to
+    ;; mkdir above), not a literal directory called "tempdir" in the cwd
+    ;; NOTE(review): tempdir embeds runspatt and is passed to the shell unquoted
+    ;; in both the mkdir and rm commands - confirm runspatt cannot carry shell metacharacters
+    (system (conc "rm -rf " tempdir))))
+
+;; (db:extract-ods-file db "outputfile.ods" '(("sysname" "%")("fsname" "%")("datapath" "%")) "%")
+
+;;======================================================================
+;; T R I G G E R S
+;;======================================================================
+
+(define (mt:run-trigger cmd test-id test-rundir trigger logname test-name item-path event-time actual-state actual-status)
+  ;; Putting the commandline into ( )'s means no control over the shell.
+  ;; stdout and stderr will be caught in the NBFAKE or mt_launch.log files
+  ;; or equivalent. No need to do this. Just run it?
+ (let* ((fullcmd (conc "nbfake " + cmd " " + test-id " " + test-rundir " " + trigger " " + test-name " " + item-path " " ;; has / prepended to deal with toplevel tests + actual-state " " + actual-status " " + event-time + )) + (prev-nbfake-log (get-environment-variable "NBFAKE_LOG"))) + (setenv "NBFAKE_LOG" (conc (cond + ((and (directory-exists? test-rundir) + (file-write-access? test-rundir)) + test-rundir) + ((and (directory-exists? *toppath*) + (file-write-access? *toppath*)) + *toppath*) + (else (conc "/tmp/" (current-user-name)))) + "/" logname)) + (debug:print-info 0 *default-log-port* "TRIGGERED on " trigger ", running command " fullcmd " output at " (get-environment-variable "NBFAKE_LOG")) + ;; (call-with-environment-variables + ;; `(("NBFAKE_LOG" . ,(conc test-rundir "/" logname))) + ;; (lambda () + (process-run fullcmd) + (if prev-nbfake-log + (setenv "NBFAKE_LOG" prev-nbfake-log) + (unsetenv "NBFAKE_LOG")) + )) ;; )) + +(define (mt:process-triggers dbstruct run-id test-id newstate newstatus) + (if test-id + (let* ((test-dat (db:get-test-info-by-id dbstruct run-id test-id))) + (if test-dat + (let* ((test-rundir (db:test-get-rundir test-dat)) ;; ) ;; ) + (test-name (db:test-get-testname test-dat)) + (item-path (db:test-get-item-path test-dat)) + (duration (db:test-get-run_duration test-dat)) + (comment (db:test-get-comment test-dat)) + (event-time (db:test-get-event_time test-dat)) + (tconfig #f) + (state (if newstate newstate (db:test-get-state test-dat))) + (status (if newstatus newstatus (db:test-get-status test-dat)))) + ;; (mutex-lock! 
*triggers-mutex*) + (handle-exceptions + exn + (begin + (debug:print-error 0 *default-log-port* " Exception in mt:process-triggers for run-id="run-id" test-id="test-id" newstate="newstate" newstatus="newstatus + "\n error: " ((condition-property-accessor 'exn 'message) exn) + "\n test-rundir="test-rundir + "\n test-name="test-name + "\n item-path="item-path + "\n state="state + "\n status="status + "\n") + (print-call-chain (current-error-port)) + #f) + (if (and test-name + test-rundir) ;; #f means no dir set yet + ;; (common:file-exists? test-rundir) + ;; (directory? test-rundir)) + (call-with-environment-variables + (list (cons "MT_TEST_NAME" (or test-name "no such test")) + (cons "MT_TEST_RUN_DIR" (or test-rundir "no test directory yet")) + (cons "MT_ITEMPATH" (or item-path ""))) + (lambda () + (if (directory-exists? test-rundir) + (push-directory test-rundir) + (push-directory *toppath*)) + (set! tconfig (mt:lazy-read-test-config test-name)) + (for-each (lambda (trigger) + (let* ((munged-trigger (string-translate trigger "/ " "--")) + (logname (conc "last-trigger-" munged-trigger ".log"))) + ;; first any triggers from the testconfig + (let ((cmd (configf:lookup tconfig "triggers" trigger))) + (if cmd (mt:run-trigger cmd test-id test-rundir trigger (conc "tconfig-" logname) test-name item-path event-time state status))) + ;; next any triggers from megatest.config + (let ((cmd (configf:lookup *configdat* "triggers" trigger))) + (if cmd (mt:run-trigger cmd test-id test-rundir trigger (conc "mtconfig-" logname) test-name item-path event-time state status))))) + (list + (conc state "/" status) + (conc state "/") + (conc "/" status))) + (pop-directory)) + ))) + ;; (mutex-unlock! 
*triggers-mutex*) + ))))) +;;====================================================================== +;; TASKS STUFF +;;====================================================================== + + +#;(define (tasks:process-queue dbstruct) + (let* ((task (tasks:snag-a-task dbstruct)) + (action (if task (tasks:task-get-action task) #f))) + (if action (print "tasks:process-queue task: " task)) + (if action + (case (string->symbol action) + ((run) (tasks:start-run dbstruct task)) + ((remove) (tasks:remove-runs dbstruct task)) + ((lock) (tasks:lock-runs dbstruct task)) + ;; ((monitor) (tasks:start-monitor db task)) + #;((rollup) (tasks:rollup-runs dbstruct task)) + ((updatemeta)(tasks:update-meta dbstruct task)) + #;((kill) (tasks:kill-monitors dbstruct task)))))) + +(define (tasks:tasks->text tasks) + (let ((fmtstr "~10a~10a~10a~12a~20a~12a~12a~10a")) + (conc (format #f fmtstr "id" "action" "owner" "state" "target" "runname" "testpatts" "params") "\n" + (string-intersperse + (map (lambda (task) + (format #f fmtstr + (tasks:task-get-id task) + (tasks:task-get-action task) + (tasks:task-get-owner task) + (tasks:task-get-state task) + (tasks:task-get-target task) + (tasks:task-get-name task) + (tasks:task-get-testpatt task) + ;; (tasks:task-get-item task) + (tasks:task-get-params task))) + tasks) "\n")))) + +(define (tasks:set-state dbstruct task-id state) + (db:with-db + dbstruct #f #t + (lambda (db) + (sqlite3:execute db "UPDATE tasks_queue SET state=? 
WHERE id=?;"
+                     state
+                     task-id))))
+
+;;======================================================================
+;; Access using task key (stored in params; (hash-table->alist flags) hostname pid
+;;======================================================================
+
+;; Return the id of the first tasks_queue row whose params column is LIKE
+;; task-params, or #f if the query raises an exception (the handler swallows it).
+;;
+(define (tasks:param-key->id dbstruct task-params)
+  (db:with-db
+   dbstruct #f #f
+   (lambda (db)
+     (handle-exceptions
+      exn
+      #f
+      (sqlite3:first-result db "SELECT id FROM tasks_queue WHERE params LIKE ?;"
+                            task-params)))))
+
+;; Set state to new-state on every tasks_queue row whose params column is LIKE param-key.
+;;
+(define (tasks:set-state-given-param-key dbstruct param-key new-state)
+  (db:with-db
+   dbstruct #f #t
+   (lambda (db)
+     (sqlite3:execute db "UPDATE tasks_queue SET state=? WHERE params LIKE ?;" new-state param-key))))
+
+;; Return the first tasks_queue row (id action owner state target name testpatt
+;; keylock params) whose params, state, action and testpatt columns are LIKE the
+;; given patterns, or '() if the query raises an exception.
+;;
+;; The SELECT previously had no FROM clause, so it was not valid SQL and the
+;; exception handler turned every call into '().
+;;
+(define (tasks:get-records-given-param-key dbstruct param-key state-patt action-patt test-patt)
+  (db:with-db
+   dbstruct #f #f
+   (lambda (db)
+     (handle-exceptions
+      exn
+      '()
+      (sqlite3:first-row db "SELECT id,action,owner,state,target,name,testpatt,keylock,params FROM tasks_queue WHERE
+                  params LIKE ? AND state LIKE ? AND action LIKE ? AND testpatt LIKE ?;"
+                         param-key state-patt action-patt test-patt)))))
+
+;; Return all tasks_queue rows for an exact target and run-name whose state,
+;; action and testpatt are LIKE the given patterns. Each row is a list of the
+;; selected columns; rows are consed up, so they come back in reverse query order.
+;;
+(define (tasks:find-task-queue-records dbstruct target run-name test-patt state-patt action-patt)
+  ;; (handle-exceptions
+  ;;  exn
+  ;;  '()
+  ;;  (sqlite3:first-row
+  (let ((db (db:delay-if-busy (db:get-db dbstruct)))
+        (res '()))
+    (sqlite3:for-each-row
+     (lambda (a . b)
+       (set! res (cons (cons a b) res)))
+     db "SELECT id,action,owner,state,target,name,testpatt,keylock,params FROM tasks_queue
+           WHERE
+              target = ? AND name = ? AND state LIKE ? AND action LIKE ? AND testpatt LIKE ?;"
+     target run-name state-patt action-patt test-patt)
+    res)) ;; )
+
+;; (define (tasks:start-run dbstruct mdb task)
+;;   (let ((flags (make-hash-table)))
+;;     (hash-table-set! flags "-rerun" "NOT_STARTED")
+;;     (if (not (string=? (tasks:task-get-params task) ""))
+;;         (hash-table-set!
flags "-setvars" (tasks:task-get-params task))) +;; (print "Starting run " task) +;; ;; sillyness, just call the damn routine with the task vector and be done with it. FIXME SOMEDAY +;; (runs:run-tests db +;; (tasks:task-get-target task) +;; (tasks:task-get-name task) +;; (tasks:task-get-test task) +;; (tasks:task-get-item task) +;; (tasks:task-get-owner task) +;; flags) +;; (tasks:set-state mdb (tasks:task-get-id task) "waiting"))) +;; +;; (define (tasks:rollup-runs db mdb task) +;; (let* ((flags (make-hash-table)) +;; (keys (db:get-keys db)) +;; (keyvals (keys:target-keyval keys (tasks:task-get-target task)))) +;; ;; (hash-table-set! flags "-rerun" "NOT_STARTED") +;; (print "Starting rollup " task) +;; ;; sillyness, just call the damn routine with the task vector and be done with it. FIXME SOMEDAY +;; (runs:rollup-run db +;; keys +;; keyvals +;; (tasks:task-get-name task) +;; (tasks:task-get-owner task)) +;; (tasks:set-state mdb (tasks:task-get-id task) "waiting"))) + + +;; register a task +(define (tasks:add dbstruct action owner target runname testpatt params) + (db:with-db + dbstruct #f #t + (lambda (db) + (sqlite3:execute db "INSERT INTO tasks_queue (action,owner,state,target,name,testpatt,params,creation_time,execution_time) + VALUES (?,?,'new',?,?,?,?,strftime('%s','now'),0);" + action + owner + target + runname + testpatt + (if params params ""))))) + +(define (keys:key-vals-hash->target keys key-params) + (let ((tmp (hash-table-ref/default key-params (vector-ref (car keys) 0) ""))) + (if (> (length keys) 1) + (for-each (lambda (key) + (set! 
tmp (conc tmp "/" (hash-table-ref/default key-params (vector-ref key 0) "")))) + (cdr keys))) + tmp)) + +;; for use from the gui, not ported +;; +;; (define (tasks:add-from-params mdb action keys key-params var-params) +;; (let ((target (keys:key-vals-hash->target keys key-params)) +;; (owner (car (user-information (current-user-id)))) +;; (runname (hash-table-ref/default var-params "runname" #f)) +;; (testpatts (hash-table-ref/default var-params "testpatts" "%")) +;; (params (hash-table-ref/default var-params "params" ""))) +;; (tasks:add mdb action owner target runname testpatts params))) + +;; return one task from those who are 'new' OR 'waiting' AND more than 10sec old +;; +(define (tasks:snag-a-task dbstruct) + (let ((res #f) + (keytxt (conc (current-process-id) "-" (get-host-name) "-" (car (user-information (current-user-id)))))) + (db:with-db + dbstruct #f #t + (lambda (db) + ;; first randomly set a new to pid-hostname-hostname + (sqlite3:execute + db + "UPDATE tasks_queue SET keylock=? WHERE id IN + (SELECT id FROM tasks_queue + WHERE state='new' OR + (state='waiting' AND (strftime('%s','now')-execution_time) > 10) OR + state='reset' + ORDER BY RANDOM() LIMIT 1);" keytxt) + + (sqlite3:for-each-row + (lambda (id . rem) + (set! res (apply vector id rem))) + db + "SELECT id,action,owner,state,target,name,test,item,params,creation_time,execution_time FROM tasks_queue WHERE keylock=? ORDER BY execution_time ASC LIMIT 1;" keytxt) + (if res ;; yep, have work to be done + (begin + (sqlite3:execute db "UPDATE tasks_queue SET state='inprogress',execution_time=strftime('%s','now') WHERE id=?;" + (tasks:task-get-id res)) + res) + #f))))) + +(define (tasks:reset-stuck-tasks dbstruct) + (let ((res '())) + (db:with-db + dbstruct #f #t + (lambda (db) + (sqlite3:for-each-row + (lambda (id delta) + (set! 
res (cons id res))) + db + "SELECT id,strftime('%s','now')-execution_time AS delta FROM tasks_queue WHERE state='inprogress' AND delta>700 ORDER BY delta DESC LIMIT 2;") + (sqlite3:execute + db + (conc "UPDATE tasks_queue SET state='reset' WHERE id IN ('" (string-intersperse (map conc res) "','") "');") + ))))) + +;; return all tasks in the tasks_queue table +;; +(define (tasks:get-tasks dbstruct types states) + (let ((res '())) + (db:with-db + dbstruct #f #f + (lambda (db) + (sqlite3:for-each-row + (lambda (id . rem) + (set! res (cons (apply vector id rem) res))) + db + (conc "SELECT id,action,owner,state,target,name,test,item,params,creation_time,execution_time + FROM tasks_queue " + ;; WHERE + ;; state IN " statesstr " AND + ;; action IN " actionsstr + " ORDER BY creation_time DESC;")) + res)))) + +(define (tasks:get-last dbstruct target runname) + (let ((res #f)) + (db:with-db + dbstruct #f #f + (lambda (db) + (sqlite3:for-each-row + (lambda (id . rem) + (set! res (apply vector id rem))) + db + (conc "SELECT id,action,owner,state,target,name,testpatt,keylock,params,creation_time,execution_time + FROM tasks_queue + WHERE + target = ? AND name =? 
+ ORDER BY creation_time DESC LIMIT 1;") + target runname) + res)))) + +;; remove tasks given by a string of numbers comma separated +(define (tasks:remove-queue-entries dbstruct task-ids) + (db:with-db + dbstruct #f #t + (lambda (db) + (sqlite3:execute db (conc "DELETE FROM tasks_queue WHERE id IN (" task-ids ");"))))) + ) Index: dcommonmod.scm ================================================================== --- dcommonmod.scm +++ dcommonmod.scm @@ -23,10 +23,15 @@ (declare (uses testsmod)) (declare (uses megamod)) (declare (uses mtargs)) (declare (uses mtconfigf)) (declare (uses gutilsmod)) +(declare (uses subrunmod)) +(declare (uses runsmod)) +(declare (uses rmtmod)) +(declare (uses dbmod)) +(declare (uses servermod)) (module dcommonmod * (import scheme chicken data-structures extras) @@ -86,10 +91,15 @@ (import (prefix mtconfigf configf:)) (import gutilsmod) (import commonmod) (import testsmod) (import megamod) +(import subrunmod) +(import runsmod) +(import rmtmod) +(import dbmod) +(import servermod) (import canvas-draw) (import canvas-draw-iup) (use (prefix iup iup:)) (import (prefix mtargs args:)) @@ -103,14 +113,14 @@ (include "run_records.scm") (include "task_records.scm") (include "test_records.scm") (include "vg_records.scm") -(include "dcommon-inc.scm") (include "dashboard-tests-inc.scm") +(include "dcommon-inc.scm") (include "vg-inc.scm") (include "tree-inc.scm") (include "dashboard-context-menu-inc.scm") (include "ezsteps-inc.scm") -;; (include "gutils-inc.scm") +;;;; (include "gutils-inc.scm") ) Index: docs/code/module-hierarchy.dot ================================================================== --- docs/code/module-hierarchy.dot +++ docs/code/module-hierarchy.dot @@ -17,70 +17,130 @@ // digraph megatest_code_hierarchy { ranksep=0.05; // rankdir=LR - node [shape=box,style=filled]; - - "common" [label="common module"]; + node [shape=box,style=filled,fontname="clear",fontsize="10"]; subgraph cluster_megatest { label="megatest"; - "process" -> 
"common" -> "megamod" -> "api"; - "megamod" -> "rmt"; - "common" -> "api" -> "rmt"; - "common" -> "rmt"; - } - -// subgraph cluster_code_hierarchy { -// label="Packets"; -// "SPKTS" [ label = "Sensor Packets" ]; -// "run code-hierarchy"; -// "work code-hierarchy"; -// "user request code-hierarchy"; -// } -} - -// "SENSORS" [ label = "{ Sensor Processing | { file | git | fossil | script }}" -// shape = "record"; ]; -// -// "RUNS" [ label = "{ Runs Processing | { launch | clean | re-run | archive } | { dispatcher }}"; -// shape = "record"; ]; -// -// "WORK" [ label = "{ Work Items | { start task | task competed }}"; -// shape = "record"; ]; -// -// "USERREQ" [ label = "{ User Requests (Unix and Web) | { launch | clean | re-run | archive }}"; -// shape = "record"; ]; -// -// "MTAREA1" [ label = "{ Megatest Area 1 | { parallel job\nmanagement | test\nmanagement | data\nrollup }}"; -// shape = "record"; ]; -// -// "MTAREA2" [ label = "{ Megatest Area 2 | { parallel job\nmanagement | test\nmanagement | data\nrollup }}"; -// shape = "record"; ]; -// -// "MTAREA3" [ label = "More Megatest Areas ... 
"; -// shape = "record"; ]; -// -// "PGDB" [ label = "postgres database"; -// shape = "cylinder"; ]; -// -// "WEBAPP" [ label = "{ Web View | { Runs | Contours | Control | Time View }}"; -// shape = "record"; ]; -// -// // "WEBCTRL" [ label = "{ Web View \n(control) }"; -// // shape = "record"; ]; -// -// "SENSORS" -> "SPKTS"; -// "RUNS" -> "run code-hierarchy"; -// "run code-hierarchy" -> "RUNS"; -// "WORK" -> "work code-hierarchy"; -// "work code-hierarchy" -> "RUNS"; -// "USERREQ" -> "user request code-hierarchy"; -// "SPKTS" -> "RUNS"; -// "user request code-hierarchy" -> "RUNS"; -// "RUNS" -> "MTAREA1" -> "PGDB"; -// "RUNS" -> "MTAREA2" -> "PGDB"; -// "RUNS" -> "MTAREA3" -> "PGDB"; -// "PGDB" -> "WEBAPP"; -// // "WEBCTRL" -> "run code-hierarchy"; - + + rmtmod [label="rmt mod"]; + // httpmod [label="http-transportmod"]; + // commonmod +// archivemod.scm +"commonmod" -> "archivemod"; +// clientmod.scm +"commonmod" -> "clientmod"; +// configfmod.scm +// subrunmod.scm +"commonmod" -> "subrunmod"; +// ezstepsmod.scm +"commonmod" -> "ezstepsmod"; +// itemsmod.scm +"commonmod" -> "itemsmod"; +// gutilsmod.scm +// testsmod.scm +"commonmod" -> "testsmod"; +"servermod" -> "testsmod"; +"itemsmod" -> "testsmod"; +"dbmod" -> "testsmod"; +// runconfigmod.scm +"commonmod" -> "runconfigmod"; +// keysmod.scm +"srfi-13" -> "keysmod"; +// launchmod.scm +"commonmod" -> "launchmod"; +// megamod.scm +"spiffy" -> "megamod"; +"stml2" -> "megamod"; +"commonmod" -> "megamod"; +"keysmod" -> "megamod"; +"pgdbmod" -> "megamod"; +"tasksmod" -> "megamod"; +"dbmod" -> "megamod"; +"apimod" -> "megamod"; +"ducttape-lib" -> "megamod"; +"itemsmod" -> "megamod"; +"pkts" -> "megamod"; +"rmtmod" -> "megamod"; +"mtmod" -> "megamod"; +"testsmod" -> "megamod"; +"servermod" -> "megamod"; +"subrunmod" -> "megamod"; +"itemsmod" -> "megamod"; +"runsmod" -> "megamod"; +// odsmod.scm +"commonmod" -> "odsmod"; +// envmod.scm +"commonmod" -> "envmod"; +// http-transportmod.scm +"commonmod" -> 
"http-transportmod"; +"stml2" -> "http-transportmod"; +"apimod" -> "http-transportmod"; +"dbmod" -> "http-transportmod"; +"testsmod" -> "http-transportmod"; +// processmod.scm +// mtmod.scm +"commonmod" -> "mtmod"; +"dbmod" -> "mtmod"; +"pgdbmod" -> "mtmod"; +"rmtmod" -> "mtmod"; +"servermod" -> "mtmod"; +"stml2" -> "mtmod"; +"subrunmod" -> "mtmod"; +"tasksmod" -> "mtmod"; +"testsmod" -> "mtmod"; +// pgdbmod.scm +"commonmod" -> "pgdbmod"; +// runsmod.scm +"commonmod" -> "runsmod"; +"dbmod" -> "runsmod"; +"itemsmod" -> "runsmod"; +"mtmod" -> "runsmod"; +"pgdbmod" -> "runsmod"; +"rmtmod" -> "runsmod"; +"servermod" -> "runsmod"; +"stml2" -> "runsmod"; +"subrunmod" -> "runsmod"; +"tasksmod" -> "runsmod"; +"testsmod" -> "runsmod"; +// apimod.scm +"commonmod" -> "apimod"; +"dbmod" -> "apimod"; +"servermod" -> "apimod"; +// dbmod.scm +"commonmod" -> "dbmod"; +"keysmod" -> "dbmod"; +"files" -> "dbmod"; +"tasksmod" -> "dbmod"; +"odsmod" -> "dbmod"; +// dcommonmod.scm +"gutilsmod" -> "dcommonmod"; +"commonmod" -> "dcommonmod"; +"testsmod" -> "dcommonmod"; +"megamod" -> "dcommonmod"; +"canvas-draw" -> "dcommonmod"; +"canvas-draw-iup" -> "dcommonmod"; +// tasksmod.scm +"commonmod" -> "tasksmod"; +"pgdbmod" -> "tasksmod"; +// template-mod.scm +"commonmod" -> "template-mod"; +// servermod.scm +"commonmod" -> "servermod"; +"dbmod" -> "servermod"; +"tasksmod" -> "servermod"; +// treemod.scm +// commonmod.scm +"pkts" -> "commonmod"; +// vgmod.scm +// rmtmod.scm +"commonmod" -> "rmtmod"; +"itemsmod" -> "rmtmod"; +"apimod" -> "rmtmod"; +"dbmod" -> "rmtmod"; + +} + +} + DELETED http-transport-inc.scm Index: http-transport-inc.scm ================================================================== --- http-transport-inc.scm +++ /dev/null @@ -1,680 +0,0 @@ - -;; Copyright 2006-2012, Matthew Welland. -;; -;; This file is part of Megatest. 
-;; -;; Megatest is free software: you can redistribute it and/or modify -;; it under the terms of the GNU General Public License as published by -;; the Free Software Foundation, either version 3 of the License, or -;; (at your option) any later version. -;; -;; Megatest is distributed in the hope that it will be useful, -;; but WITHOUT ANY WARRANTY; without even the implied warranty of -;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -;; GNU General Public License for more details. -;; -;; You should have received a copy of the GNU General Public License -;; along with Megatest. If not, see . - -(define (http-transport:make-server-url hostport) - (if (not hostport) - #f - (conc "http://" (car hostport) ":" (cadr hostport)))) - -(define *server-loop-heart-beat* (current-seconds)) - -;;====================================================================== -;; S E R V E R -;; ====================================================================== - -;; Call this to start the actual server -;; - -(define *db:process-queue-mutex* (make-mutex)) - -(define (http-transport:run hostn) - ;; Configurations for server - (tcp-buffer-size 2048) - (max-connections 2048) - (debug:print 2 *default-log-port* "Attempting to start the server ...") - (let* ((db #f) ;; (open-db)) ;; we don't want the server to be opening and closing the db unnecesarily - (hostname (get-host-name)) - (ipaddrstr (let ((ipstr (if (string=? 
"-" hostn) - ;; (string-intersperse (map number->string (u8vector->list (hostname->ip hostname))) ".") - (server:get-best-guess-address hostname) - #f))) - (if ipstr ipstr hostn))) ;; hostname))) - (start-port (portlogger:open-run-close portlogger:find-port)) - (link-tree-path (common:get-linktree)) - (tmp-area (common:get-db-tmp-area)) - (start-file (conc tmp-area "/.server-start"))) - (debug:print-info 0 *default-log-port* "portlogger recommended port: " start-port) - ;; set some parameters for the server - (root-path (if link-tree-path - link-tree-path - (current-directory))) ;; WARNING: SECURITY HOLE. FIX ASAP! - (handle-directory spiffy-directory-listing) - (handle-exception (lambda (exn chain) - (signal (make-composite-condition - (make-property-condition - 'server - 'message "server error"))))) - - ;; http-transport:handle-directory) ;; simple-directory-handler) - ;; Setup the web server and a /ctrl interface - ;; - (vhost-map `(((* any) . ,(lambda (continue) - ;; open the db on the first call - ;; This is were we set up the database connections - (let* (($ (request-vars source: 'both)) - (dat ($ 'dat)) - (res #f)) - (cond - ((equal? (uri-path (request-uri (current-request))) - '(/ "api")) - (send-response body: (api:process-request *dbstruct-db* $) ;; the $ is the request vars proc - headers: '((content-type text/plain))) - (mutex-lock! *heartbeat-mutex*) - (set! *db-last-access* (current-seconds)) - (mutex-unlock! *heartbeat-mutex*)) - ((equal? (uri-path (request-uri (current-request))) - '(/ "")) - (send-response body: (http-transport:main-page))) - ((equal? (uri-path (request-uri (current-request))) - '(/ "json_api")) - (send-response body: (http-transport:main-page))) - ((equal? (uri-path (request-uri (current-request))) - '(/ "runs")) - (send-response body: (http-transport:main-page))) - ((equal? (uri-path (request-uri (current-request))) - '(/ any)) - (send-response body: "hey there!\n" - headers: '((content-type text/plain)))) - ((equal? 
(uri-path (request-uri (current-request))) - '(/ "hey")) - (send-response body: "hey there!\n" - headers: '((content-type text/plain)))) - ((equal? (uri-path (request-uri (current-request))) - '(/ "jquery3.1.0.js")) - (send-response body: (http-transport:show-jquery) - headers: '((content-type application/javascript)))) - ((equal? (uri-path (request-uri (current-request))) - '(/ "test_log")) - (send-response body: (http-transport:html-test-log $) - headers: '((content-type text/HTML)))) - ((equal? (uri-path (request-uri (current-request))) - '(/ "dashboard")) - (send-response body: (http-transport:html-dboard $) - headers: '((content-type text/HTML)))) - (else (continue)))))))) - (with-output-to-file start-file (lambda ()(print (current-process-id)))) - (http-transport:try-start-server ipaddrstr start-port))) - -;; This is recursively run by http-transport:run until sucessful -;; -(define (http-transport:try-start-server ipaddrstr portnum) - (let ((config-hostname (configf:lookup *configdat* "server" "hostname")) - (config-use-proxy (equal? (configf:lookup *configdat* "client" "use-http_proxy") "yes"))) - (if (not config-use-proxy) - (determine-proxy (constantly #f))) - (debug:print-info 0 *default-log-port* "http-transport:try-start-server time=" (seconds->time-string (current-seconds)) " ipaddrsstr=" ipaddrstr " portnum=" portnum " config-hostname=" config-hostname) - (handle-exceptions - exn - (begin - (print-error-message exn) - (if (< portnum 64000) - (begin - (debug:print 0 *default-log-port* "WARNING: attempt to start server failed. Trying again ...") - (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) - (debug:print 5 *default-log-port* "exn=" (condition->list exn)) - (portlogger:open-run-close portlogger:set-failed portnum) - (debug:print 0 *default-log-port* "WARNING: failed to start on portnum: " portnum ", trying next port") - (thread-sleep! 
0.1) - - ;; get_next_port goes here - (http-transport:try-start-server ipaddrstr - (portlogger:open-run-close portlogger:find-port))) - (begin - (print "ERROR: Tried and tried but could not start the server")))) - ;; any error in following steps will result in a retry - (set! *server-info* (list ipaddrstr portnum)) - (debug:print 0 *default-log-port* "INFO: Trying to start server on " ipaddrstr ":" portnum) - ;; This starts the spiffy server - ;; NEED WAY TO SET IP TO #f TO BIND ALL - ;; (start-server bind-address: ipaddrstr port: portnum) - (if config-hostname ;; this is a hint to bind directly - (start-server port: portnum bind-address: (if (equal? config-hostname "-") - ipaddrstr - config-hostname)) - (start-server port: portnum)) - (portlogger:open-run-close portlogger:set-port portnum "released") - (debug:print 1 *default-log-port* "INFO: server has been stopped")))) - -;;====================================================================== -;; S E R V E R U T I L I T I E S -;;====================================================================== - -;;====================================================================== -;; C L I E N T S -;;====================================================================== - -(define *http-mutex* (make-mutex)) - -;; NOTE: Large block of code from 32436b426188080f72fceb6894af541fbad9921e removed here -;; I'm pretty sure it is defunct. - -;; This next block all imported en-mass from the api branch -(define *http-requests-in-progress* 0) -(define *http-connections-next-cleanup* (current-seconds)) - -(define (http-transport:get-time-to-cleanup) - (let ((res #f)) - (mutex-lock! *http-mutex*) - (set! res (> (current-seconds) *http-connections-next-cleanup*)) - (mutex-unlock! *http-mutex*) - res)) - -(define (http-transport:inc-requests-count) - (mutex-lock! *http-mutex*) - (set! 
*http-requests-in-progress* (+ 1 *http-requests-in-progress*)) - ;; Use this opportunity to slow things down iff there are too many requests in flight - (if (> *http-requests-in-progress* 5) - (begin - (debug:print-info 0 *default-log-port* "Whoa there buddy, ease up...") - (thread-sleep! 1))) - (mutex-unlock! *http-mutex*)) - -(define (http-transport:dec-requests-count proc) - (mutex-lock! *http-mutex*) - (proc) - (set! *http-requests-in-progress* (- *http-requests-in-progress* 1)) - (mutex-unlock! *http-mutex*)) - -(define (http-transport:dec-requests-count-and-close-all-connections) - (set! *http-requests-in-progress* (- *http-requests-in-progress* 1)) - (let loop ((etime (+ (current-seconds) 5))) ;; give up in five seconds - (if (> *http-requests-in-progress* 0) - (if (> etime (current-seconds)) - (begin - (thread-sleep! 0.05) - (loop etime)) - (debug:print-error 0 *default-log-port* "requests still in progress after 5 seconds of waiting. I'm going to pass on cleaning up http connections")) - (close-all-connections!))) - (set! *http-connections-next-cleanup* (+ (current-seconds) 10)) - (mutex-unlock! *http-mutex*)) - -(define (http-transport:inc-requests-and-prep-to-close-all-connections) - (mutex-lock! *http-mutex*) - (set! *http-requests-in-progress* (+ 1 *http-requests-in-progress*))) - -;; Send "cmd" with json payload "params" to serverdat and receive result -;; -(define (http-transport:client-api-send-receive run-id serverdat cmd params #!key (numretries 3)(area-dat #f)) - (let* ((fullurl (if (vector? 
serverdat) - (http-transport:server-dat-get-api-req serverdat) - (begin - (debug:print 0 *default-log-port* "FATAL ERROR: http-transport:client-api-send-receive called with no server info") - (exit 1)))) - (res (vector #f "uninitialized")) - (success #t) - (sparams (db:obj->string params transport: 'http)) - (runremote (or area-dat *runremote*))) - (debug:print-info 11 *default-log-port* "fullurl=" fullurl ", cmd=" cmd ", params=" params ", run-id=" run-id "\n") - ;; set up the http-client here - (max-retry-attempts 1) - ;; consider all requests indempotent - (retry-request? (lambda (request) - #f)) - ;; send the data and get the response - ;; extract the needed info from the http data and - ;; process and return it. - (let* ((send-recieve (lambda () - (mutex-lock! *http-mutex*) - ;; (condition-case (with-input-from-request "http://localhost"; #f read-lines) - ;; ((exn http client-error) e (print e))) - (set! res (vector ;;; DON'T FORGET - THIS IS THE CLIENT SIDE! NOTE: consider moving this to client.scm since we are only supporting http transport at this time. - success - (db:string->obj - (handle-exceptions - exn - (let ((call-chain (get-call-chain)) - (msg ((condition-property-accessor 'exn 'message) exn))) - (set! success #f) - (if (debug:debug-mode 1) - (debug:print-info 0 *default-log-port* "couldn't talk to server, trying again ...") - (begin - (debug:print 0 *default-log-port* "WARNING: failure in with-input-from-request to " fullurl ".") - (debug:print 0 *default-log-port* " message: " msg) - (debug:print 0 *default-log-port* " cmd: " cmd " params: " params) - (debug:print 0 *default-log-port* " call-chain: " call-chain))) - (if runremote - (remote-conndat-set! runremote #f)) - ;; Killing associated server to allow clean retry.") - ;; (tasks:kill-server-run-id run-id) ;; better to kill the server in the logic that called this routine? - (mutex-unlock! 
*http-mutex*) - ;;; (signal (make-composite-condition - ;;; (make-property-condition 'commfail 'message "failed to connect to server"))) - ;;; "communications failed" - (db:obj->string #f)) - (with-input-from-request ;; was dat - fullurl - (list (cons 'key (or *server-id* "thekey")) - (cons 'cmd cmd) - (cons 'params sparams)) - read-string)) - transport: 'http) - 0)) ;; added this speculatively - ;; Shouldn't this be a call to the managed call-all-connections stuff above? - (close-all-connections!) - (mutex-unlock! *http-mutex*) - )) - (time-out (lambda () - (thread-sleep! 45) - #f)) - (th1 (make-thread send-recieve "with-input-from-request")) - (th2 (make-thread time-out "time out"))) - (thread-start! th1) - (thread-start! th2) - (thread-join! th1) - (thread-terminate! th2) - (debug:print-info 11 *default-log-port* "got res=" res) - (if (vector? res) - (if (vector-ref res 0) ;; this is the first flag or the second flag? - res ;; this is the *inner* vector? seriously? why? - (if (debug:debug-mode 11) - (let ((call-chain (get-call-chain))) ;; note: this code also called in nmsg-transport - consider consolidating it - (print-call-chain (current-error-port)) - (debug:print-error 11 *default-log-port* "error above occured at server, res=" res) ;; " message: " ((condition-property-accessor 'exn 'message) exn)) - (debug:print 11 *default-log-port* " server call chain:") - (pp (vector-ref res 1) (current-error-port)) - (signal (vector-ref res 0))) - res)) - (signal (make-composite-condition - (make-property-condition - 'timeout - 'message "nmsg-transport:client-api-send-receive-raw timed out talking to server"))))))) - -;; careful closing of connections stored in *runremote* -;; -(define (http-transport:close-connections #!key (area-dat #f)) - (let* ((runremote (or area-dat *runremote*)) - (server-dat (if runremote - (remote-conndat runremote) - #f))) ;; (hash-table-ref/default *runremote* run-id #f))) - (if (vector? 
server-dat) - (let ((api-dat (http-transport:server-dat-get-api-uri server-dat))) - (handle-exceptions - exn - (begin - (print-call-chain *default-log-port*) - (debug:print-error 0 *default-log-port* " closing connection failed with error: " ((condition-property-accessor 'exn 'message) exn))) - (close-connection! api-dat) - ;;(close-idle-connections!) - #t)) - #f))) - -;; Moved to commonmod -;; -;; (define (make-http-transport:server-dat)(make-vector 6)) -;; (define (http-transport:server-dat-get-iface vec) (vector-ref vec 0)) -;; (define (http-transport:server-dat-get-port vec) (vector-ref vec 1)) -;; (define (http-transport:server-dat-get-api-uri vec) (vector-ref vec 2)) -;; (define (http-transport:server-dat-get-api-url vec) (vector-ref vec 3)) -;; (define (http-transport:server-dat-get-api-req vec) (vector-ref vec 4)) -;; (define (http-transport:server-dat-get-last-access vec) (vector-ref vec 5)) -;; (define (http-transport:server-dat-get-socket vec) (vector-ref vec 6)) -;; -;; (define (http-transport:server-dat-make-url vec) -;; (if (and (http-transport:server-dat-get-iface vec) -;; (http-transport:server-dat-get-port vec)) -;; (conc "http://" -;; (http-transport:server-dat-get-iface vec) -;; ":" -;; (http-transport:server-dat-get-port vec)) -;; #f)) -;; -;; (define (http-transport:server-dat-update-last-access vec) -;; (if (vector? vec) -;; (vector-set! 
vec 5 (current-seconds)) -;; (begin -;; (print-call-chain (current-error-port)) -;; (debug:print-error 0 *default-log-port* "call to http-transport:server-dat-update-last-access with non-vector!!")))) - -;; -;; connect -;; -(define (http-transport:client-connect iface port) - (let* ((api-url (conc "http://" iface ":" port "/api")) - (api-uri (uri-reference (conc "http://" iface ":" port "/api"))) - (api-req (make-request method: 'POST uri: api-uri)) - (server-dat (vector iface port api-uri api-url api-req (current-seconds)))) - server-dat)) - -;; run http-transport:keep-running in a parallel thread to monitor that the db is being -;; used and to shutdown after sometime if it is not. -;; -(define (http-transport:keep-running) - ;; if none running or if > 20 seconds since - ;; server last used then start shutdown - ;; This thread waits for the server to come alive - (debug:print-info 0 *default-log-port* "Starting the sync-back, keep alive thread in server") - (let* ((sdat #f) - (tmp-area (common:get-db-tmp-area)) - (started-file (conc tmp-area "/.server-started")) - (server-start-time (current-seconds)) - (server-info (let loop ((start-time (current-seconds)) - (changed #t) - (last-sdat "not this")) - (begin ;; let ((sdat #f)) - (thread-sleep! 0.01) - (debug:print-info 0 *default-log-port* "Waiting for server alive signature") - (mutex-lock! *heartbeat-mutex*) - (set! sdat *server-info*) - (mutex-unlock! *heartbeat-mutex*) - (if (and sdat - (not changed) - (> (- (current-seconds) start-time) 2)) - (begin - (debug:print-info 0 *default-log-port* "Received server alive signature") - (common:save-pkt `((action . alive) - (T . server) - (pid . ,(current-process-id)) - (ipaddr . ,(car sdat)) - (port . 
,(cadr sdat))) - *configdat* #t) - sdat) - (begin - (debug:print-info 0 *default-log-port* "Still waiting, last-sdat=" last-sdat) - (sleep 4) - (if (> (- (current-seconds) start-time) 120) ;; been waiting for two minutes - (begin - (debug:print-error 0 *default-log-port* "transport appears to have died, exiting server") - (common:save-pkt `((action . died) - (T . server) - (pid . ,(current-process-id)) - (ipaddr . ,(car sdat)) - (port . ,(cadr sdat)) - (msg . "Transport died?")) - *configdat* #t) - (exit)) - (loop start-time - (equal? sdat last-sdat) - sdat))))))) - (iface (car server-info)) - (port (cadr server-info)) - (last-access 0) - (server-timeout (server:expiration-timeout)) - (server-going #f) - (server-log-file (args:get-arg "-log"))) ;; always set when we are a server - - (with-output-to-file started-file (lambda ()(print (current-process-id)))) - - (let loop ((count 0) - (server-state 'available) - (bad-sync-count 0) - (start-time (current-milliseconds))) - ;; Use this opportunity to sync the tmp db to megatest.db - (if (not server-going) ;; *dbstruct-db* - (begin - (debug:print 0 *default-log-port* "SERVER: dbprep") - (set! *dbstruct-db* (db:setup #t)) ;; run-id)) - (set! server-going #t) - (debug:print 0 *default-log-port* "SERVER: running, megatest version: " (common:get-full-version)) ;; NOTE: the server is NOT yet marked as running in the log. We do that in the keep-running routine. - (thread-start! *watchdog*))) - - ;; when things go wrong we don't want to be doing the various queries too often - ;; so we strive to run this stuff only every four seconds or so. - (let* ((sync-time (- (current-milliseconds) start-time)) - (rem-time (quotient (- 4000 sync-time) 1000))) - (if (and (<= rem-time 4) - (> rem-time 0)) - (thread-sleep! rem-time))) - - (if (< count 1) ;; 3x3 = 9 secs aprox - (loop (+ count 1) 'running bad-sync-count (current-milliseconds))) - - ;; Check that iface and port have not changed (can happen if server port collides) - (mutex-lock! 
*heartbeat-mutex*) - (set! sdat *server-info*) - (mutex-unlock! *heartbeat-mutex*) - - (if (not (equal? sdat (list iface port))) - (let ((new-iface (car sdat)) - (new-port (cadr sdat))) - (debug:print-info 0 *default-log-port* "WARNING: interface changed, refreshing iface and port info") - (set! iface new-iface) - (set! port new-port) - (debug:print 0 *default-log-port* "SERVER STARTED: " iface ":" port " AT " (current-seconds)) - (flush-output *default-log-port*))) - - ;; Transfer *db-last-access* to last-access to use in checking that we are still alive - (mutex-lock! *heartbeat-mutex*) - (set! last-access *db-last-access*) - (mutex-unlock! *heartbeat-mutex*) - - (if (common:low-noise-print 120 (conc "server running on " iface ":" port)) - (begin - (debug:print 0 *default-log-port* "SERVER STARTED: " iface ":" port " AT " (current-seconds)) - (flush-output *default-log-port*))) - (if (common:low-noise-print 60 "dbstats") - (begin - (debug:print 0 *default-log-port* "Server stats:") - (db:print-current-query-stats))) - (let* ((hrs-since-start (/ (- (current-seconds) server-start-time) 3600))) - (cond - ((and *server-run* - (> (+ last-access server-timeout) - (current-seconds))) - (if (common:low-noise-print 120 "server continuing") - (debug:print-info 0 *default-log-port* "Server continuing, seconds since last db access: " (- (current-seconds) last-access)) - (let ((curr-time (current-seconds))) - (handle-exceptions - exn - (debug:print 0 *default-log-port* "ERROR: Failed to change timestamp on log file " server-log-file ". Are you out of space on that disk?") - (if (not *server-overloaded*) - (change-file-times server-log-file curr-time curr-time))))) - (loop 0 server-state bad-sync-count (current-milliseconds))) - (else - (debug:print-info 0 *default-log-port* "Server timed out. 
seconds since last db access: " (- (current-seconds) last-access)) - (http-transport:server-shutdown port))))))) - -(define (http-transport:server-shutdown port) - (begin - ;;(BB> "http-transport:server-shutdown called") - (debug:print-info 0 *default-log-port* "Starting to shutdown the server. pid="(current-process-id)) - ;; - ;; start_shutdown - ;; - (set! *time-to-exit* #t) ;; tell on-exit to be fast as we've already cleaned up - (portlogger:open-run-close portlogger:set-port port "released") - (thread-sleep! 1) - - ;; (debug:print-info 0 *default-log-port* "Max cached queries was " *max-cache-size*) - ;; (debug:print-info 0 *default-log-port* "Number of cached writes " *number-of-writes*) - ;; (debug:print-info 0 *default-log-port* "Average cached write time " - ;; (if (eq? *number-of-writes* 0) - ;; "n/a (no writes)" - ;; (/ *writes-total-delay* - ;; *number-of-writes*)) - ;; " ms") - ;; (debug:print-info 0 *default-log-port* "Number non-cached queries " *number-non-write-queries*) - ;; (debug:print-info 0 *default-log-port* "Average non-cached time " - ;; (if (eq? *number-non-write-queries* 0) - ;; "n/a (no queries)" - ;; (/ *total-non-write-delay* - ;; *number-non-write-queries*)) - ;; " ms") - - (db:print-current-query-stats) - (common:save-pkt `((action . exit) - (T . server) - (pid . ,(current-process-id))) - *configdat* #t) - (debug:print-info 0 *default-log-port* "Server shutdown complete. Exiting") - (exit))) - -;; all routes though here end in exit ... -;; -;; start_server? 
-;; -(define (http-transport:launch) - ;; check that a server start is in progress, pause or exit if so - (let* ((tmp-area (common:get-db-tmp-area)) - (server-start (conc tmp-area "/.server-start")) - (server-started (conc tmp-area "/.server-started")) - (start-time (common:lazy-modification-time server-start)) - (started-time (common:lazy-modification-time server-started)) - (server-starting (< start-time started-time)) ;; if start-time is less than started-time then a server is still starting - (start-time-old (> (- (current-seconds) start-time) 5)) - (cleanup-proc (lambda (msg) - (let* ((serv-fname (conc "server-" (current-process-id) "-" (get-host-name) ".log")) - (full-serv-fname (conc *toppath* "/logs/" serv-fname)) - (new-serv-fname (conc *toppath* "/logs/" "defunct-" serv-fname))) - (debug:print 0 *default-log-port* msg) - (if (common:file-exists? full-serv-fname) - (system (conc "sleep 1;mv -f " full-serv-fname " " new-serv-fname)) - (debug:print 0 *default-log-port* "INFO: cannot move " full-serv-fname " to " new-serv-fname)) - (exit))))) - (if (and (not start-time-old) ;; last server start try was less than five seconds ago - (not server-starting)) - (begin - (cleanup-proc "NOT starting server, there is either a recently started server or a server in process of starting") - (exit))) - ;; lets not even bother to start if there are already three or more server files ready to go - (let* ((num-alive (server:get-num-alive (server:get-list *toppath*)))) - (if (> num-alive 3) - (begin - (cleanup-proc (conc "ERROR: Aborting server start because there are already " num-alive " possible servers either running or starting up")) - (exit)))) - (common:save-pkt `((action . start) - (T . server) - (pid . 
,(current-process-id))) - *configdat* #t) - (let* ((th2 (make-thread (lambda () - (debug:print-info 0 *default-log-port* "Server run thread started") - (http-transport:run - (if (args:get-arg "-server") - (args:get-arg "-server") - "-") - )) "Server run")) - (th3 (make-thread (lambda () - (debug:print-info 0 *default-log-port* "Server monitor thread started") - (http-transport:keep-running) - "Keep running")))) - (thread-start! th2) - (thread-sleep! 0.25) ;; give the server time to settle before starting the keep-running monitor. - (thread-start! th3) - (set! *didsomething* #t) - (thread-join! th2) - (exit)))) - -;; (define (http-transport:server-signal-handler signum) -;; (signal-mask! signum) -;; (handle-exceptions -;; exn -;; (debug:print 0 *default-log-port* " ... exiting ...") -;; (let ((th1 (make-thread (lambda () -;; (thread-sleep! 1)) -;; "eat response")) -;; (th2 (make-thread (lambda () -;; (debug:print-error 0 *default-log-port* "Received ^C, attempting clean exit. Please be patient and wait a few seconds before hitting ^C again.") -;; (thread-sleep! 3) ;; give the flush three seconds to do it's stuff -;; (debug:print 0 *default-log-port* " Done.") -;; (exit 4)) -;; "exit on ^C timer"))) -;; (thread-start! th2) -;; (thread-start! th1) -;; (thread-join! th2)))) - -;;=============================================== -;; Java script -;;=============================================== -(define (http-transport:show-jquery) - (let* ((data (tests:readlines *java-script-lib*))) -(string-join data "\n"))) - - - -;;====================================================================== -;; web pages -;;====================================================================== - -(define (http-transport:html-test-log $) - (let* ((run-id ($ 'runid)) - (test-item ($ 'testname)) - (parts (string-split test-item ":")) - (test-name (car parts)) - - (item-name (if (equal? 
(length parts) 1) - "" - (cadr parts)))) - ;(print $) -(tests:get-test-log run-id test-name item-name))) - - -(define (http-transport:html-dboard $) - (let* ((page ($ 'page)) - (oup (open-output-string)) - (bdy "--------------------------") - - (ret (tests:dynamic-dboard page))) - (s:output-new oup ret) - (close-output-port oup) - - (set! bdy (get-output-string oup)) - (conc "

Dashboard

" bdy "

" ))) - -(define (http-transport:main-page) - (let ((linkpath (root-path))) - (conc "

" (pathname-strip-directory *toppath*) "

" - "" - "Run area: " *toppath* - "

Server Stats

" - (http-transport:stats-table) - "
" - (http-transport:runs linkpath) - "
" - ;; (http-transport:run-stats) - "" - ))) - -(define (http-transport:stats-table) - (mutex-lock! *heartbeat-mutex*) - (let ((res - (conc "" - ;; "" - "" - "" - "" - ;; "" - "" - "
Max cached queries " *max-cache-size* "
Number of cached writes " *number-of-writes* "
Average cached write time " (if (eq? *number-of-writes* 0) - "n/a (no writes)" - (/ *writes-total-delay* - *number-of-writes*)) - " ms
Number non-cached queries " *number-non-write-queries* "
Average non-cached time " (if (eq? *number-non-write-queries* 0) - ;; "n/a (no queries)" - ;; (/ *total-non-write-delay* - ;; *number-non-write-queries*)) - " ms
Last access" (seconds->time-string *db-last-access*) "
"))) - (mutex-unlock! *heartbeat-mutex*) - res)) - -(define (http-transport:runs linkpath) - (conc "

Runs

" - (string-intersperse - (let ((files (map pathname-strip-directory (glob (conc linkpath "/*"))))) - (map (lambda (p) - (conc "" p "
")) - files)) - " "))) - -#;(define (http-transport:run-stats) - (let ((stats (open-run-close db:get-running-stats #f))) - (conc "" - (string-intersperse - (map (lambda (stat) - (conc "")) - stats) - " ") - "
" (car stat) "" (cadr stat) "
"))) ADDED http-transportmod.scm Index: http-transportmod.scm ================================================================== --- /dev/null +++ http-transportmod.scm @@ -0,0 +1,719 @@ + +;; Copyright 2006-2012, Matthew Welland. +;; +;; This file is part of Megatest. +;; +;; Megatest is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. +;; +;; Megatest is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with Megatest. If not, see . + +(declare (unit apimod)) +(declare (uses commonmod)) +(declare (uses stml2)) +(declare (uses apimod)) +(declare (uses dbmod)) +(declare (uses mtargs)) +(declare (uses mtconfigf)) +(declare (uses testsmod)) + +(module apimod + * + +(import scheme chicken data-structures extras posix files + srfi-13 srfi-18 spiffy http-client + spiffy-directory-listing + spiffy-request-vars tcp + ) +(import commonmod) +(import stml2) +(import apimod) +(import dbmod) +(import testsmod) +(import (prefix mtargs args:)) +(import (prefix mtconfigf configf:)) + +(define (http-transport:make-server-url hostport) + (if (not hostport) + #f + (conc "http://" (car hostport) ":" (cadr hostport)))) + +(define *server-loop-heart-beat* (current-seconds)) +(define *writes-total-delay* 0) + +;;====================================================================== +;; S E R V E R +;; ====================================================================== + +;; Call this to start the actual server +;; +;; all routes though here end in exit ... 
+;; +;; start_server +;; +(define (server:launch run-id transport-type) + ;; (case transport-type + ;; ((http) + (http-transport:launch)) + ;;((nmsg)(nmsg-transport:launch run-id)) + ;;((rpc) (rpc-transport:launch run-id)) + ;; (else (debug:print-error 0 *default-log-port* "unknown server type " transport-type)))) + +(define *db:process-queue-mutex* (make-mutex)) + +(define (http-transport:run hostn) + ;; Configurations for server + (tcp-buffer-size 2048) + (max-connections 2048) + (debug:print 2 *default-log-port* "Attempting to start the server ...") + (let* ((db #f) ;; (open-db)) ;; we don't want the server to be opening and closing the db unnecesarily + (hostname (get-host-name)) + (ipaddrstr (let ((ipstr (if (string=? "-" hostn) + ;; (string-intersperse (map number->string (u8vector->list (hostname->ip hostname))) ".") + (server:get-best-guess-address hostname) + #f))) + (if ipstr ipstr hostn))) ;; hostname))) + (start-port (portlogger:open-run-close portlogger:find-port)) + (link-tree-path (common:get-linktree)) + (tmp-area (common:get-db-tmp-area)) + (start-file (conc tmp-area "/.server-start"))) + (debug:print-info 0 *default-log-port* "portlogger recommended port: " start-port) + ;; set some parameters for the server + (root-path (if link-tree-path + link-tree-path + (current-directory))) ;; WARNING: SECURITY HOLE. FIX ASAP! + (handle-directory spiffy-directory-listing) + (handle-exception (lambda (exn chain) + (signal (make-composite-condition + (make-property-condition + 'server + 'message "server error"))))) + + ;; http-transport:handle-directory) ;; simple-directory-handler) + ;; Setup the web server and a /ctrl interface + ;; + (vhost-map `(((* any) . ,(lambda (continue) + ;; open the db on the first call + ;; This is were we set up the database connections + (let* (($ (request-vars source: 'both)) + (dat ($ 'dat)) + (res #f)) + (cond + ((equal? 
(uri-path (request-uri (current-request))) + '(/ "api")) + (send-response body: (api:process-request *dbstruct-db* $) ;; the $ is the request vars proc + headers: '((content-type text/plain))) + (mutex-lock! *heartbeat-mutex*) + (set! *db-last-access* (current-seconds)) + (mutex-unlock! *heartbeat-mutex*)) + ((equal? (uri-path (request-uri (current-request))) + '(/ "")) + (send-response body: (http-transport:main-page))) + ((equal? (uri-path (request-uri (current-request))) + '(/ "json_api")) + (send-response body: (http-transport:main-page))) + ((equal? (uri-path (request-uri (current-request))) + '(/ "runs")) + (send-response body: (http-transport:main-page))) + ((equal? (uri-path (request-uri (current-request))) + '(/ any)) + (send-response body: "hey there!\n" + headers: '((content-type text/plain)))) + ((equal? (uri-path (request-uri (current-request))) + '(/ "hey")) + (send-response body: "hey there!\n" + headers: '((content-type text/plain)))) + ((equal? (uri-path (request-uri (current-request))) + '(/ "jquery3.1.0.js")) + (send-response body: (http-transport:show-jquery) + headers: '((content-type application/javascript)))) + #;((equal? (uri-path (request-uri (current-request))) + '(/ "test_log")) + (send-response body: (http-transport:html-test-log $) + headers: '((content-type text/HTML)))) + ;; removing dashboard due to dependency challenges + #;((equal? (uri-path (request-uri (current-request))) + '(/ "dashboard")) + (send-response body: (http-transport:html-dboard $) + headers: '((content-type text/HTML)))) + (else (continue)))))))) + (with-output-to-file start-file (lambda ()(print (current-process-id)))) + (http-transport:try-start-server ipaddrstr start-port))) + +;; This is recursively run by http-transport:run until sucessful +;; +(define (http-transport:try-start-server ipaddrstr portnum) + (let ((config-hostname (configf:lookup *configdat* "server" "hostname")) + (config-use-proxy (equal? 
(configf:lookup *configdat* "client" "use-http_proxy") "yes"))) + (if (not config-use-proxy) + (determine-proxy (constantly #f))) + (debug:print-info 0 *default-log-port* "http-transport:try-start-server time=" (seconds->time-string (current-seconds)) " ipaddrsstr=" ipaddrstr " portnum=" portnum " config-hostname=" config-hostname) + (handle-exceptions + exn + (begin + (print-error-message exn) + (if (< portnum 64000) + (begin + (debug:print 0 *default-log-port* "WARNING: attempt to start server failed. Trying again ...") + (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) + (debug:print 5 *default-log-port* "exn=" (condition->list exn)) + (portlogger:open-run-close portlogger:set-failed portnum) + (debug:print 0 *default-log-port* "WARNING: failed to start on portnum: " portnum ", trying next port") + (thread-sleep! 0.1) + + ;; get_next_port goes here + (http-transport:try-start-server ipaddrstr + (portlogger:open-run-close portlogger:find-port))) + (begin + (print "ERROR: Tried and tried but could not start the server")))) + ;; any error in following steps will result in a retry + (set! *server-info* (list ipaddrstr portnum)) + (debug:print 0 *default-log-port* "INFO: Trying to start server on " ipaddrstr ":" portnum) + ;; This starts the spiffy server + ;; NEED WAY TO SET IP TO #f TO BIND ALL + ;; (start-server bind-address: ipaddrstr port: portnum) + (if config-hostname ;; this is a hint to bind directly + (start-server port: portnum bind-address: (if (equal? 
config-hostname "-") + ipaddrstr + config-hostname)) + (start-server port: portnum)) + (portlogger:open-run-close portlogger:set-port portnum "released") + (debug:print 1 *default-log-port* "INFO: server has been stopped")))) + +;;====================================================================== +;; S E R V E R U T I L I T I E S +;;====================================================================== + +;;====================================================================== +;; C L I E N T S +;;====================================================================== + +(define *http-mutex* (make-mutex)) + +;; NOTE: Large block of code from 32436b426188080f72fceb6894af541fbad9921e removed here +;; I'm pretty sure it is defunct. + +;; This next block all imported en-mass from the api branch +(define *http-requests-in-progress* 0) +(define *http-connections-next-cleanup* (current-seconds)) + +(define (http-transport:get-time-to-cleanup) + (let ((res #f)) + (mutex-lock! *http-mutex*) + (set! res (> (current-seconds) *http-connections-next-cleanup*)) + (mutex-unlock! *http-mutex*) + res)) + +(define (http-transport:inc-requests-count) + (mutex-lock! *http-mutex*) + (set! *http-requests-in-progress* (+ 1 *http-requests-in-progress*)) + ;; Use this opportunity to slow things down iff there are too many requests in flight + (if (> *http-requests-in-progress* 5) + (begin + (debug:print-info 0 *default-log-port* "Whoa there buddy, ease up...") + (thread-sleep! 1))) + (mutex-unlock! *http-mutex*)) + +(define (http-transport:dec-requests-count proc) + (mutex-lock! *http-mutex*) + (proc) + (set! *http-requests-in-progress* (- *http-requests-in-progress* 1)) + (mutex-unlock! *http-mutex*)) + +(define (http-transport:dec-requests-count-and-close-all-connections) + (set! 
*http-requests-in-progress* (- *http-requests-in-progress* 1)) + (let loop ((etime (+ (current-seconds) 5))) ;; give up in five seconds + (if (> *http-requests-in-progress* 0) + (if (> etime (current-seconds)) + (begin + (thread-sleep! 0.05) + (loop etime)) + (debug:print-error 0 *default-log-port* "requests still in progress after 5 seconds of waiting. I'm going to pass on cleaning up http connections")) + (close-all-connections!))) + (set! *http-connections-next-cleanup* (+ (current-seconds) 10)) + (mutex-unlock! *http-mutex*)) + +(define (http-transport:inc-requests-and-prep-to-close-all-connections) + (mutex-lock! *http-mutex*) + (set! *http-requests-in-progress* (+ 1 *http-requests-in-progress*))) + +;; Send "cmd" with json payload "params" to serverdat and receive result +;; +(define (http-transport:client-api-send-receive run-id serverdat cmd params #!key (numretries 3)(area-dat #f)) + (let* ((fullurl (if (vector? serverdat) + (http-transport:server-dat-get-api-req serverdat) + (begin + (debug:print 0 *default-log-port* "FATAL ERROR: http-transport:client-api-send-receive called with no server info") + (exit 1)))) + (res (vector #f "uninitialized")) + (success #t) + (sparams (db:obj->string params transport: 'http)) + (runremote (or area-dat *runremote*))) + (debug:print-info 11 *default-log-port* "fullurl=" fullurl ", cmd=" cmd ", params=" params ", run-id=" run-id "\n") + ;; set up the http-client here + (max-retry-attempts 1) + ;; consider all requests indempotent + (retry-request? (lambda (request) + #f)) + ;; send the data and get the response + ;; extract the needed info from the http data and + ;; process and return it. + (let* ((send-recieve (lambda () + (mutex-lock! *http-mutex*) + ;; (condition-case (with-input-from-request "http://localhost"; #f read-lines) + ;; ((exn http client-error) e (print e))) + (set! res (vector ;;; DON'T FORGET - THIS IS THE CLIENT SIDE! 
NOTE: consider moving this to client.scm since we are only supporting http transport at this time. + success + (db:string->obj + (handle-exceptions + exn + (let ((call-chain (get-call-chain)) + (msg ((condition-property-accessor 'exn 'message) exn))) + (set! success #f) + (if (debug:debug-mode 1) + (debug:print-info 0 *default-log-port* "couldn't talk to server, trying again ...") + (begin + (debug:print 0 *default-log-port* "WARNING: failure in with-input-from-request to " fullurl ".") + (debug:print 0 *default-log-port* " message: " msg) + (debug:print 0 *default-log-port* " cmd: " cmd " params: " params) + (debug:print 0 *default-log-port* " call-chain: " call-chain))) + (if runremote + (remote-conndat-set! runremote #f)) + ;; Killing associated server to allow clean retry.") + ;; (tasks:kill-server-run-id run-id) ;; better to kill the server in the logic that called this routine? + (mutex-unlock! *http-mutex*) + ;;; (signal (make-composite-condition + ;;; (make-property-condition 'commfail 'message "failed to connect to server"))) + ;;; "communications failed" + (db:obj->string #f)) + (with-input-from-request ;; was dat + fullurl + (list (cons 'key (or *server-id* "thekey")) + (cons 'cmd cmd) + (cons 'params sparams)) + read-string)) + transport: 'http) + 0)) ;; added this speculatively + ;; Shouldn't this be a call to the managed call-all-connections stuff above? + (close-all-connections!) + (mutex-unlock! *http-mutex*) + )) + (time-out (lambda () + (thread-sleep! 45) + #f)) + (th1 (make-thread send-recieve "with-input-from-request")) + (th2 (make-thread time-out "time out"))) + (thread-start! th1) + (thread-start! th2) + (thread-join! th1) + (thread-terminate! th2) + (debug:print-info 11 *default-log-port* "got res=" res) + (if (vector? res) + (if (vector-ref res 0) ;; this is the first flag or the second flag? + res ;; this is the *inner* vector? seriously? why? 
+ (if (debug:debug-mode 11) + (let ((call-chain (get-call-chain))) ;; note: this code also called in nmsg-transport - consider consolidating it + (print-call-chain (current-error-port)) + (debug:print-error 11 *default-log-port* "error above occured at server, res=" res) ;; " message: " ((condition-property-accessor 'exn 'message) exn)) + (debug:print 11 *default-log-port* " server call chain:") + (pp (vector-ref res 1) (current-error-port)) + (signal (vector-ref res 0))) + res)) + (signal (make-composite-condition + (make-property-condition + 'timeout + 'message "nmsg-transport:client-api-send-receive-raw timed out talking to server"))))))) + +;; careful closing of connections stored in *runremote* +;; +(define (http-transport:close-connections #!key (area-dat #f)) + (let* ((runremote (or area-dat *runremote*)) + (server-dat (if runremote + (remote-conndat runremote) + #f))) ;; (hash-table-ref/default *runremote* run-id #f))) + (if (vector? server-dat) + (let ((api-dat (http-transport:server-dat-get-api-uri server-dat))) + (handle-exceptions + exn + (begin + (print-call-chain *default-log-port*) + (debug:print-error 0 *default-log-port* " closing connection failed with error: " ((condition-property-accessor 'exn 'message) exn))) + (close-connection! api-dat) + ;;(close-idle-connections!) 
+ #t)) + #f))) + +;; Moved to commonmod +;; +;; (define (make-http-transport:server-dat)(make-vector 6)) +;; (define (http-transport:server-dat-get-iface vec) (vector-ref vec 0)) +;; (define (http-transport:server-dat-get-port vec) (vector-ref vec 1)) +;; (define (http-transport:server-dat-get-api-uri vec) (vector-ref vec 2)) +;; (define (http-transport:server-dat-get-api-url vec) (vector-ref vec 3)) +;; (define (http-transport:server-dat-get-api-req vec) (vector-ref vec 4)) +;; (define (http-transport:server-dat-get-last-access vec) (vector-ref vec 5)) +;; (define (http-transport:server-dat-get-socket vec) (vector-ref vec 6)) +;; +;; (define (http-transport:server-dat-make-url vec) +;; (if (and (http-transport:server-dat-get-iface vec) +;; (http-transport:server-dat-get-port vec)) +;; (conc "http://" +;; (http-transport:server-dat-get-iface vec) +;; ":" +;; (http-transport:server-dat-get-port vec)) +;; #f)) +;; +;; (define (http-transport:server-dat-update-last-access vec) +;; (if (vector? vec) +;; (vector-set! vec 5 (current-seconds)) +;; (begin +;; (print-call-chain (current-error-port)) +;; (debug:print-error 0 *default-log-port* "call to http-transport:server-dat-update-last-access with non-vector!!")))) + +;; +;; connect +;; +(define (http-transport:client-connect iface port) + (let* ((api-url (conc "http://" iface ":" port "/api")) + (api-uri (uri-reference (conc "http://" iface ":" port "/api"))) + (api-req (make-request method: 'POST uri: api-uri)) + (server-dat (vector iface port api-uri api-url api-req (current-seconds)))) + server-dat)) + +;; run http-transport:keep-running in a parallel thread to monitor that the db is being +;; used and to shutdown after sometime if it is not. 
+;; +(define (http-transport:keep-running) + ;; if none running or if > 20 seconds since + ;; server last used then start shutdown + ;; This thread waits for the server to come alive + (debug:print-info 0 *default-log-port* "Starting the sync-back, keep alive thread in server") + (let* ((sdat #f) + (tmp-area (common:get-db-tmp-area)) + (started-file (conc tmp-area "/.server-started")) + (server-start-time (current-seconds)) + (server-info (let loop ((start-time (current-seconds)) + (changed #t) + (last-sdat "not this")) + (begin ;; let ((sdat #f)) + (thread-sleep! 0.01) + (debug:print-info 0 *default-log-port* "Waiting for server alive signature") + (mutex-lock! *heartbeat-mutex*) + (set! sdat *server-info*) + (mutex-unlock! *heartbeat-mutex*) + (if (and sdat + (not changed) + (> (- (current-seconds) start-time) 2)) + (begin + (debug:print-info 0 *default-log-port* "Received server alive signature") + (common:save-pkt `((action . alive) + (T . server) + (pid . ,(current-process-id)) + (ipaddr . ,(car sdat)) + (port . ,(cadr sdat))) + *configdat* #t) + sdat) + (begin + (debug:print-info 0 *default-log-port* "Still waiting, last-sdat=" last-sdat) + (thread-sleep! 4) ;; was posix (sleep 4), which suspends the whole process and so stalls the server thread too + (if (> (- (current-seconds) start-time) 120) ;; been waiting for two minutes + (begin + (debug:print-error 0 *default-log-port* "transport appears to have died, exiting server") + (common:save-pkt `((action . died) + (T . server) + (pid . ,(current-process-id)) + (ipaddr . ,(and sdat (car sdat))) ;; sdat is still #f if the transport never registered; (car #f) would abort this thread before (exit) + (port . ,(and sdat (cadr sdat))) + (msg . "Transport died?")) + *configdat* #t) + (exit)) + (loop start-time + (equal? 
sdat last-sdat) + sdat))))))) + (iface (car server-info)) + (port (cadr server-info)) + (last-access 0) + (server-timeout (server:expiration-timeout)) + (server-going #f) + (server-log-file (args:get-arg "-log"))) ;; always set when we are a server + + (with-output-to-file started-file (lambda ()(print (current-process-id)))) + + (let loop ((count 0) + (server-state 'available) + (bad-sync-count 0) + (start-time (current-milliseconds))) + ;; Use this opportunity to sync the tmp db to megatest.db + (if (not server-going) ;; *dbstruct-db* + (begin + (debug:print 0 *default-log-port* "SERVER: dbprep") + (set! *dbstruct-db* (db:setup #t)) ;; run-id)) + (set! server-going #t) + (debug:print 0 *default-log-port* "SERVER: running, megatest version: " (common:get-full-version)) ;; NOTE: the server is NOT yet marked as running in the log. We do that in the keep-running routine. + (thread-start! *watchdog*))) + + ;; when things go wrong we don't want to be doing the various queries too often + ;; so we strive to run this stuff only every four seconds or so. + (let* ((sync-time (- (current-milliseconds) start-time)) + (rem-time (quotient (- 4000 sync-time) 1000))) + (if (and (<= rem-time 4) + (> rem-time 0)) + (thread-sleep! rem-time))) + + (if (< count 1) ;; 3x3 = 9 secs aprox + (loop (+ count 1) 'running bad-sync-count (current-milliseconds))) + + ;; Check that iface and port have not changed (can happen if server port collides) + (mutex-lock! *heartbeat-mutex*) + (set! sdat *server-info*) + (mutex-unlock! *heartbeat-mutex*) + + (if (not (equal? sdat (list iface port))) + (let ((new-iface (car sdat)) + (new-port (cadr sdat))) + (debug:print-info 0 *default-log-port* "WARNING: interface changed, refreshing iface and port info") + (set! iface new-iface) + (set! 
port new-port) + (debug:print 0 *default-log-port* "SERVER STARTED: " iface ":" port " AT " (current-seconds)) + (flush-output *default-log-port*))) + + ;; Transfer *db-last-access* to last-access to use in checking that we are still alive + (mutex-lock! *heartbeat-mutex*) + (set! last-access *db-last-access*) + (mutex-unlock! *heartbeat-mutex*) + + (if (common:low-noise-print 120 (conc "server running on " iface ":" port)) + (begin + (debug:print 0 *default-log-port* "SERVER STARTED: " iface ":" port " AT " (current-seconds)) + (flush-output *default-log-port*))) + (if (common:low-noise-print 60 "dbstats") + (begin + (debug:print 0 *default-log-port* "Server stats:") + (db:print-current-query-stats))) + (let* ((hrs-since-start (/ (- (current-seconds) server-start-time) 3600))) + (cond + ((and *server-run* + (> (+ last-access server-timeout) + (current-seconds))) + (if (common:low-noise-print 120 "server continuing") + (debug:print-info 0 *default-log-port* "Server continuing, seconds since last db access: " (- (current-seconds) last-access)) + (let ((curr-time (current-seconds))) + (handle-exceptions + exn + (debug:print 0 *default-log-port* "ERROR: Failed to change timestamp on log file " server-log-file ". Are you out of space on that disk?") + (if (not *server-overloaded*) + (change-file-times server-log-file curr-time curr-time))))) + (loop 0 server-state bad-sync-count (current-milliseconds))) + (else + (debug:print-info 0 *default-log-port* "Server timed out. seconds since last db access: " (- (current-seconds) last-access)) + (http-transport:server-shutdown port))))))) + +(define (http-transport:server-shutdown port) + (begin + ;;(BB> "http-transport:server-shutdown called") + (debug:print-info 0 *default-log-port* "Starting to shutdown the server. pid="(current-process-id)) + ;; + ;; start_shutdown + ;; + (set! 
*time-to-exit* #t) ;; tell on-exit to be fast as we've already cleaned up + (portlogger:open-run-close portlogger:set-port port "released") + (thread-sleep! 1) + + ;; (debug:print-info 0 *default-log-port* "Max cached queries was " *max-cache-size*) + ;; (debug:print-info 0 *default-log-port* "Number of cached writes " *number-of-writes*) + ;; (debug:print-info 0 *default-log-port* "Average cached write time " + ;; (if (eq? *number-of-writes* 0) + ;; "n/a (no writes)" + ;; (/ *writes-total-delay* + ;; *number-of-writes*)) + ;; " ms") + ;; (debug:print-info 0 *default-log-port* "Number non-cached queries " *number-non-write-queries*) + ;; (debug:print-info 0 *default-log-port* "Average non-cached time " + ;; (if (eq? *number-non-write-queries* 0) + ;; "n/a (no queries)" + ;; (/ *total-non-write-delay* + ;; *number-non-write-queries*)) + ;; " ms") + + (db:print-current-query-stats) + (common:save-pkt `((action . exit) + (T . server) + (pid . ,(current-process-id))) + *configdat* #t) + (debug:print-info 0 *default-log-port* "Server shutdown complete. Exiting") + (exit))) + +;; all routes though here end in exit ... +;; +;; start_server? 
+;; +(define (http-transport:launch) + ;; check that a server start is in progress, pause or exit if so + (let* ((tmp-area (common:get-db-tmp-area)) + (server-start (conc tmp-area "/.server-start")) + (server-started (conc tmp-area "/.server-started")) + (start-time (common:lazy-modification-time server-start)) + (started-time (common:lazy-modification-time server-started)) + (server-starting (< start-time started-time)) ;; if start-time is less than started-time then a server is still starting + (start-time-old (> (- (current-seconds) start-time) 5)) + (cleanup-proc (lambda (msg) + (let* ((serv-fname (conc "server-" (current-process-id) "-" (get-host-name) ".log")) + (full-serv-fname (conc *toppath* "/logs/" serv-fname)) + (new-serv-fname (conc *toppath* "/logs/" "defunct-" serv-fname))) + (debug:print 0 *default-log-port* msg) + (if (common:file-exists? full-serv-fname) + (system (conc "sleep 1;mv -f " full-serv-fname " " new-serv-fname)) + (debug:print 0 *default-log-port* "INFO: cannot move " full-serv-fname " to " new-serv-fname)) + (exit))))) + (if (and (not start-time-old) ;; last server start try was less than five seconds ago + (not server-starting)) + (begin + (cleanup-proc "NOT starting server, there is either a recently started server or a server in process of starting") + (exit))) + ;; lets not even bother to start if there are already three or more server files ready to go + (let* ((num-alive (server:get-num-alive (server:get-list *toppath*)))) + (if (> num-alive 3) + (begin + (cleanup-proc (conc "ERROR: Aborting server start because there are already " num-alive " possible servers either running or starting up")) + (exit)))) + (common:save-pkt `((action . start) + (T . server) + (pid . 
,(current-process-id))) + *configdat* #t) + (let* ((th2 (make-thread (lambda () + (debug:print-info 0 *default-log-port* "Server run thread started") + (http-transport:run + (if (args:get-arg "-server") + (args:get-arg "-server") + "-") + )) "Server run")) + (th3 (make-thread (lambda () + (debug:print-info 0 *default-log-port* "Server monitor thread started") + (http-transport:keep-running)) + "Keep running"))) ;; the name is make-thread's second argument (as for th2); it used to sit inside the thunk as a discarded value + (thread-start! th2) + (thread-sleep! 0.25) ;; give the server time to settle before starting the keep-running monitor. + (thread-start! th3) + (set! *didsomething* #t) + (thread-join! th2) + (exit)))) + +;; (define (http-transport:server-signal-handler signum) +;; (signal-mask! signum) +;; (handle-exceptions +;; exn +;; (debug:print 0 *default-log-port* " ... exiting ...") +;; (let ((th1 (make-thread (lambda () +;; (thread-sleep! 1)) +;; "eat response")) +;; (th2 (make-thread (lambda () +;; (debug:print-error 0 *default-log-port* "Received ^C, attempting clean exit. Please be patient and wait a few seconds before hitting ^C again.") +;; (thread-sleep! 3) ;; give the flush three seconds to do it's stuff +;; (debug:print 0 *default-log-port* " Done.") +;; (exit 4)) +;; "exit on ^C timer"))) +;; (thread-start! th2) +;; (thread-start! th1) +;; (thread-join! th2)))) + +;;=============================================== +;; Java script +;;=============================================== +(define (http-transport:show-jquery) + (let* ((data (tests:readlines *java-script-lib*))) +(string-join data "\n"))) + + + +;;====================================================================== +;; web pages +;;====================================================================== + +#;(define (http-transport:html-test-log $) + (let* ((run-id ($ 'runid)) + (test-item ($ 'testname)) + (parts (string-split test-item ":")) + (test-name (car parts)) + + (item-name (if (equal? 
(length parts) 1) + "" + (cadr parts)))) + ;(print $) + (tests:get-test-log run-id test-name item-name))) + + +#;(define (http-transport:html-dboard $) + (let* ((page ($ 'page)) + (oup (open-output-string)) + (bdy "--------------------------") + + (ret (tests:dynamic-dboard page))) + (s:output-new oup ret) + (close-output-port oup) + + (set! bdy (get-output-string oup)) + (conc "

Dashboard

" bdy "

" ))) + +(define (http-transport:main-page) + (let ((linkpath (root-path))) + (conc "

" (pathname-strip-directory *toppath*) "

" + "" + "Run area: " *toppath* + "

Server Stats

" + (http-transport:stats-table) + "
" + (http-transport:runs linkpath) + "
" + ;; (http-transport:run-stats) + "" + ))) + +(define (http-transport:stats-table) + (mutex-lock! *heartbeat-mutex*) + (let ((res + (conc "" + ;; "" + "" + "" + "" + ;; "" + "" + "
Max cached queries " *max-cache-size* "
Number of cached writes " *number-of-writes* "
Average cached write time " (if (eq? *number-of-writes* 0) + "n/a (no writes)" + (/ *writes-total-delay* + *number-of-writes*)) + " ms
Number non-cached queries " *number-non-write-queries* "
Average non-cached time " (if (eq? *number-non-write-queries* 0) + ;; "n/a (no queries)" + ;; (/ *total-non-write-delay* + ;; *number-non-write-queries*)) + " ms
Last access" (seconds->time-string *db-last-access*) "
"))) + (mutex-unlock! *heartbeat-mutex*) + res)) + +(define (http-transport:runs linkpath) + (conc "

Runs

" + (string-intersperse + (let ((files (map pathname-strip-directory (glob (conc linkpath "/*"))))) + (map (lambda (p) + (conc "" p "
")) + files)) + " "))) + +#;(define (http-transport:run-stats) + (let ((stats (open-run-close db:get-running-stats #f))) + (conc "" + (string-intersperse + (map (lambda (stat) + (conc "")) + stats) + " ") + "
" (car stat) "" (cadr stat) "
"))) +) Index: items-inc.scm ================================================================== --- items-inc.scm +++ items-inc.scm @@ -20,132 +20,8 @@ ;; (define itemdat '((ripeness "green ripe overripe") ;; (temperature "cool medium hot") ;; (season "summer winter fall spring"))) -;; Puts out all combinations -(define (process-itemlist hierdepth curritemkey itemlist) - (let ((res '())) - (if (not hierdepth) - (set! hierdepth (length itemlist))) - (let loop ((hed (car itemlist)) - (tal (cdr itemlist))) - (if (null? tal) - (for-each (lambda (item) - (if (> (length curritemkey) (- hierdepth 2)) - (set! res (append res (list (append curritemkey (list (list (car hed) item)))))))) - (cadr hed)) - (begin - (for-each (lambda (item) - (set! res (append res (process-itemlist hierdepth (append curritemkey (list (list (car hed) item))) tal)))) - (cadr hed)) - (loop (car tal)(cdr tal))))) - res)) - -;; (item-assoc->item-list '(("ANIMAL" "Elephant Lion")("SEASON" "Spring Fall"))) -;; => ((("ANIMAL" "Elephant") ("SEASON" "Spring")) -;; (("ANIMAL" "Elephant") ("SEASON" "Fall")) -;; (("ANIMAL" "Lion") ("SEASON" "Spring")) -;; (("ANIMAL" "Lion") ("SEASON" "Fall"))) -(define (item-assoc->item-list itemsdat) - (if (and itemsdat (not (null? itemsdat))) - (let ((itemlst (filter (lambda (x) - (list? x)) - (map (lambda (x) - (debug:print 6 *default-log-port* "item-assoc->item-list x: " x) - (if (< (length x) 2) - (begin - (debug:print-error 0 *default-log-port* "malformed items spec " (string-intersperse x " ")) - (list (car x)'())) - (let* ((name (car x)) - (items (cadr x)) - (ilist (list name (if (string? items) - (string-split items) - '())))) - (if (null? 
ilist) - (debug:print-error 0 *default-log-port* "No items specified for " name)) - ilist))) - itemsdat)))) - (let ((debuglevel 5)) - (debug:print 5 *default-log-port* "item-assoc->item-list: itemsdat => itemlst ") - (if (debug:debug-mode 5) - (begin - (pp itemsdat) - (print " => ") - (pp itemlst)))) - (if (> (length itemlst) 0) - (process-itemlist #f '() itemlst) - '())) - '())) ;; return a list consisting on a single null list for non-item runs - ;; Nope, not now, return null as of 6/6/2011 - -;; (item-table->item-list '(("ANIMAL" "Elephant Lion")("SEASON" "Spring Winter"))) -;; => ((("ANIMAL" "Elephant")("SEASON" "Spring")) -;; (("ANIMAL" "Lion") ("SEASON" "Winter"))) -(define (item-table->item-list itemtable) - (let ((newlst (map (lambda (x) - (if (> (length x) 1) - (list (car x) - (string-split (cadr x))) - (list x '()))) - itemtable)) - (res '())) ;; a list of items - (let loop ((indx 0) - (item '()) ;; an item will be ((KEYNAME1 VAL1)(KEYNAME2 VAL2) ...) - (elflag #f)) - (for-each (lambda (row) - (let ((rowname (car row)) - (rowdat (cadr row))) - (set! item (append item - (list - (if (< indx (length rowdat)) - (let ((new (list rowname (list-ref rowdat indx)))) - ;; (debug:print 0 *default-log-port* "New: " new) - (set! elflag #t) - new - ) ;; i.e. had at least on legit value to use - (list rowname "-"))))))) - newlst) - (if elflag - (begin - (set! 
res (append res (list item))) - (loop (+ indx 1) - '() - #f))) - res))) - ;; Nope, not now, return null as of 6/6/2011 - -(define (items:check-valid-items class item) - (let ((valid-values (let ((s (configf:lookup *configdat* "validvalues" class))) - (if s (string-split s) #f)))) - (if valid-values - (if (member item valid-values) - item #f) - item))) - -(define (items:get-items-from-config tconfig) - (let* ((have-items (hash-table-ref/default tconfig "items" #f)) - (have-itable (hash-table-ref/default tconfig "itemstable" #f)) - (items (hash-table-ref/default tconfig "items" '())) - (itemstable (hash-table-ref/default tconfig "itemstable" '()))) - (debug:print 5 *default-log-port* "items: " items " itemstable: " itemstable) - (set! items (map (lambda (item) - (if (procedure? (cadr item)) - (list (car item)((cadr item))) ;; evaluate the proc - item)) - items)) - (set! itemstable (map (lambda (item) - (if (procedure? (cadr item)) - (list (car item)((cadr item))) ;; evaluate the proc - item)) - itemstable)) - (if (and have-items (null? items)) (debug:print 0 *default-log-port* "WARNING:[items] section in testconfig but no entries defined")) - (if (and have-itable (null? itemstable))(debug:print 0 *default-log-port* "WARNNG:[itemstable] section in testconfig but no entries defined")) - (if (or (not (null? items))(not (null? 
itemstable))) - (append (item-assoc->item-list items) - (item-table->item-list itemstable)) - '(())))) - -;; (pp (item-assoc->item-list itemdat)) Index: itemsmod.scm ================================================================== --- itemsmod.scm +++ itemsmod.scm @@ -18,18 +18,145 @@ ;;====================================================================== (declare (unit itemsmod)) (declare (uses commonmod)) +(declare (uses mtconfigf)) (module itemsmod * (import scheme chicken data-structures extras) -(import (prefix sqlite3 sqlite3:) posix typed-records srfi-18 srfi-69 format ports srfi-1 matchable) +(use (prefix sqlite3 sqlite3:) posix typed-records srfi-18 srfi-69 format ports srfi-1 matchable) (import commonmod) +(import(prefix mtconfigf configf:)) ;; (use (prefix ulex ulex:)) -(include "common_records.scm") +;; (include "common_records.scm") +;; (include "items-inc.scm") + +;; Puts out all combinations +(define (process-itemlist hierdepth curritemkey itemlist) + (let ((res '())) + (if (not hierdepth) + (set! hierdepth (length itemlist))) + (let loop ((hed (car itemlist)) + (tal (cdr itemlist))) + (if (null? tal) + (for-each (lambda (item) + (if (> (length curritemkey) (- hierdepth 2)) + (set! res (append res (list (append curritemkey (list (list (car hed) item)))))))) + (cadr hed)) + (begin + (for-each (lambda (item) + (set! res (append res (process-itemlist hierdepth (append curritemkey (list (list (car hed) item))) tal)))) + (cadr hed)) + (loop (car tal)(cdr tal))))) + res)) + +;; (item-assoc->item-list '(("ANIMAL" "Elephant Lion")("SEASON" "Spring Fall"))) +;; => ((("ANIMAL" "Elephant") ("SEASON" "Spring")) +;; (("ANIMAL" "Elephant") ("SEASON" "Fall")) +;; (("ANIMAL" "Lion") ("SEASON" "Spring")) +;; (("ANIMAL" "Lion") ("SEASON" "Fall"))) +(define (item-assoc->item-list itemsdat) + (if (and itemsdat (not (null? itemsdat))) + (let ((itemlst (filter (lambda (x) + (list? 
x)) + (map (lambda (x) + (debug:print 6 *default-log-port* "item-assoc->item-list x: " x) + (if (< (length x) 2) + (begin + (debug:print-error 0 *default-log-port* "malformed items spec " (string-intersperse x " ")) + (list (car x)'())) + (let* ((name (car x)) + (items (cadr x)) + (ilist (list name (if (string? items) + (string-split items) + '())))) + (if (null? (cadr ilist)) ;; ilist is always a two-element list, so test its value list; (null? ilist) could never fire + (debug:print-error 0 *default-log-port* "No items specified for " name)) + ilist))) + itemsdat)))) + (let ((debuglevel 5)) + (debug:print 5 *default-log-port* "item-assoc->item-list: itemsdat => itemlst ") + (if (debug:debug-mode 5) + (begin + (pp itemsdat) + (print " => ") + (pp itemlst)))) + (if (> (length itemlst) 0) + (process-itemlist #f '() itemlst) + '())) + '())) ;; return a list consisting on a single null list for non-item runs + ;; Nope, not now, return null as of 6/6/2011 + +;; (item-table->item-list '(("ANIMAL" "Elephant Lion")("SEASON" "Spring Winter"))) +;; => ((("ANIMAL" "Elephant")("SEASON" "Spring")) +;; (("ANIMAL" "Lion") ("SEASON" "Winter"))) +(define (item-table->item-list itemtable) + (let ((newlst (map (lambda (x) + (if (> (length x) 1) + (list (car x) + (string-split (cadr x))) + (list (car x) '()))) ;; row with a key but no values: keep the key as the row name, as item-assoc->item-list does (was the whole row x) + itemtable)) + (res '())) ;; a list of items + (let loop ((indx 0) + (item '()) ;; an item will be ((KEYNAME1 VAL1)(KEYNAME2 VAL2) ...) + (elflag #f)) + (for-each (lambda (row) + (let ((rowname (car row)) + (rowdat (cadr row))) + (set! item (append item + (list + (if (< indx (length rowdat)) + (let ((new (list rowname (list-ref rowdat indx)))) + ;; (debug:print 0 *default-log-port* "New: " new) + (set! elflag #t) + new + ) ;; i.e. had at least on legit value to use + (list rowname "-"))))))) + newlst) + (if elflag + (begin + (set! 
res (append res (list item))) + (loop (+ indx 1) + '() + #f))) + res))) + ;; Nope, not now, return null as of 6/6/2011 + +(define (items:check-valid-items class item) + (let ((valid-values (let ((s (configf:lookup *configdat* "validvalues" class))) + (if s (string-split s) #f)))) + (if valid-values + (if (member item valid-values) + item #f) + item))) + +(define (items:get-items-from-config tconfig) + (let* ((have-items (hash-table-ref/default tconfig "items" #f)) + (have-itable (hash-table-ref/default tconfig "itemstable" #f)) + (items (hash-table-ref/default tconfig "items" '())) + (itemstable (hash-table-ref/default tconfig "itemstable" '()))) + (debug:print 5 *default-log-port* "items: " items " itemstable: " itemstable) + (set! items (map (lambda (item) + (if (procedure? (cadr item)) + (list (car item)((cadr item))) ;; evaluate the proc + item)) + items)) + (set! itemstable (map (lambda (item) + (if (procedure? (cadr item)) + (list (car item)((cadr item))) ;; evaluate the proc + item)) + itemstable)) + (if (and have-items (null? items)) (debug:print 0 *default-log-port* "WARNING:[items] section in testconfig but no entries defined")) + (if (and have-itable (null? itemstable))(debug:print 0 *default-log-port* "WARNING:[itemstable] section in testconfig but no entries defined")) + (if (or (not (null? items))(not (null? itemstable))) + (append (item-assoc->item-list items) + (item-table->item-list itemstable)) + '(())))) +;; (pp (item-assoc->item-list itemdat)) ) Index: keysmod.scm ================================================================== --- keysmod.scm +++ keysmod.scm @@ -17,11 +17,11 @@ ;; along with Megatest. If not, see . 
;;====================================================================== (declare (unit keysmod)) -;; (declare (uses commonmod)) + (module keysmod * (import scheme chicken data-structures extras) (import (prefix sqlite3 sqlite3:) posix typed-records srfi-18 srfi-69 format ports srfi-1 matchable) Index: launch-inc.scm ================================================================== --- launch-inc.scm +++ launch-inc.scm @@ -14,1699 +14,5 @@ ;; GNU General Public License for more details. ;; ;; You should have received a copy of the GNU General Public License ;; along with Megatest. If not, see . -;;====================================================================== -;; launch a task - this runs on the originating host, tests themselves -;; -;;====================================================================== - - -;;====================================================================== -;; ezsteps -;;====================================================================== - -;; ezsteps were going to be coded as -;; stepname[,predstep1,predstep2 ...] [{VAR1=first,second,third}] command to execute -;; BUT -;; now are -;; stepname {VAR=first,second,third ...} command ... -;; where the {VAR=first,second,third ...} is optional. - -;; given an exit code and whether or not logpro was used calculate OK/BAD -;; return #t if we are ok, #f otherwise -(define (steprun-good? logpro exitcode) - (or (eq? exitcode 0) - (and logpro (eq? 
exitcode 2)))) - -;; if handed a string, process it, else look for MT_CMDINFO -(define (launch:get-cmdinfo-assoc-list #!key (encoded-cmd #f)) - (let ((enccmd (if encoded-cmd encoded-cmd (getenv "MT_CMDINFO")))) - (if enccmd - (common:read-encoded-string enccmd) - '()))) - -;; 0 1 2 3 -(defstruct launch:einf (pid #t)(exit-status #t)(exit-code #t)(rollup-status 0)) - -;; return (conc status ": " comment) from the final section so that -;; the comment can be set in the step record in launch.scm -;; -(define (launch:load-logpro-dat run-id test-id stepname) - (let ((cname (conc stepname ".dat"))) - (if (common:file-exists? cname) - (let* ((dat (read-config cname #f #f)) - (csvr (db:logpro-dat->csv dat stepname)) - (csvt (let-values (((fmt-cell fmt-record fmt-csv) (make-format ","))) - (fmt-csv (map list->csv-record csvr)))) - (status (configf:lookup dat "final" "exit-status")) - (msg (configf:lookup dat "final" "message"))) - (if csvt ;; this if blocked stack dump caused by .dat file from logpro being 0-byte. fixed by upgrading logpro - (rmt:csv->test-data run-id test-id csvt) - (debug:print 0 *default-log-port* "ERROR: no csvdat exists for run-id: " run-id " test-id: " test-id " stepname: " stepname ", check that logpro version is 1.15 or newer")) - ;; (debug:print-info 13 *default-log-port* "Error: run-id/test-id/stepname="run-id"/"test-id"/"stepname" => bad csvr="csvr) - ;; ) - (cond - ((equal? status "PASS") "PASS") ;; skip the message part if status is pass - (status (conc (configf:lookup dat "final" "exit-status") ": " (if msg msg "no message"))) - (else #f))) - #f))) - -(define (launch:runstep ezstep run-id test-id exit-info m tal testconfig) ;;; TODO: deprecate me in favor of ezsteps.scm - (let* ((stepname (car ezstep)) ;; do stuff to run the step - (stepinfo (cadr ezstep)) - ;; (let ((info (cadr ezstep))) - ;; (if (proc? info) "" info))) - ;; (stepproc (let ((info (cadr ezstep))) - ;; (if (proc? 
info) info #f))) - (stepparts (string-match (regexp "^(\\{([^\\}\\{]*)\\}\\s*|)(.*)$") stepinfo)) - (stepparams (list-ref stepparts 2)) ;; for future use, {VAR=1,2,3}, run step for each - (paramparts (if (string? stepparams) - (map (lambda (x)(string-split x "=")) (string-split-fields "[^;]*=[^;]*" stepparams)) - '())) - (subrun (alist-ref "subrun" paramparts equal?)) - (stepcmd (list-ref stepparts 3)) - (script "") ; "#!/bin/bash\n") ;; yep, we depend on bin/bash FIXME!!!\ - (logpro-file (conc stepname ".logpro")) - (html-file (conc stepname ".html")) - (dat-file (conc stepname ".dat")) - (tconfig-logpro (configf:lookup testconfig "logpro" stepname)) - (logpro-used (common:file-exists? logpro-file))) - - (debug:print 0 *default-log-port* "stepparts: " stepparts ", stepparams: " stepparams - ", paramparts: " paramparts ", subrun: " subrun ", stepcmd: " stepcmd) - - (if (and tconfig-logpro - (not logpro-used)) ;; no logpro file found but have a defn in the testconfig - (begin - (with-output-to-file logpro-file - (lambda () - (print ";; logpro file extracted from testconfig\n" - ";;") - (print tconfig-logpro))) - (set! logpro-used #t))) - - ;; NB// can safely assume we are in test-area directory - (debug:print 4 *default-log-port* "ezsteps:\n stepname: " stepname " stepinfo: " stepinfo " stepparts: " stepparts - " stepparams: " stepparams " stepcmd: " stepcmd) - - ;; ;; first source the previous environment - ;; (let ((prev-env (conc ".ezsteps/" prevstep (if (string-search (regexp "csh") - ;; (get-environment-variable "SHELL")) ".csh" ".sh")))) - ;; (if (and prevstep (common:file-exists? prev-env)) - ;; (set! script (conc script "source " prev-env)))) - - ;; call the command using mt_ezstep - ;; (set! script (conc "mt_ezstep " stepname " " (if prevstep prevstep "x") " " stepcmd)) - - (debug:print 4 *default-log-port* "script: " script) - (rmt:teststep-set-status! 
run-id test-id stepname "start" "-" #f #f) - ;; now launch the actual process - (call-with-environment-variables - (list (cons "PATH" (conc (get-environment-variable "PATH") ":."))) - (lambda () ;; (process-run "/bin/bash" "-c" "exec ls -l /tmp/foobar > /tmp/delme-more.log 2>&1") - (let* ((cmd (conc stepcmd " > " stepname ".log 2>&1")) ;; >outfile 2>&1 - (pid #f)) - (let ((proc (lambda () - (set! pid (process-run "/bin/bash" (list "-c" cmd)))))) - (if subrun - (begin - (debug:print-info 0 *default-log-port* "Running without MT_.* environment variables.") - (common:without-vars proc "^MT_.*")) - (proc))) - - (with-output-to-file "Makefile.ezsteps" - (lambda () - (print stepname ".log :") - (print "\t" cmd) - (if (common:file-exists? (conc stepname ".logpro")) - (print "\tlogpro " stepname ".logpro " stepname ".html < " stepname ".log")) - (print) - (print stepname " : " stepname ".log") - (print)) - #:append) - - (rmt:test-set-top-process-pid run-id test-id pid) - (let processloop ((i 0)) - (let-values (((pid-val exit-status exit-code)(process-wait pid #t))) - (mutex-lock! m) - (launch:einf-pid-set! exit-info pid) ;; (vector-set! exit-info 0 pid) - (launch:einf-exit-status-set! exit-info exit-status) ;; (vector-set! exit-info 1 exit-status) - (launch:einf-exit-code-set! exit-info exit-code) ;; (vector-set! exit-info 2 exit-code) - (mutex-unlock! m) - (if (eq? pid-val 0) - (begin - (thread-sleep! 2) - (processloop (+ i 1)))) - ))))) - (debug:print-info 0 *default-log-port* "step " stepname " completed with exit code " (launch:einf-exit-code exit-info)) ;; (vector-ref exit-info 2)) - ;; now run logpro if needed - (if logpro-used - (let* ((logpro-exe (or (getenv "LOGPRO_EXE") "logpro")) - (pid (process-run (conc "/bin/sh -c '"logpro-exe" "logpro-file " " (conc stepname ".html") " < " stepname ".log > /dev/null'")))) - (let processloop ((i 0)) - (let-values (((pid-val exit-status exit-code)(process-wait pid #t))) - (mutex-lock! 
m) - ;; (make-launch:einf pid: pid exit-status: exit-status exit-code: exit-code) - (launch:einf-pid-set! exit-info pid) ;; (vector-set! exit-info 0 pid) - (launch:einf-exit-status-set! exit-info exit-status) ;; (vector-set! exit-info 1 exit-status) - (launch:einf-exit-code-set! exit-info exit-code) ;; (vector-set! exit-info 2 exit-code) - (mutex-unlock! m) - (if (eq? pid-val 0) - (begin - (thread-sleep! 2) - (processloop (+ i 1))))) - (debug:print-info 0 *default-log-port* "logpro for step " stepname " exited with code " (launch:einf-exit-code exit-info))))) ;; (vector-ref exit-info 2))))) - - (let ((exinfo (launch:einf-exit-code exit-info)) ;; (vector-ref exit-info 2)) - (logfna (if logpro-used (conc stepname ".html") "")) - (comment #f)) - (if logpro-used - (let ((datfile (conc stepname ".dat"))) - ;; load the .dat file into the test_data table if it exists - (if (common:file-exists? datfile) - (set! comment (launch:load-logpro-dat run-id test-id stepname))) - (rmt:test-set-log! run-id test-id (conc stepname ".html")))) - (rmt:teststep-set-status! run-id test-id stepname "end" exinfo comment logfna)) - ;; set the test final status - (let* ((process-exit-status (launch:einf-exit-code exit-info)) ;; (vector-ref exit-info 2)) - (this-step-status (cond - ((and (eq? process-exit-status 2) logpro-used) 'warn) ;; logpro 2 = warnings - ((and (eq? process-exit-status 3) logpro-used) 'check) ;; logpro 3 = check - ((and (eq? process-exit-status 4) logpro-used) 'waived) ;; logpro 4 = waived - ((and (eq? process-exit-status 5) logpro-used) 'abort) ;; logpro 5 = abort - ((and (eq? process-exit-status 6) logpro-used) 'skip) ;; logpro 6 = skip - ((eq? process-exit-status 0) 'pass) ;; logpro 0 = pass - (else 'fail))) - (overall-status (cond - ((eq? (launch:einf-rollup-status exit-info) 2) 'warn) ;; rollup-status (vector-ref exit-info 3) - ((eq? (launch:einf-rollup-status exit-info) 0) 'pass) ;; (vector-ref exit-info 3) - (else 'fail))) - (next-status (cond - ((eq? 
overall-status 'pass) this-step-status) - ((eq? overall-status 'warn) - (if (eq? this-step-status 'fail) 'fail 'warn)) - ((eq? overall-status 'abort) 'abort) - (else 'fail))) - (next-state ;; "RUNNING") ;; WHY WAS THIS CHANGED TO NOT USE (null? tal) ?? - (cond - ((null? tal) ;; more to run? - "COMPLETED") - (else "RUNNING")))) - (debug:print 4 *default-log-port* "Exit value received: " (launch:einf-exit-code exit-info) " logpro-used: " logpro-used - " this-step-status: " this-step-status " overall-status: " overall-status - " next-status: " next-status " rollup-status: " (launch:einf-rollup-status exit-info)) ;; (vector-ref exit-info 3)) - (case next-status - ((warn) - (launch:einf-rollup-status-set! exit-info 2) ;; (vector-set! exit-info 3 2) ;; rollup-status - ;; NB// test-set-status! does rdb calls under the hood - (tests:test-set-status! run-id test-id next-state "WARN" - (if (eq? this-step-status 'warn) "Logpro warning found" #f) - #f)) - ((check) - (launch:einf-rollup-status-set! exit-info 3) ;; (vector-set! exit-info 3 3) ;; rollup-status - ;; NB// test-set-status! does rdb calls under the hood - (tests:test-set-status! run-id test-id next-state "CHECK" - (if (eq? this-step-status 'check) "Logpro check found" #f) - #f)) - ((waived) - (launch:einf-rollup-status-set! exit-info 4) ;; (vector-set! exit-info 3 3) ;; rollup-status - ;; NB// test-set-status! does rdb calls under the hood - (tests:test-set-status! run-id test-id next-state "WAIVED" - (if (eq? this-step-status 'check) "Logpro waived found" #f) - #f)) - ((abort) - (launch:einf-rollup-status-set! exit-info 5) ;; (vector-set! exit-info 3 4) ;; rollup-status - ;; NB// test-set-status! does rdb calls under the hood - (tests:test-set-status! run-id test-id next-state "ABORT" - (if (eq? this-step-status 'abort) "Logpro abort found" #f) - #f)) - ((skip) - (launch:einf-rollup-status-set! exit-info 6) ;; (vector-set! exit-info 3 4) ;; rollup-status - ;; NB// test-set-status! 
does rdb calls under the hood - (tests:test-set-status! run-id test-id next-state "SKIP" - (if (eq? this-step-status 'skip) "Logpro skip found" #f) - #f)) - ((pass) - (tests:test-set-status! run-id test-id next-state "PASS" #f #f)) - (else ;; 'fail - (launch:einf-rollup-status-set! exit-info 1) ;; (vector-set! exit-info 3 1) ;; force fail, this used to be next-state but that doesn't make sense. should always be "COMPLETED" - (tests:test-set-status! run-id test-id "COMPLETED" "FAIL" (conc "Failed at step " stepname) #f) - ))) - logpro-used)) - -(define (launch:manage-steps run-id test-id item-path fullrunscript ezsteps subrun test-name tconfigreg exit-info m) - ;; (let-values - ;; (((pid exit-status exit-code) - ;; (run-n-wait fullrunscript))) - ;; (tests:test-set-status! test-id "RUNNING" "n/a" #f #f) - ;; Since we should have a clean slate at this time there is no need to do - ;; any of the other stuff that tests:test-set-status! does. Let's just - ;; force RUNNING/n/a - - ;; (thread-sleep! 0.3) - ;; (tests:test-force-state-status! run-id test-id "RUNNING" "n/a") - (rmt:set-state-status-and-roll-up-items run-id test-name item-path "RUNNING" #f #f) - ;; (thread-sleep! 0.3) ;; NFS slowness has caused grief here - - ;; if there is a runscript do it first - (if fullrunscript - (let ((pid (process-run fullrunscript))) - (rmt:test-set-top-process-pid run-id test-id pid) - (let loop ((i 0)) - (let-values - (((pid-val exit-status exit-code) (process-wait pid #t))) - (mutex-lock! m) - (launch:einf-pid-set! exit-info pid) ;; (vector-set! exit-info 0 pid) - (launch:einf-exit-status-set! exit-info exit-status) ;; (vector-set! exit-info 1 exit-status) - (launch:einf-exit-code-set! exit-info exit-code) ;; (vector-set! exit-info 2 exit-code) - (launch:einf-rollup-status-set! exit-info exit-code) ;; (vector-set! exit-info 3 exit-code) ;; rollup status - (mutex-unlock! m) - (if (eq? pid-val 0) - (begin - (thread-sleep! 
2) - (loop (+ i 1))) - ))))) - ;; then, if runscript ran ok (or did not get called) - ;; do all the ezsteps (if any) - (if (or ezsteps subrun) - (let* ((test-run-dir (tests:get-test-path-from-environment)) - (testconfig ;; (read-config (conc work-area "/testconfig") #f #t environ-patt: "pre-launch-env-vars")) ;; FIXME??? is allow-system ok here? - ;; NOTE: it is tempting to turn off force-create of testconfig but dynamic - ;; ezstep names need a full re-eval here. - (tests:get-testconfig test-name item-path tconfigreg #t force-create: #t)) ;; 'return-procs))) - (ezstepslst (if (hash-table? testconfig) - (hash-table-ref/default testconfig "ezsteps" '()) - #f))) - (if testconfig - (hash-table-set! *testconfigs* test-name testconfig) ;; cached for lazy reads later ... - (begin - (launch:setup) - (debug:print 0 *default-log-port* "WARNING: no testconfig found for " test-name " in search path:\n " - (string-intersperse (tests:get-tests-search-path *configdat*) "\n ")))) - ;; after all that, still no testconfig? Time to abort - (if (not testconfig) - (begin - (debug:print-error 0 *default-log-port* "Failed to resolve megatest.config, runconfigs.config and testconfig issues. Giving up now") - (exit 1))) - - ;; create a proc for the subrun if requested, save that proc in the ezsteps table as the last entry - ;; 1. get section [runarun] - ;; 2. unset MT_* vars - ;; 3. fix target - ;; 4. fix runname - ;; 5. fix testpatt or calculate it from contour - ;; 6. launch the run - ;; 7. roll up the run result and or roll up the logpro processed result - (when (configf:lookup testconfig "subrun" "runwait") ;; we use runwait as the flag that a subrun is requested - (subrun:initialize-toprun-test testconfig test-run-dir) - (let* ((mt-cmd (subrun:launch-cmd test-run-dir))) - (debug:print-info 0 *default-log-port* "Subrun command is \"" mt-cmd "\"") - (set! ezsteps #t) ;; set the needed flag - (set! 
ezstepslst - (append (or ezstepslst '()) - (list (list "subrun" (conc "{subrun=true} " mt-cmd))))))) - - ;; process the ezsteps - (if ezsteps - (begin - (if (not (common:file-exists? ".ezsteps"))(create-directory ".ezsteps")) - ;; if ezsteps was defined then we are sure to have at least one step but check anyway - (if (not (> (length ezstepslst) 0)) - (debug:print-error 0 *default-log-port* "ezsteps defined but ezstepslst is zero length") - (let loop ((ezstep (car ezstepslst)) - (tal (cdr ezstepslst)) - (prevstep #f)) - (debug:print-info 0 *default-log-port* "Processing ezstep \"" (string-intersperse ezstep " ") "\"") - ;; check exit-info (vector-ref exit-info 1) - (if (launch:einf-exit-status exit-info) ;; (vector-ref exit-info 1) - (let ((logpro-used (launch:runstep ezstep run-id test-id exit-info m tal testconfig)) - (stepname (car ezstep))) - ;; if logpro-used read in the stepname.dat file - (if (and logpro-used (common:file-exists? (conc stepname ".dat"))) - (launch:load-logpro-dat run-id test-id stepname)) - (if (steprun-good? logpro-used (launch:einf-exit-code exit-info)) - (if (not (null? tal)) - (loop (car tal) (cdr tal) stepname)) - (debug:print 0 *default-log-port* "WARNING: step " (car ezstep) " failed. 
Stopping"))) - (debug:print 0 *default-log-port* "WARNING: a prior step failed, stopping at " ezstep))))))))) - -(define (launch:monitor-job run-id test-id item-path fullrunscript ezsteps test-name tconfigreg exit-info m work-area runtlim misc-flags) - (let* ((update-period (string->number (or (configf:lookup *configdat* "setup" "test-stats-update-period") "30"))) - (start-seconds (current-seconds)) - (calc-minutes (lambda () - (inexact->exact - (round - (- - (current-seconds) - start-seconds))))) - (kill-tries 0)) - ;; (tests:set-full-meta-info #f test-id run-id (calc-minutes) work-area) - ;; (tests:set-full-meta-info test-id run-id (calc-minutes) work-area) - (tests:set-full-meta-info #f test-id run-id (calc-minutes) work-area 10) - - (let loop ((minutes (calc-minutes)) - (cpu-load (alist-ref 'adj-core-load (common:get-normalized-cpu-load #f))) - (disk-free (get-df (current-directory))) - (last-sync (current-seconds))) - #;(common:telemetry-log "zombie" (conc "launch:monitor-job - top of loop encountered at "(current-seconds)" with last-sync="last-sync)) - (let* ((over-time (> (current-seconds) (+ last-sync update-period))) - (new-cpu-load (let* ((load (alist-ref 'adj-core-load (common:get-normalized-cpu-load #f))) - (delta (abs (- load cpu-load)))) - (if (> delta 0.1) ;; don't bother updating with small changes - load - #f))) - (new-disk-free (let* ((df (if over-time ;; only get df every 30 seconds - (get-df (current-directory)) - disk-free)) - (delta (abs (- df disk-free)))) - (if (and (> df 0) - (> (/ delta df) 0.1)) ;; (> delta 200) ;; ignore changes under 200 Meg - df - #f))) - (do-sync (or new-cpu-load new-disk-free over-time)) - - (test-info (rmt:get-test-info-by-id run-id test-id)) - (state (db:test-get-state test-info)) - (status (db:test-get-status test-info)) - (kill-reason "no kill reason specified") - (kill-job? 
#f)) - #;(common:telemetry-log "zombie" (conc "launch:monitor-job - decision time encountered at "(current-seconds)" with last-sync="last-sync" do-sync="do-sync" over-time="over-time" update-period="update-period)) - (cond - ((test-get-kill-request run-id test-id) - (set! kill-reason "KILLING TEST since received kill request (KILLREQ)") - (set! kill-job? #t)) - ((and runtlim (> (- (current-seconds) start-seconds) runtlim)) - (set! kill-reason (conc "KILLING TEST DUE TO TIME LIMIT EXCEEDED! Runtime=" (- (current-seconds) start-seconds) " seconds, limit=" runtlim)) - (set! kill-job? #t)) - ((equal? status "DEAD") - (tests:update-central-meta-info run-id test-id new-cpu-load new-disk-free (calc-minutes) #f #f) - (rmt:set-state-status-and-roll-up-items run-id test-id 'foo "RUNNING" "n/a" "was marked dead; really still running.") - ;;(set! kill-reason "KILLING TEST because it was marked as DEAD by launch:handle-zombie-tests (might indicate really overloaded server or else overzealous setup.deadtime)") ;; MARK RUNNING - (set! kill-job? #f))) - - (debug:print 4 *default-log-port* "cpu: " new-cpu-load " disk: " new-disk-free " last-sync: " last-sync " do-sync: " do-sync) - (launch:handle-zombie-tests run-id) - (when do-sync - ;;(with-output-to-file (conc (getenv "MT_TEST_RUN_DIR") "/last-loadinfo.log" #:append) - ;; (lambda () (pp (list (current-seconds) new-cpu-load new-disk-free (calc-minutes))))) - #;(common:telemetry-log "zombie" (conc "launch:monitor-job - dosync started at "(current-seconds))) - (tests:update-central-meta-info run-id test-id new-cpu-load new-disk-free (calc-minutes) #f #f) - #;(common:telemetry-log "zombie" (conc "launch:monitor-job - dosync finished at "(current-seconds)))) - - (if kill-job? - (begin - (debug:print-info 0 *default-log-port* "proceeding to kill test: "kill-reason) - (mutex-lock! m) - ;; NOTE: The pid can change as different steps are run. Do we need handshaking between this - ;; section and the runit section? 
Or add a loop that tries three times with a 1/4 second - ;; between tries? - (let* ((pid1 (launch:einf-pid exit-info)) ;; (vector-ref exit-info 0)) - (pid2 (rmt:test-get-top-process-pid run-id test-id)) - (pids (delete-duplicates (filter number? (list pid1 pid2))))) - (if (not (null? pids)) - (begin - (for-each - (lambda (pid) - (handle-exceptions - exn - (begin - (debug:print-info 0 *default-log-port* "Unable to kill process with pid " pid ", possibly already killed.") - (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))) - (debug:print 0 *default-log-port* "WARNING: Request received to kill job " pid) ;; " (attempt # " kill-tries ")") - (debug:print-info 0 *default-log-port* "Signal mask=" (signal-mask)) - ;; (if (process:alive? pid) - ;; (begin - (map (lambda (pid-num) - (process-signal pid-num signal/term)) - (process:get-sub-pids pid)) - (thread-sleep! 5) - ;; (if (process:process-alive? pid) - (map (lambda (pid-num) - (handle-exceptions - exn - #f - (process-signal pid-num signal/kill))) - (process:get-sub-pids pid)))) - ;; (debug:print-info 0 *default-log-port* "not killing process " pid " as it is not alive")))) - pids) - ;; BB: question to Matt -- does the tests:test-state-status! encompass rollup to toplevel? If not, should it? - (tests:test-set-status! run-id test-id "KILLED" "KILLED" (conc (args:get-arg "-m")" "kill-reason) #f)) ;; BB ADDED kill-reason -- confirm OK with Matt - (begin - (debug:print-error 0 *default-log-port* "Nothing to kill, pid1=" pid1 ", pid2=" pid2) - (tests:test-set-status! run-id test-id "KILLED" "FAILED TO KILL" (conc (args:get-arg "-m")" "kill-reason) #f) ;; BB ADDED kill-reason -- confirm OK with Matt - ))) - (mutex-unlock! m) - ;; no point in sticking around. Exit now. But run end of run before exiting? - (launch:end-of-run-check run-id) - (exit))) - (if (hash-table-ref/default misc-flags 'keep-going #f) - (begin - (thread-sleep! 
3) ;; (+ 3 (random 6))) ;; add some jitter to the call home time to spread out the db accesses - (if (hash-table-ref/default misc-flags 'keep-going #f) ;; keep originals for cpu-load and disk-free unless they change more than the allowed delta - (loop (calc-minutes) - (or new-cpu-load cpu-load) - (or new-disk-free disk-free) - (if do-sync (current-seconds) last-sync))))))) - (tests:update-central-meta-info run-id test-id (get-cpu-load) (get-df (current-directory))(calc-minutes) #f #f))) ;; NOTE: Checking twice for keep-going is intentional - - -(define (launch:execute encoded-cmd) - (let* ((cmdinfo (common:read-encoded-string encoded-cmd)) - (tconfigreg #f)) - (setenv "MT_CMDINFO" encoded-cmd) - ;;(bb-check-path msg: "launch:execute incoming") - (if (list? cmdinfo) ;; ((testpath /tmp/mrwellan/jazzmind/src/example_run/tests/sqlitespeed) - ;; (test-name sqlitespeed) (runscript runscript.rb) (db-host localhost) (run-id 1)) - (let* ((testpath (assoc/default 'testpath cmdinfo)) ;; testpath is the test spec area - (top-path (assoc/default 'toppath cmdinfo)) - (work-area (assoc/default 'work-area cmdinfo)) ;; work-area is the test run area - (test-name (assoc/default 'test-name cmdinfo)) - (runscript (assoc/default 'runscript cmdinfo)) - (ezsteps (assoc/default 'ezsteps cmdinfo)) - (subrun (assoc/default 'subrun cmdinfo)) - ;; (runremote (assoc/default 'runremote cmdinfo)) - ;; (transport (assoc/default 'transport cmdinfo)) ;; not used - ;; (serverinf (assoc/default 'serverinf cmdinfo)) - ;; (port (assoc/default 'port cmdinfo)) - (serverurl (assoc/default 'serverurl cmdinfo)) - (homehost (assoc/default 'homehost cmdinfo)) - (run-id (assoc/default 'run-id cmdinfo)) - (test-id (assoc/default 'test-id cmdinfo)) - (target (assoc/default 'target cmdinfo)) - (areaname (assoc/default 'areaname cmdinfo)) - (itemdat (assoc/default 'itemdat cmdinfo)) - (env-ovrd (assoc/default 'env-ovrd cmdinfo)) - (set-vars (assoc/default 'set-vars cmdinfo)) ;; pre-overrides from -setvar - 
(runname (assoc/default 'runname cmdinfo)) - (megatest (assoc/default 'megatest cmdinfo)) - (runtlim (assoc/default 'runtlim cmdinfo)) - (contour (assoc/default 'contour cmdinfo)) - (item-path (item-list->path itemdat)) - (mt-bindir-path (assoc/default 'mt-bindir-path cmdinfo)) - (keys #f) - (keyvals #f) - (fullrunscript (if (not runscript) - #f - (if (substring-index "/" runscript) - runscript ;; use unadultered if contains slashes - (let ((fulln (conc work-area "/" runscript))) - (if (and (common:file-exists? fulln) - (file-execute-access? fulln)) - fulln - runscript))))) ;; assume it is on the path - (check-work-area (lambda () - ;; NFS might not have propagated the directory meta data to the run host - give it time if needed - (let loop ((count 0)) - (if (or (common:directory-exists? work-area) - (> count 10)) - (change-directory work-area) - (begin - (debug:print 0 *default-log-port* "INFO: Not starting job yet - directory " work-area " not found") - (thread-sleep! 10) - (loop (+ count 1))))) - - (if (not (string=? (common:real-path work-area)(common:real-path (current-directory)))) - (begin - (debug:print 0 *default-log-port* - "INFO: we are expecting to be in directory " work-area "\n" - " but we are actually in the directory " (current-directory) "\n" - " doing another change dir.") - (change-directory work-area))) - - ;; spot check that the files in testpath are available. Too often NFS delays cause problems here. - (let ((files (glob (conc testpath "/*"))) - (bad-files '())) - (for-each - (lambda (fullname) - (let* ((fname (pathname-strip-directory fullname)) - (targn (conc work-area "/" fname))) - (if (not (file-exists? targn)) - (set! bad-files (cons fname bad-files))))) - files) - (if (not (null? bad-files)) - (begin - (debug:print 0 *default-log-port* "INFO: test data from " testpath " not copied properly or filesystem problems causing data to not be found. 
Re-running the copy command.") - (debug:print 0 *default-log-port* "INFO: missing files from " work-area ": " (string-intersperse bad-files ", ")) - (launch:test-copy testpath work-area)))) - ;; one more time, change to the work-area directory - (change-directory work-area))) - ) ;; let* - - (if contour (setenv "MT_CONTOUR" contour)) - - ;; immediated set some key variables from CMDINFO data, yes, these will be set again below ... - ;; - (setenv "MT_TESTSUITENAME" areaname) - (setenv "MT_RUN_AREA_HOME" top-path) - (set! *toppath* top-path) - (change-directory *toppath*) ;; temporarily switch to the run area home - (setenv "MT_TEST_RUN_DIR" work-area) - - (launch:setup) ;; should be properly in the run area home now - - (if contour (setenv "MT_CONTOUR" contour)) - - ;; immediated set some key variables from CMDINFO data, yes, these will be set again below ... - ;; - (setenv "MT_TESTSUITENAME" areaname) - (setenv "MT_RUN_AREA_HOME" top-path) - (set! *toppath* top-path) - (change-directory *toppath*) ;; temporarily switch to the run area home - (setenv "MT_TEST_RUN_DIR" work-area) - - (launch:setup) ;; should be properly in the run area home now - - (set! tconfigreg (tests:get-all)) ;; mapping of testname => test source path - (let ((sighand (lambda (signum) - ;; (signal-mask! signum) ;; to mask or not? seems to cause issues in exiting - (if (eq? signum signal/stop) - (debug:print-error 0 *default-log-port* "attempt to STOP process. Exiting.")) - (set! *time-to-exit* #t) - (print "Received signal " signum ", cleaning up before exit (set this test to COMPLETED/ABORT) . Please wait...") - (let ((th1 (make-thread (lambda () - (print "set test to COMPLETED/ABORT begin.") - (rmt:test-set-state-status run-id test-id "COMPLETED" "ABORT" "received kill signal") - (print "set test to COMPLETED/ABORT complete.") - (print "Killed by signal " signum ". Exiting") - (exit 1)))) - (th2 (make-thread (lambda () - (thread-sleep! 
20) - (debug:print 0 *default-log-port* "Done") - (exit 4))))) - (thread-start! th2) - (thread-start! th1) - (thread-join! th2))))) - (set-signal-handler! signal/int sighand) - (set-signal-handler! signal/term sighand) - ) ;; (set-signal-handler! signal/stop sighand) - - ;; Do not run the test if it is REMOVING, RUNNING, KILLREQ or REMOTEHOSTSTART, - ;; Mark the test as REMOTEHOSTSTART *IMMEDIATELY* - ;; - (let* ((test-info (rmt:get-test-info-by-id run-id test-id)) - (test-host (if test-info - (db:test-get-host test-info) - (begin - (debug:print 0 *default-log-port* "ERROR: failed to find a record for test-id " test-id ", exiting.") - (exit)))) - (test-pid (db:test-get-process_id test-info))) - (cond - ;; -mrw- I'm removing KILLREQ from this list so that a test in KILLREQ state is treated as a "do not run" flag. - ((member (db:test-get-state test-info) '("INCOMPLETE" "KILLED" "UNKNOWN" "STUCK")) ;; prior run of this test didn't complete, go ahead and try to rerun - (debug:print 0 *default-log-port* "INFO: test is INCOMPLETE or KILLED, treat this execute call as a rerun request") - ;; (tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a") - - (rmt:general-call 'set-test-start-time #f test-id) - (rmt:test-set-state-status run-id test-id "REMOTEHOSTSTART" "n/a" #f) - ) ;; prime it for running - ((member (db:test-get-state test-info) '("RUNNING" "REMOTEHOSTSTART")) - (if (process:alive-on-host? test-host test-pid) - (debug:print-error 0 *default-log-port* "test state is " (db:test-get-state test-info) " and process " test-pid " is still running on host " test-host ", cannot proceed") - (exit))) - ((not (member (db:test-get-state test-info) '("REMOVING" "REMOTEHOSTSTART" "RUNNING" "KILLREQ"))) - ;; (tests:test-force-state-status! 
run-id test-id "REMOTEHOSTSTART" "n/a") - (rmt:general-call 'set-test-start-time #f test-id) - (rmt:test-set-state-status run-id test-id "REMOTEHOSTSTART" "n/a" #f) - ) - (else ;; (member (db:test-get-state test-info) '("REMOVING" "REMOTEHOSTSTART" "RUNNING" "KILLREQ")) - (debug:print-error 0 *default-log-port* "test state is " (db:test-get-state test-info) ", cannot proceed") - (exit)))) - - ;; cleanup prior execution's steps - (rmt:delete-steps-for-test! run-id test-id) - - (debug:print 2 *default-log-port* "Executing " test-name " (id: " test-id ") on " (get-host-name)) - (set! keys (rmt:get-keys)) - ;; (runs:set-megatest-env-vars run-id inkeys: keys inkeyvals: keyvals) ;; these may be needed by the launching process - ;; one of these is defunct/redundant ... - (if (not (launch:setup force-reread: #t)) - (begin - (debug:print 0 *default-log-port* "Failed to setup, exiting") - ;; (sqlite3:finalize! db) - ;; (sqlite3:finalize! tdb) - (exit 1))) - ;; validate that the test run area is available - (check-work-area) - - ;; still need to go back to run area home for next couple steps - (change-directory *toppath*) - - ;; NOTE: Current order is to process runconfigs *before* setting the MT_ vars. This - ;; seems non-ideal but could well break stuff - ;; BUG? BUG? BUG? - - (let ((rconfig (full-runconfigs-read)) ;; (read-config (conc *toppath* "/runconfigs.config") #f #t sections: (list "default" target)))) - (wconfig (read-config "waivers.config" #f #t sections: `( "default" ,target )))) ;; read the waivers config if it exists - ;; (setup-env-defaults (conc *toppath* "/runconfigs.config") run-id (make-hash-table) keyvals target) - ;; (set-run-config-vars run-id keyvals target) ;; (db:get-target db run-id)) - ;; Now have runconfigs data loaded, set environment vars - (for-each - (lambda (section) - (for-each - (lambda (varval) - (let ((var (car varval)) - (val (cadr varval))) - (if (and (string? var)(string? 
val)) - (begin - (safe-setenv var (configf:eval-string-in-environment val))) ;; val) - (debug:print-error 0 *default-log-port* "bad variable spec, " var "=" val)))) - (configf:get-section rconfig section))) - (list "default" target))) - ;;(bb-check-path msg: "launch:execute post block 1") - - ;; NFS might not have propagated the directory meta data to the run host - give it time if needed - (let loop ((count 0)) - (if (or (common:file-exists? work-area) - (> count 10)) - (change-directory work-area) - (begin - (debug:print 0 *default-log-port* "INFO: Not starting job yet - directory " work-area " not found") - (thread-sleep! 10) - (loop (+ count 1))))) - - ;; now we can switch to the work-area? - (change-directory work-area) - ;;(bb-check-path msg: "launch:execute post block 1.5") - ;; (change-directory work-area) - (set! keyvals (keys:target->keyval keys target)) - ;; apply pre-overrides before other variables. The pre-override vars must not - ;; clobbers things from the official sources such as megatest.config and runconfigs.config - (if (string? set-vars) - (let ((varpairs (string-split set-vars ","))) - (debug:print 4 *default-log-port* "varpairs: " varpairs) - (map (lambda (varpair) - (let ((varval (string-split varpair "="))) - (if (eq? (length varval) 2) - (let ((var (car varval)) - (val (cadr varval))) - (debug:print 1 *default-log-port* "Adding pre-var/val " var " = " val " to the environment") - (setenv var val))))) - varpairs))) - ;;(bb-check-path msg: "launch:execute post block 2") - (for-each - (lambda (varval) - (let ((var (car varval)) - (val (cadr varval))) - (if val - (setenv var val) - (begin - (debug:print-error 0 *default-log-port* "required variable " var " does not have a valid value. 
Exiting") - (exit))))) - (list - (list "MT_TEST_RUN_DIR" work-area) - (list "MT_TEST_NAME" test-name) - (list "MT_ITEM_INFO" (conc itemdat)) - (list "MT_ITEMPATH" item-path) - (list "MT_RUNNAME" runname) - (list "MT_MEGATEST" megatest) - (list "MT_TARGET" target) - (list "MT_LINKTREE" (common:get-linktree)) ;; (configf:lookup *configdat* "setup" "linktree")) - (list "MT_TESTSUITENAME" (common:get-testsuite-name)))) - ;;(bb-check-path msg: "launch:execute post block 3") - - (if mt-bindir-path (setenv "PATH" (conc (getenv "PATH") ":" mt-bindir-path))) - ;;(bb-check-path msg: "launch:execute post block 4") - ;; (change-directory top-path) - ;; Can setup as client for server mode now - ;; (client:setup) - - - ;; environment overrides are done *before* the remaining critical envars. - (alist->env-vars env-ovrd) - ;;(bb-check-path msg: "launch:execute post block 41") - (runs:set-megatest-env-vars run-id inkeys: keys inkeyvals: keyvals) - ;;(bb-check-path msg: "launch:execute post block 42") - (set-item-env-vars itemdat) - ;;(bb-check-path msg: "launch:execute post block 43") - (let ((blacklist (configf:lookup *configdat* "setup" "blacklistvars"))) - (if blacklist - (let ((vars (string-split blacklist))) - (save-environment-as-files "megatest" ignorevars: vars) - (for-each (lambda (var) - (unsetenv var)) - vars)) - (save-environment-as-files "megatest"))) - ;;(bb-check-path msg: "launch:execute post block 44") - ;; open-run-close not needed for test-set-meta-info - ;; (tests:set-full-meta-info #f test-id run-id 0 work-area) - ;; (tests:set-full-meta-info test-id run-id 0 work-area) - (tests:set-full-meta-info #f test-id run-id 0 work-area 10) - - ;; (thread-sleep! 0.3) ;; NFS slowness has caused grief here - - (if (args:get-arg "-xterm") - (set! fullrunscript "xterm") - (if (and fullrunscript - (common:file-exists? fullrunscript) - (not (file-execute-access? 
fullrunscript))) - (system (conc "chmod ug+x " fullrunscript)))) - - ;; We are about to actually kick off the test - ;; so this is a good place to remove the records for - ;; any previous runs - ;; (db:test-remove-steps db run-id testname itemdat) - ;; now is also a good time to write the .testconfig file - (let* ((tconfig-fname (conc work-area "/.testconfig")) - (tconfig-tmpfile (conc tconfig-fname ".tmp")) - (tconfig (tests:get-testconfig test-name item-path tconfigreg #t force-create: #t))) ;; 'return-procs))) - (configf:write-alist tconfig tconfig-tmpfile) - (file-move tconfig-tmpfile tconfig-fname #t)) - ;; - (let* ((m (make-mutex)) - (kill-job? #f) - (exit-info (make-launch:einf pid: #t exit-status: #t exit-code: #t rollup-status: 0)) ;; pid exit-status exit-code (i.e. process was successfully run) rollup-status - (job-thread #f) - ;; (keep-going #t) - (misc-flags (let ((ht (make-hash-table))) - (hash-table-set! ht 'keep-going #t) - ht)) - (runit (lambda () - (launch:manage-steps run-id test-id item-path fullrunscript ezsteps subrun test-name tconfigreg exit-info m))) - (monitorjob (lambda () - (launch:monitor-job run-id test-id item-path fullrunscript ezsteps test-name tconfigreg exit-info m work-area runtlim misc-flags))) - (th1 (make-thread monitorjob "monitor job")) - (th2 (make-thread runit "run job"))) - (set! job-thread th2) - (thread-start! th1) - (thread-start! th2) - (thread-join! th2) - (debug:print-info 0 *default-log-port* "Megatest exectute of test " test-name ", item path " item-path " complete. Notifying the db ...") - (hash-table-set! misc-flags 'keep-going #f) - (thread-join! th1) - (thread-sleep! 1) ;; givbe thread th1 a chance to be done TODO: Verify this is needed. At 0.1 I was getting fail to stop, increased to total of 1.1 sec. - (mutex-lock! m) - (let* ((item-path (item-list->path itemdat)) - ;; only state and status needed - use lazy routine - (testinfo (rmt:get-testinfo-state-status run-id test-id))) - ;; Am I completed? 
- (if (member (db:test-get-state testinfo) '("REMOTEHOSTSTART" "RUNNING")) ;; NOTE: It should *not* be REMOTEHOSTSTART but for reasons I don't yet understand it sometimes gets stuck in that state ;; (not (equal? (db:test-get-state testinfo) "COMPLETED")) - (let ((new-state (if kill-job? "KILLED" "COMPLETED") ;; (if (eq? (vector-ref exit-info 2) 0) ;; exited with "good" status - ;; "COMPLETED" ;; (db:test-get-state testinfo))) ;; else preseve the state as set within the test - ) - (new-status (cond - ((not (launch:einf-exit-status exit-info)) "FAIL") ;; job failed to run ... (vector-ref exit-info 1) - ((eq? (launch:einf-rollup-status exit-info) 0) ;; (vector-ref exit-info 3) - ;; if the current status is AUTO then defer to the calculated value (i.e. leave this AUTO) - (if (equal? (db:test-get-status testinfo) "AUTO") "AUTO" "PASS")) - ((eq? (launch:einf-rollup-status exit-info) 1) "FAIL") ;; (vector-ref exit-info 3) - ((eq? (launch:einf-rollup-status exit-info) 2) ;; (vector-ref exit-info 3) - ;; if the current status is AUTO the defer to the calculated value but qualify (i.e. make this AUTO-WARN) - (if (equal? (db:test-get-status testinfo) "AUTO") "AUTO-WARN" "WARN")) - ((eq? (launch:einf-rollup-status exit-info) 3) "CHECK") - ((eq? (launch:einf-rollup-status exit-info) 4) "WAIVED") - ((eq? (launch:einf-rollup-status exit-info) 5) "ABORT") - ((eq? (launch:einf-rollup-status exit-info) 6) "SKIP") - (else "FAIL")))) ;; (db:test-get-status testinfo))) - (debug:print-info 1 *default-log-port* "Test exited in state=" (db:test-get-state testinfo) ", setting state/status based on exit code of " (launch:einf-exit-status exit-info) " and rollup-status of " (launch:einf-rollup-status exit-info)) - (tests:test-set-status! 
run-id - test-id - new-state - new-status - (args:get-arg "-m") #f) - ;; need to update the top test record if PASS or FAIL and this is a subtest - ;; NO NEED TO CALL set-state-status-and-roll-up-items HERE, THIS IS DONE IN set-state-status-and-roll-up-items called by tests:test-set-status! - )) - ;; for automated creation of the rollup html file this is a good place... - (if (not (equal? item-path "")) - (tests:summarize-items run-id test-id test-name #f)) - (tests:summarize-test run-id test-id) ;; don't force - just update if no - (rmt:update-run-stats run-id (rmt:get-raw-run-stats run-id))) - (mutex-unlock! m) - (launch:end-of-run-check run-id ) - (debug:print 2 *default-log-port* "Output from running " fullrunscript ", pid " (launch:einf-pid exit-info) " in work area " - work-area ":\n====\n exit code " (launch:einf-exit-code exit-info) "\n" "====\n") - (if (not (launch:einf-exit-status exit-info)) - (exit 4)))) - ))) - -;; Spec for End of test -;; At end of each test call, after marking self as COMPLETED do run-state-status-rollup -;; At transition to run COMPLETED/X do hooks -;; Definition: test_dead if event_time + duration + 1 minute? < current_time AND -;; we can prove the process is not alive (ssh host pstree -A pid) -;; if dead safe to mark the test as killed in the db -;; State/status table -;; new -;; 100% COMPLETED/ (PASS,FAIL,ABORT etc.) 
==> COMPLETED / X where X is same as itemized rollup -;; > 3 RUNNING with not test_dead do nothing (run should already be RUNNING/ na -;; > 0 RUNNING and test_dead then send KILLREQ ==> COMPLETED -;; 0 RUNNING ==> this is actually the first condition, should not get here - -(define (launch:end-of-run-check run-id ) - (let* ((not-completed-cnt (rmt:get-not-completed-cnt run-id)) - (running-cnt (rmt:get-count-tests-running-for-run-id run-id)) - (all-test-launched (rmt:get-var (conc "lunch-complete-" run-id))) - (current-state (rmt:get-run-state run-id)) - (current-status (rmt:get-run-status run-id))) - ;;get-vars run-id to query metadata table to check if all completed. if all-test-launched = yes then only not-completed-cnt = 0 means everyting is completed if no entry found in the table do nothing - (debug:print 0 *default-log-port* "rollup run state/status") - (rmt:set-state-status-and-roll-up-run run-id current-state current-status) - - (cond - ((and all-test-launched (eq? not-completed-cnt 0) (equal? all-test-launched "yes" )) - (debug:print 0 *default-log-port* "look for post hook.") - (runs:run-post-hook run-id)) - ((> running-cnt 3) - (debug:print 0 *default-log-port* "There are " running-cnt " tests running." )) - ((> running-cnt 0) - (debug:print 0 *default-log-port* "running cnt > 0 but <= 3 kill-running-tests-if-dead" ) - (let ((kill-cnt (launch:kill-tests-if-dead run-id))) - (if (and all-test-launched (equal? all-test-launched "yes") (eq? kill-cnt running-cnt)) - (launch:end-of-run-check run-id)))) ;;todo - (else (debug:print 0 *default-log-port* "Should it get here?? May be everything is not launched yet. 
Running test cnt:" running-cnt " Not completed test cnt:" not-completed-cnt) - (let* ((not-completed-tests (rmt:get-tests-for-run run-id "%" `("NOT_STARTED" "RUNNING" "LAUNCHED" "REMOTEHOSTSTART") `() #f #f #f #f #f #f #f #f))) - (if (> (length not-completed-tests) 0) - (let loop ((running-test (car not-completed-tests)) - (tal (cdr not-completed-tests))) - (let* ((test-name (vector-ref running-test 2)) - (item-path (vector-ref running-test 11))) - (debug:print 0 *default-log-port* "test " test-name "/" item-path " not completed") - (if (not (null? tal)) - (loop (car tal) (cdr tal))))))))))) - -(define (launch:is-test-alive host pid) -(if (and host pid (not (equal? host "n/a"))) -(let* ((cmd (conc "ssh " host " pstree -A " pid)) - (output (with-input-from-pipe cmd read-lines))) - (print "cmd: " cmd "\n op: " output ) - (if(eq? (length output) 0) - #f - #t)) -#t)) - -(define (launch:kill-tests-if-dead run-id) - (let* ((running-tests (rmt:get-tests-for-run run-id "%" `("RUNNING" "LAUNCHED" "REMOTEHOSTSTART") `() #f #f #f #f #f #f #f #f))) - (let loop ((running-test (car running-tests)) - (tal (cdr running-tests)) - (kill-cnt 0)) - (let* ((test-name (vector-ref running-test 2)) - (item-path (vector-ref running-test 11)) - (test-id (vector-ref running-test 0)) - (host (vector-ref running-test 6)) - (pid (rmt:test-get-top-process-pid run-id test-id)) - (event-time (vector-ref running-test 5)) - (duration (vector-ref running-test 12)) - (flag 0) - (curr-time (current-seconds))) - (if (and (< (+ event-time duration 600) curr-time) (not (launch:is-test-alive host pid))) ;;test has not updated duration in last 10 min then likely its not running but confirm before marking it as killed - (begin - (debug:print 0 *default-log-port* "test " test-name "/" item-path " needs to be killed") - (set! flag 1) - (rmt:set-state-status-and-roll-up-items run-id test-name item-path "KILLREQ" "n/a" #f))) - (if (not (null? 
tal)) - (loop (car tal) (cdr tal) (+ kill-cnt flag)) - (+ kill-cnt flag)))))) - -;; DO NOT USE - caching of configs is handled in launch:setup now. -;; -(define (launch:cache-config) - ;; if we have a linktree and -runtests and -target and the directory exists dump the config - ;; to megatest-(current-seconds).cfg and symlink it to megatest.cfg - (if (and *configdat* - (or (args:get-arg "-run") - (args:get-arg "-runtests") - (args:get-arg "-execute"))) - (let* ((linktree (common:get-linktree)) ;; (get-environment-variable "MT_LINKTREE")) - (target (common:args-get-target exit-if-bad: #t)) - (runname (or (args:get-arg "-runname") - (args:get-arg ":runname") - (getenv "MT_RUNNAME"))) - (fulldir (conc linktree "/" - target "/" - runname))) - (if (and linktree (common:file-exists? linktree)) ;; can't proceed without linktree - (begin - (debug:print-info 0 *default-log-port* "Have -run with target=" target ", runname=" runname ", fulldir=" fulldir ", testpatt=" (or (args:get-arg "-testpatt") "%")) - (if (not (common:file-exists? fulldir)) - (create-directory fulldir #t)) ;; need to protect with exception handler - (if (and target - runname - (common:file-exists? fulldir)) - (let ((tmpfile (conc fulldir "/.megatest.cfg." (current-seconds))) - (targfile (conc fulldir "/.megatest.cfg-" megatest-version "-" megatest-fossil-hash)) - (rconfig (conc fulldir "/.runconfig." megatest-version "-" megatest-fossil-hash))) - (if (common:file-exists? 
rconfig) ;; only cache megatest.config AFTER runconfigs has been cached - (begin - (debug:print-info 0 *default-log-port* "Caching megatest.config in " tmpfile) - (if (not (common:in-running-test?)) - (configf:write-alist *configdat* tmpfile)) - (system (conc "ln -sf " tmpfile " " targfile)))) - ))) - (debug:print-info 1 *default-log-port* "No linktree yet, no caching configs."))))) - - -;; gather available information, if legit read configs in this order: -;; -;; if have cache; -;; read it a return it -;; else -;; megatest.config (do not cache) -;; runconfigs.config (cache if all vars avail) -;; megatest.config (cache if all vars avail) -;; returns: -;; *toppath* -;; side effects: -;; sets; *configdat* (megatest.config info) -;; *runconfigdat* (runconfigs.config info) -;; *configstatus* (status of the read data) -;; -(define (launch:setup #!key (force-reread #f) (areapath #f)) - (mutex-lock! *launch-setup-mutex*) - (if (and *toppath* - (eq? *configstatus* 'fulldata) (not force-reread)) ;; got it all - (begin - (debug:print 2 *default-log-port* "NOTE: skipping launch:setup-body call since we have fulldata") - (mutex-unlock! *launch-setup-mutex*) - *toppath*) - (let ((res (launch:setup-body force-reread: force-reread areapath: areapath))) - (mutex-unlock! *launch-setup-mutex*) - res))) - -;; return paths depending on what info is available. -;; -(define (launch:get-cache-file-paths areapath toppath target mtconfig) - (let* ((use-cache (common:use-cache?)) - (runname (common:args-get-runname)) - (linktree (common:get-linktree)) - (testname (common:get-full-test-name)) - (rundir (if (and runname target linktree) - (common:directory-writable? (conc linktree "/" target "/" runname)) - #f)) - (testdir (if (and rundir testname) - (common:directory-writable? 
(conc rundir "/" testname)) - #f)) - (cachedir (or testdir rundir)) - (mtcachef (and cachedir (conc cachedir "/" ".megatest.cfg-" megatest-version "-" megatest-fossil-hash))) - (rccachef (and cachedir (conc cachedir "/" ".runconfigs.cfg-" megatest-version "-" megatest-fossil-hash)))) - (debug:print-info 6 *default-log-port* - "runname=" runname - "\n linktree=" linktree - "\n testname=" testname - "\n rundir=" rundir - "\n testdir=" testdir - "\n cachedir=" cachedir - "\n mtcachef=" mtcachef - "\n rccachef=" rccachef) - (cons mtcachef rccachef))) - -(define (launch:setup-body #!key (force-reread #f) (areapath #f)) - (if (and (eq? *configstatus* 'fulldata) - *toppath* - (not force-reread)) ;; no need to reprocess - *toppath* ;; return toppath - (let* ((use-cache (common:use-cache?)) ;; BB- use-cache checks *configdat* for use-cache setting. We do not have *configdat*. Bootstrapping problem here. - (toppath (or *toppath* areapath (getenv "MT_RUN_AREA_HOME"))) ;; preserve toppath - (target (common:args-get-target)) - (sections (if target (list "default" target) #f)) ;; for runconfigs - (mtconfig (or (args:get-arg "-config") "megatest.config")) ;; allow overriding megatest.config - (cachefiles (launch:get-cache-file-paths areapath toppath target mtconfig)) - ;; checking for null cachefiles should not be necessary, I was seeing error car of '(), might be a chicken bug or a red herring ... - (mtcachef (if (null? cachefiles) - #f - (car cachefiles))) ;; (and cachedir (conc cachedir "/" ".megatest.cfg-" megatest-version "-" megatest-fossil-hash))) - (rccachef (if (null? cachefiles) - #f - (cdr cachefiles)))) ;; (and cachedir (conc cachedir "/" ".runconfigs.cfg-" megatest-version "-" megatest-fossil-hash))) - ;; (cancreate (and cachedir (common:file-exists? cachedir)(file-write-access? cachedir) (not (common:in-running-test?))))) - (set! 
*toppath* toppath) ;; This is needed when we are running as a test using CMDINFO as a datasource - ;;(BB> "launch:setup-body -- cachefiles="cachefiles) - (cond - ;; if mtcachef exists just read it, however we need to assume toppath is available in $MT_RUN_AREA_HOME - ((and (not force-reread) - mtcachef rccachef - use-cache - (get-environment-variable "MT_RUN_AREA_HOME") - (common:file-exists? mtcachef) - (common:file-exists? rccachef)) - ;;(BB> "launch:setup-body -- cond branch 1 - use-cache") - (set! *configdat* (configf:read-alist mtcachef)) - ;;(BB> "launch:setup-body -- 1 set! *configdat*="*configdat*) - (set! *runconfigdat* (configf:read-alist rccachef)) - (set! *configinfo* (list *configdat* (get-environment-variable "MT_RUN_AREA_HOME"))) - (set! *configstatus* 'fulldata) - (set! *toppath* (get-environment-variable "MT_RUN_AREA_HOME")) - *toppath*) - ;; there are no existing cached configs, do full reads of the configs and cache them - ;; we have all the info needed to fully process runconfigs and megatest.config - ((and ;; (not force-reread) ;; force-reread is irrelevant in the AND, could however OR it? - mtcachef - rccachef) ;; BB- why are we doing this without asking if caching is desired? - ;;(BB> "launch:setup-body -- cond branch 2") - (let* ((first-pass (find-and-read-config ;; NB// sets MT_RUN_AREA_HOME as side effect - mtconfig - environ-patt: "env-override" - given-toppath: toppath - pathenvvar: "MT_RUN_AREA_HOME")) - (first-rundat (let ((toppath (if toppath - toppath - (car first-pass)))) - (read-config ;; (conc toppath "/runconfigs.config") ;; this should be converted to runconfig:read but it is non-trivial, leaving it for now. - (conc (if (string? toppath) - toppath - (get-environment-variable "MT_RUN_AREA_HOME")) - "/runconfigs.config") - *runconfigdat* #t - sections: sections)))) - (set! *runconfigdat* first-rundat) - (if first-pass ;; - (begin - ;;(BB> "launch:setup-body -- \"first-pass\"=first-pass") - (set! 
*configdat* (car first-pass)) - ;;(BB> "launch:setup-body -- 2 set! *configdat*="*configdat*) - (set! *configinfo* first-pass) - (set! *toppath* (or toppath (cadr first-pass))) ;; use the gathered data unless already have it - (set! toppath *toppath*) - (if (not *toppath*) - (begin - (debug:print-error 0 *default-log-port* "you are not in a megatest area!") - (exit 1))) - (setenv "MT_RUN_AREA_HOME" *toppath*) - ;; the seed read is done, now read runconfigs, cache it then read megatest.config one more time and cache it - (let* ((keys (rmt:get-keys)) - (key-vals (keys:target->keyval keys target)) - (linktree (common:get-linktree)) ;; (or (getenv "MT_LINKTREE")(if *configdat* (configf:lookup *configdat* "setup" "linktree") #f))) - ; (if *configdat* - ; (configf:lookup *configdat* "setup" "linktree") - ; (conc *toppath* "/lt")))) - (second-pass (find-and-read-config - mtconfig - environ-patt: "env-override" - given-toppath: toppath - pathenvvar: "MT_RUN_AREA_HOME")) - (runconfigdat (begin ;; this read of the runconfigs will see any adjustments made by re-reading megatest.config - (for-each (lambda (kt) - (setenv (car kt) (cadr kt))) - key-vals) - (read-config (conc toppath "/runconfigs.config") *runconfigdat* #t ;; consider using runconfig:read some day ... - sections: sections))) - (cachefiles (launch:get-cache-file-paths areapath toppath target mtconfig)) - (mtcachef (car cachefiles)) - (rccachef (cdr cachefiles))) - ;; trap exception due to stale NFS handle -- Error: (open-output-file) cannot open file - Stale NFS file handle: "/p/fdk/gwa/lefkowit/mtTesting/qa/primbeqa/links/p1222/11/PDK_r1.1.1/prim/clean/pcell_testgen/.runconfigs.cfg-1.6427-7d1e789cb3f62f9cde719a4865bb51b3c17ea853" - ticket 220546342 - ;; TODO - consider 1) using simple-lock to bracket cache write - ;; 2) cache in hash on server, since need to do rmt: anyway to lock. 
- - (if rccachef - (common:fail-safe - (lambda () - (configf:write-alist runconfigdat rccachef)) - (conc "Could not write cache file - "rccachef))) - (if mtcachef - (common:fail-safe - (lambda () - (configf:write-alist *configdat* mtcachef)) - (conc "Could not write cache file - "mtcachef))) - (set! *runconfigdat* runconfigdat) - (if (and rccachef mtcachef) (set! *configstatus* 'fulldata)))) - ;; no configs found? should not happen but let's try to recover gracefully, return an empty hash-table - (set! *configdat* (make-hash-table)) - ))) - - ;; else read what you can and set the flag accordingly - ;; here we don't have either mtconfig or rccachef - (else - ;;(BB> "launch:setup-body -- cond branch 3 - else") - (let* ((cfgdat (find-and-read-config - (or (args:get-arg "-config") "megatest.config") - environ-patt: "env-override" - given-toppath: (get-environment-variable "MT_RUN_AREA_HOME") - pathenvvar: "MT_RUN_AREA_HOME"))) - - (if (and cfgdat (list? cfgdat) (> (length cfgdat) 0) (hash-table? (car cfgdat))) - (let* ((toppath (or (get-environment-variable "MT_RUN_AREA_HOME")(cadr cfgdat))) - (rdat (read-config (conc toppath ;; convert this to use runconfig:read! - "/runconfigs.config") *runconfigdat* #t sections: sections))) - (set! *configinfo* cfgdat) - (set! *configdat* (car cfgdat)) - (set! *runconfigdat* rdat) - (set! *toppath* toppath) - (set! *configstatus* 'partial)) - (begin - (debug:print-error 0 *default-log-port* "No " mtconfig " file found. Giving up.") - (exit 2)))))) - ;; COND ends here. - - ;; additional house keeping - (let* ((linktree (or (common:get-linktree) - (conc *toppath* "/lt")))) - (if linktree - (begin - (if (not (common:file-exists? 
linktree)) - (begin - (handle-exceptions - exn - (begin - (debug:print-error 0 *default-log-port* "Something went wrong when trying to create linktree dir at " linktree) - (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) - (exit 1)) - (create-directory linktree #t)))) - (handle-exceptions - exn - (begin - (debug:print-error 0 *default-log-port* "Something went wrong when trying to create link to linktree at " *toppath*) - (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))) - (let ((tlink (conc *toppath* "/lt"))) - (if (not (common:file-exists? tlink)) - (create-symbolic-link linktree tlink))))) - (begin - (debug:print-error 0 *default-log-port* "linktree not defined in [setup] section of megatest.config") - ))) - (if (and *toppath* - (directory-exists? *toppath*)) - (begin - (setenv "MT_RUN_AREA_HOME" *toppath*) - (setenv "MT_TESTSUITENAME" (common:get-testsuite-name))) - (begin - (debug:print-error 0 *default-log-port* "failed to find the top path to your Megatest area.") - (set! *toppath* #f) ;; force it to be false so we return #f - #f)) - - ;; one more attempt to cache the configs for future reading - (let* ((cachefiles (launch:get-cache-file-paths areapath toppath target mtconfig)) - (mtcachef (car cachefiles)) - (rccachef (cdr cachefiles))) - - ;; trap exception due to stale NFS handle -- Error: (open-output-file) cannot open file - Stale NFS file handle: "...somepath.../.runconfigs.cfg-1.6427-7d1e789cb3f62f9cde719a4865bb51b3c17ea853" - ticket 220546342 - ;; TODO - consider 1) using simple-lock to bracket cache write - ;; 2) cache in hash on server, since need to do rmt: anyway to lock. - (if (and rccachef *runconfigdat* (not (common:file-exists? rccachef))) - (common:fail-safe - (lambda () - (configf:write-alist *runconfigdat* rccachef)) - (conc "Could not write cache file - "rccachef)) - ) - (if (and mtcachef *configdat* (not (common:file-exists? 
mtcachef))) - (common:fail-safe - (lambda () - (configf:write-alist *configdat* mtcachef)) - (conc "Could not write cache file - "mtcachef)) - ) - (if (and rccachef mtcachef *runconfigdat* *configdat*) - (set! *configstatus* 'fulldata))) - - ;; if have -append-config then read and append here - (let ((cfname (args:get-arg "-append-config"))) - (if (and cfname - (file-read-access? cfname)) - (read-config cfname *configdat* #t))) ;; values are added to the hash, no need to do anything special. - *toppath*))) - -(define (get-best-disk confdat testconfig) - (let* ((disks (or (and testconfig (hash-table-ref/default testconfig "disks" #f)) - (hash-table-ref/default confdat "disks" #f))) - (minspace (let ((m (configf:lookup confdat "setup" "minspace"))) - (string->number (or m "10000"))))) - (if disks - (let ((res (common:get-disk-with-most-free-space disks minspace))) ;; min size of 1000, seems tad dumb - (if res - (cdr res) - (begin -;; (if (common:low-noise-print 20 "No valid disks or no disk with enough space") -;; (debug:print-error 0 *default-log-port* "No valid disks found in megatest.config. Please add some to your [disks] section and ensure the directory exists and has enough space!\n You can change minspace in the [setup] section of megatest.config. Current setting is: " minspace)) - ;;(exit 1) - (if (null? disks) - (cons 1 (conc *toppath* "/runs")) - (let ((paths (sort disks (lambda (x y) (> (string-length (cadr x)) (string-length (cadr y))))))) - (let loop ((head (car paths)) (tail (cdr paths))) - (let ((result (handle-exceptions exn #f (create-directory (cadr head) #t)))) - (if result - result - (if (null? tail) - (cons 1 (conc *toppath* "/runs")) - (loop (car tail) (cdr tail)))))))))))))) ;; the code creates the necessary directories if it does not exist and returns the path. 
- - -(define (launch:test-copy test-src-path test-path) - (let* ((ovrcmd (let ((cmd (configf:lookup *configdat* "setup" "testcopycmd"))) - (if cmd - ;; substitute the TEST_SRC_PATH and TEST_TARG_PATH - (string-substitute "TEST_TARG_PATH" test-path - (string-substitute "TEST_SRC_PATH" test-src-path cmd #t) #t) - #f))) - (cmd (if ovrcmd - ovrcmd - (conc "rsync -av" (if (debug:debug-mode 1) "" "q") " " test-src-path "/ " test-path "/" - " >> " test-path "/mt_launch.log 2>> " test-path "/mt_launch.log"))) - (status (system cmd))) - (if (not (eq? status 0)) - (debug:print 2 *default-log-port* "ERROR: problem with running \"" cmd "\"")))) - - -;; Desired directory structure: -;; -;; - - -. -;; | -;; v -;; - - -|- -;; -;; dir stored in test is: -;; -;; - - [ - ] -;; -;; All log file links should be stored relative to the top of link path -;; -;; - [ - ] -;; -(define (create-work-area run-id run-info keyvals test-id test-src-path disk-path testname itemdat #!key (remtries 2)) - (let* ((item-path (if (string? itemdat) itemdat (item-list->path itemdat))) ;; if pass in string - just use it - (runname (if (string? run-info) ;; if we pass in a string as run-info use it as run-name. - run-info - (db:get-value-by-header (db:get-rows run-info) - (db:get-header run-info) - "runname"))) - (contour #f) ;; NOT READY FOR THIS (args:get-arg "-contour")) - ;; convert back to db: from rdb: - this is always run at server end - (target (string-intersperse (map cadr keyvals) "/")) - - (not-iterated (equal? 
"" item-path)) - - ;; all tests are found at /test-base or /test-base - (testtop-base (conc target "/" runname "/" testname)) - (test-base (conc testtop-base (if not-iterated "" "/") item-path)) - - ;; nb// if itempath is not "" then it is prefixed with "/" - (toptest-path (conc disk-path (if contour (conc "/" contour) "") "/" testtop-base)) - (test-path (conc disk-path (if contour (conc "/" contour) "") "/" test-base)) - - ;; ensure this exists first as links to subtests must be created there - (linktree (common:get-linktree)) - ;; WAS: (let ((rd (configf:lookup *configdat* "setup" "linktree"))) - ;; (if rd rd (conc *toppath* "/runs")))) - ;; which seems wrong ... - - (lnkbase (conc linktree (if contour (conc "/" contour) "") "/" target "/" runname)) - (lnkpath (conc lnkbase "/" testname)) - (lnkpathf (conc lnkpath (if not-iterated "" "/") item-path)) - (lnktarget (conc lnkpath "/" item-path))) - - ;; Update the rundir path in the test record for all, rundir=physical, shortdir=logical - ;; rundir shortdir - (rmt:general-call 'test-set-rundir-shortdir run-id lnkpathf test-path testname item-path run-id) - - (debug:print 2 *default-log-port* "INFO:\n lnkbase=" lnkbase "\n lnkpath=" lnkpath "\n toptest-path=" toptest-path "\n test-path=" test-path) - (if (not (common:file-exists? linktree)) - (begin - (debug:print 0 *default-log-port* "WARNING: linktree did not exist! Creating it now at " linktree) - (create-directory linktree #t))) ;; (system (conc "mkdir -p " linktree)))) - ;; create the directory for the tests dir links, this is needed no matter what... try up to three times - (let loop ((done 3)) - (let ((success (if (and (not (common:directory-exists? lnkbase)) - (not (common:file-exists? 
lnkbase))) - (handle-exceptions - exn - (begin - (debug:print-error 0 *default-log-port* "Problem creating linktree base at " lnkbase) - (print-error-message exn (current-error-port)) - #t) - (create-directory lnkbase #t) - #f)))) - (if (and (not success)(> done 0)) - (loop (- done 1))))) - - ;; update the toptest record with its location rundir, cache the path - ;; This wass highly inefficient, one db write for every subtest, potentially - ;; thousands of unnecessary updates, cache the fact it was set and don't set it - ;; again. - - ;; Now create the link from the test path to the link tree, however - ;; if the test is iterated it is necessary to create the parent path - ;; to the iteration. use pathname-directory to trim the path by one - ;; level - (if (not not-iterated) ;; i.e. iterated - (let ((iterated-parent (pathname-directory (conc lnkpath "/" item-path)))) - (debug:print-info 2 *default-log-port* "Creating iterated parent " iterated-parent) - (handle-exceptions - exn - (begin - (debug:print-error 0 *default-log-port* " Failed to create directory " iterated-parent ((condition-property-accessor 'exn 'message) exn) ", exiting") - (exit 1)) - (create-directory iterated-parent #t)))) - - (if (symbolic-link? lnkpath) - (handle-exceptions - exn - (begin - (debug:print-error 0 *default-log-port* " Failed to remove symlink " lnkpath ((condition-property-accessor 'exn 'message) exn) ", exiting") - (exit 1)) - (delete-file lnkpath))) - - (if (not (or (common:file-exists? lnkpath) - (symbolic-link? lnkpath))) - (handle-exceptions - exn - (begin - (debug:print-error 0 *default-log-port* " Failed to create symlink " lnkpath ((condition-property-accessor 'exn 'message) exn) ", exiting") - (exit 1)) - (create-symbolic-link toptest-path lnkpath))) - - ;; NB - This was not working right - some top tests are not getting the path set!!! - ;; - ;; Do the setting of this record after the paths are created so that the shortdir can - ;; be set to the real directory location. 
This is safer for future clean up if the link - ;; tree is damaged or lost. - ;; - (if (not (hash-table-ref/default *toptest-paths* testname #f)) - (let* ((testinfo (rmt:get-test-info-by-id run-id test-id)) ;; run-id testname item-path)) - (curr-test-path (if testinfo ;; (filedb:get-path *fdb* - ;; (db:get-path dbstruct - ;; (rmt:sdb-qry 'getstr - (db:test-get-rundir testinfo) ;; ) ;; ) - #f))) - (hash-table-set! *toptest-paths* testname curr-test-path) - ;; NB// Was this for the test or for the parent in an iterated test? - (rmt:general-call 'test-set-rundir-shortdir run-id lnkpath - (if (common:file-exists? lnkpath) - ;; (resolve-pathname lnkpath) - (common:nice-path lnkpath) - lnkpath) - testname "" run-id) - ;; (rmt:general-call 'test-set-rundir run-id lnkpath testname "") ;; toptest-path) - (if (or (not curr-test-path) - (not (directory-exists? toptest-path))) - (begin - (debug:print-info 2 *default-log-port* "Creating " toptest-path " and link " lnkpath) - (handle-exceptions - exn - #f ;; don't care to catch and deal with errors here for now. - (create-directory toptest-path #t)) - (hash-table-set! *toptest-paths* testname toptest-path))))) - - ;; The toptest path has been created, the link to the test in the linktree has - ;; been created. Now, if this is an iterated test the real test dir must be created - (if (not not-iterated) ;; this is an iterated test - (begin ;; (let ((lnktarget (conc lnkpath "/" item-path))) - (debug:print 2 *default-log-port* "Setting up sub test run area") - (debug:print 2 *default-log-port* " - creating run area in " test-path) - (handle-exceptions - exn - (begin - (debug:print-error 0 *default-log-port* " Failed to create directory " test-path ((condition-property-accessor 'exn 'message) exn) ", exiting") - (exit 1)) - (create-directory test-path #t)) - (debug:print 2 *default-log-port* - " - creating link from: " test-path "\n" - " to: " lnktarget) - - ;; If there is already a symlink delete it and recreate it. 
- (handle-exceptions - exn - (begin - (debug:print-error 0 *default-log-port* " Failed to re-create link " lnktarget ((condition-property-accessor 'exn 'message) exn) ", exiting") - (exit)) - (if (symbolic-link? lnktarget) (delete-file lnktarget)) - (if (not (common:file-exists? lnktarget)) (create-symbolic-link test-path lnktarget))))) - - (if (not (directory? test-path)) - (create-directory test-path #t)) ;; this is a hack, I don't know why out of the blue this path does not exist sometimes - - (if (and test-src-path (directory? test-path)) - (begin - (launch:test-copy test-src-path test-path) - (list lnkpathf lnkpath )) - (if (and test-src-path (> remtries 0)) - (begin - (debug:print-error 0 *default-log-port* "Failed to create work area at " test-path " with link at " lnktarget ", remaining attempts " remtries) - ;; - (create-work-area run-id run-info keyvals test-id test-src-path disk-path testname itemdat remtries: (- remtries 1))) - (list #f #f))))) - - -(define (launch:handle-zombie-tests run-id) - (let* ((key (conc "zombiescan-runid-"run-id)) - (now (current-seconds)) - (threshold (- (current-seconds) (* 2 (or (configf:lookup-number *configdat* "setup" "deadtime") 120)))) - (val (rmt:get-var key)) - (do-scan? - (cond - ((not val) - #t) - ((< val threshold) - #t) - (else #f)))) - (when do-scan? - (debug:print 1 *default-log-port* "INFO: search and mark zombie tests") - (rmt:set-var key (current-seconds)) - (rmt:find-and-mark-incomplete run-id #f)))) - - - - - -;; 1. look though disks list for disk with most space -;; 2. create run dir on disk, path name is meaningful -;; 3. create link from run dir to megatest runs area -;; 4. remotely run the test on allocated host -;; - could be ssh to host from hosts table (update regularly with load) -;; - could be netbatch -;; (launch-test db (cadr status) test-conf)) -(define (launch-test test-id run-id run-info keyvals runname test-conf test-name test-path itemdat params) - (mutex-lock! 
*launch-setup-mutex*) ;; setting variables and processing the testconfig is NOT thread-safe, reuse the launch-setup mutex - (let* ( ;; (lock-key (conc "test-" test-id)) - ;; (got-lock (let loop ((lock (rmt:no-sync-get-lock lock-key)) - ;; (expire-time (+ (current-seconds) 15))) ;; give up on getting the lock and steal it after 15 seconds - ;; (if (car lock) - ;; #t - ;; (if (> (current-seconds) expire-time) - ;; (begin - ;; (debug:print-info 0 *default-log-port* "Timed out waiting for a lock to launch test " keyvals " " runname " " test-name " " test-path) - ;; (rmt:no-sync-del! lock-key) ;; destroy the lock - ;; (loop (rmt:no-sync-get-lock lock-key) expire-time)) ;; - ;; (begin - ;; (thread-sleep! 1) - ;; (loop (rmt:no-sync-get-lock lock-key) expire-time)))))) - (item-path (item-list->path itemdat)) - (contour #f)) ;; NOT READY FOR THIS (args:get-arg "-contour"))) - (let loop ((delta (- (current-seconds) *last-launch*)) - (launch-delay (configf:lookup-number *configdat* "setup" "launch-delay" default: 1))) - (if (> launch-delay delta) - (begin - (if (common:low-noise-print 1200 "test launch delay") ;; every two hours or so remind the user about launch delay. - (debug:print-info 0 *default-log-port* "NOTE: test launches are delayed by " launch-delay " seconds. See megatest.config launch-delay setting to adjust.")) ;; launch of " test-name " for " (- launch-delay delta) " seconds")) - (thread-sleep! 
(- launch-delay delta)) - (loop (- (current-seconds) *last-launch*) launch-delay)))) - (change-directory *toppath*) - (alist->env-vars ;; consolidate this code with the code in megatest.scm for "-execute", *maybe* - the longer they are set the longer each launch takes (must be non-overlapping with the vars) - (append - (list - (list "MT_RUN_AREA_HOME" *toppath*) - (list "MT_TEST_NAME" test-name) - (list "MT_RUNNAME" runname) - (list "MT_ITEMPATH" item-path) - (list "MT_CONTOUR" contour) - ) - itemdat)) - (let* ((tregistry (tests:get-all)) ;; third param (below) is system-allowed - ;; for tconfig, why do we allow fallback to test-conf? - (tconfig (or (tests:get-testconfig test-name item-path tregistry #t force-create: #t) - (begin - (debug:print 0 *default-log-port* "WARNING: falling back to pre-calculated testconfig. This is likely not desired.") - test-conf))) ;; force re-read now that all vars are set - (useshell (let ((ush (configf:lookup *configdat* "jobtools" "useshell"))) - (if ush - (if (equal? ush "no") ;; must use "no" to NOT use shell - #f - ush) - #t))) ;; default is yes - (runscript (configf:lookup tconfig "setup" "runscript")) - (ezsteps (> (length (hash-table-ref/default tconfig "ezsteps" '())) 0)) ;; don't send all the steps, could be big, just send a flag - (subrun (> (length (hash-table-ref/default tconfig "subrun" '())) 0)) ;; send a flag to process a subrun - ;; (diskspace (configf:lookup tconfig "requirements" "diskspace")) - ;; (memory (configf:lookup tconfig "requirements" "memory")) - ;; (hosts (configf:lookup *configdat* "jobtools" "workhosts")) ;; I'm pretty sure this was never completed - (remote-megatest (configf:lookup *configdat* "setup" "executable")) - (run-time-limit (or (configf:lookup tconfig "requirements" "runtimelim") - (configf:lookup *configdat* "setup" "runtimelim"))) - ;; FIXME SOMEDAY: not good how this is so obtuse, this hack is to - ;; allow running from dashboard. 
Extract the path - ;; from the called megatest and convert dashboard - ;; or dboard to megatest - (local-megatest (let* ((lm (car (argv))) - (dir (pathname-directory lm)) - (exe (pathname-strip-directory lm))) - (conc (if dir (conc dir "/") "") - (case (string->symbol exe) - ((dboard) "../megatest") - ((mtest) "../megatest") - ((dashboard) "megatest") - (else exe))))) - (launcher (common:get-launcher *configdat* test-name item-path)) ;; (configf:lookup *configdat* "jobtools" "launcher")) - (test-sig (conc (common:get-testsuite-name) ":" test-name ":" item-path)) ;; (item-list->path itemdat))) ;; test-path is the full path including the item-path - (work-area #f) - (toptest-work-area #f) ;; for iterated tests the top test contains data relevant for all - (diskpath #f) - (cmdparms #f) - (fullcmd #f) ;; (define a (with-output-to-string (lambda ()(write x)))) - (mt-bindir-path #f) - (testinfo (rmt:get-test-info-by-id run-id test-id)) - (mt_target (string-intersperse (map cadr keyvals) "/")) - (debug-param (append (if (args:get-arg "-debug") (list "-debug" (args:get-arg "-debug")) '()) - (if (args:get-arg "-logging")(list "-logging") '())))) - ;; (if hosts (set! hosts (string-split hosts))) - ;; set the megatest to be called on the remote host - (if (not remote-megatest)(set! remote-megatest local-megatest)) ;; "megatest")) - (set! mt-bindir-path (pathname-directory remote-megatest)) - (if launcher (set! 
launcher (string-split launcher))) - ;; set up the run work area for this test - (if (and (args:get-arg "-preclean") ;; user has requested to preclean for this run - (not (member (db:test-get-rundir testinfo)(list "n/a" "/tmp/badname")))) ;; n/a is a placeholder and thus not a read dir - (begin - (debug:print-info 0 *default-log-port* "attempting to preclean directory " (db:test-get-rundir testinfo) " for test " test-name "/" item-path) - (runs:remove-test-directory testinfo 'remove-data-only))) ;; remove data only, do not perturb the record - - ;; prevent overlapping actions - set to LAUNCHED as early as possible - ;; - ;; the following call handles waiver propogation. cannot yet condense into roll-up-pass-fail - (tests:test-set-status! run-id test-id "LAUNCHED" "n/a" #f #f) ;; (if launch-results launch-results "FAILED")) - (rmt:set-state-status-and-roll-up-items run-id test-name item-path #f "LAUNCHED" #f) - ;; (pp (hash-table->alist tconfig)) - (set! diskpath (get-best-disk *configdat* tconfig)) - (if diskpath - (let ((dat (create-work-area run-id run-info keyvals test-id test-path diskpath test-name itemdat))) - (set! work-area (car dat)) - (set! toptest-work-area (cadr dat)) - (debug:print-info 2 *default-log-port* "Using work area " work-area)) - (begin - (set! work-area (conc test-path "/tmp_run")) - (create-directory work-area #t) - (debug:print 0 *default-log-port* "WARNING: No disk work area specified - running in the test directory under tmp_run"))) - (set! 
cmdparms (base64:base64-encode - (z3:encode-buffer - (with-output-to-string - (lambda () ;; (list 'hosts hosts) - (write (list (list 'testpath test-path) - ;; (list 'transport (conc *transport-type*)) - ;; (list 'serverinf *server-info*) - (list 'homehost (let* ((hhdat (common:get-homehost))) - (if hhdat - (car hhdat) - #f))) - (list 'serverurl (if *runremote* - (remote-server-url *runremote*) - #f)) ;; - (list 'areaname (common:get-testsuite-name)) - (list 'toppath *toppath*) - (list 'work-area work-area) - (list 'test-name test-name) - (list 'runscript runscript) - (list 'run-id run-id ) - (list 'test-id test-id ) - ;; (list 'item-path item-path ) - (list 'itemdat itemdat ) - (list 'megatest remote-megatest) - (list 'ezsteps ezsteps) - (list 'subrun subrun) - (list 'target mt_target) - (list 'contour contour) - (list 'runtlim (if run-time-limit (common:hms-string->seconds run-time-limit) #f)) - (list 'env-ovrd (hash-table-ref/default *configdat* "env-override" '())) - (list 'set-vars (if params (hash-table-ref/default params "-setvars" #f))) - (list 'runname runname) - (list 'mt-bindir-path mt-bindir-path)))))))) - - ;; clean out step records from previous run if they exist - ;; (rmt:delete-test-step-records run-id test-id) - ;; if the dir does not exist we may have a itempath where individual variables are a path, launch anyway - (if (common:file-exists? work-area) - (change-directory work-area)) ;; so that log files from the launch process don't clutter the test dir - (cond - ;; ((and launcher hosts) ;; must be using ssh hostname - ;; (set! fullcmd (append launcher (car hosts)(list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param))) - ;; (set! fullcmd (append launcher (car hosts)(list remote-megatest test-sig "-execute" cmdparms)))) - (launcher - (set! fullcmd (append launcher (list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param))) - ;; (set! 
fullcmd (append launcher (list remote-megatest test-sig "-execute" cmdparms)))) - (else - (if (not useshell)(debug:print 0 *default-log-port* "WARNING: internal launching will not work well without \"useshell yes\" in your [jobtools] section")) - (set! fullcmd (append (list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param (list (if useshell "&" "")))))) - ;; (set! fullcmd (list remote-megatest test-sig "-execute" cmdparms (if useshell "&" ""))))) - (if (args:get-arg "-xterm")(set! fullcmd (append fullcmd (list "-xterm")))) - (debug:print 1 *default-log-port* "Launching " work-area) - ;; set pre-launch-env-vars before launching, keep the vars in prevvals and put the envionment back when done - (debug:print 4 *default-log-port* "fullcmd: " fullcmd) - (set! *last-launch* (current-seconds)) ;; all that junk above takes time, set this as late as possible. - (let* ((commonprevvals (alist->env-vars - (hash-table-ref/default *configdat* "env-override" '()))) - (miscprevvals (alist->env-vars ;; consolidate this code with the code in megatest.scm for "-execute" - (append (list (list "MT_TEST_RUN_DIR" work-area) - (list "MT_TEST_NAME" test-name) - (list "MT_ITEM_INFO" (conc itemdat)) - (list "MT_RUNNAME" runname) - (list "MT_TARGET" mt_target) - (list "MT_ITEMPATH" item-path) - ) - itemdat))) - (testprevvals (alist->env-vars - (hash-table-ref/default tconfig "pre-launch-env-overrides" '()))) - ;; Launchwait defaults to true, must override it to turn off wait - (launchwait (if (equal? (configf:lookup *configdat* "setup" "launchwait") "no") #f #t)) - (launch-results-prev (apply (if launchwait ;; BB: TODO: refactor this to examine return code of launcher, if nonzero, set state to launch failed. 
- process:cmd-run-with-stderr-and-exitcode->list - process-run) - (if useshell - (let ((cmdstr (string-intersperse fullcmd " "))) - (if launchwait - cmdstr - (conc cmdstr " >> mt_launch.log 2>&1 &"))) - (car fullcmd)) - (if useshell - '() - (cdr fullcmd)))) - (success (if launchwait (equal? 0 (cadr launch-results-prev)) #t)) - (launch-results (if launchwait (car launch-results-prev) launch-results-prev))) - (if (not success) - (tests:test-set-status! run-id test-id "COMPLETED" "DEAD" "launcher failed; exited non-zero; check mt_launch.log" #f)) ;; (if launch-results launch-results "FAILED")) - (mutex-unlock! *launch-setup-mutex*) ;; yes, really should mutex all the way to here. Need to put this entire process into a fork. - ;; (rmt:no-sync-del! lock-key) ;; release the lock for starting this test - (if (not launchwait) ;; give the OS a little time to allow the process to start - (thread-sleep! 0.01)) - (with-output-to-file "mt_launch.log" - (lambda () - (print "LAUNCHCMD: " (string-intersperse fullcmd " ")) - (if (list? launch-results) - (apply print launch-results) - (print "NOTE: launched \"" fullcmd "\"\n but did not wait for it to proceed. Add the following to megatest.config \n[setup]\nlaunchwait yes\n if you have problems with this")) - #:append)) - (debug:print 2 *default-log-port* "Launching completed, updating db") - (debug:print 2 *default-log-port* "Launch results: " launch-results) - (if (not launch-results) - (begin - (print "ERROR: Failed to run " (string-intersperse fullcmd " ") ", exiting now") - ;; (sqlite3:finalize! db) - ;; good ole "exit" seems not to work - ;; (_exit 9) - ;; but this hack will work! Thanks go to Alan Post of the Chicken email list - ;; NB// Is this still needed? Should be safe to go back to "exit" now? 
- (process-signal (current-process-id) signal/kill) - )) - (alist->env-vars miscprevvals) - (alist->env-vars testprevvals) - (alist->env-vars commonprevvals) - launch-results)) - (change-directory *toppath*))) - -;; recover a test where the top controlling mtest may have died -;; -(define (launch:recover-test run-id test-id) - ;; this function is called on the test run host via ssh - ;; - ;; 1. look at the process from pid - ;; - is it owned by calling user - ;; - it it's run directory correct for the test - ;; - is there a controlling mtest (maybe stuck) - ;; 2. if recovery is needed watch pid - ;; - when it exits take the exit code and do the needful - ;; - (let* ((pid (rmt:test-get-top-process-pid run-id test-id)) - (psres (with-input-from-pipe - (conc "ps -F -u " (current-user-name) " | grep -E '" pid " ' | grep -v 'grep -E " pid "'") - (lambda () - (read-line)))) - (rundir (if (string? psres) ;; real process owned by user - (read-symbolic-link (conc "/proc/" pid "/cwd")) - #f))) - ;; now wait on that process if all is correct - ;; periodically update the db with runtime - ;; when the process exits look at the db, if still RUNNING after 10 seconds set - ;; state/status appropriately - (process-wait pid))) Index: launchmod.scm ================================================================== --- launchmod.scm +++ launchmod.scm @@ -17,19 +17,19 @@ ;; along with Megatest. If not, see . 
;;====================================================================== (declare (unit launchmod)) + (declare (uses commonmod)) (module launchmod * -(import scheme chicken data-structures extras) -(import (prefix sqlite3 sqlite3:) posix typed-records srfi-18 srfi-69 format ports srfi-1 matchable) +(import scheme chicken data-structures extras files) +(import (prefix sqlite3 sqlite3:) posix typed-records + srfi-18 srfi-69 format ports srfi-1 matchable + z3 (prefix base64 base64:) regex + call-with-environment-variables csv) (import commonmod) -;; (use (prefix ulex ulex:)) - -(include "common_records.scm") - ) Index: megamod.scm ================================================================== --- megamod.scm +++ megamod.scm @@ -17,39 +17,32 @@ ;; along with Megatest. If not, see . ;;====================================================================== (declare (unit megamod)) + +(declare (uses stml2)) +(declare (uses mtargs)) + (declare (uses commonmod)) -;; (declare (uses dbmod)) -;; ;;(declare (uses apimod)) -;; (declare (uses ftail)) -;; ;; (declare (uses rmtmod)) -;; (declare (uses apimod)) -;; (declare (uses archivemod)) -;; (declare (uses clientmod)) -;; (declare (uses dbmod)) -;; (declare (uses dcommonmod)) -;; (declare (uses envmod)) -;; (declare (uses ezstepsmod)) -;; (declare (uses itemsmod)) -;; (declare (uses keysmod)) -;; (declare (uses launchmod)) -;; (declare (uses odsmod)) -;; (declare (uses processmod)) -;; (declare (uses runconfigmod)) -(declare (uses runsmod)) -;; (declare (uses servermod)) -;; (declare (uses subrunmod)) -;; (declare (uses tasksmod)) +(declare (uses apimod)) +(declare (uses keysmod)) +(declare (uses dbmod)) +(declare (uses ducttape-lib)) +(declare (uses itemsmod)) +(declare (uses mtconfigf)) +(declare (uses odsmod)) +(declare (uses pgdbmod)) +(declare (uses pkts)) +(declare (uses rmtmod)) +(declare (uses mtmod)) +(declare (uses servermod)) +(declare (uses tasksmod)) (declare (uses testsmod)) -;; (declare (uses vgmod)) -(declare 
(uses pkts)) -(declare (uses mtargs)) -(declare (uses mtconfigf)) -(declare (uses ducttape-lib)) -(declare (uses stml2)) +(declare (uses subrunmod)) +(declare (uses itemsmod)) +(declare (uses runsmod)) (module megamod * (import scheme chicken data-structures extras) @@ -105,37 +98,31 @@ (define read-config configf:read-config) (define find-and-read-config configf:find-and-read-config) ;; (define config:eval-string-in-environment configf:eval-string-in-environment) (import spiffy) + (import stml2) +(import (prefix mtargs args:)) -;; (import apimod) -;; (import archivemod) -;; (import clientmod) +;; Order here is important! (import commonmod) -;; (import dbmod) -;; (import dcommonmod) -;; (import envmod) -;; (import ezstepsmod) -;; (import ftail) -;; (import itemsmod) -;; (import keysmod) -;; (import launchmod) -;; (import odsmod) -;; (import processmod) +(import keysmod) +(import pgdbmod) +(import tasksmod) +(import dbmod) +(import apimod) +(import ducttape-lib) +(import itemsmod) +(import pkts) (import rmtmod) -;; (import runconfigmod) -(import runsmod) -;; (import servermod) -;; (import subrunmod) -;; (import tasksmod) +(import mtmod) (import testsmod) -;; (import vgmod) -(import pkts) -(import (prefix mtargs args:)) -(import ducttape-lib) +(import servermod) +(import subrunmod) +(import itemsmod) +(import runsmod) ;; (use (prefix ulex ulex:)) (include "common_records.scm") (include "db_records.scm") @@ -148,11 +135,10 @@ ;;====================================================================== ;; L O C K I N G M E C H A N I S M S ;;====================================================================== ;; globals -(define *writes-total-delay* 0) (define *exit-started* #f) (define *last-monitor-update-time* 0) (define *default-log-port* (current-error-port)) ;; The watchdog is to keep an eye on things like db sync etc. 
@@ -168,35 +154,15 @@ (print-call-chain) (print " message: " ((condition-property-accessor 'exn 'message) exn))) (common:watchdog))) "Watchdog thread")) -(include "api-inc.scm") ;; L3 (include "archive-inc.scm") (include "client-inc.scm") -(include "common-inc.scm") ;; L5 -(include "db-inc.scm") ;; L4 (include "env-inc.scm") -(include "http-transport-inc.scm") -(include "items-inc.scm") -;; (include "keys-inc.scm") -(include "launch-inc.scm") ;; L1 -;; (include "margs-inc.scm") -(include "mt-inc.scm") -(include "ods-inc.scm") ;; L1 -(include "pgdb-inc.scm") (include "portlogger-inc.scm") (include "process-inc.scm") ;; L6 -(include "rmt-inc.scm") ;; L2 (include "runconfig-inc.scm") -(include "runs-inc.scm") ;; L1.5 -(include "server-inc.scm") -(include "subrun-inc.scm") -(include "tasks-inc.scm") -(include "tdb-inc.scm") -(include "tests-inc.scm") - -;; (include "js-path.scm") ;; moved into init procedure in tests-inc.scm ) ;; http-transport:server-dat definition moved to common_records.scm ;; bunch of small functions factored out of send-receive to make debug easier Index: megatest.scm ================================================================== --- megatest.scm +++ megatest.scm @@ -29,56 +29,89 @@ ;; Added for csv stuff - will be removed ;; (use sparse-vectors) (declare (uses mtargs)) -(import (prefix mtargs args:)) (declare (uses mtconfigf)) -(import (prefix mtconfigf configf:)) -(declare (uses ducttape-lib)) -(import ducttape-lib) +(declare (uses stml2)) (declare (uses pkts)) -(import pkts) (declare (uses ulex)) + +(import (prefix mtargs args:)) +(import (prefix mtconfigf configf:)) +(import cookie) +(import stml2) +(import pkts) (import ulex) + +;; order matters? 
+(declare (uses commonmod)) +(declare (uses itemsmod)) +(declare (uses apimod)) (declare (uses cookie)) -(import cookie) -(declare (uses stml2)) -(import stml2) +(declare (uses keysmod)) +(declare (uses odsmod)) +(declare (uses dbmod)) +(declare (uses servermod)) +(declare (uses ducttape-lib)) (declare (uses mutils)) +(declare (uses pgdbmod)) +(declare (uses rmtmod)) +(declare (uses subrunmod)) +(declare (uses mtmod)) +(declare (uses runsmod)) +(declare (uses tasksmod)) +(declare (uses testsmod)) +(declare (uses megamod)) + +(import commonmod) +(import itemsmod) +(import keysmod) +(import pgdbmod) +(import tasksmod) +(import odsmod) +(import dbmod) +(import servermod) +(import apimod) +(import ducttape-lib) (import mutils) +(import rmtmod) +(import testsmod) +(import subrunmod) +(import mtmod) +(import runsmod) +(import megamod) + +;; NOTE: Order is important! ;; invoke the imports (declare (uses mtargs.import)) (declare (uses mtconfigf.import)) -(declare (uses ducttape-lib.import)) +(declare (uses cookie.import)) +(declare (uses stml2.import)) (declare (uses pkts.import)) (declare (uses ulex.import)) -(declare (uses cookie.import)) -(declare (uses stml2.import)) -(declare (uses mutils.import)) - -(declare (uses commonmod)) -(import commonmod) - -(declare (uses rmtmod)) -(import rmtmod) -(declare (uses dbmod)) -(import dbmod) -(declare (uses runsmod)) -(import runsmod) -(declare (uses testsmod)) -(import testsmod) -(declare (uses megamod)) -(import megamod) ;; invoke the imports (declare (uses commonmod.import)) -(declare (uses testsmod.import)) +(declare (uses itemsmod.import)) +(declare (uses keysmod.import)) +(declare (uses pgdbmod.import)) +(declare (uses tasksmod.import)) +(declare (uses odsmod.import)) +(declare (uses dbmod.import)) +(declare (uses servermod.import)) +(declare (uses apimod.import)) +(declare (uses ducttape-lib.import)) +(declare (uses mutils.import)) (declare (uses rmtmod.import)) +(declare (uses subrunmod.import)) +(declare (uses 
testsmod.import)) +(declare (uses mtmod.import)) (declare (uses runsmod.import)) (declare (uses megamod.import)) + (configf:set-debug-printers debug:print debug:print-info debug:print-error *default-log-port*) (define *db* #f) ;; this is only for the repl, do not use in general!!!! (define *default-log-port* (current-error-port)) Index: mt-inc.scm ================================================================== --- mt-inc.scm +++ mt-inc.scm @@ -13,273 +13,5 @@ ;; GNU General Public License for more details. ;; ;; You should have received a copy of the GNU General Public License ;; along with Megatest. If not, see . ;; - - -;; This is the Megatest API. All generally "useful" routines will be wrapped or extended -;; here. - -;;====================================================================== -;; R U N S -;;====================================================================== - -;; runs:get-runs-by-patt -;; get runs by list of criteria -;; register a test run with the db -;; -;; Use: (db-get-value-by-header (db:get-header runinfo)(db:get-rows runinfo)) -;; to extract info from the structure returned -;; -(define (mt:get-runs-by-patt keys runnamepatt targpatt) - (let loop ((runsdat (rmt:get-runs-by-patt keys runnamepatt targpatt 0 500 #f 0)) - (res '()) - (offset 0) - (limit 500)) - ;; (print "runsdat: " runsdat) - (let* ((header (vector-ref runsdat 0)) - (runslst (vector-ref runsdat 1)) - (full-list (append res runslst)) - (have-more (eq? 
(length runslst) limit))) - ;; (debug:print 0 *default-log-port* "header: " header " runslst: " runslst " have-more: " have-more) - (if have-more - (let ((new-offset (+ offset limit)) - (next-batch (rmt:get-runs-by-patt keys runnamepatt targpatt offset limit #f 0))) - (debug:print-info 4 *default-log-port* "More than " limit " runs, have " (length full-list) " runs so far.") - (debug:print-info 0 *default-log-port* "next-batch: " next-batch) - (loop next-batch - full-list - new-offset - limit)) - (vector header full-list))))) - -;;====================================================================== -;; T E S T S -;;====================================================================== - -(define (mt:get-tests-for-run run-id testpatt states status #!key (not-in #t) (sort-by 'event_time) (sort-order "ASC") (qryvals #f)(last-update #f)) - (let loop ((testsdat (rmt:get-tests-for-run run-id testpatt states status 0 500 not-in sort-by sort-order qryvals last-update 'normal)) - (res '()) - (offset 0) - (limit 500)) - (let* ((full-list (append res testsdat)) - (have-more (eq? (length testsdat) limit))) - (if have-more - (let ((new-offset (+ offset limit))) - (debug:print-info 4 *default-log-port* "More than " limit " tests, have " (length full-list) " tests so far.") - (loop (rmt:get-tests-for-run run-id testpatt states status new-offset limit not-in sort-by sort-order qryvals last-update 'normal) - full-list - new-offset - limit)) - full-list)))) - -(define (mt:lazy-get-prereqs-not-met run-id waitons ref-item-path #!key (mode '(normal))(itemmaps #f) ) - (let* ((key (list run-id waitons ref-item-path mode)) - (res (hash-table-ref/default *pre-reqs-met-cache* key #f)) - (useres (let ((last-time (if (vector? 
res) (vector-ref res 0) #f))) - (if last-time - (< (current-seconds)(+ last-time 5)) - #f)))) - (if useres - (let ((result (vector-ref res 1))) - (debug:print 4 *default-log-port* "Using lazy value res: " result) - result) - (let ((newres (rmt:get-prereqs-not-met run-id waitons ref-item-path mode: mode itemmaps: itemmaps))) - (hash-table-set! *pre-reqs-met-cache* key (vector (current-seconds) newres)) - newres)))) - -(define (mt:get-run-stats dbstruct run-id) -;; Get run stats from local access, move this ... but where? - (db:get-run-stats dbstruct run-id)) - -(define (mt:discard-blocked-tests run-id failed-test tests test-records) - (if (null? tests) - tests - (begin - (debug:print-info 1 *default-log-port* "Discarding tests from " tests " that are waiting on " failed-test) - (let loop ((testn (car tests)) - (remt (cdr tests)) - (res '())) - (let* ((test-dat (hash-table-ref/default test-records testn (vector #f #f '()))) - (waitons (vector-ref test-dat 2))) - ;; (print "mt:discard-blocked-tests run-id: " run-id " failed-test: " failed-test " testn: " testn " with waitons: " waitons) - (if (null? remt) - (let ((new-res (reverse res))) - ;; (print " new-res: " new-res) - new-res) - (loop (car remt) - (cdr remt) - (if (member failed-test waitons) - (begin - (debug:print 0 *default-log-port* "Discarding test " testn "(" test-dat ") due to " failed-test) - res) - (cons testn res))))))))) - -;;====================================================================== -;; T R I G G E R S -;;====================================================================== - -(define (mt:run-trigger cmd test-id test-rundir trigger logname test-name item-path event-time actual-state actual-status) - ;; Putting the commandline into ( )'s means no control over the shell. - ;; stdout and stderr will be caught in the NBFAKE or mt_launch.log files - ;; or equivalent. No need to do this. Just run it? 
- (let* ((fullcmd (conc "nbfake " - cmd " " - test-id " " - test-rundir " " - trigger " " - test-name " " - item-path " " ;; has / prepended to deal with toplevel tests - actual-state " " - actual-status " " - event-time - )) - (prev-nbfake-log (get-environment-variable "NBFAKE_LOG"))) - (setenv "NBFAKE_LOG" (conc (cond - ((and (directory-exists? test-rundir) - (file-write-access? test-rundir)) - test-rundir) - ((and (directory-exists? *toppath*) - (file-write-access? *toppath*)) - *toppath*) - (else (conc "/tmp/" (current-user-name)))) - "/" logname)) - (debug:print-info 0 *default-log-port* "TRIGGERED on " trigger ", running command " fullcmd " output at " (get-environment-variable "NBFAKE_LOG")) - ;; (call-with-environment-variables - ;; `(("NBFAKE_LOG" . ,(conc test-rundir "/" logname))) - ;; (lambda () - (process-run fullcmd) - (if prev-nbfake-log - (setenv "NBFAKE_LOG" prev-nbfake-log) - (unsetenv "NBFAKE_LOG")) - )) ;; )) - -(define (mt:process-triggers dbstruct run-id test-id newstate newstatus) - (if test-id - (let* ((test-dat (db:get-test-info-by-id dbstruct run-id test-id))) - (if test-dat - (let* ((test-rundir (db:test-get-rundir test-dat)) ;; ) ;; ) - (test-name (db:test-get-testname test-dat)) - (item-path (db:test-get-item-path test-dat)) - (duration (db:test-get-run_duration test-dat)) - (comment (db:test-get-comment test-dat)) - (event-time (db:test-get-event_time test-dat)) - (tconfig #f) - (state (if newstate newstate (db:test-get-state test-dat))) - (status (if newstatus newstatus (db:test-get-status test-dat)))) - ;; (mutex-lock! 
*triggers-mutex*) - (handle-exceptions - exn - (begin - (debug:print-error 0 *default-log-port* " Exception in mt:process-triggers for run-id="run-id" test-id="test-id" newstate="newstate" newstatus="newstatus - "\n error: " ((condition-property-accessor 'exn 'message) exn) - "\n test-rundir="test-rundir - "\n test-name="test-name - "\n item-path="item-path - "\n state="state - "\n status="status - "\n") - (print-call-chain (current-error-port)) - #f) - (if (and test-name - test-rundir) ;; #f means no dir set yet - ;; (common:file-exists? test-rundir) - ;; (directory? test-rundir)) - (call-with-environment-variables - (list (cons "MT_TEST_NAME" (or test-name "no such test")) - (cons "MT_TEST_RUN_DIR" (or test-rundir "no test directory yet")) - (cons "MT_ITEMPATH" (or item-path ""))) - (lambda () - (if (directory-exists? test-rundir) - (push-directory test-rundir) - (push-directory *toppath*)) - (set! tconfig (mt:lazy-read-test-config test-name)) - (for-each (lambda (trigger) - (let* ((munged-trigger (string-translate trigger "/ " "--")) - (logname (conc "last-trigger-" munged-trigger ".log"))) - ;; first any triggers from the testconfig - (let ((cmd (configf:lookup tconfig "triggers" trigger))) - (if cmd (mt:run-trigger cmd test-id test-rundir trigger (conc "tconfig-" logname) test-name item-path event-time state status))) - ;; next any triggers from megatest.config - (let ((cmd (configf:lookup *configdat* "triggers" trigger))) - (if cmd (mt:run-trigger cmd test-id test-rundir trigger (conc "mtconfig-" logname) test-name item-path event-time state status))))) - (list - (conc state "/" status) - (conc state "/") - (conc "/" status))) - (pop-directory)) - ))) - ;; (mutex-unlock! 
*triggers-mutex*) - ))))) - -;;====================================================================== -;; S T A T E A N D S T A T U S F O R T E S T S -;;====================================================================== - -;; speed up for common cases with a little logic -(define (mt:test-set-state-status-by-id run-id test-id newstate newstatus newcomment) - (if (not (and run-id test-id)) - (begin - (debug:print-error 0 *default-log-port* "bad data handed to mt:test-set-state-status-by-id, run-id=" run-id ", test-id=" test-id ", newstate=" newstate) - (print-call-chain (current-error-port)) - #f) - (begin - ;; cond - ;; ((and newstate newstatus newcomment) - ;; (rmt:general-call 'state-status-msg run-id newstate newstatus newcomment test-id)) - ;; ((and newstate newstatus) - ;; (rmt:general-call 'state-status run-id newstate newstatus test-id)) - ;; (else - ;; (if newstate (rmt:general-call 'set-test-state run-id newstate test-id)) - ;; (if newstatus (rmt:general-call 'set-test-status run-id newstatus test-id)) - ;; (if newcomment (rmt:general-call 'set-test-comment run-id newcomment test-id)))) - (rmt:set-state-status-and-roll-up-items run-id test-id #f newstate newstatus newcomment) - ;; (mt:process-triggers run-id test-id newstate newstatus) - #t))) - - -(define (mt:test-set-state-status-by-id-unless-completed run-id test-id newstate newstatus newcomment) - (let* ((test-vec (rmt:get-testinfo-state-status run-id test-id)) - (state (vector-ref test-vec 3))) - (if (equal? 
state "COMPLETED") - #t - (rmt:set-state-status-and-roll-up-items run-id test-id #f newstate newstatus newcomment)))) - - -(define (mt:test-set-state-status-by-testname run-id test-name item-path new-state new-status new-comment) - ;(let ((test-id (rmt:get-test-id run-id test-name item-path))) - (rmt:set-state-status-and-roll-up-items run-id test-name item-path new-state new-status new-comment) - ;; (mt:process-triggers run-id test-id new-state new-status) - #t);) - ;;(mt:test-set-state-status-by-id run-id test-id new-state new-status new-comment))) - -(define (mt:test-set-state-status-by-testname-unless-completed run-id test-name item-path new-state new-status new-comment) - (let ((test-id (rmt:get-test-id run-id test-name item-path))) - (mt:test-set-state-status-by-id-unless-completed run-id test-id new-state new-status new-comment))) - -(define (mt:lazy-read-test-config test-name) - (let ((tconf (hash-table-ref/default *testconfigs* test-name #f))) - (if tconf - tconf - (let ((test-dirs (tests:get-tests-search-path *configdat*))) - (let loop ((hed (car test-dirs)) - (tal (cdr test-dirs))) - ;; Setting MT_LINKTREE here is almost certainly unnecessary. - (let ((tconfig-file (conc hed "/" test-name "/testconfig"))) - (if (and (common:file-exists? tconfig-file) - (file-read-access? tconfig-file)) - (let ((link-tree-path (common:get-linktree)) ;; (configf:lookup *configdat* "setup" "linktree")) - (old-link-tree (get-environment-variable "MT_LINKTREE"))) - (if link-tree-path (setenv "MT_LINKTREE" link-tree-path)) - (let ((newtcfg (read-config tconfig-file #f #f))) ;; NOTE: Does NOT run [system ...] - (hash-table-set! *testconfigs* test-name newtcfg) - (if old-link-tree - (setenv "MT_LINKTREE" old-link-tree) - (unsetenv "MT_LINKTREE")) - newtcfg)) - (if (null? 
tal) - (begin - (debug:print-error 0 *default-log-port* "No readable testconfig found for " test-name) - #f) - (loop (car tal)(cdr tal)))))))))) - ADDED mtmod.scm Index: mtmod.scm ================================================================== --- /dev/null +++ mtmod.scm @@ -0,0 +1,65 @@ +;;====================================================================== +;; Copyright 2019, Matthew Welland. +;; +;; This file is part of Megatest. +;; +;; Megatest is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. +;; +;; Megatest is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with Megatest. If not, see <http://www.gnu.org/licenses/>. 
+ +;;====================================================================== + +(declare (unit mtmod)) + +(declare (uses commonmod)) +(declare (uses dbmod)) +(declare (uses launchmod)) +(declare (uses mtargs)) +(declare (uses mtconfigf)) +(declare (uses pgdbmod)) +(declare (uses rmtmod)) +(declare (uses servermod)) +(declare (uses stml2)) +(declare (uses subrunmod)) +(declare (uses tasksmod)) +(declare (uses testsmod)) + +(module mtmod + * + +(import scheme chicken data-structures extras posix ports files) + +(use (prefix sqlite3 sqlite3:) + srfi-69 regex srfi-18 srfi-13 srfi-1 + call-with-environment-variables z3 (prefix base64 base64:) + typed-records + csv directory-utils) + +(import (prefix mtargs args:)) +(import (prefix mtconfigf configf:)) +(import commonmod) +(import dbmod) +(import pgdbmod) +(import rmtmod) +(import servermod) +(import stml2) +(import subrunmod) +(import tasksmod) +(import testsmod) + +(include "run_records.scm") +(include "db_records.scm") +(include "test_records.scm") + + +) + Index: ods-inc.scm ================================================================== --- ods-inc.scm +++ ods-inc.scm @@ -14,208 +14,5 @@ ;; ;; You should have received a copy of the GNU General Public License ;; along with Megatest. If not, see <http://www.gnu.org/licenses/>. 
;; -(define ods:dirs - '("Configurations2" - "Configurations2/toolpanel" - "Configurations2/menubar" - "Configurations2/toolbar" - "Configurations2/progressbar" - "Configurations2/floater" - "Configurations2/images" - "Configurations2/images/Bitmaps" - "Configurations2/statusbar" - "Configurations2/popupmenu" - "Configurations2/accelerator" - "META-INF" - "Thumbnails")) - -(define ods:0-len-files - '("Configurations2/accelerator/current.xml" - ;; "Thumbnails/thumbnail.png" - "content.xml" - )) - -(define ods:files - '(("META-INF/manifest.xml" - ("\n" - "\n" - "\n" - "\n" - "\n" - "\n" - "\n" - "\n" - "\n" - "\n" - "\n" - "\n" - "\n" - "\n" - "\n" - "\n" - "\n" - "\n" - "\n" - "\n" - "\n" - "\n")) - ("styles.xml" - ("\n" - "$-$???Page 1??? (???)09/06/2011, 20:48:51Page 1 / 99\n")) - ("settings.xml" - ("\n" - "0045161799view100000020000010060true04000020000010060trueSheet2270010060falsetruetruetrue12632256truetruetruetruefalsefalse1270127011truefalsetrue3falsetruetruetrue12701270false1truetrue1true12632256falsefalsetrue0truetruetruefalsetrue\n")) - ("mimetype" - ("application/vnd.oasis.opendocument.spreadsheet")) - ("meta.xml" - ("\n" - "Matt Welland2011-09-06T20:46:232011-09-06T20:48:51Matt WellandPT2M29S1LibreOffice/3.3$Linux LibreOffice_project/330m19$Build-301\n")))) - -(define ods:content-header - '("\n" - "\n" - "\n" - "\n" - "\n" - "\n" - "\n" - "\n" - "\n" - "\n" - "\n" - "\n" - "\n" - "\n" - "\n" - "\n" - "\n" - "\n" - "\n" - "\n" - "\n" - "\n")) - -(define ods:content-footer - '("\n" - "\n" - "\n")) - -(define (ods:make-thumbnail path) - (let ((oup (open-output-pipe (conc "uudecode -o " path "/Thumbnails/thumbnail.png")))) - (with-output-to-port oup - (lambda () - (print "begin-base64 640 Thumbnail.png -iVBORw0KGgoAAAANSUhEUgAAAL4AAAEACAIAAACCoVt7AAAEWElEQVR4nO3X -MU4bWQCA4bGUo5gUKCcgJwCaVNvShdI06VKmSxNKp6PdKg3xCcgJIhr7Ll6P -DTgBRbv5i11W+r7Gw7yZx0jv5415sV6vB/h9L/7rB+D/apfO4nxy8nk8OPq0 -vDm9Pr8+nc+mv75pcXl5MNtfsLp8fXDxbRjefl3Pj//xb340yW+N8gyM6awu 
-vxwu1+txnVar1Xj2z7PJpoUxhYNdFmNSs+EukdHRcHpzt7Kr69s/luub6Wa1 -V8Px9tx9TLsSH2a4OxwjWx5+uLgYhtOr4ezXo8Ori4tt0b8XJf+KMZ3p7N3w -ejIZV227hMP3V+/XNweX59erxZddK98uPi5eDvfdbC672u8I09l8tvlYDC/v -z93HNJa4+Hj7fr0+3mxs54vTw1e7BM+vh9n7T8PBbPlx8jD/k9HT4WzsRzfP -0/aFtVi+vNl9W75b4MODhwv2C7c4vz/e7C8/zzK+8Iav6ycLPJ1Ol3/zAPv5 -N5vfo7tnN+vZuIFNJvJ5frYvrOHLh8nJyfjjuOsM1/slPH53uNmPTnYDD8dH -R5ut4uGFdf9F6WQy3C3wdPbmdjKZDNsw7u56PPMw3F6cXS6vDs/u57/66cE2 -o+e3w+fP203p7RvdPDvbF9bx/GY935/bvYDuPsa//IeBH473jufrH+9+cu54 -f9dPM893u9QPcz4dnT+emGfDP+dE0iGSDpF0iKRDJB0i6RBJh0g6RNIhkg6R -dIikQyQdIukQSYdIOkTSIZIOkXSIpEMkHSLpEEmHSDpE0iGSDpF0iKRDJB0i -6RBJh0g6RNIhkg6RdIikQyQdIukQSYdIOkTSIZIOkXSIpEMkHSLpEEmHSDpE -0iGSDpF0iKRDJB0i6RBJh0g6RNIhkg6RdIikQyQdIukQSYdIOkTSIZIOkXSI -pEMkHSLpEEmHSDpE0iGSDpF0iKRDJB0i6RBJh0g6RNIhkg6RdIikQyQdIukQ -SYdIOkTSIZIOkXSIpEMkHSLpEEmHSDpE0iGSDpF0iKRDJB0i6RBJh0g6RNIh -kg6RdIikQyQdIukQSYdIOkTSIZIOkXSIpEMkHSLpEEmHSDpE0iGSDpF0iKRD -JB0i6RBJh0g6RNIhkg6RdIikQyQdIukQSYdIOkTSIZIOkXSIpEMkHSLpEEmH -SDpE0iGSDpF0iKRDJB0i6RBJh0g6RNIhkg6RdIikQyQdIukQSYdIOkTSIZIO -kXSIpEMkHSLpEEmHSDpE0iGSDpF0iKRDJB0i6RBJh0g6RNIhkg6RdIikQyQd -IukQSYdIOkTSIZIOkXSIpEMkHSLpEEmHSDpE0iGSDpF0iKRDJB0i6RBJh0g6 -RNIhkg6RdIikQyQdIukQSYdIOkTSIZIOkXSIpEMkHSLpEEmHSDpE0iGSDpF0 -iKRDJB0i6RBJh0g6RNIhkg6RdIikQyQdIukQSYdIOkTSIZIOkXSIpEMkHSLp -EEmHSDpE0iGSDpF0iKRDJB0i6RBJh+gv8TgE/jVPQbMAAAAASUVORK5CYII= -===="))))) - -;; sheetdat is '("sheetname" (r1c1 r2c2 ...)(r2c1 r2c2 ...) ...) -(define (ods:sheet sheetdat) - (let ((name (car sheetdat)) - (rows (cdr sheetdat))) - (conc "\n" - (conc (ods:column) - (string-join (map ods:row rows) "")) - ""))) - -;; seems to be called once at top of each sheet, i.e. a column of rows -(define (ods:column) - "\n") - -;; cells is a list of ... -(define (ods:row cells) - (conc "\n" - (string-join (map ods:cell cells) "") - "\n")) - -;; types are "string" or "float" -(define (ods:cell value) - (let* ((type (cond - ((string? value) "string") - ((symbol? value) "string") - ((number? 
value) "float") - (else #f))) - (tmpval (if (symbol? value) - (symbol->string value) - (if type value ""))) ;; convert everything else to an empty string - (escval (if (string? tmpval)(string-substitute (regexp "<") "<" (string-substitute (regexp ">") ">" tmpval)) tmpval))) - (conc "\n" - "" escval "" "\n" - "" "\n"))) - -;; create the directories -(define (ods:construct-dir path) - (for-each - (lambda (subdir) - (system (conc "mkdir -p " path "/" subdir))) - ods:dirs)) - -;; populate the necessary, non-constructed, files -(define (ods:add-non-content-files path) - ;; first the zero-length files, nb// the dir should already be created - (for-each - (lambda (fname) - (system (conc "touch " path "/" fname))) - ods:0-len-files) - ;; create the files with stuff in them - (for-each - (lambda (fdat) - (let* ((name (car fdat)) - (lines (cadr fdat))) - (with-output-to-file (conc path "/" name) - (lambda () - (for-each - (lambda (line) - (display line)) - lines))))) - ods:files)) - -;; data format: -;; '( (sheet1 (r1c1 r1c2 r1c3 ...) -;; (r2c1 r2c3 r2c3 ...) ) -;; (sheet2 ( ... ) -;; ( ... ) ) ) -(define (ods:list->ods path fname data) - (if (not (common:file-exists? path)) - (print "ERROR: path to create ods data must pre-exist") - (begin - (with-output-to-file (conc path "/content.xml") - (lambda () - (ods:construct-dir path) - (ods:add-non-content-files path) - (ods:make-thumbnail path) - (map display ods:content-header) - ;; process each sheet - (map print - (map ods:sheet data)) - (map display ods:content-footer))) - (system (conc "cd " path "; zip " fname " -n mimetype mimetype `find . 
|grep -v mimetype` > /dev/null"))))) - Index: odsmod.scm ================================================================== --- odsmod.scm +++ odsmod.scm @@ -28,212 +28,209 @@ (import (prefix sqlite3 sqlite3:) posix typed-records srfi-18 srfi-69 format ports srfi-1 matchable srfi-13) (import commonmod) ;; (use (prefix ulex ulex:)) -(include "common_records.scm") - -;; (define ods:dirs -;; '("Configurations2" -;; "Configurations2/toolpanel" -;; "Configurations2/menubar" -;; "Configurations2/toolbar" -;; "Configurations2/progressbar" -;; "Configurations2/floater" -;; "Configurations2/images" -;; "Configurations2/images/Bitmaps" -;; "Configurations2/statusbar" -;; "Configurations2/popupmenu" -;; "Configurations2/accelerator" -;; "META-INF" -;; "Thumbnails")) -;; -;; (define ods:0-len-files -;; '("Configurations2/accelerator/current.xml" -;; ;; "Thumbnails/thumbnail.png" -;; "content.xml" -;; )) -;; -;; (define ods:files -;; '(("META-INF/manifest.xml" -;; ("\n" -;; "\n" -;; "\n" -;; "\n" -;; "\n" -;; "\n" -;; "\n" -;; "\n" -;; "\n" -;; "\n" -;; "\n" -;; "\n" -;; "\n" -;; "\n" -;; "\n" -;; "\n" -;; "\n" -;; "\n" -;; "\n" -;; "\n" -;; "\n" -;; "\n")) -;; ("styles.xml" -;; ("\n" -;; "$-$???Page 1??? 
(???)09/06/2011, 20:48:51Page 1 / 99\n")) -;; ("settings.xml" -;; ("\n" -;; "0045161799view100000020000010060true04000020000010060trueSheet2270010060falsetruetruetrue12632256truetruetruetruefalsefalse1270127011truefalsetrue3falsetruetruetrue12701270false1truetrue1true12632256falsefalsetrue0truetruetruefalsetrue\n")) -;; ("mimetype" -;; ("application/vnd.oasis.opendocument.spreadsheet")) -;; ("meta.xml" -;; ("\n" -;; "Matt Welland2011-09-06T20:46:232011-09-06T20:48:51Matt WellandPT2M29S1LibreOffice/3.3$Linux LibreOffice_project/330m19$Build-301\n")))) -;; -;; (define ods:content-header -;; '("\n" -;; "\n" -;; "\n" -;; "\n" -;; "\n" -;; "\n" -;; "\n" -;; "\n" -;; "\n" -;; "\n" -;; "\n" -;; "\n" -;; "\n" -;; "\n" -;; "\n" -;; "\n" -;; "\n" -;; "\n" -;; "\n" -;; "\n" -;; "\n" -;; "\n")) -;; -;; (define ods:content-footer -;; '("\n" -;; "\n" -;; "\n")) -;; -;; (define (ods:make-thumbnail path) -;; (let ((oup (open-output-pipe (conc "uudecode -o " path "/Thumbnails/thumbnail.png")))) -;; (with-output-to-port oup -;; (lambda () -;; (print "begin-base64 640 Thumbnail.png -;; iVBORw0KGgoAAAANSUhEUgAAAL4AAAEACAIAAACCoVt7AAAEWElEQVR4nO3X -;; MU4bWQCA4bGUo5gUKCcgJwCaVNvShdI06VKmSxNKp6PdKg3xCcgJIhr7Ll6P -;; DTgBRbv5i11W+r7Gw7yZx0jv5415sV6vB/h9L/7rB+D/apfO4nxy8nk8OPq0 -;; vDm9Pr8+nc+mv75pcXl5MNtfsLp8fXDxbRjefl3Pj//xb340yW+N8gyM6awu -;; vxwu1+txnVar1Xj2z7PJpoUxhYNdFmNSs+EukdHRcHpzt7Kr69s/luub6Wa1 -;; V8Px9tx9TLsSH2a4OxwjWx5+uLgYhtOr4ezXo8Ori4tt0b8XJf+KMZ3p7N3w -;; ejIZV227hMP3V+/XNweX59erxZddK98uPi5eDvfdbC672u8I09l8tvlYDC/v -;; z93HNJa4+Hj7fr0+3mxs54vTw1e7BM+vh9n7T8PBbPlx8jD/k9HT4WzsRzfP -;; 0/aFtVi+vNl9W75b4MODhwv2C7c4vz/e7C8/zzK+8Iav6ycLPJ1Ol3/zAPv5 -;; N5vfo7tnN+vZuIFNJvJ5frYvrOHLh8nJyfjjuOsM1/slPH53uNmPTnYDD8dH -;; R5ut4uGFdf9F6WQy3C3wdPbmdjKZDNsw7u56PPMw3F6cXS6vDs/u57/66cE2 -;; o+e3w+fP203p7RvdPDvbF9bx/GY935/bvYDuPsa//IeBH473jufrH+9+cu54 -;; f9dPM893u9QPcz4dnT+emGfDP+dE0iGSDpF0iKRDJB0i6RBJh0g6RNIhkg6R -;; dIikQyQdIukQSYdIOkTSIZIOkXSIpEMkHSLpEEmHSDpE0iGSDpF0iKRDJB0i -;; 
6RBJh0g6RNIhkg6RdIikQyQdIukQSYdIOkTSIZIOkXSIpEMkHSLpEEmHSDpE -;; 0iGSDpF0iKRDJB0i6RBJh0g6RNIhkg6RdIikQyQdIukQSYdIOkTSIZIOkXSI -;; pEMkHSLpEEmHSDpE0iGSDpF0iKRDJB0i6RBJh0g6RNIhkg6RdIikQyQdIukQ -;; SYdIOkTSIZIOkXSIpEMkHSLpEEmHSDpE0iGSDpF0iKRDJB0i6RBJh0g6RNIh -;; kg6RdIikQyQdIukQSYdIOkTSIZIOkXSIpEMkHSLpEEmHSDpE0iGSDpF0iKRD -;; JB0i6RBJh0g6RNIhkg6RdIikQyQdIukQSYdIOkTSIZIOkXSIpEMkHSLpEEmH -;; SDpE0iGSDpF0iKRDJB0i6RBJh0g6RNIhkg6RdIikQyQdIukQSYdIOkTSIZIO -;; kXSIpEMkHSLpEEmHSDpE0iGSDpF0iKRDJB0i6RBJh0g6RNIhkg6RdIikQyQd -;; IukQSYdIOkTSIZIOkXSIpEMkHSLpEEmHSDpE0iGSDpF0iKRDJB0i6RBJh0g6 -;; RNIhkg6RdIikQyQdIukQSYdIOkTSIZIOkXSIpEMkHSLpEEmHSDpE0iGSDpF0 -;; iKRDJB0i6RBJh0g6RNIhkg6RdIikQyQdIukQSYdIOkTSIZIOkXSIpEMkHSLp -;; EEmHSDpE0iGSDpF0iKRDJB0i6RBJh+gv8TgE/jVPQbMAAAAASUVORK5CYII= -;; ===="))))) -;; -;; ;; sheetdat is '("sheetname" (r1c1 r2c2 ...)(r2c1 r2c2 ...) ...) -;; (define (ods:sheet sheetdat) -;; (let ((name (car sheetdat)) -;; (rows (cdr sheetdat))) -;; (conc "\n" -;; (conc (ods:column) -;; (string-join (map ods:row rows) "")) -;; ""))) -;; -;; ;; seems to be called once at top of each sheet, i.e. a column of rows -;; (define (ods:column) -;; "\n") -;; -;; ;; cells is a list of ... -;; (define (ods:row cells) -;; (conc "\n" -;; (string-join (map ods:cell cells) "") -;; "\n")) -;; -;; ;; types are "string" or "float" -;; (define (ods:cell value) -;; (let* ((type (cond -;; ((string? value) "string") -;; ((symbol? value) "string") -;; ((number? value) "float") -;; (else #f))) -;; (tmpval (if (symbol? value) -;; (symbol->string value) -;; (if type value ""))) ;; convert everything else to an empty string -;; (escval (if (string? 
tmpval)(string-substitute (regexp "<") "<" (string-substitute (regexp ">") ">" tmpval)) tmpval))) -;; (conc "\n" -;; "" escval "" "\n" -;; "" "\n"))) -;; -;; ;; create the directories -;; (define (ods:construct-dir path) -;; (for-each -;; (lambda (subdir) -;; (system (conc "mkdir -p " path "/" subdir))) -;; ods:dirs)) -;; -;; ;; populate the necessary, non-constructed, files -;; (define (ods:add-non-content-files path) -;; ;; first the zero-length files, nb// the dir should already be created -;; (for-each -;; (lambda (fname) -;; (system (conc "touch " path "/" fname))) -;; ods:0-len-files) -;; ;; create the files with stuff in them -;; (for-each -;; (lambda (fdat) -;; (let* ((name (car fdat)) -;; (lines (cadr fdat))) -;; (with-output-to-file (conc path "/" name) -;; (lambda () -;; (for-each -;; (lambda (line) -;; (display line)) -;; lines))))) -;; ods:files)) -;; -;; ;; data format: -;; ;; '( (sheet1 (r1c1 r1c2 r1c3 ...) -;; ;; (r2c1 r2c3 r2c3 ...) ) -;; ;; (sheet2 ( ... ) -;; ;; ( ... ) ) ) -;; (define (ods:list->ods path fname data) -;; (if (not (file-exists? path)) -;; (print "ERROR: path to create ods data must pre-exist") -;; (begin -;; (with-output-to-file (conc path "/content.xml") -;; (lambda () -;; (ods:construct-dir path) -;; (ods:add-non-content-files path) -;; (ods:make-thumbnail path) -;; (map display ods:content-header) -;; ;; process each sheet -;; (map print -;; (map ods:sheet data)) -;; (map display ods:content-footer))) -;; (system (conc "cd " path "; zip " fname " -n mimetype mimetype `find . 
|grep -v mimetype` > /dev/null"))))) -;; -;; +(define ods:dirs + '("Configurations2" + "Configurations2/toolpanel" + "Configurations2/menubar" + "Configurations2/toolbar" + "Configurations2/progressbar" + "Configurations2/floater" + "Configurations2/images" + "Configurations2/images/Bitmaps" + "Configurations2/statusbar" + "Configurations2/popupmenu" + "Configurations2/accelerator" + "META-INF" + "Thumbnails")) + +(define ods:0-len-files + '("Configurations2/accelerator/current.xml" + ;; "Thumbnails/thumbnail.png" + "content.xml" + )) + +(define ods:files + '(("META-INF/manifest.xml" + ("\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n")) + ("styles.xml" + ("\n" + "$-$???Page 1??? (???)09/06/2011, 20:48:51Page 1 / 99\n")) + ("settings.xml" + ("\n" + "0045161799view100000020000010060true04000020000010060trueSheet2270010060falsetruetruetrue12632256truetruetruetruefalsefalse1270127011truefalsetrue3falsetruetruetrue12701270false1truetrue1true12632256falsefalsetrue0truetruetruefalsetrue\n")) + ("mimetype" + ("application/vnd.oasis.opendocument.spreadsheet")) + ("meta.xml" + ("\n" + "Matt Welland2011-09-06T20:46:232011-09-06T20:48:51Matt WellandPT2M29S1LibreOffice/3.3$Linux LibreOffice_project/330m19$Build-301\n")))) + +(define ods:content-header + '("\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n")) + +(define ods:content-footer + '("\n" + "\n" + "\n")) + +(define (ods:make-thumbnail path) + (let ((oup (open-output-pipe (conc "uudecode -o " path "/Thumbnails/thumbnail.png")))) + (with-output-to-port oup + (lambda () + (print "begin-base64 640 Thumbnail.png +iVBORw0KGgoAAAANSUhEUgAAAL4AAAEACAIAAACCoVt7AAAEWElEQVR4nO3X +MU4bWQCA4bGUo5gUKCcgJwCaVNvShdI06VKmSxNKp6PdKg3xCcgJIhr7Ll6P +DTgBRbv5i11W+r7Gw7yZx0jv5415sV6vB/h9L/7rB+D/apfO4nxy8nk8OPq0 
+vDm9Pr8+nc+mv75pcXl5MNtfsLp8fXDxbRjefl3Pj//xb340yW+N8gyM6awu +vxwu1+txnVar1Xj2z7PJpoUxhYNdFmNSs+EukdHRcHpzt7Kr69s/luub6Wa1 +V8Px9tx9TLsSH2a4OxwjWx5+uLgYhtOr4ezXo8Ori4tt0b8XJf+KMZ3p7N3w +ejIZV227hMP3V+/XNweX59erxZddK98uPi5eDvfdbC672u8I09l8tvlYDC/v +z93HNJa4+Hj7fr0+3mxs54vTw1e7BM+vh9n7T8PBbPlx8jD/k9HT4WzsRzfP +0/aFtVi+vNl9W75b4MODhwv2C7c4vz/e7C8/zzK+8Iav6ycLPJ1Ol3/zAPv5 +N5vfo7tnN+vZuIFNJvJ5frYvrOHLh8nJyfjjuOsM1/slPH53uNmPTnYDD8dH +R5ut4uGFdf9F6WQy3C3wdPbmdjKZDNsw7u56PPMw3F6cXS6vDs/u57/66cE2 +o+e3w+fP203p7RvdPDvbF9bx/GY935/bvYDuPsa//IeBH473jufrH+9+cu54 +f9dPM893u9QPcz4dnT+emGfDP+dE0iGSDpF0iKRDJB0i6RBJh0g6RNIhkg6R +dIikQyQdIukQSYdIOkTSIZIOkXSIpEMkHSLpEEmHSDpE0iGSDpF0iKRDJB0i +6RBJh0g6RNIhkg6RdIikQyQdIukQSYdIOkTSIZIOkXSIpEMkHSLpEEmHSDpE +0iGSDpF0iKRDJB0i6RBJh0g6RNIhkg6RdIikQyQdIukQSYdIOkTSIZIOkXSI +pEMkHSLpEEmHSDpE0iGSDpF0iKRDJB0i6RBJh0g6RNIhkg6RdIikQyQdIukQ +SYdIOkTSIZIOkXSIpEMkHSLpEEmHSDpE0iGSDpF0iKRDJB0i6RBJh0g6RNIh +kg6RdIikQyQdIukQSYdIOkTSIZIOkXSIpEMkHSLpEEmHSDpE0iGSDpF0iKRD +JB0i6RBJh0g6RNIhkg6RdIikQyQdIukQSYdIOkTSIZIOkXSIpEMkHSLpEEmH +SDpE0iGSDpF0iKRDJB0i6RBJh0g6RNIhkg6RdIikQyQdIukQSYdIOkTSIZIO +kXSIpEMkHSLpEEmHSDpE0iGSDpF0iKRDJB0i6RBJh0g6RNIhkg6RdIikQyQd +IukQSYdIOkTSIZIOkXSIpEMkHSLpEEmHSDpE0iGSDpF0iKRDJB0i6RBJh0g6 +RNIhkg6RdIikQyQdIukQSYdIOkTSIZIOkXSIpEMkHSLpEEmHSDpE0iGSDpF0 +iKRDJB0i6RBJh0g6RNIhkg6RdIikQyQdIukQSYdIOkTSIZIOkXSIpEMkHSLp +EEmHSDpE0iGSDpF0iKRDJB0i6RBJh+gv8TgE/jVPQbMAAAAASUVORK5CYII= +===="))))) + +;; sheetdat is '("sheetname" (r1c1 r2c2 ...)(r2c1 r2c2 ...) ...) +(define (ods:sheet sheetdat) + (let ((name (car sheetdat)) + (rows (cdr sheetdat))) + (conc "\n" + (conc (ods:column) + (string-join (map ods:row rows) "")) + ""))) + +;; seems to be called once at top of each sheet, i.e. a column of rows +(define (ods:column) + "\n") + +;; cells is a list of ... +(define (ods:row cells) + (conc "\n" + (string-join (map ods:cell cells) "") + "\n")) + +;; types are "string" or "float" +(define (ods:cell value) + (let* ((type (cond + ((string? 
value) "string") + ((symbol? value) "string") + ((number? value) "float") + (else #f))) + (tmpval (if (symbol? value) + (symbol->string value) + (if type value ""))) ;; convert everything else to an empty string + (escval (if (string? tmpval)(string-substitute (regexp "<") "&lt;" (string-substitute (regexp ">") "&gt;" (string-substitute (regexp "&") "&amp;" tmpval #t) #t) #t) tmpval))) ;; XML-escape every occurrence (mode #t); & is handled first (innermost) so the entities added for < and > are not re-escaped + (conc "\n" + "" escval "" "\n" + "" "\n"))) + +;; create every directory named in ods:dirs under path (shells out to mkdir -p) +(define (ods:construct-dir path) + (for-each + (lambda (subdir) + (system (conc "mkdir -p " path "/" subdir))) + ods:dirs)) + +;; populate the necessary, non-constructed, files: touch each name in ods:0-len-files, then write each (name lines) entry of ods:files +(define (ods:add-non-content-files path) + ;; first the zero-length files, nb// the dir should already be created + (for-each + (lambda (fname) + (system (conc "touch " path "/" fname))) + ods:0-len-files) + ;; create the files with stuff in them + (for-each + (lambda (fdat) + (let* ((name (car fdat)) + (lines (cadr fdat))) + (with-output-to-file (conc path "/" name) + (lambda () + (for-each + (lambda (line) + (display line)) + lines))))) + ods:files)) + +;; write content.xml and the support files under path, then zip them into fname. data format: +;; '( (sheet1 (r1c1 r1c2 r1c3 ...) +;; (r2c1 r2c3 r2c3 ...) ) +;; (sheet2 ( ... ) +;; ( ... ) ) ) +(define (ods:list->ods path fname data) + (if (not (common:file-exists? path)) + (print "ERROR: path to create ods data must pre-exist") + (begin + (with-output-to-file (conc path "/content.xml") + (lambda () + (ods:construct-dir path) + (ods:add-non-content-files path) + (ods:make-thumbnail path) + (for-each display ods:content-header) + ;; process each sheet; for-each (not map) so the output order is guaranteed + (for-each print + (map ods:sheet data)) + (for-each display ods:content-footer))) + (system (conc "cd " path "; zip " fname " -n mimetype mimetype `find . 
|grep -v mimetype` > /dev/null"))))) + ) Index: pgdb-inc.scm ================================================================== --- pgdb-inc.scm +++ pgdb-inc.scm @@ -27,625 +27,5 @@ ;; ;; (import scheme) ;; (import data-structures) ;; (import chicken) -;; given a configdat lookup the connection info and open the db -;; -(define (pgdb:open configdat #!key (dbname #f)(dbispec #f)) - (let ((pgconf (or dbispec - (args:get-arg "-pgsync") - (if configdat - (configf:lookup configdat "ext-sync" (or dbname "pgdb")) - #f) - ))) - (if pgconf - (let* ((confdat (map (lambda (conf-item) - (let ((parts (string-split conf-item ":"))) - (if (> (length parts) 1) - (let ((key (car parts)) - (val (cadr parts))) - (cons (string->symbol key) val)) - (begin - (print "ERROR: Bad config setting " conf-item ", should be key:val") - `(,(string->symbol (car parts)) . #f))))) - (string-split pgconf))) - (dbtype (string->symbol (or (alist-ref 'dbtype confdat) "pg")))) - (if (alist-ref 'dbtype confdat) - (dbi:open dbtype (alist-delete 'dbtype confdat)))) - #f))) - -;;====================================================================== -;; A R E A S -;;====================================================================== - -(defstruct area id area-name area-path last-update) - -(define (pgdb:add-area dbh area-name area-path) - (dbi:exec dbh "INSERT INTO areas (area_name,area_path) VALUES (?,?)" area-name area-path)) - -(define (pgdb:get-areas dbh) - ;; (map - ;; (lambda (row) - ;; (print "row: " row)) - (dbi:get-rows dbh "SELECT id,area_name,area_path,last_sync FROM areas;")) ;; ) - -;; given an area_path get the area info -;; -(define (pgdb:get-area-by-path dbh area-path) - (dbi:get-one-row dbh "SELECT id,area_name,area_path,last_sync FROM areas WHERE area_path=?;" area-path)) - -(define (pgdb:write-sync-time dbh area-info new-sync-time) - (let ((area-id (vector-ref area-info 0))) - (dbi:exec dbh "UPDATE areas SET last_sync=? 
WHERE id=?;" new-sync-time area-id))) - -;;====================================================================== -;; T A R G E T S -;;====================================================================== - -;; Given a target-spec, return the id. Should probably handle this with a join... -;; if target-spec not found, create a record for it. -;; -(define (pgdb:get-ttype dbh target-spec) - (let ((spec-id (dbi:get-one dbh "SELECT id FROM ttype WHERE target_spec=?;" target-spec))) - (or spec-id - (if (handle-exceptions - exn - (begin - (print-call-chain) - (debug:print 0 *default-log-port* "ERROR: cannot create ttype entry, " ((condition-property-accessor 'exn 'message) exn)) - #f) - (dbi:exec dbh "INSERT INTO ttype (target_spec) VALUES (?);" target-spec)) - (pgdb:get-ttype dbh target-spec))))) - -;;====================================================================== -;; T A G S -;;====================================================================== - - -(define (pgdb:get-tag-info-by-name dbh tag) - (dbi:get-one-row dbh "SELECT id,tag_name FROM tags where tag_name=?;" tag)) - -(define (pgdb:insert-tag dbh name ) - (dbi:exec dbh "INSERT INTO tags (tag_name) VALUES (?)" name )) - -(define (pgdb:insert-area-tag dbh tag-id area-id ) - (dbi:exec dbh "INSERT INTO area_tags (tag_id, area_id) VALUES (?,?)" tag-id area-id )) - -(define (pgdb:insert-run-tag dbh tag-id run-id ) - (dbi:exec dbh "INSERT INTO run_tags (tag_id, run_id) VALUES (?,?)" tag-id run-id )) - - -(define (pgdb:is-area-taged dbh area-id) - (let ((area-tag-id (dbi:get-one dbh "SELECT id FROM area_tags WHERE area_id=?;" area-id))) - (if area-tag-id - #t - #f))) - -(define (pgdb:is-area-taged-with-a-tag dbh tag-id area-id) - (let ((area-tag-id (dbi:get-one dbh "SELECT id FROM area_tags WHERE area_id=? and tag_id=?;" area-id tag-id))) - (if area-tag-id - #t - #f))) - -(define (pgdb:is-run-taged-with-a-tag dbh tag-id run-id) - (let ((run-tag-id (dbi:get-one dbh "SELECT id FROM run_tags WHERE run_id=? 
and tag_id=?;" run-id tag-id))) - (if run-tag-id - #t - #f))) - - - -;;====================================================================== -;; R U N S -;;====================================================================== - -;; given a target spec id, target and run-name return the run-id -;; if no run found return #f -;; -(define (pgdb:get-run-id dbh spec-id target run-name area-id) - (dbi:get-one dbh "SELECT id FROM runs WHERE ttype_id=? AND target=? AND run_name=? and area_id=?;" - spec-id target run-name area-id)) - -;; given a target spec id, target and run-name return the run-id -;; if no run found return #f -;; -(define (pgdb:get-run-last-update dbh id ) - (dbi:get-one dbh "SELECT last_update FROM runs WHERE id=?;" - id)) - -;; given a run-id return all the run info -;; -(define (pgdb:get-run-info dbh run-id ) ;; to join ttype or not? - (dbi:get-one-row - dbh ;; 0 1 2 3 4 5 6 7 8 9 10 11 12 - "SELECT id,target,ttype_id,run_name,state,status,owner,event_time,comment,fail_count,pass_count,last_update,area_id - FROM runs WHERE id=? ;" run-id )) - -;; refresh the data in a run record -;; -(define (pgdb:refresh-run-info dbh run-id state status owner event-time comment fail-count pass-count area-id last_update publish-time) ;; area-id) - (dbi:exec - dbh - "UPDATE runs SET - state=?,status=?,owner=?,event_time=?,comment=?,fail_count=?,pass_count=?,last_update=?,publish_time=? - WHERE id=? 
and area_id=?;" - state status owner event-time comment fail-count pass-count last_update publish-time run-id area-id )) - -;; given all needed info create run record -;; -(define (pgdb:insert-run dbh ttype-id target run-name state status owner event-time comment fail-count pass-count area-id last-update publish-time) - (dbi:exec - dbh - "INSERT INTO runs (ttype_id,target,run_name,state,status,owner,event_time,comment,fail_count,pass_count,area_id,last_update,publish_time) - VALUES (?,?,?,?,?,?,?,?,?,?,?,?, ?);" - ttype-id target run-name state status owner event-time comment fail-count pass-count area-id last-update publish-time)) - -;;====================================================================== -;; T E S T - S T E P S -;;====================================================================== - -(define (pgdb:get-test-step-id dbh test-id stepname state) - (dbi:get-one - dbh - "SELECT id FROM test_steps WHERE test_id=? AND stepname=? and state = ? ;" - test-id stepname state)) - -(define (pgdb:get-test-step-last-update dbh id ) - (dbi:get-one - dbh - "SELECT last_update FROM test_steps WHERE id=? ;" - id)) - -(define (pgdb:insert-test-step dbh test-id stepname state status event_time comment logfile last-update ) - (dbi:exec - dbh - "INSERT INTO test_steps (test_id,stepname,state,status,event_time,logfile,comment,last_update) - VALUES (?,?,?,?,?,?,?,? );" - test-id stepname state status event_time logfile comment last-update)) - -(define (pgdb:update-test-step dbh step-id test-id stepname state status event_time comment logfile last-update) - (dbi:exec - dbh - "UPDATE test_steps SET - test_id=?,stepname=?,state=?,status=?,event_time=?,logfile=?,comment=?,last_update=? 
- WHERE id=?;" - test-id stepname state status event_time logfile comment last-update step-id)) - - -;;====================================================================== -;; T E S T - D A T A -;;====================================================================== - -(define (pgdb:get-test-data-id dbh test-id category variable) - (dbi:get-one - dbh - "SELECT id FROM test_data WHERE test_id=? AND category=? and variable = ? ;" - test-id category variable)) - -(define (pgdb:get-test-data-last-update dbh test-data-id ) - (dbi:get-one - dbh - "SELECT last_update FROM test_data WHERE id=? ;" - test-data-id)) - -(define (pgdb:insert-test-data dbh test-id category variable value expected tol units comment status type last-update) - ; (print "INSERT INTO test_data (test_id, category, variable, value, expected, tol, units, comment, status, type) - ; VALUES (?,?,?,?,?,?,?,?,?,?) " test-id " " category " " variable " " value " " expected " " tol " " units " " comment " " status " " type) - (if (not (string? units)) - (set! units "" )) - (if (not (string? variable)) - (set! variable "" )) - (if (not (real? value)) - (set! value 0 )) - (if (not (real? expected)) - (set! expected 0 )) -(if (not (real? tol)) - (set! tol 0 )) - - (dbi:exec - dbh - "INSERT INTO test_data (test_id, category, variable, value, expected, tol, units, comment, status, type, last_update) - VALUES (?,?,?,?,?,?,?,?,?,?, ?);" - test-id category variable value expected tol units comment status type last-update)) - -(define (pgdb:update-test-data dbh data-id test-id category variable value expected tol units comment status type last-update) - (dbi:exec - dbh - "UPDATE test_data SET - test_id=?, category=?, variable=?, value=?, expected=?, tol=?, units=?, comment=?, status=?, type=?, last_update=? 
- WHERE id=?;" - test-id category variable value expected tol units comment status type last-update data-id )) - - - -;;====================================================================== -;; T E S T S -;;====================================================================== - -;; given run-id, test_name and item_path return test-id -;; -(define (pgdb:get-test-id dbh run-id test-name item-path) - (dbi:get-one - dbh - "SELECT id FROM tests WHERE run_id=? AND test_name=? AND item_path=?;" - run-id test-name item-path)) - -(define (pgdb:get-test-last-update dbh id) - (dbi:get-one - dbh - "SELECT last_update FROM tests WHERE id=? ;" - id )) - - -;; create new test record -;; -(define (pgdb:insert-test dbh run-id test-name item-path state status host cpuload diskfree uname run-dir log-file run-duration comment event-time archived last-update pid) - (dbi:exec - dbh - "INSERT INTO tests (run_id,test_name,item_path,state,status,host,cpuload,diskfree,uname,rundir,final_logf,run_duration,comment,event_time,archived,last_update,attemptnum) - VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?);" - - run-id test-name item-path state status host cpuload diskfree uname - run-dir log-file run-duration comment event-time archived last-update pid)) - -;; update existing test record -;; -(define (pgdb:update-test dbh test-id run-id test-name item-path state status host cpuload diskfree uname run-dir log-file run-duration comment event-time archived last-update pid) - (dbi:exec - dbh - "UPDATE tests SET - run_id=?,test_name=?,item_path=?,state=?,status=?,host=?,cpuload=?,diskfree=?,uname=?,rundir=?,final_logf=?,run_duration=?,comment=?,event_time=?,archived=?,last_update=?,attemptnum=? 
- WHERE id=?;" - - run-id test-name item-path state status host cpuload diskfree uname - run-dir log-file run-duration comment event-time archived last-update pid test-id)) - -(define (pgdb:get-tests dbh target-patt) - (dbi:get-rows - dbh - "SELECT t.id,t.run_id,t.test_name,t.item_path,t.state,t.status,t.host,t.cpuload,t.diskfree,t.uname,t.rundir,t.final_logf,t.run_duration,t.comment,t.event_time,t.archived, - r.id,r.target,r.ttype_id,r.run_name,r.state,r.status,r.owner,r.event_time,r.comment - FROM tests AS t INNER JOIN runs AS r ON t.run_id=r.id - WHERE r.target LIKE ?;" target-patt)) - -(define (pgdb:get-stats-given-type-target dbh ttype-id target-patt) - (dbi:get-rows - dbh - ;; "SELECT COUNT(t.id),t.status,r.target FROM tests AS t INNER JOIN runs AS r ON t.run_id=r.id - ;; WHERE t.state='COMPLETED' AND ttype_id=? AND r.target LIKE ? GROUP BY r.target,t.status;" - "SELECT r.target,COUNT(*) AS total, - SUM(CASE WHEN t.status='PASS' THEN 1 ELSE 0 END) AS pass, - SUM(CASE WHEN t.status='FAIL' THEN 1 ELSE 0 END) AS fail, - SUM(CASE WHEN t.status IN ('PASS','FAIL') THEN 0 ELSE 1 END) AS other - FROM tests AS t INNER JOIN runs AS r ON t.run_id=r.id - WHERE t.state='COMPLETED' AND ttype_id=? AND r.target LIKE ? GROUP BY r.target;" - ttype-id target-patt)) - -(define (pgdb:get-stats-given-target dbh target-patt) - (dbi:get-rows - dbh - ;; "SELECT COUNT(t.id),t.status,r.target FROM tests AS t INNER JOIN runs AS r ON t.run_id=r.id - ;; WHERE t.state='COMPLETED' AND ttype_id=? AND r.target LIKE ? GROUP BY r.target,t.status;" - "SELECT r.target,COUNT(*) AS total, - SUM(CASE WHEN t.status='PASS' THEN 1 ELSE 0 END) AS pass, - SUM(CASE WHEN t.status='FAIL' THEN 1 ELSE 0 END) AS fail, - SUM(CASE WHEN t.status IN ('PASS','FAIL') THEN 0 ELSE 1 END) AS other - FROM tests AS t INNER JOIN runs AS r ON t.run_id=r.id - WHERE t.state='COMPLETED' AND r.target LIKE ? 
GROUP BY r.target;" - target-patt)) - - -(define (pgdb:get-latest-run-stats-given-target dbh ttype-id target-patt limit offset) - (dbi:get-rows - dbh - ;; "SELECT COUNT(t.id),t.status,r.target FROM tests AS t INNER JOIN runs AS r ON t.run_id=r.id - ;; WHERE t.state='COMPLETED' AND ttype_id=? AND r.target LIKE ? GROUP BY r.target,t.status;" - "SELECT r.target, r.event_time, COUNT(*) AS total, - SUM(CASE WHEN t.status='PASS' THEN 1 ELSE 0 END) AS pass, - SUM(CASE WHEN t.status='FAIL' THEN 1 ELSE 0 END) AS fail, - SUM(CASE WHEN t.status IN ('PASS','FAIL') THEN 0 ELSE 1 END) AS other, r.id - FROM tests AS t INNER JOIN runs AS r ON t.run_id=r.id - WHERE t.state like '%' AND ttype_id=? AND r.target LIKE ? - and r.id in - (SELECT DISTINCT on (target) id from runs where target like ? AND ttype_id=? order by target,event_time desc) - GROUP BY r.target,r.id - order by r.event_time desc limit ? offset ? ;" - ttype-id target-patt target-patt ttype-id limit offset)) - -(define (pgdb:get-latest-run-stats-given-pattern dbh patt limit offset) - (dbi:get-rows - dbh - ;; "SELECT COUNT(t.id),t.status,r.target FROM tests AS t INNER JOIN runs AS r ON t.run_id=r.id - ;; WHERE t.state='COMPLETED' AND ttype_id=? AND r.target ILIKE ? GROUP BY r.target,t.status;" - "SELECT r.target, r.event_time, COUNT(*) AS total, - SUM(CASE WHEN t.status='PASS' THEN 1 ELSE 0 END) AS pass, - SUM(CASE WHEN t.status='FAIL' THEN 1 ELSE 0 END) AS fail, - SUM(CASE WHEN t.status IN ('PASS','FAIL') THEN 0 ELSE 1 END) AS other, r.id - FROM tests AS t INNER JOIN runs AS r ON t.run_id=r.id - WHERE t.state like '%' AND r.target ILIKE ? - and r.id in - (SELECT DISTINCT on (target) id from runs where target ilike ? order by target,event_time desc) - GROUP BY r.target,r.id - order by r.event_time desc limit ? offset ? 
;" - patt patt limit offset)) - - -(define (pgdb:get-count-data-stats-target-latest dbh ttype-id target-patt) - (dbi:get-rows - dbh - "SELECT count(*) from - (SELECT DISTINCT on (target) id - from runs where target like ? AND ttype_id = ? - order by target, event_time desc - ) as x;" - target-patt ttype-id)) - -(define (pgdb:get-latest-run-cnt dbh ttype-id target-patt) - (let* ((cnt-result (pgdb:get-count-data-stats-target-latest dbh ttype-id target-patt)) - ;(cnt-row (car (cnt-result))) - (cnt 0) - ) - (for-each - (lambda (row) - (set! cnt (vector-ref row 0 ))) - cnt-result) - -cnt)) - -(define (pgdb:get-count-data-stats-latest-pattern dbh patt) - (dbi:get-rows - dbh - "SELECT count(*) from - (SELECT DISTINCT on (target) id - from runs where target ilike ? - order by target, event_time desc - ) as x;" - patt)) - -(define (pgdb:get-latest-run-cnt-by-pattern dbh target-patt) - (let* ((cnt-result (pgdb:get-count-data-stats-latest-pattern dbh target-patt)) - ;(cnt-row (car (cnt-result))) - (cnt 0) - ) - (for-each - (lambda (row) - (set! cnt (vector-ref row 0 ))) - cnt-result) - -cnt)) - - - - - -(define (pgdb:get-run-stats-history-given-target dbh ttype-id target-patt) - (dbi:get-rows - dbh - ;; "SELECT COUNT(t.id),t.status,r.target FROM tests AS t INNER JOIN runs AS r ON t.run_id=r.id - ;; WHERE t.state='COMPLETED' AND ttype_id=? AND r.target LIKE ? GROUP BY r.target,t.status;" - "SELECT r.run_name,COUNT(*) AS total, - SUM(CASE WHEN t.status='PASS' THEN 1 ELSE 0 END) AS pass, - SUM(CASE WHEN t.status='FAIL' THEN 1 ELSE 0 END) AS fail, - SUM(CASE WHEN t.status IN ('PASS','FAIL') THEN 0 ELSE 1 END) AS other - FROM tests AS t INNER JOIN runs AS r ON t.run_id=r.id - WHERE t.state like '%' AND ttype_id=? AND r.target LIKE ? 
- GROUP BY r.run_name;" - ttype-id target-patt )) - -(define (pgdb:get-all-run-stats-target-slice dbh target-patt limit offset) - (dbi:get-rows - dbh - "SELECT r.target, r.run_name,r.event_time, COUNT(*) AS total, - SUM(CASE WHEN t.status='PASS' THEN 1 ELSE 0 END) AS pass, - SUM(CASE WHEN t.status='FAIL' THEN 1 ELSE 0 END) AS fail, - SUM(CASE WHEN t.status IN ('PASS','FAIL') THEN 0 ELSE 1 END) AS other - FROM tests AS t INNER JOIN runs AS r ON t.run_id=r.id - WHERE r.target LIKE ? - GROUP BY r.target,r.run_name, r.event_time - order by r.target,r.event_time desc limit ? offset ? ;" - target-patt limit offset)) - - -(define (pgdb:get-count-data-stats-target-slice dbh target-patt) - (dbi:get-rows - dbh - "SELECT count(*) from (SELECT r.target, r.run_name,r.event_time, COUNT(*) AS total - FROM tests AS t INNER JOIN runs AS r ON t.run_id=r.id - WHERE r.target LIKE ? - GROUP BY r.target,r.run_name, r.event_time - ) as x;" - target-patt)) - -(define (pgdb:get-slice-cnt dbh target-patt) - (let* ((cnt-result (pgdb:get-count-data-stats-target-slice dbh target-patt)) - ;(cnt-row (car (cnt-result))) - (cnt 0) - ) - (for-each - (lambda (row) - (set! cnt (vector-ref row 0 ))) - cnt-result) - -cnt)) - - -(define (pgdb:get-target-types dbh) - (dbi:get-rows dbh "SELECT id,target_spec FROM ttype;")) - - (define (pgdb:get-distict-target-slice dbh) - (dbi:get-rows dbh " select distinct on (split_part (target, '/', 1)) (split_part (target, '/', 1)) from runs;")) - - (define (pgdb:get-distict-target-slice3 dbh) - (dbi:get-rows dbh " select distinct on (split_part (target, '/', 3)) (split_part (target, '/', 3)) from runs;")) -;; -(define (pgdb:get-targets dbh target-patt) - (let ((ttypes (pgdb:get-target-types dbh))) - (map - (lambda (ttype-dat) - (let ((tt-id (vector-ref ttype-dat 0)) - (ttype (vector-ref ttype-dat 1))) - (cons ttype - (dbi:get-rows - dbh - "SELECT DISTINCT target FROM runs WHERE target LIKE ? 
AND ttype_id=?;" target-patt tt-id)) - )) - ttypes))) - -(define (pgdb:get-targets-of-type dbh ttype-id target-patt) - (dbi:get-rows dbh "SELECT DISTINCT target FROM runs WHERE target LIKE ? AND ttype_id=?;" target-patt ttype-id)) - -(define (pgdb:get-runs-by-target dbh targets run-patt) - (dbi:get-rows dbh "SELECT r.run_name, t.test_name, t.status, t.item_path, t.id, t.rundir, t.final_logf FROM runs as r INNER JOIN tests AS t ON t.run_id=r.id - WHERE t.state='COMPLETED' AND r.target like ? AND r.run_name like ?;" targets run-patt) -) - -(define (pgdb:get-test-by-id dbh id) - (dbi:get-rows dbh "SELECT t.test_name, t.item_path, t.rundir, t.final_logf FROM runs as r INNER JOIN tests AS t ON t.run_id=r.id - WHERE t.id = ?;" id) -) - -;;====================================================================== -;; V A R I O U S D A T A M A S S A G E R O U T I N E S -;;====================================================================== - -;; probably want to move these to a different model file - -;; create a hash of hashes with keys extracted from all-parts -;; using row-or-col to choose row or column -;; ht{row key}=>ht{col key}=>data -;; -;; fnum is the field number in the tuples to be split -;; - -(define (pgdb:mk-pattern dot type bp rel) - (let* ((typ (if (equal? type "all") - "%" - type)) - (dotprocess (if (equal? dot "all") - "%" - dot)) - (rel-num (if (equal? rel "") - "%" - rel)) - (pattern (conc "%/" bp "/" dotprocess "/" typ "_" rel-num))) -pattern)) - -(define (pgdb:coalesce-runs dbh runs all-parts row-or-col fnum) - (let* ((data (make-hash-table))) - - (for-each - (lambda (run) - (let* ((target (vector-ref run fnum)) - (parts (string-split target "/")) - (first (car parts)) - (rest (string-intersperse (cdr parts) "/")) - (coldat (hash-table-ref/default data first #f))) - (if (not coldat)(let ((newht (make-hash-table))) - (hash-table-set! data first newht) - (set! coldat newht))) - (hash-table-set! 
coldat rest run))) - runs) - data)) - - -(define (pgdb:coalesce-runs1 runs ) - (let* ((data (make-hash-table))) - - (for-each - (lambda (run) - (let* ((target (vector-ref run 0)) - (parts (string-split target "/")) - (first (car parts)) - (rest (string-intersperse (cdr parts) "/")) - (coldat (hash-table-ref/default data first #f))) - (if (not coldat)(let ((newht (make-hash-table))) - (hash-table-set! data first newht) - (set! coldat newht))) - (hash-table-set! coldat rest run))) - runs) - data)) - -;; given ordered data hash return a-keys -;; -(define (pgdb:ordered-data->a-keys ordered-data) - (sort (hash-table-keys ordered-data) string>=?)) - -;; given ordered data hash return b-keys -;; -(define (pgdb:ordered-data->b-keys ordered-data a-keys) - (delete-duplicates - (sort (apply - append - (map (lambda (sub-key) - (let ((subdat (hash-table-ref ordered-data sub-key))) - (hash-table-keys subdat))) - a-keys)) - string>=?))) - -;; given ordered data hash return a-keys -;; -(define (pgdb:ordered-data->a-keys ordered-data) - (sort (hash-table-keys ordered-data) string>=?)) - -;; given ordered data hash return b-keys -;; -(define (pgdb:ordered-data->b-keys ordered-data a-keys) - (delete-duplicates - (sort (apply - append - (map (lambda (sub-key) - (let ((subdat (hash-table-ref ordered-data sub-key))) - (hash-table-keys subdat))) - a-keys)) - string>=?))) - -(define (pgdb:coalesce-runs-by-slice runs slice) - (let* ((data (make-hash-table))) - (for-each - (lambda (run) - (let* ((target (vector-ref run 0)) - (run-name (vector-ref run 1)) - (parts (string-split target "/")) - (first (car parts)) - (rest (string-intersperse (cdr parts) "/")) - (coldat (hash-table-ref/default data rest #f))) - (if (not coldat)(let ((newht (make-hash-table))) - (hash-table-set! data rest newht) - (set! coldat newht))) - (hash-table-set! 
coldat run-name run))) - runs) - data)) - - -(define (pgdb:runs-to-hash runs ) - (let* ((data (make-hash-table))) - (for-each - (lambda (run) - (let* ((run-name (vector-ref run 0)) - (test (conc (vector-ref run 1) ":" (vector-ref run 3))) - (coldat (hash-table-ref/default data run-name #f))) - (if (not coldat)(let ((newht (make-hash-table))) - (hash-table-set! data run-name newht) - (set! coldat newht))) - (hash-table-set! coldat test run))) - runs) - data)) - -(define (pgdb:get-history-hash runs) - (let* ((data (make-hash-table))) - (for-each - (lambda (run) - (let* ((run-name (vector-ref run 0))) - (hash-table-set! data run-name run))) - runs) - data)) - -(define (pgdb:get-pg-lst tab2-pages) - (let loop ((i 1) - (lst `())) - (cond - ((> i tab2-pages ) - lst) - (else - (loop (+ i 1) (append lst (list i))))))) - ADDED pgdbmod.scm Index: pgdbmod.scm ================================================================== --- /dev/null +++ pgdbmod.scm @@ -0,0 +1,661 @@ +;;====================================================================== +;; Copyright 2019, Matthew Welland. +;; +;; This file is part of Megatest. +;; +;; Megatest is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. +;; +;; Megatest is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with Megatest. If not, see . 
+
+;;======================================================================
+
+(declare (unit pgdbmod))
+(declare (uses mtconfigf))
+(declare (uses commonmod))
+(declare (uses mtargs))
+
+(module pgdbmod
+        *
+
+(import scheme chicken data-structures extras files)
+(use posix
+     (prefix dbi dbi:)
+     srfi-69 srfi-1 typed-records)
+
+(import (prefix mtconfigf configf:))
+(import (prefix mtargs args:))
+(import commonmod)
+
+;; Look up the connection spec and open the db.
+;;
+;; The spec is taken from the first of these that yields a value:
+;;   1. the dbispec keyword argument
+;;   2. the -pgsync command line argument
+;;   3. section "ext-sync" of configdat, variable dbname (default "pgdb")
+;;
+;; The spec is a whitespace separated list of key:val items. Only the
+;; first two ":" separated fields of an item are used. An item without
+;; a ":" is reported on stdout and recorded with the value #f.
+;;
+;; The spec must contain a dbtype item; the remaining items are passed
+;; to dbi:open as an alist.
+;;
+;; Returns whatever dbi:open returns, or #f when no spec was found or
+;; the spec has no usable dbtype item.
+;;
+(define (pgdb:open configdat #!key (dbname #f)(dbispec #f))
+  (let ((pgconf (or dbispec
+                    (args:get-arg "-pgsync")
+                    (if configdat
+                        (configf:lookup configdat "ext-sync" (or dbname "pgdb"))
+                        #f))))
+    (if pgconf
+        (let* ((confdat (map (lambda (conf-item)
+                               (let ((parts (string-split conf-item ":")))
+                                 (if (> (length parts) 1)
+                                     (cons (string->symbol (car parts)) (cadr parts))
+                                     (begin
+                                       (print "ERROR: Bad config setting " conf-item ", should be key:val")
+                                       (cons (string->symbol (car parts)) #f)))))
+                             (string-split pgconf)))
+               (dbtype  (alist-ref 'dbtype confdat)))
+          ;; This used to be a one-armed `if`, so a spec without dbtype
+          ;; returned an unspecified (truthy) value that callers could
+          ;; mistake for a db handle. Return #f explicitly instead.
+          ;;
+          ;; NOTE(review): the previous code computed a default dbtype of
+          ;; "pg" but never used it because of this same guard. Confirm
+          ;; whether a missing dbtype should fall back to pg.
+          (if dbtype
+              (dbi:open (string->symbol dbtype) (alist-delete 'dbtype confdat))
+              #f))
+        #f)))
+
+;;======================================================================
+;;  A R E A S
+;;======================================================================
+
+(defstruct area id area-name area-path last-update)
+
+;; insert a new row into the areas table
+;;
+(define (pgdb:add-area dbh area-name area-path)
+  (dbi:exec dbh "INSERT INTO areas (area_name,area_path) VALUES (?,?)" area-name area-path))
+
+;; get id, area_name, area_path and last_sync for every area
+;;
+(define (pgdb:get-areas dbh)
+  ;; (map
+  ;;  (lambda (row)
+  ;;    (print "row: " row))
+  (dbi:get-rows dbh "SELECT id,area_name,area_path,last_sync FROM areas;")) ;; )
+
+;; given an area_path get the area info
+;;
+(define (pgdb:get-area-by-path dbh area-path)
+  (dbi:get-one-row dbh "SELECT id,area_name,area_path,last_sync FROM areas WHERE area_path=?;" area-path))
+
+;; record new-sync-time as last_sync for the area described by
+;; area-info (a row whose field 0 is the area id, as selected by
+;; pgdb:get-area-by-path)
+;;
+(define (pgdb:write-sync-time dbh area-info new-sync-time)
+  (let ((area-id (vector-ref area-info 0)))
+    (dbi:exec dbh "UPDATE areas SET last_sync=? WHERE id=?;" new-sync-time area-id)))
+
+;;======================================================================
+;;  T A R G E T S
+;;======================================================================
+
+;; Given a target-spec, return the id. Should probably handle this with a join...
+;; if target-spec not found, create a record for it.
+;; +(define (pgdb:get-ttype dbh target-spec) + (let ((spec-id (dbi:get-one dbh "SELECT id FROM ttype WHERE target_spec=?;" target-spec))) + (or spec-id + (if (handle-exceptions + exn + (begin + (print-call-chain) + (debug:print 0 *default-log-port* "ERROR: cannot create ttype entry, " ((condition-property-accessor 'exn 'message) exn)) + #f) + (dbi:exec dbh "INSERT INTO ttype (target_spec) VALUES (?);" target-spec)) + (pgdb:get-ttype dbh target-spec))))) + +;;====================================================================== +;; T A G S +;;====================================================================== + + +(define (pgdb:get-tag-info-by-name dbh tag) + (dbi:get-one-row dbh "SELECT id,tag_name FROM tags where tag_name=?;" tag)) + +(define (pgdb:insert-tag dbh name ) + (dbi:exec dbh "INSERT INTO tags (tag_name) VALUES (?)" name )) + +(define (pgdb:insert-area-tag dbh tag-id area-id ) + (dbi:exec dbh "INSERT INTO area_tags (tag_id, area_id) VALUES (?,?)" tag-id area-id )) + +(define (pgdb:insert-run-tag dbh tag-id run-id ) + (dbi:exec dbh "INSERT INTO run_tags (tag_id, run_id) VALUES (?,?)" tag-id run-id )) + + +(define (pgdb:is-area-taged dbh area-id) + (let ((area-tag-id (dbi:get-one dbh "SELECT id FROM area_tags WHERE area_id=?;" area-id))) + (if area-tag-id + #t + #f))) + +(define (pgdb:is-area-taged-with-a-tag dbh tag-id area-id) + (let ((area-tag-id (dbi:get-one dbh "SELECT id FROM area_tags WHERE area_id=? and tag_id=?;" area-id tag-id))) + (if area-tag-id + #t + #f))) + +(define (pgdb:is-run-taged-with-a-tag dbh tag-id run-id) + (let ((run-tag-id (dbi:get-one dbh "SELECT id FROM run_tags WHERE run_id=? 
and tag_id=?;" run-id tag-id))) + (if run-tag-id + #t + #f))) + + + +;;====================================================================== +;; R U N S +;;====================================================================== + +;; given a target spec id, target and run-name return the run-id +;; if no run found return #f +;; +(define (pgdb:get-run-id dbh spec-id target run-name area-id) + (dbi:get-one dbh "SELECT id FROM runs WHERE ttype_id=? AND target=? AND run_name=? and area_id=?;" + spec-id target run-name area-id)) + +;; given a target spec id, target and run-name return the run-id +;; if no run found return #f +;; +(define (pgdb:get-run-last-update dbh id ) + (dbi:get-one dbh "SELECT last_update FROM runs WHERE id=?;" + id)) + +;; given a run-id return all the run info +;; +(define (pgdb:get-run-info dbh run-id ) ;; to join ttype or not? + (dbi:get-one-row + dbh ;; 0 1 2 3 4 5 6 7 8 9 10 11 12 + "SELECT id,target,ttype_id,run_name,state,status,owner,event_time,comment,fail_count,pass_count,last_update,area_id + FROM runs WHERE id=? ;" run-id )) + +;; refresh the data in a run record +;; +(define (pgdb:refresh-run-info dbh run-id state status owner event-time comment fail-count pass-count area-id last_update publish-time) ;; area-id) + (dbi:exec + dbh + "UPDATE runs SET + state=?,status=?,owner=?,event_time=?,comment=?,fail_count=?,pass_count=?,last_update=?,publish_time=? + WHERE id=? 
and area_id=?;" + state status owner event-time comment fail-count pass-count last_update publish-time run-id area-id )) + +;; given all needed info create run record +;; +(define (pgdb:insert-run dbh ttype-id target run-name state status owner event-time comment fail-count pass-count area-id last-update publish-time) + (dbi:exec + dbh + "INSERT INTO runs (ttype_id,target,run_name,state,status,owner,event_time,comment,fail_count,pass_count,area_id,last_update,publish_time) + VALUES (?,?,?,?,?,?,?,?,?,?,?,?, ?);" + ttype-id target run-name state status owner event-time comment fail-count pass-count area-id last-update publish-time)) + +;;====================================================================== +;; T E S T - S T E P S +;;====================================================================== + +(define (pgdb:get-test-step-id dbh test-id stepname state) + (dbi:get-one + dbh + "SELECT id FROM test_steps WHERE test_id=? AND stepname=? and state = ? ;" + test-id stepname state)) + +(define (pgdb:get-test-step-last-update dbh id ) + (dbi:get-one + dbh + "SELECT last_update FROM test_steps WHERE id=? ;" + id)) + +(define (pgdb:insert-test-step dbh test-id stepname state status event_time comment logfile last-update ) + (dbi:exec + dbh + "INSERT INTO test_steps (test_id,stepname,state,status,event_time,logfile,comment,last_update) + VALUES (?,?,?,?,?,?,?,? );" + test-id stepname state status event_time logfile comment last-update)) + +(define (pgdb:update-test-step dbh step-id test-id stepname state status event_time comment logfile last-update) + (dbi:exec + dbh + "UPDATE test_steps SET + test_id=?,stepname=?,state=?,status=?,event_time=?,logfile=?,comment=?,last_update=? 
+ WHERE id=?;" + test-id stepname state status event_time logfile comment last-update step-id)) + + +;;====================================================================== +;; T E S T - D A T A +;;====================================================================== + +(define (pgdb:get-test-data-id dbh test-id category variable) + (dbi:get-one + dbh + "SELECT id FROM test_data WHERE test_id=? AND category=? and variable = ? ;" + test-id category variable)) + +(define (pgdb:get-test-data-last-update dbh test-data-id ) + (dbi:get-one + dbh + "SELECT last_update FROM test_data WHERE id=? ;" + test-data-id)) + +(define (pgdb:insert-test-data dbh test-id category variable value expected tol units comment status type last-update) + ; (print "INSERT INTO test_data (test_id, category, variable, value, expected, tol, units, comment, status, type) + ; VALUES (?,?,?,?,?,?,?,?,?,?) " test-id " " category " " variable " " value " " expected " " tol " " units " " comment " " status " " type) + (if (not (string? units)) + (set! units "" )) + (if (not (string? variable)) + (set! variable "" )) + (if (not (real? value)) + (set! value 0 )) + (if (not (real? expected)) + (set! expected 0 )) +(if (not (real? tol)) + (set! tol 0 )) + + (dbi:exec + dbh + "INSERT INTO test_data (test_id, category, variable, value, expected, tol, units, comment, status, type, last_update) + VALUES (?,?,?,?,?,?,?,?,?,?, ?);" + test-id category variable value expected tol units comment status type last-update)) + +(define (pgdb:update-test-data dbh data-id test-id category variable value expected tol units comment status type last-update) + (dbi:exec + dbh + "UPDATE test_data SET + test_id=?, category=?, variable=?, value=?, expected=?, tol=?, units=?, comment=?, status=?, type=?, last_update=? 
+ WHERE id=?;" + test-id category variable value expected tol units comment status type last-update data-id )) + + + +;;====================================================================== +;; T E S T S +;;====================================================================== + +;; given run-id, test_name and item_path return test-id +;; +(define (pgdb:get-test-id dbh run-id test-name item-path) + (dbi:get-one + dbh + "SELECT id FROM tests WHERE run_id=? AND test_name=? AND item_path=?;" + run-id test-name item-path)) + +(define (pgdb:get-test-last-update dbh id) + (dbi:get-one + dbh + "SELECT last_update FROM tests WHERE id=? ;" + id )) + + +;; create new test record +;; +(define (pgdb:insert-test dbh run-id test-name item-path state status host cpuload diskfree uname run-dir log-file run-duration comment event-time archived last-update pid) + (dbi:exec + dbh + "INSERT INTO tests (run_id,test_name,item_path,state,status,host,cpuload,diskfree,uname,rundir,final_logf,run_duration,comment,event_time,archived,last_update,attemptnum) + VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?);" + + run-id test-name item-path state status host cpuload diskfree uname + run-dir log-file run-duration comment event-time archived last-update pid)) + +;; update existing test record +;; +(define (pgdb:update-test dbh test-id run-id test-name item-path state status host cpuload diskfree uname run-dir log-file run-duration comment event-time archived last-update pid) + (dbi:exec + dbh + "UPDATE tests SET + run_id=?,test_name=?,item_path=?,state=?,status=?,host=?,cpuload=?,diskfree=?,uname=?,rundir=?,final_logf=?,run_duration=?,comment=?,event_time=?,archived=?,last_update=?,attemptnum=? 
+ WHERE id=?;" + + run-id test-name item-path state status host cpuload diskfree uname + run-dir log-file run-duration comment event-time archived last-update pid test-id)) + +(define (pgdb:get-tests dbh target-patt) + (dbi:get-rows + dbh + "SELECT t.id,t.run_id,t.test_name,t.item_path,t.state,t.status,t.host,t.cpuload,t.diskfree,t.uname,t.rundir,t.final_logf,t.run_duration,t.comment,t.event_time,t.archived, + r.id,r.target,r.ttype_id,r.run_name,r.state,r.status,r.owner,r.event_time,r.comment + FROM tests AS t INNER JOIN runs AS r ON t.run_id=r.id + WHERE r.target LIKE ?;" target-patt)) + +(define (pgdb:get-stats-given-type-target dbh ttype-id target-patt) + (dbi:get-rows + dbh + ;; "SELECT COUNT(t.id),t.status,r.target FROM tests AS t INNER JOIN runs AS r ON t.run_id=r.id + ;; WHERE t.state='COMPLETED' AND ttype_id=? AND r.target LIKE ? GROUP BY r.target,t.status;" + "SELECT r.target,COUNT(*) AS total, + SUM(CASE WHEN t.status='PASS' THEN 1 ELSE 0 END) AS pass, + SUM(CASE WHEN t.status='FAIL' THEN 1 ELSE 0 END) AS fail, + SUM(CASE WHEN t.status IN ('PASS','FAIL') THEN 0 ELSE 1 END) AS other + FROM tests AS t INNER JOIN runs AS r ON t.run_id=r.id + WHERE t.state='COMPLETED' AND ttype_id=? AND r.target LIKE ? GROUP BY r.target;" + ttype-id target-patt)) + +(define (pgdb:get-stats-given-target dbh target-patt) + (dbi:get-rows + dbh + ;; "SELECT COUNT(t.id),t.status,r.target FROM tests AS t INNER JOIN runs AS r ON t.run_id=r.id + ;; WHERE t.state='COMPLETED' AND ttype_id=? AND r.target LIKE ? GROUP BY r.target,t.status;" + "SELECT r.target,COUNT(*) AS total, + SUM(CASE WHEN t.status='PASS' THEN 1 ELSE 0 END) AS pass, + SUM(CASE WHEN t.status='FAIL' THEN 1 ELSE 0 END) AS fail, + SUM(CASE WHEN t.status IN ('PASS','FAIL') THEN 0 ELSE 1 END) AS other + FROM tests AS t INNER JOIN runs AS r ON t.run_id=r.id + WHERE t.state='COMPLETED' AND r.target LIKE ? 
GROUP BY r.target;" + target-patt)) + + +(define (pgdb:get-latest-run-stats-given-target dbh ttype-id target-patt limit offset) + (dbi:get-rows + dbh + ;; "SELECT COUNT(t.id),t.status,r.target FROM tests AS t INNER JOIN runs AS r ON t.run_id=r.id + ;; WHERE t.state='COMPLETED' AND ttype_id=? AND r.target LIKE ? GROUP BY r.target,t.status;" + "SELECT r.target, r.event_time, COUNT(*) AS total, + SUM(CASE WHEN t.status='PASS' THEN 1 ELSE 0 END) AS pass, + SUM(CASE WHEN t.status='FAIL' THEN 1 ELSE 0 END) AS fail, + SUM(CASE WHEN t.status IN ('PASS','FAIL') THEN 0 ELSE 1 END) AS other, r.id + FROM tests AS t INNER JOIN runs AS r ON t.run_id=r.id + WHERE t.state like '%' AND ttype_id=? AND r.target LIKE ? + and r.id in + (SELECT DISTINCT on (target) id from runs where target like ? AND ttype_id=? order by target,event_time desc) + GROUP BY r.target,r.id + order by r.event_time desc limit ? offset ? ;" + ttype-id target-patt target-patt ttype-id limit offset)) + +(define (pgdb:get-latest-run-stats-given-pattern dbh patt limit offset) + (dbi:get-rows + dbh + ;; "SELECT COUNT(t.id),t.status,r.target FROM tests AS t INNER JOIN runs AS r ON t.run_id=r.id + ;; WHERE t.state='COMPLETED' AND ttype_id=? AND r.target ILIKE ? GROUP BY r.target,t.status;" + "SELECT r.target, r.event_time, COUNT(*) AS total, + SUM(CASE WHEN t.status='PASS' THEN 1 ELSE 0 END) AS pass, + SUM(CASE WHEN t.status='FAIL' THEN 1 ELSE 0 END) AS fail, + SUM(CASE WHEN t.status IN ('PASS','FAIL') THEN 0 ELSE 1 END) AS other, r.id + FROM tests AS t INNER JOIN runs AS r ON t.run_id=r.id + WHERE t.state like '%' AND r.target ILIKE ? + and r.id in + (SELECT DISTINCT on (target) id from runs where target ilike ? order by target,event_time desc) + GROUP BY r.target,r.id + order by r.event_time desc limit ? offset ? 
;" + patt patt limit offset)) + + +(define (pgdb:get-count-data-stats-target-latest dbh ttype-id target-patt) + (dbi:get-rows + dbh + "SELECT count(*) from + (SELECT DISTINCT on (target) id + from runs where target like ? AND ttype_id = ? + order by target, event_time desc + ) as x;" + target-patt ttype-id)) + +(define (pgdb:get-latest-run-cnt dbh ttype-id target-patt) + (let* ((cnt-result (pgdb:get-count-data-stats-target-latest dbh ttype-id target-patt)) + ;(cnt-row (car (cnt-result))) + (cnt 0) + ) + (for-each + (lambda (row) + (set! cnt (vector-ref row 0 ))) + cnt-result) + +cnt)) + +(define (pgdb:get-count-data-stats-latest-pattern dbh patt) + (dbi:get-rows + dbh + "SELECT count(*) from + (SELECT DISTINCT on (target) id + from runs where target ilike ? + order by target, event_time desc + ) as x;" + patt)) + +(define (pgdb:get-latest-run-cnt-by-pattern dbh target-patt) + (let* ((cnt-result (pgdb:get-count-data-stats-latest-pattern dbh target-patt)) + ;(cnt-row (car (cnt-result))) + (cnt 0) + ) + (for-each + (lambda (row) + (set! cnt (vector-ref row 0 ))) + cnt-result) + +cnt)) + + + + + +(define (pgdb:get-run-stats-history-given-target dbh ttype-id target-patt) + (dbi:get-rows + dbh + ;; "SELECT COUNT(t.id),t.status,r.target FROM tests AS t INNER JOIN runs AS r ON t.run_id=r.id + ;; WHERE t.state='COMPLETED' AND ttype_id=? AND r.target LIKE ? GROUP BY r.target,t.status;" + "SELECT r.run_name,COUNT(*) AS total, + SUM(CASE WHEN t.status='PASS' THEN 1 ELSE 0 END) AS pass, + SUM(CASE WHEN t.status='FAIL' THEN 1 ELSE 0 END) AS fail, + SUM(CASE WHEN t.status IN ('PASS','FAIL') THEN 0 ELSE 1 END) AS other + FROM tests AS t INNER JOIN runs AS r ON t.run_id=r.id + WHERE t.state like '%' AND ttype_id=? AND r.target LIKE ? 
+ GROUP BY r.run_name;" + ttype-id target-patt )) + +(define (pgdb:get-all-run-stats-target-slice dbh target-patt limit offset) + (dbi:get-rows + dbh + "SELECT r.target, r.run_name,r.event_time, COUNT(*) AS total, + SUM(CASE WHEN t.status='PASS' THEN 1 ELSE 0 END) AS pass, + SUM(CASE WHEN t.status='FAIL' THEN 1 ELSE 0 END) AS fail, + SUM(CASE WHEN t.status IN ('PASS','FAIL') THEN 0 ELSE 1 END) AS other + FROM tests AS t INNER JOIN runs AS r ON t.run_id=r.id + WHERE r.target LIKE ? + GROUP BY r.target,r.run_name, r.event_time + order by r.target,r.event_time desc limit ? offset ? ;" + target-patt limit offset)) + + +(define (pgdb:get-count-data-stats-target-slice dbh target-patt) + (dbi:get-rows + dbh + "SELECT count(*) from (SELECT r.target, r.run_name,r.event_time, COUNT(*) AS total + FROM tests AS t INNER JOIN runs AS r ON t.run_id=r.id + WHERE r.target LIKE ? + GROUP BY r.target,r.run_name, r.event_time + ) as x;" + target-patt)) + +(define (pgdb:get-slice-cnt dbh target-patt) + (let* ((cnt-result (pgdb:get-count-data-stats-target-slice dbh target-patt)) + ;(cnt-row (car (cnt-result))) + (cnt 0) + ) + (for-each + (lambda (row) + (set! cnt (vector-ref row 0 ))) + cnt-result) + +cnt)) + + +(define (pgdb:get-target-types dbh) + (dbi:get-rows dbh "SELECT id,target_spec FROM ttype;")) + + (define (pgdb:get-distict-target-slice dbh) + (dbi:get-rows dbh " select distinct on (split_part (target, '/', 1)) (split_part (target, '/', 1)) from runs;")) + + (define (pgdb:get-distict-target-slice3 dbh) + (dbi:get-rows dbh " select distinct on (split_part (target, '/', 3)) (split_part (target, '/', 3)) from runs;")) +;; +(define (pgdb:get-targets dbh target-patt) + (let ((ttypes (pgdb:get-target-types dbh))) + (map + (lambda (ttype-dat) + (let ((tt-id (vector-ref ttype-dat 0)) + (ttype (vector-ref ttype-dat 1))) + (cons ttype + (dbi:get-rows + dbh + "SELECT DISTINCT target FROM runs WHERE target LIKE ? 
AND ttype_id=?;" target-patt tt-id)) + )) + ttypes))) + +(define (pgdb:get-targets-of-type dbh ttype-id target-patt) + (dbi:get-rows dbh "SELECT DISTINCT target FROM runs WHERE target LIKE ? AND ttype_id=?;" target-patt ttype-id)) + +(define (pgdb:get-runs-by-target dbh targets run-patt) + (dbi:get-rows dbh "SELECT r.run_name, t.test_name, t.status, t.item_path, t.id, t.rundir, t.final_logf FROM runs as r INNER JOIN tests AS t ON t.run_id=r.id + WHERE t.state='COMPLETED' AND r.target like ? AND r.run_name like ?;" targets run-patt) +) + +(define (pgdb:get-test-by-id dbh id) + (dbi:get-rows dbh "SELECT t.test_name, t.item_path, t.rundir, t.final_logf FROM runs as r INNER JOIN tests AS t ON t.run_id=r.id + WHERE t.id = ?;" id) +) + +;;====================================================================== +;; V A R I O U S D A T A M A S S A G E R O U T I N E S +;;====================================================================== + +;; probably want to move these to a different model file + +;; create a hash of hashes with keys extracted from all-parts +;; using row-or-col to choose row or column +;; ht{row key}=>ht{col key}=>data +;; +;; fnum is the field number in the tuples to be split +;; + +(define (pgdb:mk-pattern dot type bp rel) + (let* ((typ (if (equal? type "all") + "%" + type)) + (dotprocess (if (equal? dot "all") + "%" + dot)) + (rel-num (if (equal? rel "") + "%" + rel)) + (pattern (conc "%/" bp "/" dotprocess "/" typ "_" rel-num))) +pattern)) + +(define (pgdb:coalesce-runs dbh runs all-parts row-or-col fnum) + (let* ((data (make-hash-table))) + + (for-each + (lambda (run) + (let* ((target (vector-ref run fnum)) + (parts (string-split target "/")) + (first (car parts)) + (rest (string-intersperse (cdr parts) "/")) + (coldat (hash-table-ref/default data first #f))) + (if (not coldat)(let ((newht (make-hash-table))) + (hash-table-set! data first newht) + (set! coldat newht))) + (hash-table-set! 
coldat rest run))) + runs) + data)) + + +(define (pgdb:coalesce-runs1 runs ) + (let* ((data (make-hash-table))) + + (for-each + (lambda (run) + (let* ((target (vector-ref run 0)) + (parts (string-split target "/")) + (first (car parts)) + (rest (string-intersperse (cdr parts) "/")) + (coldat (hash-table-ref/default data first #f))) + (if (not coldat)(let ((newht (make-hash-table))) + (hash-table-set! data first newht) + (set! coldat newht))) + (hash-table-set! coldat rest run))) + runs) + data)) + +;; given ordered data hash return a-keys +;; +(define (pgdb:ordered-data->a-keys ordered-data) + (sort (hash-table-keys ordered-data) string>=?)) + +;; given ordered data hash return b-keys +;; +(define (pgdb:ordered-data->b-keys ordered-data a-keys) + (delete-duplicates + (sort (apply + append + (map (lambda (sub-key) + (let ((subdat (hash-table-ref ordered-data sub-key))) + (hash-table-keys subdat))) + a-keys)) + string>=?))) + +;; given ordered data hash return a-keys +;; +(define (pgdb:ordered-data->a-keys ordered-data) + (sort (hash-table-keys ordered-data) string>=?)) + +;; given ordered data hash return b-keys +;; +(define (pgdb:ordered-data->b-keys ordered-data a-keys) + (delete-duplicates + (sort (apply + append + (map (lambda (sub-key) + (let ((subdat (hash-table-ref ordered-data sub-key))) + (hash-table-keys subdat))) + a-keys)) + string>=?))) + +(define (pgdb:coalesce-runs-by-slice runs slice) + (let* ((data (make-hash-table))) + (for-each + (lambda (run) + (let* ((target (vector-ref run 0)) + (run-name (vector-ref run 1)) + (parts (string-split target "/")) + (first (car parts)) + (rest (string-intersperse (cdr parts) "/")) + (coldat (hash-table-ref/default data rest #f))) + (if (not coldat)(let ((newht (make-hash-table))) + (hash-table-set! data rest newht) + (set! coldat newht))) + (hash-table-set! 
coldat run-name run))) + runs) + data)) + + +(define (pgdb:runs-to-hash runs ) + (let* ((data (make-hash-table))) + (for-each + (lambda (run) + (let* ((run-name (vector-ref run 0)) + (test (conc (vector-ref run 1) ":" (vector-ref run 3))) + (coldat (hash-table-ref/default data run-name #f))) + (if (not coldat)(let ((newht (make-hash-table))) + (hash-table-set! data run-name newht) + (set! coldat newht))) + (hash-table-set! coldat test run))) + runs) + data)) + +(define (pgdb:get-history-hash runs) + (let* ((data (make-hash-table))) + (for-each + (lambda (run) + (let* ((run-name (vector-ref run 0))) + (hash-table-set! data run-name run))) + runs) + data)) + +(define (pgdb:get-pg-lst tab2-pages) + (let loop ((i 1) + (lst `())) + (cond + ((> i tab2-pages ) + lst) + (else + (loop (+ i 1) (append lst (list i))))))) + + + +) + Index: process-inc.scm ================================================================== --- process-inc.scm +++ process-inc.scm @@ -16,207 +16,5 @@ ;; You should have received a copy of the GNU General Public License ;; along with Megatest. If not, see . ;;====================================================================== -;;====================================================================== -;; Process convience utils -;;====================================================================== - -(define (process:conservative-read port) - (let loop ((res "")) - (if (not (eof-object? (peek-char port))) - (loop (conc res (read-char port))) - res))) - -(define (process:cmd-run-with-stderr->list cmd . params) - ;; (print "Called with cmd=" cmd ", proc=" proc ", params=" params) -;; (handle-exceptions -;; exn -;; (begin -;; (print "ERROR: Failed to run command: " cmd " " (string-intersperse params " ")) -;; (print " " ((condition-property-accessor 'exn 'message) exn)) -;; #f) - (let-values (((fh fho pid fhe) (if (null? 
params) - (process* cmd) - (process* cmd params)))) - (let loop ((curr (read-line fh)) - (result '())) - (let ((errstr (process:conservative-read fhe))) - (if (not (string=? errstr "")) - (set! result (append result (list errstr))))) - (if (not (eof-object? curr)) - (loop (read-line fh) - (append result (list curr))) - (begin - (close-input-port fh) - (close-input-port fhe) - (close-output-port fho) - result))))) ;; ) - -(define (process:cmd-run-with-stderr-and-exitcode->list cmd . params) - ;; (print "Called with cmd=" cmd ", proc=" proc ", params=" params) -;; (handle-exceptions -;; exn -;; (begin -;; (print "ERROR: Failed to run command: " cmd " " (string-intersperse params " ")) -;; (print " " ((condition-property-accessor 'exn 'message) exn)) -;; #f) - (let-values (((fh fho pid fhe) (if (null? params) - (process* cmd) - (process* cmd params)))) - (let loop ((curr (read-line fh)) - (result '())) - (let ((errstr (process:conservative-read fhe))) - (if (not (string=? errstr "")) - (set! result (append result (list errstr))))) - (if (not (eof-object? curr)) - (loop (read-line fh) - (append result (list curr))) - (begin - (let-values (((anotherpid normalexit? exitstatus) (process-wait pid))) - (close-input-port fh) - (close-input-port fhe) - (close-output-port fho) - (list result (if normalexit? exitstatus -1)))))))) - -(define (process:cmd-run-proc-each-line cmd proc . params) - ;; (print "Called with cmd=" cmd ", proc=" proc ", params=" params) - (handle-exceptions - exn - (begin - (print "ERROR: Failed to run command: " cmd " " (string-intersperse params " ")) - (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) - (debug:print 5 *default-log-port* "exn=" (condition->list exn)) - #f) - (let-values (((fh fho pid) (if (null? params) - (process cmd) - (process cmd params)))) - (let loop ((curr (read-line fh)) - (result '())) - (if (not (eof-object? 
curr)) - (loop (read-line fh) - (append result (list (proc curr)))) - (begin - (close-input-port fh) - ;;(close-input-port fhe) - (close-output-port fho) - result)))))) - -(define (process:cmd-run-proc-each-line-alt cmd proc) - (let* ((fh (open-input-pipe cmd)) - (res (port-proc->list fh proc)) - (status (close-input-pipe fh))) - (if (eq? status 0) res #f))) - -(define (process:cmd-run->list cmd #!key (delta-env-alist-or-hash-table '())) - (common:with-env-vars - delta-env-alist-or-hash-table - (lambda () - (let* ((fh (open-input-pipe cmd)) - (res (port->list fh)) - (status (close-input-pipe fh))) - (list res status))))) - -(define (port->list fh) - (if (eof-object? fh) #f - (let loop ((curr (read-line fh)) - (result '())) - (if (not (eof-object? curr)) - (loop (read-line fh) - (append result (list curr))) - result)))) - -(define (port-proc->list fh proc) - (if (eof-object? fh) #f - (let loop ((curr (proc (read-line fh))) - (result '())) - (if (not (eof-object? curr)) - (loop (let ((l (read-line fh))) - (if (eof-object? l) l (proc l))) - (append result (list curr))) - result)))) - -;; here is an example line where the shell is sh or bash -;; "find / -print 2&>1 > findall.log" -(define (run-n-wait cmdline #!key (params #f)(print-cmd #f)(run-dir #f)) - (if print-cmd - (debug:print 0 *default-log-port* - (if (string? print-cmd) - print-cmd - "") - (if run-dir (conc "Run in " run-dir ";") "") - cmdline - (if params - (conc " " (string-intersperse params " ")) - ""))) - (if (and run-dir - (directory-exists? run-dir)) - (push-directory run-dir)) - (let ((pid (if params - (process-run cmdline params) - (process-run cmdline)))) - (let loop ((i 0)) - (let-values (((pid-val exit-status exit-code) (process-wait pid #t))) - (if (eq? pid-val 0) - (begin - (thread-sleep! 2) - (loop (+ i 1))) - (begin - (if (and run-dir - (directory-exists? 
run-dir)) - (pop-directory)) - (values pid-val exit-status exit-code))))))) - -;;====================================================================== -;; MISC PROCESS RELATED STUFF -;;====================================================================== - -(define (process:children proc) - (with-input-from-pipe - (conc "ps h --ppid " (current-process-id) " -o pid") - (lambda () - (let loop ((inl (read-line)) - (res '())) - (if (eof-object? inl) - (reverse res) - (let ((pid (string->number inl))) - (if proc (proc pid)) - (loop (read-line) (cons pid res)))))))) - -(define (process:alive? pid) - (handle-exceptions - exn - ;; possibly pid is a process not a child, look in /proc to see if it is running still - (common:file-exists? (conc "/proc/" pid)) - (let-values (((rpid exit-type exit-signal)(process-wait pid #t))) - (and (number? rpid) - (equal? rpid pid))))) - -(define (process:alive-on-host? host pid) - (let ((cmd (conc "ssh " host " ps -o pid= -p " pid))) - (handle-exceptions - exn - #f ;; anything goes wrong - assume the process in NOT running. - (with-input-from-pipe - cmd - (lambda () - (let loop ((inl (read-line))) - (if (eof-object? inl) - #f - (let* ((clean-str (string-substitute "^[^\\d]*([0-9]+)[^\\d]*$" "\\1" inl)) - (innum (string->number clean-str))) - (and innum - (eq? pid innum)))))))))) - -(define (process:get-sub-pids pid) - (with-input-from-pipe - (conc "pstree -A -p " pid) ;; | tr 'a-z\\-+`()\\.' ' ' " pid) - (lambda () - (let loop ((inl (read-line)) - (res '())) - (if (eof-object? inl) - (reverse res) - (let ((nums (map string->number - (string-split-fields "\\d+" inl)))) - (loop (read-line) - (append res nums)))))))) Index: processmod.scm ================================================================== --- processmod.scm +++ processmod.scm @@ -17,260 +17,18 @@ ;; along with Megatest. If not, see . 
;;====================================================================== (declare (unit processmod)) -;; (declare (uses commonmod)) (module processmod * (import scheme chicken data-structures extras) (import (prefix sqlite3 sqlite3:) posix typed-records srfi-18 srfi-69 format ports srfi-1 matchable regex directory-utils) -;; (import commonmod) -;; (use (prefix ulex ulex:)) - -;; (include "common_records.scm") - -;; -;; -;; ;; accept an alist or hash table containing envvar/env value pairs (value of #f causes unset) -;; ;; execute thunk in context of environment modified as per this list -;; ;; restore env to prior state then return value of eval'd thunk. -;; ;; ** this is not thread safe ** -;; (define (common:with-env-vars delta-env-alist-or-hash-table thunk) -;; (let* ((delta-env-alist (if (hash-table? delta-env-alist-or-hash-table) -;; (hash-table->alist delta-env-alist-or-hash-table) -;; delta-env-alist-or-hash-table)) -;; (restore-thunks -;; (filter -;; identity -;; (map (lambda (env-pair) -;; (let* ((env-var (car env-pair)) -;; (new-val (let ((tmp (cdr env-pair))) -;; (if (list? tmp) (car tmp) tmp))) -;; (current-val (get-environment-variable env-var)) -;; (restore-thunk -;; (cond -;; ((not current-val) (lambda () (unsetenv env-var))) -;; ((not (string? new-val)) #f) -;; ((eq? current-val new-val) #f) -;; (else -;; (lambda () (setenv env-var current-val)))))) -;; ;;(when (not (string? new-val)) -;; ;; (debug:print 0 *default-log-port* " PROBLEM: not a string: "new-val"\n from env-alist:\n"delta-env-alist) -;; ;; (pp delta-env-alist) -;; ;; (exit 1)) -;; -;; -;; (cond -;; ((not new-val) ;; modify env here -;; (unsetenv env-var)) -;; ((string? new-val) -;; (setenv env-var new-val))) -;; restore-thunk)) -;; delta-env-alist)))) -;; (let ((rv (thunk))) -;; (for-each (lambda (x) (x)) restore-thunks) ;; restore env to original state -;; rv))) -;; -;; (define (process:conservative-read port) -;; (let loop ((res "")) -;; (if (not (eof-object? 
(peek-char port))) -;; (loop (conc res (read-char port))) -;; res))) -;; -;; (define (process:cmd-run-with-stderr->list cmd . params) -;; ;; (print "Called with cmd=" cmd ", proc=" proc ", params=" params) -;; ;; (handle-exceptions -;; ;; exn -;; ;; (begin -;; ;; (print "ERROR: Failed to run command: " cmd " " (string-intersperse params " ")) -;; ;; (print " " ((condition-property-accessor 'exn 'message) exn)) -;; ;; #f) -;; (let-values (((fh fho pid fhe) (if (null? params) -;; (process* cmd) -;; (process* cmd params)))) -;; (let loop ((curr (read-line fh)) -;; (result '())) -;; (let ((errstr (process:conservative-read fhe))) -;; (if (not (string=? errstr "")) -;; (set! result (append result (list errstr))))) -;; (if (not (eof-object? curr)) -;; (loop (read-line fh) -;; (append result (list curr))) -;; (begin -;; (close-input-port fh) -;; (close-input-port fhe) -;; (close-output-port fho) -;; result))))) ;; ) -;; -;; (define (process:cmd-run-with-stderr-and-exitcode->list cmd . params) -;; ;; (print "Called with cmd=" cmd ", proc=" proc ", params=" params) -;; ;; (handle-exceptions -;; ;; exn -;; ;; (begin -;; ;; (print "ERROR: Failed to run command: " cmd " " (string-intersperse params " ")) -;; ;; (print " " ((condition-property-accessor 'exn 'message) exn)) -;; ;; #f) -;; (let-values (((fh fho pid fhe) (if (null? params) -;; (process* cmd) -;; (process* cmd params)))) -;; (let loop ((curr (read-line fh)) -;; (result '())) -;; (let ((errstr (process:conservative-read fhe))) -;; (if (not (string=? errstr "")) -;; (set! result (append result (list errstr))))) -;; (if (not (eof-object? curr)) -;; (loop (read-line fh) -;; (append result (list curr))) -;; (begin -;; (let-values (((anotherpid normalexit? exitstatus) (process-wait pid))) -;; (close-input-port fh) -;; (close-input-port fhe) -;; (close-output-port fho) -;; (list result (if normalexit? exitstatus -1)))))))) -;; -;; (define (process:cmd-run-proc-each-line cmd proc . 
params) -;; ;; (print "Called with cmd=" cmd ", proc=" proc ", params=" params) -;; (handle-exceptions -;; exn -;; (begin -;; (print "ERROR: Failed to run command: " cmd " " (string-intersperse params " ")) -;; (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) -;; (debug:print 5 *default-log-port* "exn=" (condition->list exn)) -;; #f) -;; (let-values (((fh fho pid) (if (null? params) -;; (process cmd) -;; (process cmd params)))) -;; (let loop ((curr (read-line fh)) -;; (result '())) -;; (if (not (eof-object? curr)) -;; (loop (read-line fh) -;; (append result (list (proc curr)))) -;; (begin -;; (close-input-port fh) -;; ;; (close-input-port fhe) -;; (close-output-port fho) -;; result)))))) -;; -;; (define (process:cmd-run-proc-each-line-alt cmd proc) -;; (let* ((fh (open-input-pipe cmd)) -;; (res (port-proc->list fh proc)) -;; (status (close-input-pipe fh))) -;; (if (eq? status 0) res #f))) -;; -;; (define (process:cmd-run->list cmd #!key (delta-env-alist-or-hash-table '())) -;; (common:with-env-vars -;; delta-env-alist-or-hash-table -;; (lambda () -;; (let* ((fh (open-input-pipe cmd)) -;; (res (port->list fh)) -;; (status (close-input-pipe fh))) -;; (list res status))))) -;; -;; (define (port->list fh) -;; (if (eof-object? fh) #f -;; (let loop ((curr (read-line fh)) -;; (result '())) -;; (if (not (eof-object? curr)) -;; (loop (read-line fh) -;; (append result (list curr))) -;; result)))) -;; -;; (define (port-proc->list fh proc) -;; (if (eof-object? fh) #f -;; (let loop ((curr (proc (read-line fh))) -;; (result '())) -;; (if (not (eof-object? curr)) -;; (loop (let ((l (read-line fh))) -;; (if (eof-object? 
l) l (proc l))) -;; (append result (list curr))) -;; result)))) -;; -;; ;; here is an example line where the shell is sh or bash -;; ;; "find / -print 2&>1 > findall.log" -;; (define (run-n-wait cmdline #!key (params #f)(print-cmd #f)(run-dir #f)) -;; (if print-cmd -;; (debug:print 0 *default-log-port* -;; (if (string? print-cmd) -;; print-cmd -;; "") -;; (if run-dir (conc "Run in " run-dir ";") "") -;; cmdline -;; (if params -;; (conc " " (string-intersperse params " ")) -;; ""))) -;; (if (and run-dir -;; (directory-exists? run-dir)) -;; (push-directory run-dir)) -;; (let ((pid (if params -;; (process-run cmdline params) -;; (process-run cmdline)))) -;; (let loop ((i 0)) -;; (let-values (((pid-val exit-status exit-code) (process-wait pid #t))) -;; (if (eq? pid-val 0) -;; (begin -;; (thread-sleep! 2) -;; (loop (+ i 1))) -;; (begin -;; (if (and run-dir -;; (directory-exists? run-dir)) -;; (pop-directory)) -;; (values pid-val exit-status exit-code))))))) -;; -;; ;;====================================================================== -;; ;; MISC PROCESS RELATED STUFF -;; ;;====================================================================== -;; -;; (define (process:children proc) -;; (with-input-from-pipe -;; (conc "ps h --ppid " (current-process-id) " -o pid") -;; (lambda () -;; (let loop ((inl (read-line)) -;; (res '())) -;; (if (eof-object? inl) -;; (reverse res) -;; (let ((pid (string->number inl))) -;; (if proc (proc pid)) -;; (loop (read-line) (cons pid res)))))))) -;; -;; (define (process:alive? pid) -;; (handle-exceptions -;; exn -;; ;; possibly pid is a process not a child, look in /proc to see if it is running still -;; (file-exists? (conc "/proc/" pid)) -;; (let-values (((rpid exit-type exit-signal)(process-wait pid #t))) -;; (and (number? rpid) -;; (equal? rpid pid))))) -;; -;; (define (process:alive-on-host? 
host pid) -;; (let ((cmd (conc "ssh " host " ps -o pid= -p " pid))) -;; (handle-exceptions -;; exn -;; #f ;; anything goes wrong - assume the process in NOT running. -;; (with-input-from-pipe -;; cmd -;; (lambda () -;; (let loop ((inl (read-line))) -;; (if (eof-object? inl) -;; #f -;; (let* ((clean-str (string-substitute "^[^\\d]*([0-9]+)[^\\d]*$" "\\1" inl)) -;; (innum (string->number clean-str))) -;; (and innum -;; (eq? pid innum)))))))))) -;; -;; (define (process:get-sub-pids pid) -;; (with-input-from-pipe -;; (conc "pstree -A -p " pid) ;; | tr 'a-z\\-+`()\\.' ' ' " pid) -;; (lambda () -;; (let loop ((inl (read-line)) -;; (res '())) -;; (if (eof-object? inl) -;; (reverse res) -;; (let ((nums (map string->number -;; (string-split-fields "\\d+" inl)))) -;; (loop (read-line) -;; (append res nums)))))))) + +;;====================================================================== +;; supporting functions +;;====================================================================== ) Index: rmt-inc.scm ================================================================== --- rmt-inc.scm +++ rmt-inc.scm @@ -15,954 +15,5 @@ ;; ;; You should have received a copy of the GNU General Public License ;; along with Megatest. If not, see . ;; ;;====================================================================== - -;; -;; THESE ARE ALL CALLED ON THE CLIENT SIDE!!! -;; - -;; generate entries for ~/.megatestrc with the following -;; -;; grep define ../rmt.scm | grep rmt: |perl -pi -e 's/\(define\s+\((\S+)\W.*$/\1/'|sort -u - -;; Globally used variables - -;; db stats -(define *db-stats* (make-hash-table)) ;; hash of vectors < count duration-total > -(define *db-stats-mutex* (make-mutex)) - - -(defstruct remote - (hh-dat (common:get-homehost)) ;; homehost record ( addr . 
hhflag ) - (server-url (if *toppath* (server:check-if-running *toppath*))) ;; (server:check-if-running *toppath*) #f)) - (last-server-check 0) ;; last time we checked to see if the server was alive - (conndat #f) - (transport *transport-type*) - (server-timeout (server:expiration-timeout)) - (force-server #f) - (ro-mode #f) - (ro-mode-checked #f)) ;; flag that indicates we have checked for ro-mode - -;; launching and hosts -(defstruct host - (reachable #f) - (last-update 0) - (last-used 0) - (last-cpuload 1)) - - -;;====================================================================== -;; S U P P O R T F U N C T I O N S -;;====================================================================== - -;; if a server is either running or in the process of starting call client:setup -;; else return #f to let the calling proc know that there is no server available -;; -(define (rmt:get-connection-info areapath #!key (area-dat #f)) ;; TODO: push areapath down. - (let* ((runremote (or area-dat *runremote*)) - (cinfo (if (remote? runremote) - (remote-conndat runremote) - #f))) - (if cinfo - cinfo - (if (server:check-if-running areapath) - (client:setup areapath) - #f)))) - -(define *send-receive-mutex* (make-mutex)) ;; should have separate mutex per run-id - -(define (rmt:send-receive cmd rid params #!key (attemptnum 1)(area-dat #f)) ;; start attemptnum at 1 so the modulo below works as expected - (rmt:open-qry-close-locally cmd 0 params)) -;; -;; ;; #;(common:telemetry-log (conc "rmt:"(->string cmd)) -;; ;; #;(define (rmt:send-receive cmd rid params #!key (attemptnum 1)(area-dat #f)) ;; start attemptnum at 1 so the modulo below works as expected -;; ;; -;; ;; #;(common:telemetry-log (conc "rmt:"(->string cmd)) -;; ;; payload: `((rid . ,rid) -;; ;; (params . ,params))) -;; ;; -;; ;; do all the prep locked under the rmt-mutex -;; (mutex-lock! *rmt-mutex*) -;; -;; ;; 1. check if server is started IFF cmd is a write OR if we are not on the homehost, store in runremote -;; ;; 2. 
check the age of the connections. refresh the connection if it is older than timeout-20 seconds. -;; ;; 3. do the query, if on homehost use local access -;; ;; -;; (let* ((start-time (current-seconds)) ;; snapshot time so all use cases get same value -;; (areapath *toppath*);; TODO - resolve from dbstruct to be compatible with multiple areas -;; (runremote (or area-dat -;; *runremote*)) -;; (readonly-mode (rmtmod:calc-ro-mode runremote *toppath*))) -;; -;; ;; ensure we have a record for our connection for given area -;; (if (not runremote) ;; can remove this one. should never get here. -;; (begin -;; (set! *runremote* (make-remote)) -;; (set! runremote *runremote*))) ;; new runremote will come from this on next iteration -;; -;; ;; ensure we have a homehost record -;; (if (not (pair? (remote-hh-dat runremote))) ;; not on homehost -;; (thread-sleep! 0.1) ;; since we shouldn't get here, delay a little -;; (remote-hh-dat-set! runremote (common:get-homehost))) -;; -;; ;;(print "BB> readonly-mode is "readonly-mode" dbfile is "dbfile) -;; (cond -;; ;; give up if more than 15 attempts -;; ((> attemptnum 15) -;; (debug:print 0 *default-log-port* "ERROR: 15 tries to start/connect to server. Giving up.") -;; (exit 1)) -;; -;; ;; readonly mode, read request- handle it - case 2 -;; ((and readonly-mode -;; (member cmd api:read-only-queries)) -;; (mutex-unlock! *rmt-mutex*) -;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 2") -;; (rmt:open-qry-close-locally cmd 0 params) -;; ) -;; -;; ;; readonly mode, write request. Do nothing, return #f -;; (readonly-mode (extras-readonly-mode *rmt-mutex* *default-log-port* cmd params)) -;; -;; ;; This block was for pre-emptively resetting the connection if there had been no communication for some time. -;; ;; I don't think it adds any value. If the server is not there, just fail and start a new connection. -;; ;; also, the expire-time calculation might not be correct. 
We want, time-since-last-server-access > (server:get-timeout) -;; ;; -;; ;; reset the connection if it has been unused too long -;; ((and runremote -;; (remote-conndat runremote) -;; (> (current-seconds) ;; if it has been more than server-timeout seconds since last contact, close this connection and start a new on -;; (+ (http-transport:server-dat-get-last-access (remote-conndat runremote)) -;; (remote-server-timeout runremote)))) -;; (debug:print-info 0 *default-log-port* "Connection to " (remote-server-url runremote) " expired due to no accesses, forcing new connection.") -;; (http-transport:close-connections area-dat: runremote) -;; (remote-conndat-set! runremote #f) ;; invalidate the connection, thus forcing a new connection. -;; (mutex-unlock! *rmt-mutex*) -;; (rmt:send-receive cmd rid params attemptnum: attemptnum)) -;; -;; ;; on homehost and this is a read -;; ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required -;; (cdr (remote-hh-dat runremote)) ;; on homehost -;; (member cmd api:read-only-queries)) ;; this is a read -;; (mutex-unlock! *rmt-mutex*) -;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 5") -;; (rmt:open-qry-close-locally cmd 0 params)) -;; -;; ;; on homehost and this is a write, we already have a server, but server has died -;; ((and (cdr (remote-hh-dat runremote)) ;; on homehost -;; (not (member cmd api:read-only-queries)) ;; this is a write -;; (remote-server-url runremote) ;; have a server -;; (not (server:ping (remote-server-url runremote)))) ;; server has died. NOTE: this is not a cheap call! Need better approach. -;; (set! *runremote* (make-remote)) -;; (remote-force-server-set! runremote (common:force-server?)) -;; (mutex-unlock! 
*rmt-mutex*) -;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 6") -;; (rmt:send-receive cmd rid params attemptnum: attemptnum)) -;; -;; ;; on homehost and this is a write, we already have a server -;; ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required -;; (cdr (remote-hh-dat runremote)) ;; on homehost -;; (not (member cmd api:read-only-queries)) ;; this is a write -;; (remote-server-url runremote)) ;; have a server -;; (mutex-unlock! *rmt-mutex*) -;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 4.1") -;; (rmt:open-qry-close-locally cmd 0 params)) -;; -;; ;; on homehost, no server contact made and this is a write, passively start a server -;; ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required -;; (cdr (remote-hh-dat runremote)) ;; have homehost -;; (not (remote-server-url runremote)) ;; no connection yet -;; (not (member cmd api:read-only-queries))) ;; not a read-only query -;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 8") -;; (let ((server-url (server:check-if-running *toppath*))) ;; (server:read-dotserver->url *toppath*))) ;; (server:check-if-running *toppath*))) ;; Do NOT want to run server:check-if-running - very expensive to do for every write call -;; (if server-url -;; (remote-server-url-set! runremote server-url) ;; the string can be consumed by the client setup if needed -;; (if (common:force-server?) -;; (server:start-and-wait *toppath*) -;; (server:kind-run *toppath*)))) -;; (remote-force-server-set! runremote (common:force-server?)) -;; (mutex-unlock! 
*rmt-mutex*) -;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 8.1") -;; (rmt:open-qry-close-locally cmd 0 params)) -;; -;; ((or (and (remote-force-server runremote) ;; we are forcing a server and don't yet have a connection to one -;; (not (remote-conndat runremote))) -;; (and (not (cdr (remote-hh-dat runremote))) ;; not on a homehost -;; (not (remote-conndat runremote)))) ;; and no connection -;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 9, hh-dat: " (remote-hh-dat runremote) " conndat: " (remote-conndat runremote)) -;; (mutex-unlock! *rmt-mutex*) -;; (if (not (server:check-if-running *toppath*)) ;; who knows, maybe one has started up? -;; (server:start-and-wait *toppath*)) -;; (remote-conndat-set! runremote (rmt:get-connection-info *toppath*)) ;; calls client:setup which calls client:setup-http -;; (rmt:send-receive cmd rid params attemptnum: attemptnum)) ;; TODO: add back-off timeout as -;; -;; ;; all set up if get this far, dispatch the query -;; ((and (not (remote-force-server runremote)) -;; (cdr (remote-hh-dat runremote))) ;; we are on homehost -;; (mutex-unlock! *rmt-mutex*) -;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 10") -;; (rmt:open-qry-close-locally cmd (if rid rid 0) params)) -;; -;; ;; not on homehost, do server query -;; (else (extras-case-11 *default-log-port* runremote cmd params attemptnum rid))))) - -;; bunch of small functions factored out of send-receive to make debug easier -;; - -(define (extras-case-11 *default-log-port* runremote cmd params attemptnum rid) - ;; (mutex-unlock! *rmt-mutex*) - (debug:print-info 12 *default-log-port* "rmt:send-receive, case 9") - ;; (mutex-lock! *rmt-mutex*) - (let* ((conninfo (remote-conndat runremote)) - (dat (case (remote-transport runremote) - ((http) (condition-case ;; handling here has - ;; caused a lot of - ;; problems. 
However it - ;; is needed to deal with - ;; attemtped - ;; communication to - ;; servers that have gone - ;; away - (http-transport:client-api-send-receive 0 conninfo cmd params) - ((commfail)(vector #f "communications fail")) - ((exn)(vector #f "other fail" (print-call-chain))))) - (else - (debug:print 0 *default-log-port* "ERROR: transport " (remote-transport runremote) " not supported") - (exit)))) - (success (if (vector? dat) (vector-ref dat 0) #f)) - (res (if (vector? dat) (vector-ref dat 1) #f))) - (if (and (vector? conninfo) (< 5 (vector-length conninfo))) - (http-transport:server-dat-update-last-access conninfo) ;; refresh access time - (begin - (debug:print 0 *default-log-port* "INFO: Should not get here! conninfo=" conninfo) - (set! conninfo #f) - (remote-conndat-set! *runremote* #f) ;; NOTE: *runremote* is global copy of runremote. Purpose: factor out global. - (http-transport:close-connections area-dat: runremote))) - (debug:print-info 13 *default-log-port* "rmt:send-receive, case 9. conninfo=" conninfo " dat=" dat " runremote = " runremote) - (mutex-unlock! *rmt-mutex*) - (if success ;; success only tells us that the transport was - ;; successful, have to examine the data to see if - ;; there was a detected issue at the other end - (extras-transport-succeded *default-log-port* *rmt-mutex* attemptnum runremote res params rid cmd) - (extras-transport-failed *default-log-port* *rmt-mutex* attemptnum runremote cmd rid params) - ))) - -;; (define (rmt:update-db-stats run-id rawcmd params duration) -;; (mutex-lock! *db-stats-mutex*) -;; (handle-exceptions -;; exn -;; (begin -;; (debug:print 0 *default-log-port* "WARNING: stats collection failed in update-db-stats") -;; (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) -;; (print "exn=" (condition->list exn)) -;; #f) ;; if this fails we don't care, it is just stats -;; (let* ((cmd (conc "run-id=" run-id " " (if (eq? 
rawcmd 'general-call) (car params) rawcmd))) -;; (stat-vec (hash-table-ref/default *db-stats* cmd #f))) -;; (if (not (vector? stat-vec)) -;; (let ((newvec (vector 0 0))) -;; (hash-table-set! *db-stats* cmd newvec) -;; (set! stat-vec newvec))) -;; (vector-set! stat-vec 0 (+ (vector-ref stat-vec 0) 1)) -;; (vector-set! stat-vec 1 (+ (vector-ref stat-vec 1) duration)))) -;; (mutex-unlock! *db-stats-mutex*)) - -(define (rmt:print-db-stats) - (let ((fmtstr "~40a~7-d~9-d~20,2-f")) ;; "~20,2-f" - (debug:print 18 *default-log-port* "DB Stats\n========") - (debug:print 18 *default-log-port* (format #f "~40a~8a~10a~10a" "Cmd" "Count" "TotTime" "Avg")) - (for-each (lambda (cmd) - (let ((cmd-dat (hash-table-ref *db-stats* cmd))) - (debug:print 18 *default-log-port* (format #f fmtstr cmd (vector-ref cmd-dat 0) (vector-ref cmd-dat 1) (/ (vector-ref cmd-dat 1)(vector-ref cmd-dat 0)))))) - (sort (hash-table-keys *db-stats*) - (lambda (a b) - (> (vector-ref (hash-table-ref *db-stats* a) 0) - (vector-ref (hash-table-ref *db-stats* b) 0))))))) - -(define (rmt:get-max-query-average run-id) - (mutex-lock! *db-stats-mutex*) - (let* ((runkey (conc "run-id=" run-id " ")) - (cmds (filter (lambda (x) - (substring-index runkey x)) - (hash-table-keys *db-stats*))) - (res (if (null? cmds) - (cons 'none 0) - (let loop ((cmd (car cmds)) - (tal (cdr cmds)) - (max-cmd (car cmds)) - (res 0)) - (let* ((cmd-dat (hash-table-ref *db-stats* cmd)) - (tot (vector-ref cmd-dat 0)) - (curravg (/ (vector-ref cmd-dat 1) (vector-ref cmd-dat 0))) ;; count is never zero by construction - (currmax (max res curravg)) - (newmax-cmd (if (> curravg res) cmd max-cmd))) - (if (null? tal) - (if (> tot 10) - (cons newmax-cmd currmax) - (cons 'none 0)) - (loop (car tal)(cdr tal) newmax-cmd currmax))))))) - (mutex-unlock! 
*db-stats-mutex*) - res)) - -(define (rmt:open-qry-close-locally cmd run-id params #!key (remretries 5)) - (let* ((qry-is-write (not (member cmd api:read-only-queries))) - (db-file-path (db:dbfile-path)) ;; 0)) - (dbstruct-local (db:setup #t)) ;; make-dbr:dbstruct path: dbdir local: #t))) - (read-only (not (file-write-access? db-file-path))) - (start (current-milliseconds)) - (resdat (if (not (and read-only qry-is-write)) - (let ((v (api:execute-requests dbstruct-local (vector (symbol->string cmd) params)))) - (handle-exceptions ;; there has been a long history of receiving strange errors from values returned by the client when things go wrong.. - exn ;; This is an attempt to detect that situation and recover gracefully - (begin - (debug:print 0 *default-log-port* "ERROR: bad data from server " v " message: " ((condition-property-accessor 'exn 'message) exn)) - (vector #t '())) ;; should always get a vector but if something goes wrong return a dummy - (if (and (vector? v) - (> (vector-length v) 1)) - (let ((newvec (vector (vector-ref v 0)(vector-ref v 1)))) - newvec) ;; by copying the vector while inside the error handler we should force the detection of a corrupted record - (vector #t '())))) ;; we could also check that the returned types are valid - (vector #t '()))) - (success (vector-ref resdat 0)) - (res (vector-ref resdat 1)) - (duration (- (current-milliseconds) start))) - (if (and read-only qry-is-write) - (debug:print 0 *default-log-port* "ERROR: attempt to write to read-only database ignored. cmd=" cmd)) - (if (not success) - (if (> remretries 0) - (begin - (debug:print-error 0 *default-log-port* "local query failed. Trying again.") - (thread-sleep! 
(/ (random 5000) 1000)) ;; some random delay - (rmt:open-qry-close-locally cmd run-id params remretries: (- remretries 1))) - (begin - (debug:print-error 0 *default-log-port* "too many retries in rmt:open-qry-close-locally, giving up") - #f)) - (begin - ;; (rmt:update-db-stats run-id cmd params duration) - ;; mark this run as dirty if this was a write, the watchdog is responsible for syncing it - (if qry-is-write - (let ((start-time (current-seconds))) - (mutex-lock! *db-multi-sync-mutex*) -/ (set! *db-last-access* start-time) ;; THIS IS PROBABLY USELESS? (we are on a client) - (mutex-unlock! *db-multi-sync-mutex*))))) - res)) - -(define (rmt:send-receive-no-auto-client-setup connection-info cmd run-id params) - (let* ((run-id (if run-id run-id 0)) - (res (handle-exceptions - exn - #f - (http-transport:client-api-send-receive run-id connection-info cmd params)))) - (if (and res (vector-ref res 0)) - (vector-ref res 1) ;;; YES!! THIS IS CORRECT!! CHANGE IT HERE, THEN CHANGE rmt:send-receive ALSO!!! - #f))) - -;; ;; Wrap json library for strings (why the ports crap in the first place?) 
-;; (define (rmt:dat->json-str dat) -;; (with-output-to-string -;; (lambda () -;; (json-write dat)))) -;; -;; (define (rmt:json-str->dat json-str) -;; (with-input-from-string json-str -;; (lambda () -;; (json-read)))) - -;;====================================================================== -;; -;; A C T U A L A P I C A L L S -;; -;;====================================================================== - -;;====================================================================== -;; S E R V E R -;;====================================================================== - -(define (rmt:kill-server run-id) - (rmt:send-receive 'kill-server run-id (list run-id))) - -(define (rmt:start-server run-id) - (rmt:send-receive 'start-server 0 (list run-id))) - -;;====================================================================== -;; M I S C -;;====================================================================== - -(define (rmt:login run-id) - (rmt:send-receive 'login run-id (list *toppath* megatest-version *my-client-signature*))) - -;; This login does no retries under the hood - it acts a bit like a ping. -;; Deprecated for nmsg-transport. -;; -(define (rmt:login-no-auto-client-setup connection-info) - (case *transport-type* ;; run-id of 0 is just a placeholder - ((http)(rmt:send-receive-no-auto-client-setup connection-info 'login 0 (list *toppath* megatest-version *my-client-signature*))) - ;;((nmsg)(nmsg-transport:client-api-send-receive run-id connection-info 'login (list *toppath* megatest-version run-id *my-client-signature*))) - )) - -;; hand off a call to one of the db:queries statements -;; added run-id to make looking up the correct db possible -;; -(define (rmt:general-call stmtname run-id . 
params) - (rmt:send-receive 'general-call run-id (append (list stmtname run-id) params))) - - -;; given a hostname, return a pair of cpu load and update time representing latest intelligence from tests running on that host -(define (rmt:get-latest-host-load hostname) - (rmt:send-receive 'get-latest-host-load 0 (list hostname))) - -;; (define (rmt:sync-inmem->db run-id) -;; (rmt:send-receive 'sync-inmem->db run-id '())) - -(define (rmt:sdb-qry qry val run-id) - ;; add caching if qry is 'getid or 'getstr - (rmt:send-receive 'sdb-qry run-id (list qry val))) - -;; NOT COMPLETED -(define (rmt:runtests user run-id testpatt params) - (rmt:send-receive 'runtests run-id testpatt)) - -(define (rmt:get-run-record-ids target run keynames test-patt) - (rmt:send-receive 'get-run-record-ids #f (list target run keynames test-patt))) - -(define (rmt:get-changed-record-ids since-time) - (rmt:send-receive 'get-changed-record-ids #f (list since-time)) ) - -;;====================================================================== -;; T E S T M E T A -;;====================================================================== - -(define (rmt:get-tests-tags) - (rmt:send-receive 'get-tests-tags #f '())) - -;;====================================================================== -;; K E Y S -;;====================================================================== - -;; These require run-id because the values come from the run! -;; -(define (rmt:get-key-val-pairs run-id) - (rmt:send-receive 'get-key-val-pairs run-id (list run-id))) - -(define (rmt:get-keys) - (if *db-keys* *db-keys* - (let ((res (rmt:send-receive 'get-keys #f '()))) - (set! *db-keys* res) - res))) - -(define (rmt:get-keys-write) ;; dummy query to force server start - (let ((res (rmt:send-receive 'get-keys-write #f '()))) - (set! 
*db-keys* res) - res)) - -;; we don't reuse run-id's (except possibly *after* a db cleanup) so it is safe -;; to cache the resuls in a hash -;; -(define (rmt:get-key-vals run-id) - (or (hash-table-ref/default *keyvals* run-id #f) - (let ((res (rmt:send-receive 'get-key-vals #f (list run-id)))) - (hash-table-set! *keyvals* run-id res) - res))) - -(define (rmt:get-targets) - (rmt:send-receive 'get-targets #f '())) - -(define (rmt:get-target run-id) - (rmt:send-receive 'get-target run-id (list run-id))) - -(define (rmt:get-run-times runpatt targetpatt) - (rmt:send-receive 'get-run-times #f (list runpatt targetpatt ))) - - -;;====================================================================== -;; T E S T S -;;====================================================================== - -;; Just some syntatic sugar -(define (rmt:register-test run-id test-name item-path) - (rmt:general-call 'register-test run-id run-id test-name item-path)) - -(define (rmt:get-test-id run-id testname item-path) - (rmt:send-receive 'get-test-id run-id (list run-id testname item-path))) - -;; run-id is NOT used -;; -(define (rmt:get-test-info-by-id run-id test-id) - (if (number? test-id) - (rmt:send-receive 'get-test-info-by-id run-id (list run-id test-id)) - (begin - (debug:print 0 *default-log-port* "WARNING: Bad data handed to rmt:get-test-info-by-id run-id=" run-id ", test-id=" test-id) - (print-call-chain (current-error-port)) - #f))) - -(define (rmt:test-get-rundir-from-test-id run-id test-id) - (rmt:send-receive 'test-get-rundir-from-test-id run-id (list run-id test-id))) - -(define (rmt:open-test-db-by-test-id run-id test-id #!key (work-area #f)) - (let* ((test-path (if (string? 
work-area) - work-area - (rmt:test-get-rundir-from-test-id run-id test-id)))) - (debug:print 3 *default-log-port* "TEST PATH: " test-path) - (open-test-db test-path))) - -;; WARNING: This currently bypasses the transaction wrapped writes system -(define (rmt:test-set-state-status-by-id run-id test-id newstate newstatus newcomment) - (rmt:send-receive 'test-set-state-status-by-id run-id (list run-id test-id newstate newstatus newcomment))) - -(define (rmt:set-tests-state-status run-id testnames currstate currstatus newstate newstatus) - (rmt:send-receive 'set-tests-state-status run-id (list run-id testnames currstate currstatus newstate newstatus))) - -(define (rmt:get-tests-for-run run-id testpatt states statuses offset limit not-in sort-by sort-order qryvals last-update mode) - ;; (if (number? run-id) - (rmt:send-receive 'get-tests-for-run run-id (list run-id testpatt states statuses offset limit not-in sort-by sort-order qryvals last-update mode))) - ;; (begin - ;; (debug:print-error 0 *default-log-port* "rmt:get-tests-for-run called with bad run-id=" run-id) - ;; (print-call-chain (current-error-port)) - ;; '()))) - -;; get stuff via synchash -(define (rmt:synchash-get run-id proc synckey keynum params) - (rmt:send-receive 'synchash-get run-id (list run-id proc synckey keynum params))) - -(define (rmt:get-tests-for-run-mindata run-id testpatt states status not-in) - (rmt:send-receive 'get-tests-for-run-mindata run-id (list run-id testpatt states status not-in))) - -;; IDEA: Threadify these - they spend a lot of time waiting ... -;; -(define (rmt:get-tests-for-runs-mindata run-ids testpatt states status not-in) - (let ((multi-run-mutex (make-mutex)) - (run-id-list (if run-ids - run-ids - (rmt:get-all-run-ids))) - (result '())) - (if (null? 
run-id-list) - '() - (let loop ((hed (car run-id-list)) - (tal (cdr run-id-list)) - (threads '())) - (if (> (length threads) 5) - (loop hed tal (filter (lambda (th)(not (member (thread-state th) '(terminated dead)))) threads)) - (let* ((newthread (make-thread - (lambda () - (let ((res (rmt:send-receive 'get-tests-for-run-mindata hed (list hed testpatt states status not-in)))) - (if (list? res) - (begin - (mutex-lock! multi-run-mutex) - (set! result (append result res)) - (mutex-unlock! multi-run-mutex)) - (debug:print-error 0 *default-log-port* "get-tests-for-run-mindata failed for run-id " hed ", testpatt " testpatt ", states " states ", status " status ", not-in " not-in)))) - (conc "multi-run-thread for run-id " hed))) - (newthreads (cons newthread threads))) - (thread-start! newthread) - (thread-sleep! 0.05) ;; give that thread some time to start - (if (null? tal) - newthreads - (loop (car tal)(cdr tal) newthreads)))))) - result)) - -;; ;; IDEA: Threadify these - they spend a lot of time waiting ... 
-;; ;; -;; (define (rmt:get-tests-for-runs-mindata run-ids testpatt states status not-in) -;; (let ((run-id-list (if run-ids -;; run-ids -;; (rmt:get-all-run-ids)))) -;; (apply append (map (lambda (run-id) -;; (rmt:send-receive 'get-tests-for-run-mindata run-id (list run-ids testpatt states status not-in))) -;; run-id-list)))) - -(define (rmt:delete-test-records run-id test-id) - (rmt:send-receive 'delete-test-records run-id (list run-id test-id))) - -;; This is not needed as test steps are deleted on test delete call -;; -;; (define (rmt:delete-test-step-records run-id test-id) -;; (rmt:send-receive 'delete-test-step-records run-id (list run-id test-id))) - -(define (rmt:test-set-state-status run-id test-id state status msg) - (rmt:send-receive 'test-set-state-status run-id (list run-id test-id state status msg))) - -(define (rmt:test-toplevel-num-items run-id test-name) - (rmt:send-receive 'test-toplevel-num-items run-id (list run-id test-name))) - -;; (define (rmt:get-previous-test-run-record run-id test-name item-path) -;; (rmt:send-receive 'get-previous-test-run-record run-id (list run-id test-name item-path))) - -(define (rmt:get-matching-previous-test-run-records run-id test-name item-path) - (rmt:send-receive 'get-matching-previous-test-run-records run-id (list run-id test-name item-path))) - -(define (rmt:test-get-logfile-info run-id test-name) - (rmt:send-receive 'test-get-logfile-info run-id (list run-id test-name))) - -(define (rmt:test-get-records-for-index-file run-id test-name) - (rmt:send-receive 'test-get-records-for-index-file run-id (list run-id test-name))) - -(define (rmt:get-testinfo-state-status run-id test-id) - (rmt:send-receive 'get-testinfo-state-status run-id (list run-id test-id))) - -(define (rmt:test-set-log! run-id test-id logf) - (if (string? 
logf)(rmt:general-call 'test-set-log run-id logf test-id))) - -(define (rmt:test-set-top-process-pid run-id test-id pid) - (rmt:send-receive 'test-set-top-process-pid run-id (list run-id test-id pid))) - -(define (rmt:test-get-top-process-pid run-id test-id) - (rmt:send-receive 'test-get-top-process-pid run-id (list run-id test-id))) - -(define (rmt:get-run-ids-matching-target keynames target res runname testpatt statepatt statuspatt) - (rmt:send-receive 'get-run-ids-matching-target #f (list keynames target res runname testpatt statepatt statuspatt))) - -;; NOTE: This will open and access ALL run databases. -;; -(define (rmt:test-get-paths-matching-keynames-target-new keynames target res testpatt statepatt statuspatt runname) - (let ((run-ids (rmt:get-run-ids-matching-target keynames target res runname testpatt statepatt statuspatt))) - (apply append - (map (lambda (run-id) - (rmt:send-receive 'test-get-paths-matching-keynames-target-new run-id (list run-id keynames target res testpatt statepatt statuspatt runname))) - run-ids)))) - -;; (define (rmt:get-run-ids-matching keynames target res) -;; (rmt:send-receive #f 'get-run-ids-matching (list keynames target res))) - -(define (rmt:get-prereqs-not-met run-id waitons ref-test-name ref-item-path #!key (mode '(normal))(itemmaps #f)) - (rmt:send-receive 'get-prereqs-not-met run-id (list run-id waitons ref-test-name ref-item-path mode itemmaps))) - -(define (rmt:get-count-tests-running-for-run-id run-id) - (rmt:send-receive 'get-count-tests-running-for-run-id run-id (list run-id))) - -(define (rmt:get-not-completed-cnt run-id) - (rmt:send-receive 'get-not-completed-cnt run-id (list run-id))) - - -;; Statistical queries - -(define (rmt:get-count-tests-running run-id) - (rmt:send-receive 'get-count-tests-running run-id (list run-id))) - -(define (rmt:get-count-tests-running-for-testname run-id testname) - (rmt:send-receive 'get-count-tests-running-for-testname run-id (list run-id testname))) - -(define 
(rmt:get-count-tests-running-in-jobgroup run-id jobgroup) - (rmt:send-receive 'get-count-tests-running-in-jobgroup run-id (list run-id jobgroup))) - -;; state and status are extra hints not usually used in the calculation -;; -(define (rmt:set-state-status-and-roll-up-items run-id test-name item-path state status comment) - (rmt:send-receive 'set-state-status-and-roll-up-items run-id (list run-id test-name item-path state status comment))) - -(define (rmt:set-state-status-and-roll-up-run run-id state status) - (rmt:send-receive 'set-state-status-and-roll-up-run run-id (list run-id state status))) - - -(define (rmt:update-pass-fail-counts run-id test-name) - (rmt:general-call 'update-pass-fail-counts run-id test-name test-name test-name)) - -(define (rmt:top-test-set-per-pf-counts run-id test-name) - (rmt:send-receive 'top-test-set-per-pf-counts run-id (list run-id test-name))) - -(define (rmt:get-raw-run-stats run-id) - (rmt:send-receive 'get-raw-run-stats run-id (list run-id))) - -(define (rmt:get-test-times runname target) - (rmt:send-receive 'get-test-times #f (list runname target ))) - -;;====================================================================== -;; R U N S -;;====================================================================== - -(define (rmt:get-run-info run-id) - (rmt:send-receive 'get-run-info run-id (list run-id))) - -(define (rmt:get-num-runs runpatt) - (rmt:send-receive 'get-num-runs #f (list runpatt))) - -(define (rmt:get-runs-cnt-by-patt runpatt targetpatt keys) - (rmt:send-receive 'get-runs-cnt-by-patt #f (list runpatt targetpatt keys))) - -;; Use the special run-id == #f scenario here since there is no run yet -(define (rmt:register-run keyvals runname state status user contour) - (rmt:send-receive 'register-run #f (list keyvals runname state status user contour))) - -(define (rmt:get-run-name-from-id run-id) - (rmt:send-receive 'get-run-name-from-id run-id (list run-id))) - -(define (rmt:delete-run run-id) - (rmt:send-receive 
'delete-run run-id (list run-id))) - -(define (rmt:update-run-stats run-id stats) - (rmt:send-receive 'update-run-stats #f (list run-id stats))) - -(define (rmt:delete-old-deleted-test-records) - (rmt:send-receive 'delete-old-deleted-test-records #f '())) - -(define (rmt:get-runs runpatt count offset keypatts) - (rmt:send-receive 'get-runs #f (list runpatt count offset keypatts))) - -(define (rmt:simple-get-runs runpatt count offset target) - (rmt:send-receive 'simple-get-runs #f (list runpatt count offset target))) - -(define (rmt:get-all-run-ids) - (rmt:send-receive 'get-all-run-ids #f '())) - -(define (rmt:get-prev-run-ids run-id) - (rmt:send-receive 'get-prev-run-ids #f (list run-id))) - -(define (rmt:lock/unlock-run run-id lock unlock user) - (rmt:send-receive 'lock/unlock-run #f (list run-id lock unlock user))) - -;; set/get status -(define (rmt:get-run-status run-id) - (rmt:send-receive 'get-run-status #f (list run-id))) - -(define (rmt:get-run-state run-id) - (rmt:send-receive 'get-run-state #f (list run-id))) - - -(define (rmt:set-run-status run-id run-status #!key (msg #f)) - (rmt:send-receive 'set-run-status #f (list run-id run-status msg))) - -(define (rmt:set-run-state-status run-id state status ) - (rmt:send-receive 'set-run-state-status #f (list run-id state status))) - - -(define (rmt:update-run-event_time run-id) - (rmt:send-receive 'update-run-event_time #f (list run-id))) - -(define (rmt:get-runs-by-patt keys runnamepatt targpatt offset limit fields last-runs-update #!key (sort-order "asc")) ;; fields of #f uses default - (rmt:send-receive 'get-runs-by-patt #f (list keys runnamepatt targpatt offset limit fields last-runs-update sort-order))) - -(define (rmt:find-and-mark-incomplete run-id ovr-deadtime) - ;; (if (rmt:send-receive 'have-incompletes? 
run-id (list run-id ovr-deadtime)) - (rmt:send-receive 'mark-incomplete run-id (list run-id ovr-deadtime))) ;; ) - -(define (rmt:get-main-run-stats run-id) - (rmt:send-receive 'get-main-run-stats #f (list run-id))) - -(define (rmt:get-var varname) - (rmt:send-receive 'get-var #f (list varname))) - -(define (rmt:del-var varname) - (rmt:send-receive 'del-var #f (list varname))) - -(define (rmt:set-var varname value) - (rmt:send-receive 'set-var #f (list varname value))) - -;;====================================================================== -;; M U L T I R U N Q U E R I E S -;;====================================================================== - -;; Need to move this to multi-run section and make associated changes -(define (rmt:find-and-mark-incomplete-all-runs #!key (ovr-deadtime #f)) - (let ((run-ids (rmt:get-all-run-ids))) - (for-each (lambda (run-id) - (rmt:find-and-mark-incomplete run-id ovr-deadtime)) - run-ids))) - -;; get the previous record for when this test was run where all keys match but runname -;; returns #f if no such test found, returns a single test record if found -;; -;; Run this at the client end since we have to connect to multiple run-id dbs -;; -(define (rmt:get-previous-test-run-record run-id test-name item-path) - (let* ((keyvals (rmt:get-key-val-pairs run-id)) - (keys (rmt:get-keys)) - (selstr (string-intersperse keys ",")) - (qrystr (string-intersperse (map (lambda (x)(conc x "=?")) keys) " AND "))) - (if (not keyvals) - #f - (let ((prev-run-ids (rmt:get-prev-run-ids run-id))) - ;; for each run starting with the most recent look to see if there is a matching test - ;; if found then return that matching test record - (debug:print 4 *default-log-port* "selstr: " selstr ", qrystr: " qrystr ", keyvals: " keyvals ", previous run ids found: " prev-run-ids) - (if (null? 
prev-run-ids) #f - (let loop ((hed (car prev-run-ids)) - (tal (cdr prev-run-ids))) - (let ((results (rmt:get-tests-for-run hed (conc test-name "/" item-path) '() '() ;; run-id testpatt states statuses - #f #f #f ;; offset limit not-in hide/not-hide - #f #f #f #f 'normal))) ;; sort-by sort-order qryvals last-update mode - (debug:print 4 *default-log-port* "Got tests for run-id " run-id ", test-name " test-name ", item-path " item-path ": " results) - (if (and (null? results) - (not (null? tal))) - (loop (car tal)(cdr tal)) - (if (null? results) #f - (car results)))))))))) - -(define (rmt:get-run-stats) - (rmt:send-receive 'get-run-stats #f '())) - -;;====================================================================== -;; S T E P S -;;====================================================================== - -;; Getting steps is more complicated. -;; -;; If given work area -;; 1. Find the testdat.db file -;; 2. Open the testdat.db file and do the query -;; If not given the work area -;; 1. Do a remote call to get the test path -;; 2. Continue as above -;; -;;(define (rmt:get-steps-for-test run-id test-id) -;; (rmt:send-receive 'get-steps-data run-id (list test-id))) - -(define (rmt:teststep-set-status! run-id test-id teststep-name state-in status-in comment logfile) - (let* ((state (items:check-valid-items "state" state-in)) - (status (items:check-valid-items "status" status-in))) - (if (or (not state)(not status)) - (debug:print 3 *default-log-port* "WARNING: Invalid " (if status "status" "state") - " value \"" (if status state-in status-in) "\", update your validvalues section in megatest.config")) - (rmt:send-receive 'teststep-set-status! run-id (list run-id test-id teststep-name state-in status-in comment logfile)))) - - -(define (rmt:delete-steps-for-test! run-id test-id) - (rmt:send-receive 'delete-steps-for-test! 
run-id (list run-id test-id))) - -(define (rmt:get-steps-for-test run-id test-id) - (rmt:send-receive 'get-steps-for-test run-id (list run-id test-id))) - -(define (rmt:get-steps-info-by-id test-step-id) - (rmt:send-receive 'get-steps-info-by-id #f (list test-step-id))) - -;;====================================================================== -;; T E S T D A T A -;;====================================================================== - -(define (rmt:read-test-data run-id test-id categorypatt #!key (work-area #f)) - (rmt:send-receive 'read-test-data run-id (list run-id test-id categorypatt))) -(define (rmt:read-test-data* run-id test-id categorypatt varpatt #!key (work-area #f)) - (rmt:send-receive 'read-test-data* run-id (list run-id test-id categorypatt varpatt))) - -(define (rmt:get-data-info-by-id test-data-id) - (rmt:send-receive 'get-data-info-by-id #f (list test-data-id))) - -(define (rmt:testmeta-add-record testname) - (rmt:send-receive 'testmeta-add-record #f (list testname))) - -(define (rmt:testmeta-get-record testname) - (rmt:send-receive 'testmeta-get-record #f (list testname))) - -(define (rmt:testmeta-update-field test-name fld val) - (rmt:send-receive 'testmeta-update-field #f (list test-name fld val))) - -(define (rmt:test-data-rollup run-id test-id status) - (rmt:send-receive 'test-data-rollup run-id (list run-id test-id status))) - -(define (rmt:csv->test-data run-id test-id csvdata) - (rmt:send-receive 'csv->test-data run-id (list run-id test-id csvdata))) - -;;====================================================================== -;; T A S K S -;;====================================================================== - -(define (rmt:tasks-find-task-queue-records target run-name test-patt state-patt action-patt) - (rmt:send-receive 'find-task-queue-records #f (list target run-name test-patt state-patt action-patt))) - -(define (rmt:tasks-add action owner target runname testpatt params) - (rmt:send-receive 'tasks-add #f (list action owner target 
runname testpatt params))) - -(define (rmt:tasks-set-state-given-param-key param-key new-state) - (rmt:send-receive 'tasks-set-state-given-param-key #f (list param-key new-state))) - -(define (rmt:tasks-get-last target runname) - (rmt:send-receive 'tasks-get-last #f (list target runname))) - -;;====================================================================== -;; N O S Y N C D B -;;====================================================================== - -(define (rmt:no-sync-set var val) - (rmt:send-receive 'no-sync-set #f `(,var ,val))) - -(define (rmt:no-sync-get/default var default) - (rmt:send-receive 'no-sync-get/default #f `(,var ,default))) - -(define (rmt:no-sync-del! var) - (rmt:send-receive 'no-sync-del! #f `(,var))) - -(define (rmt:no-sync-get-lock keyname) - (rmt:send-receive 'no-sync-get-lock #f `(,keyname))) - -;;====================================================================== -;; A R C H I V E S -;;====================================================================== - -(define (rmt:archive-get-allocations testname itempath dneeded) - (rmt:send-receive 'archive-get-allocations #f (list testname itempath dneeded))) - -(define (rmt:archive-register-block-name bdisk-id archive-path) - (rmt:send-receive 'archive-register-block-name #f (list bdisk-id archive-path))) - -(define (rmt:archive-allocate-testsuite/area-to-block block-id testsuite-name areakey) - (rmt:send-receive 'archive-allocate-test-to-block #f (list block-id testsuite-name areakey))) - -(define (rmt:archive-register-disk bdisk-name bdisk-path df) - (rmt:send-receive 'archive-register-disk #f (list bdisk-name bdisk-path df))) - -(define (rmt:test-set-archive-block-id run-id test-id archive-block-id) - (rmt:send-receive 'test-set-archive-block-id run-id (list run-id test-id archive-block-id))) - -(define (rmt:test-get-archive-block-info archive-block-id) - (rmt:send-receive 'test-get-archive-block-info #f (list archive-block-id))) - -#;(set-functions rmt:send-receive 
remote-server-url-set! - http-transport:close-connections remote-conndat-set! - debug:print debug:print-info - remote-ro-mode remote-ro-mode-set! - remote-ro-mode-checked-set! remote-ro-mode-checked) - -(define (rmtmod:calc-ro-mode runremote *toppath*) - (if (and runremote - (remote-ro-mode-checked runremote)) - (remote-ro-mode runremote) - (let* ((dbfile (conc *toppath* "/megatest.db")) - (ro-mode (not (file-write-access? dbfile)))) ;; TODO: use dbstruct or runremote to figure this out in future - (if runremote - (begin - (remote-ro-mode-set! runremote ro-mode) - (remote-ro-mode-checked-set! runremote #t) - ro-mode) - ro-mode)))) - -(define (extras-readonly-mode rmt-mutex log-port cmd params) - (mutex-unlock! rmt-mutex) - (debug:print-info 12 log-port "rmt:send-receive, case 3") - (debug:print 0 log-port "WARNING: write transaction requested on a readonly area. cmd="cmd" params="params) - #f) - -(define (extras-transport-failed *default-log-port* *rmt-mutex* attemptnum runremote cmd rid params) - (debug:print 0 *default-log-port* "WARNING: communication failed. Trying again, try num: " attemptnum) - (mutex-lock! *rmt-mutex*) - (remote-conndat-set! runremote #f) - (http-transport:close-connections area-dat: runremote) - (remote-server-url-set! runremote #f) - (mutex-unlock! *rmt-mutex*) - (debug:print-info 12 *default-log-port* "rmt:send-receive, case 9.1") - (rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1))) - -(define (extras-transport-succeded *default-log-port* *rmt-mutex* attemptnum runremote res params rid cmd) - (if (and (vector? res) - (eq? (vector-length res) 2) - (eq? (vector-ref res 1) 'overloaded)) ;; since we are - ;; looking at the - ;; data to carry the - ;; error we'll use a - ;; fairly obtuse - ;; combo to minimise - ;; the chances of - ;; some sort of - ;; collision. 
this - ;; is the case where - ;; the returned data - ;; is bad or the - ;; server is - ;; overloaded and we - ;; want to ease off - ;; the queries - (let ((wait-delay (+ attemptnum (* attemptnum 10)))) - (debug:print 0 *default-log-port* "WARNING: server is overloaded. Delaying " wait-delay " seconds and trying call again.") - (mutex-lock! *rmt-mutex*) - (http-transport:close-connections area-dat: runremote) - (set! *runremote* #f) ;; force starting over - (mutex-unlock! *rmt-mutex*) - (thread-sleep! wait-delay) - (rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1))) - res)) ;; All good, return res Index: rmtmod.scm ================================================================== --- rmtmod.scm +++ rmtmod.scm @@ -17,24 +17,960 @@ ;; along with Megatest. If not, see . ;;====================================================================== (declare (unit rmtmod)) + +(declare (uses apimod)) +(declare (uses commonmod)) +(declare (uses dbmod)) +(declare (uses itemsmod)) (declare (uses ulex)) -;; (declare (uses commonmod)) -;; (declare (uses dbmod)) -;; (declare (uses megamod)) (module rmtmod * (import scheme chicken data-structures extras) (import (prefix sqlite3 sqlite3:) posix typed-records srfi-18 srfi-69 format ports srfi-1 matchable) -;; (import commonmod) ;;; DO NOT ALLOW rmt*scm TO DEPEND ON common*scm!!!! -;; (import dbmod) -;; (import megamod) (import (prefix ulex ulex:)) + +(import commonmod) +(import itemsmod) +(import apimod) +(import dbmod) + ;; (include "rmt-inc.scm") +;; +;; THESE ARE ALL CALLED ON THE CLIENT SIDE!!! 
+;; + +;; generate entries for ~/.megatestrc with the following +;; +;; grep define ../rmt.scm | grep rmt: |perl -pi -e 's/\(define\s+\((\S+)\W.*$/\1/'|sort -u + +;; Globally used variables + +;; db stats +(define *db-stats* (make-hash-table)) ;; hash of vectors < count duration-total > +(define *db-stats-mutex* (make-mutex)) + + +;;====================================================================== +;; S U P P O R T F U N C T I O N S +;;====================================================================== + +;; if a server is either running or in the process of starting call client:setup +;; else return #f to let the calling proc know that there is no server available +;; +#;(define (rmt:get-connection-info areapath #!key (area-dat #f)) ;; TODO: push areapath down. + (let* ((runremote (or area-dat *runremote*)) + (cinfo (if (remote? runremote) + (remote-conndat runremote) + #f))) + (if cinfo + cinfo + (if (server:check-if-running areapath) + (client:setup areapath) + #f)))) + +(define *send-receive-mutex* (make-mutex)) ;; should have separate mutex per run-id + +(define (rmt:send-receive cmd rid params #!key (attemptnum 1)(area-dat #f)) ;; start attemptnum at 1 so the modulo below works as expected + (rmt:open-qry-close-locally cmd 0 params)) +;; +;; ;; #;(common:telemetry-log (conc "rmt:"(->string cmd)) +;; ;; #;(define (rmt:send-receive cmd rid params #!key (attemptnum 1)(area-dat #f)) ;; start attemptnum at 1 so the modulo below works as expected +;; ;; +;; ;; #;(common:telemetry-log (conc "rmt:"(->string cmd)) +;; ;; payload: `((rid . ,rid) +;; ;; (params . ,params))) +;; ;; +;; ;; do all the prep locked under the rmt-mutex +;; (mutex-lock! *rmt-mutex*) +;; +;; ;; 1. check if server is started IFF cmd is a write OR if we are not on the homehost, store in runremote +;; ;; 2. check the age of the connections. refresh the connection if it is older than timeout-20 seconds. +;; ;; 3. 
do the query, if on homehost use local access +;; ;; +;; (let* ((start-time (current-seconds)) ;; snapshot time so all use cases get same value +;; (areapath *toppath*);; TODO - resolve from dbstruct to be compatible with multiple areas +;; (runremote (or area-dat +;; *runremote*)) +;; (readonly-mode (rmtmod:calc-ro-mode runremote *toppath*))) +;; +;; ;; ensure we have a record for our connection for given area +;; (if (not runremote) ;; can remove this one. should never get here. +;; (begin +;; (set! *runremote* (make-remote)) +;; (set! runremote *runremote*))) ;; new runremote will come from this on next iteration +;; +;; ;; ensure we have a homehost record +;; (if (not (pair? (remote-hh-dat runremote))) ;; not on homehost +;; (thread-sleep! 0.1) ;; since we shouldn't get here, delay a little +;; (remote-hh-dat-set! runremote (common:get-homehost))) +;; +;; ;;(print "BB> readonly-mode is "readonly-mode" dbfile is "dbfile) +;; (cond +;; ;; give up if more than 15 attempts +;; ((> attemptnum 15) +;; (debug:print 0 *default-log-port* "ERROR: 15 tries to start/connect to server. Giving up.") +;; (exit 1)) +;; +;; ;; readonly mode, read request- handle it - case 2 +;; ((and readonly-mode +;; (member cmd api:read-only-queries)) +;; (mutex-unlock! *rmt-mutex*) +;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 2") +;; (rmt:open-qry-close-locally cmd 0 params) +;; ) +;; +;; ;; readonly mode, write request. Do nothing, return #f +;; (readonly-mode (extras-readonly-mode *rmt-mutex* *default-log-port* cmd params)) +;; +;; ;; This block was for pre-emptively resetting the connection if there had been no communication for some time. +;; ;; I don't think it adds any value. If the server is not there, just fail and start a new connection. +;; ;; also, the expire-time calculation might not be correct. 
We want, time-since-last-server-access > (server:get-timeout) +;; ;; +;; ;; reset the connection if it has been unused too long +;; ((and runremote +;; (remote-conndat runremote) +;; (> (current-seconds) ;; if it has been more than server-timeout seconds since last contact, close this connection and start a new on +;; (+ (http-transport:server-dat-get-last-access (remote-conndat runremote)) +;; (remote-server-timeout runremote)))) +;; (debug:print-info 0 *default-log-port* "Connection to " (remote-server-url runremote) " expired due to no accesses, forcing new connection.") +;; (http-transport:close-connections area-dat: runremote) +;; (remote-conndat-set! runremote #f) ;; invalidate the connection, thus forcing a new connection. +;; (mutex-unlock! *rmt-mutex*) +;; (rmt:send-receive cmd rid params attemptnum: attemptnum)) +;; +;; ;; on homehost and this is a read +;; ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required +;; (cdr (remote-hh-dat runremote)) ;; on homehost +;; (member cmd api:read-only-queries)) ;; this is a read +;; (mutex-unlock! *rmt-mutex*) +;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 5") +;; (rmt:open-qry-close-locally cmd 0 params)) +;; +;; ;; on homehost and this is a write, we already have a server, but server has died +;; ((and (cdr (remote-hh-dat runremote)) ;; on homehost +;; (not (member cmd api:read-only-queries)) ;; this is a write +;; (remote-server-url runremote) ;; have a server +;; (not (server:ping (remote-server-url runremote)))) ;; server has died. NOTE: this is not a cheap call! Need better approach. +;; (set! *runremote* (make-remote)) +;; (remote-force-server-set! runremote (common:force-server?)) +;; (mutex-unlock! 
*rmt-mutex*) +;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 6") +;; (rmt:send-receive cmd rid params attemptnum: attemptnum)) +;; +;; ;; on homehost and this is a write, we already have a server +;; ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required +;; (cdr (remote-hh-dat runremote)) ;; on homehost +;; (not (member cmd api:read-only-queries)) ;; this is a write +;; (remote-server-url runremote)) ;; have a server +;; (mutex-unlock! *rmt-mutex*) +;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 4.1") +;; (rmt:open-qry-close-locally cmd 0 params)) +;; +;; ;; on homehost, no server contact made and this is a write, passively start a server +;; ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required +;; (cdr (remote-hh-dat runremote)) ;; have homehost +;; (not (remote-server-url runremote)) ;; no connection yet +;; (not (member cmd api:read-only-queries))) ;; not a read-only query +;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 8") +;; (let ((server-url (server:check-if-running *toppath*))) ;; (server:read-dotserver->url *toppath*))) ;; (server:check-if-running *toppath*))) ;; Do NOT want to run server:check-if-running - very expensive to do for every write call +;; (if server-url +;; (remote-server-url-set! runremote server-url) ;; the string can be consumed by the client setup if needed +;; (if (common:force-server?) +;; (server:start-and-wait *toppath*) +;; (server:kind-run *toppath*)))) +;; (remote-force-server-set! runremote (common:force-server?)) +;; (mutex-unlock! 
*rmt-mutex*) +;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 8.1") +;; (rmt:open-qry-close-locally cmd 0 params)) +;; +;; ((or (and (remote-force-server runremote) ;; we are forcing a server and don't yet have a connection to one +;; (not (remote-conndat runremote))) +;; (and (not (cdr (remote-hh-dat runremote))) ;; not on a homehost +;; (not (remote-conndat runremote)))) ;; and no connection +;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 9, hh-dat: " (remote-hh-dat runremote) " conndat: " (remote-conndat runremote)) +;; (mutex-unlock! *rmt-mutex*) +;; (if (not (server:check-if-running *toppath*)) ;; who knows, maybe one has started up? +;; (server:start-and-wait *toppath*)) +;; (remote-conndat-set! runremote (rmt:get-connection-info *toppath*)) ;; calls client:setup which calls client:setup-http +;; (rmt:send-receive cmd rid params attemptnum: attemptnum)) ;; TODO: add back-off timeout as +;; +;; ;; all set up if get this far, dispatch the query +;; ((and (not (remote-force-server runremote)) +;; (cdr (remote-hh-dat runremote))) ;; we are on homehost +;; (mutex-unlock! *rmt-mutex*) +;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 10") +;; (rmt:open-qry-close-locally cmd (if rid rid 0) params)) +;; +;; ;; not on homehost, do server query +;; (else (extras-case-11 *default-log-port* runremote cmd params attemptnum rid))))) + +;; bunch of small functions factored out of send-receive to make debug easier +;; + +#;(define (extras-case-11 *default-log-port* runremote cmd params attemptnum rid) + ;; (mutex-unlock! *rmt-mutex*) + (debug:print-info 12 *default-log-port* "rmt:send-receive, case 9") + ;; (mutex-lock! *rmt-mutex*) + (let* ((conninfo (remote-conndat runremote)) + (dat (case (remote-transport runremote) + ((http) (condition-case ;; handling here has + ;; caused a lot of + ;; problems. 
However it + ;; is needed to deal with + ;; attemtped + ;; communication to + ;; servers that have gone + ;; away + #;(http-transport:client-api-send-receive 0 conninfo cmd params) + ((commfail)(vector #f "communications fail")) + ((exn)(vector #f "other fail" (print-call-chain))))) + (else + (debug:print 0 *default-log-port* "ERROR: transport " (remote-transport runremote) " not supported") + (exit)))) + (success (if (vector? dat) (vector-ref dat 0) #f)) + (res (if (vector? dat) (vector-ref dat 1) #f))) + (if (and (vector? conninfo) (< 5 (vector-length conninfo))) + #t #;(http-transport:server-dat-update-last-access conninfo) ;; refresh access time + (begin + (debug:print 0 *default-log-port* "INFO: Should not get here! conninfo=" conninfo) + (set! conninfo #f) + (remote-conndat-set! *runremote* #f) ;; NOTE: *runremote* is global copy of runremote. Purpose: factor out global. + #;(http-transport:close-connections area-dat: runremote))) + (debug:print-info 13 *default-log-port* "rmt:send-receive, case 9. conninfo=" conninfo " dat=" dat " runremote = " runremote) + (mutex-unlock! *rmt-mutex*) + (if success ;; success only tells us that the transport was + ;; successful, have to examine the data to see if + ;; there was a detected issue at the other end + (extras-transport-succeded *default-log-port* *rmt-mutex* attemptnum runremote res params rid cmd) + (extras-transport-failed *default-log-port* *rmt-mutex* attemptnum runremote cmd rid params) + ))) + +;; (define (rmt:update-db-stats run-id rawcmd params duration) +;; (mutex-lock! *db-stats-mutex*) +;; (handle-exceptions +;; exn +;; (begin +;; (debug:print 0 *default-log-port* "WARNING: stats collection failed in update-db-stats") +;; (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) +;; (print "exn=" (condition->list exn)) +;; #f) ;; if this fails we don't care, it is just stats +;; (let* ((cmd (conc "run-id=" run-id " " (if (eq? 
rawcmd 'general-call) (car params) rawcmd)))
+;;            (stat-vec (hash-table-ref/default *db-stats* cmd #f)))
+;;       (if (not (vector? stat-vec))
+;;           (let ((newvec (vector 0 0)))
+;;             (hash-table-set! *db-stats* cmd newvec)
+;;             (set! stat-vec newvec)))
+;;       (vector-set! stat-vec 0 (+ (vector-ref stat-vec 0) 1))
+;;       (vector-set! stat-vec 1 (+ (vector-ref stat-vec 1) duration))))
+;;   (mutex-unlock! *db-stats-mutex*))
+
+;; Log (at debug level 18) one row per key of *db-stats*: the key, element 0
+;; of its vector (printed under "Count"), element 1 (printed under "TotTime")
+;; and element 1 divided by element 0 (printed under "Avg"). Rows are sorted
+;; by descending count.
+;;
+(define (rmt:print-db-stats)
+  (let ((fmtstr "~40a~7-d~9-d~20,2-f")) ;; "~20,2-f"
+    (debug:print 18 *default-log-port* "DB Stats\n========")
+    (debug:print 18 *default-log-port* (format #f "~40a~8a~10a~10a" "Cmd" "Count" "TotTime" "Avg"))
+    (for-each (lambda (cmd)
+                (let ((cmd-dat (hash-table-ref *db-stats* cmd)))
+                  (debug:print 18 *default-log-port* (format #f fmtstr cmd (vector-ref cmd-dat 0) (vector-ref cmd-dat 1) (/ (vector-ref cmd-dat 1)(vector-ref cmd-dat 0))))))
+              (sort (hash-table-keys *db-stats*)
+                    (lambda (a b)
+                      (> (vector-ref (hash-table-ref *db-stats* a) 0)
+                         (vector-ref (hash-table-ref *db-stats* b) 0)))))))
+
+;; Among the *db-stats* keys containing "run-id=<run-id> ", find the one with
+;; the largest average (element 1 / element 0). Returns (key . average), or
+;; (none . 0) when there are no matching keys.
+;;
+;; NOTE(review): the "> 10" count threshold is applied only to the entry that
+;; is examined last, not to the entry holding the maximum - confirm intent.
+;;
+(define (rmt:get-max-query-average run-id)
+  (mutex-lock! *db-stats-mutex*)
+  (let* ((runkey (conc "run-id=" run-id " "))
+         (cmds   (filter (lambda (x)
+                           (substring-index runkey x))
+                         (hash-table-keys *db-stats*)))
+         (res    (if (null? cmds)
+                     (cons 'none 0)
+                     (let loop ((cmd     (car cmds))
+                                (tal     (cdr cmds))
+                                (max-cmd (car cmds))
+                                (res     0))
+                       (let* ((cmd-dat    (hash-table-ref *db-stats* cmd))
+                              (tot        (vector-ref cmd-dat 0))
+                              (curravg    (/ (vector-ref cmd-dat 1) (vector-ref cmd-dat 0))) ;; count is never zero by construction
+                              (currmax    (max res curravg))
+                              (newmax-cmd (if (> curravg res) cmd max-cmd)))
+                         (if (null? tal)
+                             (if (> tot 10)
+                                 (cons newmax-cmd currmax)
+                                 (cons 'none 0))
+                             (loop (car tal)(cdr tal) newmax-cmd currmax)))))))
+    (mutex-unlock! *db-stats-mutex*)
+    res))
+
+;; Run one api request directly against the local db instead of via a server.
+;;
+;;   cmd        - api command symbol
+;;   run-id     - not used by the body other than being passed to the retry
+;;   params     - parameter list handed to api:execute-requests
+;;   remretries - how many more attempts may be made after a failed one
+;;
+;; Returns element 1 of the (success payload) vector produced by
+;; api:execute-requests. If element 0 is false the query is retried after a
+;; random delay of up to 5 seconds and the value of the retry is returned;
+;; once the retries are used up #f is returned.
+;;
+;; A write requested while the db file is not writable is not executed; an
+;; error is logged and '() is returned.
+;;
+(define (rmt:open-qry-close-locally cmd run-id params #!key (remretries 5))
+  (let* ((qry-is-write   (not (member cmd api:read-only-queries)))
+         (db-file-path   (db:dbfile-path)) ;; 0))
+         (dbstruct-local (db:setup #t)) ;; make-dbr:dbstruct path: dbdir local: #t)))
+         (read-only      (not (file-write-access? db-file-path)))
+         (start          (current-milliseconds))
+         (resdat         (if (not (and read-only qry-is-write))
+                             (let ((v (api:execute-requests dbstruct-local (vector (symbol->string cmd) params))))
+                               (handle-exceptions ;; there has been a long history of receiving strange errors from values returned by the client when things go wrong..
+                                exn               ;; This is an attempt to detect that situation and recover gracefully
+                                (begin
+                                  (debug:print 0 *default-log-port* "ERROR: bad data from server " v " message: " ((condition-property-accessor 'exn 'message) exn))
+                                  (vector #t '())) ;; should always get a vector but if something goes wrong return a dummy
+                                (if (and (vector? v)
+                                         (> (vector-length v) 1))
+                                    (let ((newvec (vector (vector-ref v 0)(vector-ref v 1))))
+                                      newvec) ;; by copying the vector while inside the error handler we should force the detection of a corrupted record
+                                    (vector #t '())))) ;; we could also check that the returned types are valid
+                             (vector #t '())))
+         (success        (vector-ref resdat 0))
+         (res            (vector-ref resdat 1))
+         (duration       (- (current-milliseconds) start))) ;; only consumed by the disabled rmt:update-db-stats call below
+    (if (and read-only qry-is-write)
+        (debug:print 0 *default-log-port* "ERROR: attempt to write to read-only database ignored. cmd=" cmd))
+    (cond
+     ;; failed, retries left: the value of the retry IS the value of this call.
+     ;; (Previously the retry's value was dropped and the payload of the
+     ;; failed attempt was returned instead.)
+     ((and (not success) (> remretries 0))
+      (debug:print-error 0 *default-log-port* "local query failed. Trying again.")
+      (thread-sleep! (/ (random 5000) 1000)) ;; some random delay
+      (rmt:open-qry-close-locally cmd run-id params remretries: (- remretries 1)))
+     ;; failed, no retries left
+     ((not success)
+      (debug:print-error 0 *default-log-port* "too many retries in rmt:open-qry-close-locally, giving up")
+      #f)
+     (else
+      ;; (rmt:update-db-stats run-id cmd params duration)
+      ;; mark this run as dirty if this was a write, the watchdog is responsible for syncing it
+      (if qry-is-write
+          (let ((start-time (current-seconds)))
+            (mutex-lock! *db-multi-sync-mutex*)
+            (set! *db-last-access* start-time) ;; THIS IS PROBABLY USELESS? (we are on a client)
+            (mutex-unlock! *db-multi-sync-mutex*)))
+      res))))
+
+;; Send cmd over an explicitly supplied connection; run-id of #f becomes 0.
+;; Returns element 1 of the reply when element 0 is true, otherwise #f.
+;;
+;; NOTE(review): the only body expression of handle-exceptions is disabled
+;; with #; so no request is actually sent here - confirm this form still
+;; compiles and what it is expected to return.
+;;
+(define (rmt:send-receive-no-auto-client-setup connection-info cmd run-id params)
+  (let* ((run-id (if run-id run-id 0))
+         (res    (handle-exceptions
+                  exn
+                  #f
+                  #;(http-transport:client-api-send-receive run-id connection-info cmd params))))
+    (if (and res (vector-ref res 0))
+        (vector-ref res 1) ;;; YES!! THIS IS CORRECT!! CHANGE IT HERE, THEN CHANGE rmt:send-receive ALSO!!!
+        #f)))
+
+;; ;; Wrap json library for strings (why the ports crap in the first place?)
+;; (define (rmt:dat->json-str dat)
+;;   (with-output-to-string
+;;     (lambda ()
+;;       (json-write dat))))
+;;
+;; (define (rmt:json-str->dat json-str)
+;;   (with-input-from-string json-str
+;;     (lambda ()
+;;       (json-read))))
+
+;;======================================================================
+;;
+;; A C T U A L A P I C A L L S
+;;
+;;======================================================================
+
+;; Every wrapper in this section forwards to rmt:send-receive with three
+;; arguments: the api command symbol, a run-id (or #f / 0 where no run is
+;; involved) and the parameters for that command.
+
+;;======================================================================
+;; S E R V E R
+;;======================================================================
+
+(define (rmt:kill-server run-id)
+  (rmt:send-receive 'kill-server run-id (list run-id)))
+
+;; note: routed with run-id 0; the requested run-id travels in the parameter list
+(define (rmt:start-server run-id)
+  (rmt:send-receive 'start-server 0 (list run-id)))
+
+;;======================================================================
+;; M I S C
+;;======================================================================
+
+;; login sends the area path, the megatest version and the client signature
+(define (rmt:login run-id)
+  (rmt:send-receive 'login run-id (list *toppath* megatest-version (common:client-signature))))
+
+;; This login does no retries under the hood - it acts a bit like a ping.
+;; Deprecated for nmsg-transport.
+;;
+(define (rmt:login-no-auto-client-setup connection-info)
+  (rmt:send-receive-no-auto-client-setup connection-info 'login 0 (list *toppath* megatest-version
+                                                                       (common:client-signature))))
+
+;; hand off a call to one of the db:queries statements
+;; added run-id to make looking up the correct db possible
+;;
+;; the parameter list sent is (stmtname run-id . params)
+;;
+(define (rmt:general-call stmtname run-id . params)
+  (rmt:send-receive 'general-call run-id (append (list stmtname run-id) params)))
+
+
+;; given a hostname, return a pair of cpu load and update time representing latest intelligence from tests running on that host
+(define (rmt:get-latest-host-load hostname)
+  (rmt:send-receive 'get-latest-host-load 0 (list hostname)))
+
+;; (define (rmt:sync-inmem->db run-id)
+;;   (rmt:send-receive 'sync-inmem->db run-id '()))
+
+(define (rmt:sdb-qry qry val run-id)
+  ;; add caching if qry is 'getid or 'getstr
+  (rmt:send-receive 'sdb-qry run-id (list qry val)))
+
+;; NOT COMPLETED
+;;
+;; NOTE(review): testpatt is passed as-is where every other wrapper in this
+;; file passes a list, and user and params are never used - settle the
+;; parameter list against the 'runtests handler before relying on this.
+;;
+(define (rmt:runtests user run-id testpatt params)
+  (rmt:send-receive 'runtests run-id testpatt))
+
+(define (rmt:get-run-record-ids target run keynames test-patt)
+  (rmt:send-receive 'get-run-record-ids #f (list target run keynames test-patt)))
+
+(define (rmt:get-changed-record-ids since-time)
+  (rmt:send-receive 'get-changed-record-ids #f (list since-time)) )
+
+;;======================================================================
+;; T E S T M E T A
+;;======================================================================
+
+(define (rmt:get-tests-tags)
+  (rmt:send-receive 'get-tests-tags #f '()))
+
+;;======================================================================
+;; K E Y S
+;;======================================================================
+
+;; These require run-id because the values come from the run!
+;;
+(define (rmt:get-key-val-pairs run-id)
+  (rmt:send-receive 'get-key-val-pairs run-id (list run-id)))
+
+;; the caching through *db-keys* is commented out; every call goes to rmt:send-receive
+(define (rmt:get-keys)
+  ;; (if *db-keys* *db-keys*
+  ;;     (let ((res
+  (rmt:send-receive 'get-keys #f '()))
+  ;;       )(set! *db-keys* res)
+  ;;       res)))
+
+(define (rmt:get-keys-write) ;; dummy query to force server start
+  ;; (let ((res
+  (rmt:send-receive 'get-keys-write #f '())) ;; )
+  ;; (set!
*db-keys* res)
+  ;; res))
+
+;; we don't reuse run-id's (except possibly *after* a db cleanup) so it is safe
+;; to cache the results in a hash
+;;
+;; (the *keyvals* cache itself is commented out below; each call is sent on,
+;; with #f rather than run-id as the routing id)
+;;
+(define (rmt:get-key-vals run-id)
+  ;; (or (hash-table-ref/default *keyvals* run-id #f)
+  ;;     (let ((res
+  (rmt:send-receive 'get-key-vals #f (list run-id)))
+  ;;       (hash-table-set! *keyvals* run-id res)
+  ;;       res)) ;; )
+
+(define (rmt:get-targets)
+  (rmt:send-receive 'get-targets #f '()))
+
+(define (rmt:get-target run-id)
+  (rmt:send-receive 'get-target run-id (list run-id)))
+
+(define (rmt:get-run-times runpatt targetpatt)
+  (rmt:send-receive 'get-run-times #f (list runpatt targetpatt )))
+
+
+;;======================================================================
+;; T E S T S
+;;======================================================================
+
+;; Just some syntactic sugar
+;; (run-id appears twice: once as the routing id of rmt:general-call and once
+;; as the first statement parameter)
+(define (rmt:register-test run-id test-name item-path)
+  (rmt:general-call 'register-test run-id run-id test-name item-path))
+
+(define (rmt:get-test-id run-id testname item-path)
+  (rmt:send-receive 'get-test-id run-id (list run-id testname item-path)))
+
+;; run-id is NOT used
+;; NOTE(review): the remark above predates this code - run-id is passed both
+;; as the routing id and as the first parameter; confirm what the handler
+;; does with it.
+;;
+;; A test-id that is not a number is rejected locally: a warning and the call
+;; chain are printed and #f is returned without contacting the server.
+;;
+(define (rmt:get-test-info-by-id run-id test-id)
+  (if (number? test-id)
+      (rmt:send-receive 'get-test-info-by-id run-id (list run-id test-id))
+      (begin
+        (debug:print 0 *default-log-port* "WARNING: Bad data handed to rmt:get-test-info-by-id run-id=" run-id ", test-id=" test-id)
+        (print-call-chain (current-error-port))
+        #f)))
+
+(define (rmt:test-get-rundir-from-test-id run-id test-id)
+  (rmt:send-receive 'test-get-rundir-from-test-id run-id (list run-id test-id)))
+
+;; disabled with #; (datum comment)
+#;(define (rmt:open-test-db-by-test-id run-id test-id #!key (work-area #f))
+  (let* ((test-path (if (string?
work-area)
+                        work-area
+                        (rmt:test-get-rundir-from-test-id run-id test-id))))
+    (debug:print 3 *default-log-port* "TEST PATH: " test-path)
+    (open-test-db test-path)))
+
+;; WARNING: This currently bypasses the transaction wrapped writes system
+(define (rmt:test-set-state-status-by-id run-id test-id newstate newstatus newcomment)
+  (rmt:send-receive 'test-set-state-status-by-id run-id (list run-id test-id newstate newstatus newcomment)))
+
+(define (rmt:set-tests-state-status run-id testnames currstate currstatus newstate newstatus)
+  (rmt:send-receive 'set-tests-state-status run-id (list run-id testnames currstate currstatus newstate newstatus)))
+
+;; the run-id sanity check around this call is commented out, so run-id is
+;; forwarded without validation
+(define (rmt:get-tests-for-run run-id testpatt states statuses offset limit not-in sort-by sort-order qryvals last-update mode)
+  ;; (if (number? run-id)
+  (rmt:send-receive 'get-tests-for-run run-id (list run-id testpatt states statuses offset limit not-in sort-by sort-order qryvals last-update mode)))
+  ;;    (begin
+  ;;      (debug:print-error 0 *default-log-port* "rmt:get-tests-for-run called with bad run-id=" run-id)
+  ;;      (print-call-chain (current-error-port))
+  ;;      '())))
+
+;; get stuff via synchash
+(define (rmt:synchash-get run-id proc synckey keynum params)
+  (rmt:send-receive 'synchash-get run-id (list run-id proc synckey keynum params)))
+
+(define (rmt:get-tests-for-run-mindata run-id testpatt states status not-in)
+  (rmt:send-receive 'get-tests-for-run-mindata run-id (list run-id testpatt states status not-in)))
+
+;; IDEA: Threadify these - they spend a lot of time waiting ...
+;;
+;; Run 'get-tests-for-run-mindata for every run in run-ids (or for all runs
+;; when run-ids is #f), one thread per run, and return the appended rows.
+;; New threads are only started while at most five unfinished ones are being
+;; tracked. A run whose query does not return a list is logged and
+;; contributes nothing.
+;;
+;; The order of rows across runs depends on which thread finishes first.
+;;
+(define (rmt:get-tests-for-runs-mindata run-ids testpatt states status not-in)
+  (let ((multi-run-mutex (make-mutex))
+        (run-id-list     (if run-ids
+                             run-ids
+                             (rmt:get-all-run-ids)))
+        (result          '()))
+    (if (not (null? run-id-list))
+        (let ((pending
+               (let loop ((hed     (car run-id-list))
+                          (tal     (cdr run-id-list))
+                          (threads '()))
+                 (if (> (length threads) 5)
+                     ;; too many in flight: drop the finished ones and look again
+                     (loop hed tal (filter (lambda (th)(not (member (thread-state th) '(terminated dead)))) threads))
+                     (let* ((newthread  (make-thread
+                                         (lambda ()
+                                           (let ((res (rmt:send-receive 'get-tests-for-run-mindata hed (list hed testpatt states status not-in))))
+                                             (if (list? res)
+                                                 (begin
+                                                   (mutex-lock! multi-run-mutex)
+                                                   (set! result (append result res))
+                                                   (mutex-unlock! multi-run-mutex))
+                                                 (debug:print-error 0 *default-log-port* "get-tests-for-run-mindata failed for run-id " hed ", testpatt " testpatt ", states " states ", status " status ", not-in " not-in))))
+                                         (conc "multi-run-thread for run-id " hed)))
+                            (newthreads (cons newthread threads)))
+                       (thread-start! newthread)
+                       (thread-sleep! 0.05) ;; give that thread some time to start
+                       (if (null? tal)
+                           newthreads
+                           (loop (car tal)(cdr tal) newthreads)))))))
+          ;; Wait for every thread still being tracked. Without this the
+          ;; function returned while queries were still running and their
+          ;; rows were lost. Threads pruned by the filter above had already
+          ;; finished, so joining the remainder is sufficient.
+          (for-each (lambda (th)
+                      (handle-exceptions
+                       exn
+                       ;; thread-join! re-raises a failure of the thread body;
+                       ;; keep the best-effort behaviour and only report it
+                       (debug:print-error 0 *default-log-port* "thread \"" (thread-name th) "\" ended with an exception")
+                       (thread-join! th)))
+                    pending)))
+    result))
+
+;; ;; IDEA: Threadify these - they spend a lot of time waiting ...
+;; ;;
+;; (define (rmt:get-tests-for-runs-mindata run-ids testpatt states status not-in)
+;;   (let ((run-id-list (if run-ids
+;;                          run-ids
+;;                          (rmt:get-all-run-ids))))
+;;     (apply append (map (lambda (run-id)
+;;                          (rmt:send-receive 'get-tests-for-run-mindata run-id (list run-ids testpatt states status not-in)))
+;;                        run-id-list))))
+
+;; The definitions below are thin wrappers: each forwards an api command
+;; symbol, a run-id (or #f where the call is not tied to one run) and a
+;; parameter list to rmt:send-receive or rmt:general-call.
+
+(define (rmt:delete-test-records run-id test-id)
+  (rmt:send-receive 'delete-test-records run-id (list run-id test-id)))
+
+;; This is not needed as test steps are deleted on test delete call
+;;
+;; (define (rmt:delete-test-step-records run-id test-id)
+;;   (rmt:send-receive 'delete-test-step-records run-id (list run-id test-id)))
+
+(define (rmt:test-set-state-status run-id test-id state status msg)
+  (rmt:send-receive 'test-set-state-status run-id (list run-id test-id state status msg)))
+
+(define (rmt:test-toplevel-num-items run-id test-name)
+  (rmt:send-receive 'test-toplevel-num-items run-id (list run-id test-name)))
+
+;; (define (rmt:get-previous-test-run-record run-id test-name item-path)
+;;   (rmt:send-receive 'get-previous-test-run-record run-id (list run-id test-name item-path)))
+
+(define (rmt:get-matching-previous-test-run-records run-id test-name item-path)
+  (rmt:send-receive 'get-matching-previous-test-run-records run-id (list run-id test-name item-path)))
+
+(define (rmt:test-get-logfile-info run-id test-name)
+  (rmt:send-receive 'test-get-logfile-info run-id (list run-id test-name)))
+
+(define (rmt:test-get-records-for-index-file run-id test-name)
+  (rmt:send-receive 'test-get-records-for-index-file run-id (list run-id test-name)))
+
+(define (rmt:get-testinfo-state-status run-id test-id)
+  (rmt:send-receive 'get-testinfo-state-status run-id (list run-id test-id)))
+
+;; does nothing unless logf is a string
+(define (rmt:test-set-log! run-id test-id logf)
+  (if (string? logf)(rmt:general-call 'test-set-log run-id logf test-id)))
+
+(define (rmt:test-set-top-process-pid run-id test-id pid)
+  (rmt:send-receive 'test-set-top-process-pid run-id (list run-id test-id pid)))
+
+(define (rmt:test-get-top-process-pid run-id test-id)
+  (rmt:send-receive 'test-get-top-process-pid run-id (list run-id test-id)))
+
+(define (rmt:get-run-ids-matching-target keynames target res runname testpatt statepatt statuspatt)
+  (rmt:send-receive 'get-run-ids-matching-target #f (list keynames target res runname testpatt statepatt statuspatt)))
+
+;; NOTE: This will open and access ALL run databases.
+;;
+(define (rmt:test-get-paths-matching-keynames-target-new keynames target res testpatt statepatt statuspatt runname)
+  (let ((run-ids (rmt:get-run-ids-matching-target keynames target res runname testpatt statepatt statuspatt)))
+    (apply append
+           (map (lambda (run-id)
+                  (rmt:send-receive 'test-get-paths-matching-keynames-target-new run-id (list run-id keynames target res testpatt statepatt statuspatt runname)))
+                run-ids))))
+
+;; (define (rmt:get-run-ids-matching keynames target res)
+;;   (rmt:send-receive #f 'get-run-ids-matching (list keynames target res)))
+
+(define (rmt:get-prereqs-not-met run-id waitons ref-test-name ref-item-path #!key (mode '(normal))(itemmaps #f))
+  (rmt:send-receive 'get-prereqs-not-met run-id (list run-id waitons ref-test-name ref-item-path mode itemmaps)))
+
+(define (rmt:get-count-tests-running-for-run-id run-id)
+  (rmt:send-receive 'get-count-tests-running-for-run-id run-id (list run-id)))
+
+(define (rmt:get-not-completed-cnt run-id)
+  (rmt:send-receive 'get-not-completed-cnt run-id (list run-id)))
+
+
+;; Statistical queries
+
+(define (rmt:get-count-tests-running run-id)
+  (rmt:send-receive 'get-count-tests-running run-id (list run-id)))
+
+(define (rmt:get-count-tests-running-for-testname run-id testname)
+  (rmt:send-receive 'get-count-tests-running-for-testname run-id (list run-id testname)))
+
+(define (rmt:get-count-tests-running-in-jobgroup run-id jobgroup)
+  (rmt:send-receive 'get-count-tests-running-in-jobgroup run-id (list run-id jobgroup)))
+
+;; state and status are extra hints not usually used in the calculation
+;;
+(define (rmt:set-state-status-and-roll-up-items run-id test-name item-path state status comment)
+  (rmt:send-receive 'set-state-status-and-roll-up-items run-id (list run-id test-name item-path state status comment)))
+
+(define (rmt:set-state-status-and-roll-up-run run-id state status)
+  (rmt:send-receive 'set-state-status-and-roll-up-run run-id (list run-id state status)))
+
+
+;; test-name is deliberately supplied three times as statement parameters
+;; NOTE(review): presumably the statement has three placeholders - confirm
+(define (rmt:update-pass-fail-counts run-id test-name)
+  (rmt:general-call 'update-pass-fail-counts run-id test-name test-name test-name))
+
+(define (rmt:top-test-set-per-pf-counts run-id test-name)
+  (rmt:send-receive 'top-test-set-per-pf-counts run-id (list run-id test-name)))
+
+(define (rmt:get-raw-run-stats run-id)
+  (rmt:send-receive 'get-raw-run-stats run-id (list run-id)))
+
+(define (rmt:get-test-times runname target)
+  (rmt:send-receive 'get-test-times #f (list runname target )))
+
+;;======================================================================
+;; R U N S
+;;======================================================================
+
+(define (rmt:get-run-info run-id)
+  (rmt:send-receive 'get-run-info run-id (list run-id)))
+
+(define (rmt:get-num-runs runpatt)
+  (rmt:send-receive 'get-num-runs #f (list runpatt)))
+
+(define (rmt:get-runs-cnt-by-patt runpatt targetpatt keys)
+  (rmt:send-receive 'get-runs-cnt-by-patt #f (list runpatt targetpatt keys)))
+
+;; Use the special run-id == #f scenario here since there is no run yet
+(define (rmt:register-run keyvals runname state status user contour)
+  (rmt:send-receive 'register-run #f (list keyvals runname state status user contour)))
+
+(define (rmt:get-run-name-from-id run-id)
+  (rmt:send-receive 'get-run-name-from-id run-id (list run-id)))
+
+(define (rmt:delete-run run-id)
+  (rmt:send-receive 'delete-run run-id (list run-id)))
+
+(define (rmt:update-run-stats run-id stats)
+  (rmt:send-receive 'update-run-stats #f (list run-id stats)))
+
+(define (rmt:delete-old-deleted-test-records)
+  (rmt:send-receive 'delete-old-deleted-test-records #f '()))
+
+(define (rmt:get-runs runpatt count offset keypatts)
+  (rmt:send-receive 'get-runs #f (list runpatt count offset keypatts)))
+
+(define (rmt:simple-get-runs runpatt count offset target)
+  (rmt:send-receive 'simple-get-runs #f (list runpatt count offset target)))
+
+(define (rmt:get-all-run-ids)
+  (rmt:send-receive 'get-all-run-ids #f '()))
+
+(define (rmt:get-prev-run-ids run-id)
+  (rmt:send-receive 'get-prev-run-ids #f (list run-id)))
+
+(define (rmt:lock/unlock-run run-id lock unlock user)
+  (rmt:send-receive 'lock/unlock-run #f (list run-id lock unlock user)))
+
+;; set/get status
+(define (rmt:get-run-status run-id)
+  (rmt:send-receive 'get-run-status #f (list run-id)))
+
+(define (rmt:get-run-state run-id)
+  (rmt:send-receive 'get-run-state #f (list run-id)))
+
+
+(define (rmt:set-run-status run-id run-status #!key (msg #f))
+  (rmt:send-receive 'set-run-status #f (list run-id run-status msg)))
+
+(define (rmt:set-run-state-status run-id state status )
+  (rmt:send-receive 'set-run-state-status #f (list run-id state status)))
+
+
+(define (rmt:update-run-event_time run-id)
+  (rmt:send-receive 'update-run-event_time #f (list run-id)))
+
+(define (rmt:get-runs-by-patt keys runnamepatt targpatt offset limit fields last-runs-update #!key (sort-order "asc")) ;; fields of #f uses default
+  (rmt:send-receive 'get-runs-by-patt #f (list keys runnamepatt targpatt offset limit fields last-runs-update sort-order)))
+
+;; the 'have-incompletes? pre-check is commented out; 'mark-incomplete is always sent
+(define (rmt:find-and-mark-incomplete run-id ovr-deadtime)
+  ;; (if (rmt:send-receive 'have-incompletes? run-id (list run-id ovr-deadtime))
+  (rmt:send-receive 'mark-incomplete run-id (list run-id ovr-deadtime))) ;; )
+
+(define (rmt:get-main-run-stats run-id)
+  (rmt:send-receive 'get-main-run-stats #f (list run-id)))
+
+(define (rmt:get-var varname)
+  (rmt:send-receive 'get-var #f (list varname)))
+
+(define (rmt:del-var varname)
+  (rmt:send-receive 'del-var #f (list varname)))
+
+(define (rmt:set-var varname value)
+  (rmt:send-receive 'set-var #f (list varname value)))
+
+;;======================================================================
+;; M U L T I R U N Q U E R I E S
+;;======================================================================
+
+;; Need to move this to multi-run section and make associated changes
+(define (rmt:find-and-mark-incomplete-all-runs #!key (ovr-deadtime #f))
+  (let ((run-ids (rmt:get-all-run-ids)))
+    (for-each (lambda (run-id)
+                (rmt:find-and-mark-incomplete run-id ovr-deadtime))
+              run-ids)))
+
+;; get the previous record for when this test was run where all keys match but runname
+;; returns #f if no such test found, returns a single test record if found
+;;
+;; Run this at the client end since we have to connect to multiple run-id dbs
+;;
+;; The ids from rmt:get-prev-run-ids are tried in the order returned; the
+;; first run that yields any record for test-name/item-path wins and its
+;; first record is returned. selstr and qrystr are only used in the debug
+;; message.
+;;
+(define (rmt:get-previous-test-run-record run-id test-name item-path)
+  (let* ((keyvals (rmt:get-key-val-pairs run-id))
+         (keys    (rmt:get-keys))
+         (selstr  (string-intersperse keys ","))
+         (qrystr  (string-intersperse (map (lambda (x)(conc x "=?")) keys) " AND ")))
+    (if (not keyvals)
+        #f
+        (let ((prev-run-ids (rmt:get-prev-run-ids run-id)))
+          ;; for each run starting with the most recent look to see if there is a matching test
+          ;; if found then return that matching test record
+          (debug:print 4 *default-log-port* "selstr: " selstr ", qrystr: " qrystr ", keyvals: " keyvals ", previous run ids found: " prev-run-ids)
+          (if (null? prev-run-ids) #f
+              (let loop ((hed (car prev-run-ids))
+                         (tal (cdr prev-run-ids)))
+                (let ((results (rmt:get-tests-for-run hed (conc test-name "/" item-path) '() '() ;; run-id testpatt states statuses
+                                                      #f #f #f               ;; offset limit not-in hide/not-hide
+                                                      #f #f #f #f 'normal))) ;; sort-by sort-order qryvals last-update mode
+                  ;; report the run that was actually queried (hed), not the caller's run-id
+                  (debug:print 4 *default-log-port* "Got tests for run-id " hed ", test-name " test-name ", item-path " item-path ": " results)
+                  (if (and (null? results)
+                           (not (null? tal)))
+                      (loop (car tal)(cdr tal))
+                      (if (null? results) #f
+                          (car results))))))))))
+
+(define (rmt:get-run-stats)
+  (rmt:send-receive 'get-run-stats #f '()))
+
+;;======================================================================
+;; S T E P S
+;;======================================================================
+
+;; Getting steps is more complicated.
+;;
+;; If given work area
+;;  1. Find the testdat.db file
+;;  2. Open the testdat.db file and do the query
+;; If not given the work area
+;;  1. Do a remote call to get the test path
+;;  2. Continue as above
+;;
+;;(define (rmt:get-steps-for-test run-id test-id)
+;;  (rmt:send-receive 'get-steps-data run-id (list test-id)))
+
+;; Record the state/status of a test step.
+;;
+;; state-in and status-in are checked with items:check-valid-items; when
+;; either check fails a warning naming the offending field and value is
+;; logged. The values are sent on unchanged either way.
+;;
+(define (rmt:teststep-set-status! run-id test-id teststep-name state-in status-in comment logfile)
+  (let* ((state  (items:check-valid-items "state" state-in))
+         (status (items:check-valid-items "status" status-in)))
+    (if (or (not state)(not status))
+        ;; if status passed the check then state is the bad one, and vice
+        ;; versa; label and value must name the same field (the label used
+        ;; to be swapped)
+        (debug:print 3 *default-log-port* "WARNING: Invalid " (if status "state" "status")
+                     " value \"" (if status state-in status-in) "\", update your validvalues section in megatest.config"))
+    (rmt:send-receive 'teststep-set-status! run-id (list run-id test-id teststep-name state-in status-in comment logfile))))
+
+
+(define (rmt:delete-steps-for-test! run-id test-id)
+  (rmt:send-receive 'delete-steps-for-test! run-id (list run-id test-id)))
+
+(define (rmt:get-steps-for-test run-id test-id)
+  (rmt:send-receive 'get-steps-for-test run-id (list run-id test-id)))
+
+(define (rmt:get-steps-info-by-id test-step-id)
+  (rmt:send-receive 'get-steps-info-by-id #f (list test-step-id)))
+
+;;======================================================================
+;; T E S T D A T A
+;;======================================================================
+
+;; work-area is accepted by the next two functions but not used
+(define (rmt:read-test-data run-id test-id categorypatt #!key (work-area #f))
+  (rmt:send-receive 'read-test-data run-id (list run-id test-id categorypatt)))
+(define (rmt:read-test-data* run-id test-id categorypatt varpatt #!key (work-area #f))
+  (rmt:send-receive 'read-test-data* run-id (list run-id test-id categorypatt varpatt)))
+
+(define (rmt:get-data-info-by-id test-data-id)
+  (rmt:send-receive 'get-data-info-by-id #f (list test-data-id)))
+
+(define (rmt:testmeta-add-record testname)
+  (rmt:send-receive 'testmeta-add-record #f (list testname)))
+
+(define (rmt:testmeta-get-record testname)
+  (rmt:send-receive 'testmeta-get-record #f (list testname)))
+
+(define (rmt:testmeta-update-field test-name fld val)
+  (rmt:send-receive 'testmeta-update-field #f (list test-name fld val)))
+
+(define (rmt:test-data-rollup run-id test-id status)
+  (rmt:send-receive 'test-data-rollup run-id (list run-id test-id status)))
+
+(define (rmt:csv->test-data run-id test-id csvdata)
+  (rmt:send-receive 'csv->test-data run-id (list run-id test-id csvdata)))
+
+;;======================================================================
+;; T A S K S
+;;======================================================================
+
+(define (rmt:tasks-find-task-queue-records target run-name test-patt state-patt action-patt)
+  (rmt:send-receive 'find-task-queue-records #f (list target run-name test-patt state-patt action-patt)))
+
+(define (rmt:tasks-add action owner target runname testpatt params)
+  (rmt:send-receive 'tasks-add #f (list action owner target runname testpatt params)))
+
+(define (rmt:tasks-set-state-given-param-key param-key new-state)
+  (rmt:send-receive 'tasks-set-state-given-param-key #f (list param-key new-state)))
+
+(define (rmt:tasks-get-last target runname)
+  (rmt:send-receive 'tasks-get-last #f (list target runname)))
+
+;;======================================================================
+;; N O S Y N C D B
+;;======================================================================
+
+(define (rmt:no-sync-set var val)
+  (rmt:send-receive 'no-sync-set #f `(,var ,val)))
+
+(define (rmt:no-sync-get/default var default)
+  (rmt:send-receive 'no-sync-get/default #f `(,var ,default)))
+
+(define (rmt:no-sync-del! var)
+  (rmt:send-receive 'no-sync-del! #f `(,var)))
+
+(define (rmt:no-sync-get-lock keyname)
+  (rmt:send-receive 'no-sync-get-lock #f `(,keyname)))
+
+;;======================================================================
+;; A R C H I V E S
+;;======================================================================
+
+(define (rmt:archive-get-allocations testname itempath dneeded)
+  (rmt:send-receive 'archive-get-allocations #f (list testname itempath dneeded)))
+
+(define (rmt:archive-register-block-name bdisk-id archive-path)
+  (rmt:send-receive 'archive-register-block-name #f (list bdisk-id archive-path)))
+
+;; note: the command symbol sent is 'archive-allocate-test-to-block
+(define (rmt:archive-allocate-testsuite/area-to-block block-id testsuite-name areakey)
+  (rmt:send-receive 'archive-allocate-test-to-block #f (list block-id testsuite-name areakey)))
+
+(define (rmt:archive-register-disk bdisk-name bdisk-path df)
+  (rmt:send-receive 'archive-register-disk #f (list bdisk-name bdisk-path df)))
+
+(define (rmt:test-set-archive-block-id run-id test-id archive-block-id)
+  (rmt:send-receive 'test-set-archive-block-id run-id (list run-id test-id archive-block-id)))
+
+(define (rmt:test-get-archive-block-info archive-block-id)
+  (rmt:send-receive 'test-get-archive-block-info #f (list archive-block-id)))
+
+#;(set-functions rmt:send-receive
remote-server-url-set! + http-transport:close-connections remote-conndat-set! + debug:print debug:print-info + remote-ro-mode remote-ro-mode-set! + remote-ro-mode-checked-set! remote-ro-mode-checked) + +(define (rmtmod:calc-ro-mode runremote *toppath*) + (if (and runremote + (remote-ro-mode-checked runremote)) + (remote-ro-mode runremote) + (let* ((dbfile (conc *toppath* "/megatest.db")) + (ro-mode (not (file-write-access? dbfile)))) ;; TODO: use dbstruct or runremote to figure this out in future + (if runremote + (begin + (remote-ro-mode-set! runremote ro-mode) + (remote-ro-mode-checked-set! runremote #t) + ro-mode) + ro-mode)))) + +(define (extras-readonly-mode rmt-mutex log-port cmd params) + (mutex-unlock! rmt-mutex) + (debug:print-info 12 log-port "rmt:send-receive, case 3") + (debug:print 0 log-port "WARNING: write transaction requested on a readonly area. cmd="cmd" params="params) + #f) + +(define (extras-transport-failed *default-log-port* *rmt-mutex* attemptnum runremote cmd rid params) + (debug:print 0 *default-log-port* "WARNING: communication failed. Trying again, try num: " attemptnum) + (mutex-lock! *rmt-mutex*) + (remote-conndat-set! runremote #f) + #;(http-transport:close-connections area-dat: runremote) + (remote-server-url-set! runremote #f) + (mutex-unlock! *rmt-mutex*) + (debug:print-info 12 *default-log-port* "rmt:send-receive, case 9.1") + (rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1))) + +(define (extras-transport-succeded *default-log-port* *rmt-mutex* attemptnum runremote res params rid cmd) + (if (and (vector? res) + (eq? (vector-length res) 2) + (eq? (vector-ref res 1) 'overloaded)) ;; since we are + ;; looking at the + ;; data to carry the + ;; error we'll use a + ;; fairly obtuse + ;; combo to minimise + ;; the chances of + ;; some sort of + ;; collision. 
this + ;; is the case where + ;; the returned data + ;; is bad or the + ;; server is + ;; overloaded and we + ;; want to ease off + ;; the queries + (let ((wait-delay (+ attemptnum (* attemptnum 10)))) + (debug:print 0 *default-log-port* "WARNING: server is overloaded. Delaying " wait-delay " seconds and trying call again.") + (mutex-lock! *rmt-mutex*) + #;(http-transport:close-connections area-dat: runremote) + (set! *runremote* #f) ;; force starting over + (mutex-unlock! *rmt-mutex*) + (thread-sleep! wait-delay) + (rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1))) + res)) ;; All good, return res + ;; (include "common_records.scm") ) Index: runconfig-inc.scm ================================================================== --- runconfig-inc.scm +++ runconfig-inc.scm @@ -13,189 +13,5 @@ ;; GNU General Public License for more details. ;; ;; You should have received a copy of the GNU General Public License ;; along with Megatest. If not, see . -;;====================================================================== -;; read a config file, loading only the section pertinent -;; to this run field1val/field2val/field3val ... -;;====================================================================== - -(define (runconfig:read fname target environ-patt) - (let ((ht (make-hash-table))) - (if target (hash-table-set! ht target '())) - (read-config fname ht #t environ-patt: environ-patt sections: (if target (list "default" target) #f)))) - -;; NB// to process a runconfig ensure to use environ-patt with target! 
-;; -(define (setup-env-defaults fname run-id already-seen keyvals #!key (environ-patt #f)(change-env #t)) - (let* ((keys (map car keyvals)) - (thekey (if keyvals - (string-intersperse (map (lambda (x)(if x x "-na-")) (map cadr keyvals)) "/") - (or (common:args-get-target) - (get-environment-variable "MT_TARGET") - (begin - (debug:print-error 0 *default-log-port* "setup-env-defaults called with no run-id or -target or -reqtarg") - "nothing matches this I hope")))) - ;; Why was system disallowed in the reading of the runconfigs file? - ;; NOTE: Should be setting env vars based on (target|default) - (confdat (runconfig:read fname thekey environ-patt)) - (whatfound (make-hash-table)) - (finaldat (make-hash-table)) - (sections (list "default" thekey))) - (if (not *target*)(set! *target* thekey)) ;; may save a db access or two but repeats db:get-target code - (debug:print 4 *default-log-port* "Using key=\"" thekey "\"") - - (if change-env - (for-each ;; NB// This can be simplified with new content of keyvals having all that is needed. - (lambda (keyval) - (safe-setenv (car keyval)(cadr keyval))) - keyvals)) - - (for-each - (lambda (section) - (let ((section-dat (hash-table-ref/default confdat section #f))) - (if section-dat - (for-each - (lambda (envvar) - (let ((val (cadr (assoc envvar section-dat)))) - (hash-table-set! whatfound section (+ (hash-table-ref/default whatfound section 0) 1)) - (if (and (string? envvar) - (string? val) - change-env) - (safe-setenv envvar val)) - (hash-table-set! finaldat envvar val))) - (map car section-dat))))) - sections) - (if already-seen - (begin - (debug:print 2 *default-log-port* "Key settings found in runconfigs.config:") - (for-each (lambda (fullkey) - (debug:print 2 *default-log-port* (format #f "~20a ~a\n" fullkey (hash-table-ref/default whatfound fullkey 0)))) - sections) - (debug:print 2 *default-log-port* "---") - (set! 
*already-seen-runconfig-info* #t))) - ;; finaldat ;; was returning this "finaldat" which would be good but conflicts with other uses - confdat - )) - -(define (set-run-config-vars run-id keyvals targ-from-db) - (push-directory *toppath*) ;; the push/pop doesn't appear to do anything ... - (let ((runconfigf (conc *toppath* "/runconfigs.config")) - (targ (or (common:args-get-target) - targ-from-db - (get-environment-variable "MT_TARGET")))) - (pop-directory) - (if (common:file-exists? runconfigf) - (setup-env-defaults runconfigf run-id #t keyvals - environ-patt: (conc "(default" - (if targ - (conc "|" targ ")") - ")"))) - (debug:print 0 *default-log-port* "WARNING: You do not have a run config file: " runconfigf)))) - -;; given (a (b c) d) return ((a b d)(a c d)) -;; NOTE: this feels like it has been done before - perhaps with items handling? -;; -(define (runconfig:combinations inlst) - (let loop ((hed (car inlst)) - (tal (cdr inlst)) - (res '())) - ;; (print "res: " res " hed: " hed) - (if (list? hed) - (let ((newres (if (null? res) ;; first time through convert incoming items to list of items - (map list hed) - (apply append - (map (lambda (r) ;; iterate over items in res - (map (lambda (h) ;; iterate over items in hed - (append r (list h))) - hed)) - res))))) - ;; (print "newres1: " newres) - (if (null? tal) - newres - (loop (car tal)(cdr tal) newres))) - (let ((newres (if (null? res) - (list (list hed)) - (map (lambda (r) - (append r (list hed))) - res)))) - ;; (print "newres2: " newres) - (if (null? 
tal) - newres - (loop (car tal)(cdr tal) newres)))))) - -;; multi-part expand -;; Given a/b,c,d/e,f return a/b/e a/b/f a/c/e a/c/f a/d/e a/d/f -;; -(define (runconfig:expand target) - (let* ((parts (map (lambda (x) - (string-split x ",")) - (string-split target "/")))) - (map (lambda (x) - (string-intersperse x "/")) - (runconfig:combinations parts)))) - -;; multi-target expansion -;; a/b/c/x,y,z a/b/d/x,y => a/b/c/x a/b/c/y a/b/c/z a/b/d/x a/b/d/y -;; -(define (runconfig:expand-target target-strs) - (delete-duplicates - (apply append (map runconfig:expand (string-split target-strs " "))))) - -#| - (if (null? target-strs) - '() - (let loop ((hed (car target-strs)) - (tal (cdr target-strs)) - (res '())) - ;; first break all parts into individual target patterns - (if (string-index hed " ") ;; this is a multi-target target - (let ((newres (append (string-split hed " ") res))) - (runconfig:expand-target newres)) - (if (string-index hed ",") ;; this is a multi-target where one or more parts are comma separated - -|# - -;; cache the runconfigs in $MT_LINKTREE/$MT_TARGET/$MT_RUNNAME/.runconfig -;; -(define (full-runconfigs-read) -;; in the envprocessing branch the below code replaces the further below code -;; (if (eq? *configstatus* 'fulldata) -;; *runconfigdat* -;; (begin -;; (launch:setup) -;; *runconfigdat*))) - - (let* ((rundir (if (and (getenv "MT_LINKTREE")(getenv "MT_TARGET")(getenv "MT_RUNNAME")) - (conc (getenv "MT_LINKTREE") "/" (getenv "MT_TARGET") "/" (getenv "MT_RUNNAME")) - #f)) - (cfgf (if rundir (conc rundir "/.runconfig." megatest-version "-" megatest-fossil-hash) #f))) - (if (and cfgf - (common:file-exists? cfgf) - (file-write-access? 
cfgf) - (common:use-cache?)) - (configf:read-alist cfgf) - (let* ((keys (rmt:get-keys)) - (target (common:args-get-target)) - (key-vals (if target (keys:target->keyval keys target) #f)) - (sections (if target (list "default" target) #f)) - (data (begin - (setenv "MT_RUN_AREA_HOME" *toppath*) - (if key-vals - (for-each (lambda (kt) - (setenv (car kt) (cadr kt))) - key-vals)) - ;; (read-config (conc *toppath* "/runconfigs.config") #f #t sections: sections)))) - (runconfig:read (conc *toppath* "/runconfigs.config") target #f)))) - (if (and rundir ;; have all needed variabless - (directory-exists? rundir) - (file-write-access? rundir)) - (begin - (if (not (common:in-running-test?)) - (configf:write-alist data cfgf)) - ;; force re-read of megatest.config - this resolves circular references between megatest.config - (launch:setup force-reread: #t) - ;; (launch:cache-config) ;; there are two independent config cache locations, turning this one off for now. MRW. - )) ;; we can safely cache megatest.config since we have a valid runconfig - data)))) - Index: runs-inc.scm ================================================================== --- runs-inc.scm +++ runs-inc.scm @@ -17,2592 +17,5 @@ ;; strftime('%m/%d/%Y %H:%M:%S','now','localtime') ;; use this struct to facilitate refactoring ;; - -(define (runs:get-mt-env-alist run-id runname target testname itempath) - ;;(bb-check-path msg: "runs:set-megatest-env-vars entry") - `(("MT_TEST_NAME" . ,testname) - - ("MT_ITEMPATH" . ,itempath) - - ("MT_TARGET" . ,target) - - ("MT_RUNNAME" . ,runname) - - ("MT_RUN_AREA_HOME" . ,*toppath*) - - ,@(let* ((link-tree (common:get-linktree))) ;; (configf:lookup *configdat* "setup" "linktree"))) - (if link-tree - (list (cons "MT_LINKTREE" link-tree) - - (cons "MT_TEST_RUN_DIR" - (conc link-tree "/" target "/" runname "/" testname - (if (and (string? itempath) (not (equal? 
itempath ""))) - (conc "/" itempath) - ""))) - ) - '())) - - ,@(map - (lambda (key) - (cons (car key) (cadr key))) - (keys:target->keyval (rmt:get-keys) target)) - - ,@(map (lambda (var) - (let ((val (configf:lookup *configdat* "env-override" var))) - (cons var val))) - (configf:section-vars *configdat* "env-override")))) - - - - - - -;; set up needed environment variables given a run-id and optionally a target, itempath etc. -;; -(define (runs:set-megatest-env-vars run-id #!key (inkeys #f)(inrunname #f)(inkeyvals #f)(intarget #f)(testname #f)(itempath #f)) - ;;(bb-check-path msg: "runs:set-megatest-env-vars entry") - (let* ((target (or intarget - (common:args-get-target) - (get-environment-variable "MT_TARGET"))) - (keys (if inkeys inkeys (rmt:get-keys))) - (keyvals (if inkeyvals inkeyvals (keys:target->keyval keys target))) - (vals (hash-table-ref/default *env-vars-by-run-id* run-id #f)) - (link-tree (common:get-linktree))) ;; (configf:lookup *configdat* "setup" "linktree"))) - (if testname (setenv "MT_TEST_NAME" testname)) - (if itempath (setenv "MT_ITEMPATH" itempath)) - - ;; get the info from the db and put it in the cache - (if link-tree - (setenv "MT_LINKTREE" link-tree) - (debug:print-error 0 *default-log-port* "linktree not set, should be set in megatest.config in [setup] section.")) - (if (not vals) - (let ((ht (make-hash-table))) - (hash-table-set! *env-vars-by-run-id* run-id ht) - (set! vals ht) - (for-each - (lambda (key) - (hash-table-set! 
vals (car key) (cadr key))) - keyvals))) - ;; from the cached data set the vars - - (hash-table-for-each - vals - (lambda (key val) - (debug:print 2 *default-log-port* "setenv " key " " val) - (safe-setenv key val))) - ;;(bb-check-path msg: "runs:set-megatest-env-vars block 1") - ;;(BB> "*env-vars-by-run-id*/runid("run-id" vals="(hash-table->alist vals)) - - (if (not (get-environment-variable "MT_TARGET"))(setenv "MT_TARGET" target)) - ;; we had a case where there was an exception generated by the hash-table-ref - ;; due to *configdat* being #f Adding a handle and exit - (let fatal-loop ((count 0)) - (handle-exceptions - exn - (let ((call-chain (get-call-chain)) - (msg ((condition-property-accessor 'exn 'message) exn))) - (if (< count 5) - (begin ;; this call is colliding, do some crude stuff to fix it. - (debug:print 0 *default-log-port* "ERROR: *configdat* was inaccessible! This should never happen. Retry #" count) - (launch:setup force-reread: #t) - (fatal-loop (+ count 1))) - (begin - (debug:print 0 *default-log-port* "FATAL: *configdat* was inaccessible! This should never happen. Retried " count " times. Message: " msg) - (debug:print 0 *default-log-port* "Call chain:") - (with-output-to-port *default-log-port* - - (lambda () - (print "*configdat* is >>"*configdat*"<<") - (pp *configdat*) - (pp call-chain))) - - (exit 1)))) - ;;(bb-check-path msg: "runs:set-megatest-env-vars block 1.5") - (when (or (not *configdat*) (not (hash-table? *configdat*))) - (debug:print 0 *default-log-port* "WARNING: *configdat* was inaccessible! This should never happen. Brute force reread.") - ;;(BB> "ERROR: *configdat* was inaccessible! This should never happen. Brute force reread.") - (thread-sleep! 2) ;; assuming nfs lag. - (launch:setup force-reread: #t)) - (alist->env-vars (hash-table-ref/default *configdat* "env-override" '())))) ;;;; environment is tainted HERE in this let block. 
- ;;(bb-check-path msg: "runs:set-megatest-env-vars block 2") - ;; Lets use this as an opportunity to put MT_RUNNAME in the environment - (let ((runname (if inrunname inrunname (rmt:get-run-name-from-id run-id)))) - (if runname - (setenv "MT_RUNNAME" runname) - (debug:print-error 0 *default-log-port* "no value for runname for id " run-id))) - (setenv "MT_RUN_AREA_HOME" *toppath*) - ;; if a testname and itempath are available set the remaining appropriate variables - (if testname (setenv "MT_TEST_NAME" testname)) - (if itempath (setenv "MT_ITEMPATH" itempath)) - ;;(bb-check-path msg: "runs:set-megatest-env-vars block 3") - (if (and testname link-tree) - (setenv "MT_TEST_RUN_DIR" (conc (getenv "MT_LINKTREE") "/" - (getenv "MT_TARGET") "/" - (getenv "MT_RUNNAME") "/" - (getenv "MT_TEST_NAME") - (if (and itempath - (not (equal? itempath ""))) - (conc "/" itempath) - "")))))) - -(define (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs) - - ;; Take advantage of a good place to exit if running the one-pass methodology - (if (and (> (runs:dat-can-run-more-tests-count runsdat) 20) - (args:get-arg "-one-pass")) - (exit 0)) - - (thread-sleep! (cond ;; BB: check with Matt. Should this sleep move to cond clauses below where we determine we have too many jobs running rather than each time the and condition above is true (which seems like always)? - ((> (runs:dat-can-run-more-tests-count runsdat) 20) - (if (runs:lownoise "waiting on tasks" 60)(debug:print-info 2 *default-log-port* "waiting for tasks to complete, sleeping briefly ...")) - (configf:lookup-number *configdat* "setup" "inter-test-delay" default: 0.1) ;; was 2 - );; obviously haven't had any work to do for a while - (else 0))) - - (let* ((num-running (rmt:get-count-tests-running run-id)) - (num-running-in-jobgroup (rmt:get-count-tests-running-in-jobgroup run-id jobgroup)) - (job-group-limit (let ((jobg-count (configf:lookup *configdat* "jobgroups" jobgroup))) - (if (string? 
jobg-count) - (string->number jobg-count) - jobg-count)))) - (if (> (+ num-running num-running-in-jobgroup) 0) - (runs:inc-can-run-more-tests-count runsdat)) ;; (set! *runs:can-run-more-tests-count* (+ *runs:can-run-more-tests-count* 1))) - (if (not (eq? *last-num-running-tests* num-running)) - (begin - (debug:print 2 *default-log-port* "max-concurrent-jobs: " max-concurrent-jobs ", num-running: " num-running) - (set! *last-num-running-tests* num-running))) - (if (not (eq? 0 *globalexitstatus*)) - (list #f num-running num-running-in-jobgroup max-concurrent-jobs job-group-limit) - (let* ((can-not-run-more (cond - ;; if max-concurrent-jobs is set and the number running is greater - ;; than it then cannot run more jobs - ((and max-concurrent-jobs (>= num-running max-concurrent-jobs)) - (if (runs:lownoise "mcj msg" 60) - (debug:print 0 *default-log-port* "WARNING: Max running jobs exceeded, current number running: " num-running - ", max_concurrent_jobs: " max-concurrent-jobs)) - #t) - ;; if job-group-limit is set and number of jobs in the group is greater - ;; than the limit then cannot run more jobs of this kind - ((and job-group-limit - (>= num-running-in-jobgroup job-group-limit)) - (if (runs:lownoise (conc "maxjobgroup " jobgroup) 60) - (debug:print 1 *default-log-port* "WARNING: number of jobs " num-running-in-jobgroup - " in jobgroup \"" jobgroup "\" exceeds limit of " job-group-limit)) - #t) - (else #f)))) - (list (not can-not-run-more) num-running num-running-in-jobgroup max-concurrent-jobs job-group-limit))))) - -(define (runs:run-pre-hook run-id) - (let* ((run-pre-hook (configf:lookup *configdat* "runs" "pre-hook")) - (existing-tests (if run-pre-hook - (rmt:get-tests-for-run run-id "%" '() '() ;; run-id testpatt states statuses - #f #f ;; offset limit - #f ;; not-in - #f ;; sort-by - #f ;; sort-order - #f ;; get full data (not 'shortlist) - 0 ;; (runs:gendat-inc-results-last-update *runs:general-data*) ;; last update time - 'dashboard) - '())) - (log-dir 
(conc *toppath* "/logs")) - (log-file (conc "pre-hook-" (string-translate (getenv "MT_TARGET") "/" "-") "-" (getenv "MT_RUNNAME") ".log")) - (full-log-fname (conc log-dir "/" log-file))) - (if run-pre-hook - (if (null? existing-tests) - (let* ((use-log-dir (if (not (directory-exists? log-dir)) - (handle-exceptions - exn - (begin - (debug:print 0 *default-log-port* "WARNING: Failed to create " log-dir) - #f) - (create-directory log-dir #t) - #t) - #t)) - (start-time (current-seconds)) - (actual-logf (if use-log-dir full-log-fname log-file))) - (handle-exceptions - exn - (begin - (print-call-chain *default-log-port*) - (debug:print 0 *default-log-port* "Message: " ((condition-property-accessor 'exn 'message) exn)) - (debug:print 0 *default-log-port* "ERROR: failed to run pre-hook " run-pre-hook ", check the log " log-file)) - (debug:print-info 0 *default-log-port* "running run-pre-hook: \"" run-pre-hook "\", log is " actual-logf) - (system (conc run-pre-hook " >> " actual-logf " 2>&1")) - (debug:print-info 0 *default-log-port* "pre-hook \"" run-pre-hook "\" took " (- (current-seconds) start-time) " seconds to run."))) - (debug:print 0 *default-log-port* "Skipping pre-hook call \"" run-pre-hook "\" as there are existing tests for this run."))))) - -(define (runs:run-post-hook run-id) - (let* ((run-post-hook (configf:lookup *configdat* "runs" "post-hook")) - (existing-tests (if run-post-hook - (rmt:get-tests-for-run run-id "%" '() '() ;; run-id testpatt states statuses - #f #f ;; offset limit - #f ;; not-in - #f ;; sort-by - #f ;; sort-order - #f ;; get full data (not 'shortlist) - 0 ;; (runs:gendat-inc-results-last-update *runs:general-data*) ;; last update time - 'dashboard) - '())) - (log-dir (conc *toppath* "/logs")) - (log-file (conc "post-hook-" (string-translate (getenv "MT_TARGET") "/" "-") "-" (getenv "MT_RUNNAME") ".log")) - (full-log-fname (conc log-dir "/" log-file))) - (if run-post-hook - ;; (if (null? 
existing-tests) - ;; (debug:print 0 *default-log-port* "Skipping post-hook call \"" run-post-hook "\" as there are existing tests for this run."))))) - (let* ((use-log-dir (if (not (directory-exists? log-dir)) - (handle-exceptions - exn - (begin - (debug:print 0 *default-log-port* "WARNING: Failed to create " log-dir) - #f) - (create-directory log-dir #t) - #t) - #t)) - (start-time (current-seconds)) - (actual-logf (if use-log-dir full-log-fname log-file))) - (handle-exceptions - exn - (begin - (print-call-chain *default-log-port*) - (debug:print 0 *default-log-port* "Message: " ((condition-property-accessor 'exn 'message) exn)) - (debug:print 0 *default-log-port* "ERROR: failed to run post-hook " run-post-hook ", check the log " log-file)) - (debug:print-info 0 *default-log-port* "running run-post-hook: \"" run-post-hook "\", log is " actual-logf) - (system (conc run-post-hook " >> " actual-logf " 2>&1")) - (debug:print-info 0 *default-log-port* "post-hook \"" run-post-hook "\" took " (- (current-seconds) start-time) " seconds to run.")))))) - -;;====================================================================== -;; runs:run-tests is called from megatest.scm and itself -;;====================================================================== -;; -;; test-names: Comma separated patterns same as test-patts but used in selection -;; of tests to run. The item portions are not respected. -;; FIXME: error out if /patt specified -;; -(define (runs:run-tests target runname test-patts user flags #!key (run-count 1)) ;; test-names - (let* ((keys (keys:config-get-fields *configdat*)) - (keyvals (keys:target->keyval keys target)) - (run-id (rmt:register-run keyvals runname "new" "n/a" user (args:get-arg "-contour"))) ;; test-name))) - ;; (deferred '()) ;; delay running these since they have a waiton clause - (runconfigf (conc *toppath* "/runconfigs.config")) - (dbfile (conc *toppath* "/megatest.db")) - (readonly-mode (not (file-write-access? 
dbfile))) - (test-records (make-hash-table)) - ;; need to process runconfigs before generating these lists - (all-tests-registry #f) ;; (tests:get-all)) ;; (tests:get-valid-tests (make-hash-table) test-search-path)) ;; all valid tests to check waiton names - (all-test-names #f) ;; (hash-table-keys all-tests-registry)) - (test-names #f) ;; Generated by a call to (tests:filter-test-names all-test-names test-patts)) - (required-tests #f) ;; Put fully qualified test/testpath names in this list to be done - (waitors-upon (make-hash-table)) ;; given a test, return list of tests waiting upon this test. - (task-key (conc (hash-table->alist flags) " " (get-host-name) " " (current-process-id))) - ;; (tdbdat (tasks:open-db)) - (config-reruns (let ((x (configf:lookup *configdat* "setup" "reruns"))) - (if x (string->number x) #f))) - (allowed-tests #f) - (runconf #f)) - - ;; check if readonly - (when readonly-mode - (debug:print-error 0 *default-log-port* "megatest.db is readonly. Cannot proceed.") - (exit 1)) - - ;; per user request. If less than 100Meg space on dbdir partition, bail out with error - ;; this will reduce issues in database corruption - (common:check-db-dir-and-exit-if-insufficient) - - ;; override the number of reruns from the configs - ;; this needs to be done at the place where is first runs:run-tests called - ;(if (and config-reruns - ; (> run-count config-reruns)) - ;(set! run-count config-reruns)) - - ;; (if (tasks:need-server run-id)(tasks:start-and-wait-for-server tdbdat run-id 10)) - - (let ((sighand (lambda (signum) - ;; (signal-mask! signum) ;; to mask or not? seems to cause issues in exiting - (set! *time-to-exit* #t) - (print "Received signal " signum ", cleaning up before exit. Please wait...") - (let ((th1 (make-thread (lambda () - ;; (let ((tdbdat (tasks:open-db))) - (rmt:tasks-set-state-given-param-key task-key "killed") ;; ) - (print "Killed by signal " signum ". Exiting") - (thread-sleep! 
3) - (exit)))) - (th2 (make-thread (lambda () - (thread-sleep! 5) - (debug:print 0 *default-log-port* "Done") - (exit 4))))) - (thread-start! th2) - (thread-start! th1) - (thread-join! th2))))) - (set-signal-handler! signal/int sighand) - (set-signal-handler! signal/term sighand)) - - ;; force the starting of a server -- removed BB 17ww28 - no longer needed. - ;;(debug:print 0 *default-log-port* "waiting on server...") - ;;(server:start-and-wait *toppath*) - - (runs:set-megatest-env-vars run-id inkeys: keys inrunname: runname) ;; these may be needed by the launching process - (set! runconf (if (common:file-exists? runconfigf) - (setup-env-defaults runconfigf run-id *already-seen-runconfig-info* keyvals target) - (begin - (debug:print 0 *default-log-port* "WARNING: You do not have a run config file: " runconfigf) - #f))) - - (if (not test-patts) ;; first time in - adjust testpatt - (set! test-patts (common:args-get-testpatt runconf))) - ;; if test-patts is #f at this point there is something wrong and we need to bail out - (if (not test-patts) - (begin - (debug:print 0 *default-log-port* "WARNING: there is no test pattern for this run. Exiting now.") - (exit 0))) - - (if (args:get-arg "-tagexpr") - (begin - (set! allowed-tests (string-join (runs:get-tests-matching-tags (args:get-arg "-tagexpr")) ",")) - (debug:print-info 0 *default-log-port* "filtering initial test list with tagexpr: " (args:get-arg "-tagexpr") " => " allowed-tests) - ));; tests will be ANDed with this list - - ;; register this run in monitor.db - (rmt:tasks-add "run-tests" user target runname test-patts task-key) ;; params) - (rmt:tasks-set-state-given-param-key task-key "running") - - #;(common:telemetry-log "run-tests" - payload: - `( (target . ,target) - (run-name . ,runname) - (test-patts . ,test-patts) ) ) - - - ;; Now generate all the tests lists - (set! all-tests-registry (tests:get-all)) ;; hash of testname => path-to-test - (set! 
all-test-names (hash-table-keys all-tests-registry)) - ;; filter first for allowed-tests (from -tagexpr) then for test-patts. - (set! test-names (tests:filter-test-names - (if allowed-tests - (tests:filter-test-names all-test-names allowed-tests) - all-test-names) - test-patts)) - - ;; I think seeding required-tests with all test-names makes sense but lack analysis to back that up. - - ;; NEW STRATEGY HERE: - ;; 1. fill required tests with test-patts - ;; 2. scan testconfigs and if waitons, itemwait, itempatt calc prior test test-patt - ;; 3. repeat until all deps propagated - - ;; any tests with direct mention in test-patts can be added to required - ;;(set! required-tests (lset-intersection equal? (string-split test-patts ",") all-test-names)) - (set! required-tests (tests:filter-test-names all-test-names test-patts)) - ;; - ;; (set! required-tests (lset-intersection equal? test-names all-test-names)) - - ;; look up all tests matching the comma separated list of globs in - ;; test-patts (using % as wildcard) - - ;; (set! test-names (delete-duplicates (tests:get-valid-tests *toppath* test-patts))) - (debug:print-info 0 *default-log-port* "tests search path: " (string-intersperse (tests:get-tests-search-path *configdat*) " ")) - (debug:print-info 0 *default-log-port* "all tests: " (string-intersperse (sort all-test-names string<) " ")) - (debug:print-info 0 *default-log-port* "test names: " (string-intersperse (sort test-names string<) " ")) - (debug:print-info 0 *default-log-port* "required tests: " (string-intersperse (sort required-tests string<) " ")) - - ;; on the first pass or call to run-tests set FAILS to NOT_STARTED if - ;; -keepgoing is specified - (if (eq? *passnum* 0) - (begin - ;; Is this still necessary? I think not. Unreachable tests are marked as such and - ;; should not cause problems here. - ;; - ;; have to delete test records where NOT_STARTED since they can cause -keepgoing to - ;; get stuck due to becoming inaccessible from a failed test. I.e. 
if test B depends - ;; on test A but test B reached the point on being registered as NOT_STARTED and test - ;; A failed for some reason then on re-run using -keepgoing the run can never complete. - ;; - ;; (rmt:general-call 'delete-tests-in-state run-id "NOT_STARTED") - - ;; Now convert anything in allow-auto-rerun to NOT_STARTED - ;; - (for-each - (lambda (state-status) - (let* ((ss-lst (string-split-fields "/" state-status #:infix)) - (state (if (> (length ss-lst) 0)(car ss-lst) #f)) - (status (if (> (length ss-lst) 1)(cadr ss-lst) #f))) - (rmt:set-tests-state-status run-id test-names state status "NOT_STARTED" status))) - ;; list of state/status pairs separated by spaces - (string-split (or (configf:lookup *configdat* "setup" "allow-auto-rerun") ""))))) - - ;; Ensure all tests are registered in the test_meta table - (runs:update-all-test_meta #f) - - ;; run the run prehook if there are no tests yet run for this run: - ;; - (runs:run-pre-hook run-id) - ;; mark all test launced flag as false in the meta table - (rmt:set-var (conc "lunch-complete-" run-id) "no") - (rmt:set-run-state-status run-id "new" "n/a") - ;; now add non-directly referenced dependencies (i.e. waiton) - ;;====================================================================== - ;; refactoring this block into tests:get-full-data - ;; - ;; What happended, this code is now duplicated in tests!? - ;; - ;;====================================================================== - - (if (not (null? test-names)) ;; BEGIN test-names loop - (let loop ((hed (car test-names)) ;; NOTE: This is the main loop that iterates over the test-names - (tal (cdr test-names))) ;; 'return-procs tells the config reader to prep running system but return a proc - (debug:print-info 4 *default-log-port* "\n\ntestpatt elaboration loop => hed="hed " tal="tal" test-patts="test-patts" test-names="test-names) - (change-directory *toppath*) ;; PLEASE OPTIMIZE ME!!! 
I think this should be a no-op but there are several places where change-directories could be happening. - (setenv "MT_TEST_NAME" hed) ;; - (let*-values (((waitons waitors config) (tests:get-waitons hed all-tests-registry)) - - ;; NOTE: Have the config - can extract [waitons] section - - ((hed-mode) - (let ((m (configf:lookup config "requirements" "mode"))) - (if m (map string->symbol (string-split m)) '(normal)))) - ((hed-itemized-waiton) ;; are items in hed waiting on items of waiton? - (not (null? (lset-intersection eq? hed-mode '(itemmatch itemwait))))) - ) - (debug:print-info 8 *default-log-port* "waitons: " waitons) - ;; check for hed in waitons => this would be circular, remove it and issue an - ;; error - (if (or (member hed waitons) - (member hed waitors)) - (begin - (debug:print-error 0 *default-log-port* "test " hed " has listed itself as a waiton or waitor, please correct this!") - (set! waitons (filter (lambda (x)(not (equal? x hed))) waitons)) - (set! waitors (filter (lambda (x)(not (equal? x hed))) waitors)))) - - ;; (items (items:get-items-from-config config))) - (if (not (hash-table-ref/default test-records hed #f)) ;; waiton-tconfig below will be #f until that test is visted here at least once - (hash-table-set! 
test-records ;; BB: we are doing a manual make-tests:testqueue - hed (vector hed ;; 0 ;; testname - config ;; 1 - waitons ;; 2 - (configf:lookup config "requirements" "priority") ;; priority 3 - (tests:get-items config) ;; 4 ;; expand the [items] and or [itemstable] into explict items - #f ;; itemsdat 5 - #f ;; spare - used for item-path - waitors ;; - ))) - ;; update waitors-upon here - (for-each - (lambda (waiton) - (let* ((current-waitors-upon (hash-table-ref/default waitors-upon waiton '()))) - (debug:print-info 8 *default-log-port* " current-waiters-upon["waiton"] is "current-waitors-upon ) - (when (not (member hed current-waitors-upon)) - (debug:print-info 8 *default-log-port* " current-waiters-upon["waiton"] << "hed ) - (hash-table-set! waitors-upon waiton (cons hed current-waitors-upon))))) - (if (list? waitons) waitons '())) - (debug:print-info 8 *default-log-port* " process waitons&waitors of "hed": "(delete-duplicates (append waitons waitors))) - (for-each - (lambda (waiton) - (if (and waiton (not (member waiton test-names))) - (let* ((waitors-in-testpatt (runs:testpatts-mention-waitors-upon? 
test-patts (hash-table-ref/default waitors-upon waiton '()))) - (waiton-record (hash-table-ref/default test-records waiton #f)) - (waiton-tconfig (if waiton-record (vector-ref waiton-record 1) #f)) - (waiton-itemized (and waiton-tconfig - (or (hash-table-ref/default waiton-tconfig "items" #f) - (hash-table-ref/default waiton-tconfig "itemstable" #f)))) - (itemmaps (tests:get-itemmaps config)) ;; (configf:lookup config "requirements" "itemmap")) - (new-test-patts (tests:extend-test-patts test-patts hed waiton itemmaps hed-itemized-waiton))) - (debug:print-info 0 *default-log-port* "Test " waiton " has " (if waiton-record "a" "no") " waiton-record and" (if waiton-itemized " " " no ") "items") - ;; need to account for test-patt here, if I am test "a", selected with a test-patt of "hed/b%" - ;; and we are waiting on "waiton" we need to add "waiton/,waiton/b%" to test-patt - ;; is this satisfied by merely appending "/" to the waiton name added to the list? - ;; - ;; This approach causes all of the items in an upstream test to be run - ;; if we have this waiton already processed once we can analzye it for extending - ;; tests to be run, since we can't properly process waitons unless they have been - ;; initially added we add them again to be processed on second round AND add the hed - ;; back in to also be processed on second round - (if waiton-tconfig ;; BB: waiter should be in test-patts as well as the waiton have a tconfig. - (if waiton-itemized - (if waitors-in-testpatt - (begin - (debug:print-info 0 *default-log-port* "New test patts: " new-test-patts ", prev test patts: " test-patts) - (set! test-names (cons waiton test-names)) ;; need to process this one, only add once the waiton tconfig read - (set! required-tests (cons (conc waiton "/") required-tests)) - (set! test-patts new-test-patts)) - (begin - (debug:print-info 0 *default-log-port* "Waitor(s) not yet on testpatt for " waiton ", setting up to re-process it") - (set! 
tal (append (cons waiton tal)(list hed))))) - (begin - (debug:print-info 0 *default-log-port* "Adding non-itemized test " waiton " to required-tests") - (set! required-tests (cons waiton required-tests)) - (set! test-patts new-test-patts))) - (begin - (debug:print-info 0 *default-log-port* "No testconfig info yet for " waiton ", setting up to re-process it") - (set! tal (append (cons waiton tal)(list hed))))) ;; (cons (conc waiton "/") required-tests)) - ;; NOPE: didn't work. required needs to be plain test names. Try tacking on to test-patts - ;; - doesn't work - ;; (set! test-patts (conc test-patts "," waiton "/")) - ;; (set! test-names (cons waiton test-names))))) ;; was an append, now a cons - ))) - (delete-duplicates (append waitons waitors))) - (let ((remtests (delete-duplicates (append waitons tal)))) - (debug:print-info 8 *default-log-port* " remtests are "remtests) - (if (not (null? remtests)) - (begin - ;; (debug:print-info 0 *default-log-port* "Preprocessing continues for " (string-intersperse remtests ", ")) - (loop (car remtests)(cdr remtests)))))))) ;; END test-names loop - - (if (not (null? required-tests)) - (debug:print-info 1 *default-log-port* "Adding \"" (string-intersperse required-tests " ") "\" to the run queue")) - ;; NOTE: these are all parent tests, items are not expanded yet. - (debug:print-info 4 *default-log-port* "test-records=" (hash-table->alist test-records)) - (let ((reglen (configf:lookup *configdat* "setup" "runqueue"))) - (if (> (length (hash-table-keys test-records)) 0) - (let* ((keep-going #t) - (run-queue-retries 5) - #;(th1 (make-thread (lambda () - (handle-exceptions - exn - (begin - (print-call-chain) - (print " message: " ((condition-property-accessor 'exn 'message) exn))) - (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests - (any->number reglen) all-tests-registry))) - "runs:run-tests-queue")) - (th2 (make-thread (lambda () ;; BBQ: why are we visiting ALL runs here? 
- ;; (rmt:find-and-mark-incomplete-all-runs))))) CAN'T INTERRUPT IT ... - (let ((run-ids (rmt:get-all-run-ids))) - (for-each (lambda (run-id) - (if keep-going - (handle-exceptions - exn - (debug:print 0 *default-log-port* "error in calling find-and-mark-incomplete for run-id " run-id) - (rmt:find-and-mark-incomplete run-id #f)))) ;; ovr-deadtime))) ;; could be root of https://hsdes.intel.com/appstore/article/#/220546828/main -- Title: Megatest jobs show DEAD even though they are still running (1.64/27) - run-ids))) - "runs: mark-incompletes"))) - ;; (thread-start! th1) - (thread-start! th2) - ;; (thread-join! th1) - ;; just do the main stuff in the main thread - (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests - (any->number reglen) all-tests-registry) - (set! keep-going #f) - (thread-join! th2) - ;; if run-count > 0 call, set -preclean and -rerun STUCK/DEAD - (if (> run-count 0) ;; handle reruns - (begin - (if (not (hash-table-ref/default flags "-preclean" #f)) - (hash-table-set! flags "-preclean" #t)) - (if (not (hash-table-ref/default flags "-rerun" #f)) - (hash-table-set! flags "-rerun" "STUCK/DEAD,n/a,ZERO_ITEMS")) - ;; recursive call to self - (runs:run-tests target runname test-patts user flags run-count: (- run-count 1))))) - (debug:print-info 0 *default-log-port* "No tests to run"))) - (debug:print-info 4 *default-log-port* "All done by here") - ;; TODO: try putting post hook call here - ;(if (eq? run-count 0) - ; (begin - ; (debug:print-info 0 *default-log-port* "Calling Post Hook") - ; (debug:print-info 2 *default-log-port* " run-count " run-count) - ; (runs:run-post-hook run-id)) - ; (debug:print-info 2 *default-log-port* "Not calling post hook runcount = " run-count )) - (rmt:tasks-set-state-given-param-key task-key "done") - ;; (sqlite3:finalize! tasks-db) - )) - - -;; loop logic. These are used in runs:run-tests-queue to make it a bit more readable. 
-;; -;; If reg not full and have items in tal then loop with (car tal)(cdr tal) reg reruns -;; If reg is full (i.e. length >= n -;; loop with (car reg) tal (cdr reg) reruns -;; If tal is empty -;; but have items in reg; loop with (car reg)(cdr reg) '() reruns -;; If reg is empty => all done - -(define (runs:queue-next-hed tal reg n regfull) - (if regfull - (if (null? reg) #f (car reg)) - (if (null? tal) ;; tal is used up, pop from reg - (if (null? reg) #f (car reg)) - (car tal)))) - -(define (runs:queue-next-tal tal reg n regfull) - (if regfull - tal - (if (null? tal) ;; must transfer from reg - (if (null? reg) '() (cdr reg)) - (cdr tal)))) - -(define (runs:queue-next-reg tal reg n regfull) - (if regfull - (if (null? reg) '() (cdr reg)) ;; EXPLORE: reorder (cdr reg) such that looping is more efficient - (if (null? tal) ;; if tal is null and reg not full then '() as reg contents moved to tal - '() - reg))) - -;; this is the list of parameters to the named loop "loop" near the top of runs:run-tests-queue, look around line 1216 -;; -(define (runs:loop-values tal reg reglen regfull reruns) - (list (runs:queue-next-hed tal reg reglen regfull) ;; hed - (runs:queue-next-tal tal reg reglen regfull) ;; tal - (runs:queue-next-reg tal reg reglen regfull) ;; reg - reruns)) ;; reruns - -;; objective - iterate thru tests -;; => want to prioritize tests we haven't seen before -;; => sometimes need to squeeze things in (added to reg) -;; => review of a previously seen test is higher priority of never visited test -;; reg - list of previously visited tests -;; tal - list of never visited tests -;; prefer next hed to be from reg than tal. 
- -(define runs:nothing-left-in-queue-count 0) - -;;====================================================================== -;; runs:expand-items is called by runs:run-tests-queue -;;====================================================================== -;; -;; return value of runs:expand-items is passed back to runs-tests-queue and is fed to named loop with this signature: -;; (let loop ((hed (car sorted-test-names)) -;; (tal (cdr sorted-test-names)) -;; (reg '()) ;; registered, put these at the head of tal -;; (reruns '())) -(define (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry test-records itemmaps) - (let* ((loop-list (list hed tal reg reruns)) - (prereqs-not-met (let ((res (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps))) - (if (list? res) - res - (begin - (debug:print 0 *default-log-port* - "ERROR: rmt:get-prereqs-not-met returned non-list!\n" - " res=" res " run-id=" run-id " waitons=" waitons " hed=" hed " item-path=" item-path " testmode=" testmode " itemmaps=" itemmaps) - '())))) - (have-itemized (not (null? (lset-intersection eq? testmode '(itemmatch itemwait))))) - ;; (prereqs-not-met (mt:lazy-get-prereqs-not-met run-id waitons item-path mode: testmode itemmap: itemmap)) - (fails (runs:calc-fails prereqs-not-met)) - (prereq-fails (runs:calc-prereq-fail prereqs-not-met)) - (non-completed (runs:calc-not-completed prereqs-not-met)) - (runnables (runs:calc-runnable prereqs-not-met)) - (unexpanded-prereqs - (filter (lambda (testname) - (let* ((test-rec (hash-table-ref test-records testname)) - (items (tests:testqueue-get-items test-rec))) - ;;(BB> "HEY " testname "=>"items) - (or (procedure? items)(eq? 
items 'have-procedure)))) - waitons)) - - - ) - (debug:print-info 4 *default-log-port* "START OF INNER COND #2 " - "\n can-run-more: " can-run-more - "\n testname: " hed - "\n prereqs-not-met: " (runs:pretty-string prereqs-not-met) - "\n non-completed: " (runs:pretty-string non-completed) - "\n prereq-fails: " (runs:pretty-string prereq-fails) - "\n fails: " (runs:pretty-string fails) - "\n testmode: " testmode - "\n (member 'toplevel testmode): " (member 'toplevel testmode) - "\n (null? non-completed): " (null? non-completed) - "\n reruns: " reruns - "\n items: " items - "\n can-run-more: " can-run-more) - - (cond - ;; all prereqs met, fire off the test - ;; or, if it is a 'toplevel test and all prereqs not met are COMPLETED then launch - - ((and (not (member 'toplevel testmode)) - (member (hash-table-ref/default test-registry (db:test-make-full-name hed item-path) 'n/a) - '(DONOTRUN removed CANNOTRUN))) ;; *common:cant-run-states-sym*) ;; '(COMPLETED KILLED WAIVED UNKNOWN INCOMPLETE)) ;; try to catch repeat processing of COMPLETED tests here - (debug:print-info 4 *default-log-port* "cond branch - " "ei-1") - (debug:print-info 1 *default-log-port* "Test " hed " set to \"" (hash-table-ref test-registry (db:test-make-full-name hed item-path)) "\". Removing it from the queue") - (if (or (not (null? tal)) - (not (null? reg))) - (runs:loop-values tal reg reglen regfull reruns) - (begin - (debug:print-info 0 *default-log-port* "Nothing left in the queue!") - ;; If get here twice then we know we've tried to expand all items - ;; since there must be a logic issue with the handling of loops in the - ;; items expand phase we will brute force an exit here. - (if (> runs:nothing-left-in-queue-count 2) - (begin - (debug:print 0 *default-log-port* "WARNING: this condition is triggered when there were no items to expand and nothing to run. Please check your run for completeness") - (exit 0)) - (set! 
runs:nothing-left-in-queue-count (+ runs:nothing-left-in-queue-count 1))) - #f))) - - ;; desired result of below cond branch: - ;; we want to expand items in our test of interest (hed) in the following cases: - ;; case 1 - mode is itemmatch or itemwait: - ;; - all prereq tests have been expanded - ;; - at least one prereq's items have completed - ;; case 2 - mode is toplevel - ;; - prereqs are completed. - ;; - or no prereqs can complete - ;; case 3 - mode not specified - ;; - prereqs are completed and passed (we could consider removing "and passed" -- it would change behavior from current) - ((or (null? prereqs-not-met) - (and (member 'toplevel testmode) - (null? non-completed))) - (debug:print-info 4 *default-log-port* "cond branch - " "ei-2") - (debug:print-info 4 *default-log-port* "runs:expand-items: (or (null? prereqs-not-met) (and (member 'toplevel testmode)(null? non-completed)))") - (let ((test-name (tests:testqueue-get-testname test-record))) - (setenv "MT_TEST_NAME" test-name) ;; - (setenv "MT_RUNNAME" runname) - (runs:set-megatest-env-vars run-id inrunname: runname) ;; these may be needed by the launching process - (let ((items-list (items:get-items-from-config tconfig))) - (if (list? items-list) - (begin - (if (null? items-list) - (let ((test-id (rmt:get-test-id run-id test-name "")) - (num-items (rmt:test-toplevel-num-items run-id test-name))) - (if (and test-id - (not (> num-items 0))) - (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "ZERO_ITEMS" "Failed to run due to failed prerequisites")))) - (tests:testqueue-set-items! test-record items-list) - (list hed tal reg reruns)) - (begin - (debug:print-error 0 *default-log-port* "The proc from reading the items table did not yield a list - please report this") - (exit 1)))))) - - ((and (null? fails) - (null? prereq-fails) - (not (null? non-completed))) - (debug:print-info 4 *default-log-port* "cond branch - " "ei-3") - (let* ((allinqueue (map (lambda (x)(if (string? 
x) x (db:test-get-testname x))) - (append newtal reruns))) - ;; prereqstrs is a list of test names as strings that are prereqs for hed - (prereqstrs (delete-duplicates (map (lambda (x)(if (string? x) x (db:test-get-testname x))) - prereqs-not-met))) - ;; a prereq that is not found in allinqueue will be put in the notinqueue list - ;; - ;; (notinqueue (filter (lambda (x) - ;; (not (member x allinqueue))) - ;; prereqstrs)) - (give-up #f)) - - ;; We can get here when a prereq has not been run due to *it* having a prereq that failed. - ;; We need to use this to dequeue this item as CANNOTRUN - ;; - (if (member 'toplevel testmode) ;; '(toplevel)) ;; NOTE: this probably should be (member 'toplevel testmode) - (for-each (lambda (prereq) - (if (eq? (hash-table-ref/default test-registry prereq 'justfine) 'CANNOTRUN) - (set! give-up #t))) - prereqstrs)) - - (if (and give-up - (not (and (null? tal)(null? reg)))) - (let ((trimmed-tal (mt:discard-blocked-tests run-id hed tal test-records)) - (trimmed-reg (mt:discard-blocked-tests run-id hed reg test-records))) - (debug:print 1 *default-log-port* "WARNING: test " hed " has discarded prerequisites, removing it from the queue") - - (let ((test-id (rmt:get-test-id run-id hed ""))) - (if test-id (mt:test-set-state-status-by-id run-id test-id "COMPLETED" "PREQ_DISCARDED" "Failed to run due to discarded prerequisites"))) - - (if (and (null? trimmed-tal) - (null? trimmed-reg)) - #f - (runs:loop-values trimmed-tal trimmed-reg reglen regfull reruns) - )) - (list (car newtal)(append (cdr newtal) reg) '() reruns)))) - - ((and (null? fails) ;; have not-started tests, but unable to run them. everything looks completed with no prospect of unsticking something that is stuck. we should mark hed as moribund and exit or continue if there are more tests to consider - (null? prereq-fails) - (null? non-completed)) - (debug:print-info 4 *default-log-port* "cond branch - " "ei-4") - (if (runs:can-keep-running? 
hed 20) - (begin - (runs:inc-cant-run-tests hed) - (debug:print-info 0 *default-log-port* "no fails in prerequisites for " hed " but also none running, keeping " hed " for now. Try count: " (hash-table-ref/default *seen-cant-run-tests* hed 0)) ;; - ;; getting here likely means the system is way overloaded, kill a full minute before continuing - (thread-sleep! 60) ;; TODO: gate by normalized server load > 1.0 (maxload config thing) - ;; num-retries code was here - ;; we use this opportunity to move contents of reg to tal - (list (car newtal)(append (cdr newtal) reg) '() reruns)) ;; an issue with prereqs not yet met? - (begin - (debug:print-info 1 *default-log-port* "no fails in prerequisites for " hed " but nothing seen running in a while, dropping test " hed " from the run queue") - (let ((test-id (rmt:get-test-id run-id hed ""))) - (if test-id (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "TIMED_OUT" "Nothing seen running in a while."))) - (runs:loop-values tal reg reglen regfull reruns) - ))) - - ((and - (or (not (null? fails)) - (not (null? prereq-fails))) - (member 'normal testmode)) - (debug:print-info 4 *default-log-port* "cond branch - " "ei-5") - (debug:print-info 1 *default-log-port* "test " hed " (mode=" testmode ") has failed prerequisite(s); " - (string-intersperse (map (lambda (t)(conc (db:test-get-testname t) ":" (db:test-get-state t)"/"(db:test-get-status t))) fails) ", ") - ", removing it from to-do list") - (let ((test-id (rmt:get-test-id run-id hed ""))) - (if test-id - (if (not (null? 
prereq-fails)) - (mt:test-set-state-status-by-id-unless-completed run-id test-id "COMPLETED" "PREQ_DISCARDED" "Failed to run due to prior failed prerequisites") - (mt:test-set-state-status-by-id-unless-completed run-id test-id "COMPLETED" "PREQ_FAIL" "Failed to run due to failed prerequisites")))) - ;; (debug:print 4 *default-log-port*"BB> set PREQ_FAIL on "hed) - ;; (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_FAIL" "Failed to run due to failed prerequisites")))) ;; BB: this works, btu equivalent for itemwait mode does not work. - (if (or (not (null? reg))(not (null? tal))) - (begin - (hash-table-set! test-registry hed 'CANNOTRUN) - (runs:loop-values tal reg reglen regfull (cons hed reruns)) - ) - #f)) ;; #f flags do not loop - - ((and (not (null? fails))(member 'toplevel testmode)) - (debug:print-info 4 *default-log-port* "cond branch - " "ei-6") - (if (or (not (null? reg))(not (null? tal))) - (list (car newtal)(append (cdr newtal) reg) '() reruns) - #f)) - ((null? runnables) - (debug:print-info 4 *default-log-port* "cond branch - " "ei-7") - #f) ;; if we get here and non-completed is null then it is all over. - (else - (debug:print-info 4 *default-log-port* "cond branch - " "ei-8") - (debug:print 0 *default-log-port* "WARNING: FAILS or incomplete tests maybe preventing completion of this run. Watch for issues with test " hed ", continuing for now") - (list (car newtal)(cdr newtal) reg reruns))))) - -(define (runs:mixed-list-testname-and-testrec->list-of-strings inlst) - (if (null? inlst) - '() - (map (lambda (t) - (cond - ((vector? t) - (let ((test-name (db:test-get-testname t)) - (item-path (db:test-get-item-path t)) - (test-state (db:test-get-state t)) - (test-status (db:test-get-status t))) - (conc test-name (if (equal? item-path "") "" "/") item-path ":" test-state "/" test-status))) - ((string? 
t) - t) - (else - (conc t)))) - inlst))) - - -;; hed tal reg reruns reglen regfull test-record runname test-name item-path jobgroup max-concurrent-jobs run-id waitons item-path testmode test-patts required-tests test-registry registry-mutex flags keyvals run-info newtal all-tests-registry itemmaps) -(define (runs:process-expanded-tests runsdat testdat) - ;; unroll the contents of runsdat and testdat (due to ongoing refactoring). - (debug:print 2 *default-log-port* "runs:process-expanded-tests; testdat:" ) - (debug:print 2 *default-log-port* (with-output-to-string - (lambda () (pp (runs:testdat->alist testdat) )))) - (let* ((hed (runs:testdat-hed testdat)) - (tal (runs:testdat-tal testdat)) - (reg (runs:testdat-reg testdat)) - (reruns (runs:testdat-reruns testdat)) - (test-name (runs:testdat-test-name testdat)) - (item-path (runs:testdat-item-path testdat)) - (jobgroup (runs:testdat-jobgroup testdat)) - (waitons (runs:testdat-waitons testdat)) - (item-path (runs:testdat-item-path testdat)) - (testmode (runs:testdat-testmode testdat)) - (newtal (runs:testdat-newtal testdat)) - (itemmaps (runs:testdat-itemmaps testdat)) - (test-record (runs:testdat-test-record testdat)) - (prereqs-not-met (runs:testdat-prereqs-not-met testdat)) - - (reglen (runs:dat-reglen runsdat)) - (regfull (runs:dat-regfull runsdat)) - (runname (runs:dat-runname runsdat)) - (max-concurrent-jobs (runs:dat-max-concurrent-jobs runsdat)) - (run-id (runs:dat-run-id runsdat)) - (test-patts (runs:dat-test-patts runsdat)) - (required-tests (runs:dat-required-tests runsdat)) - (test-registry (runs:dat-test-registry runsdat)) - (registry-mutex (runs:dat-registry-mutex runsdat)) - (flags (runs:dat-flags runsdat)) - (keyvals (runs:dat-keyvals runsdat)) - (run-info (runs:dat-run-info runsdat)) - (all-tests-registry (runs:dat-all-tests-registry runsdat)) - (run-limits-info (runs:dat-can-run-more-tests runsdat)) - ;; (runs:can-run-more-tests run-id jobgroup max-concurrent-jobs)) ;; look at the test jobgroup and 
tot jobs running - (have-resources (car run-limits-info)) - (num-running (list-ref run-limits-info 1)) - (num-running-in-jobgroup(list-ref run-limits-info 2)) - (max-concurrent-jobs (list-ref run-limits-info 3)) - (job-group-limit (list-ref run-limits-info 4)) - ;; (prereqs-not-met (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps)) - ;; (prereqs-not-met (mt:lazy-get-prereqs-not-met run-id waitons item-path mode: testmode itemmap: itemmap)) - (fails (if (list? prereqs-not-met) ;; TODO: rename fails to failed-prereqs - (runs:calc-fails prereqs-not-met) - (begin - (debug:print-error 0 *default-log-port* "prereqs-not-met is not a list! " prereqs-not-met) - '()))) - (non-completed (filter (lambda (x) ;; remove hed from not completed list, duh, of course it is not completed! - (not (equal? x hed))) - (runs:calc-not-completed prereqs-not-met))) - (loop-list (list hed tal reg reruns)) - ;; configure the load runner - (numcpus (common:get-num-cpus #f)) - (maxload (string->number (or (configf:lookup *configdat* "jobtools" "maxload") "3.0"))) ;; use a non-number string to disable - (maxhomehostload (string->number (or (configf:lookup *configdat* "jobtools" "maxhomehostload") "2.0"))) ;; use a non-number string to disable - (waitdelay (string->number (or (configf:lookup *configdat* "jobtools" "waitdelay") "60")))) - (debug:print-info 4 *default-log-port* "have-resources: " have-resources " prereqs-not-met: (" - (string-intersperse - (map (lambda (t) - (if (vector? t) - (conc (db:test-get-state t) "/" (db:test-get-status t)) - (conc " WARNING: t is not a vector=" t ))) - prereqs-not-met) - ", ") ") fails: " fails - "\nregistered? " (hash-table-ref/default test-registry (db:test-make-full-name test-name item-path) #f)) - - - - (if (and (not (null? 
prereqs-not-met)) - (runs:lownoise (conc "waiting on tests " prereqs-not-met hed) 60)) - (debug:print-info 2 *default-log-port* "waiting on tests; " (string-intersperse (runs:mixed-list-testname-and-testrec->list-of-strings prereqs-not-met) ", "))) - - ;; Don't know at this time if the test have been launched at some time in the past - ;; i.e. is this a re-launch? - (debug:print-info 4 *default-log-port* "run-limits-info = " run-limits-info) - - (cond ; cond 894- 1067 - - ;; Check item path against item-patts, - ;; - ((not (tests:match test-patts (tests:testqueue-get-testname test-record) item-path required: required-tests)) ;; This test/itempath is not to be run - ;; else the run is stuck, temporarily or permanently - ;; but should check if it is due to lack of resources vs. prerequisites - (debug:print-info 1 *default-log-port* "Skipping " (tests:testqueue-get-testname test-record) " " item-path " as it doesn't match " test-patts) - (if (or (not (null? tal))(not (null? reg))) - (runs:loop-values tal reg reglen regfull reruns) - #f)) - - ;; Register tests - ;; - ((not (hash-table-ref/default test-registry (db:test-make-full-name test-name item-path) #f)) - (debug:print-info 4 *default-log-port* "Pre-registering test " test-name "/" item-path " to create placeholder" ) - ;; always do firm registration now in v1.60 and greater ;; (eq? *transport-type* 'fs) ;; no point in parallel registration if use fs - (let register-loop ((numtries 15)) - (rmt:register-test run-id test-name item-path) - (if (rmt:get-test-id run-id test-name item-path) - (hash-table-set! test-registry (db:test-make-full-name test-name item-path) 'done) - (if (> numtries 0) - (begin - (thread-sleep! 0.5) - (register-loop (- numtries 1))) - (debug:print-error 0 *default-log-port* "failed to register test " (db:test-make-full-name test-name item-path))))) - (if (not (eq? 
(hash-table-ref/default test-registry (db:test-make-full-name test-name "") #f) 'done)) - (begin - (rmt:register-test run-id test-name "") - (if (rmt:get-test-id run-id test-name "") - (hash-table-set! test-registry (db:test-make-full-name test-name "") 'done)))) - (runs:shrink-can-run-more-tests-count runsdat) ;; DELAY TWEAKER (still needed?) - (if (and (null? tal)(null? reg)) - (list hed tal (append reg (list hed)) reruns) - (list (runs:queue-next-hed tal reg reglen regfull) ;; cannot replace with a call to runs:loop-values as the logic is different for reg - (runs:queue-next-tal tal reg reglen regfull) - ;; NB// Here we are building reg as we register tests - ;; if regfull we must pop the front item off reg - (if regfull - (append (cdr reg) (list hed)) - (append reg (list hed))) - reruns))) - - ;; At this point hed test registration must be completed. - ;; - ((eq? (hash-table-ref/default test-registry (db:test-make-full-name test-name item-path) #f) - 'start) - (debug:print-info 0 *default-log-port* "Waiting on test registration(s): " - (string-intersperse - (filter (lambda (x) - (eq? (hash-table-ref/default test-registry x #f) 'start)) - (hash-table-keys test-registry)) - ", ")) - (thread-sleep! 0.051) - (list hed tal reg reruns)) - - ;; If no resources are available just kill time and loop again - ;; - ((not have-resources) ;; simply try again after waiting a second - (if (runs:lownoise "no resources" 60) - (debug:print-info 1 *default-log-port* "no resources to run new tests, waiting ...")) - ;; Have gone back and forth on this but db starvation is an issue. - ;; wait one second before looking again to run jobs. - (thread-sleep! 1) - ;; could have done hed tal here but doing car/cdr of newtal to rotate tests - (list (car newtal)(cdr newtal) reg reruns)) - - ;; This is the final stage, everything is in place so launch the test - ;; - ((and have-resources - (or (null? prereqs-not-met) - (and (member 'toplevel testmode) ;; 'toplevel) - (null? 
non-completed) - (not (member 'exclusive testmode))))) - ;; (hash-table-delete! *max-tries-hash* (db:test-make-full-name test-name item-path)) - ;; we are going to reset all the counters for test retries by setting a new hash table - ;; this means they will increment only when nothing can be run - (set! *max-tries-hash* (make-hash-table)) - ;; well, first lets see if cpu load throttling is enabled. If so wait around until the - ;; average cpu load is under the threshold before continuing - (if maxload ;; only gate if maxload is specified - (common:wait-for-cpuload maxload numcpus waitdelay)) - (if maxhomehostload - (common:wait-for-homehost-load maxhomehostload (conc "Waiting for homehost load to drop below normalized value of " maxhomehostload))) - - (run:test run-id run-info keyvals runname test-record flags #f test-registry all-tests-registry) - (runs:incremental-print-results run-id) - (hash-table-set! test-registry (db:test-make-full-name test-name item-path) 'running) - (runs:shrink-can-run-more-tests-count runsdat) ;; DELAY TWEAKER (still needed?) - ;; (thread-sleep! *global-delta*) - (if (or (not (null? tal))(not (null? reg))) - (runs:loop-values tal reg reglen regfull reruns) - #f)) - - ;; must be we have unmet prerequisites - ;; - (else - (debug:print 4 *default-log-port* "FAILS: " fails) - ;; If one or more of the prereqs-not-met are FAIL then we can issue - ;; a message and drop hed from the items to be processed. - ;; (runs:mixed-list-testname-and-testrec->list-of-strings prereqs-not-met) - (if (and (not (null? prereqs-not-met)) - (runs:lownoise (conc "waiting on tests " prereqs-not-met hed) 60)) - (debug:print-info 1 *default-log-port* "waiting on tests; " (string-intersperse - (runs:mixed-list-testname-and-testrec->list-of-strings - prereqs-not-met) ", "))) - (if (or (null? fails) - (member 'toplevel testmode)) - (begin - ;; couldn't run, take a breather - (if (runs:lownoise "Waiting for more work to do..." 
60) - (debug:print-info 0 *default-log-port* "Waiting for more work to do...")) - (thread-sleep! 1) - (list (car newtal)(cdr newtal) reg reruns)) - ;; the waiton is FAIL so no point in trying to run hed ever again - (begin - (let ((my-test-id (rmt:get-test-id run-id test-name item-path))) - (mt:test-set-state-status-by-id-unless-completed run-id my-test-id "COMPLETED" "PREQ_FAIL" "Failed to run due to failed prerequisites2")) - - - - (if (or (not (null? reg))(not (null? tal))) - (if (vector? hed) - (begin - (debug:print 1 *default-log-port* "WARNING: Dropping test " test-name "/" item-path - " from the launch list as it has prerequistes that are FAIL") - (let ((test-id (rmt:get-test-id run-id hed ""))) - (if test-id (mt:test-set-state-status-by-id-unless-completed run-id test-id "COMPLETED" "PREQ_FAIL" "Failed to run due to failed prerequisites"))) - (runs:shrink-can-run-more-tests-count runsdat) ;; DELAY TWEAKER (still needed?) - ;; (thread-sleep! *global-delta*) - ;; This next is for the items - - (if (not (null? fails)) - ;;(mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "PREQ_FAIL" #f) - (rmt:set-state-status-and-roll-up-items run-id test-name item-path "NOT_STARTED" "PREQ_FAIL" #f) - ;;(mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "BLOCKED" #f) - (rmt:set-state-status-and-roll-up-items run-id test-name item-path "NOT_STARTED" "BLOCKED" #f) ) - (hash-table-set! test-registry (db:test-make-full-name test-name item-path) 'removed) - (runs:loop-values tal reg reglen regfull reruns)) - (let ((nth-try (hash-table-ref/default test-registry hed 0))) ;; hed not a vector... 
- (debug:print 2 *default-log-port* "nth-try("hed")="nth-try) - (cond - ((member "RUNNING" (map db:test-get-state prereqs-not-met)) - (if (runs:lownoise (conc "possible RUNNING prerequistes " hed) 60) - (debug:print 0 *default-log-port* "WARNING: test " hed " has possible RUNNING prerequisites, don't give up on it yet.")) - (thread-sleep! 0.1) - (runs:loop-values tal reg reglen regfull reruns)) - ((or (not nth-try) ;; BB: condition on subsequent tries, condition below fires on first try - (and (number? nth-try) - (< nth-try 2))) - (hash-table-set! test-registry hed (if (number? nth-try) - (+ nth-try 1) - 0)) - (if (runs:lownoise (conc "not removing test " hed) 60) - (debug:print 1 *default-log-port* "WARNING: not removing test " hed " from queue although it may not be runnable due to FAILED prerequisites")) - ;; may not have processed correctly. Could be a race condition in your test implementation? Dropping test " hed) ;; " as it has prerequistes that are FAIL. (NOTE: hed is not a vector)") - (runs:shrink-can-run-more-tests-count runsdat) ;; DELAY TWEAKER (still needed?) - (runs:loop-values newtal reg reglen regfull reruns)) - ((symbol? nth-try) ;; BB: 'done matches here in one case where prereq itemwait failed. This is first "try" - (if (eq? nth-try 'removed) ;; removed is removed - drop it NOW - (if (null? tal) - #f ;; yes, really - (list (car tal)(cdr tal) reg reruns)) - (begin - (if (runs:lownoise (conc "FAILED prerequisites or other issue" hed) 60) - (debug:print 0 *default-log-port* "WARNING: test " hed " has FAILED prerequisites or other issue. Internal state >" nth-try "< will be overridden and we'll retry.")) - ;; was: (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "KEEP_TRYING" #f) - (mt:test-set-state-status-by-testname-unless-completed run-id test-name item-path "COMPLETED" "PREQ_FAIL" #f) - (hash-table-set! test-registry hed 'removed) ;; was 0 - (if (not (and (null? reg) (null? 
tal))) - (runs:loop-values tal reg reglen regfull reruns) - #f)))) - (else - (if (runs:lownoise (conc "FAILED prerequitests and we tried" hed) 60) - (debug:print 0 *default-log-port* "WARNING: test " hed " has FAILED prerequitests and we've tried at least 10 times to run it. Giving up now.")) - ;; (debug:print 0 *default-log-port* " prereqs: " prereqs-not-met) - (hash-table-set! test-registry hed 'removed) - (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "TEN_STRIKES" #f) - ;; I'm unclear on if this roll up is needed - it may be the root cause of the "all set to FAIL" bug. - (rmt:set-state-status-and-roll-up-items run-id test-name item-path #f "FAIL" #f) ;; treat as FAIL - (list (if (null? tal)(car newtal)(car tal)) - tal - reg - reruns))))) - ;; ELSE: can't drop this - maybe running? Just keep trying - - ;;(if (not (or (not (null? reg))(not (null? tal)))) ;; old experiment - (let ((runable-tests (runs:runable-tests prereqs-not-met))) ;; SUSPICIOUS: Should look at more than just prereqs-not-met? - (if (null? runable-tests) - #f ;; I think we are truly done here - (runs:loop-values newtal reg reglen regfull reruns))) - ;;) ;;from old experiment - ) ;; end if (or (not (null? reg))(not (null? tal))) - )))))) - -;; scan a list of tests looking to see if any are potentially runnable -;; -(define (runs:runable-tests tests) - (filter (lambda (t) - (if (not (vector? 
t)) - t - (let ((state (db:test-get-state t)) - (status (db:test-get-status t))) - (case (string->symbol state) - ((COMPLETED INCOMPLETE) #f) - ((NOT_STARTED) - (if (member status '("TEN_STRIKES" "BLOCKED" "PREQ_FAIL" "ZERO_ITEMS" "PREQ_DISCARDED" "TIMED_OUT" )) - #f - t)) - ((DELETED) #f) - (else t))))) - tests)) - -;; move all the miscellanea into this struct -;; -(defstruct runs:gendat inc-results inc-results-last-update inc-results-fmt run-info runname target) - -(define *runs:general-data* - (make-runs:gendat - inc-results: (make-hash-table) - inc-results-last-update: 0 - inc-results-fmt: "~12a~12a~20a~12a~40a\n" ;; state status time duration test-name item-path - run-info: #f - runname: #f - target: #f - ) - ) - -(define (runs:incremental-print-results run-id) - (let ((curr-sec (current-seconds))) - (if (> (- curr-sec (runs:gendat-inc-results-last-update *runs:general-data*)) 5) ;; at least five seconds since last update - (let* ((run-dat (or (runs:gendat-run-info *runs:general-data*)(rmt:get-run-info run-id))) - (runname (or (runs:gendat-runname *runs:general-data*) - (db:get-value-by-header (db:get-rows run-dat) - (db:get-header run-dat) "runname"))) - (target (or (runs:gendat-target *runs:general-data*)(rmt:get-target run-id))) - (testsdat (rmt:get-tests-for-run run-id "%" '() '() ;; run-id testpatt states statuses - #f #f ;; offset limit - #f ;; not-in - #f ;; sort-by - #f ;; sort-order - #f ;; get full data (not 'shortlist) - (runs:gendat-inc-results-last-update *runs:general-data*) ;; last update time - 'dashboard))) - (if (not (runs:gendat-run-info *runs:general-data*)) - (runs:gendat-run-info-set! *runs:general-data* run-dat)) - (if (not (runs:gendat-runname *runs:general-data*)) - (runs:gendat-runname-set! *runs:general-data* runname)) - (if (not (runs:gendat-target *runs:general-data*)) - (runs:gendat-target-set! 
*runs:general-data* target)) - (for-each - (lambda (testdat) - (let* ((test-id (db:test-get-id testdat)) - (prevdat (hash-table-ref/default (runs:gendat-inc-results *runs:general-data*) - (conc run-id "," test-id) #f)) - (test-name (db:test-get-testname testdat)) - (item-path (db:test-get-item-path testdat)) - (state (db:test-get-state testdat)) - (status (db:test-get-status testdat)) - (event-time (db:test-get-event_time testdat)) - (duration (db:test-get-run_duration testdat))) - (if (and (not (member state '("DELETED" "REMOTEHOSTSTART" "RUNNING" "LAUNCHED""NOT_STARTED"))) - (not (and prevdat - (equal? state (db:test-get-state prevdat)) - (equal? status (db:test-get-status prevdat))))) - (let ((fmt (runs:gendat-inc-results-fmt *runs:general-data*)) - (dtime (seconds->year-work-week/day-time event-time))) - (if (runs:lownoise "inc-print" 600) - (format #t fmt "State" "Status" "Start Time" "Duration" "Test path")) - ;; (debug:print 0 *default-log-port* "fmt: " fmt " state: " state " status: " status " test-name: " test-name " item-path: " item-path " dtime: " dtime) - ;; (debug:print 0 #f "event-time: " event-time " duration: " duration) - (format #t fmt - state - status - dtime - (seconds->hr-min-sec duration) - (conc "lt/" target "/" runname "/" test-name (if (string-null? item-path) "" (conc "/" item-path)))) - (hash-table-set! (runs:gendat-inc-results *runs:general-data*) (conc run-id "," test-id) testdat))))) - testsdat))) - (runs:gendat-inc-results-last-update-set! *runs:general-data* (- curr-sec 10)))) - -;; every time though the loop increment the test/itempatt val. 
-;; when the min is > max-allowed and none running then force exit -;; -(define *max-tries-hash* (make-hash-table)) - -;;====================================================================== -;; runs:run-tests-queue is called by runs:run-tests -;;====================================================================== -;; -;; test-records is a hash table testname:item_path => vector < testname testconfig waitons priority items-info ... > -(define (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests reglen-in all-tests-registry) - ;; At this point the list of parent tests is expanded - ;; NB// Should expand items here and then insert into the run queue. - (debug:print 5 *default-log-port* "test-records: " test-records ", flags: " (hash-table->alist flags)) - - ;; Do mark-and-find clean up of db before starting runing of quue - ;; - ;; (rmt:find-and-mark-incomplete) - - (let* ((run-info (rmt:get-run-info run-id)) - (tests-info (mt:get-tests-for-run run-id #f '() '())) ;; qryvals: "id,testname,item_path")) - (sorted-test-names (tests:sort-by-priority-and-waiton test-records)) - (test-registry (make-hash-table)) - (registry-mutex (make-mutex)) - (num-retries 0) - (max-retries (configf:lookup *configdat* "setup" "maxretries")) - (max-concurrent-jobs (configf:lookup-number *configdat* "setup" "max_concurrent_jobs" default: 50)) - (reglen (if (number? 
reglen-in) reglen-in 1)) - (last-time-incomplete (- (current-seconds) 900)) ;; force at least one clean up cycle - (last-time-some-running (current-seconds)) - ;; (tdbdat (tasks:open-db)) - (runsdat (make-runs:dat - ;; hed: hed - ;; tal: tal - ;; reg: reg - ;; reruns: reruns - reglen: reglen - regfull: #f ;; regfull - ;; test-record: test-record - runname: runname - ;; test-name: test-name - ;; item-path: item-path - ;; jobgroup: jobgroup - max-concurrent-jobs: max-concurrent-jobs - run-id: run-id - ;; waitons: waitons - ;; testmode: testmode - test-patts: test-patts - required-tests: required-tests - test-registry: test-registry - registry-mutex: registry-mutex - flags: flags - keyvals: keyvals - run-info: run-info - ;; newtal: newtal - all-tests-registry: all-tests-registry - ;; itemmaps: itemmaps - ;; prereqs-not-met: (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps) - ;; can-run-more-tests: (runs:can-run-more-tests run-id jobgroup max-concurrent-jobs) ;; look at the test jobgroup and tot jobs running - ))) - - ;; Initialize the test-registery hash with tests that already have a record - ;; convert state to symbol and use that as the hash value - (for-each (lambda (trec) - (let ((id (db:test-get-id trec)) - (tn (db:test-get-testname trec)) - (ip (db:test-get-item-path trec)) - (st (db:test-get-state trec))) - (if (not (equal? st "DELETED")) - (hash-table-set! test-registry (db:test-make-full-name tn ip) (string->symbol st))))) - tests-info) - (set! max-retries (if (and max-retries (string->number max-retries))(string->number max-retries) 100)) - - (let loop ((hed (car sorted-test-names)) - (tal (cdr sorted-test-names)) - (reg '()) ;; registered, put these at the head of tal - (reruns '())) - - (runs:incremental-print-results run-id) - - (if (not (null? reruns))(debug:print-info 4 *default-log-port* "reruns=" reruns)) - - ;; Here we mark any old defunct tests as incomplete. 
Do this every fifteen minutes - ;; moving this to a parallel thread and just run it once. - ;; - (if (> (current-seconds)(+ last-time-incomplete 900)) - (begin - (set! last-time-incomplete (current-seconds)) - ;; (rmt:find-and-mark-incomplete-all-runs) - )) - - ;; (print "Top of loop, hed=" hed ", tal=" tal " ,reruns=" reruns) - (let* ((test-record (hash-table-ref test-records hed)) - (test-name (tests:testqueue-get-testname test-record)) - (tconfig (tests:testqueue-get-testconfig test-record)) - (jobgroup (configf:lookup tconfig "test_meta" "jobgroup")) - (testmode (let ((m (configf:lookup tconfig "requirements" "mode"))) - (if m (map string->symbol (string-split m)) '(normal)))) - (itemmaps (tests:get-itemmaps tconfig)) ;; (configf:lookup tconfig "requirements" "itemmap")) - (priority (tests:testqueue-get-priority test-record)) - (itemdat (tests:testqueue-get-itemdat test-record)) ;; itemdat can be a string, list or #f - (items (tests:testqueue-get-items test-record)) - (item-path (item-list->path itemdat)) - (tfullname (db:test-make-full-name test-name item-path)) - ;; these are hard coded item-item waits test/item-path => test/item-path2 ... - (extra-waits (let* ((section (configf:get-section (tests:testqueue-get-testconfig test-record) "waitons")) - (myextra (alist-ref tfullname section equal?))) - (if myextra - (let ((extras (string-split (car myextra)))) - (if (runs:lownoise (conc tfullname "extra-waitons" tfullname) 60) - (debug:print-info 0 *default-log-port* "HAVE EXTRA WAITONS for test " tfullname ": " myextra)) - (for-each - (lambda (extra) - ;; (debug:print 0 *default-log-port* "FYI: extra = " extra " reruns = " reruns) - (let ((basetestname (car (string-split extra "/")))) - #;(if (not (member extra tal)) - (set! reruns (append tal (list extra)))) - (if (not (member basetestname tal)) - (set! 
reruns (append tal (list basetestname)))) - )) - extras) - extras) - '()))) - (waitons (delete-duplicates (append (tests:testqueue-get-waitons test-record) extra-waits) equal?)) - (newtal (append tal (list hed))) - (regfull (>= (length reg) reglen)) - (num-running (rmt:get-count-tests-running-for-run-id run-id)) - (testdat (make-runs:testdat - hed: hed - tal: tal - reg: reg - reruns: reruns - test-record: test-record - test-name: test-name - item-path: item-path - jobgroup: jobgroup - waitons: waitons - testmode: testmode - newtal: newtal - itemmaps: itemmaps - ;; prereqs-not-met: prereqs-not-met - ))) - (runs:dat-regfull-set! runsdat regfull) - - ;; -- removed BB 17ww28 - no longer needed. - ;; every 15 minutes verify the server is there for this run - ;; (if (and (common:low-noise-print 240 "try start server" run-id) - ;; (not (or (and *runremote* - ;; (remote-server-url *runremote*) - ;; (server:ping (remote-server-url *runremote*))) - ;; (server:check-if-running *toppath*)))) - ;; (server:kind-run *toppath*)) - - (if (> num-running 0) - (set! last-time-some-running (current-seconds))) - - (if (> (current-seconds)(+ last-time-some-running (or (configf:lookup *configdat* "setup" "give-up-waiting") 36000))) - (hash-table-set! *max-tries-hash* tfullname (+ (hash-table-ref/default *max-tries-hash* tfullname 0) 1))) - ;; (debug:print 0 *default-log-port* "max-tries-hash: " (hash-table->alist *max-tries-hash*)) - - ;; Ensure all top level tests get registered. This way they show up as "NOT_STARTED" on the dashboard - ;; and it is clear they *should* have run but did not. - (if (not (hash-table-ref/default test-registry (db:test-make-full-name test-name "") #f)) - (begin - (rmt:register-test run-id test-name "") - (hash-table-set! test-registry (db:test-make-full-name test-name "") 'done))) - - ;; Fast skip of tests that are already "COMPLETED" - NO! 
Cannot do that as the items may not have been expanded yet :( - ;; - (if (member (hash-table-ref/default test-registry tfullname #f) - '(DONOTRUN removed)) ;; *common:cant-run-states-sym*) ;; '(COMPLETED KILLED WAIVED UNKNOWN INCOMPLETE)) - (begin - (if (runs:lownoise (conc "been marked do not run " tfullname) 60) - (debug:print-info 0 *default-log-port* "Skipping test " tfullname " as it has been marked do not run due to being completed or not runnable")) - (if (or (not (null? tal))(not (null? reg))) - (loop (runs:queue-next-hed tal reg reglen regfull) - (runs:queue-next-tal tal reg reglen regfull) - (runs:queue-next-reg tal reg reglen regfull) - reruns)))) - ;; (loop (car tal)(cdr tal) reg reruns)))) - - (runs:incremental-print-results run-id) - (debug:print 4 *default-log-port* "TOP OF LOOP => " - "test-name: " test-name - "\n hed: " hed - "\n tal: " tal - "\n reg: " reg - "\n test-record " test-record - "\n itemdat: " itemdat - "\n items: " items - "\n item-path: " item-path - "\n waitons: " waitons - "\n num-retries: " num-retries - "\n reruns: " reruns - "\n regfull: " regfull - "\n reglen: " reglen - "\n length reg: " (length reg) - ) - - ;; check for hed in waitons => this would be circular, remove it and issue an - ;; error - (if (member test-name waitons) - (begin - (debug:print-error 0 *default-log-port* "test " test-name " has listed itself as a waiton, please correct this!") - (set! waiton (filter (lambda (x)(not (equal? x hed))) waitons)))) - - (cond - - ;; We want to catch tests that have waitons that are NOT in the queue and discard them IFF - ;; they have been through the wringer 10 or more times - ((and (list? waitons) - (not (null? waitons)) - (> (hash-table-ref/default *max-tries-hash* tfullname 0) 10) - (not (null? (filter - number? 
- (map (lambda (waiton) - (if (and (not (member waiton tal)) ;; this waiton is not in the list to be tried to run - (not (member waiton reruns))) - 1 - #f)) - waitons))))) ;; could do this more elegantly with a marker.... - (debug:print-info 4 *default-log-port* "cond branch - " "rtq-1") - (debug:print 0 *default-log-port* "WARNING: Marking test " tfullname " as not runnable. It is waiting on tests that cannot be run. Giving up now.") - (hash-table-set! test-registry tfullname 'removed)) - - ;; items is #f then the test is ok to be handed off to launch (but not before) - ;; - ((not items) - (debug:print-info 4 *default-log-port* "cond branch - " "rtq-2") - (debug:print-info 4 *default-log-port* "OUTER COND: (not items)") - (if (and (not (tests:match test-patts (tests:testqueue-get-testname test-record) item-path required: required-tests)) - (not (null? tal))) - (loop (car tal)(cdr tal) reg reruns)) - (runs:testdat-prereqs-not-met-set! testdat (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps)) - (runs:dat-can-run-more-tests-set! runsdat (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs)) ;; look at the test jobgroup and tot jobs running - (let ((loop-list (runs:process-expanded-tests runsdat testdat))) - (if loop-list (apply loop loop-list)))) - - ;; items processed into a list but not came in as a list been processed - ;; - ((and (list? items) ;; thus we know our items are already calculated - (not itemdat)) ;; and not yet expanded into the list of things to be done - (debug:print-info 4 *default-log-port* "cond branch - " "rtq-3") - (debug:print-info 4 *default-log-port* "OUTER COND: (and (list? items)(not itemdat))") - ;; Must determine if the items list is valid. Discard the test if it is not. - (if (and (list? items) - (> (length items) 0) - (and (list? 
(car items)) - (> (length (car items)) 0)) - (debug:debug-mode 1)) - (debug:print 2 *default-log-port* (map (lambda (row) - (conc (string-intersperse - (map (lambda (varval) - (string-intersperse varval "=")) - row) - " ") - "\n")) - items))) - - (let* ((items-in-testpatt - (filter - (lambda (my-itemdat) - (tests:match test-patts hed (item-list->path my-itemdat) )) - ;; was: (tests:match test-patts hed (item-list->path my-itemdat) required: required-tests)) - items) )) - (if (null? items-in-testpatt) - (let ((test-id (rmt:get-test-id run-id test-name ""))) - (debug:print-info 0 *default-log-port* "Test " (tests:testqueue-get-testname test-record) " is itemized but has no items matching test pattern -- marking status ZERO_ITEMS") - (if test-id - (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "ZERO_ITEMS" "This test has no items which match test pattern."))) - - (for-each (lambda (my-itemdat) - (let* ((new-test-record (let ((newrec (make-tests:testqueue))) - (vector-copy! test-record newrec) - newrec)) - (my-item-path (item-list->path my-itemdat)) - - (newtestname (db:test-make-full-name hed my-item-path))) ;; test names are unique on testname/item-path - (tests:testqueue-set-items! new-test-record #f) - (tests:testqueue-set-itemdat! new-test-record my-itemdat) - (tests:testqueue-set-item_path! new-test-record my-item-path) - (hash-table-set! test-records newtestname new-test-record) - (set! tal (append tal (list newtestname))))) ;; since these are itemized create new test names testname/itempath - items-in-testpatt))) - - - - ;; At this point we have possibly added items to tal but all must be handed off to - ;; INNER COND logic. I think loop without rotating the queue - ;; (loop hed tal reg reruns)) - ;; (let ((newtal (append tal (list hed)))) ;; We should discard hed as it has been expanded into it's items? Yes, but only if this *is* an itemized test - ;; (loop (car newtal)(cdr newtal) reg reruns) - (if (null? 
tal) - #f - (loop (car tal)(cdr tal) reg reruns))) - - ;; if items is a proc then need to run items:get-items-from-config, get the list and loop - ;; - but only do that if resources exist to kick off the job - ;; EXPAND ITEMS - ((or (procedure? items)(eq? items 'have-procedure)) - (debug:print-info 4 *default-log-port* "cond branch - " "rtq-4") - (let ((can-run-more (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs))) - (if (and (list? can-run-more) - (car can-run-more)) - (let ((loop-list (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry test-records itemmaps))) ;; itemized test expanded here - (if loop-list - (apply loop loop-list) - (debug:print-info 4 *default-log-port* " -- Can't expand hed="hed) - ) - ) - ;; if can't run more just loop with next possible test - (loop (car newtal)(cdr newtal) reg reruns)))) - - ;; this case should not happen, added to help catch any bugs - ((and (list? items) itemdat) - (debug:print-info 4 *default-log-port* "cond branch - " "rtq-5") - (debug:print-error 0 *default-log-port* "Should not have a list of items in a test and the itemspath set - please report this") - (exit 1)) - ((not (null? reruns)) - (debug:print-info 4 *default-log-port* "cond branch - " "rtq-6") - (let* ((newlst (tests:filter-non-runnable run-id tal test-records)) ;; i.e. not FAIL, WAIVED, INCOMPLETE, PASS, KILLED, - (junked (lset-difference equal? tal newlst))) - (debug:print-info 4 *default-log-port* "full drop through, if reruns is less than 100 we will force retry them, reruns=" reruns ", tal=" tal) - (if (< num-retries max-retries) - (set! newlst (append reruns newlst))) - (set! num-retries (+ num-retries 1)) - ;; (thread-sleep! (+ 1 *global-delta*)) - (if (not (null? 
newlst)) - ;; since reruns have been tacked on to newlst create new reruns from junked - (loop (car newlst)(cdr newlst) reg (delete-duplicates junked))))) - ((not (null? tal)) - (debug:print-info 4 *default-log-port* "cond branch - " "rtq-7") - (debug:print-info 4 *default-log-port* "I'm pretty sure I shouldn't get here.")) - ((not (null? reg)) ;; could we get here with leftovers? - (debug:print-info 4 *default-log-port* "cond branch - " "rtq-8") - (debug:print-info 0 *default-log-port* "Have leftovers!") - (loop (car reg)(cdr reg) '() reruns)) - (else - (debug:print-info 4 *default-log-port* "cond branch - " "rtq-9") - (debug:print-info 4 *default-log-port* "Exiting loop with...\n hed=" hed "\n tal=" tal "\n reruns=" reruns)) - ))) ;; end loop on sorted test names - ;; this is the point where everything is launched and now you can mark the run in metadata table as all launched - (rmt:set-var (conc "lunch-complete-" run-id) "yes") - - ;; now *if* -run-wait we wait for all tests to be done - ;; Now wait for any RUNNING tests to complete (if in run-wait mode) - (thread-sleep! 10) ;; I think there is a race condition here. Let states/statuses settle - (let wait-loop ((num-running (rmt:get-count-tests-running-for-run-id run-id)) - (prev-num-running 0)) - ;; (debug:print-info 13 *default-log-port* "num-running=" num-running ", prev-num-running=" prev-num-running) - (if (and (or (args:get-arg "-run-wait") - (equal? (configf:lookup *configdat* "setup" "run-wait") "yes")) - (> num-running 0)) - (begin - ;; Here we mark any old defunct tests as incomplete. Do this every fifteen minutes - ;; (debug:print 0 *default-log-port* "Got here eh! num-running=" num-running " (> num-running 0) " (> num-running 0)) - (if (> (current-seconds)(+ last-time-incomplete 900)) - (begin - (debug:print-info 0 *default-log-port* "Marking stuck tests as INCOMPLETE while waiting for run " run-id ". Running as pid " (current-process-id) " on " (get-host-name)) - (set! 
last-time-incomplete (current-seconds)) - (rmt:find-and-mark-incomplete run-id #f))) - (if (not (eq? num-running prev-num-running)) - (debug:print-info 0 *default-log-port* "run-wait specified, waiting on " num-running " tests in RUNNING, REMOTEHOSTSTART or LAUNCHED state at " (time->string (seconds->local-time (current-seconds))))) - (thread-sleep! 5) - ;; (wait-loop (rmt:get-count-tests-running-for-run-id run-id) num-running)))) - (wait-loop (rmt:get-count-tests-running-for-run-id run-id) num-running)))) - ;; LET* ((test-record - ;; we get here on "drop through". All done! - ;; this is moved to runs:run-testes since this function is getting called twice to ensure everthing is completed. - ;; (debug:print-info 0 *default-log-port* "Calling Post Hook") - ;; (runs:run-post-hook run-id) - (debug:print-info 1 *default-log-port* "All tests launched"))) - -(define (runs:calc-fails prereqs-not-met) - (filter (lambda (test) - (and (vector? test) ;; not (string? test)) - (member (db:test-get-state test) '("INCOMPLETE" "COMPLETED")) ;; TODO: pull from *common:stuff...* - (not (member (db:test-get-status test) - '("PASS" "WARN" "CHECK" "WAIVED" "SKIP"))))) - prereqs-not-met)) - -(define (runs:calc-prereq-fail prereqs-not-met) ;; REMOVEME since NOT_STARTED/PREQ_FAIL is now COMPLETED/PREQ_FAIL - (filter (lambda (test) - (and (vector? test) ;; not (string? test)) - (equal? (db:test-get-state test) "NOT_STARTED") - (not (member (db:test-get-status test) - '("n/a" "KEEP_TRYING"))))) - prereqs-not-met)) - -(define (runs:calc-not-completed prereqs-not-met) - (filter - (lambda (t) - (or (not (vector? t)) - (not (member (db:test-get-state t) '("INCOMPLETE" "COMPLETED"))))) - prereqs-not-met)) - -;; (define (runs:calc-not-completed prereqs-not-met) -;; (filter -;; (lambda (t) -;; (or (not (vector? t)) -;; (not (equal? "COMPLETED" (db:test-get-state t))))) -;; prereqs-not-met)) - -(define (runs:calc-runnable prereqs-not-met) - (filter - (lambda (t) - (or (not (vector? 
t)) - (and (equal? "NOT_STARTED" (db:test-get-state t)) - (member (db:test-get-status t) - '("n/a" "KEEP_TRYING"))) - (and (equal? "RUNNING" (db:test-get-state t))))) ;; account for a test that is running - prereqs-not-met)) - -(define (runs:pretty-string lst) - (map (lambda (t) - (if (not (vector? t)) - (conc t) - (conc (db:test-get-testname t) ":" (db:test-get-state t) "/" (db:test-get-status t)))) - lst)) - -;; parent-test is there as a placeholder for when parent-tests can be run as a setup step -;; -(define (run:test run-id run-info keyvals runname test-record flags parent-test test-registry all-tests-registry) - ;; All these vars might be referenced by the testconfig file reader - (let* ((test-name (tests:testqueue-get-testname test-record)) - (test-waitons (tests:testqueue-get-waitons test-record)) - (itemdat (tests:testqueue-get-itemdat test-record)) - (item-path "") - (db #f) - (full-test-name #f) - (all-vars (get-environment-variables))) - ;; setting itemdat to a list if it is #f - (if (not itemdat)(set! itemdat '())) - (set! item-path (item-list->path itemdat)) - (set! full-test-name (db:test-make-full-name test-name item-path)) - (runs:set-megatest-env-vars run-id inrunname: runname testname: test-name itempath: item-path) ;; these may be needed by the launching process - (let* ((test-conf ;; re-instate the tests:get-testconfig once the kinks are worked out. FIXME!!! - ;; (tests:get-testconfig test-name item-path all-tests-registry #t force-create: #t)) - (tests:testqueue-get-testconfig test-record )) - (test-path (hash-table-ref all-tests-registry test-name)) ;; (conc *toppath* "/tests/" test-name)) ;; could use tests:get-testconfig here ... 
- (force (hash-table-ref/default flags "-force" #f)) - (rerun (hash-table-ref/default flags "-rerun" #f)) - (keepgoing (hash-table-ref/default flags "-keepgoing" #f)) - (incomplete-timeout (string->number (or (configf:lookup *configdat* "setup" "incomplete-timeout") "x"))) - ) - - (debug:print-info 4 *default-log-port* - "\nTESTNAME: " full-test-name - "\n test-config: " (hash-table->alist test-conf) - "\n itemdat: " itemdat - ) - (debug:print 2 *default-log-port* "Attempting to launch test " full-test-name) - ;; (setenv "MT_TEST_NAME" test-name) ;; - ;; (setenv "MT_ITEMPATH" item-path) - ;; (setenv "MT_RUNNAME" runname) - (change-directory *toppath*) - - ;; Here is where the test_meta table is best updated - ;; Yes, another use of a global for caching. Need a better way? - ;; - ;; There is now a single call to runs:update-all-test_meta and this - ;; per-test call is not needed. Given the delicacy of the move to - ;; v1.55 this code is being left in place for the time being. - ;; - (if (not (hash-table-ref/default *test-meta-updated* test-name #f)) - (begin - (hash-table-set! *test-meta-updated* test-name #t) - (runs:update-test_meta test-name test-conf))) - - ;; itemdat => ((ripeness "overripe") (temperature "cool") (season "summer")) - (let* ((new-test-path (string-intersperse (cons test-path (map cadr itemdat)) "/")) - (test-id (rmt:get-test-id run-id test-name item-path)) - (testdat (if test-id (rmt:get-test-info-by-id run-id test-id) #f))) - (if (not testdat) - (let loop () - ;; ensure that the path exists before registering the test - ;; NOPE: Cannot! Don't know yet which disk area will be assigned.... - ;; (system (conc "mkdir -p " new-test-path)) - ;; - ;; (open-run-close tests:register-test db run-id test-name item-path) - ;; - ;; NB// for the above line. I want the test to be registered long before this routine gets called! - ;; - (if (not test-id)(set! 
test-id (rmt:get-test-id run-id test-name item-path))) - (if (not test-id) - (begin - (debug:print 2 *default-log-port* "WARN: Test not pre-created? test-name=" test-name ", item-path=" item-path ", run-id=" run-id) - (rmt:register-test run-id test-name item-path) - (set! test-id (rmt:get-test-id run-id test-name item-path)))) - (debug:print-info 4 *default-log-port* "test-id=" test-id ", run-id=" run-id ", test-name=" test-name ", item-path=\"" item-path "\"") - (set! testdat (rmt:get-test-info-by-id run-id test-id)) - (if (not testdat) - (begin - (debug:print-info 0 *default-log-port* "WARNING: server is overloaded, trying again in one second") - (thread-sleep! 1) - (loop))))) - (if (not testdat) ;; should NOT happen - (debug:print-error 0 *default-log-port* "failed to get test record for test-id " test-id)) - (set! test-id (db:test-get-id testdat)) - (if (common:file-exists? test-path) - (change-directory test-path) - (begin - (debug:print-error 0 *default-log-port* "test run path not created before attempting to run the test. Perhaps you are running -remove-runs at the same time?") - (change-directory *toppath*))) - (case (if force ;; (args:get-arg "-force") - 'NOT_STARTED - (if testdat - (string->symbol (test:get-state testdat)) - 'failed-to-insert)) - ((failed-to-insert) - (debug:print-error 0 *default-log-port* "Failed to insert the record into the db")) - ((NOT_STARTED COMPLETED DELETED INCOMPLETE) - (let ((runflag #f)) - (cond - ;; -force, run no matter what - (force (set! runflag #t)) - ;; NOT_STARTED, run no matter what - ((member (test:get-state testdat) '("DELETED" "NOT_STARTED" "INCOMPLETE"))(set! 
runflag #t)) - ;; not -rerun and PASS, WARN or CHECK, do no run - ((and (or (not rerun) - keepgoing) - ;; Require to force re-run for COMPLETED or *anything* + PASS,WARN or CHECK - (or (member (test:get-status testdat) '("PASS" "WARN" "CHECK" "SKIP" "WAIVED")) - (member (test:get-state testdat) '("COMPLETED")))) - (debug:print-info 2 *default-log-port* "running test " test-name "/" item-path " suppressed as it is " (test:get-state testdat) " and " (test:get-status testdat)) - (hash-table-set! test-registry full-test-name 'DONOTRUN) ;; COMPLETED) - (set! runflag #f)) - ;; -rerun and status is one of the specifed, run it - ((and rerun - (let* ((rerunlst (string-split rerun ",")) - (must-rerun (member (test:get-status testdat) rerunlst))) - (debug:print-info 3 *default-log-port* "-rerun list: " rerun ", test-status: " (test:get-status testdat)", must-rerun: " must-rerun) - must-rerun)) - (debug:print-info 2 *default-log-port* "Rerun forced for test " test-name "/" item-path) - (set! runflag #t)) - ;; -keepgoing, do not rerun FAIL - ((and keepgoing - (member (test:get-status testdat) '("FAIL"))) - (set! runflag #f)) - ((and (not rerun) - (member (test:get-status testdat) '("FAIL" "n/a"))) - (set! runflag #t)) - (else (set! runflag #f))) - (debug:print 4 *default-log-port* "RUNNING => runflag: " runflag " STATE: " (test:get-state testdat) " STATUS: " (test:get-status testdat)) - (if (not runflag) - (if (not parent-test) - (if (runs:lownoise (conc "not starting test" full-test-name) 60) - (debug:print 1 *default-log-port* "NOTE: Not starting test " full-test-name " as it is state \"" (test:get-state testdat) - "\" and status \"" (test:get-status testdat) "\", use -rerun \"" (test:get-status testdat) - "\" or -force to override"))) - ;; NOTE: No longer be checking prerequisites here! Will never get here unless prereqs are - ;; already met. 
- ;; This would be a great place to do the process-fork - ;; - (let ((skip-test #f) - (skip-check (configf:get-section test-conf "skip"))) - (cond - ;; Have to check for skip conditions. This one skips if there are same-named tests - ;; currently running - ((and skip-check - (configf:lookup test-conf "skip" "prevrunning")) - ;; run-ids = #f means *all* runs - (let ((running-tests (rmt:get-tests-for-runs-mindata #f full-test-name '("RUNNING" "REMOTEHOSTSTART" "LAUNCHED") '() #f))) - (if (not (null? running-tests)) ;; have to skip - (set! skip-test "Skipping due to previous tests running")))) - - ;; split the string and OR of file-exists? - ((and skip-check - (configf:lookup test-conf "skip" "fileexists")) - (let* ((files (string-split (configf:lookup test-conf "skip" "fileexists"))) - (existing (filter common:file-exists? files))) - (if (not (null? existing)) ;; (common:file-exists? (configf:lookup test-conf "skip" "fileexists")) - (set! skip-test (conc "Skipping due to existance of file(s) " (string-intersperse existing ", ")))))) ;; (configf:lookup test-conf "skip" "fileexists"))))) - - ((and skip-check - (configf:lookup test-conf "skip" "filenotexists")) - (let* ((files (string-split (configf:lookup test-conf "skip" "filenotexists"))) - (existing (filter common:file-exists? files))) - (if (null? existing) ;; (common:file-exists? (configf:lookup test-conf "skip" "filenotexists"))) - (set! skip-test (conc "Skipping due to non existance of files " (string-intersperse files ", ")))))) ;; (configf:lookup test-conf "skip" "filenotexists"))))) - - ((and skip-check - (configf:lookup test-conf "skip" "script")) - (if (= (system (configf:lookup test-conf "skip" "script")) 0) - (set! 
skip-test (conc "Skipping due to zero return value of script " (configf:lookup test-conf "skip" "script"))))) - - ((and skip-check - (configf:lookup test-conf "skip" "rundelay")) - ;; run-ids = #f means *all* runs - (let* ((numseconds (common:hms-string->seconds (configf:lookup test-conf "skip" "rundelay"))) - (running-tests (rmt:get-tests-for-runs-mindata #f full-test-name '("RUNNING" "REMOTEHOSTSTART" "LAUNCHED") '() #f)) - (completed-tests (rmt:get-tests-for-runs-mindata #f full-test-name '("COMPLETED" "INCOMPLETE") '("PASS" "FAIL" "ABORT") #f)) ;; ironically INCOMPLETE is same as COMPLETED in this contex - (last-run-times (map db:mintest-get-event_time completed-tests)) - (time-since-last (- (current-seconds) (if (null? last-run-times) 0 (common:max last-run-times))))) - (if (or (not (null? running-tests)) ;; have to skip if test is running - (> numseconds time-since-last)) - (set! skip-test (conc "Skipping due to previous test run less than " (configf:lookup test-conf "skip" "rundelay") " ago")))))) - - (if skip-test - (begin - (mt:test-set-state-status-by-id run-id test-id "COMPLETED" "SKIP" skip-test) - (debug:print-info 1 *default-log-port* "SKIPPING Test " full-test-name " due to " skip-test)) - ;; - ;; Here the test is handed off to launch.scm for launch-test to complete the launch process - ;; - (if (not (launch-test test-id run-id run-info keyvals runname test-conf test-name test-path itemdat flags)) - (begin - (print "ERROR: Failed to launch the test. Exiting as soon as possible") - (set! *globalexitstatus* 1) ;; - (process-signal (current-process-id) signal/kill)))))))) - ((KILLED) - (debug:print 1 *default-log-port* "NOTE: " full-test-name " is already running or was explictly killed, use -force to launch it.") - (hash-table-set! 
test-registry (db:test-make-full-name test-name test-path) 'DONOTRUN)) ;; KILLED)) - ((LAUNCHED REMOTEHOSTSTART RUNNING) - (debug:print 2 *default-log-port* "NOTE: " test-name " is already running")) - ;; (if (> (- (current-seconds)(+ (db:test-get-event_time testdat) - ;; (db:test-get-run_duration testdat))) - ;; (or incomplete-timeout - ;; 6000)) ;; i.e. no update for more than 6000 seconds - ;; (begin - ;; (debug:print 0 *default-log-port* "WARNING: Test " test-name " appears to be dead. Forcing it to state INCOMPLETE and status STUCK/DEAD") - ;; (tests:test-set-status! run-id test-id "INCOMPLETE" "STUCK/DEAD" "" #f)) - ;; ;; (tests:test-set-status! test-id "INCOMPLETE" "STUCK/DEAD" "" #f)) - ;; (debug:print 2 *default-log-port* "NOTE: " test-name " is already running"))) - (else - (debug:print-error 0 *default-log-port* "Failed to launch test " full-test-name ". Unrecognised state " (test:get-state testdat)) - (case (string->symbol (test:get-state testdat)) - ((COMPLETED INCOMPLETE) - (hash-table-set! test-registry (db:test-make-full-name test-name test-path) 'DONOTRUN)) - (else - (hash-table-set! test-registry (db:test-make-full-name test-name test-path) 'DONOTRUN))))))) - ;; put any changed environment variables back to how they were - TODO - turn this into some sort of with- - (common:set-vars-back all-vars) - #;(for-each - (lambda (vardat) - (let ((var (car vardat)) - (val (cdr vardat))) - (if (not (equal? (get-environment-variable var) val)) - (handle-exceptions - exn - (debug:print-error 0 *default-log-port* "Failed to set " var " to " val) - (setenv var val))))) - all-vars) - )) - -;;====================================================================== -;; END OF NEW STUFF -;;====================================================================== - -(define (get-dir-up-n dir . params) - (let ((dparts (string-split dir "/")) - (count (if (null? 
params) 1 (car params)))) - (conc "/" (string-intersperse - (take dparts (- (length dparts) count)) - "/")))) - -(define (runs:recursive-delete-with-error-msg real-dir) - (if (> (system (conc "rm -rf " real-dir)) 0) - (begin - ;; FAILED, possibly due to permissions, do chmod a+rwx then try one more time - (system (conc "chmod -R a+rwx " real-dir)) - (if (> (system (conc "rm -rf " real-dir)) 0) - (debug:print-error 0 *default-log-port* "There was a problem removing " real-dir " with rm -f"))))) - -(define (runs:safe-delete-test-dir real-dir) - ;; first delete all sub-directories - (directory-fold - (lambda (f x) - (let ((fullname (conc real-dir "/" f))) - (if (directory? fullname)(runs:recursive-delete-with-error-msg fullname))) - (+ 1 x)) - 0 real-dir) - ;; then files other than *testdat.db* - (directory-fold - (lambda (f x) - (let ((fullname (conc real-dir "/" f))) - (if (not (string-search (regexp "testdat.db") f)) - (runs:recursive-delete-with-error-msg fullname))) - (+ 1 x)) - 0 real-dir) - ;; then the entire directory - (runs:recursive-delete-with-error-msg real-dir)) - -;; cleanup often needs to remove all but the last N runs per target -;; -;; target-patts a1/b1/c1,a2/b2/c2 ... -;; -;; This will fail if called with empty target or a bad target (i.e. missing or extra fields) -;; -(define (runs:get-hash-by-target target-patts runpatt) - (let* ((targets (string-split target-patts ",")) - (keys (rmt:get-keys)) - (res-ht (make-hash-table))) ;; target -> ( runrecord1 runrecord2 ... ) - (for-each - (lambda (target-patt) - (let ((runs (rmt:simple-get-runs runpatt #f #f target-patt))) - (for-each - (lambda (run) - (let ((target (simple-run-target run))) - (hash-table-set! res-ht target (cons run (hash-table-ref/default res-ht target '()))))) - runs))) - targets) - res-ht)) - -;; delete runs older than X (weeks, days, months years etc.) 
-;; delete redundant runs within a target - N is the input -;; delete redundant runs within a target IFF older than given date/time AND keep at least N -;; -(define (runs:remove-all-but-last-n-runs-per-target target-patts runpatt num-to-keep #!key (actions '(print))) - (let* ((runs-ht (runs:get-hash-by-target target-patts runpatt)) - (age (if (args:get-arg "-age")(common:hms-string->seconds (args:get-arg "-age")) #f)) - (age-mark (if age (- (current-seconds) age) (+ (current-seconds) 86400))) - (precmd (or (args:get-arg "-precmd") ""))) - (print "Actions: " actions) - (for-each - (lambda (target) - (let* ((runs (hash-table-ref runs-ht target)) - (sorted (sort runs (lambda (a b)(< (simple-run-event_time a)(simple-run-event_time b))))) - (to-remove (let* ((len (length sorted)) - (trim-amt (- len num-to-keep))) - (if (> trim-amt 0) - (take sorted trim-amt) - '())))) - (hash-table-set! runs-ht target to-remove) - (print target ":") - (for-each - (lambda (run) - (let ((remove (member run to-remove (lambda (a b) - (eq? 
(simple-run-id a) - (simple-run-id b)))))) - (if (and age (> (simple-run-event_time run) age-mark)) - (print "Skipping handling of " target "/" (simple-run-runname run) " as it is younger than " (args:get-arg "-age")) - (for-each - (lambda (action) - (case action - ((print) - (print " " (simple-run-runname run) - " " (time->string (seconds->local-time (simple-run-event_time run)) "WW%V.%u %H:%M:%S") - " " (if remove "REMOVE" ""))) - ((remove-runs) - (if remove (system (conc precmd " megatest -remove-runs -target " target " -runname " (simple-run-runname run) " -testpatt %" - (if (member 'kill-runs actions) ;; if kill-runs is specified then set -kill-wait to 0 - " -kill-wait 0" - ""))))) - ((archive) - (if remove (system (conc precmd " megatest -archive save-remove -target " target " -runname " (simple-run-runname run) " -testpatt %")))) - ((kill-runs) - (if remove (system (conc precmd " megatest -kill-runs -target " target " -runname " (simple-run-runname run) " -testpatt %")))) - )) - actions)))) - sorted))) - ;; (print "Sorted: " (map simple-run-event_time sorted)) - ;; (print "Remove: " (map simple-run-event_time to-remove)))) - (hash-table-keys runs-ht)) - runs-ht)) - -;; (define (runs:remove-all-but-last-n-runs-per-target target-patts runpatt num-to-keep) -;; (let ((data (runs:get-all-but-most-recent-n-per-target target-patts runpatt num-to-keep))) -;; (for-each -;; (lambda (target) -;; (let ((runs-to-remove (hash-table-ref data target ))) -;; (for-each -;; (lambda (run) -;; (print "megatest -remove-runs -target " target " -runname " (simple-run-runname run) " -testpatt %")) -;; runs-to-remove))) -;; (hash-table-keys data)))) - -;; Remove runs -;; fields are passing in through -;; action: -;; 'remove-runs -;; 'set-state-status -;; -;; NB// should pass in keys? 
-;; -(define (runs:operate-on action target runnamepatt testpatt #!key (state #f)(status #f)(new-state-status #f)(mode #f)(options '())) - (common:clear-caches) ;; clear all caches - (let* ((db #f) - ;; (tdbdat (tasks:open-db)) - (keys (rmt:get-keys)) - (rundat (mt:get-runs-by-patt keys runnamepatt target)) - (header (vector-ref rundat 0)) - (runs (vector-ref rundat 1)) - (states (if state (string-split state ",") '())) - (statuses (if status (string-split status ",") '())) - (state-status (if (string? new-state-status) (string-split new-state-status ",") '(#f #f))) - (rp-mutex (make-mutex)) - (bup-mutex (make-mutex)) - (keep-records (args:get-arg "-keep-records"))) ;; used in conjunction with -remove-runs to keep the records, TODO: consolidate this with "mode". - - (let* ((write-access-actions '(remove-runs set-state-status archive run-wait kill-runs)) - (dbfile (conc *toppath* "/megatest.db")) - (readonly-mode (not (file-write-access? dbfile)))) - (when (and readonly-mode - (member action write-access-actions)) - (debug:print-error 0 *default-log-port* "megatest.db is readonly. Cannot proceed with action ["action"] in which write-access isrequired .") - (exit 1))) - - (debug:print-info 4 *default-log-port* "runs:operate-on => Header: " header " action: " action " new-state-status: " new-state-status) - (if (> 2 (length state-status)) - (begin - (debug:print-error 0 *default-log-port* "the parameter to -set-state-status is a comma delimited string. E.g. 
COMPLETED,FAIL") - (exit))) - (for-each - (lambda (run) - (let ((runkey (string-intersperse (map (lambda (k) - (db:get-value-by-header run header k)) keys) "/")) - (dirs-to-remove (make-hash-table)) - (proc-get-tests (lambda (run-id) - (mt:get-tests-for-run run-id - testpatt states statuses - not-in: #f - sort-by: (case action - ((remove-runs) 'rundir) - (else 'event_time)))))) - (let* ((run-id (db:get-value-by-header run header "id")) - (run-state (db:get-value-by-header run header "state")) - (run-name (db:get-value-by-header run header "runname")) - (tests (if (not (equal? run-state "locked")) - (proc-get-tests run-id) - '())) - (lasttpath "/does/not/exist/I/hope") - (worker-thread #f)) - (debug:print-info 4 *default-log-port* "runs:operate-on run=" run ", header=" header) - (if (not (null? tests)) - (begin - (case action - ((kill-runs) - (tasks:kill-runner target run-name "%") - (debug:print 1 *default-log-port* "Killing tests for run: " runkey " " (db:get-value-by-header run header "runname")) - ) - ((remove-runs) - ;; (if (tasks:need-server run-id)(tasks:start-and-wait-for-server tdbdat run-id 10)) - ;; seek and kill in flight -runtests with % as testpatt here - ;; (if (equal? 
testpatt "%") - (tasks:kill-runner target run-name testpatt) - ;; (debug:print 0 *default-log-port* "not attempting to kill any run launcher processes as testpatt is " testpatt)) - (debug:print 1 *default-log-port* "Removing tests for run: " runkey " " (db:get-value-by-header run header "runname"))) - ((set-state-status) - ;; (if (tasks:need-server run-id)(tasks:start-and-wait-for-server tdbdat run-id 10)) - (debug:print 1 *default-log-port* "Modifying state and staus for tests for run: " runkey " " (db:get-value-by-header run header "runname"))) - ((print-run) - (debug:print 1 *default-log-port* "Printing info for run " runkey ", run=" run ", tests=" tests ", header=" header) - action) - ((run-wait) - (debug:print 1 *default-log-port* "Waiting for run " runkey ", run=" runnamepatt " to complete")) - ((archive) - (debug:print 1 *default-log-port* "Archiving/restoring (" (args:get-arg "-archive") ") data for run: " runkey " " (db:get-value-by-header run header "runname")) - (set! worker-thread - (make-thread - (lambda () - (case (string->symbol (args:get-arg "-archive")) - ((save save-remove keep-html) - (archive:run-bup (args:get-arg "-archive") run-id run-name tests rp-mutex bup-mutex)) - ((restore) - (archive:bup-restore (args:get-arg "-archive") run-id run-name tests rp-mutex bup-mutex)) - (else - (debug:print-error 0 *default-log-port* "unrecognised sub command to -archive. Run \"megatest\" to see help") - (exit)))) - "archive-bup-thread")) - (thread-start! worker-thread)) - (else - (debug:print-info 0 *default-log-port* "action not recognised " action))) - - ;; actions that operate on one test at a time can be handled below - ;; - (let ((sorted-tests (filter - vector? - (sort tests (lambda (a b)(let ((dira ;; (rmt:sdb-qry 'getstr - (db:test-get-rundir a)) ;; ) ;; (filedb:get-path *fdb* (db:test-get-rundir a))) - (dirb ;; (rmt:sdb-qry 'getstr - (db:test-get-rundir b))) ;; ) ;; ((filedb:get-path *fdb* (db:test-get-rundir b)))) - (if (and (string? dira)(string? 
dirb)) - (> (string-length dira)(string-length dirb)) - #f)))))) - (toplevel-retries (make-hash-table)) ;; try three times to loop through and remove top level tests - (test-retry-time (make-hash-table)) - (backgrounded-remove-status (make-hash-table)) - (backgrounded-remove-last-visit (make-hash-table)) - (backgrounded-remove-result (make-hash-table)) - (allow-run-time (string->number (or (args:get-arg "-kill-wait") "10")))) ;; seconds to allow for killing tests before just brutally killing 'em - (let loop ((test (car sorted-tests)) - (tal (cdr sorted-tests))) - (let* ((test-id (db:test-get-id test)) - (new-test-dat (rmt:get-test-info-by-id run-id test-id))) - (if (not new-test-dat) - (begin - (debug:print-error 0 *default-log-port* "We have a test-id of " test-id " but no record was found. NOTE: No locking of records is done between processes, do not simultaneously remove the same run from two processes!") - (if (not (null? tal)) - (loop (car tal)(cdr tal)))) - (let* ((item-path (db:test-get-item-path new-test-dat)) - (test-name (db:test-get-testname new-test-dat)) - (run-dir ;;(filedb:get-path *fdb* - ;; (rmt:sdb-qry 'getid - (db:test-get-rundir new-test-dat)) ;; ) ;; run dir is from the link tree - (has-subrun (and (subrun:subrun-test-initialized? run-dir) - (not (subrun:subrun-removed? run-dir)))) - (test-state (db:test-get-state new-test-dat)) - (test-status (db:test-get-status new-test-dat)) - (test-fulln (db:test-get-fullname new-test-dat)) - (uname (db:test-get-uname new-test-dat)) - (toplevel-with-children (and (db:test-get-is-toplevel test) - (> (rmt:test-toplevel-num-items run-id test-name) 0)))) - - (case action - ((remove-runs) - ;; if the test is a toplevel-with-children issue an error and do not remove - (cond - (toplevel-with-children - (debug:print 0 *default-log-port* "WARNING: skipping removal of " test-fulln " with run-id " run-id " as it has sub tests") - (hash-table-set! 
toplevel-retries test-fulln (+ (hash-table-ref/default toplevel-retries test-fulln 0) 1)) - (if (> (hash-table-ref toplevel-retries test-fulln) 3) - (if (not (null? tal)) - (loop (car tal)(cdr tal))) ;; no else clause - drop it if no more in queue and > 3 tries - (let ((newtal (append tal (list test)))) - (loop (car newtal)(cdr newtal))))) ;; loop with test still in queue - (has-subrun - ;; - (let ((last-visit (hash-table-ref/default backgrounded-remove-last-visit test-fulln 0)) - (now (current-seconds)) - (rem-status (hash-table-ref/default backgrounded-remove-status test-fulln 'not-started))) - (case rem-status - ((not-started) - (debug:print 0 *default-log-port* "WARNING: postponing removal of " test-fulln " with run-id " run-id " as it has a subrun") - (hash-table-set! backgrounded-remove-status test-fulln 'started) - (hash-table-set! backgrounded-remove-last-visit test-fulln (current-seconds)) - (common:send-thunk-to-background-thread - (lambda () - (let* ((subrun-remove-succeeded - (subrun:remove-subrun run-dir keep-records))) - (hash-table-set! backgrounded-remove-result test-fulln subrun-remove-succeeded) - (hash-table-set! backgrounded-remove-status test-fulln 'done))) - name: (conc "remove-subrun:"test-fulln)) - - ;; send to back of line, loop - (let ((newtal (append tal (list test)))) - (loop (car newtal)(cdr newtal))) - ) - ((started) - ;; if last visit was within last second, sleep 1 second - (if (< (- now last-visit) 1.0) - (thread-sleep! 1.0)) - (hash-table-set! backgrounded-remove-last-visit test-fulln (current-seconds)) - ;; send to back of line, loop - (let ((newtal (append tal (list test)))) - (loop (car newtal)(cdr newtal))) - ) - ((done) - ;; drop this one; if remaining, loop, else finish - (hash-table-set! backgrounded-remove-last-visit test-fulln (current-seconds)) - (let ((subrun-remove-succeeded (hash-table-ref/default backgrounded-remove-result test-fulln 'exception))) - (cond - ((eq? 
subrun-remove-succeeded 'exception) - (let* ((logfile (subrun:get-log-path run-dir "remove"))) - (debug:print 0 *default-log-port* "ERROR: removing subrun of of " test-fulln " with run-id " run-id " ; see logfile @ "logfile)) - (if (not (null? tal)) - (loop (car tal)(cdr tal)))) - (subrun-remove-succeeded - (debug:print 0 *default-log-port* "Now removing of " test-fulln " with run-id " run-id " since subrun was removed.") - ;;(runs:remove-test-directory new-test-dat mode) ;; let normal case handle this. it will go thru loop again as non-subrun - (let ((newtal (append tal (list test)))) - (loop (car newtal)(cdr newtal)))) - (else - (let* ((logfile (subrun:get-log-path run-dir "remove"))) - (debug:print 0 *default-log-port* "WARNING: removal of subrun failed. Please check "logfile" for details.")) - ;; send to back of line, loop (will not match has-subrun next time through) - (if (not (null? tal)) - (loop (car tal)(cdr tal)))))) - ) - ) ; end case rem-status - ) ; end let - ); end cond has-subrun - - (else - ;; BB - TODO - consider backgrounding to threads to delete tests (work below) - (debug:print-info 0 *default-log-port* "test: " test-name " itest-state: " test-state) - (if (member test-state (list "RUNNING" "LAUNCHED" "REMOTEHOSTSTART" "KILLREQ")) - (begin - (if (not (hash-table-ref/default test-retry-time test-fulln #f)) - (begin - ;; want to set to REMOVING BUT CANNOT do it here? - (hash-table-set! test-retry-time test-fulln (current-seconds)))) - (if (> (- (current-seconds)(hash-table-ref test-retry-time test-fulln)) allow-run-time) - ;; This test is not in a correct state for cleaning up. Let's try some graceful shutdown steps first - ;; Set the test to "KILLREQ" and wait five seconds then try again. Repeat up to five times then give - ;; up and blow it away. - (begin - (debug:print 0 *default-log-port* "WARNING: could not gracefully remove test " test-fulln ", tried to kill it to no avail. 
Forcing state to FAILEDKILL and continuing") - (mt:test-set-state-status-by-id run-id (db:test-get-id test) "FAILEDKILL" "n/a" #f) - (thread-sleep! 1)) - (begin - (mt:test-set-state-status-by-id run-id (db:test-get-id test) "KILLREQ" "n/a" #f) - (thread-sleep! 1))) - ;; NOTE: This is suboptimal as the testdata will be used later and the state/status may have changed ... - (if (null? tal) - (loop new-test-dat tal) - (loop (car tal)(append tal (list new-test-dat))))) - (begin - (runs:remove-test-directory new-test-dat mode) ;; 'remove-all) - (if (not (null? tal)) - (loop (car tal)(cdr tal))))))) - (rmt:update-run-stats run-id (rmt:get-raw-run-stats run-id))) - ((kill-runs) - ;; RUNNING -> KILLREQ - ;; LAUNCHED,RUNNING,REMOTEHOSTSTART -> NOT STARTED - (cond - ((and has-subrun (member test-state (list "RUNNING" "LAUNCHED" "REMOTEHOSTSTART" "KILLREQ"))) - (common:send-thunk-to-background-thread - (lambda () - (let* ((subrun-remove-succeeded - (subrun:kill-subrun run-dir keep-records))) - #t))) - (if (not (null? tal)) - (loop (car tal)(cdr tal))) - ) - ((member test-state (list "RUNNING" "LAUNCHED" "REMOTEHOSTSTART" "KILLREQ")) - (debug:print 1 *default-log-port* "INFO: issuing killreq to test "test-fulln) - (mt:test-set-state-status-by-id run-id (db:test-get-id test) "KILLREQ" "n/a" #f) - (if (not (null? tal)) - (loop (car tal)(cdr tal)))) - ((and (member test-status '("PREQ_FAIL" "PREQ_DISCARDED" "BLOCKED" "ZERO_ITEMS" "KEEP_TRYING" "TEN_STRIKES" "TIMED_OUT"))) - (rmt:set-state-status-and-roll-up-items run-id (db:test-get-id test) 'foo "NOT_STARTED" "n/a" (conc "kill-run moved from "test-state":"test-status" to NOT_STARTED:n/a")) - ;;(mt:test-set-state-status-by-id run-id (db:test-get-id test) "NOT_STARTED" "n/a" (conc "kill-run moved from "test-state":"test-status" to NOT_STARTED:n/a")) - (if (not (null? tal)) - (loop (car tal)(cdr tal))) - ) - (else - (if (not (null? 
tal)) - (loop (car tal)(cdr tal))) - ))) - ((set-state-status) - (let* ((new-state (car state-status)) - (new-status (cadr state-status)) - (test-id (db:test-get-id test)) - (test-run-dir (db:test-get-rundir new-test-dat)) - (has-subrun (and (subrun:subrun-test-initialized? test-run-dir) - (not (subrun:subrun-removed? test-run-dir))))) - (when has-subrun - (common:send-thunk-to-background-thread - (lambda () - (subrun:set-state-status test-run-dir state status new-state-status) - ) - ) - ) - (debug:print-info 2 *default-log-port* "new state " new-state ", new status " new-status ) - (mt:test-set-state-status-by-id run-id test-id new-state new-status #f)) - (if (not (null? tal)) - (loop (car tal)(cdr tal)))) - ((run-wait) - ;; BB TODO - manage has-subrun case - (debug:print-info 2 *default-log-port* "still waiting, " (length tests) " tests still running") - (thread-sleep! 10) - (let ((new-tests (proc-get-tests run-id))) - (if (null? new-tests) - (debug:print-info 1 *default-log-port* "Run completed according to zero tests matching provided criteria.") - (loop (car new-tests)(cdr new-tests))))) - ((archive) - ;; BB TODO - manage has-subrun case - (if (and run-dir (not toplevel-with-children)) - (let ((ddir (conc run-dir "/"))) - (case (string->symbol (args:get-arg "-archive")) - ((save save-remove keep-html) - (if (common:file-exists? ddir) - (debug:print-info 0 *default-log-port* "Estimating disk space usage for " test-fulln ": " (common:get-disk-space-used ddir))))))) - (if (not (null? tal)) - (loop (car tal)(cdr tal)))) - ))) - ) - (if worker-thread (thread-join! worker-thread))) - (common:join-backgrounded-threads)))) - ;; remove the run if zero tests remain - (if (eq? action 'remove-runs) - (let* ((run-id (db:get-value-by-header run header "id")) ;; NB// masks run-id from above? - (remtests (mt:get-tests-for-run run-id #f '("DELETED") '("n/a") not-in: #t))) - (if (null? 
remtests) ;; no more tests remaining - (let* ((dparts (string-split lasttpath "/")) - (runpath (conc "/" (string-intersperse - (take dparts (- (length dparts) 1)) - "/")))) - (debug:print 1 *default-log-port* "Removing run: " runkey " " (db:get-value-by-header run header "runname") " and related record") - (if (not keep-records) - (begin - (rmt:delete-run run-id) - (rmt:delete-old-deleted-test-records))) - ;; (rmt:set-var "DELETED_TESTS" (current-seconds)) - ;; need to figure out the path to the run dir and remove it if empty - ;; (if (null? (glob (conc runpath "/*"))) - ;; (begin - ;; (debug:print 1 *default-log-port* "Removing run dir " runpath) - ;; (system (conc "rmdir -p " runpath)))) - ))))) - )) - runs) - ;; (sqlite3:finalize! (db:delay-if-busy tdbdat)) - ) - #t) - -(define (runs:remove-test-directory test mode) ;; remove-data-only) - (let* ((run-dir (db:test-get-rundir test)) ;; run dir is from the link tree - (real-dir (if (common:file-exists? run-dir) - ;; (resolve-pathname run-dir) - (common:nice-path run-dir) - #f)) - (clean-mode (or mode 'remove-all)) - (test-id (db:test-get-id test)) - ;; (lock-key (conc "test-" test-id)) - ;; (got-lock (let loop ((lock (rmt:no-sync-get-lock lock-key)) - ;; (expire-time (+ (current-seconds) 30))) ;; give up on getting the lock and steal it after 15 seconds - ;; (if (car lock) - ;; #t - ;; (if (> (current-seconds) expire-time) - ;; (begin - ;; (debug:print-info 0 *default-log-port* "Timed out waiting for a lock to clean test with id " test-id) - ;; (rmt:no-sync-del! lock-key) ;; destroy the lock - ;; (loop (rmt:no-sync-get-lock lock-key) expire-time)) ;; - ;; (begin - ;; (thread-sleep! 
1) - ;; (loop (rmt:no-sync-get-lock lock-key) expire-time))))))) - ) - (case clean-mode - ((remove-data-only)(mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "CLEANING" "LOCKED" #f)) - ((remove-all) (mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "REMOVING" "LOCKED" #f)) - ((archive-remove) (mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "ARCHIVE_REMOVING" #f #f))) - (debug:print-info 1 *default-log-port* "Attempting to remove " (if real-dir (conc " dir " real-dir " and ") "") " link " run-dir) - (if (and real-dir - (> (string-length real-dir) 5) - (common:file-exists? real-dir)) ;; bad heuristic but should prevent /tmp /home etc. - (begin ;; let* ((realpath (resolve-pathname run-dir))) - (debug:print-info 1 *default-log-port* "Recursively removing " real-dir) - (if (common:file-exists? real-dir) - (runs:safe-delete-test-dir real-dir) - (debug:print 0 *default-log-port* "WARNING: test dir " real-dir " appears to not exist or is not readable"))) - (if real-dir - (debug:print 0 *default-log-port* "WARNING: directory " real-dir " does not exist") - (debug:print 0 *default-log-port* "WARNING: no real directory corrosponding to link " run-dir ", nothing done"))) - (if (symbolic-link? run-dir) - (begin - (debug:print-info 1 *default-log-port* "Removing symlink " run-dir) - (handle-exceptions - exn - (debug:print-error 0 *default-log-port* " Failed to remove symlink " run-dir ((condition-property-accessor 'exn 'message) exn) ", attempting to continue") - (delete-file run-dir))) - (if (directory? 
run-dir) - (if (> (directory-fold (lambda (f x)(+ 1 x)) 0 run-dir) 0) - (debug:print 0 *default-log-port* "WARNING: refusing to remove " run-dir " as it is not empty") - (handle-exceptions - exn - (debug:print-error 0 *default-log-port* " Failed to remove directory " run-dir ((condition-property-accessor 'exn 'message) exn) ", attempting to continue") - (delete-directory run-dir))) - (if (and run-dir - (not (member run-dir (list "n/a" "/tmp/badname")))) - (debug:print 0 *default-log-port* "WARNING: not removing " run-dir " as it either doesn't exist or is not a symlink") - (debug:print 0 *default-log-port* "NOTE: the run dir for this test is undefined. Test may have already been deleted.")) - )) - ;; Only delete the records *after* removing the directory. If things fail we have a record - (case clean-mode - ((remove-data-only)(mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) (db:test-get-state test)(db:test-get-status test) #f)) - ((archive-remove) (mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "ARCHIVED" #f #f)) - (else (rmt:delete-test-records (db:test-get-run_id test) (db:test-get-id test)))) - ;; (rmt:no-sync-del! 
lock-key) - )) - -;;====================================================================== -;; Routines for manipulating runs -;;====================================================================== - -;; Since many calls to a run require pretty much the same setup -;; this wrapper is used to reduce the replication of code -(define (general-run-call switchname action-desc proc) - (let ((runname (or (args:get-arg "-runname")(args:get-arg ":runname"))) - (target (common:args-get-target))) - (cond - ((not target) - (debug:print-error 0 *default-log-port* "Missing required parameter for " switchname ", you must specify the target with -target") - (exit 3)) - ((not runname) - (debug:print-error 0 *default-log-port* "Missing required parameter for " switchname ", you must specify the run name with -runname runname") - (exit 3)) - (else - (let (;; (db #f) - (keys #f)) - (if (launch:setup) - (begin - (full-runconfigs-read) ;; cache the run config - ;; (launch:cache-config) ;; there are two independent config cache locations, turning this one off for now. MRW. - ) ;; do not cache here - need to be sure runconfigs is processed - (begin - (debug:print 0 *default-log-port* "Failed to setup, exiting") - (exit 1))) - - - (set! keys (keys:config-get-fields *configdat*)) - ;; have enough to process -target or -reqtarg here - (if (args:get-arg "-reqtarg") - (let* ((runconfigf (conc *toppath* "/runconfigs.config")) ;; DO NOT EVALUATE ALL - (runconfig (read-config runconfigf #f #t environ-patt: #f))) - (if (hash-table-ref/default runconfig (args:get-arg "-reqtarg") #f) - (keys:target-set-args keys (args:get-arg "-reqtarg") args:arg-hash) - - (begin - (debug:print-error 0 *default-log-port* "[" (args:get-arg "-reqtarg") "] not found in " runconfigf) - ;; (if db (sqlite3:finalize! 
db)) - (exit 1) - ))) - (if (args:get-arg "-target") - (keys:target-set-args keys (args:get-arg "-target" args:arg-hash) args:arg-hash))) - (if (not (car *configinfo*)) - (begin - (debug:print-error 0 *default-log-port* "Attempted to " action-desc " but run area config file not found") - (exit 1)) - ;; Extract out stuff needed in most or many calls - ;; here then call proc - (let* ((keyvals (keys:target->keyval keys target))) - (proc target runname keys keyvals))) - ;; (if db (sqlite3:finalize! db)) - (set! *didsomething* #t)))))) - -;;====================================================================== -;; Lock/unlock runs -;;====================================================================== - -(define (runs:handle-locking target keys runname lock unlock user) - (let* ((db #f) - (rundat (mt:get-runs-by-patt keys runname target)) - (header (vector-ref rundat 0)) - (runs (vector-ref rundat 1))) - (for-each (lambda (run) - (let ((run-id (db:get-value-by-header run header "id")) - (str (if lock - "lock" - "unlock"))) - (if (or lock - (and unlock - (or (args:get-arg "-force") - (begin - (print "Do you really wish to unlock run " run-id "?\n y/n: ") - (equal? "y" (read-line)))))) - (begin - (rmt:lock/unlock-run run-id lock unlock user) - (debug:print-info 0 *default-log-port* "Done " str " on run id " run-id)) - (debug:print-info 0 *default-log-port* "Skipping lock/unlock on " run-id)))) - runs))) -;;====================================================================== -;; Rollup runs -;;====================================================================== - -;; Update the test_meta table for this test -(define (runs:update-test_meta test-name test-conf) - (let ((currrecord (rmt:testmeta-get-record test-name))) - (if (not currrecord) - (begin - (set! 
currrecord (make-vector 11 #f)) - (rmt:testmeta-add-record test-name))) - (for-each - (lambda (key) - (let* ((idx (cadr key)) - (fld (car key)) - (val (configf:lookup test-conf "test_meta" fld))) - ;; (debug:print 5 *default-log-port* "idx: " idx " fld: " fld " val: " val) - (if (and val (not (equal? (vector-ref currrecord idx) val))) - (begin - (print "Updating " test-name " " fld " to " val) - (rmt:testmeta-update-field test-name fld val))))) - '(("author" 2)("owner" 3)("description" 4)("reviewed" 5)("tags" 9)("jobgroup" 10))))) - -;; find tests with matching tags, tagpatt is a string "tagpatt1,tagpatt2%, ..." -;; -(define (runs:get-tests-matching-tags tagpatt) - (let* ((tagdata (rmt:get-tests-tags)) - (res '())) ;; list of tests that match one or more tags - (for-each - (lambda (row) - (let* ((tag (car row)) - (tests (cdr row))) - (if (patt-list-match tag tagpatt) - (set! res (append tests res))))) - tagdata) - res)) - - -;; Update test_meta for all tests -(define (runs:update-all-test_meta db) - (let ((test-names (tests:get-all))) ;; (tests:get-valid-tests))) - (for-each - (lambda (test-name) - (let* ((test-conf (mt:lazy-read-test-config test-name))) - (if test-conf (runs:update-test_meta test-name test-conf)))) - (hash-table-keys test-names)))) - -;; This could probably be refactored into one complex query ... 
-;; NOT PORTED - DO NOT USE YET -;; -#;(define (runs:rollup-run keys runname user keyvals) - (debug:print 4 *default-log-port* "runs:rollup-run, keys: " keys " -runname " runname " user: " user) - (let* ((db #f) - ;; register run operates on the main db - (new-run-id (rmt:register-run keyvals runname "new" "n/a" user (args:get-arg "-contour"))) - (prev-tests (rmt:get-matching-previous-test-run-records new-run-id "%" "%")) - (curr-tests (mt:get-tests-for-run new-run-id "%/%" '() '())) - (curr-tests-hash (make-hash-table))) - (rmt:update-run-event_time new-run-id) - ;; index the already saved tests by testname and itemdat in curr-tests-hash - (for-each - (lambda (testdat) - (let* ((testname (db:test-get-testname testdat)) - (item-path (db:test-get-item-path testdat)) - (full-name (conc testname "/" item-path))) - (hash-table-set! curr-tests-hash full-name testdat))) - curr-tests) - ;; NOPE: Non-optimal approach. Try this instead. - ;; 1. tests are received in a list, most recent first - ;; 2. replace the rollup test with the new *always* - (for-each - (lambda (testdat) - (let* ((testname (db:test-get-testname testdat)) - (item-path (db:test-get-item-path testdat)) - (full-name (conc testname "/" item-path)) - (prev-test-dat (hash-table-ref/default curr-tests-hash full-name #f)) - (test-steps (rmt:get-steps-for-test (db:test-get-id testdat))) - (new-test-record #f)) - ;; replace these with insert ... select - (apply sqlite3:execute - db - (conc "INSERT OR REPLACE INTO tests (run_id,testname,state,status,event_time,host,cpuload,diskfree,uname,rundir,item_path,run_duration,final_logf,comment) " - "VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?);") - new-run-id (cddr (vector->list testdat))) - (set! new-testdat (car (mt:get-tests-for-run new-run-id (conc testname "/" item-path) '() '()))) - (hash-table-set! curr-tests-hash full-name new-testdat) ;; this could be confusing, which record should go into the lookup table? 
- ;; Now duplicate the test steps - (debug:print 4 *default-log-port* "Copying records in test_steps from test_id=" (db:test-get-id testdat) " to " (db:test-get-id new-testdat)) - (cdb:remote-run ;; to be replaced, note: this routine is not used currently - (lambda () - (sqlite3:execute - db - (conc "INSERT OR REPLACE INTO test_steps (test_id,stepname,state,status,event_time,comment) " - "SELECT " (db:test-get-id new-testdat) ",stepname,state,status,event_time,comment FROM test_steps WHERE test_id=?;") - (db:test-get-id testdat)) - ;; Now duplicate the test data - (debug:print 4 *default-log-port* "Copying records in test_data from test_id=" (db:test-get-id testdat) " to " (db:test-get-id new-testdat)) - (sqlite3:execute - db - (conc "INSERT OR REPLACE INTO test_data (test_id,category,variable,value,expected,tol,units,comment) " - "SELECT " (db:test-get-id new-testdat) ",category,variable,value,expected,tol,units,comment FROM test_data WHERE test_id=?;") - (db:test-get-id testdat)))) - )) - prev-tests))) - - -;; clean cache files -(define (runs:clean-cache target runname toppath) - (if target - (if runname - (let* ((linktree (common:get-linktree)) ;; (if toppath (configf:lookup *configdat* "setup" "linktree"))) - (runtop (conc linktree "/" target "/" runname)) - (files (if (common:file-exists? runtop) - (append (glob (conc runtop "/.megatest*")) - (glob (conc runtop "/.runconfig*"))) - '()))) - (if (null? files) - (debug:print-info 0 *default-log-port* "No cached megatest or runconfigs files found. 
None removed.") - (begin - (debug:print-info 0 *default-log-port* "Removing cached files:\n " (string-intersperse files "\n ")) - (for-each - (lambda (f) - (handle-exceptions - exn - (debug:print 0 *default-log-port* "WARNING: Failed to remove file " f) - (delete-file f))) - files)))) - (debug:print-error 0 *default-log-port* "-clean-cache requires -runname.")) - (debug:print-error 0 *default-log-port* "-clean-cache requires -target or -reqtarg"))) Index: runsmod.scm ================================================================== --- runsmod.scm +++ runsmod.scm @@ -17,21 +17,71 @@ ;; along with Megatest. If not, see . ;;====================================================================== (declare (unit runsmod)) + (declare (uses commonmod)) +(declare (uses dbmod)) +(declare (uses launchmod)) +(declare (uses mtargs)) +(declare (uses mtconfigf)) +(declare (uses mtmod)) +(declare (uses pgdbmod)) +(declare (uses rmtmod)) +(declare (uses servermod)) +(declare (uses stml2)) +(declare (uses subrunmod)) +(declare (uses tasksmod)) (declare (uses testsmod)) +(declare (uses itemsmod)) (module runsmod * -(import scheme chicken data-structures extras) -(import (prefix sqlite3 sqlite3:) posix typed-records srfi-18 srfi-69 format ports srfi-1 matchable) +(import scheme chicken data-structures extras ports files) + +(use (prefix base64 base64:) + (prefix sqlite3 sqlite3:) + call-with-environment-variables + csv + directory-utils + format + matchable + message-digest + md5 + ports + posix + regex + srfi-1 + srfi-1 + srfi-13 + srfi-18 + srfi-18 + srfi-69 + typed-records + z3) + (import commonmod) +(import dbmod) +(import itemsmod) +(import mtmod) +(import pgdbmod) +(import (prefix mtargs args:)) +(import (prefix mtconfigf configf:)) +(import rmtmod) +(import servermod) +(import stml2) +(import subrunmod) +(import tasksmod) (import testsmod) + ;; (use (prefix ulex ulex:)) +(include "run_records.scm") +(include "db_records.scm") +(include "test_records.scm") +(include 
"key_records.scm") ;; (include "common_records.scm") (defstruct runs:dat reglen regfull runname max-concurrent-jobs run-id @@ -96,6 +146,7334 @@ ;; return #t when all items in waitors-upon list are represented in test-patt, #f otherwise. (define (runs:testpatts-mention-waitors-upon? test-patt waitors-upon) (null? (tests:filter-test-names-not-matched waitors-upon test-patt))) + +(define (runs:get-mt-env-alist run-id runname target testname itempath) + ;;(bb-check-path msg: "runs:set-megatest-env-vars entry") + `(("MT_TEST_NAME" . ,testname) + + ("MT_ITEMPATH" . ,itempath) + + ("MT_TARGET" . ,target) + + ("MT_RUNNAME" . ,runname) + + ("MT_RUN_AREA_HOME" . ,*toppath*) + + ,@(let* ((link-tree (common:get-linktree))) ;; (configf:lookup *configdat* "setup" "linktree"))) + (if link-tree + (list (cons "MT_LINKTREE" link-tree) + + (cons "MT_TEST_RUN_DIR" + (conc link-tree "/" target "/" runname "/" testname + (if (and (string? itempath) (not (equal? itempath ""))) + (conc "/" itempath) + ""))) + ) + '())) + + ,@(map + (lambda (key) + (cons (car key) (cadr key))) + (keys:target->keyval (rmt:get-keys) target)) + + ,@(map (lambda (var) + (let ((val (configf:lookup *configdat* "env-override" var))) + (cons var val))) + (configf:section-vars *configdat* "env-override")))) + + + + + + + +(define (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs) + + ;; Take advantage of a good place to exit if running the one-pass methodology + (if (and (> (runs:dat-can-run-more-tests-count runsdat) 20) + (args:get-arg "-one-pass")) + (exit 0)) + + (thread-sleep! (cond ;; BB: check with Matt. Should this sleep move to cond clauses below where we determine we have too many jobs running rather than each time the and condition above is true (which seems like always)? 
+ ((> (runs:dat-can-run-more-tests-count runsdat) 20) + (if (runs:lownoise "waiting on tasks" 60)(debug:print-info 2 *default-log-port* "waiting for tasks to complete, sleeping briefly ...")) + (configf:lookup-number *configdat* "setup" "inter-test-delay" default: 0.1) ;; was 2 + );; obviously haven't had any work to do for a while + (else 0))) + + (let* ((num-running (rmt:get-count-tests-running run-id)) + (num-running-in-jobgroup (rmt:get-count-tests-running-in-jobgroup run-id jobgroup)) + (job-group-limit (let ((jobg-count (configf:lookup *configdat* "jobgroups" jobgroup))) + (if (string? jobg-count) + (string->number jobg-count) + jobg-count)))) + (if (> (+ num-running num-running-in-jobgroup) 0) + (runs:inc-can-run-more-tests-count runsdat)) ;; (set! *runs:can-run-more-tests-count* (+ *runs:can-run-more-tests-count* 1))) + (if (not (eq? *last-num-running-tests* num-running)) + (begin + (debug:print 2 *default-log-port* "max-concurrent-jobs: " max-concurrent-jobs ", num-running: " num-running) + (set! *last-num-running-tests* num-running))) + (if (not (eq? 
0 *globalexitstatus*)) + (list #f num-running num-running-in-jobgroup max-concurrent-jobs job-group-limit) + (let* ((can-not-run-more (cond + ;; if max-concurrent-jobs is set and the number running is greater + ;; than it then cannot run more jobs + ((and max-concurrent-jobs (>= num-running max-concurrent-jobs)) + (if (runs:lownoise "mcj msg" 60) + (debug:print 0 *default-log-port* "WARNING: Max running jobs exceeded, current number running: " num-running + ", max_concurrent_jobs: " max-concurrent-jobs)) + #t) + ;; if job-group-limit is set and number of jobs in the group is greater + ;; than the limit then cannot run more jobs of this kind + ((and job-group-limit + (>= num-running-in-jobgroup job-group-limit)) + (if (runs:lownoise (conc "maxjobgroup " jobgroup) 60) + (debug:print 1 *default-log-port* "WARNING: number of jobs " num-running-in-jobgroup + " in jobgroup \"" jobgroup "\" exceeds limit of " job-group-limit)) + #t) + (else #f)))) + (list (not can-not-run-more) num-running num-running-in-jobgroup max-concurrent-jobs job-group-limit))))) + +(define (runs:run-pre-hook run-id) + (let* ((run-pre-hook (configf:lookup *configdat* "runs" "pre-hook")) + (existing-tests (if run-pre-hook + (rmt:get-tests-for-run run-id "%" '() '() ;; run-id testpatt states statuses + #f #f ;; offset limit + #f ;; not-in + #f ;; sort-by + #f ;; sort-order + #f ;; get full data (not 'shortlist) + 0 ;; (runs:gendat-inc-results-last-update *runs:general-data*) ;; last update time + 'dashboard) + '())) + (log-dir (conc *toppath* "/logs")) + (log-file (conc "pre-hook-" (string-translate (getenv "MT_TARGET") "/" "-") "-" (getenv "MT_RUNNAME") ".log")) + (full-log-fname (conc log-dir "/" log-file))) + (if run-pre-hook + (if (null? existing-tests) + (let* ((use-log-dir (if (not (directory-exists? 
log-dir)) + (handle-exceptions + exn + (begin + (debug:print 0 *default-log-port* "WARNING: Failed to create " log-dir) + #f) + (create-directory log-dir #t) + #t) + #t)) + (start-time (current-seconds)) + (actual-logf (if use-log-dir full-log-fname log-file))) + (handle-exceptions + exn + (begin + (print-call-chain *default-log-port*) + (debug:print 0 *default-log-port* "Message: " ((condition-property-accessor 'exn 'message) exn)) + (debug:print 0 *default-log-port* "ERROR: failed to run pre-hook " run-pre-hook ", check the log " log-file)) + (debug:print-info 0 *default-log-port* "running run-pre-hook: \"" run-pre-hook "\", log is " actual-logf) + (system (conc run-pre-hook " >> " actual-logf " 2>&1")) + (debug:print-info 0 *default-log-port* "pre-hook \"" run-pre-hook "\" took " (- (current-seconds) start-time) " seconds to run."))) + (debug:print 0 *default-log-port* "Skipping pre-hook call \"" run-pre-hook "\" as there are existing tests for this run."))))) + +(define (runs:run-post-hook run-id) + (let* ((run-post-hook (configf:lookup *configdat* "runs" "post-hook")) + (existing-tests (if run-post-hook + (rmt:get-tests-for-run run-id "%" '() '() ;; run-id testpatt states statuses + #f #f ;; offset limit + #f ;; not-in + #f ;; sort-by + #f ;; sort-order + #f ;; get full data (not 'shortlist) + 0 ;; (runs:gendat-inc-results-last-update *runs:general-data*) ;; last update time + 'dashboard) + '())) + (log-dir (conc *toppath* "/logs")) + (log-file (conc "post-hook-" (string-translate (getenv "MT_TARGET") "/" "-") "-" (getenv "MT_RUNNAME") ".log")) + (full-log-fname (conc log-dir "/" log-file))) + (if run-post-hook + ;; (if (null? existing-tests) + ;; (debug:print 0 *default-log-port* "Skipping post-hook call \"" run-post-hook "\" as there are existing tests for this run."))))) + (let* ((use-log-dir (if (not (directory-exists? 
log-dir)) + (handle-exceptions + exn + (begin + (debug:print 0 *default-log-port* "WARNING: Failed to create " log-dir) + #f) + (create-directory log-dir #t) + #t) + #t)) + (start-time (current-seconds)) + (actual-logf (if use-log-dir full-log-fname log-file))) + (handle-exceptions + exn + (begin + (print-call-chain *default-log-port*) + (debug:print 0 *default-log-port* "Message: " ((condition-property-accessor 'exn 'message) exn)) + (debug:print 0 *default-log-port* "ERROR: failed to run post-hook " run-post-hook ", check the log " log-file)) + (debug:print-info 0 *default-log-port* "running run-post-hook: \"" run-post-hook "\", log is " actual-logf) + (system (conc run-post-hook " >> " actual-logf " 2>&1")) + (debug:print-info 0 *default-log-port* "post-hook \"" run-post-hook "\" took " (- (current-seconds) start-time) " seconds to run.")))))) + +;;====================================================================== +;; runs:run-tests is called from megatest.scm and itself +;;====================================================================== +;; +;; test-names: Comma separated patterns same as test-patts but used in selection +;; of tests to run. The item portions are not respected. +;; FIXME: error out if /patt specified +;; +(define (runs:run-tests target runname test-patts user flags #!key (run-count 1)) ;; test-names + (let* ((keys (keys:config-get-fields *configdat*)) + (keyvals (keys:target->keyval keys target)) + (run-id (rmt:register-run keyvals runname "new" "n/a" user (args:get-arg "-contour"))) ;; test-name))) + ;; (deferred '()) ;; delay running these since they have a waiton clause + (runconfigf (conc *toppath* "/runconfigs.config")) + (dbfile (conc *toppath* "/megatest.db")) + (readonly-mode (not (file-write-access? 
dbfile))) + (test-records (make-hash-table)) + ;; need to process runconfigs before generating these lists + (all-tests-registry #f) ;; (tests:get-all)) ;; (tests:get-valid-tests (make-hash-table) test-search-path)) ;; all valid tests to check waiton names + (all-test-names #f) ;; (hash-table-keys all-tests-registry)) + (test-names #f) ;; Generated by a call to (tests:filter-test-names all-test-names test-patts)) + (required-tests #f) ;; Put fully qualified test/testpath names in this list to be done + (waitors-upon (make-hash-table)) ;; given a test, return list of tests waiting upon this test. + (task-key (conc (hash-table->alist flags) " " (get-host-name) " " (current-process-id))) + ;; (tdbdat (tasks:open-db)) + (config-reruns (let ((x (configf:lookup *configdat* "setup" "reruns"))) + (if x (string->number x) #f))) + (allowed-tests #f) + (runconf #f)) + + ;; check if readonly + (when readonly-mode + (debug:print-error 0 *default-log-port* "megatest.db is readonly. Cannot proceed.") + (exit 1)) + + ;; per user request. If less than 100Meg space on dbdir partition, bail out with error + ;; this will reduce issues in database corruption + (common:check-db-dir-and-exit-if-insufficient) + + ;; override the number of reruns from the configs + ;; this needs to be done at the place where is first runs:run-tests called + ;(if (and config-reruns + ; (> run-count config-reruns)) + ;(set! run-count config-reruns)) + + ;; (if (tasks:need-server run-id)(tasks:start-and-wait-for-server tdbdat run-id 10)) + + (let ((sighand (lambda (signum) + ;; (signal-mask! signum) ;; to mask or not? seems to cause issues in exiting + (set! *time-to-exit* #t) + (print "Received signal " signum ", cleaning up before exit. Please wait...") + (let ((th1 (make-thread (lambda () + ;; (let ((tdbdat (tasks:open-db))) + (rmt:tasks-set-state-given-param-key task-key "killed") ;; ) + (print "Killed by signal " signum ". Exiting") + (thread-sleep! 
3) + (exit)))) + (th2 (make-thread (lambda () + (thread-sleep! 5) + (debug:print 0 *default-log-port* "Done") + (exit 4))))) + (thread-start! th2) + (thread-start! th1) + (thread-join! th2))))) + (set-signal-handler! signal/int sighand) + (set-signal-handler! signal/term sighand)) + + ;; force the starting of a server -- removed BB 17ww28 - no longer needed. + ;;(debug:print 0 *default-log-port* "waiting on server...") + ;;(server:start-and-wait *toppath*) + + (runs:set-megatest-env-vars run-id inkeys: keys inrunname: runname) ;; these may be needed by the launching process + (set! runconf (if (common:file-exists? runconfigf) + (setup-env-defaults runconfigf run-id *already-seen-runconfig-info* keyvals target) + (begin + (debug:print 0 *default-log-port* "WARNING: You do not have a run config file: " runconfigf) + #f))) + + (if (not test-patts) ;; first time in - adjust testpatt + (set! test-patts (common:args-get-testpatt runconf))) + ;; if test-patts is #f at this point there is something wrong and we need to bail out + (if (not test-patts) + (begin + (debug:print 0 *default-log-port* "WARNING: there is no test pattern for this run. Exiting now.") + (exit 0))) + + (if (args:get-arg "-tagexpr") + (begin + (set! allowed-tests (string-join (runs:get-tests-matching-tags (args:get-arg "-tagexpr")) ",")) + (debug:print-info 0 *default-log-port* "filtering initial test list with tagexpr: " (args:get-arg "-tagexpr") " => " allowed-tests) + ));; tests will be ANDed with this list + + ;; register this run in monitor.db + (rmt:tasks-add "run-tests" user target runname test-patts task-key) ;; params) + (rmt:tasks-set-state-given-param-key task-key "running") + + #;(common:telemetry-log "run-tests" + payload: + `( (target . ,target) + (run-name . ,runname) + (test-patts . ,test-patts) ) ) + + + ;; Now generate all the tests lists + (set! all-tests-registry (tests:get-all)) ;; hash of testname => path-to-test + (set! 
all-test-names (hash-table-keys all-tests-registry)) + ;; filter first for allowed-tests (from -tagexpr) then for test-patts. + (set! test-names (tests:filter-test-names + (if allowed-tests + (tests:filter-test-names all-test-names allowed-tests) + all-test-names) + test-patts)) + + ;; I think seeding required-tests with all test-names makes sense but lack analysis to back that up. + + ;; NEW STRATEGY HERE: + ;; 1. fill required tests with test-patts + ;; 2. scan testconfigs and if waitons, itemwait, itempatt calc prior test test-patt + ;; 3. repeat until all deps propagated + + ;; any tests with direct mention in test-patts can be added to required + ;;(set! required-tests (lset-intersection equal? (string-split test-patts ",") all-test-names)) + (set! required-tests (tests:filter-test-names all-test-names test-patts)) + ;; + ;; (set! required-tests (lset-intersection equal? test-names all-test-names)) + + ;; look up all tests matching the comma separated list of globs in + ;; test-patts (using % as wildcard) + + ;; (set! test-names (delete-duplicates (tests:get-valid-tests *toppath* test-patts))) + (debug:print-info 0 *default-log-port* "tests search path: " (string-intersperse (tests:get-tests-search-path *configdat*) " ")) + (debug:print-info 0 *default-log-port* "all tests: " (string-intersperse (sort all-test-names string<) " ")) + (debug:print-info 0 *default-log-port* "test names: " (string-intersperse (sort test-names string<) " ")) + (debug:print-info 0 *default-log-port* "required tests: " (string-intersperse (sort required-tests string<) " ")) + + ;; on the first pass or call to run-tests set FAILS to NOT_STARTED if + ;; -keepgoing is specified + (if (eq? *passnum* 0) + (begin + ;; Is this still necessary? I think not. Unreachable tests are marked as such and + ;; should not cause problems here. + ;; + ;; have to delete test records where NOT_STARTED since they can cause -keepgoing to + ;; get stuck due to becoming inaccessible from a failed test. I.e. 
if test B depends + ;; on test A but test B reached the point on being registered as NOT_STARTED and test + ;; A failed for some reason then on re-run using -keepgoing the run can never complete. + ;; + ;; (rmt:general-call 'delete-tests-in-state run-id "NOT_STARTED") + + ;; Now convert anything in allow-auto-rerun to NOT_STARTED + ;; + (for-each + (lambda (state-status) + (let* ((ss-lst (string-split-fields "/" state-status #:infix)) + (state (if (> (length ss-lst) 0)(car ss-lst) #f)) + (status (if (> (length ss-lst) 1)(cadr ss-lst) #f))) + (rmt:set-tests-state-status run-id test-names state status "NOT_STARTED" status))) + ;; list of state/status pairs separated by spaces + (string-split (or (configf:lookup *configdat* "setup" "allow-auto-rerun") ""))))) + + ;; Ensure all tests are registered in the test_meta table + (runs:update-all-test_meta #f) + + ;; run the run prehook if there are no tests yet run for this run: + ;; + (runs:run-pre-hook run-id) + ;; mark all test launced flag as false in the meta table + (rmt:set-var (conc "lunch-complete-" run-id) "no") + (rmt:set-run-state-status run-id "new" "n/a") + ;; now add non-directly referenced dependencies (i.e. waiton) + ;;====================================================================== + ;; refactoring this block into tests:get-full-data + ;; + ;; What happended, this code is now duplicated in tests!? + ;; + ;;====================================================================== + + (if (not (null? test-names)) ;; BEGIN test-names loop + (let loop ((hed (car test-names)) ;; NOTE: This is the main loop that iterates over the test-names + (tal (cdr test-names))) ;; 'return-procs tells the config reader to prep running system but return a proc + (debug:print-info 4 *default-log-port* "\n\ntestpatt elaboration loop => hed="hed " tal="tal" test-patts="test-patts" test-names="test-names) + (change-directory *toppath*) ;; PLEASE OPTIMIZE ME!!! 
I think this should be a no-op but there are several places where change-directories could be happening. + (setenv "MT_TEST_NAME" hed) ;; + (let*-values (((waitons waitors config) (tests:get-waitons hed all-tests-registry)) + + ;; NOTE: Have the config - can extract [waitons] section + + ((hed-mode) + (let ((m (configf:lookup config "requirements" "mode"))) + (if m (map string->symbol (string-split m)) '(normal)))) + ((hed-itemized-waiton) ;; are items in hed waiting on items of waiton? + (not (null? (lset-intersection eq? hed-mode '(itemmatch itemwait))))) + ) + (debug:print-info 8 *default-log-port* "waitons: " waitons) + ;; check for hed in waitons => this would be circular, remove it and issue an + ;; error + (if (or (member hed waitons) + (member hed waitors)) + (begin + (debug:print-error 0 *default-log-port* "test " hed " has listed itself as a waiton or waitor, please correct this!") + (set! waitons (filter (lambda (x)(not (equal? x hed))) waitons)) + (set! waitors (filter (lambda (x)(not (equal? x hed))) waitors)))) + + ;; (items (items:get-items-from-config config))) + (if (not (hash-table-ref/default test-records hed #f)) ;; waiton-tconfig below will be #f until that test is visted here at least once + (hash-table-set! 
test-records ;; BB: we are doing a manual make-tests:testqueue + hed (vector hed ;; 0 ;; testname + config ;; 1 + waitons ;; 2 + (configf:lookup config "requirements" "priority") ;; priority 3 + (tests:get-items config) ;; 4 ;; expand the [items] and or [itemstable] into explict items + #f ;; itemsdat 5 + #f ;; spare - used for item-path + waitors ;; + ))) + ;; update waitors-upon here + (for-each + (lambda (waiton) + (let* ((current-waitors-upon (hash-table-ref/default waitors-upon waiton '()))) + (debug:print-info 8 *default-log-port* " current-waiters-upon["waiton"] is "current-waitors-upon ) + (when (not (member hed current-waitors-upon)) + (debug:print-info 8 *default-log-port* " current-waiters-upon["waiton"] << "hed ) + (hash-table-set! waitors-upon waiton (cons hed current-waitors-upon))))) + (if (list? waitons) waitons '())) + (debug:print-info 8 *default-log-port* " process waitons&waitors of "hed": "(delete-duplicates (append waitons waitors))) + (for-each + (lambda (waiton) + (if (and waiton (not (member waiton test-names))) + (let* ((waitors-in-testpatt (runs:testpatts-mention-waitors-upon? 
test-patts (hash-table-ref/default waitors-upon waiton '()))) + (waiton-record (hash-table-ref/default test-records waiton #f)) + (waiton-tconfig (if waiton-record (vector-ref waiton-record 1) #f)) + (waiton-itemized (and waiton-tconfig + (or (hash-table-ref/default waiton-tconfig "items" #f) + (hash-table-ref/default waiton-tconfig "itemstable" #f)))) + (itemmaps (tests:get-itemmaps config)) ;; (configf:lookup config "requirements" "itemmap")) + (new-test-patts (tests:extend-test-patts test-patts hed waiton itemmaps hed-itemized-waiton))) + (debug:print-info 0 *default-log-port* "Test " waiton " has " (if waiton-record "a" "no") " waiton-record and" (if waiton-itemized " " " no ") "items") + ;; need to account for test-patt here, if I am test "a", selected with a test-patt of "hed/b%" + ;; and we are waiting on "waiton" we need to add "waiton/,waiton/b%" to test-patt + ;; is this satisfied by merely appending "/" to the waiton name added to the list? + ;; + ;; This approach causes all of the items in an upstream test to be run + ;; if we have this waiton already processed once we can analzye it for extending + ;; tests to be run, since we can't properly process waitons unless they have been + ;; initially added we add them again to be processed on second round AND add the hed + ;; back in to also be processed on second round + (if waiton-tconfig ;; BB: waiter should be in test-patts as well as the waiton have a tconfig. + (if waiton-itemized + (if waitors-in-testpatt + (begin + (debug:print-info 0 *default-log-port* "New test patts: " new-test-patts ", prev test patts: " test-patts) + (set! test-names (cons waiton test-names)) ;; need to process this one, only add once the waiton tconfig read + (set! required-tests (cons (conc waiton "/") required-tests)) + (set! test-patts new-test-patts)) + (begin + (debug:print-info 0 *default-log-port* "Waitor(s) not yet on testpatt for " waiton ", setting up to re-process it") + (set! 
tal (append (cons waiton tal)(list hed))))) + (begin + (debug:print-info 0 *default-log-port* "Adding non-itemized test " waiton " to required-tests") + (set! required-tests (cons waiton required-tests)) + (set! test-patts new-test-patts))) + (begin + (debug:print-info 0 *default-log-port* "No testconfig info yet for " waiton ", setting up to re-process it") + (set! tal (append (cons waiton tal)(list hed))))) ;; (cons (conc waiton "/") required-tests)) + ;; NOPE: didn't work. required needs to be plain test names. Try tacking on to test-patts + ;; - doesn't work + ;; (set! test-patts (conc test-patts "," waiton "/")) + ;; (set! test-names (cons waiton test-names))))) ;; was an append, now a cons + ))) + (delete-duplicates (append waitons waitors))) + (let ((remtests (delete-duplicates (append waitons tal)))) + (debug:print-info 8 *default-log-port* " remtests are "remtests) + (if (not (null? remtests)) + (begin + ;; (debug:print-info 0 *default-log-port* "Preprocessing continues for " (string-intersperse remtests ", ")) + (loop (car remtests)(cdr remtests)))))))) ;; END test-names loop + + (if (not (null? required-tests)) + (debug:print-info 1 *default-log-port* "Adding \"" (string-intersperse required-tests " ") "\" to the run queue")) + ;; NOTE: these are all parent tests, items are not expanded yet. + (debug:print-info 4 *default-log-port* "test-records=" (hash-table->alist test-records)) + (let ((reglen (configf:lookup *configdat* "setup" "runqueue"))) + (if (> (length (hash-table-keys test-records)) 0) + (let* ((keep-going #t) + (run-queue-retries 5) + #;(th1 (make-thread (lambda () + (handle-exceptions + exn + (begin + (print-call-chain) + (print " message: " ((condition-property-accessor 'exn 'message) exn))) + (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests + (any->number reglen) all-tests-registry))) + "runs:run-tests-queue")) + (th2 (make-thread (lambda () ;; BBQ: why are we visiting ALL runs here? 
+ ;; (rmt:find-and-mark-incomplete-all-runs))))) CAN'T INTERRUPT IT ... + (let ((run-ids (rmt:get-all-run-ids))) + (for-each (lambda (run-id) + (if keep-going + (handle-exceptions + exn + (debug:print 0 *default-log-port* "error in calling find-and-mark-incomplete for run-id " run-id) + (rmt:find-and-mark-incomplete run-id #f)))) ;; ovr-deadtime))) ;; could be root of https://hsdes.intel.com/appstore/article/#/220546828/main -- Title: Megatest jobs show DEAD even though they are still running (1.64/27) + run-ids))) + "runs: mark-incompletes"))) + ;; (thread-start! th1) + (thread-start! th2) + ;; (thread-join! th1) + ;; just do the main stuff in the main thread + (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests + (any->number reglen) all-tests-registry) + (set! keep-going #f) + (thread-join! th2) + ;; if run-count > 0 call, set -preclean and -rerun STUCK/DEAD + (if (> run-count 0) ;; handle reruns + (begin + (if (not (hash-table-ref/default flags "-preclean" #f)) + (hash-table-set! flags "-preclean" #t)) + (if (not (hash-table-ref/default flags "-rerun" #f)) + (hash-table-set! flags "-rerun" "STUCK/DEAD,n/a,ZERO_ITEMS")) + ;; recursive call to self + (runs:run-tests target runname test-patts user flags run-count: (- run-count 1))))) + (debug:print-info 0 *default-log-port* "No tests to run"))) + (debug:print-info 4 *default-log-port* "All done by here") + ;; TODO: try putting post hook call here + ;(if (eq? run-count 0) + ; (begin + ; (debug:print-info 0 *default-log-port* "Calling Post Hook") + ; (debug:print-info 2 *default-log-port* " run-count " run-count) + ; (runs:run-post-hook run-id)) + ; (debug:print-info 2 *default-log-port* "Not calling post hook runcount = " run-count )) + (rmt:tasks-set-state-given-param-key task-key "done") + ;; (sqlite3:finalize! tasks-db) + )) + + +;; loop logic. These are used in runs:run-tests-queue to make it a bit more readable. 
+;;
+;; Queue-advance rules (regfull is the caller's "reg is full, i.e. length >= n" flag):
+;;   - reg not full and tal has items => loop with (car tal) (cdr tal) reg reruns
+;;   - reg full                       => loop with (car reg) tal (cdr reg) reruns
+;;   - tal empty but reg has items    => loop with (car reg) (cdr reg) '() reruns
+;;   - nothing left to pop            => next hed is #f, i.e. all done
+;;
+;; The n argument is accepted by the three selectors below but never consulted;
+;; the fullness decision arrives precomputed in regfull.
+
+;; Next hed: the head of tal while reg still has room and tal is not exhausted,
+;; otherwise the head of reg, or #f when reg is empty.
+(define (runs:queue-next-hed tal reg n regfull)
+  (cond
+   ((and (not regfull) (not (null? tal)))
+    (car tal))
+   ((null? reg) #f) ;; tal is used up (or reg flagged full) and nothing to pop from reg
+   (else (car reg))))
+
+;; Next tal: untouched when hed came from reg because reg was full, the rest of
+;; tal when hed came from tal, and the rest of reg when tal was used up.
+(define (runs:queue-next-tal tal reg n regfull)
+  (cond
+   (regfull tal)
+   ((not (null? tal)) (cdr tal))
+   ((null? reg) '())
+   (else (cdr reg)))) ;; must transfer from reg
+
+;; Next reg: the rest of reg when it was full, '() once its contents have been
+;; moved over to tal, and unchanged otherwise.
+;; EXPLORE: reorder (cdr reg) such that looping is more efficient
+(define (runs:queue-next-reg tal reg n regfull)
+  (cond
+   ((not regfull)
+    (if (null? tal)
+        '() ;; tal is null and reg not full: reg contents moved to tal
+        reg))
+   ((null? reg) '())
+   (else (cdr reg))))
+
+;; Build the argument list (hed tal reg reruns) for the named loop "loop" in
+;; runs:run-tests-queue.  reruns is passed through unchanged.
+;;
+(define (runs:loop-values tal reg reglen regfull reruns)
+  (let ((next-hed (runs:queue-next-hed tal reg reglen regfull))
+        (next-tal (runs:queue-next-tal tal reg reglen regfull))
+        (next-reg (runs:queue-next-reg tal reg reglen regfull)))
+    (list next-hed next-tal next-reg reruns)))
+
+;; objective - iterate through the tests
+;;  => want to prioritize tests we haven't seen before
+;;  => sometimes need to squeeze things in (added to reg)
+;;  => review of a previously seen test has higher priority than a never visited test
+;; reg - list of previously visited tests
+;; tal - list of never visited tests
+;; prefer the next hed to come from reg rather than tal.
+ +(define runs:nothing-left-in-queue-count 0) + +;;====================================================================== +;; runs:expand-items is called by runs:run-tests-queue +;;====================================================================== +;; +;; return value of runs:expand-items is passed back to runs-tests-queue and is fed to named loop with this signature: +;; (let loop ((hed (car sorted-test-names)) +;; (tal (cdr sorted-test-names)) +;; (reg '()) ;; registered, put these at the head of tal +;; (reruns '())) +(define (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry test-records itemmaps) + (let* ((loop-list (list hed tal reg reruns)) + (prereqs-not-met (let ((res (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps))) + (if (list? res) + res + (begin + (debug:print 0 *default-log-port* + "ERROR: rmt:get-prereqs-not-met returned non-list!\n" + " res=" res " run-id=" run-id " waitons=" waitons " hed=" hed " item-path=" item-path " testmode=" testmode " itemmaps=" itemmaps) + '())))) + (have-itemized (not (null? (lset-intersection eq? testmode '(itemmatch itemwait))))) + ;; (prereqs-not-met (mt:lazy-get-prereqs-not-met run-id waitons item-path mode: testmode itemmap: itemmap)) + (fails (runs:calc-fails prereqs-not-met)) + (prereq-fails (runs:calc-prereq-fail prereqs-not-met)) + (non-completed (runs:calc-not-completed prereqs-not-met)) + (runnables (runs:calc-runnable prereqs-not-met)) + (unexpanded-prereqs + (filter (lambda (testname) + (let* ((test-rec (hash-table-ref test-records testname)) + (items (tests:testqueue-get-items test-rec))) + ;;(BB> "HEY " testname "=>"items) + (or (procedure? items)(eq? 
items 'have-procedure)))) + waitons)) + + + ) + (debug:print-info 4 *default-log-port* "START OF INNER COND #2 " + "\n can-run-more: " can-run-more + "\n testname: " hed + "\n prereqs-not-met: " (runs:pretty-string prereqs-not-met) + "\n non-completed: " (runs:pretty-string non-completed) + "\n prereq-fails: " (runs:pretty-string prereq-fails) + "\n fails: " (runs:pretty-string fails) + "\n testmode: " testmode + "\n (member 'toplevel testmode): " (member 'toplevel testmode) + "\n (null? non-completed): " (null? non-completed) + "\n reruns: " reruns + "\n items: " items + "\n can-run-more: " can-run-more) + + (cond + ;; all prereqs met, fire off the test + ;; or, if it is a 'toplevel test and all prereqs not met are COMPLETED then launch + + ((and (not (member 'toplevel testmode)) + (member (hash-table-ref/default test-registry (db:test-make-full-name hed item-path) 'n/a) + '(DONOTRUN removed CANNOTRUN))) ;; *common:cant-run-states-sym*) ;; '(COMPLETED KILLED WAIVED UNKNOWN INCOMPLETE)) ;; try to catch repeat processing of COMPLETED tests here + (debug:print-info 4 *default-log-port* "cond branch - " "ei-1") + (debug:print-info 1 *default-log-port* "Test " hed " set to \"" (hash-table-ref test-registry (db:test-make-full-name hed item-path)) "\". Removing it from the queue") + (if (or (not (null? tal)) + (not (null? reg))) + (runs:loop-values tal reg reglen regfull reruns) + (begin + (debug:print-info 0 *default-log-port* "Nothing left in the queue!") + ;; If get here twice then we know we've tried to expand all items + ;; since there must be a logic issue with the handling of loops in the + ;; items expand phase we will brute force an exit here. + (if (> runs:nothing-left-in-queue-count 2) + (begin + (debug:print 0 *default-log-port* "WARNING: this condition is triggered when there were no items to expand and nothing to run. Please check your run for completeness") + (exit 0)) + (set! 
runs:nothing-left-in-queue-count (+ runs:nothing-left-in-queue-count 1))) + #f))) + + ;; desired result of below cond branch: + ;; we want to expand items in our test of interest (hed) in the following cases: + ;; case 1 - mode is itemmatch or itemwait: + ;; - all prereq tests have been expanded + ;; - at least one prereq's items have completed + ;; case 2 - mode is toplevel + ;; - prereqs are completed. + ;; - or no prereqs can complete + ;; case 3 - mode not specified + ;; - prereqs are completed and passed (we could consider removing "and passed" -- it would change behavior from current) + ((or (null? prereqs-not-met) + (and (member 'toplevel testmode) + (null? non-completed))) + (debug:print-info 4 *default-log-port* "cond branch - " "ei-2") + (debug:print-info 4 *default-log-port* "runs:expand-items: (or (null? prereqs-not-met) (and (member 'toplevel testmode)(null? non-completed)))") + (let ((test-name (tests:testqueue-get-testname test-record))) + (setenv "MT_TEST_NAME" test-name) ;; + (setenv "MT_RUNNAME" runname) + (runs:set-megatest-env-vars run-id inrunname: runname) ;; these may be needed by the launching process + (let ((items-list (items:get-items-from-config tconfig))) + (if (list? items-list) + (begin + (if (null? items-list) + (let ((test-id (rmt:get-test-id run-id test-name "")) + (num-items (rmt:test-toplevel-num-items run-id test-name))) + (if (and test-id + (not (> num-items 0))) + (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "ZERO_ITEMS" "Failed to run due to failed prerequisites")))) + (tests:testqueue-set-items! test-record items-list) + (list hed tal reg reruns)) + (begin + (debug:print-error 0 *default-log-port* "The proc from reading the items table did not yield a list - please report this") + (exit 1)))))) + + ((and (null? fails) + (null? prereq-fails) + (not (null? non-completed))) + (debug:print-info 4 *default-log-port* "cond branch - " "ei-3") + (let* ((allinqueue (map (lambda (x)(if (string? 
x) x (db:test-get-testname x))) + (append newtal reruns))) + ;; prereqstrs is a list of test names as strings that are prereqs for hed + (prereqstrs (delete-duplicates (map (lambda (x)(if (string? x) x (db:test-get-testname x))) + prereqs-not-met))) + ;; a prereq that is not found in allinqueue will be put in the notinqueue list + ;; + ;; (notinqueue (filter (lambda (x) + ;; (not (member x allinqueue))) + ;; prereqstrs)) + (give-up #f)) + + ;; We can get here when a prereq has not been run due to *it* having a prereq that failed. + ;; We need to use this to dequeue this item as CANNOTRUN + ;; + (if (member 'toplevel testmode) ;; '(toplevel)) ;; NOTE: this probably should be (member 'toplevel testmode) + (for-each (lambda (prereq) + (if (eq? (hash-table-ref/default test-registry prereq 'justfine) 'CANNOTRUN) + (set! give-up #t))) + prereqstrs)) + + (if (and give-up + (not (and (null? tal)(null? reg)))) + (let ((trimmed-tal (mt:discard-blocked-tests run-id hed tal test-records)) + (trimmed-reg (mt:discard-blocked-tests run-id hed reg test-records))) + (debug:print 1 *default-log-port* "WARNING: test " hed " has discarded prerequisites, removing it from the queue") + + (let ((test-id (rmt:get-test-id run-id hed ""))) + (if test-id (mt:test-set-state-status-by-id run-id test-id "COMPLETED" "PREQ_DISCARDED" "Failed to run due to discarded prerequisites"))) + + (if (and (null? trimmed-tal) + (null? trimmed-reg)) + #f + (runs:loop-values trimmed-tal trimmed-reg reglen regfull reruns) + )) + (list (car newtal)(append (cdr newtal) reg) '() reruns)))) + + ((and (null? fails) ;; have not-started tests, but unable to run them. everything looks completed with no prospect of unsticking something that is stuck. we should mark hed as moribund and exit or continue if there are more tests to consider + (null? prereq-fails) + (null? non-completed)) + (debug:print-info 4 *default-log-port* "cond branch - " "ei-4") + (if (runs:can-keep-running? 
hed 20) + (begin + (runs:inc-cant-run-tests hed) + (debug:print-info 0 *default-log-port* "no fails in prerequisites for " hed " but also none running, keeping " hed " for now. Try count: " (hash-table-ref/default *seen-cant-run-tests* hed 0)) ;; + ;; getting here likely means the system is way overloaded, kill a full minute before continuing + (thread-sleep! 60) ;; TODO: gate by normalized server load > 1.0 (maxload config thing) + ;; num-retries code was here + ;; we use this opportunity to move contents of reg to tal + (list (car newtal)(append (cdr newtal) reg) '() reruns)) ;; an issue with prereqs not yet met? + (begin + (debug:print-info 1 *default-log-port* "no fails in prerequisites for " hed " but nothing seen running in a while, dropping test " hed " from the run queue") + (let ((test-id (rmt:get-test-id run-id hed ""))) + (if test-id (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "TIMED_OUT" "Nothing seen running in a while."))) + (runs:loop-values tal reg reglen regfull reruns) + ))) + + ((and + (or (not (null? fails)) + (not (null? prereq-fails))) + (member 'normal testmode)) + (debug:print-info 4 *default-log-port* "cond branch - " "ei-5") + (debug:print-info 1 *default-log-port* "test " hed " (mode=" testmode ") has failed prerequisite(s); " + (string-intersperse (map (lambda (t)(conc (db:test-get-testname t) ":" (db:test-get-state t)"/"(db:test-get-status t))) fails) ", ") + ", removing it from to-do list") + (let ((test-id (rmt:get-test-id run-id hed ""))) + (if test-id + (if (not (null? 
prereq-fails)) + (mt:test-set-state-status-by-id-unless-completed run-id test-id "COMPLETED" "PREQ_DISCARDED" "Failed to run due to prior failed prerequisites") + (mt:test-set-state-status-by-id-unless-completed run-id test-id "COMPLETED" "PREQ_FAIL" "Failed to run due to failed prerequisites")))) + ;; (debug:print 4 *default-log-port*"BB> set PREQ_FAIL on "hed) + ;; (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_FAIL" "Failed to run due to failed prerequisites")))) ;; BB: this works, btu equivalent for itemwait mode does not work. + (if (or (not (null? reg))(not (null? tal))) + (begin + (hash-table-set! test-registry hed 'CANNOTRUN) + (runs:loop-values tal reg reglen regfull (cons hed reruns)) + ) + #f)) ;; #f flags do not loop + + ((and (not (null? fails))(member 'toplevel testmode)) + (debug:print-info 4 *default-log-port* "cond branch - " "ei-6") + (if (or (not (null? reg))(not (null? tal))) + (list (car newtal)(append (cdr newtal) reg) '() reruns) + #f)) + ((null? runnables) + (debug:print-info 4 *default-log-port* "cond branch - " "ei-7") + #f) ;; if we get here and non-completed is null then it is all over. + (else + (debug:print-info 4 *default-log-port* "cond branch - " "ei-8") + (debug:print 0 *default-log-port* "WARNING: FAILS or incomplete tests maybe preventing completion of this run. Watch for issues with test " hed ", continuing for now") + (list (car newtal)(cdr newtal) reg reruns))))) + +(define (runs:mixed-list-testname-and-testrec->list-of-strings inlst) + (if (null? inlst) + '() + (map (lambda (t) + (cond + ((vector? t) + (let ((test-name (db:test-get-testname t)) + (item-path (db:test-get-item-path t)) + (test-state (db:test-get-state t)) + (test-status (db:test-get-status t))) + (conc test-name (if (equal? item-path "") "" "/") item-path ":" test-state "/" test-status))) + ((string? 
t) + t) + (else + (conc t)))) + inlst))) + + +;; hed tal reg reruns reglen regfull test-record runname test-name item-path jobgroup max-concurrent-jobs run-id waitons item-path testmode test-patts required-tests test-registry registry-mutex flags keyvals run-info newtal all-tests-registry itemmaps) +(define (runs:process-expanded-tests runsdat testdat) + ;; unroll the contents of runsdat and testdat (due to ongoing refactoring). + (debug:print 2 *default-log-port* "runs:process-expanded-tests; testdat:" ) + (debug:print 2 *default-log-port* (with-output-to-string + (lambda () (pp (runs:testdat->alist testdat) )))) + (let* ((hed (runs:testdat-hed testdat)) + (tal (runs:testdat-tal testdat)) + (reg (runs:testdat-reg testdat)) + (reruns (runs:testdat-reruns testdat)) + (test-name (runs:testdat-test-name testdat)) + (item-path (runs:testdat-item-path testdat)) + (jobgroup (runs:testdat-jobgroup testdat)) + (waitons (runs:testdat-waitons testdat)) + (item-path (runs:testdat-item-path testdat)) + (testmode (runs:testdat-testmode testdat)) + (newtal (runs:testdat-newtal testdat)) + (itemmaps (runs:testdat-itemmaps testdat)) + (test-record (runs:testdat-test-record testdat)) + (prereqs-not-met (runs:testdat-prereqs-not-met testdat)) + + (reglen (runs:dat-reglen runsdat)) + (regfull (runs:dat-regfull runsdat)) + (runname (runs:dat-runname runsdat)) + (max-concurrent-jobs (runs:dat-max-concurrent-jobs runsdat)) + (run-id (runs:dat-run-id runsdat)) + (test-patts (runs:dat-test-patts runsdat)) + (required-tests (runs:dat-required-tests runsdat)) + (test-registry (runs:dat-test-registry runsdat)) + (registry-mutex (runs:dat-registry-mutex runsdat)) + (flags (runs:dat-flags runsdat)) + (keyvals (runs:dat-keyvals runsdat)) + (run-info (runs:dat-run-info runsdat)) + (all-tests-registry (runs:dat-all-tests-registry runsdat)) + (run-limits-info (runs:dat-can-run-more-tests runsdat)) + ;; (runs:can-run-more-tests run-id jobgroup max-concurrent-jobs)) ;; look at the test jobgroup and 
tot jobs running + (have-resources (car run-limits-info)) + (num-running (list-ref run-limits-info 1)) + (num-running-in-jobgroup(list-ref run-limits-info 2)) + (max-concurrent-jobs (list-ref run-limits-info 3)) + (job-group-limit (list-ref run-limits-info 4)) + ;; (prereqs-not-met (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps)) + ;; (prereqs-not-met (mt:lazy-get-prereqs-not-met run-id waitons item-path mode: testmode itemmap: itemmap)) + (fails (if (list? prereqs-not-met) ;; TODO: rename fails to failed-prereqs + (runs:calc-fails prereqs-not-met) + (begin + (debug:print-error 0 *default-log-port* "prereqs-not-met is not a list! " prereqs-not-met) + '()))) + (non-completed (filter (lambda (x) ;; remove hed from not completed list, duh, of course it is not completed! + (not (equal? x hed))) + (runs:calc-not-completed prereqs-not-met))) + (loop-list (list hed tal reg reruns)) + ;; configure the load runner + (numcpus (common:get-num-cpus #f)) + (maxload (string->number (or (configf:lookup *configdat* "jobtools" "maxload") "3.0"))) ;; use a non-number string to disable + (maxhomehostload (string->number (or (configf:lookup *configdat* "jobtools" "maxhomehostload") "2.0"))) ;; use a non-number string to disable + (waitdelay (string->number (or (configf:lookup *configdat* "jobtools" "waitdelay") "60")))) + (debug:print-info 4 *default-log-port* "have-resources: " have-resources " prereqs-not-met: (" + (string-intersperse + (map (lambda (t) + (if (vector? t) + (conc (db:test-get-state t) "/" (db:test-get-status t)) + (conc " WARNING: t is not a vector=" t ))) + prereqs-not-met) + ", ") ") fails: " fails + "\nregistered? " (hash-table-ref/default test-registry (db:test-make-full-name test-name item-path) #f)) + + + + (if (and (not (null? 
prereqs-not-met)) + (runs:lownoise (conc "waiting on tests " prereqs-not-met hed) 60)) + (debug:print-info 2 *default-log-port* "waiting on tests; " (string-intersperse (runs:mixed-list-testname-and-testrec->list-of-strings prereqs-not-met) ", "))) + + ;; Don't know at this time if the test have been launched at some time in the past + ;; i.e. is this a re-launch? + (debug:print-info 4 *default-log-port* "run-limits-info = " run-limits-info) + + (cond ; cond 894- 1067 + + ;; Check item path against item-patts, + ;; + ((not (tests:match test-patts (tests:testqueue-get-testname test-record) item-path required: required-tests)) ;; This test/itempath is not to be run + ;; else the run is stuck, temporarily or permanently + ;; but should check if it is due to lack of resources vs. prerequisites + (debug:print-info 1 *default-log-port* "Skipping " (tests:testqueue-get-testname test-record) " " item-path " as it doesn't match " test-patts) + (if (or (not (null? tal))(not (null? reg))) + (runs:loop-values tal reg reglen regfull reruns) + #f)) + + ;; Register tests + ;; + ((not (hash-table-ref/default test-registry (db:test-make-full-name test-name item-path) #f)) + (debug:print-info 4 *default-log-port* "Pre-registering test " test-name "/" item-path " to create placeholder" ) + ;; always do firm registration now in v1.60 and greater ;; (eq? *transport-type* 'fs) ;; no point in parallel registration if use fs + (let register-loop ((numtries 15)) + (rmt:register-test run-id test-name item-path) + (if (rmt:get-test-id run-id test-name item-path) + (hash-table-set! test-registry (db:test-make-full-name test-name item-path) 'done) + (if (> numtries 0) + (begin + (thread-sleep! 0.5) + (register-loop (- numtries 1))) + (debug:print-error 0 *default-log-port* "failed to register test " (db:test-make-full-name test-name item-path))))) + (if (not (eq? 
(hash-table-ref/default test-registry (db:test-make-full-name test-name "") #f) 'done)) + (begin + (rmt:register-test run-id test-name "") + (if (rmt:get-test-id run-id test-name "") + (hash-table-set! test-registry (db:test-make-full-name test-name "") 'done)))) + (runs:shrink-can-run-more-tests-count runsdat) ;; DELAY TWEAKER (still needed?) + (if (and (null? tal)(null? reg)) + (list hed tal (append reg (list hed)) reruns) + (list (runs:queue-next-hed tal reg reglen regfull) ;; cannot replace with a call to runs:loop-values as the logic is different for reg + (runs:queue-next-tal tal reg reglen regfull) + ;; NB// Here we are building reg as we register tests + ;; if regfull we must pop the front item off reg + (if regfull + (append (cdr reg) (list hed)) + (append reg (list hed))) + reruns))) + + ;; At this point hed test registration must be completed. + ;; + ((eq? (hash-table-ref/default test-registry (db:test-make-full-name test-name item-path) #f) + 'start) + (debug:print-info 0 *default-log-port* "Waiting on test registration(s): " + (string-intersperse + (filter (lambda (x) + (eq? (hash-table-ref/default test-registry x #f) 'start)) + (hash-table-keys test-registry)) + ", ")) + (thread-sleep! 0.051) + (list hed tal reg reruns)) + + ;; If no resources are available just kill time and loop again + ;; + ((not have-resources) ;; simply try again after waiting a second + (if (runs:lownoise "no resources" 60) + (debug:print-info 1 *default-log-port* "no resources to run new tests, waiting ...")) + ;; Have gone back and forth on this but db starvation is an issue. + ;; wait one second before looking again to run jobs. + (thread-sleep! 1) + ;; could have done hed tal here but doing car/cdr of newtal to rotate tests + (list (car newtal)(cdr newtal) reg reruns)) + + ;; This is the final stage, everything is in place so launch the test + ;; + ((and have-resources + (or (null? prereqs-not-met) + (and (member 'toplevel testmode) ;; 'toplevel) + (null? 
non-completed) + (not (member 'exclusive testmode))))) + ;; (hash-table-delete! *max-tries-hash* (db:test-make-full-name test-name item-path)) + ;; we are going to reset all the counters for test retries by setting a new hash table + ;; this means they will increment only when nothing can be run + (set! *max-tries-hash* (make-hash-table)) + ;; well, first lets see if cpu load throttling is enabled. If so wait around until the + ;; average cpu load is under the threshold before continuing + (if maxload ;; only gate if maxload is specified + (common:wait-for-cpuload maxload numcpus waitdelay)) + (if maxhomehostload + (common:wait-for-homehost-load maxhomehostload (conc "Waiting for homehost load to drop below normalized value of " maxhomehostload))) + + (run:test run-id run-info keyvals runname test-record flags #f test-registry all-tests-registry) + (runs:incremental-print-results run-id) + (hash-table-set! test-registry (db:test-make-full-name test-name item-path) 'running) + (runs:shrink-can-run-more-tests-count runsdat) ;; DELAY TWEAKER (still needed?) + ;; (thread-sleep! *global-delta*) + (if (or (not (null? tal))(not (null? reg))) + (runs:loop-values tal reg reglen regfull reruns) + #f)) + + ;; must be we have unmet prerequisites + ;; + (else + (debug:print 4 *default-log-port* "FAILS: " fails) + ;; If one or more of the prereqs-not-met are FAIL then we can issue + ;; a message and drop hed from the items to be processed. + ;; (runs:mixed-list-testname-and-testrec->list-of-strings prereqs-not-met) + (if (and (not (null? prereqs-not-met)) + (runs:lownoise (conc "waiting on tests " prereqs-not-met hed) 60)) + (debug:print-info 1 *default-log-port* "waiting on tests; " (string-intersperse + (runs:mixed-list-testname-and-testrec->list-of-strings + prereqs-not-met) ", "))) + (if (or (null? fails) + (member 'toplevel testmode)) + (begin + ;; couldn't run, take a breather + (if (runs:lownoise "Waiting for more work to do..." 
60) + (debug:print-info 0 *default-log-port* "Waiting for more work to do...")) + (thread-sleep! 1) + (list (car newtal)(cdr newtal) reg reruns)) + ;; the waiton is FAIL so no point in trying to run hed ever again + (begin + (let ((my-test-id (rmt:get-test-id run-id test-name item-path))) + (mt:test-set-state-status-by-id-unless-completed run-id my-test-id "COMPLETED" "PREQ_FAIL" "Failed to run due to failed prerequisites2")) + + + + (if (or (not (null? reg))(not (null? tal))) + (if (vector? hed) + (begin + (debug:print 1 *default-log-port* "WARNING: Dropping test " test-name "/" item-path + " from the launch list as it has prerequistes that are FAIL") + (let ((test-id (rmt:get-test-id run-id hed ""))) + (if test-id (mt:test-set-state-status-by-id-unless-completed run-id test-id "COMPLETED" "PREQ_FAIL" "Failed to run due to failed prerequisites"))) + (runs:shrink-can-run-more-tests-count runsdat) ;; DELAY TWEAKER (still needed?) + ;; (thread-sleep! *global-delta*) + ;; This next is for the items + + (if (not (null? fails)) + ;;(mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "PREQ_FAIL" #f) + (rmt:set-state-status-and-roll-up-items run-id test-name item-path "NOT_STARTED" "PREQ_FAIL" #f) + ;;(mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "BLOCKED" #f) + (rmt:set-state-status-and-roll-up-items run-id test-name item-path "NOT_STARTED" "BLOCKED" #f) ) + (hash-table-set! test-registry (db:test-make-full-name test-name item-path) 'removed) + (runs:loop-values tal reg reglen regfull reruns)) + (let ((nth-try (hash-table-ref/default test-registry hed 0))) ;; hed not a vector... 
+ (debug:print 2 *default-log-port* "nth-try("hed")="nth-try) + (cond + ((member "RUNNING" (map db:test-get-state prereqs-not-met)) + (if (runs:lownoise (conc "possible RUNNING prerequistes " hed) 60) + (debug:print 0 *default-log-port* "WARNING: test " hed " has possible RUNNING prerequisites, don't give up on it yet.")) + (thread-sleep! 0.1) + (runs:loop-values tal reg reglen regfull reruns)) + ((or (not nth-try) ;; BB: condition on subsequent tries, condition below fires on first try + (and (number? nth-try) + (< nth-try 2))) + (hash-table-set! test-registry hed (if (number? nth-try) + (+ nth-try 1) + 0)) + (if (runs:lownoise (conc "not removing test " hed) 60) + (debug:print 1 *default-log-port* "WARNING: not removing test " hed " from queue although it may not be runnable due to FAILED prerequisites")) + ;; may not have processed correctly. Could be a race condition in your test implementation? Dropping test " hed) ;; " as it has prerequistes that are FAIL. (NOTE: hed is not a vector)") + (runs:shrink-can-run-more-tests-count runsdat) ;; DELAY TWEAKER (still needed?) + (runs:loop-values newtal reg reglen regfull reruns)) + ((symbol? nth-try) ;; BB: 'done matches here in one case where prereq itemwait failed. This is first "try" + (if (eq? nth-try 'removed) ;; removed is removed - drop it NOW + (if (null? tal) + #f ;; yes, really + (list (car tal)(cdr tal) reg reruns)) + (begin + (if (runs:lownoise (conc "FAILED prerequisites or other issue" hed) 60) + (debug:print 0 *default-log-port* "WARNING: test " hed " has FAILED prerequisites or other issue. Internal state >" nth-try "< will be overridden and we'll retry.")) + ;; was: (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "KEEP_TRYING" #f) + (mt:test-set-state-status-by-testname-unless-completed run-id test-name item-path "COMPLETED" "PREQ_FAIL" #f) + (hash-table-set! test-registry hed 'removed) ;; was 0 + (if (not (and (null? reg) (null? 
tal))) + (runs:loop-values tal reg reglen regfull reruns) + #f)))) + (else + (if (runs:lownoise (conc "FAILED prerequitests and we tried" hed) 60) + (debug:print 0 *default-log-port* "WARNING: test " hed " has FAILED prerequitests and we've tried at least 10 times to run it. Giving up now.")) + ;; (debug:print 0 *default-log-port* " prereqs: " prereqs-not-met) + (hash-table-set! test-registry hed 'removed) + (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "TEN_STRIKES" #f) + ;; I'm unclear on if this roll up is needed - it may be the root cause of the "all set to FAIL" bug. + (rmt:set-state-status-and-roll-up-items run-id test-name item-path #f "FAIL" #f) ;; treat as FAIL + (list (if (null? tal)(car newtal)(car tal)) + tal + reg + reruns))))) + ;; ELSE: can't drop this - maybe running? Just keep trying + + ;;(if (not (or (not (null? reg))(not (null? tal)))) ;; old experiment + (let ((runable-tests (runs:runable-tests prereqs-not-met))) ;; SUSPICIOUS: Should look at more than just prereqs-not-met? + (if (null? runable-tests) + #f ;; I think we are truly done here + (runs:loop-values newtal reg reglen regfull reruns))) + ;;) ;;from old experiment + ) ;; end if (or (not (null? reg))(not (null? tal))) + )))))) + +;; scan a list of tests looking to see if any are potentially runnable +;; +(define (runs:runable-tests tests) + (filter (lambda (t) + (if (not (vector? 
t)) + t + (let ((state (db:test-get-state t)) + (status (db:test-get-status t))) + (case (string->symbol state) + ((COMPLETED INCOMPLETE) #f) + ((NOT_STARTED) + (if (member status '("TEN_STRIKES" "BLOCKED" "PREQ_FAIL" "ZERO_ITEMS" "PREQ_DISCARDED" "TIMED_OUT" )) + #f + t)) + ((DELETED) #f) + (else t))))) + tests)) + +;; move all the miscellanea into this struct +;; +(defstruct runs:gendat inc-results inc-results-last-update inc-results-fmt run-info runname target) + +(define *runs:general-data* + (make-runs:gendat + inc-results: (make-hash-table) + inc-results-last-update: 0 + inc-results-fmt: "~12a~12a~20a~12a~40a\n" ;; state status time duration test-name item-path + run-info: #f + runname: #f + target: #f + ) + ) + +(define (runs:incremental-print-results run-id) + (let ((curr-sec (current-seconds))) + (if (> (- curr-sec (runs:gendat-inc-results-last-update *runs:general-data*)) 5) ;; at least five seconds since last update + (let* ((run-dat (or (runs:gendat-run-info *runs:general-data*)(rmt:get-run-info run-id))) + (runname (or (runs:gendat-runname *runs:general-data*) + (db:get-value-by-header (db:get-rows run-dat) + (db:get-header run-dat) "runname"))) + (target (or (runs:gendat-target *runs:general-data*)(rmt:get-target run-id))) + (testsdat (rmt:get-tests-for-run run-id "%" '() '() ;; run-id testpatt states statuses + #f #f ;; offset limit + #f ;; not-in + #f ;; sort-by + #f ;; sort-order + #f ;; get full data (not 'shortlist) + (runs:gendat-inc-results-last-update *runs:general-data*) ;; last update time + 'dashboard))) + (if (not (runs:gendat-run-info *runs:general-data*)) + (runs:gendat-run-info-set! *runs:general-data* run-dat)) + (if (not (runs:gendat-runname *runs:general-data*)) + (runs:gendat-runname-set! *runs:general-data* runname)) + (if (not (runs:gendat-target *runs:general-data*)) + (runs:gendat-target-set! 
*runs:general-data* target)) + (for-each + (lambda (testdat) + (let* ((test-id (db:test-get-id testdat)) + (prevdat (hash-table-ref/default (runs:gendat-inc-results *runs:general-data*) + (conc run-id "," test-id) #f)) + (test-name (db:test-get-testname testdat)) + (item-path (db:test-get-item-path testdat)) + (state (db:test-get-state testdat)) + (status (db:test-get-status testdat)) + (event-time (db:test-get-event_time testdat)) + (duration (db:test-get-run_duration testdat))) + (if (and (not (member state '("DELETED" "REMOTEHOSTSTART" "RUNNING" "LAUNCHED""NOT_STARTED"))) + (not (and prevdat + (equal? state (db:test-get-state prevdat)) + (equal? status (db:test-get-status prevdat))))) + (let ((fmt (runs:gendat-inc-results-fmt *runs:general-data*)) + (dtime (seconds->year-work-week/day-time event-time))) + (if (runs:lownoise "inc-print" 600) + (format #t fmt "State" "Status" "Start Time" "Duration" "Test path")) + ;; (debug:print 0 *default-log-port* "fmt: " fmt " state: " state " status: " status " test-name: " test-name " item-path: " item-path " dtime: " dtime) + ;; (debug:print 0 #f "event-time: " event-time " duration: " duration) + (format #t fmt + state + status + dtime + (seconds->hr-min-sec duration) + (conc "lt/" target "/" runname "/" test-name (if (string-null? item-path) "" (conc "/" item-path)))) + (hash-table-set! (runs:gendat-inc-results *runs:general-data*) (conc run-id "," test-id) testdat))))) + testsdat))) + (runs:gendat-inc-results-last-update-set! *runs:general-data* (- curr-sec 10)))) + +;; every time though the loop increment the test/itempatt val. 
+;; when the min is > max-allowed and none running then force exit +;; +(define *max-tries-hash* (make-hash-table)) + +;;====================================================================== +;; runs:run-tests-queue is called by runs:run-tests +;;====================================================================== +;; +;; test-records is a hash table testname:item_path => vector < testname testconfig waitons priority items-info ... > +(define (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests reglen-in all-tests-registry) + ;; At this point the list of parent tests is expanded + ;; NB// Should expand items here and then insert into the run queue. + (debug:print 5 *default-log-port* "test-records: " test-records ", flags: " (hash-table->alist flags)) + + ;; Do mark-and-find clean up of db before starting runing of quue + ;; + ;; (rmt:find-and-mark-incomplete) + + (let* ((run-info (rmt:get-run-info run-id)) + (tests-info (mt:get-tests-for-run run-id #f '() '())) ;; qryvals: "id,testname,item_path")) + (sorted-test-names (tests:sort-by-priority-and-waiton test-records)) + (test-registry (make-hash-table)) + (registry-mutex (make-mutex)) + (num-retries 0) + (max-retries (configf:lookup *configdat* "setup" "maxretries")) + (max-concurrent-jobs (configf:lookup-number *configdat* "setup" "max_concurrent_jobs" default: 50)) + (reglen (if (number? 
reglen-in) reglen-in 1)) + (last-time-incomplete (- (current-seconds) 900)) ;; force at least one clean up cycle + (last-time-some-running (current-seconds)) + ;; (tdbdat (tasks:open-db)) + (runsdat (make-runs:dat + ;; hed: hed + ;; tal: tal + ;; reg: reg + ;; reruns: reruns + reglen: reglen + regfull: #f ;; regfull + ;; test-record: test-record + runname: runname + ;; test-name: test-name + ;; item-path: item-path + ;; jobgroup: jobgroup + max-concurrent-jobs: max-concurrent-jobs + run-id: run-id + ;; waitons: waitons + ;; testmode: testmode + test-patts: test-patts + required-tests: required-tests + test-registry: test-registry + registry-mutex: registry-mutex + flags: flags + keyvals: keyvals + run-info: run-info + ;; newtal: newtal + all-tests-registry: all-tests-registry + ;; itemmaps: itemmaps + ;; prereqs-not-met: (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps) + ;; can-run-more-tests: (runs:can-run-more-tests run-id jobgroup max-concurrent-jobs) ;; look at the test jobgroup and tot jobs running + ))) + + ;; Initialize the test-registery hash with tests that already have a record + ;; convert state to symbol and use that as the hash value + (for-each (lambda (trec) + (let ((id (db:test-get-id trec)) + (tn (db:test-get-testname trec)) + (ip (db:test-get-item-path trec)) + (st (db:test-get-state trec))) + (if (not (equal? st "DELETED")) + (hash-table-set! test-registry (db:test-make-full-name tn ip) (string->symbol st))))) + tests-info) + (set! max-retries (if (and max-retries (string->number max-retries))(string->number max-retries) 100)) + + (let loop ((hed (car sorted-test-names)) + (tal (cdr sorted-test-names)) + (reg '()) ;; registered, put these at the head of tal + (reruns '())) + + (runs:incremental-print-results run-id) + + (if (not (null? reruns))(debug:print-info 4 *default-log-port* "reruns=" reruns)) + + ;; Here we mark any old defunct tests as incomplete. 
Do this every fifteen minutes + ;; moving this to a parallel thread and just run it once. + ;; + (if (> (current-seconds)(+ last-time-incomplete 900)) + (begin + (set! last-time-incomplete (current-seconds)) + ;; (rmt:find-and-mark-incomplete-all-runs) + )) + + ;; (print "Top of loop, hed=" hed ", tal=" tal " ,reruns=" reruns) + (let* ((test-record (hash-table-ref test-records hed)) + (test-name (tests:testqueue-get-testname test-record)) + (tconfig (tests:testqueue-get-testconfig test-record)) + (jobgroup (configf:lookup tconfig "test_meta" "jobgroup")) + (testmode (let ((m (configf:lookup tconfig "requirements" "mode"))) + (if m (map string->symbol (string-split m)) '(normal)))) + (itemmaps (tests:get-itemmaps tconfig)) ;; (configf:lookup tconfig "requirements" "itemmap")) + (priority (tests:testqueue-get-priority test-record)) + (itemdat (tests:testqueue-get-itemdat test-record)) ;; itemdat can be a string, list or #f + (items (tests:testqueue-get-items test-record)) + (item-path (item-list->path itemdat)) + (tfullname (db:test-make-full-name test-name item-path)) + ;; these are hard coded item-item waits test/item-path => test/item-path2 ... + (extra-waits (let* ((section (configf:get-section (tests:testqueue-get-testconfig test-record) "waitons")) + (myextra (alist-ref tfullname section equal?))) + (if myextra + (let ((extras (string-split (car myextra)))) + (if (runs:lownoise (conc tfullname "extra-waitons" tfullname) 60) + (debug:print-info 0 *default-log-port* "HAVE EXTRA WAITONS for test " tfullname ": " myextra)) + (for-each + (lambda (extra) + ;; (debug:print 0 *default-log-port* "FYI: extra = " extra " reruns = " reruns) + (let ((basetestname (car (string-split extra "/")))) + #;(if (not (member extra tal)) + (set! reruns (append tal (list extra)))) + (if (not (member basetestname tal)) + (set! 
reruns (append tal (list basetestname)))) + )) + extras) + extras) + '()))) + (waitons (delete-duplicates (append (tests:testqueue-get-waitons test-record) extra-waits) equal?)) + (newtal (append tal (list hed))) + (regfull (>= (length reg) reglen)) + (num-running (rmt:get-count-tests-running-for-run-id run-id)) + (testdat (make-runs:testdat + hed: hed + tal: tal + reg: reg + reruns: reruns + test-record: test-record + test-name: test-name + item-path: item-path + jobgroup: jobgroup + waitons: waitons + testmode: testmode + newtal: newtal + itemmaps: itemmaps + ;; prereqs-not-met: prereqs-not-met + ))) + (runs:dat-regfull-set! runsdat regfull) + + ;; -- removed BB 17ww28 - no longer needed. + ;; every 15 minutes verify the server is there for this run + ;; (if (and (common:low-noise-print 240 "try start server" run-id) + ;; (not (or (and *runremote* + ;; (remote-server-url *runremote*) + ;; (server:ping (remote-server-url *runremote*))) + ;; (server:check-if-running *toppath*)))) + ;; (server:kind-run *toppath*)) + + (if (> num-running 0) + (set! last-time-some-running (current-seconds))) + + (if (> (current-seconds)(+ last-time-some-running (or (configf:lookup *configdat* "setup" "give-up-waiting") 36000))) + (hash-table-set! *max-tries-hash* tfullname (+ (hash-table-ref/default *max-tries-hash* tfullname 0) 1))) + ;; (debug:print 0 *default-log-port* "max-tries-hash: " (hash-table->alist *max-tries-hash*)) + + ;; Ensure all top level tests get registered. This way they show up as "NOT_STARTED" on the dashboard + ;; and it is clear they *should* have run but did not. + (if (not (hash-table-ref/default test-registry (db:test-make-full-name test-name "") #f)) + (begin + (rmt:register-test run-id test-name "") + (hash-table-set! test-registry (db:test-make-full-name test-name "") 'done))) + + ;; Fast skip of tests that are already "COMPLETED" - NO! 
Cannot do that as the items may not have been expanded yet :( + ;; + (if (member (hash-table-ref/default test-registry tfullname #f) + '(DONOTRUN removed)) ;; *common:cant-run-states-sym*) ;; '(COMPLETED KILLED WAIVED UNKNOWN INCOMPLETE)) + (begin + (if (runs:lownoise (conc "been marked do not run " tfullname) 60) + (debug:print-info 0 *default-log-port* "Skipping test " tfullname " as it has been marked do not run due to being completed or not runnable")) + (if (or (not (null? tal))(not (null? reg))) + (loop (runs:queue-next-hed tal reg reglen regfull) + (runs:queue-next-tal tal reg reglen regfull) + (runs:queue-next-reg tal reg reglen regfull) + reruns)))) + ;; (loop (car tal)(cdr tal) reg reruns)))) + + (runs:incremental-print-results run-id) + (debug:print 4 *default-log-port* "TOP OF LOOP => " + "test-name: " test-name + "\n hed: " hed + "\n tal: " tal + "\n reg: " reg + "\n test-record " test-record + "\n itemdat: " itemdat + "\n items: " items + "\n item-path: " item-path + "\n waitons: " waitons + "\n num-retries: " num-retries + "\n reruns: " reruns + "\n regfull: " regfull + "\n reglen: " reglen + "\n length reg: " (length reg) + ) + + ;; check for hed in waitons => this would be circular, remove it and issue an + ;; error + (if (member test-name waitons) + (begin + (debug:print-error 0 *default-log-port* "test " test-name " has listed itself as a waiton, please correct this!") + (set! waiton (filter (lambda (x)(not (equal? x hed))) waitons)))) + + (cond + + ;; We want to catch tests that have waitons that are NOT in the queue and discard them IFF + ;; they have been through the wringer 10 or more times + ((and (list? waitons) + (not (null? waitons)) + (> (hash-table-ref/default *max-tries-hash* tfullname 0) 10) + (not (null? (filter + number? 
+ (map (lambda (waiton) + (if (and (not (member waiton tal)) ;; this waiton is not in the list to be tried to run + (not (member waiton reruns))) + 1 + #f)) + waitons))))) ;; could do this more elegantly with a marker.... + (debug:print-info 4 *default-log-port* "cond branch - " "rtq-1") + (debug:print 0 *default-log-port* "WARNING: Marking test " tfullname " as not runnable. It is waiting on tests that cannot be run. Giving up now.") + (hash-table-set! test-registry tfullname 'removed)) + + ;; items is #f then the test is ok to be handed off to launch (but not before) + ;; + ((not items) + (debug:print-info 4 *default-log-port* "cond branch - " "rtq-2") + (debug:print-info 4 *default-log-port* "OUTER COND: (not items)") + (if (and (not (tests:match test-patts (tests:testqueue-get-testname test-record) item-path required: required-tests)) + (not (null? tal))) + (loop (car tal)(cdr tal) reg reruns)) + (runs:testdat-prereqs-not-met-set! testdat (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps)) + (runs:dat-can-run-more-tests-set! runsdat (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs)) ;; look at the test jobgroup and tot jobs running + (let ((loop-list (runs:process-expanded-tests runsdat testdat))) + (if loop-list (apply loop loop-list)))) + + ;; items processed into a list but not came in as a list been processed + ;; + ((and (list? items) ;; thus we know our items are already calculated + (not itemdat)) ;; and not yet expanded into the list of things to be done + (debug:print-info 4 *default-log-port* "cond branch - " "rtq-3") + (debug:print-info 4 *default-log-port* "OUTER COND: (and (list? items)(not itemdat))") + ;; Must determine if the items list is valid. Discard the test if it is not. + (if (and (list? items) + (> (length items) 0) + (and (list? 
(car items)) + (> (length (car items)) 0)) + (debug:debug-mode 1)) + (debug:print 2 *default-log-port* (map (lambda (row) + (conc (string-intersperse + (map (lambda (varval) + (string-intersperse varval "=")) + row) + " ") + "\n")) + items))) + + (let* ((items-in-testpatt + (filter + (lambda (my-itemdat) + (tests:match test-patts hed (item-list->path my-itemdat) )) + ;; was: (tests:match test-patts hed (item-list->path my-itemdat) required: required-tests)) + items) )) + (if (null? items-in-testpatt) + (let ((test-id (rmt:get-test-id run-id test-name ""))) + (debug:print-info 0 *default-log-port* "Test " (tests:testqueue-get-testname test-record) " is itemized but has no items matching test pattern -- marking status ZERO_ITEMS") + (if test-id + (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "ZERO_ITEMS" "This test has no items which match test pattern."))) + + (for-each (lambda (my-itemdat) + (let* ((new-test-record (let ((newrec (make-tests:testqueue))) + (vector-copy! test-record newrec) + newrec)) + (my-item-path (item-list->path my-itemdat)) + + (newtestname (db:test-make-full-name hed my-item-path))) ;; test names are unique on testname/item-path + (tests:testqueue-set-items! new-test-record #f) + (tests:testqueue-set-itemdat! new-test-record my-itemdat) + (tests:testqueue-set-item_path! new-test-record my-item-path) + (hash-table-set! test-records newtestname new-test-record) + (set! tal (append tal (list newtestname))))) ;; since these are itemized create new test names testname/itempath + items-in-testpatt))) + + + + ;; At this point we have possibly added items to tal but all must be handed off to + ;; INNER COND logic. I think loop without rotating the queue + ;; (loop hed tal reg reruns)) + ;; (let ((newtal (append tal (list hed)))) ;; We should discard hed as it has been expanded into it's items? Yes, but only if this *is* an itemized test + ;; (loop (car newtal)(cdr newtal) reg reruns) + (if (null? 
tal) + #f + (loop (car tal)(cdr tal) reg reruns))) + + ;; if items is a proc then need to run items:get-items-from-config, get the list and loop + ;; - but only do that if resources exist to kick off the job + ;; EXPAND ITEMS + ((or (procedure? items)(eq? items 'have-procedure)) + (debug:print-info 4 *default-log-port* "cond branch - " "rtq-4") + (let ((can-run-more (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs))) + (if (and (list? can-run-more) + (car can-run-more)) + (let ((loop-list (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry test-records itemmaps))) ;; itemized test expanded here + (if loop-list + (apply loop loop-list) + (debug:print-info 4 *default-log-port* " -- Can't expand hed="hed) + ) + ) + ;; if can't run more just loop with next possible test + (loop (car newtal)(cdr newtal) reg reruns)))) + + ;; this case should not happen, added to help catch any bugs + ((and (list? items) itemdat) + (debug:print-info 4 *default-log-port* "cond branch - " "rtq-5") + (debug:print-error 0 *default-log-port* "Should not have a list of items in a test and the itemspath set - please report this") + (exit 1)) + ((not (null? reruns)) + (debug:print-info 4 *default-log-port* "cond branch - " "rtq-6") + (let* ((newlst (tests:filter-non-runnable run-id tal test-records)) ;; i.e. not FAIL, WAIVED, INCOMPLETE, PASS, KILLED, + (junked (lset-difference equal? tal newlst))) + (debug:print-info 4 *default-log-port* "full drop through, if reruns is less than 100 we will force retry them, reruns=" reruns ", tal=" tal) + (if (< num-retries max-retries) + (set! newlst (append reruns newlst))) + (set! num-retries (+ num-retries 1)) + ;; (thread-sleep! (+ 1 *global-delta*)) + (if (not (null? 
newlst)) + ;; since reruns have been tacked on to newlst create new reruns from junked + (loop (car newlst)(cdr newlst) reg (delete-duplicates junked))))) + ((not (null? tal)) + (debug:print-info 4 *default-log-port* "cond branch - " "rtq-7") + (debug:print-info 4 *default-log-port* "I'm pretty sure I shouldn't get here.")) + ((not (null? reg)) ;; could we get here with leftovers? + (debug:print-info 4 *default-log-port* "cond branch - " "rtq-8") + (debug:print-info 0 *default-log-port* "Have leftovers!") + (loop (car reg)(cdr reg) '() reruns)) + (else + (debug:print-info 4 *default-log-port* "cond branch - " "rtq-9") + (debug:print-info 4 *default-log-port* "Exiting loop with...\n hed=" hed "\n tal=" tal "\n reruns=" reruns)) + ))) ;; end loop on sorted test names + ;; this is the point where everything is launched and now you can mark the run in metadata table as all launched + (rmt:set-var (conc "lunch-complete-" run-id) "yes") + + ;; now *if* -run-wait we wait for all tests to be done + ;; Now wait for any RUNNING tests to complete (if in run-wait mode) + (thread-sleep! 10) ;; I think there is a race condition here. Let states/statuses settle + (let wait-loop ((num-running (rmt:get-count-tests-running-for-run-id run-id)) + (prev-num-running 0)) + ;; (debug:print-info 13 *default-log-port* "num-running=" num-running ", prev-num-running=" prev-num-running) + (if (and (or (args:get-arg "-run-wait") + (equal? (configf:lookup *configdat* "setup" "run-wait") "yes")) + (> num-running 0)) + (begin + ;; Here we mark any old defunct tests as incomplete. Do this every fifteen minutes + ;; (debug:print 0 *default-log-port* "Got here eh! num-running=" num-running " (> num-running 0) " (> num-running 0)) + (if (> (current-seconds)(+ last-time-incomplete 900)) + (begin + (debug:print-info 0 *default-log-port* "Marking stuck tests as INCOMPLETE while waiting for run " run-id ". Running as pid " (current-process-id) " on " (get-host-name)) + (set! 
last-time-incomplete (current-seconds))
+                    (rmt:find-and-mark-incomplete run-id #f)))
+              (if (not (eq? num-running prev-num-running))
+                  (debug:print-info 0 *default-log-port* "run-wait specified, waiting on " num-running " tests in RUNNING, REMOTEHOSTSTART or LAUNCHED state at " (time->string (seconds->local-time (current-seconds)))))
+              (thread-sleep! 5)
+              ;; (wait-loop (rmt:get-count-tests-running-for-run-id run-id) num-running))))
+              (wait-loop (rmt:get-count-tests-running-for-run-id run-id) num-running))))
+    ;; LET* ((test-record
+    ;; we get here on "drop through". All done!
+    ;; this is moved to runs:run-testes since this function is getting called twice to ensure everthing is completed.
+    ;; (debug:print-info 0 *default-log-port* "Calling Post Hook")
+    ;; (runs:run-post-hook run-id)
+    (debug:print-info 1 *default-log-port* "All tests launched")))
+
+;; Keep only the entries of prereqs-not-met that are test records (vectors)
+;; whose state is INCOMPLETE or COMPLETED and whose status is not one of
+;; PASS, WARN, CHECK, WAIVED or SKIP. Non-vector entries are dropped.
+;;
+(define (runs:calc-fails prereqs-not-met)
+  (define (failed-prereq? rec)
+    (and (vector? rec) ;; not (string? rec))
+         (member (db:test-get-state rec) '("INCOMPLETE" "COMPLETED")) ;; TODO: pull from *common:stuff...*
+         (not (member (db:test-get-status rec)
+                      '("PASS" "WARN" "CHECK" "WAIVED" "SKIP")))))
+  (filter failed-prereq? prereqs-not-met))
+
+;; Keep only the test records (vectors) that are in state NOT_STARTED with a
+;; status other than n/a or KEEP_TRYING.
+;;
+(define (runs:calc-prereq-fail prereqs-not-met) ;; REMOVEME since NOT_STARTED/PREQ_FAIL is now COMPLETED/PREQ_FAIL
+  (define (prereq-failed? rec)
+    (cond
+     ((not (vector? rec)) #f) ;; not (string? rec))
+     ((equal? (db:test-get-state rec) "NOT_STARTED")
+      (not (member (db:test-get-status rec) '("n/a" "KEEP_TRYING"))))
+     (else #f)))
+  (filter prereq-failed? prereqs-not-met))
+
+;; Keep every entry that is not a finished test record, i.e. every
+;; non-vector entry plus every record whose state is neither INCOMPLETE
+;; nor COMPLETED.
+;;
+(define (runs:calc-not-completed prereqs-not-met)
+  (define (finished? rec)
+    (and (vector? rec)
+         (member (db:test-get-state rec) '("INCOMPLETE" "COMPLETED"))))
+  (filter (lambda (rec) (not (finished? rec)))
+          prereqs-not-met))
+
+;; (define (runs:calc-not-completed prereqs-not-met)
+;;   (filter
+;;    (lambda (t)
+;;      (or (not (vector? t))
+;;          (not (equal? "COMPLETED" (db:test-get-state t)))))
+;;    prereqs-not-met))
+
+;; Keep the entries that may still run: every non-vector entry, records in
+;; state NOT_STARTED with status n/a or KEEP_TRYING, and records in state
+;; RUNNING.
+;;
+(define (runs:calc-runnable prereqs-not-met)
+  (define (runnable? rec)
+    (cond
+     ((not (vector? rec)) #t)
+     ((equal? "NOT_STARTED" (db:test-get-state rec))
+      (member (db:test-get-status rec)
+              '("n/a" "KEEP_TRYING")))
+     (else
+      (equal? "RUNNING" (db:test-get-state rec))))) ;; account for a test that is running
+  (filter runnable? prereqs-not-met))
+
+;; Map each entry of lst to a printable string: test records (vectors)
+;; become "testname:state/status", anything else is passed through conc.
+;;
+(define (runs:pretty-string lst)
+  (define (describe entry)
+    (if (vector? entry)
+        (conc (db:test-get-testname entry) ":" (db:test-get-state entry) "/" (db:test-get-status entry))
+        (conc entry)))
+  (map describe lst))
+
+;; parent-test is there as a placeholder for when parent-tests can be run as a setup step
+;;
+(define (run:test run-id run-info keyvals runname test-record flags parent-test test-registry all-tests-registry)
+  ;; All these vars might be referenced by the testconfig file reader
+  (let* ((test-name     (tests:testqueue-get-testname   test-record))
+         (test-waitons  (tests:testqueue-get-waitons    test-record))
+         (itemdat       (tests:testqueue-get-itemdat    test-record))
+         (item-path     "")
+         (db            #f)
+         (full-test-name #f)
+         (all-vars      (get-environment-variables)))
+    ;; setting itemdat to a list if it is #f
+    (if (not itemdat)(set! itemdat '()))
+    (set! item-path (item-list->path itemdat))
+    (set! full-test-name (db:test-make-full-name test-name item-path))
+    (runs:set-megatest-env-vars run-id inrunname: runname testname: test-name itempath: item-path) ;; these may be needed by the launching process
+    (let* ((test-conf ;; re-instate the tests:get-testconfig once the kinks are worked out. FIXME!!!
+            ;; (tests:get-testconfig test-name item-path all-tests-registry #t force-create: #t))
+            (tests:testqueue-get-testconfig test-record ))
+           (test-path (hash-table-ref all-tests-registry test-name)) ;; (conc *toppath* "/tests/" test-name)) ;; could use tests:get-testconfig here ...
+ (force (hash-table-ref/default flags "-force" #f)) + (rerun (hash-table-ref/default flags "-rerun" #f)) + (keepgoing (hash-table-ref/default flags "-keepgoing" #f)) + (incomplete-timeout (string->number (or (configf:lookup *configdat* "setup" "incomplete-timeout") "x"))) + ) + + (debug:print-info 4 *default-log-port* + "\nTESTNAME: " full-test-name + "\n test-config: " (hash-table->alist test-conf) + "\n itemdat: " itemdat + ) + (debug:print 2 *default-log-port* "Attempting to launch test " full-test-name) + ;; (setenv "MT_TEST_NAME" test-name) ;; + ;; (setenv "MT_ITEMPATH" item-path) + ;; (setenv "MT_RUNNAME" runname) + (change-directory *toppath*) + + ;; Here is where the test_meta table is best updated + ;; Yes, another use of a global for caching. Need a better way? + ;; + ;; There is now a single call to runs:update-all-test_meta and this + ;; per-test call is not needed. Given the delicacy of the move to + ;; v1.55 this code is being left in place for the time being. + ;; + (if (not (hash-table-ref/default *test-meta-updated* test-name #f)) + (begin + (hash-table-set! *test-meta-updated* test-name #t) + (runs:update-test_meta test-name test-conf))) + + ;; itemdat => ((ripeness "overripe") (temperature "cool") (season "summer")) + (let* ((new-test-path (string-intersperse (cons test-path (map cadr itemdat)) "/")) + (test-id (rmt:get-test-id run-id test-name item-path)) + (testdat (if test-id (rmt:get-test-info-by-id run-id test-id) #f))) + (if (not testdat) + (let loop () + ;; ensure that the path exists before registering the test + ;; NOPE: Cannot! Don't know yet which disk area will be assigned.... + ;; (system (conc "mkdir -p " new-test-path)) + ;; + ;; (open-run-close tests:register-test db run-id test-name item-path) + ;; + ;; NB// for the above line. I want the test to be registered long before this routine gets called! + ;; + (if (not test-id)(set! 
test-id (rmt:get-test-id run-id test-name item-path))) + (if (not test-id) + (begin + (debug:print 2 *default-log-port* "WARN: Test not pre-created? test-name=" test-name ", item-path=" item-path ", run-id=" run-id) + (rmt:register-test run-id test-name item-path) + (set! test-id (rmt:get-test-id run-id test-name item-path)))) + (debug:print-info 4 *default-log-port* "test-id=" test-id ", run-id=" run-id ", test-name=" test-name ", item-path=\"" item-path "\"") + (set! testdat (rmt:get-test-info-by-id run-id test-id)) + (if (not testdat) + (begin + (debug:print-info 0 *default-log-port* "WARNING: server is overloaded, trying again in one second") + (thread-sleep! 1) + (loop))))) + (if (not testdat) ;; should NOT happen + (debug:print-error 0 *default-log-port* "failed to get test record for test-id " test-id)) + (set! test-id (db:test-get-id testdat)) + (if (common:file-exists? test-path) + (change-directory test-path) + (begin + (debug:print-error 0 *default-log-port* "test run path not created before attempting to run the test. Perhaps you are running -remove-runs at the same time?") + (change-directory *toppath*))) + (case (if force ;; (args:get-arg "-force") + 'NOT_STARTED + (if testdat + (string->symbol (test:get-state testdat)) + 'failed-to-insert)) + ((failed-to-insert) + (debug:print-error 0 *default-log-port* "Failed to insert the record into the db")) + ((NOT_STARTED COMPLETED DELETED INCOMPLETE) + (let ((runflag #f)) + (cond + ;; -force, run no matter what + (force (set! runflag #t)) + ;; NOT_STARTED, run no matter what + ((member (test:get-state testdat) '("DELETED" "NOT_STARTED" "INCOMPLETE"))(set! 
runflag #t)) + ;; not -rerun and PASS, WARN or CHECK, do no run + ((and (or (not rerun) + keepgoing) + ;; Require to force re-run for COMPLETED or *anything* + PASS,WARN or CHECK + (or (member (test:get-status testdat) '("PASS" "WARN" "CHECK" "SKIP" "WAIVED")) + (member (test:get-state testdat) '("COMPLETED")))) + (debug:print-info 2 *default-log-port* "running test " test-name "/" item-path " suppressed as it is " (test:get-state testdat) " and " (test:get-status testdat)) + (hash-table-set! test-registry full-test-name 'DONOTRUN) ;; COMPLETED) + (set! runflag #f)) + ;; -rerun and status is one of the specifed, run it + ((and rerun + (let* ((rerunlst (string-split rerun ",")) + (must-rerun (member (test:get-status testdat) rerunlst))) + (debug:print-info 3 *default-log-port* "-rerun list: " rerun ", test-status: " (test:get-status testdat)", must-rerun: " must-rerun) + must-rerun)) + (debug:print-info 2 *default-log-port* "Rerun forced for test " test-name "/" item-path) + (set! runflag #t)) + ;; -keepgoing, do not rerun FAIL + ((and keepgoing + (member (test:get-status testdat) '("FAIL"))) + (set! runflag #f)) + ((and (not rerun) + (member (test:get-status testdat) '("FAIL" "n/a"))) + (set! runflag #t)) + (else (set! runflag #f))) + (debug:print 4 *default-log-port* "RUNNING => runflag: " runflag " STATE: " (test:get-state testdat) " STATUS: " (test:get-status testdat)) + (if (not runflag) + (if (not parent-test) + (if (runs:lownoise (conc "not starting test" full-test-name) 60) + (debug:print 1 *default-log-port* "NOTE: Not starting test " full-test-name " as it is state \"" (test:get-state testdat) + "\" and status \"" (test:get-status testdat) "\", use -rerun \"" (test:get-status testdat) + "\" or -force to override"))) + ;; NOTE: No longer be checking prerequisites here! Will never get here unless prereqs are + ;; already met. 
+ ;; This would be a great place to do the process-fork + ;; + (let ((skip-test #f) + (skip-check (configf:get-section test-conf "skip"))) + (cond + ;; Have to check for skip conditions. This one skips if there are same-named tests + ;; currently running + ((and skip-check + (configf:lookup test-conf "skip" "prevrunning")) + ;; run-ids = #f means *all* runs + (let ((running-tests (rmt:get-tests-for-runs-mindata #f full-test-name '("RUNNING" "REMOTEHOSTSTART" "LAUNCHED") '() #f))) + (if (not (null? running-tests)) ;; have to skip + (set! skip-test "Skipping due to previous tests running")))) + + ;; split the string and OR of file-exists? + ((and skip-check + (configf:lookup test-conf "skip" "fileexists")) + (let* ((files (string-split (configf:lookup test-conf "skip" "fileexists"))) + (existing (filter common:file-exists? files))) + (if (not (null? existing)) ;; (common:file-exists? (configf:lookup test-conf "skip" "fileexists")) + (set! skip-test (conc "Skipping due to existance of file(s) " (string-intersperse existing ", ")))))) ;; (configf:lookup test-conf "skip" "fileexists"))))) + + ((and skip-check + (configf:lookup test-conf "skip" "filenotexists")) + (let* ((files (string-split (configf:lookup test-conf "skip" "filenotexists"))) + (existing (filter common:file-exists? files))) + (if (null? existing) ;; (common:file-exists? (configf:lookup test-conf "skip" "filenotexists"))) + (set! skip-test (conc "Skipping due to non existance of files " (string-intersperse files ", ")))))) ;; (configf:lookup test-conf "skip" "filenotexists"))))) + + ((and skip-check + (configf:lookup test-conf "skip" "script")) + (if (= (system (configf:lookup test-conf "skip" "script")) 0) + (set! 
skip-test (conc "Skipping due to zero return value of script " (configf:lookup test-conf "skip" "script"))))) + + ((and skip-check + (configf:lookup test-conf "skip" "rundelay")) + ;; run-ids = #f means *all* runs + (let* ((numseconds (common:hms-string->seconds (configf:lookup test-conf "skip" "rundelay"))) + (running-tests (rmt:get-tests-for-runs-mindata #f full-test-name '("RUNNING" "REMOTEHOSTSTART" "LAUNCHED") '() #f)) + (completed-tests (rmt:get-tests-for-runs-mindata #f full-test-name '("COMPLETED" "INCOMPLETE") '("PASS" "FAIL" "ABORT") #f)) ;; ironically INCOMPLETE is same as COMPLETED in this contex + (last-run-times (map db:mintest-get-event_time completed-tests)) + (time-since-last (- (current-seconds) (if (null? last-run-times) 0 (common:max last-run-times))))) + (if (or (not (null? running-tests)) ;; have to skip if test is running + (> numseconds time-since-last)) + (set! skip-test (conc "Skipping due to previous test run less than " (configf:lookup test-conf "skip" "rundelay") " ago")))))) + + (if skip-test + (begin + (mt:test-set-state-status-by-id run-id test-id "COMPLETED" "SKIP" skip-test) + (debug:print-info 1 *default-log-port* "SKIPPING Test " full-test-name " due to " skip-test)) + ;; + ;; Here the test is handed off to launch.scm for launch-test to complete the launch process + ;; + (if (not (launch-test test-id run-id run-info keyvals runname test-conf test-name test-path itemdat flags)) + (begin + (print "ERROR: Failed to launch the test. Exiting as soon as possible") + (set! *globalexitstatus* 1) ;; + (process-signal (current-process-id) signal/kill)))))))) + ((KILLED) + (debug:print 1 *default-log-port* "NOTE: " full-test-name " is already running or was explictly killed, use -force to launch it.") + (hash-table-set! 
test-registry full-test-name 'DONOTRUN)) ;; KILLED - keyed by testname/item-path (was built from test-path, the test's directory, so the entry could never be looked up)
+          ((LAUNCHED REMOTEHOSTSTART RUNNING)
+           (debug:print 2 *default-log-port* "NOTE: " test-name " is already running"))
+          ;; (if (> (- (current-seconds)(+ (db:test-get-event_time testdat)
+          ;;                              (db:test-get-run_duration testdat)))
+          ;;        (or incomplete-timeout
+          ;;            6000)) ;; i.e. no update for more than 6000 seconds
+          ;;     (begin
+          ;;       (debug:print 0 *default-log-port* "WARNING: Test " test-name " appears to be dead. Forcing it to state INCOMPLETE and status STUCK/DEAD")
+          ;;       (tests:test-set-status! run-id test-id "INCOMPLETE" "STUCK/DEAD" "" #f))
+          ;;       ;; (tests:test-set-status! test-id "INCOMPLETE" "STUCK/DEAD" "" #f))
+          ;;     (debug:print 2 *default-log-port* "NOTE: " test-name " is already running")))
+          (else
+           (debug:print-error 0 *default-log-port* "Failed to launch test " full-test-name ". Unrecognised state " (test:get-state testdat))
+           ;; Whatever the unrecognised state is, the test must not be retried.
+           ;; The previous code dispatched on the state again but both arms
+           ;; stored the same value, and did so under a key built from
+           ;; test-path instead of item-path. Register under full-test-name,
+           ;; the same key used when a completed test is suppressed above.
+           (hash-table-set! test-registry full-test-name 'DONOTRUN)))))
+    ;; put any changed environment variables back to how they were - TODO - turn this into some sort of with-
+    (common:set-vars-back all-vars)
+    #;(for-each
+       (lambda (vardat)
+         (let ((var (car vardat))
+               (val (cdr vardat)))
+           (if (not (equal? (get-environment-variable var) val))
+               (handle-exceptions
+                exn
+                (debug:print-error 0 *default-log-port* "Failed to set " var " to " val)
+                (setenv var val)))))
+       all-vars)
+    ))
+
+;;======================================================================
+;; END OF NEW STUFF
+;;======================================================================
+
+;; Return dir with its trailing path components removed, rebuilt with a
+;; leading "/".
+;;
+;;   dir    - path string, split on "/"
+;;   params - optional; first element is the number of trailing components
+;;            to drop (default 1)
+;;
+;; The number of components kept is clamped to the range 0..(length of the
+;; split path) so that an over-large or negative count no longer raises an
+;; error in take; an over-large count yields "/".
+;;
+(define (get-dir-up-n dir . params)
+  (let* ((dparts (string-split dir "/"))
+         (count  (if (null? params) 1 (car params)))
+         (nparts (length dparts))
+         (keep   (max 0 (min nparts (- nparts count)))))
+    (conc "/" (string-intersperse
+               (take dparts keep)
+               "/"))))
+
+;; Remove real-dir with "rm -rf". If that exits non-zero, open up the
+;; permissions with "chmod -R a+rwx" and try once more; log an error if the
+;; second attempt also fails.
+;;
+;; NOTE(review): real-dir is spliced into the shell command unquoted, so a
+;; path containing spaces or shell metacharacters will not be handled
+;; correctly - confirm callers only pass trusted, simple paths.
+;;
+(define (runs:recursive-delete-with-error-msg real-dir)
+  (if (> (system (conc "rm -rf " real-dir)) 0)
+      (begin
+        ;; FAILED, possibly due to permissions, do chmod a+rwx then try one more time
+        (system (conc "chmod -R a+rwx " real-dir))
+        (if (> (system (conc "rm -rf " real-dir)) 0)
+            ;; message now names the command that was actually run (rm -rf, not rm -f)
+            (debug:print-error 0 *default-log-port* "There was a problem removing " real-dir " with rm -rf")))))
+
+;; Remove a test directory in three passes: sub-directories first, then
+;; every entry whose name does not match the regexp "testdat.db", and
+;; finally the directory itself (which also removes whatever was left by
+;; the second pass). The fold accumulators only count visited entries and
+;; their results are discarded.
+;;
+(define (runs:safe-delete-test-dir real-dir)
+  ;; first delete all sub-directories
+  (directory-fold
+   (lambda (f x)
+     (let ((fullname (conc real-dir "/" f)))
+       (if (directory? fullname)(runs:recursive-delete-with-error-msg fullname)))
+     (+ 1 x))
+   0 real-dir)
+  ;; then files other than *testdat.db*
+  ;; (the pattern is a regexp, so the "." matches any character)
+  (directory-fold
+   (lambda (f x)
+     (let ((fullname (conc real-dir "/" f)))
+       (if (not (string-search (regexp "testdat.db") f))
+           (runs:recursive-delete-with-error-msg fullname)))
+     (+ 1 x))
+   0 real-dir)
+  ;; then the entire directory
+  (runs:recursive-delete-with-error-msg real-dir))
+
+;; cleanup often needs to remove all but the last N runs per target
+;;
+;; target-patts a1/b1/c1,a2/b2/c2 ...
+;;
+;; This will fail if called with empty target or a bad target (i.e. missing or extra fields)
+;;
+;; Returns a hash table mapping each target (as reported by
+;; simple-run-target) to the list of run records found for it. Records are
+;; consed on, so each list is in reverse order of retrieval.
+;;
+(define (runs:get-hash-by-target target-patts runpatt)
+  (let* ((targets (string-split target-patts ","))
+         ;; NOTE(review): keys is not referenced below; it is kept because
+         ;; rmt:get-keys is a remote call whose side effects are not visible here
+         (keys    (rmt:get-keys))
+         (res-ht  (make-hash-table))) ;; target -> ( runrecord1 runrecord2 ... )
+    (for-each
+     (lambda (target-patt)
+       (let ((runs (rmt:simple-get-runs runpatt #f #f target-patt)))
+         (for-each
+          (lambda (run)
+            (let ((target (simple-run-target run)))
+              (hash-table-set! res-ht target (cons run (hash-table-ref/default res-ht target '())))))
+          runs)))
+     targets)
+    res-ht))
+
+;; delete runs older than X (weeks, days, months years etc.)
+;; delete redundant runs within a target - N is the input
+;; delete redundant runs within a target IFF older than given date/time AND keep at least N
+;;
+;; For every target matching target-patts, sort its runs by event time
+;; (oldest first), mark all but the newest num-to-keep for removal and
+;; apply each of the requested actions to every run:
+;;
+;;   print       - list the run, tagging those marked with REMOVE
+;;   remove-runs - shell out to "megatest -remove-runs" for marked runs
+;;                 (adds "-kill-wait 0" when kill-runs is also requested)
+;;   archive     - shell out to "megatest -archive save-remove" for marked runs
+;;   kill-runs   - shell out to "megatest -kill-runs" for marked runs
+;;
+;; When -age is given, runs whose event time is later than (now - age) are
+;; skipped entirely. The value of -precmd, if any, is prefixed to every
+;; shell command. Returns the hash table of target -> runs marked for removal.
+;;
+;; NOTE(review): target and run name are spliced into the shell commands
+;; unquoted - confirm they can never contain shell metacharacters.
+;;
+(define (runs:remove-all-but-last-n-runs-per-target target-patts runpatt num-to-keep #!key (actions '(print)))
+  (let* ((runs-ht       (runs:get-hash-by-target target-patts runpatt))
+         (age-arg       (args:get-arg "-age")) ;; looked up once instead of three times
+         (age           (if age-arg (common:hms-string->seconds age-arg) #f))
+         (age-mark      (if age (- (current-seconds) age) (+ (current-seconds) 86400)))
+         (precmd        (or (args:get-arg "-precmd") ""))
+         (known-actions '(print remove-runs archive kill-runs)))
+    (print "Actions: " actions)
+    ;; Unknown actions used to be ignored silently; report each one once, up
+    ;; front, rather than once per run.
+    (for-each
+     (lambda (action)
+       (if (not (memq action known-actions))
+           (debug:print 0 *default-log-port* "WARNING: ignoring unrecognised action " action)))
+     actions)
+    (for-each
+     (lambda (target)
+       (let* ((runs      (hash-table-ref runs-ht target))
+              (sorted    (sort runs (lambda (a b)(< (simple-run-event_time a)(simple-run-event_time b)))))
+              (to-remove (let* ((len      (length sorted))
+                                (trim-amt (- len num-to-keep)))
+                           (if (> trim-amt 0)
+                               ;; min guards take against a negative num-to-keep
+                               (take sorted (min len trim-amt))
+                               '()))))
+         (hash-table-set! runs-ht target to-remove)
+         (print target ":")
+         (for-each
+          (lambda (run)
+            ;; named remove? so that SRFI-1 remove is not shadowed; ids are
+            ;; compared with equal? rather than eq? so the test does not
+            ;; depend on how the id numbers are represented
+            (let ((remove? (member run to-remove (lambda (a b)
+                                                   (equal? (simple-run-id a)
+                                                           (simple-run-id b))))))
+              (if (and age (> (simple-run-event_time run) age-mark))
+                  (print "Skipping handling of " target "/" (simple-run-runname run) " as it is younger than " age-arg)
+                  (for-each
+                   (lambda (action)
+                     (case action
+                       ((print)
+                        (print " " (simple-run-runname run)
+                               " " (time->string (seconds->local-time (simple-run-event_time run)) "WW%V.%u %H:%M:%S")
+                               " " (if remove? "REMOVE" "")))
+                       ((remove-runs)
+                        (if remove? (system (conc precmd " megatest -remove-runs -target " target " -runname " (simple-run-runname run) " -testpatt %"
+                                                  (if (member 'kill-runs actions) ;; if kill-runs is specified then set -kill-wait to 0
+                                                      " -kill-wait 0"
+                                                      "")))))
+                       ((archive)
+                        (if remove? (system (conc precmd " megatest -archive save-remove -target " target " -runname " (simple-run-runname run) " -testpatt %"))))
+                       ((kill-runs)
+                        (if remove? (system (conc precmd " megatest -kill-runs -target " target " -runname " (simple-run-runname run) " -testpatt %"))))
+                       (else #f))) ;; unrecognised actions were reported above
+                   actions))))
+          sorted)))
+     ;; (print "Sorted: " (map simple-run-event_time sorted))
+     ;; (print "Remove: " (map simple-run-event_time to-remove))))
+     (hash-table-keys runs-ht))
+    runs-ht))
+
+;; (define (runs:remove-all-but-last-n-runs-per-target target-patts runpatt num-to-keep)
+;;   (let ((data (runs:get-all-but-most-recent-n-per-target target-patts runpatt num-to-keep)))
+;;     (for-each
+;;      (lambda (target)
+;;        (let ((runs-to-remove (hash-table-ref data target )))
+;;          (for-each
+;;           (lambda (run)
+;;             (print "megatest -remove-runs -target " target " -runname " (simple-run-runname run) " -testpatt %"))
+;;           runs-to-remove)))
+;;      (hash-table-keys data))))
+
+;; Remove runs
+;; fields are passing in through
+;; action:
+;;    'remove-runs
+;;    'set-state-status
+;;
+;; NB// should pass in keys?
+;; +(define (runs:operate-on action target runnamepatt testpatt #!key (state #f)(status #f)(new-state-status #f)(mode #f)(options '())) + (common:clear-caches) ;; clear all caches + (let* ((db #f) + ;; (tdbdat (tasks:open-db)) + (keys (rmt:get-keys)) + (rundat (mt:get-runs-by-patt keys runnamepatt target)) + (header (vector-ref rundat 0)) + (runs (vector-ref rundat 1)) + (states (if state (string-split state ",") '())) + (statuses (if status (string-split status ",") '())) + (state-status (if (string? new-state-status) (string-split new-state-status ",") '(#f #f))) + (rp-mutex (make-mutex)) + (bup-mutex (make-mutex)) + (keep-records (args:get-arg "-keep-records"))) ;; used in conjunction with -remove-runs to keep the records, TODO: consolidate this with "mode". + + (let* ((write-access-actions '(remove-runs set-state-status archive run-wait kill-runs)) + (dbfile (conc *toppath* "/megatest.db")) + (readonly-mode (not (file-write-access? dbfile)))) + (when (and readonly-mode + (member action write-access-actions)) + (debug:print-error 0 *default-log-port* "megatest.db is readonly. Cannot proceed with action ["action"] in which write-access isrequired .") + (exit 1))) + + (debug:print-info 4 *default-log-port* "runs:operate-on => Header: " header " action: " action " new-state-status: " new-state-status) + (if (> 2 (length state-status)) + (begin + (debug:print-error 0 *default-log-port* "the parameter to -set-state-status is a comma delimited string. E.g. 
COMPLETED,FAIL") + (exit))) + (for-each + (lambda (run) + (let ((runkey (string-intersperse (map (lambda (k) + (db:get-value-by-header run header k)) keys) "/")) + (dirs-to-remove (make-hash-table)) + (proc-get-tests (lambda (run-id) + (mt:get-tests-for-run run-id + testpatt states statuses + not-in: #f + sort-by: (case action + ((remove-runs) 'rundir) + (else 'event_time)))))) + (let* ((run-id (db:get-value-by-header run header "id")) + (run-state (db:get-value-by-header run header "state")) + (run-name (db:get-value-by-header run header "runname")) + (tests (if (not (equal? run-state "locked")) + (proc-get-tests run-id) + '())) + (lasttpath "/does/not/exist/I/hope") + (worker-thread #f)) + (debug:print-info 4 *default-log-port* "runs:operate-on run=" run ", header=" header) + (if (not (null? tests)) + (begin + (case action + ((kill-runs) + (tasks:kill-runner target run-name "%") + (debug:print 1 *default-log-port* "Killing tests for run: " runkey " " (db:get-value-by-header run header "runname")) + ) + ((remove-runs) + ;; (if (tasks:need-server run-id)(tasks:start-and-wait-for-server tdbdat run-id 10)) + ;; seek and kill in flight -runtests with % as testpatt here + ;; (if (equal? 
testpatt "%") + (tasks:kill-runner target run-name testpatt) + ;; (debug:print 0 *default-log-port* "not attempting to kill any run launcher processes as testpatt is " testpatt)) + (debug:print 1 *default-log-port* "Removing tests for run: " runkey " " (db:get-value-by-header run header "runname"))) + ((set-state-status) + ;; (if (tasks:need-server run-id)(tasks:start-and-wait-for-server tdbdat run-id 10)) + (debug:print 1 *default-log-port* "Modifying state and staus for tests for run: " runkey " " (db:get-value-by-header run header "runname"))) + ((print-run) + (debug:print 1 *default-log-port* "Printing info for run " runkey ", run=" run ", tests=" tests ", header=" header) + action) + ((run-wait) + (debug:print 1 *default-log-port* "Waiting for run " runkey ", run=" runnamepatt " to complete")) + ((archive) + (debug:print 1 *default-log-port* "Archiving/restoring (" (args:get-arg "-archive") ") data for run: " runkey " " (db:get-value-by-header run header "runname")) + (set! worker-thread + (make-thread + (lambda () + (case (string->symbol (args:get-arg "-archive")) + ((save save-remove keep-html) + (archive:run-bup (args:get-arg "-archive") run-id run-name tests rp-mutex bup-mutex)) + ((restore) + (archive:bup-restore (args:get-arg "-archive") run-id run-name tests rp-mutex bup-mutex)) + (else + (debug:print-error 0 *default-log-port* "unrecognised sub command to -archive. Run \"megatest\" to see help") + (exit)))) + "archive-bup-thread")) + (thread-start! worker-thread)) + (else + (debug:print-info 0 *default-log-port* "action not recognised " action))) + + ;; actions that operate on one test at a time can be handled below + ;; + (let ((sorted-tests (filter + vector? + (sort tests (lambda (a b)(let ((dira ;; (rmt:sdb-qry 'getstr + (db:test-get-rundir a)) ;; ) ;; (filedb:get-path *fdb* (db:test-get-rundir a))) + (dirb ;; (rmt:sdb-qry 'getstr + (db:test-get-rundir b))) ;; ) ;; ((filedb:get-path *fdb* (db:test-get-rundir b)))) + (if (and (string? dira)(string? 
dirb)) + (> (string-length dira)(string-length dirb)) + #f)))))) + (toplevel-retries (make-hash-table)) ;; try three times to loop through and remove top level tests + (test-retry-time (make-hash-table)) + (backgrounded-remove-status (make-hash-table)) + (backgrounded-remove-last-visit (make-hash-table)) + (backgrounded-remove-result (make-hash-table)) + (allow-run-time (string->number (or (args:get-arg "-kill-wait") "10")))) ;; seconds to allow for killing tests before just brutally killing 'em + (let loop ((test (car sorted-tests)) + (tal (cdr sorted-tests))) + (let* ((test-id (db:test-get-id test)) + (new-test-dat (rmt:get-test-info-by-id run-id test-id))) + (if (not new-test-dat) + (begin + (debug:print-error 0 *default-log-port* "We have a test-id of " test-id " but no record was found. NOTE: No locking of records is done between processes, do not simultaneously remove the same run from two processes!") + (if (not (null? tal)) + (loop (car tal)(cdr tal)))) + (let* ((item-path (db:test-get-item-path new-test-dat)) + (test-name (db:test-get-testname new-test-dat)) + (run-dir ;;(filedb:get-path *fdb* + ;; (rmt:sdb-qry 'getid + (db:test-get-rundir new-test-dat)) ;; ) ;; run dir is from the link tree + (has-subrun (and (subrun:subrun-test-initialized? run-dir) + (not (subrun:subrun-removed? run-dir)))) + (test-state (db:test-get-state new-test-dat)) + (test-status (db:test-get-status new-test-dat)) + (test-fulln (db:test-get-fullname new-test-dat)) + (uname (db:test-get-uname new-test-dat)) + (toplevel-with-children (and (db:test-get-is-toplevel test) + (> (rmt:test-toplevel-num-items run-id test-name) 0)))) + + (case action + ((remove-runs) + ;; if the test is a toplevel-with-children issue an error and do not remove + (cond + (toplevel-with-children + (debug:print 0 *default-log-port* "WARNING: skipping removal of " test-fulln " with run-id " run-id " as it has sub tests") + (hash-table-set! 
toplevel-retries test-fulln (+ (hash-table-ref/default toplevel-retries test-fulln 0) 1)) + (if (> (hash-table-ref toplevel-retries test-fulln) 3) + (if (not (null? tal)) + (loop (car tal)(cdr tal))) ;; no else clause - drop it if no more in queue and > 3 tries + (let ((newtal (append tal (list test)))) + (loop (car newtal)(cdr newtal))))) ;; loop with test still in queue + (has-subrun + ;; + (let ((last-visit (hash-table-ref/default backgrounded-remove-last-visit test-fulln 0)) + (now (current-seconds)) + (rem-status (hash-table-ref/default backgrounded-remove-status test-fulln 'not-started))) + (case rem-status + ((not-started) + (debug:print 0 *default-log-port* "WARNING: postponing removal of " test-fulln " with run-id " run-id " as it has a subrun") + (hash-table-set! backgrounded-remove-status test-fulln 'started) + (hash-table-set! backgrounded-remove-last-visit test-fulln (current-seconds)) + (common:send-thunk-to-background-thread + (lambda () + (let* ((subrun-remove-succeeded + (subrun:remove-subrun run-dir keep-records))) + (hash-table-set! backgrounded-remove-result test-fulln subrun-remove-succeeded) + (hash-table-set! backgrounded-remove-status test-fulln 'done))) + name: (conc "remove-subrun:"test-fulln)) + + ;; send to back of line, loop + (let ((newtal (append tal (list test)))) + (loop (car newtal)(cdr newtal))) + ) + ((started) + ;; if last visit was within last second, sleep 1 second + (if (< (- now last-visit) 1.0) + (thread-sleep! 1.0)) + (hash-table-set! backgrounded-remove-last-visit test-fulln (current-seconds)) + ;; send to back of line, loop + (let ((newtal (append tal (list test)))) + (loop (car newtal)(cdr newtal))) + ) + ((done) + ;; drop this one; if remaining, loop, else finish + (hash-table-set! backgrounded-remove-last-visit test-fulln (current-seconds)) + (let ((subrun-remove-succeeded (hash-table-ref/default backgrounded-remove-result test-fulln 'exception))) + (cond + ((eq? 
subrun-remove-succeeded 'exception) + (let* ((logfile (subrun:get-log-path run-dir "remove"))) + (debug:print 0 *default-log-port* "ERROR: removing subrun of of " test-fulln " with run-id " run-id " ; see logfile @ "logfile)) + (if (not (null? tal)) + (loop (car tal)(cdr tal)))) + (subrun-remove-succeeded + (debug:print 0 *default-log-port* "Now removing of " test-fulln " with run-id " run-id " since subrun was removed.") + ;;(runs:remove-test-directory new-test-dat mode) ;; let normal case handle this. it will go thru loop again as non-subrun + (let ((newtal (append tal (list test)))) + (loop (car newtal)(cdr newtal)))) + (else + (let* ((logfile (subrun:get-log-path run-dir "remove"))) + (debug:print 0 *default-log-port* "WARNING: removal of subrun failed. Please check "logfile" for details.")) + ;; send to back of line, loop (will not match has-subrun next time through) + (if (not (null? tal)) + (loop (car tal)(cdr tal)))))) + ) + ) ; end case rem-status + ) ; end let + ); end cond has-subrun + + (else + ;; BB - TODO - consider backgrounding to threads to delete tests (work below) + (debug:print-info 0 *default-log-port* "test: " test-name " itest-state: " test-state) + (if (member test-state (list "RUNNING" "LAUNCHED" "REMOTEHOSTSTART" "KILLREQ")) + (begin + (if (not (hash-table-ref/default test-retry-time test-fulln #f)) + (begin + ;; want to set to REMOVING BUT CANNOT do it here? + (hash-table-set! test-retry-time test-fulln (current-seconds)))) + (if (> (- (current-seconds)(hash-table-ref test-retry-time test-fulln)) allow-run-time) + ;; This test is not in a correct state for cleaning up. Let's try some graceful shutdown steps first + ;; Set the test to "KILLREQ" and wait five seconds then try again. Repeat up to five times then give + ;; up and blow it away. + (begin + (debug:print 0 *default-log-port* "WARNING: could not gracefully remove test " test-fulln ", tried to kill it to no avail. 
Forcing state to FAILEDKILL and continuing") + (mt:test-set-state-status-by-id run-id (db:test-get-id test) "FAILEDKILL" "n/a" #f) + (thread-sleep! 1)) + (begin + (mt:test-set-state-status-by-id run-id (db:test-get-id test) "KILLREQ" "n/a" #f) + (thread-sleep! 1))) + ;; NOTE: This is suboptimal as the testdata will be used later and the state/status may have changed ... + (if (null? tal) + (loop new-test-dat tal) + (loop (car tal)(append tal (list new-test-dat))))) + (begin + (runs:remove-test-directory new-test-dat mode) ;; 'remove-all) + (if (not (null? tal)) + (loop (car tal)(cdr tal))))))) + (rmt:update-run-stats run-id (rmt:get-raw-run-stats run-id))) + ((kill-runs) + ;; RUNNING -> KILLREQ + ;; LAUNCHED,RUNNING,REMOTEHOSTSTART -> NOT STARTED + (cond + ((and has-subrun (member test-state (list "RUNNING" "LAUNCHED" "REMOTEHOSTSTART" "KILLREQ"))) + (common:send-thunk-to-background-thread + (lambda () + (let* ((subrun-remove-succeeded + (subrun:kill-subrun run-dir keep-records))) + #t))) + (if (not (null? tal)) + (loop (car tal)(cdr tal))) + ) + ((member test-state (list "RUNNING" "LAUNCHED" "REMOTEHOSTSTART" "KILLREQ")) + (debug:print 1 *default-log-port* "INFO: issuing killreq to test "test-fulln) + (mt:test-set-state-status-by-id run-id (db:test-get-id test) "KILLREQ" "n/a" #f) + (if (not (null? tal)) + (loop (car tal)(cdr tal)))) + ((and (member test-status '("PREQ_FAIL" "PREQ_DISCARDED" "BLOCKED" "ZERO_ITEMS" "KEEP_TRYING" "TEN_STRIKES" "TIMED_OUT"))) + (rmt:set-state-status-and-roll-up-items run-id (db:test-get-id test) 'foo "NOT_STARTED" "n/a" (conc "kill-run moved from "test-state":"test-status" to NOT_STARTED:n/a")) + ;;(mt:test-set-state-status-by-id run-id (db:test-get-id test) "NOT_STARTED" "n/a" (conc "kill-run moved from "test-state":"test-status" to NOT_STARTED:n/a")) + (if (not (null? tal)) + (loop (car tal)(cdr tal))) + ) + (else + (if (not (null? 
tal)) + (loop (car tal)(cdr tal))) + ))) + ((set-state-status) + (let* ((new-state (car state-status)) + (new-status (cadr state-status)) + (test-id (db:test-get-id test)) + (test-run-dir (db:test-get-rundir new-test-dat)) + (has-subrun (and (subrun:subrun-test-initialized? test-run-dir) + (not (subrun:subrun-removed? test-run-dir))))) + (when has-subrun + (common:send-thunk-to-background-thread + (lambda () + (subrun:set-state-status test-run-dir state status new-state-status) + ) + ) + ) + (debug:print-info 2 *default-log-port* "new state " new-state ", new status " new-status ) + (mt:test-set-state-status-by-id run-id test-id new-state new-status #f)) + (if (not (null? tal)) + (loop (car tal)(cdr tal)))) + ((run-wait) + ;; BB TODO - manage has-subrun case + (debug:print-info 2 *default-log-port* "still waiting, " (length tests) " tests still running") + (thread-sleep! 10) + (let ((new-tests (proc-get-tests run-id))) + (if (null? new-tests) + (debug:print-info 1 *default-log-port* "Run completed according to zero tests matching provided criteria.") + (loop (car new-tests)(cdr new-tests))))) + ((archive) + ;; BB TODO - manage has-subrun case + (if (and run-dir (not toplevel-with-children)) + (let ((ddir (conc run-dir "/"))) + (case (string->symbol (args:get-arg "-archive")) + ((save save-remove keep-html) + (if (common:file-exists? ddir) + (debug:print-info 0 *default-log-port* "Estimating disk space usage for " test-fulln ": " (common:get-disk-space-used ddir))))))) + (if (not (null? tal)) + (loop (car tal)(cdr tal)))) + ))) + ) + (if worker-thread (thread-join! worker-thread))) + (common:join-backgrounded-threads)))) + ;; remove the run if zero tests remain + (if (eq? action 'remove-runs) + (let* ((run-id (db:get-value-by-header run header "id")) ;; NB// masks run-id from above? + (remtests (mt:get-tests-for-run run-id #f '("DELETED") '("n/a") not-in: #t))) + (if (null? 
remtests) ;; no more tests remaining + (let* ((dparts (string-split lasttpath "/")) + (runpath (conc "/" (string-intersperse + (take dparts (- (length dparts) 1)) + "/")))) + (debug:print 1 *default-log-port* "Removing run: " runkey " " (db:get-value-by-header run header "runname") " and related record") + (if (not keep-records) + (begin + (rmt:delete-run run-id) + (rmt:delete-old-deleted-test-records))) + ;; (rmt:set-var "DELETED_TESTS" (current-seconds)) + ;; need to figure out the path to the run dir and remove it if empty + ;; (if (null? (glob (conc runpath "/*"))) + ;; (begin + ;; (debug:print 1 *default-log-port* "Removing run dir " runpath) + ;; (system (conc "rmdir -p " runpath)))) + ))))) + )) + runs) + ;; (sqlite3:finalize! (db:delay-if-busy tdbdat)) + ) + #t)

;; Remove the on-disk data for one test and then update or delete its records.
;;
;;   test - test record; read through the db:test-get-* accessors
;;   mode - 'remove-data-only, 'remove-all or 'archive-remove; #f means 'remove-all
;;
;; Sequence:
;;   1. flag the test with a transitional state (CLEANING / REMOVING / ARCHIVE_REMOVING)
;;   2. recursively delete the real directory behind the run dir
;;   3. delete the run dir itself (symlink, or directory when it is empty)
;;   4. restore the state (remove-data-only), mark ARCHIVED (archive-remove) or
;;      delete the test records (anything else)
;;
;; A no-sync lock around the cleanup (lock-key / got-lock) was sketched here at
;; one point and is currently disabled.
;;
(define (runs:remove-test-directory test mode) ;; remove-data-only)
  (let* ((run-dir    (db:test-get-rundir test)) ;; run dir is from the link tree
         (real-dir   (and (common:file-exists? run-dir)
                          (common:nice-path run-dir)))
         (clean-mode (or mode 'remove-all))
         ;; every state change in this routine goes through the same call
         (mark-test  (lambda (state status)
                       (mt:test-set-state-status-by-id (db:test-get-run_id test)
                                                       (db:test-get-id test)
                                                       state status #f))))
    ;; 1. transitional state; other modes leave the state untouched
    (case clean-mode
      ((remove-data-only) (mark-test "CLEANING" "LOCKED"))
      ((remove-all)       (mark-test "REMOVING" "LOCKED"))
      ((archive-remove)   (mark-test "ARCHIVE_REMOVING" #f)))
    (debug:print-info 1 *default-log-port* "Attempting to remove " (if real-dir (conc " dir " real-dir " and ") "") " link " run-dir)
    ;; 2. the real directory
    (cond
     ((and real-dir
           (> (string-length real-dir) 5) ;; bad heuristic but should prevent /tmp /home etc.
           (common:file-exists? real-dir))
      (debug:print-info 1 *default-log-port* "Recursively removing " real-dir)
      ;; existence is checked again immediately before the delete
      (if (common:file-exists? real-dir)
          (runs:safe-delete-test-dir real-dir)
          (debug:print 0 *default-log-port* "WARNING: test dir " real-dir " appears to not exist or is not readable")))
     (real-dir
      (debug:print 0 *default-log-port* "WARNING: directory " real-dir " does not exist"))
     (else
      (debug:print 0 *default-log-port* "WARNING: no real directory corrosponding to link " run-dir ", nothing done")))
    ;; 3. the run dir itself
    (cond
     ((symbolic-link? run-dir)
      (debug:print-info 1 *default-log-port* "Removing symlink " run-dir)
      (handle-exceptions
       exn
       (debug:print-error 0 *default-log-port* " Failed to remove symlink " run-dir ((condition-property-accessor 'exn 'message) exn) ", attempting to continue")
       (delete-file run-dir)))
     ((directory? run-dir)
      ;; count the entries; only an empty directory is removed
      (if (> (directory-fold (lambda (f x) (+ 1 x)) 0 run-dir) 0)
          (debug:print 0 *default-log-port* "WARNING: refusing to remove " run-dir " as it is not empty")
          (handle-exceptions
           exn
           (debug:print-error 0 *default-log-port* " Failed to remove directory " run-dir ((condition-property-accessor 'exn 'message) exn) ", attempting to continue")
           (delete-directory run-dir))))
     ((and run-dir
           (not (member run-dir (list "n/a" "/tmp/badname"))))
      (debug:print 0 *default-log-port* "WARNING: not removing " run-dir " as it either doesn't exist or is not a symlink"))
     (else
      (debug:print 0 *default-log-port* "NOTE: the run dir for this test is undefined. Test may have already been deleted.")))
    ;; 4. Only touch the records *after* removing the directory. If things fail we have a record
    (case clean-mode
      ((remove-data-only) (mark-test (db:test-get-state test) (db:test-get-status test)))
      ((archive-remove)   (mark-test "ARCHIVED" #f))
      (else               (rmt:delete-test-records (db:test-get-run_id test) (db:test-get-id test))))))

;;======================================================================
;; Routines for manipulating runs
;;======================================================================

;; Many run-level commands need the same preamble, so it lives here once.
;;
;;   switchname  - command line switch being served; used in error messages
;;   action-desc - description of the action; used in error messages
;;   proc        - called as (proc target runname keys keyvals)
;;
;; Exits with 3 when the target or run name is missing and with 1 when setup
;; fails, the -reqtarg section is not in runconfigs.config, or the run area
;; config was not found. Sets *didsomething* once proc has returned.
;;
(define (general-run-call switchname action-desc proc)
  (let ((runname (or (args:get-arg "-runname") (args:get-arg ":runname")))
        (target  (common:args-get-target)))
    (cond
     ((not target)
      (debug:print-error 0 *default-log-port* "Missing required parameter for " switchname ", you must specify the target with -target")
      (exit 3))
     ((not runname)
      (debug:print-error 0 *default-log-port* "Missing required parameter for " switchname ", you must specify the run name with -runname runname")
      (exit 3))
     (else
      (if (launch:setup)
          ;; cache the run config. launch:cache-config is deliberately not
          ;; called here: there are two independent config cache locations
          ;; and runconfigs must be processed first.
          (full-runconfigs-read)
          (begin
            (debug:print 0 *default-log-port* "Failed to setup, exiting")
            (exit 1)))
      (let ((keys    (keys:config-get-fields *configdat*))
            (reqtarg (args:get-arg "-reqtarg")))
        ;; have enough to process -target or -reqtarg here
        (cond
         (reqtarg
          (let* ((runconfigf (conc *toppath* "/runconfigs.config")) ;; DO NOT EVALUATE ALL
                 (runconfig  (configf:read-config runconfigf #f #t environ-patt: #f)))
            (if (hash-table-ref/default runconfig reqtarg #f)
                (keys:target-set-args keys reqtarg args:arg-hash)
                (begin
                  (debug:print-error 0 *default-log-port* "[" reqtarg "] not found in " runconfigf)
                  (exit 1)))))
         ((args:get-arg "-target")
          ;; NOTE(review): args:arg-hash is handed to args:get-arg as a second
          ;; argument here - kept as found, confirm it is intended.
          (keys:target-set-args keys (args:get-arg "-target" args:arg-hash) args:arg-hash)))
        (if (car *configinfo*)
            ;; Extract out stuff needed in most or many calls
            ;; here then call proc
            (let ((keyvals (keys:target->keyval keys target)))
              (proc target runname keys keyvals))
            (begin
              (debug:print-error 0 *default-log-port* "Attempted to " action-desc " but run area config file not found")
              (exit 1)))
        (set! *didsomething* #t))))))

;;======================================================================
;; Lock/unlock runs
;;======================================================================

;; Lock or unlock every run matching runname / target.
;;
;; Locking is always applied. Unlocking is applied when -force was given or
;; the user answers "y" to the prompt; otherwise the run is skipped.
;;
(define (runs:handle-locking target keys runname lock unlock user)
  (let* ((rundat (mt:get-runs-by-patt keys runname target))
         (header (vector-ref rundat 0))
         (runs   (vector-ref rundat 1))
         (str    (if lock "lock" "unlock")))
    (for-each
     (lambda (run)
       (let* ((run-id   (db:get-value-by-header run header "id"))
              (proceed? (or lock
                            (and unlock
                                 (or (args:get-arg "-force")
                                     (begin
                                       (print "Do you really wish to unlock run " run-id "?\n y/n: ")
                                       (equal? "y" (read-line))))))))
         (if proceed?
             (begin
               (rmt:lock/unlock-run run-id lock unlock user)
               (debug:print-info 0 *default-log-port* "Done " str " on run id " run-id))
             (debug:print-info 0 *default-log-port* "Skipping lock/unlock on " run-id))))
     runs)))

;;======================================================================
;; Rollup runs
;;======================================================================

;; Update the test_meta table for this test.
;;
;; A record is added when none exists yet. After that every listed field
;; whose value in test-conf (section "test_meta") differs from the stored
;; one is written back. The number paired with each field name is its index
;; in the stored record.
;;
(define (runs:update-test_meta test-name test-conf)
  (let ((record (or (rmt:testmeta-get-record test-name)
                    (let ((blank (make-vector 11 #f)))
                      (rmt:testmeta-add-record test-name)
                      blank))))
    (for-each
     (lambda (field-spec)
       (let* ((fld (car field-spec))
              (idx (cadr field-spec))
              (val (configf:lookup test-conf "test_meta" fld)))
         (if (and val (not (equal? (vector-ref record idx) val)))
             (begin
               (print "Updating " test-name " " fld " to " val)
               (rmt:testmeta-update-field test-name fld val)))))
     '(("author" 2) ("owner" 3) ("description" 4) ("reviewed" 5) ("tags" 9) ("jobgroup" 10)))))

;; find tests with matching tags, tagpatt is a string "tagpatt1,tagpatt2%, ..."
;;
;; Each row from rmt:get-tests-tags is (tag . tests). The tests of every
;; matching row are prepended to the result, so later rows come first.
;;
(define (runs:get-tests-matching-tags tagpatt)
  (let loop ((rows    (rmt:get-tests-tags))
             (matches '())) ;; list of tests that match one or more tags
    (if (null? rows)
        matches
        (let ((tag   (car (car rows)))
              (tests (cdr (car rows))))
          (loop (cdr rows)
                (if (patt-list-match tag tagpatt)
                    (append tests matches)
                    matches))))))


;; Update test_meta for all tests. Tests whose config cannot be read are
;; skipped. The db argument is not used.
(define (runs:update-all-test_meta db)
  (for-each
   (lambda (test-name)
     (let ((test-conf (mt:lazy-read-test-config test-name)))
       (if test-conf
           (runs:update-test_meta test-name test-conf))))
   (hash-table-keys (tests:get-all)))) ;; (tests:get-valid-tests)

;; This could probably be refactored into one complex query ...
;; NOT PORTED - DO NOT USE YET
;;
;; The whole definition below is disabled with a datum comment (#;). It still
;; talks to sqlite directly and refers to names that are not bound here
;; (new-testdat, cdb:remote-run), so it must stay disabled until it is ported.
;;
#;(define (runs:rollup-run keys runname user keyvals)
  (debug:print 4 *default-log-port* "runs:rollup-run, keys: " keys " -runname " runname " user: " user)
  (let* ((db              #f)
         ;; register run operates on the main db
         (new-run-id      (rmt:register-run keyvals runname "new" "n/a" user (args:get-arg "-contour")))
         (prev-tests      (rmt:get-matching-previous-test-run-records new-run-id "%" "%"))
         (curr-tests      (mt:get-tests-for-run new-run-id "%/%" '() '()))
         (curr-tests-hash (make-hash-table)))
    (rmt:update-run-event_time new-run-id)
    ;; index the already saved tests by testname and itemdat in curr-tests-hash
    (for-each
     (lambda (testdat)
       (let* ((testname  (db:test-get-testname testdat))
              (item-path (db:test-get-item-path testdat))
              (full-name (conc testname "/" item-path)))
         (hash-table-set! curr-tests-hash full-name testdat)))
     curr-tests)
    ;; NOPE: Non-optimal approach. Try this instead.
    ;;   1. tests are received in a list, most recent first
    ;;   2. replace the rollup test with the new *always*
    (for-each
     (lambda (testdat)
       (let* ((testname        (db:test-get-testname testdat))
              (item-path       (db:test-get-item-path testdat))
              (full-name       (conc testname "/" item-path))
              (prev-test-dat   (hash-table-ref/default curr-tests-hash full-name #f))
              (test-steps      (rmt:get-steps-for-test (db:test-get-id testdat)))
              (new-test-record #f))
         ;; replace these with insert ... select
         (apply sqlite3:execute
                db
                (conc "INSERT OR REPLACE INTO tests (run_id,testname,state,status,event_time,host,cpuload,diskfree,uname,rundir,item_path,run_duration,final_logf,comment) "
                      "VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?);")
                new-run-id (cddr (vector->list testdat)))
         (set! new-testdat (car (mt:get-tests-for-run new-run-id (conc testname "/" item-path) '() '())))
         (hash-table-set! curr-tests-hash full-name new-testdat) ;; this could be confusing, which record should go into the lookup table?
         ;; Now duplicate the test steps
         (debug:print 4 *default-log-port* "Copying records in test_steps from test_id=" (db:test-get-id testdat) " to " (db:test-get-id new-testdat))
         (cdb:remote-run ;; to be replaced, note: this routine is not used currently
          (lambda ()
            (sqlite3:execute
             db
             (conc "INSERT OR REPLACE INTO test_steps (test_id,stepname,state,status,event_time,comment) "
                   "SELECT " (db:test-get-id new-testdat) ",stepname,state,status,event_time,comment FROM test_steps WHERE test_id=?;")
             (db:test-get-id testdat))
            ;; Now duplicate the test data
            (debug:print 4 *default-log-port* "Copying records in test_data from test_id=" (db:test-get-id testdat) " to " (db:test-get-id new-testdat))
            (sqlite3:execute
             db
             (conc "INSERT OR REPLACE INTO test_data (test_id,category,variable,value,expected,tol,units,comment) "
                   "SELECT " (db:test-get-id new-testdat) ",category,variable,value,expected,tol,units,comment FROM test_data WHERE test_id=?;")
             (db:test-get-id testdat))))
         ))
     prev-tests)))


;; clean cache files
;;
;; Deletes the cached .megatest* and .runconfig* files found in
;; <linktree>/<target>/<runname>. Both target and runname are required; a
;; missing one is reported and nothing is removed. A file that cannot be
;; deleted is reported and the remaining files are still processed.
;; The toppath argument is not used.
;;
(define (runs:clean-cache target runname toppath)
  (cond
   ((not target)
    (debug:print-error 0 *default-log-port* "-clean-cache requires -target or -reqtarg"))
   ((not runname)
    (debug:print-error 0 *default-log-port* "-clean-cache requires -runname."))
   (else
    (let* ((linktree (common:get-linktree)) ;; (if toppath (configf:lookup *configdat* "setup" "linktree")))
           (runtop   (conc linktree "/" target "/" runname))
           (files    (if (common:file-exists? runtop)
                         (append (glob (conc runtop "/.megatest*"))
                                 (glob (conc runtop "/.runconfig*")))
                         '())))
      (if (null? files)
          (debug:print-info 0 *default-log-port* "No cached megatest or runconfigs files found. None removed.")
          (begin
            (debug:print-info 0 *default-log-port* "Removing cached files:\n " (string-intersperse files "\n "))
            (for-each
             (lambda (f)
               (handle-exceptions
                exn
                (debug:print 0 *default-log-port* "WARNING: Failed to remove file " f)
                (delete-file f)))
             files)))))))



;; Spec for End of test
;;   At end of each test call, after marking self as COMPLETED do run-state-status-rollup
;;   At transition to run COMPLETED/X do hooks
;;   Definition: test_dead if event_time + duration + 1 minute? < current_time AND
;;               we can prove the process is not alive (ssh host pstree -A pid)
;;               if dead safe to mark the test as killed in the db
;;   State/status table
;;     new
;;     100% COMPLETED/ (PASS,FAIL,ABORT etc.) ==> COMPLETED / X where X is same as itemized rollup
;;     > 3 RUNNING with not test_dead do nothing (run should already be RUNNING/ na
;;     > 0 RUNNING and test_dead then send KILLREQ ==> COMPLETED
;;     0 RUNNING ==> this is actually the first condition, should not get here

;; Roll up the run state/status, then decide what the end of a test means
;; for the run as a whole:
;;   - everything was launched and nothing is left uncompleted: run the post hook
;;   - more than 3 tests running: just report the count
;;   - 1..3 tests running: kill the dead ones; when everything was launched and
;;     every running test was killed, run this check again
;;   - otherwise: list the tests that are not completed
;;
;; "Everything launched" is the run variable "lunch-complete-<run-id>" holding
;; "yes"; a missing variable counts as not launched.
;;
(define (launch:end-of-run-check run-id )
  (let* ((not-completed-cnt (rmt:get-not-completed-cnt run-id))
         (running-cnt       (rmt:get-count-tests-running-for-run-id run-id))
         (all-test-launched (rmt:get-var (conc "lunch-complete-" run-id)))
         (current-state     (rmt:get-run-state run-id))
         (current-status    (rmt:get-run-status run-id))
         ;; equal? is already false for a missing (#f) variable
         (launch-done?      (equal? all-test-launched "yes")))
    (debug:print 0 *default-log-port* "rollup run state/status")
    (rmt:set-state-status-and-roll-up-run run-id current-state current-status)
    (cond
     ((and launch-done? (eq? not-completed-cnt 0))
      (debug:print 0 *default-log-port* "look for post hook.")
      (runs:run-post-hook run-id))
     ((> running-cnt 3)
      (debug:print 0 *default-log-port* "There are " running-cnt " tests running." ))
     ((> running-cnt 0)
      (debug:print 0 *default-log-port* "running cnt > 0 but <= 3 kill-running-tests-if-dead" )
      (let ((kill-cnt (launch:kill-tests-if-dead run-id)))
        (if (and launch-done? (eq? kill-cnt running-cnt))
            (launch:end-of-run-check run-id)))) ;;todo
     (else
      (debug:print 0 *default-log-port* "Should it get here?? May be everything is not launched yet. Running test cnt:" running-cnt " Not completed test cnt:" not-completed-cnt)
      (for-each
       (lambda (running-test)
         (let ((test-name (vector-ref running-test 2))
               (item-path (vector-ref running-test 11)))
           (debug:print 0 *default-log-port* "test " test-name "/" item-path " not completed")))
       (rmt:get-tests-for-run run-id "%" `("NOT_STARTED" "RUNNING" "LAUNCHED" "REMOTEHOSTSTART") `() #f #f #f #f #f #f #f #f))))))

;; This is the Megatest API. All generally "useful" routines will be wrapped or extended
;; here.

;;                     0       1               2               3
(defstruct launch:einf (pid #t)(exit-status #t)(exit-code #t)(rollup-status 0))

;;======================================================================
;;  R U N S
;;======================================================================

;; runs:get-runs-by-patt
;; get runs by list of criteria
;; register a test run with the db
;;
;; Use: (db-get-value-by-header (db:get-header runinfo)(db:get-rows runinfo))
;;  to extract info from the structure returned
;;
;; Fetches the matching runs in pages of 500 and returns
;; (vector header all-rows). A full page means there may be more, so the
;; next page is requested at the advanced offset.
;;
;; Fixes: the next page used to be requested at the current offset instead of
;; the advanced one, so the first page was fetched twice and appeared twice
;; in the result. The per-page dump of the whole batch is now printed at
;; debug level 4 instead of 0.
;;
(define (mt:get-runs-by-patt keys runnamepatt targpatt)
  (let ((limit 500))
    (let loop ((runsdat (rmt:get-runs-by-patt keys runnamepatt targpatt 0 limit #f 0))
               (res     '())
               (offset  0))
      (let* ((header    (vector-ref runsdat 0))
             (runslst   (vector-ref runsdat 1))
             (full-list (append res runslst))
             (have-more (= (length runslst) limit)))
        (if have-more
            (let* ((new-offset (+ offset limit))
                   (next-batch (rmt:get-runs-by-patt keys runnamepatt targpatt new-offset limit #f 0)))
              (debug:print-info 4 *default-log-port* "More than " limit " runs, have " (length full-list) " runs so far.")
              (debug:print-info 4 *default-log-port* "next-batch: " next-batch)
              (loop next-batch full-list new-offset))
            (vector header full-list))))))

;;======================================================================
;;  T E S T S
;;======================================================================

;; Fetch all tests of a run matching testpatt / states / status, in pages of
;; 500, and return them as one list. The keyword arguments are passed through
;; to rmt:get-tests-for-run unchanged.
;;
(define (mt:get-tests-for-run run-id testpatt states status #!key (not-in #t) (sort-by 'event_time) (sort-order "ASC") (qryvals #f)(last-update #f))
  (let ((limit 500))
    (let loop ((testsdat (rmt:get-tests-for-run run-id testpatt states status 0 limit not-in sort-by sort-order qryvals last-update 'normal))
               (res      '())
               (offset   0))
      (let* ((full-list (append res testsdat))
             (have-more (= (length testsdat) limit)))
        (if have-more
            (let ((new-offset (+ offset limit)))
              (debug:print-info 4 *default-log-port* "More than " limit " tests, have " (length full-list) " tests so far.")
              (loop (rmt:get-tests-for-run run-id testpatt states status new-offset limit not-in sort-by sort-order qryvals last-update 'normal)
                    full-list
                    new-offset))
            full-list)))))

;; Cached front end for rmt:get-prereqs-not-met. A result is reused while it
;; is less than 5 seconds old; otherwise it is fetched again and the cache
;; entry is replaced.
;; NOTE(review): itemmaps is not part of the cache key - confirm that calls
;; differing only in itemmaps may share a cached result.
;;
(define (mt:lazy-get-prereqs-not-met run-id waitons ref-item-path #!key (mode '(normal))(itemmaps #f) )
  (let* ((key    (list run-id waitons ref-item-path mode))
         (res    (hash-table-ref/default *pre-reqs-met-cache* key #f))
         ;; cache entries are (vector timestamp result)
         (useres (let ((last-time (if (vector? res) (vector-ref res 0) #f)))
                   (if last-time
                       (< (current-seconds) (+ last-time 5))
                       #f))))
    (if useres
        (let ((result (vector-ref res 1)))
          (debug:print 4 *default-log-port* "Using lazy value res: " result)
          result)
        (let ((newres (rmt:get-prereqs-not-met run-id waitons ref-item-path mode: mode itemmaps: itemmaps)))
          (hash-table-set! *pre-reqs-met-cache* key (vector (current-seconds) newres))
          newres))))

;; Get run stats from local access, move this ... but where?
(define (mt:get-run-stats dbstruct run-id)
  (db:get-run-stats dbstruct run-id))

;; Return tests without the entries that wait on failed-test, keeping the
;; original order. A test missing from test-records is treated as having no
;; waitons and is kept.
;;
;; Fix: the loop used to stop when the remainder became empty without looking
;; at the current test, so the last test of the list was always dropped,
;; blocked or not.
;;
(define (mt:discard-blocked-tests run-id failed-test tests test-records)
  (if (null? tests)
      tests
      (begin
        (debug:print-info 1 *default-log-port* "Discarding tests from " tests " that are waiting on " failed-test)
        (let loop ((remaining tests)
                   (kept      '()))
          (if (null? remaining)
              (reverse kept)
              (let* ((testn    (car remaining))
                     (test-dat (hash-table-ref/default test-records testn (vector #f #f '())))
                     (waitons  (vector-ref test-dat 2)))
                (loop (cdr remaining)
                      (if (member failed-test waitons)
                          (begin
                            (debug:print 0 *default-log-port* "Discarding test " testn "(" test-dat ") due to " failed-test)
                            kept)
                          (cons testn kept)))))))))

;;======================================================================
;;  S T A T E   A N D   S T A T U S   F O R   T E S T S
;;======================================================================

;; Set state, status and comment of a test by id and roll the change up to
;; the items. Returns #t on success. When run-id or test-id is missing the
;; problem is reported with a call chain and #f is returned.
;;
(define (mt:test-set-state-status-by-id run-id test-id newstate newstatus newcomment)
  (if (not (and run-id test-id))
      (begin
        (debug:print-error 0 *default-log-port* "bad data handed to mt:test-set-state-status-by-id, run-id=" run-id ", test-id=" test-id ", newstate=" newstate)
        (print-call-chain (current-error-port))
        #f)
      (begin
        ;; this single call replaced separate rmt:general-call requests for
        ;; state, status and comment
        (rmt:set-state-status-and-roll-up-items run-id test-id #f newstate newstatus newcomment)
        ;; (mt:process-triggers run-id test-id newstate newstatus)
        #t)))


;; Same as mt:test-set-state-status-by-id but leaves a test alone (and
;; returns #t) when its state is already "COMPLETED".
(define (mt:test-set-state-status-by-id-unless-completed run-id test-id newstate newstatus newcomment)
  (let* ((test-vec (rmt:get-testinfo-state-status run-id test-id))
         (state    (vector-ref test-vec 3)))
    (if (equal? state "COMPLETED")
        #t
        (rmt:set-state-status-and-roll-up-items run-id test-id #f newstate newstatus newcomment))))


;; Set state, status and comment of a test addressed by name and item path.
;; Always returns #t.
(define (mt:test-set-state-status-by-testname run-id test-name item-path new-state new-status new-comment)
  (rmt:set-state-status-and-roll-up-items run-id test-name item-path new-state new-status new-comment)
  ;; (mt:process-triggers run-id test-id new-state new-status)
  #t)

;; Name/item-path front end for mt:test-set-state-status-by-id-unless-completed.
(define (mt:test-set-state-status-by-testname-unless-completed run-id test-name item-path new-state new-status new-comment)
  (let ((test-id (rmt:get-test-id run-id test-name item-path)))
    (mt:test-set-state-status-by-id-unless-completed run-id test-id new-state new-status new-comment)))

;; kill any runner processes (i.e. processes handling -runtests) that match target/runname
;;
;; do a remote call to get the task queue info but do the killing as self here.
;;
;; Host and pid are taken from the end of element 8 of each task record.
;; A process on this host gets SIGINT and, when still alive 5 seconds later,
;; SIGKILL. A process on another host is killed through "nbfake kill", with
;; TARGETHOST / TARGETHOST_LOGF set for that call.
;;
;; Fix: TARGETHOST used to be restored and then unconditionally unset, which
;; discarded the caller's value. It is now restored when it was set before
;; and unset only when it was not.
;;
(define (tasks:kill-runner target run-name testpatt)
  (let ((records    (rmt:tasks-find-task-queue-records target run-name testpatt "running" "run-tests"))
        (hostpid-rx (regexp "\\s+(\\w+)\\s+(\\d+)$"))) ;; host pid is at end of param string
    (if (null? records)
        (debug:print 0 *default-log-port* "No run launching processes found for " target " / " run-name " with testpatt " (or testpatt "* no testpatt specified! *"))
        (debug:print 0 *default-log-port* "Found " (length records) " run(s) to kill."))
    (for-each
     (lambda (record)
       (let* ((param-key (list-ref record 8))
              (match-dat (string-search hostpid-rx param-key)))
         (if match-dat
             (let ((hostname (cadr match-dat))
                   (pid      (string->number (caddr match-dat))))
               (debug:print 0 *default-log-port* "Sending SIGINT to process " pid " on host " hostname)
               (if (equal? (get-host-name) hostname)
                   ;; local process: signal it directly
                   (if (process:alive? pid)
                       (begin
                         (handle-exceptions
                          exn
                          (begin
                            (debug:print 0 *default-log-port* "Kill of process " pid " on host " hostname " failed.")
                            (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
                            #t)
                          (process-signal pid signal/int)
                          (thread-sleep! 5)
                          (if (process:alive? pid)
                              (process-signal pid signal/kill)))))
                   ;; remote process: hand the kill to nbfake
                   (let ((old-targethost (getenv "TARGETHOST")))
                     (setenv "TARGETHOST" hostname)
                     (setenv "TARGETHOST_LOGF" "server-kills.log")
                     (system (conc "nbfake kill " pid))
                     (if old-targethost
                         (setenv "TARGETHOST" old-targethost)
                         (unsetenv "TARGETHOST"))
                     (unsetenv "TARGETHOST_LOGF"))))
             (debug:print-error 0 *default-log-port* "no record or improper record for " target "/" run-name " in tasks_queue in main.db"))))
     records)))

;; Print the run times matching -run-patt / -target-patt (both default to
;; "%") in the format chosen with -dumpmode: "json", "csv" or, for anything
;; else, space separated. Prints a message and exits when nothing matches.
;;
(define (task:get-run-times)
  (let* ((run-patt    (or (args:get-arg "-run-patt") "%"))
         (target-patt (or (args:get-arg "-target-patt") "%"))
         (run-times   (rmt:get-run-times run-patt target-patt))
         (dumpmode    (args:get-arg "-dumpmode")))
    (if (null? run-times)
        (begin
          (print "Data not found!!")
          (exit)))
    (cond
     ((equal? dumpmode "json") (task:print-runtime-as-json run-times))
     ((equal? dumpmode "csv")  (task:print-runtime run-times ","))
     (else                     (task:print-runtime run-times " ")))))

;; Print the test times of one run, selected with -runname and -target, in
;; the format chosen with -dumpmode ("json", "csv", otherwise space
;; separated). Both arguments are required and may not contain '%'.
;;
;; Fix: the arguments are now validated before the remote query is made;
;; previously the query ran first, even with missing arguments.
;;
(define (task:get-test-times)
  (let ((runname (args:get-arg "-runname"))
        (target  (args:get-arg "-target")))
    (if (not runname)
        (begin
          (print "Error: Missing argument -runname")
          (exit)))
    (if (string-contains runname "%")
        (begin
          (print "Error: Invalid runname, '%' not allowed (" runname ") ")
          (exit)))
    (if (not target)
        (begin
          (print "Error: Missing argument -target")
          (exit)))
    (if (string-contains target "%")
        (begin
          (print "Error: Invalid target, '%' not allowed (" target ") ")
          (exit)))
    (let ((test-times (rmt:get-test-times runname target))
          (dumpmode   (args:get-arg "-dumpmode")))
      (if (null? test-times)
          (begin
            (print "Data not found!!")
            (exit)))
      (cond
       ((equal? dumpmode "json") (task:print-testtime-as-json test-times))
       ((equal? dumpmode "csv")  (task:print-testtime test-times ","))
       (else                     (task:print-testtime test-times " "))))))



;; gets mtpg-run-id and syncs the record if different
;;
;;   dbh                       - handle passed to the pgdb: calls
;;   cached-info               - hash table; key 'runs holds the run-id -> pgdb id table
;;   run-id                    - local run id
;;   area-info                 - vector; element 0 is the area id
;;   smallest-last-update-time - hash table; key "smallest-time" tracks the
;;                               smallest last_update among runs that need syncing
;;
;; Returns the pgdb run id, or #f when the run is "deleted" and unknown to
;; pgdb or when inserting it failed. After a successful insert the function
;; calls itself again so that the lookup/refresh path handles the new record.
;;
(define (tasks:run-id->mtpg-run-id dbh cached-info run-id area-info smallest-last-update-time)
  (let* ((runs-ht (hash-table-ref cached-info 'runs))
         (runinf  (hash-table-ref/default runs-ht run-id #f))
         (area-id (vector-ref area-info 0)))
    (if runinf
        runinf ;; already cached
        (let* ((run-dat      (rmt:get-run-info run-id))           ;; NOTE: get-run-info returns a vector < row header >
               (run-name     (rmt:get-run-name-from-id run-id))
               (row          (db:get-rows run-dat))               ;; yes, this returns a single row
               (header       (db:get-header run-dat))
               (state        (db:get-value-by-header row header "state"))
               (status       (db:get-value-by-header row header "status"))
               (owner        (db:get-value-by-header row header "owner"))
               (event-time   (db:get-value-by-header row header "event_time"))
               (comment      (db:get-value-by-header row header "comment"))
               (fail-count   (db:get-value-by-header row header "fail_count"))
               (pass-count   (db:get-value-by-header row header "pass_count"))
               (db-contour   (db:get-value-by-header row header "contour"))
               ;; only meaningful with -prepend-contour; without it this value
               ;; is unspecified and is not used below
               (contour      (if (args:get-arg "-prepend-contour")
                                 (if (and db-contour (not (equal? db-contour "")) (string? db-contour ))
                                     (begin
                                       (debug:print-info 1 *default-log-port* "db-contour")
                                       db-contour)
                                     (args:get-arg "-contour"))))
               (run-tag      (or (args:get-arg "-run-tag") ""))
               (last-update  (db:get-value-by-header row header "last_update"))
               (keytarg      (if (or (args:get-arg "-prepend-contour") (args:get-arg "-prefix-target"))
                                 (conc "MT_CONTOUR/MT_AREA/" (string-intersperse (rmt:get-keys) "/"))
                                 (string-intersperse (rmt:get-keys) "/"))) ;; e.g. version/iteration/platform
               (target       (if (or (args:get-arg "-prepend-contour") (args:get-arg "-prefix-target"))
                                 (conc (or (args:get-arg "-prefix-target") (conc contour "/" (common:get-area-name) "/")) (rmt:get-target run-id))
                                 (rmt:get-target run-id)))                 ;; e.g. v1.63/a3e1/ubuntu
               (spec-id      (pgdb:get-ttype dbh keytarg))
               (publish-time (if (args:get-arg "-cp-eventtime-to-publishtime")
                                 event-time
                                 (current-seconds)))
               (new-run-id   (pgdb:get-run-id dbh spec-id target run-name area-id)))
          (if new-run-id
              (begin ;; let ((run-record (pgdb:get-run-info dbh new-run-id))
                (hash-table-set! runs-ht run-id new-run-id)
                ;; ensure key fields are up to date
                ;; if last_update == pgdb_last_update do not update smallest-last-update-time
                (let* ((pgdb-last-update (pgdb:get-run-last-update dbh new-run-id))
                       (smallest-time    (hash-table-ref/default smallest-last-update-time "smallest-time" #f)))
                  (if (and (> last-update pgdb-last-update) (or (not smallest-time) (< last-update smallest-time)))
                      (hash-table-set! smallest-last-update-time "smallest-time" last-update)))
                (pgdb:refresh-run-info
                 dbh
                 new-run-id
                 state status owner event-time comment fail-count pass-count area-id last-update publish-time)
                (debug:print-info 0 *default-log-port* "Working on run-id " run-id " pgdb-id " new-run-id )
                (if (not (equal? run-tag ""))
                    (task:add-run-tag dbh new-run-id run-tag))
                new-run-id)

              (if (equal? state "deleted")
                  (begin
                    (debug:print-info 1 *default-log-port* "Warning: Run with id " run-id " was created after previous sync and deleted before the sync") #f)
                  ;; unknown to pgdb: insert it; a failed insert yields #f
                  (if (handle-exceptions
                       exn
                       (begin (print-call-chain)
                              (print ((condition-property-accessor 'exn 'message) exn))
                              #f)

                       (pgdb:insert-run
                        dbh
                        spec-id target run-name state status owner event-time comment fail-count pass-count area-id last-update publish-time))
                      (let* ((smallest-time (hash-table-ref/default smallest-last-update-time "smallest-time" #f)))
                        (if (or (not smallest-time) (< last-update smallest-time))
                            (hash-table-set! smallest-last-update-time "smallest-time" last-update))
                        (tasks:run-id->mtpg-run-id dbh cached-info run-id area-info smallest-last-update-time))
                      #f)))))))

+(define (task:add-run-tag dbh run-id tag) + (let* ((tag-info (pgdb:get-tag-info-by-name dbh tag))) + (if (not tag-info) + (begin + (if (handle-exceptions + exn + (begin + (debug:print-info 1 *default-log-port* ((condition-property-accessor 'exn 'message) exn)) + #f) + (pgdb:insert-tag dbh tag)) + (set!
tag-info (pgdb:get-tag-info-by-name dbh tag)) + #f))) + ;;add to area_tags + (handle-exceptions + exn + (begin + (debug:print-info 1 *default-log-port* ((condition-property-accessor 'exn 'message) exn)) + #f) + (if (not (pgdb:is-run-taged-with-a-tag dbh (vector-ref tag-info 0) run-id)) + (pgdb:insert-run-tag dbh (vector-ref tag-info 0) run-id))))) + + +(define (tasks:sync-test-steps dbh cached-info test-step-ids smallest-last-update-time) + ; (print "Sync Steps " test-step-ids ) + (let ((test-ht (hash-table-ref cached-info 'tests)) + (step-ht (hash-table-ref cached-info 'steps))) + (for-each + (lambda (test-step-id) + (let* ((test-step-info (rmt:get-steps-info-by-id test-step-id)) + (step-id (tdb:step-get-id test-step-info)) + (test-id (tdb:step-get-test_id test-step-info)) + (stepname (tdb:step-get-stepname test-step-info)) + (state (tdb:step-get-state test-step-info)) + (status (tdb:step-get-status test-step-info)) + (event_time (tdb:step-get-event_time test-step-info)) + (comment (tdb:step-get-comment test-step-info)) + (logfile (tdb:step-get-logfile test-step-info)) + (last-update (tdb:step-get-last_update test-step-info)) + (pgdb-test-id (hash-table-ref/default test-ht test-id #f)) + (smallest-time (hash-table-ref/default smallest-last-update-time "smallest-time" #f)) + (pgdb-step-id (if pgdb-test-id + (pgdb:get-test-step-id dbh pgdb-test-id stepname state) + #f))) + (if step-id + (begin + (if pgdb-test-id + (begin + (if pgdb-step-id + (begin + (debug:print-info 1 *default-log-port* "Updating existing test-step with test-id: " test-id " and step-id " step-id " pgdb test id: " pgdb-test-id " pgdb step id " pgdb-step-id ) + (let* ((pgdb-last-update (pgdb:get-test-step-last-update dbh pgdb-step-id))) + (if (and (> last-update pgdb-last-update) (or (not smallest-time) (< last-update smallest-time))) + (hash-table-set! 
smallest-last-update-time "smallest-time" last-update))) + (pgdb:update-test-step dbh pgdb-step-id pgdb-test-id stepname state status event_time comment logfile last-update)) + (begin + (debug:print-info 1 *default-log-port* "Inserting test-step with test-id: " test-id " and step-id " step-id " pgdb test id: " pgdb-test-id) + (if (or (not smallest-time) (< last-update smallest-time)) + (hash-table-set! smallest-last-update-time "smallest-time" last-update)) + (pgdb:insert-test-step dbh pgdb-test-id stepname state status event_time comment logfile last-update ) + (set! pgdb-step-id (pgdb:get-test-step-id dbh pgdb-test-id stepname state)))) + (hash-table-set! step-ht step-id pgdb-step-id )) + (debug:print-info 1 *default-log-port* "Error: Test not cashed"))) + (debug:print-info 1 *default-log-port* "Error: Could not get test step info for step id " test-step-id )))) ;; this is a wierd senario need to debug + test-step-ids))) + +(define (tasks:sync-test-gen-data dbh cached-info test-data-ids smallest-last-update-time) + (let ((test-ht (hash-table-ref cached-info 'tests)) + (data-ht (hash-table-ref cached-info 'data))) + (for-each + (lambda (test-data-id) + (let* ((test-data-info (rmt:get-data-info-by-id test-data-id)) + (data-id (db:test-data-get-id test-data-info)) + (test-id (db:test-data-get-test_id test-data-info)) + (category (db:test-data-get-category test-data-info)) + (variable (db:test-data-get-variable test-data-info)) + (value (db:test-data-get-value test-data-info)) + (expected (db:test-data-get-expected test-data-info)) + (tol (db:test-data-get-tol test-data-info)) + (units (db:test-data-get-units test-data-info)) + (comment (db:test-data-get-comment test-data-info)) + (status (db:test-data-get-status test-data-info)) + (type (db:test-data-get-type test-data-info)) + (last-update (db:test-data-get-last_update test-data-info)) + (smallest-time (hash-table-ref/default smallest-last-update-time "smallest-time" #f)) + + (pgdb-test-id (hash-table-ref/default 
test-ht test-id #f)) + (pgdb-data-id (if pgdb-test-id + (pgdb:get-test-data-id dbh pgdb-test-id category variable) + #f))) + (if data-id + (begin + (if pgdb-test-id + (begin + (if pgdb-data-id + (begin + (debug:print-info 1 *default-log-port* "Updating existing test-data with test-id: " test-id " and data-id " data-id " pgdb test id: " pgdb-test-id " pgdb data id " pgdb-data-id) + (let* ((pgdb-last-update (pgdb:get-test-data-last-update dbh pgdb-data-id))) + (if (and (> last-update pgdb-last-update) (or (not smallest-time) (< last-update smallest-time))) + (hash-table-set! smallest-last-update-time "smallest-time" last-update))) + (pgdb:update-test-data dbh pgdb-data-id pgdb-test-id category variable value expected tol units comment status type last-update)) + (begin + (debug:print-info 1 *default-log-port* "Inserting test-data with test-id: " test-id " and data-id " data-id " pgdb test id: " pgdb-test-id) + (if (handle-exceptions + exn + (begin (print-call-chain) + (print ((condition-property-accessor 'exn 'message) exn)) + #f) + + (pgdb:insert-test-data dbh pgdb-test-id category variable value expected tol units comment status type last-update)) + ;(tasks:run-id->mtpg-run-id dbh cached-info run-id area-info) + (begin + ;(pgdb:insert-test-data dbh pgdb-test-id category variable value expected tol units comment status type ) + (if (or (not smallest-time) (< last-update smallest-time)) + (hash-table-set! smallest-last-update-time "smallest-time" last-update)) + (set! pgdb-data-id (pgdb:get-test-data-id dbh pgdb-test-id category variable))) + #f))) + (hash-table-set! 
data-ht data-id pgdb-data-id )) + (begin + (debug:print-info 1 *default-log-port* "Error: Test not in pgdb")))) + + (debug:print-info 1 *default-log-port* "Error: Could not get test data info for data id " test-data-id )))) ;; this is a wierd senario need to debug + test-data-ids))) + + + +(define (tasks:sync-tests-data dbh cached-info test-ids area-info smallest-last-update-time) + (let ((test-ht (hash-table-ref cached-info 'tests))) + (for-each + (lambda (test-id) + ; (print test-id) + (let* ((test-info (rmt:get-test-info-by-id #f test-id)) + (run-id (db:test-get-run_id test-info)) ;; look these up in db_records.scm + (test-id (db:test-get-id test-info)) + (test-name (db:test-get-testname test-info)) + (item-path (db:test-get-item-path test-info)) + (state (db:test-get-state test-info)) + (status (db:test-get-status test-info)) + (host (db:test-get-host test-info)) + (pid (db:test-get-process_id test-info)) + (cpuload (db:test-get-cpuload test-info)) + (diskfree (db:test-get-diskfree test-info)) + (uname (db:test-get-uname test-info)) + (run-dir (db:test-get-rundir test-info)) + (log-file (db:test-get-final_logf test-info)) + (run-duration (db:test-get-run_duration test-info)) + (comment (db:test-get-comment test-info)) + (event-time (db:test-get-event_time test-info)) + (archived (db:test-get-archived test-info)) + (last-update (db:test-get-last_update test-info)) + (pgdb-run-id (tasks:run-id->mtpg-run-id dbh cached-info run-id area-info smallest-last-update-time)) + (smallest-time (hash-table-ref/default smallest-last-update-time "smallest-time" #f)) + (pgdb-test-id (if pgdb-run-id + (begin + ;(print pgdb-run-id) + (pgdb:get-test-id dbh pgdb-run-id test-name item-path)) + #f))) + ;; "id" "run_id" "testname" "state" "status" "event_time" + ;; "host" "cpuload" "diskfree" "uname" "rundir" "item_path" + ;; "run_duration" "final_logf" "comment" "shortdir" "attemptnum" "archived" + (if pgdb-run-id + (begin + (if pgdb-test-id ;; have a record + (begin ;; let 
((key-name (conc run-id "/" test-name "/" item-path))) + (debug:print-info 0 *default-log-port* "Updating existing test with run-id: " run-id " and test-id: " test-id " pgdb run id: " pgdb-run-id " pgdb-test-id " pgdb-test-id) + (let* ((pgdb-last-update (pgdb:get-test-last-update dbh pgdb-test-id))) + (if (and (> last-update pgdb-last-update) (or (not smallest-time) (< last-update smallest-time))) ;;if last-update is same as pgdb-last-update then it is safe to assume the records are identical and we can use a larger last update time. + (hash-table-set! smallest-last-update-time "smallest-time" last-update))) + (pgdb:update-test dbh pgdb-test-id pgdb-run-id test-name item-path state status host cpuload diskfree uname run-dir log-file run-duration comment event-time archived last-update pid)) + (begin + (debug:print-info 0 *default-log-port* "Inserting test with run-id: " run-id " and test-id: " test-id " pgdb run id: " pgdb-run-id) + (pgdb:insert-test dbh pgdb-run-id test-name item-path state status host cpuload diskfree uname run-dir log-file run-duration comment event-time archived last-update pid) + (if (or (not smallest-time) (< last-update smallest-time)) + (hash-table-set! smallest-last-update-time "smallest-time" last-update)) + (set! pgdb-test-id (pgdb:get-test-id dbh pgdb-run-id test-name item-path)))) + (hash-table-set! test-ht test-id pgdb-test-id)) + (debug:print-info 1 *default-log-port* "WARNING: Skipping run with run-id:" run-id ". This run was created after privious sync and removed before this sync.")))) + test-ids))) + +(define (task:add-area-tag dbh area-info tag) + (let* ((tag-info (pgdb:get-tag-info-by-name dbh tag))) + (if (not tag-info) + (begin + (if (handle-exceptions + exn + (begin + (debug:print-info 1 *default-log-port* ((condition-property-accessor 'exn 'message) exn)) + #f) + (pgdb:insert-tag dbh tag)) + (set! 
tag-info (pgdb:get-tag-info-by-name dbh tag))
+          #f)))
+    ;;add to area_tags
+    (handle-exceptions
+     exn
+     (begin
+       (debug:print-info 1 *default-log-port* ((condition-property-accessor 'exn 'message) exn))
+       #f)
+     (if (not (pgdb:is-area-taged-with-a-tag dbh (vector-ref tag-info 0) (vector-ref area-info 0)))
+         (pgdb:insert-area-tag dbh (vector-ref tag-info 0) (vector-ref area-info 0))))))
+
+(define (tasks:sync-run-data dbh cached-info run-ids area-info smallest-last-update-time)
+  (for-each
+   (lambda (run-id)
+     (debug:print-info 1 *default-log-port* "Check if run with " run-id " needs to be synced" )
+     (tasks:run-id->mtpg-run-id dbh cached-info run-id area-info smallest-last-update-time))
+run-ids))
+
+
+;; get runs changed since last sync
+;; (define (tasks:sync-test-data dbh cached-info area-info)
+;;   (let* ((
+
+;; Sync runs, tests, test steps and test data for the area at *toppath* into
+;; the postgres db opened with (pgdb:open configdat dbname: dest).
+;;
+;;  - With both -target and -runname given, only the records of that run
+;;    (filtered by -testpatt, default "%") are synced and the stored sync
+;;    time is left alone.
+;;  - Otherwise the records changed since the area's last sync time (slot 3
+;;    of the area record) are synced, and the smallest last_update seen is
+;;    written back as the new sync time.
+;;  - Giving only one of -target / -runname prints an error and exits 1.
+;;  - If no area record exists, tasks:set-area is tried and, on success, the
+;;    sync is restarted; on failure an error is logged and #f is returned.
+;;
+(define (tasks:sync-to-postgres configdat dest)
+  (print "In sync")
+  (let* ((dbh         (pgdb:open configdat dbname: dest))
+         (area-info   (pgdb:get-area-by-path dbh *toppath*))
+         (cached-info (make-hash-table))
+         (start       (current-seconds))
+         (test-patt   (if (args:get-arg "-testpatt")
+                          (args:get-arg "-testpatt")
+                          "%"))
+         (target      (if (args:get-arg "-target")
+                          (args:get-arg "-target")
+                          #f))
+         (run-name    (if (args:get-arg "-runname")
+                          (args:get-arg "-runname")
+                          #f)))
+    (if (and target (not run-name))
+        (begin
+          (print "Error: Provide runname")
+          (exit 1)))
+    (if (and (not target) run-name)
+        (begin
+          (print "Error: Provide target")
+          (exit 1)))
+    ;(print "123")
+    ;(exit 1)
+    (for-each (lambda (dtype)
+                (hash-table-set! cached-info dtype (make-hash-table)))
+              '(runs targets tests steps data))
+    (hash-table-set! cached-info 'start start) ;; when done we'll set sync times to this
+    (if area-info
+        (let* ((last-sync-time (vector-ref area-info 3))
+               (smallest-last-update-time (make-hash-table))
+               (changed        (if (and target run-name)
+                                   (rmt:get-run-record-ids target run-name (rmt:get-keys) test-patt)
+                                   (rmt:get-changed-record-ids last-sync-time)))
+               (run-ids        (alist-ref 'runs       changed))
+               (test-ids       (alist-ref 'tests      changed))
+               (test-step-ids  (alist-ref 'test_steps changed))
+               (test-data-ids  (alist-ref 'test_data  changed))
+               (run-stat-ids   (alist-ref 'run_stats  changed))
+               (area-tag       (if (args:get-arg "-area-tag")
+                                   (args:get-arg "-area-tag")
+                                   (if (args:get-arg "-area")
+                                       (args:get-arg "-area")
+                                       ""))))
+          (if (and (equal? area-tag "") (not (pgdb:is-area-taged dbh (vector-ref area-info 0))))
+              (set! area-tag *default-area-tag*))
+          (if (not (equal? area-tag ""))
+              (task:add-area-tag dbh area-info area-tag))
+          (if (or (not (null? test-ids)) (not (null? run-ids)))
+              (begin
+                (debug:print-info 0 *default-log-port* "syncing runs")
+                (tasks:sync-run-data dbh cached-info run-ids area-info smallest-last-update-time)
+                (debug:print-info 0 *default-log-port* "syncing tests")
+                (tasks:sync-tests-data dbh cached-info test-ids area-info smallest-last-update-time)
+                (debug:print-info 0 *default-log-port* "syncing test steps")
+                (tasks:sync-test-steps dbh cached-info test-step-ids smallest-last-update-time)
+                (debug:print-info 0 *default-log-port* "syncing test data")
+                (tasks:sync-test-gen-data dbh cached-info test-data-ids smallest-last-update-time)
+                (print "----------done---------------")))
+          (let* ((smallest-time (hash-table-ref/default smallest-last-update-time "smallest-time" #f)))
+            ;; the log port must be passed explicitly, as in every other debug:print-info call
+            (debug:print-info 0 *default-log-port* "smallest-time :" smallest-time " last-sync-time " last-sync-time)
+            (if (not (and target run-name))
+                ;; numeric comparison, so use = rather than eq?
+                (if (or (and smallest-time (> smallest-time last-sync-time)) (and smallest-time (= last-sync-time 0)))
+                    (pgdb:write-sync-time dbh area-info smallest-time))))) ;;this needs to be changed
+        (if (tasks:set-area dbh configdat)
+            (tasks:sync-to-postgres configdat dest)
+            (begin
+              (debug:print 0 *default-log-port* "ERROR: unable to create an area record")
+              #f)))))
+
+;;======================================================================
+;; L O C K I N G  M E C H A N I S M S
+;;======================================================================
+
+;; faux-lock is deprecated. Please use simple-lock below
+;;
+;; Polls once per second for up to wait-time seconds while keyname is held.
+;; When allow-lock-steal is true (the default) the lock is forcibly released
+;; on the last second so that the final attempt can take it. Returns #t only
+;; if, after setting the key, it reads back as this process id; #f otherwise.
+;;
+(define (common:faux-lock keyname #!key (wait-time 8)(allow-lock-steal #t))
+  (if (rmt:no-sync-get/default keyname #f) ;; do not be tempted to compare to pid. locking is a one-shot action, if already locked for this pid it doesn't actually count
+      (if (> wait-time 0)
+          (begin
+            (thread-sleep! 1)
+            (if (and allow-lock-steal (eq? wait-time 1)) ;; only one second left, steal the lock
+                (begin
+                  (debug:print-info 0 *default-log-port* "stealing lock for " keyname)
+                  (common:faux-unlock keyname force: #t)))
+            ;; carry the caller's steal policy through the retries
+            (common:faux-lock keyname wait-time: (- wait-time 1) allow-lock-steal: allow-lock-steal))
+          #f)
+      (begin
+        (rmt:no-sync-set keyname (conc (current-process-id)))
+        (equal? (conc (current-process-id)) (conc (rmt:no-sync-get/default keyname #f))))))
+
+(define (common:faux-unlock keyname #!key (force #f))
+  (if (or force (equal? (conc (current-process-id)) (conc (rmt:no-sync-get/default keyname #f))))
+      (begin
+        (if (rmt:no-sync-get/default keyname #f) (rmt:no-sync-del! keyname))
+        #t)
+      #f))
+
+;; simple lock. improve and converge on this one.
+;;
+(define (common:simple-lock keyname)
+  (rmt:no-sync-get-lock keyname))
+
+(define (common:simple-unlock keyname #!key (force #f))
+  (rmt:no-sync-del!
keyname)) + +;;====================================================================== +;; db based host calls +;;====================================================================== + +;;====================================================================== +;; T E S T L A U N C H I N G P E R I T E M W I T H H O S T T Y P E S +;;====================================================================== +;; +;; [hosts] +;; arm cubie01 cubie02 +;; x86_64 zeus xena myth01 +;; allhosts #{g hosts arm} #{g hosts x86_64} +;; +;; [host-types] +;; general #MTLOWESTLOAD #{g hosts allhosts} +;; arm #MTLOWESTLOAD #{g hosts arm} +;; nbgeneral nbjob run JOBCOMMAND -log $MT_LINKTREE/$MT_TARGET/$MT_RUNNAME.$MT_TESTNAME-$MT_ITEM_PATH.lgo +;; +;; [host-rules] +;; # maxnload => max normalized load +;; # maxnjobs => max jobs per cpu +;; # maxjobrate => max jobs per second +;; general maxnload=1.1; maxnjobs=1.2; maxjobrate=0.1 +;; +;; [launchers] +;; envsetup general +;; xor/%/n 4C16G +;; % nbgeneral +;; +;; [jobtools] +;; # if defined and not "no" flexi-launcher will bypass "launcher" unless no match. +;; flexi-launcher yes +;; launcher nbfake +;; +(define (common:get-launcher configdat testname itempath) + (let ((fallback-launcher (configf:lookup configdat "jobtools" "launcher"))) + (if (and (configf:lookup configdat "jobtools" "flexi-launcher") ;; overrides launcher + (not (equal? (configf:lookup configdat "jobtools" "flexi-launcher") "no"))) + (let* ((launchers (hash-table-ref/default configdat "launchers" '()))) + (if (null? 
launchers) + fallback-launcher + (let loop ((hed (car launchers)) + (tal (cdr launchers))) + (let ((patt (car hed)) + (host-type (cadr hed))) + (if (tests:match patt testname itempath) + (begin + (debug:print-info 2 *default-log-port* "Have flexi-launcher match for " testname "/" itempath " = " host-type) + (let ((launcher (configf:lookup configdat "host-types" host-type))) + (if launcher + (let* ((launcher-parts (string-split launcher)) + (launcher-exe (car launcher-parts))) + (if (equal? launcher-exe "#MTLOWESTLOAD") ;; this is our special case, we will find the lowest load and craft a nbfake commandline + (let host-loop ((targ-host (common:get-least-loaded-host (cdr launcher-parts) host-type configdat)) + (count 100)) + (if targ-host + (conc "remrun " targ-host) + (if (> count 0) + (begin + (debug:print 0 *default-log-port* "INFO: Waiting for a host for host-type " host-type) + (thread-sleep! (- 101 count)) + (host-loop (common:get-least-loaded-host (cdr launcher-parts) host-type configdat) + (- count 1))) + (begin + (debug:print 0 *default-log-port* "FATAL: Failed to find a host from #MTLOWESTLOAD for host-type " host-type) + (exit))))) + launcher)) + (begin + (debug:print-info 0 *default-log-port* "WARNING: no launcher found for host-type " host-type) + (if (null? tal) + fallback-launcher + (loop (car tal)(cdr tal))))))) + ;; no match, try again + (if (null? tal) + fallback-launcher + (loop (car tal)(cdr tal)))))))) + fallback-launcher))) + +;; ideally put all this info into the db, no need to preserve it across moving homehost +;; +;; return list of +;; ( reachable? cpuload update-time ) +(define (common:get-host-info hostname) + (let* ((loadinfo (rmt:get-latest-host-load hostname)) ;; if this host happens to have been recently used by a test reuse the load data + (load (car loadinfo)) + (load-sample-time (cdr loadinfo)) + (load-sample-age (- (current-seconds) load-sample-time)) + (loadinfo-timeout-seconds 6) ;; this was 20 seconds, seems way too lax. 
Switch to 6 seconds + (host-last-update-timeout-seconds 4) + (host-rec (hash-table-ref/default *host-loads* hostname #f)) + ) + (cond + ((< load-sample-age loadinfo-timeout-seconds) + (list #t + load-sample-time + load)) + ((and host-rec + (< (current-seconds) (+ (host-last-update host-rec) host-last-update-timeout-seconds))) + (list #t + (host-last-update host-rec) + (host-last-cpuload host-rec ))) + ((common:unix-ping hostname) + (list #t + (current-seconds) + (alist-ref 'adj-core-load (common:get-normalized-cpu-load hostname)))) ;; this is cheaper than you might think. get-normalized-cpu-load is cached for up to 5 seconds + (else + (list #f 0 -1) ;; bad host, don't use! + )))) + +;; see defstruct host at top of file. +;; host: reachable last-update last-used last-cpuload +;; +(define (common:update-host-loads-table hosts-raw) + (let* ((hosts (filter (lambda (x) + (string-match (regexp "^\\S+$") x)) + hosts-raw))) + (for-each + (lambda (hostname) + (let* ((rec (let ((h (hash-table-ref/default *host-loads* hostname #f))) + (if h + h + (let ((h (make-host))) + (hash-table-set! *host-loads* hostname h) + h)))) + (host-info (common:get-host-info hostname)) + (is-reachable (car host-info)) + (last-reached-time (cadr host-info)) + (load (caddr host-info))) + (host-reachable-set! rec is-reachable) + (host-last-update-set! rec last-reached-time) + (host-last-cpuload-set! rec load))) + hosts))) + +;; go through the hosts from least recently used to most recently used, pick the first that meets the load criteral from the +;; [host-rules] section. 
+;; +(define (common:get-least-loaded-host hosts-raw host-type configdat) + (let* ((rdat (configf:lookup configdat "host-rules" host-type)) + (rules (common:val->alist (or rdat "") convert: #t)) ;; maxnload, maxnjobs, maxjobrate + (maxnload (common:alist-ref/default 'maxnload rules 1.5)) ;; max normalized load + (maxnjobs (common:alist-ref/default 'maxnjobs rules 1.5)) ;; max normalized number of jobs + (maxjobrate (common:alist-ref/default 'maxjobrate rules (/ 1 6))) ;; max rate of submitting jobs to a given host in jobs/second + (hosts (filter (lambda (x) + (string-match (regexp "^\\S+$") x)) + hosts-raw)) + ;; (best-host #f) + (get-rec (lambda (hostname) + ;; (print "get-rec hostname=" hostname) + (let ((h (hash-table-ref/default *host-loads* hostname #f))) + (if h + h + (let ((h (make-host))) + (hash-table-set! *host-loads* hostname h) + h))))) + (best-load 99999) + (curr-time (current-seconds)) + (get-hosts-sorted (lambda (hosts) + (sort hosts (lambda (a b) + (let ((a-rec (get-rec a)) + (b-rec (get-rec b))) + ;; (print "a=" a " a-rec=" a-rec " host-last-used=" (host-last-used a-rec)) + ;; (print "b=" b " b-rec=" b-rec " host-last-used=" (host-last-used b-rec)) + (< (host-last-used a-rec) + (host-last-used b-rec)))))))) + (debug:print 0 *default-log-port* "INFO: hosts-sorted=" (get-hosts-sorted hosts)) + (if (null? hosts) + #f ;; no hosts to select from. All done and giving up now. 
+        (let ((hosts-sorted (get-hosts-sorted hosts)))
+          (common:update-host-loads-table hosts)
+          (let loop ((hostname  (car hosts-sorted))
+                     (tal       (cdr hosts-sorted))
+                     (best-host #f))
+            (let* ((rec       (get-rec hostname))
+                   (reachable (host-reachable rec))
+                   (load      (host-last-cpuload rec))
+                   (last-used (host-last-used rec))
+                   (delta     (- curr-time last-used))
+                   (job-rate  (if (> delta 0)
+                                  (/ 1 delta)
+                                  999)) ;; jobs per second
+                   (new-best
+                    (cond
+                     ((not reachable)
+                      (debug:print 0 *default-log-port* "Skipping host " hostname " as it cannot be reached.")
+                      best-host)
+                     ((and (< load maxnload)         ;; load is acceptable
+                           (< job-rate maxjobrate))  ;; job rate is acceptable
+                      (set! best-load load)
+                      hostname)
+                     (else best-host))))
+              (debug:print 0 *default-log-port* "INFO: Trying host " hostname " with load " load ", last used " delta " seconds ago, with job-rate " job-rate " for running a test." )
+              (if new-best
+                  (begin ;; found a host, return it
+                    (debug:print 0 *default-log-port* "INFO: Found host: " new-best " load: " load " last-used: " delta " seconds ago, with job-rate: " job-rate)
+                    (host-last-used-set! rec curr-time)
+                    new-best)
+                  (if (null? tal) #f (loop (car tal)(cdr tal) best-host)))))))))
+
+;; Sleep and retry (up to count times) while the load on remote-host (or the
+;; local host when remote-host is #f) is too high.
+;;
+;;   maxload-in    : per-cpu load limit; raised to at least 0.5 unless
+;;                   force-maxload is true
+;;   numcpus-in    : cpu count; values of 1 or less are re-queried with
+;;                   common:get-num-cpus
+;;   waitdelay     : contributes to the computed sleep time (capped near 300s)
+;;   msg           : optional text appended to the log messages
+;;
+;; Waits when the first value from common:get-cpu-load exceeds
+;; maxload * numcpus, or when the first value exceeds the second by more
+;; than numcpus.
+;;
+(define (common:wait-for-cpuload maxload-in numcpus-in waitdelay #!key (count 1000) (msg #f)(remote-host #f)(force-maxload #f))
+  (let* ((loadavg (common:get-cpu-load remote-host))
+         (numcpus (if (<= numcpus-in 1) ;; not possible to have zero. If we get 1, it's possible that we got the previous default, and we should check again
+                      (common:get-num-cpus remote-host)
+                      numcpus-in))
+         (maxload (if force-maxload
+                      maxload-in
+                      (max maxload-in 0.5))) ;; so maxload must be greater than 0.5 for now BUG - FIXME?
+         (first   (car loadavg))
+         (next    (cadr loadavg))
+         (adjload (* maxload (max 1 numcpus))) ;; possible bug where numcpus (or could be maxload) is zero, crude fallback is to at least use 1
+         (loadjmp (- first next))
+         (adjwait (min (+ 300 (random 10)) (abs (* (+ (random 10)(/ (- 1000 count) 10) waitdelay) (- first adjload) )) ))) ;; add some randomness to the time to break any alignment where netbatch dumps many jobs to machines simultaneously
+    (debug:print-info 1 *default-log-port* "Checking cpuload on " (or remote-host "localhost") ", maxload: " maxload
+                      ", load: " first ", adjload: " adjload ", loadjmp: " loadjmp)
+    (cond
+     ((and (> first adjload)
+           (> count 0))
+      (debug:print-info 0 *default-log-port* "server start delayed " adjwait " seconds due to load " first " exceeding max of " adjload " on server " (or remote-host (get-host-name)) " (normalized load-limit: " maxload ") " (if msg msg ""))
+      (thread-sleep! adjwait)
+      ;; force-maxload is forwarded so a forced limit below 0.5 survives the retry
+      (common:wait-for-cpuload maxload numcpus waitdelay count: (- count 1) msg: msg remote-host: remote-host force-maxload: force-maxload))
+     ((and (> loadjmp numcpus)
+           (> count 0))
+      (debug:print-info 0 *default-log-port* "waiting " adjwait " seconds due to load jump " loadjmp " > numcpus " numcpus (if msg msg ""))
+      (thread-sleep! adjwait)
+      (common:wait-for-cpuload maxload numcpus waitdelay count: (- count 1) msg: msg remote-host: remote-host force-maxload: force-maxload)))))
+
+;; Wait for the normalized load on the homehost to drop below maxload.
+;; When already on the homehost, #f is passed down so the calls are local.
+;;
+(define (common:wait-for-homehost-load maxload msg)
+  (let* ((hh-dat (if (common:on-homehost?) ;; if we are on the homehost then pass in #f so the calls are local.
+                     #f
+                     (common:get-homehost)))
+         (hh     (if hh-dat (car hh-dat) #f)))
+    (common:wait-for-normalized-load maxload msg hh)))
+
+;; Return the number of "processor" entries in /proc/cpuinfo of remote-host
+;; (read over ssh), or of the local host when remote-host is #f. A cached
+;; value up to 86400 seconds old is used when available.
+;;
+(define (common:get-num-cpus remote-host)
+  (let* ((actual-host (or remote-host (get-host-name))))
+    (or (common:get-cached-info actual-host "num-cpus" age: 86400) ;; hosts had better not be changing the number of cpus too often!
+        (let* ((proc (lambda ()
+                       (let loop ((numcpu 0)
+                                  (inl    (read-line)))
+                         (if (eof-object? inl)
+                             numcpu
+                             (loop (if (string-match "^processor\\s+:\\s+\\d+$" inl)
+                                       (+ numcpu 1)
+                                       numcpu)
+                                   (read-line))))))
+               (result (if remote-host
+                           (with-input-from-pipe
+                               (conc "ssh " remote-host " cat /proc/cpuinfo")
+                             proc)
+                           (with-input-from-file "/proc/cpuinfo" proc))))
+          ;; cache under actual-host only (remote-host is #f for local calls), and
+          ;; never cache a zero count - that means the probe failed
+          (if (> result 0)
+              (common:write-cached-info actual-host "num-cpus" result))
+          result))))
+
+;; wait for normalized cpu load to drop below maxload
+;;
+(define (common:wait-for-normalized-load maxload msg remote-host)
+  (let ((num-cpus (common:get-num-cpus remote-host)))
+    (common:wait-for-cpuload maxload num-cpus 15 msg: msg remote-host: remote-host)))
+
+;;======================================================================
+;; D E B U G G I N G  S T U F F
+;;======================================================================
+
+;; (define *verbosity* 1)
+;; (define *logging* #f)
+
+(define (common:set-last-run-version)
+  (rmt:set-var "MEGATEST_VERSION" (common:version-signature)))
+
+;; positive number if megatest version > db version
+;; negative number if megatest version < db version
+(define (common:version-db-delta)
+  (- megatest-version (common:get-last-run-version-number)))
+
+(define (common:version-changed?)
+  (not (equal? (common:get-last-run-version)
+               (common:version-signature))))
+
+;; from metadat lookup MEGATEST_VERSION
+;;
+(define (common:get-last-run-version) ;; RADT => How does this work in send-receive function??; assume it is the value saved in some DB
+  (rmt:get-var "MEGATEST_VERSION"))
+
+(define (common:get-last-run-version-number)
+  (string->number
+   (substring (common:get-last-run-version) 0 6)))
+
+(define (common:api-changed?)
+  (not (equal?
(substring (->string megatest-version) 0 4) + (substring (conc (common:get-last-run-version)) 0 4)))) + +;; '(print (string-intersperse (map cadr (hash-table-ref/default (read-config "megatest.config" \#f \#t) "disks" '"'"'("none" ""))) "\n"))' +(define (common:get-disks #!key (configf #f)) + (hash-table-ref/default + (or configf (configf:read-config "megatest.config" #f #t)) + "disks" '("none" ""))) + +;;====================================================================== +;; watchdog and exit procedures +;;====================================================================== + +;;====================================================================== +;; E X I T H A N D L I N G +;;====================================================================== + +;; (let ((ohh (common:on-homehost?)) +;; (srv (args:get-arg "-server"))) +;; (and ohh srv))) + ;; (debug:print-info 0 *default-log-port* "common:run-sync? ohh=" ohh ", srv=" srv) + +(define *watchdog* (make-thread + (lambda () + (handle-exceptions + exn + (begin + (print-call-chain) + (print " message: " ((condition-property-accessor 'exn 'message) exn))) + (common:watchdog))) + "Watchdog thread")) + +;; currently the primary job of the watchdog is to run the sync back to megatest.db from the db in /tmp +;; if we are on the homehost and we are a server (by definition we are on the homehost if we are a server) +;; +(define (common:readonly-watchdog dbstruct) + (thread-sleep! 
0.05) ;; delay for startup + (debug:print-info 13 *default-log-port* "common:readonly-watchdog entered.") + ;; sync megatest.db to /tmp/.../megatst.db + (let* ((sync-cool-off-duration 3) + (golden-mtdb (dbr:dbstruct-mtdb dbstruct)) + (golden-mtpath (db:dbdat-get-path golden-mtdb)) + (tmp-mtdb (dbr:dbstruct-tmpdb dbstruct)) + (tmp-mtpath (db:dbdat-get-path tmp-mtdb))) + (debug:print-info 0 *default-log-port* "Read-only periodic sync thread started.") + (let loop ((last-sync-time 0)) + (debug:print-info 13 *default-log-port* "loop top tmp-mtpath="tmp-mtpath" golden-mtpath="golden-mtpath) + (let* ((duration-since-last-sync (- (current-seconds) last-sync-time))) + (debug:print-info 13 *default-log-port* "duration-since-last-sync="duration-since-last-sync) + (if (and (not *time-to-exit*) + (< duration-since-last-sync sync-cool-off-duration)) + (thread-sleep! (- sync-cool-off-duration duration-since-last-sync))) + (if (not *time-to-exit*) + (let ((golden-mtdb-mtime (file-modification-time golden-mtpath)) + (tmp-mtdb-mtime (file-modification-time tmp-mtpath))) + (if (> golden-mtdb-mtime tmp-mtdb-mtime) + (if (< golden-mtdb-mtime (- (current-seconds) 3)) ;; file has NOT been touched in past three seconds, this way multiple servers won't fight to sync back + (let ((res (db:multi-db-sync dbstruct 'old2new))) + (debug:print-info 13 *default-log-port* "rosync called, " res " records transferred.")))) + (loop (current-seconds))) + #t))) + (debug:print-info 0 *default-log-port* "Exiting readonly-watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id)" mtpath="golden-mtpath))) + + +;; TODO: for multiple areas, we will have multiple watchdogs; and multiple threads to manage +(define (common:watchdog) + (debug:print-info 13 *default-log-port* "common:watchdog entered.") + (if (launch:setup) + (if (common:on-homehost?) 
+ (let ((dbstruct (db:setup #t))) + (debug:print-info 13 *default-log-port* "after db:setup with dbstruct=" dbstruct) + (cond + ((dbr:dbstruct-read-only dbstruct) + (debug:print-info 13 *default-log-port* "loading read-only watchdog") + (common:readonly-watchdog dbstruct)) + (else + (debug:print-info 13 *default-log-port* "loading writable-watchdog.") + (let* ((syncer (or (configf:lookup *configdat* "server" "sync-method") "brute-force-sync"))) + (cond + ((equal? syncer "brute-force-sync") + (server:writable-watchdog-bruteforce dbstruct)) + ((equal? syncer "delta-sync") + (server:writable-watchdog-deltasync dbstruct)) + (else + (debug:print-error 0 *default-log-port* "Unknown server/sync-method specified ("syncer") - valid values are brute-force-sync and delta-sync.") + (exit 1))) + ;;(debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] Syncer started (method="syncer")") + ))) + (debug:print-info 13 *default-log-port* "watchdog done.")) + (debug:print-info 13 *default-log-port* "no need for watchdog on non-homehost")))) + + +(define (std-exit-procedure) + ;;(common:telemetry-log-close) + (on-exit (lambda () 0)) + ;;(debug:print-info 13 *default-log-port* "std-exit-procedure called; *time-to-exit*="*time-to-exit*) + (let ((no-hurry (if *time-to-exit* ;; hurry up + #f + (begin + (set! *time-to-exit* #t) + #t)))) + (debug:print-info 4 *default-log-port* "starting exit process, finalizing databases.") + (if (and no-hurry (debug:debug-mode 18)) + (rmt:print-db-stats)) + (let ((th1 (make-thread (lambda () ;; thread for cleaning up, give it five seconds + (if *dbstruct-db* (db:close-all *dbstruct-db*)) ;; one second allocated + (if *task-db* + (let ((db (cdr *task-db*))) + (if (sqlite3:database? db) + (begin + (sqlite3:interrupt! db) + (sqlite3:finalize! db #t) + ;; (vector-set! *task-db* 0 #f) + (set! *task-db* #f))))) + #;(http-client#close-all-connections!) 
+ ;; (if (and *runremote* + ;; (remote-conndat *runremote*)) + ;; (begin + ;; (http-client#close-all-connections!))) ;; for http-client + (if (not (eq? *default-log-port* (current-error-port))) + (close-output-port *default-log-port*)) + (set! *default-log-port* (current-error-port))) "Cleanup db exit thread")) + (th2 (make-thread (lambda () + (debug:print 4 *default-log-port* "Attempting clean exit. Please be patient and wait a few seconds...") + (if no-hurry + (begin + (thread-sleep! 5)) ;; give the clean up few seconds to do it's stuff + (begin + (thread-sleep! 2))) + (debug:print 4 *default-log-port* " ... done") + ) + "clean exit"))) + (thread-start! th1) + (thread-start! th2) + (thread-join! th1) + ) + ) + + 0) + +(define (std-signal-handler signum) + ;; (signal-mask! signum) + (set! *time-to-exit* #t) + ;;(debug:print-info 13 *default-log-port* "got signal "signum) + (debug:print-error 0 *default-log-port* "Received signal " signum " aaa exiting promptly") + ;; (std-exit-procedure) ;; shouldn't need this since we are exiting and it will be called anyway + (exit)) + +(define (special-signal-handler signum) + ;; (signal-mask! signum) + (set! *time-to-exit* #t) + ;;(debug:print-info 13 *default-log-port* "got signal "signum) + (debug:print-error 0 *default-log-port* "Received signal " signum " sending email befor exiting!!") + ;;TODO send email to notify admin contact listed in the config that the lisner got killed + ;; (std-exit-procedure) ;; shouldn't need this since we are exiting and it will be called anyway + (exit)) + + +(set-signal-handler! signal/int std-signal-handler) ;; ^C +(set-signal-handler! signal/term std-signal-handler) + +;; (set-signal-handler! signal/stop std-signal-handler) ;; ^Z NO, do NOT handle ^Z! + +;; Force a megatest cleanup-db if version is changed and skip-version-check not specified +;; Do NOT check if not on homehost! +;; +(define (common:exit-on-version-changed) + (if (common:on-homehost?) + (if (common:api-changed?) 
+          ;; Version mismatch detected on the homehost: decide whether the db can be migrated in place.
+          (let* ((mtconf (conc (get-environment-variable "MT_RUN_AREA_HOME") "/megatest.config"))
+                 (dbfile (conc (get-environment-variable "MT_RUN_AREA_HOME") "/megatest.db"))
+                 (read-only (not (file-write-access? dbfile)))
+                 (dbstruct (db:setup #t)))
+            (debug:print 0 *default-log-port*
+                         "WARNING: Version mismatch!\n"
+                         " expected: " (common:version-signature) "\n"
+                         " got: " (common:get-last-run-version))
+            (cond
+             ;; MT_SKIP_DB_MIGRATE set: do nothing and carry on
+             ((get-environment-variable "MT_SKIP_DB_MIGRATE") #t)
+             ((and (common:file-exists? mtconf) (common:file-exists? dbfile) (not read-only)
+                   (eq? (current-user-id)(file-owner mtconf))) ;; safe to run -cleanup-db
+              (debug:print 0 *default-log-port* " I see you are the owner of megatest.config, attempting to cleanup and reset to new version")
+              (handle-exceptions
+               exn
+               (begin
+                 (debug:print 0 *default-log-port* "Failed to switch versions.")
+                 (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
+                 (print-call-chain (current-error-port))
+                 (exit 1))
+               (common:cleanup-db dbstruct)))
+             ;; every remaining branch reports why migration is impossible and exits 1
+             ((not (common:file-exists? mtconf))
+              (debug:print 0 *default-log-port* " megatest.config does not exist in this area. Cannot proceed with megatest version migration.")
+              (exit 1))
+             ((not (common:file-exists? dbfile))
+              (debug:print 0 *default-log-port* " megatest.db does not exist in this area. Cannot proceed with megatest version migration.")
+              (exit 1))
+             ;; the ownership test is made on megatest.config (same test as the cleanup branch), so name that file
+             ((not (eq? (current-user-id)(file-owner mtconf)))
+              (debug:print 0 *default-log-port* " You do not own megatest.config in this area. Cannot proceed with megatest version migration.")
+              (exit 1))
+             (read-only
+              (debug:print 0 *default-log-port* " You have read-only access to this area. Cannot proceed with megatest version migration.")
+              (exit 1))
+             (else
+              (debug:print 0 *default-log-port* " to switch versions you can run: \"megatest -cleanup-db\"")
+              (exit 1)))))))
+;; (begin
+;; (debug:print 0 *default-log-port* "ERROR: cannot migrate version unless on homehost. 
Exiting.")
+;; (exit 1))))
+
+;; Move me elsewhere ...
+;; RADT => Why do we need the version check here, this is called only if version mismatch
+;;
+;; Applies db:multi-db-sync to dbstruct with the operations listed in the
+;; call below ('schema 'killservers 'adj-target 'new2old and, via the final
+;; list argument to apply, 'dejunk). Afterwards, if common:api-changed? is
+;; true, calls common:set-last-run-version.
+;;
+;;   dbstruct - handed to db:multi-db-sync as its first argument
+;;   full     - accepted but not read; the (if full ...) that used it is
+;;              commented out, so 'dejunk is always passed
+;;
+(define (common:cleanup-db dbstruct #!key (full #f))
+  (apply db:multi-db-sync
+         dbstruct
+         'schema
+         ;; 'new2old
+         'killservers
+         'adj-target
+         ;; 'old2new
+         'new2old
+         ;; (if full
+         '(dejunk)
+         ;; '())
+         )
+  (if (common:api-changed?)
+      (common:set-last-run-version)))
+
+;; called in megatest.scm, host-port is string hostname:port
+;;
+;; NOTE: This is NOT called directly from clients as not all transports support a client running
+;; in the same process as the server.
+;;
+;; The whole definition is disabled with a #; datum comment.
+;;
+#;(define (server:ping host-port-in #!key (do-exit #f))
+  (let ((host:port (if (not host-port-in) ;; use read-dotserver to find
+                       #f ;; (server:check-if-running *toppath*)
+                       ;; (if (number? host-port-in) ;; we were handed a server-id
+                       ;; (let ((srec (tasks:get-server-by-id (db:delay-if-busy (tasks:open-db)) host-port-in)))
+                       ;; ;; (print "srec: " srec " host-port-in: " host-port-in)
+                       ;; (if srec
+                       ;; (conc (vector-ref srec 3) ":" (vector-ref srec 4))
+                       ;; (conc "no such server-id " host-port-in)))
+                       host-port-in))) ;; )
+    (let* ((host-port (if host:port
+                          (let ((slst (string-split host:port ":")))
+                            (if (eq? (length slst) 2)
+                                (list (car slst)(string->number (cadr slst)))
+                                #f))
+                          #f)))
+;; (toppath (launch:setup)))
+      ;; (print "host-port=" host-port)
+      (if (not host-port)
+          (begin
+            (if host-port-in
+                (debug:print 0 *default-log-port* "ERROR: bad host:port"))
+            (if do-exit (exit 1))
+            #f)
+          (let* ((iface (car host-port))
+                 (port (cadr host-port))
+                 #;(server-dat (http-transport:client-connect iface port))
+                 (login-res (rmt:login-no-auto-client-setup server-dat)))
+            (if (and (list? login-res)
+                     (car login-res))
+                (begin
+                  ;; (print "LOGIN_OK")
+                  (if do-exit (exit 0))
+                  #t)
+                (begin
+                  ;; (print "LOGIN_FAILED")
+                  (if do-exit (exit 1))
+                  #f)))))))
+
+;; run ping in separate process, safest way in some cases
+;;
+;; Runs "<megatest exe> -ping <ifaceport>" through with-input-from-pipe and
+;; reads its output line by line until EOF. Returns #t only when the last
+;; line read is LOGIN_OK; returns #f for anything else, including the case
+;; where no line was read (res still holds its initial "NOREPLY").
+;;
+(define (server:ping-server ifaceport)
+  (with-input-from-pipe
+   (conc (common:get-megatest-exe) " -ping " ifaceport)
+   (lambda ()
+     (let loop ((inl (read-line))
+                (res "NOREPLY"))
+       (if (eof-object? inl)
+           ;; res is the previous (i.e. last real) line of output
+           (case (string->symbol res)
+             ((NOREPLY) #f)
+             ((LOGIN_OK) #t)
+             (else #f))
+           (loop (read-line) inl))))))
+
+;; ping the given server
+;;
+;; Disabled with a #; datum comment.
+;;
+#;(define (server:check-server server-record)
+  (let* ((server-url (server:record->url server-record))
+         (res (case *transport-type*
+                ((http)(server:ping server-url))
+                ;; ((nmsg)(nmsg-transport:ping (tasks:hostinfo-get-interface server)
+                )))
+    (if res
+        server-url
+        #f)))
+
+;; no longer care if multiple servers are started by accident. older servers will drop off in time.
+;;
+;; Disabled with a #; datum comment.
+;;
+#;(define (server:check-if-running areapath) ;; #!key (numservers "2"))
+  (let* ((ns (server:get-num-servers))
+         (servers (server:get-best (server:get-list areapath))))
+    ;; (print "servers: " servers " ns: " ns)
+    (if (or (and servers
+                 (null? servers))
+            (not servers)
+            (and (list? servers)
+                 (< (length servers) (random ns)))) ;; somewhere between 0 and numservers
+        #f
+        (let loop ((hed (car servers))
+                   (tal (cdr servers)))
+          (let ((res (server:check-server hed)))
+            (if res
+                res
+                (if (null? tal)
+                    #f
+                    (loop (car tal)(cdr tal)))))))))
+
+
+;; NOT USED (well, ok, reference in rpc-transport but otherwise not used).
+;;
+;; Returns a one-argument procedure. That procedure, when called, sets
+;; *db-last-access* to (current-seconds) and returns #t if its argument
+;; equals *toppath*, #f otherwise. The outer toppath parameter is shadowed
+;; by the lambda's own parameter and is never read.
+;; NOTE(review): calling (server:login path) only builds the closure; the
+;; check itself does not run. Confirm whether callers expect the boolean.
+;;
+(define (server:login toppath)
+  (lambda (toppath)
+    (set! *db-last-access* (current-seconds)) ;; might not be needed.
+    (if (equal? *toppath* toppath)
+        #t
+        #f)))
+;; Given a run id start a server process ### NOTE ### > file 2>&1
+;; if the run-id is zero and the target-host is set
+;; try running on that host
+;; incidental: rotate logs in logs/ dir. 
+;; +#;(define (server:run areapath) ;; areapath is *toppath* for a given testsuite area + (let* ((curr-host (get-host-name)) + ;; (attempt-in-progress (server:start-attempted? areapath)) + ;; (dot-server-url (server:check-if-running areapath)) + (curr-ip (server:get-best-guess-address curr-host)) + (curr-pid (current-process-id)) + (homehost (common:get-homehost)) ;; configf:lookup *configdat* "server" "homehost" )) + (target-host (car homehost)) + (testsuite (common:get-testsuite-name)) + (logfile (conc areapath "/logs/server.log")) ;; -" curr-pid "-" target-host ".log")) + (cmdln (conc (common:get-megatest-exe) + " -server " (or target-host "-") (if (equal? (configf:lookup *configdat* "server" "daemonize") "yes") + " -daemonize " + "") + ;; " -log " logfile + " -m testsuite:" testsuite)) ;; (conc " >> " logfile " 2>&1 &"))))) + (log-rotate (make-thread common:rotate-logs "server run, rotate logs thread")) + (load-limit (configf:lookup-number *configdat* "jobtools" "max-server-start-load" default: 3.0))) + ;; we want the remote server to start in *toppath* so push there + (push-directory areapath) + (debug:print 0 *default-log-port* "INFO: Trying to start server (" cmdln ") ...") + (thread-start! log-rotate) + + ;; host.domain.tld match host? + (if (and target-host + ;; look at target host, is it host.domain.tld or ip address and does it + ;; match current ip or hostname + (not (string-match (conc "("curr-host "|" curr-host"\\..*)") target-host)) + (not (equal? curr-ip target-host))) + (begin + (debug:print-info 0 *default-log-port* "Starting server on " target-host ", logfile is " logfile) + (setenv "TARGETHOST" target-host))) + + (setenv "TARGETHOST_LOGF" logfile) + (thread-sleep! (/ (random 5000) 1000)) ;; add about a random (up to 5 seconds) initial delay. 
It seems pretty common that many running tests request a server at the same time + (common:wait-for-normalized-load load-limit " delaying server start due to load" target-host) ;; do not try starting servers on an already overloaded machine, just wait forever + (system (conc "nbfake " cmdln)) + (unsetenv "TARGETHOST_LOGF") + (if (get-environment-variable "TARGETHOST")(unsetenv "TARGETHOST")) + (thread-join! log-rotate) + (pop-directory))) + +;; kind start up of servers, wait 40 seconds before allowing another server for a given +;; run-id to be launched +#;(define (server:kind-run areapath) + (if (not (server:check-if-running areapath)) ;; why try if there is already a server running? + (let* ((last-run-dat (hash-table-ref/default *server-kind-run* areapath '(0 0))) ;; callnum, whenrun + (call-num (car last-run-dat)) + (when-run (cadr last-run-dat)) + (run-delay (+ (case call-num + ((0) 0) + ((1) 20) + ((2) 300) + (else 600)) + (random 5))) ;; add a small random number just in case a lot of jobs hit the work hosts simultaneously + (lock-file (conc areapath "/logs/server-start.lock"))) + (if (> (- (current-seconds) when-run) run-delay) + (begin + (common:simple-file-lock-and-wait lock-file expire-time: 15) + (server:run areapath) + (thread-sleep! 2) ;; don't release the lock for at least a few seconds + (common:simple-file-release-lock lock-file))) + (hash-table-set! *server-kind-run* areapath (list (+ call-num 1)(current-seconds)))))) + +#;(define server:try-running server:run) ;; there is no more per-run servers ;; REMOVE ME. BUG. + +#;(define (server:start-and-wait areapath #!key (timeout 60)) + (let ((give-up-time (+ (current-seconds) timeout))) + (let loop ((server-url (server:check-if-running areapath)) + (try-num 0)) + (if (or server-url + (> (current-seconds) give-up-time)) ;; server-url will be #f if no server available. 
+ server-url + (let ((num-ok (length (server:get-best (server:get-list areapath))))) + (if (and (> try-num 0) ;; first time through simply wait a little while then try again + (< num-ok 1)) ;; if there are no decent candidates for servers then try starting a new one + (server:kind-run areapath)) + (thread-sleep! 5) + (loop (server:check-if-running areapath) + (+ try-num 1))))))) + +;;====================================================================== +;; make html output +;;====================================================================== + +(define (tests:test-set-toplog! run-id test-name logf) + (rmt:general-call 'tests:test-set-toplog run-id logf run-id test-name)) + +(define (tests:summarize-items run-id test-id test-name force) + ;; if not force then only update the record if one of these is true: + ;; 1. logf is "log/final.log + ;; 2. logf is same as outputfilename + (let* ((outputfilename (conc "megatest-rollup-" test-name ".html")) + (orig-dir (current-directory)) + (logf-info (rmt:test-get-logfile-info run-id test-name)) + (logf (if logf-info (cadr logf-info) #f)) + (path (if logf-info (car logf-info) #f))) + ;; This query finds the path and changes the directory to it for the test + (if (and (string? path) + (directory? path)) ;; can get #f here under some wierd conditions. why, unknown ... + (begin + (debug:print 4 *default-log-port* "Found path: " path) + (change-directory path)) + ;; (set! outputfilename (conc path "/" outputfilename))) + (debug:print-error 0 *default-log-port* "summarize-items for run-id=" run-id ", test-name=" test-name ", no such path: " path)) + (debug:print 4 *default-log-port* "summarize-items with logf " logf ", outputfilename " outputfilename " and force " force) + (if (or (equal? logf "logs/final.log") + (equal? 
logf outputfilename) + force) + (let ((my-start-time (current-seconds)) + (lockf (conc outputfilename ".lock"))) + (let loop ((have-lock (common:simple-file-lock lockf))) + (if have-lock + (let ((script (configf:lookup *configdat* "testrollup" test-name))) + (print "Obtained lock for " outputfilename) + (rmt:set-state-status-and-roll-up-items run-id test-name "" #f #f #f) + (if script + (system (conc script " > " outputfilename " & ")) + (tests:generate-html-summary-for-iterated-test run-id test-id test-name outputfilename)) + (common:simple-file-release-lock lockf) + (change-directory orig-dir) + ;; NB// tests:test-set-toplog! is remote internal... + (tests:test-set-toplog! run-id test-name outputfilename)) + ;; didn't get the lock, check to see if current update started later than this + ;; update, if so we can exit without doing any work + (if (> my-start-time (handle-exceptions + exn + 0 + (file-modification-time lockf))) + ;; we started since current re-gen in flight, delay a little and try again + (begin + (debug:print-info 1 *default-log-port* "Waiting to update " outputfilename ", another test currently updating it") + (thread-sleep! (+ 5 (random 5))) ;; delay between 5 and 10 seconds + (loop (common:simple-file-lock lockf)))))))))) + +(define (tests:generate-html-summary-for-iterated-test run-id test-id test-name outputfilename) + (let ((counts (make-hash-table)) + (statecounts (make-hash-table)) + (outtxt "") + (tot 0) + (testdat (rmt:test-get-records-for-index-file run-id test-name))) + (with-output-to-file outputfilename + (lambda () + (set! outtxt (conc outtxt "Summary: " test-name + "

Summary for " test-name "

")) + (for-each + (lambda (testrecord) + (let ((id (vector-ref testrecord 0)) + (itempath (vector-ref testrecord 1)) + (state (vector-ref testrecord 2)) + (status (vector-ref testrecord 3)) + (run_duration (vector-ref testrecord 4)) + (logf (vector-ref testrecord 5)) + (comment (vector-ref testrecord 6))) + (hash-table-set! counts status (+ 1 (hash-table-ref/default counts status 0))) + (hash-table-set! statecounts state (+ 1 (hash-table-ref/default statecounts state 0))) + (set! outtxt (conc outtxt "" + ;; " " itempath "" + " " itempath "" + "" state "" + "" status "" + "" (if (equal? comment "") + " " + comment) "" + "")))) + (if (list? testdat) + testdat + (begin + (print "ERROR: failed to get records with rmt:test-get-records-for-index-file run-id=" run-id "test-name=" test-name) + '()))) + + (print "
") + ;; Print out stats for status + (set! tot 0) + (print "") + (for-each (lambda (state) + (set! tot (+ tot (hash-table-ref statecounts state))) + (print "")) + (hash-table-keys statecounts)) + (print "

State stats

" state "" (hash-table-ref statecounts state) "
Total" tot "
") + (print "
") + ;; Print out stats for state + (set! tot 0) + (print "") + (for-each (lambda (status) + (set! tot (+ tot (hash-table-ref counts status))) + (print "")) + (hash-table-keys counts)) + (print "

Status stats

" status + "" (hash-table-ref counts status) "
Total" tot "
") + (print "
") + + (print "" + "" + outtxt "
ItemStateStatusComment
") + ;; (release-dot-lock outputfilename) + ;;(rmt:update-run-stats + ;; run-id + ;; (hash-table-map + ;; state-status-counts + ;; (lambda (key val) + ;; (append key (list val))))) + )))) + +(define tests:css-jscript-block +#< +ul.LinkedList { display: block; } +/* ul.LinkedList ul { display: none; } */ +.HandCursorStyle { cursor: pointer; cursor: hand; } /* For IE */ +th {background-color: #8c8c8c;} +td.test {background-color: #d9dbdd;} +td.PASS {background-color: #347533;} +td.FAIL {background-color: #cc2812;} +td.SKIP{background-color: #FFD733;} +td.WARN {background-color: #EA8724;} +td.WAIVED {background-color: #838A12;} +td.ABORT{background-color: #EA24B7;} +.PASS .link, .SKIP .link, .WARN .link,.WAIVED .link,.ABORT .link, .FAIL .link{color: #FFFFFF;} + + + + + + +EOF +) + +(define tests:css-jscript-block-dynamic +#< +EOF +) + +(define (test:js-block javascript-lib) + (conc "" )) + + +(define tests:css-jscript-block-static (test:js-block *java-script-lib*)) + +(define (tests:css-jscript-block-cond dynamic) + (if (equal? 
dynamic #t) + tests:css-jscript-block-dynamic + tests:css-jscript-block-static)) + + +(define (tests:run-record->test-path run numkeys) + (append (take (vector->list run) numkeys) + (list (vector-ref run (+ 1 numkeys))))) + + +(define (tests:get-rest-data runs header numkeys) + (let ((resh (make-hash-table))) + (for-each + (lambda (run) + (let* ((run-id (db:get-value-by-header run header "id")) + (run-dir (tests:run-record->test-path run numkeys)) + (test-data (rmt:get-tests-for-run + run-id + "%" ;; testnamepatt + '() ;; states + '() ;; statuses + #f ;; offset + #f ;; num-to-get + #f ;; hide/not-hide + #f ;; sort-by + #f ;; sort-order + #f ;; 'shortlist ;; qrytype + 0 ;; last update + #f))) + + (map (lambda (test) + (let* ((test-name (vector-ref test 2)) + (test-html-path (conc (vector-ref test 10) "/" (vector-ref test 13))) + (test-item (conc test-name ":" (vector-ref test 11))) + (test-status (vector-ref test 4))) + + (if (not (hash-table-ref/default resh test-name #f)) + (hash-table-set! resh test-name (make-hash-table))) + (if (not (hash-table-ref/default (hash-table-ref/default resh test-name #f) test-item #f)) + (hash-table-set! (hash-table-ref/default resh test-name #f) test-item (make-hash-table))) + (hash-table-set! (hash-table-ref/default (hash-table-ref/default resh test-name #f) test-item #f) run-id (list test-status test-html-path)))) + test-data))) + runs) + resh)) + + +;; hash-table tree to html list tree +;; +;; tipfunc takes two parameters: y the tip value and path the path to that point +;; +(define (common:htree->html ht path tipfunc) + (let ((datlist (sort (hash-table->alist ht) + (lambda (a b) + (string< (car a)(car b)))))) + (if (null? datlist) + (tipfunc #f path) ;; really shouldn't get here + (s:ul + (map (lambda (x) + (let* ((levelname (car x)) + (y (cdr x)) + (newpath (append path (list levelname))) + (leaf (or (not (hash-table? y)) + (null? 
(hash-table-keys y))))) + (if leaf + (s:li (tipfunc y newpath)) + (s:li + (list + levelname + (common:htree->html y newpath tipfunc)))))) + datlist))))) + + +;; tests:genrate dashboard body +;; + +(define (tests:dashboard-body page pg-size keys numkeys total-runs linktree area-name get-prev-links get-next-links flag run-patt target-patt) + (let* ((start (* page pg-size)) + ;(runsdat (rmt:get-runs "%" pg-size start (map (lambda (x)(list x "%")) keys))) + (runsdat (rmt:get-runs-by-patt keys run-patt target-patt start pg-size #f 0 sort-order: "desc")) + ; db:get-runs-by-patt keys runnamepatt targpatt offset limit fields last-update + (header (vector-ref runsdat 0)) + (runs (vector-ref runsdat 1)) + (ctr 0) + (test-runs-hash (tests:get-rest-data runs header numkeys)) + (test-list (hash-table-keys test-runs-hash))) + + (s:html tests:css-jscript-block (tests:css-jscript-block-cond flag) + (s:title "Summary for " area-name) + (s:body 'onload "addEvents();" + (get-prev-links page linktree) + (get-next-links page linktree total-runs) + + (s:h1 "Summary for " area-name) + (s:h3 "Filter" ) + (s:input 'type "text" 'name "testname" 'id "testname" 'length "30" 'onkeyup "filtersome()") + ;; top list + + (s:table 'id "LinkedList1" 'border "1" 'cellspacing 0 + (map (lambda (key) + (let* ((res (s:tr 'class "something" + (s:th key ) + (map (lambda (run) + (s:th (vector-ref run ctr))) + runs)))) + (set! 
ctr (+ ctr 1)) + res)) + keys) + (s:tr + (s:th "Run Name") + (map (lambda (run) + (s:th (db:get-value-by-header run header "runname"))) + runs)) + + (map (lambda (test-name) + (let* ((item-hash (hash-table-ref/default test-runs-hash test-name #f)) + (item-keys (sort (hash-table-keys item-hash) string<=?))) + (map (lambda (item-name) + (let* ((res (s:tr 'class item-name + (s:td item-name 'class "test" ) + (map (lambda (run) + (let* ((run-test (hash-table-ref/default item-hash item-name #f)) + (run-id (db:get-value-by-header run header "id")) + (result (hash-table-ref/default run-test run-id "n/a")) + ;(relative-path (get-relative-path)) + (status (if (string? result) + result + (car result))) + (link (if (string? result) + result + (if (equal? flag #t) + (s:a (car result) 'href (conc "./test_log?runid=" run-id "&testname=" item-name )) + (s:a (car result) 'href (string-substitute (conc linktree "/") "" (cadr result) "-")))))) + (s:td link 'class status))) + runs)))) + res)) + item-keys))) + test-list)))))) + +;; (tests:create-html-tree "test-index.html") +;; +(define (tests:create-html-tree outf) + (let* ((lockfile (conc outf ".lock")) + (runs-to-process '()) + (linktree (common:get-linktree)) + (area-name (common:get-testsuite-name)) + (keys (rmt:get-keys)) + (numkeys (length keys)) + (run-patt (or (args:get-arg "-run-patt") + (args:get-arg "-runname") + "%")) + (target (or (args:get-arg "-target-patt") + (args:get-arg "-target") + "%")) + (targlist (string-split target "/")) + (numtarg (length targlist)) + (targtweaked (if (> numkeys numtarg) + (append targlist (make-list (- numkeys numtarg) "%")) + targlist)) + (target-patt (string-join targtweaked "/")) + ;(total-runs (rmt:get-num-runs "%")) ;;this needs to be changed to filter by target + (total-runs (rmt:get-runs-cnt-by-patt run-patt target-patt keys )) + (pg-size 10)) + (if (common:simple-file-lock lockfile) + (begin + ;(print total-runs) + (let loop ((page 0)) + (let* ((oup (open-output-file (or outf (conc 
linktree "/page" page ".html"))))
+                     (get-prev-links (lambda (page linktree )
+                                       (let* ((link (if (not (eq? page 0))
+                                                        (s:a "<<prev" 'href (conc "page" (- page 1) ".html"))
+                                                        (s:a "" 'href (conc "page" page ".html")))))
+                                         link)))
+                     (get-next-links (lambda (page linktree total-runs)
+                                       (let* ((link (if (> total-runs (+ 10 (* page pg-size)))
+                                                        (s:a "next>>" 'href (conc "page" (+ page 1) ".html"))
+                                                        (s:a "" 'href (conc "page" page ".html")))))
+                                         link))) )
+                (print "total runs: " total-runs)
+                (s:output-new
+                 oup
+                 (tests:dashboard-body page pg-size keys numkeys total-runs linktree area-name get-prev-links get-next-links #f run-patt target-patt)) ;; update this function
+                (close-output-port oup)
+                ; (set! page (+ 1 page))
+                (if (> total-runs (* (+ 1 page) pg-size))
+                    (loop (+ 1 page)))))
+            (common:simple-file-release-lock lockfile))
+
+        #f)))
+
+
+;; Read filename and return its lines, in file order, as a list of strings.
+;;
+(define (tests:readlines filename)
+  (call-with-input-file filename
+    (lambda (p)
+      (let loop ((line (read-line p))
+                 (result '()))
+        (if (eof-object? line)
+            (reverse result)
+            (loop (read-line p) (cons line result)))))))
+
+;; Return the text of the log for one test item.
+;;
+;;   run-id    - run id as a string (converted with string->number)
+;;   test-name - passed to rmt:get-tests-for-run as the test name pattern
+;;   item-name - compared against element 11 of each returned record
+;;
+;; The first record whose element 11 equals item-name supplies the path
+;; (element 10 "/" element 13); that file is read and its lines joined with
+;; newlines. If no record matches, a "not found" string is returned instead.
+;;
+(define (tests:get-test-log run-id test-name item-name)
+  (let* ((test-data (rmt:get-tests-for-run
+                     (string->number run-id)
+                     test-name ;; testnamepatt
+                     '() ;; states
+                     '() ;; statuses
+                     #f ;; offset
+                     #f ;; num-to-get
+                     #f ;; hide/not-hide
+                     #f ;; sort-by
+                     #f ;; sort-order
+                     #f ;; 'shortlist ;; qrytype
+                     0 ;; last update
+                     #f))
+         (path "")
+         (found 0))
+    (debug:print-info 0 *default-log-port* "found: " found )
+
+    ;; only scan when at least one record came back; (car test-data) on an
+    ;; empty or #f result would raise before the fallback below is reached
+    (if (pair? test-data)
+        (let loop ((hed (car test-data))
+                   (tal (cdr test-data)))
+          (debug:print-info 0 *default-log-port* "item: " (vector-ref hed 11) (vector-ref hed 10) "/" (vector-ref hed 13))
+
+          (if (equal? (vector-ref hed 11) item-name)
+              (begin
+                (set! found 1)
+                (set! path (conc (vector-ref hed 10) "/" (vector-ref hed 13)))))
+          (if (and (not (null? tal)) (equal? found 0))
+              (loop (car tal)(cdr tal)))))
+    (if (equal? path "")
+        "

Data not found

" + (string-join (tests:readlines path) "\n")))) + + +(define (tests:dynamic-dboard page) +;(define (tests:create-html-tree o) + (let* ( +;(page "1") + (linktree (common:get-linktree)) + (area-name (common:get-testsuite-name)) + (keys (rmt:get-keys)) + (numkeys (length keys)) + (targtweaked (make-list numkeys "%")) + (target-patt (string-join targtweaked "/")) + (total-runs (rmt:get-num-runs "%")) + (pg-size 10) + (pg (if (equal? page #f) + 0 + (- (string->number page) 1))) + (get-prev-links (lambda (pg linktree) + (debug:print-info 0 *default-log-port* "val: " (- 1 pg)) + (let* ((link (if (not (eq? pg 0)) + (s:a "<<prev " 'href (conc "dashboard?page=" pg )) + (s:a "" 'href (conc "dashboard?page=" pg))))) + link))) + (get-next-links (lambda (pg linktree total-runs) + (debug:print-info 0 *default-log-port* "val: " pg) + (debug:print-info 0 *default-log-port* "val: " total-runs " size" pg-size) + + (let* ((link (if (> total-runs (+ 10 (* pg pg-size))) + (s:a "next>> " 'href (conc "dashboard?page=" (+ pg 2) )) + (s:a "" 'href (conc "dashboard?page=" pg ))))) + link))) + (html-body (tests:dashboard-body pg pg-size keys numkeys total-runs linktree area-name get-prev-links get-next-links #t "%" target-patt))) ;; update tis function + html-body)) + +(define (tests:create-html-summary outf) + (let* ((lockfile (conc outf ".lock")) + (linktree (common:get-linktree)) + (keys (rmt:get-keys)) + (area-name (common:get-testsuite-name)) + (run-patt (or (args:get-arg "-run-patt") + (args:get-arg "-runname") + "%")) + (target (or (args:get-arg "-target-patt") + (args:get-arg "-target") + "%")) + (targlist (string-split target "/")) + (numkeys (length keys)) + (numtarg (length targlist)) + (targtweaked (if (> numkeys numtarg) + (append targlist (make-list (- numkeys numtarg) "%")) + targlist)) + (target-patt (string-join targtweaked "/"))) + (if (common:simple-file-lock lockfile) + (begin + (let* (;(runsdat1 (rmt:get-runs run-patt #f #f (map (lambda (x)(list x "%")) keys))) + 
(runsdat (rmt:get-runs-by-patt keys run-patt target-patt #f #f #f 0)) + (runs (vector-ref runsdat 1)) + (header (vector-ref runsdat 0)) + (oup (open-output-file (or outf (conc linktree "/targets.html")))) + (target-hash (test:create-target-hash runs header (length keys)))) + (test:create-target-html target-hash oup area-name linktree) + (test:create-run-html runs area-name linktree (length keys) header)) + (common:simple-file-release-lock lockfile)) + #f))) + +(define (test:get-test-hash test-data) + (let ((resh (make-hash-table))) + (map (lambda (test) + (let* ((test-name (vector-ref test 2)) + (test-html-path (if (file-exists? (conc (vector-ref test 10) "/test-summary.html")) + (conc (vector-ref test 10) "/test-summary.html" ) + (conc (vector-ref test 10) "/" (vector-ref test 13)))) + (test-item (vector-ref test 11)) + (test-status (vector-ref test 4))) + (if (not (hash-table-ref/default resh test-item #f)) + (hash-table-set! resh test-item (make-hash-table))) + (hash-table-set! (hash-table-ref/default resh test-item #f) test-name (list test-status test-html-path)))) + test-data) +resh)) + +(define (test:get-data->b-keys ordered-data a-keys) + (delete-duplicates + (sort (apply + append + (map (lambda (sub-key) + (let ((subdat (hash-table-ref ordered-data sub-key))) + (hash-table-keys subdat))) + a-keys)) + string>=?))) + + +(define (test:create-run-html runs area-name linktree numkeys header) + (map (lambda (run) + (let* ((target (string-join (take (vector->list run) numkeys) "/")) + (run-name (db:get-value-by-header run header "runname")) + (run-time (seconds->work-week/day-time (db:get-value-by-header run header "event_time"))) + (oup (if (file-exists? 
(conc linktree "/" target "/" run-name))
+                         (open-output-file (conc linktree "/" target "/" run-name "/run.html"))
+                         #f))
+                (run-id (db:get-value-by-header run header "id"))
+                (test-data (rmt:get-tests-for-run
+                            run-id
+                            "%" ;; testnamepatt
+                            '() ;; states
+                            '() ;; statuses
+                            #f ;; offset
+                            #f ;; num-to-get
+                            #f ;; hide/not-hide
+                            #f ;; sort-by
+                            #f ;; sort-order
+                            #f ;; 'shortlist ;; qrytype
+                            0 ;; last update
+                            #f))
+                (item-test-hash (test:get-test-hash test-data))
+                (items (hash-table-keys item-test-hash))
+                (test-names (test:get-data->b-keys item-test-hash items)))
+           ;; oup is #f when the run directory does not exist; then nothing is written
+           (if oup
+               (begin
+                 (s:output-new
+                  oup
+                  (s:html tests:css-jscript-block (tests:css-jscript-block-cond #f)
+                          (s:title "Runs View " run-name)
+                          (s:body
+                           (s:h1 "Runs View " )
+                           (s:h3 "Target" target)
+                           (s:p
+                            (s:b "Run name" ) run-name)
+                           (s:p
+                            (s:b "Run Date" ) run-time)
+                           (s:table 'border 1 'cellspacing 0
+                                    (s:tr
+                                     (s:th "Items")
+                                     (map (lambda (test)
+                                            (s:th test))
+                                          test-names))
+                                    (map (lambda (item)
+                                           (let* ((test-hash (hash-table-ref/default item-test-hash item #f)))
+                                             (if test-hash
+                                                 (begin
+                                                   (s:tr
+                                                    (s:td 'class "test" item)
+                                                    (map (lambda (test)
+                                                           (let* ((test-details (hash-table-ref/default test-hash test #f))
+                                                                  (status (if test-details
+                                                                              (car test-details)))
+                                                                  (link (if test-details
+                                                                            (string-substitute (conc linktree "/" target "/" run-name "/") "" (cadr test-details) "-"))))
+                                                             (if test-details
+                                                                 (s:td 'class status
+                                                                       (s:a 'class "link" 'href link status ))
+                                                                 (s:td ""))))
+                                                         test-names))))))
+                                         (sort items string<=?))))))
+                 (close-output-port oup))
+               ;; pass the log port as second argument, like every other debug:print-info call in this file
+               (debug:print-info 0 *default-log-port* "Skip: Dirctory structure " linktree "/" target "/" run-name " does not exist. 
Megatest will not create run.html")))) +runs)) + +(define (test:create-target-hash runs header numkeys) + (let ((resh (make-hash-table))) + (for-each + (lambda (run) + (let* ((run-name (db:get-value-by-header run header "runname")) + (target (string-join (take (vector->list run) numkeys) "/")) + (run-list (hash-table-ref/default resh target #f))) + + (if (not run-list) + (hash-table-set! resh target (list run-name)) + (hash-table-set! resh target (cons run-name run-list))))) + runs) + resh)) + +(define (test:get-max-run-cnt target-hash targets) + (let* ((cnt 0 )) + (map (lambda (target) + (let* ((runs (hash-table-ref/default target-hash target #f)) + (run-length (if runs + (length runs) + 0))) + + (if (< cnt run-length) + (set! cnt run-length)))) + targets) +cnt)) + +(define (test:pad-runs target-hash targets max-row-length) + (map (lambda (target) + (let loop ((run-list (hash-table-ref/default target-hash target #f))) + (if (< (length run-list) max-row-length) + (begin + (hash-table-set! target-hash target (cons "" run-list)) + (loop (hash-table-ref/default target-hash target #f) ))))) + targets) + target-hash) + +(define (test:create-target-html target-hash oup area-name linktree) + (let* ((targets (hash-table-keys target-hash)) + (max-row-length (test:get-max-run-cnt target-hash targets)) + (pad-runs-hash (test:pad-runs target-hash targets max-row-length))) + (s:output-new + oup + (s:html tests:css-jscript-block (tests:css-jscript-block-cond #f) + + (s:title "Target View " area-name) + (s:body + (s:h1 "Target View " area-name) + (s:table 'id "LinkedList1" 'border "1" 'cellspacing 0 + (s:tr 'class "something" + (s:th "Target") + (s:th 'colspan max-row-length "Runs")) + (let* ((tbl (map (lambda (target) + (s:tr + (s:td 'class "test" target) + (let* ((runs (hash-table-ref/default target-hash target #f)) + (rest-row (map (lambda (run) + (if (equal? 
run "") + (s:td run) + (if (file-exists?(conc linktree "/" target "/" run )) + (begin + (s:td + (s:a 'href (conc target "/" run "/run.html") run)))))) + (reverse runs)))) + rest-row))) + targets))) + tbl))))) + (close-output-port oup))) + + +(define (tests:create-html-tree-old outf) + (let* ((lockfile (conc outf ".lock")) + (runs-to-process '())) + (if (common:simple-file-lock lockfile) + (let* ((linktree (common:get-linktree)) + (oup (open-output-file (or outf (conc linktree "/runs-index.html")))) + (area-name (common:get-testsuite-name)) + (keys (rmt:get-keys)) + (numkeys (length keys)) + (runsdat (rmt:get-runs "%" #f #f (map (lambda (x)(list x "%")) keys))) + (header (vector-ref runsdat 0)) + (runs (vector-ref runsdat 1)) + (runtreedat (map (lambda (x) + (tests:run-record->test-path x numkeys)) + runs)) + (runs-htree (common:list->htree runtreedat))) + (set! runs-to-process runs) + (s:output-new + oup + (s:html tests:css-jscript-block + (s:title "Summary for " area-name) + (s:body 'onload "addEvents();" + (s:h1 "Summary for " area-name) + ;; top list + (s:ul 'id "LinkedList1" 'class "LinkedList" + (s:li + "Runs" + (common:htree->html runs-htree + '() + (lambda (x p) + (let* ((targ-path (string-intersperse p "/")) + (full-path (conc linktree "/" targ-path)) + (run-name (car (reverse p)))) + (if (and (common:file-exists? full-path) + (directory? full-path) + (file-write-access? 
full-path)) + (s:a run-name 'href (conc targ-path "/run-summary.html")) + (begin + (debug:print 0 *default-log-port* "INFO: Can't create " targ-path "/run-summary.html") + (conc run-name " (Not able to create summary at " targ-path ")"))))))))))) + (close-output-port oup) + (common:simple-file-release-lock lockfile) + + (for-each + (lambda (run) + (let* ((test-subpath (tests:run-record->test-path run numkeys)) + (run-id (db:get-value-by-header run header "id")) + (run-dir (tests:run-record->test-path run numkeys)) + (test-dats (rmt:get-tests-for-run + run-id + "%/" ;; testnamepatt + '() ;; states + '() ;; statuses + #f ;; offset + #f ;; num-to-get + #f ;; hide/not-hide + #f ;; sort-by + #f ;; sort-order + #f ;; 'shortlist ;; qrytype + 0 ;; last update + #f)) + (tests-tree-dat (map (lambda (test-dat) + ;; (tests:run-record->test-path x numkeys)) + (let* ((test-name (db:test-get-testname test-dat)) + (item-path (db:test-get-item-path test-dat)) + (full-name (db:test-make-full-name test-name item-path)) + (path-parts (string-split full-name))) + path-parts)) + test-dats)) + (tests-htree (common:list->htree tests-tree-dat)) + (html-dir (conc linktree "/" (string-intersperse run-dir "/"))) + (html-path (conc html-dir "/run-summary.html")) + (oup (if (and (common:file-exists? html-dir) + (directory? html-dir) + (file-write-access? 
html-dir)) + (open-output-file html-path) + #f))) + ;; (print "run-dir: " run-dir ", tests-tree-dat: " tests-tree-dat) + (if oup + (begin + (s:output-new + oup + (s:html tests:css-jscript-block + (s:title "Summary for " area-name) + (s:body 'onload "addEvents();" + (s:h1 "Summary for " (string-intersperse run-dir "/")) + ;; top list + (s:ul 'id "LinkedList1" 'class "LinkedList" + (s:li + "Tests" + (common:htree->html tests-htree + '() + (lambda (x p) + (let* ((targ-path (string-intersperse p "/")) + (test-name (car p)) + (item-path ;; (if (> (length p) 2) ;; test-name + run-name + (string-intersperse p "/")) + (full-targ (conc html-dir "/" targ-path)) + (std-file (conc full-targ "/test-summary.html")) + (alt-file (conc full-targ "/megatest-rollup-" test-name ".html")) + (html-file (if (common:file-exists? alt-file) + alt-file + std-file)) + (run-name (car (reverse p)))) + (if (and (not (common:file-exists? full-targ)) + (directory? full-targ) + (file-write-access? full-targ)) + (tests:summarize-test + run-id + (rmt:get-test-id run-id test-name item-path))) + (if (common:file-exists? full-targ) + (s:a run-name 'href html-file) + (begin + (debug:print 0 *default-log-port* "ERROR: can't access " full-targ) + (conc "No summary for " run-name))))) + )))))) + (close-output-port oup))))) + runs) + #t) + #f))) + + + + +(define (tests:update-central-meta-info run-id test-id cpuload diskfree minutes uname hostname) + (rmt:general-call 'update-test-rundat run-id test-id (current-seconds) (or cpuload -1)(or diskfree -1) -1 (or minutes -1)) + (if (and cpuload diskfree) + (rmt:general-call 'update-cpuload-diskfree run-id cpuload diskfree test-id)) + (if minutes + (rmt:general-call 'update-run-duration run-id minutes test-id)) + (if (and uname hostname) + (rmt:general-call 'update-uname-host run-id uname hostname test-id))) + +;; This one is for running with no db access (i.e. 
via rmt: internally) +(define (tests:set-full-meta-info db test-id run-id minutes work-area remtries) +;; (define (tests:set-full-meta-info test-id run-id minutes work-area) +;; (let ((remtries 10)) + (let* ((cpuload (get-cpu-load)) + (diskfree (get-df (current-directory))) + (uname (get-uname "-srvpio")) + (hostname (get-host-name))) + (tests:update-central-meta-info run-id test-id cpuload diskfree minutes uname hostname))) + +;; +;; +(define (tests:get-compressed-steps run-id test-id) + (let* ((steps-data (rmt:get-steps-for-test run-id test-id)) ;; 0 1 2 3 4 5 6 7 + (comprsteps (tests:process-steps-table steps-data))) ;; # + (map (lambda (x) + ;; take advantage of the \n on time->string + (vector ;; we are constructing basically the original vector but collapsing start end records + (vector-ref x 0) ;; id 0 + (let ((s (vector-ref x 1))) + (if (number? s)(seconds->time-string s) s)) ;; starttime 1 + (let ((s (vector-ref x 2))) + (if (number? s)(seconds->time-string s) s)) ;; endtime 2 + (vector-ref x 3) ;; status 3 + (vector-ref x 4) ;; duration 4 + (vector-ref x 5) ;; logfile 5 + (vector-ref x 6) ;; comment 6 + (vector-ref x 7))) ;; id 7 + (sort (hash-table-values comprsteps) + (lambda (a b) + (let ((time-a (vector-ref a 1)) + (time-b (vector-ref b 1)) + (id-a (vector-ref a 7)) + (id-b (vector-ref b 7))) + (if (and (number? time-a)(number? time-b)) + (if (< time-a time-b) + #t + (if (eq? 
time-a time-b) + (< id-a id-b) + ;; (stringwork-week/day-time + (db:test-get-event_time test-dat))) + (s:td "Duration") (s:td (seconds->hr-min-sec (db:test-get-run_duration test-dat))))) + (s:h3 "Log files") + (s:table + 'cellspacing "0" 'border "1" + (s:tr (s:td "Final log")(s:td (s:a 'href logf logf)))) + (s:table + 'cellspacing "0" 'border "1" + (s:tr (s:td "Step Name")(s:td "Start")(s:td "End")(s:td "Status")(s:td "Duration")(s:td "Log File")) + (map (lambda (step-dat) + (s:tr (s:td (tdb:steps-table-get-stepname step-dat)) + (s:td (tdb:steps-table-get-start step-dat)) + (s:td (tdb:steps-table-get-end step-dat)) + (s:td (tdb:steps-table-get-status step-dat)) + (s:td (tdb:steps-table-get-runtime step-dat)) + (s:td (let ((step-log (tdb:steps-table-get-log-file step-dat))) + (s:a 'href step-log step-log))))) + steps-dat)) + ))) + (close-output-port oup))))) + + +;; MUST BE CALLED local! +;; +(define (tests:test-get-paths-matching keynames target fnamepatt #!key (res '())) + ;; BUG: Move the values derived from args to parameters and push to megatest.scm + (let* ((testpatt (or (args:get-arg "-testpatt")(args:get-arg "-testpatt") "%")) + (statepatt (or (args:get-arg "-state") (args:get-arg ":state") "%")) + (statuspatt (or (args:get-arg "-status") (args:get-arg ":status") "%")) + (runname (or (args:get-arg "-runname") (args:get-arg ":runname") "%")) + (paths-from-db (rmt:test-get-paths-matching-keynames-target-new keynames target res + testpatt + statepatt + statuspatt + runname))) + (if fnamepatt + (apply append + (map (lambda (p) + (if (directory-exists? p) + (let ((glob-query (conc p "/" fnamepatt))) + (handle-exceptions + exn + (with-input-from-pipe + (conc "echo " glob-query) + read-lines) ;; we aren't going to try too hard. 
If glob breaks it is likely because someone tried to do */*/*.log or similar + (glob glob-query))) + '())) + paths-from-db)) + paths-from-db))) + + +;; for each test: +;; +(define (tests:filter-non-runnable run-id testkeynames testrecordshash) + (let ((runnables '())) + (for-each + (lambda (testkeyname) + (let* ((test-record (hash-table-ref testrecordshash testkeyname)) + (test-name (tests:testqueue-get-testname test-record)) + (itemdat (tests:testqueue-get-itemdat test-record)) + (item-path (tests:testqueue-get-item_path test-record)) + (waitons (tests:testqueue-get-waitons test-record)) + (keep-test #t) + (test-id (rmt:get-test-id run-id test-name item-path)) + (tdat (rmt:get-testinfo-state-status run-id test-id))) ;; (cdb:get-test-info-by-id *runremote* test-id))) + (if tdat + (begin + ;; Look at the test state and status + (if (or (and (member (db:test-get-status tdat) + '("PASS" "WARN" "WAIVED" "CHECK" "SKIP")) + (equal? (db:test-get-state tdat) "COMPLETED")) + (member (db:test-get-state tdat) + '("INCOMPLETE" "KILLED"))) + (set! keep-test #f)) + + ;; examine waitons for any fails. If it is FAIL or INCOMPLETE then eliminate this test + ;; from the runnable list + (if keep-test + (for-each (lambda (waiton) + ;; for now we are waiting only on the parent test + (let* ((parent-test-id (rmt:get-test-id run-id waiton "")) + (wtdat (rmt:get-testinfo-state-status run-id test-id))) ;; (cdb:get-test-info-by-id *runremote* test-id))) + (if (or (and (equal? (db:test-get-state wtdat) "COMPLETED") + (member (db:test-get-status wtdat) '("FAIL" "ABORT"))) + (member (db:test-get-status wtdat) '("KILLED")) + (member (db:test-get-state wtdat) '("INCOMPETE"))) + ;; (if (or (member (db:test-get-status wtdat) + ;; '("FAIL" "KILLED")) + ;; (member (db:test-get-state wtdat) + ;; '("INCOMPETE"))) + (set! keep-test #f)))) ;; no point in running this one again + waitons)))) + (if keep-test (set! 
runnables (cons testkeyname runnables))))) + testkeynames) + runnables)) + +;;====================================================================== +;; test steps +;;====================================================================== + +;; teststep-set-status! used to be here + +(define (test-get-kill-request run-id test-id) ;; run-id test-name itemdat) + (let* ((testdat (rmt:get-test-info-by-id run-id test-id))) + (and testdat + (equal? (test:get-state testdat) "KILLREQ")))) + +(define (test:tdb-get-rundat-count tdb) + (if tdb + (let ((res 0)) + (sqlite3:for-each-row + (lambda (count) + (set! res count)) + tdb + "SELECT count(id) FROM test_rundat;") + res)) + 0) + +;; (define (tests:set-partial-meta-info test-id run-id minutes work-area) +#;(define (tests:set-partial-meta-info test-id run-id minutes work-area remtries) + (let* ((cpuload (get-cpu-load)) + (diskfree (get-df (current-directory))) + (remtries 10)) + (handle-exceptions + exn + (if (> remtries 0) + (begin + (print-call-chain (current-error-port)) + (debug:print-info 0 *default-log-port* "WARNING: failed to set meta info. Will try " remtries " more times") + (set! remtries (- remtries 1)) + (thread-sleep! 10) + (tests:set-full-meta-info db test-id run-id minutes work-area (- remtries 1))) + (let ((err-status ((condition-property-accessor 'sqlite3 'status #f) exn))) + (debug:print-error 0 *default-log-port* "tried for over a minute to update meta info and failed. 
Giving up") + (debug:print 0 *default-log-port* "EXCEPTION: database probably overloaded or unreadable.") + (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) + (debug:print 5 *default-log-port* "exn=" (condition->list exn)) + (debug:print 0 *default-log-port* " status: " ((condition-property-accessor 'sqlite3 'status) exn)) + (print-call-chain (current-error-port)))) + (tests:update-testdat-meta-info db test-id work-area cpuload diskfree minutes) + ))) + + +;;====================================================================== +;; launch a task - this runs on the originating host, tests themselves +;; +;;====================================================================== + + +;;====================================================================== +;; ezsteps +;;====================================================================== + +;; ezsteps were going to be coded as +;; stepname[,predstep1,predstep2 ...] [{VAR1=first,second,third}] command to execute +;; BUT +;; now are +;; stepname {VAR=first,second,third ...} command ... +;; where the {VAR=first,second,third ...} is optional. + +;; given an exit code and whether or not logpro was used calculate OK/BAD +;; return #t if we are ok, #f otherwise +(define (steprun-good? logpro exitcode) + (or (eq? exitcode 0) + (and logpro (eq? exitcode 2)))) + +;; if handed a string, process it, else look for MT_CMDINFO +(define (launch:get-cmdinfo-assoc-list #!key (encoded-cmd #f)) + (let ((enccmd (if encoded-cmd encoded-cmd (getenv "MT_CMDINFO")))) + (if enccmd + (common:read-encoded-string enccmd) + '()))) + +;; return (conc status ": " comment) from the final section so that +;; the comment can be set in the step record in launch.scm +;; +(define (launch:load-logpro-dat run-id test-id stepname) + (let ((cname (conc stepname ".dat"))) + (if (common:file-exists? 
cname) + (let* ((dat (configf:read-config cname #f #f)) + (csvr (db:logpro-dat->csv dat stepname)) + (csvt (let-values (((fmt-cell fmt-record fmt-csv) (make-format ","))) + (fmt-csv (map list->csv-record csvr)))) + (status (configf:lookup dat "final" "exit-status")) + (msg (configf:lookup dat "final" "message"))) + (if csvt ;; this if blocked stack dump caused by .dat file from logpro being 0-byte. fixed by upgrading logpro + (rmt:csv->test-data run-id test-id csvt) + (debug:print 0 *default-log-port* "ERROR: no csvdat exists for run-id: " run-id " test-id: " test-id " stepname: " stepname ", check that logpro version is 1.15 or newer")) + ;; (debug:print-info 13 *default-log-port* "Error: run-id/test-id/stepname="run-id"/"test-id"/"stepname" => bad csvr="csvr) + ;; ) + (cond + ((equal? status "PASS") "PASS") ;; skip the message part if status is pass + (status (conc (configf:lookup dat "final" "exit-status") ": " (if msg msg "no message"))) + (else #f))) + #f))) + +(define (launch:runstep ezstep run-id test-id exit-info m tal testconfig) ;;; TODO: deprecate me in favor of ezsteps.scm + (let* ((stepname (car ezstep)) ;; do stuff to run the step + (stepinfo (cadr ezstep)) + ;; (let ((info (cadr ezstep))) + ;; (if (proc? info) "" info))) + ;; (stepproc (let ((info (cadr ezstep))) + ;; (if (proc? info) info #f))) + (stepparts (string-match (regexp "^(\\{([^\\}\\{]*)\\}\\s*|)(.*)$") stepinfo)) + (stepparams (list-ref stepparts 2)) ;; for future use, {VAR=1,2,3}, run step for each + (paramparts (if (string? 
stepparams) + (map (lambda (x)(string-split x "=")) (string-split-fields "[^;]*=[^;]*" stepparams)) + '())) + (subrun (alist-ref "subrun" paramparts equal?)) + (stepcmd (list-ref stepparts 3)) + (script "") ; "#!/bin/bash\n") ;; yep, we depend on bin/bash FIXME!!!\ + (logpro-file (conc stepname ".logpro")) + (html-file (conc stepname ".html")) + (dat-file (conc stepname ".dat")) + (tconfig-logpro (configf:lookup testconfig "logpro" stepname)) + (logpro-used (common:file-exists? logpro-file))) + + (debug:print 0 *default-log-port* "stepparts: " stepparts ", stepparams: " stepparams + ", paramparts: " paramparts ", subrun: " subrun ", stepcmd: " stepcmd) + + (if (and tconfig-logpro + (not logpro-used)) ;; no logpro file found but have a defn in the testconfig + (begin + (with-output-to-file logpro-file + (lambda () + (print ";; logpro file extracted from testconfig\n" + ";;") + (print tconfig-logpro))) + (set! logpro-used #t))) + + ;; NB// can safely assume we are in test-area directory + (debug:print 4 *default-log-port* "ezsteps:\n stepname: " stepname " stepinfo: " stepinfo " stepparts: " stepparts + " stepparams: " stepparams " stepcmd: " stepcmd) + + ;; ;; first source the previous environment + ;; (let ((prev-env (conc ".ezsteps/" prevstep (if (string-search (regexp "csh") + ;; (get-environment-variable "SHELL")) ".csh" ".sh")))) + ;; (if (and prevstep (common:file-exists? prev-env)) + ;; (set! script (conc script "source " prev-env)))) + + ;; call the command using mt_ezstep + ;; (set! script (conc "mt_ezstep " stepname " " (if prevstep prevstep "x") " " stepcmd)) + + (debug:print 4 *default-log-port* "script: " script) + (rmt:teststep-set-status! 
run-id test-id stepname "start" "-" #f #f) + ;; now launch the actual process + (call-with-environment-variables + (list (cons "PATH" (conc (get-environment-variable "PATH") ":."))) + (lambda () ;; (process-run "/bin/bash" "-c" "exec ls -l /tmp/foobar > /tmp/delme-more.log 2>&1") + (let* ((cmd (conc stepcmd " > " stepname ".log 2>&1")) ;; >outfile 2>&1 + (pid #f)) + (let ((proc (lambda () + (set! pid (process-run "/bin/bash" (list "-c" cmd)))))) + (if subrun + (begin + (debug:print-info 0 *default-log-port* "Running without MT_.* environment variables.") + (common:without-vars proc "^MT_.*")) + (proc))) + + (with-output-to-file "Makefile.ezsteps" + (lambda () + (print stepname ".log :") + (print "\t" cmd) + (if (common:file-exists? (conc stepname ".logpro")) + (print "\tlogpro " stepname ".logpro " stepname ".html < " stepname ".log")) + (print) + (print stepname " : " stepname ".log") + (print)) + #:append) + + (rmt:test-set-top-process-pid run-id test-id pid) + (let processloop ((i 0)) + (let-values (((pid-val exit-status exit-code)(process-wait pid #t))) + (mutex-lock! m) + (launch:einf-pid-set! exit-info pid) ;; (vector-set! exit-info 0 pid) + (launch:einf-exit-status-set! exit-info exit-status) ;; (vector-set! exit-info 1 exit-status) + (launch:einf-exit-code-set! exit-info exit-code) ;; (vector-set! exit-info 2 exit-code) + (mutex-unlock! m) + (if (eq? pid-val 0) + (begin + (thread-sleep! 2) + (processloop (+ i 1)))) + ))))) + (debug:print-info 0 *default-log-port* "step " stepname " completed with exit code " (launch:einf-exit-code exit-info)) ;; (vector-ref exit-info 2)) + ;; now run logpro if needed + (if logpro-used + (let* ((logpro-exe (or (getenv "LOGPRO_EXE") "logpro")) + (pid (process-run (conc "/bin/sh -c '"logpro-exe" "logpro-file " " (conc stepname ".html") " < " stepname ".log > /dev/null'")))) + (let processloop ((i 0)) + (let-values (((pid-val exit-status exit-code)(process-wait pid #t))) + (mutex-lock! 
m) + ;; (make-launch:einf pid: pid exit-status: exit-status exit-code: exit-code) + (launch:einf-pid-set! exit-info pid) ;; (vector-set! exit-info 0 pid) + (launch:einf-exit-status-set! exit-info exit-status) ;; (vector-set! exit-info 1 exit-status) + (launch:einf-exit-code-set! exit-info exit-code) ;; (vector-set! exit-info 2 exit-code) + (mutex-unlock! m) + (if (eq? pid-val 0) + (begin + (thread-sleep! 2) + (processloop (+ i 1))))) + (debug:print-info 0 *default-log-port* "logpro for step " stepname " exited with code " (launch:einf-exit-code exit-info))))) ;; (vector-ref exit-info 2))))) + + (let ((exinfo (launch:einf-exit-code exit-info)) ;; (vector-ref exit-info 2)) + (logfna (if logpro-used (conc stepname ".html") "")) + (comment #f)) + (if logpro-used + (let ((datfile (conc stepname ".dat"))) + ;; load the .dat file into the test_data table if it exists + (if (common:file-exists? datfile) + (set! comment (launch:load-logpro-dat run-id test-id stepname))) + (rmt:test-set-log! run-id test-id (conc stepname ".html")))) + (rmt:teststep-set-status! run-id test-id stepname "end" exinfo comment logfna)) + ;; set the test final status + (let* ((process-exit-status (launch:einf-exit-code exit-info)) ;; (vector-ref exit-info 2)) + (this-step-status (cond + ((and (eq? process-exit-status 2) logpro-used) 'warn) ;; logpro 2 = warnings + ((and (eq? process-exit-status 3) logpro-used) 'check) ;; logpro 3 = check + ((and (eq? process-exit-status 4) logpro-used) 'waived) ;; logpro 4 = waived + ((and (eq? process-exit-status 5) logpro-used) 'abort) ;; logpro 5 = abort + ((and (eq? process-exit-status 6) logpro-used) 'skip) ;; logpro 6 = skip + ((eq? process-exit-status 0) 'pass) ;; logpro 0 = pass + (else 'fail))) + (overall-status (cond + ((eq? (launch:einf-rollup-status exit-info) 2) 'warn) ;; rollup-status (vector-ref exit-info 3) + ((eq? (launch:einf-rollup-status exit-info) 0) 'pass) ;; (vector-ref exit-info 3) + (else 'fail))) + (next-status (cond + ((eq? 
overall-status 'pass) this-step-status) + ((eq? overall-status 'warn) + (if (eq? this-step-status 'fail) 'fail 'warn)) + ((eq? overall-status 'abort) 'abort) + (else 'fail))) + (next-state ;; "RUNNING") ;; WHY WAS THIS CHANGED TO NOT USE (null? tal) ?? + (cond + ((null? tal) ;; more to run? + "COMPLETED") + (else "RUNNING")))) + (debug:print 4 *default-log-port* "Exit value received: " (launch:einf-exit-code exit-info) " logpro-used: " logpro-used + " this-step-status: " this-step-status " overall-status: " overall-status + " next-status: " next-status " rollup-status: " (launch:einf-rollup-status exit-info)) ;; (vector-ref exit-info 3)) + (case next-status + ((warn) + (launch:einf-rollup-status-set! exit-info 2) ;; (vector-set! exit-info 3 2) ;; rollup-status + ;; NB// test-set-status! does rdb calls under the hood + (tests:test-set-status! run-id test-id next-state "WARN" + (if (eq? this-step-status 'warn) "Logpro warning found" #f) + #f)) + ((check) + (launch:einf-rollup-status-set! exit-info 3) ;; (vector-set! exit-info 3 3) ;; rollup-status + ;; NB// test-set-status! does rdb calls under the hood + (tests:test-set-status! run-id test-id next-state "CHECK" + (if (eq? this-step-status 'check) "Logpro check found" #f) + #f)) + ((waived) + (launch:einf-rollup-status-set! exit-info 4) ;; (vector-set! exit-info 3 3) ;; rollup-status + ;; NB// test-set-status! does rdb calls under the hood + (tests:test-set-status! run-id test-id next-state "WAIVED" + (if (eq? this-step-status 'check) "Logpro waived found" #f) + #f)) + ((abort) + (launch:einf-rollup-status-set! exit-info 5) ;; (vector-set! exit-info 3 4) ;; rollup-status + ;; NB// test-set-status! does rdb calls under the hood + (tests:test-set-status! run-id test-id next-state "ABORT" + (if (eq? this-step-status 'abort) "Logpro abort found" #f) + #f)) + ((skip) + (launch:einf-rollup-status-set! exit-info 6) ;; (vector-set! exit-info 3 4) ;; rollup-status + ;; NB// test-set-status! 
does rdb calls under the hood + (tests:test-set-status! run-id test-id next-state "SKIP" + (if (eq? this-step-status 'skip) "Logpro skip found" #f) + #f)) + ((pass) + (tests:test-set-status! run-id test-id next-state "PASS" #f #f)) + (else ;; 'fail + (launch:einf-rollup-status-set! exit-info 1) ;; (vector-set! exit-info 3 1) ;; force fail, this used to be next-state but that doesn't make sense. should always be "COMPLETED" + (tests:test-set-status! run-id test-id "COMPLETED" "FAIL" (conc "Failed at step " stepname) #f) + ))) + logpro-used)) + +(define (launch:manage-steps run-id test-id item-path fullrunscript ezsteps subrun test-name tconfigreg exit-info m) + ;; (let-values + ;; (((pid exit-status exit-code) + ;; (run-n-wait fullrunscript))) + ;; (tests:test-set-status! test-id "RUNNING" "n/a" #f #f) + ;; Since we should have a clean slate at this time there is no need to do + ;; any of the other stuff that tests:test-set-status! does. Let's just + ;; force RUNNING/n/a + + ;; (thread-sleep! 0.3) + ;; (tests:test-force-state-status! run-id test-id "RUNNING" "n/a") + (rmt:set-state-status-and-roll-up-items run-id test-name item-path "RUNNING" #f #f) + ;; (thread-sleep! 0.3) ;; NFS slowness has caused grief here + + ;; if there is a runscript do it first + (if fullrunscript + (let ((pid (process-run fullrunscript))) + (rmt:test-set-top-process-pid run-id test-id pid) + (let loop ((i 0)) + (let-values + (((pid-val exit-status exit-code) (process-wait pid #t))) + (mutex-lock! m) + (launch:einf-pid-set! exit-info pid) ;; (vector-set! exit-info 0 pid) + (launch:einf-exit-status-set! exit-info exit-status) ;; (vector-set! exit-info 1 exit-status) + (launch:einf-exit-code-set! exit-info exit-code) ;; (vector-set! exit-info 2 exit-code) + (launch:einf-rollup-status-set! exit-info exit-code) ;; (vector-set! exit-info 3 exit-code) ;; rollup status + (mutex-unlock! m) + (if (eq? pid-val 0) + (begin + (thread-sleep! 
2) + (loop (+ i 1))) + ))))) + ;; then, if runscript ran ok (or did not get called) + ;; do all the ezsteps (if any) + (if (or ezsteps subrun) + (let* ((test-run-dir (tests:get-test-path-from-environment)) + (testconfig ;; (read-config (conc work-area "/testconfig") #f #t environ-patt: "pre-launch-env-vars")) ;; FIXME??? is allow-system ok here? + ;; NOTE: it is tempting to turn off force-create of testconfig but dynamic + ;; ezstep names need a full re-eval here. + (tests:get-testconfig test-name item-path tconfigreg #t force-create: #t)) ;; 'return-procs))) + (ezstepslst (if (hash-table? testconfig) + (hash-table-ref/default testconfig "ezsteps" '()) + #f))) + (if testconfig + (hash-table-set! *testconfigs* test-name testconfig) ;; cached for lazy reads later ... + (begin + (launch:setup) + (debug:print 0 *default-log-port* "WARNING: no testconfig found for " test-name " in search path:\n " + (string-intersperse (tests:get-tests-search-path *configdat*) "\n ")))) + ;; after all that, still no testconfig? Time to abort + (if (not testconfig) + (begin + (debug:print-error 0 *default-log-port* "Failed to resolve megatest.config, runconfigs.config and testconfig issues. Giving up now") + (exit 1))) + + ;; create a proc for the subrun if requested, save that proc in the ezsteps table as the last entry + ;; 1. get section [runarun] + ;; 2. unset MT_* vars + ;; 3. fix target + ;; 4. fix runname + ;; 5. fix testpatt or calculate it from contour + ;; 6. launch the run + ;; 7. roll up the run result and or roll up the logpro processed result + (when (configf:lookup testconfig "subrun" "runwait") ;; we use runwait as the flag that a subrun is requested + (subrun:initialize-toprun-test testconfig test-run-dir) + (let* ((mt-cmd (subrun:launch-cmd test-run-dir))) + (debug:print-info 0 *default-log-port* "Subrun command is \"" mt-cmd "\"") + (set! ezsteps #t) ;; set the needed flag + (set! 
ezstepslst + (append (or ezstepslst '()) + (list (list "subrun" (conc "{subrun=true} " mt-cmd))))))) + + ;; process the ezsteps + (if ezsteps + (begin + (if (not (common:file-exists? ".ezsteps"))(create-directory ".ezsteps")) + ;; if ezsteps was defined then we are sure to have at least one step but check anyway + (if (not (> (length ezstepslst) 0)) + (debug:print-error 0 *default-log-port* "ezsteps defined but ezstepslst is zero length") + (let loop ((ezstep (car ezstepslst)) + (tal (cdr ezstepslst)) + (prevstep #f)) + (debug:print-info 0 *default-log-port* "Processing ezstep \"" (string-intersperse ezstep " ") "\"") + ;; check exit-info (vector-ref exit-info 1) + (if (launch:einf-exit-status exit-info) ;; (vector-ref exit-info 1) + (let ((logpro-used (launch:runstep ezstep run-id test-id exit-info m tal testconfig)) + (stepname (car ezstep))) + ;; if logpro-used read in the stepname.dat file + (if (and logpro-used (common:file-exists? (conc stepname ".dat"))) + (launch:load-logpro-dat run-id test-id stepname)) + (if (steprun-good? logpro-used (launch:einf-exit-code exit-info)) + (if (not (null? tal)) + (loop (car tal) (cdr tal) stepname)) + (debug:print 0 *default-log-port* "WARNING: step " (car ezstep) " failed. 
Stopping")))
+              (debug:print 0 *default-log-port* "WARNING: a prior step failed, stopping at " ezstep)))))))))
+
+;; Watch a running test and kill it when asked to or when it exceeds its time limit.
+;;
+;; Each pass of the loop:
+;;   - samples cpu load and (when a sync is overdue) disk free; a new value is only kept
+;;     when it differs enough from the previous one, otherwise #f
+;;   - reads the test record via rmt:get-test-info-by-id
+;;   - decides to kill when a kill request is pending or when runtlim seconds have elapsed
+;;   - a status of "DEAD" is not killed; the test is set back to RUNNING instead
+;;   - calls launch:handle-zombie-tests and, when a sync is due, tests:update-central-meta-info
+;;   - on kill: signals the sub-pids of the known pids (term, then kill after 5 seconds),
+;;     records the KILLED state, calls launch:end-of-run-check and exits the process
+;;   - otherwise sleeps 3 seconds and loops for as long as misc-flags 'keep-going is true
+;; After the loop ends one final tests:update-central-meta-info is made.
+;;
+;;   exit-info  : launch:einf record, read here for the pid of the running step
+;;   m          : mutex, held while the kill is carried out
+;;   runtlim    : run time limit in seconds, or #f for no limit
+;;   misc-flags : hash table; the 'keep-going entry is the stop flag for this loop
+;;
+;; NOTE(review): item-path, fullrunscript, ezsteps, test-name and tconfigreg are not referenced in this body.
+;; NOTE(review): calc-minutes returns elapsed *seconds* (there is no division by 60) - confirm the intended units.
+;;
+(define (launch:monitor-job run-id test-id item-path fullrunscript ezsteps test-name tconfigreg exit-info m work-area runtlim misc-flags)
+  (let* ((update-period (string->number (or (configf:lookup *configdat* "setup" "test-stats-update-period") "30")))
+         (start-seconds (current-seconds))
+         (calc-minutes  (lambda ()
+                          (inexact->exact
+                           (round
+                            (-
+                             (current-seconds)
+                             start-seconds)))))
+         (kill-tries    0)) ;; only mentioned in a commented-out message fragment below
+    ;; (tests:set-full-meta-info #f test-id run-id (calc-minutes) work-area)
+    ;; (tests:set-full-meta-info test-id run-id (calc-minutes) work-area)
+    (tests:set-full-meta-info #f test-id run-id (calc-minutes) work-area 10)
+
+    ;; NOTE(review): the loop variable minutes is never read; the body calls (calc-minutes) directly
+    (let loop ((minutes   (calc-minutes))
+               (cpu-load  (alist-ref 'adj-core-load (common:get-normalized-cpu-load #f)))
+               (disk-free (get-df (current-directory)))
+               (last-sync (current-seconds)))
+      #;(common:telemetry-log "zombie" (conc "launch:monitor-job - top of loop encountered at "(current-seconds)" with last-sync="last-sync))
+      (let* ((over-time     (> (current-seconds) (+ last-sync update-period)))
+             (new-cpu-load  (let* ((load  (alist-ref 'adj-core-load (common:get-normalized-cpu-load #f)))
+                                   (delta (abs (- load cpu-load))))
+                              (if (> delta 0.1) ;; don't bother updating with small changes
+                                  load
+                                  #f)))
+             (new-disk-free (let* ((df    (if over-time ;; only get df every 30 seconds
+                                              (get-df (current-directory))
+                                              disk-free))
+                                   (delta (abs (- df disk-free))))
+                              (if (and (> df 0)
+                                       (> (/ delta df) 0.1)) ;; (> delta 200) ;; ignore changes under 200 Meg
+                                  df
+                                  #f)))
+             ;; sync when either value changed or update-period seconds have passed since last-sync
+             (do-sync       (or new-cpu-load new-disk-free over-time))
+
+             (test-info     (rmt:get-test-info-by-id run-id test-id))
+             (state         (db:test-get-state test-info)) ;; NOTE(review): bound but not read below
+             (status        (db:test-get-status test-info))
+             (kill-reason   "no kill reason specified")
+             (kill-job?     #f))
+        #;(common:telemetry-log "zombie" (conc "launch:monitor-job - decision time encountered at "(current-seconds)" with last-sync="last-sync" do-sync="do-sync" over-time="over-time" update-period="update-period))
+        (cond
+         ((test-get-kill-request run-id test-id)
+          (set! kill-reason "KILLING TEST since received kill request (KILLREQ)")
+          (set! kill-job? #t))
+         ((and runtlim (> (- (current-seconds) start-seconds) runtlim))
+          (set! kill-reason (conc "KILLING TEST DUE TO TIME LIMIT EXCEEDED! Runtime=" (- (current-seconds) start-seconds) " seconds, limit=" runtlim))
+          (set! kill-job? #t))
+         ((equal? status "DEAD")
+          (tests:update-central-meta-info run-id test-id new-cpu-load new-disk-free (calc-minutes) #f #f)
+          ;; NOTE(review): test-id and 'foo sit where the other caller in this file passes test-name and item-path - confirm 'foo is ignored
+          (rmt:set-state-status-and-roll-up-items run-id test-id 'foo "RUNNING" "n/a" "was marked dead; really still running.")
+          ;;(set! kill-reason "KILLING TEST because it was marked as DEAD by launch:handle-zombie-tests (might indicate really overloaded server or else overzealous setup.deadtime)") ;; MARK RUNNING
+          (set! kill-job? #f)))
+
+        (debug:print 4 *default-log-port* "cpu: " new-cpu-load " disk: " new-disk-free " last-sync: " last-sync " do-sync: " do-sync)
+        (launch:handle-zombie-tests run-id)
+        (when do-sync
+          ;;(with-output-to-file (conc (getenv "MT_TEST_RUN_DIR") "/last-loadinfo.log" #:append)
+          ;; (lambda () (pp (list (current-seconds) new-cpu-load new-disk-free (calc-minutes)))))
+          #;(common:telemetry-log "zombie" (conc "launch:monitor-job - dosync started at "(current-seconds)))
+          (tests:update-central-meta-info run-id test-id new-cpu-load new-disk-free (calc-minutes) #f #f)
+          #;(common:telemetry-log "zombie" (conc "launch:monitor-job - dosync finished at "(current-seconds))))
+
+        (if kill-job?
+            (begin
+              (debug:print-info 0 *default-log-port* "proceeding to kill test: "kill-reason)
+              (mutex-lock! m)
+              ;; NOTE: The pid can change as different steps are run. Do we need handshaking between this
+              ;; section and the runit section? Or add a loop that tries three times with a 1/4 second
+              ;; between tries?
+              (let* ((pid1 (launch:einf-pid exit-info)) ;; (vector-ref exit-info 0))
+                     (pid2 (rmt:test-get-top-process-pid run-id test-id))
+                     ;; keep only real pids; pid1 and pid2 may be the same process
+                     (pids (delete-duplicates (filter number? (list pid1 pid2)))))
+                (if (not (null? pids))
+                    (begin
+                      (for-each
+                       (lambda (pid)
+                         ;; any error while signalling this pid is logged and the next pid is tried
+                         (handle-exceptions
+                          exn
+                          (begin
+                            (debug:print-info 0 *default-log-port* "Unable to kill process with pid " pid ", possibly already killed.")
+                            (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)))
+                          (debug:print 0 *default-log-port* "WARNING: Request received to kill job " pid) ;; " (attempt # " kill-tries ")")
+                          (debug:print-info 0 *default-log-port* "Signal mask=" (signal-mask))
+                          ;; (if (process:alive? pid)
+                          ;; (begin
+                          ;; first ask politely: signal/term to every pid returned by process:get-sub-pids
+                          (map (lambda (pid-num)
+                                 (process-signal pid-num signal/term))
+                               (process:get-sub-pids pid))
+                          (thread-sleep! 5)
+                          ;; (if (process:process-alive? pid)
+                          ;; then signal/kill; errors are ignored per pid
+                          (map (lambda (pid-num)
+                                 (handle-exceptions
+                                  exn
+                                  #f
+                                  (process-signal pid-num signal/kill)))
+                               (process:get-sub-pids pid))))
+                       ;; (debug:print-info 0 *default-log-port* "not killing process " pid " as it is not alive"))))
+                       pids)
+                      ;; BB: question to Matt -- does the tests:test-state-status! encompass rollup to toplevel? If not, should it?
+                      (tests:test-set-status! run-id test-id "KILLED" "KILLED" (conc (args:get-arg "-m")" "kill-reason) #f)) ;; BB ADDED kill-reason -- confirm OK with Matt
+                    (begin
+                      (debug:print-error 0 *default-log-port* "Nothing to kill, pid1=" pid1 ", pid2=" pid2)
+                      (tests:test-set-status! run-id test-id "KILLED" "FAILED TO KILL" (conc (args:get-arg "-m")" "kill-reason) #f) ;; BB ADDED kill-reason -- confirm OK with Matt
+                      )))
+              (mutex-unlock! m)
+              ;; no point in sticking around. Exit now. But run end of run before exiting?
+              (launch:end-of-run-check run-id)
+              (exit)))
+        (if (hash-table-ref/default misc-flags 'keep-going #f)
+            (begin
+              (thread-sleep! 3) ;; (+ 3 (random 6))) ;; add some jitter to the call home time to spread out the db accesses
+              (if (hash-table-ref/default misc-flags 'keep-going #f) ;; keep originals for cpu-load and disk-free unless they change more than the allowed delta
+                  (loop (calc-minutes)
+                        (or new-cpu-load cpu-load)
+                        (or new-disk-free disk-free)
+                        (if do-sync (current-seconds) last-sync)))))))
+    (tests:update-central-meta-info run-id test-id (get-cpu-load) (get-df (current-directory))(calc-minutes) #f #f))) ;; NOTE: Checking twice for keep-going is intentional
+
+;; set up needed environment variables given a run-id and optionally a target, itempath etc.
+;;
+(define (runs:set-megatest-env-vars run-id #!key (inkeys #f)(inrunname #f)(inkeyvals #f)(intarget #f)(testname #f)(itempath #f))
+  ;;(bb-check-path msg: "runs:set-megatest-env-vars entry")
+  (let* ((target (or intarget
+                     (common:args-get-target)
+                     (get-environment-variable "MT_TARGET")))
+         (keys (if inkeys inkeys (rmt:get-keys)))
+         (keyvals (if inkeyvals inkeyvals (keys:target->keyval keys target)))
+         (vals (hash-table-ref/default *env-vars-by-run-id* run-id #f))
+         (link-tree (common:get-linktree))) ;; (configf:lookup *configdat* "setup" "linktree")))
+    (if testname (setenv "MT_TEST_NAME" testname))
+    (if itempath (setenv "MT_ITEMPATH" itempath))
+
+    ;; get the info from the db and put it in the cache
+    (if link-tree
+        (setenv "MT_LINKTREE" link-tree)
+        (debug:print-error 0 *default-log-port* "linktree not set, should be set in megatest.config in [setup] section."))
+    (if (not vals)
+        (let ((ht (make-hash-table)))
+          (hash-table-set! *env-vars-by-run-id* run-id ht)
+          (set! vals ht)
+          (for-each
+           (lambda (key)
+             (hash-table-set!
vals (car key) (cadr key))) + keyvals))) + ;; from the cached data set the vars + + (hash-table-for-each + vals + (lambda (key val) + (debug:print 2 *default-log-port* "setenv " key " " val) + (safe-setenv key val))) + ;;(bb-check-path msg: "runs:set-megatest-env-vars block 1") + ;;(BB> "*env-vars-by-run-id*/runid("run-id" vals="(hash-table->alist vals)) + + (if (not (get-environment-variable "MT_TARGET"))(setenv "MT_TARGET" target)) + ;; we had a case where there was an exception generated by the hash-table-ref + ;; due to *configdat* being #f Adding a handle and exit + (let fatal-loop ((count 0)) + (handle-exceptions + exn + (let ((call-chain (get-call-chain)) + (msg ((condition-property-accessor 'exn 'message) exn))) + (if (< count 5) + (begin ;; this call is colliding, do some crude stuff to fix it. + (debug:print 0 *default-log-port* "ERROR: *configdat* was inaccessible! This should never happen. Retry #" count) + (launch:setup force-reread: #t) + (fatal-loop (+ count 1))) + (begin + (debug:print 0 *default-log-port* "FATAL: *configdat* was inaccessible! This should never happen. Retried " count " times. Message: " msg) + (debug:print 0 *default-log-port* "Call chain:") + (with-output-to-port *default-log-port* + + (lambda () + (print "*configdat* is >>"*configdat*"<<") + (pp *configdat*) + (pp call-chain))) + + (exit 1)))) + ;;(bb-check-path msg: "runs:set-megatest-env-vars block 1.5") + (when (or (not *configdat*) (not (hash-table? *configdat*))) + (debug:print 0 *default-log-port* "WARNING: *configdat* was inaccessible! This should never happen. Brute force reread.") + ;;(BB> "ERROR: *configdat* was inaccessible! This should never happen. Brute force reread.") + (thread-sleep! 2) ;; assuming nfs lag. + (launch:setup force-reread: #t)) + (alist->env-vars (hash-table-ref/default *configdat* "env-override" '())))) ;;;; environment is tainted HERE in this let block. 
+ ;;(bb-check-path msg: "runs:set-megatest-env-vars block 2") + ;; Lets use this as an opportunity to put MT_RUNNAME in the environment + (let ((runname (if inrunname inrunname (rmt:get-run-name-from-id run-id)))) + (if runname + (setenv "MT_RUNNAME" runname) + (debug:print-error 0 *default-log-port* "no value for runname for id " run-id))) + (setenv "MT_RUN_AREA_HOME" *toppath*) + ;; if a testname and itempath are available set the remaining appropriate variables + (if testname (setenv "MT_TEST_NAME" testname)) + (if itempath (setenv "MT_ITEMPATH" itempath)) + ;;(bb-check-path msg: "runs:set-megatest-env-vars block 3") + (if (and testname link-tree) + (setenv "MT_TEST_RUN_DIR" (conc (getenv "MT_LINKTREE") "/" + (getenv "MT_TARGET") "/" + (getenv "MT_RUNNAME") "/" + (getenv "MT_TEST_NAME") + (if (and itempath + (not (equal? itempath ""))) + (conc "/" itempath) + "")))))) + +(define (launch:execute encoded-cmd) + (let* ((cmdinfo (common:read-encoded-string encoded-cmd)) + (tconfigreg #f)) + (setenv "MT_CMDINFO" encoded-cmd) + ;;(bb-check-path msg: "launch:execute incoming") + (if (list? 
cmdinfo) ;; ((testpath /tmp/mrwellan/jazzmind/src/example_run/tests/sqlitespeed) + ;; (test-name sqlitespeed) (runscript runscript.rb) (db-host localhost) (run-id 1)) + (let* ((testpath (assoc/default 'testpath cmdinfo)) ;; testpath is the test spec area + (top-path (assoc/default 'toppath cmdinfo)) + (work-area (assoc/default 'work-area cmdinfo)) ;; work-area is the test run area + (test-name (assoc/default 'test-name cmdinfo)) + (runscript (assoc/default 'runscript cmdinfo)) + (ezsteps (assoc/default 'ezsteps cmdinfo)) + (subrun (assoc/default 'subrun cmdinfo)) + ;; (runremote (assoc/default 'runremote cmdinfo)) + ;; (transport (assoc/default 'transport cmdinfo)) ;; not used + ;; (serverinf (assoc/default 'serverinf cmdinfo)) + ;; (port (assoc/default 'port cmdinfo)) + (serverurl (assoc/default 'serverurl cmdinfo)) + (homehost (assoc/default 'homehost cmdinfo)) + (run-id (assoc/default 'run-id cmdinfo)) + (test-id (assoc/default 'test-id cmdinfo)) + (target (assoc/default 'target cmdinfo)) + (areaname (assoc/default 'areaname cmdinfo)) + (itemdat (assoc/default 'itemdat cmdinfo)) + (env-ovrd (assoc/default 'env-ovrd cmdinfo)) + (set-vars (assoc/default 'set-vars cmdinfo)) ;; pre-overrides from -setvar + (runname (assoc/default 'runname cmdinfo)) + (megatest (assoc/default 'megatest cmdinfo)) + (runtlim (assoc/default 'runtlim cmdinfo)) + (contour (assoc/default 'contour cmdinfo)) + (item-path (item-list->path itemdat)) + (mt-bindir-path (assoc/default 'mt-bindir-path cmdinfo)) + (keys #f) + (keyvals #f) + (fullrunscript (if (not runscript) + #f + (if (substring-index "/" runscript) + runscript ;; use unadultered if contains slashes + (let ((fulln (conc work-area "/" runscript))) + (if (and (common:file-exists? fulln) + (file-execute-access? 
fulln)) + fulln + runscript))))) ;; assume it is on the path + (check-work-area (lambda () + ;; NFS might not have propagated the directory meta data to the run host - give it time if needed + (let loop ((count 0)) + (if (or (common:directory-exists? work-area) + (> count 10)) + (change-directory work-area) + (begin + (debug:print 0 *default-log-port* "INFO: Not starting job yet - directory " work-area " not found") + (thread-sleep! 10) + (loop (+ count 1))))) + + (if (not (string=? (common:real-path work-area)(common:real-path (current-directory)))) + (begin + (debug:print 0 *default-log-port* + "INFO: we are expecting to be in directory " work-area "\n" + " but we are actually in the directory " (current-directory) "\n" + " doing another change dir.") + (change-directory work-area))) + + ;; spot check that the files in testpath are available. Too often NFS delays cause problems here. + (let ((files (glob (conc testpath "/*"))) + (bad-files '())) + (for-each + (lambda (fullname) + (let* ((fname (pathname-strip-directory fullname)) + (targn (conc work-area "/" fname))) + (if (not (file-exists? targn)) + (set! bad-files (cons fname bad-files))))) + files) + (if (not (null? bad-files)) + (begin + (debug:print 0 *default-log-port* "INFO: test data from " testpath " not copied properly or filesystem problems causing data to not be found. Re-running the copy command.") + (debug:print 0 *default-log-port* "INFO: missing files from " work-area ": " (string-intersperse bad-files ", ")) + (launch:test-copy testpath work-area)))) + ;; one more time, change to the work-area directory + (change-directory work-area))) + ) ;; let* + + (if contour (setenv "MT_CONTOUR" contour)) + + ;; immediated set some key variables from CMDINFO data, yes, these will be set again below ... + ;; + (setenv "MT_TESTSUITENAME" areaname) + (setenv "MT_RUN_AREA_HOME" top-path) + (set! 
*toppath* top-path) + (change-directory *toppath*) ;; temporarily switch to the run area home + (setenv "MT_TEST_RUN_DIR" work-area) + + (launch:setup) ;; should be properly in the run area home now + + (if contour (setenv "MT_CONTOUR" contour)) + + ;; immediated set some key variables from CMDINFO data, yes, these will be set again below ... + ;; + (setenv "MT_TESTSUITENAME" areaname) + (setenv "MT_RUN_AREA_HOME" top-path) + (set! *toppath* top-path) + (change-directory *toppath*) ;; temporarily switch to the run area home + (setenv "MT_TEST_RUN_DIR" work-area) + + (launch:setup) ;; should be properly in the run area home now + + (set! tconfigreg (tests:get-all)) ;; mapping of testname => test source path + (let ((sighand (lambda (signum) + ;; (signal-mask! signum) ;; to mask or not? seems to cause issues in exiting + (if (eq? signum signal/stop) + (debug:print-error 0 *default-log-port* "attempt to STOP process. Exiting.")) + (set! *time-to-exit* #t) + (print "Received signal " signum ", cleaning up before exit (set this test to COMPLETED/ABORT) . Please wait...") + (let ((th1 (make-thread (lambda () + (print "set test to COMPLETED/ABORT begin.") + (rmt:test-set-state-status run-id test-id "COMPLETED" "ABORT" "received kill signal") + (print "set test to COMPLETED/ABORT complete.") + (print "Killed by signal " signum ". Exiting") + (exit 1)))) + (th2 (make-thread (lambda () + (thread-sleep! 20) + (debug:print 0 *default-log-port* "Done") + (exit 4))))) + (thread-start! th2) + (thread-start! th1) + (thread-join! th2))))) + (set-signal-handler! signal/int sighand) + (set-signal-handler! signal/term sighand) + ) ;; (set-signal-handler! 
signal/stop sighand) + + ;; Do not run the test if it is REMOVING, RUNNING, KILLREQ or REMOTEHOSTSTART, + ;; Mark the test as REMOTEHOSTSTART *IMMEDIATELY* + ;; + (let* ((test-info (rmt:get-test-info-by-id run-id test-id)) + (test-host (if test-info + (db:test-get-host test-info) + (begin + (debug:print 0 *default-log-port* "ERROR: failed to find a record for test-id " test-id ", exiting.") + (exit)))) + (test-pid (db:test-get-process_id test-info))) + (cond + ;; -mrw- I'm removing KILLREQ from this list so that a test in KILLREQ state is treated as a "do not run" flag. + ((member (db:test-get-state test-info) '("INCOMPLETE" "KILLED" "UNKNOWN" "STUCK")) ;; prior run of this test didn't complete, go ahead and try to rerun + (debug:print 0 *default-log-port* "INFO: test is INCOMPLETE or KILLED, treat this execute call as a rerun request") + ;; (tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a") + + (rmt:general-call 'set-test-start-time #f test-id) + (rmt:test-set-state-status run-id test-id "REMOTEHOSTSTART" "n/a" #f) + ) ;; prime it for running + ((member (db:test-get-state test-info) '("RUNNING" "REMOTEHOSTSTART")) + (if (process:alive-on-host? test-host test-pid) + (debug:print-error 0 *default-log-port* "test state is " (db:test-get-state test-info) " and process " test-pid " is still running on host " test-host ", cannot proceed") + (exit))) + ((not (member (db:test-get-state test-info) '("REMOVING" "REMOTEHOSTSTART" "RUNNING" "KILLREQ"))) + ;; (tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a") + (rmt:general-call 'set-test-start-time #f test-id) + (rmt:test-set-state-status run-id test-id "REMOTEHOSTSTART" "n/a" #f) + ) + (else ;; (member (db:test-get-state test-info) '("REMOVING" "REMOTEHOSTSTART" "RUNNING" "KILLREQ")) + (debug:print-error 0 *default-log-port* "test state is " (db:test-get-state test-info) ", cannot proceed") + (exit)))) + + ;; cleanup prior execution's steps + (rmt:delete-steps-for-test! 
run-id test-id) + + (debug:print 2 *default-log-port* "Executing " test-name " (id: " test-id ") on " (get-host-name)) + (set! keys (rmt:get-keys)) + ;; (runs:set-megatest-env-vars run-id inkeys: keys inkeyvals: keyvals) ;; these may be needed by the launching process + ;; one of these is defunct/redundant ... + (if (not (launch:setup force-reread: #t)) + (begin + (debug:print 0 *default-log-port* "Failed to setup, exiting") + ;; (sqlite3:finalize! db) + ;; (sqlite3:finalize! tdb) + (exit 1))) + ;; validate that the test run area is available + (check-work-area) + + ;; still need to go back to run area home for next couple steps + (change-directory *toppath*) + + ;; NOTE: Current order is to process runconfigs *before* setting the MT_ vars. This + ;; seems non-ideal but could well break stuff + ;; BUG? BUG? BUG? + + (let ((rconfig (full-runconfigs-read)) ;; (read-config (conc *toppath* "/runconfigs.config") #f #t sections: (list "default" target)))) + (wconfig (configf:read-config "waivers.config" #f #t sections: `( "default" ,target )))) ;; read the waivers config if it exists + ;; (setup-env-defaults (conc *toppath* "/runconfigs.config") run-id (make-hash-table) keyvals target) + ;; (set-run-config-vars run-id keyvals target) ;; (db:get-target db run-id)) + ;; Now have runconfigs data loaded, set environment vars + (for-each + (lambda (section) + (for-each + (lambda (varval) + (let ((var (car varval)) + (val (cadr varval))) + (if (and (string? var)(string? val)) + (begin + (safe-setenv var (configf:eval-string-in-environment val))) ;; val) + (debug:print-error 0 *default-log-port* "bad variable spec, " var "=" val)))) + (configf:get-section rconfig section))) + (list "default" target))) + ;;(bb-check-path msg: "launch:execute post block 1") + + ;; NFS might not have propagated the directory meta data to the run host - give it time if needed + (let loop ((count 0)) + (if (or (common:file-exists? 
work-area) + (> count 10)) + (change-directory work-area) + (begin + (debug:print 0 *default-log-port* "INFO: Not starting job yet - directory " work-area " not found") + (thread-sleep! 10) + (loop (+ count 1))))) + + ;; now we can switch to the work-area? + (change-directory work-area) + ;;(bb-check-path msg: "launch:execute post block 1.5") + ;; (change-directory work-area) + (set! keyvals (keys:target->keyval keys target)) + ;; apply pre-overrides before other variables. The pre-override vars must not + ;; clobbers things from the official sources such as megatest.config and runconfigs.config + (if (string? set-vars) + (let ((varpairs (string-split set-vars ","))) + (debug:print 4 *default-log-port* "varpairs: " varpairs) + (map (lambda (varpair) + (let ((varval (string-split varpair "="))) + (if (eq? (length varval) 2) + (let ((var (car varval)) + (val (cadr varval))) + (debug:print 1 *default-log-port* "Adding pre-var/val " var " = " val " to the environment") + (setenv var val))))) + varpairs))) + ;;(bb-check-path msg: "launch:execute post block 2") + (for-each + (lambda (varval) + (let ((var (car varval)) + (val (cadr varval))) + (if val + (setenv var val) + (begin + (debug:print-error 0 *default-log-port* "required variable " var " does not have a valid value. 
Exiting") + (exit))))) + (list + (list "MT_TEST_RUN_DIR" work-area) + (list "MT_TEST_NAME" test-name) + (list "MT_ITEM_INFO" (conc itemdat)) + (list "MT_ITEMPATH" item-path) + (list "MT_RUNNAME" runname) + (list "MT_MEGATEST" megatest) + (list "MT_TARGET" target) + (list "MT_LINKTREE" (common:get-linktree)) ;; (configf:lookup *configdat* "setup" "linktree")) + (list "MT_TESTSUITENAME" (common:get-testsuite-name)))) + ;;(bb-check-path msg: "launch:execute post block 3") + + (if mt-bindir-path (setenv "PATH" (conc (getenv "PATH") ":" mt-bindir-path))) + ;;(bb-check-path msg: "launch:execute post block 4") + ;; (change-directory top-path) + ;; Can setup as client for server mode now + ;; (client:setup) + + + ;; environment overrides are done *before* the remaining critical envars. + (alist->env-vars env-ovrd) + ;;(bb-check-path msg: "launch:execute post block 41") + (runs:set-megatest-env-vars run-id inkeys: keys inkeyvals: keyvals) + ;;(bb-check-path msg: "launch:execute post block 42") + (set-item-env-vars itemdat) + ;;(bb-check-path msg: "launch:execute post block 43") + (let ((blacklist (configf:lookup *configdat* "setup" "blacklistvars"))) + (if blacklist + (let ((vars (string-split blacklist))) + (save-environment-as-files "megatest" ignorevars: vars) + (for-each (lambda (var) + (unsetenv var)) + vars)) + (save-environment-as-files "megatest"))) + ;;(bb-check-path msg: "launch:execute post block 44") + ;; open-run-close not needed for test-set-meta-info + ;; (tests:set-full-meta-info #f test-id run-id 0 work-area) + ;; (tests:set-full-meta-info test-id run-id 0 work-area) + (tests:set-full-meta-info #f test-id run-id 0 work-area 10) + + ;; (thread-sleep! 0.3) ;; NFS slowness has caused grief here + + (if (args:get-arg "-xterm") + (set! fullrunscript "xterm") + (if (and fullrunscript + (common:file-exists? fullrunscript) + (not (file-execute-access? 
fullrunscript))) + (system (conc "chmod ug+x " fullrunscript)))) + + ;; We are about to actually kick off the test + ;; so this is a good place to remove the records for + ;; any previous runs + ;; (db:test-remove-steps db run-id testname itemdat) + ;; now is also a good time to write the .testconfig file + (let* ((tconfig-fname (conc work-area "/.testconfig")) + (tconfig-tmpfile (conc tconfig-fname ".tmp")) + (tconfig (tests:get-testconfig test-name item-path tconfigreg #t force-create: #t))) ;; 'return-procs))) + (configf:write-alist tconfig tconfig-tmpfile) + (file-move tconfig-tmpfile tconfig-fname #t)) + ;; + (let* ((m (make-mutex)) + (kill-job? #f) + (exit-info (make-launch:einf pid: #t exit-status: #t exit-code: #t rollup-status: 0)) ;; pid exit-status exit-code (i.e. process was successfully run) rollup-status + (job-thread #f) + ;; (keep-going #t) + (misc-flags (let ((ht (make-hash-table))) + (hash-table-set! ht 'keep-going #t) + ht)) + (runit (lambda () + (launch:manage-steps run-id test-id item-path fullrunscript ezsteps subrun test-name tconfigreg exit-info m))) + (monitorjob (lambda () + (launch:monitor-job run-id test-id item-path fullrunscript ezsteps test-name tconfigreg exit-info m work-area runtlim misc-flags))) + (th1 (make-thread monitorjob "monitor job")) + (th2 (make-thread runit "run job"))) + (set! job-thread th2) + (thread-start! th1) + (thread-start! th2) + (thread-join! th2) + (debug:print-info 0 *default-log-port* "Megatest exectute of test " test-name ", item path " item-path " complete. Notifying the db ...") + (hash-table-set! misc-flags 'keep-going #f) + (thread-join! th1) + (thread-sleep! 1) ;; givbe thread th1 a chance to be done TODO: Verify this is needed. At 0.1 I was getting fail to stop, increased to total of 1.1 sec. + (mutex-lock! m) + (let* ((item-path (item-list->path itemdat)) + ;; only state and status needed - use lazy routine + (testinfo (rmt:get-testinfo-state-status run-id test-id))) + ;; Am I completed? 
+ (if (member (db:test-get-state testinfo) '("REMOTEHOSTSTART" "RUNNING")) ;; NOTE: It should *not* be REMOTEHOSTSTART but for reasons I don't yet understand it sometimes gets stuck in that state ;; (not (equal? (db:test-get-state testinfo) "COMPLETED")) + (let ((new-state (if kill-job? "KILLED" "COMPLETED") ;; (if (eq? (vector-ref exit-info 2) 0) ;; exited with "good" status + ;; "COMPLETED" ;; (db:test-get-state testinfo))) ;; else preseve the state as set within the test + ) + (new-status (cond + ((not (launch:einf-exit-status exit-info)) "FAIL") ;; job failed to run ... (vector-ref exit-info 1) + ((eq? (launch:einf-rollup-status exit-info) 0) ;; (vector-ref exit-info 3) + ;; if the current status is AUTO then defer to the calculated value (i.e. leave this AUTO) + (if (equal? (db:test-get-status testinfo) "AUTO") "AUTO" "PASS")) + ((eq? (launch:einf-rollup-status exit-info) 1) "FAIL") ;; (vector-ref exit-info 3) + ((eq? (launch:einf-rollup-status exit-info) 2) ;; (vector-ref exit-info 3) + ;; if the current status is AUTO the defer to the calculated value but qualify (i.e. make this AUTO-WARN) + (if (equal? (db:test-get-status testinfo) "AUTO") "AUTO-WARN" "WARN")) + ((eq? (launch:einf-rollup-status exit-info) 3) "CHECK") + ((eq? (launch:einf-rollup-status exit-info) 4) "WAIVED") + ((eq? (launch:einf-rollup-status exit-info) 5) "ABORT") + ((eq? (launch:einf-rollup-status exit-info) 6) "SKIP") + (else "FAIL")))) ;; (db:test-get-status testinfo))) + (debug:print-info 1 *default-log-port* "Test exited in state=" (db:test-get-state testinfo) ", setting state/status based on exit code of " (launch:einf-exit-status exit-info) " and rollup-status of " (launch:einf-rollup-status exit-info)) + (tests:test-set-status! 
run-id
+                                        test-id
+                                        new-state
+                                        new-status
+                                        (args:get-arg "-m") #f)
+                ;; need to update the top test record if PASS or FAIL and this is a subtest
+                ;; NO NEED TO CALL set-state-status-and-roll-up-items HERE, THIS IS DONE IN set-state-status-and-roll-up-items called by tests:test-set-status!
+                ))
+            ;; for automated creation of the rollup html file this is a good place...
+            (if (not (equal? item-path ""))
+                (tests:summarize-items run-id test-id test-name #f))
+            (tests:summarize-test run-id test-id) ;; don't force - just update if no
+            (rmt:update-run-stats run-id (rmt:get-raw-run-stats run-id)))
+          (mutex-unlock! m)
+          (launch:end-of-run-check run-id )
+          (debug:print 2 *default-log-port* "Output from running " fullrunscript ", pid " (launch:einf-pid exit-info) " in work area "
+                       work-area ":\n====\n exit code " (launch:einf-exit-code exit-info) "\n" "====\n")
+          (if (not (launch:einf-exit-status exit-info))
+              (exit 4))))
+    )))
+
+;; Check whether process pid still exists on host by running "pstree -A <pid>" over ssh.
+;;
+;; Returns #t when the command printed at least one line and #f when it printed nothing.
+;; When host or pid is #f, or host is "n/a", no check is possible and #t is returned.
+;;
+;; NOTE(review): presumably a failed ssh also prints nothing on stdout, which would be
+;; reported as "not alive" - confirm this is acceptable for unreachable hosts.
+;;
+(define (launch:is-test-alive host pid)
+  (if (and host pid (not (equal? host "n/a")))
+      (let* ((cmd    (conc "ssh " host " pstree -A " pid))
+             (output (with-input-from-pipe cmd read-lines)))
+        (print "cmd: " cmd "\n op: " output )
+        ;; read-lines gives a list of lines; any output at all means the process was found
+        (not (null? output)))
+      #t))
+
+;; Look at the tests of run-id that are in state RUNNING, LAUNCHED or REMOTEHOSTSTART and
+;; set each one that appears to be dead to KILLREQ.
+;;
+;; A test is treated as dead when event-time + duration + 600 seconds lies in the past
+;; (no duration update for ten minutes) AND launch:is-test-alive reports the top process gone.
+;; The liveness check is only made for tests that already failed the time check.
+;;
+;; Returns the number of tests set to KILLREQ; 0 when there are no such tests.
+;; (Previously an empty result raised an error from taking the car of the empty list.)
+;;
+(define (launch:kill-tests-if-dead run-id)
+  (let loop ((remaining (rmt:get-tests-for-run run-id "%" `("RUNNING" "LAUNCHED" "REMOTEHOSTSTART") `() #f #f #f #f #f #f #f #f))
+             (kill-cnt  0))
+    (if (not (pair? remaining)) ;; nothing (left) to check
+        kill-cnt
+        (let* ((running-test (car remaining))
+               (test-name    (vector-ref running-test 2))
+               (item-path    (vector-ref running-test 11))
+               (test-id      (vector-ref running-test 0))
+               (host         (vector-ref running-test 6))
+               (pid          (rmt:test-get-top-process-pid run-id test-id))
+               (event-time   (vector-ref running-test 5))
+               (duration     (vector-ref running-test 12))
+               (curr-time    (current-seconds))
+               ;; test has not updated duration in last 10 min then likely its not running but confirm before marking it as killed
+               (dead?        (and (< (+ event-time duration 600) curr-time)
+                                  (not (launch:is-test-alive host pid)))))
+          (if dead?
+              (begin
+                (debug:print 0 *default-log-port* "test " test-name "/" item-path " needs to be killed")
+                (rmt:set-state-status-and-roll-up-items run-id test-name item-path "KILLREQ" "n/a" #f)))
+          (loop (cdr remaining)
+                (if dead? (+ kill-cnt 1) kill-cnt))))))
+
+;; DO NOT USE - caching of configs is handled in launch:setup now.
+;;
+(define (launch:cache-config)
+  ;; if we have a linktree and -runtests and -target and the directory exists dump the config
+  ;; to megatest-(current-seconds).cfg and symlink it to megatest.cfg
+  (if (and *configdat*
+           (or (args:get-arg "-run")
+               (args:get-arg "-runtests")
+               (args:get-arg "-execute")))
+      (let* ((linktree (common:get-linktree)) ;; (get-environment-variable "MT_LINKTREE"))
+             (target (common:args-get-target exit-if-bad: #t))
+             (runname (or (args:get-arg "-runname")
+                          (args:get-arg ":runname")
+                          (getenv "MT_RUNNAME")))
+             (fulldir (conc linktree "/"
+                            target "/"
+                            runname)))
+        (if (and linktree (common:file-exists?
linktree)) ;; can't proceed without linktree
+            (begin
+              (debug:print-info 0 *default-log-port* "Have -run with target=" target ", runname=" runname ", fulldir=" fulldir ", testpatt=" (or (args:get-arg "-testpatt") "%"))
+              (if (not (common:file-exists? fulldir))
+                  (create-directory fulldir #t)) ;; need to protect with exception handler
+              (if (and target
+                       runname
+                       (common:file-exists? fulldir))
+                  (let ((tmpfile (conc fulldir "/.megatest.cfg." (current-seconds)))
+                        (targfile (conc fulldir "/.megatest.cfg-" megatest-version "-" megatest-fossil-hash))
+                        (rconfig (conc fulldir "/.runconfig." megatest-version "-" megatest-fossil-hash)))
+                    (if (common:file-exists? rconfig) ;; only cache megatest.config AFTER runconfigs has been cached
+                        (begin
+                          (debug:print-info 0 *default-log-port* "Caching megatest.config in " tmpfile)
+                          (if (not (common:in-running-test?))
+                              (configf:write-alist *configdat* tmpfile))
+                          (system (conc "ln -sf " tmpfile " " targfile))))
+                    )))
+            (debug:print-info 1 *default-log-port* "No linktree yet, no caching configs.")))))
+
+
+;; gather available information, if legit read configs in this order:
+;;
+;;   if have cache;
+;;      read it and return it
+;;   else
+;;      megatest.config     (do not cache)
+;;      runconfigs.config   (cache if all vars avail)
+;;      megatest.config     (cache if all vars avail)
+;;   returns:
+;;      *toppath*
+;;   side effects:
+;;      sets; *configdat*    (megatest.config info)
+;;            *runconfigdat* (runconfigs.config info)
+;;            *configstatus* (status of the read data)
+;;
+;; The whole call runs under *launch-setup-mutex*. When *toppath* is set, *configstatus* is
+;; 'fulldata and force-reread is #f, launch:setup-body is skipped and *toppath* is returned.
+;;
+;; The mutex is released on the error path too: if the body raises, the mutex is unlocked and
+;; the same condition is re-raised to the caller. Without this a failed read would leave the
+;; mutex locked and the next launch:setup call would block.
+;;
+(define (launch:setup #!key (force-reread #f) (areapath #f))
+  (mutex-lock! *launch-setup-mutex*)
+  (let ((res (handle-exceptions
+              exn
+              (begin
+                (mutex-unlock! *launch-setup-mutex*)
+                (abort exn)) ;; pass the original condition on to the caller's handler
+              (if (and *toppath*
+                       (eq? *configstatus* 'fulldata) (not force-reread)) ;; got it all
+                  (begin
+                    (debug:print 2 *default-log-port* "NOTE: skipping launch:setup-body call since we have fulldata")
+                    *toppath*)
+                  (launch:setup-body force-reread: force-reread areapath: areapath)))))
+    (mutex-unlock! *launch-setup-mutex*)
+    res))
+
+;; return paths depending on what info is available.
+;;
+;; Returns a pair (mtcachef . rccachef): the cache file paths for megatest.config and
+;; runconfigs.config. Both are #f when no cache directory could be determined.
+;;
+;; The cache directory is the test directory (linktree/target/runname/testname) when
+;; common:directory-writable? accepts it, otherwise the run directory (linktree/target/runname).
+;; NOTE(review): this relies on common:directory-writable? returning the directory path (or #f),
+;; since its result is used to build further paths - confirm.
+;; NOTE(review): areapath, toppath and mtconfig are not referenced in this body, and use-cache
+;; is bound but not read.
+;;
+(define (launch:get-cache-file-paths areapath toppath target mtconfig)
+  (let* ((use-cache (common:use-cache?))
+         (runname (common:args-get-runname))
+         (linktree (common:get-linktree))
+         (testname (common:get-full-test-name))
+         (rundir (if (and runname target linktree)
+                     (common:directory-writable? (conc linktree "/" target "/" runname))
+                     #f))
+         (testdir (if (and rundir testname)
+                      (common:directory-writable? (conc rundir "/" testname))
+                      #f))
+         (cachedir (or testdir rundir))
+         (mtcachef (and cachedir (conc cachedir "/" ".megatest.cfg-" megatest-version "-" megatest-fossil-hash)))
+         (rccachef (and cachedir (conc cachedir "/" ".runconfigs.cfg-" megatest-version "-" megatest-fossil-hash))))
+    (debug:print-info 6 *default-log-port*
+                      "runname=" runname
+                      "\n linktree=" linktree
+                      "\n testname=" testname
+                      "\n rundir=" rundir
+                      "\n testdir=" testdir
+                      "\n cachedir=" cachedir
+                      "\n mtcachef=" mtcachef
+                      "\n rccachef=" rccachef)
+    (cons mtcachef rccachef)))
+
+(define (launch:setup-body #!key (force-reread #f) (areapath #f))
+  (if (and (eq? *configstatus* 'fulldata)
+           *toppath*
+           (not force-reread)) ;; no need to reprocess
+      *toppath* ;; return toppath
+      (let* ((use-cache (common:use-cache?)) ;; BB- use-cache checks *configdat* for use-cache setting. We do not have *configdat*. Bootstrapping problem here.
+             (toppath (or *toppath* areapath (getenv "MT_RUN_AREA_HOME"))) ;; preserve toppath
+             (target (common:args-get-target))
+             (sections (if target (list "default" target) #f)) ;; for runconfigs
+             (mtconfig (or (args:get-arg "-config") "megatest.config")) ;; allow overriding megatest.config
+             (cachefiles (launch:get-cache-file-paths areapath toppath target mtconfig))
+             ;; checking for null cachefiles should not be necessary, I was seeing error car of '(), might be a chicken bug or a red herring ...
+             (mtcachef (if (null?
cachefiles) + #f + (car cachefiles))) ;; (and cachedir (conc cachedir "/" ".megatest.cfg-" megatest-version "-" megatest-fossil-hash))) + (rccachef (if (null? cachefiles) + #f + (cdr cachefiles)))) ;; (and cachedir (conc cachedir "/" ".runconfigs.cfg-" megatest-version "-" megatest-fossil-hash))) + ;; (cancreate (and cachedir (common:file-exists? cachedir)(file-write-access? cachedir) (not (common:in-running-test?))))) + (set! *toppath* toppath) ;; This is needed when we are running as a test using CMDINFO as a datasource + ;;(BB> "launch:setup-body -- cachefiles="cachefiles) + (cond + ;; if mtcachef exists just read it, however we need to assume toppath is available in $MT_RUN_AREA_HOME + ((and (not force-reread) + mtcachef rccachef + use-cache + (get-environment-variable "MT_RUN_AREA_HOME") + (common:file-exists? mtcachef) + (common:file-exists? rccachef)) + ;;(BB> "launch:setup-body -- cond branch 1 - use-cache") + (set! *configdat* (configf:read-alist mtcachef)) + ;;(BB> "launch:setup-body -- 1 set! *configdat*="*configdat*) + (set! *runconfigdat* (configf:read-alist rccachef)) + (set! *configinfo* (list *configdat* (get-environment-variable "MT_RUN_AREA_HOME"))) + (set! *configstatus* 'fulldata) + (set! *toppath* (get-environment-variable "MT_RUN_AREA_HOME")) + *toppath*) + ;; there are no existing cached configs, do full reads of the configs and cache them + ;; we have all the info needed to fully process runconfigs and megatest.config + ((and ;; (not force-reread) ;; force-reread is irrelevant in the AND, could however OR it? + mtcachef + rccachef) ;; BB- why are we doing this without asking if caching is desired? 
+ ;;(BB> "launch:setup-body -- cond branch 2") + (let* ((first-pass (configf:find-and-read-config ;; NB// sets MT_RUN_AREA_HOME as side effect + mtconfig + environ-patt: "env-override" + given-toppath: toppath + pathenvvar: "MT_RUN_AREA_HOME")) + (first-rundat (let ((toppath (if toppath + toppath + (car first-pass)))) + (configf:read-config ;; (conc toppath "/runconfigs.config") ;; this should be converted to runconfig:read but it is non-trivial, leaving it for now. + (conc (if (string? toppath) + toppath + (get-environment-variable "MT_RUN_AREA_HOME")) + "/runconfigs.config") + *runconfigdat* #t + sections: sections)))) + (set! *runconfigdat* first-rundat) + (if first-pass ;; + (begin + ;;(BB> "launch:setup-body -- \"first-pass\"=first-pass") + (set! *configdat* (car first-pass)) + ;;(BB> "launch:setup-body -- 2 set! *configdat*="*configdat*) + (set! *configinfo* first-pass) + (set! *toppath* (or toppath (cadr first-pass))) ;; use the gathered data unless already have it + (set! toppath *toppath*) + (if (not *toppath*) + (begin + (debug:print-error 0 *default-log-port* "you are not in a megatest area!") + (exit 1))) + (setenv "MT_RUN_AREA_HOME" *toppath*) + ;; the seed read is done, now read runconfigs, cache it then read megatest.config one more time and cache it + (let* ((keys (rmt:get-keys)) + (key-vals (keys:target->keyval keys target)) + (linktree (common:get-linktree)) ;; (or (getenv "MT_LINKTREE")(if *configdat* (configf:lookup *configdat* "setup" "linktree") #f))) + ; (if *configdat* + ; (configf:lookup *configdat* "setup" "linktree") + ; (conc *toppath* "/lt")))) + (second-pass (configf:find-and-read-config + mtconfig + environ-patt: "env-override" + given-toppath: toppath + pathenvvar: "MT_RUN_AREA_HOME")) + (runconfigdat (begin ;; this read of the runconfigs will see any adjustments made by re-reading megatest.config + (for-each (lambda (kt) + (setenv (car kt) (cadr kt))) + key-vals) + (configf:read-config (conc toppath "/runconfigs.config") 
*runconfigdat* #t ;; consider using runconfig:read some day ... + sections: sections))) + (cachefiles (launch:get-cache-file-paths areapath toppath target mtconfig)) + (mtcachef (car cachefiles)) + (rccachef (cdr cachefiles))) + ;; trap exception due to stale NFS handle -- Error: (open-output-file) cannot open file - Stale NFS file handle: "/p/fdk/gwa/lefkowit/mtTesting/qa/primbeqa/links/p1222/11/PDK_r1.1.1/prim/clean/pcell_testgen/.runconfigs.cfg-1.6427-7d1e789cb3f62f9cde719a4865bb51b3c17ea853" - ticket 220546342 + ;; TODO - consider 1) using simple-lock to bracket cache write + ;; 2) cache in hash on server, since need to do rmt: anyway to lock. + + (if rccachef + (common:fail-safe + (lambda () + (configf:write-alist runconfigdat rccachef)) + (conc "Could not write cache file - "rccachef))) + (if mtcachef + (common:fail-safe + (lambda () + (configf:write-alist *configdat* mtcachef)) + (conc "Could not write cache file - "mtcachef))) + (set! *runconfigdat* runconfigdat) + (if (and rccachef mtcachef) (set! *configstatus* 'fulldata)))) + ;; no configs found? should not happen but let's try to recover gracefully, return an empty hash-table + (set! *configdat* (make-hash-table)) + ))) + + ;; else read what you can and set the flag accordingly + ;; here we don't have either mtconfig or rccachef + (else + ;;(BB> "launch:setup-body -- cond branch 3 - else") + (let* ((cfgdat (configf:find-and-read-config + (or (args:get-arg "-config") "megatest.config") + environ-patt: "env-override" + given-toppath: (get-environment-variable "MT_RUN_AREA_HOME") + pathenvvar: "MT_RUN_AREA_HOME"))) + + (if (and cfgdat (list? cfgdat) (> (length cfgdat) 0) (hash-table? (car cfgdat))) + (let* ((toppath (or (get-environment-variable "MT_RUN_AREA_HOME")(cadr cfgdat))) + (rdat (configf:read-config (conc toppath ;; convert this to use runconfig:read! + "/runconfigs.config") *runconfigdat* #t sections: sections))) + (set! *configinfo* cfgdat) + (set! *configdat* (car cfgdat)) + (set! 
*runconfigdat* rdat) + (set! *toppath* toppath) + (set! *configstatus* 'partial)) + (begin + (debug:print-error 0 *default-log-port* "No " mtconfig " file found. Giving up.") + (exit 2)))))) + ;; COND ends here. + + ;; additional house keeping + (let* ((linktree (or (common:get-linktree) + (conc *toppath* "/lt")))) + (if linktree + (begin + (if (not (common:file-exists? linktree)) + (begin + (handle-exceptions + exn + (begin + (debug:print-error 0 *default-log-port* "Something went wrong when trying to create linktree dir at " linktree) + (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) + (exit 1)) + (create-directory linktree #t)))) + (handle-exceptions + exn + (begin + (debug:print-error 0 *default-log-port* "Something went wrong when trying to create link to linktree at " *toppath*) + (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))) + (let ((tlink (conc *toppath* "/lt"))) + (if (not (common:file-exists? tlink)) + (create-symbolic-link linktree tlink))))) + (begin + (debug:print-error 0 *default-log-port* "linktree not defined in [setup] section of megatest.config") + ))) + (if (and *toppath* + (directory-exists? *toppath*)) + (begin + (setenv "MT_RUN_AREA_HOME" *toppath*) + (setenv "MT_TESTSUITENAME" (common:get-testsuite-name))) + (begin + (debug:print-error 0 *default-log-port* "failed to find the top path to your Megatest area.") + (set! 
*toppath* #f) ;; force it to be false so we return #f + #f)) + + ;; one more attempt to cache the configs for future reading + (let* ((cachefiles (launch:get-cache-file-paths areapath toppath target mtconfig)) + (mtcachef (car cachefiles)) + (rccachef (cdr cachefiles))) + + ;; trap exception due to stale NFS handle -- Error: (open-output-file) cannot open file - Stale NFS file handle: "...somepath.../.runconfigs.cfg-1.6427-7d1e789cb3f62f9cde719a4865bb51b3c17ea853" - ticket 220546342 + ;; TODO - consider 1) using simple-lock to bracket cache write + ;; 2) cache in hash on server, since need to do rmt: anyway to lock. + (if (and rccachef *runconfigdat* (not (common:file-exists? rccachef))) + (common:fail-safe + (lambda () + (configf:write-alist *runconfigdat* rccachef)) + (conc "Could not write cache file - "rccachef)) + ) + (if (and mtcachef *configdat* (not (common:file-exists? mtcachef))) + (common:fail-safe + (lambda () + (configf:write-alist *configdat* mtcachef)) + (conc "Could not write cache file - "mtcachef)) + ) + (if (and rccachef mtcachef *runconfigdat* *configdat*) + (set! *configstatus* 'fulldata))) + + ;; if have -append-config then read and append here + (let ((cfname (args:get-arg "-append-config"))) + (if (and cfname + (file-read-access? cfname)) + (configf:read-config cfname *configdat* #t))) ;; values are added to the hash, no need to do anything special. 
+      *toppath*)))
+
+;; Choose the directory under which a test's work area will be created.
+;;
+;;   confdat    - megatest.config data (hash table); its "disks" section and
+;;                the [setup] minspace value are read
+;;   testconfig - testconfig hash table or #f; when it has a "disks" section
+;;                that one takes precedence over the one in confdat
+;;
+;; Always returns a path:
+;;   1. the disk picked by common:get-disk-with-most-free-space, if any
+;;      (NOTE(review): its result is presumably (free-space . path) - only
+;;      the cdr is used here; confirm)
+;;   2. otherwise the first configured disk path that can be created, trying
+;;      the longest path strings first
+;;   3. otherwise <toppath>/runs
+;;
+;; The fallbacks used to return (cons 1 <path>) - a pair, although callers
+;; conc the result into a directory name - and a missing [disks] section fell
+;; off the end of a one-armed `if`. Both cases now return the <toppath>/runs
+;; path. The former hard exit on "no disk with enough space" stays disabled.
+;;
+(define (get-best-disk confdat testconfig)
+  (let* ((disks    (or (and testconfig (hash-table-ref/default testconfig "disks" #f))
+                       (hash-table-ref/default confdat "disks" #f)))
+         (minspace (let ((m (configf:lookup confdat "setup" "minspace")))
+                     (string->number (or m "10000"))))
+         ;; last resort, used whenever no configured disk is usable
+         (fallback (conc *toppath* "/runs")))
+    (cond
+     ((not disks) fallback)                                            ;; no [disks] section at all
+     ((common:get-disk-with-most-free-space disks minspace) => cdr)    ;; normal case
+     ((null? disks) fallback)                                          ;; empty [disks] section
+     (else
+      ;; each entry is a list whose second element is the path
+      (let ((paths (sort disks (lambda (x y) (> (string-length (cadr x)) (string-length (cadr y)))))))
+        (let loop ((head (car paths)) (tail (cdr paths)))
+          ;; create-directory failing (exception) just moves us on to the next candidate
+          (let ((result (handle-exceptions exn #f (create-directory (cadr head) #t))))
+            (cond
+             (result result)
+             ((null? tail) fallback)
+             (else (loop (car tail) (cdr tail)))))))))))
+
+
+(define (launch:test-copy test-src-path test-path)
+  (let* ((ovrcmd (let ((cmd (configf:lookup *configdat* "setup" "testcopycmd")))
+                   (if cmd
+                       ;; substitute the TEST_SRC_PATH and TEST_TARG_PATH
+                       (string-substitute "TEST_TARG_PATH" test-path
+                                          (string-substitute "TEST_SRC_PATH" test-src-path cmd #t) #t)
+                       #f)))
+         (cmd (if ovrcmd
+                  ovrcmd
+                  (conc "rsync -av" (if (debug:debug-mode 1) "" "q") " " test-src-path "/ " test-path "/"
+                        " >> " test-path "/mt_launch.log 2>> " test-path "/mt_launch.log")))
+         (status (system cmd)))
+    (if (not (eq?
status 0)) + (debug:print 2 *default-log-port* "ERROR: problem with running \"" cmd "\"")))) + + +;; Desired directory structure: +;; +;; - - -. +;; | +;; v +;; - - -|- +;; +;; dir stored in test is: +;; +;; - - [ - ] +;; +;; All log file links should be stored relative to the top of link path +;; +;; - [ - ] +;; +(define (create-work-area run-id run-info keyvals test-id test-src-path disk-path testname itemdat #!key (remtries 2)) + (let* ((item-path (if (string? itemdat) itemdat (item-list->path itemdat))) ;; if pass in string - just use it + (runname (if (string? run-info) ;; if we pass in a string as run-info use it as run-name. + run-info + (db:get-value-by-header (db:get-rows run-info) + (db:get-header run-info) + "runname"))) + (contour #f) ;; NOT READY FOR THIS (args:get-arg "-contour")) + ;; convert back to db: from rdb: - this is always run at server end + (target (string-intersperse (map cadr keyvals) "/")) + + (not-iterated (equal? "" item-path)) + + ;; all tests are found at /test-base or /test-base + (testtop-base (conc target "/" runname "/" testname)) + (test-base (conc testtop-base (if not-iterated "" "/") item-path)) + + ;; nb// if itempath is not "" then it is prefixed with "/" + (toptest-path (conc disk-path (if contour (conc "/" contour) "") "/" testtop-base)) + (test-path (conc disk-path (if contour (conc "/" contour) "") "/" test-base)) + + ;; ensure this exists first as links to subtests must be created there + (linktree (common:get-linktree)) + ;; WAS: (let ((rd (configf:lookup *configdat* "setup" "linktree"))) + ;; (if rd rd (conc *toppath* "/runs")))) + ;; which seems wrong ... 
+
+         (lnkbase (conc linktree (if contour (conc "/" contour) "") "/" target "/" runname))
+         (lnkpath (conc lnkbase "/" testname))
+         (lnkpathf (conc lnkpath (if not-iterated "" "/") item-path))
+         (lnktarget (conc lnkpath "/" item-path)))
+
+    ;; Update the rundir path in the test record for all, rundir=physical, shortdir=logical
+    ;; rundir shortdir
+    (rmt:general-call 'test-set-rundir-shortdir run-id lnkpathf test-path testname item-path run-id)
+
+    (debug:print 2 *default-log-port* "INFO:\n lnkbase=" lnkbase "\n lnkpath=" lnkpath "\n toptest-path=" toptest-path "\n test-path=" test-path)
+    (if (not (common:file-exists? linktree))
+        (begin
+          (debug:print 0 *default-log-port* "WARNING: linktree did not exist! Creating it now at " linktree)
+          (create-directory linktree #t))) ;; (system (conc "mkdir -p " linktree))))
+    ;; create the directory for the tests dir links, this is needed no matter what... try up to three times
+    ;;
+    ;; success is #t when lnkbase already exists or was just created and #f
+    ;; when create-directory raised. (The flag used to be inverted: the
+    ;; exception handler returned #t, so a failed attempt was never retried.)
+    (let loop ((tries-left 3))
+      (let ((success (if (and (not (common:directory-exists? lnkbase))
+                              (not (common:file-exists? lnkbase)))
+                         (handle-exceptions
+                          exn
+                          (begin
+                            (debug:print-error 0 *default-log-port* "Problem creating linktree base at " lnkbase)
+                            (print-error-message exn (current-error-port))
+                            #f) ;; creation failed, maybe retry
+                          (create-directory lnkbase #t)
+                          #t)
+                         #t))) ;; something already exists at lnkbase, nothing to do
+        (if (and (not success) (> tries-left 1))
+            (loop (- tries-left 1)))))
+
+    ;; update the toptest record with its location rundir, cache the path
+    ;; This was highly inefficient, one db write for every subtest, potentially
+    ;; thousands of unnecessary updates, cache the fact it was set and don't set it
+    ;; again.
+
+    ;; Now create the link from the test path to the link tree, however
+    ;; if the test is iterated it is necessary to create the parent path
+    ;; to the iteration. use pathname-directory to trim the path by one
+    ;; level
+    (if (not not-iterated) ;; i.e.
iterated + (let ((iterated-parent (pathname-directory (conc lnkpath "/" item-path)))) + (debug:print-info 2 *default-log-port* "Creating iterated parent " iterated-parent) + (handle-exceptions + exn + (begin + (debug:print-error 0 *default-log-port* " Failed to create directory " iterated-parent ((condition-property-accessor 'exn 'message) exn) ", exiting") + (exit 1)) + (create-directory iterated-parent #t)))) + + (if (symbolic-link? lnkpath) + (handle-exceptions + exn + (begin + (debug:print-error 0 *default-log-port* " Failed to remove symlink " lnkpath ((condition-property-accessor 'exn 'message) exn) ", exiting") + (exit 1)) + (delete-file lnkpath))) + + (if (not (or (common:file-exists? lnkpath) + (symbolic-link? lnkpath))) + (handle-exceptions + exn + (begin + (debug:print-error 0 *default-log-port* " Failed to create symlink " lnkpath ((condition-property-accessor 'exn 'message) exn) ", exiting") + (exit 1)) + (create-symbolic-link toptest-path lnkpath))) + + ;; NB - This was not working right - some top tests are not getting the path set!!! + ;; + ;; Do the setting of this record after the paths are created so that the shortdir can + ;; be set to the real directory location. This is safer for future clean up if the link + ;; tree is damaged or lost. + ;; + (if (not (hash-table-ref/default *toptest-paths* testname #f)) + (let* ((testinfo (rmt:get-test-info-by-id run-id test-id)) ;; run-id testname item-path)) + (curr-test-path (if testinfo ;; (filedb:get-path *fdb* + ;; (db:get-path dbstruct + ;; (rmt:sdb-qry 'getstr + (db:test-get-rundir testinfo) ;; ) ;; ) + #f))) + (hash-table-set! *toptest-paths* testname curr-test-path) + ;; NB// Was this for the test or for the parent in an iterated test? + (rmt:general-call 'test-set-rundir-shortdir run-id lnkpath + (if (common:file-exists? 
lnkpath) + ;; (resolve-pathname lnkpath) + (common:nice-path lnkpath) + lnkpath) + testname "" run-id) + ;; (rmt:general-call 'test-set-rundir run-id lnkpath testname "") ;; toptest-path) + (if (or (not curr-test-path) + (not (directory-exists? toptest-path))) + (begin + (debug:print-info 2 *default-log-port* "Creating " toptest-path " and link " lnkpath) + (handle-exceptions + exn + #f ;; don't care to catch and deal with errors here for now. + (create-directory toptest-path #t)) + (hash-table-set! *toptest-paths* testname toptest-path))))) + + ;; The toptest path has been created, the link to the test in the linktree has + ;; been created. Now, if this is an iterated test the real test dir must be created + (if (not not-iterated) ;; this is an iterated test + (begin ;; (let ((lnktarget (conc lnkpath "/" item-path))) + (debug:print 2 *default-log-port* "Setting up sub test run area") + (debug:print 2 *default-log-port* " - creating run area in " test-path) + (handle-exceptions + exn + (begin + (debug:print-error 0 *default-log-port* " Failed to create directory " test-path ((condition-property-accessor 'exn 'message) exn) ", exiting") + (exit 1)) + (create-directory test-path #t)) + (debug:print 2 *default-log-port* + " - creating link from: " test-path "\n" + " to: " lnktarget) + + ;; If there is already a symlink delete it and recreate it. + (handle-exceptions + exn + (begin + (debug:print-error 0 *default-log-port* " Failed to re-create link " lnktarget ((condition-property-accessor 'exn 'message) exn) ", exiting") + (exit)) + (if (symbolic-link? lnktarget) (delete-file lnktarget)) + (if (not (common:file-exists? lnktarget)) (create-symbolic-link test-path lnktarget))))) + + (if (not (directory? test-path)) + (create-directory test-path #t)) ;; this is a hack, I don't know why out of the blue this path does not exist sometimes + + (if (and test-src-path (directory? 
test-path))
+        (begin
+          (launch:test-copy test-src-path test-path)
+          (list lnkpathf lnkpath ))
+        (if (and test-src-path (> remtries 0))
+            (begin
+              (debug:print-error 0 *default-log-port* "Failed to create work area at " test-path " with link at " lnktarget ", remaining attempts " remtries)
+              ;;
+              (create-work-area run-id run-info keyvals test-id test-src-path disk-path testname itemdat remtries: (- remtries 1)))
+            (list #f #f)))))
+
+
+;; Rate-limited zombie scan for one run.
+;;
+;; The time of the last scan is kept in the server variable
+;; "zombiescan-runid-<run-id>". A new scan (rmt:find-and-mark-incomplete) is
+;; done when that variable is unset or older than twice the [setup] deadtime
+;; (120 when deadtime is not configured); the variable is then updated.
+;;
+;; One timestamp is taken per call and used both for the threshold and for the
+;; stored value (it was previously bound to `now` but never used).
+;; NOTE(review): assumes rmt:get-var hands back a number or #f - confirm.
+;;
+(define (launch:handle-zombie-tests run-id)
+  (let* ((key       (conc "zombiescan-runid-" run-id))
+         (now       (current-seconds))
+         (deadtime  (or (configf:lookup-number *configdat* "setup" "deadtime") 120))
+         (threshold (- now (* 2 deadtime)))
+         (val       (rmt:get-var key))
+         (do-scan?  (or (not val) (< val threshold))))
+    (when do-scan?
+      (debug:print 1 *default-log-port* "INFO: search and mark zombie tests")
+      (rmt:set-var key now)
+      (rmt:find-and-mark-incomplete run-id #f))))
+
+
+
+
+
+;; 1. look through disks list for disk with most space
+;; 2. create run dir on disk, path name is meaningful
+;; 3. create link from run dir to megatest runs area
+;; 4. remotely run the test on allocated host
+;;    - could be ssh to host from hosts table (update regularly with load)
+;;    - could be netbatch
+;;      (launch-test db (cadr status) test-conf))
+(define (launch-test test-id run-id run-info keyvals runname test-conf test-name test-path itemdat params)
+  (mutex-lock!
*launch-setup-mutex*) ;; setting variables and processing the testconfig is NOT thread-safe, reuse the launch-setup mutex + (let* ( ;; (lock-key (conc "test-" test-id)) + ;; (got-lock (let loop ((lock (rmt:no-sync-get-lock lock-key)) + ;; (expire-time (+ (current-seconds) 15))) ;; give up on getting the lock and steal it after 15 seconds + ;; (if (car lock) + ;; #t + ;; (if (> (current-seconds) expire-time) + ;; (begin + ;; (debug:print-info 0 *default-log-port* "Timed out waiting for a lock to launch test " keyvals " " runname " " test-name " " test-path) + ;; (rmt:no-sync-del! lock-key) ;; destroy the lock + ;; (loop (rmt:no-sync-get-lock lock-key) expire-time)) ;; + ;; (begin + ;; (thread-sleep! 1) + ;; (loop (rmt:no-sync-get-lock lock-key) expire-time)))))) + (item-path (item-list->path itemdat)) + (contour #f)) ;; NOT READY FOR THIS (args:get-arg "-contour"))) + (let loop ((delta (- (current-seconds) *last-launch*)) + (launch-delay (configf:lookup-number *configdat* "setup" "launch-delay" default: 1))) + (if (> launch-delay delta) + (begin + (if (common:low-noise-print 1200 "test launch delay") ;; every two hours or so remind the user about launch delay. + (debug:print-info 0 *default-log-port* "NOTE: test launches are delayed by " launch-delay " seconds. See megatest.config launch-delay setting to adjust.")) ;; launch of " test-name " for " (- launch-delay delta) " seconds")) + (thread-sleep! 
(- launch-delay delta)) + (loop (- (current-seconds) *last-launch*) launch-delay)))) + (change-directory *toppath*) + (alist->env-vars ;; consolidate this code with the code in megatest.scm for "-execute", *maybe* - the longer they are set the longer each launch takes (must be non-overlapping with the vars) + (append + (list + (list "MT_RUN_AREA_HOME" *toppath*) + (list "MT_TEST_NAME" test-name) + (list "MT_RUNNAME" runname) + (list "MT_ITEMPATH" item-path) + (list "MT_CONTOUR" contour) + ) + itemdat)) + (let* ((tregistry (tests:get-all)) ;; third param (below) is system-allowed + ;; for tconfig, why do we allow fallback to test-conf? + (tconfig (or (tests:get-testconfig test-name item-path tregistry #t force-create: #t) + (begin + (debug:print 0 *default-log-port* "WARNING: falling back to pre-calculated testconfig. This is likely not desired.") + test-conf))) ;; force re-read now that all vars are set + (useshell (let ((ush (configf:lookup *configdat* "jobtools" "useshell"))) + (if ush + (if (equal? ush "no") ;; must use "no" to NOT use shell + #f + ush) + #t))) ;; default is yes + (runscript (configf:lookup tconfig "setup" "runscript")) + (ezsteps (> (length (hash-table-ref/default tconfig "ezsteps" '())) 0)) ;; don't send all the steps, could be big, just send a flag + (subrun (> (length (hash-table-ref/default tconfig "subrun" '())) 0)) ;; send a flag to process a subrun + ;; (diskspace (configf:lookup tconfig "requirements" "diskspace")) + ;; (memory (configf:lookup tconfig "requirements" "memory")) + ;; (hosts (configf:lookup *configdat* "jobtools" "workhosts")) ;; I'm pretty sure this was never completed + (remote-megatest (configf:lookup *configdat* "setup" "executable")) + (run-time-limit (or (configf:lookup tconfig "requirements" "runtimelim") + (configf:lookup *configdat* "setup" "runtimelim"))) + ;; FIXME SOMEDAY: not good how this is so obtuse, this hack is to + ;; allow running from dashboard. 
Extract the path + ;; from the called megatest and convert dashboard + ;; or dboard to megatest + (local-megatest (let* ((lm (car (argv))) + (dir (pathname-directory lm)) + (exe (pathname-strip-directory lm))) + (conc (if dir (conc dir "/") "") + (case (string->symbol exe) + ((dboard) "../megatest") + ((mtest) "../megatest") + ((dashboard) "megatest") + (else exe))))) + (launcher (common:get-launcher *configdat* test-name item-path)) ;; (configf:lookup *configdat* "jobtools" "launcher")) + (test-sig (conc (common:get-testsuite-name) ":" test-name ":" item-path)) ;; (item-list->path itemdat))) ;; test-path is the full path including the item-path + (work-area #f) + (toptest-work-area #f) ;; for iterated tests the top test contains data relevant for all + (diskpath #f) + (cmdparms #f) + (fullcmd #f) ;; (define a (with-output-to-string (lambda ()(write x)))) + (mt-bindir-path #f) + (testinfo (rmt:get-test-info-by-id run-id test-id)) + (mt_target (string-intersperse (map cadr keyvals) "/")) + (debug-param (append (if (args:get-arg "-debug") (list "-debug" (args:get-arg "-debug")) '()) + (if (args:get-arg "-logging")(list "-logging") '())))) + ;; (if hosts (set! hosts (string-split hosts))) + ;; set the megatest to be called on the remote host + (if (not remote-megatest)(set! remote-megatest local-megatest)) ;; "megatest")) + (set! mt-bindir-path (pathname-directory remote-megatest)) + (if launcher (set! 
launcher (string-split launcher))) + ;; set up the run work area for this test + (if (and (args:get-arg "-preclean") ;; user has requested to preclean for this run + (not (member (db:test-get-rundir testinfo)(list "n/a" "/tmp/badname")))) ;; n/a is a placeholder and thus not a read dir + (begin + (debug:print-info 0 *default-log-port* "attempting to preclean directory " (db:test-get-rundir testinfo) " for test " test-name "/" item-path) + (runs:remove-test-directory testinfo 'remove-data-only))) ;; remove data only, do not perturb the record + + ;; prevent overlapping actions - set to LAUNCHED as early as possible + ;; + ;; the following call handles waiver propogation. cannot yet condense into roll-up-pass-fail + (tests:test-set-status! run-id test-id "LAUNCHED" "n/a" #f #f) ;; (if launch-results launch-results "FAILED")) + (rmt:set-state-status-and-roll-up-items run-id test-name item-path #f "LAUNCHED" #f) + ;; (pp (hash-table->alist tconfig)) + (set! diskpath (get-best-disk *configdat* tconfig)) + (if diskpath + (let ((dat (create-work-area run-id run-info keyvals test-id test-path diskpath test-name itemdat))) + (set! work-area (car dat)) + (set! toptest-work-area (cadr dat)) + (debug:print-info 2 *default-log-port* "Using work area " work-area)) + (begin + (set! work-area (conc test-path "/tmp_run")) + (create-directory work-area #t) + (debug:print 0 *default-log-port* "WARNING: No disk work area specified - running in the test directory under tmp_run"))) + (set! 
cmdparms (base64:base64-encode + (z3:encode-buffer + (with-output-to-string + (lambda () ;; (list 'hosts hosts) + (write (list (list 'testpath test-path) + ;; (list 'transport (conc *transport-type*)) + ;; (list 'serverinf *server-info*) + (list 'homehost (let* ((hhdat (common:get-homehost))) + (if hhdat + (car hhdat) + #f))) + (list 'serverurl (if *runremote* + (remote-server-url *runremote*) + #f)) ;; + (list 'areaname (common:get-testsuite-name)) + (list 'toppath *toppath*) + (list 'work-area work-area) + (list 'test-name test-name) + (list 'runscript runscript) + (list 'run-id run-id ) + (list 'test-id test-id ) + ;; (list 'item-path item-path ) + (list 'itemdat itemdat ) + (list 'megatest remote-megatest) + (list 'ezsteps ezsteps) + (list 'subrun subrun) + (list 'target mt_target) + (list 'contour contour) + (list 'runtlim (if run-time-limit (common:hms-string->seconds run-time-limit) #f)) + (list 'env-ovrd (hash-table-ref/default *configdat* "env-override" '())) + (list 'set-vars (if params (hash-table-ref/default params "-setvars" #f))) + (list 'runname runname) + (list 'mt-bindir-path mt-bindir-path)))))))) + + ;; clean out step records from previous run if they exist + ;; (rmt:delete-test-step-records run-id test-id) + ;; if the dir does not exist we may have a itempath where individual variables are a path, launch anyway + (if (common:file-exists? work-area) + (change-directory work-area)) ;; so that log files from the launch process don't clutter the test dir + (cond + ;; ((and launcher hosts) ;; must be using ssh hostname + ;; (set! fullcmd (append launcher (car hosts)(list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param))) + ;; (set! fullcmd (append launcher (car hosts)(list remote-megatest test-sig "-execute" cmdparms)))) + (launcher + (set! fullcmd (append launcher (list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param))) + ;; (set! 
fullcmd (append launcher (list remote-megatest test-sig "-execute" cmdparms)))) + (else + (if (not useshell)(debug:print 0 *default-log-port* "WARNING: internal launching will not work well without \"useshell yes\" in your [jobtools] section")) + (set! fullcmd (append (list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param (list (if useshell "&" "")))))) + ;; (set! fullcmd (list remote-megatest test-sig "-execute" cmdparms (if useshell "&" ""))))) + (if (args:get-arg "-xterm")(set! fullcmd (append fullcmd (list "-xterm")))) + (debug:print 1 *default-log-port* "Launching " work-area) + ;; set pre-launch-env-vars before launching, keep the vars in prevvals and put the envionment back when done + (debug:print 4 *default-log-port* "fullcmd: " fullcmd) + (set! *last-launch* (current-seconds)) ;; all that junk above takes time, set this as late as possible. + (let* ((commonprevvals (alist->env-vars + (hash-table-ref/default *configdat* "env-override" '()))) + (miscprevvals (alist->env-vars ;; consolidate this code with the code in megatest.scm for "-execute" + (append (list (list "MT_TEST_RUN_DIR" work-area) + (list "MT_TEST_NAME" test-name) + (list "MT_ITEM_INFO" (conc itemdat)) + (list "MT_RUNNAME" runname) + (list "MT_TARGET" mt_target) + (list "MT_ITEMPATH" item-path) + ) + itemdat))) + (testprevvals (alist->env-vars + (hash-table-ref/default tconfig "pre-launch-env-overrides" '()))) + ;; Launchwait defaults to true, must override it to turn off wait + (launchwait (if (equal? (configf:lookup *configdat* "setup" "launchwait") "no") #f #t)) + (launch-results-prev (apply (if launchwait ;; BB: TODO: refactor this to examine return code of launcher, if nonzero, set state to launch failed. 
+ process:cmd-run-with-stderr-and-exitcode->list + process-run) + (if useshell + (let ((cmdstr (string-intersperse fullcmd " "))) + (if launchwait + cmdstr + (conc cmdstr " >> mt_launch.log 2>&1 &"))) + (car fullcmd)) + (if useshell + '() + (cdr fullcmd)))) + (success (if launchwait (equal? 0 (cadr launch-results-prev)) #t)) + (launch-results (if launchwait (car launch-results-prev) launch-results-prev))) + (if (not success) + (tests:test-set-status! run-id test-id "COMPLETED" "DEAD" "launcher failed; exited non-zero; check mt_launch.log" #f)) ;; (if launch-results launch-results "FAILED")) + (mutex-unlock! *launch-setup-mutex*) ;; yes, really should mutex all the way to here. Need to put this entire process into a fork. + ;; (rmt:no-sync-del! lock-key) ;; release the lock for starting this test + (if (not launchwait) ;; give the OS a little time to allow the process to start + (thread-sleep! 0.01)) + (with-output-to-file "mt_launch.log" + (lambda () + (print "LAUNCHCMD: " (string-intersperse fullcmd " ")) + (if (list? launch-results) + (apply print launch-results) + (print "NOTE: launched \"" fullcmd "\"\n but did not wait for it to proceed. Add the following to megatest.config \n[setup]\nlaunchwait yes\n if you have problems with this")) + #:append)) + (debug:print 2 *default-log-port* "Launching completed, updating db") + (debug:print 2 *default-log-port* "Launch results: " launch-results) + (if (not launch-results) + (begin + (print "ERROR: Failed to run " (string-intersperse fullcmd " ") ", exiting now") + ;; (sqlite3:finalize! db) + ;; good ole "exit" seems not to work + ;; (_exit 9) + ;; but this hack will work! Thanks go to Alan Post of the Chicken email list + ;; NB// Is this still needed? Should be safe to go back to "exit" now? 
+          (process-signal (current-process-id) signal/kill)
+          ))
+      (alist->env-vars miscprevvals)
+      (alist->env-vars testprevvals)
+      (alist->env-vars commonprevvals)
+      launch-results))
+    (change-directory *toppath*)))
+
+;; recover a test where the top controlling mtest may have died
+;;
+;; Looks up the top process pid recorded for the test, probes for it with
+;; ps/grep, and then waits on that pid.
+;; NOTE(review): this looks unfinished - psres and rundir are computed but
+;; never consulted, so process-wait runs whether or not the process was found.
+;;
+(define (launch:recover-test run-id test-id)
+  ;; this function is called on the test run host via ssh
+  ;;
+  ;; 1. look at the process from pid
+  ;;    - is it owned by calling user
+  ;;    - is its run directory correct for the test
+  ;;    - is there a controlling mtest (maybe stuck)
+  ;; 2. if recovery is needed watch pid
+  ;;    - when it exits take the exit code and do the needful
+  ;;
+  (let* ((pid (rmt:test-get-top-process-pid run-id test-id))
+         ;; first line of the ps listing for the current user that matches "<pid> ",
+         ;; with the grep command itself filtered out
+         (psres (with-input-from-pipe
+                 (conc "ps -F -u " (current-user-name) " | grep -E '" pid " ' | grep -v 'grep -E " pid "'")
+                 (lambda ()
+                   (read-line))))
+         ;; working directory of the process, read from /proc; not used below
+         (rundir (if (string? psres) ;; real process owned by user
+                     (read-symbolic-link (conc "/proc/" pid "/cwd"))
+                     #f)))
+    ;; now wait on that process if all is correct
+    ;; periodically update the db with runtime
+    ;; when the process exits look at the db, if still RUNNING after 10 seconds set
+    ;; state/status appropriately
+    ;; (none of the above is implemented yet; only the wait itself happens)
+    (process-wait pid)))
+
+
+;; Do not rpc this one, do the underlying calls!!!
+(define (tests:test-set-status! run-id test-id state status comment dat #!key (work-area #f))
+  (let* ((real-status status)
+         (otherdat (if dat dat (make-hash-table)))
+         (testdat (rmt:get-test-info-by-id run-id test-id))
+         (test-name (db:test-get-testname testdat))
+         (item-path (db:test-get-item-path testdat))
+         ;; before proceeding we must find out if the previous test (where all keys matched except runname)
+         ;; was WAIVED if this test is FAIL
+
+         ;; NOTES:
+         ;; 1. Is the call to test:get-previous-run-record remotified?
+         ;; 2. Add test for testconfig waiver propagation control here
+         ;;
+         (prev-test (if (equal?
status "FAIL") + (rmt:get-previous-test-run-record run-id test-name item-path) + #f)) + (waived (if prev-test + (if prev-test ;; true if we found a previous test in this run series + (let ((prev-status (db:test-get-status prev-test)) + (prev-state (db:test-get-state prev-test)) + (prev-comment (db:test-get-comment prev-test))) + (debug:print 4 *default-log-port* "prev-status " prev-status ", prev-state " prev-state ", prev-comment " prev-comment) + (if (and (equal? prev-state "COMPLETED") + (equal? prev-status "WAIVED")) + (if comment + comment + prev-comment) ;; waived is either the comment or #f + #f)) + #f) + #f))) + (if (and waived + (tests:check-waiver-eligibility testdat prev-test)) + (set! real-status "WAIVED")) + + (debug:print 4 *default-log-port* "real-status " real-status ", waived " waived ", status " status) + + ;; update the primary record IF state AND status are defined + (if (and state status) + (begin + (rmt:set-state-status-and-roll-up-items run-id test-id item-path state real-status (if waived waived comment)) + ;; (mt:process-triggers run-id test-id state real-status) ;; triggers are called in test-set-state-status + )) + + ;; if status is "AUTO" then call rollup (note, this one modifies data in test + ;; run area, it does remote calls under the hood. + ;; (if (and test-id state status (equal? status "AUTO")) + ;; (rmt:test-data-rollup run-id test-id status)) + + ;; add metadata (need to do this way to avoid SQL injection issues) + + ;; :first_err + ;; (let ((val (hash-table-ref/default otherdat ":first_err" #f))) + ;; (if val + ;; (sqlite3:execute db "UPDATE tests SET first_err=? WHERE run_id=? AND testname=? AND item_path=?;" val run-id test-name item-path))) + ;; + ;; ;; :first_warn + ;; (let ((val (hash-table-ref/default otherdat ":first_warn" #f))) + ;; (if val + ;; (sqlite3:execute db "UPDATE tests SET first_warn=? WHERE run_id=? AND testname=? 
AND item_path=?;" val run-id test-name item-path))) + + (let ((category (hash-table-ref/default otherdat ":category" "")) + (variable (hash-table-ref/default otherdat ":variable" "")) + (value (hash-table-ref/default otherdat ":value" #f)) + (expected (hash-table-ref/default otherdat ":expected" "n/a")) + (tol (hash-table-ref/default otherdat ":tol" "n/a")) + (units (hash-table-ref/default otherdat ":units" "")) + (type (hash-table-ref/default otherdat ":type" "")) + (dcomment (hash-table-ref/default otherdat ":comment" ""))) + (debug:print 4 *default-log-port* + "category: " category ", variable: " variable ", value: " value + ", expected: " expected ", tol: " tol ", units: " units) + (if (and value) ;; require only value; BB was- all three required + (let ((dat (conc category "," + variable "," + value "," + expected "," + tol "," + units "," + dcomment ",," ;; extra comma for status + type ))) + ;; This was run remote, don't think that makes sense. Perhaps not, but that is the easiest path for the moment. + (rmt:csv->test-data run-id test-id + dat) + ;; This was added in check-in a5adfa3f9a. Message was: "...added delay in set-values to allow for delayed write on server start" + ;; I'm inserting an arbitrary rmt: call to force/ensure that the server is available to (hopefully) prevent a communication issue. + (rmt:get-var "MEGATEST_VERSION") ;; this does NOTHING but ensure the server is reachable. This is almost certainly NOT needed :) + ;; BB - commentiong out arbitrary 10 second wait (thread-sleep! 10) ;; add 10 second delay before quit incase rmt needs time to start a server. + ))) + + ;; need to update the top test record if PASS or FAIL and this is a subtest + ;;;;;; (if (not (equal? item-path "")) + ;;;;;; (rmt:set-state-status-and-roll-up-items run-id test-name item-path state status #f) ;;;;;) + + (if (or (and (string? 
comment)
+                 (string-match (regexp "\\S+") comment))
+            waived)
+        (let ((cmt (if waived waived comment)))
+          (rmt:general-call 'set-test-comment run-id cmt test-id)))))
+
+;;======================================================================
+;; read a config file, loading only the section pertinent
+;; to this run field1val/field2val/field3val ...
+;;======================================================================
+
+;; Read config file FNAME through configf:read-config into a fresh hash table.
+;; When TARGET is not #f the table is pre-seeded with an empty entry for TARGET
+;; and only the "default" and TARGET sections are requested; otherwise no
+;; section filter is passed. ENVIRON-PATT is handed through unchanged.
+;; Returns whatever configf:read-config returns.
+;;
+(define (runconfig:read fname target environ-patt)
+  (let ((ht (make-hash-table)))
+    (if target (hash-table-set! ht target '()))
+    (configf:read-config fname ht #t environ-patt: environ-patt sections: (if target (list "default" target) #f))))
+
+;; NB// to process a runconfig ensure to use environ-patt with target!
+;;
+;; Read the runconfig FNAME for the current target and, when CHANGE-ENV is
+;; true, export the key values and every string-valued setting from the
+;; "default" and target sections through safe-setenv.
+;;
+;;   keyvals      - list of (key value) lists, or #f. When #f the target is
+;;                  taken from common:args-get-target or $MT_TARGET.
+;;   already-seen - when true, a per-section count of settings is printed at
+;;                  debug level 2 and *already-seen-runconfig-info* is set.
+;;   run-id       - not referenced in the body.
+;;
+;; Side effect: sets *target* when it is not yet set.
+;; Returns the config data as read by runconfig:read.
+;;
+(define (setup-env-defaults fname run-id already-seen keyvals #!key (environ-patt #f)(change-env #t))
+  ;; An unused `keys' binding, (map car keyvals), used to be evaluated first;
+  ;; it made the keyvals == #f fallback below unreachable, so it is gone.
+  (let* ((thekey    (if keyvals
+                        (string-intersperse (map (lambda (x)(if x x "-na-")) (map cadr keyvals)) "/")
+                        (or (common:args-get-target)
+                            (get-environment-variable "MT_TARGET")
+                            (begin
+                              (debug:print-error 0 *default-log-port* "setup-env-defaults called with no run-id or -target or -reqtarg")
+                              "nothing matches this I hope"))))
+         ;; Why was system disallowed in the reading of the runconfigs file?
+         ;; NOTE: Should be setting env vars based on (target|default)
+         (confdat   (runconfig:read fname thekey environ-patt))
+         (whatfound (make-hash-table))
+         (finaldat  (make-hash-table))
+         (sections  (list "default" thekey)))
+    (if (not *target*)(set! *target* thekey)) ;; may save a db access or two but repeats db:get-target code
+    (debug:print 4 *default-log-port* "Using key=\"" thekey "\"")
+
+    ;; keyvals may legitimately be #f (see thekey above), so only walk it when present
+    (if (and change-env keyvals)
+        (for-each ;; NB// This can be simplified with new content of keyvals having all that is needed.
+         (lambda (keyval)
+           (safe-setenv (car keyval)(cadr keyval)))
+         keyvals))
+
+    (for-each
+     (lambda (section)
+       (let ((section-dat (hash-table-ref/default confdat section #f)))
+         (if section-dat
+             (for-each
+              (lambda (envvar)
+                (let ((val (cadr (assoc envvar section-dat))))
+                  ;; count the settings seen per section for the summary below
+                  (hash-table-set! whatfound section (+ (hash-table-ref/default whatfound section 0) 1))
+                  (if (and (string? envvar)
+                           (string? val)
+                           change-env)
+                      (safe-setenv envvar val))
+                  (hash-table-set! finaldat envvar val)))
+              (map car section-dat)))))
+     sections)
+    (if already-seen
+        (begin
+          (debug:print 2 *default-log-port* "Key settings found in runconfigs.config:")
+          (for-each (lambda (fullkey)
+                      (debug:print 2 *default-log-port* (format #f "~20a ~a\n" fullkey (hash-table-ref/default whatfound fullkey 0))))
+                    sections)
+          (debug:print 2 *default-log-port* "---")
+          (set! *already-seen-runconfig-info* #t)))
+    ;; finaldat ;; was returning this "finaldat" which would be good but conflicts with other uses
+    confdat
+    ))
+
+;; Locate *toppath*/runconfigs.config and, if it exists, load it through
+;; setup-env-defaults with an environ-patt of "(default)" or "(default|TARGET)".
+;; The target is the first non-#f of: common:args-get-target, TARG-FROM-DB,
+;; $MT_TARGET. Prints a warning when the file does not exist.
+;;
+(define (set-run-config-vars run-id keyvals targ-from-db)
+  (push-directory *toppath*) ;; the push/pop doesn't appear to do anything ...
+  (let ((runconfigf (conc  *toppath* "/runconfigs.config"))
+        (targ       (or (common:args-get-target)
+                        targ-from-db
+                        (get-environment-variable "MT_TARGET"))))
+    (pop-directory)
+    (if (common:file-exists? runconfigf)
+        (setup-env-defaults runconfigf run-id #t keyvals
+                            environ-patt: (conc "(default"
+                                                (if targ
+                                                    (conc "|" targ ")")
+                                                    ")")))
+        (debug:print 0 *default-log-port* "WARNING: You do not have a run config file: " runconfigf))))
+
+;; given (a (b c) d) return ((a b d)(a c d))
+;; NOTE: this feels like it has been done before - perhaps with items handling?
+;;
+;; Each element of INLST is either a single item or a list of alternatives;
+;; the result is the list of all combinations, built left to right.
+;; An empty INLST yields '() (it used to fail on (car '())).
+;;
+(define (runconfig:combinations inlst)
+  (if (null? inlst)
+      '()
+      (let loop ((hed (car inlst))
+                 (tal (cdr inlst))
+                 (res '()))
+        ;; (print "res: " res " hed: " hed)
+        (let* ((items  (if (list? hed) hed (list hed))) ;; a lone item is a one-element set of alternatives
+               (newres (if (null? res) ;; first time through convert incoming items to list of items
+                           (map list items)
+                           (apply append
+                                  (map (lambda (r)        ;; iterate over items in res
+                                         (map (lambda (h) ;; iterate over the alternatives
+                                                (append r (list h)))
+                                              items))
+                                       res)))))
+          ;; (print "newres: " newres)
+          (if (null? tal)
+              newres
+              (loop (car tal)(cdr tal) newres))))))
+
+;; multi-part expand
+;; Given a/b,c,d/e,f return a/b/e a/b/f a/c/e a/c/f a/d/e a/d/f
+;;
+;; TARGET is split on "/" and each part on ","; the combinations are joined
+;; back together with "/". Returns a list of strings.
+;;
+(define (runconfig:expand target)
+  (let* ((parts (map (lambda (x)
+                       (string-split x ","))
+                     (string-split target "/"))))
+    (map (lambda (x)
+           (string-intersperse x "/"))
+         (runconfig:combinations parts))))
+
+;; multi-target expansion
+;; a/b/c/x,y,z a/b/d/x,y => a/b/c/x a/b/c/y a/b/c/z a/b/d/x a/b/d/y
+;;
+;; TARGET-STRS is one string of space separated target patterns; duplicates
+;; are removed from the expanded result.
+;;
+(define (runconfig:expand-target target-strs)
+  (delete-duplicates
+   (apply append (map runconfig:expand (string-split target-strs " ")))))
+
+#|
+  (if (null? target-strs)
+      '()
+      (let loop ((hed (car target-strs))
+                 (tal (cdr target-strs))
+                 (res '()))
+        ;; first break all parts into individual target patterns
+        (if (string-index hed " ") ;; this is a multi-target target
+            (let ((newres (append (string-split hed " ") res)))
+              (runconfig:expand-target newres))
+            (if (string-index hed ",") ;; this is a multi-target where one or more parts are comma separated
+
+|#
+
+;; cache the runconfigs in $MT_LINKTREE/$MT_TARGET/$MT_RUNNAME/.runconfig
+;;
+(define (full-runconfigs-read)
+;; in the envprocessing branch the below code replaces the further below code
+;; (if (eq?
*configstatus* 'fulldata) +;; *runconfigdat* +;; (begin +;; (launch:setup) +;; *runconfigdat*))) + + (let* ((rundir (if (and (getenv "MT_LINKTREE")(getenv "MT_TARGET")(getenv "MT_RUNNAME")) + (conc (getenv "MT_LINKTREE") "/" (getenv "MT_TARGET") "/" (getenv "MT_RUNNAME")) + #f)) + (cfgf (if rundir (conc rundir "/.runconfig." megatest-version "-" megatest-fossil-hash) #f))) + (if (and cfgf + (common:file-exists? cfgf) + (file-write-access? cfgf) + (common:use-cache?)) + (configf:read-alist cfgf) + (let* ((keys (rmt:get-keys)) + (target (common:args-get-target)) + (key-vals (if target (keys:target->keyval keys target) #f)) + (sections (if target (list "default" target) #f)) + (data (begin + (setenv "MT_RUN_AREA_HOME" *toppath*) + (if key-vals + (for-each (lambda (kt) + (setenv (car kt) (cadr kt))) + key-vals)) + ;; (read-config (conc *toppath* "/runconfigs.config") #f #t sections: sections)))) + (runconfig:read (conc *toppath* "/runconfigs.config") target #f)))) + (if (and rundir ;; have all needed variabless + (directory-exists? rundir) + (file-write-access? rundir)) + (begin + (if (not (common:in-running-test?)) + (configf:write-alist data cfgf)) + ;; force re-read of megatest.config - this resolves circular references between megatest.config + (launch:setup force-reread: #t) + ;; (launch:cache-config) ;; there are two independent config cache locations, turning this one off for now. MRW. 
+ )) ;; we can safely cache megatest.config since we have a valid runconfig + data)))) + + +;;====================================================================== +;; +;;====================================================================== + +;; NOT CURRENTLY USED - commented out as it has unresolved dependencies +;; +#;(define (archive:main linktree target runname testname itempath options) + (let ((testdir (conc linktree "/" target "/" runname "/" testname "/" itempatt)) + (flavor 'plain) ;; type of machine to run jobs on + (maxload 1.5) ;; max allowed load for this work + (adisks (archive:get-archive-disks))) + ;; get testdir size + ;; - hand off du to job mgr + (if (and (common:file-exists? testdir) + (file-is-writable? testdir)) + (let* ((dused (jobrunner:run-job + flavor ;; machine type + maxload ;; max allowed load + '() ;; prevars - environment vars to set for the job + common:get-disk-space-used ;; if a proc call it, if a string it is a unix command + (list testdir))) + (apath (archive:get-archive testname itempath dused))) + (jobrunner:run-job + flavor + maxload + '() + archive:run-bup + (list testdir apath)))))) + +;; Get archive disks from megatest.config +;; +(define (archive:get-archive-disks) + (let ((section (configf:get-section *configdat* "archive-disks"))) + (if section + section + '()))) + +;; look for the best candidate archive area, else create new +;; area +;; +(define (archive:get-archive testname itempath dused) + ;; look up in archive_allocations if there is a pre-used archive + ;; with adequate diskspace + ;; + (let* ((existing-blocks (rmt:archive-get-allocations testname itempath dused)) + (candidate-disks (map (lambda (block) + (list + (vector-ref block 1) ;; archive-area-name + (vector-ref block 2))) ;; disk-path + existing-blocks))) + (or (common:get-disk-with-most-free-space candidate-disks dused) + (archive:allocate-new-archive-block #f #f #f)))) ;; BROKEN. 
testname itempath))))
+
+;; allocate a new archive area
+;;
+;; Returns a pair (block-id . archive-path) for the testsuite/target/run/test
+;; combination, or #f when no disk with DNEEDED free space is found or the
+;; block could not be registered.
+;;
+;;   blockid-cache - hash table keyed by "testsuite/target/run/test"; a hit is
+;;                   returned directly and new allocations are stored in it.
+;;   run-area-home - string; the first five characters of its md5 digest form
+;;                   the area key used in the default archive name.
+;;   dneeded       - space requirement handed to
+;;                   common:get-disk-with-most-free-space.
+;;
+;; If [archive] pathscript is configured it is run with the testsuite, target,
+;; run and test names and its first output line is used as the archive name;
+;; if running it raises an exception the process exits with status 1.
+;; Otherwise the name is <year>_q<quarter>/<testsuite>_<area-key>.
+;;
+(define (archive:allocate-new-archive-block blockid-cache run-area-home testsuite-name dneeded target run-name test-name)
+  (let ((key (conc testsuite-name "/" target "/" run-name "/" test-name)))
+    (if (hash-table-exists? blockid-cache key)
+        (hash-table-ref blockid-cache key)
+        (let* ((pscript     (configf:lookup *configdat* "archive" "pathscript"))
+               (pscript-cmd (conc pscript " " testsuite-name " " target " " run-name " " test-name))
+               (apath       (if pscript
+                                (handle-exceptions
+                                 exn
+                                 (begin
+                                   ;; the log port argument was missing here, unlike every other debug:print call
+                                   (debug:print 0 *default-log-port* "ERROR: script \"" pscript-cmd "\" failed to run properly.")
+                                   (exit 1))
+                                 (with-input-from-pipe
+                                  pscript-cmd
+                                  read-line))
+                                #f)) ;; this is the user-calculated archive path
+               (adisks      (archive:get-archive-disks))
+               (best-disk   (common:get-disk-with-most-free-space adisks dneeded)))
+          (if best-disk
+              (let* ((bdisk-name   (car best-disk))
+                     (bdisk-path   (cdr best-disk))
+                     (area-key     (substring (message-digest-string (md5-primitive) run-area-home) 0 5))
+                     (bdisk-id     (rmt:archive-register-disk bdisk-name bdisk-path (get-df bdisk-path)))
+                     (archive-name (if apath
+                                       apath
+                                       (let ((sec (current-seconds)))
+                                         (conc (time->string (seconds->local-time sec) "%Y")
+                                               "_q" (seconds->quarter sec) "/"
+                                               testsuite-name "_" area-key))))
+                     (archive-path (conc bdisk-path "/" archive-name))
+                     (block-id     (rmt:archive-register-block-name bdisk-id archive-path)))
+                ;; (allocation-id (rmt:archive-allocate-testsuite/area-to-block block-id testsuite-name area-key)))
+                (if block-id ;; (and block-id allocation-id)
+                    (let ((res (cons block-id archive-path)))
+                      ;; remember the allocation so later tests of the same key reuse it
+                      (hash-table-set! blockid-cache key res)
+                      res)
+                    #f))
+              #f)) ;; no best disk found
+        )))
+
+;; archive - run bup
+;;
+;; 1. create the bup dir if not exists
+;; 2. start the du of each directory
+;; 3. gen index
+;; 4.
save
+;;
+;; Archive the given TESTS. Tests are first binned by the base directory of
+;; their physical run area (test-base); each bin is then archived to the
+;; archive directory allocated for it, either with bup (the default) or with
+;; the configured [archive] archiver (tar or 7z).
+;;
+;;   archive-command - when "save-remove" the test directory is removed after
+;;                     the archive block id has been recorded.
+;;   run-id          - used in the bup save name (-n <testsuite>-<run-id>).
+;;   run-name        - used to build the per-test path under the link tree.
+;;   tests           - list of test records (read with the db:test-get-* accessors).
+;;   rp-mutex        - mutex held while a test path is resolved to its physical path.
+;;   bup-mutex       - not used; the calls that referenced it are commented out.
+;;
+;; Exits with status 1 when no archive disk can be allocated. Returns #t.
+;;
+(define (archive:run-bup archive-command run-id run-name tests rp-mutex bup-mutex)
+  ;; move the getting of archive space down into the below block so that a single run can
+  ;; allocate as needed should a disk fill up
+  ;;
+  (let* ((blockid-cache (make-hash-table))
+         (tsname        (common:get-testsuite-name))
+         (min-space     (string->number (or (configf:lookup *configdat* "archive" "minspace") "1000")))
+         (arch-groups   (make-hash-table)) ;; archive groups, each corresponds to a bup area
+         (disk-groups   (make-hash-table)) ;;
+         (test-groups   (make-hash-table)) ;; these two (disk and test groups) could be combined nicely
+         (test-dirs     (make-hash-table))
+         (bup-exe       (or (configf:lookup *configdat* "archive" "bup") "bup"))
+         (compress      (or (configf:lookup *configdat* "archive" "compress") "9"))
+         (linktree      (common:get-linktree)) ;; (configf:lookup *configdat* "setup" "linktree")))
+         (archiver      (let ((s (configf:lookup *configdat* "archive" "archiver")))
+                          (if s (string->symbol s) 'bup)))
+         (archiver-cmd  (case archiver
+                          ((tar) "tar cfj ARCHIVE_NAME.tar.bz2 ")
+                          ((7z)  " 7z u -t7z -m0=lzma -mx=9 -mfb=64 -md=32m -ms=on ARCHIVE_NAME.7z ")
+                          (else #f)))
+         (print-prefix  "Running: ") ;; change to #f to turn off printing
+         ;; same guard as archive:get-archive-disks: treat a missing section as empty
+         (preclean-spec (or (configf:get-section *configdat* "archive-preclean") '())))
+
+    ;; (tests:match patt testname itempath)
+
+    ;; from the test info bin the path to the test by stem
+    ;;
+    (for-each
+     (lambda (test-dat)
+       (let* ((item-path          (db:test-get-item-path test-dat))
+              (test-name          (db:test-get-testname  test-dat))
+              (test-id            (db:test-get-id        test-dat))
+              (run-id             (db:test-get-run_id    test-dat))
+              (target             (string-intersperse (map cadr (rmt:get-key-val-pairs run-id)) "/"))
+
+              (toplevel/children  (and (db:test-get-is-toplevel test-dat)
+                                       (> (rmt:test-toplevel-num-items run-id test-name) 0)))
+              (test-partial-path  (conc target "/" run-name "/" (db:test-make-full-name test-name item-path)))
+              ;; note the trailing slash to get the dir in spite of it being a link
+              (test-path          (conc linktree "/" test-partial-path))
+              ;; The lock/unlock used to be written as let* bindings, which only
+              ;; bound local variables and never locked anything.
+              (test-physical-path (archive:real-path/locked test-path rp-mutex))
+              (partial-path-index (if test-physical-path (substring-index test-partial-path test-physical-path) #f))
+              (test-base          (if (and partial-path-index
+                                           test-physical-path )
+                                      (substring test-physical-path
+                                                 0
+                                                 partial-path-index)
+                                      #f))
+              ;; we need our archive dir checked for every test to enable folks who want to store other ways.
+              (archive-info       (archive:allocate-new-archive-block blockid-cache *toppath* tsname min-space target run-name test-name))
+              (archive-dir        (if archive-info (cdr archive-info) #f))
+              (archive-id         (if archive-info (car archive-info) -1)))
+
+         (if (not archive-dir) ;; no archive disk found, this is fatal
+             (begin
+               (debug:print 0 *default-log-port* "FATAL: No archive disks found. Please add disks with at least "
+                            min-space " MB space to the [archive-disks] section of megatest.config")
+               (debug:print 0 *default-log-port* " use [archive] minspace to specify minimum available space")
+               (debug:print 0 *default-log-port* " disks: "
+                            (string-intersperse (map cadr (archive:get-archive-disks)) "\n "))
+               (exit 1))
+             (debug:print-info 0 *default-log-port* "Using path " archive-dir " for archiving test " test-path))
+
+         ;; preclean the test directory per the spec if provided
+         ;; (the first spec line whose pattern matches this test is applied, then the scan stops)
+         (if (not (null? preclean-spec)) ;; we've been asked to preclean before archiving
+             (let loop ((spec (car preclean-spec))
+                        (tail (cdr preclean-spec)))
+               (if (> (length spec) 1)
+                   (let ((testspec (car spec))
+                         (rules    (cadr spec)))
+                     (if (tests:match testspec test-name item-path)
+                         (begin
+                           (debug:print 0 *default-log-port* "INFO: cleanup requested for " test-physical-path)
+                           (common:dir-clean-up test-physical-path rules remove-empty: #t))
+                         (if (not (null? tail))
+                             (loop (car tail)(cdr tail)))))
+                   (begin
+                     (debug:print 0 *default-log-port* "ERROR: bad spec line in [archive-preclean] section. \"" spec "\"")
+                     (if (not (null? tail))(loop (car tail)(cdr tail)))))))
+         (cond
+          (toplevel/children
+           (debug:print 0 *default-log-port* "WARNING: cannot archive " test-name " with id " test-id
+                        " as it is a toplevel test with children"))
+          ((not (common:file-exists? test-path))
+           (debug:print 0 *default-log-port* "WARNING: Cannot archive " test-name "/" item-path
+                        " as path " test-path " does not exist"))
+          (else
+           (debug:print 0 *default-log-port*
+                        "From test-dat=" test-dat " derived the following:\n"
+                        "test-partial-path = " test-partial-path "\n"
+                        "test-path = " test-path "\n"
+                        "test-physical-path = " test-physical-path "\n"
+                        "partial-path-index = " partial-path-index "\n"
+                        "test-base = " test-base)
+           (hash-table-set! disk-groups test-base
+                            (cons test-physical-path (hash-table-ref/default disk-groups test-base '())))
+           (hash-table-set! test-groups test-base
+                            (cons test-dat (hash-table-ref/default test-groups test-base '())))
+           (hash-table-set! arch-groups test-base
+                            (cons archive-info (hash-table-ref/default arch-groups test-base '())))
+           (hash-table-set! test-dirs test-id test-path)))))
+     ;; test-path))))
+     tests)
+    (debug:print 0 *default-log-port* "INFO: DISK GROUPS=" (hash-table->alist disk-groups))
+    ;; for each disk-group, initialize the bup area if needed
+    (for-each
+     (lambda (test-base)
+       (let* ((arch-group  (hash-table-ref arch-groups test-base))
+              (arch-info   (car arch-group)) ;; don't know yet how this will work, can I get more than one possibility?
+              (archive-id  (car arch-info))
+              (archive-dir (cdr arch-info)))
+         (debug:print 0 *default-log-port* "Processing disk-group " test-base)
+         (let* ((test-paths (hash-table-ref disk-groups test-base)))
+           (if (not (common:file-exists? archive-dir))
+               (create-directory archive-dir #t))
+           (case archiver
+             ((bup) ;; Archive using bup
+              (let* ((bup-init-params  (list "-d" archive-dir "init"))
+                     (bup-index-params (append (list "-d" archive-dir "index") test-paths))
+                     (bup-save-params  (append (list "-d" archive-dir "save" ;; (conc "--strip-path=" linktree)
+                                                     (conc "-" compress) ;; or (conc "--compress=" compress)
+                                                     "-n" (conc (common:get-testsuite-name) "-" run-id)
+                                                     (conc "--strip-path=" test-base) ;; if we push to the directory do we need this?
+                                                     )
+                                               test-paths)))
+                ;; a bup repository has a HEAD file; initialise only when it is missing
+                (if (not (common:file-exists? (conc archive-dir "/HEAD")))
+                    (begin
+                      ;; replace this with jobrunner stuff eventually
+                      (debug:print-info 0 *default-log-port* "Init bup in " archive-dir)
+                      ;; (mutex-lock! bup-mutex)
+                      (run-n-wait bup-exe params: bup-init-params print-cmd: print-prefix)
+                      ;; (mutex-unlock! bup-mutex)
+                      ))
+                (debug:print-info 0 *default-log-port* "Indexing data to be archived")
+                ;; (mutex-lock! bup-mutex)
+                (run-n-wait bup-exe params: bup-index-params print-cmd: print-prefix)
+                (debug:print-info 0 *default-log-port* "Archiving data with bup")
+                (run-n-wait bup-exe params: bup-save-params print-cmd: print-prefix)))
+             ((7z tar)
+              (for-each
+               (lambda (test-dat)
+                 (let* ((test-id        (db:test-get-id        test-dat))
+                        (test-name      (db:test-get-testname  test-dat))
+                        (item-path      (db:test-get-item-path test-dat))
+                        (test-full-name (db:test-make-full-name test-name item-path))
+                        (run-id         (db:test-get-run_id    test-dat))
+                        (target         (string-intersperse (map cadr (rmt:get-key-val-pairs run-id)) "/"))
+                        (run-name       (rmt:get-run-name-from-id run-id))
+                        (source-dir     (hash-table-ref test-dirs test-id)) ;; (conc test-base "/" test-name "/" item-path))
+                        (target-dir     (string-substitute "/$" "" (conc archive-dir "/" target "/" run-name "/" test-full-name))))
+                   ;; create the test and item-path levels under archive-dir
+                   (create-directory (pathname-directory target-dir) #t)
+                   ;; the command runs in source-dir and archives "." into target-dir.<ext>
+                   (run-n-wait
+                    (conc
+                     (string-substitute "ARCHIVE_NAME" target-dir archiver-cmd) " "
+                     "."
+                     )
+                    print-cmd: print-prefix
+                    run-dir: source-dir)))
+               (hash-table-ref test-groups test-base))))
+           ;; (mutex-unlock! bup-mutex)
+           ;; record the archive block on every test of the group, removing the test dir if asked to
+           (for-each
+            (lambda (test-dat)
+              (let ((test-id (db:test-get-id     test-dat))
+                    (run-id  (db:test-get-run_id test-dat)))
+                (rmt:test-set-archive-block-id run-id test-id archive-id)
+                (if (member archive-command '("save-remove"))
+                    (runs:remove-test-directory test-dat 'archive-remove))))
+            (hash-table-ref test-groups test-base)))))
+     (hash-table-keys disk-groups))
+    #t))
+
+;; Resolve PATH to its physical location with common:real-path while holding
+;; MUTEX; returns #f when PATH does not exist. The unlock runs from
+;; dynamic-wind so the mutex is released even if the lookup raises. A MUTEX
+;; that is not a mutex object is ignored and the lookup runs unlocked.
+;; NOTE(review): mutex?, mutex-lock! and mutex-unlock! are SRFI-18 procedures;
+;; confirm srfi-18 is imported by the enclosing module.
+;;
+(define (archive:real-path/locked path mutex)
+  (let ((resolve (lambda ()
+                   (if (common:file-exists? path)
+                       (common:real-path path)
+                       #f))))
+    (if (mutex? mutex)
+        (dynamic-wind
+         (lambda () (mutex-lock! mutex))
+         resolve
+         (lambda () (mutex-unlock! mutex)))
+        (resolve))))
+
+;; Restore the given TESTS from their recorded archive blocks. For each test
+;; record: an existing run directory is moved aside (renamed with a leading
+;; dot), a new work area is created on the best disk, and the archived data is
+;; restored into it. Toplevel tests with children are not restored.
+;; Entries of TESTS that are not vectors are ignored.
+;;
+;;   rp-mutex  - mutex held while a test path is resolved to its physical path.
+;;   bup-mutex - not used; the calls that referenced it are commented out.
+;;
+(define (archive:bup-restore archive-command run-id run-name tests rp-mutex bup-mutex) ;; move the getting of archive space down into the below block so that a single run can
+  ;; allocate as needed should a disk fill up
+  ;;
+  (let* ((bup-exe  (or (configf:lookup *configdat* "archive" "bup") "bup"))
+         (linktree (common:get-linktree))) ;; (configf:lookup *configdat* "setup" "linktree")))
+
+    ;; from the test info bin the path to the test by stem
+    ;;
+    (for-each
+     (lambda (test-dat)
+       ;; When restoring test-dat will initially contain an old and invalid path to the test
+       (let* ((best-disk         (get-best-disk *configdat* #f)) ;; BUG: get the testconfig and use it here. Otherwise data pulled out of archive could end up on the wrong kind of disk.
+              (item-path         (db:test-get-item-path test-dat))
+              (test-name         (db:test-get-testname  test-dat))
+              (test-id           (db:test-get-id        test-dat))
+              (run-id            (db:test-get-run_id    test-dat))
+              (keyvals           (rmt:get-key-val-pairs run-id))
+              (target            (string-intersperse (map cadr keyvals) "/"))
+
+              (toplevel/children (and (db:test-get-is-toplevel test-dat)
+                                      (> (rmt:test-toplevel-num-items run-id test-name) 0)))
+              (test-partial-path (conc target "/" run-name "/" (db:test-make-full-name test-name item-path)))
+              ;; note the trailing slash to get the dir in spite of it being a link
+              (test-path         (conc linktree "/" test-partial-path))
+              ;; if the old path was not deleted then prev-test-physical-path will end up pointing to a real directory
+              ;; The lock/unlock used to be written as let* bindings, which only
+              ;; bound local variables and never locked anything.
+              (prev-test-physical-path (archive:real-path/locked test-path rp-mutex)) ;; was once (read-symbolic-link test-path #t)
+              (new-test-physical-path  (conc best-disk "/" test-partial-path))
+              (archive-block-id        (db:test-get-archived test-dat))
+              (archive-block-info      (rmt:test-get-archive-block-info archive-block-id))
+              (archive-path            (if (vector? archive-block-info)
+                                           (vector-ref archive-block-info 2) ;; look in db.scm for test-get-archive-block-info for the vector record info
+                                           #f)) ;; no archive found?
+              (archive-internal-path   (conc (common:get-testsuite-name) "-" run-id "/latest/" test-partial-path)))
+
+         ;; some sanity checks, move an existing path out of the way - iff it is not a toplevel with children
+         ;;
+         (if (and (not toplevel/children) ;; special handling needed for toplevel with children
+                  prev-test-physical-path
+                  (common:file-exists? prev-test-physical-path)) ;; what to do? abort or clean up or link it in?
+             (let* ((base (pathname-directory prev-test-physical-path))
+                    (dirn (pathname-file      prev-test-physical-path))
+                    (newn (conc base "/." dirn)))
+               (debug:print-error 0 *default-log-port* "the old directory " prev-test-physical-path ", still exists! Moving it to " newn)
+               (rename-file prev-test-physical-path newn)))
+
+         (if (and archive-path ;; no point in proceeding if there is no actual archive
+                  (not toplevel/children))
+             (begin
+               ;; CREATE WORK AREA
+               ;; test-src-path == #f ==> don't copy in data from tests directory
+               ;; itemdat == string ==> use directly
+               (create-work-area run-id run-name keyvals test-id #f best-disk test-name item-path) ;; #!key (remtries 2))
+
+               ;; 1. Get the block id from the test info
+               ;; 2. Get the block data given the block id
+               ;; 3. Construct the paths etc.
for the following command: + ;; + ;; bup -d /tmp/matt/adisk1/2015_q1/fullrun_e1a40/ restore -C /tmp/seeme fullrun-30/latest/ubuntu/nfs/none/w02.1.20.54_b/ + + ;; DO BUP RESTORE + (let* ((new-test-dat (rmt:get-test-info-by-id run-id test-id)) + (new-test-path (if (vector? new-test-dat ) + (db:test-get-rundir new-test-dat) + (begin + (debug:print-error 0 *default-log-port* "unable to get data for run-id=" run-id ", test-id=" test-id) + (exit 1)))) + ;; new-test-path won't work - must use best-disk instead? Nope, new-test-path but tack on /.. + (bup-restore-params (list "-d" archive-path "restore" "-C" (conc new-test-path "/..") archive-internal-path))) + (debug:print-info 0 *default-log-port* "Restoring archived data to " new-test-physical-path " from archive in " archive-path " ... " archive-internal-path) + ;; (mutex-lock! bup-mutex) + (run-n-wait bup-exe params: bup-restore-params print-cmd: #f) + ;; (mutex-unlock! bup-mutex) + (mt:test-set-state-status-by-id run-id test-id "COMPLETED" #f #f))) + (debug:print-error 0 *default-log-port* "No archive path in the record for run-id=" run-id " test-id=" test-id)))) + (filter vector? tests)))) + + ) Index: server-inc.scm ================================================================== --- server-inc.scm +++ server-inc.scm @@ -15,712 +15,5 @@ ;; ;; You should have received a copy of the GNU General Public License ;; along with Megatest. If not, see . ;; -(define (server:make-server-url hostport) - (if (not hostport) - #f - (conc "http://" (car hostport) ":" (cadr hostport)))) - -(define *server-loop-heart-beat* (current-seconds)) - -;;====================================================================== -;; P K T S S T U F F -;;====================================================================== - -;; ??? - -;;====================================================================== -;; P K T S S T U F F -;;====================================================================== - -;; ??? 
- -;;====================================================================== -;; S E R V E R -;;====================================================================== - -;; Call this to start the actual server -;; - -;; all routes though here end in exit ... -;; -;; start_server -;; -(define (server:launch run-id transport-type) - (case transport-type - ((http)(http-transport:launch)) - ;;((nmsg)(nmsg-transport:launch run-id)) - ;;((rpc) (rpc-transport:launch run-id)) - (else (debug:print-error 0 *default-log-port* "unknown server type " transport-type)))) - -;;====================================================================== -;; S E R V E R U T I L I T I E S -;;====================================================================== - -;; Get the transport -(define (server:get-transport) - (if *transport-type* - *transport-type* - (let ((ttype (string->symbol - (or (args:get-arg "-transport") - (configf:lookup *configdat* "server" "transport") - "rpc")))) - (set! *transport-type* ttype) - ttype))) - -;; Generate a unique signature for this server -(define (server:mk-signature) - (message-digest-string (md5-primitive) - (with-output-to-string - (lambda () - (write (list (current-directory) - (argv))))))) - -;; When using zmq this would send the message back (two step process) -;; with spiffy or rpc this simply returns the return data to be returned -;; -(define (server:reply return-addr query-sig success/fail result) - (debug:print-info 11 *default-log-port* "server:reply return-addr=" return-addr ", result=" result) - ;; (send-message pubsock target send-more: #t) - ;; (send-message pubsock - (case (server:get-transport) - ((rpc) (db:obj->string (vector success/fail query-sig result))) - ((http) (db:obj->string (vector success/fail query-sig result))) - ((fs) result) - (else - (debug:print-error 0 *default-log-port* "unrecognised transport type: " *transport-type*) - result))) - -;; Given a run id start a server process ### NOTE ### > file 2>&1 -;; if the run-id 
is zero and the target-host is set -;; try running on that host -;; incidental: rotate logs in logs/ dir. -;; -(define (server:run areapath) ;; areapath is *toppath* for a given testsuite area - (let* ((curr-host (get-host-name)) - ;; (attempt-in-progress (server:start-attempted? areapath)) - ;; (dot-server-url (server:check-if-running areapath)) - (curr-ip (server:get-best-guess-address curr-host)) - (curr-pid (current-process-id)) - (homehost (common:get-homehost)) ;; configf:lookup *configdat* "server" "homehost" )) - (target-host (car homehost)) - (testsuite (common:get-testsuite-name)) - (logfile (conc areapath "/logs/server.log")) ;; -" curr-pid "-" target-host ".log")) - (cmdln (conc (common:get-megatest-exe) - " -server " (or target-host "-") (if (equal? (configf:lookup *configdat* "server" "daemonize") "yes") - " -daemonize " - "") - ;; " -log " logfile - " -m testsuite:" testsuite)) ;; (conc " >> " logfile " 2>&1 &"))))) - (log-rotate (make-thread common:rotate-logs "server run, rotate logs thread")) - (load-limit (configf:lookup-number *configdat* "jobtools" "max-server-start-load" default: 3.0))) - ;; we want the remote server to start in *toppath* so push there - (push-directory areapath) - (debug:print 0 *default-log-port* "INFO: Trying to start server (" cmdln ") ...") - (thread-start! log-rotate) - - ;; host.domain.tld match host? - (if (and target-host - ;; look at target host, is it host.domain.tld or ip address and does it - ;; match current ip or hostname - (not (string-match (conc "("curr-host "|" curr-host"\\..*)") target-host)) - (not (equal? curr-ip target-host))) - (begin - (debug:print-info 0 *default-log-port* "Starting server on " target-host ", logfile is " logfile) - (setenv "TARGETHOST" target-host))) - - (setenv "TARGETHOST_LOGF" logfile) - (thread-sleep! (/ (random 5000) 1000)) ;; add about a random (up to 5 seconds) initial delay. 
It seems pretty common that many running tests request a server at the same time - (common:wait-for-normalized-load load-limit " delaying server start due to load" target-host) ;; do not try starting servers on an already overloaded machine, just wait forever - (system (conc "nbfake " cmdln)) - (unsetenv "TARGETHOST_LOGF") - (if (get-environment-variable "TARGETHOST")(unsetenv "TARGETHOST")) - (thread-join! log-rotate) - (pop-directory))) - -;; given a path to a server log return: host port startseconds -;; -(define (server:logf-get-start-info logf) - (let ((rx (regexp "^SERVER STARTED: (\\S+):(\\d+) AT ([\\d\\.]+)"))) ;; SERVER STARTED: host:port AT timesecs - (handle-exceptions - exn - (list #f #f #f) ;; no idea what went wrong, call it a bad server - (with-input-from-file - logf - (lambda () - (let loop ((inl (read-line)) - (lnum 0)) - (if (not (eof-object? inl)) - (let ((mlst (string-match rx inl))) - (if (not mlst) - (if (< lnum 500) ;; give up if more than 500 lines of server log read - (loop (read-line)(+ lnum 1)) - (list #f #f #f)) - (let ((dat (cdr mlst))) - (list (car dat) ;; host - (string->number (cadr dat)) ;; port - (string->number (caddr dat)))))) - (list #f #f #f)))))))) - -;; get a list of servers with all relevant data -;; ( mod-time host port start-time pid ) -;; -(define (server:get-list areapath #!key (limit #f)) - (let ((fname-rx (regexp "^(|.*/)server-(\\d+)-(\\S+).log$")) - (day-seconds (* 24 60 60))) - ;; if the directory exists continue to get the list - ;; otherwise attempt to create the logs dir and then - ;; continue - (if (if (directory-exists? (conc areapath "/logs")) - '() - (if (file-write-access? areapath) - (begin - (condition-case - (create-directory (conc areapath "/logs") #t) - (exn (i/o file)(debug:print 0 *default-log-port* "ERROR: Cannot create directory at " (conc areapath "/logs"))) - (exn ()(debug:print 0 *default-log-port* "ERROR: Unknown error attemtping to get server list."))) - (directory-exists? 
(conc areapath "/logs"))) - '())) - (let* ((server-logs (glob (conc areapath "/logs/server-*.log"))) - (num-serv-logs (length server-logs))) - (if (null? server-logs) - '() - (let loop ((hed (car server-logs)) - (tal (cdr server-logs)) - (res '())) - (let* ((mod-time (handle-exceptions - exn - (current-seconds) ;; 0 - (file-modification-time hed))) ;; default to *very* old so log gets ignored if deleted - (down-time (- (current-seconds) mod-time)) - (serv-dat (if (or (< num-serv-logs 10) - (< down-time 900)) ;; day-seconds)) - (server:logf-get-start-info hed) - '())) ;; don't waste time processing server files not touched in the 15 minutes if there are more than ten servers to look at - (serv-rec (cons mod-time serv-dat)) - (fmatch (string-match fname-rx hed)) - (pid (if fmatch (string->number (list-ref fmatch 2)) #f)) - (new-res (if (null? serv-dat) - res - (cons (append serv-rec (list pid)) res)))) - (if (null? tal) - (if (and limit - (> (length new-res) limit)) - new-res ;; (take new-res limit) <= need intelligent sorting before this will work - new-res) - (loop (car tal)(cdr tal) new-res))))))))) - -(define (server:get-num-alive srvlst) - (let ((num-alive 0)) - (for-each - (lambda (server) - (match-let (((mod-time host port start-time pid) - server)) - (let* ((uptime (- (current-seconds) mod-time)) - (runtime (if start-time - (- mod-time start-time) - 0))) - (if (< uptime 5)(set! num-alive (+ num-alive 1)))))) - srvlst) - num-alive)) - -;; given a list of servers get a list of valid servers, i.e. at least -;; 10 seconds old, has started and is less than 1 hour old and is -;; active (i.e. mod-time < 10 seconds -;; -;; mod-time host port start-time pid -;; -;; sort by start-time descending. I.e. get the oldest first. Young servers will thus drop off -;; and servers should stick around for about two hours or so. 
-;; -(define (server:get-best srvlst) - (let* ((nums (server:get-num-servers)) - (now (current-seconds)) - (slst (sort - (filter (lambda (rec) - (if (and (list? rec) - (> (length rec) 2)) - (let ((start-time (list-ref rec 3)) - (mod-time (list-ref rec 0))) - ;; (print "start-time: " start-time " mod-time: " mod-time) - (and start-time mod-time - (> (- now start-time) 0) ;; been running at least 0 seconds - (< (- now mod-time) 16) ;; still alive - file touched in last 16 seconds - (< (- now start-time) - (+ (- (string->number (or (configf:lookup *configdat* "server" "runtime") "3600")) - 180) - (random 360))) ;; under one hour running time +/- 180 - )) - #f)) - srvlst) - (lambda (a b) - (< (list-ref a 3) - (list-ref b 3)))))) - (if (> (length slst) nums) - (take slst nums) - slst))) - -(define (server:get-first-best areapath) - (let ((srvrs (server:get-best (server:get-list areapath)))) - (if (and srvrs - (not (null? srvrs))) - (car srvrs) - #f))) - -(define (server:get-rand-best areapath) - (let ((srvrs (server:get-best (server:get-list areapath)))) - (if (and (list? srvrs) - (not (null? srvrs))) - (let* ((len (length srvrs)) - (idx (random len))) - (list-ref srvrs idx)) - #f))) - - -(define (server:record->url servr) - (match-let (((mod-time host port start-time pid) - servr)) - (if (and host port) - (conc host ":" port) - #f))) - -(define (server:get-client-signature) - (if *my-client-signature* *my-client-signature* - (let ((sig (server:mk-signature))) - (set! *my-client-signature* sig) - *my-client-signature*))) - -;; kind start up of servers, wait 40 seconds before allowing another server for a given -;; run-id to be launched -(define (server:kind-run areapath) - (if (not (server:check-if-running areapath)) ;; why try if there is already a server running? 
- (let* ((last-run-dat (hash-table-ref/default *server-kind-run* areapath '(0 0))) ;; callnum, whenrun - (call-num (car last-run-dat)) - (when-run (cadr last-run-dat)) - (run-delay (+ (case call-num - ((0) 0) - ((1) 20) - ((2) 300) - (else 600)) - (random 5))) ;; add a small random number just in case a lot of jobs hit the work hosts simultaneously - (lock-file (conc areapath "/logs/server-start.lock"))) - (if (> (- (current-seconds) when-run) run-delay) - (begin - (common:simple-file-lock-and-wait lock-file expire-time: 15) - (server:run areapath) - (thread-sleep! 2) ;; don't release the lock for at least a few seconds - (common:simple-file-release-lock lock-file))) - (hash-table-set! *server-kind-run* areapath (list (+ call-num 1)(current-seconds)))))) - -(define (server:start-and-wait areapath #!key (timeout 60)) - (let ((give-up-time (+ (current-seconds) timeout))) - (let loop ((server-url (server:check-if-running areapath)) - (try-num 0)) - (if (or server-url - (> (current-seconds) give-up-time)) ;; server-url will be #f if no server available. - server-url - (let ((num-ok (length (server:get-best (server:get-list areapath))))) - (if (and (> try-num 0) ;; first time through simply wait a little while then try again - (< num-ok 1)) ;; if there are no decent candidates for servers then try starting a new one - (server:kind-run areapath)) - (thread-sleep! 5) - (loop (server:check-if-running areapath) - (+ try-num 1))))))) - -(define server:try-running server:run) ;; there is no more per-run servers ;; REMOVE ME. BUG. - -(define (server:get-num-servers #!key (numservers 2)) - (let ((ns (string->number - (or (configf:lookup *configdat* "server" "numservers") "notanumber")))) - (or ns numservers))) - -;; no longer care if multiple servers are started by accident. older servers will drop off in time. 
-;; -(define (server:check-if-running areapath) ;; #!key (numservers "2")) - (let* ((ns (server:get-num-servers)) - (servers (server:get-best (server:get-list areapath)))) - ;; (print "servers: " servers " ns: " ns) - (if (or (and servers - (null? servers)) - (not servers) - (and (list? servers) - (< (length servers) (random ns)))) ;; somewhere between 0 and numservers - #f - (let loop ((hed (car servers)) - (tal (cdr servers))) - (let ((res (server:check-server hed))) - (if res - res - (if (null? tal) - #f - (loop (car tal)(cdr tal))))))))) - -;; ping the given server -;; -(define (server:check-server server-record) - (let* ((server-url (server:record->url server-record)) - (res (case *transport-type* - ((http)(server:ping server-url)) - ;; ((nmsg)(nmsg-transport:ping (tasks:hostinfo-get-interface server) - ))) - (if res - server-url - #f))) - -(define (server:kill servr) - (match-let (((mod-time hostname port start-time pid) - servr)) - (tasks:kill-server hostname pid))) - -;; called in megatest.scm, host-port is string hostname:port -;; -;; NOTE: This is NOT called directly from clients as not all transports support a client running -;; in the same process as the server. -;; -(define (server:ping host-port-in #!key (do-exit #f)) - (let ((host:port (if (not host-port-in) ;; use read-dotserver to find - #f ;; (server:check-if-running *toppath*) - ;; (if (number? host-port-in) ;; we were handed a server-id - ;; (let ((srec (tasks:get-server-by-id (db:delay-if-busy (tasks:open-db)) host-port-in))) - ;; ;; (print "srec: " srec " host-port-in: " host-port-in) - ;; (if srec - ;; (conc (vector-ref srec 3) ":" (vector-ref srec 4)) - ;; (conc "no such server-id " host-port-in))) - host-port-in))) ;; ) - (let* ((host-port (if host:port - (let ((slst (string-split host:port ":"))) - (if (eq? 
(length slst) 2) - (list (car slst)(string->number (cadr slst))) - #f)) - #f))) -;; (toppath (launch:setup))) - ;; (print "host-port=" host-port) - (if (not host-port) - (begin - (if host-port-in - (debug:print 0 *default-log-port* "ERROR: bad host:port")) - (if do-exit (exit 1)) - #f) - (let* ((iface (car host-port)) - (port (cadr host-port)) - (server-dat (http-transport:client-connect iface port)) - (login-res (rmt:login-no-auto-client-setup server-dat))) - (if (and (list? login-res) - (car login-res)) - (begin - ;; (print "LOGIN_OK") - (if do-exit (exit 0)) - #t) - (begin - ;; (print "LOGIN_FAILED") - (if do-exit (exit 1)) - #f))))))) - -;; run ping in separate process, safest way in some cases -;; -(define (server:ping-server ifaceport) - (with-input-from-pipe - (conc (common:get-megatest-exe) " -ping " ifaceport) - (lambda () - (let loop ((inl (read-line)) - (res "NOREPLY")) - (if (eof-object? inl) - (case (string->symbol res) - ((NOREPLY) #f) - ((LOGIN_OK) #t) - (else #f)) - (loop (read-line) inl)))))) - -;; NOT USED (well, ok, reference in rpc-transport but otherwise not used). -;; -(define (server:login toppath) - (lambda (toppath) - (set! *db-last-access* (current-seconds)) ;; might not be needed. - (if (equal? *toppath* toppath) - #t - #f))) - -;; timeout is hms string: 1h 5m 3s, default is 1 minute -;; -(define (server:expiration-timeout) - (let ((tmo (configf:lookup *configdat* "server" "timeout"))) - (if (and (string? tmo) - (common:hms-string->seconds tmo)) ;; BUG: hms-string->seconds is broken, if given "10" returns 0. Also, it doesn't belong in this logic unless the string->number is changed below - (* 3600 (string->number tmo)) - 60))) - -(define (server:get-best-guess-address hostname) - (let ((res #f)) - (for-each - (lambda (adr) - (if (not (eq? (u8vector-ref adr 0) 127)) - (set! res adr))) - ;; NOTE: This can fail when there is no mention of the host in /etc/hosts. 
FIXME - (vector->list (hostinfo-addresses (hostname->hostinfo hostname)))) - (string-intersperse - (map number->string - (u8vector->list - (if res res (hostname->ip hostname)))) "."))) - -;; (define server:sync-lock-token "SERVER_SYNC_LOCK") -;; (define (server:release-sync-lock) -;; (db:no-sync-del! *no-sync-db* server:sync-lock-token)) -;; (define (server:have-sync-lock?) -;; (let* ((have-lock-pair (db:no-sync-get-lock *no-sync-db* server:sync-lock-token)) -;; (have-lock? (car have-lock-pair)) -;; (lock-time (cdr have-lock-pair)) -;; (lock-age (- (current-seconds) lock-time))) -;; (cond -;; (have-lock? #t) -;; ((>lock-age -;; (* 3 (configf:lookup-number *configdat* "server" "minimum-intersync-delay" default: 180))) -;; (server:release-sync-lock) -;; (server:have-sync-lock?)) -;; (else #f)))) - -;; moving this here as it needs access to db and cannot be in common. -;; - -(define (server:get-bruteforce-syncer dbstruct #!key (fork-to-background #f) (persist-until-sync #f)) - (let* ((sqlite-exe (or (get-environment-variable "MT_SQLITE3_EXE"))) ;; defined in cfg.sh - (sync-log (or (args:get-arg "-sync-log") (conc *toppath* "/logs/sync-" (current-process-id) "-" (get-host-name) ".log"))) - (tmp-area (common:get-db-tmp-area)) - (tmp-db (conc tmp-area "/megatest.db")) - (staging-file (conc *toppath* "/.megatest.db")) - (mtdbfile (conc *toppath* "/megatest.db")) - (lockfile (common:get-sync-lock-filepath)) - (sync-cmd-core (conc sqlite-exe" " tmp-db " .dump | "sqlite-exe" " staging-file "&>"sync-log)) - (sync-cmd (if fork-to-background - (conc "/usr/bin/env NBFAKE_LOG="*toppath*"/logs/last-server-sync-"(current-process-id)".log nbfake \""sync-cmd-core" && /bin/mv -f " staging-file " " mtdbfile" \"") - sync-cmd-core)) - (default-min-intersync-delay 2) - (min-intersync-delay (configf:lookup-number *configdat* "server" "minimum-intersync-delay" default: default-min-intersync-delay)) - (default-duty-cycle 0.1) - (duty-cycle (configf:lookup-number *configdat* "server" 
"sync-duty-cycle" default: default-duty-cycle)) - (last-sync-seconds 10) ;; we will adjust this to a measurement and delay last-sync-seconds * (1 - duty-cycle) - (calculate-off-time (lambda (work-duration duty-cycle) - (* (/ (- 1 duty-cycle) duty-cycle) last-sync-seconds))) - (off-time min-intersync-delay) ;; adjusted in closure below. - (do-a-sync - (lambda () - #;(BB> "Start do-a-sync with fork-to-background="fork-to-background" persist-until-sync="persist-until-sync) - (let* ((finalres - (let retry-loop ((num-tries 0)) - (if (common:simple-file-lock lockfile) - (begin - (cond - ((not (or fork-to-background persist-until-sync)) - (debug:print 0 *default-log-port* "INFO: syncer thread sleeping for max of (server.minimum-intersync-delay="min-intersync-delay - " , off-time="off-time" seconds ]") - (thread-sleep! (max off-time min-intersync-delay))) - (else - (debug:print 0 *default-log-port* "INFO: syncer thread NOT sleeping ; maybe time-to-exit..."))) - - (if (not (configf:lookup *configdat* "server" "disable-db-snapshot")) - (common:snapshot-file mtdbfile subdir: ".db-snapshot")) - (delete-file* staging-file) - (let* ((start-time (current-milliseconds)) - (res (system sync-cmd)) - (res2 - (cond - ((eq? 0 res) - (delete-file* (conc mtdbfile ".backup")) - (if (eq? 0 (file-size sync-log)) - (delete-file sync-log)) - (system (conc "/bin/mv " staging-file " " mtdbfile)) - - (set! last-sync-seconds (/ (- (current-milliseconds) start-time) 1000)) - (set! off-time (calculate-off-time - last-sync-seconds - (cond - ((and (number? duty-cycle) (> duty-cycle 0) (< duty-cycle 1)) - duty-cycle) - (else - (debug:print 0 *default-log-port* "WARNING: ["(common:human-time)"] server.sync-duty-cycle is invalid. Should be a number between 0 and 1, but "duty-cycle" was specified. 
Using default value: "default-duty-cycle) - default-duty-cycle)))) - - (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" SYNC took "last-sync-seconds" sec") - (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" SYNC took "last-sync-seconds" sec ; with duty-cycle of "duty-cycle" off time is now "off-time) - 'sync-completed) - (else - (system (conc "/bin/cp "sync-log" "sync-log".fail")) - (debug:print 0 *default-log-port* "ERROR: ["(common:human-time)"] Sync failed. See log at "sync-log".fail") - (if (file-exists? (conc mtdbfile ".backup")) - (system (conc "/bin/cp "mtdbfile ".backup " mtdbfile))) - #f)))) - (common:simple-file-release-lock lockfile) - #;(BB> "released lockfile: " lockfile) - #;(when (common:file-exists? lockfile) - (BB> "DID NOT ACTUALLY RELEASE LOCKFILE")) - res2) ;; end let - );; end begin - ;; else - (cond - (persist-until-sync - (thread-sleep! 1) - (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" other SYNC in progress; we're in a fork-to-background so we need to succeed. Let's wait a jiffy and and try again. num-tries="num-tries" (waiting for lockfile="lockfile" to disappear)") - (retry-loop (add1 num-tries))) - (else - (thread-sleep! (max off-time (+ last-sync-seconds min-intersync-delay))) - (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" other SYNC in progress; not syncing.") - 'parallel-sync-in-progress)) - ) ;; end if got lockfile - ) - )) - #;(BB> "End do-a-sync with fork-to-background="fork-to-background" persist-until-sync="persist-until-sync" and result="finalres) - finalres) - ) ;; end lambda - )) - do-a-sync)) - -(define (server:writable-watchdog-bruteforce dbstruct) - (thread-sleep! 
1) ;; delay for startup - (let* ((do-a-sync (server:get-bruteforce-syncer dbstruct)) - (final-sync (server:get-bruteforce-syncer dbstruct fork-to-background: #t persist-until-sync: #t))) - (when (and (not (args:get-arg "-sync-to-megatest.db")) ;; conditions under which we do not run the sync - (args:get-arg "-server")) - - (let loop () - (do-a-sync) - (if (not *time-to-exit*) (loop))) ;; keep going unless time to exit - - ;; time to exit, close the no-sync db here - (final-sync) - - (if (common:low-noise-print 30) - (debug:print-info 0 *default-log-port* "Exiting watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id) - ))))) - -(define (server:writable-watchdog-deltasync dbstruct) - (thread-sleep! 0.05) ;; delay for startup - (let ((legacy-sync (common:run-sync?)) - (sync-stale-seconds (configf:lookup-number *configdat* "server" "sync-stale-seconds" default: 300)) - (debug-mode (debug:debug-mode 1)) - (last-time (current-seconds)) - (no-sync-db (db:open-no-sync-db)) - (sync-duration 0) ;; run time of the sync in milliseconds - ;;(this-wd-num (begin (mutex-lock! *wdnum*mutex) (let ((x *wdnum*)) (set! *wdnum* (add1 *wdnum*)) (mutex-unlock! *wdnum*mutex) x))) - ) - (set! *no-sync-db* no-sync-db) ;; make the no sync db available to api calls - (debug:print-info 2 *default-log-port* "Periodic sync thread started.") - (debug:print-info 3 *default-log-port* "watchdog starting. legacy-sync is " legacy-sync" pid="(current-process-id) );; " this-wd-num="this-wd-num) - (if (and legacy-sync (not *time-to-exit*)) - (let* (;;(dbstruct (db:setup)) - (mtdb (dbr:dbstruct-mtdb dbstruct)) - (mtpath (db:dbdat-get-path mtdb)) - (tmp-area (common:get-db-tmp-area)) - (start-file (conc tmp-area "/.start-sync")) - (end-file (conc tmp-area "/.end-sync"))) - (debug:print-info 0 *default-log-port* "Server running, periodic sync started.") - (let loop () - ;; sync for filesystem local db writes - ;; - (mutex-lock! 
*db-multi-sync-mutex*) - (let* ((need-sync (>= *db-last-access* *db-last-sync*)) ;; no sync since last write - (sync-in-progress *db-sync-in-progress*) - (min-intersync-delay (configf:lookup-number *configdat* "server" "minimum-intersync-delay" default: 5)) - (should-sync (and (not *time-to-exit*) - (> (- (current-seconds) *db-last-sync*) min-intersync-delay))) ;; sync every five seconds minimum, deprecated logic, can probably be removed - (start-time (current-seconds)) - (cpu-load-adj (alist-ref 'adj-proc-load (common:get-normalized-cpu-load #f))) - (mt-mod-time (file-modification-time mtpath)) - (last-sync-start (if (common:file-exists? start-file) - (file-modification-time start-file) - 0)) - (last-sync-end (if (common:file-exists? end-file) - (file-modification-time end-file) - 10)) - (sync-period (+ 3 (* cpu-load-adj 30))) ;; as adjusted load increases increase the sync period - (recently-synced (and (< (- start-time mt-mod-time) sync-period) ;; not useful if sync didn't modify megatest.db! - (< mt-mod-time last-sync-start))) - (sync-done (<= last-sync-start last-sync-end)) - (sync-stale (> start-time (+ last-sync-start sync-stale-seconds))) - (will-sync (and (not *time-to-exit*) ;; do not start a sync if we are in the process of exiting - (or need-sync should-sync) - (or sync-done sync-stale) - (not sync-in-progress) - (not recently-synced)))) - (debug:print-info 13 *default-log-port* "WD writable-watchdog top of loop. 
need-sync="need-sync" sync-in-progress=" sync-in-progress - " should-sync="should-sync" start-time="start-time" mt-mod-time="mt-mod-time" recently-synced="recently-synced" will-sync="will-sync - " sync-done=" sync-done " sync-period=" sync-period) - (if (and (> sync-period 5) - (common:low-noise-print 30 "sync-period")) - (debug:print-info 0 *default-log-port* "Increased sync period due to long sync times, sync took: " sync-period " seconds.")) - ;; (if recently-synced (debug:print-info 0 *default-log-port* "Skipping sync due to recently-synced flag=" recently-synced)) - ;; (debug:print-info 0 *default-log-port* "need-sync: " need-sync " sync-in-progress: " sync-in-progress " should-sync: " should-sync " will-sync: " will-sync) - (if will-sync (set! *db-sync-in-progress* #t)) - (mutex-unlock! *db-multi-sync-mutex*) - (if will-sync - (let (;; (max-sync-duration (configf:lookup-number *configdat* "server" "max-sync-duration")) ;; KEEPING THIS AVAILABLE BUT SHOULD NOT USE, I'M PRETTY SURE IT DOES NOT WORK! - (sync-start (current-milliseconds))) - (with-output-to-file start-file (lambda ()(print (current-process-id)))) - - ;; put lock here - - ;; (if (or (not max-sync-duration) - ;; (< sync-duration max-sync-duration)) ;; NOTE: db:sync-to-megatest.db keeps track of time of last sync and syncs incrementally - (let ((res (db:sync-to-megatest.db dbstruct no-sync-db: no-sync-db))) ;; did we sync any data? If so need to set the db touched flag to keep the server alive - (set! sync-duration (- (current-milliseconds) sync-start)) - (if (> res 0) ;; some records were transferred, keep the db alive - (begin - (mutex-lock! *heartbeat-mutex*) - (set! *db-last-access* (current-seconds)) - (mutex-unlock! 
*heartbeat-mutex*) - (debug:print-info 0 *default-log-port* "sync called, " res " records transferred.")) - (debug:print-info 2 *default-log-port* "sync called but zero records transferred"))))) -;; ;; TODO: factor this next routine out into a function -;; (with-input-from-pipe ;; this should not block other threads but need to verify this -;; (conc "megatest -sync-to-megatest.db -m testsuite:" (common:get-area-name) ":" *toppath*) -;; (lambda () -;; (let loop ((inl (read-line)) -;; (res #f)) -;; (if (eof-object? inl) -;; (begin -;; (set! sync-duration (- (current-milliseconds) sync-start)) -;; (cond -;; ((not res) -;; (debug:print 0 *default-log-port* "ERROR: sync from /tmp db to megatest.db appears to have failed. Recommended that you stop your runs and run \"megatest -cleanup-db\"")) -;; ((> res 0) -;; (mutex-lock! *heartbeat-mutex*) -;; (set! *db-last-access* (current-seconds)) -;; (mutex-unlock! *heartbeat-mutex*)))) -;; (let ((num-synced (let ((matches (string-match "^Synced (\\d+).*$" inl))) -;; (if matches -;; (string->number (cadr matches)) -;; #f)))) -;; (loop (read-line) -;; (or num-synced res)))))))))) - (if will-sync - (begin - (mutex-lock! *db-multi-sync-mutex*) - (set! *db-sync-in-progress* #f) - (set! *db-last-sync* start-time) - (with-output-to-file end-file (lambda ()(print (current-process-id)))) - - ;; release lock here - - (mutex-unlock! *db-multi-sync-mutex*))) - (if (and debug-mode - (> (- start-time last-time) 60)) - (begin - (set! 
last-time start-time) - (debug:print-info 4 *default-log-port* "timestamp -> " (seconds->time-string (current-seconds)) ", time since start -> " (seconds->hr-min-sec (- (current-seconds) *time-zero*)))))) - - ;; keep going unless time to exit - ;; - (if (not *time-to-exit*) - (let delay-loop ((count 0)) - ;;(debug:print-info 13 *default-log-port* "delay-loop top; count="count" pid="(current-process-id)" this-wd-num="this-wd-num" *time-to-exit*="*time-to-exit*) - - (if (and (not *time-to-exit*) - (< count 6)) ;; was 11, changing to 4. - (begin - (thread-sleep! 1) - (delay-loop (+ count 1)))) - (if (not *time-to-exit*) (loop)))) - ;; time to exit, close the no-sync db here - (db:no-sync-close-db no-sync-db) - (if (common:low-noise-print 30) - (debug:print-info 0 *default-log-port* "Exiting watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id) ))))))) ;;" this-wd-num="this-wd-num))))))) - Index: servermod.scm ================================================================== --- servermod.scm +++ servermod.scm @@ -18,17 +18,651 @@ ;;====================================================================== (declare (unit servermod)) (declare (uses commonmod)) +(declare (uses dbmod)) +(declare (uses mtconfigf)) +(declare (uses mtargs)) +(declare (uses tasksmod)) (module servermod * -(import scheme chicken data-structures extras) -(import (prefix sqlite3 sqlite3:) posix typed-records srfi-18 srfi-69 format ports srfi-1 matchable) +(import scheme chicken data-structures extras files) +(import (prefix sqlite3 sqlite3:) posix typed-records srfi-18 + srfi-69 format ports srfi-1 matchable + directory-utils md5 message-digest regex + stack) (import commonmod) +(import dbmod) +(import tasksmod) +(import (prefix mtargs args:)) +(import (prefix mtconfigf configf:)) + ;; (use (prefix ulex ulex:)) (include "common_records.scm") + +(define (server:make-server-url hostport) + (if (not hostport) + #f + (conc "http://" (car hostport) ":" (cadr hostport)))) + 
+(define *server-loop-heart-beat* (current-seconds)) + +;; options: +;; +;; 'killservers - kills all servers +;; 'dejunk - removes junk records +;; 'adj-testids - move test-ids into correct ranges +;; 'old2new - sync megatest.db to /tmp/.../megatest.db and /tmp/.../megatest_ref.db +;; 'new2old - sync /tmp/.../megatest.db to megatest.db and /tmp/.../megatest_ref.db (and update data_synced) +;; 'closeall - close all opened dbs +;; 'schema - attempt to apply schema changes +;; run-ids: '(1 2 3 ...) or #f (for all) +;; +(define (db:multi-db-sync dbstruct . options) + ;; (if (not (launch:setup)) + ;; (debug:print 0 *default-log-port* "ERROR: not able to setup up for megatest.") + (let* ((mtdb (dbr:dbstruct-mtdb dbstruct)) + (tmpdb (db:get-db dbstruct)) + (refndb (dbr:dbstruct-refndb dbstruct)) + (allow-cleanup #t) ;; (if run-ids #f #t)) + (servers (server:get-list *toppath*)) ;; (tasks:get-all-servers (db:delay-if-busy tdbdat))) + (data-synced 0)) ;; count of changed records (I hope) + + (for-each + (lambda (option) + + (case option + ;; kill servers + ((killservers) + (for-each + (lambda (server) + (match-let (((mod-time host port start-time pid) server)) + (if (and host pid) + (tasks:kill-server host pid)))) + servers) + + ;; /tmp/bjbarcla/megatest_localdb/fullrun/.nfs.pdx.disks.icf_env_disk001.bjbarcla.gwa.issues.mtdev.matt-bisect.megatest.ext-tests.runs.all.v1.65.1.6524.dev.bb-24-justrollup-f8.rollup.fullrun/megatest.db.lock + (delete-file* (common:get-sync-lock-filepath)) + ) + + ;; clear out junk records + ;; + ((dejunk) + (db:delay-if-busy mtdb) ;; ok to delay on mtdb + (when (file-write-access? (db:dbdat-get-path mtdb)) (db:clean-up mtdb)) + (db:clean-up tmpdb) + (db:clean-up refndb)) + + ;; sync runs, test_meta etc. + ;; + ((old2new) + (set! 
data-synced + (+ (db:sync-tables (db:sync-all-tables-list dbstruct) #f mtdb tmpdb refndb) + data-synced))) + + ;; now ensure all newdb data are synced to megatest.db + ;; do not use the run-ids list passed in to the function + ;; + ((new2old) + (set! data-synced + (+ (db:sync-tables (db:sync-all-tables-list dbstruct) #f tmpdb refndb mtdb) + data-synced))) + + ((adj-target) + (db:adj-target (db:dbdat-get-db mtdb)) + (db:adj-target (db:dbdat-get-db tmpdb)) + (db:adj-target (db:dbdat-get-db refndb))) + + ((schema) + (db:patch-schema-maindb (db:dbdat-get-db mtdb)) + (db:patch-schema-maindb (db:dbdat-get-db tmpdb)) + (db:patch-schema-maindb (db:dbdat-get-db refndb)) + (db:patch-schema-rundb (db:dbdat-get-db mtdb)) + (db:patch-schema-rundb (db:dbdat-get-db tmpdb)) + (db:patch-schema-rundb (db:dbdat-get-db refndb)))) + + (stack-push! (dbr:dbstruct-dbstack dbstruct) tmpdb)) + options) + data-synced)) + +(define (db:tmp->megatest.db-sync dbstruct last-update) + (let* ((mtdb (dbr:dbstruct-mtdb dbstruct)) + (tmpdb (db:get-db dbstruct)) + (refndb (dbr:dbstruct-refndb dbstruct)) + (res (db:sync-tables (db:sync-all-tables-list dbstruct) last-update tmpdb refndb mtdb))) + (stack-push! (dbr:dbstruct-dbstack dbstruct) tmpdb) + res)) + +;;;; run-ids +;; if #f use *db-local-sync* : or 'local-sync-flags +;; if #t use timestamps : or 'timestamps +;; +;; NB// no-sync-db is the db handle, not a flag! 
+;; +(define (db:sync-to-megatest.db dbstruct #!key (no-sync-db #f)) + (let* ((start-time (current-seconds)) + (last-full-update (if no-sync-db + (db:no-sync-get/default no-sync-db "LAST_FULL_UPDATE" 0) + 0)) + (full-sync-needed (> (- start-time last-full-update) 3600)) ;; every hour do a full sync + (last-update (if full-sync-needed + 0 + (if no-sync-db + (db:no-sync-get/default no-sync-db "LAST_UPDATE" 0) + 0))) ;; (or (db:get-var dbstruct "LAST_UPDATE") 0)) + (sync-needed (> (- start-time last-update) 6)) + (res (if (or sync-needed ;; don't sync if a sync already occurred in the past 6 seconds + full-sync-needed) + (begin + (if no-sync-db + (begin + (if full-sync-needed (db:no-sync-set no-sync-db "LAST_FULL_UPDATE" start-time)) + (db:no-sync-set no-sync-db "LAST_UPDATE" start-time))) + (db:tmp->megatest.db-sync dbstruct last-update)) + 0)) + (sync-time (- (current-seconds) start-time))) + (debug:print-info 3 *default-log-port* "Sync of newdb to olddb completed in " sync-time " seconds pid="(current-process-id)) + (if (common:low-noise-print 30 "sync new to old") + (if sync-needed + (debug:print-info 0 *default-log-port* "Sync of " res " records from newdb to olddb completed in " sync-time " seconds pid="(current-process-id)) + (debug:print-info 0 *default-log-port* "No sync needed, last updated " (- start-time last-update) " seconds ago"))) + res)) + +;;====================================================================== +;; server stuff that operates on the server log files +;;====================================================================== + +(define (server:get-num-servers #!key (numservers 2)) + (let ((ns (string->number + (or (configf:lookup *configdat* "server" "numservers") "notanumber")))) + (or ns numservers))) + +;; get a list of servers with all relevant data +;; ( mod-time host port start-time pid ) +;; +(define (server:get-list areapath #!key (limit #f)) + (let ((fname-rx (regexp "^(|.*/)server-(\\d+)-(\\S+).log$")) + (day-seconds (* 24 60 
60))) + ;; if the directory exists continue to get the list + ;; otherwise attempt to create the logs dir and then + ;; continue + (if (if (directory-exists? (conc areapath "/logs")) + '() + (if (file-write-access? areapath) + (begin + (condition-case + (create-directory (conc areapath "/logs") #t) + (exn (i/o file)(debug:print 0 *default-log-port* "ERROR: Cannot create directory at " (conc areapath "/logs"))) + (exn ()(debug:print 0 *default-log-port* "ERROR: Unknown error attemtping to get server list."))) + (directory-exists? (conc areapath "/logs"))) + '())) + (let* ((server-logs (glob (conc areapath "/logs/server-*.log"))) + (num-serv-logs (length server-logs))) + (if (null? server-logs) + '() + (let loop ((hed (car server-logs)) + (tal (cdr server-logs)) + (res '())) + (let* ((mod-time (handle-exceptions + exn + (current-seconds) ;; 0 + (file-modification-time hed))) ;; default to *very* old so log gets ignored if deleted + (down-time (- (current-seconds) mod-time)) + (serv-dat (if (or (< num-serv-logs 10) + (< down-time 900)) ;; day-seconds)) + (server:logf-get-start-info hed) + '())) ;; don't waste time processing server files not touched in the 15 minutes if there are more than ten servers to look at + (serv-rec (cons mod-time serv-dat)) + (fmatch (string-match fname-rx hed)) + (pid (if fmatch (string->number (list-ref fmatch 2)) #f)) + (new-res (if (null? serv-dat) + res + (cons (append serv-rec (list pid)) res)))) + (if (null? 
tal) + (if (and limit + (> (length new-res) limit)) + new-res ;; (take new-res limit) <= need intelligent sorting before this will work + new-res) + (loop (car tal)(cdr tal) new-res))))))))) +;; given a path to a server log return: host port startseconds +;; +(define (server:logf-get-start-info logf) + (let ((rx (regexp "^SERVER STARTED: (\\S+):(\\d+) AT ([\\d\\.]+)"))) ;; SERVER STARTED: host:port AT timesecs + (handle-exceptions + exn + (list #f #f #f) ;; no idea what went wrong, call it a bad server + (with-input-from-file + logf + (lambda () + (let loop ((inl (read-line)) + (lnum 0)) + (if (not (eof-object? inl)) + (let ((mlst (string-match rx inl))) + (if (not mlst) + (if (< lnum 500) ;; give up if more than 500 lines of server log read + (loop (read-line)(+ lnum 1)) + (list #f #f #f)) + (let ((dat (cdr mlst))) + (list (car dat) ;; host + (string->number (cadr dat)) ;; port + (string->number (caddr dat)))))) + (list #f #f #f)))))))) + +(define (server:get-num-alive srvlst) + (let ((num-alive 0)) + (for-each + (lambda (server) + (match-let (((mod-time host port start-time pid) + server)) + (let* ((uptime (- (current-seconds) mod-time)) + (runtime (if start-time + (- mod-time start-time) + 0))) + (if (< uptime 5)(set! num-alive (+ num-alive 1)))))) + srvlst) + num-alive)) + +;; given a list of servers get a list of valid servers, i.e. at least +;; 10 seconds old, has started and is less than 1 hour old and is +;; active (i.e. mod-time < 10 seconds +;; +;; mod-time host port start-time pid +;; +;; sort by start-time descending. I.e. get the oldest first. Young servers will thus drop off +;; and servers should stick around for about two hours or so. +;; +(define (server:get-best srvlst) + (let* ((nums (server:get-num-servers)) + (now (current-seconds)) + (slst (sort + (filter (lambda (rec) + (if (and (list? 
rec) + (> (length rec) 2)) + (let ((start-time (list-ref rec 3)) + (mod-time (list-ref rec 0))) + ;; (print "start-time: " start-time " mod-time: " mod-time) + (and start-time mod-time + (> (- now start-time) 0) ;; been running at least 0 seconds + (< (- now mod-time) 16) ;; still alive - file touched in last 16 seconds + (< (- now start-time) + (+ (- (string->number (or (configf:lookup *configdat* "server" "runtime") "3600")) + 180) + (random 360))) ;; under one hour running time +/- 180 + )) + #f)) + srvlst) + (lambda (a b) + (< (list-ref a 3) + (list-ref b 3)))))) + (if (> (length slst) nums) + (take slst nums) + slst))) + +(define (server:get-first-best areapath) + (let ((srvrs (server:get-best (server:get-list areapath)))) + (if (and srvrs + (not (null? srvrs))) + (car srvrs) + #f))) + +(define (server:get-rand-best areapath) + (let ((srvrs (server:get-best (server:get-list areapath)))) + (if (and (list? srvrs) + (not (null? srvrs))) + (let* ((len (length srvrs)) + (idx (random len))) + (list-ref srvrs idx)) + #f))) + + +(define (server:record->url servr) + (match-let (((mod-time host port start-time pid) + servr)) + (if (and host port) + (conc host ":" port) + #f))) + +;; timeout is hms string: 1h 5m 3s, default is 1 minute +;; +(define (server:expiration-timeout) + (let ((tmo (configf:lookup *configdat* "server" "timeout"))) + (if (and (string? tmo) + (common:hms-string->seconds tmo)) ;; BUG: hms-string->seconds is broken, if given "10" returns 0. Also, it doesn't belong in this logic unless the string->number is changed below + (* 3600 (string->number tmo)) + 60))) + +;;====================================================================== +;; P K T S S T U F F +;;====================================================================== + +;; ??? + +;;====================================================================== +;; P K T S S T U F F +;;====================================================================== + +;; ??? 
+ +;;====================================================================== +;; S E R V E R +;;====================================================================== + +;; Call this to start the actual server +;; + +;;====================================================================== +;; S E R V E R U T I L I T I E S +;;====================================================================== + +;; Get the transport +(define (server:get-transport) + (if *transport-type* + *transport-type* + (let ((ttype (string->symbol + (or (args:get-arg "-transport") + (configf:lookup *configdat* "server" "transport") + "rpc")))) + (set! *transport-type* ttype) + ttype))) + +;; Generate a unique signature for this server +(define (server:mk-signature) + (message-digest-string (md5-primitive) + (with-output-to-string + (lambda () + (write (list (current-directory) + (argv))))))) + +;; When using zmq this would send the message back (two step process) +;; with spiffy or rpc this simply returns the return data to be returned +;; +(define (server:reply return-addr query-sig success/fail result) + (debug:print-info 11 *default-log-port* "server:reply return-addr=" return-addr ", result=" result) + ;; (send-message pubsock target send-more: #t) + ;; (send-message pubsock + (case (server:get-transport) + ((rpc) (db:obj->string (vector success/fail query-sig result))) + ((http) (db:obj->string (vector success/fail query-sig result))) + ((fs) result) + (else + (debug:print-error 0 *default-log-port* "unrecognised transport type: " *transport-type*) + result))) + +(define (server:get-client-signature) + ;; (if *my-client-signature* *my-client-signature* + ;; (let ((sig + (server:mk-signature)) ;; ) +;; (set! 
*my-client-signature* sig) +;; *my-client-signature*))) + +(define (server:kill servr) + (match-let (((mod-time hostname port start-time pid) + servr)) + (tasks:kill-server hostname pid))) + +;; (define server:sync-lock-token "SERVER_SYNC_LOCK") +;; (define (server:release-sync-lock) +;; (db:no-sync-del! *no-sync-db* server:sync-lock-token)) +;; (define (server:have-sync-lock?) +;; (let* ((have-lock-pair (db:no-sync-get-lock *no-sync-db* server:sync-lock-token)) +;; (have-lock? (car have-lock-pair)) +;; (lock-time (cdr have-lock-pair)) +;; (lock-age (- (current-seconds) lock-time))) +;; (cond +;; (have-lock? #t) +;; ((>lock-age +;; (* 3 (configf:lookup-number *configdat* "server" "minimum-intersync-delay" default: 180))) +;; (server:release-sync-lock) +;; (server:have-sync-lock?)) +;; (else #f)))) + +;; moving this here as it needs access to db and cannot be in common. +;; + +(define (server:get-bruteforce-syncer dbstruct #!key (fork-to-background #f) (persist-until-sync #f)) + (let* ((sqlite-exe (or (get-environment-variable "MT_SQLITE3_EXE"))) ;; defined in cfg.sh + (sync-log (or (args:get-arg "-sync-log") (conc *toppath* "/logs/sync-" (current-process-id) "-" (get-host-name) ".log"))) + (tmp-area (common:get-db-tmp-area)) + (tmp-db (conc tmp-area "/megatest.db")) + (staging-file (conc *toppath* "/.megatest.db")) + (mtdbfile (conc *toppath* "/megatest.db")) + (lockfile (common:get-sync-lock-filepath)) + (sync-cmd-core (conc sqlite-exe" " tmp-db " .dump | "sqlite-exe" " staging-file "&>"sync-log)) + (sync-cmd (if fork-to-background + (conc "/usr/bin/env NBFAKE_LOG="*toppath*"/logs/last-server-sync-"(current-process-id)".log nbfake \""sync-cmd-core" && /bin/mv -f " staging-file " " mtdbfile" \"") + sync-cmd-core)) + (default-min-intersync-delay 2) + (min-intersync-delay (configf:lookup-number *configdat* "server" "minimum-intersync-delay" default: default-min-intersync-delay)) + (default-duty-cycle 0.1) + (duty-cycle (configf:lookup-number *configdat* "server" 
"sync-duty-cycle" default: default-duty-cycle)) + (last-sync-seconds 10) ;; we will adjust this to a measurement and delay last-sync-seconds * (1 - duty-cycle) + (calculate-off-time (lambda (work-duration duty-cycle) + (* (/ (- 1 duty-cycle) duty-cycle) last-sync-seconds))) + (off-time min-intersync-delay) ;; adjusted in closure below. + (do-a-sync + (lambda () + #;(BB> "Start do-a-sync with fork-to-background="fork-to-background" persist-until-sync="persist-until-sync) + (let* ((finalres + (let retry-loop ((num-tries 0)) + (if (common:simple-file-lock lockfile) + (begin + (cond + ((not (or fork-to-background persist-until-sync)) + (debug:print 0 *default-log-port* "INFO: syncer thread sleeping for max of (server.minimum-intersync-delay="min-intersync-delay + " , off-time="off-time" seconds ]") + (thread-sleep! (max off-time min-intersync-delay))) + (else + (debug:print 0 *default-log-port* "INFO: syncer thread NOT sleeping ; maybe time-to-exit..."))) + + (if (not (configf:lookup *configdat* "server" "disable-db-snapshot")) + (common:snapshot-file mtdbfile subdir: ".db-snapshot")) + (delete-file* staging-file) + (let* ((start-time (current-milliseconds)) + (res (system sync-cmd)) + (res2 + (cond + ((eq? 0 res) + (delete-file* (conc mtdbfile ".backup")) + (if (eq? 0 (file-size sync-log)) + (delete-file sync-log)) + (system (conc "/bin/mv " staging-file " " mtdbfile)) + + (set! last-sync-seconds (/ (- (current-milliseconds) start-time) 1000)) + (set! off-time (calculate-off-time + last-sync-seconds + (cond + ((and (number? duty-cycle) (> duty-cycle 0) (< duty-cycle 1)) + duty-cycle) + (else + (debug:print 0 *default-log-port* "WARNING: ["(common:human-time)"] server.sync-duty-cycle is invalid. Should be a number between 0 and 1, but "duty-cycle" was specified. 
Using default value: "default-duty-cycle) + default-duty-cycle)))) + + (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" SYNC took "last-sync-seconds" sec") + (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" SYNC took "last-sync-seconds" sec ; with duty-cycle of "duty-cycle" off time is now "off-time) + 'sync-completed) + (else + (system (conc "/bin/cp "sync-log" "sync-log".fail")) + (debug:print 0 *default-log-port* "ERROR: ["(common:human-time)"] Sync failed. See log at "sync-log".fail") + (if (file-exists? (conc mtdbfile ".backup")) + (system (conc "/bin/cp "mtdbfile ".backup " mtdbfile))) + #f)))) + (common:simple-file-release-lock lockfile) + #;(BB> "released lockfile: " lockfile) + #;(when (common:file-exists? lockfile) + (BB> "DID NOT ACTUALLY RELEASE LOCKFILE")) + res2) ;; end let + );; end begin + ;; else + (cond + (persist-until-sync + (thread-sleep! 1) + (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" other SYNC in progress; we're in a fork-to-background so we need to succeed. Let's wait a jiffy and and try again. num-tries="num-tries" (waiting for lockfile="lockfile" to disappear)") + (retry-loop (add1 num-tries))) + (else + (thread-sleep! (max off-time (+ last-sync-seconds min-intersync-delay))) + (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" other SYNC in progress; not syncing.") + 'parallel-sync-in-progress)) + ) ;; end if got lockfile + ) + )) + #;(BB> "End do-a-sync with fork-to-background="fork-to-background" persist-until-sync="persist-until-sync" and result="finalres) + finalres) + ) ;; end lambda + )) + do-a-sync)) + +(define (server:writable-watchdog-bruteforce dbstruct) + (thread-sleep! 
1) ;; delay for startup + (let* ((do-a-sync (server:get-bruteforce-syncer dbstruct)) + (final-sync (server:get-bruteforce-syncer dbstruct fork-to-background: #t persist-until-sync: #t))) + (when (and (not (args:get-arg "-sync-to-megatest.db")) ;; conditions under which we do not run the sync + (args:get-arg "-server")) + + (let loop () + (do-a-sync) + (if (not *time-to-exit*) (loop))) ;; keep going unless time to exit + + ;; time to exit, close the no-sync db here + (final-sync) + + (if (common:low-noise-print 30) + (debug:print-info 0 *default-log-port* "Exiting watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id) + ))))) + +(define (server:writable-watchdog-deltasync dbstruct) + (thread-sleep! 0.05) ;; delay for startup + (let ((legacy-sync (common:run-sync?)) + (sync-stale-seconds (configf:lookup-number *configdat* "server" "sync-stale-seconds" default: 300)) + (debug-mode (debug:debug-mode 1)) + (last-time (current-seconds)) + (no-sync-db (db:open-no-sync-db)) + (sync-duration 0) ;; run time of the sync in milliseconds + ;;(this-wd-num (begin (mutex-lock! *wdnum*mutex) (let ((x *wdnum*)) (set! *wdnum* (add1 *wdnum*)) (mutex-unlock! *wdnum*mutex) x))) + ) + (set! *no-sync-db* no-sync-db) ;; make the no sync db available to api calls + (debug:print-info 2 *default-log-port* "Periodic sync thread started.") + (debug:print-info 3 *default-log-port* "watchdog starting. legacy-sync is " legacy-sync" pid="(current-process-id) );; " this-wd-num="this-wd-num) + (if (and legacy-sync (not *time-to-exit*)) + (let* (;;(dbstruct (db:setup)) + (mtdb (dbr:dbstruct-mtdb dbstruct)) + (mtpath (db:dbdat-get-path mtdb)) + (tmp-area (common:get-db-tmp-area)) + (start-file (conc tmp-area "/.start-sync")) + (end-file (conc tmp-area "/.end-sync"))) + (debug:print-info 0 *default-log-port* "Server running, periodic sync started.") + (let loop () + ;; sync for filesystem local db writes + ;; + (mutex-lock! 
*db-multi-sync-mutex*) + (let* ((need-sync (>= *db-last-access* *db-last-sync*)) ;; no sync since last write + (sync-in-progress *db-sync-in-progress*) + (min-intersync-delay (configf:lookup-number *configdat* "server" "minimum-intersync-delay" default: 5)) + (should-sync (and (not *time-to-exit*) + (> (- (current-seconds) *db-last-sync*) min-intersync-delay))) ;; sync every five seconds minimum, deprecated logic, can probably be removed + (start-time (current-seconds)) + (cpu-load-adj (alist-ref 'adj-proc-load (common:get-normalized-cpu-load #f))) + (mt-mod-time (file-modification-time mtpath)) + (last-sync-start (if (common:file-exists? start-file) + (file-modification-time start-file) + 0)) + (last-sync-end (if (common:file-exists? end-file) + (file-modification-time end-file) + 10)) + (sync-period (+ 3 (* cpu-load-adj 30))) ;; as adjusted load increases increase the sync period + (recently-synced (and (< (- start-time mt-mod-time) sync-period) ;; not useful if sync didn't modify megatest.db! + (< mt-mod-time last-sync-start))) + (sync-done (<= last-sync-start last-sync-end)) + (sync-stale (> start-time (+ last-sync-start sync-stale-seconds))) + (will-sync (and (not *time-to-exit*) ;; do not start a sync if we are in the process of exiting + (or need-sync should-sync) + (or sync-done sync-stale) + (not sync-in-progress) + (not recently-synced)))) + (debug:print-info 13 *default-log-port* "WD writable-watchdog top of loop. 
need-sync="need-sync" sync-in-progress=" sync-in-progress + " should-sync="should-sync" start-time="start-time" mt-mod-time="mt-mod-time" recently-synced="recently-synced" will-sync="will-sync + " sync-done=" sync-done " sync-period=" sync-period) + (if (and (> sync-period 5) + (common:low-noise-print 30 "sync-period")) + (debug:print-info 0 *default-log-port* "Increased sync period due to long sync times, sync took: " sync-period " seconds.")) + ;; (if recently-synced (debug:print-info 0 *default-log-port* "Skipping sync due to recently-synced flag=" recently-synced)) + ;; (debug:print-info 0 *default-log-port* "need-sync: " need-sync " sync-in-progress: " sync-in-progress " should-sync: " should-sync " will-sync: " will-sync) + (if will-sync (set! *db-sync-in-progress* #t)) + (mutex-unlock! *db-multi-sync-mutex*) + (if will-sync + (let (;; (max-sync-duration (configf:lookup-number *configdat* "server" "max-sync-duration")) ;; KEEPING THIS AVAILABLE BUT SHOULD NOT USE, I'M PRETTY SURE IT DOES NOT WORK! + (sync-start (current-milliseconds))) + (with-output-to-file start-file (lambda ()(print (current-process-id)))) + + ;; put lock here + + ;; (if (or (not max-sync-duration) + ;; (< sync-duration max-sync-duration)) ;; NOTE: db:sync-to-megatest.db keeps track of time of last sync and syncs incrementally + (let ((res (db:sync-to-megatest.db dbstruct no-sync-db: no-sync-db))) ;; did we sync any data? If so need to set the db touched flag to keep the server alive + (set! sync-duration (- (current-milliseconds) sync-start)) + (if (> res 0) ;; some records were transferred, keep the db alive + (begin + (mutex-lock! *heartbeat-mutex*) + (set! *db-last-access* (current-seconds)) + (mutex-unlock! 
*heartbeat-mutex*) + (debug:print-info 0 *default-log-port* "sync called, " res " records transferred.")) + (debug:print-info 2 *default-log-port* "sync called but zero records transferred"))))) +;; ;; TODO: factor this next routine out into a function +;; (with-input-from-pipe ;; this should not block other threads but need to verify this +;; (conc "megatest -sync-to-megatest.db -m testsuite:" (common:get-area-name) ":" *toppath*) +;; (lambda () +;; (let loop ((inl (read-line)) +;; (res #f)) +;; (if (eof-object? inl) +;; (begin +;; (set! sync-duration (- (current-milliseconds) sync-start)) +;; (cond +;; ((not res) +;; (debug:print 0 *default-log-port* "ERROR: sync from /tmp db to megatest.db appears to have failed. Recommended that you stop your runs and run \"megatest -cleanup-db\"")) +;; ((> res 0) +;; (mutex-lock! *heartbeat-mutex*) +;; (set! *db-last-access* (current-seconds)) +;; (mutex-unlock! *heartbeat-mutex*)))) +;; (let ((num-synced (let ((matches (string-match "^Synced (\\d+).*$" inl))) +;; (if matches +;; (string->number (cadr matches)) +;; #f)))) +;; (loop (read-line) +;; (or num-synced res)))))))))) + (if will-sync + (begin + (mutex-lock! *db-multi-sync-mutex*) + (set! *db-sync-in-progress* #f) + (set! *db-last-sync* start-time) + (with-output-to-file end-file (lambda ()(print (current-process-id)))) + + ;; release lock here + + (mutex-unlock! *db-multi-sync-mutex*))) + (if (and debug-mode + (> (- start-time last-time) 60)) + (begin + (set! 
last-time start-time) + (debug:print-info 4 *default-log-port* "timestamp -> " (seconds->time-string (current-seconds)) ", time since start -> " (seconds->hr-min-sec (- (current-seconds) *time-zero*)))))) + + ;; keep going unless time to exit + ;; + (if (not *time-to-exit*) + (let delay-loop ((count 0)) + ;;(debug:print-info 13 *default-log-port* "delay-loop top; count="count" pid="(current-process-id)" this-wd-num="this-wd-num" *time-to-exit*="*time-to-exit*) + + (if (and (not *time-to-exit*) + (< count 6)) ;; was 11, changing to 4. + (begin + (thread-sleep! 1) + (delay-loop (+ count 1)))) + (if (not *time-to-exit*) (loop)))) + ;; time to exit, close the no-sync db here + (db:no-sync-close-db no-sync-db) + (if (common:low-noise-print 30) + (debug:print-info 0 *default-log-port* "Exiting watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id) ))))))) ;;" this-wd-num="this-wd-num))))))) + ) Index: subrun-inc.scm ================================================================== --- subrun-inc.scm +++ subrun-inc.scm @@ -14,231 +14,5 @@ ;; GNU General Public License for more details. ;; ;; You should have received a copy of the GNU General Public License ;; along with Megatest. If not, see . -;; strftime('%m/%d/%Y %H:%M:%S','now','localtime') - -(define (subrun:subrun-test-initialized? test-run-dir) - (if (and (common:file-exists? (conc test-run-dir "/subrun-area") ) - (common:file-exists? (conc test-run-dir "/testconfig.subrun") )) - #t - #f)) - -(define (subrun:launch-dashboard test-run-dir) - (if (subrun:subrun-test-initialized? test-run-dir) - (let* ((subarea (subrun:get-runarea test-run-dir))) - (if (and subarea (common:file-exists? subarea)) - (system (conc "cd " subarea ";env -i PATH=$PATH DISPLAY=$DISPLAY HOME=$HOME USER=$USER dashboard &")))))) - -(define (subrun:subrun-removed? test-run-dir) - (if (subrun:subrun-test-initialized? test-run-dir) - (let ((flagfile (conc test-run-dir "/subrun.removed"))) - (if (common:file-exists? 
flagfile) - #t - #f)) - #t)) - -(define (subrun:set-subrun-removed test-run-dir) - (let ((flagfile (conc test-run-dir "/subrun.removed"))) - (if (and (subrun:subrun-test-initialized? test-run-dir) (not (common:file-exists? flagfile))) - (with-output-to-file flagfile - (lambda () (print (current-seconds))))))) - -(define (subrun:unset-subrun-removed test-run-dir) - (let ((flagfile (conc test-run-dir "/subrun.removed"))) - (if (and (subrun:subrun-test-initialized? test-run-dir) (common:file-exists? flagfile)) - (delete-file flagfile)))) - - -(define (subrun:testconfig-defines-subrun? testconfig) - (configf:lookup testconfig "subrun" "runwait")) ;; we use runwait as the flag that a subrun is requested - -(define (subrun:initialize-toprun-test testconfig test-run-dir) - (let ((ra (configf:lookup testconfig "subrun" "run-area")) - (logpro (configf:lookup testconfig "subrun" "logpro")) - (symlink-target (conc test-run-dir "/subrun-area")) - ) - (if (not ra) ;; when runarea is not set we default to *toppath*. However - (let ((fallback-run-area (or *toppath* (conc test-run-dir "/subrun")))) - ;; we need to force the setting in the testconfig so it will - ;; be preserved in the testconfig.subrun file - (configf:set-section-var testconfig "subrun" "run-area" fallback-run-area) - (set! ra fallback-run-area))) - (configf:set-section-var testconfig "logpro" "subrun" logpro) ;; append the logpro rules to the logpro section as stepname subrun - (if (common:file-exists? symlink-target) - (delete-file symlink-target)) - (create-symbolic-link ra symlink-target) - (configf:write-alist testconfig "testconfig.subrun"))) - -(define (subrun:set-state-status test-run-dir state status new-state-status) - (if (and (not (subrun:subrun-removed? test-run-dir)) (subrun:subrun-test-initialized? 
test-run-dir)) - (let* ((action-switches-str - (conc "-set-state-status "new-state-status - (if state (conc " -state "state) "") - (if status (conc " -status "status) ""))) - (log-prefix - (subrun:sanitize-path - (conc "set-state-status="new-state-status - (if state (conc ":state="state) "") - (if status (conc "+status="status) "")))) - (submt-result - (subrun:exec-sub-megatest test-run-dir action-switches-str log-prefix))) - submt-result))) - -(define (subrun:remove-subrun test-run-dir keep-records ) - (if (and (not (subrun:subrun-removed? test-run-dir)) (subrun:subrun-test-initialized? test-run-dir)) - (let* ((action-switches-str - (conc "-remove-runs" - (if keep-records "-keep-records " "") - )) - (remove-result - (subrun:exec-sub-megatest test-run-dir action-switches-str "remove"))) - (if remove-result - (begin - (subrun:set-subrun-removed test-run-dir) - #t) - #f)) - #t)) - -(define (subrun:kill-subrun test-run-dir ) - (if (and (not (subrun:subrun-removed? test-run-dir)) (subrun:subrun-test-initialized? test-run-dir)) - (let* ((action-switches-str - (conc "-kill-runs" )) - (kill-result - (subrun:exec-sub-megatest test-run-dir action-switches-str "kill"))) - kill-result) - #t)) - -(define (subrun:launch-cmd test-run-dir #!optional (sub-cmd "-run")) ;; BUG: "-run" should be changed to "-rerun-clean" but current doesn't work - (if (subrun:subrun-removed? test-run-dir) - (subrun:unset-subrun-removed test-run-dir)) - - (let* ((log-prefix "run") - (switches (subrun:selector+log-switches test-run-dir log-prefix)) - (run-wait #t) - (cmd (conc "megatest " sub-cmd " " switches" " - (if run-wait "-run-wait " "")))) - cmd)) - - -(define (subrun:sanitize-path inpath) - (let* ((insane-pattern (irregex "[^[a-zA-Z0-9_\\-]"))) - (regex#string-substitute insane-pattern "_" inpath #t))) - -(define (subrun:get-runarea test-run-dir) - (if (subrun:subrun-test-initialized? test-run-dir) - (let* ((info-alist (subrun:selector+log-alist - test-run-dir - "foo")) - (run-area (if (list? 
info-alist) - (alist-ref "-start-dir" info-alist equal? #f) - #f))) - run-area) - #f)) - -(define (subrun:selector+log-alist test-run-dir log-prefix) - (let* ((switch-def-alist (common:get-param-mapping flavor: 'config)) - (subrunfile (conc test-run-dir "/testconfig.subrun" )) - (subrundata (with-input-from-file subrunfile read)) - (subrunconfig (configf:alist->config subrundata)) - (run-area (configf:lookup subrunconfig "subrun" "run-area")) - (defvals `(("start-dir" . ,(or run-area ;; default values if not specified in subrun section of tconf - (get-environment-variable "MT_RUN_AREA_HOME") - "/no/rundir/found")) - ("run-name" . ,(or (get-environment-variable "MT_RUNNAME") "NO-RUNNAME")) - ("target" . ,(or (get-environment-variable "MT_TARGET") "NO-TARGET")))) - (switch-alist-pre (filter-map (lambda (item) - (let* ((config-key (car item)) - (switch (cdr item)) - (defval (alist-ref config-key defvals equal? #f)) - (val (or (configf:lookup subrunconfig "subrun" config-key) - defval))) - (if val - (cons switch val) - #f))) - switch-def-alist)) - - ;; testpatt may be modified if all three of mode-patt, tag-expr, and testpatt are null - (mode-patt (alist-ref "-modepatt" switch-alist-pre equal? #f)) - (tag-expr (alist-ref "-tagexpr" switch-alist-pre equal? #f)) - (testpatt (alist-ref "-testpatt" switch-alist-pre equal? - (if (not (or mode-patt tag-expr)) "%" #f))) ;; testpatt is % if not - ;; otherwise specified - - ;; define compact-stem for logfile - (target (alist-ref "-target" switch-alist-pre equal? #f)) ;; want data-structures alist-ref, not alist-lib alist-ref - (runname (alist-ref "-runname" switch-alist-pre equal? 
#f)) - - - (compact-stem (subrun:sanitize-path - (conc - target - "-" - runname - "-" (or testpatt mode-patt tag-expr "NO-TESTPATT")))) - (logfile (conc - test-run-dir "/" - (if log-prefix - (conc (subrun:sanitize-path log-prefix) "-") - "") - compact-stem - ".log")) - ;; swap out testpatt with modified test-patt and add -log - (switch-alist (cons - (cons "-log" logfile) - (map (lambda (item) - (if (equal? (car item) "-testpatt") - (cons "-testpatt" testpatt) - item)) - switch-alist-pre)))) - switch-alist)) - ;; note - get precmd from subrun section - ;; apply to submegatest commands - -(define (subrun:get-log-path test-run-dir log-prefix) - (let* ((alist (subrun:selector+log-alist test-run-dir log-prefix)) - (res (alist-ref "-log" alist equal? #f))) - res)) - -(define (subrun:selector+log-switches test-run-dir log-prefix) - (let* ((switch-alist (subrun:selector+log-alist test-run-dir log-prefix)) - (res - (string-intersperse - (apply - append - (map - (lambda (x) - (list (car x) (cdr x))) - switch-alist)) - " "))) - res)) - -(define (subrun:exec-sub-megatest test-run-dir action-switches-str log-prefix) - (let* ((selector-switches (subrun:selector+log-switches test-run-dir log-prefix)) - (cmd (conc "megatest " selector-switches " " action-switches-str )) - (pid #f) - (proc (lambda () - (debug:print-info 0 *default-log-port* "Running sub megatest command: "cmd) - ;;(set! pid (process-run "/usr/bin/xterm" (list )))))) - (set! pid (process-run "/bin/bash" (list "-c" cmd)))))) - (call-with-environment-variables - (list (cons "PATH" (conc (get-environment-variable "PATH") ":."))) - (lambda () - (common:without-vars proc "^MT_.*"))) - (let processloop ((i 0)) - (let-values (((pid-val exit-status exit-code)(process-wait pid #t))) - (if (eq? pid-val 0) - (begin - (thread-sleep! 2) - (processloop (+ i 1))) - (begin - (debug:print-info 0 *default-log-port* "sub megatest " action-switches-str " completed with exit code " exit-code) - (if (eq? 
0 exit-code) - (begin - #t) - (begin - #f)))))))) - - - -;; (subrun:exec-sub-megatest "/nfs/pdx/disks/icf_env_disk001/bjbarcla/gwa/issues/mtdev/165/megatest/ext-tests/tests/subrun-usecases/toparea/links/SYSTEM_val/RELEASE_val/go/toptest" "-foo" "foo") Index: subrunmod.scm ================================================================== --- subrunmod.scm +++ subrunmod.scm @@ -17,19 +17,252 @@ ;; along with Megatest. If not, see . ;;====================================================================== (declare (unit subrunmod)) + (declare (uses commonmod)) +(declare (uses mtconfigf)) (module subrunmod * (import scheme chicken data-structures extras) -(import (prefix sqlite3 sqlite3:) posix typed-records srfi-18 srfi-69 format ports srfi-1 matchable) +(use (prefix sqlite3 sqlite3:) posix typed-records srfi-18 + srfi-69 format ports srfi-1 matchable irregex + call-with-environment-variables) + (import commonmod) +(import (prefix mtconfigf configf:)) + ;; (use (prefix ulex ulex:)) (include "common_records.scm") +;; strftime('%m/%d/%Y %H:%M:%S','now','localtime') + +(define (subrun:subrun-test-initialized? test-run-dir) + (if (and (common:file-exists? (conc test-run-dir "/subrun-area") ) + (common:file-exists? (conc test-run-dir "/testconfig.subrun") )) + #t + #f)) + +(define (subrun:launch-dashboard test-run-dir) + (if (subrun:subrun-test-initialized? test-run-dir) + (let* ((subarea (subrun:get-runarea test-run-dir))) + (if (and subarea (common:file-exists? subarea)) + (system (conc "cd " subarea ";env -i PATH=$PATH DISPLAY=$DISPLAY HOME=$HOME USER=$USER dashboard &")))))) + +(define (subrun:subrun-removed? test-run-dir) + (if (subrun:subrun-test-initialized? test-run-dir) + (let ((flagfile (conc test-run-dir "/subrun.removed"))) + (if (common:file-exists? flagfile) + #t + #f)) + #t)) + +(define (subrun:set-subrun-removed test-run-dir) + (let ((flagfile (conc test-run-dir "/subrun.removed"))) + (if (and (subrun:subrun-test-initialized? 
test-run-dir) (not (common:file-exists? flagfile))) + (with-output-to-file flagfile + (lambda () (print (current-seconds))))))) + +(define (subrun:unset-subrun-removed test-run-dir) + (let ((flagfile (conc test-run-dir "/subrun.removed"))) + (if (and (subrun:subrun-test-initialized? test-run-dir) (common:file-exists? flagfile)) + (delete-file flagfile)))) + + +(define (subrun:testconfig-defines-subrun? testconfig) + (configf:lookup testconfig "subrun" "runwait")) ;; we use runwait as the flag that a subrun is requested + +(define (subrun:initialize-toprun-test testconfig test-run-dir) + (let ((ra (configf:lookup testconfig "subrun" "run-area")) + (logpro (configf:lookup testconfig "subrun" "logpro")) + (symlink-target (conc test-run-dir "/subrun-area")) + ) + (if (not ra) ;; when runarea is not set we default to *toppath*. However + (let ((fallback-run-area (or *toppath* (conc test-run-dir "/subrun")))) + ;; we need to force the setting in the testconfig so it will + ;; be preserved in the testconfig.subrun file + (configf:set-section-var testconfig "subrun" "run-area" fallback-run-area) + (set! ra fallback-run-area))) + (configf:set-section-var testconfig "logpro" "subrun" logpro) ;; append the logpro rules to the logpro section as stepname subrun + (if (common:file-exists? symlink-target) + (delete-file symlink-target)) + (create-symbolic-link ra symlink-target) + (configf:write-alist testconfig "testconfig.subrun"))) + +(define (subrun:set-state-status test-run-dir state status new-state-status) + (if (and (not (subrun:subrun-removed? test-run-dir)) (subrun:subrun-test-initialized? 
test-run-dir)) + (let* ((action-switches-str + (conc "-set-state-status "new-state-status + (if state (conc " -state "state) "") + (if status (conc " -status "status) ""))) + (log-prefix + (subrun:sanitize-path + (conc "set-state-status="new-state-status + (if state (conc ":state="state) "") + (if status (conc "+status="status) "")))) + (submt-result + (subrun:exec-sub-megatest test-run-dir action-switches-str log-prefix))) + submt-result))) + +(define (subrun:remove-subrun test-run-dir keep-records ) + (if (and (not (subrun:subrun-removed? test-run-dir)) (subrun:subrun-test-initialized? test-run-dir)) + (let* ((action-switches-str + (conc "-remove-runs" + (if keep-records "-keep-records " "") + )) + (remove-result + (subrun:exec-sub-megatest test-run-dir action-switches-str "remove"))) + (if remove-result + (begin + (subrun:set-subrun-removed test-run-dir) + #t) + #f)) + #t)) + +(define (subrun:kill-subrun test-run-dir ) + (if (and (not (subrun:subrun-removed? test-run-dir)) (subrun:subrun-test-initialized? test-run-dir)) + (let* ((action-switches-str + (conc "-kill-runs" )) + (kill-result + (subrun:exec-sub-megatest test-run-dir action-switches-str "kill"))) + kill-result) + #t)) + +(define (subrun:launch-cmd test-run-dir #!optional (sub-cmd "-run")) ;; BUG: "-run" should be changed to "-rerun-clean" but current doesn't work + (if (subrun:subrun-removed? test-run-dir) + (subrun:unset-subrun-removed test-run-dir)) + + (let* ((log-prefix "run") + (switches (subrun:selector+log-switches test-run-dir log-prefix)) + (run-wait #t) + (cmd (conc "megatest " sub-cmd " " switches" " + (if run-wait "-run-wait " "")))) + cmd)) + + +(define (subrun:sanitize-path inpath) + (let* ((insane-pattern (irregex "[^[a-zA-Z0-9_\\-]"))) + (regex#string-substitute insane-pattern "_" inpath #t))) + +(define (subrun:get-runarea test-run-dir) + (if (subrun:subrun-test-initialized? test-run-dir) + (let* ((info-alist (subrun:selector+log-alist + test-run-dir + "foo")) + (run-area (if (list? 
info-alist) + (alist-ref "-start-dir" info-alist equal? #f) + #f))) + run-area) + #f)) + +(define (subrun:selector+log-alist test-run-dir log-prefix) + (let* ((switch-def-alist (common:get-param-mapping flavor: 'config)) + (subrunfile (conc test-run-dir "/testconfig.subrun" )) + (subrundata (with-input-from-file subrunfile read)) + (subrunconfig (configf:alist->config subrundata)) + (run-area (configf:lookup subrunconfig "subrun" "run-area")) + (defvals `(("start-dir" . ,(or run-area ;; default values if not specified in subrun section of tconf + (get-environment-variable "MT_RUN_AREA_HOME") + "/no/rundir/found")) + ("run-name" . ,(or (get-environment-variable "MT_RUNNAME") "NO-RUNNAME")) + ("target" . ,(or (get-environment-variable "MT_TARGET") "NO-TARGET")))) + (switch-alist-pre (filter-map (lambda (item) + (let* ((config-key (car item)) + (switch (cdr item)) + (defval (alist-ref config-key defvals equal? #f)) + (val (or (configf:lookup subrunconfig "subrun" config-key) + defval))) + (if val + (cons switch val) + #f))) + switch-def-alist)) + + ;; testpatt may be modified if all three of mode-patt, tag-expr, and testpatt are null + (mode-patt (alist-ref "-modepatt" switch-alist-pre equal? #f)) + (tag-expr (alist-ref "-tagexpr" switch-alist-pre equal? #f)) + (testpatt (alist-ref "-testpatt" switch-alist-pre equal? + (if (not (or mode-patt tag-expr)) "%" #f))) ;; testpatt is % if not + ;; otherwise specified + + ;; define compact-stem for logfile + (target (alist-ref "-target" switch-alist-pre equal? #f)) ;; want data-structures alist-ref, not alist-lib alist-ref + (runname (alist-ref "-runname" switch-alist-pre equal? 
#f)) + + + (compact-stem (subrun:sanitize-path + (conc + target + "-" + runname + "-" (or testpatt mode-patt tag-expr "NO-TESTPATT")))) + (logfile (conc + test-run-dir "/" + (if log-prefix + (conc (subrun:sanitize-path log-prefix) "-") + "") + compact-stem + ".log")) + ;; swap out testpatt with modified test-patt and add -log + (switch-alist (cons + (cons "-log" logfile) + (map (lambda (item) + (if (equal? (car item) "-testpatt") + (cons "-testpatt" testpatt) + item)) + switch-alist-pre)))) + switch-alist)) + ;; note - get precmd from subrun section + ;; apply to submegatest commands + +(define (subrun:get-log-path test-run-dir log-prefix) + (let* ((alist (subrun:selector+log-alist test-run-dir log-prefix)) + (res (alist-ref "-log" alist equal? #f))) + res)) + +(define (subrun:selector+log-switches test-run-dir log-prefix) + (let* ((switch-alist (subrun:selector+log-alist test-run-dir log-prefix)) + (res + (string-intersperse + (apply + append + (map + (lambda (x) + (list (car x) (cdr x))) + switch-alist)) + " "))) + res)) + +(define (subrun:exec-sub-megatest test-run-dir action-switches-str log-prefix) + (let* ((selector-switches (subrun:selector+log-switches test-run-dir log-prefix)) + (cmd (conc "megatest " selector-switches " " action-switches-str )) + (pid #f) + (proc (lambda () + (debug:print-info 0 *default-log-port* "Running sub megatest command: "cmd) + ;;(set! pid (process-run "/usr/bin/xterm" (list )))))) + (set! pid (process-run "/bin/bash" (list "-c" cmd)))))) + (call-with-environment-variables + (list (cons "PATH" (conc (get-environment-variable "PATH") ":."))) + (lambda () + (common:without-vars proc "^MT_.*"))) + (let processloop ((i 0)) + (let-values (((pid-val exit-status exit-code)(process-wait pid #t))) + (if (eq? pid-val 0) + (begin + (thread-sleep! 2) + (processloop (+ i 1))) + (begin + (debug:print-info 0 *default-log-port* "sub megatest " action-switches-str " completed with exit code " exit-code) + (if (eq? 
0 exit-code) + (begin + #t) + (begin + #f)))))))) + + + +;; (subrun:exec-sub-megatest "/nfs/pdx/disks/icf_env_disk001/bjbarcla/gwa/issues/mtdev/165/megatest/ext-tests/tests/subrun-usecases/toparea/links/SYSTEM_val/RELEASE_val/go/toptest" "-foo" "foo") ) Index: task_records.scm ================================================================== --- task_records.scm +++ task_records.scm @@ -17,28 +17,28 @@ ;; along with Megatest. If not, see . ;;====================================================================== ;; make-vector-record tasks task id action owner state target name test item params creation_time execution_time (define (make-tasks:task)(make-vector 11)) -(define-inline (tasks:task-get-id vec) (vector-ref vec 0)) -(define-inline (tasks:task-get-action vec) (vector-ref vec 1)) -(define-inline (tasks:task-get-owner vec) (vector-ref vec 2)) -(define-inline (tasks:task-get-state vec) (vector-ref vec 3)) -(define-inline (tasks:task-get-target vec) (vector-ref vec 4)) -(define-inline (tasks:task-get-name vec) (vector-ref vec 5)) -(define-inline (tasks:task-get-testpatt vec) (vector-ref vec 6)) -(define-inline (tasks:task-get-keylock vec) (vector-ref vec 7)) -(define-inline (tasks:task-get-params vec) (vector-ref vec 8)) -(define-inline (tasks:task-get-creation_time vec) (vector-ref vec 9)) -(define-inline (tasks:task-get-execution_time vec) (vector-ref vec 10)) - -(define-inline (tasks:task-set-state! vec val)(vector-set! 
vec 3 val)) +(define (tasks:task-get-id vec) (vector-ref vec 0)) +(define (tasks:task-get-action vec) (vector-ref vec 1)) +(define (tasks:task-get-owner vec) (vector-ref vec 2)) +(define (tasks:task-get-state vec) (vector-ref vec 3)) +(define (tasks:task-get-target vec) (vector-ref vec 4)) +(define (tasks:task-get-name vec) (vector-ref vec 5)) +(define (tasks:task-get-testpatt vec) (vector-ref vec 6)) +(define (tasks:task-get-keylock vec) (vector-ref vec 7)) +(define (tasks:task-get-params vec) (vector-ref vec 8)) +(define (tasks:task-get-creation_time vec) (vector-ref vec 9)) +(define (tasks:task-get-execution_time vec) (vector-ref vec 10)) + +(define (tasks:task-set-state! vec val)(vector-set! vec 3 val)) ;; make-vector-record tasks monitor id pid start_time last_update hostname username (define (make-tasks:monitor)(make-vector 5)) -(define-inline (tasks:monitor-get-id vec) (vector-ref vec 0)) -(define-inline (tasks:monitor-get-pid vec) (vector-ref vec 1)) -(define-inline (tasks:monitor-get-start_time vec) (vector-ref vec 2)) -(define-inline (tasks:monitor-get-last_update vec) (vector-ref vec 3)) -(define-inline (tasks:monitor-get-hostname vec) (vector-ref vec 4)) -(define-inline (tasks:monitor-get-username vec) (vector-ref vec 5)) +(define (tasks:monitor-get-id vec) (vector-ref vec 0)) +(define (tasks:monitor-get-pid vec) (vector-ref vec 1)) +(define (tasks:monitor-get-start_time vec) (vector-ref vec 2)) +(define (tasks:monitor-get-last_update vec) (vector-ref vec 3)) +(define (tasks:monitor-get-hostname vec) (vector-ref vec 4)) +(define (tasks:monitor-get-username vec) (vector-ref vec 5)) Index: tasks-inc.scm ================================================================== --- tasks-inc.scm +++ tasks-inc.scm @@ -15,1069 +15,5 @@ ;; You should have received a copy of the GNU General Public License ;; along with Megatest. If not, see . 
;; ;; strftime('%m/%d/%Y %H:%M:%S','now','localtime') - -;;====================================================================== -;; Tasks db -;;====================================================================== - -;; wait up to aprox n seconds for a journal to go away -;; -(define (tasks:wait-on-journal path n #!key (remove #f)(waiting-msg #f)) - (if (not (string? path)) - (debug:print-error 0 *default-log-port* "Called tasks:wait-on-journal with path=" path " (not a string)") - (let ((fullpath (conc path "-journal"))) - (handle-exceptions - exn - (begin - (print-call-chain (current-error-port)) - (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) - (debug:print 5 *default-log-port* " exn=" (condition->list exn)) - (debug:print 0 *default-log-port* "tasks:wait-on-journal failed. Continuing on, you can ignore this call-chain") - #t) ;; if stuff goes wrong just allow it to move on - (let loop ((journal-exists (common:file-exists? fullpath)) - (count n)) ;; wait ten times ... - (if journal-exists - (begin - (if (and waiting-msg - (eq? (modulo n 30) 0)) - (debug:print 0 *default-log-port* waiting-msg)) - (if (> count 0) - (begin - (thread-sleep! 1) - (loop (common:file-exists? fullpath) - (- count 1))) - (begin - (debug:print 0 *default-log-port* "ERROR: removing the journal file " fullpath ", this is not good. Look for disk full, write access and other issues.") - (if remove (system (conc "rm -rf " fullpath))) - #f))) - #t)))))) - -(define (tasks:get-task-db-path) - (let ((dbdir (or (configf:lookup *configdat* "setup" "monitordir") - (configf:lookup *configdat* "setup" "dbdir") - (conc (common:get-linktree) "/.db")))) - (handle-exceptions - exn - (begin - (debug:print-error 0 *default-log-port* "Couldn't create path to " dbdir) - (exit 1)) - (if (not (directory? 
dbdir))(create-directory dbdir #t))) - dbdir)) - -;; If file exists AND -;; file readable -;; ==> open it -;; If file exists AND -;; file NOT readable -;; ==> open in-mem version -;; If file NOT exists -;; ==> open in-mem version -;; -(define (tasks:open-db #!key (numretries 4)) - (if *task-db* - *task-db* - (handle-exceptions - exn - (if (> numretries 0) - (begin - (print-call-chain (current-error-port)) - (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) - (debug:print 5 *default-log-port* " exn=" (condition->list exn)) - (thread-sleep! 1) - (tasks:open-db numretries (- numretries 1))) - (begin - (print-call-chain (current-error-port)) - (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) - (debug:print 5 *default-log-port* " exn=" (condition->list exn)))) - (let* ((dbpath (db:dbfile-path )) ;; (tasks:get-task-db-path)) - (dbfile (conc dbpath "/monitor.db")) - (avail (tasks:wait-on-journal dbpath 10)) ;; wait up to about 10 seconds for the journal to go away - (exists (common:file-exists? dbpath)) - (write-access (file-write-access? dbpath)) - (mdb (cond ;; what the hek is *toppath* doing here? - ((and (string? *toppath*)(file-write-access? *toppath*)) - (sqlite3:open-database dbfile)) - ((file-read-access? dbpath) (sqlite3:open-database dbfile)) - (else (sqlite3:open-database ":memory:")))) ;; (never-give-up-open-db dbpath)) - (handler (sqlite3:make-busy-timeout 36000))) - (if (and exists - (not write-access)) - (set! *db-write-access* write-access)) ;; only unset so other db's also can use this control - (sqlite3:set-busy-handler! mdb handler) - (db:set-sync mdb) ;; (sqlite3:execute mdb (conc "PRAGMA synchronous = 0;")) - ;; (if (or (and (not exists) - ;; (file-write-access? *toppath*)) - ;; (not (file-read-access? 
dbpath))) - ;; (begin - ;; - ;; TASKS QUEUE MOVED TO main.db - ;; - ;; (sqlite3:execute mdb "CREATE TABLE IF NOT EXISTS tasks_queue (id INTEGER PRIMARY KEY, - ;; action TEXT DEFAULT '', - ;; owner TEXT, - ;; state TEXT DEFAULT 'new', - ;; target TEXT DEFAULT '', - ;; name TEXT DEFAULT '', - ;; testpatt TEXT DEFAULT '', - ;; keylock TEXT, - ;; params TEXT, - ;; creation_time TIMESTAMP, - ;; execution_time TIMESTAMP);") - (sqlite3:execute mdb "CREATE TABLE IF NOT EXISTS monitors (id INTEGER PRIMARY KEY, - pid INTEGER, - start_time TIMESTAMP, - last_update TIMESTAMP, - hostname TEXT, - username TEXT, - CONSTRAINT monitors_constraint UNIQUE (pid,hostname));") - (sqlite3:execute mdb "CREATE TABLE IF NOT EXISTS servers (id INTEGER PRIMARY KEY, - pid INTEGER, - interface TEXT, - hostname TEXT, - port INTEGER, - pubport INTEGER, - start_time TIMESTAMP, - priority INTEGER, - state TEXT, - mt_version TEXT, - heartbeat TIMESTAMP, - transport TEXT, - run_id INTEGER);") - ;; CONSTRAINT servers_constraint UNIQUE (pid,hostname,port));") - (sqlite3:execute mdb "CREATE TABLE IF NOT EXISTS clients (id INTEGER PRIMARY KEY, - server_id INTEGER, - pid INTEGER, - hostname TEXT, - cmdline TEXT, - login_time TIMESTAMP, - logout_time TIMESTAMP DEFAULT -1, - CONSTRAINT clients_constraint UNIQUE (pid,hostname));") - - ;)) - (set! 
*task-db* (cons mdb dbpath)) - *task-db*)))) - -;;====================================================================== -;; Server and client management -;;====================================================================== - -;; make-vector-record tasks hostinfo id interface port pubport transport pid hostname -(define (tasks:hostinfo-get-id vec) (vector-ref vec 0)) -(define (tasks:hostinfo-get-interface vec) (vector-ref vec 1)) -(define (tasks:hostinfo-get-port vec) (vector-ref vec 2)) -(define (tasks:hostinfo-get-pubport vec) (vector-ref vec 3)) -(define (tasks:hostinfo-get-transport vec) (vector-ref vec 4)) -(define (tasks:hostinfo-get-pid vec) (vector-ref vec 5)) -(define (tasks:hostinfo-get-hostname vec) (vector-ref vec 6)) - -(define (tasks:need-server run-id) - (equal? (configf:lookup *configdat* "server" "required") "yes")) - -;; no elegance here ... -;; -(define (tasks:kill-server hostname pid #!key (kill-switch "")) - (debug:print-info 0 *default-log-port* "Attempting to kill server process " pid " on host " hostname) - (setenv "TARGETHOST" hostname) - (let* ((logdir (if (directory-exists? "logs") - "logs/" - "")) - (logfile (if logdir (conc "logs/server-"pid"-"hostname".log") #f)) - (gzfile (if logfile (conc logfile ".gz")))) - (setenv "TARGETHOST_LOGF" (conc logdir "server-kills.log")) - - (system (conc "nbfake kill "kill-switch" "pid)) - - (when logfile - (thread-sleep! 0.5) - (if (common:file-exists? gzfile) (delete-file gzfile)) - (system (conc "gzip " logfile)) - - (unsetenv "TARGETHOST_LOGF") - (unsetenv "TARGETHOST")))) - - -;;====================================================================== -;; M O N I T O R S -;;====================================================================== - -(define (tasks:remove-monitor-record mdb) - (sqlite3:execute mdb "DELETE FROM monitors WHERE pid=? AND hostname=?;" - (current-process-id) - (get-host-name))) - -(define (tasks:get-monitors mdb) - (let ((res '())) - (sqlite3:for-each-row - (lambda (a . 
rem) - (set! res (cons (apply vector a rem) res))) - mdb - "SELECT id,pid,strftime('%m/%d/%Y %H:%M',datetime(start_time,'unixepoch'),'localtime'),strftime('%m/%d/%Y %H:%M:%S',datetime(last_update,'unixepoch'),'localtime'),hostname,username FROM monitors ORDER BY last_update ASC;") - (reverse res) - )) - -(define (tasks:monitors->text-table monitors) - (let ((fmtstr "~4a~8a~20a~20a~10a~10a")) - (conc (format #f fmtstr "id" "pid" "start time" "last update" "hostname" "user") "\n" - (string-intersperse - (map (lambda (monitor) - (format #f fmtstr - (tasks:monitor-get-id monitor) - (tasks:monitor-get-pid monitor) - (tasks:monitor-get-start_time monitor) - (tasks:monitor-get-last_update monitor) - (tasks:monitor-get-hostname monitor) - (tasks:monitor-get-username monitor))) - monitors) - "\n")))) - -;; update the last_update field with the current time and -;; if any monitors appear dead, remove them -(define (tasks:monitors-update mdb) - (sqlite3:execute mdb "UPDATE monitors SET last_update=strftime('%s','now') WHERE pid=? AND hostname=?;" - (current-process-id) - (get-host-name)) - (let ((deadlist '())) - (sqlite3:for-each-row - (lambda (id pid host last-update delta) - (print "Going to delete stale record for monitor with pid " pid " on host " host " last updated " delta " seconds ago") - (set! 
deadlist (cons id deadlist))) - mdb - "SELECT id,pid,hostname,last_update,strftime('%s','now')-last_update AS delta FROM monitors WHERE delta > 700;") - (sqlite3:execute mdb (conc "DELETE FROM monitors WHERE id IN ('" (string-intersperse (map conc deadlist) "','") "');"))) - ) -(define (tasks:register-monitor db port) - (let* ((pid (current-process-id)) - (hostname (get-host-name)) - (userinfo (user-information (current-user-id))) - (username (car userinfo))) - (print "Register monitor, pid: " pid ", hostname: " hostname ", port: " port ", username: " username) - (sqlite3:execute db "INSERT INTO monitors (pid,start_time,last_update,hostname,username) VALUES (?,strftime('%s','now'),strftime('%s','now'),?,?);" - pid hostname username))) - -(define (tasks:get-num-alive-monitors mdb) - (let ((res 0)) - (sqlite3:for-each-row - (lambda (count) - (set! res count)) - mdb - "SELECT count(id) FROM monitors WHERE last_update < (strftime('%s','now') - 300) AND username=?;" - (car (user-information (current-user-id)))) - res)) - -;; -#;(define (tasks:start-monitor db mdb) - (if (> (tasks:get-num-alive-monitors mdb) 2) ;; have two running, no need for more - (debug:print-info 1 *default-log-port* "Not starting monitor, already have more than two running") - (let* ((megatestdb (conc *toppath* "/megatest.db")) - (monitordbf (conc (db:dbfile-path #f) "/monitor.db")) - (last-db-update 0)) ;; (file-modification-time megatestdb))) - (task:register-monitor mdb) - (let loop ((count 0) - (next-touch 0)) ;; next-touch is the time where we need to update last_update - ;; if the db has been modified we'd best look at the task queue - (let ((modtime (file-modification-time megatestdbpath ))) - (if (> modtime last-db-update) - (tasks:process-queue db)) ;; BROKEN. mdb last-db-update megatestdb next-touch)) - ;; WARNING: Possible race conditon here!! - ;; should this update be immediately after the task-get-action call above? 
- (if (> (current-seconds) next-touch) - (begin - (tasks:monitors-update mdb) - (loop (+ count 1)(+ (current-seconds) 240))) - (loop (+ count 1) next-touch))))))) - -;;====================================================================== -;; T A S K S Q U E U E -;; -;; NOTE:: These operate on task_queue which is in main.db -;; -;;====================================================================== - -;; NOTE: It might be good to add one more layer of checking to ensure -;; that no task gets run in parallel. - -;; id INTEGER PRIMARY KEY, -;; action TEXT DEFAULT '', -;; owner TEXT, -;; state TEXT DEFAULT 'new', -;; target TEXT DEFAULT '', -;; name TEXT DEFAULT '', -;; testpatt TEXT DEFAULT '', -;; keylock TEXT, -;; params TEXT, -;; creation_time TIMESTAMP DEFAULT (strftime('%s','now')), -;; execution_time TIMESTAMP); - - -;; register a task -(define (tasks:add dbstruct action owner target runname testpatt params) - (db:with-db - dbstruct #f #t - (lambda (db) - (sqlite3:execute db "INSERT INTO tasks_queue (action,owner,state,target,name,testpatt,params,creation_time,execution_time) - VALUES (?,?,'new',?,?,?,?,strftime('%s','now'),0);" - action - owner - target - runname - testpatt - (if params params ""))))) - -(define (keys:key-vals-hash->target keys key-params) - (let ((tmp (hash-table-ref/default key-params (vector-ref (car keys) 0) ""))) - (if (> (length keys) 1) - (for-each (lambda (key) - (set! 
tmp (conc tmp "/" (hash-table-ref/default key-params (vector-ref key 0) "")))) - (cdr keys))) - tmp)) - -;; for use from the gui, not ported -;; -;; (define (tasks:add-from-params mdb action keys key-params var-params) -;; (let ((target (keys:key-vals-hash->target keys key-params)) -;; (owner (car (user-information (current-user-id)))) -;; (runname (hash-table-ref/default var-params "runname" #f)) -;; (testpatts (hash-table-ref/default var-params "testpatts" "%")) -;; (params (hash-table-ref/default var-params "params" ""))) -;; (tasks:add mdb action owner target runname testpatts params))) - -;; return one task from those who are 'new' OR 'waiting' AND more than 10sec old -;; -(define (tasks:snag-a-task dbstruct) - (let ((res #f) - (keytxt (conc (current-process-id) "-" (get-host-name) "-" (car (user-information (current-user-id)))))) - (db:with-db - dbstruct #f #t - (lambda (db) - ;; first randomly set a new to pid-hostname-hostname - (sqlite3:execute - db - "UPDATE tasks_queue SET keylock=? WHERE id IN - (SELECT id FROM tasks_queue - WHERE state='new' OR - (state='waiting' AND (strftime('%s','now')-execution_time) > 10) OR - state='reset' - ORDER BY RANDOM() LIMIT 1);" keytxt) - - (sqlite3:for-each-row - (lambda (id . rem) - (set! res (apply vector id rem))) - db - "SELECT id,action,owner,state,target,name,test,item,params,creation_time,execution_time FROM tasks_queue WHERE keylock=? ORDER BY execution_time ASC LIMIT 1;" keytxt) - (if res ;; yep, have work to be done - (begin - (sqlite3:execute db "UPDATE tasks_queue SET state='inprogress',execution_time=strftime('%s','now') WHERE id=?;" - (tasks:task-get-id res)) - res) - #f))))) - -(define (tasks:reset-stuck-tasks dbstruct) - (let ((res '())) - (db:with-db - dbstruct #f #t - (lambda (db) - (sqlite3:for-each-row - (lambda (id delta) - (set! 
res (cons id res))) - db - "SELECT id,strftime('%s','now')-execution_time AS delta FROM tasks_queue WHERE state='inprogress' AND delta>700 ORDER BY delta DESC LIMIT 2;") - (sqlite3:execute - db - (conc "UPDATE tasks_queue SET state='reset' WHERE id IN ('" (string-intersperse (map conc res) "','") "');") - ))))) - -;; return all tasks in the tasks_queue table -;; -(define (tasks:get-tasks dbstruct types states) - (let ((res '())) - (db:with-db - dbstruct #f #f - (lambda (db) - (sqlite3:for-each-row - (lambda (id . rem) - (set! res (cons (apply vector id rem) res))) - db - (conc "SELECT id,action,owner,state,target,name,test,item,params,creation_time,execution_time - FROM tasks_queue " - ;; WHERE - ;; state IN " statesstr " AND - ;; action IN " actionsstr - " ORDER BY creation_time DESC;")) - res)))) - -(define (tasks:get-last dbstruct target runname) - (let ((res #f)) - (db:with-db - dbstruct #f #f - (lambda (db) - (sqlite3:for-each-row - (lambda (id . rem) - (set! res (apply vector id rem))) - db - (conc "SELECT id,action,owner,state,target,name,testpatt,keylock,params,creation_time,execution_time - FROM tasks_queue - WHERE - target = ? AND name =? 
- ORDER BY creation_time DESC LIMIT 1;") - target runname) - res)))) - -;; remove tasks given by a string of numbers comma separated -(define (tasks:remove-queue-entries dbstruct task-ids) - (db:with-db - dbstruct #f #t - (lambda (db) - (sqlite3:execute db (conc "DELETE FROM tasks_queue WHERE id IN (" task-ids ");"))))) - -#;(define (tasks:process-queue dbstruct) - (let* ((task (tasks:snag-a-task dbstruct)) - (action (if task (tasks:task-get-action task) #f))) - (if action (print "tasks:process-queue task: " task)) - (if action - (case (string->symbol action) - ((run) (tasks:start-run dbstruct task)) - ((remove) (tasks:remove-runs dbstruct task)) - ((lock) (tasks:lock-runs dbstruct task)) - ;; ((monitor) (tasks:start-monitor db task)) - #;((rollup) (tasks:rollup-runs dbstruct task)) - ((updatemeta)(tasks:update-meta dbstruct task)) - #;((kill) (tasks:kill-monitors dbstruct task)))))) - -(define (tasks:tasks->text tasks) - (let ((fmtstr "~10a~10a~10a~12a~20a~12a~12a~10a")) - (conc (format #f fmtstr "id" "action" "owner" "state" "target" "runname" "testpatts" "params") "\n" - (string-intersperse - (map (lambda (task) - (format #f fmtstr - (tasks:task-get-id task) - (tasks:task-get-action task) - (tasks:task-get-owner task) - (tasks:task-get-state task) - (tasks:task-get-target task) - (tasks:task-get-name task) - (tasks:task-get-testpatt task) - ;; (tasks:task-get-item task) - (tasks:task-get-params task))) - tasks) "\n")))) - -(define (tasks:set-state dbstruct task-id state) - (db:with-db - dbstruct #f #t - (lambda (db) - (sqlite3:execute db "UPDATE tasks_queue SET state=? 
WHERE id=?;" - state - task-id)))) - -;;====================================================================== -;; Access using task key (stored in params; (hash-table->alist flags) hostname pid -;;====================================================================== - -(define (tasks:param-key->id dbstruct task-params) - (db:with-db - dbstruct #f #f - (lambda (db) - (handle-exceptions - exn - #f - (sqlite3:first-result db "SELECT id FROM tasks_queue WHERE params LIKE ?;" - task-params))))) - -(define (tasks:set-state-given-param-key dbstruct param-key new-state) - (db:with-db - dbstruct #f #t - (lambda (db) - (sqlite3:execute db "UPDATE tasks_queue SET state=? WHERE params LIKE ?;" new-state param-key)))) - -(define (tasks:get-records-given-param-key dbstruct param-key state-patt action-patt test-patt) - (db:with-db - dbstruct #f #f - (lambda (db) - (handle-exceptions - exn - '() - (sqlite3:first-row db "SELECT id,action,owner,state,target,name,testpatt,keylock,params WHERE - params LIKE ? AND state LIKE ? AND action LIKE ? AND testpatt LIKE ?;" - param-key state-patt action-patt test-patt))))) - -(define (tasks:find-task-queue-records dbstruct target run-name test-patt state-patt action-patt) - ;; (handle-exceptions - ;; exn - ;; '() - ;; (sqlite3:first-row - (let ((db (db:delay-if-busy (db:get-db dbstruct))) - (res '())) - (sqlite3:for-each-row - (lambda (a . b) - (set! res (cons (cons a b) res))) - db "SELECT id,action,owner,state,target,name,testpatt,keylock,params FROM tasks_queue - WHERE - target = ? AND name = ? AND state LIKE ? AND action LIKE ? AND testpatt LIKE ?;" - target run-name state-patt action-patt test-patt) - res)) ;; ) - -;; kill any runner processes (i.e. processes handling -runtests) that match target/runname -;; -;; do a remote call to get the task queue info but do the killing as self here. 
-;; -(define (tasks:kill-runner target run-name testpatt) - (let ((records (rmt:tasks-find-task-queue-records target run-name testpatt "running" "run-tests")) - (hostpid-rx (regexp "\\s+(\\w+)\\s+(\\d+)$"))) ;; host pid is at end of param string - (if (null? records) - (debug:print 0 *default-log-port* "No run launching processes found for " target " / " run-name " with testpatt " (or testpatt "* no testpatt specified! *")) - (debug:print 0 *default-log-port* "Found " (length records) " run(s) to kill.")) - (for-each - (lambda (record) - (let* ((param-key (list-ref record 8)) - (match-dat (string-search hostpid-rx param-key))) - (if match-dat - (let ((hostname (cadr match-dat)) - (pid (string->number (caddr match-dat)))) - (debug:print 0 *default-log-port* "Sending SIGINT to process " pid " on host " hostname) - (if (equal? (get-host-name) hostname) - (if (process:alive? pid) - (begin - (handle-exceptions - exn - (begin - (debug:print 0 *default-log-port* "Kill of process " pid " on host " hostname " failed.") - (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) - #t) - (process-signal pid signal/int) - (thread-sleep! 5) - (if (process:alive? pid) - (process-signal pid signal/kill))))) - ;; (call-with-environment-variables - (let ((old-targethost (getenv "TARGETHOST"))) - (setenv "TARGETHOST" hostname) - (setenv "TARGETHOST_LOGF" "server-kills.log") - (system (conc "nbfake kill " pid)) - (if old-targethost (setenv "TARGETHOST" old-targethost)) - (unsetenv "TARGETHOST") - (unsetenv "TARGETHOST_LOGF")))) - (debug:print-error 0 *default-log-port* "no record or improper record for " target "/" run-name " in tasks_queue in main.db")))) - records))) - -;; (define (tasks:start-run dbstruct mdb task) -;; (let ((flags (make-hash-table))) -;; (hash-table-set! flags "-rerun" "NOT_STARTED") -;; (if (not (string=? (tasks:task-get-params task) "")) -;; (hash-table-set! 
flags "-setvars" (tasks:task-get-params task))) -;; (print "Starting run " task) -;; ;; sillyness, just call the damn routine with the task vector and be done with it. FIXME SOMEDAY -;; (runs:run-tests db -;; (tasks:task-get-target task) -;; (tasks:task-get-name task) -;; (tasks:task-get-test task) -;; (tasks:task-get-item task) -;; (tasks:task-get-owner task) -;; flags) -;; (tasks:set-state mdb (tasks:task-get-id task) "waiting"))) -;; -;; (define (tasks:rollup-runs db mdb task) -;; (let* ((flags (make-hash-table)) -;; (keys (db:get-keys db)) -;; (keyvals (keys:target-keyval keys (tasks:task-get-target task)))) -;; ;; (hash-table-set! flags "-rerun" "NOT_STARTED") -;; (print "Starting rollup " task) -;; ;; sillyness, just call the damn routine with the task vector and be done with it. FIXME SOMEDAY -;; (runs:rollup-run db -;; keys -;; keyvals -;; (tasks:task-get-name task) -;; (tasks:task-get-owner task)) -;; (tasks:set-state mdb (tasks:task-get-id task) "waiting"))) - -;;====================================================================== -;; S Y N C T O P O S T G R E S Q L -;;====================================================================== - -;; In the spirit of "dump your junk in the tasks module" I'll put the -;; sync to postgres here for now. - -;; attempt to automatically set up an area. call only if get area by path -;; returns naught of interest -;; -(define (tasks:set-area dbh configdat #!key (toppath #f)) ;; could I safely put *toppath* in for the default for toppath? when would it be evaluated? 
- (let loop ((area-name (or (configf:lookup configdat "setup" "area-name") - (common:get-area-name))) - (modifier 'none)) - (let ((success (handle-exceptions - exn - (begin - (debug:print 0 *default-log-port* "ERROR: cannot create area entry, " ((condition-property-accessor 'exn 'message) exn)) - #f) ;; FIXME: I don't care for now but I should look at *why* there was an exception - (pgdb:add-area dbh area-name (or toppath *toppath*))))) - (or success - (case modifier - ((none)(loop (conc (current-user-name) "_" area-name) 'user)) - ((user)(loop (conc (substring (common:get-area-path-signature) 0 4) - area-name) 'areasig)) - (else #f)))))) ;; give up - -(define (task:print-runtime run-times saperator) -(for-each - (lambda (run-time-info) - (let* ((run-name (vector-ref run-time-info 0)) - (run-time (vector-ref run-time-info 1)) - (target (vector-ref run-time-info 2))) - (print target saperator run-name saperator run-time ))) - run-times)) - -(define (task:print-runtime-as-json run-times) - (let loop ((run-time-info (car run-times)) - (rema (cdr run-times)) - (str "")) - (let* ((run-name (vector-ref run-time-info 0)) - (run-time (vector-ref run-time-info 1)) - (target (vector-ref run-time-info 2))) - ;(print (not (equal? str ""))) - (if (not (equal? str "")) - (set! str (conc str ","))) - (if (null? rema) - (print "[" str "{target:" target ",run-name:" run-name ", run-time:" run-time "}]") - (loop (car rema) (cdr rema) (conc str "{target:" target ", run-name:" run-name ", run-time:" run-time "}")))))) - -(define (task:get-run-times) - (let* ( - (run-patt (if (args:get-arg "-run-patt") - (args:get-arg "-run-patt") - "%")) - (target-patt (if (args:get-arg "-target-patt") - (args:get-arg "-target-patt") - "%")) - - (run-times (rmt:get-run-times run-patt target-patt ))) - (if (eq? (length run-times) 0) - (begin - (print "Data not found!!") - (exit))) - (if (equal? (args:get-arg "-dumpmode") "json") - (task:print-runtime-as-json run-times) - (if (equal? 
(args:get-arg "-dumpmode") "csv") - (task:print-runtime run-times ",") - (task:print-runtime run-times " "))))) - - -(define (task:print-testtime test-times saperator) -(for-each - (lambda (test-time-info) - (let* ((test-name (vector-ref test-time-info 0)) - (test-time (vector-ref test-time-info 2)) - (test-item (if (eq? (string-length (vector-ref test-time-info 1)) 0) - "N/A" - (vector-ref test-time-info 1)))) - (print test-name saperator test-item saperator test-time ))) - test-times)) - -(define (task:print-testtime-as-json test-times) - (let loop ((test-time-info (car test-times)) - (rema (cdr test-times)) - (str "")) - (let* ((test-name (vector-ref test-time-info 0)) - (test-time (vector-ref test-time-info 2)) - (item (vector-ref test-time-info 1))) - ;(print (not (equal? str ""))) - (if (not (equal? str "")) - (set! str (conc str ","))) - (if (null? rema) - (print "[" str "{test-name:" test-name ", item-path:" item ", test-time:" test-time "}]") - (loop (car rema) (cdr rema) (conc str "{test-name:" test-name ", item-path:" item ", test-time:" test-time "}")))))) - - - (define (task:get-test-times) - (let* ((runname (if (args:get-arg "-runname") - (args:get-arg "-runname") - #f)) - (target (if (args:get-arg "-target") - (args:get-arg "-target") - #f)) - - (test-times (rmt:get-test-times runname target ))) - (if (not runname) - (begin - (print "Error: Missing argument -runname") - (exit))) - (if (string-contains runname "%") - (begin - (print "Error: Invalid runname, '%' not allowed (" runname ") ") - (exit))) - (if (not target) - (begin - (print "Error: Missing argument -target") - (exit))) - (if (string-contains target "%") - (begin - (print "Error: Invalid target, '%' not allowed (" target ") ") - (exit))) - - (if (eq? (length test-times) 0) - (begin - (print "Data not found!!") - (exit))) - (if (equal? (args:get-arg "-dumpmode") "json") - (task:print-testtime-as-json test-times) - (if (equal? 
(args:get-arg "-dumpmode") "csv") - (task:print-testtime test-times ",") - (task:print-testtime test-times " "))))) - - - -;; gets mtpg-run-id and syncs the record if different -;; -(define (tasks:run-id->mtpg-run-id dbh cached-info run-id area-info smallest-last-update-time) - (let* ((runs-ht (hash-table-ref cached-info 'runs)) - (runinf (hash-table-ref/default runs-ht run-id #f)) - (area-id (vector-ref area-info 0))) - (if runinf - runinf ;; already cached - (let* ((run-dat (rmt:get-run-info run-id)) ;; NOTE: get-run-info returns a vector < row header > - (run-name (rmt:get-run-name-from-id run-id)) - (row (db:get-rows run-dat)) ;; yes, this returns a single row - (header (db:get-header run-dat)) - (state (db:get-value-by-header row header "state")) - (status (db:get-value-by-header row header "status")) - (owner (db:get-value-by-header row header "owner")) - (event-time (db:get-value-by-header row header "event_time")) - (comment (db:get-value-by-header row header "comment")) - (fail-count (db:get-value-by-header row header "fail_count")) - (pass-count (db:get-value-by-header row header "pass_count")) - (db-contour (db:get-value-by-header row header "contour")) - (contour (if (args:get-arg "-prepend-contour") - (if (and db-contour (not (equal? db-contour "")) (string? db-contour )) - (begin - (debug:print-info 1 *default-log-port* "db-contour") - db-contour) - (args:get-arg "-contour")))) - (run-tag (if (args:get-arg "-run-tag") - (args:get-arg "-run-tag") - "")) - (last-update (db:get-value-by-header row header "last_update")) - (keytarg (if (or (args:get-arg "-prepend-contour") (args:get-arg "-prefix-target")) - (conc "MT_CONTOUR/MT_AREA/" (string-intersperse (rmt:get-keys) "/")) (string-intersperse (rmt:get-keys) "/"))) ;; e.g. 
version/iteration/platform - (target (if (or (args:get-arg "-prepend-contour") (args:get-arg "-prefix-target")) - (conc (or (args:get-arg "-prefix-target") (conc contour "/" (common:get-area-name) "/")) (rmt:get-target run-id)) (rmt:get-target run-id))) ;; e.g. v1.63/a3e1/ubuntu - (spec-id (pgdb:get-ttype dbh keytarg)) - (publish-time (if (args:get-arg "-cp-eventtime-to-publishtime") - event-time - (current-seconds))) - (new-run-id (pgdb:get-run-id dbh spec-id target run-name area-id))) - (if new-run-id - (begin ;; let ((run-record (pgdb:get-run-info dbh new-run-id)) - (hash-table-set! runs-ht run-id new-run-id) - ;; ensure key fields are up to date - ;; if last_update == pgdb_last_update do not update smallest-last-update-time - (let* ((pgdb-last-update (pgdb:get-run-last-update dbh new-run-id)) - (smallest-time (hash-table-ref/default smallest-last-update-time "smallest-time" #f))) - (if (and (> last-update pgdb-last-update) (or (not smallest-time) (< last-update smallest-time))) - (hash-table-set! smallest-last-update-time "smallest-time" last-update))) - (pgdb:refresh-run-info - dbh - new-run-id - state status owner event-time comment fail-count pass-count area-id last-update publish-time) - (debug:print-info 0 *default-log-port* "Working on run-id " run-id " pgdb-id " new-run-id ) - (if (not (equal? run-tag "")) - (task:add-run-tag dbh new-run-id run-tag)) - new-run-id) - - (if (equal? 
state "deleted") - (begin - (debug:print-info 1 *default-log-port* "Warning: Run with id " run-id " was created after previous sync and deleted before the sync") #f) - (if (handle-exceptions - exn - (begin (print-call-chain) - (print ((condition-property-accessor 'exn 'message) exn)) - #f) - - (pgdb:insert-run - dbh - spec-id target run-name state status owner event-time comment fail-count pass-count area-id last-update publish-time)) - (let* ((smallest-time (hash-table-ref/default smallest-last-update-time "smallest-time" #f))) - (if (or (not smallest-time) (< last-update smallest-time)) - (hash-table-set! smallest-last-update-time "smallest-time" last-update)) - (tasks:run-id->mtpg-run-id dbh cached-info run-id area-info smallest-last-update-time)) - #f))))))) - -(define (task:add-run-tag dbh run-id tag) - (let* ((tag-info (pgdb:get-tag-info-by-name dbh tag))) - (if (not tag-info) - (begin - (if (handle-exceptions - exn - (begin - (debug:print-info 1 *default-log-port* ((condition-property-accessor 'exn 'message) exn)) - #f) - (pgdb:insert-tag dbh tag)) - (set! 
tag-info (pgdb:get-tag-info-by-name dbh tag)) - #f))) - ;;add to area_tags - (handle-exceptions - exn - (begin - (debug:print-info 1 *default-log-port* ((condition-property-accessor 'exn 'message) exn)) - #f) - (if (not (pgdb:is-run-taged-with-a-tag dbh (vector-ref tag-info 0) run-id)) - (pgdb:insert-run-tag dbh (vector-ref tag-info 0) run-id))))) - - -(define (tasks:sync-test-steps dbh cached-info test-step-ids smallest-last-update-time) - ; (print "Sync Steps " test-step-ids ) - (let ((test-ht (hash-table-ref cached-info 'tests)) - (step-ht (hash-table-ref cached-info 'steps))) - (for-each - (lambda (test-step-id) - (let* ((test-step-info (rmt:get-steps-info-by-id test-step-id)) - (step-id (tdb:step-get-id test-step-info)) - (test-id (tdb:step-get-test_id test-step-info)) - (stepname (tdb:step-get-stepname test-step-info)) - (state (tdb:step-get-state test-step-info)) - (status (tdb:step-get-status test-step-info)) - (event_time (tdb:step-get-event_time test-step-info)) - (comment (tdb:step-get-comment test-step-info)) - (logfile (tdb:step-get-logfile test-step-info)) - (last-update (tdb:step-get-last_update test-step-info)) - (pgdb-test-id (hash-table-ref/default test-ht test-id #f)) - (smallest-time (hash-table-ref/default smallest-last-update-time "smallest-time" #f)) - (pgdb-step-id (if pgdb-test-id - (pgdb:get-test-step-id dbh pgdb-test-id stepname state) - #f))) - (if step-id - (begin - (if pgdb-test-id - (begin - (if pgdb-step-id - (begin - (debug:print-info 1 *default-log-port* "Updating existing test-step with test-id: " test-id " and step-id " step-id " pgdb test id: " pgdb-test-id " pgdb step id " pgdb-step-id ) - (let* ((pgdb-last-update (pgdb:get-test-step-last-update dbh pgdb-step-id))) - (if (and (> last-update pgdb-last-update) (or (not smallest-time) (< last-update smallest-time))) - (hash-table-set! 
smallest-last-update-time "smallest-time" last-update))) - (pgdb:update-test-step dbh pgdb-step-id pgdb-test-id stepname state status event_time comment logfile last-update)) - (begin - (debug:print-info 1 *default-log-port* "Inserting test-step with test-id: " test-id " and step-id " step-id " pgdb test id: " pgdb-test-id) - (if (or (not smallest-time) (< last-update smallest-time)) - (hash-table-set! smallest-last-update-time "smallest-time" last-update)) - (pgdb:insert-test-step dbh pgdb-test-id stepname state status event_time comment logfile last-update ) - (set! pgdb-step-id (pgdb:get-test-step-id dbh pgdb-test-id stepname state)))) - (hash-table-set! step-ht step-id pgdb-step-id )) - (debug:print-info 1 *default-log-port* "Error: Test not cashed"))) - (debug:print-info 1 *default-log-port* "Error: Could not get test step info for step id " test-step-id )))) ;; this is a wierd senario need to debug - test-step-ids))) - -(define (tasks:sync-test-gen-data dbh cached-info test-data-ids smallest-last-update-time) - (let ((test-ht (hash-table-ref cached-info 'tests)) - (data-ht (hash-table-ref cached-info 'data))) - (for-each - (lambda (test-data-id) - (let* ((test-data-info (rmt:get-data-info-by-id test-data-id)) - (data-id (db:test-data-get-id test-data-info)) - (test-id (db:test-data-get-test_id test-data-info)) - (category (db:test-data-get-category test-data-info)) - (variable (db:test-data-get-variable test-data-info)) - (value (db:test-data-get-value test-data-info)) - (expected (db:test-data-get-expected test-data-info)) - (tol (db:test-data-get-tol test-data-info)) - (units (db:test-data-get-units test-data-info)) - (comment (db:test-data-get-comment test-data-info)) - (status (db:test-data-get-status test-data-info)) - (type (db:test-data-get-type test-data-info)) - (last-update (db:test-data-get-last_update test-data-info)) - (smallest-time (hash-table-ref/default smallest-last-update-time "smallest-time" #f)) - - (pgdb-test-id (hash-table-ref/default 
test-ht test-id #f)) - (pgdb-data-id (if pgdb-test-id - (pgdb:get-test-data-id dbh pgdb-test-id category variable) - #f))) - (if data-id - (begin - (if pgdb-test-id - (begin - (if pgdb-data-id - (begin - (debug:print-info 1 *default-log-port* "Updating existing test-data with test-id: " test-id " and data-id " data-id " pgdb test id: " pgdb-test-id " pgdb data id " pgdb-data-id) - (let* ((pgdb-last-update (pgdb:get-test-data-last-update dbh pgdb-data-id))) - (if (and (> last-update pgdb-last-update) (or (not smallest-time) (< last-update smallest-time))) - (hash-table-set! smallest-last-update-time "smallest-time" last-update))) - (pgdb:update-test-data dbh pgdb-data-id pgdb-test-id category variable value expected tol units comment status type last-update)) - (begin - (debug:print-info 1 *default-log-port* "Inserting test-data with test-id: " test-id " and data-id " data-id " pgdb test id: " pgdb-test-id) - (if (handle-exceptions - exn - (begin (print-call-chain) - (print ((condition-property-accessor 'exn 'message) exn)) - #f) - - (pgdb:insert-test-data dbh pgdb-test-id category variable value expected tol units comment status type last-update)) - ;(tasks:run-id->mtpg-run-id dbh cached-info run-id area-info) - (begin - ;(pgdb:insert-test-data dbh pgdb-test-id category variable value expected tol units comment status type ) - (if (or (not smallest-time) (< last-update smallest-time)) - (hash-table-set! smallest-last-update-time "smallest-time" last-update)) - (set! pgdb-data-id (pgdb:get-test-data-id dbh pgdb-test-id category variable))) - #f))) - (hash-table-set! 
data-ht data-id pgdb-data-id )) - (begin - (debug:print-info 1 *default-log-port* "Error: Test not in pgdb")))) - - (debug:print-info 1 *default-log-port* "Error: Could not get test data info for data id " test-data-id )))) ;; this is a wierd senario need to debug - test-data-ids))) - - - -(define (tasks:sync-tests-data dbh cached-info test-ids area-info smallest-last-update-time) - (let ((test-ht (hash-table-ref cached-info 'tests))) - (for-each - (lambda (test-id) - ; (print test-id) - (let* ((test-info (rmt:get-test-info-by-id #f test-id)) - (run-id (db:test-get-run_id test-info)) ;; look these up in db_records.scm - (test-id (db:test-get-id test-info)) - (test-name (db:test-get-testname test-info)) - (item-path (db:test-get-item-path test-info)) - (state (db:test-get-state test-info)) - (status (db:test-get-status test-info)) - (host (db:test-get-host test-info)) - (pid (db:test-get-process_id test-info)) - (cpuload (db:test-get-cpuload test-info)) - (diskfree (db:test-get-diskfree test-info)) - (uname (db:test-get-uname test-info)) - (run-dir (db:test-get-rundir test-info)) - (log-file (db:test-get-final_logf test-info)) - (run-duration (db:test-get-run_duration test-info)) - (comment (db:test-get-comment test-info)) - (event-time (db:test-get-event_time test-info)) - (archived (db:test-get-archived test-info)) - (last-update (db:test-get-last_update test-info)) - (pgdb-run-id (tasks:run-id->mtpg-run-id dbh cached-info run-id area-info smallest-last-update-time)) - (smallest-time (hash-table-ref/default smallest-last-update-time "smallest-time" #f)) - (pgdb-test-id (if pgdb-run-id - (begin - ;(print pgdb-run-id) - (pgdb:get-test-id dbh pgdb-run-id test-name item-path)) - #f))) - ;; "id" "run_id" "testname" "state" "status" "event_time" - ;; "host" "cpuload" "diskfree" "uname" "rundir" "item_path" - ;; "run_duration" "final_logf" "comment" "shortdir" "attemptnum" "archived" - (if pgdb-run-id - (begin - (if pgdb-test-id ;; have a record - (begin ;; let 
((key-name (conc run-id "/" test-name "/" item-path))) - (debug:print-info 0 *default-log-port* "Updating existing test with run-id: " run-id " and test-id: " test-id " pgdb run id: " pgdb-run-id " pgdb-test-id " pgdb-test-id) - (let* ((pgdb-last-update (pgdb:get-test-last-update dbh pgdb-test-id))) - (if (and (> last-update pgdb-last-update) (or (not smallest-time) (< last-update smallest-time))) ;;if last-update is same as pgdb-last-update then it is safe to assume the records are identical and we can use a larger last update time. - (hash-table-set! smallest-last-update-time "smallest-time" last-update))) - (pgdb:update-test dbh pgdb-test-id pgdb-run-id test-name item-path state status host cpuload diskfree uname run-dir log-file run-duration comment event-time archived last-update pid)) - (begin - (debug:print-info 0 *default-log-port* "Inserting test with run-id: " run-id " and test-id: " test-id " pgdb run id: " pgdb-run-id) - (pgdb:insert-test dbh pgdb-run-id test-name item-path state status host cpuload diskfree uname run-dir log-file run-duration comment event-time archived last-update pid) - (if (or (not smallest-time) (< last-update smallest-time)) - (hash-table-set! smallest-last-update-time "smallest-time" last-update)) - (set! pgdb-test-id (pgdb:get-test-id dbh pgdb-run-id test-name item-path)))) - (hash-table-set! test-ht test-id pgdb-test-id)) - (debug:print-info 1 *default-log-port* "WARNING: Skipping run with run-id:" run-id ". This run was created after privious sync and removed before this sync.")))) - test-ids))) - -(define (task:add-area-tag dbh area-info tag) - (let* ((tag-info (pgdb:get-tag-info-by-name dbh tag))) - (if (not tag-info) - (begin - (if (handle-exceptions - exn - (begin - (debug:print-info 1 *default-log-port* ((condition-property-accessor 'exn 'message) exn)) - #f) - (pgdb:insert-tag dbh tag)) - (set! 
tag-info (pgdb:get-tag-info-by-name dbh tag)) - #f))) - ;;add to area_tags - (handle-exceptions - exn - (begin - (debug:print-info 1 *default-log-port* ((condition-property-accessor 'exn 'message) exn)) - #f) - (if (not (pgdb:is-area-taged-with-a-tag dbh (vector-ref tag-info 0) (vector-ref area-info 0))) - (pgdb:insert-area-tag dbh (vector-ref tag-info 0) (vector-ref area-info 0)))))) - -(define (tasks:sync-run-data dbh cached-info run-ids area-info smallest-last-update-time) - (for-each - (lambda (run-id) - (debug:print-info 1 *default-log-port* "Check if run with " run-id " needs to be synced" ) - (tasks:run-id->mtpg-run-id dbh cached-info run-id area-info smallest-last-update-time)) -run-ids)) - - -;; get runs changed since last sync -;; (define (tasks:sync-test-data dbh cached-info area-info) -;; (let* (( - -(define (tasks:sync-to-postgres configdat dest) - (print "In sync") - (let* ((dbh (pgdb:open configdat dbname: dest)) - (area-info (pgdb:get-area-by-path dbh *toppath*)) - (cached-info (make-hash-table)) - (start (current-seconds)) - (test-patt (if (args:get-arg "-testpatt") - (args:get-arg "-testpatt") - "%")) - (target (if (args:get-arg "-target") - (args:get-arg "-target") - #f)) - (run-name (if (args:get-arg "-runname") - (args:get-arg "-runname") - #f))) - (if (and target (not run-name)) - (begin - (print "Error: Provide runname") - (exit 1))) - (if (and (not target) run-name) - (begin - (print "Error: Provide target") - (exit 1))) - ;(print "123") - ;(exit 1) - (for-each (lambda (dtype) - (hash-table-set! cached-info dtype (make-hash-table))) - '(runs targets tests steps data)) - (hash-table-set! 
cached-info 'start start) ;; when done we'll set sync times to this - (if area-info - (let* ((last-sync-time (vector-ref area-info 3)) - (smallest-last-update-time (make-hash-table)) - (changed (if (and target run-name) - (rmt:get-run-record-ids target run-name (rmt:get-keys) test-patt) - (rmt:get-changed-record-ids last-sync-time))) - (run-ids (alist-ref 'runs changed)) - (test-ids (alist-ref 'tests changed)) - (test-step-ids (alist-ref 'test_steps changed)) - (test-data-ids (alist-ref 'test_data changed)) - (run-stat-ids (alist-ref 'run_stats changed)) - (area-tag (if (args:get-arg "-area-tag") - (args:get-arg "-area-tag") - (if (args:get-arg "-area") - (args:get-arg "-area") - "")))) - (if (and (equal? area-tag "") (not (pgdb:is-area-taged dbh (vector-ref area-info 0)))) - (set! area-tag *default-area-tag*)) - (if (not (equal? area-tag "")) - (task:add-area-tag dbh area-info area-tag)) - (if (or (not (null? test-ids)) (not (null? run-ids))) - (begin - (debug:print-info 0 *default-log-port* "syncing runs") - (tasks:sync-run-data dbh cached-info run-ids area-info smallest-last-update-time) - (debug:print-info 0 *default-log-port* "syncing tests") - (tasks:sync-tests-data dbh cached-info test-ids area-info smallest-last-update-time) - (debug:print-info 0 *default-log-port* "syncing test steps") - (tasks:sync-test-steps dbh cached-info test-step-ids smallest-last-update-time) - (debug:print-info 0 *default-log-port* "syncing test data") - (tasks:sync-test-gen-data dbh cached-info test-data-ids smallest-last-update-time) - (print "----------done---------------"))) - (let* ((smallest-time (hash-table-ref/default smallest-last-update-time "smallest-time" #f))) - (debug:print-info 0 "smallest-time :" smallest-time " last-sync-time " last-sync-time) - (if (not (and target run-name)) - (if (or (and smallest-time (> smallest-time last-sync-time)) (and smallest-time (eq? 
last-sync-time 0))) - (pgdb:write-sync-time dbh area-info smallest-time))))) ;;this needs to be changed - (if (tasks:set-area dbh configdat) - (tasks:sync-to-postgres configdat dest) - (begin - (debug:print 0 *default-log-port* "ERROR: unable to create an area record") - #f))))) - Index: tasksmod.scm ================================================================== --- tasksmod.scm +++ tasksmod.scm @@ -17,21 +17,404 @@ ;; along with Megatest. If not, see . ;;====================================================================== (declare (unit tasksmod)) + (declare (uses commonmod)) +(declare (uses pgdbmod)) +(declare (uses mtconfigf)) (module tasksmod * (import scheme chicken data-structures extras) -(import (prefix sqlite3 sqlite3:) posix typed-records srfi-18 srfi-69 format ports srfi-1 matchable) +(use (prefix sqlite3 sqlite3:) posix typed-records srfi-18 srfi-69 format ports srfi-1 matchable + regex) + (import commonmod) -;; (use (prefix ulex ulex:)) +(import (prefix mtconfigf configf:)) +(import pgdbmod) (include "common_records.scm") (include "task_records.scm") ;; (include "tasks-inc.scm") + + +;;====================================================================== +;; Tasks db +;;====================================================================== + +;; wait up to aprox n seconds for a journal to go away +;; +(define (tasks:wait-on-journal path n #!key (remove #f)(waiting-msg #f)) + (if (not (string? path)) + (debug:print-error 0 *default-log-port* "Called tasks:wait-on-journal with path=" path " (not a string)") + (let ((fullpath (conc path "-journal"))) + (handle-exceptions + exn + (begin + (print-call-chain (current-error-port)) + (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) + (debug:print 5 *default-log-port* " exn=" (condition->list exn)) + (debug:print 0 *default-log-port* "tasks:wait-on-journal failed. 
Continuing on, you can ignore this call-chain") + #t) ;; if stuff goes wrong just allow it to move on + (let loop ((journal-exists (common:file-exists? fullpath)) + (count n)) ;; wait ten times ... + (if journal-exists + (begin + (if (and waiting-msg + (eq? (modulo n 30) 0)) + (debug:print 0 *default-log-port* waiting-msg)) + (if (> count 0) + (begin + (thread-sleep! 1) + (loop (common:file-exists? fullpath) + (- count 1))) + (begin + (debug:print 0 *default-log-port* "ERROR: removing the journal file " fullpath ", this is not good. Look for disk full, write access and other issues.") + (if remove (system (conc "rm -rf " fullpath))) + #f))) + #t)))))) + +(define (tasks:get-task-db-path) + (let ((dbdir (or (configf:lookup *configdat* "setup" "monitordir") + (configf:lookup *configdat* "setup" "dbdir") + (conc (common:get-linktree) "/.db")))) + (handle-exceptions + exn + (begin + (debug:print-error 0 *default-log-port* "Couldn't create path to " dbdir) + (exit 1)) + (if (not (directory? dbdir))(create-directory dbdir #t))) + dbdir)) + +;; If file exists AND +;; file readable +;; ==> open it +;; If file exists AND +;; file NOT readable +;; ==> open in-mem version +;; If file NOT exists +;; ==> open in-mem version +;; +(define (tasks:open-db #!key (numretries 4)) + (if *task-db* + *task-db* + (handle-exceptions + exn + (if (> numretries 0) + (begin + (print-call-chain (current-error-port)) + (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) + (debug:print 5 *default-log-port* " exn=" (condition->list exn)) + (thread-sleep! 
1) + (tasks:open-db numretries (- numretries 1))) + (begin + (print-call-chain (current-error-port)) + (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) + (debug:print 5 *default-log-port* " exn=" (condition->list exn)))) + (let* ((dbpath (db:dbfile-path )) ;; (tasks:get-task-db-path)) + (dbfile (conc dbpath "/monitor.db")) + (avail (tasks:wait-on-journal dbpath 10)) ;; wait up to about 10 seconds for the journal to go away + (exists (common:file-exists? dbpath)) + (write-access (file-write-access? dbpath)) + (mdb (cond ;; what the hek is *toppath* doing here? + ((and (string? *toppath*)(file-write-access? *toppath*)) + (sqlite3:open-database dbfile)) + ((file-read-access? dbpath) (sqlite3:open-database dbfile)) + (else (sqlite3:open-database ":memory:")))) ;; (never-give-up-open-db dbpath)) + (handler (sqlite3:make-busy-timeout 36000))) + (if (and exists + (not write-access)) + (set! *db-write-access* write-access)) ;; only unset so other db's also can use this control + (sqlite3:set-busy-handler! mdb handler) + (db:set-sync mdb) ;; (sqlite3:execute mdb (conc "PRAGMA synchronous = 0;")) + ;; (if (or (and (not exists) + ;; (file-write-access? *toppath*)) + ;; (not (file-read-access? 
dbpath))) + ;; (begin + ;; + ;; TASKS QUEUE MOVED TO main.db + ;; + ;; (sqlite3:execute mdb "CREATE TABLE IF NOT EXISTS tasks_queue (id INTEGER PRIMARY KEY, + ;; action TEXT DEFAULT '', + ;; owner TEXT, + ;; state TEXT DEFAULT 'new', + ;; target TEXT DEFAULT '', + ;; name TEXT DEFAULT '', + ;; testpatt TEXT DEFAULT '', + ;; keylock TEXT, + ;; params TEXT, + ;; creation_time TIMESTAMP, + ;; execution_time TIMESTAMP);") + (sqlite3:execute mdb "CREATE TABLE IF NOT EXISTS monitors (id INTEGER PRIMARY KEY, + pid INTEGER, + start_time TIMESTAMP, + last_update TIMESTAMP, + hostname TEXT, + username TEXT, + CONSTRAINT monitors_constraint UNIQUE (pid,hostname));") + (sqlite3:execute mdb "CREATE TABLE IF NOT EXISTS servers (id INTEGER PRIMARY KEY, + pid INTEGER, + interface TEXT, + hostname TEXT, + port INTEGER, + pubport INTEGER, + start_time TIMESTAMP, + priority INTEGER, + state TEXT, + mt_version TEXT, + heartbeat TIMESTAMP, + transport TEXT, + run_id INTEGER);") + ;; CONSTRAINT servers_constraint UNIQUE (pid,hostname,port));") + (sqlite3:execute mdb "CREATE TABLE IF NOT EXISTS clients (id INTEGER PRIMARY KEY, + server_id INTEGER, + pid INTEGER, + hostname TEXT, + cmdline TEXT, + login_time TIMESTAMP, + logout_time TIMESTAMP DEFAULT -1, + CONSTRAINT clients_constraint UNIQUE (pid,hostname));") + + ;)) + (set! 
*task-db* (cons mdb dbpath)) + *task-db*)))) + +;;====================================================================== +;; Server and client management +;;====================================================================== + +;; make-vector-record tasks hostinfo id interface port pubport transport pid hostname +(define (tasks:hostinfo-get-id vec) (vector-ref vec 0)) +(define (tasks:hostinfo-get-interface vec) (vector-ref vec 1)) +(define (tasks:hostinfo-get-port vec) (vector-ref vec 2)) +(define (tasks:hostinfo-get-pubport vec) (vector-ref vec 3)) +(define (tasks:hostinfo-get-transport vec) (vector-ref vec 4)) +(define (tasks:hostinfo-get-pid vec) (vector-ref vec 5)) +(define (tasks:hostinfo-get-hostname vec) (vector-ref vec 6)) + +(define (tasks:need-server run-id) + (equal? (configf:lookup *configdat* "server" "required") "yes")) + +;; no elegance here ... +;; +(define (tasks:kill-server hostname pid #!key (kill-switch "")) + (debug:print-info 0 *default-log-port* "Attempting to kill server process " pid " on host " hostname) + (setenv "TARGETHOST" hostname) + (let* ((logdir (if (directory-exists? "logs") + "logs/" + "")) + (logfile (if logdir (conc "logs/server-"pid"-"hostname".log") #f)) + (gzfile (if logfile (conc logfile ".gz")))) + (setenv "TARGETHOST_LOGF" (conc logdir "server-kills.log")) + + (system (conc "nbfake kill "kill-switch" "pid)) + + (when logfile + (thread-sleep! 0.5) + (if (common:file-exists? gzfile) (delete-file gzfile)) + (system (conc "gzip " logfile)) + + (unsetenv "TARGETHOST_LOGF") + (unsetenv "TARGETHOST")))) + + +;;====================================================================== +;; M O N I T O R S +;;====================================================================== + +(define (tasks:remove-monitor-record mdb) + (sqlite3:execute mdb "DELETE FROM monitors WHERE pid=? AND hostname=?;" + (current-process-id) + (get-host-name))) + +(define (tasks:get-monitors mdb) + (let ((res '())) + (sqlite3:for-each-row + (lambda (a . 
rem) + (set! res (cons (apply vector a rem) res))) + mdb + "SELECT id,pid,strftime('%m/%d/%Y %H:%M',datetime(start_time,'unixepoch'),'localtime'),strftime('%m/%d/%Y %H:%M:%S',datetime(last_update,'unixepoch'),'localtime'),hostname,username FROM monitors ORDER BY last_update ASC;") + (reverse res) + )) + +(define (tasks:monitors->text-table monitors) + (let ((fmtstr "~4a~8a~20a~20a~10a~10a")) + (conc (format #f fmtstr "id" "pid" "start time" "last update" "hostname" "user") "\n" + (string-intersperse + (map (lambda (monitor) + (format #f fmtstr + (tasks:monitor-get-id monitor) + (tasks:monitor-get-pid monitor) + (tasks:monitor-get-start_time monitor) + (tasks:monitor-get-last_update monitor) + (tasks:monitor-get-hostname monitor) + (tasks:monitor-get-username monitor))) + monitors) + "\n")))) + +;; update the last_update field with the current time and +;; if any monitors appear dead, remove them +(define (tasks:monitors-update mdb) + (sqlite3:execute mdb "UPDATE monitors SET last_update=strftime('%s','now') WHERE pid=? AND hostname=?;" + (current-process-id) + (get-host-name)) + (let ((deadlist '())) + (sqlite3:for-each-row + (lambda (id pid host last-update delta) + (print "Going to delete stale record for monitor with pid " pid " on host " host " last updated " delta " seconds ago") + (set! 
deadlist (cons id deadlist))) + mdb + "SELECT id,pid,hostname,last_update,strftime('%s','now')-last_update AS delta FROM monitors WHERE delta > 700;") + (sqlite3:execute mdb (conc "DELETE FROM monitors WHERE id IN ('" (string-intersperse (map conc deadlist) "','") "');"))) + ) +(define (tasks:register-monitor db port) + (let* ((pid (current-process-id)) + (hostname (get-host-name)) + (userinfo (user-information (current-user-id))) + (username (car userinfo))) + (print "Register monitor, pid: " pid ", hostname: " hostname ", port: " port ", username: " username) + (sqlite3:execute db "INSERT INTO monitors (pid,start_time,last_update,hostname,username) VALUES (?,strftime('%s','now'),strftime('%s','now'),?,?);" + pid hostname username))) + +(define (tasks:get-num-alive-monitors mdb) + (let ((res 0)) + (sqlite3:for-each-row + (lambda (count) + (set! res count)) + mdb + "SELECT count(id) FROM monitors WHERE last_update < (strftime('%s','now') - 300) AND username=?;" + (car (user-information (current-user-id)))) + res)) + +;; +#;(define (tasks:start-monitor db mdb) + (if (> (tasks:get-num-alive-monitors mdb) 2) ;; have two running, no need for more + (debug:print-info 1 *default-log-port* "Not starting monitor, already have more than two running") + (let* ((megatestdb (conc *toppath* "/megatest.db")) + (monitordbf (conc (db:dbfile-path #f) "/monitor.db")) + (last-db-update 0)) ;; (file-modification-time megatestdb))) + (task:register-monitor mdb) + (let loop ((count 0) + (next-touch 0)) ;; next-touch is the time where we need to update last_update + ;; if the db has been modified we'd best look at the task queue + (let ((modtime (file-modification-time megatestdbpath ))) + (if (> modtime last-db-update) + (tasks:process-queue db)) ;; BROKEN. mdb last-db-update megatestdb next-touch)) + ;; WARNING: Possible race conditon here!! + ;; should this update be immediately after the task-get-action call above? 
+ (if (> (current-seconds) next-touch) + (begin + (tasks:monitors-update mdb) + (loop (+ count 1)(+ (current-seconds) 240))) + (loop (+ count 1) next-touch))))))) + +;;====================================================================== +;; T A S K S Q U E U E +;; +;; NOTE:: These operate on task_queue which is in main.db +;; +;;====================================================================== + +;; NOTE: It might be good to add one more layer of checking to ensure +;; that no task gets run in parallel. + +;; id INTEGER PRIMARY KEY, +;; action TEXT DEFAULT '', +;; owner TEXT, +;; state TEXT DEFAULT 'new', +;; target TEXT DEFAULT '', +;; name TEXT DEFAULT '', +;; testpatt TEXT DEFAULT '', +;; keylock TEXT, +;; params TEXT, +;; creation_time TIMESTAMP DEFAULT (strftime('%s','now')), +;; execution_time TIMESTAMP); + + +;;====================================================================== +;; S Y N C T O P O S T G R E S Q L +;;====================================================================== + +;; In the spirit of "dump your junk in the tasks module" I'll put the +;; sync to postgres here for now. + +;; attempt to automatically set up an area. call only if get area by path +;; returns naught of interest +;; +(define (tasks:set-area dbh configdat #!key (toppath #f)) ;; could I safely put *toppath* in for the default for toppath? when would it be evaluated? 
+ (let loop ((area-name (or (configf:lookup configdat "setup" "area-name") + (common:get-area-name))) + (modifier 'none)) + (let ((success (handle-exceptions + exn + (begin + (debug:print 0 *default-log-port* "ERROR: cannot create area entry, " ((condition-property-accessor 'exn 'message) exn)) + #f) ;; FIXME: I don't care for now but I should look at *why* there was an exception + (pgdb:add-area dbh area-name (or toppath *toppath*))))) + (or success + (case modifier + ((none)(loop (conc (current-user-name) "_" area-name) 'user)) + ((user)(loop (conc (substring (common:get-area-path-signature) 0 4) + area-name) 'areasig)) + (else #f)))))) ;; give up + +(define (task:print-runtime run-times saperator) +(for-each + (lambda (run-time-info) + (let* ((run-name (vector-ref run-time-info 0)) + (run-time (vector-ref run-time-info 1)) + (target (vector-ref run-time-info 2))) + (print target saperator run-name saperator run-time ))) + run-times)) + +(define (task:print-runtime-as-json run-times) + (let loop ((run-time-info (car run-times)) + (rema (cdr run-times)) + (str "")) + (let* ((run-name (vector-ref run-time-info 0)) + (run-time (vector-ref run-time-info 1)) + (target (vector-ref run-time-info 2))) + ;(print (not (equal? str ""))) + (if (not (equal? str "")) + (set! str (conc str ","))) + (if (null? rema) + (print "[" str "{target:" target ",run-name:" run-name ", run-time:" run-time "}]") + (loop (car rema) (cdr rema) (conc str "{target:" target ", run-name:" run-name ", run-time:" run-time "}")))))) + + +(define (task:print-testtime test-times saperator) +(for-each + (lambda (test-time-info) + (let* ((test-name (vector-ref test-time-info 0)) + (test-time (vector-ref test-time-info 2)) + (test-item (if (eq? 
(string-length (vector-ref test-time-info 1)) 0) + "N/A" + (vector-ref test-time-info 1)))) + (print test-name saperator test-item saperator test-time ))) + test-times)) + +(define (task:print-testtime-as-json test-times) + (let loop ((test-time-info (car test-times)) + (rema (cdr test-times)) + (str "")) + (let* ((test-name (vector-ref test-time-info 0)) + (test-time (vector-ref test-time-info 2)) + (item (vector-ref test-time-info 1))) + ;(print (not (equal? str ""))) + (if (not (equal? str "")) + (set! str (conc str ","))) + (if (null? rema) + (print "[" str "{test-name:" test-name ", item-path:" item ", test-time:" test-time "}]") + (loop (car rema) (cdr rema) (conc str "{test-name:" test-name ", item-path:" item ", test-time:" test-time "}")))))) + +(define (db:set-sync db) + (let ((syncprag (configf:lookup *configdat* "setup" "sychronous"))) + (sqlite3:execute db (conc "PRAGMA synchronous = " (or syncprag 0) ";")))) + + + ) DELETED tdb-inc.scm Index: tdb-inc.scm ================================================================== --- tdb-inc.scm +++ /dev/null @@ -1,396 +0,0 @@ -;;====================================================================== -;; Copyright 2006-2013, Matthew Welland. -;; -;; This file is part of Megatest. -;; -;; Megatest is free software: you can redistribute it and/or modify -;; it under the terms of the GNU General Public License as published by -;; the Free Software Foundation, either version 3 of the License, or -;; (at your option) any later version. -;; -;; Megatest is distributed in the hope that it will be useful, -;; but WITHOUT ANY WARRANTY; without even the implied warranty of -;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -;; GNU General Public License for more details. -;; -;; You should have received a copy of the GNU General Public License -;; along with Megatest. If not, see . 
-;; -;;====================================================================== - -;;====================================================================== -;; Database access -;;====================================================================== - -;;====================================================================== -;; -;; T E S T D A T A B A S E S -;; -;;====================================================================== - -;;====================================================================== -;; T E S T S P E C I F I C D B -;;====================================================================== - -;; Create the sqlite db for the individual test(s) -;; -;; Moved these tables into .db -;; THIS CODE TO BE REMOVED -;; -(define (open-test-db work-area) - (debug:print-info 11 *default-log-port* "open-test-db " work-area) - (if (and work-area - (directory? work-area) - (file-read-access? work-area)) - (let* ((dbpath (conc work-area "/testdat.db")) - (dbexists (common:file-exists? dbpath)) - (work-area-writeable (file-write-access? work-area)) - (db (handle-exceptions ;; open the db if area writeable or db pre-existing. open in-mem otherwise. if exception, open in-mem - exn - (begin - (print-call-chain (current-error-port)) - (debug:print 2 *default-log-port* "ERROR: problem accessing test db " work-area ", you probably should clean and re-run this test" - ((condition-property-accessor 'exn 'message) exn)) - (set! dbexists #f) ;; must force re-creation of tables, more tom-foolery - (sqlite3:open-database ":memory:")) ;; open an in-memory db to allow readonly access - (if (or work-area-writeable - dbexists) - (sqlite3:open-database dbpath) - (sqlite3:open-database ":memory:")))) - (tdb-writeable (and (file-write-access? work-area) - (file-write-access? 
dbpath))) - (handler (sqlite3:make-busy-timeout (if (args:get-arg "-override-timeout") - (string->number (args:get-arg "-override-timeout")) - 136000)))) - - (if (and tdb-writeable - *db-write-access*) - (sqlite3:set-busy-handler! db handler)) - (if (not dbexists) - (begin - (db:set-sync db) ;; (sqlite3:execute db "PRAGMA synchronous = FULL;") - (debug:print-info 11 *default-log-port* "Initialized test database " dbpath) - (tdb:testdb-initialize db))) - ;; (sqlite3:execute db "PRAGMA synchronous = 0;") - (debug:print-info 11 *default-log-port* "open-test-db END (sucessful)" work-area) - ;; now let's test that everything is correct - (handle-exceptions - exn - (begin - (print-call-chain (current-error-port)) - (debug:print-error 0 *default-log-port* "problem accessing test db " work-area ", you probably should clean and re-run this test or remove the file " - dbpath ".\n " - ((condition-property-accessor 'exn 'message) exn)) - #f) - ;; Is there a cheaper single line operation that will check for existance of a table - ;; and raise an exception ? - (sqlite3:execute db "SELECT id FROM test_data LIMIT 1;")) - db) - ;; no work-area or not readable - create a placeholder to fake rest of world out - (let ((baddb (sqlite3:open-database ":memory:"))) - (debug:print-info 11 *default-log-port* "open-test-db END (unsucessful)" work-area) - ;; provide an in-mem db (this is dangerous!) 
- (tdb:testdb-initialize baddb) - baddb))) - -;; find and open the testdat.db file for an existing test -(define (tdb:open-test-db-by-test-id test-id #!key (work-area #f)) - (let* ((test-path (if work-area - work-area - (rmt:test-get-rundir-from-test-id test-id)))) - (debug:print 3 *default-log-port* "TEST PATH: " test-path) - (open-test-db test-path))) - -;; find and open the testdat.db file for an existing test -(define (tdb:open-test-db-by-test-id-local dbstruct run-id test-id #!key (work-area #f)) - (let* ((test-path (if work-area - work-area - (db:test-get-rundir-from-test-id dbstruct run-id test-id)))) - (debug:print 3 *default-log-port* "TEST PATH: " test-path) - (open-test-db test-path))) - -;; find and open the testdat.db file for an existing test -(define (tdb:open-run-close-db-by-test-id-local dbstruct run-id test-id work-area proc . params) - (let* ((test-path (if work-area - work-area - (db:test-get-rundir-from-test-id dbstruct run-id test-id))) - (tdb (open-test-db test-path))) - (apply proc tdb params))) - -(define (tdb:testdb-initialize db) - (debug:print 11 *default-log-port* "db:testdb-initialize START") - (sqlite3:with-transaction - db - (lambda () - (for-each - (lambda (sqlcmd) - (sqlite3:execute db sqlcmd)) - (list "CREATE TABLE IF NOT EXISTS test_rundat ( - id INTEGER PRIMARY KEY, - update_time TIMESTAMP, - cpuload INTEGER DEFAULT -1, - diskfree INTEGER DEFAULT -1, - diskusage INTGER DEFAULT -1, - run_duration INTEGER DEFAULT 0);" - "CREATE TABLE IF NOT EXISTS test_data ( - id INTEGER PRIMARY KEY, - test_id INTEGER, - category TEXT DEFAULT '', - variable TEXT, - value REAL, - expected REAL, - tol REAL, - units TEXT, - comment TEXT DEFAULT '', - status TEXT DEFAULT 'n/a', - type TEXT DEFAULT '', - CONSTRAINT test_data_constraint UNIQUE (test_id,category,variable));" - "CREATE TABLE IF NOT EXISTS test_steps ( - id INTEGER PRIMARY KEY, - test_id INTEGER, - stepname TEXT, - state TEXT DEFAULT 'NOT_STARTED', - status TEXT DEFAULT 'n/a', - 
event_time TIMESTAMP, - comment TEXT DEFAULT '', - logfile TEXT DEFAULT '', - CONSTRAINT test_steps_constraint UNIQUE (test_id,stepname,state));" - ;; test_meta can be used for handing commands to the test - ;; e.g. KILLREQ - ;; the ackstate is set to 1 once the command has been completed - "CREATE TABLE IF NOT EXISTS test_meta ( - id INTEGER PRIMARY KEY, - var TEXT, - val TEXT, - ackstate INTEGER DEFAULT 0, - CONSTRAINT metadat_constraint UNIQUE (var));")))) - (debug:print 11 *default-log-port* "db:testdb-initialize END")) - -;; This routine moved to db:read-test-data -;; -(define (tdb:read-test-data tdb test-id categorypatt) - (let ((res '())) - (sqlite3:for-each-row - (lambda (id test_id category variable value expected tol units comment status type) - (set! res (cons (vector id test_id category variable value expected tol units comment status type) res))) - tdb - "SELECT id,test_id,category,variable,value,expected,tol,units,comment,status,type FROM test_data WHERE test_id=? AND category LIKE ? ORDER BY category,variable;" test-id categorypatt) - (sqlite3:finalize! tdb) - (reverse res))) - -;;====================================================================== -;; T E S T D A T A -;;====================================================================== - -;; ;; get a list of test_data records matching categorypatt -;; (define (tdb:read-test-data test-id categorypatt #!key (work-area #f)) -;; (let ((tdb (tdb:open-test-db-by-test-id test-id work-area: work-area))) -;; (if (sqlite3:database? tdb) -;; (let ((res '())) -;; (sqlite3:for-each-row -;; (lambda (id test_id category variable value expected tol units comment status type) -;; (set! res (cons (vector id test_id category variable value expected tol units comment status type) res))) -;; tdb -;; "SELECT id,test_id,category,variable,value,expected,tol,units,comment,status,type FROM test_data WHERE test_id=? AND category LIKE ? ORDER BY category,variable;" test-id categorypatt) -;; (sqlite3:finalize! 
tdb) -;; (reverse res)) -;; '()))) - -;; NOTE: Run this local with #f for db !!! -(define (tdb:load-test-data run-id test-id) - (let loop ((lin (read-line))) - (if (not (eof-object? lin)) - (begin - (debug:print 4 *default-log-port* lin) - ;;(when lin ;; this when blocked stack dump caused by .dat file from logpro being 0-byte. fixed by upgrading logpro - (rmt:csv->test-data run-id test-id lin) - ;;) - (loop (read-line))))) - ;; roll up the current results. - ;; FIXME: Add the status too - (rmt:test-data-rollup run-id test-id #f)) - -;; NOTE: Run this local with #f for db !!! -(define (tdb:load-logpro-data run-id test-id) - (let loop ((lin (read-line))) - (if (not (eof-object? lin)) - (begin - (debug:print 4 *default-log-port* lin) - ;;(when lin ;; this when blocked stack dump caused by .dat file from logpro being 0-byte. fixed by upgrading logpro - (rmt:csv->test-data run-id test-id lin) - ;;) - (loop (read-line))))) - ;; roll up the current results. - ;; FIXME: Add the status too - (rmt:test-data-rollup run-id test-id #f)) - -(define (tdb:get-prev-tol-for-test tdb test-id category variable) - ;; Finish me? 
- (values #f #f #f)) - -;;====================================================================== -;; S T E P S -;;====================================================================== - -(define (tdb:step-get-time-as-string vec) - (seconds->time-string (tdb:step-get-event_time vec))) - -;; get a pretty table to summarize steps -;; -;; NOT USED, WILL BE REMOVED -;; -(define (tdb:get-steps-table steps);; organise the steps for better readability - (let ((res (make-hash-table))) - (for-each - (lambda (step) - (debug:print 6 *default-log-port* "step=" step) - (let ((record (hash-table-ref/default - res - (tdb:step-get-stepname step) - ;; stepname start end status Duration Logfile - (vector (tdb:step-get-stepname step) "" "" "" "" "")))) - (debug:print 6 *default-log-port* "record(before) = " record - "\nid: " (tdb:step-get-id step) - "\nstepname: " (tdb:step-get-stepname step) - "\nstate: " (tdb:step-get-state step) - "\nstatus: " (tdb:step-get-status step) - "\ntime: " (tdb:step-get-event_time step)) - (case (string->symbol (tdb:step-get-state step)) - ((start)(vector-set! record 1 (tdb:step-get-event_time step)) - (vector-set! record 3 (if (equal? (vector-ref record 3) "") - (tdb:step-get-status step))) - (if (> (string-length (tdb:step-get-logfile step)) - 0) - (vector-set! record 5 (tdb:step-get-logfile step)))) - ((end) - (vector-set! record 2 (any->number (tdb:step-get-event_time step))) - (vector-set! record 3 (tdb:step-get-status step)) - (vector-set! record 4 (let ((startt (any->number (vector-ref record 1))) - (endt (any->number (vector-ref record 2)))) - (debug:print 4 *default-log-port* "record[1]=" (vector-ref record 1) - ", startt=" startt ", endt=" endt - ", get-status: " (tdb:step-get-status step)) - (if (and (number? startt)(number? endt)) - (seconds->hr-min-sec (- endt startt)) "-1"))) - (if (> (string-length (tdb:step-get-logfile step)) - 0) - (vector-set! record 5 (tdb:step-get-logfile step)))) - (else - (vector-set! 
record 2 (tdb:step-get-state step)) - (vector-set! record 3 (tdb:step-get-status step)) - (vector-set! record 4 (tdb:step-get-event_time step)))) - (hash-table-set! res (tdb:step-get-stepname step) record) - (debug:print 6 *default-log-port* "record(after) = " record - "\nid: " (tdb:step-get-id step) - "\nstepname: " (tdb:step-get-stepname step) - "\nstate: " (tdb:step-get-state step) - "\nstatus: " (tdb:step-get-status step) - "\ntime: " (tdb:step-get-event_time step)))) - ;; (else (vector-set! record 1 (tdb:step-get-event_time step))) - (sort steps (lambda (a b) - (cond - ((< (tdb:step-get-event_time a)(tdb:step-get-event_time b)) #t) - ((eq? (tdb:step-get-event_time a)(tdb:step-get-event_time b)) - (< (tdb:step-get-id a) (tdb:step-get-id b))) - (else #f))))) - res)) - -;; Move this to steps.scm -;; -;; get a pretty table to summarize steps -;; -(define (tdb:get-steps-table-list steps) - ;; organise the steps for better readability - (let ((res (make-hash-table))) - (for-each - (lambda (step) - (debug:print 6 *default-log-port* "step=" step) - (let ((record (hash-table-ref/default - res - (tdb:step-get-stepname step) - ;; stepname start end status - (vector (tdb:step-get-stepname step) "" "" "" "" "")))) - (debug:print 6 *default-log-port* "record(before) = " record - "\nid: " (tdb:step-get-id step) - "\nstepname: " (tdb:step-get-stepname step) - "\nstate: " (tdb:step-get-state step) - "\nstatus: " (tdb:step-get-status step) - "\ntime: " (tdb:step-get-event_time step)) - (case (string->symbol (tdb:step-get-state step)) - ((start)(vector-set! record 1 (tdb:step-get-event_time step)) - (vector-set! record 3 (if (equal? (vector-ref record 3) "") - (tdb:step-get-status step))) - (if (> (string-length (tdb:step-get-logfile step)) - 0) - (vector-set! record 5 (tdb:step-get-logfile step)))) - ((end) - (vector-set! record 2 (any->number (tdb:step-get-event_time step))) - (vector-set! record 3 (tdb:step-get-status step)) - (vector-set! 
record 4 (let ((startt (any->number (vector-ref record 1))) - (endt (any->number (vector-ref record 2)))) - (debug:print 4 *default-log-port* "record[1]=" (vector-ref record 1) - ", startt=" startt ", endt=" endt - ", get-status: " (tdb:step-get-status step)) - (if (and (number? startt)(number? endt)) - (seconds->hr-min-sec (- endt startt)) "-1"))) - (if (> (string-length (tdb:step-get-logfile step)) - 0) - (vector-set! record 5 (tdb:step-get-logfile step)))) - (else - (vector-set! record 2 (tdb:step-get-state step)) - (vector-set! record 3 (tdb:step-get-status step)) - (vector-set! record 4 (tdb:step-get-event_time step)))) - (hash-table-set! res (tdb:step-get-stepname step) record) - (debug:print 6 *default-log-port* "record(after) = " record - "\nid: " (tdb:step-get-id step) - "\nstepname: " (tdb:step-get-stepname step) - "\nstate: " (tdb:step-get-state step) - "\nstatus: " (tdb:step-get-status step) - "\ntime: " (tdb:step-get-event_time step)))) - ;; (else (vector-set! record 1 (tdb:step-get-event_time step))) - (sort steps (lambda (a b) - (cond - ((< (tdb:step-get-event_time a)(tdb:step-get-event_time b)) #t) - ((eq? (tdb:step-get-event_time a)(tdb:step-get-event_time b)) - (< (tdb:step-get-id a) (tdb:step-get-id b))) - (else #f))))) - res)) - -;; -;; Move to steps.scm -;; -(define (tdb:get-compressed-steps comprsteps) ;; from tdb:get-steps-table - (map (lambda (x) - ;; take advantage of the \n on time->string - (vector - (vector-ref x 0) - (let ((s (vector-ref x 1))) - (if (number? s)(seconds->time-string s) s)) - (let ((s (vector-ref x 2))) - (if (number? s)(seconds->time-string s) s)) - (vector-ref x 3) ;; status - (vector-ref x 4) - (vector-ref x 5))) ;; time delta - (sort (hash-table-values comprsteps) - (lambda (a b) - (let ((time-a (vector-ref a 1)) - (time-b (vector-ref b 1))) - (if (and (number? time-a)(number? time-b)) - (if (< time-a time-b) - #t - (if (eq? time-a time-b) - (string. 
;; ;;====================================================================== -;;====================================================================== -;; Tests -;;====================================================================== - -;; return items given config -;; -(define (tests:get-items tconfig) - (let ((items (hash-table-ref/default tconfig "items" #f)) ;; items 4 - (itemstable (hash-table-ref/default tconfig "itemstable" #f))) - ;; if either items or items table is a proc return it so test running - ;; process can know to call items:get-items-from-config - ;; if either is a list and none is a proc go ahead and call get-items - ;; otherwise return #f - this is not an iterated test - (cond - ((procedure? items) - (debug:print-info 4 *default-log-port* "items is a procedure, will calc later") - items) ;; calc later - ((procedure? itemstable) - (debug:print-info 4 *default-log-port* "itemstable is a procedure, will calc later") - itemstable) ;; calc later - ((filter (lambda (x) - (let ((val (car x))) - (if (procedure? val) val #f))) - (append (if (list? items) items '()) - (if (list? itemstable) itemstable '()))) - 'have-procedure) - ((or (list? items)(list? 
itemstable)) ;; calc now - (debug:print-info 4 *default-log-port* "items and itemstable are lists, calc now\n" - " items: " items " itemstable: " itemstable) - (items:get-items-from-config tconfig)) - (else #f)))) ;; not iterated - - -;; returns waitons waitors tconfigdat -;; -(define (tests:get-waitons test-name all-tests-registry) - (let* ((config (tests:get-testconfig test-name #f all-tests-registry 'return-procs))) ;; assuming no problems with immediate evaluation, this could be simplified ('return-procs -> #t) - (let ((instr (if config - (configf:lookup config "requirements" "waiton") - (begin ;; No config means this is a non-existant test - (debug:print-error 0 *default-log-port* "non-existent required test \"" test-name "\"") - (exit 1)))) - (instr2 (if config - (configf:lookup config "requirements" "waitor") - ""))) - (debug:print-info 8 *default-log-port* "waitons string is " instr ", waitors string is " instr2) - (let ((newwaitons - (string-split (cond - ((procedure? instr) ;; here - (let ((res (instr))) - (debug:print-info 8 *default-log-port* "waiton procedure results in string " res " for test " test-name) - res)) - ((string? instr) instr) - (else - ;; NOTE: This is actually the case of *no* waitons! ;; (debug:print-error 0 *default-log-port* "something went wrong in processing waitons for test " test-name) - "")))) - (newwaitors - (string-split (cond - ((procedure? instr2) - (let ((res (instr2))) - (debug:print-info 8 *default-log-port* "waitor procedure results in string " res " for test " test-name) - res)) - ((string? instr2) instr2) - (else - ;; NOTE: This is actually the case of *no* waitons! 
;; (debug:print-error 0 *default-log-port* "something went wrong in processing waitons for test " test-name) - ""))))) - (values - ;; the waitons - (filter (lambda (x) - (if (hash-table-ref/default all-tests-registry x #f) - #t - (begin - (debug:print-error 0 *default-log-port* "test " test-name " has unrecognised waiton testname " x) - #f))) - newwaitons) - (filter (lambda (x) - (if (hash-table-ref/default all-tests-registry x #f) - #t - (begin - (debug:print-error 0 *default-log-port* "test " test-name " has unrecognised waiton testname " x) - #f))) - newwaitors) - config))))) - -;; Check for waiver eligibility -;; -(define (tests:check-waiver-eligibility testdat prev-testdat) - (let* ((test-registry (make-hash-table)) - (testconfig (tests:get-testconfig (db:test-get-testname testdat) (db:test-get-item-path testdat) test-registry #f)) - (test-rundir ;; (sdb:qry 'passstr - (db:test-get-rundir testdat)) ;; ) - (prev-rundir ;; (sdb:qry 'passstr - (db:test-get-rundir prev-testdat)) ;; ) - (waivers (if testconfig (configf:section-vars testconfig "waivers") '())) - (waiver-rx (regexp "^(\\S+)\\s+(.*)$")) - (diff-rule "diff %file1% %file2%") - (logpro-rule "diff %file1% %file2% | logpro %waivername%.logpro %waivername%.html")) - (if (not (common:file-exists? test-rundir)) - (begin - (debug:print-error 0 *default-log-port* "test run directory is gone, cannot propagate waiver") - #f) - (begin - (push-directory test-rundir) - (let ((result (if (null? waivers) - #f - (let loop ((hed (car waivers)) - (tal (cdr waivers))) - (debug:print 0 *default-log-port* "INFO: Applying waiver rule \"" hed "\"") - (let* ((waiver (configf:lookup testconfig "waivers" hed)) - (wparts (if waiver (string-match waiver-rx waiver) #f)) - (waiver-rule (if wparts (cadr wparts) #f)) - (waiver-glob (if wparts (caddr wparts) #f)) - (logpro-file (if waiver - (let ((fname (conc hed ".logpro"))) - (if (common:file-exists? 
fname) - fname - (begin - (debug:print 0 *default-log-port* "INFO: No logpro file " fname " falling back to diff") - #f))) - #f)) - ;; if rule by name of waiver-rule is found in testconfig - use it - ;; else if waivername.logpro exists use logpro-rule - ;; else default to diff-rule - (rule-string (let ((rule (configf:lookup testconfig "waiver_rules" waiver-rule))) - (if rule - rule - (if logpro-file - logpro-rule - (begin - (debug:print 0 *default-log-port* "INFO: No logpro file " logpro-file " found, using diff rule") - diff-rule))))) - ;; (string-substitute "%file1%" "foofoo.txt" "This is %file1% and so is this %file1%." #t) - (processed-cmd (string-substitute - "%file1%" (conc test-rundir "/" waiver-glob) - (string-substitute - "%file2%" (conc prev-rundir "/" waiver-glob) - (string-substitute - "%waivername%" hed rule-string #t) #t) #t)) - (res #f)) - (debug:print 0 *default-log-port* "INFO: waiver command is \"" processed-cmd "\"") - (if (eq? (system processed-cmd) 0) - (if (null? tal) - #t - (loop (car tal)(cdr tal))) - #f)))))) - (pop-directory) - result))))) - -;; Do not rpc this one, do the underlying calls!!! -(define (tests:test-set-status! run-id test-id state status comment dat #!key (work-area #f)) - (let* ((real-status status) - (otherdat (if dat dat (make-hash-table))) - (testdat (rmt:get-test-info-by-id run-id test-id)) - (test-name (db:test-get-testname testdat)) - (item-path (db:test-get-item-path testdat)) - ;; before proceeding we must find out if the previous test (where all keys matched except runname) - ;; was WAIVED if this test is FAIL - - ;; NOTES: - ;; 1. Is the call to test:get-previous-run-record remotified? - ;; 2. Add test for testconfig waiver propagation control here - ;; - (prev-test (if (equal? 
status "FAIL") - (rmt:get-previous-test-run-record run-id test-name item-path) - #f)) - (waived (if prev-test - (if prev-test ;; true if we found a previous test in this run series - (let ((prev-status (db:test-get-status prev-test)) - (prev-state (db:test-get-state prev-test)) - (prev-comment (db:test-get-comment prev-test))) - (debug:print 4 *default-log-port* "prev-status " prev-status ", prev-state " prev-state ", prev-comment " prev-comment) - (if (and (equal? prev-state "COMPLETED") - (equal? prev-status "WAIVED")) - (if comment - comment - prev-comment) ;; waived is either the comment or #f - #f)) - #f) - #f))) - (if (and waived - (tests:check-waiver-eligibility testdat prev-test)) - (set! real-status "WAIVED")) - - (debug:print 4 *default-log-port* "real-status " real-status ", waived " waived ", status " status) - - ;; update the primary record IF state AND status are defined - (if (and state status) - (begin - (rmt:set-state-status-and-roll-up-items run-id test-id item-path state real-status (if waived waived comment)) - ;; (mt:process-triggers run-id test-id state real-status) ;; triggers are called in test-set-state-status - )) - - ;; if status is "AUTO" then call rollup (note, this one modifies data in test - ;; run area, it does remote calls under the hood. - ;; (if (and test-id state status (equal? status "AUTO")) - ;; (rmt:test-data-rollup run-id test-id status)) - - ;; add metadata (need to do this way to avoid SQL injection issues) - - ;; :first_err - ;; (let ((val (hash-table-ref/default otherdat ":first_err" #f))) - ;; (if val - ;; (sqlite3:execute db "UPDATE tests SET first_err=? WHERE run_id=? AND testname=? AND item_path=?;" val run-id test-name item-path))) - ;; - ;; ;; :first_warn - ;; (let ((val (hash-table-ref/default otherdat ":first_warn" #f))) - ;; (if val - ;; (sqlite3:execute db "UPDATE tests SET first_warn=? WHERE run_id=? AND testname=? 
AND item_path=?;" val run-id test-name item-path))) - - (let ((category (hash-table-ref/default otherdat ":category" "")) - (variable (hash-table-ref/default otherdat ":variable" "")) - (value (hash-table-ref/default otherdat ":value" #f)) - (expected (hash-table-ref/default otherdat ":expected" "n/a")) - (tol (hash-table-ref/default otherdat ":tol" "n/a")) - (units (hash-table-ref/default otherdat ":units" "")) - (type (hash-table-ref/default otherdat ":type" "")) - (dcomment (hash-table-ref/default otherdat ":comment" ""))) - (debug:print 4 *default-log-port* - "category: " category ", variable: " variable ", value: " value - ", expected: " expected ", tol: " tol ", units: " units) - (if (and value) ;; require only value; BB was- all three required - (let ((dat (conc category "," - variable "," - value "," - expected "," - tol "," - units "," - dcomment ",," ;; extra comma for status - type ))) - ;; This was run remote, don't think that makes sense. Perhaps not, but that is the easiest path for the moment. - (rmt:csv->test-data run-id test-id - dat) - ;; This was added in check-in a5adfa3f9a. Message was: "...added delay in set-values to allow for delayed write on server start" - ;; I'm inserting an arbitrary rmt: call to force/ensure that the server is available to (hopefully) prevent a communication issue. - (rmt:get-var "MEGATEST_VERSION") ;; this does NOTHING but ensure the server is reachable. This is almost certainly NOT needed :) - ;; BB - commentiong out arbitrary 10 second wait (thread-sleep! 10) ;; add 10 second delay before quit incase rmt needs time to start a server. - ))) - - ;; need to update the top test record if PASS or FAIL and this is a subtest - ;;;;;; (if (not (equal? item-path "")) - ;;;;;; (rmt:set-state-status-and-roll-up-items run-id test-name item-path state status #f) ;;;;;) - - (if (or (and (string? 
comment) - (string-match (regexp "\\S+") comment)) - waived) - (let ((cmt (if waived waived comment))) - (rmt:general-call 'set-test-comment run-id cmt test-id))))) - -(define (tests:test-set-toplog! run-id test-name logf) - (rmt:general-call 'tests:test-set-toplog run-id logf run-id test-name)) - -(define (tests:summarize-items run-id test-id test-name force) - ;; if not force then only update the record if one of these is true: - ;; 1. logf is "log/final.log - ;; 2. logf is same as outputfilename - (let* ((outputfilename (conc "megatest-rollup-" test-name ".html")) - (orig-dir (current-directory)) - (logf-info (rmt:test-get-logfile-info run-id test-name)) - (logf (if logf-info (cadr logf-info) #f)) - (path (if logf-info (car logf-info) #f))) - ;; This query finds the path and changes the directory to it for the test - (if (and (string? path) - (directory? path)) ;; can get #f here under some wierd conditions. why, unknown ... - (begin - (debug:print 4 *default-log-port* "Found path: " path) - (change-directory path)) - ;; (set! outputfilename (conc path "/" outputfilename))) - (debug:print-error 0 *default-log-port* "summarize-items for run-id=" run-id ", test-name=" test-name ", no such path: " path)) - (debug:print 4 *default-log-port* "summarize-items with logf " logf ", outputfilename " outputfilename " and force " force) - (if (or (equal? logf "logs/final.log") - (equal? 
logf outputfilename) - force) - (let ((my-start-time (current-seconds)) - (lockf (conc outputfilename ".lock"))) - (let loop ((have-lock (common:simple-file-lock lockf))) - (if have-lock - (let ((script (configf:lookup *configdat* "testrollup" test-name))) - (print "Obtained lock for " outputfilename) - (rmt:set-state-status-and-roll-up-items run-id test-name "" #f #f #f) - (if script - (system (conc script " > " outputfilename " & ")) - (tests:generate-html-summary-for-iterated-test run-id test-id test-name outputfilename)) - (common:simple-file-release-lock lockf) - (change-directory orig-dir) - ;; NB// tests:test-set-toplog! is remote internal... - (tests:test-set-toplog! run-id test-name outputfilename)) - ;; didn't get the lock, check to see if current update started later than this - ;; update, if so we can exit without doing any work - (if (> my-start-time (handle-exceptions - exn - 0 - (file-modification-time lockf))) - ;; we started since current re-gen in flight, delay a little and try again - (begin - (debug:print-info 1 *default-log-port* "Waiting to update " outputfilename ", another test currently updating it") - (thread-sleep! (+ 5 (random 5))) ;; delay between 5 and 10 seconds - (loop (common:simple-file-lock lockf)))))))))) - -(define (tests:generate-html-summary-for-iterated-test run-id test-id test-name outputfilename) - (let ((counts (make-hash-table)) - (statecounts (make-hash-table)) - (outtxt "") - (tot 0) - (testdat (rmt:test-get-records-for-index-file run-id test-name))) - (with-output-to-file outputfilename - (lambda () - (set! outtxt (conc outtxt "Summary: " test-name - "

Summary for " test-name "

")) - (for-each - (lambda (testrecord) - (let ((id (vector-ref testrecord 0)) - (itempath (vector-ref testrecord 1)) - (state (vector-ref testrecord 2)) - (status (vector-ref testrecord 3)) - (run_duration (vector-ref testrecord 4)) - (logf (vector-ref testrecord 5)) - (comment (vector-ref testrecord 6))) - (hash-table-set! counts status (+ 1 (hash-table-ref/default counts status 0))) - (hash-table-set! statecounts state (+ 1 (hash-table-ref/default statecounts state 0))) - (set! outtxt (conc outtxt "" - ;; " " itempath "" - " " itempath "" - "" state "" - "" status "" - "" (if (equal? comment "") - " " - comment) "" - "")))) - (if (list? testdat) - testdat - (begin - (print "ERROR: failed to get records with rmt:test-get-records-for-index-file run-id=" run-id "test-name=" test-name) - '()))) - - (print "
") - ;; Print out stats for status - (set! tot 0) - (print "") - (for-each (lambda (state) - (set! tot (+ tot (hash-table-ref statecounts state))) - (print "")) - (hash-table-keys statecounts)) - (print "

State stats

" state "" (hash-table-ref statecounts state) "
Total" tot "
") - (print "
") - ;; Print out stats for state - (set! tot 0) - (print "") - (for-each (lambda (status) - (set! tot (+ tot (hash-table-ref counts status))) - (print "")) - (hash-table-keys counts)) - (print "

Status stats

" status - "" (hash-table-ref counts status) "
Total" tot "
") - (print "
") - - (print "" - "" - outtxt "
ItemStateStatusComment
") - ;; (release-dot-lock outputfilename) - ;;(rmt:update-run-stats - ;; run-id - ;; (hash-table-map - ;; state-status-counts - ;; (lambda (key val) - ;; (append key (list val))))) - )))) - -(define tests:css-jscript-block -#< -ul.LinkedList { display: block; } -/* ul.LinkedList ul { display: none; } */ -.HandCursorStyle { cursor: pointer; cursor: hand; } /* For IE */ -th {background-color: #8c8c8c;} -td.test {background-color: #d9dbdd;} -td.PASS {background-color: #347533;} -td.FAIL {background-color: #cc2812;} -td.SKIP{background-color: #FFD733;} -td.WARN {background-color: #EA8724;} -td.WAIVED {background-color: #838A12;} -td.ABORT{background-color: #EA24B7;} -.PASS .link, .SKIP .link, .WARN .link,.WAIVED .link,.ABORT .link, .FAIL .link{color: #FFFFFF;} - - - - - - -EOF -) - -(define tests:css-jscript-block-dynamic -#< -EOF -) - -(define (test:js-block javascript-lib) - (conc "" )) - - -(define tests:css-jscript-block-static (test:js-block *java-script-lib*)) - -(define (tests:css-jscript-block-cond dynamic) - (if (equal? 
dynamic #t) - tests:css-jscript-block-dynamic - tests:css-jscript-block-static)) - - -(define (tests:run-record->test-path run numkeys) - (append (take (vector->list run) numkeys) - (list (vector-ref run (+ 1 numkeys))))) - - -(define (tests:get-rest-data runs header numkeys) - (let ((resh (make-hash-table))) - (for-each - (lambda (run) - (let* ((run-id (db:get-value-by-header run header "id")) - (run-dir (tests:run-record->test-path run numkeys)) - (test-data (rmt:get-tests-for-run - run-id - "%" ;; testnamepatt - '() ;; states - '() ;; statuses - #f ;; offset - #f ;; num-to-get - #f ;; hide/not-hide - #f ;; sort-by - #f ;; sort-order - #f ;; 'shortlist ;; qrytype - 0 ;; last update - #f))) - - (map (lambda (test) - (let* ((test-name (vector-ref test 2)) - (test-html-path (conc (vector-ref test 10) "/" (vector-ref test 13))) - (test-item (conc test-name ":" (vector-ref test 11))) - (test-status (vector-ref test 4))) - - (if (not (hash-table-ref/default resh test-name #f)) - (hash-table-set! resh test-name (make-hash-table))) - (if (not (hash-table-ref/default (hash-table-ref/default resh test-name #f) test-item #f)) - (hash-table-set! (hash-table-ref/default resh test-name #f) test-item (make-hash-table))) - (hash-table-set! (hash-table-ref/default (hash-table-ref/default resh test-name #f) test-item #f) run-id (list test-status test-html-path)))) - test-data))) - runs) - resh)) - - -;; hash-table tree to html list tree -;; -;; tipfunc takes two parameters: y the tip value and path the path to that point -;; -(define (common:htree->html ht path tipfunc) - (let ((datlist (sort (hash-table->alist ht) - (lambda (a b) - (string< (car a)(car b)))))) - (if (null? datlist) - (tipfunc #f path) ;; really shouldn't get here - (s:ul - (map (lambda (x) - (let* ((levelname (car x)) - (y (cdr x)) - (newpath (append path (list levelname))) - (leaf (or (not (hash-table? y)) - (null? 
(hash-table-keys y))))) - (if leaf - (s:li (tipfunc y newpath)) - (s:li - (list - levelname - (common:htree->html y newpath tipfunc)))))) - datlist))))) - - -;; tests:genrate dashboard body -;; - -(define (tests:dashboard-body page pg-size keys numkeys total-runs linktree area-name get-prev-links get-next-links flag run-patt target-patt) - (let* ((start (* page pg-size)) - ;(runsdat (rmt:get-runs "%" pg-size start (map (lambda (x)(list x "%")) keys))) - (runsdat (rmt:get-runs-by-patt keys run-patt target-patt start pg-size #f 0 sort-order: "desc")) - ; db:get-runs-by-patt keys runnamepatt targpatt offset limit fields last-update - (header (vector-ref runsdat 0)) - (runs (vector-ref runsdat 1)) - (ctr 0) - (test-runs-hash (tests:get-rest-data runs header numkeys)) - (test-list (hash-table-keys test-runs-hash))) - - (s:html tests:css-jscript-block (tests:css-jscript-block-cond flag) - (s:title "Summary for " area-name) - (s:body 'onload "addEvents();" - (get-prev-links page linktree) - (get-next-links page linktree total-runs) - - (s:h1 "Summary for " area-name) - (s:h3 "Filter" ) - (s:input 'type "text" 'name "testname" 'id "testname" 'length "30" 'onkeyup "filtersome()") - ;; top list - - (s:table 'id "LinkedList1" 'border "1" 'cellspacing 0 - (map (lambda (key) - (let* ((res (s:tr 'class "something" - (s:th key ) - (map (lambda (run) - (s:th (vector-ref run ctr))) - runs)))) - (set! 
ctr (+ ctr 1)) - res)) - keys) - (s:tr - (s:th "Run Name") - (map (lambda (run) - (s:th (db:get-value-by-header run header "runname"))) - runs)) - - (map (lambda (test-name) - (let* ((item-hash (hash-table-ref/default test-runs-hash test-name #f)) - (item-keys (sort (hash-table-keys item-hash) string<=?))) - (map (lambda (item-name) - (let* ((res (s:tr 'class item-name - (s:td item-name 'class "test" ) - (map (lambda (run) - (let* ((run-test (hash-table-ref/default item-hash item-name #f)) - (run-id (db:get-value-by-header run header "id")) - (result (hash-table-ref/default run-test run-id "n/a")) - ;(relative-path (get-relative-path)) - (status (if (string? result) - result - (car result))) - (link (if (string? result) - result - (if (equal? flag #t) - (s:a (car result) 'href (conc "./test_log?runid=" run-id "&testname=" item-name )) - (s:a (car result) 'href (string-substitute (conc linktree "/") "" (cadr result) "-")))))) - (s:td link 'class status))) - runs)))) - res)) - item-keys))) - test-list)))))) - -;; (tests:create-html-tree "test-index.html") -;; -(define (tests:create-html-tree outf) - (let* ((lockfile (conc outf ".lock")) - (runs-to-process '()) - (linktree (common:get-linktree)) - (area-name (common:get-testsuite-name)) - (keys (rmt:get-keys)) - (numkeys (length keys)) - (run-patt (or (args:get-arg "-run-patt") - (args:get-arg "-runname") - "%")) - (target (or (args:get-arg "-target-patt") - (args:get-arg "-target") - "%")) - (targlist (string-split target "/")) - (numtarg (length targlist)) - (targtweaked (if (> numkeys numtarg) - (append targlist (make-list (- numkeys numtarg) "%")) - targlist)) - (target-patt (string-join targtweaked "/")) - ;(total-runs (rmt:get-num-runs "%")) ;;this needs to be changed to filter by target - (total-runs (rmt:get-runs-cnt-by-patt run-patt target-patt keys )) - (pg-size 10)) - (if (common:simple-file-lock lockfile) - (begin - ;(print total-runs) - (let loop ((page 0)) - (let* ((oup (open-output-file (or outf (conc 
linktree "/page" page ".html")))) - (get-prev-links (lambda (page linktree ) - (let* ((link (if (not (eq? page 0)) - (s:a "<<prev" 'href (conc "page" (- page 1) ".html")) - (s:a "" 'href (conc "page" page ".html"))))) - link))) - (get-next-links (lambda (page linktree total-runs) - (let* ((link (if (> total-runs (+ 10 (* page pg-size))) - (s:a "next>>" 'href (conc "page" (+ page 1) ".html")) - (s:a "" 'href (conc "page" page ".html"))))) - link))) ) - (print "total runs: " total-runs) - (s:output-new - oup - (tests:dashboard-body page pg-size keys numkeys total-runs linktree area-name get-prev-links get-next-links #f run-patt target-patt)) ;; update this function - (close-output-port oup) - ; (set! page (+ 1 page)) - (if (> total-runs (* (+ 1 page) pg-size)) - (loop (+ 1 page))))) - (common:simple-file-release-lock lockfile)) - - #f))) - - -(define (tests:readlines filename) - (call-with-input-file filename - (lambda (p) - (let loop ((line (read-line p)) - (result '())) - (if (eof-object? line) - (reverse result) - (loop (read-line p) (cons line result))))))) - -(define (tests:get-test-log run-id test-name item-name) - (let* ((test-data (rmt:get-tests-for-run - (string->number run-id) - test-name ;; testnamepatt - '() ;; states - '() ;; statuses - #f ;; offset - #f ;; num-to-get - #f ;; hide/not-hide - #f ;; sort-by - #f ;; sort-order - #f ;; 'shortlist ;; qrytype - 0 ;; last update - #f)) - (path "") - (found 0)) - (debug:print-info 0 *default-log-port* "found: " found ) - - (let loop ((hed (car test-data)) - (tal (cdr test-data))) - (debug:print-info 0 *default-log-port* "item: " (vector-ref hed 11) (vector-ref hed 10) "/" (vector-ref hed 13)) - - (if (equal? (vector-ref hed 11) item-name) - (begin - (set! found 1) - (set! path (conc (vector-ref hed 10) "/" (vector-ref hed 13))))) - (if (and (not (null? tal)) (equal? found 0)) - (loop (car tal)(cdr tal)))) - (if (equal? path "") - "

Data not found

" - (string-join (tests:readlines path) "\n")))) - - -(define (tests:dynamic-dboard page) -;(define (tests:create-html-tree o) - (let* ( -;(page "1") - (linktree (common:get-linktree)) - (area-name (common:get-testsuite-name)) - (keys (rmt:get-keys)) - (numkeys (length keys)) - (targtweaked (make-list numkeys "%")) - (target-patt (string-join targtweaked "/")) - (total-runs (rmt:get-num-runs "%")) - (pg-size 10) - (pg (if (equal? page #f) - 0 - (- (string->number page) 1))) - (get-prev-links (lambda (pg linktree) - (debug:print-info 0 *default-log-port* "val: " (- 1 pg)) - (let* ((link (if (not (eq? pg 0)) - (s:a "<<prev " 'href (conc "dashboard?page=" pg )) - (s:a "" 'href (conc "dashboard?page=" pg))))) - link))) - (get-next-links (lambda (pg linktree total-runs) - (debug:print-info 0 *default-log-port* "val: " pg) - (debug:print-info 0 *default-log-port* "val: " total-runs " size" pg-size) - - (let* ((link (if (> total-runs (+ 10 (* pg pg-size))) - (s:a "next>> " 'href (conc "dashboard?page=" (+ pg 2) )) - (s:a "" 'href (conc "dashboard?page=" pg ))))) - link))) - (html-body (tests:dashboard-body pg pg-size keys numkeys total-runs linktree area-name get-prev-links get-next-links #t "%" target-patt))) ;; update tis function - html-body)) - -(define (tests:create-html-summary outf) - (let* ((lockfile (conc outf ".lock")) - (linktree (common:get-linktree)) - (keys (rmt:get-keys)) - (area-name (common:get-testsuite-name)) - (run-patt (or (args:get-arg "-run-patt") - (args:get-arg "-runname") - "%")) - (target (or (args:get-arg "-target-patt") - (args:get-arg "-target") - "%")) - (targlist (string-split target "/")) - (numkeys (length keys)) - (numtarg (length targlist)) - (targtweaked (if (> numkeys numtarg) - (append targlist (make-list (- numkeys numtarg) "%")) - targlist)) - (target-patt (string-join targtweaked "/"))) - (if (common:simple-file-lock lockfile) - (begin - (let* (;(runsdat1 (rmt:get-runs run-patt #f #f (map (lambda (x)(list x "%")) keys))) - 
(runsdat (rmt:get-runs-by-patt keys run-patt target-patt #f #f #f 0)) - (runs (vector-ref runsdat 1)) - (header (vector-ref runsdat 0)) - (oup (open-output-file (or outf (conc linktree "/targets.html")))) - (target-hash (test:create-target-hash runs header (length keys)))) - (test:create-target-html target-hash oup area-name linktree) - (test:create-run-html runs area-name linktree (length keys) header)) - (common:simple-file-release-lock lockfile)) - #f))) - -(define (test:get-test-hash test-data) - (let ((resh (make-hash-table))) - (map (lambda (test) - (let* ((test-name (vector-ref test 2)) - (test-html-path (if (file-exists? (conc (vector-ref test 10) "/test-summary.html")) - (conc (vector-ref test 10) "/test-summary.html" ) - (conc (vector-ref test 10) "/" (vector-ref test 13)))) - (test-item (vector-ref test 11)) - (test-status (vector-ref test 4))) - (if (not (hash-table-ref/default resh test-item #f)) - (hash-table-set! resh test-item (make-hash-table))) - (hash-table-set! (hash-table-ref/default resh test-item #f) test-name (list test-status test-html-path)))) - test-data) -resh)) - -(define (test:get-data->b-keys ordered-data a-keys) - (delete-duplicates - (sort (apply - append - (map (lambda (sub-key) - (let ((subdat (hash-table-ref ordered-data sub-key))) - (hash-table-keys subdat))) - a-keys)) - string>=?))) - - -(define (test:create-run-html runs area-name linktree numkeys header) - (map (lambda (run) - (let* ((target (string-join (take (vector->list run) numkeys) "/")) - (run-name (db:get-value-by-header run header "runname")) - (run-time (seconds->work-week/day-time (db:get-value-by-header run header "event_time"))) - (oup (if (file-exists? 
(conc linktree "/" target "/" run-name)) - (open-output-file (conc linktree "/" target "/" run-name "/run.html")) - #f)) - (run-id (db:get-value-by-header run header "id")) - (test-data (rmt:get-tests-for-run - run-id - "%" ;; testnamepatt - '() ;; states - '() ;; statuses - #f ;; offset - #f ;; num-to-get - #f ;; hide/not-hide - #f ;; sort-by - #f ;; sort-order - #f ;; 'shortlist ;; qrytype - 0 ;; last update - #f)) - (item-test-hash (test:get-test-hash test-data)) - (items (hash-table-keys item-test-hash)) - (test-names (test:get-data->b-keys item-test-hash items))) - (if oup - (begin - (s:output-new - oup - (s:html tests:css-jscript-block (tests:css-jscript-block-cond #f) - (s:title "Runs View " run-name) - (s:body - (s:h1 "Runs View " ) - (s:h3 "Target" target) - (s:p - (s:b "Run name" ) run-name) - (s:p - (s:b "Run Date" ) run-time) - (s:table 'border 1 'cellspacing 0 - (s:tr - (s:th "Items") - (map (lambda (test) - (s:th test)) - test-names)) - (map (lambda (item) - (let* ((test-hash (hash-table-ref/default item-test-hash item #f))) - (if test-hash - (begin - (s:tr - (s:td 'class "test" item) - (map (lambda (test) - (let* ((test-details (hash-table-ref/default test-hash test #f)) - (status (if test-details - (car test-details))) - (link (if test-details - (string-substitute (conc linktree "/" target "/" run-name "/") "" (cadr test-details) "-")))) - (if test-details - (s:td 'class status - (s:a 'class "link" 'href link status )) - (s:td "")))) - test-names)))))) - (sort items string<=?)))))) - (close-output-port oup)) - (debug:print-info 0 "Skip: Dirctory structure " linktree "/" target "/" run-name " does not exist. 
Megatest will not create run.html")))) -runs)) - -(define (test:create-target-hash runs header numkeys) - (let ((resh (make-hash-table))) - (for-each - (lambda (run) - (let* ((run-name (db:get-value-by-header run header "runname")) - (target (string-join (take (vector->list run) numkeys) "/")) - (run-list (hash-table-ref/default resh target #f))) - - (if (not run-list) - (hash-table-set! resh target (list run-name)) - (hash-table-set! resh target (cons run-name run-list))))) - runs) - resh)) - -(define (test:get-max-run-cnt target-hash targets) - (let* ((cnt 0 )) - (map (lambda (target) - (let* ((runs (hash-table-ref/default target-hash target #f)) - (run-length (if runs - (length runs) - 0))) - - (if (< cnt run-length) - (set! cnt run-length)))) - targets) -cnt)) - -(define (test:pad-runs target-hash targets max-row-length) - (map (lambda (target) - (let loop ((run-list (hash-table-ref/default target-hash target #f))) - (if (< (length run-list) max-row-length) - (begin - (hash-table-set! target-hash target (cons "" run-list)) - (loop (hash-table-ref/default target-hash target #f) ))))) - targets) - target-hash) - -(define (test:create-target-html target-hash oup area-name linktree) - (let* ((targets (hash-table-keys target-hash)) - (max-row-length (test:get-max-run-cnt target-hash targets)) - (pad-runs-hash (test:pad-runs target-hash targets max-row-length))) - (s:output-new - oup - (s:html tests:css-jscript-block (tests:css-jscript-block-cond #f) - - (s:title "Target View " area-name) - (s:body - (s:h1 "Target View " area-name) - (s:table 'id "LinkedList1" 'border "1" 'cellspacing 0 - (s:tr 'class "something" - (s:th "Target") - (s:th 'colspan max-row-length "Runs")) - (let* ((tbl (map (lambda (target) - (s:tr - (s:td 'class "test" target) - (let* ((runs (hash-table-ref/default target-hash target #f)) - (rest-row (map (lambda (run) - (if (equal? 
run "") - (s:td run) - (if (file-exists?(conc linktree "/" target "/" run )) - (begin - (s:td - (s:a 'href (conc target "/" run "/run.html") run)))))) - (reverse runs)))) - rest-row))) - targets))) - tbl))))) - (close-output-port oup))) - - -(define (tests:create-html-tree-old outf) - (let* ((lockfile (conc outf ".lock")) - (runs-to-process '())) - (if (common:simple-file-lock lockfile) - (let* ((linktree (common:get-linktree)) - (oup (open-output-file (or outf (conc linktree "/runs-index.html")))) - (area-name (common:get-testsuite-name)) - (keys (rmt:get-keys)) - (numkeys (length keys)) - (runsdat (rmt:get-runs "%" #f #f (map (lambda (x)(list x "%")) keys))) - (header (vector-ref runsdat 0)) - (runs (vector-ref runsdat 1)) - (runtreedat (map (lambda (x) - (tests:run-record->test-path x numkeys)) - runs)) - (runs-htree (common:list->htree runtreedat))) - (set! runs-to-process runs) - (s:output-new - oup - (s:html tests:css-jscript-block - (s:title "Summary for " area-name) - (s:body 'onload "addEvents();" - (s:h1 "Summary for " area-name) - ;; top list - (s:ul 'id "LinkedList1" 'class "LinkedList" - (s:li - "Runs" - (common:htree->html runs-htree - '() - (lambda (x p) - (let* ((targ-path (string-intersperse p "/")) - (full-path (conc linktree "/" targ-path)) - (run-name (car (reverse p)))) - (if (and (common:file-exists? full-path) - (directory? full-path) - (file-write-access? 
full-path)) - (s:a run-name 'href (conc targ-path "/run-summary.html")) - (begin - (debug:print 0 *default-log-port* "INFO: Can't create " targ-path "/run-summary.html") - (conc run-name " (Not able to create summary at " targ-path ")"))))))))))) - (close-output-port oup) - (common:simple-file-release-lock lockfile) - - (for-each - (lambda (run) - (let* ((test-subpath (tests:run-record->test-path run numkeys)) - (run-id (db:get-value-by-header run header "id")) - (run-dir (tests:run-record->test-path run numkeys)) - (test-dats (rmt:get-tests-for-run - run-id - "%/" ;; testnamepatt - '() ;; states - '() ;; statuses - #f ;; offset - #f ;; num-to-get - #f ;; hide/not-hide - #f ;; sort-by - #f ;; sort-order - #f ;; 'shortlist ;; qrytype - 0 ;; last update - #f)) - (tests-tree-dat (map (lambda (test-dat) - ;; (tests:run-record->test-path x numkeys)) - (let* ((test-name (db:test-get-testname test-dat)) - (item-path (db:test-get-item-path test-dat)) - (full-name (db:test-make-full-name test-name item-path)) - (path-parts (string-split full-name))) - path-parts)) - test-dats)) - (tests-htree (common:list->htree tests-tree-dat)) - (html-dir (conc linktree "/" (string-intersperse run-dir "/"))) - (html-path (conc html-dir "/run-summary.html")) - (oup (if (and (common:file-exists? html-dir) - (directory? html-dir) - (file-write-access? 
html-dir)) - (open-output-file html-path) - #f))) - ;; (print "run-dir: " run-dir ", tests-tree-dat: " tests-tree-dat) - (if oup - (begin - (s:output-new - oup - (s:html tests:css-jscript-block - (s:title "Summary for " area-name) - (s:body 'onload "addEvents();" - (s:h1 "Summary for " (string-intersperse run-dir "/")) - ;; top list - (s:ul 'id "LinkedList1" 'class "LinkedList" - (s:li - "Tests" - (common:htree->html tests-htree - '() - (lambda (x p) - (let* ((targ-path (string-intersperse p "/")) - (test-name (car p)) - (item-path ;; (if (> (length p) 2) ;; test-name + run-name - (string-intersperse p "/")) - (full-targ (conc html-dir "/" targ-path)) - (std-file (conc full-targ "/test-summary.html")) - (alt-file (conc full-targ "/megatest-rollup-" test-name ".html")) - (html-file (if (common:file-exists? alt-file) - alt-file - std-file)) - (run-name (car (reverse p)))) - (if (and (not (common:file-exists? full-targ)) - (directory? full-targ) - (file-write-access? full-targ)) - (tests:summarize-test - run-id - (rmt:get-test-id run-id test-name item-path))) - (if (common:file-exists? full-targ) - (s:a run-name 'href html-file) - (begin - (debug:print 0 *default-log-port* "ERROR: can't access " full-targ) - (conc "No summary for " run-name))))) - )))))) - (close-output-port oup))))) - runs) - #t) - #f))) - - - - - - - -;; CHECK - WAS THIS ADDED OR REMOVED? MANUAL MERGE WITH API STUFF!!! 
-;; -;; get a pretty table to summarize steps -;; -;; (define (dcommon:process-steps-table steps);; db test-id #!key (work-area #f)) -(define (tests:process-steps-table steps);; db test-id #!key (work-area #f)) -;; (let ((steps (db:get-steps-for-test db test-id work-area: work-area))) - ;; organise the steps for better readability - (let ((res (make-hash-table))) - (for-each - (lambda (step) - (debug:print 6 *default-log-port* "step=" step) - (let ((record (hash-table-ref/default - res - (tdb:step-get-stepname step) - ;; 0 1 2 3 4 5 6 7 - ;; stepname start end status Duration Logfile Comment first-id - (vector (tdb:step-get-stepname step) "" "" "" "" "" "" #f)))) - (debug:print 6 *default-log-port* "record(before) = " record - "\nid: " (tdb:step-get-id step) - "\nstepname: " (tdb:step-get-stepname step) - "\nstate: " (tdb:step-get-state step) - "\nstatus: " (tdb:step-get-status step) - "\ntime: " (tdb:step-get-event_time step)) - (if (not (vector-ref record 7))(vector-set! record 7 (tdb:step-get-id step))) ;; do not clobber the id if previously set - (case (string->symbol (tdb:step-get-state step)) - ((start)(vector-set! record 1 (tdb:step-get-event_time step)) - (vector-set! record 3 (if (equal? (vector-ref record 3) "") - (tdb:step-get-status step))) - (if (> (string-length (tdb:step-get-logfile step)) - 0) - (vector-set! record 5 (tdb:step-get-logfile step)))) - ((end) - (vector-set! record 2 (any->number (tdb:step-get-event_time step))) - (vector-set! record 3 (tdb:step-get-status step)) - (vector-set! record 4 (let ((startt (any->number (vector-ref record 1))) - (endt (any->number (vector-ref record 2)))) - (debug:print 4 *default-log-port* "record[1]=" (vector-ref record 1) - ", startt=" startt ", endt=" endt - ", get-status: " (tdb:step-get-status step)) - (if (and (number? startt)(number? endt)) - (seconds->hr-min-sec (- endt startt)) "-1"))) - (if (> (string-length (tdb:step-get-logfile step)) - 0) - (vector-set! 
record 5 (tdb:step-get-logfile step))) - (if (> (string-length (tdb:step-get-comment step)) - 0) - (vector-set! record 6 (tdb:step-get-comment step)))) - (else - (vector-set! record 2 (tdb:step-get-state step)) - (vector-set! record 3 (tdb:step-get-status step)) - (vector-set! record 4 (tdb:step-get-event_time step)) - (vector-set! record 6 (tdb:step-get-comment step)))) - (hash-table-set! res (tdb:step-get-stepname step) record) - (debug:print 6 *default-log-port* "record(after) = " record - "\nid: " (tdb:step-get-id step) - "\nstepname: " (tdb:step-get-stepname step) - "\nstate: " (tdb:step-get-state step) - "\nstatus: " (tdb:step-get-status step) - "\ntime: " (tdb:step-get-event_time step)))) - ;; (else (vector-set! record 1 (tdb:step-get-event_time step))) - (sort steps (lambda (a b) - (cond - ((< (tdb:step-get-event_time a)(tdb:step-get-event_time b)) #t) - ((eq? (tdb:step-get-event_time a)(tdb:step-get-event_time b)) - (< (tdb:step-get-id a) (tdb:step-get-id b))) - (else #f))))) - res)) - -;; -;; -(define (tests:get-compressed-steps run-id test-id) - (let* ((steps-data (rmt:get-steps-for-test run-id test-id)) ;; 0 1 2 3 4 5 6 7 - (comprsteps (tests:process-steps-table steps-data))) ;; # - (map (lambda (x) - ;; take advantage of the \n on time->string - (vector ;; we are constructing basically the original vector but collapsing start end records - (vector-ref x 0) ;; id 0 - (let ((s (vector-ref x 1))) - (if (number? s)(seconds->time-string s) s)) ;; starttime 1 - (let ((s (vector-ref x 2))) - (if (number? s)(seconds->time-string s) s)) ;; endtime 2 - (vector-ref x 3) ;; status 3 - (vector-ref x 4) ;; duration 4 - (vector-ref x 5) ;; logfile 5 - (vector-ref x 6) ;; comment 6 - (vector-ref x 7))) ;; id 7 - (sort (hash-table-values comprsteps) - (lambda (a b) - (let ((time-a (vector-ref a 1)) - (time-b (vector-ref b 1)) - (id-a (vector-ref a 7)) - (id-b (vector-ref b 7))) - (if (and (number? time-a)(number? time-b)) - (if (< time-a time-b) - #t - (if (eq? 
time-a time-b) - (< id-a id-b) - ;; (stringwork-week/day-time - (db:test-get-event_time test-dat))) - (s:td "Duration") (s:td (seconds->hr-min-sec (db:test-get-run_duration test-dat))))) - (s:h3 "Log files") - (s:table - 'cellspacing "0" 'border "1" - (s:tr (s:td "Final log")(s:td (s:a 'href logf logf)))) - (s:table - 'cellspacing "0" 'border "1" - (s:tr (s:td "Step Name")(s:td "Start")(s:td "End")(s:td "Status")(s:td "Duration")(s:td "Log File")) - (map (lambda (step-dat) - (s:tr (s:td (tdb:steps-table-get-stepname step-dat)) - (s:td (tdb:steps-table-get-start step-dat)) - (s:td (tdb:steps-table-get-end step-dat)) - (s:td (tdb:steps-table-get-status step-dat)) - (s:td (tdb:steps-table-get-runtime step-dat)) - (s:td (let ((step-log (tdb:steps-table-get-log-file step-dat))) - (s:a 'href step-log step-log))))) - steps-dat)) - ))) - (close-output-port oup))))) - - -;; MUST BE CALLED local! -;; -(define (tests:test-get-paths-matching keynames target fnamepatt #!key (res '())) - ;; BUG: Move the values derived from args to parameters and push to megatest.scm - (let* ((testpatt (or (args:get-arg "-testpatt")(args:get-arg "-testpatt") "%")) - (statepatt (or (args:get-arg "-state") (args:get-arg ":state") "%")) - (statuspatt (or (args:get-arg "-status") (args:get-arg ":status") "%")) - (runname (or (args:get-arg "-runname") (args:get-arg ":runname") "%")) - (paths-from-db (rmt:test-get-paths-matching-keynames-target-new keynames target res - testpatt - statepatt - statuspatt - runname))) - (if fnamepatt - (apply append - (map (lambda (p) - (if (directory-exists? p) - (let ((glob-query (conc p "/" fnamepatt))) - (handle-exceptions - exn - (with-input-from-pipe - (conc "echo " glob-query) - read-lines) ;; we aren't going to try too hard. 
If glob breaks it is likely because someone tried to do */*/*.log or similar - (glob glob-query))) - '())) - paths-from-db)) - paths-from-db))) - - -;;====================================================================== -;; Gather data from test/task specifications -;;====================================================================== - -;; (define (tests:get-valid-tests testsdir test-patts) ;; #!key (test-names '())) -;; (let ((tests (glob (conc testsdir "/tests/*")))) ;; " (string-translate patt "%" "*"))))) -;; (set! tests (filter (lambda (test)(common:file-exists? (conc test "/testconfig"))) tests)) -;; (delete-duplicates -;; (filter (lambda (testname) -;; (tests:match test-patts testname #f)) -;; (map (lambda (testp) -;; (last (string-split testp "/"))) -;; tests))))) - -(define (tests:get-test-path-from-environment) - (if (and (getenv "MT_LINKTREE") - (getenv "MT_TARGET") - (getenv "MT_RUNNAME") - (getenv "MT_TEST_NAME") - (getenv "MT_ITEMPATH")) - (conc (getenv "MT_LINKTREE") "/" - (getenv "MT_TARGET") "/" - (getenv "MT_RUNNAME") "/" - (getenv "MT_TEST_NAME") - (if (and (getenv "MT_ITEMPATH") - (not (string=? "" (getenv "MT_ITEMPATH")))) - (conc "/" (getenv "MT_ITEMPATH")) - "")) - #f)) - -;; if .testconfig exists in test directory read and return it -;; else if have cached copy in *testconfigs* return it IFF there is a section "have fulldata" -;; else read the testconfig file -;; if have path to test directory save the config as .testconfig and return it -;; -(define (tests:get-testconfig test-name item-path test-registry system-allowed - #!key (force-create #f)(allow-write-cache #t)(wait-a-minute #f)) - (let* ((use-cache (common:use-cache?)) - (cache-path (tests:get-test-path-from-environment)) - (cache-file (and cache-path (conc cache-path "/.testconfig"))) - (cache-exists (and cache-file - (not force-create) ;; if force-create then pretend there is no cache to read - (common:file-exists? 
cache-file))) - (cached-dat (if (and (not force-create) - cache-exists - use-cache) - (handle-exceptions - exn - #f ;; any issues, just give up with the cached version and re-read - (configf:read-alist cache-file)) - #f)) - (test-full-name (if (and item-path (not (string-null? item-path))) - (conc test-name "/" item-path) - test-name))) - (if cached-dat - cached-dat - (let ((dat (hash-table-ref/default *testconfigs* test-full-name #f))) - (if (and dat ;; have a locally cached version - (hash-table-ref/default dat "have fulldata" #f)) ;; marked as good data? - dat - ;; no cached data available - (let* ((treg (or test-registry - (tests:get-all))) - (test-path (or (hash-table-ref/default treg test-name #f) - (let* ((local-tcdir (conc (getenv "MT_LINKTREE") "/" - (getenv "MT_TARGET") "/" - (getenv "MT_RUNNAME") "/" - test-name "/" item-path)) - (local-tcfg (conc local-tcdir "/testconfig"))) - (if (common:file-exists? local-tcfg) - local-tcdir - #f)) - (conc *toppath* "/tests/" test-name))) - (test-configf (conc test-path "/testconfig")) - (testexists (let loopa ((tries-left 30)) - (cond - ( - (and (common:file-exists? test-configf)(file-read-access? test-configf)) - #t) - ( - (common:file-exists? test-configf) - (debug:print 0 *default-log-port* "WARNING: Cannot read testconfig file: "test-configf) - #f) - ( - (and wait-a-minute (> tries-left 0)) - (thread-sleep! 10) - (debug:print 0 *default-log-port* "WARNING: testconfig file does not exist: "test-configf" will retry in 10 seconds. Tries left: "tries-left) ;; BB: this fires - (loopa (sub1 tries-left))) - (else - (debug:print 0 *default-log-port* "WARNING: testconfig file does not exist: "test-configf) ;; BB: this fires - #f)))) - (tcfg (if testexists - (read-config test-configf #f system-allowed - environ-patt: (if system-allowed - "pre-launch-env-vars" - #f)) - #f))) - (if (and tcfg cache-file) (hash-table-set! tcfg "have fulldata" #t)) ;; mark this as fully read data - (if tcfg (hash-table-set! 
*testconfigs* test-full-name tcfg)) - (if (and testexists - cache-file - (file-write-access? cache-path) - allow-write-cache) - (let ((tpath (conc cache-path "/.testconfig"))) - (debug:print-info 1 *default-log-port* "Caching testconfig for " test-name " in " tpath) - (if (and tcfg (not (common:in-running-test?))) - (configf:write-alist tcfg tpath)))) - tcfg)))))) - -;; sort tests by priority and waiton -;; Move test specific stuff to a test unit FIXME one of these days -(define (tests:sort-by-priority-and-waiton test-records) - (if (eq? (hash-table-size test-records) 0) - '() - (let* ((mungepriority (lambda (priority) - (if priority - (let ((tmp (any->number priority))) - (if tmp tmp (begin (debug:print-error 0 *default-log-port* "bad priority value " priority ", using 0") 0))) - 0))) - (all-tests (hash-table-keys test-records)) - (all-waited-on (let loop ((hed (car all-tests)) - (tal (cdr all-tests)) - (res '())) - (let* ((trec (hash-table-ref test-records hed)) - (waitons (or (tests:testqueue-get-waitons trec) '()))) - (if (null? tal) - (append res waitons) - (loop (car tal)(cdr tal)(append res waitons)))))) - (sort-fn1 - (lambda (a b) - (let* ((a-record (hash-table-ref test-records a)) - (b-record (hash-table-ref test-records b)) - (a-waitons (or (tests:testqueue-get-waitons a-record) '())) - (b-waitons (or (tests:testqueue-get-waitons b-record) '())) - (a-config (tests:testqueue-get-testconfig a-record)) - (b-config (tests:testqueue-get-testconfig b-record)) - (a-raw-pri (configf:lookup a-config "requirements" "priority")) - (b-raw-pri (configf:lookup b-config "requirements" "priority")) - (a-priority (mungepriority a-raw-pri)) - (b-priority (mungepriority b-raw-pri))) - (tests:testqueue-set-priority! a-record a-priority) - (tests:testqueue-set-priority! b-record b-priority) - ;; (debug:print 0 *default-log-port* "a=" a ", b=" b ", a-waitons=" a-waitons ", b-waitons=" b-waitons) - (cond - ;; is - ((member a b-waitons) ;; is b waiting on a? 
- ;; (debug:print 0 *default-log-port* "case1") - #t) - ((member b a-waitons) ;; is a waiting on b? - ;; (debug:print 0 *default-log-port* "case2") - #f) - ((and (not (null? a-waitons)) ;; both have waitons - do not disturb - (not (null? b-waitons))) - ;; (debug:print 0 *default-log-port* "case2.1") - #t) - ((and (null? a-waitons) ;; no waitons for a but b has waitons - (not (null? b-waitons))) - ;; (debug:print 0 *default-log-port* "case3") - #f) - ((and (not (null? a-waitons)) ;; a has waitons but b does not - (null? b-waitons)) - ;; (debug:print 0 *default-log-port* "case4") - #t) - ((not (eq? a-priority b-priority)) ;; use - (> a-priority b-priority)) - (else - ;; (debug:print 0 *default-log-port* "case5") - (string>? a b)))))) - - (sort-fn2 - (lambda (a b) - (> (mungepriority (tests:testqueue-get-priority (hash-table-ref test-records a))) - (mungepriority (tests:testqueue-get-priority (hash-table-ref test-records b))))))) - ;; (let ((dot-res (tests:run-dot (tests:tests->dot test-records) "plain"))) - ;; (debug:print "dot-res=" dot-res)) - ;; (let ((data (map cdr (filter - ;; (lambda (x)(equal? 
"node" (car x))) - ;; (map string-split (tests:easy-dot test-records "plain")))))) - ;; (map car (sort data (lambda (a b) - ;; (> (string->number (caddr a))(string->number (caddr b))))))) - ;; )) - (sort all-tests sort-fn1)))) ;; avoid dealing with deleted tests, look at the hash table - -(define (tests:easy-dot test-records outtype) - (let-values (((fd temp-path) (file-mkstemp (conc "/tmp/" (current-user-name) ".XXXXXX")))) - (let ((all-testnames (hash-table-keys test-records)) - (temp-port (open-output-file* fd))) - ;; (format temp-port "This file is ~A.~%" temp-path) - (format temp-port "digraph tests {\n") - (format temp-port " size=4,8\n") - ;; (format temp-port " splines=none\n") - (for-each - (lambda (testname) - (let* ((testrec (hash-table-ref test-records testname)) - (waitons (or (tests:testqueue-get-waitons testrec) '()))) - (for-each - (lambda (waiton) - (format temp-port (conc " " waiton " -> " testname " [splines=ortho]\n"))) - waitons))) - all-testnames) - (format temp-port "}\n") - (close-output-port temp-port) - (with-input-from-pipe - (conc "env -i PATH=$PATH dot -T" outtype " < " temp-path) - (lambda () - (let ((res (read-lines))) - ;; (delete-file temp-path) - res)))))) - -(define (tests:write-dot-file test-records fname sizex sizey) - (if (file-write-access? (pathname-directory fname)) - (with-output-to-file fname - (lambda () - (map print (tests:tests->dot test-records sizex sizey)))))) - -(define (tests:tests->dot test-records sizex sizey) - (let ((all-testnames (hash-table-keys test-records))) - (if (null? all-testnames) - '() - (let loop ((hed (car all-testnames)) - (tal (cdr all-testnames)) - (res (list "digraph tests {" - (conc " size=\"" (or sizex 11) "," (or sizey 11) "\";") - " ratio=0.95;" - ))) - (let* ((testrec (hash-table-ref test-records hed)) - (waitons (or (tests:testqueue-get-waitons testrec) '())) - (newres (append res - (if (null? 
waitons) - (list (conc " \"" hed "\" [shape=box];")) - (map (lambda (waiton) - (conc " \"" waiton "\" -> \"" hed "\" [shape=box];")) - waitons) - )))) - (if (null? tal) - (append newres (list "}")) - (loop (car tal)(cdr tal) newres) - )))))) - -;; (tests:run-dot (list "digraph tests {" "a -> b" "}") "plain") - -(define (tests:run-dot indat outtype) ;; outtype is plain, fig, dot, etc. http://www.graphviz.org/content/output-formats - (let-values (((inp oup pid)(process "env -i PATH=$PATH dot" (list "-T" outtype)))) - (with-output-to-port oup - (lambda () - (map print indat))) - (close-output-port oup) - (let ((res (with-input-from-port inp - (lambda () - (read-lines))))) - (close-input-port inp) - res))) - -;; read data from tmp file or create if not exists -;; if exists regen in background -;; -(define (tests:lazy-dot testrecords outtype sizex sizey) - (let ((dfile (conc "/tmp/." (current-user-name) "-" (server:mk-signature) ".dot")) - (fname (conc "/tmp/." (current-user-name) "-" (server:mk-signature) ".dotdat"))) - (tests:write-dot-file testrecords dfile sizex sizey) - (if (common:file-exists? 
fname) - (let ((res (with-input-from-file fname - (lambda () - (read-lines))))) - (system (conc "env -i PATH=$PATH dot -T " outtype " < " dfile " > " fname "&")) - res) - (begin - (system (conc "env -i PATH=$PATH dot -T " outtype " < " dfile " > " fname)) - (with-input-from-file fname - (lambda () - (read-lines))))))) - - -;; for each test: -;; -(define (tests:filter-non-runnable run-id testkeynames testrecordshash) - (let ((runnables '())) - (for-each - (lambda (testkeyname) - (let* ((test-record (hash-table-ref testrecordshash testkeyname)) - (test-name (tests:testqueue-get-testname test-record)) - (itemdat (tests:testqueue-get-itemdat test-record)) - (item-path (tests:testqueue-get-item_path test-record)) - (waitons (tests:testqueue-get-waitons test-record)) - (keep-test #t) - (test-id (rmt:get-test-id run-id test-name item-path)) - (tdat (rmt:get-testinfo-state-status run-id test-id))) ;; (cdb:get-test-info-by-id *runremote* test-id))) - (if tdat - (begin - ;; Look at the test state and status - (if (or (and (member (db:test-get-status tdat) - '("PASS" "WARN" "WAIVED" "CHECK" "SKIP")) - (equal? (db:test-get-state tdat) "COMPLETED")) - (member (db:test-get-state tdat) - '("INCOMPLETE" "KILLED"))) - (set! keep-test #f)) - - ;; examine waitons for any fails. If it is FAIL or INCOMPLETE then eliminate this test - ;; from the runnable list - (if keep-test - (for-each (lambda (waiton) - ;; for now we are waiting only on the parent test - (let* ((parent-test-id (rmt:get-test-id run-id waiton "")) - (wtdat (rmt:get-testinfo-state-status run-id test-id))) ;; (cdb:get-test-info-by-id *runremote* test-id))) - (if (or (and (equal? (db:test-get-state wtdat) "COMPLETED") - (member (db:test-get-status wtdat) '("FAIL" "ABORT"))) - (member (db:test-get-status wtdat) '("KILLED")) - (member (db:test-get-state wtdat) '("INCOMPETE"))) - ;; (if (or (member (db:test-get-status wtdat) - ;; '("FAIL" "KILLED")) - ;; (member (db:test-get-state wtdat) - ;; '("INCOMPETE"))) - (set! 
keep-test #f)))) ;; no point in running this one again - waitons)))) - (if keep-test (set! runnables (cons testkeyname runnables))))) - testkeynames) - runnables)) - -;;====================================================================== -;; refactoring this block into tests:get-full-data from line 263 of runs.scm -;;====================================================================== -;; hed is the test name -;; test-records is a hash of test-name => test record -(define (tests:get-full-data test-names test-records required-tests all-tests-registry) - (if (not (null? test-names)) - (let loop ((hed (car test-names)) - (tal (cdr test-names))) ;; 'return-procs tells the config reader to prep running system but return a proc - (debug:print-info 4 *default-log-port* "hed=" hed " at top of loop") - ;; don't know item-path at this time, let the testconfig get the top level testconfig - (let* ((config (tests:get-testconfig hed #f all-tests-registry 'return-procs)) - (waitons (let ((instr (if config - (configf:lookup config "requirements" "waiton") - (begin ;; No config means this is a non-existant test - (debug:print-error 0 *default-log-port* "non-existent required test \"" hed "\", grep through your testconfigs to find and remove or create the test. Discarding and continuing.") - "")))) - (debug:print-info 8 *default-log-port* "waitons string is " instr) - (string-split (cond - ((procedure? instr) - (let ((res (instr))) - (debug:print-info 8 *default-log-port* "waiton procedure results in string " res " for test " hed) - res)) - ((string? instr) instr) - (else - ;; NOTE: This is actually the case of *no* waitons! ;; (debug:print-error 0 *default-log-port* "something went wrong in processing waitons for test " hed) - "")))))) - (if (not config) ;; this is a non-existant test called in a waiton. - (if (null? 
tal) - test-records - (loop (car tal)(cdr tal))) - (begin - (debug:print-info 8 *default-log-port* "waitons: " waitons) - ;; check for hed in waitons => this would be circular, remove it and issue an - ;; error - (if (member hed waitons) - (begin - (debug:print-error 0 *default-log-port* "test " hed " has listed itself as a waiton, please correct this!") - (set! waitons (filter (lambda (x)(not (equal? x hed))) waitons)))) - - ;; (items (items:get-items-from-config config))) - (if (not (hash-table-ref/default test-records hed #f)) - (hash-table-set! test-records - hed (vector hed ;; 0 - config ;; 1 - waitons ;; 2 - (configf:lookup config "requirements" "priority") ;; priority 3 - (let ((items (hash-table-ref/default config "items" #f)) ;; items 4 - (itemstable (hash-table-ref/default config "itemstable" #f))) - ;; if either items or items table is a proc return it so test running - ;; process can know to call items:get-items-from-config - ;; if either is a list and none is a proc go ahead and call get-items - ;; otherwise return #f - this is not an iterated test - (cond - ((procedure? items) - (debug:print-info 4 *default-log-port* "items is a procedure, will calc later") - items) ;; calc later - ((procedure? itemstable) - (debug:print-info 4 *default-log-port* "itemstable is a procedure, will calc later") - itemstable) ;; calc later - ((filter (lambda (x) - (let ((val (car x))) - (if (procedure? val) val #f))) - (append (if (list? items) items '()) - (if (list? itemstable) itemstable '()))) - 'have-procedure) - ((or (list? items)(list? itemstable)) ;; calc now - (debug:print-info 4 *default-log-port* "items and itemstable are lists, calc now\n" - " items: " items " itemstable: " itemstable) - (items:get-items-from-config config)) - (else #f))) ;; not iterated - #f ;; itemsdat 5 - #f ;; spare - used for item-path - ))) - (for-each - (lambda (waiton) - (if (and waiton (not (member waiton test-names))) - (begin - (set! 
required-tests (cons waiton required-tests)) - (set! test-names (cons waiton test-names))))) ;; was an append, now a cons - waitons) - (let ((remtests (delete-duplicates (append waitons tal)))) - (if (not (null? remtests)) - (loop (car remtests)(cdr remtests)) - test-records)))))))) - -;;====================================================================== -;; test steps -;;====================================================================== - -;; teststep-set-status! used to be here - -(define (test-get-kill-request run-id test-id) ;; run-id test-name itemdat) - (let* ((testdat (rmt:get-test-info-by-id run-id test-id))) - (and testdat - (equal? (test:get-state testdat) "KILLREQ")))) - -(define (test:tdb-get-rundat-count tdb) - (if tdb - (let ((res 0)) - (sqlite3:for-each-row - (lambda (count) - (set! res count)) - tdb - "SELECT count(id) FROM test_rundat;") - res)) - 0) - -(define (tests:update-central-meta-info run-id test-id cpuload diskfree minutes uname hostname) - (rmt:general-call 'update-test-rundat run-id test-id (current-seconds) (or cpuload -1)(or diskfree -1) -1 (or minutes -1)) - (if (and cpuload diskfree) - (rmt:general-call 'update-cpuload-diskfree run-id cpuload diskfree test-id)) - (if minutes - (rmt:general-call 'update-run-duration run-id minutes test-id)) - (if (and uname hostname) - (rmt:general-call 'update-uname-host run-id uname hostname test-id))) - -;; This one is for running with no db access (i.e. 
via rmt: internally) -(define (tests:set-full-meta-info db test-id run-id minutes work-area remtries) -;; (define (tests:set-full-meta-info test-id run-id minutes work-area) -;; (let ((remtries 10)) - (let* ((cpuload (get-cpu-load)) - (diskfree (get-df (current-directory))) - (uname (get-uname "-srvpio")) - (hostname (get-host-name))) - (tests:update-central-meta-info run-id test-id cpuload diskfree minutes uname hostname))) - -;; (define (tests:set-partial-meta-info test-id run-id minutes work-area) -#;(define (tests:set-partial-meta-info test-id run-id minutes work-area remtries) - (let* ((cpuload (get-cpu-load)) - (diskfree (get-df (current-directory))) - (remtries 10)) - (handle-exceptions - exn - (if (> remtries 0) - (begin - (print-call-chain (current-error-port)) - (debug:print-info 0 *default-log-port* "WARNING: failed to set meta info. Will try " remtries " more times") - (set! remtries (- remtries 1)) - (thread-sleep! 10) - (tests:set-full-meta-info db test-id run-id minutes work-area (- remtries 1))) - (let ((err-status ((condition-property-accessor 'sqlite3 'status #f) exn))) - (debug:print-error 0 *default-log-port* "tried for over a minute to update meta info and failed. 
Giving up") - (debug:print 0 *default-log-port* "EXCEPTION: database probably overloaded or unreadable.") - (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) - (debug:print 5 *default-log-port* "exn=" (condition->list exn)) - (debug:print 0 *default-log-port* " status: " ((condition-property-accessor 'sqlite3 'status) exn)) - (print-call-chain (current-error-port)))) - (tests:update-testdat-meta-info db test-id work-area cpuload diskfree minutes) - ))) - -;;====================================================================== -;; A R C H I V I N G -;;====================================================================== - -(define (test:archive db test-id) - #f) - -(define (test:archive-tests db keynames target) - #f) - Index: testsmod.scm ================================================================== --- testsmod.scm +++ testsmod.scm @@ -17,44 +17,46 @@ ;; along with Megatest. If not, see . ;;====================================================================== (declare (unit testsmod)) + (declare (uses commonmod)) (declare (uses mtargs)) +(declare (uses servermod)) +(declare (uses mtconfigf)) +(declare (uses itemsmod)) +(declare (uses dbmod)) (module testsmod * -(import scheme chicken data-structures extras) +(import scheme chicken data-structures extras files) + +(use (prefix sqlite3 sqlite3:) posix typed-records srfi-18 srfi-69 + format ports srfi-1 matchable + directory-utils + regex srfi-13) + + +(import commonmod) +(import servermod) +(import itemsmod) +(import dbmod) +(import (prefix mtconfigf configf:)) +(import (prefix mtargs args:)) -(import (prefix sqlite3 sqlite3:) posix typed-records srfi-18 srfi-69 format ports srfi-1 matchable - (prefix mtconfigf configf:) - regex srfi-13 - commonmod - (prefix mtargs args:)) +(include "run_records.scm") +(include "test_records.scm") +(include "db_records.scm") (define *java-script-lib* #f) (define (init-java-script-lib) (set! 
*java-script-lib* (conc (common:get-install-area) "/share/js/jquery-3.1.0.slim.min.js")) ) -;; A routine to map itempaths using a itemmap -;; patha and pathb must be strings or this will fail -;; -;; path-b is waiting on path-a -;; -(define (db:compare-itempaths test-b-name path-a path-b itemmaps ) - (debug:print-info 6 *default-log-port* "ITEMMAPS: " itemmaps) - (let* ((itemmap (tests:lookup-itemmap itemmaps test-b-name))) - (if itemmap - (let ((path-b-mapped (db:multi-pattern-apply path-b itemmap))) - (debug:print-info 6 *default-log-port* "ITEMMAP is " itemmap ", path: " path-b ", mapped path: " path-b-mapped) - (equal? path-a path-b-mapped)) - (equal? path-b path-a)))) - ;; A routine to convert test/itempath using a itemmap ;; NOTE: to process only an itempath (i.e. no prepended testname) ;; just call db:multi-pattern-apply ;; (define (db:convert-test-itempath path-in itemmap) @@ -63,18 +65,11 @@ (test-name (if (null? path-parts) "" (car path-parts))) (item-path (string-intersperse (if (null? path-parts) '() (cdr path-parts)) "/"))) (conc test-name "/" (db:multi-pattern-apply item-path itemmap)))) -;;====================================================================== -;; Run keys, these are used to hierarchially organise tests and run areas -;;====================================================================== - -(define (keys->keystr keys) ;; => key1,key2,key3,additiona1, ... - (string-intersperse keys ",")) - -(define (args:usage . a) #f) +#; (define (args:usage . 
a) #f) ;;====================================================================== ;; key <=> target routines ;;====================================================================== @@ -95,72 +90,10 @@ vals) (debug:print-error 0 *default-log-port* "wrong number of values in " target ", should match " keys)) vals) (debug:print 4 *default-log-port* "ERROR: keys:target-set-args called with no target."))) -;; given the keys (a list of vectors or a list of keys) and a target return a keyval list -;; keyval list ( (key1 val1) (key2 val2) ...) -(define (keys:target->keyval keys target) - (let* ((targlist (string-split target "/")) - (numkeys (length keys)) - (numtarg (length targlist)) - (targtweaked (if (> numkeys numtarg) - (append targlist (make-list (- numkeys numtarg) "")) - targlist))) - (map (lambda (key targ) - (list key targ)) - keys targtweaked))) - -;;====================================================================== -;; config file related routines -;;====================================================================== - -(define keys:config-get-fields common:get-fields) -(define (keys:make-key/field-string confdat) - (let ((fields (configf:get-section confdat "fields"))) - (string-join - (map (lambda (field)(conc (car field) " " (cadr field))) - fields) - ","))) - -;; patterns are: -;; "rx1" "replacement1"\n -;; "rx2" "replacement2" -;; etc. -;; -(define (db:multi-pattern-apply item-path itemmap) - (let ((all-patts (string-split itemmap "\n"))) - (if (null? 
all-patts) - item-path - (let loop ((hed (car all-patts)) - (tal (cdr all-patts)) - (res item-path)) - (let* ((parts (string-split hed)) - (patt (car parts)) - - (repl (if (> (length parts) 1)(cadr parts) "")) - - (newr (if (and patt repl) - (begin - (handle-exceptions - exn - (begin - (debug:print 0 *default-log-port* - "WARNING: itemmap has problem \"" itemmap "\", patt: " patt ", repl: " repl) - res) - (string-substitute patt repl res)) - - - ) - (begin - (debug:print 0 *default-log-port* - "WARNING: itemmap has problem \"" itemmap "\", patt: " patt ", repl: " repl) - res)))) - (if (null? tal) - newr - (loop (car tal)(cdr tal) newr))))))) - ;; given waiting-test that is waiting on waiton-test extend test-patt appropriately ;; ;; genlib/testconfig sim/testconfig ;; genlib/sch sim/sch/cell1 ;; @@ -215,121 +148,18 @@ (cons waiton-test patts)))) (string-intersperse (delete-duplicates new-patts) ","))))) -;; tests:glob-like-match -(define (tests:glob-like-match patt str) - (let ((like (substring-index "%" patt))) - (let* ((notpatt (equal? (substring-index "~" patt) 0)) - (newpatt (if notpatt (substring patt 1) patt)) - (finpatt (if like - (string-substitute (regexp "%") ".*" newpatt #f) - (string-substitute (regexp "\\*") ".*" newpatt #f))) - (res #f)) - ;; (print "tests:glob-like-match => notpatt: " notpatt ", newpatt: " newpatt ", finpatt: " finpatt) - (set! res (string-match (regexp finpatt (if like #t #f)) str)) - (if notpatt (not res) res)))) - -;; if itempath is #f then look only at the testname part -;; -(define (tests:match patterns testname itempath #!key (required '())) - (if (string? patterns) - (let ((patts (append (string-split patterns ",") required))) - (if (null? patts) ;;; no pattern(s) means no match - #f - (let loop ((patt (car patts)) - (tal (cdr patts))) - ;; (print "loop: patt: " patt ", tal " tal) - (if (string=? 
patt "") - #f ;; nothing ever matches empty string - policy - (let* ((patt-parts (string-match (regexp "^([^\\/]*)(\\/(.*)|)$") patt)) - (test-patt (cadr patt-parts)) - (item-patt (cadddr patt-parts))) - ;; special case: test vs. test/ - ;; test => "test" "%" - ;; test/ => "test" "" - (if (and (not (substring-index "/" patt)) ;; no slash in the original - (or (not item-patt) - (equal? item-patt ""))) ;; should always be true that item-patt is "" - (set! item-patt "%")) - ;; (print "tests:match => patt-parts: " patt-parts ", test-patt: " test-patt ", item-patt: " item-patt) - (if (and (tests:glob-like-match test-patt testname) - (or (not itempath) - (tests:glob-like-match (if item-patt item-patt "") itempath))) - #t - (if (null? tal) - #f - (loop (car tal)(cdr tal))))))))))) - -;; if itempath is #f then look only at the testname part -;; -(define (tests:match->sqlqry patterns) - (if (string? patterns) - (let ((patts (string-split patterns ","))) - (if (null? patts) ;;; no pattern(s) means no match, we will do no query - #f - (let loop ((patt (car patts)) - (tal (cdr patts)) - (res '())) - ;; (print "loop: patt: " patt ", tal " tal) - (let* ((patt-parts (string-match (regexp "^([^\\/]*)(\\/(.*)|)$") patt)) - (test-patt (cadr patt-parts)) - (item-patt (cadddr patt-parts)) - (test-qry (db:patt->like "testname" test-patt)) - (item-qry (db:patt->like "item_path" item-patt)) - (qry (conc "(" test-qry " AND " item-qry ")"))) - ;; (print "tests:match => patt-parts: " patt-parts ", test-patt: " test-patt ", item-patt: " item-patt) - (if (null? tal) - (string-intersperse (append (reverse res)(list qry)) " OR ") - (loop (car tal)(cdr tal)(cons qry res))))))) - #f)) - -;; keys list to key1,key2,key3 ... 
-(define (runs:get-std-run-fields keys remfields) - (let* ((header (append keys remfields)) - (keystr (conc (keys->keystr keys) "," - (string-intersperse remfields ",")))) - (list keystr header))) - -;; make a query (fieldname like 'patt1' OR fieldname -(define (db:patt->like fieldname pattstr #!key (comparator " OR ")) - (let ((patts (if (string? pattstr) - (string-split pattstr ",") - '("%")))) - (string-intersperse (map (lambda (patt) - (let ((wildtype (if (substring-index "%" patt) "LIKE" "GLOB"))) - (conc fieldname " " wildtype " '" patt "'"))) - (if (null? patts) - '("") - patts)) - comparator))) - ;; Call this one to do all the work and get a standardized list of tests ;; gets paths from configs and finds valid tests ;; returns hash of testname --> fullpath ;; (define (tests:get-all) (let* ((test-search-path (tests:get-tests-search-path *configdat*))) (tests:get-valid-tests (make-hash-table) test-search-path))) -(define (tests:get-tests-search-path cfgdat) - (let ((paths (let ((section (if cfgdat - (configf:get-section cfgdat "tests-paths") - #f))) - (if section - (map cadr section) - '())))) - (filter (lambda (d) - (if (directory-exists? d) - d - (begin - (if (common:low-noise-print 60 "tests:get-tests-search-path" d) - (debug:print 0 *default-log-port* "WARNING: problem with directory " d ", dropping it from tests path")) - #f))) - (append paths (list (conc *toppath* "/tests")))))) - (define (tests:get-valid-tests test-registry tests-paths) (if (null? tests-paths) test-registry (let loop ((hed (car tests-paths)) (tal (cdr tests-paths))) @@ -374,24 +204,615 @@ '()) (if itemmap-table itemmap-table '())))) -;; given a list of itemmaps (testname . 
map), return the first match +;;====================================================================== +;; Tests +;;====================================================================== + +;; return items given config +;; +(define (tests:get-items tconfig) + (let ((items (hash-table-ref/default tconfig "items" #f)) ;; items 4 + (itemstable (hash-table-ref/default tconfig "itemstable" #f))) + ;; if either items or items table is a proc return it so test running + ;; process can know to call items:get-items-from-config + ;; if either is a list and none is a proc go ahead and call get-items + ;; otherwise return #f - this is not an iterated test + (cond + ((procedure? items) + (debug:print-info 4 *default-log-port* "items is a procedure, will calc later") + items) ;; calc later + ((procedure? itemstable) + (debug:print-info 4 *default-log-port* "itemstable is a procedure, will calc later") + itemstable) ;; calc later + ((filter (lambda (x) + (let ((val (car x))) + (if (procedure? val) val #f))) + (append (if (list? items) items '()) + (if (list? itemstable) itemstable '()))) + 'have-procedure) + ((or (list? items)(list? 
itemstable)) ;; calc now + (debug:print-info 4 *default-log-port* "items and itemstable are lists, calc now\n" + " items: " items " itemstable: " itemstable) + (items:get-items-from-config tconfig)) + (else #f)))) ;; not iterated + + +;; returns waitons waitors tconfigdat +;; +(define (tests:get-waitons test-name all-tests-registry) + (let* ((config (tests:get-testconfig test-name #f all-tests-registry 'return-procs))) ;; assuming no problems with immediate evaluation, this could be simplified ('return-procs -> #t) + (let ((instr (if config + (configf:lookup config "requirements" "waiton") + (begin ;; No config means this is a non-existant test + (debug:print-error 0 *default-log-port* "non-existent required test \"" test-name "\"") + (exit 1)))) + (instr2 (if config + (configf:lookup config "requirements" "waitor") + ""))) + (debug:print-info 8 *default-log-port* "waitons string is " instr ", waitors string is " instr2) + (let ((newwaitons + (string-split (cond + ((procedure? instr) ;; here + (let ((res (instr))) + (debug:print-info 8 *default-log-port* "waiton procedure results in string " res " for test " test-name) + res)) + ((string? instr) instr) + (else + ;; NOTE: This is actually the case of *no* waitons! ;; (debug:print-error 0 *default-log-port* "something went wrong in processing waitons for test " test-name) + "")))) + (newwaitors + (string-split (cond + ((procedure? instr2) + (let ((res (instr2))) + (debug:print-info 8 *default-log-port* "waitor procedure results in string " res " for test " test-name) + res)) + ((string? instr2) instr2) + (else + ;; NOTE: This is actually the case of *no* waitons! 
;; (debug:print-error 0 *default-log-port* "something went wrong in processing waitons for test " test-name) + ""))))) + (values + ;; the waitons + (filter (lambda (x) + (if (hash-table-ref/default all-tests-registry x #f) + #t + (begin + (debug:print-error 0 *default-log-port* "test " test-name " has unrecognised waiton testname " x) + #f))) + newwaitons) + (filter (lambda (x) + (if (hash-table-ref/default all-tests-registry x #f) + #t + (begin + (debug:print-error 0 *default-log-port* "test " test-name " has unrecognised waiton testname " x) + #f))) + newwaitors) + config))))) + +;; Check for waiver eligibility +;; +(define (tests:check-waiver-eligibility testdat prev-testdat) + (let* ((test-registry (make-hash-table)) + (testconfig (tests:get-testconfig (db:test-get-testname testdat) (db:test-get-item-path testdat) test-registry #f)) + (test-rundir ;; (sdb:qry 'passstr + (db:test-get-rundir testdat)) ;; ) + (prev-rundir ;; (sdb:qry 'passstr + (db:test-get-rundir prev-testdat)) ;; ) + (waivers (if testconfig (configf:section-vars testconfig "waivers") '())) + (waiver-rx (regexp "^(\\S+)\\s+(.*)$")) + (diff-rule "diff %file1% %file2%") + (logpro-rule "diff %file1% %file2% | logpro %waivername%.logpro %waivername%.html")) + (if (not (common:file-exists? test-rundir)) + (begin + (debug:print-error 0 *default-log-port* "test run directory is gone, cannot propagate waiver") + #f) + (begin + (push-directory test-rundir) + (let ((result (if (null? waivers) + #f + (let loop ((hed (car waivers)) + (tal (cdr waivers))) + (debug:print 0 *default-log-port* "INFO: Applying waiver rule \"" hed "\"") + (let* ((waiver (configf:lookup testconfig "waivers" hed)) + (wparts (if waiver (string-match waiver-rx waiver) #f)) + (waiver-rule (if wparts (cadr wparts) #f)) + (waiver-glob (if wparts (caddr wparts) #f)) + (logpro-file (if waiver + (let ((fname (conc hed ".logpro"))) + (if (common:file-exists? 
fname) + fname + (begin + (debug:print 0 *default-log-port* "INFO: No logpro file " fname " falling back to diff") + #f))) + #f)) + ;; if rule by name of waiver-rule is found in testconfig - use it + ;; else if waivername.logpro exists use logpro-rule + ;; else default to diff-rule + (rule-string (let ((rule (configf:lookup testconfig "waiver_rules" waiver-rule))) + (if rule + rule + (if logpro-file + logpro-rule + (begin + (debug:print 0 *default-log-port* "INFO: No logpro file " logpro-file " found, using diff rule") + diff-rule))))) + ;; (string-substitute "%file1%" "foofoo.txt" "This is %file1% and so is this %file1%." #t) + (processed-cmd (string-substitute + "%file1%" (conc test-rundir "/" waiver-glob) + (string-substitute + "%file2%" (conc prev-rundir "/" waiver-glob) + (string-substitute + "%waivername%" hed rule-string #t) #t) #t)) + (res #f)) + (debug:print 0 *default-log-port* "INFO: waiver command is \"" processed-cmd "\"") + (if (eq? (system processed-cmd) 0) + (if (null? tal) + #t + (loop (car tal)(cdr tal))) + #f)))))) + (pop-directory) + result))))) + + + + + + + + + + + + + + + + + + + + + + + + +;; CHECK - WAS THIS ADDED OR REMOVED? MANUAL MERGE WITH API STUFF!!! 
+;; +;; get a pretty table to summarize steps +;; +;; (define (dcommon:process-steps-table steps);; db test-id #!key (work-area #f)) +(define (tests:process-steps-table steps);; db test-id #!key (work-area #f)) +;; (let ((steps (db:get-steps-for-test db test-id work-area: work-area))) + ;; organise the steps for better readability + (let ((res (make-hash-table))) + (for-each + (lambda (step) + (debug:print 6 *default-log-port* "step=" step) + (let ((record (hash-table-ref/default + res + (tdb:step-get-stepname step) + ;; 0 1 2 3 4 5 6 7 + ;; stepname start end status Duration Logfile Comment first-id + (vector (tdb:step-get-stepname step) "" "" "" "" "" "" #f)))) + (debug:print 6 *default-log-port* "record(before) = " record + "\nid: " (tdb:step-get-id step) + "\nstepname: " (tdb:step-get-stepname step) + "\nstate: " (tdb:step-get-state step) + "\nstatus: " (tdb:step-get-status step) + "\ntime: " (tdb:step-get-event_time step)) + (if (not (vector-ref record 7))(vector-set! record 7 (tdb:step-get-id step))) ;; do not clobber the id if previously set + (case (string->symbol (tdb:step-get-state step)) + ((start)(vector-set! record 1 (tdb:step-get-event_time step)) + (vector-set! record 3 (if (equal? (vector-ref record 3) "") + (tdb:step-get-status step))) + (if (> (string-length (tdb:step-get-logfile step)) + 0) + (vector-set! record 5 (tdb:step-get-logfile step)))) + ((end) + (vector-set! record 2 (any->number (tdb:step-get-event_time step))) + (vector-set! record 3 (tdb:step-get-status step)) + (vector-set! record 4 (let ((startt (any->number (vector-ref record 1))) + (endt (any->number (vector-ref record 2)))) + (debug:print 4 *default-log-port* "record[1]=" (vector-ref record 1) + ", startt=" startt ", endt=" endt + ", get-status: " (tdb:step-get-status step)) + (if (and (number? startt)(number? endt)) + (seconds->hr-min-sec (- endt startt)) "-1"))) + (if (> (string-length (tdb:step-get-logfile step)) + 0) + (vector-set! 
record 5 (tdb:step-get-logfile step))) + (if (> (string-length (tdb:step-get-comment step)) + 0) + (vector-set! record 6 (tdb:step-get-comment step)))) + (else + (vector-set! record 2 (tdb:step-get-state step)) + (vector-set! record 3 (tdb:step-get-status step)) + (vector-set! record 4 (tdb:step-get-event_time step)) + (vector-set! record 6 (tdb:step-get-comment step)))) + (hash-table-set! res (tdb:step-get-stepname step) record) + (debug:print 6 *default-log-port* "record(after) = " record + "\nid: " (tdb:step-get-id step) + "\nstepname: " (tdb:step-get-stepname step) + "\nstate: " (tdb:step-get-state step) + "\nstatus: " (tdb:step-get-status step) + "\ntime: " (tdb:step-get-event_time step)))) + ;; (else (vector-set! record 1 (tdb:step-get-event_time step))) + (sort steps (lambda (a b) + (cond + ((< (tdb:step-get-event_time a)(tdb:step-get-event_time b)) #t) + ((eq? (tdb:step-get-event_time a)(tdb:step-get-event_time b)) + (< (tdb:step-get-id a) (tdb:step-get-id b))) + (else #f))))) + res)) + +;;====================================================================== +;; Gather data from test/task specifications +;;====================================================================== + +;; (define (tests:get-valid-tests testsdir test-patts) ;; #!key (test-names '())) +;; (let ((tests (glob (conc testsdir "/tests/*")))) ;; " (string-translate patt "%" "*"))))) +;; (set! tests (filter (lambda (test)(common:file-exists? 
(conc test "/testconfig"))) tests)) +;; (delete-duplicates +;; (filter (lambda (testname) +;; (tests:match test-patts testname #f)) +;; (map (lambda (testp) +;; (last (string-split testp "/"))) +;; tests))))) + +(define (tests:get-test-path-from-environment) + (if (and (getenv "MT_LINKTREE") + (getenv "MT_TARGET") + (getenv "MT_RUNNAME") + (getenv "MT_TEST_NAME") + (getenv "MT_ITEMPATH")) + (conc (getenv "MT_LINKTREE") "/" + (getenv "MT_TARGET") "/" + (getenv "MT_RUNNAME") "/" + (getenv "MT_TEST_NAME") + (if (and (getenv "MT_ITEMPATH") + (not (string=? "" (getenv "MT_ITEMPATH")))) + (conc "/" (getenv "MT_ITEMPATH")) + "")) + #f)) + +;; if .testconfig exists in test directory read and return it +;; else if have cached copy in *testconfigs* return it IFF there is a section "have fulldata" +;; else read the testconfig file +;; if have path to test directory save the config as .testconfig and return it +;; +(define (tests:get-testconfig test-name item-path test-registry system-allowed + #!key (force-create #f)(allow-write-cache #t)(wait-a-minute #f)) + (let* ((use-cache (common:use-cache?)) + (cache-path (tests:get-test-path-from-environment)) + (cache-file (and cache-path (conc cache-path "/.testconfig"))) + (cache-exists (and cache-file + (not force-create) ;; if force-create then pretend there is no cache to read + (common:file-exists? cache-file))) + (cached-dat (if (and (not force-create) + cache-exists + use-cache) + (handle-exceptions + exn + #f ;; any issues, just give up with the cached version and re-read + (configf:read-alist cache-file)) + #f)) + (test-full-name (if (and item-path (not (string-null? item-path))) + (conc test-name "/" item-path) + test-name))) + (if cached-dat + cached-dat + (let ((dat (hash-table-ref/default *testconfigs* test-full-name #f))) + (if (and dat ;; have a locally cached version + (hash-table-ref/default dat "have fulldata" #f)) ;; marked as good data? 
+ dat + ;; no cached data available + (let* ((treg (or test-registry + (tests:get-all))) + (test-path (or (hash-table-ref/default treg test-name #f) + (let* ((local-tcdir (conc (getenv "MT_LINKTREE") "/" + (getenv "MT_TARGET") "/" + (getenv "MT_RUNNAME") "/" + test-name "/" item-path)) + (local-tcfg (conc local-tcdir "/testconfig"))) + (if (common:file-exists? local-tcfg) + local-tcdir + #f)) + (conc *toppath* "/tests/" test-name))) + (test-configf (conc test-path "/testconfig")) + (testexists (let loopa ((tries-left 30)) + (cond + ( + (and (common:file-exists? test-configf)(file-read-access? test-configf)) + #t) + ( + (common:file-exists? test-configf) + (debug:print 0 *default-log-port* "WARNING: Cannot read testconfig file: "test-configf) + #f) + ( + (and wait-a-minute (> tries-left 0)) + (thread-sleep! 10) + (debug:print 0 *default-log-port* "WARNING: testconfig file does not exist: "test-configf" will retry in 10 seconds. Tries left: "tries-left) ;; BB: this fires + (loopa (sub1 tries-left))) + (else + (debug:print 0 *default-log-port* "WARNING: testconfig file does not exist: "test-configf) ;; BB: this fires + #f)))) + (tcfg (if testexists + (configf:read-config test-configf #f system-allowed + environ-patt: (if system-allowed + "pre-launch-env-vars" + #f)) + #f))) + (if (and tcfg cache-file) (hash-table-set! tcfg "have fulldata" #t)) ;; mark this as fully read data + (if tcfg (hash-table-set! *testconfigs* test-full-name tcfg)) + (if (and testexists + cache-file + (file-write-access? cache-path) + allow-write-cache) + (let ((tpath (conc cache-path "/.testconfig"))) + (debug:print-info 1 *default-log-port* "Caching testconfig for " test-name " in " tpath) + (if (and tcfg (not (common:in-running-test?))) + (configf:write-alist tcfg tpath)))) + tcfg)))))) + +;; sort tests by priority and waiton +;; Move test specific stuff to a test unit FIXME one of these days +(define (tests:sort-by-priority-and-waiton test-records) + (if (eq? 
(hash-table-size test-records) 0) + '() + (let* ((mungepriority (lambda (priority) + (if priority + (let ((tmp (any->number priority))) + (if tmp tmp (begin (debug:print-error 0 *default-log-port* "bad priority value " priority ", using 0") 0))) + 0))) + (all-tests (hash-table-keys test-records)) + (all-waited-on (let loop ((hed (car all-tests)) + (tal (cdr all-tests)) + (res '())) + (let* ((trec (hash-table-ref test-records hed)) + (waitons (or (tests:testqueue-get-waitons trec) '()))) + (if (null? tal) + (append res waitons) + (loop (car tal)(cdr tal)(append res waitons)))))) + (sort-fn1 + (lambda (a b) + (let* ((a-record (hash-table-ref test-records a)) + (b-record (hash-table-ref test-records b)) + (a-waitons (or (tests:testqueue-get-waitons a-record) '())) + (b-waitons (or (tests:testqueue-get-waitons b-record) '())) + (a-config (tests:testqueue-get-testconfig a-record)) + (b-config (tests:testqueue-get-testconfig b-record)) + (a-raw-pri (configf:lookup a-config "requirements" "priority")) + (b-raw-pri (configf:lookup b-config "requirements" "priority")) + (a-priority (mungepriority a-raw-pri)) + (b-priority (mungepriority b-raw-pri))) + (tests:testqueue-set-priority! a-record a-priority) + (tests:testqueue-set-priority! b-record b-priority) + ;; (debug:print 0 *default-log-port* "a=" a ", b=" b ", a-waitons=" a-waitons ", b-waitons=" b-waitons) + (cond + ;; is + ((member a b-waitons) ;; is b waiting on a? + ;; (debug:print 0 *default-log-port* "case1") + #t) + ((member b a-waitons) ;; is a waiting on b? + ;; (debug:print 0 *default-log-port* "case2") + #f) + ((and (not (null? a-waitons)) ;; both have waitons - do not disturb + (not (null? b-waitons))) + ;; (debug:print 0 *default-log-port* "case2.1") + #t) + ((and (null? a-waitons) ;; no waitons for a but b has waitons + (not (null? b-waitons))) + ;; (debug:print 0 *default-log-port* "case3") + #f) + ((and (not (null? a-waitons)) ;; a has waitons but b does not + (null? 
b-waitons)) + ;; (debug:print 0 *default-log-port* "case4") + #t) + ((not (eq? a-priority b-priority)) ;; use + (> a-priority b-priority)) + (else + ;; (debug:print 0 *default-log-port* "case5") + (string>? a b)))))) + + (sort-fn2 + (lambda (a b) + (> (mungepriority (tests:testqueue-get-priority (hash-table-ref test-records a))) + (mungepriority (tests:testqueue-get-priority (hash-table-ref test-records b))))))) + ;; (let ((dot-res (tests:run-dot (tests:tests->dot test-records) "plain"))) + ;; (debug:print "dot-res=" dot-res)) + ;; (let ((data (map cdr (filter + ;; (lambda (x)(equal? "node" (car x))) + ;; (map string-split (tests:easy-dot test-records "plain")))))) + ;; (map car (sort data (lambda (a b) + ;; (> (string->number (caddr a))(string->number (caddr b))))))) + ;; )) + (sort all-tests sort-fn1)))) ;; avoid dealing with deleted tests, look at the hash table + +(define (tests:easy-dot test-records outtype) + (let-values (((fd temp-path) (file-mkstemp (conc "/tmp/" (current-user-name) ".XXXXXX")))) + (let ((all-testnames (hash-table-keys test-records)) + (temp-port (open-output-file* fd))) + ;; (format temp-port "This file is ~A.~%" temp-path) + (format temp-port "digraph tests {\n") + (format temp-port " size=4,8\n") + ;; (format temp-port " splines=none\n") + (for-each + (lambda (testname) + (let* ((testrec (hash-table-ref test-records testname)) + (waitons (or (tests:testqueue-get-waitons testrec) '()))) + (for-each + (lambda (waiton) + (format temp-port (conc " " waiton " -> " testname " [splines=ortho]\n"))) + waitons))) + all-testnames) + (format temp-port "}\n") + (close-output-port temp-port) + (with-input-from-pipe + (conc "env -i PATH=$PATH dot -T" outtype " < " temp-path) + (lambda () + (let ((res (read-lines))) + ;; (delete-file temp-path) + res)))))) + +(define (tests:write-dot-file test-records fname sizex sizey) + (if (file-write-access? 
(pathname-directory fname)) + (with-output-to-file fname + (lambda () + (map print (tests:tests->dot test-records sizex sizey)))))) + +(define (tests:tests->dot test-records sizex sizey) + (let ((all-testnames (hash-table-keys test-records))) + (if (null? all-testnames) + '() + (let loop ((hed (car all-testnames)) + (tal (cdr all-testnames)) + (res (list "digraph tests {" + (conc " size=\"" (or sizex 11) "," (or sizey 11) "\";") + " ratio=0.95;" + ))) + (let* ((testrec (hash-table-ref test-records hed)) + (waitons (or (tests:testqueue-get-waitons testrec) '())) + (newres (append res + (if (null? waitons) + (list (conc " \"" hed "\" [shape=box];")) + (map (lambda (waiton) + (conc " \"" waiton "\" -> \"" hed "\" [shape=box];")) + waitons) + )))) + (if (null? tal) + (append newres (list "}")) + (loop (car tal)(cdr tal) newres) + )))))) + +;; (tests:run-dot (list "digraph tests {" "a -> b" "}") "plain") + +(define (tests:run-dot indat outtype) ;; outtype is plain, fig, dot, etc. http://www.graphviz.org/content/output-formats + (let-values (((inp oup pid)(process "env -i PATH=$PATH dot" (list "-T" outtype)))) + (with-output-to-port oup + (lambda () + (map print indat))) + (close-output-port oup) + (let ((res (with-input-from-port inp + (lambda () + (read-lines))))) + (close-input-port inp) + res))) + +;; read data from tmp file or create if not exists +;; if exists regen in background ;; -(define (tests:lookup-itemmap itemmaps testname) - (let ((best-matches (filter (lambda (itemmap) - (tests:match (car itemmap) testname #f)) - itemmaps))) - (if (null? best-matches) - #f - (let ((res (car best-matches))) - ;; (debug:print 0 *default-log-port* "res=" res) - (cond - ((string? res) res) ;;; FIX THE ROOT CAUSE HERE .... - ((null? res) #f) - ((string? (cdr res)) (cdr res)) ;; it is a pair - ((string? (cadr res))(cadr res)) ;; it is a list - (else cadr res)))))) - +(define (tests:lazy-dot testrecords outtype sizex sizey) + (let ((dfile (conc "/tmp/." 
(current-user-name) "-" (server:mk-signature) ".dot"))
+        (fname (conc "/tmp/." (current-user-name) "-" (server:mk-signature) ".dotdat")))
+    ;; the dot source file is rewritten on every call
+    (tests:write-dot-file testrecords dfile sizex sizey)
+    (if (common:file-exists? fname)
+        ;; previous output exists: read it, start a regeneration in the background
+        ;; (trailing & in the shell command) and return what was read
+        ;; NOTE(review): the background dot redirects straight into fname, so a later
+        ;; call could presumably read a truncated or partially written file - confirm
+        (let ((res (with-input-from-file fname
+                     (lambda ()
+                       (read-lines)))))
+          (system (conc "env -i PATH=$PATH dot -T " outtype " < " dfile " > " fname "&"))
+          res)
+        ;; no previous output: run dot in the foreground, then read its result
+        (begin
+          (system (conc "env -i PATH=$PATH dot -T " outtype " < " dfile " > " fname))
+          (with-input-from-file fname
+            (lambda ()
+              (read-lines)))))))
+
+
+;;======================================================================
+;; refactoring this block into tests:get-full-data from line 263 of runs.scm
+;;======================================================================
+;; hed is the test name
+;; test-records is a hash of test-name => test record
+;;
+;; Walks test-names and, transitively, the waitons each test lists, adding a
+;; record vector to test-records for every test that has a testconfig and is
+;; not already in the table. Returns test-records; when test-names is empty
+;; the one-armed if yields no useful value.
+(define (tests:get-full-data test-names test-records required-tests all-tests-registry)
+  (if (not (null? test-names))
+      (let loop ((hed (car test-names))
+                 (tal (cdr test-names))) ;; 'return-procs tells the config reader to prep running system but return a proc
+        (debug:print-info 4 *default-log-port* "hed=" hed " at top of loop")
+        ;; don't know item-path at this time, let the testconfig get the top level testconfig
+        (let* ((config (tests:get-testconfig hed #f all-tests-registry 'return-procs))
+               ;; waitons: the "waiton" requirement split into a list of test names
+               (waitons (let ((instr (if config
+                                         (configf:lookup config "requirements" "waiton")
+                                         (begin ;; No config means this is a non-existent test
+                                           (debug:print-error 0 *default-log-port* "non-existent required test \"" hed "\", grep through your testconfigs to find and remove or create the test. Discarding and continuing.")
+                                           ""))))
+                          (debug:print-info 8 *default-log-port* "waitons string is " instr)
+                          (string-split (cond
+                                         ;; the looked-up value may be a procedure; call it to get the string
+                                         ((procedure? instr)
+                                          (let ((res (instr)))
+                                            (debug:print-info 8 *default-log-port* "waiton procedure results in string " res " for test " hed)
+                                            res))
+                                         ((string? 
instr) instr)
+                                         (else
+                                          ;; NOTE: This is actually the case of *no* waitons! ;; (debug:print-error 0 *default-log-port* "something went wrong in processing waitons for test " hed)
+                                          ""))))))
+          (if (not config) ;; this is a non-existent test called in a waiton.
+              ;; skip it: continue with the rest of the list, or finish if nothing is left
+              (if (null? tal)
+                  test-records
+                  (loop (car tal)(cdr tal)))
+              (begin
+                (debug:print-info 8 *default-log-port* "waitons: " waitons)
+                ;; check for hed in waitons => this would be circular, remove it and issue an
+                ;; error
+                (if (member hed waitons)
+                    (begin
+                      (debug:print-error 0 *default-log-port* "test " hed " has listed itself as a waiton, please correct this!")
+                      (set! waitons (filter (lambda (x)(not (equal? x hed))) waitons))))
+
+                ;; (items (items:get-items-from-config config)))
+                ;; only create a record the first time a test is seen
+                (if (not (hash-table-ref/default test-records hed #f))
+                    (hash-table-set! test-records
+                                     hed (vector hed ;; 0
+                                                 config ;; 1
+                                                 waitons ;; 2
+                                                 (configf:lookup config "requirements" "priority") ;; priority 3
+                                                 (let ((items (hash-table-ref/default config "items" #f)) ;; items 4
+                                                       (itemstable (hash-table-ref/default config "itemstable" #f)))
+                                                   ;; if either items or items table is a proc return it so test running
+                                                   ;; process can know to call items:get-items-from-config
+                                                   ;; if either is a list and none is a proc go ahead and call get-items
+                                                   ;; otherwise return #f - this is not an iterated test
+                                                   (cond
+                                                    ((procedure? items)
+                                                     (debug:print-info 4 *default-log-port* "items is a procedure, will calc later")
+                                                     items) ;; calc later
+                                                    ((procedure? itemstable)
+                                                     (debug:print-info 4 *default-log-port* "itemstable is a procedure, will calc later")
+                                                     itemstable) ;; calc later
+                                                    ;; NOTE(review): filter always returns a list and every list, including '(),
+                                                    ;; is true in Scheme, so this clause is taken whenever neither items nor
+                                                    ;; itemstable is a procedure; the remaining clauses of this cond never run.
+                                                    ;; The predicate also inspects (car x), the first element of each entry.
+                                                    ;; Left as is because later item expansion may depend on always receiving
+                                                    ;; 'have-procedure here - confirm before tightening the test.
+                                                    ((filter (lambda (x)
+                                                               (let ((val (car x)))
+                                                                 (if (procedure? val) val #f)))
+                                                             (append (if (list? items) items '())
+                                                                     (if (list? itemstable) itemstable '())))
+                                                     'have-procedure)
+                                                    ((or (list? items)(list? 
itemstable)) ;; calc now
+                                                     (debug:print-info 4 *default-log-port* "items and itemstable are lists, calc now\n"
+                                                                       " items: " items " itemstable: " itemstable)
+                                                     (items:get-items-from-config config))
+                                                    (else #f))) ;; not iterated
+                                                 #f ;; itemsdat 5
+                                                 #f ;; spare - used for item-path
+                                                 )))
+                (for-each
+                 (lambda (waiton)
+                   (if (and waiton (not (member waiton test-names)))
+                       (begin
+                         (set! required-tests (cons waiton required-tests))
+                         (set! test-names (cons waiton test-names))))) ;; was an append, now a cons
+                 waitons)
+                (let ((remtests (delete-duplicates (append waitons tal))))
+                  (if (not (null? remtests))
+                      (loop (car remtests)(cdr remtests))
+                      test-records))))))))
+
+;;======================================================================
+;; A R C H I V I N G
+;;======================================================================
+
+(define (test:archive db test-id)
+  #f)
+
+(define (test:archive-tests db keynames target)
+  #f)
 )

ADDED   utils/deps.scm
Index: utils/deps.scm
==================================================================
--- /dev/null
+++ utils/deps.scm
@@ -0,0 +1,40 @@
+#!/usr/bin/env csi
+
+(use posix regex-case regex)
+
+;; Return the files matching the glob patt, "*.scm" when patt is not given.
+(define (get-files #!optional (patt #f))
+  (let ((files (glob (or patt "*.scm"))))
+    files))
+
+;; Print one dot edge, "imported" -> "module";, for each line of fname that is an
+;; (import X) form with a whitespace-free X.
+;; Returns the bookkeeping table: data when supplied, otherwise a fresh hash table.
+(define (get-deps fname #!optional (data #f))
+  (let ((hdat    (or data (make-hash-table)))
+        (modname (string-substitute "\\.scm$" "" fname))) ;; anchored: strip only a trailing .scm
+    ;; (print "Processing " fname)
+    (hash-table-set! hdat fname '())
+    (with-input-from-file fname
+      (lambda ()
+        (let loop ((l (read-line)))
+          (if (eof-object? l)
+              hdat ;; was data, which is #f unless the caller passed a table
+              (begin
+                (regex-case
+                 l
+                 ("^\\s*\\(import\\s+([^\\s]+)\\).*" (x md )
+                  (print "\"" md "\" -> \"" modname "\";")))
+                (loop (read-line)))))))))
+
+;; Print the dependency edges of every *mod.scm file in the current directory.
+(define (do-all-mod-files)
+  (let ((modfiles (get-files "*mod.scm")))
+    (for-each
+     (lambda (mfile)
+       (print "// " mfile)
+       (get-deps mfile))
+     modfiles)))
+
+(do-all-mod-files)
+(exit)