Index: Makefile ================================================================== --- Makefile +++ Makefile @@ -131,10 +131,14 @@ chmod a+x $@ $(PREFIX)/bin/nbfake : utils/nbfake $(INSTALL) $< $@ chmod a+x $@ + +$(PREFIX)/bin/remrun : utils/remrun + $(INSTALL) $< $@ + chmod a+x $@ $(PREFIX)/bin/viewscreen : utils/viewscreen $(INSTALL) $< $@ chmod a+x $@ @@ -159,10 +163,15 @@ chmod a+x $@ deploytarg/nbfind : utils/nbfind $(INSTALL) $< $@ chmod a+x $@ + +$(PREFIX)/bin/mtest-reaper: helpers/mtest-reaper.scm helpers/ducttape-lib.scm helpers/inteldate.scm helpers/mimetypes.scm + make -C helpers $@ PREFIX=$(PREFIX) INSTALL=$(INSTALL) ARCHSTR=$(ARCHSTR) + +mtest-reaper: $(PREFIX)/bin/mtest-reaper # install dashboard as dboard so wrapper script can be called dashboard $(PREFIX)/bin/.$(ARCHSTR)/dboard : dboard $(FILES) utils/mk_wrapper utils/mk_wrapper $(PREFIX) dboard $(PREFIX)/bin/dashboard chmod a+x $(PREFIX)/bin/dashboard @@ -169,14 +178,15 @@ $(INSTALL) dboard $(PREFIX)/bin/.$(ARCHSTR)/dboard install : $(PREFIX)/bin/.$(ARCHSTR) $(PREFIX)/bin/.$(ARCHSTR)/mtest $(PREFIX)/bin/megatest \ $(PREFIX)/bin/.$(ARCHSTR)/dboard $(PREFIX)/bin/dashboard $(HELPERS) $(PREFIX)/bin/nbfake \ $(PREFIX)/bin/nbfind $(PREFIX)/bin/loadrunner $(PREFIX)/bin/viewscreen $(PREFIX)/bin/mt_xterm \ - $(PREFIX)/share/docs/megatest_manual.html + $(PREFIX)/share/docs/megatest_manual.html $(PREFIX)/bin/remrun $(PREFIX)/bin/.$(ARCHSTR) : mkdir -p $(PREFIX)/bin/.$(ARCHSTR) + mkdir -p $(PREFIX)/bin/.$(ARCHSTR)/lib test: tests/tests.scm cd tests;csi -I .. -b -n tests.scm ext-tests/.fslckout : $(MTQA_FOSSIL) @@ -211,11 +221,11 @@ # chicken-install -prefix deploytarg -deploy $$i;done # deploytarg/libsqlite3.so : # CSC_OPTIONS="-Ideploytarg -Ldeploytarg" $CHICKEN_INSTALL -prefix deploytarg -deploy sqlite3 -deploy : deploytarg/mtest deploytarg/dboard $(DEPLOYHELPERS) deploytarg/nbfake deploytarg/viewsceen deploytarg/nbfind deploytarg/apropos.so +deploy : deploytarg/mtest deploytarg/dboard $(DEPLOYHELPERS) deploytarg/nbfake deploytarg/remrun deploytarg/viewsceen deploytarg/nbfind deploytarg/apropos.so # deploytarg/libiupcd.so : $(CKPATH)/lib/libiupcd.so # for i in iup im cd av call sqlite; do \ # cp $(CKPATH)/lib/lib$$i* deploytarg/ ; \ # done @@ -278,5 +288,6 @@ echo "(use postgresql)(hash-table-set! *available-db* 'postgresql #t)" >> altdb.scm;\ fi portlogger-example : portlogger-example.scm api.o archive.o client.o common.o configf.o daemon.o dashboard-tests.o db.o dcommon.o ezsteps.o filedb.o genexample.o gutils.o http-transport.o items.o keys.o launch.o lock-queue.o margs.o megatest-version.o mt.o ods.o portlogger.o process.o rmt.o rpc-transport.o runconfig.o runs.o sdb.o server.o synchash.o tasks.o tdb.o tests.o tree.o csc $(CSCOPTS) portlogger-example.scm api.o archive.o client.o common.o configf.o daemon.o dashboard-tests.o db.o dcommon.o ezsteps.o filedb.o genexample.o gutils.o http-transport.o items.o keys.o launch.o lock-queue.o margs.o megatest-version.o mt.o ods.o portlogger.o process.o rmt.o rpc-transport.o runconfig.o runs.o sdb.o server.o synchash.o tasks.o tdb.o tests.o tree.o + Index: NOTES ================================================================== --- NOTES +++ NOTES @@ -1,13 +1,19 @@ +===================================================================== +NOTES from looking at branch v1.62-rpc +===================================================================== + +*last-db-access* or *db-last-access* ==> which is it to be? +seen in singletest: ERROR: Unrecognised arguments: :first_err This is the first error ====================================================================== New way of launching needed to accomodate different target hosttypes for items ====================================================================== [flavors] -general ssh #{getbgesthost general} +general ssh #{getbesthost general} nbgeneral nbjob run JOBCOMMAND -log $MT_LINKTREE/$MT_TARGET/$MT_RUNNAME.$MT_TESTNAME-$MT_ITEM_PATH.lgo [hosts] general cubian xena Index: api.scm ================================================================== --- api.scm +++ api.scm @@ -6,10 +6,12 @@ ;; ;; This program is distributed WITHOUT ANY WARRANTY; without even the ;; implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR ;; PURPOSE. ;;====================================================================== + +(use srfi-69 posix) (declare (unit api)) (declare (uses rmt)) (declare (uses db)) (declare (uses tasks)) @@ -39,10 +41,11 @@ get-run-status get-run-stats get-targets get-target ;; register-run + get-tests-tags get-tests-for-run get-test-id get-tests-for-runs-mindata get-run-name-from-id get-runs @@ -61,21 +64,23 @@ synchash-get )) (define api:write-queries '( + get-keys-write ;; dummy "write" query to force server start + ;; SERVERS start-server kill-server ;; TESTS test-set-state-status-by-id delete-test-records delete-old-deleted-test-records - test-set-status-state + test-set-state-status test-set-top-process-pid - roll-up-pass-fail-counts + set-state-status-and-roll-up-items update-pass-fail-counts top-test-set-per-pf-counts ;; (db:top-test-set-per-pf-counts (db:get-db *db* 5) 5 "runfirst") ;; RUNS register-run @@ -111,169 +116,179 @@ ;; (define (api:execute-requests dbstruct dat) (handle-exceptions exn (let ((call-chain (get-call-chain))) + (debug:print 0 *default-log-port* "WARNING: api:execute-requests received an exception from peer") (print-call-chain (current-error-port)) (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) (vector #f (vector exn call-chain dat))) ;; return some stuff for debug if an exception happens (if (not (vector? dat)) ;; it is an error to not receive a vector (vector #f #f "remote must be called with a vector") (vector ;; return a vector + the returned data structure #t - (let ((cmd (vector-ref dat 0)) - (params (vector-ref dat 1))) - (case (if (symbol? cmd) - cmd - (string->symbol cmd)) - - ;;=============================================== - ;; READ/WRITE QUERIES - ;;=============================================== - - ;; SERVERS - ((start-server) (apply server:kind-run params)) - ((kill-server) (set! *server-run* #f)) - - ;; TESTS - ((test-set-state-status-by-id) (apply db:test-set-state-status-by-id dbstruct params)) - ((delete-test-records) (apply db:delete-test-records dbstruct params)) - ((delete-old-deleted-test-records) (apply db:delete-old-deleted-test-records dbstruct params)) - ((test-set-status-state) (apply db:test-set-status-state dbstruct params)) - ((test-set-top-process-pid) (apply db:test-set-top-process-pid dbstruct params)) - ((roll-up-pass-fail-counts) (apply db:roll-up-pass-fail-counts dbstruct params)) - ;; ((update-pass-fail-counts) (apply db:general-call dbstruct 'update-pass-fail-counts params)) - ((top-test-set-per-pf-counts) (apply db:top-test-set-per-pf-counts dbstruct params)) - ((test-set-archive-block-id) (apply db:test-set-archive-block-id dbstruct params)) - - ;; RUNS - ((register-run) (apply db:register-run dbstruct params)) - ((set-tests-state-status) (apply db:set-tests-state-status dbstruct params)) - ((delete-run) (apply db:delete-run dbstruct params)) - ((lock/unlock-run) (apply db:lock/unlock-run dbstruct params)) - ((update-run-event_time) (apply db:update-run-event_time dbstruct params)) - ((update-run-stats) (apply db:update-run-stats dbstruct params)) - ((set-var) (apply db:set-var dbstruct params)) - - ;; STEPS - ((teststep-set-status!) (apply db:teststep-set-status! dbstruct params)) - - ;; TEST DATA - ((test-data-rollup) (apply db:test-data-rollup dbstruct params)) - ((csv->test-data) (apply db:csv->test-data dbstruct params)) - - ;; MISC - ((sync-inmem->db) (let ((run-id (car params))) - (db:sync-touched dbstruct run-id force-sync: #t))) - ((mark-incomplete) (apply db:find-and-mark-incomplete dbstruct params)) - - ;; TESTMETA - ((testmeta-add-record) (apply db:testmeta-add-record dbstruct params)) - ((testmeta-update-field) (apply db:testmeta-update-field dbstruct params)) - - ;; TASKS - ((tasks-add) (apply tasks:add dbstruct params)) - ((tasks-set-state-given-param-key) (apply tasks:set-state-given-param-key dbstruct params)) - ((tasks-get-last) (apply tasks:get-last dbstruct params)) - - ;; ARCHIVES - ;; ((archive-get-allocations) - ((archive-register-disk) (apply db:archive-register-disk dbstruct params)) - ((archive-register-block-name)(apply db:archive-register-block-name dbstruct params)) - ((archive-allocate-testsuite/area-to-block)(apply db:archive-allocate-testsuite/area-to-block dbstruct block-id testsuite-name areakey)) - - ;;====================================================================== - ;; READ ONLY QUERIES - ;;====================================================================== - - ;; KEYS - ((get-key-val-pairs) (apply db:get-key-val-pairs dbstruct params)) - ((get-keys) (db:get-keys dbstruct)) - ((get-key-vals) (apply db:get-key-vals dbstruct params)) - ((get-target) (apply db:get-target dbstruct params)) - ((get-targets) (db:get-targets dbstruct)) - - ;; ARCHIVES - ((test-get-archive-block-info) (apply db:test-get-archive-block-info dbstruct params)) - - ;; TESTS - ((test-toplevel-num-items) (apply db:test-toplevel-num-items dbstruct params)) - ((get-test-info-by-id) (apply db:get-test-info-by-id dbstruct params)) - ((test-get-rundir-from-test-id) (apply db:test-get-rundir-from-test-id dbstruct params)) - ((get-count-tests-running-for-testname) (apply db:get-count-tests-running-for-testname dbstruct params)) - ((get-count-tests-running) (apply db:get-count-tests-running dbstruct params)) - ((get-count-tests-running-in-jobgroup) (apply db:get-count-tests-running-in-jobgroup dbstruct params)) - ;; ((delete-test-step-records) (apply db:delete-test-step-records dbstruct params)) - ((get-previous-test-run-record) (apply db:get-previous-test-run-record dbstruct params)) - ((get-matching-previous-test-run-records)(apply db:get-matching-previous-test-run-records dbstruct params)) - ((test-get-logfile-info) (apply db:test-get-logfile-info dbstruct params)) - ((test-get-records-for-index-file) (apply db:test-get-records-for-index-file dbstruct params)) - ((get-testinfo-state-status) (apply db:get-testinfo-state-status dbstruct params)) - ((test-get-top-process-pid) (apply db:test-get-top-process-pid dbstruct params)) - ((test-get-paths-matching-keynames-target-new) (apply db:test-get-paths-matching-keynames-target-new dbstruct params)) - ((get-prereqs-not-met) (apply db:get-prereqs-not-met dbstruct params)) - ((get-count-tests-running-for-run-id) (apply db:get-count-tests-running-for-run-id dbstruct params)) - ((synchash-get) (apply synchash:server-get dbstruct params)) - ((get-raw-run-stats) (apply db:get-raw-run-stats dbstruct params)) - - ;; RUNS - ((get-run-info) (apply db:get-run-info dbstruct params)) - ((get-run-status) (apply db:get-run-status dbstruct params)) - ((set-run-status) (apply db:set-run-status dbstruct params)) - ((get-tests-for-run) (apply db:get-tests-for-run dbstruct params)) - ((get-test-id) (apply db:get-test-id dbstruct params)) - ((get-tests-for-run-mindata) (apply db:get-tests-for-run-mindata dbstruct params)) - ((get-runs) (apply db:get-runs dbstruct params)) - ((get-num-runs) (apply db:get-num-runs dbstruct params)) - ((get-all-run-ids) (db:get-all-run-ids dbstruct)) - ((get-prev-run-ids) (apply db:get-prev-run-ids dbstruct params)) - ((get-run-ids-matching-target) (apply db:get-run-ids-matching-target dbstruct params)) - ((get-runs-by-patt) (apply db:get-runs-by-patt dbstruct params)) - ((get-run-name-from-id) (apply db:get-run-name-from-id dbstruct params)) - ((get-main-run-stats) (apply db:get-main-run-stats dbstruct params)) - ((get-var) (apply db:get-var dbstruct params)) - ((get-run-stats) (apply db:get-run-stats dbstruct params)) - - ;; STEPS - ((get-steps-data) (apply db:get-steps-data dbstruct params)) - ((get-steps-for-test) (apply db:get-steps-for-test dbstruct params)) - - ;; TEST DATA - ((read-test-data) (apply db:read-test-data dbstruct params)) - - ;; MISC - ((have-incompletes?) (apply db:have-incompletes? dbstruct params)) - ((login) (apply db:login dbstruct params)) - ((general-call) (let ((stmtname (car params)) - (run-id (cadr params)) - (realparams (cddr params))) - (db:with-db dbstruct run-id #t ;; these are all for modifying the db - (lambda (db) - (db:general-call db stmtname realparams))))) - ((sdb-qry) (apply sdb:qry params)) - ((ping) (current-process-id)) - - ;; TESTMETA - ((testmeta-get-record) (apply db:testmeta-get-record dbstruct params)) - - ;; TASKS - ((find-task-queue-records) (apply tasks:find-task-queue-records dbstruct params)))))))) - + (let* ((cmd-in (vector-ref dat 0)) + (cmd (if (symbol? cmd-in) + cmd-in + (string->symbol cmd-in))) + (params (vector-ref dat 1)) + (start-t (current-milliseconds)) + (res + (case cmd + ;;=============================================== + ;; READ/WRITE QUERIES + ;;=============================================== + + ;; SERVERS + ((start-server) (apply server:kind-run params)) + ((kill-server) (set! *server-run* #f)) + + ;; TESTS + ((test-set-state-status-by-id) (apply db:test-set-state-status-by-id dbstruct params)) + ((delete-test-records) (apply db:delete-test-records dbstruct params)) + ((delete-old-deleted-test-records) (apply db:delete-old-deleted-test-records dbstruct params)) + ((test-set-state-status) (apply db:test-set-state-status dbstruct params)) + ((test-set-top-process-pid) (apply db:test-set-top-process-pid dbstruct params)) + ((set-state-status-and-roll-up-items) (apply db:set-state-status-and-roll-up-items dbstruct params)) + ((top-test-set-per-pf-counts) (apply db:top-test-set-per-pf-counts dbstruct params)) + ((test-set-archive-block-id) (apply db:test-set-archive-block-id dbstruct params)) + + ;; RUNS + ((register-run) (apply db:register-run dbstruct params)) + ((set-tests-state-status) (apply db:set-tests-state-status dbstruct params)) + ((delete-run) (apply db:delete-run dbstruct params)) + ((lock/unlock-run) (apply db:lock/unlock-run dbstruct params)) + ((update-run-event_time) (apply db:update-run-event_time dbstruct params)) + ((update-run-stats) (apply db:update-run-stats dbstruct params)) + ((set-var) (apply db:set-var dbstruct params)) + + ;; STEPS + ((teststep-set-status!) (apply db:teststep-set-status! dbstruct params)) + + ;; TEST DATA + ((test-data-rollup) (apply db:test-data-rollup dbstruct params)) + ((csv->test-data) (apply db:csv->test-data dbstruct params)) + + ;; MISC + ((sync-inmem->db) (let ((run-id (car params))) + (db:sync-touched dbstruct run-id force-sync: #t))) + ((mark-incomplete) (apply db:find-and-mark-incomplete dbstruct params)) + + ;; TESTMETA + ((testmeta-add-record) (apply db:testmeta-add-record dbstruct params)) + ((testmeta-update-field) (apply db:testmeta-update-field dbstruct params)) + ((get-tests-tags) (db:get-tests-tags dbstruct)) + + ;; TASKS + ((tasks-add) (apply tasks:add dbstruct params)) + ((tasks-set-state-given-param-key) (apply tasks:set-state-given-param-key dbstruct params)) + ((tasks-get-last) (apply tasks:get-last dbstruct params)) + + ;; ARCHIVES + ;; ((archive-get-allocations) + ((archive-register-disk) (apply db:archive-register-disk dbstruct params)) + ((archive-register-block-name)(apply db:archive-register-block-name dbstruct params)) + ((archive-allocate-testsuite/area-to-block)(apply db:archive-allocate-testsuite/area-to-block dbstruct block-id testsuite-name areakey)) + + ;;====================================================================== + ;; READ ONLY QUERIES + ;;====================================================================== + + ;; KEYS + ((get-key-val-pairs) (apply db:get-key-val-pairs dbstruct params)) + ((get-keys) (db:get-keys dbstruct)) + ((get-keys-write) (db:get-keys dbstruct)) ;; force a dummy "write" query to force server + ((get-key-vals) (apply db:get-key-vals dbstruct params)) + ((get-target) (apply db:get-target dbstruct params)) + ((get-targets) (db:get-targets dbstruct)) + + ;; ARCHIVES + ((test-get-archive-block-info) (apply db:test-get-archive-block-info dbstruct params)) + + ;; TESTS + ((test-toplevel-num-items) (apply db:test-toplevel-num-items dbstruct params)) + ((get-test-info-by-id) (apply db:get-test-info-by-id dbstruct params)) + ((test-get-rundir-from-test-id) (apply db:test-get-rundir-from-test-id dbstruct params)) + ((get-count-tests-running-for-testname) (apply db:get-count-tests-running-for-testname dbstruct params)) + ((get-count-tests-running) (apply db:get-count-tests-running dbstruct params)) + ((get-count-tests-running-in-jobgroup) (apply db:get-count-tests-running-in-jobgroup dbstruct params)) + ;; ((delete-test-step-records) (apply db:delete-test-step-records dbstruct params)) + ((get-previous-test-run-record) (apply db:get-previous-test-run-record dbstruct params)) + ((get-matching-previous-test-run-records)(apply db:get-matching-previous-test-run-records dbstruct params)) + ((test-get-logfile-info) (apply db:test-get-logfile-info dbstruct params)) + ((test-get-records-for-index-file) (apply db:test-get-records-for-index-file dbstruct params)) + ((get-testinfo-state-status) (apply db:get-testinfo-state-status dbstruct params)) + ((test-get-top-process-pid) (apply db:test-get-top-process-pid dbstruct params)) + ((test-get-paths-matching-keynames-target-new) (apply db:test-get-paths-matching-keynames-target-new dbstruct params)) + ((get-prereqs-not-met) (apply db:get-prereqs-not-met dbstruct params)) + ((get-count-tests-running-for-run-id) (apply db:get-count-tests-running-for-run-id dbstruct params)) + ((synchash-get) (apply synchash:server-get dbstruct params)) + ((get-raw-run-stats) (apply db:get-raw-run-stats dbstruct params)) + + ;; RUNS + ((get-run-info) (apply db:get-run-info dbstruct params)) + ((get-run-status) (apply db:get-run-status dbstruct params)) + ((set-run-status) (apply db:set-run-status dbstruct params)) + ((get-tests-for-run) (apply db:get-tests-for-run dbstruct params)) + ((get-test-id) (apply db:get-test-id dbstruct params)) + ((get-tests-for-run-mindata) (apply db:get-tests-for-run-mindata dbstruct params)) + ((get-runs) (apply db:get-runs dbstruct params)) + ((get-num-runs) (apply db:get-num-runs dbstruct params)) + ((get-all-run-ids) (db:get-all-run-ids dbstruct)) + ((get-prev-run-ids) (apply db:get-prev-run-ids dbstruct params)) + ((get-run-ids-matching-target) (apply db:get-run-ids-matching-target dbstruct params)) + ((get-runs-by-patt) (apply db:get-runs-by-patt dbstruct params)) + ((get-run-name-from-id) (apply db:get-run-name-from-id dbstruct params)) + ((get-main-run-stats) (apply db:get-main-run-stats dbstruct params)) + ((get-var) (apply db:get-var dbstruct params)) + ((get-run-stats) (apply db:get-run-stats dbstruct params)) + + ;; STEPS + ((get-steps-data) (apply db:get-steps-data dbstruct params)) + ((get-steps-for-test) (apply db:get-steps-for-test dbstruct params)) + + ;; TEST DATA + ((read-test-data) (apply db:read-test-data dbstruct params)) + + ;; MISC + ((get-latest-host-load) (apply db:get-latest-host-load dbstruct params)) + ((have-incompletes?) (apply db:have-incompletes? dbstruct params)) + ((login) (apply db:login dbstruct params)) + ((general-call) (let ((stmtname (car params)) + (run-id (cadr params)) + (realparams (cddr params))) + (db:general-call dbstruct stmtname realparams))) + ((sdb-qry) (apply sdb:qry params)) + ((ping) (current-process-id)) + + ;; TESTMETA + ((testmeta-get-record) (apply db:testmeta-get-record dbstruct params)) + + ;; TASKS + ((find-task-queue-records) (apply tasks:find-task-queue-records dbstruct params))))) + (let ((delta-t (- (current-milliseconds) + start-t))) + (hash-table-set! *db-api-call-time* cmd + (cons delta-t (hash-table-ref/default *db-api-call-time* cmd '())))) + res))))) ;; http-server send-response ;; api:process-request ;; db:* ;; ;; NB// Runs on the server as part of the server loop ;; (define (api:process-request dbstruct $) ;; the $ is the request vars proc + (set! *api-process-request-count* (+ *api-process-request-count* 1)) (let* ((cmd ($ 'cmd)) (paramsj ($ 'params)) (params (db:string->obj paramsj transport: 'http)) ;; (rmt:json-str->dat paramsj)) (resdat (api:execute-requests dbstruct (vector cmd params))) ;; #( flag result ) (res (vector-ref resdat 1))) - + (if (> *api-process-request-count* *max-api-process-requests*) + (set! *max-api-process-requests* *api-process-request-count*)) + (set! *api-process-request-count* (- *api-process-request-count* 1)) ;; This can be here but needs controls to ensure it doesn't run more than every 4 seconds ;; (rmt:dat->json-str ;; (if (or (string? res) ;; (list? res) ;; (number? res) Index: client.scm ================================================================== --- client.scm +++ client.scm @@ -12,11 +12,11 @@ ;; C L I E N T S ;;====================================================================== (require-extension (srfi 18) extras tcp s11n) -(use srfi-1 posix regex regex-case srfi-69 hostinfo md5 message-digest) +(use srfi-1 posix regex regex-case srfi-69 hostinfo md5 message-digest matchable) ;; (use zmq) (use (prefix sqlite3 sqlite3:)) (use spiffy uri-common intarweb http-client spiffy-request-vars uri-common intarweb directory-utils) @@ -48,102 +48,16 @@ ((rpc) (rpc:client-connect iface port)) ((http) (http:client-connect iface port)) ((zmq) (zmq:client-connect iface port)) (else (rpc:client-connect iface port)))) -(define (client:setup run-id #!key (remaining-tries 10) (failed-connects 0)) +(define (client:setup areapath #!key (remaining-tries 100) (failed-connects 0)) (case (server:get-transport) ((rpc) (rpc-transport:client-setup run-id remaining-tries: remaining-tries failed-connects: failed-connects)) ;;(client:setup-rpc run-id)) - ((http)(client:setup-http run-id remaining-tries: remaining-tries failed-connects: failed-connects)) + ((http)(client:setup-http areapath remaining-tries: remaining-tries failed-connects: failed-connects)) (else (rpc-transport:client-setup run-id remaining-tries: remaining-tries failed-connects: failed-connects)))) ;; (client:setup-rpc run-id)))) -;; (define (client:login-no-auto-setup server-info run-id) -;; (case (server:get-transport) -;; ((rpc) (rpc:login-no-auto-client-setup server-info run-id)) -;; ((http) (rmt:login-no-auto-client-setup server-info run-id)) -;; (else (rpc:login-no-auto-client-setup server-info run-id)))) -;; -;; (define (client:setup-rpc run-id) -;; (debug:print 0 *default-log-port* "INFO: client:setup remaining-tries=" remaining-tries) -;; (if (<= remaining-tries 0) -;; (begin -;; (debug:print-error 0 *default-log-port* "failed to start or connect to server for run-id " run-id) -;; (exit 1)) -;; (let ((host-info (hash-table-ref/default *runremote* run-id #f))) -;; (debug:print-info 0 *default-log-port* "client:setup host-info=" host-info ", remaining-tries=" remaining-tries) -;; (if host-info -;; (let* ((iface (car host-info)) -;; (port (cadr host-info)) -;; (start-res (client:connect iface port)) -;; ;; (ping-res (server:ping-server run-id iface port)) -;; (ping-res (client:login-no-auto-setup start-res run-id))) -;; (if ping-res ;; sucessful login? -;; (begin -;; (hash-table-set! *runremote* run-id start-res) -;; start-res) ;; return the server info -;; (if (member remaining-tries '(3 4 6)) -;; (begin ;; login failed -;; (debug:print 25 *default-log-port* "INFO: client:setup start-res=" start-res ", run-id=" run-id ", server-dat=" host-info) -;; (hash-table-delete! *runremote* run-id) -;; (open-run-close tasks:server-force-clean-run-record -;; tasks:open-db -;; run-id -;; (car host-info) -;; (cadr host-info) -;; " client:setup (host-info=#t)") -;; (thread-sleep! 5) -;; (client:setup run-id remaining-tries: 10)) ;; (- remaining-tries 1))) -;; (begin -;; (debug:print 25 *default-log-port* "INFO: client:setup failed to connect, start-res=" start-res ", run-id=" run-id ", host-info=" host-info) -;; (thread-sleep! 5) -;; (client:setup run-id remaining-tries: (- remaining-tries 1)))))) -;; ;; YUK: rename server-dat here -;; (let* ((server-dat (open-run-close tasks:get-server tasks:open-db run-id))) -;; (debug:print-info 0 *default-log-port* "client:setup server-dat=" server-dat ", remaining-tries=" remaining-tries) -;; (if server-dat -;; (let* ((iface (tasks:hostinfo-get-interface server-dat)) -;; (port (tasks:hostinfo-get-port server-dat)) -;; (start-res (http-transport:client-connect iface port)) -;; ;; (ping-res (server:ping-server run-id iface port)) -;; (ping-res (rmt:login-no-auto-client-setup start-res run-id))) -;; (if start-res -;; (begin -;; (hash-table-set! *runremote* run-id start-res) -;; start-res) -;; (if (member remaining-tries '(2 5)) -;; (begin ;; login failed -;; (debug:print 25 *default-log-port* "INFO: client:setup start-res=" start-res ", run-id=" run-id ", server-dat=" server-dat) -;; (hash-table-delete! *runremote* run-id) -;; (open-run-close tasks:server-force-clean-run-record -;; tasks:open-db -;; run-id -;; (tasks:hostinfo-get-interface server-dat) -;; (tasks:hostinfo-get-port server-dat) -;; " client:setup (server-dat = #t)") -;; (thread-sleep! 2) -;; (server:try-running run-id) -;; (thread-sleep! 10) ;; give server a little time to start up -;; (client:setup run-id remaining-tries: 10)) ;; (- remaining-tries 1))) -;; (begin -;; (debug:print 25 *default-log-port* "INFO: client:setup start-res=" start-res ", run-id=" run-id ", server-dat=" server-dat) -;; (thread-sleep! 5) -;; (client:setup run-id remaining-tries: (- remaining-tries 1)))))) -;; (begin ;; no server registered -;; (if (eq? remaining-tries 2) -;; (begin -;; ;; (open-run-close tasks:server-clean-out-old-records-for-run-id tasks:open-db run-id " client:setup (server-dat=#f)") -;; (client:setup run-id remaining-tries: 10)) -;; (begin -;; (thread-sleep! 2) -;; (debug:print 25 *default-log-port* "INFO: client:setup start-res (not defined here), run-id=" run-id ", server-dat=" server-dat) -;; (if (< (open-run-close tasks:num-in-available-state tasks:open-db run-id) 3) -;; (begin -;; ;; (open-run-close tasks:server-clean-out-old-records-for-run-id tasks:open-db run-id " client:setup (server-dat=#f)") -;; (server:try-running run-id))) -;; (thread-sleep! 10) ;; give server a little time to start up -;; (client:setup run-id remaining-tries: (- remaining-tries 1))))))))))) - ;; Do all the connection work, look up the transport type and set up the ;; connection if required. ;; ;; There are two scenarios. ;; 1. We are a test manager and we received *transport-type* and *runremote* via cmdline @@ -152,100 +66,50 @@ ;; ;; client:setup ;; ;; lookup_server, need to remove *runremote* stuff ;; -(define (client:setup-http run-id #!key (remaining-tries 10) (failed-connects 0)) + +(define (client:setup-http areapath #!key (remaining-tries 100) (failed-connects 0)) (debug:print-info 2 *default-log-port* "client:setup remaining-tries=" remaining-tries) - (let* ((tdbdat (tasks:open-db))) - (if (<= remaining-tries 0) - (begin - (debug:print-error 0 *default-log-port* "failed to start or connect to server for run-id " run-id) - (exit 1)) - (let* ((server-dat (tasks:get-server (db:delay-if-busy tdbdat) run-id))) - (debug:print-info 4 *default-log-port* "client:setup server-dat=" server-dat ", remaining-tries=" remaining-tries) - (if server-dat - (let* ((iface (tasks:hostinfo-get-interface server-dat)) - (hostname (tasks:hostinfo-get-hostname server-dat)) - (port (tasks:hostinfo-get-port server-dat)) - (start-res (case *transport-type* - ((http)(http-transport:client-connect iface port)) - ;;((nmsg)(nmsg-transport:client-connect hostname port)) - )) - (ping-res (case *transport-type* - ((http)(rmt:login-no-auto-client-setup start-res)) - ;; ((nmsg)(let ((logininfo (rmt:login-no-auto-client-setup start-res run-id))) - ;; (if logininfo - ;; (car (vector-ref logininfo 1)) - ;; #f))) - - ))) - (if (and start-res - ping-res) - (begin - (remote-conndat-set! *runremote* start-res) ;; (hash-table-set! *runremote* run-id start-res) - (debug:print-info 2 *default-log-port* "connected to " (http-transport:server-dat-make-url start-res)) - start-res) - (begin ;; login failed but have a server record, clean out the record and try again - (debug:print-info 0 *default-log-port* "client:setup, login failed, will attempt to start server ... start-res=" start-res ", run-id=" run-id ", server-dat=" server-dat) - (case *transport-type* - ((http)(http-transport:close-connections run-id))) - (remote-conndat-set! *runremote* #f) ;; (hash-table-delete! *runremote* run-id) - (tasks:kill-server-run-id run-id) - (tasks:server-force-clean-run-record (db:delay-if-busy tdbdat) - run-id - (tasks:hostinfo-get-interface server-dat) - (tasks:hostinfo-get-port server-dat) - " client:setup (server-dat = #t)") - (if (> remaining-tries 8) - (thread-sleep! (+ 1 (random 5))) ;; spread out the starts a little - (thread-sleep! (+ 15 (random 20)))) ;; it isn't going well. give it plenty of time - (server:try-running run-id) - (thread-sleep! 5) ;; give server a little time to start up - (client:setup run-id remaining-tries: (- remaining-tries 1)) - ))) - (begin ;; no server registered - (let ((num-available (tasks:num-in-available-state (db:dbdat-get-db tdbdat) run-id))) - (debug:print-info 0 *default-log-port* "client:setup, no server registered, remaining-tries=" remaining-tries " num-available=" num-available) - (if (< num-available 2) - (server:try-running run-id)) - (thread-sleep! (+ 5 (random (- 20 remaining-tries)))) ;; give server a little time to start up, randomize a little to avoid start storms. - (client:setup run-id remaining-tries: (- remaining-tries 1))))))))) - -;; keep this as a function to ease future -(define (client:start run-id server-info) - (http-transport:client-connect (tasks:hostinfo-get-interface server-info) - (tasks:hostinfo-get-port server-info))) - -;; ;; client:signal-handler -;; (define (client:signal-handler signum) -;; (signal-mask! signum) -;; (set! *time-to-exit* #t) -;; (handle-exceptions -;; exn -;; (debug:print 0 *default-log-port* " ... exiting ...") -;; (let ((th1 (make-thread (lambda () -;; "") ;; do nothing for now (was flush out last call if applicable) -;; "eat response")) -;; (th2 (make-thread (lambda () -;; (debug:print-error 0 *default-log-port* "Received ^C, attempting clean exit. Please be patient and wait a few seconds before hitting ^C again.") -;; (thread-sleep! 1) ;; give the flush one second to do it's stuff -;; (debug:print 0 *default-log-port* " Done.") -;; (exit 4)) -;; "exit on ^C timer"))) -;; (thread-start! th2) -;; (thread-start! th1) -;; (thread-join! th2)))) -;; -;; ;; client:launch -;; ;; Need to set the signal handler somewhere other than here as this -;; ;; routine will go away. -;; ;; -;; (define (client:launch run-id) -;; (set-signal-handler! signal/int client:signal-handler) -;; (set-signal-handler! signal/term client:signal-handler) -;; (if (client:setup run-id) -;; (debug:print-info 2 *default-log-port* "connected as client") -;; (begin -;; (debug:print-error 0 *default-log-port* "Failed to connect as client") -;; (exit)))) -;; + (server:start-and-wait areapath) + (if (<= remaining-tries 0) + (begin + (debug:print-error 0 *default-log-port* "failed to start or connect to server") + (exit 1)) + ;; + ;; Alternatively here, we can get the list of candidate servers and work our way + ;; through them searching for a good one. + ;; + (let* ((server-dat (server:get-first-best areapath))) + (if (not server-dat) ;; no server found + (client:setup-http areapath remaining-tries: (- remaining-tries 1)) + (let ((host (cadr server-dat)) + (port (caddr server-dat))) + (debug:print-info 4 *default-log-port* "client:setup server-dat=" server-dat ", remaining-tries=" remaining-tries) + (if (not *runremote*)(set! *runremote* (make-remote))) + (if (and host port) + (let* ((start-res (case *transport-type* + ((http)(http-transport:client-connect host port)))) + (ping-res (case *transport-type* + ((http)(rmt:login-no-auto-client-setup start-res))))) + (if (and start-res + ping-res) + (begin + (remote-conndat-set! *runremote* start-res) ;; (hash-table-set! *runremote* run-id start-res) + (debug:print-info 2 *default-log-port* "connected to " (http-transport:server-dat-make-url start-res)) + start-res) + (begin ;; login failed but have a server record, clean out the record and try again + (debug:print-info 0 *default-log-port* "client:setup, login failed, will attempt to start server ... start-res=" start-res ", run-id=" run-id ", server-dat=" server-dat) + (case *transport-type* + ((http)(http-transport:close-connections run-id))) + (remote-conndat-set! *runremote* #f) ;; (hash-table-delete! *runremote* run-id) + (thread-sleep! 1) + (client:setup-http areapath remaining-tries: (- remaining-tries 1)) + ))) + (begin ;; no server registered + (server:kind-run areapath) + (debug:print-info 0 *default-log-port* "client:setup, no server registered, remaining-tries=" remaining-tries) + (thread-sleep! 1) ;; (+ 5 (random (- 20 remaining-tries)))) ;; give server a little time to start up, randomize a little to avoid start storms. + (server:start-and-wait areapath) + (client:setup-http areapath remaining-tries: (- remaining-tries 1))))))))) + Index: common.scm ================================================================== --- common.scm +++ common.scm @@ -7,11 +7,11 @@ ;; This program is distributed WITHOUT ANY WARRANTY; without even the ;; implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR ;; PURPOSE. ;;====================================================================== -(use srfi-1 posix regex-case base64 format dot-locking csv-xml z3 sql-de-lite hostinfo md5 message-digest typed-records directory-utils) +(use srfi-1 posix regex-case base64 format dot-locking csv-xml z3 sql-de-lite hostinfo md5 message-digest typed-records directory-utils stack) (require-extension regex posix) (require-extension (srfi 18) extras tcp rpc) (import (prefix sqlite3 sqlite3:)) @@ -90,19 +90,21 @@ (define *db-stats-mutex* (make-mutex)) ;; db access (define *db-last-access* (current-seconds)) ;; last db access, used in server (define *db-write-access* #t) ;; db sync -(define *db-last-write* 0) ;; used to record last touch of db (define *db-last-sync* 0) ;; last time the sync to megatest.db happened (define *db-sync-in-progress* #f) ;; if there is a sync in progress do not try to start another -(define *db-multi-sync-mutex* (make-mutex)) ;; protect access to *db-sync-in-progress*, *db-last-sync* and *db-last-write* +(define *db-multi-sync-mutex* (make-mutex)) ;; protect access to *db-sync-in-progress*, *db-last-sync* ;; task db (define *task-db* #f) ;; (vector db path-to-db) (define *db-access-allowed* #t) ;; flag to allow access (define *db-access-mutex* (make-mutex)) +(define *db-transaction-mutex* (make-mutex)) (define *db-cache-path* #f) +(define *db-with-db-mutex* (make-mutex)) +(define *db-api-call-time* (make-hash-table)) ;; hash of command => (list of times) ;; SERVER (define *my-client-signature* #f) (define *transport-type* 'http) ;; override with [server] transport http|rpc|nmsg (define *runremote* #f) ;; if set up for server communication this will hold @@ -115,10 +117,12 @@ (define *run-id* #f) (define *server-kind-run* (make-hash-table)) (define *home-host* #f) (define *total-non-write-delay* 0) (define *heartbeat-mutex* (make-mutex)) +(define *api-process-request-count* 0) +(define *max-api-process-requests* 0) ;; client (define *rmt-mutex* (make-mutex)) ;; remote access calls mutex ;; RPC transport @@ -131,28 +135,45 @@ (define *toptest-paths* (make-hash-table)) ;; cache toptest path settings here (define *test-paths* (make-hash-table)) ;; cache test-id to test run paths here (define *test-ids* (make-hash-table)) ;; cache run-id, testname, and item-path => test-id (define *test-info* (make-hash-table)) ;; cache the test info records, update the state, status, run_duration etc. from testdat.db -(define *run-info-cache* (make-hash-table)) ;; run info is stable, no need to reget +(define *run-info-cache* (make-hash-table)) ;; run info is stable, no need to reget (define *launch-setup-mutex* (make-mutex)) ;; need to be able to call launch:setup often so mutex it and re-call the real deal only if *toppath* not set (define *homehost-mutex* (make-mutex)) +(defstruct remote + (hh-dat (common:get-homehost)) ;; homehost record ( addr . hhflag ) + (server-url (if *toppath* (server:check-if-running *toppath*))) ;; (server:check-if-running *toppath*) #f)) + (last-server-check 0) ;; last time we checked to see if the server was alive + (conndat #f) + (transport *transport-type*) + (server-timeout (or (server:get-timeout) 100))) ;; default to 100 seconds + +;; launching and hosts +(defstruct host + (reachable #f) + (last-update 0) + (last-used 0) + (last-cpuload 1)) + +(define *host-loads* (make-hash-table)) + ;; cache environment vars for each run here (define *env-vars-by-run-id* (make-hash-table)) ;; Testconfig and runconfig caches. -(define *testconfigs* (make-hash-table)) ;; test-name => testconfig -(define *runconfigs* (make-hash-table)) ;; target => runconfig +(define *testconfigs* (make-hash-table)) ;; test-name => testconfig +(define *runconfigs* (make-hash-table)) ;; target => runconfig ;; This is a cache of pre-reqs met, don't re-calc in cases where called with same params less than ;; five seconds ago (define *pre-reqs-met-cache* (make-hash-table)) ;; cache of verbosity given string ;; -(define *verbosity-cache* (make-hash-table)) +(define *verbosity-cache* (make-hash-table)) (define (common:clear-caches) (set! *target* (make-hash-table)) (set! *keys* (make-hash-table)) (set! *keyvals* (make-hash-table)) @@ -221,19 +242,32 @@ ;; (define (common:rotate-logs) (if (not (directory-exists? "logs"))(create-directory "logs")) (directory-fold (lambda (file rem) - (if (and (string-match "^.*.log" file) - (> (file-size (conc "logs/" file)) 200000)) - (let ((gzfile (conc "logs/" file ".gz"))) - (if (file-exists? gzfile) - (begin - (debug:print-info 0 *default-log-port* "removing " gzfile) - (delete-file gzfile))) - (debug:print-info 0 *default-log-port* "compressing " file) - (system (conc "gzip logs/" file))))) + (handle-exceptions + exn + (debug:print-info 0 *default-log-port* "failed to rotate log " file ", probably handled by another process.") + (let* ((fullname (conc "logs/" file)) + (file-age (- (current-seconds)(file-modification-time fullname)))) + (if (or (and (string-match "^.*.log" file) + (> (file-size fullname) 200000)) + (and (string-match "^server-.*.log" file) + (> (- (current-seconds) (file-modification-time fullname)) + (* 8 60 60)))) + (let ((gzfile (conc fullname ".gz"))) + (if (file-exists? gzfile) + (begin + (debug:print-info 0 *default-log-port* "removing " gzfile) + (delete-file gzfile))) + (debug:print-info 0 *default-log-port* "compressing " file) + (system (conc "gzip " fullname))) + (if (> file-age (* (string->number (or (configf:lookup *configdat* "setup" "log-expire-days") "30")) 24 3600)) + (handle-exceptions + exn + #f + (delete-file fullname))))))) '() "logs")) ;; Force a megatest cleanup-db if version is changed and skip-version-check not specified ;; @@ -522,50 +556,65 @@ ;;====================================================================== ;; E X I T H A N D L I N G ;;====================================================================== (define (common:run-sync?) - (let ((ohh (common:on-homehost?)) - (srv (args:get-arg "-server"))) - ;; (debug:print-info 0 *default-log-port* "common:run-sync? ohh=" ohh ", srv=" srv) (and (common:on-homehost?) - (args:get-arg "-server")))) + (args:get-arg "-server"))) + +;; (let ((ohh (common:on-homehost?)) +;; (srv (args:get-arg "-server"))) +;; (and ohh srv))) + ;; (debug:print-info 0 *default-log-port* "common:run-sync? ohh=" ohh ", srv=" srv) ;;;; run-ids ;; if #f use *db-local-sync* : or 'local-sync-flags ;; if #t use timestamps : or 'timestamps (define (common:sync-to-megatest.db dbstruct) (let ((start-time (current-seconds)) (res (db:multi-db-sync dbstruct 'new2old))) (let ((sync-time (- (current-seconds) start-time))) - (debug:print-info 3 *default-log-port* "Sync of newdb to olddb completed in " sync-time " seconds") + (debug:print-info 3 *default-log-port* "Sync of newdb to olddb completed in " sync-time " seconds pid="(current-process-id)) (if (common:low-noise-print 30 "sync new to old") - (debug:print-info 0 *default-log-port* "Sync of newdb to olddb completed in " sync-time " seconds"))) + (debug:print-info 0 *default-log-port* "Sync of newdb to olddb completed in " sync-time " seconds pid="(current-process-id)))) res)) + + + +(define *wdnum* 0) +(define *wdnum*mutex (make-mutex)) ;; currently the primary job of the watchdog is to run the sync back to megatest.db from the db in /tmp ;; if we are on the homehost and we are a server (by definition we are on the homehost if we are a server) ;; (define (common:watchdog) (thread-sleep! 0.05) ;; delay for startup (let ((legacy-sync (common:run-sync?)) (debug-mode (debug:debug-mode 1)) - (last-time (current-seconds))) - (debug:print-info 0 *default-log-port* "watchdog starting. legacy-sync is " legacy-sync) - (if legacy-sync - (let ((dbstruct (db:setup))) + (last-time (current-seconds)) + (this-wd-num (begin (mutex-lock! *wdnum*mutex) (let ((x *wdnum*)) (set! *wdnum* (add1 *wdnum*)) (mutex-unlock! *wdnum*mutex) x)))) + (debug:print-info 0 *default-log-port* "watchdog starting. legacy-sync is " legacy-sync" pid="(current-process-id)" this-wd-num="this-wd-num) + (if (and legacy-sync (not *time-to-exit*)) + (let* ((dbstruct (db:setup)) + (mtdb (dbr:dbstruct-mtdb dbstruct)) + (mtpath (db:dbdat-get-path mtdb))) (debug:print-info 0 *default-log-port* "Server running, periodic sync started.") (let loop () ;; sync for filesystem local db writes ;; (mutex-lock! *db-multi-sync-mutex*) - (let* ((need-sync (>= *db-last-write* *db-last-sync*)) ;; no sync since last write + (let* ((need-sync (>= *db-last-access* *db-last-sync*)) ;; no sync since last write (sync-in-progress *db-sync-in-progress*) - (should-sync (> (- (current-seconds) *db-last-sync*) 5)) ;; sync every five seconds minimum + (should-sync (and (not *time-to-exit*) + (> (- (current-seconds) *db-last-sync*) 5))) ;; sync every five seconds minimum + (start-time (current-seconds)) + (mt-mod-time (file-modification-time mtpath)) + (recently-synced (> (- start-time mt-mod-time) 4)) (will-sync (and (or need-sync should-sync) - (not sync-in-progress))) - (start-time (current-seconds))) + (not sync-in-progress) + (not recently-synced)))) + ;; (if recently-synced (debug:print-info 0 *default-log-port* "Skipping sync due to recently-synced flag=" recently-synced)) ;; (debug:print-info 0 *default-log-port* "need-sync: " need-sync " sync-in-progress: " sync-in-progress " should-sync: " should-sync " will-sync: " will-sync) (if will-sync (set! *db-sync-in-progress* #t)) (mutex-unlock! *db-multi-sync-mutex*) (if will-sync (let ((res (common:sync-to-megatest.db dbstruct))) ;; did we sync any data? If so need to set the db touched flag to keep the server alive @@ -590,55 +639,71 @@ ;; keep going unless time to exit ;; (if (not *time-to-exit*) (let delay-loop ((count 0)) + ;;(BB> "delay-loop top; count="count" pid="(current-process-id)" this-wd-num="this-wd-num" *time-to-exit*="*time-to-exit*) + (if (and (not *time-to-exit*) (< count 4)) ;; was 11, changing to 4. (begin (thread-sleep! 1) (delay-loop (+ count 1)))) - (loop))) + (if (not *time-to-exit*) (loop)))) (if (common:low-noise-print 30) - (debug:print-info 0 *default-log-port* "Exiting watchdog timer, *time-to-exit* = " *time-to-exit*))))))) + (debug:print-info 0 *default-log-port* "Exiting watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id)" this-wd-num="this-wd-num))))))) (define (std-exit-procedure) + (on-exit (lambda () 0)) + ;;(BB> "std-exit-procedure called; *time-to-exit*="*time-to-exit*) (let ((no-hurry (if *time-to-exit* ;; hurry up #f (begin (set! *time-to-exit* #t) #t)))) (debug:print-info 4 *default-log-port* "starting exit process, finalizing databases.") (if (and no-hurry (debug:debug-mode 18)) (rmt:print-db-stats)) (let ((th1 (make-thread (lambda () ;; thread for cleaning up, give it five seconds - (if *dbstruct-db* (db:close-all *dbstruct-db*)) ;; one second allocated + (if *dbstruct-db* (db:close-all *dbstruct-db*)) ;; one second allocated (if *task-db* (let ((db (cdr *task-db*))) (if (sqlite3:database? db) (begin (sqlite3:interrupt! db) (sqlite3:finalize! db #t) ;; (vector-set! *task-db* 0 #f) (set! *task-db* #f))))) - (close-output-port *default-log-port*) + (if (and *runremote* + (remote-conndat *runremote*)) + (begin + (http-client#close-all-connections!))) ;; for http-client + (if (not (eq? *default-log-port* (current-error-port))) + (close-output-port *default-log-port*)) (set! *default-log-port* (current-error-port))) "Cleanup db exit thread")) (th2 (make-thread (lambda () (debug:print 4 *default-log-port* "Attempting clean exit. Please be patient and wait a few seconds...") (if no-hurry - (thread-sleep! 5) ;; give the clean up few seconds to do it's stuff - (thread-sleep! 2)) - (debug:print 4 *default-log-port* " ... done") - ) + (begin + (thread-sleep! 5)) ;; give the clean up few seconds to do it's stuff + (begin + (thread-sleep! 2))) + (debug:print 4 *default-log-port* " ... done") + ) "clean exit"))) (thread-start! th1) (thread-start! th2) - (thread-join! th1)))) + (thread-join! th1) + ) + ) + + 0) (define (std-signal-handler signum) ;; (signal-mask! signum) (set! *time-to-exit* #t) + ;;(BB> "got signal "signum) (debug:print-error 0 *default-log-port* "Received signal " signum " exiting promptly") ;; (std-exit-procedure) ;; shouldn't need this since we are exiting and it will be called anyway (exit)) (set-signal-handler! signal/int std-signal-handler) ;; ^C @@ -770,20 +835,24 @@ (define (common:args-get-status) (or (args:get-arg "-status")(args:get-arg ":status"))) (define (common:args-get-testpatt rconf) - (let* ((rtestpatt (if rconf (runconfigs-get rconf "TESTPATT") #f)) - (args-testpatt (or (args:get-arg "-testpatt") - (args:get-arg "-runtests") - "%")) - (testpatt (or (and (equal? args-testpatt "%") - rtestpatt) - args-testpatt))) - (if rtestpatt (debug:print-info 0 *default-log-port* "TESTPATT from runconfigs: " rtestpatt)) - testpatt)) - + (let* ((tagexpr (args:get-arg "-tagexpr")) + (tags-testpatt (if tagexpr (string-join (runs:get-tests-matching-tags tagexpr) ",") #f)) + (testpatt-key (if (args:get-arg "--modepatt") (args:get-arg "--modepatt") "TESTPATT")) + (args-testpatt (or (args:get-arg "-testpatt") (args:get-arg "-runtests") "%")) + (rtestpatt (if rconf (runconfigs-get rconf testpatt-key) #f))) + (cond + (tags-testpatt + (debug:print-info 0 *default-log-port* "-tagexpr "tagexpr" selects testpatt "tags-testpatt) + tags-testpatt) + ((and (equal? args-testpatt "%") rtestpatt) + (debug:print-info 0 *default-log-port* "testpatt defined in "testpatt-key" from runconfigs: " rtestpatt) + rtestpatt) + (else args-testpatt)))) + (define (common:get-linktree) (or (getenv "MT_LINKTREE") (if *configdat* (configf:lookup *configdat* "setup" "linktree")))) @@ -912,10 +981,20 @@ #f ;; better than an exception for my needs (fold (lambda (a b) (if (comp a b) a b)) (car lst) lst))) + +;; get min or max, use > for max and < for min, this works around the limits on apply +;; +(define (common:sum lst) + (if (null? lst) + 0 + (fold (lambda (a b) + (+ a b)) + (car lst) + lst))) ;; path list to hash-table tree ;; ((a b c)(a b d)(e b c)) => ((a (b (d) (c))) (e (b (c)))) ;; (define (common:list->htree lst) @@ -1024,10 +1103,24 @@ (define (common:lazy-modification-time fpath) (handle-exceptions exn 0 (file-modification-time fpath))) + +;; find timestamp of newest file associated with a sqlite db file +(define (common:lazy-sqlite-db-modification-time fpath) + (let* ((glob-list (handle-exceptions + exn + '("/no/such/file") + (glob (conc fpath "*")))) + (file-list (if (eq? 0 (length glob-list)) + '("/no/such/file") + glob-list))) + (apply max + (map + common:lazy-modification-time + file-list)))) ;; return a nice clean pathname made absolute (define (common:nice-path dir) (let ((match (string-match "^(~[^\\/]*)(\\/.*|)$" dir))) (if match ;; using ~ for home? @@ -1073,10 +1166,151 @@ (with-input-from-pipe (conc "ssh " remote-host " cat /proc/loadavg") (lambda ()(list (read)(read)(read))))) (with-input-from-file "/proc/loadavg" (lambda ()(list (read)(read)(read)))))) + +;; get normalized cpu load by reading from /proc/loadavg and /proc/cpuinfo return all three values and the number of real cpus and the number of threads +;; returns alist '((adj-cpu-load . normalized-proc-load) ... etc. +;; keys: adj-proc-load, adj-core-load, 1m-load, 5m-load, 15m-load +;; +(define (common:get-normalized-cpu-load remote-host) + (let ((data (if remote-host + (with-input-from-pipe + (conc "ssh " remote-host " cat /proc/loadavg;cat /proc/cpuinfo;echo end") + read-lines) + (append + (with-input-from-file "/proc/loadavg" + read-lines) + (with-input-from-file "/proc/cpuinfo" + read-lines) + (list "end")))) + (load-rx (regexp "^([\\d\\.]+)\\s+([\\d\\.]+)\\s+([\\d\\.]+)\\s+.*$")) + (proc-rx (regexp "^processor\\s+:\\s+(\\d+)\\s*$")) + (core-rx (regexp "^core id\\s+:\\s+(\\d+)\\s*$")) + (phys-rx (regexp "^physical id\\s+:\\s+(\\d+)\\s*$")) + (max-num (lambda (p n)(max (string->number p) n)))) + ;; (print "data=" data) + (if (null? data) ;; something went wrong + #f + (let loop ((hed (car data)) + (tal (cdr data)) + (loads #f) + (proc-num 0) ;; processor includes threads + (phys-num 0) ;; physical chip on motherboard + (core-num 0)) ;; core + ;; (print hed ", " loads ", " proc-num ", " phys-num ", " core-num) + (if (null? tal) ;; have all our data, calculate normalized load and return result + (let* ((act-proc (+ proc-num 1)) + (act-phys (+ phys-num 1)) + (act-core (+ core-num 1)) + (adj-proc-load (/ (car loads) act-proc)) + (adj-core-load (/ (car loads) act-core))) + (append (list (cons 'adj-proc-load adj-proc-load) + (cons 'adj-core-load adj-core-load)) + (list (cons '1m-load (car loads)) + (cons '5m-load (cadr loads)) + (cons '15m-load (caddr loads))) + (list (cons 'proc act-proc) + (cons 'core act-core) + (cons 'phys act-phys)))) + (regex-case + hed + (load-rx ( x l1 l5 l15 ) (loop (car tal)(cdr tal)(map string->number (list l1 l5 l15)) proc-num phys-num core-num)) + (proc-rx ( x p ) (loop (car tal)(cdr tal) loads (max-num p proc-num) phys-num core-num)) + (phys-rx ( x p ) (loop (car tal)(cdr tal) loads proc-num (max-num p phys-num) core-num)) + (core-rx ( x c ) (loop (car tal)(cdr tal) loads proc-num phys-num (max-num c core-num))) + (else + (begin + ;; (print "NO MATCH: " hed) + (loop (car tal)(cdr tal) loads proc-num phys-num core-num))))))))) + +(define (common:unix-ping hostname) + (let ((res (system (conc "ping -c 1 " hostname " > /dev/null")))) + (eq? res 0))) + +;; ideally put all this info into the db, no need to preserve it across moving homehost +;; +;; return list of +;; ( reachable? cpuload update-time ) +(define (common:get-host-info hostname) + (let* ((loadinfo (rmt:get-latest-host-load hostname)) + (load (car loadinfo)) + (load-sample-time (cdr loadinfo)) + (load-sample-age (- (current-seconds) load-sample-time)) + (loadinfo-timeout-seconds 20) + (host-last-update-timeout-seconds 10) + (host-rec (hash-table-ref/default *host-loads* hostname #f)) + ) + (cond + ((< load-sample-age loadinfo-timeout-seconds) + (list #t + load-sample-time + load)) + ((and host-rec + (< (current-seconds) (+ (host-last-update host-rec) host-last-update-timeout-seconds))) + (list #t + (host-last-update host-rec) + (host-last-cpuload host-rec ))) + ((common:unix-ping hostname) + (list #t + (current-seconds) + (alist-ref 'adj-core-load (common:get-normalized-cpu-load hostname)))) + (else + (list #f 0 -1))))) + +(define (common:update-host-loads-table hosts-raw) + (let* ((hosts (filter (lambda (x) + (string-match (regexp "^\\S+$") x)) + hosts-raw))) + (for-each + (lambda (hostname) + (let* ((rec (let ((h (hash-table-ref/default *host-loads* hostname #f))) + (if h + h + (let ((h (make-host))) + (hash-table-set! *host-loads* hostname h) + h)))) + (host-info (common:get-host-info hostname)) + (is-reachable (car host-info)) + (last-reached-time (cadr host-info)) + (load (caddr host-info))) + (host-reachable-set! rec is-reachable) + (host-last-update-set! rec last-reached-time) + (host-last-cpuload-set! rec load))) + hosts))) + +(define (common:get-least-loaded-host hosts-raw) + (let* ((hosts (filter (lambda (x) + (string-match (regexp "^\\S+$") x)) + hosts-raw)) + (best-host #f) + (best-load 99999) + (curr-time (current-seconds))) + (common:update-host-loads-table hosts) + (for-each + (lambda (hostname) + (let* ((rec + (let ((h (hash-table-ref/default *host-loads* hostname #f))) + (if h + h + (let ((h (make-host))) + (hash-table-set! *host-loads* hostname h) + h)))) + (reachable (host-reachable rec)) + (load (host-last-cpuload rec))) + (cond + ((not reachable) #f) + ((< (+ load (/ (random 250) 1000)) ;; add a random factor to keep from getting in a rut + (+ best-load (/ (random 250) 1000)) ) + (set! best-load load) + (set! best-host hostname))))) + hosts) + best-host)) + + + (define (common:wait-for-cpuload maxload numcpus waitdelay #!key (count 1000) (msg #f)(remote-host #f)) (let* ((loadavg (common:get-cpu-load remote-host)) (first (car loadavg)) (next (cadr loadavg)) @@ -1575,28 +1809,30 @@ ;;====================================================================== ;; T E S T L A U N C H I N G P E R I T E M W I T H H O S T T Y P E S ;;====================================================================== ;; -;; [host-types] -;; general ssh #{getbgesthost general} -;; nbgeneral nbjob run JOBCOMMAND -log $MT_LINKTREE/$MT_TARGET/$MT_RUNNAME.$MT_TESTNAME-$MT_ITEM_PATH.lgo +;; [hosts] +;; arm cubie01 cubie02 +;; x86_64 zeus xena myth01 +;; allhosts #{g hosts arm} #{g hosts x86_64} ;; -;; [hosts] -;; general cubian xena +;; [host-types] +;; general #MTLOWESTLOAD #{g hosts allhosts} +;; arm #MTLOWESTLOAD #{g hosts arm} +;; nbgeneral nbjob run JOBCOMMAND -log $MT_LINKTREE/$MT_TARGET/$MT_RUNNAME.$MT_TESTNAME-$MT_ITEM_PATH.lgo ;; ;; [launchers] ;; envsetup general ;; xor/%/n 4C16G ;; % nbgeneral ;; ;; [jobtools] -;; launcher bsub -;; # if defined and not "no" flexi-launcher will bypass launcher unless there is no -;; # match. +;; # if defined and not "no" flexi-launcher will bypass "launcher" unless no match. ;; flexi-launcher yes - +;; launcher nbfake +;; (define (common:get-launcher configdat testname itempath) (let ((fallback-launcher (configf:lookup configdat "jobtools" "launcher"))) (if (and (configf:lookup configdat "jobtools" "flexi-launcher") ;; overrides launcher (not (equal? (configf:lookup configdat "jobtools" "flexi-launcher") "no"))) (let* ((launchers (hash-table-ref/default configdat "launchers" '()))) @@ -1609,11 +1845,16 @@ (if (tests:match patt testname itempath) (begin (debug:print-info 2 *default-log-port* "Have flexi-launcher match for " testname "/" itempath " = " host-type) (let ((launcher (configf:lookup configdat "host-types" host-type))) (if launcher - launcher + (let* ((launcher-parts (string-split launcher)) + (launcher-exe (car launcher-parts))) + (if (equal? launcher-exe "#MTLOWESTLOAD") ;; this is our special case, we will find the lowest load and craft a nbfake commandline + (let ((targ-host (common:get-least-loaded-host (cdr launcher-parts)))) + (conc "remrun " targ-host)) + launcher)) (begin (debug:print-info 0 *default-log-port* "WARNING: no launcher found for host-type " host-type) (if (null? tal) fallback-launcher (loop (car tal)(cdr tal))))))) Index: common_records.scm ================================================================== --- common_records.scm +++ common_records.scm @@ -121,10 +121,11 @@ (db:log-event (apply conc params)) (apply print params) ))))) ;; Brandon's debug printer shortcut (indulge me :) +(define *BB-process-starttime* (current-milliseconds)) (define (BB> . in-args) (let* ((stack (get-call-chain)) (location #f)) (for-each (lambda (frame) @@ -131,12 +132,59 @@ (let* ((this-loc (vector-ref frame 0)) (this-func (cadr (string-split this-loc " ")))) (if (equal? this-func "BB>") (set! location this-loc)))) stack) - (let ((dp-args (append (list 0 *default-log-port* location" " ) in-args))) + (let ((dp-args (append (list 0 *default-log-port* (conc location "@"(/ (- (current-milliseconds) *BB-process-starttime*) 1000)" ") ) in-args))) (apply debug:print dp-args)))) + +(define *BBpp_custom_expanders_list* (make-hash-table)) + + + +;; register hash tables with BBpp. +(hash-table-set! *BBpp_custom_expanders_list* HASH_TABLE: + (cons hash-table? hash-table->alist)) + +;; test name converter +(define (BBpp_custom_converter arg) + (let ((res #f)) + (for-each + (lambda (custom-type-name) + (let* ((custom-type-info (hash-table-ref *BBpp_custom_expanders_list* custom-type-name)) + (custom-type-test (car custom-type-info)) + (custom-type-converter (cdr custom-type-info))) + (when (and (not res) (custom-type-test arg)) + (set! res (custom-type-converter arg))))) + (hash-table-keys *BBpp_custom_expanders_list*)) + (if res (BBpp_ res) arg))) + +(define (BBpp_ arg) + (cond + ;;((SOMESTRUCT? arg) (cons SOMESTRUCT: (SOMESTRUCT->alist arg))) + ;;((dboard:tabdat? arg) (cons dboard:tabdat: (dboard:tabdat->alist arg))) + ((hash-table? arg) + (let ((al (hash-table->alist arg))) + (BBpp_ (cons HASH_TABLE: al)))) + ((null? arg) '()) + ;;((list? arg) (cons (BBpp_ (car arg)) (BBpp_ (cdr arg)))) + ((pair? arg) (cons (BBpp_ (car arg)) (BBpp_ (cdr arg)))) + (else (BBpp_custom_converter arg)))) + +;; Brandon's pretty printer. It expands hashes and custom types in addition to regular pp +(define (BBpp arg) + (pp (BBpp_ arg))) + +;(use define-macro) +(define-syntax inspect + (syntax-rules () + [(_ x) + ;; (with-output-to-port (current-error-port) + (printf "~a is: ~a\n" 'x (with-output-to-string (lambda () (BBpp x)))) + ;; ) + ] + [(_ x y ...) (begin (inspect x) (inspect y ...))])) (define (debug:print-error n e . params) ;; normal print (if (debug:debug-mode n) (with-output-to-port (or e (current-error-port)) Index: configf.scm ================================================================== --- configf.scm +++ configf.scm @@ -57,10 +57,11 @@ ;;====================================================================== ;; Make the regexp's needed globally available ;;====================================================================== (define configf:include-rx (regexp "^\\[include\\s+(.*)\\]\\s*$")) +(define configf:script-rx (regexp "^\\[scriptinc\\s+(.*)\\]\\s*$")) ;; include output from a script (define configf:section-rx (regexp "^\\[(.*)\\]\\s*$")) (define configf:blank-l-rx (regexp "^\\s*$")) (define configf:key-sys-pr (regexp "^(\\S+)\\s+\\[system\\s+(\\S+.*)\\]\\s*$")) (define configf:key-val-pr (regexp "^(\\S+)(\\s+(.*)|())$")) (define configf:key-no-val (regexp "^(\\S+)(\\s*)$")) @@ -68,11 +69,11 @@ (define configf:cont-ln-rx (regexp "^(\\s+)(\\S+.*)$")) (define configf:settings (regexp "^\\[configf:settings\\s+(\\S+)\\s+(\\S+)]\\s*$")) ;; read a line and process any #{ ... } constructs -(define configf:var-expand-regex (regexp "^(.*)#\\{(scheme|system|shell|getenv|get|runconfigs-get|rget)\\s+([^\\}\\{]*)\\}(.*)")) +(define configf:var-expand-regex (regexp "^(.*)#\\{(scheme|system|shell|getenv|get|runconfigs-get|rget|scm|sh|rp|gv|g|mtrah)\\s+([^\\}\\{]*)\\}(.*)")) (define (configf:process-line l ht allow-system #!key (linenum #f)) (let loop ((res l)) (if (string? res) (let ((matchdat (string-search configf:var-expand-regex res))) @@ -83,36 +84,42 @@ (poststr (list-ref matchdat 4)) (result #f) (start-time (current-seconds)) (cmdsym (string->symbol cmdtype)) (fullcmd (case cmdsym - ((scheme)(conc "(lambda (ht)" cmd ")")) - ((system)(conc "(lambda (ht)(system \"" cmd "\"))")) - ((shell) (conc "(lambda (ht)(shell \"" cmd "\"))")) - ((getenv)(conc "(lambda (ht)(get-environment-variable \"" cmd "\"))")) - ((get) + ((scheme scm) (conc "(lambda (ht)" cmd ")")) + ((system) (conc "(lambda (ht)(system \"" cmd "\"))")) + ((shell sh) (conc "(lambda (ht)(string-translate (shell \"" cmd "\") \"\n\" \" \"))")) + ((realpath rp)(conc "(lambda (ht)(common:nice-path \"" cmd "\"))")) + ((getenv gv) (conc "(lambda (ht)(get-environment-variable \"" cmd "\"))")) + ((mtrah) (conc "(lambda (ht)" + " (let ((extra \"" cmd "\"))" + " (conc (or *toppath* (get-environment-variable \"MT_RUN_AREA_HOME\"))" + " (if (string-null? extra) \"\" \"/\")" + " extra)))")) + ((get g) (let* ((parts (string-split cmd)) (sect (car parts)) (var (cadr parts))) (conc "(lambda (ht)(config-lookup ht \"" sect "\" \"" var "\"))"))) - ((runconfigs-get) (conc "(lambda (ht)(runconfigs-get ht \"" cmd "\"))")) - ((rget) (conc "(lambda (ht)(runconfigs-get ht \"" cmd "\"))")) + ((runconfigs-get rget) (conc "(lambda (ht)(runconfigs-get ht \"" cmd "\"))")) + ;; ((rget) (conc "(lambda (ht)(runconfigs-get ht \"" cmd "\"))")) (else "(lambda (ht)(print \"ERROR\") \"ERROR\")")))) ;; (print "fullcmd=" fullcmd) (handle-exceptions exn (begin (debug:print 0 *default-log-port* "WARNING: failed to process config input \"" l "\"") (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) ;; (print "exn=" (condition->list exn)) - (set! result (conc "#{( " cmdtype ") " cmd"}"))) + (set! result (conc "#{( " cmdtype ") " cmd "}, full expansion: " fullcmd))) (if (or allow-system - (not (member cmdtype '("system" "shell")))) + (not (member cmdtype '("system" "shell" "sh")))) (with-input-from-string fullcmd (lambda () (set! result ((eval (read)) ht)))) - (set! result (conc "#{(" cmdtype ") " cmd "}")))) + (set! result (conc "#{(" cmdtype ") " cmd "}")))) (case cmdsym ((system shell scheme) (let ((delta (- (current-seconds) start-time))) (if (> delta 2) (debug:print-info 0 *default-log-port* "for line \"" l "\"\n command: " cmd " took " delta " seconds to run with output:\n " result) @@ -182,16 +189,19 @@ ;; post-section-procs alist of section-pattern => proc, where: (proc section-name next-section-name ht curr-path) ;; (define (read-config path ht allow-system #!key (environ-patt #f)(curr-section #f)(sections #f)(settings (make-hash-table))(keep-filenames #f)(post-section-procs '())) (debug:print-info 5 *default-log-port* "read-config " path " allow-system " allow-system " environ-patt " environ-patt " curr-section: " curr-section " sections: " sections " pwd: " (current-directory)) (debug:print 9 *default-log-port* "START: " path) - (if (not (file-exists? path)) + (if (and (not (port? path)) + (not (file-exists? path))) ;; for case where we are handed a port (begin (debug:print-info 1 *default-log-port* "read-config - file not found " path " current path: " (current-directory)) ;; WARNING: This is a risky change but really, we should not return an empty hash table if no file read? #f) ;; (if (not ht)(make-hash-table) ht)) - (let ((inp (open-input-file path)) + (let ((inp (if (string? path) + (open-input-file path) + path)) ;; we can be handed a port (res (if (not ht)(make-hash-table) ht)) (metapath (if (or (debug:debug-mode 9) keep-filenames) path #f))) (let loop ((inl (configf:read-line inp res (calc-allow-system allow-system curr-section sections) settings)) ;; (read-line inp)) @@ -199,11 +209,12 @@ (var-flag #f);; turn on for key-var-pr and cont-ln-rx, turn off elsewhere (lead #f)) (debug:print-info 8 *default-log-port* "curr-section-name: " curr-section-name " var-flag: " var-flag "\n inl: \"" inl "\"") (if (eof-object? inl) (begin - (close-input-port inp) + (if (string? path) ;; we received a path, not a port, thus we are responsible for closing it. + (close-input-port inp)) (hash-table-delete! res "") ;; we are using "" as a dumping ground and must remove it before returning the ht (debug:print 9 *default-log-port* "END: " path) res) (regex-case inl @@ -229,10 +240,26 @@ (loop (configf:read-line inp res (calc-allow-system allow-system curr-section-name sections) settings) curr-section-name #f #f)) (begin (debug:print '(2 9) #f "INFO: include file " include-file " not found (called from " path ")") (debug:print 2 *default-log-port* " " full-conf) (loop (configf:read-line inp res (calc-allow-system allow-system curr-section-name sections) settings) curr-section-name #f #f))))) + (configf:script-rx ( x include-script );; handle-exceptions + ;; exn + ;; (begin + ;; (debug:print '(0 2 9) #f "INFO: include from script " include-script " failed.") + ;; (loop (configf:read-line inp res (calc-allow-system allow-system curr-section-name sections) settings) curr-section-name #f #f)) + (if (and (file-exists? include-script)(file-execute-access? include-script)) + (let* ((new-inp-port (open-input-pipe include-script))) + (debug:print '(2 9) *default-log-port* "Including from script output: " include-script) + ;; (print "We got here, calling read-config next. Port is: " new-inp-port) + (read-config new-inp-port res allow-system environ-patt: environ-patt curr-section: curr-section-name sections: sections settings: settings keep-filenames: keep-filenames) + (close-input-port new-inp-port) + (loop (configf:read-line inp res (calc-allow-system allow-system curr-section-name sections) settings) curr-section-name #f #f)) + (begin + (debug:print 0 *default-log-port* "Script not found or not exectutable: " include-script) + (loop (configf:read-line inp res (calc-allow-system allow-system curr-section-name sections) settings) curr-section-name #f #f))) + ) ;; ) (configf:section-rx ( x section-name ) (begin ;; call post-section-procs (for-each (lambda (dat) (let ((patt (car dat)) Index: dashboard-tests.scm ================================================================== --- dashboard-tests.scm +++ dashboard-tests.scm @@ -271,13 +271,14 @@ (iup:frame #:title "Set fields" (iup:vbox (iup:hbox (iup:label "Comment:") (let ((txtbox (iup:textbox #:action (lambda (val a b) - (rmt:test-set-state-status-by-id run-id test-id #f #f b) + ;; (rmt:test-set-state-status-by-id run-id test-id #f #f b) + (rmt:test-set-state-status run-id test-id #f #f b) ;; IDEA: Just set a variable with the proc to call? - (rmt:test-set-state-status-by-id run-id test-id #f #f b) + ;; (rmt:test-set-state-status-by-id run-id test-id #f #f b) (set! newcomment b)) #:value (db:test-get-comment testdat) #:expand "HORIZONTAL"))) (set! wtxtbox txtbox) txtbox)) @@ -287,11 +288,11 @@ (let* ((btns (map (lambda (state) (let ((btn (iup:button state #:expand "HORIZONTAL" #:size "50x" #:font "Courier New, -10" #:action (lambda (x) ;; (rmt:test-set-state-status-by-id run-id test-id state #f #f) - (rmt:roll-up-pass-fail-counts run-id test-id #f state #f #f) ;; test-name passed in as test-id is respected + (rmt:set-state-status-and-roll-up-items run-id test-id #f state #f #f) ;; test-name passed in as test-id is respected (db:test-set-state! testdat state))))) btn)) (map cadr *common:std-states*)))) ;; (list "COMPLETED" "NOT_STARTED" "RUNNING" "REMOTEHOSTSTART" "LAUNCHED" "KILLED" "KILLREQ")))) (vector-set! *state-status* 0 (lambda (state color) @@ -321,11 +322,11 @@ (if (not *dashboard-comment-share-slot*) (set! *dashboard-comment-share-slot* wtxtbox))) )))) (begin ;; (rmt:test-set-state-status-by-id run-id test-id #f status #f) - (rmt:roll-up-pass-fail-counts run-id test-id #f #f status #f) ;; test-name passed in as test-id is respected + (rmt:set-state-status-and-roll-up-items run-id test-id #f #f status #f) ;; test-name passed in as test-id is respected (db:test-set-status! testdat status)))))))) btn)) (map cadr *common:std-statuses*)))) ;; (list "PASS" "WARN" "FAIL" "CHECK" "n/a" "WAIVED" "SKIP")))) (vector-set! *state-status* 1 (lambda (status color) @@ -402,11 +403,12 @@ (let ((comment (iup:attribute comnt "VALUE")) (test-id (db:test-get-id testdat))) (if (or (not wpatt) (string-match wregx comment)) (begin - (rmt:test-set-state-status-by-id run-id test-id #f "WAIVED" comment) + ;; (rmt:test-set-state-status-by-id run-id test-id #f "WAIVED" comment) + (rmt:test-set-state-status-by run-id test-id #f "WAIVED" comment) (db:test-set-status! testdat "WAIVED") (cmtcmd comment) (iup:destroy! dlog)))))) (iup:button "Cancel" #:expand "HORIZONTAL" @@ -473,11 +475,11 @@ (testconfig (begin ;; (runs:set-megatest-env-vars run-id inrunname: runname testname: test-name itempath: item-path) (runs:set-megatest-env-vars run-id inkeyvals: keydat inrunname: runname intarget: keystring testname: testname itempath: item-path) ;; these may be needed by the launching process (handle-exceptions exn - (tests:get-testconfig (db:test-get-testname testdat) test-registry #f) + (tests:get-testconfig (db:test-get-testname testdat) (db:test-get-item-path testdat) test-registry #f) (tests:get-testconfig (db:test-get-testname testdat) test-registry #t)))) (viewlog (lambda (x) (if (file-exists? logfile) ;(system (conc "firefox " logfile "&")) (dashboard-tests:run-html-viewer logfile) Index: dashboard.scm ================================================================== --- dashboard.scm +++ dashboard.scm @@ -99,10 +99,14 @@ (if (args:get-arg "-h") (begin (print help) (exit))) +(if (not (common:on-homehost?)) + (begin + (debug:print 0 *default-log-port* "ERROR: Current policy requires running dashboard on homehost: " (common:get-homehost)))) + ;; TODO: Move this inside (main) ;; (if (not (launch:setup)) (begin (print "Failed to find megatest.config, exiting") @@ -290,10 +294,21 @@ ;; runs summary view tests-tree ;; used in newdashboard ) +;; register tabdat with BBpp +;; this is used by BBpp (Brandon's pretty printer) to convert dboard:tabdat into a composition of lists that pp will handle +(hash-table-set! *BBpp_custom_expanders_list* TABDAT: + (cons dboard:tabdat? + (lambda (tabdat-item) + (filter + (lambda (alist-entry) + (member (car alist-entry) + '(allruns-by-id allruns))) ;; FIELDS OF INTEREST + (dboard:tabdat->alist tabdat-item))))) + (define (dboard:tabdat-target-string vec) (let ((targ (dboard:tabdat-target vec))) (if (list? targ)(string-intersperse targ "/") "no-target-specified"))) (define (dboard:tabdat-test-patts-use vec) @@ -355,15 +370,29 @@ rowsused ;; hash of lists covering what areas used - replace with quadtree hierdat ;; put hierarchial sorted list here tests ;; hash of id => testdat ((tests-by-name (make-hash-table)) : hash-table) ;; hash of testfullname => testdat key-vals - ((last-update 0) : fixnum) ;; last query to db got records from before last-update - ((data-changed #f) : boolean) - ((run-data-offset 0) : number) ;; get only 100 items per call, set back to zero when received less that 100 items - (db-path #f) - ) + ((last-update 0) : number) ;; last query to db got records from before last-update + ((last-db-time 0) : number) ;; last timestamp on megatest.db + ((data-changed #f) : boolean) + ((run-data-offset 0) : number) ;; get only 100 items per call, set back to zero when received less than 100 items + (db-path #f)) + +;; register dboard:rundat with BBpp +;; this is used by BBpp (Brandon's pretty printer) to convert dboard:rundat into a composition of lists that pp will handle +(hash-table-set! *BBpp_custom_expanders_list* RUNDAT: + (cons dboard:rundat? + (lambda (tabdat-item) + (filter + (lambda (alist-entry) + (member (car alist-entry) + '(run run-data-offset ))) ;; FIELDS OF INTEREST + (dboard:rundat->alist tabdat-item))))) + + + (define (dboard:rundat-make-init #!key (run #f)(key-vals #f)(tests #f));; -100 is before time began (make-dboard:rundat run: run tests: (or tests (make-hash-table)) @@ -490,78 +519,87 @@ ;; gets all the tests for run-id that match testnamepatt and key-vals, merges them ;; ;; NOTE: Yes, this is used ;; (define (dboard:get-tests-for-run-duplicate tabdat run-id run testnamepatt key-vals) - (let* ((access-mode (dboard:tabdat-access-mode tabdat)) - (num-to-get - (let ((num-tests-from-config (configf:lookup *configdat* "setup" "num-tests-to-get"))) - (if num-tests-from-config - (begin - (BB> "override num-tests 100 -> "num-tests-from-config) - (string->number num-tests-from-config)) - 100))) - (states (hash-table-keys (dboard:tabdat-state-ignore-hash tabdat))) - (statuses (hash-table-keys (dboard:tabdat-status-ignore-hash tabdat))) - (do-not-use-db-file-timestamps #t) ;; (configf:lookup *configdat* "setup" "do-not-use-db-file-timestamps")) ;; this still hosts runs-summary-tab - (do-not-use-query-timestamps #t) ;; (configf:lookup *configdat* "setup" "do-not-use-query-timestamps")) ;; this no longer troubles runs-summary-tab - (sort-info (get-curr-sort)) - (sort-by (vector-ref sort-info 1)) - (sort-order (vector-ref sort-info 2)) - (bubble-type (if (member sort-order '(testname)) - 'testname - 'itempath)) + (let* ((start-time (current-seconds)) + (access-mode (dboard:tabdat-access-mode tabdat)) + (num-to-get (string->number (or (configf:lookup *configdat* "setup" "num-tests-to-get") + "200"))) + (states (hash-table-keys (dboard:tabdat-state-ignore-hash tabdat))) + (statuses (hash-table-keys (dboard:tabdat-status-ignore-hash tabdat))) + (do-not-use-db-file-timestamps #f) ;; (configf:lookup *configdat* "setup" "do-not-use-db-file-timestamps")) ;; this still hosts runs-summary-tab + (do-not-use-query-timestamps #f) ;; (configf:lookup *configdat* "setup" "do-not-use-query-timestamps")) ;; this no longer troubles runs-summary-tab + (sort-info (get-curr-sort)) + (sort-by (vector-ref sort-info 1)) + (sort-order (vector-ref sort-info 2)) + (bubble-type (if (member sort-order '(testname)) + 'testname + 'itempath)) ;; note: the rundat is normally created in "update-rundat". - (run-dat (or (hash-table-ref/default (dboard:tabdat-allruns-by-id tabdat) run-id #f) - (let ((rd (dboard:rundat-make-init run: run key-vals: key-vals))) - (hash-table-set! (dboard:tabdat-allruns-by-id tabdat) run-id rd) - rd))) + (run-dat (or (hash-table-ref/default (dboard:tabdat-allruns-by-id tabdat) run-id #f) + (let ((rd (dboard:rundat-make-init run: run key-vals: key-vals))) + (hash-table-set! (dboard:tabdat-allruns-by-id tabdat) run-id rd) + rd))) ;; (prev-tests (dboard:rundat-tests prev-dat)) ;; (vector-ref prev-dat 1)) - (last-update - (if do-not-use-query-timestamps - 0 - (dboard:rundat-last-update run-dat) - ;;(hash-table-ref/default (dboard:tabdat-run-update-times tabdat) run-id 0) - )) - - (db-path (or (dboard:rundat-db-path run-dat) - (let* ((db-dir (tasks:get-task-db-path)) - (db-pth (conc db-dir "/" run-id ".db"))) - (dboard:rundat-db-path-set! run-dat db-pth) - db-pth))) - (tmptests (if (or do-not-use-db-file-timestamps - (>= (common:lazy-modification-time db-path) last-update)) - (db:dispatch-query access-mode rmt:get-tests-for-run db:get-tests-for-run - run-id testnamepatt states statuses ;; run-id testpatt states statuses - (dboard:rundat-run-data-offset run-dat) - num-to-get - (dboard:tabdat-hide-not-hide tabdat) ;; no-in - sort-by ;; sort-by - sort-order ;; sort-order - #f ;; 'shortlist ;; qrytype - (if (dboard:tabdat-filters-changed tabdat) - 0 - last-update) ;; last-update - *dashboard-mode*) ;; use dashboard mode - '())) + (last-update (if (or do-not-use-query-timestamps + (dboard:tabdat-filters-changed tabdat)) + 0 + (dboard:rundat-last-update run-dat))) + (last-db-time (if do-not-use-db-file-timestamps + 0 + (dboard:rundat-last-db-time run-dat))) + (db-path (or (dboard:rundat-db-path run-dat) + (let* ((db-dir (common:get-db-tmp-area)) + (db-pth (conc db-dir "/megatest.db"))) + (dboard:rundat-db-path-set! run-dat db-pth) + db-pth))) + (db-mod-time (common:lazy-sqlite-db-modification-time db-path)) + (db-modified (>= db-mod-time last-db-time)) + (multi-get (> (dboard:rundat-run-data-offset run-dat) 0)) ;; multi-get in progress + (tmptests (if (or do-not-use-db-file-timestamps + (dboard:tabdat-filters-changed tabdat) + db-modified) + (db:dispatch-query access-mode rmt:get-tests-for-run db:get-tests-for-run + run-id testnamepatt states statuses ;; run-id testpatt states statuses + (dboard:rundat-run-data-offset run-dat) ;; query offset + num-to-get + (dboard:tabdat-hide-not-hide tabdat) ;; no-in + sort-by ;; sort-by + sort-order ;; sort-order + #f ;; 'shortlist ;; qrytype + last-update ;; last-update + *dashboard-mode*) ;; use dashboard mode + '())) (use-new (dboard:tabdat-hide-not-hide tabdat)) (tests-ht (if (dboard:tabdat-filters-changed tabdat) (let ((ht (make-hash-table))) (dboard:rundat-tests-set! run-dat ht) ht) - (dboard:rundat-tests run-dat)))) - ;;(start-time (current-seconds))) + (dboard:rundat-tests run-dat))) + (got-all (< (length tmptests) num-to-get)) ;; got all for this round + ) + + ;; if we saw the db modified, reset it (the signal has already been used) + (if (and got-all ;; (not multi-get) + db-modified) + (dboard:rundat-last-db-time-set! run-dat (- start-time 2))) ;; to limit the amount of data transferred each cycle use limit of num-to-get and offset - (dboard:rundat-run-data-offset-set! - run-dat - (if (< (length tmptests) num-to-get) - 0 - (let ((newval (+ num-to-get (dboard:rundat-run-data-offset run-dat)))) - ;; (print "Incremental get, offset=" (dboard:rundat-run-data-offset run-dat) " retrieved: " (length tmptests) " newval: " newval) - newval))) - + ;; DO NOT bump time indexes last-update and last-db-time until all the first pass of the + ;; data has been read + ;; set last-update to 0 if still getting data incrementally ;; NO NEED, handled above + ;; + ;; (debug:print 0 *default-log-port* "got-all: " got-all " multi-get: " multi-get " num-to-get: " num-to-get " (length tmptests): " (length tmptests) " db-modified: " db-modified " db-mod-time: " db-mod-time " db-path: " db-path) + (if got-all + (begin + (dboard:rundat-last-update-set! run-dat (- start-time 2)) + (dboard:rundat-run-data-offset-set! run-dat 0)) + (begin + (dboard:rundat-run-data-offset-set! run-dat + (+ num-to-get (dboard:rundat-run-data-offset run-dat))))) + (for-each (lambda (tdat) (let ((test-id (db:test-get-id tdat)) (state (db:test-get-state tdat))) (dboard:rundat-data-changed-set! run-dat #t) @@ -568,22 +606,10 @@ (if (equal? state "DELETED") (hash-table-delete! tests-ht test-id) (hash-table-set! tests-ht test-id tdat)))) tmptests) - ;; set last-update to 0 if still getting data incrementally - - (if (> (dboard:rundat-run-data-offset run-dat) 0) - (begin - ;; (print "run-data-offset: " (dboard:rundat-run-data-offset run-dat) ", setting last-update to 0") - ;; (dboard:rundat-last-update-set! run-dat 0) - (dboard:rundat-last-update-set! run-dat 0)) - ;; (hash-table-set! (dboard:tabdat-run-update-times tabdat) run-id (- start-time 3)) - - (dboard:rundat-last-update-set! run-dat (- (current-seconds) 2))) ;; go back two seconds in time to ensure all changes are captured. - - ;; (debug:print-info 0 *default-log-port* "tests-ht: " (hash-table-keys tests-ht)) tests-ht)) ;; tmptests - new tests data ;; prev-tests - old tests data ;; @@ -623,10 +649,12 @@ (for-each (lambda (run) (hash-table-set! ht (db:get-value-by-header run header "id") run)) runs-tree) ;; (vector-ref runs-dat 1)) ht)) (tb (dboard:tabdat-runs-tree tabdat))) + ;;(BB> "In update-rundat") + ;;(inspect allruns runs-hash) (dboard:tabdat-last-runs-update-set! tabdat (- (current-seconds) 2)) (dboard:tabdat-header-set! tabdat header) ;; ;; trim runs to only those that are changing often here ;; @@ -740,11 +768,17 @@ (run-struct (or run-struct (dboard:rundat-make-init run: run tests: tests-ht key-vals: key-vals))) - (new-res (if (null? all-test-ids) res (cons run-struct res))) + (new-res (if (null? all-test-ids) + res + (delete-duplicates + (cons run-struct res) + (lambda (a b) + (eq? (db:get-value-by-header (dboard:rundat-run a) header "id") + (db:get-value-by-header (dboard:rundat-run b) header "id")))))) (elapsed-time (- (current-seconds) start-time))) (if (null? all-test-ids) (hash-table-delete! (dboard:tabdat-allruns-by-id tabdat) run-id) (hash-table-set! (dboard:tabdat-allruns-by-id tabdat) run-id run-struct)) (if (or (null? tal) @@ -2659,11 +2693,11 @@ (get-environment-variable "DASHBOARDROWS") "15")))) (define *tim* (iup:timer)) (define *ord* #f) -(iup:attribute-set! *tim* "TIME" 300) +(iup:attribute-set! *tim* "TIME" 300 ) (iup:attribute-set! *tim* "RUN" "YES") (define *last-recalc-ended-time* 0) (define (dashboard:recalc modtime please-update-buttons last-db-update-time) @@ -2680,11 +2714,11 @@ (define (dashboard:get-youngest-run-db-mod-time dbdir) (handle-exceptions exn (begin - (debug:print 0 *default-log-port* "WARNING: error in accessing databases in get-youngest-run-db-mod-time: " ((condition-property-accessor 'exn 'message) exn)) + (debug:print 0 *default-log-port* "WARNING: error in accessing databases in get-youngest-run-db-mod-time: " ((condition-property-accessor 'exn 'message) exn) " db-dir="dbdir) (current-seconds)) ;; something went wrong - just print an error and return current-seconds (common:max (map (lambda (filen) (file-modification-time filen)) (glob (conc dbdir "/*.db*")))))) @@ -3391,10 +3425,13 @@ ;;(print "RA => calling runs-tab-updater with commondat " commondat " tab-num " tab-num) ;;(tabdat-values tabdat) ;;RA added ;; (pp (dboard:tabdat->alist tabdat)) ;; (if (dashboard:database-changed? commondat tabdat context-key: 'runs-rundat) (dashboard:do-update-rundat tabdat) + ;;(BB> "dashboard:runs-tab-updater") + ;;(inspect tabdat) + (let ((uidat (dboard:commondat-uidat commondat))) ;;(print "RA => Calling update-buttons with tabdat : " tabdat " uidat " uidat) (update-buttons tabdat uidat (dboard:tabdat-numruns tabdat) (dboard:tabdat-num-tests tabdat))) )) "dashboard:runs-tab-updater")) Index: db.scm ================================================================== --- db.scm +++ db.scm @@ -13,14 +13,14 @@ ;; Database access ;;====================================================================== ;; dbstruct vector containing all the relevant dbs like main.db, megatest.db, run.db etc -(require-extension (srfi 18) extras tcp) ;; RADT => use of require-extension? -(use sqlite3 srfi-1 posix regex regex-case srfi-69 csv-xml s11n md5 message-digest base64 format dot-locking z3 typed-records) +(use (srfi 18) extras tcp stack) +(use sqlite3 srfi-1 posix regex regex-case srfi-69 csv-xml s11n md5 message-digest base64 format dot-locking z3 typed-records matchable) (import (prefix sqlite3 sqlite3:)) -(import (prefix base64 base64:)) ;; RADT => prefix?? +(import (prefix base64 base64:)) (declare (unit db)) (declare (uses common)) (declare (uses keys)) (declare (uses ods)) @@ -42,11 +42,12 @@ ;; each db entry is a pair ( db . dbfilepath ) ;; I propose this record evolves into the area record ;; (defstruct dbr:dbstruct - (tmpdb #f) + ;; (tmpdb #f) + (dbstack #f) ;; stack for tmp db handles, do not initialize with a stack (mtdb #f) (refndb #f) (homehost #f) ;; not used yet (on-homehost #f) ;; not used yet ) ;; goal is to converge on one struct for an area but for now it is too confusing @@ -91,12 +92,17 @@ ;; if #f => get main db ;; if db already open - return inmem ;; if db not open, open inmem, rundb and sync then return inmem ;; inuse gets set automatically for rundb's ;; -(define (db:get-db dbstruct . blah) ;; run-id) - (or (dbr:dbstruct-tmpdb dbstruct) +(define (db:get-db dbstruct) ;; run-id) + (if (stack? (dbr:dbstruct-dbstack dbstruct)) + (if (stack-empty? (dbr:dbstruct-dbstack dbstruct)) + (let ((newdb (db:open-megatest-db path: (db:dbfile-path)))) + ;; (stack-push! (dbr:dbstruct-dbstack dbstruct) newdb) + newdb) + (stack-pop! (dbr:dbstruct-dbstack dbstruct))) (db:open-db dbstruct))) ;; ;; legacy handling of structure for managing db's. Refactor this into dbr:? (define (db:dbdat-get-db dbdat) (if (pair? dbdat) @@ -125,24 +131,35 @@ ;; (db:with-db dbstruct run-id sqlite3:exec "select blah fgrom blaz;") ;; r/w is a flag to indicate if the db is modified by this query #t = yes, #f = no ;; (define (db:with-db dbstruct run-id r/w proc . params) - (let* ((dbdat (if (dbr:dbstruct? dbstruct) - (db:get-db dbstruct run-id) - (begin - (print-call-chain) - (print "db:with-db called with dbdat instead of dbstruct, FIXME!!") - dbstruct))) ;; cheat, allow for passing in a dbdat - (db (db:dbdat-get-db dbdat))) + (let* ((have-struct (dbr:dbstruct? dbstruct)) + (dbdat (if have-struct + (db:get-db dbstruct) + #f)) + (db (if have-struct + (db:dbdat-get-db dbdat) + dbstruct)) + (use-mutex (> *api-process-request-count* 25))) + (if (and use-mutex + (common:low-noise-print 120 "over-50-parallel-api-requests")) + (debug:print-info 0 *default-log-port* *api-process-request-count* " parallel api requests being processed in process " (current-process-id) ", throttling access")) + (if (common:low-noise-print 600 (conc "parallel-api-requests" *max-api-process-requests*)) + (debug:print-info 2 *default-log-port* "Parallel api request count: " *api-process-request-count* " max parallel requests: " *max-api-process-requests*)) (handle-exceptions exn (begin + (print-call-chain (current-error-port)) (debug:print-error 0 *default-log-port* "sqlite3 issue in db:with-db, dbstruct=" dbstruct ", run-id=" run-id ", proc=" proc ", params=" params " error: " ((condition-property-accessor 'exn 'message) exn)) - (print-call-chain (current-error-port))) + ;; there is no recovering at this time. exit + (exit 50)) + (if use-mutex (mutex-lock! *db-with-db-mutex*)) (let ((res (apply proc db params))) + (if use-mutex (mutex-unlock! *db-with-db-mutex*)) ;; (if (vector? dbstruct)(db:done-with dbstruct run-id r/w)) + (if dbdat (stack-push! (dbr:dbstruct-dbstack dbstruct) dbdat)) res)))) ;;====================================================================== ;; K E E P F I L E D B I N dbstruct ;;====================================================================== @@ -171,33 +188,22 @@ ;; (was planned to be; zeroth db with name=main.db) ;; ;; If run-id is #f return to create and retrieve the path where the db will live. ;; (define (db:dbfile-path . junk) ;; run-id) - (let* ((dbdir (common:get-db-tmp-area))) ;; (db:get-dbdir)) -;; (fname (if run-id -;; (if (eq? run-id 0) "main.db" (conc run-id ".db")) -;; #f))) + (let* ((dbdir (common:get-db-tmp-area))) (handle-exceptions exn (begin (debug:print-error 0 *default-log-port* "Couldn't create path to " dbdir) (exit 1)) (if (not (directory? dbdir))(create-directory dbdir #t))) - dbdir)) ;; (if fname -;; (conc dbdir "/" fname) -;; dbdir))) - -;; Returns the database location as specified in config file -;; -;; (define db:get-dbdir common:get-db-tmp-area) -;; (or (configf:lookup *configdat* "setup" "dbdir") -;; (conc (configf:lookup *configdat* "setup" "linktree") "/.db"))) - + dbdir)) + (define (db:set-sync db) (let ((syncprag (configf:lookup *configdat* "setup" "sychronous"))) - (sqlite3:execute db (conc "PRAGMA synchronous = " (or syncprag 1) ";")))) + (sqlite3:execute db (conc "PRAGMA synchronous = " (or syncprag 0) ";")))) ;; open an sql database inside a file lock ;; returns: db existed-prior-to-opening ;; RA => Returns a db handler; sets the lock if opened in writable mode ;; @@ -211,11 +217,11 @@ (if file-write ;; dir-writable (let (;; (lock (obtain-dot-lock fname 1 5 10)) (db (sqlite3:open-database fname))) (sqlite3:set-busy-handler! db (make-busy-timeout 136000)) ;; (db:set-sync db) - (sqlite3:execute db "PRAGMA synchronous = NORMAL;") + (sqlite3:execute db "PRAGMA synchronous = 0;") (if (not file-exists) (begin (if (string-match "^/tmp/.*" fname) ;; this is a file in /tmp (sqlite3:execute db "PRAGMA journal_mode=WAL;") (print "Creating " fname " in NON-WAL mode.")) @@ -264,14 +270,13 @@ ;; db)) ;; This routine creates the db if not already present. It is only called if the db is not already opened ;; (define (db:open-db dbstruct #!key (areapath #f)) - (let ((tmpdb (dbr:dbstruct-tmpdb dbstruct))) ;; RA => Returns the first reference in dbstruct - (if tmpdb - tmpdb - ;; (mutex-lock! *rundb-mutex*) + (let ((tmpdb-stack (dbr:dbstruct-dbstack dbstruct))) ;; RA => Returns the first reference in dbstruct + (if (stack? tmpdb-stack) + (db:get-db tmpdb-stack) ;; get previously opened db (will create new db handle if all in the stack are already used (let* ((dbpath (db:dbfile-path)) ;; 0)) (dbexists (file-exists? dbpath)) (dbfexists (file-exists? (conc dbpath "/megatest.db"))) (tmpdb (db:open-megatest-db path: dbpath)) ;; lock-create-open dbpath db:initialize-main-db)) (mtdb (db:open-megatest-db)) @@ -278,11 +283,12 @@ (refndb (db:open-megatest-db path: dbpath name: "megatest_ref.db")) (write-access (file-write-access? dbpath))) (if (and dbexists (not write-access)) (set! *db-write-access* #f)) (dbr:dbstruct-mtdb-set! dbstruct mtdb) - (dbr:dbstruct-tmpdb-set! dbstruct tmpdb) ;; olddb is already a (cons db path) + (dbr:dbstruct-dbstack-set! dbstruct (make-stack)) + (stack-push! (dbr:dbstruct-dbstack dbstruct) tmpdb) ;; olddb is already a (cons db path) (dbr:dbstruct-refndb-set! dbstruct refndb) ;; (mutex-unlock! *rundb-mutex*) (if (and (not dbfexists) write-access) ;; *db-write-access*) ;; did not have a prior db and do have write access (begin @@ -324,11 +330,11 @@ (cons db dbpath))) ;; sync run to disk if touched ;; (define (db:sync-touched dbstruct run-id #!key (force-sync #f)) - (let ((tmpdb (dbr:dbstruct-tmpdb dbstruct)) + (let ((tmpdb (db:get-db dbstruct)) (mtdb (dbr:dbstruct-mtdb dbstruct)) (refndb (dbr:dbstruct-refndb dbstruct)) (start-t (current-seconds))) (debug:print-info 4 *default-log-port* "Syncing for run-id: " run-id) (mutex-lock! *db-multi-sync-mutex*) @@ -335,21 +341,24 @@ (let ((update_info (cons (if force-sync 0 *db-last-sync*) "last_update"))) (mutex-unlock! *db-multi-sync-mutex*) (db:sync-tables (db:sync-all-tables-list dbstruct) update_info tmpdb refndb mtdb)) (mutex-lock! *db-multi-sync-mutex*) (set! *db-last-sync* start-t) - (mutex-unlock! *db-multi-sync-mutex*))) + (set! *db-last-access* start-t) + (mutex-unlock! *db-multi-sync-mutex*) + (stack-push! (dbr:dbstruct-dbstack dbstruct) tmpdb))) ;; close all opened run-id dbs (define (db:close-all dbstruct) (if (dbr:dbstruct? dbstruct) (begin ;; (db:sync-touched dbstruct 0 force-sync: #t) ;; NO. Do not do this here. Instead we rely on a server to be started when there are writes, even if the server itself is not going to be used as a server. - (let ((tdb (db:dbdat-get-db (dbr:dbstruct-tmpdb dbstruct))) + (let ((tdbs (map db:dbdat-get-db + (stack->list (dbr:dbstruct-dbstack dbstruct)))) (mdb (db:dbdat-get-db (dbr:dbstruct-mtdb dbstruct))) (rdb (db:dbdat-get-db (dbr:dbstruct-refndb dbstruct)))) - (if tdb (sqlite3:finalize! tdb)) + (map sqlite3:finalize! tdbs) (if mdb (sqlite3:finalize! mdb)) (if rdb (sqlite3:finalize! rdb)))))) ;; (let ((locdbs (dbr:dbstruct-locdbs dbstruct))) ;; (if (hash-table? locdbs) @@ -625,11 +634,11 @@ ;; first pass implementation, just insert all changed rows (for-each (lambda (targdb) (let* ((db (db:dbdat-get-db targdb)) (stmth (sqlite3:prepare db full-ins))) - ;; (db:delay-if-busy targdb) ;; NO WAITING + (db:delay-if-busy targdb) ;; NO WAITING (for-each (lambda (fromdat-lst) (sqlite3:with-transaction db (lambda () @@ -739,11 +748,11 @@ ;; Add db direct ;; (define (db:dispatch-query access-mode rmt-cmd db-cmd . params) (if (eq? access-mode 'cached) - (print "not doing cached calls right now")) + (debug:print 2 *default-log-port* "not doing cached calls right now")) ;; (apply db:call-with-cached-db db-cmd params) (apply rmt-cmd params)) ;;) ;; return the target db handle so it can be used @@ -813,23 +822,24 @@ ;; (define (db:multi-db-sync dbstruct . options) (if (not (launch:setup)) (debug:print 0 *default-log-port* "ERROR: not able to setup up for megatest.") (let* ((mtdb (dbr:dbstruct-mtdb dbstruct)) - (tmpdb (dbr:dbstruct-tmpdb dbstruct)) + (tmpdb (db:get-db dbstruct)) (refndb (dbr:dbstruct-refndb dbstruct)) (allow-cleanup #t) ;; (if run-ids #f #t)) - (tdbdat (tasks:open-db)) - (servers (tasks:get-all-servers (db:delay-if-busy tdbdat))) + ;; (tdbdat (tasks:open-db)) + (servers (server:get-list *toppath*)) ;; (tasks:get-all-servers (db:delay-if-busy tdbdat))) (data-synced 0)) ;; count of changed records (I hope) ;; kill servers (if (member 'killservers options) (for-each (lambda (server) - (tasks:server-delete-record (db:delay-if-busy tdbdat) (vector-ref server 0) "dbmigration") - (tasks:kill-server (vector-ref server 2)(vector-ref server 1))) + (match-let (((mod-time host port start-time pid) server)) + (if (and host pid) + (tasks:kill-server host pid)))) servers)) ;; clear out junk records ;; (if (member 'dejunk options) @@ -942,10 +952,11 @@ ;; (delete-file fullname))))) ;; dead-runs)))) ;; ;; (db:close-all dbstruct) ;; (sqlite3:finalize! mdb) + (stack-push! (dbr:dbstruct-dbstack dbstruct) tmpdb) data-synced))) ;; keeping it around for debugging purposes only (define (open-run-close-no-exception-handling proc idb . params) (debug:print-info 11 *default-log-port* "open-run-close-no-exception-handling START given a db=" (if idb "yes " "no ") ", params=" params) @@ -1138,11 +1149,11 @@ fail_count INTEGER DEFAULT 0, pass_count INTEGER DEFAULT 0, archived INTEGER DEFAULT 0, -- 0=no, > 1=archive block id where test data can be found last_update INTEGER DEFAULT (strftime('%s','now')), CONSTRAINT testsconstraint UNIQUE (run_id, testname, item_path));") - (sqlite3:execute db "CREATE INDEX IF NOT EXISTS tests_index ON tests (run_id, testname, item_path);") + (sqlite3:execute db "CREATE INDEX IF NOT EXISTS tests_index ON tests (run_id, testname, item_path, uname);") (sqlite3:execute db "CREATE TRIGGER IF NOT EXISTS update_tests_trigger AFTER UPDATE ON tests FOR EACH ROW BEGIN UPDATE tests SET last_update=(strftime('%s','now')) WHERE id=old.id; @@ -1210,11 +1221,11 @@ ;; dneeded is minimum space needed, scan for existing archives that ;; are on disks with adequate space and already have this test/itempath ;; archived ;; (define (db:archive-get-allocations dbstruct testname itempath dneeded) - (let* ((dbdat (db:get-db dbstruct #f)) ;; archive tables are in main.db + (let* ((dbdat (db:get-db dbstruct)) ;; archive tables are in main.db (db (db:dbdat-get-db dbdat)) (res '()) (blocks '())) ;; a block is an archive chunck that can be added too if there is space (sqlite3:for-each-row (lambda (id archive-disk-id disk-path last-du last-du-time) @@ -1241,11 +1252,11 @@ ;; returns id of the record, register a disk allocated to archiving and record it's last known ;; available space ;; (define (db:archive-register-disk dbstruct bdisk-name bdisk-path df) - (let* ((dbdat (db:get-db dbstruct #f)) ;; archive tables are in main.db + (let* ((dbdat (db:get-db dbstruct)) ;; archive tables are in main.db (db (db:dbdat-get-db dbdat)) (res #f)) (sqlite3:for-each-row (lambda (id) (set! res id)) @@ -1255,25 +1266,27 @@ (if res ;; record exists, update df and return id (begin (sqlite3:execute db "UPDATE archive_disks SET last_df=?,last_df_time=(strftime('%s','now')) WHERE archive_area_name=? AND disk_path=?;" df bdisk-name bdisk-path) + (stack-push! (dbr:dbstruct-dbstack dbstruct) dbdat) res) (begin (sqlite3:execute db "INSERT OR REPLACE INTO archive_disks (archive_area_name,disk_path,last_df) VALUES (?,?,?);" bdisk-name bdisk-path df) + (stack-push! (dbr:dbstruct-dbstack dbstruct) dbdat) (db:archive-register-disk dbstruct bdisk-name bdisk-path df))))) ;; record an archive path created on a given archive disk (identified by it's bdisk-id) ;; if path starts with / then it is full, otherwise it is relative to the archive disk ;; preference is to store the relative path. ;; (define (db:archive-register-block-name dbstruct bdisk-id archive-path #!key (du #f)) - (let* ((dbdat (db:get-db dbstruct #f)) ;; archive tables are in main.db + (let* ((dbdat (db:get-db dbstruct)) ;; archive tables are in main.db (db (db:dbdat-get-db dbdat)) (res #f)) ;; first look to see if this path is already registered (sqlite3:for-each-row (lambda (id) @@ -1370,62 +1383,63 @@ ;;====================================================================== ;; M A I N T E N A N C E ;;====================================================================== (define (db:have-incompletes? dbstruct run-id ovr-deadtime) - (let* ((dbdat (db:get-db dbstruct run-id)) - (db (db:dbdat-get-db dbdat)) - (incompleted '()) + (let* ((incompleted '()) (oldlaunched '()) (toplevels '()) (deadtime-str (configf:lookup *configdat* "setup" "deadtime")) (deadtime (if (and deadtime-str (string->number deadtime-str)) (string->number deadtime-str) 7200))) ;; two hours - (if (number? ovr-deadtime)(set! deadtime ovr-deadtime)) - - ;; in RUNNING or REMOTEHOSTSTART for more than 10 minutes - ;; - ;; HOWEVER: this code in run:test seems to work fine - ;; (> (- (current-seconds)(+ (db:test-get-event_time testdat) - ;; (db:test-get-run_duration testdat))) - ;; 600) - ;; (db:delay-if-busy dbdat) - (sqlite3:for-each-row - (lambda (test-id run-dir uname testname item-path) - (if (and (equal? uname "n/a") - (equal? item-path "")) ;; this is a toplevel test - ;; what to do with toplevel? call rollup? - (begin - (set! toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels)) - (debug:print-info 0 *default-log-port* "Found old toplevel test in RUNNING state, test-id=" test-id)) - (set! incompleted (cons (list test-id run-dir uname testname item-path run-id) incompleted)))) - db - "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > (run_duration + ?) AND state IN ('RUNNING','REMOTEHOSTSTART');" - run-id deadtime) - - ;; in LAUNCHED for more than one day. Could be long due to job queues TODO/BUG: Need override for this in config - ;; - ;; (db:delay-if-busy dbdat) - (sqlite3:for-each-row - (lambda (test-id run-dir uname testname item-path) - (if (and (equal? uname "n/a") - (equal? item-path "")) ;; this is a toplevel test - ;; what to do with toplevel? call rollup? - (set! toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels)) - (set! oldlaunched (cons (list test-id run-dir uname testname item-path run-id) oldlaunched)))) - db - "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > 86400 AND state IN ('LAUNCHED');" - run-id) - - (debug:print-info 18 *default-log-port* "Found " (length oldlaunched) " old LAUNCHED items, " (length toplevels) " old LAUNCHED toplevel tests and " (length incompleted) " tests marked RUNNING but apparently dead.") - (if (and (null? incompleted) - (null? oldlaunched) - (null? toplevels)) - #f - #t))) + (db:with-db + dbstruct #f #f + (lambda (db) + (if (number? ovr-deadtime)(set! deadtime ovr-deadtime)) + + ;; in RUNNING or REMOTEHOSTSTART for more than 10 minutes + ;; + ;; HOWEVER: this code in run:test seems to work fine + ;; (> (- (current-seconds)(+ (db:test-get-event_time testdat) + ;; (db:test-get-run_duration testdat))) + ;; 600) + ;; (db:delay-if-busy dbdat) + (sqlite3:for-each-row + (lambda (test-id run-dir uname testname item-path) + (if (and (equal? uname "n/a") + (equal? item-path "")) ;; this is a toplevel test + ;; what to do with toplevel? call rollup? + (begin + (set! toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels)) + (debug:print-info 0 *default-log-port* "Found old toplevel test in RUNNING state, test-id=" test-id)) + (set! incompleted (cons (list test-id run-dir uname testname item-path run-id) incompleted)))) + db + "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > (run_duration + ?) AND state IN ('RUNNING','REMOTEHOSTSTART');" + run-id deadtime) + + ;; in LAUNCHED for more than one day. Could be long due to job queues TODO/BUG: Need override for this in config + ;; + ;; (db:delay-if-busy dbdat) + (sqlite3:for-each-row + (lambda (test-id run-dir uname testname item-path) + (if (and (equal? uname "n/a") + (equal? item-path "")) ;; this is a toplevel test + ;; what to do with toplevel? call rollup? + (set! toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels)) + (set! oldlaunched (cons (list test-id run-dir uname testname item-path run-id) oldlaunched)))) + db + "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > 86400 AND state IN ('LAUNCHED');" + run-id) + + (debug:print-info 18 *default-log-port* "Found " (length oldlaunched) " old LAUNCHED items, " (length toplevels) " old LAUNCHED toplevel tests and " (length incompleted) " tests marked RUNNING but apparently dead.") + (if (and (null? incompleted) + (null? oldlaunched) + (null? toplevels)) + #f + #t))))) ;; given a launch delay (minimum time from last launch) return amount of time to wait ;; ;; (define (db:launch-delay-left dbstruct run-id launch-delay) @@ -1434,95 +1448,103 @@ ;; (select testname,item_path,event_time+run_duration as ;; end_time,strftime('%s','now') as now from tests where state in ;; ('RUNNING','REMOTEHOSTSTART','LAUNCED')); (define (db:find-and-mark-incomplete dbstruct run-id ovr-deadtime) - (let* ((dbdat (db:get-db dbstruct run-id)) - (db (db:dbdat-get-db dbdat)) - (incompleted '()) + (let* ((incompleted '()) (oldlaunched '()) (toplevels '()) (deadtime-str (configf:lookup *configdat* "setup" "deadtime")) (deadtime (if (and deadtime-str (string->number deadtime-str)) (string->number deadtime-str) 7200))) ;; two hours - (if (number? ovr-deadtime)(set! deadtime ovr-deadtime)) - - ;; in RUNNING or REMOTEHOSTSTART for more than 10 minutes - ;; - ;; HOWEVER: this code in run:test seems to work fine - ;; (> (- (current-seconds)(+ (db:test-get-event_time testdat) - ;; (db:test-get-run_duration testdat))) - ;; 600) - ;; (db:delay-if-busy dbdat) - (sqlite3:for-each-row - (lambda (test-id run-dir uname testname item-path) - (if (and (equal? uname "n/a") - (equal? item-path "")) ;; this is a toplevel test - ;; what to do with toplevel? call rollup? - (begin - (set! toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels)) - (debug:print-info 0 *default-log-port* "Found old toplevel test in RUNNING state, test-id=" test-id)) - (set! incompleted (cons (list test-id run-dir uname testname item-path run-id) incompleted)))) - db - "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > (run_duration + ?) AND state IN ('RUNNING','REMOTEHOSTSTART');" - run-id deadtime) - - ;; in LAUNCHED for more than one day. Could be long due to job queues TODO/BUG: Need override for this in config - ;; - ;; (db:delay-if-busy dbdat) - (sqlite3:for-each-row - (lambda (test-id run-dir uname testname item-path) - (if (and (equal? uname "n/a") - (equal? item-path "")) ;; this is a toplevel test - ;; what to do with toplevel? call rollup? - (set! toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels)) - (set! oldlaunched (cons (list test-id run-dir uname testname item-path run-id) oldlaunched)))) - db - "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > 86400 AND state IN ('LAUNCHED');" - run-id) - - (debug:print-info 18 *default-log-port* "Found " (length oldlaunched) " old LAUNCHED items, " (length toplevels) " old LAUNCHED toplevel tests and " (length incompleted) " tests marked RUNNING but apparently dead.") - - ;; These are defunct tests, do not do all the overhead of set-state-status. Force them to INCOMPLETE. - ;; - ;; (db:delay-if-busy dbdat) - (let* (;; (min-incompleted (filter (lambda (x) - ;; (let* ((testpath (cadr x)) - ;; (tdatpath (conc testpath "/testdat.db")) - ;; (dbexists (file-exists? tdatpath))) - ;; (or (not dbexists) ;; if no file then something wrong - mark as incomplete - ;; (> (- (current-seconds)(file-modification-time tdatpath)) 600)))) ;; no change in 10 minutes to testdat.db - she's dead Jim - ;; incompleted)) - (min-incompleted-ids (map car incompleted)) ;; do 'em all - (all-ids (append min-incompleted-ids (map car oldlaunched)))) - (if (> (length all-ids) 0) - (begin - (debug:print 0 *default-log-port* "WARNING: Marking test(s); " (string-intersperse (map conc all-ids) ", ") " as INCOMPLETE") - (sqlite3:execute - db - (conc "UPDATE tests SET state='INCOMPLETE' WHERE id IN (" - (string-intersperse (map conc all-ids) ",") - ");"))))) - - ;; Now do rollups for the toplevel tests - ;; - ;; (db:delay-if-busy dbdat) - (for-each - (lambda (toptest) - (let ((test-name (list-ref toptest 3))) -;; (run-id (list-ref toptest 5))) - (db:top-test-set-per-pf-counts dbstruct run-id test-name))) - toplevels))) + (db:with-db + dbstruct #f #f + (lambda (db) + (if (number? ovr-deadtime)(set! deadtime ovr-deadtime)) + + ;; in RUNNING or REMOTEHOSTSTART for more than 10 minutes + ;; + ;; HOWEVER: this code in run:test seems to work fine + ;; (> (- (current-seconds)(+ (db:test-get-event_time testdat) + ;; (db:test-get-run_duration testdat))) + ;; 600) + ;; (db:delay-if-busy dbdat) + (sqlite3:for-each-row + (lambda (test-id run-dir uname testname item-path) + (if (and (equal? uname "n/a") + (equal? item-path "")) ;; this is a toplevel test + ;; what to do with toplevel? call rollup? + (begin + (set! toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels)) + (debug:print-info 0 *default-log-port* "Found old toplevel test in RUNNING state, test-id=" test-id)) + (set! incompleted (cons (list test-id run-dir uname testname item-path run-id) incompleted)))) + db + "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > (run_duration + ?) AND state IN ('RUNNING','REMOTEHOSTSTART');" + run-id deadtime) + + ;; in LAUNCHED for more than one day. Could be long due to job queues TODO/BUG: Need override for this in config + ;; + ;; (db:delay-if-busy dbdat) + (sqlite3:for-each-row + (lambda (test-id run-dir uname testname item-path) + (if (and (equal? uname "n/a") + (equal? item-path "")) ;; this is a toplevel test + ;; what to do with toplevel? call rollup? + (set! toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels)) + (set! oldlaunched (cons (list test-id run-dir uname testname item-path run-id) oldlaunched)))) + db + "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > 86400 AND state IN ('LAUNCHED');" + run-id) + + (debug:print-info 18 *default-log-port* "Found " (length oldlaunched) " old LAUNCHED items, " (length toplevels) " old LAUNCHED toplevel tests and " (length incompleted) " tests marked RUNNING but apparently dead.") + + ;; These are defunct tests, do not do all the overhead of set-state-status. Force them to INCOMPLETE. + ;; + ;; (db:delay-if-busy dbdat) + (let* (;; (min-incompleted (filter (lambda (x) + ;; (let* ((testpath (cadr x)) + ;; (tdatpath (conc testpath "/testdat.db")) + ;; (dbexists (file-exists? tdatpath))) + ;; (or (not dbexists) ;; if no file then something wrong - mark as incomplete + ;; (> (- (current-seconds)(file-modification-time tdatpath)) 600)))) ;; no change in 10 minutes to testdat.db - she's dead Jim + ;; incompleted)) + (min-incompleted-ids (map car incompleted)) ;; do 'em all + (all-ids (append min-incompleted-ids (map car oldlaunched)))) + (if (> (length all-ids) 0) + (begin + (debug:print 0 *default-log-port* "WARNING: Marking test(s); " (string-intersperse (map conc all-ids) ", ") " as INCOMPLETE") + (for-each + (lambda (test-id) + (db:test-set-state-status dbstruct run-id test-id "COMPLETE" "DEAD" "Test failed to complete")) + all-ids)))))))) + +;; ALL REPLACED BY THE BLOCK ABOVE +;; +;; (sqlite3:execute +;; db +;; (conc "UPDATE tests SET state='INCOMPLETE' WHERE run_id=? AND id IN (" +;; (string-intersperse (map conc all-ids) ",") +;; ");") +;; run-id)))) +;; +;; ;; Now do rollups for the toplevel tests +;; ;; +;; ;; (db:delay-if-busy dbdat) +;; (for-each +;; (lambda (toptest) +;; (let ((test-name (list-ref toptest 3))) +;; ;; (run-id (list-ref toptest 5))) +;; (db:top-test-set-per-pf-counts dbstruct run-id test-name))) +;; toplevels))) ;; BUG: Probably broken - does not explicitly use run-id in the query ;; (define (db:top-test-set-per-pf-counts dbstruct run-id test-name) - (db:general-call (db:get-db dbstruct run-id) 'top-test-set-per-pf-counts (list test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name))) - - + (db:general-call dbstruct 'top-test-set-per-pf-counts (list test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name))) + ;; Clean out old junk and vacuum the database ;; ;; Ultimately do something like this: ;; ;; 1. Look at test records either deleted or part of deleted run: @@ -1663,23 +1685,24 @@ ;; returns number if string->number is successful, string otherwise ;; also updates *global-delta* ;; (define (db:get-var dbstruct var) - (let* ((res #f) - (dbdat (db:get-db dbstruct #f)) - (db (db:dbdat-get-db dbdat))) - (sqlite3:for-each-row - (lambda (val) - (set! res val)) - db - "SELECT val FROM metadat WHERE var=?;" var) - ;; convert to number if can - (if (string? res) - (let ((valnum (string->number res))) - (if valnum (set! res valnum)))) - res)) + (let* ((res #f)) + (db:with-db + dbstruct #f #f + (lambda (db) + (sqlite3:for-each-row + (lambda (val) + (set! res val)) + db + "SELECT val FROM metadat WHERE var=?;" var) + ;; convert to number if can + (if (string? res) + (let ((valnum (string->number res))) + (if valnum (set! res valnum)))) + res)))) ;; This was part of db:get-var. It was used to estimate the load on ;; the database files. ;; ;; scale by 10, average with current value. @@ -1690,16 +1713,15 @@ ;; (begin ;; (debug:print-info 4 *default-log-port* "launch throttle factor=" *global-delta*) ;; (set! *last-global-delta-printed* *global-delta*))) (define (db:set-var dbstruct var val) - (let* ((dbdat (db:get-db dbstruct #f)) - (db (db:dbdat-get-db dbdat))) - (sqlite3:execute db "INSERT OR REPLACE INTO metadat (var,val) VALUES (?,?);" var val))) + (db:with-db dbstruct #f #t + (lambda (db) + (sqlite3:execute db "INSERT OR REPLACE INTO metadat (var,val) VALUES (?,?);" var val)))) (define (db:del-var dbstruct var) - ;; (db:delay-if-busy) (db:with-db dbstruct #f #t (lambda (db) (sqlite3:execute db "DELETE FROM metadat WHERE var=?;" var)))) ;; use a global for some primitive caching, it is just silly to @@ -1795,13 +1817,11 @@ ;; register a test run with the db, this accesses the main.db and does NOT ;; use server api ;; (define (db:register-run dbstruct keyvals runname state status user) - (let* ((dbdat (db:get-db dbstruct #f)) - (db (db:dbdat-get-db dbdat)) - (keys (map car keyvals)) + (let* ((keys (map car keyvals)) (keystr (keys->keystr keys)) (comma (if (> (length keys) 0) "," "")) (andstr (if (> (length keys) 0) " AND " "")) (valslots (keys->valslots keys)) ;; ?,?,? ... (allvals (append (list runname state status user) (map cadr keyvals))) @@ -1808,26 +1828,25 @@ (qryvals (append (list runname) (map cadr keyvals))) (key=?str (string-intersperse (map (lambda (k)(conc k "=?")) keys) " AND "))) (debug:print 3 *default-log-port* "keys: " keys " allvals: " allvals " keyvals: " keyvals " key=?str is " key=?str) (debug:print 2 *default-log-port* "NOTE: using target " (string-intersperse (map cadr keyvals) "/") " for this run") (if (and runname (null? (filter (lambda (x)(not x)) keyvals))) ;; there must be a better way to "apply and" - (let ((res #f)) - ;; (db:delay-if-busy dbdat) - (apply sqlite3:execute db (conc "INSERT OR IGNORE INTO runs (runname,state,status,owner,event_time" comma keystr ") VALUES (?,?,?,?,strftime('%s','now')" comma valslots ");") - allvals) - ;; (db:delay-if-busy dbdat) - (apply sqlite3:for-each-row - (lambda (id) - (set! res id)) - db - (let ((qry (conc "SELECT id FROM runs WHERE (runname=? " andstr key=?str ");"))) - ;(debug:print 4 *default-log-port* "qry: " qry) - qry) - qryvals) - ;; (db:delay-if-busy dbdat) - (sqlite3:execute db "UPDATE runs SET state=?,status=?,event_time=strftime('%s','now') WHERE id=? AND state='deleted';" state status res) - res) + (db:with-db + dbstruct #f #f + (lambda (db) + (let ((res #f)) + (apply sqlite3:execute db (conc "INSERT OR IGNORE INTO runs (runname,state,status,owner,event_time" comma keystr ") VALUES (?,?,?,?,strftime('%s','now')" comma valslots ");") + allvals) + (apply sqlite3:for-each-row + (lambda (id) + (set! res id)) + db + (let ((qry (conc "SELECT id FROM runs WHERE (runname=? " andstr key=?str ");"))) + qry) + qryvals) + (sqlite3:execute db "UPDATE runs SET state=?,status=?,event_time=strftime('%s','now') WHERE id=? AND state='deleted';" state status res) + res))) (begin (debug:print-error 0 *default-log-port* "Called without all necessary keys") #f)))) ;; replace header and keystr with a call to runs:get-std-run-fields @@ -1945,23 +1964,26 @@ (sqlite3:fold-row (lambda (res state status count) (cons (list state status count) res)) '() db - "SELECT state,status,count(id) AS count FROM tests WHERE run_id=? AND NOT(uname='n/a' AND item_path='') GROUP BY state,status;;" + "SELECT state,status,count(id) AS count FROM tests WHERE run_id=? AND NOT(uname='n/a' AND item_path='') GROUP BY state,status;" run-id)))) ;; Update run_stats for given run_id ;; input data is a list (state status count) ;; (define (db:update-run-stats dbstruct run-id stats) + ;; (mutex-lock! *db-transaction-mutex*) (db:with-db dbstruct #f #f + (lambda (db) ;; remove previous data + (let* ((stmt1 (sqlite3:prepare db "DELETE FROM run_stats WHERE run_id=? AND state=? AND status=?;")) (stmt2 (sqlite3:prepare db "INSERT INTO run_stats (run_id,state,status,count) VALUES (?,?,?,?);")) (res (sqlite3:with-transaction db @@ -1971,10 +1993,11 @@ (sqlite3:execute stmt1 run-id (car dat)(cadr dat)) (apply sqlite3:execute stmt2 run-id dat)) stats))))) (sqlite3:finalize! stmt1) (sqlite3:finalize! stmt2) + ;; (mutex-unlock! *db-transaction-mutex*) res)))) (define (db:get-main-run-stats dbstruct run-id) (db:with-db dbstruct @@ -1986,10 +2009,25 @@ (cons (list state status count) res)) '() db "SELECT state,status,count FROM run_stats WHERE run_id=? AND run_id IN (SELECT id FROM runs WHERE state NOT IN ('DELETED','deleted'));" run-id)))) + +(define (db:print-current-query-stats) + ;; generate stats from *db-api-call-time* + (let ((ordered-keys (sort (hash-table-keys *db-api-call-time*) + (lambda (a b) + (let ((sum-a (common:sum (hash-table-ref *db-api-call-time* a))) + (sum-b (common:sum (hash-table-ref *db-api-call-time* b)))) + (> sum-a sum-b)))))) + (for-each + (lambda (cmd-key) + (let* ((dat (hash-table-ref *db-api-call-time* cmd-key)) + (avg (if (> (length dat) 0) + (/ (common:sum dat)(length dat))))) + (debug:print-info 0 *default-log-port* cmd-key "\tavg: " avg " max: " (common:max dat) " min: " (common:min-max < dat) " num: " (length dat)))) + ordered-keys))) (define (db:get-all-run-ids dbstruct) (db:with-db dbstruct #f @@ -2002,27 +2040,30 @@ db "SELECT id FROM runs WHERE state != 'deleted' ORDER BY event_time DESC;") (reverse run-ids))))) ;; get some basic run stats +;; +;; data structure: ;; ;; ( (runname (( state count ) ... )) -;; ( ... +;; ( ... +;; (define (db:get-run-stats dbstruct) - (let* ((dbdat (db:get-db dbstruct #f)) - (db (db:dbdat-get-db dbdat)) - (totals (make-hash-table)) + (let* ((totals (make-hash-table)) (curr (make-hash-table)) (res '()) (runs-info '())) ;; First get all the runname/run-ids - ;; (db:delay-if-busy dbdat) - (sqlite3:for-each-row - (lambda (run-id runname) - (set! runs-info (cons (list run-id runname) runs-info))) - db - "SELECT id,runname FROM runs WHERE state != 'deleted' ORDER BY event_time DESC;") ;; If you change this to the more logical ASC please adjust calls to db:get-run-stats + (db:with-db + dbstruct #f #f + (lambda (db) + (sqlite3:for-each-row + (lambda (run-id runname) + (set! runs-info (cons (list run-id runname) runs-info))) + db + "SELECT id,runname FROM runs WHERE state != 'deleted' ORDER BY event_time DESC;"))) ;; If you change this to the more logical ASC please adjust calls to db:get-run-stats ;; for each run get stats data (for-each (lambda (run-info) ;; get the net state/status counts for this run (let* ((run-id (car run-info)) @@ -2038,11 +2079,12 @@ (if (string? netstate) (begin (hash-table-set! totals netstate (+ (hash-table-ref/default totals netstate 0) count)) (hash-table-set! curr netstate (+ (hash-table-ref/default curr netstate 0) count)))))) db - "SELECT state,status,count(id) FROM tests AS t GROUP BY state,status ORDER BY state,status DESC;") + "SELECT state,status,count(id) FROM tests AS t WHERE run_id=? GROUP BY state,status ORDER BY state,status DESC;" + run-id) ;; add the per run counts to res (for-each (lambda (state) (set! res (cons (list run-name state (hash-table-ref curr state)) res))) (sort (hash-table-keys curr) string>=)) (set! curr (make-hash-table)))))) @@ -2101,70 +2143,61 @@ ;; use (get-value-by-header (db:get-header runinfo)(db:get-rows runinfo)) (define (db:get-run-info dbstruct run-id) ;;(if (hash-table-ref/default *run-info-cache* run-id #f) ;; (hash-table-ref *run-info-cache* run-id) - (let* ((dbdat (db:get-db dbstruct #f)) - (db (db:dbdat-get-db dbdat)) - (res (vector #f #f #f #f)) + (let* ((res (vector #f #f #f #f)) (keys (db:get-keys dbstruct)) (remfields (list "id" "runname" "state" "status" "owner" "event_time")) (header (append keys remfields)) (keystr (conc (keys->keystr keys) "," (string-intersperse remfields ",")))) (debug:print-info 11 *default-log-port* "db:get-run-info run-id: " run-id " header: " header " keystr: " keystr) - ;; (db:delay-if-busy dbdat) - (sqlite3:for-each-row - (lambda (a . x) - (set! res (apply vector a x))) - db - (conc "SELECT " keystr " FROM runs WHERE id=? AND state != 'deleted';") - run-id) + (db:with-db + dbstruct #f #f + (lambda (db) + (sqlite3:for-each-row + (lambda (a . x) + (set! res (apply vector a x))) + db + (conc "SELECT " keystr " FROM runs WHERE id=? AND state != 'deleted';") + run-id))) (debug:print-info 11 *default-log-port* "db:get-run-info run-id: " run-id " header: " header " keystr: " keystr) (let ((finalres (vector header res))) ;; (hash-table-set! *run-info-cache* run-id finalres) finalres))) (define (db:set-comment-for-run dbstruct run-id comment) (db:with-db - dbstruct - #f - #f + dbstruct #f #f (lambda (db) (sqlite3:execute db "UPDATE runs SET comment=? WHERE id=?;" comment ;; (sdb:qry 'getid comment) run-id)))) ;; does not (obviously!) removed dependent data. But why not!!? (define (db:delete-run dbstruct run-id) - ;; First set any related tests to DELETED - (let* ((rdbdat (db:get-db dbstruct run-id)) - (rdb (db:dbdat-get-db rdbdat)) - (dbdat (db:get-db dbstruct #f)) - (db (db:dbdat-get-db dbdat))) - ;; (db:delay-if-busy rdbdat) - (sqlite3:with-transaction - db - (lambda () - (sqlite3:execute rdb "DELETE FROM test_steps WHERE test_id IN (SELECT id FROM tests WHERE run_id=?);" run-id) - (sqlite3:execute rdb "DELETE FROM test_data WHERE test_id IN (SELECT id FROM tests WHERE run_id=?);" run-id) - (sqlite3:execute rdb "UPDATE tests SET state='DELETED',comment='' WHERE run_id=?;" run-id) - ;; (db:delay-if-busy dbdat) - (sqlite3:execute db "UPDATE runs SET state='deleted',comment='' WHERE id=?;" run-id))))) + (db:with-db + dbstruct #f #f + (lambda (db) + (sqlite3:with-transaction + db + (lambda () + (sqlite3:execute db "DELETE FROM test_steps WHERE test_id IN (SELECT id FROM tests WHERE run_id=?);" run-id) + (sqlite3:execute db "DELETE FROM test_data WHERE test_id IN (SELECT id FROM tests WHERE run_id=?);" run-id) + (sqlite3:execute db "UPDATE tests SET state='DELETED',comment='' WHERE run_id=?;" run-id) + ;; (db:delay-if-busy dbdat) + (sqlite3:execute db "UPDATE runs SET state='deleted',comment='' WHERE id=?;" run-id)))))) (define (db:update-run-event_time dbstruct run-id) (db:with-db - dbstruct - #f - #t + dbstruct #f #t (lambda (db) (sqlite3:execute db "UPDATE runs SET event_time=strftime('%s','now') WHERE id=?;" run-id)))) (define (db:lock/unlock-run dbstruct run-id lock unlock user) (db:with-db - dbstruct - #f - #t + dbstruct #f #t (lambda (db) (let ((newlockval (if lock "locked" (if unlock "unlocked" "locked")))) ;; semi-failsafe @@ -2172,23 +2205,21 @@ (sqlite3:execute db "INSERT INTO access_log (user,accessed,args) VALUES(?,strftime('%s','now'),?);" user (conc newlockval " " run-id)) (debug:print-info 1 *default-log-port* "" newlockval " run number " run-id))))) (define (db:set-run-status dbstruct run-id status msg) - (let* ((dbdat (db:get-db dbstruct #f)) - (db (db:dbdat-get-db dbdat))) - ;; (db:delay-if-busy dbdat) - (if msg - (sqlite3:execute db "UPDATE runs SET status=?,comment=? WHERE id=?;" status msg run-id) - (sqlite3:execute db "UPDATE runs SET status=? WHERE id=?;" status run-id)))) + (db:with-db + dbstruct #f #f + (lambda (db) + (if msg + (sqlite3:execute db "UPDATE runs SET status=?,comment=? WHERE id=?;" status msg run-id) + (sqlite3:execute db "UPDATE runs SET status=? WHERE id=?;" status run-id))))) (define (db:get-run-status dbstruct run-id) (let ((res "n/a")) (db:with-db - dbstruct - #f - #f + dbstruct #f #f (lambda (db) (sqlite3:for-each-row (lambda (status) (set! res status)) db @@ -2202,39 +2233,40 @@ ;; get key val pairs for a given run-id ;; ( (FIELDNAME1 keyval1) (FIELDNAME2 keyval2) ... ) (define (db:get-key-val-pairs dbstruct run-id) (let* ((keys (db:get-keys dbstruct)) - (res '()) - (dbdat (db:get-db dbstruct #f)) - (db (db:dbdat-get-db dbdat))) - (for-each - (lambda (key) - (let ((qry (conc "SELECT " key " FROM runs WHERE id=?;"))) - ;; (db:delay-if-busy dbdat) - (sqlite3:for-each-row - (lambda (key-val) - (set! res (cons (list key key-val) res))) - db qry run-id))) - keys) - (reverse res))) + (res '())) + (db:with-db + dbstruct #f #f + (lambda (db) + (for-each + (lambda (key) + (let ((qry (conc "SELECT " key " FROM runs WHERE id=?;"))) + (sqlite3:for-each-row + (lambda (key-val) + (set! res (cons (list key key-val) res))) + db qry run-id))) + keys))) + (reverse res))) ;; get key vals for a given run-id (define (db:get-key-vals dbstruct run-id) (let* ((keys (db:get-keys dbstruct)) - (res '()) - (dbdat (db:get-db dbstruct #f)) - (db (db:dbdat-get-db dbdat))) - (for-each - (lambda (key) - (let ((qry (conc "SELECT " key " FROM runs WHERE id=?;"))) - ;; (db:delay-if-busy dbdat) - (sqlite3:for-each-row - (lambda (key-val) - (set! res (cons key-val res))) - db qry run-id))) - keys) + (res '())) + (db:with-db + dbstruct #f #f + (lambda (db) + (for-each + (lambda (key) + (let ((qry (conc "SELECT " key " FROM runs WHERE id=?;"))) + ;; (db:delay-if-busy dbdat) + (sqlite3:for-each-row + (lambda (key-val) + (set! res (cons key-val res))) + db qry run-id))) + keys))) (let ((final-res (reverse res))) (hash-table-set! *keyvals* run-id final-res) final-res))) ;; The target is keyval1/keyval2..., cached in *target* as it is used often @@ -2249,18 +2281,22 @@ (let* ((keyvals (rmt:get-key-val-pairs run-id)) (kvalues (map cadr keyvals)) (keys (rmt:get-keys)) (qrystr (string-intersperse (map (lambda (x)(conc x "=?")) keys) " AND "))) (let ((prev-run-ids '())) - (db:with-db dbstruct #f #f ;; #f means work with the zeroth db - i.e. the runs db - (lambda (db) - (apply sqlite3:for-each-row - (lambda (id) - (set! prev-run-ids (cons id prev-run-ids))) - db - (conc "SELECT id FROM runs WHERE " qrystr " AND state != 'deleted' AND id != ?;") (append kvalues (list run-id))))) - prev-run-ids))) + (if (null? keyvals) + '() + (begin + (db:with-db dbstruct #f #f ;; #f means work with the zeroth db - i.e. the runs db + (lambda (db) + (apply sqlite3:for-each-row + (lambda (id) + (set! prev-run-ids (cons id prev-run-ids))) + db + (conc "SELECT id FROM runs WHERE " qrystr " AND state != 'deleted' AND id != ?;") + (append kvalues (list run-id))))) + prev-run-ids))))) ;;====================================================================== ;; T E S T S ;;====================================================================== @@ -2427,16 +2463,16 @@ ;; Convert calling routines to get list of run-ids and loop, do not use the get-tests-for-runs ;; (define (db:delete-test-records dbstruct run-id test-id) - (let* ((dbdat (db:get-db dbstruct run-id)) - (db (db:dbdat-get-db dbdat))) - (db:general-call dbdat 'delete-test-step-records (list test-id)) - ;; (db:delay-if-busy) - (db:general-call dbdat 'delete-test-data-records (list test-id)) - (sqlite3:execute db "UPDATE tests SET state='DELETED',status='n/a',comment='' WHERE id=?;" test-id))) + (db:general-call dbstruct 'delete-test-step-records (list test-id)) + (db:general-call dbstruct 'delete-test-data-records (list test-id)) + (db:with-db + dbstruct #f #f + (lambda (db) + (sqlite3:execute db "UPDATE tests SET state='DELETED',status='n/a',comment='' WHERE id=?;" test-id)))) ;; (define (db:delete-old-deleted-test-records dbstruct) (let (;; (run-ids (db:get-all-run-ids dbstruct)) (targtime (- (current-seconds)(* 30 24 60 60)))) ;; one month in the past @@ -2465,26 +2501,25 @@ (define (db:set-tests-state-status dbstruct run-id testnames currstate currstatus newstate newstatus) (for-each (lambda (testname) (let ((qry (conc "UPDATE tests SET state=?,status=? WHERE " (if currstate (conc "state='" currstate "' AND ") "") (if currstatus (conc "status='" currstatus "' AND ") "") - " run_id=? AND testname LIKE ?;"))) + " run_id=? AND testname LIKE ?;")) + (test-id (db:get-test-id dbstruct run-id testname ""))) (db:with-db dbstruct run-id #t (lambda (db) - (let ((test-id (db:get-test-id dbstruct run-id testname ""))) - (sqlite3:execute db qry newstate newstatus run-id testname) - (if test-id (mt:process-triggers run-id test-id newstate newstatus))) - )))) + (sqlite3:execute db qry newstate newstatus run-id testname))) + (if test-id (mt:process-triggers dbstruct run-id test-id newstate newstatus)))) testnames)) -;; speed up for common cases with a little logic -;; NB// Ultimately this will be deprecated in deference to mt:test-set-state-status-by-id -;; -(define (db:test-set-state-status-by-id dbstruct run-id test-id newstate newstatus newcomment) +;; ;; speed up for common cases with a little logic +;; ;; NB// Ultimately this will be deprecated in deference to mt:test-set-state-status-by-id +;; ;; +(define (db:test-set-state-status dbstruct run-id test-id newstate newstatus newcomment) (db:with-db dbstruct run-id #t (lambda (db) @@ -2496,12 +2531,12 @@ (sqlite3:execute db "UPDATE tests SET state=?,status=? WHERE id=?;" newstate newstatus test-id)) (else (if newstate (sqlite3:execute db "UPDATE tests SET state=? WHERE id=?;" newstate test-id)) (if newstatus (sqlite3:execute db "UPDATE tests SET status=? WHERE id=?;" newstatus test-id)) (if newcomment (sqlite3:execute db "UPDATE tests SET comment=? WHERE id=?;" newcomment ;; (sdb:qry 'getid newcomment) - test-id)))) - (mt:process-triggers run-id test-id newstate newstatus)))) + test-id)))))) + (mt:process-triggers dbstruct run-id test-id newstate newstatus)) ;; NEW BEHAVIOR: Count tests running in all runs! ;; (define (db:get-count-tests-running dbstruct run-id) (db:with-db @@ -2557,23 +2592,23 @@ (sqlite3:first-result db "SELECT count(id) FROM tests WHERE state in ('RUNNING','LAUNCHED','REMOTEHOSTSTART') AND run_id=? AND NOT (uname = 'n/a' AND item_path = '') AND testname=?;" run-id testname)))) (define (db:get-count-tests-running-in-jobgroup dbstruct run-id jobgroup) - (let* ((dbdat (db:get-db dbstruct #f)) - (db (db:dbdat-get-db dbdat))) (if (not jobgroup) 0 ;; (let ((testnames '())) ;; get the testnames - ;; (db:delay-if-busy dbdat) - (sqlite3:for-each-row - (lambda (testname) - (set! testnames (cons testname testnames))) - db - "SELECT testname FROM test_meta WHERE jobgroup=?" - jobgroup) + (db:with-db + dbstruct #f #f + (lambda (db) + (sqlite3:for-each-row + (lambda (testname) + (set! testnames (cons testname testnames))) + db + "SELECT testname FROM test_meta WHERE jobgroup=?" + jobgroup))) ;; get the jobcount NB// EXTEND THIS TO OPPERATE OVER ALL RUNS? (if (not (null? testnames)) (db:with-db dbstruct run-id @@ -2583,14 +2618,14 @@ db (conc "SELECT count(id) FROM tests WHERE state in ('RUNNING','LAUNCHED','REMOTEHOSTSTART') AND testname in ('" (string-intersperse testnames "','") "') AND NOT (uname = 'n/a' AND item_path='');")) ;; should this include the (uname = 'n/a' ...) ??? )) - 0))))) - ;; DEBUG FIXME - need to merge this v.155 query correctly - ;; AND testname in (SELECT testname FROM test_meta WHERE jobgroup=?) - ;; AND NOT (uname = 'n/a' AND item_path = '');" + 0)))) + +;; tags: '("tag%" "tag2" "%ag6") +;; ;; done with run when: ;; 0 tests in LAUNCHED, NOT_STARTED, REMOTEHOSTSTART, RUNNING (define (db:estimated-tests-remaining dbstruct run-id) (db:with-db @@ -2661,24 +2696,22 @@ ;; NOTE: Use db:test-get* to access records ;; NOTE: This needs rundir decoding? Decide, decode here or where used? For the moment decode where used. (define (db:get-all-tests-info-by-run-id dbstruct run-id) - (let* ((dbdat (if (vector? dbstruct) - (db:get-db dbstruct run-id) - dbstruct)) ;; still settling on when to use dbstruct or dbdat - (db (db:dbdat-get-db dbdat)) - (res '())) - ;; (db:delay-if-busy dbdat) - (sqlite3:for-each-row - (lambda (id run-id testname state status event-time host cpuload diskfree uname rundir item-path run-duration final-logf comment shortdir attemptnum archived) - ;; 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 - (set! res (cons (vector id run-id testname state status event-time host cpuload diskfree uname rundir item-path run-duration final-logf comment shortdir attemptnum archived) - res))) - db - (conc "SELECT " db:test-record-qry-selector " FROM tests WHERE state != 'DELETED' AND run_id=?;") - run-id) + (let* ((res '())) + (db:with-db + dbstruct #f #f + (lambda (db) + (sqlite3:for-each-row + (lambda (id run-id testname state status event-time host cpuload diskfree uname rundir item-path run-duration final-logf comment shortdir attemptnum archived) + ;; 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 + (set! res (cons (vector id run-id testname state status event-time host cpuload diskfree uname rundir item-path run-duration final-logf comment shortdir attemptnum archived) + res))) + db + (conc "SELECT " db:test-record-qry-selector " FROM tests WHERE state != 'DELETED' AND run_id=?;") + run-id))) res)) (define (db:replace-test-records dbstruct run-id testrecs) (db:with-db dbstruct run-id #t (lambda (db) @@ -2857,27 +2890,27 @@ ;; Roll up test_data pass/fail results ;; look at the test_data status field, ;; if all are pass (any case) and the test status is PASS or NULL or '' then set test status to PASS. ;; if one or more are fail (any case) then set test status to PASS, non "pass" or "fail" are ignored (define (db:test-data-rollup dbstruct run-id test-id status) - (let* ((dbdat (db:get-db dbstruct run-id)) - (db (db:dbdat-get-db dbdat)) - (fail-count 0) + (let* ((fail-count 0) (pass-count 0)) - ;; (db:delay-if-busy dbdat) - (sqlite3:for-each-row - (lambda (fcount pcount) - (set! fail-count fcount) - (set! pass-count pcount)) - db - "SELECT (SELECT count(id) FROM test_data WHERE test_id=? AND status like 'fail') AS fail_count, + (db:with-db + dbstruct #f #f + (lambda (db) + (sqlite3:for-each-row + (lambda (fcount pcount) + (set! fail-count fcount) + (set! pass-count pcount)) + db + "SELECT (SELECT count(id) FROM test_data WHERE test_id=? AND status like 'fail') AS fail_count, (SELECT count(id) FROM test_data WHERE test_id=? AND status like 'pass') AS pass_count;" - test-id test-id) - ;; Now rollup the counts to the central megatest.db - (db:general-call dbdat 'pass-fail-counts (list pass-count fail-count test-id)) - ;; if the test is not FAIL then set status based on the fail and pass counts. - (db:general-call dbdat 'test_data-pf-rollup (list test-id test-id test-id test-id)))) + test-id test-id) + ;; Now rollup the counts to the central megatest.db + (db:general-call dbstruct 'pass-fail-counts (list pass-count fail-count test-id)) + ;; if the test is not FAIL then set status based on the fail and pass counts. + (db:general-call dbstruct 'test_data-pf-rollup (list test-id test-id test-id test-id)))))) ;; each section is a rule except "final" which is the final result ;; ;; [rule-5] ;; operator in @@ -2958,119 +2991,124 @@ ;; faz,bar, 10, 8mA, , ,"this is a comment" ;; EOF (define (db:csv->test-data dbstruct run-id test-id csvdata) (debug:print 4 *default-log-port* "test-id " test-id ", csvdata: " csvdata) - (let* ((dbdat (db:get-db dbstruct run-id)) - (db (db:dbdat-get-db dbdat)) - (csvlist (csv->list (make-csv-reader - (open-input-string csvdata) - '((strip-leading-whitespace? #t) - (strip-trailing-whitespace? #t)))))) ;; (csv->list csvdata))) - (for-each - (lambda (csvrow) - (let* ((padded-row (take (append csvrow (list #f #f #f #f #f #f #f #f #f)) 9)) - (category (list-ref padded-row 0)) - (variable (list-ref padded-row 1)) - (value (any->number-if-possible (list-ref padded-row 2))) - (expected (any->number-if-possible (list-ref padded-row 3))) - (tol (any->number-if-possible (list-ref padded-row 4))) ;; >, <, >=, <=, or a number - (units (list-ref padded-row 5)) - (comment (list-ref padded-row 6)) - (status (let ((s (list-ref padded-row 7))) - (if (and (string? s)(or (string-match (regexp "^\\s*$") s) - (string-match (regexp "^n/a$") s))) - #f - s))) ;; if specified on the input then use, else calculate - (type (list-ref padded-row 8))) - ;; look up expected,tol,units from previous best fit test if they are all either #f or '' - (debug:print 4 *default-log-port* "BEFORE: category: " category " variable: " variable " value: " value - ", expected: " expected " tol: " tol " units: " units " status: " status " comment: " comment " type: " type) - - (if (and (or (not expected)(equal? expected "")) - (or (not tol) (equal? expected "")) - (or (not units) (equal? expected ""))) - (let-values (((new-expected new-tol new-units)(tdb:get-prev-tol-for-test #f test-id category variable))) - (set! expected new-expected) - (set! tol new-tol) - (set! units new-units))) - - (debug:print 4 *default-log-port* "AFTER: category: " category " variable: " variable " value: " value - ", expected: " expected " tol: " tol " units: " units " status: " status " comment: " comment) - ;; calculate status if NOT specified - (if (and (not status)(number? expected)(number? value)) ;; need expected and value to be numbers - (if (number? tol) ;; if tol is a number then we do the standard comparison - (let* ((max-val (+ expected tol)) - (min-val (- expected tol)) - (result (and (>= value min-val)(<= value max-val)))) - (debug:print 4 *default-log-port* "max-val: " max-val " min-val: " min-val " result: " result) - (set! status (if result "pass" "fail"))) - (set! status ;; NB// need to assess each one (i.e. not return operator since need to act if not valid op. - (case (string->symbol tol) ;; tol should be >, <, >=, <= - ((>) (if (> value expected) "pass" "fail")) - ((<) (if (< value expected) "pass" "fail")) - ((>=) (if (>= value expected) "pass" "fail")) - ((<=) (if (<= value expected) "pass" "fail")) - (else (conc "ERROR: bad tol comparator " tol)))))) - (debug:print 4 *default-log-port* "AFTER2: category: " category " variable: " variable " value: " value - ", expected: " expected " tol: " tol " units: " units " status: " status " comment: " comment) - ;; (db:delay-if-busy dbdat) - (sqlite3:execute db "INSERT OR REPLACE INTO test_data (test_id,category,variable,value,expected,tol,units,comment,status,type) VALUES (?,?,?,?,?,?,?,?,?,?);" - test-id category variable value expected tol units (if comment comment "") status type))) - csvlist))) + (db:with-db + dbstruct #f #f + (lambda (db) + (let* ((csvlist (csv->list (make-csv-reader + (open-input-string csvdata) + '((strip-leading-whitespace? #t) + (strip-trailing-whitespace? #t)))))) ;; (csv->list csvdata))) + (for-each + (lambda (csvrow) + (let* ((padded-row (take (append csvrow (list #f #f #f #f #f #f #f #f #f)) 9)) + (category (list-ref padded-row 0)) + (variable (list-ref padded-row 1)) + (value (any->number-if-possible (list-ref padded-row 2))) + (expected (any->number-if-possible (list-ref padded-row 3))) + (tol (any->number-if-possible (list-ref padded-row 4))) ;; >, <, >=, <=, or a number + (units (list-ref padded-row 5)) + (comment (list-ref padded-row 6)) + (status (let ((s (list-ref padded-row 7))) + (if (and (string? s)(or (string-match (regexp "^\\s*$") s) + (string-match (regexp "^n/a$") s))) + #f + s))) ;; if specified on the input then use, else calculate + (type (list-ref padded-row 8))) + ;; look up expected,tol,units from previous best fit test if they are all either #f or '' + (debug:print 4 *default-log-port* "BEFORE: category: " category " variable: " variable " value: " value + ", expected: " expected " tol: " tol " units: " units " status: " status " comment: " comment " type: " type) + + (if (and (or (not expected)(equal? expected "")) + (or (not tol) (equal? expected "")) + (or (not units) (equal? expected ""))) + (let-values (((new-expected new-tol new-units)(tdb:get-prev-tol-for-test #f test-id category variable))) + (set! expected new-expected) + (set! tol new-tol) + (set! units new-units))) + + (debug:print 4 *default-log-port* "AFTER: category: " category " variable: " variable " value: " value + ", expected: " expected " tol: " tol " units: " units " status: " status " comment: " comment) + ;; calculate status if NOT specified + (if (and (not status)(number? expected)(number? value)) ;; need expected and value to be numbers + (if (number? tol) ;; if tol is a number then we do the standard comparison + (let* ((max-val (+ expected tol)) + (min-val (- expected tol)) + (result (and (>= value min-val)(<= value max-val)))) + (debug:print 4 *default-log-port* "max-val: " max-val " min-val: " min-val " result: " result) + (set! status (if result "pass" "fail"))) + (set! status ;; NB// need to assess each one (i.e. not return operator since need to act if not valid op. + (case (string->symbol tol) ;; tol should be >, <, >=, <= + ((>) (if (> value expected) "pass" "fail")) + ((<) (if (< value expected) "pass" "fail")) + ((>=) (if (>= value expected) "pass" "fail")) + ((<=) (if (<= value expected) "pass" "fail")) + (else (conc "ERROR: bad tol comparator " tol)))))) + (debug:print 4 *default-log-port* "AFTER2: category: " category " variable: " variable " value: " value + ", expected: " expected " tol: " tol " units: " units " status: " status " comment: " comment) + ;; (db:delay-if-busy dbdat) + (sqlite3:execute db "INSERT OR REPLACE INTO test_data (test_id,category,variable,value,expected,tol,units,comment,status,type) VALUES (?,?,?,?,?,?,?,?,?,?);" + test-id category variable value expected tol units (if comment comment "") status type))) + csvlist))))) ;; This routine moved from tdb.scm, tdb:read-test-data ;; (define (db:read-test-data dbstruct run-id test-id categorypatt) - (let* ((dbdat (db:get-db dbstruct run-id)) - (db (db:dbdat-get-db dbdat)) - (res '())) - ;; (db:delay-if-busy dbdat) - (sqlite3:for-each-row - (lambda (id test_id category variable value expected tol units comment status type) - (set! res (cons (vector id test_id category variable value expected tol units comment status type) res))) - db - "SELECT id,test_id,category,variable,value,expected,tol,units,comment,status,type FROM test_data WHERE test_id=? AND category LIKE ? ORDER BY category,variable;" test-id categorypatt) - (reverse res))) + (let* ((res '())) + (db:with-db + dbstruct #f #f + (lambda (db) + (sqlite3:for-each-row + (lambda (id test_id category variable value expected tol units comment status type) + (set! res (cons (vector id test_id category variable value expected tol units comment status type) res))) + db + "SELECT id,test_id,category,variable,value,expected,tol,units,comment,status,type FROM test_data WHERE test_id=? AND category LIKE ? ORDER BY category,variable;" test-id categorypatt) + (reverse res))))) ;;====================================================================== ;; Misc. test related queries ;;====================================================================== (define (db:get-run-ids-matching-target dbstruct keynames target res runname testpatt statepatt statuspatt) - (let* ((dbdat (db:get-db dbstruct #f)) - (db (db:dbdat-get-db dbdat)) - (row-ids '()) - (keystr (string-intersperse - (map (lambda (key val) - (conc key " like '" val "'")) - keynames - (string-split target "/")) - " AND ")) - ;; (testqry (tests:match->sqlqry testpatt)) - (runsqry (sqlite3:prepare db (conc "SELECT id FROM runs WHERE " keystr " AND runname LIKE '" runname "';")))) - ;; (debug:print 8 *default-log-port* "db:test-get-paths-matching-keynames-target-new\n runsqry=" runsqry "\n tstsqry=" testqry) - (sqlite3:for-each-row - (lambda (rid) - (set! row-ids (cons rid row-ids))) - runsqry) - (sqlite3:finalize! runsqry) - row-ids)) - + (db:with-db + dbstruct #f #f + (lambda (db) + (let* ((row-ids '()) + (keystr (string-intersperse + (map (lambda (key val) + (conc key " like '" val "'")) + keynames + (string-split target "/")) + " AND ")) + ;; (testqry (tests:match->sqlqry testpatt)) + (runsqry (sqlite3:prepare db (conc "SELECT id FROM runs WHERE " keystr " AND runname LIKE '" runname "';")))) + ;; (debug:print 8 *default-log-port* "db:test-get-paths-matching-keynames-target-new\n runsqry=" runsqry "\n tstsqry=" testqry) + (sqlite3:for-each-row + (lambda (rid) + (set! row-ids (cons rid row-ids))) + runsqry) + (sqlite3:finalize! runsqry) + row-ids)))) + +;; finds latest matching all patts for given run-id +;; (define (db:test-get-paths-matching-keynames-target-new dbstruct run-id keynames target res testpatt statepatt statuspatt runname) (let* ((testqry (tests:match->sqlqry testpatt)) - (tstsqry (conc "SELECT rundir FROM tests WHERE " testqry " AND state LIKE '" statepatt "' AND status LIKE '" statuspatt "' ORDER BY event_time ASC;"))) + (tstsqry (conc "SELECT rundir FROM tests WHERE run_id=? AND " testqry " AND state LIKE '" statepatt "' AND status LIKE '" statuspatt "' ORDER BY event_time ASC;"))) (db:with-db dbstruct run-id #f (lambda (db) (sqlite3:for-each-row (lambda (p) (set! res (cons p res))) db - tstsqry) + tstsqry + run-id) res)))) (define (db:test-toplevel-num-items dbstruct run-id testname) (db:with-db dbstruct @@ -3122,140 +3160,111 @@ (print-call-chain (current-error-port)) msg))) ;; crude reply for when things go awry ((zmq nmsg)(with-input-from-string msg (lambda ()(deserialize)))) (else msg))) ;; rpc -;; This is to be the big daddy call - -(define (db:test-set-status-state dbstruct run-id test-id status state msg) - (let ((dbdat (db:get-db dbstruct run-id))) - (if (member state '("LAUNCHED" "REMOTEHOSTSTART")) - (db:general-call dbdat 'set-test-start-time (list test-id))) - ;; (if msg - ;; (db:general-call dbdat 'state-status-msg (list state status msg test-id)) - ;; (db:general-call dbdat 'state-status (list state status test-id))) - (db:set-state-status-and-roll-up-items dbstruct run-id test-id #f state status msg) - ;; process the test_data table - (if (and test-id state status (equal? status "AUTO")) - (db:test-data-rollup dbstruct run-id test-id status)) - (mt:process-triggers run-id test-id state status))) +;; ; This is to be the big daddy call NOPE: Replaced by db:set-state-status-and-roll-up-items +;; ; +;; define (db:test-set-state-status dbstruct run-id test-id state status msg) +;; (let ((dbdat (db:get-db dbstruct run-id))) +;; (if (member state '("LAUNCHED" "REMOTEHOSTSTART")) +;; (db:general-call dbdat 'set-test-start-time (list test-id))) +;; ;; (if msg +;; ;; (db:general-call dbdat 'state-status-msg (list state status msg test-id)) +;; ;; (db:general-call dbdat 'state-status (list state status test-id))) +;; (db:set-state-status-and-roll-up-items dbstruct run-id test-id #f state status msg) +;; ;; process the test_data table +;; (if (and test-id state status (equal? status "AUTO")) +;; (db:test-data-rollup dbstruct run-id test-id status)) +;; (mt:process-triggers dbstruct run-id test-id state status))) ;; state is the priority rollup of all states ;; status is the priority rollup of all completed statesfu ;; ;; if test-name is an integer work off that instead of test-name test-path ;; (define (db:set-state-status-and-roll-up-items dbstruct run-id test-name item-path state status comment) ;; establish info on incoming test followed by info on top level test - (let* ((db (db:dbdat-get-db (dbr:dbstruct-tmpdb dbstruct))) - (testdat (if (number? test-name) + (let* ((testdat (if (number? test-name) (db:get-test-info-by-id dbstruct run-id test-name) ;; test-name is actually a test-id (db:get-test-info dbstruct run-id test-name item-path))) (test-id (db:test-get-id testdat)) (test-name (if (number? test-name) (db:test-get-testname testdat) test-name)) (item-path (db:test-get-item-path testdat)) (tl-testdat (db:get-test-info dbstruct run-id test-name "")) (tl-test-id (db:test-get-id tl-testdat))) - (sqlite3:with-transaction - db - (lambda () - (db:test-set-state-status-by-id dbstruct run-id test-id state status comment) - (if (not (equal? item-path "")) ;; only roll up IF incoming test is an item - (let* ((state-status-counts (db:get-all-state-status-counts-for-test db run-id test-name item-path)) ;; item-path is used to exclude current state/status of THIS test - (running (length (filter (lambda (x) - (member (dbr:counts-state x) *common:running-states*)) - state-status-counts))) - (bad-not-started (length (filter (lambda (x) - (and (equal? (dbr:counts-state x) "NOT_STARTED") - (not (member (dbr:counts-status x) - *common:not-started-ok-statuses*)))) - state-status-counts))) - (all-curr-states (common:special-sort ;; worst -> best (sort of) - (delete-duplicates - (cons state (map dbr:counts-state state-status-counts))) - *common:std-states* >)) - (all-curr-statuses (common:special-sort ;; worst -> best - (delete-duplicates - (cons status (map dbr:counts-status state-status-counts))) - *common:std-statuses* >)) - (newstate (if (> running 0) - "RUNNING" - (if (> bad-not-started 0) - "COMPLETED" - (car all-curr-states)))) - (newstatus (if (> bad-not-started 0) - "CHECK" - (car all-curr-statuses)))) - ;; (print "Setting toplevel to: " newstate "/" newstatus) - (db:test-set-state-status-by-id dbstruct run-id tl-test-id newstate newstatus #f))))))) - -(define db:roll-up-pass-fail-counts db:set-state-status-and-roll-up-items) - -;; call with state = #f to roll up with out accounting for state/status of this item -;; -;; (define (db:roll-up-pass-fail-counts dbstruct run-id test-name item-path state status) -;; (if (not (equal? item-path "")) ;; if asked to do this for a specific item then do an incremental update -;; (let* ((dbdat (db:get-db dbstruct run-id)) -;; (toptestdat (db:get-test-info dbstruct run-id test-name item-path)) -;; (currtopstate (db:test-get-state toptestdat)) -;; (currtopstatus (db:test-get-status toptestdat)) -;; (nextss (common:apply-state-status currtopstate currtopstatus state status)) -;; (newtopstate (car nextss)) ;; #f or a symbol -;; (newtopstatus (cdr nextss))) ;; #f or a symbol -;; (if (not newtopstate) ;; need to calculate it -;; -;; ;; We rely on the toplevel to track status as state varies. I.e. preserve an ABORT -;; -;; -;; ;; (db (db:dbdat-get-db dbdat))) -;; (db:general-call dbdat 'update-pass-fail-counts (list test-name test-name test-name)) -;; (db:top-test-set-per-pf-counts dbstruct run-id test-name)))) -;; -;; ;; (case (string->symbol status) -;; ;; ((RUNNING) (db:general-call dbdat 'top-test-set-running (list test-name))) -;; ;; ((LAUNCHED) (db:general-call dbdat 'top-test-set (list "LAUNCHED" test-name))) -;; ;; ((ABORT INCOMPLETE) (db:general-call dbdat 'top-test-set (list status test-name)))) -;; -;; ;; (if (or (not state) -;; ;; (not (equal? item-path ""))) -;; ;; ;; just do a rollup -;; ;; (begin -;; ;; (db:top-test-set-per-pf-counts dbdat run-id test-name) -;; ;; #f) -;; ;; (begin -;; ;; ;; NOTE: No else clause needed for this case -;; ;; (case (string->symbol status) -;; ;; ((RUNNING) (db:general-call dbdat 'top-test-set-running (list test-name))) -;; ;; ((LAUNCHED) (db:general-call dbdat 'top-test-set (list "LAUNCHED" test-name))) -;; ;; ((ABORT INCOMPLETE) (db:general-call dbdat 'top-test-set (list status test-name)))) -;; ;; #f) -;; ;; ))) - -(define (db:get-all-state-status-counts-for-test db run-id test-name item-path) - (sqlite3:map-row - (lambda (state status count) - (make-dbr:counts state: state status: status count: count)) - db - "SELECT state,status,count(id) FROM tests WHERE run_id=? AND testname=? AND item_path != '' AND item_path !=? GROUP BY state,status;" - run-id test-name item-path)) - - -(define (db:get-all-item-states db run-id test-name) - (sqlite3:map-row - (lambda (a) a) - db - "SELECT DISTINCT state FROM tests WHERE item_path != '' AND state != 'DELETED' AND run_id=? AND testname=?" - run-id test-name)) - -(define (db:get-all-item-statuses db run-id test-name) - (sqlite3:map-row - (lambda (a) a) - db - "SELECT DISTINCT status FROM tests WHERE item_path != '' AND state != 'DELETED' AND state='COMPLETED' AND run_id=? AND testname=?" - run-id test-name)) + (if (member state '("LAUNCHED" "REMOTEHOSTSTART")) + (db:general-call dbstruct 'set-test-start-time (list test-id))) + (mutex-lock! *db-transaction-mutex*) + (db:with-db + dbstruct #f #f + (lambda (db) + (let ((tr-res + (sqlite3:with-transaction + db + (lambda () + ;; NB// Pass the db so it is part fo the transaction + (db:test-set-state-status db run-id test-id state status comment) + (if (not (equal? item-path "")) ;; only roll up IF incoming test is an item + (let* ((state-status-counts (db:get-all-state-status-counts-for-test dbstruct run-id test-name item-path)) ;; item-path is used to exclude current state/status of THIS test + (running (length (filter (lambda (x) + (member (dbr:counts-state x) *common:running-states*)) + state-status-counts))) + (bad-not-started (length (filter (lambda (x) + (and (equal? (dbr:counts-state x) "NOT_STARTED") + (not (member (dbr:counts-status x) + *common:not-started-ok-statuses*)))) + state-status-counts))) + (all-curr-states (common:special-sort ;; worst -> best (sort of) + (delete-duplicates + (cons state (map dbr:counts-state state-status-counts))) + *common:std-states* >)) + (all-curr-statuses (common:special-sort ;; worst -> best + (delete-duplicates + (cons status (map dbr:counts-status state-status-counts))) + *common:std-statuses* >)) + (newstate (if (> running 0) + "RUNNING" + (if (> bad-not-started 0) + "COMPLETED" + (car all-curr-states)))) + (newstatus (if (> bad-not-started 0) + "CHECK" + (car all-curr-statuses)))) + ;; NB// Pass the db so it is part of the transaction + (db:test-set-state-status db run-id tl-test-id newstate newstatus #f))))))) + (mutex-unlock! *db-transaction-mutex*) + (if (and test-id state status (equal? status "AUTO")) + (db:test-data-rollup dbstruct run-id test-id status)) + tr-res))))) + +(define (db:get-all-state-status-counts-for-test dbstruct run-id test-name item-path) + (db:with-db + dbstruct #f #f + (lambda (db) + (sqlite3:map-row + (lambda (state status count) + (make-dbr:counts state: state status: status count: count)) + db + "SELECT state,status,count(id) FROM tests WHERE run_id=? AND testname=? AND item_path != '' AND item_path !=? GROUP BY state,status;" + run-id test-name item-path)))) + +;; (define (db:get-all-item-states db run-id test-name) +;; (sqlite3:map-row +;; (lambda (a) a) +;; db +;; "SELECT DISTINCT state FROM tests WHERE item_path != '' AND state != 'DELETED' AND run_id=? AND testname=?" +;; run-id test-name)) +;; +;; (define (db:get-all-item-statuses db run-id test-name) +;; (sqlite3:map-row +;; (lambda (a) a) +;; db +;; "SELECT DISTINCT status FROM tests WHERE item_path != '' AND state != 'DELETED' AND state='COMPLETED' AND run_id=? AND testname=?" +;; run-id test-name)) (define (db:test-get-logfile-info dbstruct run-id test-name) (db:with-db dbstruct run-id @@ -3310,13 +3319,14 @@ '(delete-tests-in-state ;; "DELETE FROM tests WHERE state=?;") ;; DONE "UPDATE tests SET state='DELETED' WHERE state=?") '(tests:test-set-toplog "UPDATE tests SET final_logf=? WHERE run_id=? AND testname=? AND item_path='';") '(update-cpuload-diskfree "UPDATE tests SET cpuload=?,diskfree=? WHERE id=?;") ;; DONE '(update-uname-host "UPDATE tests SET uname=?,host=? WHERE id=?;") ;; DONE + '(update-test-rundat "INSERT INTO test_rundat (test_id,update_time,cpuload,diskfree,diskusage,run_duration) VALUES (?,?,?,?,?,?);") '(update-test-state "UPDATE tests SET state=? WHERE state=? AND run_id=? AND testname=? AND NOT (item_path='' AND testname IN (SELECT DISTINCT testname FROM tests WHERE testname=? AND item_path != ''));") '(update-test-status "UPDATE tests SET status=? WHERE status like ? AND run_id=? AND testname=? AND NOT (item_path='' AND testname IN (SELECT DISTINCT testname FROM tests WHERE testname=? AND item_path != ''));") - ;; stuff for roll-up-pass-fail-counts + ;; stuff for set-state-status-and-roll-up-items '(update-pass-fail-counts "UPDATE tests SET fail_count=(SELECT count(id) FROM tests WHERE testname=? AND item_path != '' AND status IN ('FAIL','CHECK','INCOMPLETE','ABORT')), pass_count=(SELECT count(id) FROM tests WHERE testname=? AND item_path != '' AND status IN ('PASS','WARN','WAIVED')) WHERE testname=? AND item_path='' AND run_id=?;") ;; DONE ;; BROKEN!!! NEEDS run-id '(top-test-set-running "UPDATE tests SET state='RUNNING' WHERE testname=? AND item_path='' AND run_id=?;") ;; DONE ;; BROKEN!!! NEEDS run-id @@ -3420,11 +3430,11 @@ (let ((q (alist-ref qry-name db:queries))) (if q (car q) #f))) ;; do not run these as part of the transaction (define db:special-queries '(rollup-tests-pass-fail - ;; db:roll-up-pass-fail-counts ;; WHY NOT!? + ;; db:set-state-status-and-roll-up-items ;; WHY NOT!? login immediate flush sync set-verbosity @@ -3441,38 +3451,50 @@ (list #f "Login failed due to mismatch megatest version: " calling-version ", " megatest-version)) (else (hash-table-set! *logged-in-clients* client-signature (current-seconds)) '(#t "successful login")))) -(define (db:general-call dbdat stmtname params) +(define (db:general-call dbstruct stmtname params) (let ((query (let ((q (alist-ref (if (string? stmtname) (string->symbol stmtname) stmtname) db:queries))) (if q (car q) #f)))) - ;; (db:delay-if-busy dbdat) - (apply sqlite3:execute (db:dbdat-get-db dbdat) query params) - #t)) + (db:with-db + dbstruct #f #f + (lambda (db) + (apply sqlite3:execute db query params) + #t)))) ;; get a summary of state and status counts to calculate a rollup ;; -;; NOTE: takes a db, not a dbstruct -;; -(define (db:get-state-status-summary db run-id testname) +(define (db:get-state-status-summary dbstruct run-id testname) (let ((res '())) - (sqlite3:for-each-row - (lambda (state status count) - (set! res (cons (vector state status count) res))) - db - "SELECT state,status,count(state) FROM tests WHERE run_id=? AND testname=? AND item_path='' GROUP BY state,status;" - run-id testname) - res)) + (db:with-db + dbstruct #f #f + (sqlite3:for-each-row + (lambda (state status count) + (set! res (cons (vector state status count) res))) + db + "SELECT state,status,count(state) FROM tests WHERE run_id=? AND testname=? AND item_path='' GROUP BY state,status;" + run-id testname) + res))) + +(define (db:get-latest-host-load dbstruct raw-hostname) + (let* ((hostname (string-substitute "\\..*$" "" raw-hostname)) + (res (cons -1 0))) + (db:with-db + dbstruct #f #f + (lambda (db) + (sqlite3:for-each-row + (lambda (cpuload update-time) (set! res (cons cpuload update-time))) + db + "SELECT tr.cpuload, tr.update_time FROM test_rundat tr, tests t WHERE t.host=? AND tr.cpuload != -1 AND tr.test_id=t.id ORDER BY tr.update_time DESC LIMIT 1;" + hostname))) res )) (define (db:set-top-level-from-items dbstruct run-id testname) - (let* ((dbdat (db:get-db dbstruct run-id)) - (db (db:dbdat-get-db dbdat)) - (summ (db:get-state-status-summary db run-id testname)) + (let* ((summ (db:get-state-status-summary dbstruct run-id testname)) (find (lambda (state status) (if (null? summ) #f (let loop ((hed (car summ)) (tal (cdr summ))) @@ -3497,32 +3519,35 @@ ;; can use wildcards. Also can likely be factored in with get test paths? ;; ;; Run this remotely!! ;; (define (db:get-matching-previous-test-run-records dbstruct run-id test-name item-path) - (let* ((dbdat (db:get-db dbstruct #f)) - (db (db:dbdat-get-db dbdat)) - (keys (db:get-keys dbstruct)) + (let* ((keys (db:get-keys dbstruct)) (selstr (string-intersperse keys ",")) (qrystr (string-intersperse (map (lambda (x)(conc x "=?")) keys) " AND ")) (keyvals #f) (tests-hash (make-hash-table))) ;; first look up the key values from the run selected by run-id - ;; (db:delay-if-busy dbdat) - (sqlite3:for-each-row - (lambda (a . b) - (set! keyvals (cons a b))) - db - (conc "SELECT " selstr " FROM runs WHERE id=? ORDER BY event_time DESC;") run-id) + (db:with-db + dbstruct #f #f + (lambda (db) + (sqlite3:for-each-row + (lambda (a . b) + (set! keyvals (cons a b))) + db + (conc "SELECT " selstr " FROM runs WHERE id=? ORDER BY event_time DESC;") run-id))) (if (not keyvals) '() (let ((prev-run-ids '())) - (apply sqlite3:for-each-row - (lambda (id) - (set! prev-run-ids (cons id prev-run-ids))) - db - (conc "SELECT id FROM runs WHERE " qrystr " AND id != ?;") (append keyvals (list run-id))) + (db:with-db + dbstruct #f #f + (lambda (db) + (apply sqlite3:for-each-row + (lambda (id) + (set! prev-run-ids (cons id prev-run-ids))) + db + (conc "SELECT id FROM runs WHERE " qrystr " AND id != ?;") (append keyvals (list run-id))))) ;; collect all matching tests for the runs then ;; extract the most recent test and return that. (debug:print 4 *default-log-port* "selstr: " selstr ", qrystr: " qrystr ", keyvals: " keyvals ", previous run ids found: " prev-run-ids) (if (null? prev-run-ids) '() ;; no previous runs? return null @@ -3604,10 +3629,30 @@ res)))) ;;====================================================================== ;; Tests meta data ;;====================================================================== + +;; returns a hash table of tags to tests +;; +(define (db:get-tests-tags dbstruct) + (db:with-db + dbstruct #f #f + (lambda (db) + (let* ((res (make-hash-table))) + (sqlite3:for-each-row + (lambda (testname tags-in) + (let ((tags (string-split tags-in ","))) + (for-each + (lambda (tag) + (hash-table-set! res tag + (delete-duplicates + (cons testname (hash-table-ref/default res tag '()))))) + tags))) + db + "SELECT testname,tags FROM test_meta") + res)))) ;; read the record given a testname (define (db:testmeta-get-record dbstruct testname) (let ((res #f)) (db:with-db @@ -3791,11 +3836,11 @@ (define (db:extract-ods-file dbstruct outputfile keypatt-alist runspatt pathmod) (let* ((keysstr (string-intersperse (map car keypatt-alist) ",")) (keyqry (string-intersperse (map (lambda (p)(conc (car p) " LIKE ? ")) keypatt-alist) " AND ")) (numkeys (length keypatt-alist)) (test-ids '()) - (dbdat (db:get-db dbstruct #f)) + (dbdat (db:get-db dbstruct)) (db (db:dbdat-get-db dbdat)) (windows (and pathmod (substring-index "\\" pathmod))) (tempdir (conc "/tmp/" (current-user-name) "/" runspatt "_" (random 10000) "_" (current-process-id))) (runsheader (append (list "Run Id" "Runname") ; 0 1 (map car keypatt-alist) ; + N = length keypatt-alist Index: dcommon.scm ================================================================== --- dcommon.scm +++ dcommon.scm @@ -12,11 +12,11 @@ (use format) (require-library iup) (import (prefix iup iup:)) (use canvas-draw) (import canvas-draw-iup) -(use regex typed-records) +(use regex typed-records matchable) (declare (unit dcommon)) (declare (uses megatest-version)) (declare (uses gutils)) @@ -620,11 +620,12 @@ #:numlin-visible 5 )) (colnames (list "Id" "MTver" "Pid" "Host" "Interface:OutPort" "RunTime" "State" "RunId")) (updater (lambda () (if (dashboard:monitor-changed? commondat tabdat) - (let ((servers (tasks:get-all-servers (db:delay-if-busy tdbdat)))) + (let ((servers (server:get-list *toppath* limit: 10))) + ;; (tasks:get-all-servers (db:delay-if-busy tdbdat)))) (iup:attribute-set! servers-matrix "NUMLIN" (length servers)) ;; (set! colnum 0) ;; (for-each (lambda (colname) ;; ;; (print "colnum: " colnum " colname: " colname) ;; (iup:attribute-set! servers-matrix (conc "0:" colnum) colname) @@ -632,36 +633,40 @@ ;; colnames) (set! rownum 1) (for-each (lambda (server) (set! colnum 0) - (let* ((vals (list (vector-ref server 0) ;; Id - (vector-ref server 9) ;; MT-Ver - (vector-ref server 1) ;; Pid - (vector-ref server 2) ;; Hostname - (conc (vector-ref server 3) ":" (vector-ref server 4)) ;; IP:Port - (seconds->hr-min-sec (- (current-seconds)(vector-ref server 6))) - ;; (vector-ref server 5) ;; Pubport - ;; (vector-ref server 10) ;; Last beat - ;; (vector-ref server 6) ;; Start time - ;; (vector-ref server 7) ;; Priority - ;; (vector-ref server 8) ;; State - (vector-ref server 8) ;; State - (vector-ref server 12) ;; RunId - ))) - (for-each (lambda (val) - (let* ((row-col (conc rownum ":" colnum)) - (curr-val (iup:attribute servers-matrix row-col))) - (if (not (equal? (conc val) curr-val)) - (begin - (iup:attribute-set! servers-matrix row-col val) - (iup:attribute-set! servers-matrix "FITTOTEXT" (conc "C" colnum)))) - (set! colnum (+ 1 colnum)))) - vals) - (set! rownum (+ rownum 1))) - (iup:attribute-set! servers-matrix "REDRAW" "ALL")) - servers)))))) + (match-let (((mod-time host port start-time pid) + server)) + (let* ((uptime (- (current-seconds) mod-time)) + (runtime (if start-time + (- mod-time start-time) + 0)) + (vals (list "-" ;; (vector-ref server 0) ;; Id + "-" ;; (vector-ref server 9) ;; MT-Ver + pid ;; (vector-ref server 1) ;; Pid + host ;; (vector-ref server 2) ;; Hostname + (conc host ":" port) ;; (conc (vector-ref server 3) ":" (vector-ref server 4)) ;; IP:Port + (seconds->hr-min-sec runtime) ;; (- (current-seconds) start-time)) ;; (vector-ref server 6))) + (cond + ((< uptime 5) "alive") + ((< uptime 16) "probably alive");; less than 15 seconds since mod, call it alive (vector-ref server 8) ;; State + (else "dead")) + "-" ;; (vector-ref server 12) ;; RunId + ))) + (for-each (lambda (val) + (let* ((row-col (conc rownum ":" colnum)) + (curr-val (iup:attribute servers-matrix row-col))) + (if (not (equal? (conc val) curr-val)) + (begin + (iup:attribute-set! servers-matrix row-col val) + (iup:attribute-set! servers-matrix "FITTOTEXT" (conc "C" colnum)))) + (set! colnum (+ 1 colnum)))) + vals) + (set! rownum (+ rownum 1))) + (iup:attribute-set! servers-matrix "REDRAW" "ALL"))) + (sort servers (lambda (a b)(> (car a)(car b)))))))))) (set! colnum 0) (for-each (lambda (colname) (iup:attribute-set! servers-matrix (conc "0:" colnum) colname) (iup:attribute-set! servers-matrix "FITTOTEXT" (conc "C" colnum)) (set! colnum (+ colnum 1))) Index: docs/inprogress/megatest-architecture-proposed-2.fig ================================================================== --- docs/inprogress/megatest-architecture-proposed-2.fig +++ docs/inprogress/megatest-architecture-proposed-2.fig @@ -13,35 +13,140 @@ 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 675 1575 675 2175 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 1575 1500 1575 2175 -6 -6 1875 825 2850 1875 -1 1 0 1 0 7 50 -1 -1 0.000 1 0.0000 2400 975 450 150 2400 975 2850 1125 -1 1 0 1 0 7 50 -1 -1 0.000 1 0.0000 2399 1652 450 150 2399 1652 2849 1802 -2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 - 1950 1050 1950 1650 -2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 - 2850 975 2850 1650 --6 -6 3225 450 4200 1500 -1 1 0 1 0 7 50 -1 -1 0.000 1 0.0000 3750 600 450 150 3750 600 4200 750 -1 1 0 1 0 7 50 -1 -1 0.000 1 0.0000 3749 1277 450 150 3749 1277 4199 1427 -2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 - 3300 675 3300 1275 -2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 - 4200 600 4200 1275 --6 -6 3075 2925 4050 3975 -1 1 0 1 0 7 50 -1 -1 0.000 1 0.0000 3600 3075 450 150 3600 3075 4050 3225 -1 1 0 1 0 7 50 -1 -1 0.000 1 0.0000 3599 3752 450 150 3599 3752 4049 3902 -2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 - 3150 3150 3150 3750 -2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 - 4050 3075 4050 3750 --6 -6 7275 4050 12825 9675 +6 14100 150 19950 6075 +6 14850 1350 15825 2400 +1 1 0 1 0 7 50 -1 -1 0.000 1 0.0000 15375 1500 450 150 15375 1500 15825 1650 +1 1 0 1 0 7 50 -1 -1 0.000 1 0.0000 15374 2177 450 150 15374 2177 15824 2327 +2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 + 14925 1575 14925 2175 +2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 + 15825 1500 15825 2175 +-6 +2 4 0 1 0 7 50 -1 -1 0.000 0 0 7 0 0 5 + 17025 4050 17025 3450 15750 3450 15750 4050 17025 4050 +2 1 0 1 0 7 50 -1 -1 3.000 0 0 -1 1 1 2 + 0 0 1.00 60.00 120.00 + 0 0 1.00 60.00 120.00 + 16050 3375 15525 2400 +2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 + 15825 5325 17175 5325 17175 5850 15825 5850 15825 5325 +2 1 0 1 0 7 50 -1 -1 3.000 0 0 -1 1 1 2 + 0 0 1.00 60.00 120.00 + 0 0 1.00 60.00 120.00 + 16350 4050 16350 5325 +2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 + 17550 4800 18900 4800 18900 5325 17550 5325 17550 4800 +2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 + 18375 3900 19725 3900 19725 4425 18375 4425 18375 3900 +2 1 0 1 0 7 50 -1 -1 3.000 0 0 -1 1 1 2 + 0 0 1.00 60.00 120.00 + 0 0 1.00 60.00 120.00 + 16725 4050 17850 4800 +2 1 0 1 0 7 50 -1 -1 3.000 0 0 -1 1 1 2 + 0 0 1.00 60.00 120.00 + 0 0 1.00 60.00 120.00 + 17025 3750 18375 4125 +2 1 0 1 14 7 50 -1 -1 3.000 0 0 -1 1 1 3 + 0 0 1.00 60.00 120.00 + 0 0 1.00 60.00 120.00 + 18975 3900 18075 2625 15900 1875 +2 2 0 1 0 7 50 -1 -1 3.000 0 0 -1 0 0 5 + 14100 150 19950 150 19950 6075 14100 6075 14100 150 +4 0 0 50 -1 0 12 0.0000 4 150 900 15825 3675 rpc-server\001 +4 0 0 50 -1 0 12 0.0000 4 165 270 17475 3825 tcp\001 +4 0 0 50 -1 0 12 0.0000 4 120 315 18525 4125 test\001 +4 0 0 50 -1 0 12 0.0000 4 195 1065 14850 1200 megatest.db\001 +4 0 0 50 -1 0 12 0.0000 4 150 1020 14325 525 basic model\001 +-6 +6 14850 7425 15825 8475 +1 1 0 1 0 7 50 -1 -1 0.000 1 0.0000 15375 7575 450 150 15375 7575 15825 7725 +1 1 0 1 0 7 50 -1 -1 0.000 1 0.0000 15374 8252 450 150 15374 8252 15824 8402 +2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 + 14925 7650 14925 8250 +2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 + 15825 7575 15825 8250 +-6 +6 17775 6675 18750 7725 +1 1 0 1 0 7 50 -1 -1 0.000 1 0.0000 18300 6825 450 150 18300 6825 18750 6975 +1 1 0 1 0 7 50 -1 -1 0.000 1 0.0000 18299 7502 450 150 18299 7502 18749 7652 +2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 + 17850 6900 17850 7500 +2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 + 18750 6825 18750 7500 +-6 +6 6150 2700 7500 3225 +2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 + 6150 2700 7500 2700 7500 3225 6150 3225 6150 2700 +4 0 0 50 -1 0 12 0.0000 4 180 870 6225 2925 run2/test1\001 +-6 +6 2025 675 3000 1725 +1 1 0 1 0 7 50 -1 -1 0.000 1 0.0000 2550 825 450 150 2550 825 3000 975 +1 1 0 1 0 7 50 -1 -1 0.000 1 0.0000 2549 1502 450 150 2549 1502 2999 1652 +2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 + 2100 900 2100 1500 +2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 + 3000 825 3000 1500 +-6 +6 675 7275 1650 8325 +1 1 0 1 0 7 50 -1 -1 0.000 1 0.0000 1200 7425 450 150 1200 7425 1650 7575 +1 1 0 1 0 7 50 -1 -1 0.000 1 0.0000 1199 8102 450 150 1199 8102 1649 8252 +2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 + 750 7500 750 8100 +2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 + 1650 7425 1650 8100 +-6 +6 3675 6675 4650 7725 +1 1 0 1 0 7 50 -1 -1 0.000 1 0.0000 4200 6825 450 150 4200 6825 4650 6975 +1 1 0 1 0 7 50 -1 -1 0.000 1 0.0000 4199 7502 450 150 4199 7502 4649 7652 +2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 + 3750 6900 3750 7500 +2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 + 4650 6825 4650 7500 +-6 +6 900 3825 2175 4425 +2 4 0 1 0 7 50 -1 -1 0.000 0 0 7 0 0 5 + 2175 4425 2175 3825 900 3825 900 4425 2175 4425 +4 0 0 50 -1 0 12 0.0000 4 150 720 1050 4125 server-1\001 +-6 +6 150 5475 1500 6000 +2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 + 150 5475 1500 5475 1500 6000 150 6000 150 5475 +4 0 0 50 -1 0 12 0.0000 4 180 870 300 5700 run1/test1\001 +-6 +6 1725 5400 3075 5925 +2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 + 1725 5400 3075 5400 3075 5925 1725 5925 1725 5400 +4 0 0 50 -1 0 12 0.0000 4 180 870 1800 5625 run1/test2\001 +-6 +6 5400 5100 6375 6975 +6 5400 5100 6375 6150 +1 1 0 1 0 7 50 -1 -1 0.000 1 0.0000 5925 5250 450 150 5925 5250 6375 5400 +1 1 0 1 0 7 50 -1 -1 0.000 1 0.0000 5924 5927 450 150 5924 5927 6374 6077 +2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 + 5475 5325 5475 5925 +2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 + 6375 5250 6375 5925 +-6 +4 0 0 50 -1 0 12 0.0000 4 195 885 5475 6375 postgresql\001 +4 0 0 50 -1 0 12 0.0000 4 195 555 5475 6630 sqlite3\001 +4 0 0 50 -1 0 12 0.0000 4 195 510 5475 6885 mysql\001 +-6 +6 4050 675 6000 2175 +6 4125 900 5100 1950 +1 1 0 1 0 7 50 -1 -1 0.000 1 0.0000 4650 1050 450 150 4650 1050 5100 1200 +1 1 0 1 0 7 50 -1 -1 0.000 1 0.0000 4649 1727 450 150 4649 1727 5099 1877 +2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 + 4200 1125 4200 1725 +2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 + 5100 1050 5100 1725 +-6 +4 0 0 50 -1 0 12 0.0000 4 195 1905 4050 2100 pointers to the servers\001 +4 0 0 50 -1 0 12 0.0000 4 150 930 4200 825 monitor.db\001 +-6 6 8175 4125 8400 8625 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 8175 4125 8400 4125 8400 4350 8175 4350 8175 4125 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 8175 4350 8400 4350 8400 4575 8175 4575 8175 4350 @@ -248,165 +353,14 @@ 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 9375 8175 9600 8175 9600 8400 9375 8400 9375 8175 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 9375 8400 9600 8400 9600 8625 9375 8625 9375 8400 -6 -# Dimension line: 1-1/16 in -6 7875 9375 9150 9675 -# main dimension line -2 1 0 1 0 7 50 -1 -1 4.000 0 0 -1 1 1 2 - 1 1 1.00 60.00 120.00 - 1 1 1.00 60.00 120.00 - 7875 9525 9150 9525 -# text box -2 3 0 1 4 7 49 -1 20 0.000 0 0 -1 0 0 5 - 8085 9375 8085 9675 8939 9675 8939 9375 8085 9375 -# tick -2 1 0 1 0 7 50 -1 -1 4.000 0 0 -1 0 0 2 - 7875 9375 7875 9675 -# tick -2 1 0 1 0 7 50 -1 -1 4.000 0 0 -1 0 0 2 - 9150 9375 9150 9675 -4 1 0 48 -1 0 12 -0.0000 4 180 735 8512 9585 1-1/16 in\001 --6 -# Dimension line: 1-11/16 in -6 7425 4125 7725 6150 -# main dimension line -2 1 0 1 0 7 50 -1 -1 4.000 0 0 -1 1 1 2 - 1 1 1.00 60.00 120.00 - 1 1 1.00 60.00 120.00 - 7575 4125 7575 6150 -# text box -2 3 0 1 4 7 49 -1 20 0.000 0 0 -1 0 0 5 - 7425 5617 7725 5617 7725 4657 7425 4657 7425 5617 -# tick -2 1 0 1 0 7 50 -1 -1 4.000 0 0 -1 0 0 2 - 7425 6150 7725 6150 -# tick -2 1 0 1 0 7 50 -1 -1 4.000 0 0 -1 0 0 2 - 7425 4125 7725 4125 -4 1 0 48 -1 0 12 1.5708 4 180 840 7635 5137 1-11/16 in\001 --6 -2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 - 7800 4050 12825 4050 12825 8925 7800 8925 7800 4050 -2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 - 9225 6225 12450 6225 12450 8325 9225 8325 9225 6225 -2 2 0 1 4 7 50 -1 -1 0.000 0 0 -1 0 0 5 - 9225 6150 9675 6150 9675 8400 9225 8400 9225 6150 -4 0 0 50 -1 0 12 0.0000 4 150 150 8475 9300 X\001 -4 0 0 50 -1 0 12 0.0000 4 150 135 7275 6975 Y\001 --6 -6 14100 150 19950 6075 -6 14850 1350 15825 2400 -1 1 0 1 0 7 50 -1 -1 0.000 1 0.0000 15375 1500 450 150 15375 1500 15825 1650 -1 1 0 1 0 7 50 -1 -1 0.000 1 0.0000 15374 2177 450 150 15374 2177 15824 2327 -2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 - 14925 1575 14925 2175 -2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 - 15825 1500 15825 2175 --6 -2 4 0 1 0 7 50 -1 -1 0.000 0 0 7 0 0 5 - 17025 4050 17025 3450 15750 3450 15750 4050 17025 4050 -2 1 0 1 0 7 50 -1 -1 3.000 0 0 -1 1 1 2 - 0 0 1.00 60.00 120.00 - 0 0 1.00 60.00 120.00 - 16050 3375 15525 2400 -2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 - 15825 5325 17175 5325 17175 5850 15825 5850 15825 5325 -2 1 0 1 0 7 50 -1 -1 3.000 0 0 -1 1 1 2 - 0 0 1.00 60.00 120.00 - 0 0 1.00 60.00 120.00 - 16350 4050 16350 5325 -2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 - 17550 4800 18900 4800 18900 5325 17550 5325 17550 4800 -2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 - 18375 3900 19725 3900 19725 4425 18375 4425 18375 3900 -2 1 0 1 0 7 50 -1 -1 3.000 0 0 -1 1 1 2 - 0 0 1.00 60.00 120.00 - 0 0 1.00 60.00 120.00 - 16725 4050 17850 4800 -2 1 0 1 0 7 50 -1 -1 3.000 0 0 -1 1 1 2 - 0 0 1.00 60.00 120.00 - 0 0 1.00 60.00 120.00 - 17025 3750 18375 4125 -2 1 0 1 14 7 50 -1 -1 3.000 0 0 -1 1 1 3 - 0 0 1.00 60.00 120.00 - 0 0 1.00 60.00 120.00 - 18975 3900 18075 2625 15900 1875 -2 2 0 1 0 7 50 -1 -1 3.000 0 0 -1 0 0 5 - 14100 150 19950 150 19950 6075 14100 6075 14100 150 -4 0 0 50 -1 0 12 0.0000 4 150 900 15825 3675 rpc-server\001 -4 0 0 50 -1 0 12 0.0000 4 165 270 17475 3825 tcp\001 -4 0 0 50 -1 0 12 0.0000 4 120 315 18525 4125 test\001 -4 0 0 50 -1 0 12 0.0000 4 195 1065 14850 1200 megatest.db\001 -4 0 0 50 -1 0 12 0.0000 4 150 1020 14325 525 basic model\001 --6 -6 14850 7425 15825 8475 -1 1 0 1 0 7 50 -1 -1 0.000 1 0.0000 15375 7575 450 150 15375 7575 15825 7725 -1 1 0 1 0 7 50 -1 -1 0.000 1 0.0000 15374 8252 450 150 15374 8252 15824 8402 -2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 - 14925 7650 14925 8250 -2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 - 15825 7575 15825 8250 --6 -6 17775 6675 18750 7725 -1 1 0 1 0 7 50 -1 -1 0.000 1 0.0000 18300 6825 450 150 18300 6825 18750 6975 -1 1 0 1 0 7 50 -1 -1 0.000 1 0.0000 18299 7502 450 150 18299 7502 18749 7652 -2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 - 17850 6900 17850 7500 -2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 - 18750 6825 18750 7500 --6 -6 4875 6075 5850 7125 -1 1 0 1 0 7 50 -1 -1 0.000 1 0.0000 5400 6225 450 150 5400 6225 5850 6375 -1 1 0 1 0 7 50 -1 -1 0.000 1 0.0000 5399 6902 450 150 5399 6902 5849 7052 -2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 - 4950 6300 4950 6900 -2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 - 5850 6225 5850 6900 --6 -6 5400 7425 7350 8925 -6 5475 7650 6450 8700 -1 1 0 1 0 7 50 -1 -1 0.000 1 0.0000 6000 7800 450 150 6000 7800 6450 7950 -1 1 0 1 0 7 50 -1 -1 0.000 1 0.0000 5999 8477 450 150 5999 8477 6449 8627 -2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 - 5550 7875 5550 8475 -2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 - 6450 7800 6450 8475 --6 -4 0 0 50 -1 0 12 0.0000 4 195 1905 5400 8850 pointers to the servers\001 -4 0 0 50 -1 0 12 0.0000 4 150 930 5550 7575 monitor.db\001 --6 -6 6150 2700 7500 3225 -2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 - 6150 2700 7500 2700 7500 3225 6150 3225 6150 2700 -4 0 0 50 -1 0 12 0.0000 4 180 870 6225 2925 run2/test1\001 --6 -2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 1 2 - 0 0 1.00 60.00 120.00 - 0 0 1.00 60.00 120.00 - 1725 5025 1275 2475 -2 4 0 1 0 7 50 -1 -1 0.000 0 0 7 0 0 5 - 5550 4500 5550 225 225 225 225 4500 5550 4500 -2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 1 2 - 0 0 1.00 60.00 120.00 - 0 0 1.00 60.00 120.00 - 1875 7725 1875 5775 -2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 - 1425 7725 2775 7725 2775 8250 1425 8250 1425 7725 -2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 - 3375 7725 4725 7725 4725 8250 3375 8250 3375 7725 -2 1 0 1 0 7 50 -1 -1 3.000 0 0 -1 1 1 2 - 0 0 1.00 60.00 120.00 - 0 0 1.00 60.00 120.00 - 3675 7725 2175 5775 -2 4 0 1 0 7 50 -1 -1 0.000 0 0 7 0 0 5 - 2775 5700 2775 5100 1500 5100 1500 5700 2775 5700 -2 1 0 1 0 7 50 -1 -1 3.000 0 0 -1 1 1 2 - 0 0 1.00 60.00 120.00 - 0 0 1.00 60.00 120.00 - 6600 3300 2925 5025 +2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 1 2 + 0 0 1.00 60.00 120.00 + 0 0 1.00 60.00 120.00 + 1500 3825 1200 2550 2 4 0 1 0 7 50 -1 -1 0.000 0 0 7 0 0 5 17025 10125 17025 9525 15750 9525 15750 10125 17025 10125 2 1 0 1 0 7 50 -1 -1 3.000 0 0 -1 1 1 2 0 0 1.00 60.00 120.00 0 0 1.00 60.00 120.00 @@ -448,32 +402,55 @@ 2 4 0 1 0 7 50 -1 -1 0.000 0 0 7 0 0 5 5325 12675 5325 12075 4050 12075 4050 12675 5325 12675 2 1 0 1 0 7 50 -1 -1 3.000 0 0 -1 1 0 2 0 0 1.00 60.00 120.00 3975 11250 4575 12075 +2 4 0 1 0 7 50 -1 -1 0.000 0 0 7 0 0 5 + 3300 3000 3300 225 225 225 225 3000 3300 3000 +2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2 + 0 0 1.00 60.00 120.00 + 3675 7275 1800 7875 +2 4 0 1 0 7 50 -1 -1 0.000 0 0 7 0 0 5 + 2475 8775 2475 6675 225 6675 225 8775 2475 8775 +2 4 0 1 5 7 50 -1 -1 0.000 0 0 7 0 0 5 + 75 6525 75 9000 4950 9000 4950 6525 75 6525 +2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2 + 0 0 1.00 60.00 120.00 + 2400 4200 5400 5400 +2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 1 2 + 0 0 1.00 60.00 120.00 + 0 0 1.00 60.00 120.00 + 1135 5476 1285 4426 2 1 0 1 0 7 50 -1 -1 3.000 0 0 -1 1 1 2 0 0 1.00 60.00 120.00 0 0 1.00 60.00 120.00 - 2175 5025 3075 3750 + 2321 5402 1796 4427 2 1 0 1 0 7 50 -1 -1 3.000 0 0 -1 1 1 2 0 0 1.00 60.00 120.00 0 0 1.00 60.00 120.00 - 4800 6375 2850 5550 + 6000 3075 1725 2100 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 1 2 0 0 1.00 60.00 120.00 0 0 1.00 60.00 120.00 - 3600 2475 7425 6525 -4 0 0 50 -1 0 12 0.0000 4 195 990 1800 2625 last_update\001 -4 0 0 50 -1 0 12 0.0000 4 150 690 3150 4125 main.db\001 -4 0 0 50 -1 0 12 0.0000 4 195 990 4200 3600 last_update\001 -4 0 0 50 -1 0 12 0.0000 4 195 330 1950 6825 http\001 -4 0 0 50 -1 0 12 0.0000 4 180 870 1575 7950 run1/test1\001 -4 0 0 50 -1 0 12 0.0000 4 150 720 1650 5400 server-1\001 -4 0 0 50 -1 0 12 0.0000 4 150 375 2175 2025 2.db\001 -4 0 0 50 -1 0 12 0.0000 4 150 375 750 2550 1.db\001 -4 0 0 50 -1 0 12 0.0000 4 180 870 3450 7950 run1/test2\001 -4 0 0 50 -1 0 12 0.0000 4 150 1110 9675 3750 Dashboardm\001 + 1725 2250 7275 4425 +2 4 0 1 5 7 50 -1 -1 0.000 0 0 7 0 0 5 + 6300 525 6300 2175 3825 2175 3825 525 6300 525 +2 1 0 1 5 7 50 -1 -1 0.000 0 0 -1 0 0 2 + 3675 225 6000 2400 +2 1 0 1 5 7 50 -1 -1 0.000 0 0 -1 0 0 2 + 3825 2475 5775 300 +2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 + 7800 4050 12825 4050 12825 8925 7800 8925 7800 4050 +2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 + 9225 6225 12450 6225 12450 8325 9225 8325 9225 6225 +2 2 0 1 4 7 50 -1 -1 0.000 0 0 -1 0 0 5 + 9225 6150 9675 6150 9675 8400 9225 8400 9225 6150 +3 2 0 1 0 7 50 -1 -1 3.000 0 1 1 3 + 0 0 1.00 60.00 120.00 + 0 0 1.00 60.00 120.00 + 4125 6675 3675 5250 2325 4425 + 0.000 -1.000 0.000 4 0 0 50 -1 0 12 1.5708 4 150 390 8325 3975 run1\001 4 0 0 50 -1 0 12 0.0000 4 150 900 15825 9750 rpc-server\001 4 0 0 50 -1 0 12 0.0000 4 165 270 17475 9900 tcp\001 4 0 0 50 -1 0 12 0.0000 4 120 315 18525 10200 test\001 4 0 0 50 -1 0 12 0.0000 4 195 1065 14850 7275 megatest.db\001 @@ -481,10 +458,17 @@ 4 0 0 50 -1 0 12 0.0000 4 195 1875 14325 6600 mysql/postgres model\001 4 0 0 50 -1 0 12 0.0000 4 195 4065 600 9300 Current state, no bypass - (if we switch to rpc?)\001 4 0 0 50 -1 0 12 0.0000 4 195 1125 2175 10650 prev try RPC\001 4 0 0 50 -1 0 12 0.0000 4 165 1095 2475 11100 rmt:get-tests\001 4 0 0 50 -1 0 12 0.0000 4 195 2205 450 525 /tmp//??? /.db/*.db\001 -4 0 0 50 -1 0 12 0.0000 4 195 1065 4800 7350 megatest.db\001 -4 0 0 50 -1 0 12 0.0000 4 150 1785 600 8775 Possible Future state\001 4 0 0 50 -1 0 12 0.0000 4 150 1110 8025 450 CHANGES:\001 4 0 0 50 -1 0 12 0.0000 4 195 2145 8025 705 1. http -> rcp or nanomsg\001 4 0 0 50 -1 0 12 0.0000 4 195 3330 8025 960 2. cache db moves from inmem to /tmp\001 +4 0 0 50 -1 0 12 0.0000 4 195 1410 2025 1875 megatest_ref.db\001 +4 0 0 50 -1 0 12 0.0000 4 150 1785 3675 375 Possible Future state\001 +4 0 0 50 -1 0 12 0.0000 4 195 1290 450 6900 Read-only user\001 +4 0 0 50 -1 0 12 0.0000 4 195 1755 675 8475 /tmp/.../megatest.db\001 +4 0 0 50 -1 0 12 0.0000 4 195 1065 3750 8025 megatest.db\001 +4 0 0 50 -1 0 12 0.0000 4 195 990 1650 2925 last_update\001 +4 0 0 50 -1 0 12 0.0000 4 195 330 1350 5100 http\001 +4 0 0 50 -1 0 12 0.0000 4 195 1065 750 2475 megatest.db\001 +4 0 0 50 -1 0 12 0.0000 4 150 945 9675 3750 Dashboard\001 Index: docs/manual/megatest_manual.html ================================================================== --- docs/manual/megatest_manual.html +++ docs/manual/megatest_manual.html @@ -1325,11 +1325,105 @@

Reference

-

Megatest Config File Settings

+

Config File Helpers

+

Various helpers for more advanced config files.

+ + +++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 2. Helpers
Helper Purpose Valid values Comments

#{scheme (scheme code…)}

Execute arbitrary scheme code

Any valid scheme

Value returned from the call is converted to a string and processed as part of the config file

#{system command}

Execute program, inserts exit code

Any valid Unix command

Discards the output from the program

#{shell command} or #{sh …}

Execute program, inserts result from stdout

Any valid Unix command

Value returned from the call is converted to a string and processed as part of the config file

#{realpath path} or #{rp …}

Replace with normalized path

Must be a valid path

#{getenv VAR} or #{gv VAR}

Replace with content of env variable

Must be a valid var

#{get s v} or #{g s v}

Replace with variable v from section s

Variable must be defined before use

#{rget v}

Replace with variable v from target or default of runconfigs file

Replace with the path to the megatest testsuite area

+
+
+

Config File Settings

+

Settings in megatest.config

+
+
+

Config File Additional Features

+

Including output from a script as if it was inline to the config file:

+
+
+
[scriptinc myscript.sh]
+
+

If the script outputs:

+
+
+
[items]
+A a b c
+B d e f
+
+

Then the config file would effectively appear to contain an items section +exactly like the output from the script. This is extremely useful when +dynamically creating items, itemstables and other config structures. You can +see the expansion of the call by looking in the cached files (look in your +linktree for megatest.config and runconfigs.config cache files and in your +test run areas for the expanded and cached testconfig).

Disk Space Checks

Some parameters you can put in the [setup] section of megatest.config:

@@ -1448,11 +1542,11 @@

Database settings

- + @@ -1917,11 +2011,11 @@

These routines can be called from the megatest repl.

Table 2. Database config settings in [setup] section of megatest.configTable 3. Database config settings in [setup] section of megatest.config
- + @@ -1969,10 +2063,10 @@

Index: docs/manual/reference.txt ================================================================== --- docs/manual/reference.txt +++ docs/manual/reference.txt @@ -1,11 +1,56 @@ Reference --------- -Megatest Config File Settings -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Config File Helpers +~~~~~~~~~~~~~~~~~~~ + +Various helpers for more advanced config files. + +.Helpers +[width="80%",cols="^,2m,2m,2m",frame="topbot",options="header"] +|====================== +|Helper | Purpose | Valid values | Comments +| #{scheme (scheme code...)} | Execute arbitrary scheme code | Any valid scheme | Value returned from the call is converted to a string and processed as part of the config file +| #{system command} | Execute program, inserts exit code | Any valid Unix command | Discards the output from the program +| #{shell command} or #{sh ...} | Execute program, inserts result from stdout | Any valid Unix command | Value returned from the call is converted to a string and processed as part of the config file +| #{realpath path} or #{rp ...} | Replace with normalized path | Must be a valid path | +| #{getenv VAR} or #{gv VAR} | Replace with content of env variable | Must be a valid var | +| #{get s v} or #{g s v} | Replace with variable v from section s | Variable must be defined before use | +| #{rget v} | Replace with variable v from target or default of runconfigs file | | +| #{mtrah} | Replace with the path to the megatest testsuite area | | +|====================== + +Config File Settings +~~~~~~~~~~~~~~~~~~~~ + +Settings in megatest.config + +Config File Additional Features +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Including output from a script as if it was inline to the config file: + +------------------------- +[scriptinc myscript.sh] +------------------------- + +If the script outputs: + +------------------------- +[items] +A a b c +B d e f +------------------------- + +Then the config file would effectively appear to contain an items section +exactly like the output from the script. This is extremely useful when +dynamically creating items, itemstables and other config structures. You can +see the expansion of the call by looking in the cached files (look in your +linktree for megatest.config and runconfigs.config cache files and in your +test run areas for the expanded and cached testconfig). Disk Space Checks ^^^^^^^^^^^^^^^^^ Some parameters you can put in the [setup] section of megatest.config: Index: ezsteps.scm ================================================================== --- ezsteps.scm +++ ezsteps.scm @@ -164,11 +164,11 @@ new-state new-status (args:get-arg "-m") #f) ;; need to update the top test record if PASS or FAIL and this is a subtest (if (not (equal? item-path "")) - (cdb:roll-up-pass-fail-counts *runremote* run-id test-name item-path new-status)))) + (cdb:set-state-status-and-roll-up-items *runremote* run-id test-name item-path new-status)))) ;; for automated creation of the rollup html file this is a good place... (if (not (equal? item-path "")) (tests:summarize-items #f run-id test-id test-name #f)) ;; don't force - just update if no ))) (pop-directory) Index: http-transport.scm ================================================================== --- http-transport.scm +++ http-transport.scm @@ -8,11 +8,11 @@ ;; implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR ;; PURPOSE. (require-extension (srfi 18) extras tcp s11n) -(use srfi-1 posix regex regex-case srfi-69 hostinfo md5 message-digest) ;; sqlite3 +(use srfi-1 posix regex regex-case srfi-69 hostinfo md5 message-digest posix-extras) ;; sqlite3 ;; (import (prefix sqlite3 sqlite3:)) (use spiffy uri-common intarweb http-client spiffy-request-vars intarweb spiffy-directory-listing) ;; Configurations for server @@ -47,11 +47,11 @@ ;; Call this to start the actual server ;; (define *db:process-queue-mutex* (make-mutex)) -(define (http-transport:run hostn run-id server-id) +(define (http-transport:run hostn) (debug:print 2 *default-log-port* "Attempting to start the server ...") (let* ((db #f) ;; (open-db)) ;; we don't want the server to be opening and closing the db unnecesarily (hostname (get-host-name)) (ipaddrstr (let ((ipstr (if (string=? "-" hostn) ;; (string-intersperse (map number->string (u8vector->list (hostname->ip hostname))) ".") @@ -104,18 +104,17 @@ ((equal? (uri-path (request-uri (current-request))) '(/ "hey")) (send-response body: "hey there!\n" headers: '((content-type text/plain)))) (else (continue)))))))) - (http-transport:try-start-server run-id ipaddrstr start-port server-id))) + (http-transport:try-start-server ipaddrstr start-port))) ;; This is recursively run by http-transport:run until sucessful ;; -(define (http-transport:try-start-server run-id ipaddrstr portnum server-id) - (let ((config-hostname (configf:lookup *configdat* "server" "hostname")) - (tdbdat (tasks:open-db))) - (debug:print-info 0 *default-log-port* "http-transport:try-start-server time=" (seconds->time-string (current-seconds)) " run-id=" run-id " ipaddrsstr=" ipaddrstr " portnum=" portnum " server-id=" server-id " config-hostname=" config-hostname) +(define (http-transport:try-start-server ipaddrstr portnum) + (let ((config-hostname (configf:lookup *configdat* "server" "hostname"))) + (debug:print-info 0 *default-log-port* "http-transport:try-start-server time=" (seconds->time-string (current-seconds)) " ipaddrsstr=" ipaddrstr " portnum=" portnum " config-hostname=" config-hostname) (handle-exceptions exn (begin (print-error-message exn) (if (< portnum 64000) @@ -126,34 +125,26 @@ (portlogger:open-run-close portlogger:set-failed portnum) (debug:print 0 *default-log-port* "WARNING: failed to start on portnum: " portnum ", trying next port") (thread-sleep! 0.1) ;; get_next_port goes here - (http-transport:try-start-server run-id - ipaddrstr - (portlogger:open-run-close portlogger:find-port) - server-id)) + (http-transport:try-start-server ipaddrstr + (portlogger:open-run-close portlogger:find-port))) (begin - (tasks:server-force-clean-run-record (db:delay-if-busy tdbdat) run-id ipaddrstr portnum " http-transport:try-start-server") (print "ERROR: Tried and tried but could not start the server")))) ;; any error in following steps will result in a retry (set! *server-info* (list ipaddrstr portnum)) - (tasks:server-set-interface-port - (db:delay-if-busy tdbdat) - server-id - ipaddrstr portnum) (debug:print 0 *default-log-port* "INFO: Trying to start server on " ipaddrstr ":" portnum) ;; This starts the spiffy server ;; NEED WAY TO SET IP TO #f TO BIND ALL ;; (start-server bind-address: ipaddrstr port: portnum) (if config-hostname ;; this is a hint to bind directly (start-server port: portnum bind-address: (if (equal? config-hostname "-") ipaddrstr config-hostname)) (start-server port: portnum)) - ;; (portlogger:open-run-close portlogger:set-port portnum "released") - (tasks:server-force-clean-run-record (db:delay-if-busy tdbdat) run-id ipaddrstr portnum " http-transport:try-start-server") + (portlogger:open-run-close portlogger:set-port portnum "released") (debug:print 1 *default-log-port* "INFO: server has been stopped")))) ;;====================================================================== ;; S E R V E R U T I L I T I E S ;;====================================================================== @@ -217,11 +208,11 @@ (let* ((fullurl (if (vector? serverdat) (http-transport:server-dat-get-api-req serverdat) (begin (debug:print 0 *default-log-port* "FATAL ERROR: http-transport:client-api-send-receive called with no server info") (exit 1)))) - (res #f) + (res (vector #f "uninitialized")) (success #t) (sparams (db:obj->string params transport: 'http))) (debug:print-info 11 *default-log-port* "fullurl=" fullurl ", cmd=" cmd ", params=" params ", run-id=" run-id "\n") ;; set up the http-client here (max-retry-attempts 1) @@ -341,17 +332,16 @@ server-dat)) ;; run http-transport:keep-running in a parallel thread to monitor that the db is being ;; used and to shutdown after sometime if it is not. ;; -(define (http-transport:keep-running server-id run-id) +(define (http-transport:keep-running) ;; if none running or if > 20 seconds since ;; server last used then start shutdown ;; This thread waits for the server to come alive - (debug:print-info 0 *default-log-port* "Starting the sync-back, keep alive thread in server for run-id=" run-id) - (let* ((tdbdat (tasks:open-db)) - (server-start-time (current-seconds)) + (debug:print-info 0 *default-log-port* "Starting the sync-back, keep alive thread in server") + (let* ((server-start-time (current-seconds)) (server-info (let loop ((start-time (current-seconds)) (changed #t) (last-sdat "not this")) (let ((sdat #f)) (thread-sleep! 0.01) @@ -368,45 +358,34 @@ (begin (debug:print-info 0 *default-log-port* "Still waiting, last-sdat=" last-sdat) (sleep 4) (if (> (- (current-seconds) start-time) 120) ;; been waiting for two minutes (begin - (debug:print-error 0 *default-log-port* "transport appears to have died, exiting server " server-id " for run " run-id) - (tasks:server-delete-record (db:delay-if-busy tdbdat) server-id "failed to start, never received server alive signature") + (debug:print-error 0 *default-log-port* "transport appears to have died, exiting server") (exit)) (loop start-time (equal? sdat last-sdat) sdat))))))) (iface (car server-info)) (port (cadr server-info)) (last-access 0) (server-timeout (server:get-timeout)) - (server-going #f)) + (server-going #f) + (server-log-file (args:get-arg "-log"))) ;; always set when we are a server (let loop ((count 0) (server-state 'available) (bad-sync-count 0) (start-time (current-milliseconds))) - ;; Use this opportunity to sync the tmp db to megatest.db (if (not server-going) ;; *dbstruct-db* - ;; Removed code is pasted below (keeping it around until we are clear it is not needed). - ;; no *dbstruct-db* yet, set running after our first pass through and start the db - (if (eq? server-state 'available) - (let ((new-server-id (tasks:server-am-i-the-server? (db:delay-if-busy tdbdat) run-id))) ;; try to ensure no double registering of servers - (if (equal? new-server-id server-id) - (begin - (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "dbprep") - (thread-sleep! 0.5) ;; give some margin for queries to complete before switching from file based access to server based access - (set! *dbstruct-db* (db:setup)) ;; run-id)) - (set! server-going #t) - (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "running") - (server:write-dotserver *toppath* (conc iface ":" port)) - (delete-file* (conc *toppath* "/.starting-server"))) - (begin ;; gotta exit nicely - (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "collision") - (http-transport:server-shutdown server-id port)))))) - + (begin + (debug:print 0 *default-log-port* "SERVER: dbprep") + (set! *dbstruct-db* (db:setup)) ;; run-id)) + (set! server-going #t) + (debug:print 0 *default-log-port* "SERVER: running, megatest version: " (common:get-full-version)) ;; NOTE: the server is NOT yet marked as running in the log. We do that in the keep-running routine. + (thread-start! *watchdog*))) + ;; when things go wrong we don't want to be doing the various queries too often ;; so we strive to run this stuff only every four seconds or so. (let* ((sync-time (- (current-milliseconds) start-time)) (rem-time (quotient (- 4000 sync-time) 1000))) (if (and (<= rem-time 4) @@ -419,175 +398,111 @@ ;; Check that iface and port have not changed (can happen if server port collides) (mutex-lock! *heartbeat-mutex*) (set! sdat *server-info*) (mutex-unlock! *heartbeat-mutex*) - (if (or (not (equal? sdat (list iface port))) - (not server-id)) - (begin - (debug:print-info 0 *default-log-port* "interface changed, refreshing iface and port info") - (set! iface (car sdat)) - (set! port (cadr sdat)))) + (if (not (equal? sdat (list iface port))) + (let ((new-iface (car sdat)) + (new-port (cadr sdat))) + (debug:print-info 0 *default-log-port* "WARNING: interface changed, refreshing iface and port info") + (set! iface new-iface) + (set! port new-port) + (debug:print 0 *default-log-port* "SERVER STARTED: " iface ":" port " AT " (current-seconds)) + (flush-output *default-log-port*))) ;; Transfer *db-last-access* to last-access to use in checking that we are still alive (mutex-lock! *heartbeat-mutex*) (set! last-access *db-last-access*) (mutex-unlock! *heartbeat-mutex*) + + (if (common:low-noise-print 120 (conc "server running on " iface ":" port)) + (begin + (debug:print 0 *default-log-port* "SERVER STARTED: " iface ":" port " AT " (current-seconds)) + (flush-output *default-log-port*))) - ;; (debug:print 11 *default-log-port* "last-access=" last-access ", server-timeout=" server-timeout) - ;; - ;; no_traffic, no running tests, if server 0, no running servers - ;; - ;; (let ((wait-on-running (configf:lookup *configdat* "server" b"wait-on-running"))) ;; wait on running tasks (if not true then exit on time out) - ;; (let* ((hrs-since-start (/ (- (current-seconds) server-start-time) 3600)) (adjusted-timeout (if (> hrs-since-start 1) (- server-timeout (inexact->exact (round (* hrs-since-start 60)))) ;; subtract 60 seconds per hour server-timeout))) (if (common:low-noise-print 120 "server timeout") (debug:print-info 0 *default-log-port* "Adjusted server timeout: " adjusted-timeout)) - (if (and *server-run* - (> (+ last-access server-timeout) - (current-seconds))) - (begin - (if (common:low-noise-print 120 "server continuing") - (debug:print-info 0 *default-log-port* "Server continuing, seconds since last db access: " (- (current-seconds) last-access))) - ;; - ;; Consider implementing some smarts here to re-insert the record or kill self is - ;; the db indicates so - ;; - ;; (if (tasks:server-am-i-the-server? tdb run-id) - ;; (tasks:server-set-state! tdb server-id "running")) - ;; - (loop 0 server-state bad-sync-count (current-milliseconds))) - (http-transport:server-shutdown server-id port)))))) - -;; code cut out from above -;; -;; (condition-case -;; ;; (if (and (member (mutex-state *db-sync-mutex*) '(abandoned not-abandoned)) -;; ;; (> (- (current-seconds) *db-last-sync*) 5)) ;; if not currently being synced nor recently synced -;; (db:sync-touched *dbstruct-db* *run-id* force-sync: #t) ;; usually done in the watchdog, not here. -;; ((sync-failed)(cond -;; ((> bad-sync-count 10) ;; time to give up -;; (http-transport:server-shutdown server-id port)) -;; (else ;; (> bad-sync-count 0) ;; we've had a fail or two, delay and loop -;; (thread-sleep! 5) -;; (loop count server-state (+ bad-sync-count 1))))) -;; ((exn) -;; (debug:print-error 0 *default-log-port* "error from sync code other than 'sync-failed. Attempting to gracefully shutdown the server") -;; (tasks:server-delete-record (db:delay-if-busy tdbdat) server-id " http-transport:keep-running crashed") -;; (exit))) -;; (set! sync-time (- (current-milliseconds) start-time)) -;; (set! rem-time (quotient (- 4000 sync-time) 1000)) -;; (debug:print 4 *default-log-port* "SYNC: time= " sync-time ", rem-time=" rem-time) -;; -;; (if (and (<= rem-time 4) -;; (> rem-time 0)) -;; (thread-sleep! rem-time) -;; (thread-sleep! 4))) ;; fallback for if the math is changed ... - -(define (http-transport:server-shutdown server-id port) + (cond + ((and *server-run* + (> (+ last-access server-timeout) + (current-seconds)) + (< (- (current-seconds) server-start-time) 3600)) ;; do not update log or touch log if we've been running for more than one hour. + (if (common:low-noise-print 120 "server continuing") + (debug:print-info 0 *default-log-port* "Server continuing, seconds since last db access: " (- (current-seconds) last-access)) + (let ((curr-time (current-seconds))) + (change-file-times server-log-file curr-time curr-time))) + (loop 0 server-state bad-sync-count (current-milliseconds))) + (else + (debug:print-info 0 *default-log-port* "Server timed out. seconds since last db access: " (- (current-seconds) last-access)) + (http-transport:server-shutdown port))))))) + +(define (http-transport:server-shutdown port) (let ((tdbdat (tasks:open-db))) - (debug:print-info 0 *default-log-port* "Starting to shutdown the server.") + ;;(BB> "http-transport:server-shutdown called") + (debug:print-info 0 *default-log-port* "Starting to shutdown the server. pid="(current-process-id)) ;; ;; start_shutdown ;; - (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "shutting-down") + ;; (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "shutting-down") (set! *time-to-exit* #t) ;; tell on-exit to be fast as we've already cleaned up (portlogger:open-run-close portlogger:set-port port "released") - (thread-sleep! 5) - (debug:print-info 0 *default-log-port* "Max cached queries was " *max-cache-size*) - (debug:print-info 0 *default-log-port* "Number of cached writes " *number-of-writes*) - (debug:print-info 0 *default-log-port* "Average cached write time " - (if (eq? *number-of-writes* 0) - "n/a (no writes)" - (/ *writes-total-delay* - *number-of-writes*)) - " ms") - (debug:print-info 0 *default-log-port* "Number non-cached queries " *number-non-write-queries*) - (debug:print-info 0 *default-log-port* "Average non-cached time " - (if (eq? *number-non-write-queries* 0) - "n/a (no queries)" - (/ *total-non-write-delay* - *number-non-write-queries*)) - " ms") + (thread-sleep! 1) + + ;; (debug:print-info 0 *default-log-port* "Max cached queries was " *max-cache-size*) + ;; (debug:print-info 0 *default-log-port* "Number of cached writes " *number-of-writes*) + ;; (debug:print-info 0 *default-log-port* "Average cached write time " + ;; (if (eq? *number-of-writes* 0) + ;; "n/a (no writes)" + ;; (/ *writes-total-delay* + ;; *number-of-writes*)) + ;; " ms") + ;; (debug:print-info 0 *default-log-port* "Number non-cached queries " *number-non-write-queries*) + ;; (debug:print-info 0 *default-log-port* "Average non-cached time " + ;; (if (eq? *number-non-write-queries* 0) + ;; "n/a (no queries)" + ;; (/ *total-non-write-delay* + ;; *number-non-write-queries*)) + ;; " ms") + + (db:print-current-query-stats) + (debug:print-info 0 *default-log-port* "Server shutdown complete. Exiting") - (tasks:server-delete-record (db:delay-if-busy tdbdat) server-id " http-transport:keep-running complete") - ;; if the .server file contained :myport then we can remove it - (server:remove-dotserver-file *toppath* port) (exit))) ;; all routes though here end in exit ... ;; ;; start_server? ;; -(define (http-transport:launch run-id) - (with-output-to-file - (conc *toppath* "/.starting-server") - (lambda () - (print (current-process-id) " on " (get-host-name)))) - (let* ((tdbdat (tasks:open-db))) - (set! *run-id* run-id) - (if (args:get-arg "-daemonize") - (begin - (daemon:ize) - (if *alt-log-file* ;; we should re-connect to this port, I think daemon:ize disrupts it - (begin - (current-error-port *alt-log-file*) - (current-output-port *alt-log-file*))))) - (if (and (server:read-dotserver *toppath*) - (server:check-if-running run-id)) - (begin - (debug:print 0 *default-log-port* "INFO: Server for run-id " run-id " already running") - (exit 0)) - (begin ;; ok, no server detected, clean out any lingering records - (tasks:server-force-clean-running-records-for-run-id (db:delay-if-busy tdbdat) run-id "notresponding"))) - (let loop ((server-id (tasks:server-lock-slot (db:delay-if-busy tdbdat) run-id)) - (remtries 4)) - (if (not server-id) - (if (> remtries 0) - (begin - (thread-sleep! 2) - (loop (tasks:server-lock-slot (db:delay-if-busy tdbdat) run-id) - (- remtries 1))) - (begin - ;; since we didn't get the server lock we are going to clean up and bail out - (debug:print-info 2 *default-log-port* "INFO: server pid=" (current-process-id) ", hostname=" (get-host-name) " not starting due to other candidates ahead in start queue") - (tasks:server-delete-records-for-this-pid (db:delay-if-busy tdbdat) " http-transport:launch") - (delete-file* (conc *toppath* "/.starting-server")) - )) - (let* ((th2 (make-thread (lambda () - (debug:print-info 0 *default-log-port* "Server run thread started") - (http-transport:run - (if (args:get-arg "-server") - (args:get-arg "-server") - "-") - run-id - server-id)) "Server run")) - (th3 (make-thread (lambda () - (debug:print-info 0 *default-log-port* "Server monitor thread started") - (http-transport:keep-running server-id run-id)) - "Keep running"))) - (thread-start! th2) - (thread-sleep! 0.25) ;; give the server time to settle before starting the keep-running monitor. - (thread-start! th3) - (set! *didsomething* #t) - (thread-join! th2) - (exit)))))) - -;; (define (http:ping run-id host-port) -;; (let* ((server-dat (http-transport:client-connect (car host-port)(cadr host-port))) -;; (login-res (rmt:login-no-auto-client-setup server-dat run-id))) -;; (if (and (list? login-res) -;; (car login-res)) -;; (begin -;; (print "LOGIN_OK") -;; (exit 0)) -;; (begin -;; (print "LOGIN_FAILED") -;; (exit 1))))) +(define (http-transport:launch) + (if (args:get-arg "-daemonize") + (begin + (daemon:ize) + (if *alt-log-file* ;; we should re-connect to this port, I think daemon:ize disrupts it + (begin + (current-error-port *alt-log-file*) + (current-output-port *alt-log-file*))))) + (let* ((th2 (make-thread (lambda () + (debug:print-info 0 *default-log-port* "Server run thread started") + (http-transport:run + (if (args:get-arg "-server") + (args:get-arg "-server") + "-") + )) "Server run")) + (th3 (make-thread (lambda () + (debug:print-info 0 *default-log-port* "Server monitor thread started") + (http-transport:keep-running) + "Keep running")))) + (thread-start! th2) + (thread-sleep! 0.25) ;; give the server time to settle before starting the keep-running monitor. + (thread-start! th3) + (set! *didsomething* #t) + (thread-join! th2) + (exit))) (define (http-transport:server-signal-handler signum) (signal-mask! signum) (handle-exceptions exn DELETED inteldate.scm Index: inteldate.scm ================================================================== --- inteldate.scm +++ /dev/null @@ -1,180 +0,0 @@ -(use srfi-19) -(use test) -(use format) -(use regex) -(declare (unit inteldate)) -;; utility procedures to convert among -;; different ways to express date (inteldate, seconds since epoch, isodate) -;; -;; samples: -;; isodate -> "2016-01-01" -;; inteldate -> "16ww01.5" -;; seconds -> 1451631600 - -;; procedures provided: -;; ==================== -;; seconds->isodate -;; seconds->inteldate -;; -;; isodate->seconds -;; isodate->inteldate -;; -;; inteldate->seconds -;; inteldate->isodate - -;; srfi-19 used extensively; this doc is better tha the eggref: -;; http://srfi.schemers.org/srfi-19/srfi-19.html - -;; Author: brandon.j.barclay@intel.com 16ww18.6 - -(define (date->seconds date) - (inexact->exact - (string->number - (date->string date "~s")))) - -(define (seconds->isodate seconds) - (let* ((date (seconds->date seconds)) - (result (date->string date "~Y-~m-~d"))) - result)) - -(define (isodate->seconds isodate) - "Takes a string input of the form 'YY-MM-DD' or 'YYYY-MM-DD' and returns epoch time; for YY, assume after Y2K" - (let* ((numlist (map string->number (string-split isodate "-"))) - (raw-year (car numlist)) - (year (if (< raw-year 100) (+ raw-year 2000) raw-year)) - (month (list-ref numlist 1)) - (day (list-ref numlist 2)) - (date (make-date 0 0 0 0 day month year)) - (seconds (date->seconds date))) - - seconds)) - -;; adapted from perl Intel::WorkWeek perl module -;; intel year consists of numbered weeks starting from week 1 -;; week 1 is the week containing jan 1 of the year -;; days of week are numbered starting from 0 on sunday -;; intel year does not match calendar year in workweek 1 -;; before jan1. -(define (seconds->inteldate-values seconds) - (define (date-difference->seconds d1 d2) - (- (date->seconds d1) (date->seconds d2))) - - (let* ((thisdate (seconds->date seconds)) - (thisdow (string->number (date->string thisdate "~w"))) - - (year (date-year thisdate)) - ;; intel workweek 1 begins on sunday of week containing jan1 - (jan1 (make-date 0 0 0 0 1 1 year)) - (jan1dow (date-week-day jan1)) - (ww01 (date-subtract-duration jan1 (seconds->time (* 60 60 24 jan1dow)))) - - (ww01_delta_seconds (date-difference->seconds thisdate ww01)) - (wwnum_initial (inexact->exact (add1 (floor (/ ww01_delta_seconds 24 3600 7) )))) - - ;; we could be in ww1 of next year - (this-saturday (seconds->date - (+ seconds - (* 60 60 24 (- 6 thisdow))))) - (this-week-ends-next-year? - (> (date-year this-saturday) year)) - (intelyear - (if this-week-ends-next-year? - (add1 year) - year)) - (intelweek - (if this-week-ends-next-year? - 1 - wwnum_initial))) - (values intelyear intelweek thisdow))) - -(define (seconds->inteldate seconds) - (define (string-leftpad in width pad-char) - (let* ((unpadded-str (->string in)) - (padlen_temp (- width (string-length unpadded-str))) - (padlen (if (< padlen_temp 0) 0 padlen_temp)) - (padding - (fold conc "" - (map (lambda (x) (->string pad-char)) (iota padlen))))) - (conc padding unpadded-str))) - (define (zeropad num width) - (string-leftpad num width #:0)) - - (let-values (((intelyear intelweek day-of-week-num) - (seconds->inteldate-values seconds))) - (let ((intelyear-str - (zeropad - (->string - (if (> intelyear 1999) - (- intelyear 2000) intelyear)) - 2)) - (intelweek-str - (zeropad (->string intelweek) 2)) - (dow-str (->string day-of-week-num))) - (conc intelyear-str "ww" intelweek-str "." dow-str)))) - -(define (isodate->inteldate isodate) - (seconds->inteldate - (isodate->seconds isodate))) - -(define (inteldate->seconds inteldate) - (let ((match (string-match "^(\\d+)ww(\\d+).(\\d)$" inteldate))) - (if - (not match) - #f - (let* ( - (intelyear-raw (string->number (list-ref match 1))) - (intelyear (if (< intelyear-raw 100) - (+ intelyear-raw 2000) - intelyear-raw)) - (intelww (string->number (list-ref match 2))) - (dayofweek (string->number (list-ref match 3))) - - (day-of-seconds (* 60 60 24 )) - (week-of-seconds (* day-of-seconds 7)) - - - ;; get seconds at ww1.0 - (new-years-date (make-date 0 0 0 0 1 1 intelyear)) - (new-years-seconds - (date->seconds new-years-date)) - (new-years-dayofweek (date-week-day new-years-date)) - (ww1.0_seconds (- new-years-seconds - (* day-of-seconds - new-years-dayofweek))) - (workweek-adjustment (* week-of-seconds (sub1 intelww))) - (weekday-adjustment (* dayofweek day-of-seconds)) - - (result (+ ww1.0_seconds workweek-adjustment weekday-adjustment))) - result)))) - -(define (inteldate->isodate inteldate) - (seconds->isodate (inteldate->seconds inteldate))) - -(define (inteldate-tests) - (test-group - "date conversion tests" - (let ((test-table - '(("16ww01.5" . "2016-01-01") - ("16ww18.5" . "2016-04-29") - ("1999ww33.5" . "1999-08-13") - ("16ww18.4" . "2016-04-28") - ("16ww18.3" . "2016-04-27") - ("13ww01.0" . "2012-12-30") - ("13ww52.6" . "2013-12-28") - ("16ww53.3" . "2016-12-28")))) - (for-each - (lambda (test-pair) - (let ((inteldate (car test-pair)) - (isodate (cdr test-pair))) - (test - (conc "(isodate->inteldate "isodate ") => "inteldate) - inteldate - (isodate->inteldate isodate)) - - (test - (conc "(inteldate->isodate "inteldate ") => "isodate) - isodate - (inteldate->isodate inteldate)))) - test-table)))) - -;(inteldate-tests) Index: launch.scm ================================================================== --- launch.scm +++ launch.scm @@ -64,18 +64,19 @@ (define (launch:load-logpro-dat run-id test-id stepname) (let ((cname (conc stepname ".dat"))) (if (file-exists? cname) (let* ((dat (read-config cname #f #f)) (csvr (db:logpro-dat->csv dat stepname)) - (csvt (let-values (( (fmt-cell fmt-record fmt-csv) (make-format ","))) - (fmt-csv (map list->csv-record csvr)))) + (csvt (let-values (((fmt-cell fmt-record fmt-csv) (make-format ","))) + (fmt-csv (map list->csv-record csvr)))) (status (configf:lookup dat "final" "exit-status")) (msg (configf:lookup dat "final" "message"))) - ;;(if csvt ;; this if blocked stack dump caused by .dat file from logpro being 0-byte. fixed by upgrading logpro + (if csvt ;; this if blocked stack dump caused by .dat file from logpro being 0-byte. fixed by upgrading logpro (rmt:csv->test-data run-id test-id csvt) - ;; (BB> "Error: run-id/test-id/stepname="run-id"/"test-id"/"stepname" => bad csvr="csvr) - ;; ) + (debug:print 0 *default-log-port* "ERROR: no csvdat exists for run-id: " run-id " test-id: " test-id " stepname: " stepname ", check that logpro version is 1.15 or newer")) + ;; (BB> "Error: run-id/test-id/stepname="run-id"/"test-id"/"stepname" => bad csvr="csvr) + ;; ) (cond ((equal? status "PASS") "PASS") ;; skip the message part if status is pass (status (conc (configf:lookup dat "final" "exit-status") ": " (if msg msg "no message"))) (else #f))) #f))) @@ -122,10 +123,22 @@ (call-with-environment-variables (list (cons "PATH" (conc (get-environment-variable "PATH") ":."))) (lambda () ;; (process-run "/bin/bash" "-c" "exec ls -l /tmp/foobar > /tmp/delme-more.log 2>&1") (let* ((cmd (conc stepcmd " > " stepname ".log 2>&1")) ;; >outfile 2>&1 (pid (process-run "/bin/bash" (list "-c" cmd)))) + + (with-output-to-file "Makefile.ezsteps" + (lambda () + (print stepname ".log :") + (print "\t" cmd) + (if (file-exists? (conc stepname ".logpro")) + (print "\tlogpro " stepname ".logpro " stepname ".html < " stepname ".log")) + (print) + (print stepname " : " stepname ".log") + (print)) + #:append) + (rmt:test-set-top-process-pid run-id test-id pid) (let processloop ((i 0)) (let-values (((pid-val exit-status exit-code)(process-wait pid #t))) (mutex-lock! m) (launch:einf-pid-set! exit-info pid) ;; (vector-set! exit-info 0 pid) @@ -241,11 +254,11 @@ ;; any of the other stuff that tests:test-set-status! does. Let's just ;; force RUNNING/n/a ;; (thread-sleep! 0.3) ;; (tests:test-force-state-status! run-id test-id "RUNNING" "n/a") - (rmt:roll-up-pass-fail-counts run-id test-name item-path #f "RUNNING" #f) + (rmt:set-state-status-and-roll-up-items run-id test-name item-path "RUNNING" #f #f) ;; (thread-sleep! 0.3) ;; NFS slowness has caused grief here ;; if there is a runscript do it first (if fullrunscript (let ((pid (process-run fullrunscript))) @@ -268,11 +281,11 @@ ;; do all the ezsteps (if any) (if ezsteps (let* ((testconfig ;; (read-config (conc work-area "/testconfig") #f #t environ-patt: "pre-launch-env-vars")) ;; FIXME??? is allow-system ok here? ;; NOTE: it is tempting to turn off force-create of testconfig but dynamic ;; ezstep names need a full re-eval here. - (tests:get-testconfig test-name tconfigreg #t force-create: #t)) ;; 'return-procs))) + (tests:get-testconfig test-name item-path tconfigreg #t force-create: #t)) ;; 'return-procs))) (ezstepslst (if (hash-table? testconfig) (hash-table-ref/default testconfig "ezsteps" '()) #f))) (if testconfig (hash-table-set! *testconfigs* test-name testconfig) ;; cached for lazy reads later ... @@ -316,15 +329,15 @@ (kill-tries 0)) ;; (tests:set-full-meta-info #f test-id run-id (calc-minutes) work-area) ;; (tests:set-full-meta-info test-id run-id (calc-minutes) work-area) (tests:set-full-meta-info #f test-id run-id (calc-minutes) work-area 10) (let loop ((minutes (calc-minutes)) - (cpu-load (get-cpu-load)) + (cpu-load (alist-ref 'adj-core-load (common:get-normalized-cpu-load #f))) (disk-free (get-df (current-directory)))) - (let ((new-cpu-load (let* ((load (get-cpu-load)) + (let ((new-cpu-load (let* ((load (alist-ref 'adj-core-load (common:get-normalized-cpu-load #f))) (delta (abs (- load cpu-load)))) - (if (> delta 0.6) ;; don't bother updating with small changes + (if (> delta 0.1) ;; don't bother updating with small changes load #f))) (new-disk-free (let* ((df (get-df (current-directory))) (delta (abs (- df disk-free)))) (if (> delta 200) ;; ignore changes under 200 Meg @@ -445,11 +458,11 @@ (if (eq? signum signal/stop) (debug:print-error 0 *default-log-port* "attempt to STOP process. Exiting.")) (set! *time-to-exit* #t) (print "Received signal " signum ", cleaning up before exit. Please wait...") (let ((th1 (make-thread (lambda () - (tests:test-force-state-status! run-id test-id "INCOMPLETE" "KILLED") + (rmt:test-set-state-status run-id test-id "INCOMPLETE" "KILLED" #f) (print "Killed by signal " signum ". Exiting") (thread-sleep! 1) (exit 1)))) (th2 (make-thread (lambda () (thread-sleep! 2) @@ -469,17 +482,23 @@ (test-host (db:test-get-host test-info)) (test-pid (db:test-get-process_id test-info))) (cond ((member (db:test-get-state test-info) '("INCOMPLETE" "KILLED" "UNKNOWN" "KILLREQ" "STUCK")) ;; prior run of this test didn't complete, go ahead and try to rerun (debug:print 0 *default-log-port* "INFO: test is INCOMPLETE or KILLED, treat this execute call as a rerun request") - (tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a")) ;; prime it for running + ;; (tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a") + (rmt:test-set-state-status run-id test-id "REMOTEHOSTSTART" "n/a" #f) + ) ;; prime it for running ((member (db:test-get-state test-info) '("RUNNING" "REMOTEHOSTSTART")) (if (process:alive-on-host? test-host test-pid) (debug:print-error 0 *default-log-port* "test state is " (db:test-get-state test-info) " and process " test-pid " is still running on host " test-host ", cannot proceed") - (tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a"))) + ;; (tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a") + (rmt:test-set-state-status run-id test-id "REMOTEHOSTSTART" "n/a" #f) + )) ((not (member (db:test-get-state test-info) '("REMOVING" "REMOTEHOSTSTART" "RUNNING" "KILLREQ"))) - (tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a")) + ;; (tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a") + (rmt:test-set-state-status run-id test-id "REMOTEHOSTSTART" "n/a" #f) + ) (else ;; (member (db:test-get-state test-info) '("REMOVING" "REMOTEHOSTSTART" "RUNNING" "KILLREQ")) (debug:print-error 0 *default-log-port* "test state is " (db:test-get-state test-info) ", cannot proceed") (exit)))) (debug:print 2 *default-log-port* "Exectuing " test-name " (id: " test-id ") on " (get-host-name)) @@ -639,11 +658,11 @@ test-id new-state new-status (args:get-arg "-m") #f) ;; need to update the top test record if PASS or FAIL and this is a subtest - ;; NO NEED TO CALL roll-up-pass-fail-counts HERE, THIS IS DONE IN roll-up-pass-fail-counts called by tests:test-set-status! + ;; NO NEED TO CALL set-state-status-and-roll-up-items HERE, THIS IS DONE IN set-state-status-and-roll-up-items called by tests:test-set-status! )) ;; for automated creation of the rollup html file this is a good place... (if (not (equal? item-path "")) (tests:summarize-items run-id test-id test-name #f)) (tests:summarize-test run-id test-id) ;; don't force - just update if no @@ -846,11 +865,14 @@ (directory-exists? *toppath*)) (begin (setenv "MT_RUN_AREA_HOME" *toppath*) (setenv "MT_TESTSUITE_NAME" (common:get-testsuite-name))) (begin - (debug:print-error 0 *default-log-port* "failed to find the top path to your Megatest area."))) + (debug:print-error 0 *default-log-port* "failed to find the top path to your Megatest area.") + ;;(exit 1) + #f + )) *toppath*)) (define (get-best-disk confdat testconfig) (let* ((disks (or (and testconfig (hash-table-ref/default testconfig "disks" #f)) (hash-table-ref/default confdat "disks" #f))) @@ -861,11 +883,11 @@ (if res (cdr res) (begin (if (common:low-noise-print 20 "No valid disks or no disk with enough space") (debug:print-error 0 *default-log-port* "No valid disks found in megatest.config. Please add some to your [disks] section and ensure the directory exists and has enough space!\n You can change minspace in the [setup] section of megatest.config. Current setting is: " minspace)) - (exit 1))))))) + (exit 1))))))) ;; TODO - move the exit to the calling location and return #f ;; Desired directory structure: ;; ;; - - -. ;; | @@ -1053,198 +1075,201 @@ ;; 4. remotely run the test on allocated host ;; - could be ssh to host from hosts table (update regularly with load) ;; - could be netbatch ;; (launch-test db (cadr status) test-conf)) (define (launch-test test-id run-id run-info keyvals runname test-conf test-name test-path itemdat params) - (let loop ((delta (- (current-seconds) *last-launch*)) - (launch-delay (string->number (or (configf:lookup *configdat* "setup" "launch-delay") "5")))) - (if (> launch-delay delta) - (begin - (debug:print-info 0 *default-log-port* "Delaying launch of " test-name " for " (- launch-delay delta) " seconds") - (thread-sleep! (- launch-delay delta)) - (loop (- (current-seconds) *last-launch*) launch-delay)))) - (set! *last-launch* (current-seconds)) - (change-directory *toppath*) - (alist->env-vars ;; consolidate this code with the code in megatest.scm for "-execute" - (list ;; (list "MT_TEST_RUN_DIR" work-area) - (list "MT_RUN_AREA_HOME" *toppath*) - (list "MT_TEST_NAME" test-name) - ;; (list "MT_ITEM_INFO" (conc itemdat)) - (list "MT_RUNNAME" runname) - ;; (list "MT_TARGET" mt_target) - )) - (let* ((tregistry (tests:get-all)) - (item-path (let ((ip (item-list->path itemdat))) - (alist->env-vars (list (list "MT_ITEMPATH" ip))) - ip)) - (tconfig (or (tests:get-testconfig test-name tregistry #t force-create: #t) - test-conf)) ;; force re-read now that all vars are set - (useshell (let ((ush (config-lookup *configdat* "jobtools" "useshell"))) - (if ush - (if (equal? ush "no") ;; must use "no" to NOT use shell - #f - ush) - #t))) ;; default is yes - (runscript (config-lookup tconfig "setup" "runscript")) - (ezsteps (> (length (hash-table-ref/default tconfig "ezsteps" '())) 0)) ;; don't send all the steps, could be big - (diskspace (config-lookup tconfig "requirements" "diskspace")) - (memory (config-lookup tconfig "requirements" "memory")) - (hosts (config-lookup *configdat* "jobtools" "workhosts")) - (remote-megatest (config-lookup *configdat* "setup" "executable")) - (run-time-limit (or (configf:lookup tconfig "requirements" "runtimelim") - (configf:lookup *configdat* "setup" "runtimelim"))) - ;; FIXME SOMEDAY: not good how this is so obtuse, this hack is to - ;; allow running from dashboard. Extract the path - ;; from the called megatest and convert dashboard - ;; or dboard to megatest - (local-megatest (let* ((lm (car (argv))) - (dir (pathname-directory lm)) - (exe (pathname-strip-directory lm))) - (conc (if dir (conc dir "/") "") - (case (string->symbol exe) - ((dboard) "../megatest") - ((mtest) "../megatest") - ((dashboard) "megatest") - (else exe))))) - (launcher (common:get-launcher *configdat* test-name item-path)) ;; (config-lookup *configdat* "jobtools" "launcher")) - (test-sig (conc (common:get-testsuite-name) ":" test-name ":" item-path)) ;; (item-list->path itemdat))) ;; test-path is the full path including the item-path - (work-area #f) - (toptest-work-area #f) ;; for iterated tests the top test contains data relevant for all - (diskpath #f) - (cmdparms #f) - (fullcmd #f) ;; (define a (with-output-to-string (lambda ()(write x)))) - (mt-bindir-path #f) - (testinfo (rmt:get-test-info-by-id run-id test-id)) - (mt_target (string-intersperse (map cadr keyvals) "/")) - (debug-param (append (if (args:get-arg "-debug") (list "-debug" (args:get-arg "-debug")) '()) - (if (args:get-arg "-logging")(list "-logging") '())))) - - (setenv "MT_ITEMPATH" item-path) - (if hosts (set! hosts (string-split hosts))) - ;; set the megatest to be called on the remote host - (if (not remote-megatest)(set! remote-megatest local-megatest)) ;; "megatest")) - (set! mt-bindir-path (pathname-directory remote-megatest)) - (if launcher (set! launcher (string-split launcher))) - ;; set up the run work area for this test - (if (and (args:get-arg "-preclean") ;; user has requested to preclean for this run - (not (member (db:test-get-rundir testinfo)(list "n/a" "/tmp/badname")))) ;; n/a is a placeholder and thus not a read dir - (begin - (debug:print-info 0 *default-log-port* "attempting to preclean directory " (db:test-get-rundir testinfo) " for test " test-name "/" item-path) - (runs:remove-test-directory testinfo 'remove-data-only))) ;; remove data only, do not perturb the record - - ;; prevent overlapping actions - set to LAUNCHED as early as possible - ;; - ;; the following call handles waiver propogation. cannot yet condense into roll-up-pass-fail - (tests:test-set-status! run-id test-id "LAUNCHED" "n/a" #f #f) ;; (if launch-results launch-results "FAILED")) - (rmt:roll-up-pass-fail-counts run-id test-name item-path #f "LAUNCHED" #f) - (set! diskpath (get-best-disk *configdat* tconfig)) - (if diskpath - (let ((dat (create-work-area run-id run-info keyvals test-id test-path diskpath test-name itemdat))) - (set! work-area (car dat)) - (set! toptest-work-area (cadr dat)) - (debug:print-info 2 *default-log-port* "Using work area " work-area)) - (begin - (set! work-area (conc test-path "/tmp_run")) - (create-directory work-area #t) - (debug:print 0 *default-log-port* "WARNING: No disk work area specified - running in the test directory under tmp_run"))) - (set! cmdparms (base64:base64-encode - (z3:encode-buffer - (with-output-to-string - (lambda () ;; (list 'hosts hosts) - (write (list (list 'testpath test-path) - (list 'transport (conc *transport-type*)) - ;; (list 'serverinf *server-info*) - (list 'toppath *toppath*) - (list 'work-area work-area) - (list 'test-name test-name) - (list 'runscript runscript) - (list 'run-id run-id ) - (list 'test-id test-id ) - ;; (list 'item-path item-path ) - (list 'itemdat itemdat ) - (list 'megatest remote-megatest) - (list 'ezsteps ezsteps) - (list 'target mt_target) - (list 'runtlim (if run-time-limit (common:hms-string->seconds run-time-limit) #f)) - (list 'env-ovrd (hash-table-ref/default *configdat* "env-override" '())) - (list 'set-vars (if params (hash-table-ref/default params "-setvars" #f))) - (list 'runname runname) - (list 'mt-bindir-path mt-bindir-path)))))))) - - ;; clean out step records from previous run if they exist - ;; (rmt:delete-test-step-records run-id test-id) - ;; if the dir does not exist we may have a itempath where individual variables are a path, launch anyway - (if (file-exists? work-area) - (change-directory work-area)) ;; so that log files from the launch process don't clutter the test dir - (cond - ((and launcher hosts) ;; must be using ssh hostname - (set! fullcmd (append launcher (car hosts)(list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param))) - ;; (set! fullcmd (append launcher (car hosts)(list remote-megatest test-sig "-execute" cmdparms)))) - (launcher - (set! fullcmd (append launcher (list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param))) - ;; (set! fullcmd (append launcher (list remote-megatest test-sig "-execute" cmdparms)))) - (else - (if (not useshell)(debug:print 0 *default-log-port* "WARNING: internal launching will not work well without \"useshell yes\" in your [jobtools] section")) - (set! fullcmd (append (list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param (list (if useshell "&" "")))))) - ;; (set! fullcmd (list remote-megatest test-sig "-execute" cmdparms (if useshell "&" ""))))) - (if (args:get-arg "-xterm")(set! fullcmd (append fullcmd (list "-xterm")))) - (debug:print 1 *default-log-port* "Launching " work-area) - ;; set pre-launch-env-vars before launching, keep the vars in prevvals and put the envionment back when done - (debug:print 4 *default-log-port* "fullcmd: " fullcmd) - (let* ((commonprevvals (alist->env-vars - (hash-table-ref/default *configdat* "env-override" '()))) - (testprevvals (alist->env-vars - (hash-table-ref/default tconfig "pre-launch-env-overrides" '()))) - (miscprevvals (alist->env-vars ;; consolidate this code with the code in megatest.scm for "-execute" - (append (list (list "MT_TEST_RUN_DIR" work-area) - (list "MT_TEST_NAME" test-name) - (list "MT_ITEM_INFO" (conc itemdat)) - (list "MT_RUNNAME" runname) - (list "MT_TARGET" mt_target) - (list "MT_ITEMPATH" item-path) - ) - itemdat))) - ;; Launchwait defaults to true, must override it to turn off wait - (launchwait (if (equal? (configf:lookup *configdat* "setup" "launchwait") "no") #f #t)) - (launch-results (apply (if launchwait - process:cmd-run-with-stderr->list - process-run) - (if useshell - (let ((cmdstr (string-intersperse fullcmd " "))) - (if launchwait - cmdstr - (conc cmdstr " >> mt_launch.log 2>&1"))) - (car fullcmd)) - (if useshell - '() - (cdr fullcmd))))) - (if (not launchwait) ;; give the OS a little time to allow the process to start - (thread-sleep! 0.01)) - (with-output-to-file "mt_launch.log" - (lambda () - (print "LAUNCHCMD: " (string-intersperse fullcmd " ")) - (if (list? launch-results) - (apply print launch-results) - (print "NOTE: launched \"" fullcmd "\"\n but did not wait for it to proceed. Add the following to megatest.config \n[setup]\nlaunchwait yes\n if you have problems with this")) - #:append)) - (debug:print 2 *default-log-port* "Launching completed, updating db") - (debug:print 2 *default-log-port* "Launch results: " launch-results) - (if (not launch-results) - (begin - (print "ERROR: Failed to run " (string-intersperse fullcmd " ") ", exiting now") - ;; (sqlite3:finalize! db) - ;; good ole "exit" seems not to work - ;; (_exit 9) - ;; but this hack will work! Thanks go to Alan Post of the Chicken email list - ;; NB// Is this still needed? Should be safe to go back to "exit" now? - (process-signal (current-process-id) signal/kill) - )) - (alist->env-vars miscprevvals) - (alist->env-vars testprevvals) - (alist->env-vars commonprevvals) - launch-results)) - (change-directory *toppath*)) + (mutex-lock! *launch-setup-mutex*) ;; setting variables and processing the testconfig is NOT thread-safe, reuse the launch-setup mutex + (let* ((item-path (item-list->path itemdat))) + (let loop ((delta (- (current-seconds) *last-launch*)) + (launch-delay (string->number (or (configf:lookup *configdat* "setup" "launch-delay") "5")))) + (if (> launch-delay delta) + (begin + (debug:print-info 0 *default-log-port* "Delaying launch of " test-name " for " (- launch-delay delta) " seconds") + (thread-sleep! (- launch-delay delta)) + (loop (- (current-seconds) *last-launch*) launch-delay)))) + (change-directory *toppath*) + (alist->env-vars ;; consolidate this code with the code in megatest.scm for "-execute", *maybe* - the longer they are set the longer each launch takes (must be non-overlapping with the vars) + (append + (list + (list "MT_RUN_AREA_HOME" *toppath*) + (list "MT_TEST_NAME" test-name) + (list "MT_RUNNAME" runname) + (list "MT_ITEMPATH" item-path) + ) + itemdat)) + (let* ((tregistry (tests:get-all)) ;; third param (below) is system-allowed + ;; for tconfig, why do we allow fallback to test-conf? + (tconfig (or (tests:get-testconfig test-name item-path tregistry #t force-create: #t) + (begin + (debug:print 0 *default-log-port* "WARNING: falling back to pre-calculated testconfig. This is likely not desired.") + test-conf))) ;; force re-read now that all vars are set + (useshell (let ((ush (config-lookup *configdat* "jobtools" "useshell"))) + (if ush + (if (equal? ush "no") ;; must use "no" to NOT use shell + #f + ush) + #t))) ;; default is yes + (runscript (config-lookup tconfig "setup" "runscript")) + (ezsteps (> (length (hash-table-ref/default tconfig "ezsteps" '())) 0)) ;; don't send all the steps, could be big + ;; (diskspace (config-lookup tconfig "requirements" "diskspace")) + ;; (memory (config-lookup tconfig "requirements" "memory")) + ;; (hosts (config-lookup *configdat* "jobtools" "workhosts")) ;; I'm pretty sure this was never completed + (remote-megatest (config-lookup *configdat* "setup" "executable")) + (run-time-limit (or (configf:lookup tconfig "requirements" "runtimelim") + (configf:lookup *configdat* "setup" "runtimelim"))) + ;; FIXME SOMEDAY: not good how this is so obtuse, this hack is to + ;; allow running from dashboard. Extract the path + ;; from the called megatest and convert dashboard + ;; or dboard to megatest + (local-megatest (let* ((lm (car (argv))) + (dir (pathname-directory lm)) + (exe (pathname-strip-directory lm))) + (conc (if dir (conc dir "/") "") + (case (string->symbol exe) + ((dboard) "../megatest") + ((mtest) "../megatest") + ((dashboard) "megatest") + (else exe))))) + (launcher (common:get-launcher *configdat* test-name item-path)) ;; (config-lookup *configdat* "jobtools" "launcher")) + (test-sig (conc (common:get-testsuite-name) ":" test-name ":" item-path)) ;; (item-list->path itemdat))) ;; test-path is the full path including the item-path + (work-area #f) + (toptest-work-area #f) ;; for iterated tests the top test contains data relevant for all + (diskpath #f) + (cmdparms #f) + (fullcmd #f) ;; (define a (with-output-to-string (lambda ()(write x)))) + (mt-bindir-path #f) + (testinfo (rmt:get-test-info-by-id run-id test-id)) + (mt_target (string-intersperse (map cadr keyvals) "/")) + (debug-param (append (if (args:get-arg "-debug") (list "-debug" (args:get-arg "-debug")) '()) + (if (args:get-arg "-logging")(list "-logging") '())))) + ;; (if hosts (set! hosts (string-split hosts))) + ;; set the megatest to be called on the remote host + (if (not remote-megatest)(set! remote-megatest local-megatest)) ;; "megatest")) + (set! mt-bindir-path (pathname-directory remote-megatest)) + (if launcher (set! launcher (string-split launcher))) + ;; set up the run work area for this test + (if (and (args:get-arg "-preclean") ;; user has requested to preclean for this run + (not (member (db:test-get-rundir testinfo)(list "n/a" "/tmp/badname")))) ;; n/a is a placeholder and thus not a read dir + (begin + (debug:print-info 0 *default-log-port* "attempting to preclean directory " (db:test-get-rundir testinfo) " for test " test-name "/" item-path) + (runs:remove-test-directory testinfo 'remove-data-only))) ;; remove data only, do not perturb the record + + ;; prevent overlapping actions - set to LAUNCHED as early as possible + ;; + ;; the following call handles waiver propogation. cannot yet condense into roll-up-pass-fail + (tests:test-set-status! run-id test-id "LAUNCHED" "n/a" #f #f) ;; (if launch-results launch-results "FAILED")) + (rmt:set-state-status-and-roll-up-items run-id test-name item-path #f "LAUNCHED" #f) + ;; (pp (hash-table->alist tconfig)) + (set! diskpath (get-best-disk *configdat* tconfig)) + (if diskpath + (let ((dat (create-work-area run-id run-info keyvals test-id test-path diskpath test-name itemdat))) + (set! work-area (car dat)) + (set! toptest-work-area (cadr dat)) + (debug:print-info 2 *default-log-port* "Using work area " work-area)) + (begin + (set! work-area (conc test-path "/tmp_run")) + (create-directory work-area #t) + (debug:print 0 *default-log-port* "WARNING: No disk work area specified - running in the test directory under tmp_run"))) + (set! cmdparms (base64:base64-encode + (z3:encode-buffer + (with-output-to-string + (lambda () ;; (list 'hosts hosts) + (write (list (list 'testpath test-path) + (list 'transport (conc *transport-type*)) + ;; (list 'serverinf *server-info*) + (list 'toppath *toppath*) + (list 'work-area work-area) + (list 'test-name test-name) + (list 'runscript runscript) + (list 'run-id run-id ) + (list 'test-id test-id ) + ;; (list 'item-path item-path ) + (list 'itemdat itemdat ) + (list 'megatest remote-megatest) + (list 'ezsteps ezsteps) + (list 'target mt_target) + (list 'runtlim (if run-time-limit (common:hms-string->seconds run-time-limit) #f)) + (list 'env-ovrd (hash-table-ref/default *configdat* "env-override" '())) + (list 'set-vars (if params (hash-table-ref/default params "-setvars" #f))) + (list 'runname runname) + (list 'mt-bindir-path mt-bindir-path)))))))) + + ;; clean out step records from previous run if they exist + ;; (rmt:delete-test-step-records run-id test-id) + ;; if the dir does not exist we may have a itempath where individual variables are a path, launch anyway + (if (file-exists? work-area) + (change-directory work-area)) ;; so that log files from the launch process don't clutter the test dir + (cond + ;; ((and launcher hosts) ;; must be using ssh hostname + ;; (set! fullcmd (append launcher (car hosts)(list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param))) + ;; (set! fullcmd (append launcher (car hosts)(list remote-megatest test-sig "-execute" cmdparms)))) + (launcher + (set! fullcmd (append launcher (list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param))) + ;; (set! fullcmd (append launcher (list remote-megatest test-sig "-execute" cmdparms)))) + (else + (if (not useshell)(debug:print 0 *default-log-port* "WARNING: internal launching will not work well without \"useshell yes\" in your [jobtools] section")) + (set! fullcmd (append (list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param (list (if useshell "&" "")))))) + ;; (set! fullcmd (list remote-megatest test-sig "-execute" cmdparms (if useshell "&" ""))))) + (if (args:get-arg "-xterm")(set! fullcmd (append fullcmd (list "-xterm")))) + (debug:print 1 *default-log-port* "Launching " work-area) + ;; set pre-launch-env-vars before launching, keep the vars in prevvals and put the envionment back when done + (debug:print 4 *default-log-port* "fullcmd: " fullcmd) + (set! *last-launch* (current-seconds)) ;; all that junk above takes time, set this as late as possible. + (let* ((commonprevvals (alist->env-vars + (hash-table-ref/default *configdat* "env-override" '()))) + (miscprevvals (alist->env-vars ;; consolidate this code with the code in megatest.scm for "-execute" + (append (list (list "MT_TEST_RUN_DIR" work-area) + (list "MT_TEST_NAME" test-name) + (list "MT_ITEM_INFO" (conc itemdat)) + (list "MT_RUNNAME" runname) + (list "MT_TARGET" mt_target) + (list "MT_ITEMPATH" item-path) + ) + itemdat))) + (testprevvals (alist->env-vars + (hash-table-ref/default tconfig "pre-launch-env-overrides" '()))) + ;; Launchwait defaults to true, must override it to turn off wait + (launchwait (if (equal? (configf:lookup *configdat* "setup" "launchwait") "no") #f #t)) + (launch-results (apply (if launchwait + process:cmd-run-with-stderr->list + process-run) + (if useshell + (let ((cmdstr (string-intersperse fullcmd " "))) + (if launchwait + cmdstr + (conc cmdstr " >> mt_launch.log 2>&1 &"))) + (car fullcmd)) + (if useshell + '() + (cdr fullcmd))))) + (mutex-unlock! *launch-setup-mutex*) ;; yes, really should mutex all the way to here. Need to put this entire process into a fork. + (if (not launchwait) ;; give the OS a little time to allow the process to start + (thread-sleep! 0.01)) + (with-output-to-file "mt_launch.log" + (lambda () + (print "LAUNCHCMD: " (string-intersperse fullcmd " ")) + (if (list? launch-results) + (apply print launch-results) + (print "NOTE: launched \"" fullcmd "\"\n but did not wait for it to proceed. Add the following to megatest.config \n[setup]\nlaunchwait yes\n if you have problems with this")) + #:append)) + (debug:print 2 *default-log-port* "Launching completed, updating db") + (debug:print 2 *default-log-port* "Launch results: " launch-results) + (if (not launch-results) + (begin + (print "ERROR: Failed to run " (string-intersperse fullcmd " ") ", exiting now") + ;; (sqlite3:finalize! db) + ;; good ole "exit" seems not to work + ;; (_exit 9) + ;; but this hack will work! Thanks go to Alan Post of the Chicken email list + ;; NB// Is this still needed? Should be safe to go back to "exit" now? + (process-signal (current-process-id) signal/kill) + )) + (alist->env-vars miscprevvals) + (alist->env-vars testprevvals) + (alist->env-vars commonprevvals) + launch-results)) + (change-directory *toppath*))) ;; recover a test where the top controlling mtest may have died ;; (define (launch:recover-test run-id test-id) ;; this function is called on the test run host via ssh Index: megatest-version.scm ================================================================== --- megatest-version.scm +++ megatest-version.scm @@ -1,7 +1,7 @@ ;; Always use two or four digit decimal ;; 1.01, 1.02...1.10,1.11,1.1101 ... 1.99,2.00.. (declare (unit megatest-version)) -(define megatest-version 1.6208) +(define megatest-version 1.6306) Index: megatest.scm ================================================================== --- megatest.scm +++ megatest.scm @@ -67,10 +67,11 @@ version " megatest-version " license GPL, Copyright Matt Welland 2006-2015 Usage: megatest [options] -h : this help + -manual : show the Megatest user manual -version : print megatest version (currently " megatest-version ") Launching and managing runs -runall : run all tests or as specified by -testpatt -remove-runs : remove the data for a run, requires -runname and -testpatt @@ -93,10 +94,12 @@ -reqtarg key1/key2/... : run for key1, key2, etc. but key1/key2 must be in runconfig -testpatt patt1/patt2,patt3/... : % is wildcard -runname : required, name for this particular test run -state : Applies to runs, tests or steps depending on context -status : Applies to runs, tests or steps depending on context + --modepatt key : load testpatt from in runconfigs instead of default TESTPATT if -testpatt and -tagexpr are not specified + -tagexpr tag1,tag2%,.. : select tests with tags matching expression Test helpers (for use inside tests) -step stepname -test-status : set the state and status of a test (use :state and :status) -setlog logfname : set the path/filename to the final log relative to the test @@ -118,11 +121,11 @@ fields category,variable,value,comment Queries -list-runs patt : list runs matching pattern \"patt\", % is the wildcard -show-keys : show the keys used in this megatest setup - -test-files targpatt : get the most recent test path/file matching targpatt e.g. %/%... + -test-files targpatt : get the most recent test path/file matching targpatt e.g. %/% or '*.log' returns list sorted by age ascending, see examples below -test-paths : get the test paths matching target, runname, item and test patterns. -list-disks : list the disks available for storing runs -list-targets : list the targets in runconfigs.config @@ -209,11 +212,13 @@ ":state" "-state" ":status" "-status" "-list-runs" - "-testpatt" + "-testpatt" + "--modepatt" + "-tagexpr" "-itempatt" "-setlog" "-set-toplog" "-runstep" "-logpro" @@ -345,15 +350,21 @@ ;; The watchdog is to keep an eye on things like db sync etc. ;; (define *watchdog* (make-thread common:watchdog "Watchdog thread")) -(thread-start! *watchdog*) +(if (not (args:get-arg "-server")) + (thread-start! *watchdog*)) ;; if starting a server; wait till we get to running state before kicking off watchdog -(if (args:get-arg "-log") - (let ((oup (open-output-file (args:get-arg "-log")))) - (debug:print-info 0 *default-log-port* "Sending log output to " (args:get-arg "-log")) +(if (or (args:get-arg "-log")(args:get-arg "-server")) ;; redirect the log always when a server + (let* ((tl (or (args:get-arg "-log")(launch:setup))) ;; run launch:setup if -server + (logf (or (args:get-arg "-log") ;; use -log unless we are a server, then craft a logfile name + (conc tl "/logs/server-" (current-process-id) "-" (get-host-name) ".log"))) + (oup (open-output-file logf))) + (if (not (args:get-arg "-log")) + (hash-table-set! args:arg-hash "-log" logf)) ;; fake out future queries of -log + (debug:print-info 0 *default-log-port* "Sending log output to " logf) (set! *default-log-port* oup))) (if (or (args:get-arg "-h") (args:get-arg "-help") (args:get-arg "--help")) @@ -692,50 +703,17 @@ ;;====================================================================== ;; Start the server - can be done in conjunction with -runall or -runtests (one day...) ;; we start the server if not running else start the client thread ;;====================================================================== +;; Server? Start up here. +;; (if (args:get-arg "-server") - - ;; Server? Start up here. - ;; (let ((tl (launch:setup)) - ;; (run-id (and (args:get-arg "-run-id") - ;; (string->number (args:get-arg "-run-id")))) (transport-type (string->symbol (or (args:get-arg "-transport") "http")))) - ;; (if run-id - ;; (begin (server:launch 0 transport-type) (set! *didsomething* #t))) -;; ;; (debug:print-error 0 *default-log-port* "server requires run-id be specified with -run-id"))) -;; -;; ;; Not a server? This section will decide how to communicate -;; ;; -;; ;; Setup client for all expect listed here -;; (if (null? (lset-intersection -;; equal? -;; (hash-table-keys args:arg-hash) -;; '("-list-servers" -;; "-stop-server" -;; "-kill-server" -;; "-show-cmdinfo" -;; "-list-runs" -;; "-ping"))) -;; (if (launch:setup) -;; (let ((run-id (and (args:get-arg "-run-id") -;; (string->number (args:get-arg "-run-id"))))) -;; ;; (set! *fdb* (filedb:open-db (conc *toppath* "/db/paths.db"))) -;; ;; if not list or kill then start a client (if appropriate) -;; (if (or (args-defined? "-h" "-version" "-create-megatest-area" "-create-test") -;; (eq? (length (hash-table-keys args:arg-hash)) 0)) -;; (debug:print-info 1 *default-log-port* "Server connection not needed") -;; (begin -;; ;; (if run-id -;; ;; (client:launch run-id) -;; ;; (client:launch 0) ;; without run-id we'll start a server for "0" -;; #t -;; )))))) (if (or (args:get-arg "-list-servers") (args:get-arg "-stop-server") (args:get-arg "-kill-server")) (let ((tl (launch:setup))) @@ -790,23 +768,24 @@ ;;====================================================================== ;; Weird special calls that need to run *after* the server has started? ;;====================================================================== (if (args:get-arg "-list-targets") - (let ((targets (common:get-runconfig-targets))) - (debug:print 1 *default-log-port* "Found "(length targets) " targets") - (case (string->symbol (or (args:get-arg "-dumpmode") "alist")) - ((alist) - (for-each (lambda (x) - ;; (print "[" x "]")) - (print x)) - targets)) - ((json) - (json-write targets)) - (else - (debug:print-error 0 *default-log-port* "dump output format " (args:get-arg "-dumpmode") " not supported for -list-targets"))) - (set! *didsomething* #t))) + (if (launch:setup) + (let ((targets (common:get-runconfig-targets))) + (debug:print 1 *default-log-port* "Found "(length targets) " targets") + (case (string->symbol (or (args:get-arg "-dumpmode") "alist")) + ((alist) + (for-each (lambda (x) + ;; (print "[" x "]")) + (print x)) + targets)) + ((json) + (json-write targets)) + (else + (debug:print-error 0 *default-log-port* "dump output format " (args:get-arg "-dumpmode") " not supported for -list-targets"))) + (set! *didsomething* #t)))) ;; cache the runconfigs in $MT_LINKTREE/$MT_TARGET/$MT_RUNNAME/.runconfig ;; (define (full-runconfigs-read) ;; in the envprocessing branch the below code replaces the further below code @@ -1018,29 +997,30 @@ ;; "%")) (keys (rmt:get-keys)) ;; (db:get-keys dbstruct)) ;; (runsdat (db:get-runs dbstruct runpatt #f #f '())) ;; (runsdat (rmt:get-runs-by-patt keys (or runpatt "%") (common:args-get-target) ;; (db:get-runs-by-patt dbstruct keys (or runpatt "%") (common:args-get-target) ;; #f #f '("id" "runname" "state" "status" "owner" "event_time" "comment") 0)) - (runsdat (db:dispatch-query access-mode rmt:get-runs-by-patt db:get-runs-by-patt keys (or runpatt "%") - (common:args-get-target) #f #f '("id" "runname" "state" "status" "owner" "event_time" "comment") 0)) + (runsdat (rmt:get-runs-by-patt keys (or runpatt "%") + (common:args-get-target) #f #f '("id" "runname" "state" "status" "owner" "event_time" "comment") 0)) (runstmp (db:get-rows runsdat)) (header (db:get-header runsdat)) ;; this is "-since" support. This looks at last mod times of .db files ;; and collects those modified since the -since time. - (runs (if (and (not (null? runstmp)) - (args:get-arg "-since")) - (let ((changed-ids (db:get-changed-run-ids (string->number (args:get-arg "-since"))))) - (let loop ((hed (car runstmp)) - (tal (cdr runstmp)) - (res '())) - (let ((new-res (if (member (db:get-value-by-header hed header "id") changed-ids) - (cons hed res) - res))) - (if (null? tal) - (reverse new-res) - (loop (car tal)(cdr tal) new-res))))) - runstmp)) + (runs runstmp) + ;; (if (and (not (null? runstmp)) + ;; (args:get-arg "-since")) + ;; (let ((changed-ids (db:get-changed-run-ids (string->number (args:get-arg "-since"))))) + ;; (let loop ((hed (car runstmp)) + ;; (tal (cdr runstmp)) + ;; (res '())) + ;; (let ((new-res (if (member (db:get-value-by-header hed header "id") changed-ids) + ;; (cons hed res) + ;; res))) + ;; (if (null? tal) + ;; (reverse new-res) + ;; (loop (car tal)(cdr tal) new-res))))) + ;; runstmp)) (db-targets (args:get-arg "-list-db-targets")) (seen (make-hash-table)) (dmode (let ((d (args:get-arg "-dumpmode"))) (if d (string->symbol d) #f))) (data (make-hash-table)) @@ -1527,11 +1507,12 @@ (let* ((keys (rmt:get-keys)) ;; db:test-get-paths must not be run remote (paths (tests:test-get-paths-matching keys target (args:get-arg "-test-files")))) (set! *didsomething* #t) (for-each (lambda (path) - (print path)) + (if (file-exists? path) + (print path))) paths))) ;; else do a general-run-call (general-run-call "-test-files" "Get paths to test" @@ -1825,11 +1806,12 @@ (begin (if (not (launch:setup)) (begin (debug:print 0 *default-log-port* "Failed to setup, exiting") (exit 1))) - (common:cleanup-db) + (let ((dbstruct (db:setup *toppath*))) + (common:cleanup-db dbstruct)) (set! *didsomething* #t))) (if (args:get-arg "-mark-incompletes") (begin (if (not (launch:setup)) @@ -1847,13 +1829,11 @@ (begin (if (not (launch:setup)) (begin (debug:print 0 *default-log-port* "Failed to setup, exiting") (exit 1))) - ;; now can find our db - ;; keep this one local - (open-run-close runs:update-all-test_meta #f) + (runs:update-all-test_meta #f) (set! *didsomething* #t))) ;;====================================================================== ;; Start a repl ;;====================================================================== @@ -1982,17 +1962,22 @@ ;;====================================================================== ;; Exit and clean up ;;====================================================================== -(if *runremote* (close-all-connections!)) ;; for http-client - (if (not *didsomething*) (debug:print 0 *default-log-port* help)) +;;(BB> "thread-join! watchdog") + +;; join the watchdog thread if it has been thread-start!ed (it may not have been started in the case of a server that never enters running state) +;; (symbols returned by thread-state: created ready running blocked suspended sleeping terminated dead) +(if (thread? *watchdog*) + (case (thread-state *watchdog*) + ((ready running blocked sleeping terminated dead) + (thread-join! *watchdog*)))) (set! *time-to-exit* #t) -(thread-join! *watchdog*) (if (not (eq? *globalexitstatus* 0)) (if (or (args:get-arg "-run")(args:get-arg "-runtests")(args:get-arg "-runall")) (begin (debug:print 0 *default-log-port* "NOTE: Subprocesses with non-zero exit code detected: " *globalexitstatus*) ADDED minimal/manyservers.sh Index: minimal/manyservers.sh ================================================================== --- /dev/null +++ minimal/manyservers.sh @@ -0,0 +1,119 @@ +#!/bin/bash + +echo manyservers.sh pid $$ + +logdir=$PWD/log-manysrv + + +function reset { + rm -f .homehost .server .server.lock links/.db/monitor.db .starting-server + } + +function launch_many_servers { + # count = $1 + # logdir = $2 + # prefx = $3 + perl -e 'foreach my $i (1 ... '$1'){print "'$2'/'$3'-srv-$i.log\n"}' | \ + xargs -P $1 -n 1 megatest -server - -run-id 0 -daemonize -log +} + + +function get_srv_pids { + ps auwx | grep "mtest -server" | grep $logdir | grep -v grep | awk '{print $2}' +} + + +if [[ -e $logdir ]]; then rm -rf $logdir; fi +if [[ ! -e $logdir ]]; then mkdir $logdir; fi + +reset + +simultaneous_servers=20 +server_collision_resolution_delay=15 +server_timeout_delay=65 + +echo "Launching $simultaneous_servers simultaneous servers" +launch_many_servers $simultaneous_servers $logdir "first" +echo "Sleeping $server_collision_resolution_delay seconds to allow new servers to die because one is already running." +sleep $server_collision_resolution_delay + +pids=`get_srv_pids` +pids_left=`echo $pids | wc -w` +echo "pids_left=$pids_left" +echo "after $server_collision_resolution_delay seconds: servers remaining=$pids_left; expecting 1" +if [[ $pids_left == 1 ]]; then + echo "All servers but 1 terminated. Still good." +else + if [[ $pids_left == 0 ]]; then + echo "All servers died too soon. Not good. Aborting." + echo "TEST FAIL" + exit 1 + else + echo "Too many servers left. Not good. Aborting." + echo "TEST FAIL" + echo $pids | xargs kill + sleep 5 + pids=`get_srv_pids` + pids_left=`echo $pids | wc -w` + if [[ ! ( $pids_left == 0 ) ]]; then + echo $pids | xargs kill -9 + fi + exit 1 + fi +fi + + + +echo "launching another volley of $simultaneous_servers. THey should all perish. right away, leaving the one server running." +launch_many_servers $simultaneous_servers $logdir "second" +sleep $server_collision_resolution_delay + +pids=`get_srv_pids` +pids_left=`echo $pids | wc -w` +echo "pids_left=$pids_left" +echo "after $server_collision_resolution_delay seconds: servers remaining=$pids_left; expecting 1" +if [[ $pids_left == 1 ]]; then + echo "All servers but 1 terminated. So far so good." +else + if [[ $pids_left == 0 ]]; then + echo "All servers died too soon. Not good. Aborting." + echo "TEST FAIL" + exit 1 + else + echo "Too many servers left. Not good. Aborting." + echo "TEST FAIL" + echo $pids | xargs kill + sleep 5 + pids=`get_srv_pids` + pids_left=`echo $pids | wc -w` + if [[ ! ( $pids_left == 0 ) ]]; then + echo $pids | xargs kill -9 + fi + exit 1 + fi +fi + + + +echo "sleeping for awhile ($server_timeout_delay seconds) to let server exit on its own for no-request timeout" +sleep $server_timeout_delay +pids=`get_srv_pids` +pids_left=`echo $pids | wc -w` +echo "after $server_timeout_delay seconds: servers remaining=$pids_left; expecting 0" + +if [[ $pids_left == 0 ]]; then + echo "No servers remain. This is good." + echo "TEST PASS" + exit 0 +else + echo "Too many servers left. Not good. Aborting." + echo "TEST FAIL" + echo $pids | xargs kill + sleep 5 + pids=`get_srv_pids` + pids_left=`echo $pids | wc -w` + if [[ ! ( $pids_left == 0 ) ]]; then + echo $pids | xargs kill -9 + fi + exit 1 +fi Index: mt.scm ================================================================== --- mt.scm +++ mt.scm @@ -128,12 +128,12 @@ ;;====================================================================== ;; T R I G G E R S ;;====================================================================== -(define (mt:process-triggers run-id test-id newstate newstatus) - (let* ((test-dat (rmt:get-test-info-by-id run-id test-id))) +(define (mt:process-triggers dbstruct run-id test-id newstate newstatus) + (let* ((test-dat (db:get-test-info-by-id dbstruct run-id test-id))) (if test-dat (let* ((test-rundir ;; (rmt:sdb-qry 'getstr ;; (filedb:get-path *fdb* (db:test-get-rundir test-dat)) ;; ) ;; ) (test-name (db:test-get-testname test-dat)) (tconfig #f) @@ -186,18 +186,18 @@ ;; (rmt:general-call 'state-status run-id newstate newstatus test-id)) ;; (else ;; (if newstate (rmt:general-call 'set-test-state run-id newstate test-id)) ;; (if newstatus (rmt:general-call 'set-test-status run-id newstatus test-id)) ;; (if newcomment (rmt:general-call 'set-test-comment run-id newcomment test-id)))) - (rmt:roll-up-pass-fail-counts run-id test-id #f newstate newstatus newcomment) - (mt:process-triggers run-id test-id newstate newstatus) + (rmt:set-state-status-and-roll-up-items run-id test-id #f newstate newstatus newcomment) + ;; (mt:process-triggers run-id test-id newstate newstatus) #t))) (define (mt:test-set-state-status-by-testname run-id test-name item-path new-state new-status new-comment) (let ((test-id (rmt:get-test-id run-id test-name item-path))) - (rmt:roll-up-pass-fail-counts run-id test-name item-path new-state new-status new-comment) - (mt:process-triggers run-id test-id new-state new-status) + (rmt:set-state-status-and-roll-up-items run-id test-name item-path new-state new-status new-comment) + ;; (mt:process-triggers run-id test-id new-state new-status) #t)) ;;(mt:test-set-state-status-by-id run-id test-id new-state new-status new-comment))) (define (mt:lazy-read-test-config test-name) (let ((tconf (hash-table-ref/default *testconfigs* test-name #f))) Index: rmt.scm ================================================================== --- rmt.scm +++ rmt.scm @@ -24,31 +24,24 @@ ;; generate entries for ~/.megatestrc with the following ;; ;; grep define ../rmt.scm | grep rmt: |perl -pi -e 's/\(define\s+\((\S+)\W.*$/\1/'|sort -u -(defstruct remote - (hh-dat (common:get-homehost)) ;; homehost record ( addr . hhflag ) - (server-url (if *toppath* (server:read-dotserver *toppath*))) ;; (server:check-if-running *toppath*) #f)) - (last-server-check 0) ;; last time we checked to see if the server was alive - (conndat #f) - (transport *transport-type*) - (server-timeout (or (server:get-timeout) 100))) ;; default to 100 seconds - ;;====================================================================== ;; S U P P O R T F U N C T I O N S ;;====================================================================== ;; if a server is either running or in the process of starting call client:setup ;; else return #f to let the calling proc know that there is no server available ;; -(define (rmt:get-connection-info run-id) - (let ((cinfo (remote-conndat *runremote*))) +(define (rmt:get-connection-info areapath) ;; TODO: push areapath down. + (let ((cinfo (remote-conndat *runremote*)) + (run-id 0)) (if cinfo cinfo - (if (tasks:server-running-or-starting? (db:delay-if-busy (tasks:open-db)) run-id) - (client:setup run-id) + (if (server:check-if-running areapath) + (client:setup areapath) #f)))) (define *send-receive-mutex* (make-mutex)) ;; should have separate mutex per run-id ;; RA => e.g. usage (rmt:send-receive 'get-var #f (list varname)) @@ -69,11 +62,11 @@ (debug:print 0 *default-log-port* "ERROR: 15 tries to start/connect to server. Giving up.") (exit 1)) ;; reset the connection if it has been unused too long ((and *runremote* (remote-conndat *runremote*) - (let ((expire-time (- start-time (remote-server-timeout *runremote*)))) + (let ((expire-time (+ (- start-time (remote-server-timeout *runremote*))(random 30)))) ;; add 30 seconds of noise so that not all running tests expire at the same time causing a storm of server starts (< (http-transport:server-dat-get-last-access (remote-conndat *runremote*)) expire-time))) (debug:print-info 12 *default-log-port* "rmt:send-receive, case 8") (remote-conndat-set! *runremote* #f) (mutex-unlock! *rmt-mutex*) (rmt:send-receive cmd rid params attemptnum: attemptnum)) @@ -82,11 +75,11 @@ (set! *runremote* (make-remote)) (mutex-unlock! *rmt-mutex*) (debug:print-info 12 *default-log-port* "rmt:send-receive, case 1") (rmt:send-receive cmd rid params attemptnum: attemptnum)) ;; ensure we have a homehost record - ((not (pair? (remote-hh-dat *runremote*))) ;; have a homehost record? + ((not (pair? (remote-hh-dat *runremote*))) ;; not on homehost (thread-sleep! 0.1) ;; since we shouldn't get here, delay a little (remote-hh-dat-set! *runremote* (common:get-homehost)) (mutex-unlock! *rmt-mutex*) (debug:print-info 12 *default-log-port* "rmt:send-receive, case 2") (rmt:send-receive cmd rid params attemptnum: attemptnum)) @@ -94,60 +87,60 @@ ((and (cdr (remote-hh-dat *runremote*)) ;; on homehost (member cmd api:read-only-queries)) ;; this is a read (mutex-unlock! *rmt-mutex*) (debug:print-info 12 *default-log-port* "rmt:send-receive, case 3") (rmt:open-qry-close-locally cmd 0 params)) + + ;; on homehost and this is a write, we already have a server, but server has died + ((and (cdr (remote-hh-dat *runremote*)) ;; on homehost + (not (member cmd api:read-only-queries)) ;; this is a write + (remote-server-url *runremote*) ;; have a server + (not (server:check-if-running *toppath*))) ;; server has died. + (set! *runremote* (make-remote)) + (mutex-unlock! *rmt-mutex*) + (debug:print-info 12 *default-log-port* "rmt:send-receive, case 4.1") + (rmt:send-receive cmd rid params attemptnum: attemptnum)) + ;; on homehost and this is a write, we already have a server ((and (cdr (remote-hh-dat *runremote*)) ;; on homehost (not (member cmd api:read-only-queries)) ;; this is a write (remote-server-url *runremote*)) ;; have a server (mutex-unlock! *rmt-mutex*) (debug:print-info 12 *default-log-port* "rmt:send-receive, case 4") (rmt:open-qry-close-locally cmd 0 params)) - ;; on homehost and this is a write, we have a server (we know because case 4 checked) - ((and (cdr (remote-hh-dat *runremote*)) ;; on homehost - (not (member cmd api:read-only-queries))) - (mutex-unlock! *rmt-mutex*) - (debug:print-info 12 *default-log-port* "rmt:send-receive, case 4.1") - (rmt:open-qry-close-locally cmd 0 params)) - ;; no server contact made and this is a write, passively start a server - ((and (not (remote-server-url *runremote*)) + + ;; on homehost, no server contact made and this is a write, passively start a server + ((and (cdr (remote-hh-dat *runremote*)) ; new + (not (remote-server-url *runremote*)) (not (member cmd api:read-only-queries))) (debug:print-info 12 *default-log-port* "rmt:send-receive, case 5") - (let ((serverconn (server:read-dotserver *toppath*))) ;; (server:check-if-running *toppath*))) ;; Do NOT want to run server:check-if-running - very expensive to do for every write call - (if serverconn - (remote-server-url-set! *runremote* serverconn) ;; the string can be consumed by the client setup if needed - (if (not (server:start-attempted? *toppath*)) - (server:kind-run *toppath*)))) - (if (cdr (remote-hh-dat *runremote*)) ;; we are on the homehost, just do the call - (begin - (mutex-unlock! *rmt-mutex*) - (debug:print-info 12 *default-log-port* "rmt:send-receive, case 5.1") - (rmt:open-qry-close-locally cmd 0 params)) - (begin ;; not on homehost, start server and wait - (mutex-unlock! *rmt-mutex*) - (debug:print-info 12 *default-log-port* "rmt:send-receive, case 5.2") - (tasks:start-and-wait-for-server (tasks:open-db) 0 15) - (rmt:send-receive cmd rid params attemptnum: attemptnum)))) - ;; if not on homehost ensure we have a connection to a live server - ;; NOTE: we *have* a homehost record by now - ((and (not (cdr (remote-hh-dat *runremote*))) ;; are we on a homehost? + (let ((server-url (server:check-if-running *toppath*))) ;; (server:read-dotserver->url *toppath*))) ;; (server:check-if-running *toppath*))) ;; Do NOT want to run server:check-if-running - very expensive to do for every write call + (if server-url + (remote-server-url-set! *runremote* server-url) ;; the string can be consumed by the client setup if needed + (server:kind-run *toppath*))) + (mutex-unlock! *rmt-mutex*) + (debug:print-info 12 *default-log-port* "rmt:send-receive, case 5.1") + (rmt:open-qry-close-locally cmd 0 params)) + + ((and (not (cdr (remote-hh-dat *runremote*))) ;; not on a homehost (not (remote-conndat *runremote*))) ;; and no connection (debug:print-info 12 *default-log-port* "rmt:send-receive, case 6 hh-dat: " (remote-hh-dat *runremote*) " conndat: " (remote-conndat *runremote*)) (mutex-unlock! *rmt-mutex*) - (tasks:start-and-wait-for-server (tasks:open-db) 0 15) - (remote-conndat-set! *runremote* (rmt:get-connection-info 0)) ;; calls client:setup which calls client:setup-http - (rmt:send-receive cmd rid params attemptnum: attemptnum)) + (server:start-and-wait *toppath*) + (remote-conndat-set! *runremote* (rmt:get-connection-info *toppath*)) ;; calls client:setup which calls client:setup-http + (rmt:send-receive cmd rid params attemptnum: attemptnum)) ;; TODO: add back-off timeout as ;; all set up if get this far, dispatch the query ((cdr (remote-hh-dat *runremote*)) ;; we are on homehost (mutex-unlock! *rmt-mutex*) (debug:print-info 12 *default-log-port* "rmt:send-receive, case 7") (rmt:open-qry-close-locally cmd (if rid rid 0) params)) + ;; not on homehost, do server query (else (mutex-unlock! *rmt-mutex*) (debug:print-info 12 *default-log-port* "rmt:send-receive, case 9") + (mutex-lock! *rmt-mutex*) (let* ((conninfo (remote-conndat *runremote*)) (dat (case (remote-transport *runremote*) ((http) (condition-case ;; handling here has caused a lot of problems. However it is needed to deal with attemtped communication to servers that have gone away (http-transport:client-api-send-receive 0 conninfo cmd params) ((commfail)(vector #f "communications fail")) @@ -156,44 +149,48 @@ (debug:print 0 *default-log-port* "ERROR: transport " (remote-transport *runremote*) " not supported") (exit)))) (success (if (vector? dat) (vector-ref dat 0) #f)) (res (if (vector? dat) (vector-ref dat 1) #f))) (if (vector? conninfo)(http-transport:server-dat-update-last-access conninfo)) ;; refresh access time - (debug:print-info 12 *default-log-port* "rmt:send-receive, case 9. conninfo=" conninfo " dat=" dat) + ;; (mutex-unlock! *rmt-mutex*) + (debug:print-info 12 *default-log-port* "rmt:send-receive, case 9. conninfo=" conninfo " dat=" dat " *runremote* = "*runremote*) (if success (case (remote-transport *runremote*) - ((http) res) + ((http) + (mutex-unlock! *rmt-mutex*) + res) (else (debug:print 0 *default-log-port* "ERROR: transport " (remote-transport *runremote*) " is unknown") + (mutex-unlock! *rmt-mutex*) (exit 1))) (begin (debug:print 0 *default-log-port* "WARNING: communication failed. Trying again, try num: " attemptnum) (remote-conndat-set! *runremote* #f) (remote-server-url-set! *runremote* #f) (debug:print-info 12 *default-log-port* "rmt:send-receive, case 9.1") - (tasks:start-and-wait-for-server (tasks:open-db) 0 15) + (mutex-unlock! *rmt-mutex*) + (server:start-and-wait *toppath*) (rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1))))))))) -(define (rmt:update-db-stats run-id rawcmd params duration) - (mutex-lock! *db-stats-mutex*) - (handle-exceptions - exn - (begin - (debug:print 0 *default-log-port* "WARNING: stats collection failed in update-db-stats") - (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) - (print "exn=" (condition->list exn)) - #f) ;; if this fails we don't care, it is just stats - (let* ((cmd (conc "run-id=" run-id " " (if (eq? rawcmd 'general-call) (car params) rawcmd))) - (stat-vec (hash-table-ref/default *db-stats* cmd #f))) - (if (not (vector? stat-vec)) - (let ((newvec (vector 0 0))) - (hash-table-set! *db-stats* cmd newvec) - (set! stat-vec newvec))) - (vector-set! stat-vec 0 (+ (vector-ref stat-vec 0) 1)) - (vector-set! stat-vec 1 (+ (vector-ref stat-vec 1) duration)))) - (mutex-unlock! *db-stats-mutex*)) - +;; (define (rmt:update-db-stats run-id rawcmd params duration) +;; (mutex-lock! *db-stats-mutex*) +;; (handle-exceptions +;; exn +;; (begin +;; (debug:print 0 *default-log-port* "WARNING: stats collection failed in update-db-stats") +;; (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) +;; (print "exn=" (condition->list exn)) +;; #f) ;; if this fails we don't care, it is just stats +;; (let* ((cmd (conc "run-id=" run-id " " (if (eq? rawcmd 'general-call) (car params) rawcmd))) +;; (stat-vec (hash-table-ref/default *db-stats* cmd #f))) +;; (if (not (vector? stat-vec)) +;; (let ((newvec (vector 0 0))) +;; (hash-table-set! *db-stats* cmd newvec) +;; (set! stat-vec newvec))) +;; (vector-set! stat-vec 0 (+ (vector-ref stat-vec 0) 1)) +;; (vector-set! stat-vec 1 (+ (vector-ref stat-vec 1) duration)))) +;; (mutex-unlock! *db-stats-mutex*)) (define (rmt:print-db-stats) (let ((fmtstr "~40a~7-d~9-d~20,2-f")) ;; "~20,2-f" (debug:print 18 *default-log-port* "DB Stats\n========") (debug:print 18 *default-log-port* (format #f "~40a~8a~10a~10a" "Cmd" "Count" "TotTime" "Avg")) @@ -257,11 +254,11 @@ ;; (rmt:update-db-stats run-id cmd params duration) ;; mark this run as dirty if this was a write, the watchdog is responsible for syncing it (if qry-is-write (let ((start-time (current-seconds))) (mutex-lock! *db-multi-sync-mutex*) - (set! *db-last-write* start-time) ;; the oldest "write" + (set! *db-last-access* start-time) ;; THIS IS PROBABLY USELESS? (we are on a client) (mutex-unlock! *db-multi-sync-mutex*))))) res)) (define (rmt:send-receive-no-auto-client-setup connection-info cmd run-id params) (let* ((run-id (if run-id run-id 0)) @@ -320,10 +317,15 @@ ;; added run-id to make looking up the correct db possible ;; (define (rmt:general-call stmtname run-id . params) (rmt:send-receive 'general-call run-id (append (list stmtname run-id) params))) + +;; given a hostname, return a pair of cpu load and update time representing latest intelligence from tests running on that host +(define (rmt:get-latest-host-load hostname) + (rmt:send-receive 'get-latest-host-load 0 (list hostname))) + ;; (define (rmt:sync-inmem->db run-id) ;; (rmt:send-receive 'sync-inmem->db run-id '())) (define (rmt:sdb-qry qry val run-id) ;; add caching if qry is 'getid or 'getstr @@ -330,10 +332,17 @@ (rmt:send-receive 'sdb-qry run-id (list qry val))) ;; NOT COMPLETED (define (rmt:runtests user run-id testpatt params) (rmt:send-receive 'runtests run-id testpatt)) + +;;====================================================================== +;; T E S T M E T A +;;====================================================================== + +(define (rmt:get-tests-tags) + (rmt:send-receive 'get-tests-tags #f '())) ;;====================================================================== ;; K E Y S ;;====================================================================== @@ -345,10 +354,15 @@ (define (rmt:get-keys) (if *db-keys* *db-keys* (let ((res (rmt:send-receive 'get-keys #f '()))) (set! *db-keys* res) res))) + +(define (rmt:get-keys-write) ;; dummy query to force server start + (let ((res (rmt:send-receive 'get-keys-write #f '()))) + (set! *db-keys* res) + res)) ;; we don't reuse run-id's (except possibly *after* a db cleanup) so it is safe ;; to cache the resuls in a hash ;; (define (rmt:get-key-vals run-id) @@ -460,12 +474,12 @@ ;; This is not needed as test steps are deleted on test delete call ;; ;; (define (rmt:delete-test-step-records run-id test-id) ;; (rmt:send-receive 'delete-test-step-records run-id (list run-id test-id))) -(define (rmt:test-set-status-state run-id test-id status state msg) - (rmt:send-receive 'test-set-status-state run-id (list run-id test-id status state msg))) +(define (rmt:test-set-state-status run-id test-id state status msg) + (rmt:send-receive 'test-set-state-status run-id (list run-id test-id state status msg))) (define (rmt:test-toplevel-num-items run-id test-name) (rmt:send-receive 'test-toplevel-num-items run-id (list run-id test-name))) ;; (define (rmt:get-previous-test-run-record run-id test-name item-path) @@ -524,12 +538,12 @@ (define (rmt:get-count-tests-running-in-jobgroup run-id jobgroup) (rmt:send-receive 'get-count-tests-running-in-jobgroup run-id (list run-id jobgroup))) ;; state and status are extra hints not usually used in the calculation ;; -(define (rmt:roll-up-pass-fail-counts run-id test-name item-path state status comment) - (rmt:send-receive 'roll-up-pass-fail-counts run-id (list run-id test-name item-path state status comment))) +(define (rmt:set-state-status-and-roll-up-items run-id test-name item-path state status comment) + (rmt:send-receive 'set-state-status-and-roll-up-items run-id (list run-id test-name item-path state status comment))) (define (rmt:update-pass-fail-counts run-id test-name) (rmt:general-call 'update-pass-fail-counts run-id test-name test-name test-name)) (define (rmt:top-test-set-per-pf-counts run-id test-name) @@ -588,12 +602,12 @@ (define (rmt:get-runs-by-patt keys runnamepatt targpatt offset limit fields last-runs-update) ;; fields of #f uses default (rmt:send-receive 'get-runs-by-patt #f (list keys runnamepatt targpatt offset limit fields last-runs-update))) (define (rmt:find-and-mark-incomplete run-id ovr-deadtime) - (if (rmt:send-receive 'have-incompletes? run-id (list run-id ovr-deadtime)) - (rmt:send-receive 'mark-incomplete run-id (list run-id ovr-deadtime)))) + ;; (if (rmt:send-receive 'have-incompletes? run-id (list run-id ovr-deadtime)) + (rmt:send-receive 'mark-incomplete run-id (list run-id ovr-deadtime))) ;; ) (define (rmt:get-main-run-stats run-id) (rmt:send-receive 'get-main-run-stats #f (list run-id))) (define (rmt:get-var varname) Index: rpc-transport.scm ================================================================== --- rpc-transport.scm +++ rpc-transport.scm @@ -176,11 +176,11 @@ (if ping-res (let ((server-dat (list iface port #f #f #f))) (hash-table-set! *runremote* run-id server-dat) server-dat) (begin - (server:try-running run-id) + (server:try-running *toppath*) (thread-sleep! 2) (rpc-transport:client-setup run-id (- remtries 1))))) (let* ((server-db-info (open-run-close tasks:get-server tasks:open-db run-id))) (debug:print-info 0 *default-log-port* "client:setup server-dat=" server-dat ", remaining-tries=" remaining-tries) (if server-db-info @@ -191,15 +191,15 @@ (if start-res (begin (hash-table-set! *runremote* run-id server-dat) server-dat) (begin - (server:try-running run-id) + (server:try-running *toppath*) (thread-sleep! 2) (rpc-transport:client-setup run-id (- remtries 1))))) (begin - (server:try-running run-id) + (server:try-running *toppath*) (thread-sleep! 2) (rpc-transport:client-setup run-id (- remtries 1))))))))) ;; ;; (port (if (and hostinfo (> (length hostdat) 1))(cadr hostdat) #f))) ;; (if (and port Index: runs.scm ================================================================== --- runs.scm +++ runs.scm @@ -928,11 +928,11 @@ (debug:print 0 *default-log-port* "WARNING: test " hed " has FAILED prerequitests and we've tried at least 10 times to run it. Giving up now.")) ;; (debug:print 0 *default-log-port* " prereqs: " prereqs-not-met) (hash-table-set! test-registry hed 'removed) (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "TEN_STRIKES" #f) ;; I'm unclear on if this roll up is needed - it may be the root cause of the "all set to FAIL" bug. - (rmt:roll-up-pass-fail-counts run-id test-name item-path #f "FAIL" #f) ;; treat as FAIL + (rmt:set-state-status-and-roll-up-items run-id test-name item-path #f "FAIL" #f) ;; treat as FAIL (list (if (null? tal)(car newtal)(car tal)) tal reg reruns))))) ;; can't drop this - maybe running? Just keep trying @@ -1673,10 +1673,11 @@ ((remove-runs) ;; (if (tasks:need-server run-id)(tasks:start-and-wait-for-server tdbdat run-id 10)) ;; seek and kill in flight -runtests with % as testpatt here ;; (if (equal? testpatt "%") (tasks:kill-runner target run-name testpatt) + ;; (debug:print 0 *default-log-port* "not attempting to kill any run launcher processes as testpatt is " testpatt)) (debug:print 1 *default-log-port* "Removing tests for run: " runkey " " (db:get-value-by-header run header "runname"))) ((set-state-status) (if (tasks:need-server run-id)(tasks:start-and-wait-for-server tdbdat run-id 10)) (debug:print 1 *default-log-port* "Modifying state and staus for tests for run: " runkey " " (db:get-value-by-header run header "runname"))) @@ -1958,10 +1959,23 @@ (if (and val (not (equal? (vector-ref currrecord idx) val))) (begin (print "Updating " test-name " " fld " to " val) (rmt:testmeta-update-field test-name fld val))))) '(("author" 2)("owner" 3)("description" 4)("reviewed" 5)("tags" 9)("jobgroup" 10))))) + +;; find tests with matching tags, tagpatt is a string "tagpatt1,tagpatt2%, ..." +;; +(define (runs:get-tests-matching-tags tagpatt) + (let* ((tagdata (rmt:get-tests-tags)) + (res '())) ;; list of tests that match one or more tags + (for-each + (lambda (tag) + (if (patt-list-match tag tagpatt) + (set! res (append (hash-table-ref tagdata tag))))) + (hash-table-keys tagdata)) + res)) + ;; Update test_meta for all tests (define (runs:update-all-test_meta db) (let ((test-names (tests:get-all))) ;; (tests:get-valid-tests))) (for-each Index: server.scm ================================================================== --- server.scm +++ server.scm @@ -8,11 +8,11 @@ ;; implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR ;; PURPOSE. (require-extension (srfi 18) extras tcp s11n) -(use srfi-1 posix regex regex-case srfi-69 hostinfo md5 message-digest directory-utils) +(use srfi-1 posix regex regex-case srfi-69 hostinfo md5 message-digest directory-utils posix-extras matchable) ;; (use zmq) (use spiffy uri-common intarweb http-client spiffy-request-vars) (declare (unit server)) @@ -47,18 +47,15 @@ ;; all routes though here end in exit ... ;; ;; start_server ;; (define (server:launch run-id transport-type) - (BB> "server:launch fired for run-id="run-id" transport-type="transport-type) (case transport-type - ((http)(http-transport:launch run-id)) + ((http)(http-transport:launch)) ;;((nmsg)(nmsg-transport:launch run-id)) ((rpc) (rpc-transport:launch run-id)) (else (debug:print-error 0 *default-log-port* "unknown server type " transport-type)))) -;; (else (debug:print-error 0 *default-log-port* "No known transport set, transport=" transport ", using rpc") -;; (rpc-transport:launch run-id))))) ;;====================================================================== ;; S E R V E R U T I L I T I E S ;;====================================================================== @@ -89,14 +86,10 @@ ;; (send-message pubsock target send-more: #t) ;; (send-message pubsock (case (server:get-transport) ((rpc) (db:obj->string (vector success/fail query-sig result))) ((http) (db:obj->string (vector success/fail query-sig result))) - ((zmq) - (let ((pub-socket (vector-ref *runremote* 1))) - (send-message pub-socket return-addr send-more: #t) - (send-message pub-socket (db:obj->string (vector success/fail query-sig result))))) ((fs) result) (else (debug:print-error 0 *default-log-port* "unrecognised transport type: " *transport-type*) result))) @@ -103,163 +96,258 @@ ;; Given a run id start a server process ### NOTE ### > file 2>&1 ;; if the run-id is zero and the target-host is set ;; try running on that host ;; incidental: rotate logs in logs/ dir. ;; -(define (server:run areapath) ;; areapath is ignored for now. +(define (server:run areapath) ;; areapath is *toppath* for a given testsuite area (let* ((curr-host (get-host-name)) + ;; (attempt-in-progress (server:start-attempted? areapath)) + ;; (dot-server-url (server:check-if-running areapath)) (curr-ip (server:get-best-guess-address curr-host)) (curr-pid (current-process-id)) (homehost (common:get-homehost)) ;; configf:lookup *configdat* "server" "homehost" )) (target-host (car homehost)) (testsuite (common:get-testsuite-name)) - (logfile (conc *toppath* "/logs/server.log")) + (logfile (conc areapath "/logs/server.log")) ;; -" curr-pid "-" target-host ".log")) (cmdln (conc (common:get-megatest-exe) - " -server " (or target-host "-") " -run-id " 0 (if (equal? (configf:lookup *configdat* "server" "daemonize") "yes") - (conc " -daemonize -log " logfile) - "") + " -server " (or target-host "-") (if (equal? (configf:lookup *configdat* "server" "daemonize") "yes") + " -daemonize " + "") + ;; " -log " logfile " -m testsuite:" testsuite)) ;; (conc " >> " logfile " 2>&1 &"))))) (log-rotate (make-thread common:rotate-logs "server run, rotate logs thread"))) ;; we want the remote server to start in *toppath* so push there - (push-directory *toppath*) + (push-directory areapath) (debug:print 0 *default-log-port* "INFO: Trying to start server (" cmdln ") ...") (thread-start! log-rotate) - + ;; host.domain.tld match host? (if (and target-host ;; look at target host, is it host.domain.tld or ip address and does it ;; match current ip or hostname (not (string-match (conc "("curr-host "|" curr-host"\\..*)") target-host)) (not (equal? curr-ip target-host))) (begin (debug:print-info 0 *default-log-port* "Starting server on " target-host ", logfile is " logfile) (setenv "TARGETHOST" target-host))) - + (setenv "TARGETHOST_LOGF" logfile) (common:wait-for-normalized-load 4 " delaying server start due to load" remote-host: (get-environment-variable "TARGETHOST")) ;; do not try starting servers on an already overloaded machine, just wait forever (system (conc "nbfake " cmdln)) (unsetenv "TARGETHOST_LOGF") (if (get-environment-variable "TARGETHOST")(unsetenv "TARGETHOST")) (thread-join! log-rotate) (pop-directory))) + +;; given a path to a server log return: host port startseconds +;; +(define (server:logf-get-start-info logf) + (let ((rx (regexp "^SERVER STARTED: (\\S+):(\\d+) AT ([\\d\\.]+)"))) ;; SERVER STARTED: host:port AT timesecs + (with-input-from-file + logf + (lambda () + (let loop ((inl (read-line)) + (lnum 0)) + (if (not (eof-object? inl)) + (let ((mlst (string-match rx inl))) + (if (not mlst) + (if (< lnum 500) ;; give up if more than 500 lines of server log read + (loop (read-line)(+ lnum 1)) + (list #f #f #f)) + (let ((dat (cdr mlst))) + (list (car dat) ;; host + (string->number (cadr dat)) ;; port + (string->number (caddr dat)))))) + (list #f #f #f))))))) + +;; get a list of servers with all relevant data +;; ( mod-time host port start-time pid ) +;; +(define (server:get-list areapath #!key (limit #f)) + (let ((fname-rx (regexp "^(|.*/)server-(\\d+)-(\\S+).log$")) + (day-seconds (* 24 60 60))) + ;; if the directory exists continue to get the list + ;; otherwise attempt to create the logs dir and then + ;; continue + (if (if (directory-exists? (conc areapath "/logs")) + #t + (if (file-write-access? areapath) + (begin + (condition-case + (create-directory (conc areapath "/logs") #t) + (exn (i/o file)(debug:print 0 *default-log-port* "ERROR: Cannot create directory at " (conc areapath "/logs"))) + (exn ()(debug:print 0 *default-log-port* "ERROR: Unknown error attemtping to get server list."))) + (directory-exists? (conc areapath "/logs"))) + #f)) + (let* ((server-logs (glob (conc areapath "/logs/server-*.log"))) + (num-serv-logs (length server-logs))) + (if (null? server-logs) + '() + (let loop ((hed (car server-logs)) + (tal (cdr server-logs)) + (res '())) + (let* ((mod-time (file-modification-time hed)) + (down-time (- (current-seconds) mod-time)) + (serv-dat (if (or (< num-serv-logs 10) + (< down-time day-seconds)) + (server:logf-get-start-info hed) + '())) ;; don't waste time processing server files not touched in the past day if there are more than ten servers to look at + (serv-rec (cons mod-time serv-dat)) + (fmatch (string-match fname-rx hed)) + (pid (if fmatch (string->number (list-ref fmatch 2)) #f)) + (new-res (if (null? serv-dat) + res + (cons (append serv-rec (list pid)) res)))) + (if (null? tal) + (if (and limit + (> (length new-res) limit)) + new-res ;; (take new-res limit) <= need intelligent sorting before this will work + new-res) + (loop (car tal)(cdr tal) new-res))))))))) + +;; given a list of servers get a list of valid servers, i.e. at least +;; 10 seconds old, has started and is less than 1 hour old and is +;; active (i.e. mod-time < 10 seconds +;; +;; mod-time host port start-time pid +;; +;; sort by start-time descending. I.e. get the oldest first. Young servers will thus drop off +;; and servers should stick around for about two hours or so. +;; +(define (server:get-best srvlst) + (let ((now (current-seconds))) + (sort + (filter (lambda (rec) + (let ((start-time (list-ref rec 3)) + (mod-time (list-ref rec 0))) + ;; (print "start-time: " start-time " mod-time: " mod-time) + (and start-time mod-time + (> (- now start-time) 0) ;; been running at least 0 seconds + (< (- now mod-time) 16) ;; still alive - file touched in last 16 seconds + (< (- now start-time) + (+ (- (string->number (or (configf:lookup *configdat* "server" "runtime") "3600")) + 180) + (random 360))) ;; under one hour running time +/- 180 + ))) + srvlst) + (lambda (a b) + (< (list-ref a 3) + (list-ref b 3)))))) + +(define (server:get-first-best areapath) + (let ((srvrs (server:get-best (server:get-list areapath)))) + (if (and srvrs + (not (null? srvrs))) + (car srvrs) + #f))) + +(define (server:record->url servr) + (match-let (((mod-time host port start-time pid) + servr)) + (if (and host port) + (conc host ":" port) + #f))) (define (server:get-client-signature) ;; BB> why is this proc named "get-"? it returns nothing -- set! has not return value. (if *my-client-signature* *my-client-signature* (let ((sig (server:mk-signature))) - (set! *my-client-signature* sig) - *my-client-signature*))) + (set! *my-client-signature* sig) + *my-client-signature*))) ;; kind start up of servers, wait 40 seconds before allowing another server for a given ;; run-id to be launched (define (server:kind-run areapath) - (let ((last-run-time (hash-table-ref/default *server-kind-run* areapath #f))) - (if (or (not last-run-time) - (> (- (current-seconds) last-run-time) 30)) - (begin - (server:run areapath) - (hash-table-set! *server-kind-run* areapath (current-seconds)))))) - -;; The generic run a server command. Dispatches the call to server 0 if run-id != 0 -;; -;; (define (server:try-running run-id) -;; (if (eq? run-id 0) -;; (server:run run-id) -;; (rmt:start-server run-id))) + (let* ((last-run-dat (hash-table-ref/default *server-kind-run* areapath '(0 0))) ;; callnum, whenrun + (call-num (car last-run-dat)) + (when-run (cadr last-run-dat)) + (run-delay (+ (case call-num + ((0) 0) + ((1) 20) + ((2) 300) + (else 600)) + (random 5)))) ;; add a small random number just in case a lot of jobs hit the work hosts simultaneously + (if (> (- (current-seconds) when-run) run-delay) + (server:run areapath)) + (hash-table-set! *server-kind-run* areapath (list (+ call-num 1)(current-seconds))))) + +(define (server:start-and-wait areapath #!key (timeout 60)) + (let ((give-up-time (+ (current-seconds) timeout))) + (let loop ((server-url (server:check-if-running areapath))) + (if (or server-url + (> (current-seconds) give-up-time)) ;; server-url will be #f if no server available. + server-url + (let ((num-ok (length (server:get-best (server:get-list areapath))))) + (if (< num-ok 1) ;; if there are no decent candidates for servers then try starting a new one + (server:kind-run areapath)) + (thread-sleep! 5) + (loop (server:check-if-running areapath))))))) + (define server:try-running server:run) ;; there is no more per-run servers ;; REMOVE ME. BUG. -(define (server:start-attempted? areapath) - (let ((flagfile (conc areapath "/.starting-server"))) - (handle-exceptions - exn - #f ;; if things go wrong pretend we can't see the file - (and (file-exists? flagfile) - (< (- (current-seconds) - (file-modification-time flagfile)) - 15))))) ;; exists and less than 15 seconds old - -(define (server:read-dotserver areapath) - (let ((dotfile (conc areapath "/.server"))) - (handle-exceptions - exn - #f ;; if things go wrong pretend we can't see the file - (if (and (file-exists? dotfile) - (file-read-access? dotfile)) - (with-input-from-file - dotfile - (lambda () - (read-line))) - #f)))) - -;; write a .server file in *toppath* with hostport -;; return #t on success, #f otherwise -;; -(define (server:write-dotserver areapath hostport) - (let ((lock-file (conc areapath "/.server.lock")) - (server-file (conc areapath "/.server"))) - (if (common:simple-file-lock lock-file) - (let ((res (handle-exceptions - exn - #f ;; failed for some reason, for the moment simply return #f - (with-output-to-file server-file - (lambda () - (print hostport))) - #t))) - (debug:print-info 0 *default-log-port* "server file " server-file " for " hostport " created") - (common:simple-file-release-lock lock-file) - res) - #f))) - -(define (server:remove-dotserver-file areapath hostport) - (let ((dotserver (server:read-dotserver areapath)) - (server-file (conc areapath "/.server")) - (lock-file (conc areapath "/.server.lock"))) - (if (and dotserver (string-match (conc ".*:" hostport "$") dotserver)) ;; port matches, good enough info to decide to remove the file - (if (common:simple-file-lock lock-file) - (begin - (handle-exceptions - exn - #f - (delete-file* server-file)) - (debug:print-info 0 *default-log-port* "server file " server-file " for " hostport " removed") - (common:simple-file-release-lock lock-file)))))) - +(define (server:dotserver-age-seconds areapath) + (let ((server-file (conc areapath "/.server"))) + (begin + (handle-exceptions + exn + #f + (- (current-seconds) + (file-modification-time server-file)))))) + ;; no longer care if multiple servers are started by accident. older servers will drop off in time. ;; (define (server:check-if-running areapath) - (let* ((dotserver (server:read-dotserver areapath))) ;; tdbdat (tasks:open-db))) - (if dotserver - (let* ((res (case *transport-type* - ((http)(server:ping-server dotserver)) - ;; ((nmsg)(nmsg-transport:ping (tasks:hostinfo-get-interface server) - ))) - (if res - dotserver - #f)) + (let* ((servers (server:get-best (server:get-list areapath)))) + (if (null? servers) + #f + (let loop ((hed (car servers)) + (tal (cdr servers))) + (let ((res (server:check-server hed))) + (if res + res + (if (null? tal) + #f + (loop (car tal)(cdr tal))))))))) + +;; ping the given server +;; +(define (server:check-server server-record) + (let* ((server-url (server:record->url server-record)) + (res (case *transport-type* + ((http)(server:ping server-url)) + ;; ((nmsg)(nmsg-transport:ping (tasks:hostinfo-get-interface server) + ))) + (if res + server-url #f))) +(define (server:kill servr) + (match-let (((mod-time hostname port start-time pid) + servr)) + (tasks:kill-server hostname pid))) + ;; called in megatest.scm, host-port is string hostname:port ;; ;; NOTE: This is NOT called directly from clients as not all transports support a client running ;; in the same process as the server. ;; (define (server:ping host-port-in #!key (do-exit #f)) (let ((host:port (if (not host-port-in) ;; use read-dotserver to find - (server:read-dotserver *toppath*) - (if (number? host-port-in) ;; we were handed a server-id - (let ((srec (tasks:get-server-by-id (db:delay-if-busy (tasks:open-db)) host-port-in))) - ;; (print "srec: " srec " host-port-in: " host-port-in) - (if srec - (conc (vector-ref srec 3) ":" (vector-ref srec 4)) - (conc "no such server-id " host-port-in))) - host-port-in)))) + #f ;; (server:check-if-running *toppath*) + ;; (if (number? host-port-in) ;; we were handed a server-id + ;; (let ((srec (tasks:get-server-by-id (db:delay-if-busy (tasks:open-db)) host-port-in))) + ;; ;; (print "srec: " srec " host-port-in: " host-port-in) + ;; (if srec + ;; (conc (vector-ref srec 3) ":" (vector-ref srec 4)) + ;; (conc "no such server-id " host-port-in))) + host-port-in))) ;; ) (let* ((host-port (if host:port (let ((slst (string-split host:port ":"))) (if (eq? (length slst) 2) (list (car slst)(string->number (cadr slst))) #f)) - #f)) - (toppath (launch:setup))) + #f))) +;; (toppath (launch:setup))) ;; (print "host-port=" host-port) (if (not host-port) (begin (if host-port-in (debug:print 0 *default-log-port* "ERROR: bad host:port")) @@ -270,15 +358,17 @@ (server-dat (http-transport:client-connect iface port)) (login-res (rmt:login-no-auto-client-setup server-dat))) (if (and (list? login-res) (car login-res)) (begin - (print "LOGIN_OK") - (if do-exit (exit 0))) + ;; (print "LOGIN_OK") + (if do-exit (exit 0)) + #t) (begin - (print "LOGIN_FAILED") - (if do-exit (exit 1))))))))) + ;; (print "LOGIN_FAILED") + (if do-exit (exit 1)) + #f))))))) ;; run ping in separate process, safest way in some cases ;; (define (server:ping-server ifaceport) (with-input-from-pipe Index: tasks.scm ================================================================== --- tasks.scm +++ tasks.scm @@ -170,312 +170,24 @@ (define (tasks:hostinfo-get-pubport vec) (vector-ref vec 3)) (define (tasks:hostinfo-get-transport vec) (vector-ref vec 4)) (define (tasks:hostinfo-get-pid vec) (vector-ref vec 5)) (define (tasks:hostinfo-get-hostname vec) (vector-ref vec 6)) -(define (tasks:server-lock-slot mdb run-id) - (tasks:server-clean-out-old-records-for-run-id mdb run-id " tasks:server-lock-slot") - (if (< (tasks:num-in-available-state mdb run-id) 4) - (begin - (tasks:server-set-available mdb run-id) - (thread-sleep! (/ (random 1500) 1000)) ;; (thread-sleep! 2) ;; Try removing this. It may not be needed. - (tasks:server-am-i-the-server? mdb run-id)) - #f)) - -;; register that this server may come online (first to register goes though with the process) -(define (tasks:server-set-available mdb run-id) - (sqlite3:execute - mdb - "INSERT INTO servers (pid,hostname,port,pubport,start_time, priority,state,mt_version,heartbeat, interface,transport,run_id) - VALUES(?, ?, ?, ?, strftime('%s','now'), ?, ?, ?,-1,?, ?, ?);" - (current-process-id) ;; pid - (get-host-name) ;; hostname - -1 ;; port - -1 ;; pubport - (random 1000) ;; priority (used a tiebreaker on get-available) - "available" ;; state - (common:version-signature) ;; mt_version - -1 ;; interface - ;; (conc (server:get-transport)) ;; transport - (conc *transport-type*) ;; transport - run-id - )) - -(define (tasks:num-in-available-state mdb run-id) - (let ((res 0)) - (sqlite3:for-each-row - (lambda (num-in-queue) - (set! res num-in-queue)) - mdb - "SELECT count(id) FROM servers WHERE run_id=? AND state = 'available' AND (strftime('%s','now') - start_time) < 30 ;" - run-id) - res)) - -(define (tasks:num-servers-non-zero-running mdb) - (let ((res 0)) - (sqlite3:for-each-row - (lambda (num-running) - (set! res num-running)) - mdb - "SELECT count(id) FROM servers WHERE run_id != 0 AND state = 'running';") - res)) - -(define (tasks:server-clean-out-old-records-for-run-id mdb run-id tag) - (sqlite3:execute mdb "UPDATE servers SET state=?,heartbeat=strftime('%s','now') WHERE state in ('available','dbprep','shutting-down') AND (strftime('%s','now') - start_time) > 50 AND run_id=?;" - (conc "defunct" tag) run-id)) - -(define (tasks:server-force-clean-running-records-for-run-id mdb run-id tag) - (sqlite3:execute mdb "UPDATE servers SET state=?,heartbeat=strftime('%s','now') WHERE state = 'running' AND run_id=?;" - (conc "defunct" tag) run-id)) - -(define (tasks:server-force-clean-run-record mdb run-id iface port tag) - (sqlite3:execute mdb "UPDATE servers SET state=?,heartbeat=strftime('%s','now') WHERE state = 'running' AND run_id=? AND interface=? AND port=?;" - (conc "defunct" tag) run-id iface port)) - - -;; BB> adding missing func for --list-servers -(define (tasks:server-deregister mdb hostname #!key (pullport #f) (pid #f) (action #f)) ;;pullport pid: pid action: 'delete)) - (if (eq? action 'delete) - (sqlite3:execute mdb "DELETE FROM servers WHERE pid=? AND port=? AND hostname=?;" pid pullport hostname) - (sqlite3:execute mdb "UPDATE servers SET state='defunct', heartbeat=strftime('%s','now') WHERE hostname=? AND pid=?;" - hostname pid))) - -(define (tasks:server-delete-records-for-this-pid mdb tag) - (sqlite3:execute mdb "UPDATE servers SET state=?,heartbeat=strftime('%s','now') WHERE hostname=? AND pid=?;" - (conc "defunct" tag) (get-host-name) (current-process-id))) - -(define (tasks:server-delete-record mdb server-id tag) - (sqlite3:execute mdb "UPDATE servers SET state=?,heartbeat=strftime('%s','now') WHERE id=?;" - (conc "defunct" tag) server-id) - ;; use this opportuntity to clean out records over one month old or over 10 minutes old with port = -1 (i.e. a never used placeholder) - (sqlite3:execute mdb "DELETE FROM servers WHERE state not in ('running','shutting-down','dbprep') AND (strftime('%s','now') - start_time) > 2628000;") - (sqlite3:execute mdb "DELETE FROM servers WHERE state like 'defunct%' AND port=-1 AND (strftime('%s','now') - start_time) > 600;") - ) - -(define (tasks:server-set-state! mdb server-id state) - (sqlite3:execute mdb "UPDATE servers SET state=?,heartbeat=strftime('%s','now') WHERE id=?;" state server-id)) - -(define (tasks:server-set-interface-port mdb server-id interface port) - (sqlite3:execute mdb "UPDATE servers SET interface=?,port=?,heartbeat=strftime('%s','now') WHERE id=?;" interface port server-id)) - -;; Get random port not used in long time -;; -(define (tasks:server-get-next-port mdb) - (let* ((lownum 30000) - (highnum 64000) - (used-ports '()) - (get-rand-port (lambda () - (+ lownum (random (- highnum lownum))))) - (port-param (if (and (args:get-arg "-port") - (string->number (args:get-arg "-port"))) - (string->number (args:get-arg "-port")) - #f)) - ;; (config-port (if (and (config-lookup *configdat* "server" "port") - ;; (string->number (config-lookup *configdat* "server" "port"))) - ;; (string->number (config-lookup *configdat* "server" "port")) - ;; #f)) - ) - (sqlite3:for-each-row - (lambda (port) - (set! used-ports (cons port used-ports))) - mdb - "SELECT port FROM servers;") - (cond - ((and port-param res) (if (> res port-param) res port-param)) - (port-param port-param) - ;; ((and config-port res) (if (> res config-port) res config-port)) - ;; (config-port config-port) - (else - (let loop ((port (get-rand-port)) - (remtries 100)) - (if (member port used-ports) - (if (> remtries 0) - (loop (get-rand-port)(- remtries 1)) - (get-rand-port)) - port)))))) - -(define (tasks:server-am-i-the-server? mdb run-id) - (let* ((all (tasks:server-get-servers-vying-for-run-id mdb run-id)) - (first (if (null? all) - #f;; (begin (debug:print-error 0 *default-log-port* "no servers listed, should be at least one by now.") - ;; (sqlite3:finalize! mdb) - ;; (exit 1)) - (car (db:get-rows all))))) - (if first - (let* ((header (db:get-header all)) - (id (db:get-value-by-header first header "id")) - (hostname (db:get-value-by-header first header "hostname")) - (pid (db:get-value-by-header first header "pid")) - (priority (db:get-value-by-header first header "priority"))) - ;; (debug:print 0 *default-log-port* "INFO: am-i-the-server got record " first) - ;; for now a basic check. add tiebreaking by priority later - (if (and (equal? hostname (get-host-name)) - (equal? pid (current-process-id))) - id - #f)) - #f))) - -;; Use: (db:get-value-by-header (car (db:get-rows dat)) (db:get-header dat) "fieldname") -;; to extract info from the structure returned -;; -(define (tasks:server-get-servers-vying-for-run-id mdb run-id) - (let* ((header (list "id" "hostname" "pid" "interface" "port" "pubport" "state" "run_id" "priority" "start_time")) - (selstr (string-intersperse header ",")) - (res '())) - (sqlite3:for-each-row - (lambda (a . b) - (set! res (cons (apply vector a b) res))) - mdb - (conc "SELECT " selstr " FROM servers WHERE run_id=? AND state in ('available','running','dbprep') ORDER BY start_time DESC;") - run-id) - (vector header res))) - -(define (tasks:get-server mdb run-id #!key (retries 10)) - (let ((res #f) - (best #f)) - (handle-exceptions - exn - (begin - (print-call-chain (current-error-port)) - (debug:print 0 *default-log-port* "WARNING: tasks:get-server db access error.") - (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) - (debug:print 0 *default-log-port* " for run " run-id) - (print-call-chain (current-error-port)) - (if (> retries 0) - (begin - (debug:print 0 *default-log-port* " trying call to tasks:get-server again in 10 seconds") - (thread-sleep! 10) - (tasks:get-server mdb run-id retries: (- retries 0))) - (debug:print 0 *default-log-port* "10 tries of tasks:get-server all crashed and burned. Giving up and returning \"no server found\""))) - (sqlite3:for-each-row - (lambda (id interface port pubport transport pid hostname) - (set! res (vector id interface port pubport transport pid hostname))) - mdb - ;; removed: - ;; strftime('%s','now')-heartbeat < 10 AND mt_version = ? - "SELECT id,interface,port,pubport,transport,pid,hostname FROM servers - WHERE run_id=? AND state='running' - ORDER BY start_time DESC LIMIT 1;" run-id) ;; (common:version-signature) run-id) - res))) - -(define (tasks:server-running-or-starting? mdb run-id) - (let ((res #f)) - (sqlite3:for-each-row - (lambda (id) - (set! res id)) - mdb ;; NEEDS dbprep ADDED - "SELECT id FROM servers WHERE run_id=? AND (state = 'running' OR (state = 'dbprep' AND (strftime('%s','now') - start_time) < 60));" run-id) - res)) - -(define (tasks:server-running? mdb run-id) - (let ((res #f)) - (sqlite3:for-each-row - (lambda (id) - (set! res id)) - mdb ;; NEEDS dbprep ADDED - "SELECT id FROM servers WHERE run_id=? AND state = 'running';" run-id) - res)) - (define (tasks:need-server run-id) (equal? (configf:lookup *configdat* "server" "required") "yes")) -;; (maxqry (cdr (rmt:get-max-query-average run-id))) -;; (threshold (string->number (or (configf:lookup *configdat* "server" "server-query-threshold") "10")))) -;; (cond -;; (forced -;; (if (common:low-noise-print 60 run-id "server required is set") -;; (debug:print-info 0 *default-log-port* "Server required is set, starting server for run-id " run-id ".")) -;; #t) -;; ((> maxqry threshold) -;; (if (common:low-noise-print 60 run-id "Max query time execeeded") -;; (debug:print-info 0 *default-log-port* "Max avg query time of " maxqry "ms exceeds limit of " threshold "ms, server needed for run-id " run-id ".")) -;; #t) -;; (else -;; #f)))) - -;; try to start a server and wait for it to be available -;; -(define (tasks:start-and-wait-for-server tdbdat run-id delay-max-tries) - ;; ensure a server is running for this run - (let loop ((server-dat (tasks:get-server (db:delay-if-busy tdbdat) run-id)) - (delay-time 0)) - (if (and (not server-dat) - (< delay-time delay-max-tries)) - (begin - (if (common:low-noise-print 60 "tasks:start-and-wait-for-server" run-id) - (debug:print 0 *default-log-port* "Try starting server for run-id " run-id)) - (thread-sleep! (/ (random 2000) 1000)) - (server:kind-run run-id) - (thread-sleep! (min delay-time 1)) - (if (not (or (server:start-attempted? *toppath*) - (server:read-dotserver *toppath*))) ;; no point in trying - (loop (tasks:get-server (db:delay-if-busy tdbdat) run-id)(+ delay-time 1)) - #f)) - #f))) - -(define (tasks:get-all-servers mdb) - (let ((res '())) - (sqlite3:for-each-row - (lambda (id pid hostname interface port pubport start-time priority state mt-version last-update transport run-id) - ;; 0 1 2 3 4 5 6 7 8 9 10 11 12 - (set! res (cons (vector id pid hostname interface port pubport start-time priority state mt-version last-update transport run-id) res))) - mdb - "SELECT id,pid,hostname,interface,port,pubport,start_time,priority,state,mt_version,strftime('%s','now')-heartbeat AS last_update,transport,run_id - FROM servers WHERE state NOT LIKE 'defunct%' ORDER BY start_time DESC;") - res)) - -(define (tasks:get-server-by-id mdb id) - (let ((res #f)) - (sqlite3:for-each-row - (lambda (id pid hostname interface port pubport start-time priority state mt-version last-update transport run-id) - ;; 0 1 2 3 4 5 6 7 8 9 10 11 12 - (set! res (vector id pid hostname interface port pubport start-time priority state mt-version last-update transport run-id))) - mdb - "SELECT id,pid,hostname,interface,port,pubport,start_time,priority,state,mt_version,strftime('%s','now')-heartbeat AS last_update,transport,run_id - FROM servers WHERE id=?;" - id) - res)) - -(define (tasks:get-server-records mdb run-id) - (let ((res '())) - (sqlite3:for-each-row - (lambda (id pid hostname interface port pubport start-time priority state mt-version last-update transport run-id) - ;; 0 1 2 3 4 5 6 7 8 9 10 11 12 - (set! res (cons (vector id pid hostname interface port pubport start-time priority state mt-version last-update transport run-id) res))) - mdb - "SELECT id,pid,hostname,interface,port,pubport,start_time,priority,state,mt_version,strftime('%s','now')-heartbeat AS last_update,transport,run_id - FROM servers WHERE run_id=? AND state NOT LIKE 'defunct%' ORDER BY start_time DESC;" - run-id) - (reverse res))) - ;; no elegance here ... ;; (define (tasks:kill-server hostname pid #!key (kill-switch "")) (debug:print-info 0 *default-log-port* "Attempting to kill server process " pid " on host " hostname) (setenv "TARGETHOST" hostname) (setenv "TARGETHOST_LOGF" "server-kills.log") (system (conc "nbfake kill "kill-switch" "pid)) + (unsetenv "TARGETHOST_LOGF") (unsetenv "TARGETHOST")) -;; look up a server by run-id and send it a kill, also delete the record for that server -;; -(define (tasks:kill-server-run-id run-id #!key (tag "default")) - (let* ((tdbdat (tasks:open-db)) - (sdat (tasks:get-server (db:delay-if-busy tdbdat) run-id))) - (if sdat - (let ((hostname (vector-ref sdat 6)) - (pid (vector-ref sdat 5)) - (server-id (vector-ref sdat 0))) - (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "killed") - (debug:print-info 0 *default-log-port* "Killing server " server-id " for run-id " run-id " on host " hostname " with pid " pid) - (tasks:kill-server hostname pid) - (tasks:server-delete-record (db:delay-if-busy tdbdat) server-id tag) ) - (debug:print-info 0 *default-log-port* "No server found for run-id " run-id ", nothing to kill")) - ;; (sqlite3:finalize! tdb) - )) - ;;====================================================================== ;; M O N I T O R S ;;====================================================================== (define (tasks:remove-monitor-record mdb) @@ -780,11 +492,11 @@ (define (tasks:find-task-queue-records dbstruct target run-name test-patt state-patt action-patt) ;; (handle-exceptions ;; exn ;; '() ;; (sqlite3:first-row - (let ((db (db:delay-if-busy (db:get-db dbstruct #f))) + (let ((db (db:delay-if-busy (db:get-db dbstruct))) (res '())) (sqlite3:for-each-row (lambda (a . b) (set! res (cons (cons a b) res))) db "SELECT id,action,owner,state,target,name,testpatt,keylock,params FROM tasks_queue Index: tests.scm ================================================================== --- tests.scm +++ tests.scm @@ -141,11 +141,11 @@ ;; returns waitons waitors tconfigdat ;; (define (tests:get-waitons test-name all-tests-registry) - (let* ((config (tests:get-testconfig test-name all-tests-registry 'return-procs))) + (let* ((config (tests:get-testconfig test-name #f all-tests-registry 'return-procs))) (let ((instr (if config (config-lookup config "requirements" "waiton") (begin ;; No config means this is a non-existant test (debug:print-error 0 *default-log-port* "non-existent required test \"" test-name "\"") (exit 1)))) @@ -291,11 +291,11 @@ ;; Check for waiver eligibility ;; (define (tests:check-waiver-eligibility testdat prev-testdat) (let* ((test-registry (make-hash-table)) - (testconfig (tests:get-testconfig (db:test-get-testname testdat) test-registry #f)) + (testconfig (tests:get-testconfig (db:test-get-testname testdat) (db:test-get-item-path testdat) test-registry #f)) (test-rundir ;; (sdb:qry 'passstr (db:test-get-rundir testdat)) ;; ) (prev-rundir ;; (sdb:qry 'passstr (db:test-get-rundir prev-testdat)) ;; ) (waivers (if testconfig (configf:section-vars testconfig "waivers") '())) @@ -351,15 +351,10 @@ (loop (car tal)(cdr tal))) #f)))))) (pop-directory) result))))) -(define (tests:test-force-state-status! run-id test-id state status) - (rmt:test-set-status-state run-id test-id status state #f) - ;; (rmt:roll-up-pass-fail-counts run-id test-name item - (mt:process-triggers run-id test-id state status)) - ;; Do not rpc this one, do the underlying calls!!! (define (tests:test-set-status! run-id test-id state status comment dat #!key (work-area #f)) (let* ((real-status status) (otherdat (if dat dat (make-hash-table))) (testdat (rmt:get-test-info-by-id run-id test-id)) @@ -396,12 +391,12 @@ (debug:print 4 *default-log-port* "real-status " real-status ", waived " waived ", status " status) ;; update the primary record IF state AND status are defined (if (and state status) (begin - (rmt:test-set-status-state run-id test-id real-status state (if waived waived comment)) - ;; (mt:process-triggers run-id test-id state real-status) ;; triggers are called in test-set-status-state + (rmt:set-state-status-and-roll-up-items run-id test-id item-path state real-status (if waived waived comment)) + ;; (mt:process-triggers run-id test-id state real-status) ;; triggers are called in test-set-state-status )) ;; if status is "AUTO" then call rollup (note, this one modifies data in test ;; run area, it does remote calls under the hood. ;; (if (and test-id state status (equal? status "AUTO")) @@ -442,12 +437,12 @@ ;; This was run remote, don't think that makes sense. Perhaps not, but that is the easiest path for the moment. (rmt:csv->test-data run-id test-id dat)))) ;; need to update the top test record if PASS or FAIL and this is a subtest - (if (not (equal? item-path "")) - (rmt:roll-up-pass-fail-counts run-id test-name item-path state status #f)) + ;;;;;; (if (not (equal? item-path "")) + ;;;;;; (rmt:set-state-status-and-roll-up-items run-id test-name item-path state status #f) ;;;;;) (if (or (and (string? comment) (string-match (regexp "\\S+") comment)) waived) (let ((cmt (if waived waived comment))) @@ -481,12 +476,11 @@ (lockf (conc outputfilename ".lock"))) (let loop ((have-lock (common:simple-file-lock lockf))) (if have-lock (let ((script (configf:lookup *configdat* "testrollup" test-name))) (print "Obtained lock for " outputfilename) - (rmt:roll-up-pass-fail-counts run-id test-name "" #f #f #f) - ;; (rmt:test-set-status-state run-id test-name #f #f #f) ;; (rmt:top-test-set-per-pf-counts run-id test-name) + (rmt:set-state-status-and-roll-up-items run-id test-name "" #f #f #f) (if script (system (conc script " > " outputfilename " & ")) (tests:generate-html-summary-for-iterated-test run-id test-id test-name outputfilename)) (common:simple-file-release-lock lockf) (change-directory orig-dir) @@ -920,24 +914,30 @@ ;; MUST BE CALLED local! ;; (define (tests:test-get-paths-matching keynames target fnamepatt #!key (res '())) ;; BUG: Move the values derived from args to parameters and push to megatest.scm - (let* ((testpatt (if (args:get-arg "-testpatt")(args:get-arg "-testpatt") "%")) - (statepatt (if (args:get-arg ":state") (args:get-arg ":state") "%")) - (statuspatt (if (args:get-arg ":status") (args:get-arg ":status") "%")) - (runname (if (args:get-arg ":runname") (args:get-arg ":runname") "%")) + (let* ((testpatt (or (args:get-arg "-testpatt")(args:get-arg "-testpatt") "%")) + (statepatt (or (args:get-arg "-state") (args:get-arg ":state") "%")) + (statuspatt (or (args:get-arg "-status") (args:get-arg ":status") "%")) + (runname (or (args:get-arg "-runname") (args:get-arg ":runname") "%")) (paths-from-db (rmt:test-get-paths-matching-keynames-target-new keynames target res testpatt statepatt statuspatt runname))) (if fnamepatt (apply append (map (lambda (p) (if (directory-exists? p) - (glob (conc p "/" fnamepatt)) + (let ((glob-query (conc p "/" fnamepatt))) + (handle-exceptions + exn + (with-input-from-pipe + (conc "echo " glob-query) + read-lines) ;; we aren't going to try too hard. If glob breaks it is likely because someone tried to do */*/*.log or similar + (glob glob-query))) '())) paths-from-db)) paths-from-db))) @@ -973,11 +973,11 @@ ;; if .testconfig exists in test directory read and return it ;; else if have cached copy in *testconfigs* return it IFF there is a section "have fulldata" ;; else read the testconfig file ;; if have path to test directory save the config as .testconfig and return it ;; -(define (tests:get-testconfig test-name test-registry system-allowed #!key (force-create #f)) +(define (tests:get-testconfig test-name item-path test-registry system-allowed #!key (force-create #f)) (let* ((cache-path (tests:get-test-path-from-environment)) (cache-file (and cache-path (conc cache-path "/.testconfig"))) (cache-exists (and cache-file (not force-create) ;; if force-create then pretend there is no cache to read (file-exists? cache-file))) @@ -985,14 +985,17 @@ cache-exists) (handle-exceptions exn #f ;; any issues, just give up with the cached version and re-read (configf:read-alist cache-file)) - #f))) + #f)) + (test-full-name (if (and item-path (not (string-null? item-path))) + (conc test-name "/" item-path) + test-name))) (if cached-dat cached-dat - (let ((dat (hash-table-ref/default *testconfigs* test-name #f))) + (let ((dat (hash-table-ref/default *testconfigs* test-full-name #f))) (if (and dat ;; have a locally cached version (hash-table-ref/default dat "have fulldata" #f)) ;; marked as good data? dat ;; no cached data available (let* ((treg (or test-registry @@ -1006,11 +1009,11 @@ environ-patt: (if system-allowed "pre-launch-env-vars" #f)) #f))) (if (and tcfg cache-file) (hash-table-set! tcfg "have fulldata" #t)) ;; mark this as fully read data - (if tcfg (hash-table-set! *testconfigs* test-name tcfg)) + (if tcfg (hash-table-set! *testconfigs* test-full-name tcfg)) (if (and testexists cache-file (file-write-access? cache-path)) (let ((tpath (conc cache-path "/.testconfig"))) (debug:print-info 1 *default-log-port* "Caching testconfig for " test-name " in " tpath) @@ -1234,11 +1237,12 @@ (define (tests:get-full-data test-names test-records required-tests all-tests-registry) (if (not (null? test-names)) (let loop ((hed (car test-names)) (tal (cdr test-names))) ;; 'return-procs tells the config reader to prep running system but return a proc (debug:print-info 4 *default-log-port* "hed=" hed " at top of loop") - (let* ((config (tests:get-testconfig hed all-tests-registry 'return-procs)) + ;; don't know item-path at this time, let the testconfig get the top level testconfig + (let* ((config (tests:get-testconfig hed #f all-tests-registry 'return-procs)) (waitons (let ((instr (if config (config-lookup config "requirements" "waiton") (begin ;; No config means this is a non-existant test (debug:print-error 0 *default-log-port* "non-existent required test \"" hed "\", grep through your testconfigs to find and remove or create the test. Discarding and continuing.") "")))) @@ -1332,10 +1336,11 @@ "SELECT count(id) FROM test_rundat;") res)) 0) (define (tests:update-central-meta-info run-id test-id cpuload diskfree minutes uname hostname) + (rmt:general-call 'update-test-rundat run-id test-id (current-seconds) (or cpuload -1)(or diskfree -1) -1 (or minutes -1)) (if (and cpuload diskfree) (rmt:general-call 'update-cpuload-diskfree run-id cpuload diskfree test-id)) (if minutes (rmt:general-call 'update-run-duration run-id minutes test-id)) (if (and uname hostname) Index: tests/fdktestqa/testqa/Makefile ================================================================== --- tests/fdktestqa/testqa/Makefile +++ tests/fdktestqa/testqa/Makefile @@ -5,16 +5,16 @@ NEWDASHBOARD = $(BINDIR)/newdashboard RUNNAME = a NUMTESTS = 20 all : - $(MEGATEST) -remove-runs -target a/b :runname c -testpatt %/% - $(MEGATEST) -runtests % -target a/b :runname c + $(MEGATEST) -remove-runs -target a/b -runname c -testpatt %/% + $(MEGATEST) -run -testpatt % -target a/b -runname c bigbig : for tn in a b c d;do \ - ($(MEGATEST) -runtests % -target a/b :runname $tn & ) ; \ + ($(MEGATEST) -run -testpatt % -target a/b -runname $tn & ) ; \ done waitonpatt : megatest -remove-runs -runname waitonpatt -target a/b -testpatt % NUMTESTS=15 megatest -run -target a/b -runname waitonpatt -testpatt bigrun3/%8 @@ -22,17 +22,17 @@ waitonall : megatest -remove-runs -runname waitonall -target a/b -testpatt % NUMTESTS=20 megatest -run -target a/b -runname waitonall -testpatt alltop bigrun : - NUMTESTS=$(NUMTESTS) $(MEGATEST) -runtests bigrun -target a/bigrun :runname a$(shell date +%V) + NUMTESTS=$(NUMTESTS) $(MEGATEST) -run -testpatt bigrun -target a/bigrun -runname a$(shell date +%V) bigrun2 : - NUMTESTS=$(NUMTESTS) $(MEGATEST) -runtests bigrun2 -target a/bigrun2 :runname a$(shell date +%V) + NUMTESTS=$(NUMTESTS) $(MEGATEST) -run -testpatt bigrun2 -target a/bigrun2 -runname a$(shell date +%V) bigrun3 : - NUMTESTS=$(NUMTESTS) $(MEGATEST) -runtests bigrun3 -target a/bigrun3 :runname $(RUNNAME) + NUMTESTS=$(NUMTESTS) $(MEGATEST) -run -testpatt bigrun3 -target a/bigrun3 -runname $(RUNNAME) dashboard : mkdir -p ../simpleruns $(DASHBOARD) -rows 20 & @@ -41,6 +41,6 @@ compile : (cd ../../..;make -j && make install) clean : - rm -rf ../simple*/*/* megatest.db db/* ../simple*/.db/* logs/* monitor.db + rm -rf ../simple*/*/* megatest.db db/* ../simple*/.db/* logs/* monitor.db /tmp/$(USER)/megatest_localdb/testqa .server ADDED tests/fdktestqa/testqa/local.config.example Index: tests/fdktestqa/testqa/local.config.example ================================================================== --- /dev/null +++ tests/fdktestqa/testqa/local.config.example @@ -0,0 +1,15 @@ +[host-types] +general #MTLOWESTLOAD xena zeus + +[jobtools] +launcher nbfake +maxload 1.5 +flexi-launcher yes +# useshell no + +[setup] +launch-delay 1 +launchwait no + +[launchers] +% general Index: tests/fdktestqa/testqa/megatest.config ================================================================== --- tests/fdktestqa/testqa/megatest.config +++ tests/fdktestqa/testqa/megatest.config @@ -1,11 +1,13 @@ [setup] testcopycmd cp --remove-destination -rlv TEST_SRC_PATH/. TEST_TARG_PATH/. >> TEST_TARG_PATH/mt_launch.log 2>> TEST_TARG_PATH/mt_launch.log # launchwait no +launch-delay 0 # All these are overridden in ../fdk.config # [jobtools] # launcher nbfake # launcher bsub -q priority -o $MT_TEST_RUN_DIR/openlava.log [include ../fdk.config] +[include local.config] Index: tests/fullrun/tests/all_toplevel/testconfig ================================================================== --- tests/fullrun/tests/all_toplevel/testconfig +++ tests/fullrun/tests/all_toplevel/testconfig @@ -1,8 +1,8 @@ [ezsteps] calcresults megatest -list-runs $MT_RUNNAME -target $MT_TARGET -check_triggers cat $MT_RUN_AREA_HOME/triggers_$MT_RUN_NAME.dat +check_triggers cat $MT_RUN_AREA_HOME/triggers_$MT_RUNNAME.dat [logpro] check_triggers ;; (expect:error in "LogFileBody" = 0 "No errors" #/error/i) ADDED utils/homehost_check.sh Index: utils/homehost_check.sh ================================================================== --- /dev/null +++ utils/homehost_check.sh @@ -0,0 +1,17 @@ +#! /bin/bash + +#exits 1 when current host is not homehost. + +if [[ ! -e .homehost ]]; then + exit 0 +fi + +homehostname=$( host `cat .homehost` | awk '{print $NF}' | sed 's/\.$//' ) +hostname=$( hostname -f ) + +if [[ $homehostname == $hostname ]]; then + exit 0 +fi +echo "ERROR: this host ($homehostname) is not the megatest homehost ($hostname)" +exit 1 + ADDED utils/lock-stats.sh Index: utils/lock-stats.sh ================================================================== --- /dev/null +++ utils/lock-stats.sh @@ -0,0 +1,13 @@ +#!/bin/bash + +while IFS=': ' read x x x x p x x i x; do + if ! [[ ${i}x == "x" ]];then + if ! $(echo $i|grep EOF >/dev/null);then + fname=$(find -L "/proc/$p/fd" -maxdepth 1 -inum "$i" -exec readlink {} \; -quit) + if $(echo $fname | grep megatest.db > /dev/null) || \ + $(echo $fname | egrep '.db/\d+.db' > /dev/null);then + echo $fname + fi + fi + fi +done < /proc/locks Index: utils/mk_wrapper ================================================================== --- utils/mk_wrapper +++ utils/mk_wrapper @@ -27,10 +27,22 @@ # echo "exec $prefix/bin/.\$(lsb_release -sr)/$cmd \"\$@\"" >> $target echo "#!/bin/bash" > $target if [[ $cmd =~ dboard ]]; then cat >> $target <<'EOF' + +# # disable if not running on homehost +# if [[ -e .homehost ]]; then +# homehostname=$( host `cat .homehost` | awk '{print $NF}' | sed 's/\.$//' ) +# hostname=$( hostname -f ) +# +# if [[ ! ($homehostname == $hostname) ]]; then +# echo "ERROR: this host ($hostname) is not the homehost ($homehostname) for this megatest run area. Cannot start dashboard." +# echo " Please log into homehost before launching dashboard." +# exit 1 +# fi +# fi # check that $DISPLAY is set if [[ -z $DISPLAY ]]; then echo 'ERROR: $DISPLAY environment variable is not set; megatest dashboard requires X display address to be set in $DISPLAY.' exit 1 Index: utils/nbfake ================================================================== --- utils/nbfake +++ utils/nbfake @@ -70,7 +70,7 @@ if [[ -z "$MY_NBFAKE_HOST" ]]; then # Run locally sh -c "cd $CURRWD;export DISPLAY=$DISPLAY; export PATH=$PATH; nohup $* >> $MY_NBFAKE_LOG 2>&1 &" else # run remotely - ssh -n -f $MY_NBFAKE_HOST "sh -c \"cd $CURRWD;export DISPLAY=$DISPLAY; export PATH=$PATH; nohup $* >> $MY_NBFAKE_LOG 2>&1 &\"" + ssh -X -n -f $MY_NBFAKE_HOST "sh -c \"cd $CURRWD;export DISPLAY=$DISPLAY; export PATH=$PATH; nohup $* >> $MY_NBFAKE_LOG 2>&1 &\"" fi Index: utils/plot-code.scm ================================================================== --- utils/plot-code.scm +++ utils/plot-code.scm @@ -8,17 +8,19 @@ ;; third param is list of files to scan (use regex srfi-69 srfi-13) (define targs #f) -(define files (cddddr (argv))) +(define files (cdr (cddddr (argv)))) (let ((targdat (cadddr (argv)))) (if (equal? targdat "-") (set! targs files) (set! targs (string-split targdat ",")))) +(define function-patt (car (cdr (cdddr (argv))))) +(define function-rx (regexp function-patt)) (define filedat-defns (make-hash-table)) (define filedat-usages (make-hash-table)) (define defn-rx (regexp "^\\s*\\(define\\s+\\(([^\\s\\)]+).*")) (define all-regexs (make-hash-table)) @@ -32,10 +34,11 @@ (lambda () (apply print data)))) (print-err "Making graph for files: " (string-intersperse targs ", ")) (print-err "Looking at files: " (string-intersperse files ", ")) +(print-err "Function regex: " function-patt) ;; Gather the functions ;; (for-each (lambda (fname) @@ -46,16 +49,18 @@ (if (not (eof-object? inl)) (let ((match (string-match defn-rx inl))) (if match (let ((fnname (cadr match))) ;; (print " " fnname) - (set! all-fns (cons fnname all-fns)) - (hash-table-set! - filedat-defns - fname - (cons fnname (hash-table-ref/default filedat-defns fname '()))) - )) + (if (string-match function-rx fnname) + (begin + (set! all-fns (cons fnname all-fns))) + (hash-table-set! + filedat-defns + fname + (cons fnname (hash-table-ref/default filedat-defns fname '()))) + ))) (loop (read-line)))))))) files) ;; fill up the regex hash (print-err "Make the huge regex hash") ADDED utils/remrun Index: utils/remrun ================================================================== --- /dev/null +++ utils/remrun @@ -0,0 +1,28 @@ +#!/bin/bash +############################################################################### +# +# remrun - same behavior as nbfake but first param is a hosthane +# (capture command output in a logfile) +# +# remrun behavior can be changed by setting the following env var: +# NBFAKE_LOG Logfile for nbfake output +# +############################################################################### + +if [[ -z "$@" ]]; then + cat <<__EOF + +remrun usage: + +remrun hostname + +remrun behavior can be changed by setting the following env vars: + NBFAKE_LOG Logfile for remrun output + +__EOF + exit +fi + +export NBFAKE_HOST=$1 +shift +exec nbfake $*
Table 3. API Keys Related CallsTable 4. API Keys Related Calls