Index: .mtutil.scm ================================================================== --- .mtutil.scm +++ .mtutil.scm @@ -23,45 +23,46 @@ (define (str-first-char->number str) (char->integer (string-ref str 0))) ;; example of how to set up and write target mappers ;; -(hash-table-set! *target-mappers* - 'prefix-contour - (lambda (target run-name area area-path reason contour mode-patt) - (conc contour "/" target))) -(hash-table-set! *target-mappers* - 'prefix-area-contour - (lambda (target run-name area area-path reason contour mode-patt) - (conc area "/" contour "/" target))) - -(hash-table-set! *runname-mappers* - 'corporate-ww - (lambda (target run-name area area-path reason contour mode-patt) - (print "corporate-ww called with: target=" target " run-name=" run-name " area=" area " area-path=" area-path " reason=" reason " contour=" contour " mode-patt=" mode-patt) - (let* ((last-name (get-last-runname area-path target)) - (last-letter (let* ((ch (if (string? last-name) - (let ((len (string-length last-name))) - (substring last-name (- len 1) len)) - "a")) - (chnum (str-first-char->number ch)) - (a (str-first-char->number "a")) - (z (str-first-char->number "z"))) - (if (and (>= chnum a)(<= chnum z)) - chnum - #f))) - (next-letter (if last-letter - (list->string - (list - (integer->char - (+ last-letter 1)))) ;; surely there is an easier way? - "a"))) - ;; (print "last-name: " last-name " last-letter: " last-letter " next-letter: " next-letter) - (conc (seconds->wwdate (current-seconds)) next-letter)))) - -(hash-table-set! *runname-mappers* - 'auto - (lambda (target run-name area area-path reason contour mode-patt) - "auto-eh")) - -;; (print "Got here!") +(add-target-mapper 'prefix-contour + (lambda (target run-name area area-path reason contour mode-patt) + (conc contour "/" target))) +(add-target-mapper 'prefix-area-contour + (lambda (target run-name area area-path reason contour mode-patt) + (conc area "/" contour "/" target))) + +(add-runname-mapper 'corporate-ww + (lambda (target run-name area area-path reason contour mode-patt) + (print "corporate-ww called with: target=" target " run-name=" run-name " area=" area " area-path=" area-path " reason=" reason " contour=" contour " mode-patt=" mode-patt) + (let* ((last-name (get-last-runname area-path target)) + (last-letter (let* ((ch (if (string? last-name) + (let ((len (string-length last-name))) + (substring last-name (- len 1) len)) + "a")) + (chnum (str-first-char->number ch)) + (a (str-first-char->number "a")) + (z (str-first-char->number "z"))) + (if (and (>= chnum a)(<= chnum z)) + chnum + #f))) + (next-letter (if last-letter + (list->string + (list + (integer->char + (+ last-letter 1)))) ;; surely there is an easier way? + "a"))) + ;; (print "last-name: " last-name " last-letter: " last-letter " next-letter: " next-letter) + (conc (seconds->wwdate (current-seconds)) next-letter)))) + +(add-runname-mapper 'auto + (lambda (target run-name area area-path reason contour mode-patt) + "auto-eh")) + +;; run only areas where first letter of area name is "a" +;; +(add-area-checker 'first-letter-a + (lambda (area target contour) + (string-match "^a.*$" area))) + Index: Makefile ================================================================== --- Makefile +++ Makefile @@ -6,14 +6,14 @@ INSTALL=install SRCFILES = common.scm items.scm launch.scm \ ods.scm runconfig.scm server.scm configf.scm \ db.scm keys.scm margs.scm megatest-version.scm \ process.scm runs.scm tasks.scm tests.scm genexample.scm \ - http-transport.scm filedb.scm \ - client.scm synchash.scm daemon.scm mt.scm \ + http-transport.scm filedb.scm tdb.scm \ + client.scm daemon.scm mt.scm \ ezsteps.scm lock-queue.scm sdb.scm \ - rmt.scm api.scm tdb.scm rpc-transport.scm \ + rmt.scm api.scm \ portlogger.scm archive.scm env.scm diff-report.scm cgisetup/models/pgdb.scm # Eggs to install (straightforward ones) EGGS=matchable readline apropos base64 regex-literals format regex-case test coops trace csv \ dot-locking posix-utils posix-extras directory-utils hostinfo tcp-server rpc csv-xml fmt \ @@ -52,10 +52,44 @@ csc $(CSCOPTS) $(OFILES) $(GOFILES) newdashboard.scm -o ndboard mtut: $(OFILES) mtut.scm csc $(CSCOPTS) $(OFILES) mtut.scm -o mtut +TCMTOBJS = \ + api.o \ + archive.o \ + cgisetup/models/pgdb.o \ + client.o \ + common.o \ + configf.o \ + daemon.o \ + db.o \ + env.o \ + http-transport.o \ + items.o \ + keys.o \ + launch.o \ + lock-queue.o \ + margs.o \ + mt.o \ + megatest-version.o \ + ods.o \ + portlogger.o \ + process.o \ + rmt.o \ + rpc-transport.o \ + runconfig.o \ + runs.o \ + server.o \ + tasks.o \ + tdb.o \ + tests.o \ + + +tcmt : $(TCMTOBJS) tcmt.scm + csc $(CSCOPTS) $(TCMTOBJS) tcmt.scm -o tcmt + # install documentation to $(PREFIX)/docs # DOES NOT REBUILD DOCS # $(PREFIX)/share/docs/megatest_manual.html : docs/manual/megatest_manual.html mkdir -p $(PREFIX)/share/docs @@ -114,10 +148,17 @@ $(INSTALL) mtut $(PREFIX)/bin/.$(ARCHSTR)/mtut $(PREFIX)/bin/mtutil : $(PREFIX)/bin/.$(ARCHSTR)/mtut utils/mk_wrapper utils/mk_wrapper $(PREFIX) mtut $(PREFIX)/bin/mtutil chmod a+x $(PREFIX)/bin/mtutil + +$(PREFIX)/bin/.$(ARCHSTR)/tcmt : tcmt + $(INSTALL) tcmt $(PREFIX)/bin/.$(ARCHSTR)/tcmt + +$(PREFIX)/bin/tcmt : $(PREFIX)/bin/.$(ARCHSTR)/tcmt utils/mk_wrapper + utils/mk_wrapper $(PREFIX) tcmt $(PREFIX)/bin/tcmt + chmod a+x $(PREFIX)/bin/tcmt # $(PREFIX)/bin/.$(ARCHSTR)/mdboard : multi-dboard # $(INSTALL) multi-dboard $(PREFIX)/bin/.$(ARCHSTR)/mdboard # $(PREFIX)/bin/mdboard : $(PREFIX)/bin/.$(ARCHSTR)/mdboard utils/mk_wrapper @@ -193,11 +234,13 @@ install : $(PREFIX)/bin/.$(ARCHSTR) $(PREFIX)/bin/.$(ARCHSTR)/mtest $(PREFIX)/bin/megatest \ $(PREFIX)/bin/.$(ARCHSTR)/dboard $(PREFIX)/bin/dashboard $(HELPERS) $(PREFIX)/bin/nbfake \ $(PREFIX)/bin/nbfind $(PREFIX)/bin/loadrunner $(PREFIX)/bin/viewscreen $(PREFIX)/bin/mt_xterm \ $(PREFIX)/share/docs/megatest_manual.html $(PREFIX)/bin/remrun $(PREFIX)/bin/mtutil \ - $(PREFIX)/share/db/mt-pg.sql $(PREFIX)/bin/.$(ARCHSTR)/ndboard $(PREFIX)/bin/newdashboard + $(PREFIX)/share/db/mt-pg.sql $(PREFIX)/bin/.$(ARCHSTR)/ndboard $(PREFIX)/bin/tcmt + +# $(PREFIX)/bin/newdashboard $(PREFIX)/bin/.$(ARCHSTR) : mkdir -p $(PREFIX)/bin/.$(ARCHSTR) mkdir -p $(PREFIX)/bin/.$(ARCHSTR)/lib @@ -304,5 +347,9 @@ fi portlogger-example : portlogger-example.scm api.o archive.o client.o common.o configf.o daemon.o dashboard-tests.o db.o dcommon.o ezsteps.o filedb.o genexample.o gutils.o http-transport.o items.o keys.o launch.o lock-queue.o margs.o megatest-version.o mt.o ods.o portlogger.o process.o rmt.o rpc-transport.o runconfig.o runs.o sdb.o server.o synchash.o tasks.o tdb.o tests.o tree.o csc $(CSCOPTS) portlogger-example.scm api.o archive.o client.o common.o configf.o daemon.o dashboard-tests.o db.o dcommon.o ezsteps.o filedb.o genexample.o gutils.o http-transport.o items.o keys.o launch.o lock-queue.o margs.o megatest-version.o mt.o ods.o portlogger.o process.o rmt.o rpc-transport.o runconfig.o runs.o sdb.o server.o synchash.o tasks.o tdb.o tests.o tree.o +# create a pdf dot graphviz diagram from notations in rmt.scm +rmt.pdf : rmt.scm + grep ';;DOT' rmt.scm | sed -e 's/.*;;DOT //' > rmt.dot;dot -Tpdf rmt.dot -o rmt.pdf + Index: api.scm ================================================================== --- api.scm +++ api.scm @@ -56,10 +56,11 @@ get-run-ids-matching-target get-runs-by-patt get-steps-data get-steps-for-test read-test-data + read-test-data* login tasks-get-last testmeta-get-record have-incompletes? synchash-get @@ -126,10 +127,11 @@ (cond ((not (vector? dat)) ;; it is an error to not receive a vector (vector #f (vector #f "remote must be called with a vector"))) ((> *api-process-request-count* 20) ;; 20) (debug:print 0 *default-log-port* "WARNING: api:execute-requests received an overloaded message.") + (set! *server-overloaded* #t) (vector #f (vector #f 'overloaded))) ;; the inner vector is what gets returned. nope, don't know why. please refactor! (else (let* ((cmd-in (vector-ref dat 0)) (cmd (if (symbol? cmd-in) cmd-in @@ -152,11 +154,26 @@ ;; SERVERS ((start-server) (apply server:kind-run params)) ((kill-server) (set! *server-run* #f)) ;; TESTS - ((test-set-state-status-by-id) (apply mt:test-set-state-status-by-id dbstruct params)) + + ;;((test-set-state-status-by-id) (apply mt:test-set-state-status-by-id dbstruct params)) + ;;BB - commented out above because it was calling below, eventually, incorrectly (dbstruct passed to mt:test-set-state-status-by-id, which previosly did more, but now only passes thru to db:set-state-status-and-roll-up-items. + ((test-set-state-status-by-id) + + ;; (define (db:set-state-status-and-roll-up-items dbstruct run-id test-name item-path state status comment) + (db:set-state-status-and-roll-up-items + dbstruct + (list-ref params 0) ; run-id + (list-ref params 1) ; test-name + #f ; item-path + (list-ref params 2) ; state + (list-ref params 3) ; status + (list-ref params 4) ; comment + )) + ((delete-test-records) (apply db:delete-test-records dbstruct params)) ((delete-old-deleted-test-records) (apply db:delete-old-deleted-test-records dbstruct params)) ((test-set-state-status) (apply db:test-set-state-status dbstruct params)) ((test-set-top-process-pid) (apply db:test-set-top-process-pid dbstruct params)) ((set-state-status-and-roll-up-items) (apply db:set-state-status-and-roll-up-items dbstruct params)) @@ -193,10 +210,15 @@ ;; TASKS ((tasks-add) (apply tasks:add dbstruct params)) ((tasks-set-state-given-param-key) (apply tasks:set-state-given-param-key dbstruct params)) ((tasks-get-last) (apply tasks:get-last dbstruct params)) + ;; NO SYNC DB + ((no-sync-set) (apply db:no-sync-set *no-sync-db* params)) + ((no-sync-get/default) (apply db:no-sync-get/default *no-sync-db* params)) + ((no-sync-del!) (apply db:no-sync-del! *no-sync-db* params)) + ;; ARCHIVES ;; ((archive-get-allocations) ((archive-register-disk) (apply db:archive-register-disk dbstruct params)) ((archive-register-block-name)(apply db:archive-register-block-name dbstruct params)) ((archive-allocate-testsuite/area-to-block)(apply db:archive-allocate-testsuite/area-to-block dbstruct block-id testsuite-name areakey)) @@ -257,10 +279,11 @@ ((get-steps-data) (apply db:get-steps-data dbstruct params)) ((get-steps-for-test) (apply db:get-steps-for-test dbstruct params)) ;; TEST DATA ((read-test-data) (apply db:read-test-data dbstruct params)) + ((read-test-data*) (apply db:read-test-data* dbstruct params)) ;; MISC ((get-latest-host-load) (apply db:get-latest-host-load dbstruct params)) ((have-incompletes?) (apply db:have-incompletes? dbstruct params)) ((login) (apply db:login dbstruct params)) Index: archive.scm ================================================================== --- archive.scm +++ archive.scm @@ -7,12 +7,11 @@ ;; implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR ;; PURPOSE. ;; strftime('%m/%d/%Y %H:%M:%S','now','localtime') -(use sqlite3 srfi-1 posix regex regex-case srfi-69 dot-locking format md5 message-digest srfi-18) -(import (prefix sqlite3 sqlite3:)) +(use (prefix sqlite3 sqlite3:) srfi-1 posix regex regex-case srfi-69 format md5 message-digest srfi-18) (declare (unit archive)) (declare (uses db)) (declare (uses common)) @@ -30,11 +29,11 @@ (flavor 'plain) ;; type of machine to run jobs on (maxload 1.5) ;; max allowed load for this work (adisks (archive:get-archive-disks))) ;; get testdir size ;; - hand off du to job mgr - (if (and (file-exists? testdir) + (if (and (common:file-exists? testdir) (file-is-writable? testdir)) (let* ((dused (jobrunner:run-job flavor ;; machine type maxload ;; max allowed load '() ;; prevars - environment vars to set for the job @@ -137,11 +136,11 @@ (> (rmt:test-toplevel-num-items run-id test-name) 0))) (test-partial-path (conc target "/" run-name "/" (db:test-make-full-name test-name item-path))) ;; note the trailing slash to get the dir inspite of it being a link (test-path (conc linktree "/" test-partial-path)) (mutex-lock! rp-mutex) - (test-physical-path (if (file-exists? test-path) + (test-physical-path (if (common:file-exists? test-path) (common:real-path test-path) #f)) (mutex-unlock! rp-mutex) (partial-path-index (if test-physical-path (substring-index test-partial-path test-physical-path) #f)) (test-base (if (and partial-path-index @@ -152,11 +151,11 @@ #f))) (cond (toplevel/children (debug:print 0 *default-log-port* "WARNING: cannot archive " test-name " with id " test-id " as it is a toplevel test with children")) - ((not (file-exists? test-path)) + ((not (common:file-exists? test-path)) (debug:print 0 *default-log-port* "WARNING: Cannot archive " test-name "/" item-path " as path " test-path " does not exist")) (else (debug:print 0 *default-log-port* "From test-dat=" test-dat " derived the following:\n" "test-partial-path = " test-partial-path "\n" @@ -180,13 +179,13 @@ (conc "-" compress) ;; or (conc "--compress=" compress) "-n" (conc (common:get-testsuite-name) "-" run-id) (conc "--strip-path=" disk-group)) test-paths)) (print-prefix #f)) ;; "Running: ")) ;; change to #f to turn off printing - (if (not (file-exists? archive-dir)) + (if (not (common:file-exists? archive-dir)) (create-directory archive-dir #t)) - (if (not (file-exists? (conc archive-dir "/HEAD"))) + (if (not (common:file-exists? (conc archive-dir "/HEAD"))) (begin ;; replace this with jobrunner stuff enventually (debug:print-info 0 *default-log-port* "Init bup in " archive-dir) ;; (mutex-lock! bup-mutex) (run-n-wait bup-exe params: bup-init-params print-cmd: print-prefix) @@ -233,11 +232,11 @@ (test-partial-path (conc target "/" run-name "/" (db:test-make-full-name test-name item-path))) ;; note the trailing slash to get the dir inspite of it being a link (test-path (conc linktree "/" test-partial-path)) ;; if the old path was not deleted then prev-test-physical-path will end up pointing to a real directory (mutex-lock! rp-mutex) - (prev-test-physical-path (if (file-exists? test-path) + (prev-test-physical-path (if (common:file-exists? test-path) ;; (read-symbolic-link test-path #t) (common:real-path test-path) #f)) (mutex-unlock! rp-mutex) (new-test-physical-path (conc best-disk "/" test-partial-path)) @@ -250,11 +249,11 @@ ;; some sanity checks, move an existing path out of the way - iif it is not a toplevel with children ;; (if (and (not toplevel/children) ;; special handling needed for toplevel with children prev-test-physical-path - (file-exists? prev-test-physical-path)) ;; what to do? abort or clean up or link it in? + (common:file-exists? prev-test-physical-path)) ;; what to do? abort or clean up or link it in? (let* ((base (pathname-directory prev-test-physical-path)) (dirn (pathname-file prev-test-physical-path)) (newn (conc base "/." dirn))) (debug:print-error 0 *default-log-port* "the old directory " prev-test-physical-path ", still exists! Moving it to " newn) (rename-file prev-test-physical-path newn))) Index: client.scm ================================================================== --- client.scm +++ client.scm @@ -10,18 +10,13 @@ ;;====================================================================== ;; C L I E N T S ;;====================================================================== -(require-extension (srfi 18) extras tcp s11n) - -(use srfi-1 posix regex regex-case srfi-69 hostinfo md5 message-digest matchable) -;; (use zmq) - -(use (prefix sqlite3 sqlite3:)) - -(use spiffy uri-common intarweb http-client spiffy-request-vars uri-common intarweb directory-utils) +(use srfi-18 extras tcp s11n srfi-1 posix regex srfi-69 hostinfo md5 + message-digest matchable spiffy uri-common intarweb http-client + spiffy-request-vars uri-common intarweb directory-utils) (declare (unit client)) (declare (uses common)) (declare (uses db)) Index: common.scm ================================================================== --- common.scm +++ common.scm @@ -8,20 +8,17 @@ ;; implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR ;; PURPOSE. ;;====================================================================== (use srfi-1 posix regex-case base64 format dot-locking csv-xml z3 sql-de-lite hostinfo md5 message-digest typed-records directory-utils stack - matchable) -(require-extension regex posix) - -(require-extension (srfi 18) extras tcp rpc) + matchable regex posix srfi-18 extras + pkts (prefix dbi dbi:)) (import (prefix sqlite3 sqlite3:)) (import (prefix base64 base64:)) (declare (unit common)) -(declare (uses keys)) (include "common_records.scm") ;; (require-library margs) ;; (include "margs.scm") @@ -46,11 +43,11 @@ (debug:print-error 0 *default-log-port* "bad value for setenv, key=" key ", value=" val)))) (define home (getenv "HOME")) (define user (getenv "USER")) -;; GLOBAL GLETCHES +;; GLOBALS ;; CONTEXTS (defstruct cxt (taskdb #f) (cmutex (make-mutex))) @@ -111,10 +108,12 @@ (define *db-access-mutex* (make-mutex)) (define *db-transaction-mutex* (make-mutex)) (define *db-cache-path* #f) (define *db-with-db-mutex* (make-mutex)) (define *db-api-call-time* (make-hash-table)) ;; hash of command => (list of times) +;; no sync db +(define *no-sync-db* #f) ;; SERVER (define *my-client-signature* #f) (define *transport-type* 'http) ;; override with [server] transport http|rpc|nmsg (define *runremote* #f) ;; if set up for server communication this will hold @@ -129,10 +128,11 @@ (define *home-host* #f) ;; (define *total-non-write-delay* 0) (define *heartbeat-mutex* (make-mutex)) (define *api-process-request-count* 0) (define *max-api-process-requests* 0) +(define *server-overloaded* #f) ;; client (define *rmt-mutex* (make-mutex)) ;; remote access calls mutex ;; RPC transport @@ -149,10 +149,13 @@ (define *run-info-cache* (make-hash-table)) ;; run info is stable, no need to reget (define *launch-setup-mutex* (make-mutex)) ;; need to be able to call launch:setup often so mutex it and re-call the real deal only if *toppath* not set (define *homehost-mutex* (make-mutex)) +;; Miscellaneous +(define *triggers-mutex* (make-mutex)) ;; block overlapping processing of triggers + (defstruct remote (hh-dat (common:get-homehost)) ;; homehost record ( addr . hhflag ) (server-url (if *toppath* (server:check-if-running *toppath*))) ;; (server:check-if-running *toppath*) #f)) (last-server-check 0) ;; last time we checked to see if the server was alive (conndat #f) @@ -225,28 +228,40 @@ (substring (common:get-last-run-version) 0 6))) (define (common:set-last-run-version) (rmt:set-var "MEGATEST_VERSION" (common:version-signature))) +;; postive number if megatest version > db version +;; negative number if megatest version < db version +(define (common:version-db-delta) + (- megatest-version (common:get-last-run-version-number))) + (define (common:version-changed?) (not (equal? (common:get-last-run-version) - (common:version-signature)))) + (common:version-signature)))) +(define (common:api-changed?) + (not (equal? (substring (->string megatest-version) 0 4) + (substring (conc (common:get-last-run-version)) 0 4)))) + ;; Move me elsewhere ... ;; RADT => Why do we meed the version check here, this is called only if version misma ;; -(define (common:cleanup-db dbstruct) - (db:multi-db-sync +(define (common:cleanup-db dbstruct #!key (full #f)) + (apply db:multi-db-sync dbstruct + 'schema ;; 'new2old 'killservers - 'dejunk - ;; 'adj-testids + 'adj-target ;; 'old2new 'new2old - 'schema) - (if (common:version-changed?) + ;; (if full + '(dejunk) + ;; '()) + ) + (if (common:api-changed?) (common:set-last-run-version))) ;; Rotate logs, logic: ;; if > 500k and older than 1 week: ;; remove previous compressed log and compress this log @@ -266,11 +281,11 @@ (> (file-size fullname) 200000)) (and (string-match "^server-.*.log" file) (> (- (current-seconds) (file-modification-time fullname)) (* 8 60 60)))) (let ((gzfile (conc fullname ".gz"))) - (if (file-exists? gzfile) + (if (common:file-exists? gzfile) (begin (debug:print-info 0 *default-log-port* "removing " gzfile) (delete-file gzfile))) (debug:print-info 0 *default-log-port* "compressing " file) (system (conc "gzip " fullname))) @@ -285,22 +300,22 @@ ;; Force a megatest cleanup-db if version is changed and skip-version-check not specified ;; Do NOT check if not on homehost! ;; (define (common:exit-on-version-changed) (if (common:on-homehost?) - (if (common:version-changed?) + (if (common:api-changed?) (let* ((mtconf (conc (get-environment-variable "MT_RUN_AREA_HOME") "/megatest.config")) (dbfile (conc (get-environment-variable "MT_RUN_AREA_HOME") "/megatest.db")) (read-only (not (file-write-access? dbfile))) - (dbstruct (db:setup))) + (dbstruct (db:setup #t))) (debug:print 0 *default-log-port* "WARNING: Version mismatch!\n" " expected: " (common:version-signature) "\n" " got: " (common:get-last-run-version)) (cond ((get-environment-variable "MT_SKIP_DB_MIGRATE") #t) - ((and (file-exists? mtconf) (file-exists? dbfile) (not read-only) + ((and (common:file-exists? mtconf) (common:file-exists? dbfile) (not read-only) (eq? (current-user-id)(file-owner mtconf))) ;; safe to run -cleanup-db (debug:print 0 *default-log-port* " I see you are the owner of megatest.config, attempting to cleanup and reset to new version") (handle-exceptions exn (begin @@ -307,14 +322,14 @@ (debug:print 0 *default-log-port* "Failed to switch versions.") (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) (print-call-chain (current-error-port)) (exit 1)) (common:cleanup-db dbstruct))) - ((not (file-exists? mtconf)) + ((not (common:file-exists? mtconf)) (debug:print 0 *default-log-port* " megatest.config does not exist in this area. Cannot proceed with megatest version migration.") (exit 1)) - ((not (file-exists? dbfile)) + ((not (common:file-exists? dbfile)) (debug:print 0 *default-log-port* " megatest.db does not exist in this area. Cannot proceed with megatest version migration.") (exit 1)) ((not (eq? (current-user-id)(file-owner mtconf))) (debug:print 0 *default-log-port* " You do not own megatest.db in this area. Cannot proceed with megatest version migration.") (exit 1)) @@ -321,14 +336,14 @@ (read-only (debug:print 0 *default-log-port* " You have read-only access to this area. Cannot proceed with megatest version migration.") (exit 1)) (else (debug:print 0 *default-log-port* " to switch versions you can run: \"megatest -cleanup-db\"") - (exit 1))))) - (begin - (debug:print 0 *default-log-port* "ERROR: cannot migrate version unless on homehost. Exiting.") - (exit 1)))) + (exit 1))))))) +;; (begin +;; (debug:print 0 *default-log-port* "ERROR: cannot migrate version unless on homehost. Exiting.") +;; (exit 1)))) ;;====================================================================== ;; S P A R S E A R R A Y S ;;====================================================================== @@ -424,11 +439,11 @@ ;; (define (common:simple-file-lock fname #!key (expire-time 300)) (handle-exceptions exn #f ;; don't really care what went wrong right now. NOTE: I have not seen this one actually fail. - (if (file-exists? fname) + (if (common:file-exists? fname) (if (> (- (current-seconds)(file-modification-time fname)) expire-time) (begin (delete-file* fname) (common:simple-file-lock fname expire-time: expire-time)) #f) @@ -435,11 +450,11 @@ (let ((key-string (conc (get-host-name) "-" (current-process-id)))) (with-output-to-file fname (lambda () (print key-string))) (thread-sleep! 0.25) - (if (file-exists? fname) + (if (common:file-exists? fname) (with-input-from-file fname (lambda () (equal? key-string (read-line)))) #f))))) @@ -462,11 +477,12 @@ ;;====================================================================== ;; S T A T E S A N D S T A T U S E S ;;====================================================================== -(define *common:std-states* +;; BBnote: *common:std-states* - dashboard filter control and test control state buttons defined here; used in set-fields-panel and dboard:make-controls +(define *common:std-states* ;; for toggle buttons in dashboard '((0 "ARCHIVED") (1 "STUCK") (2 "KILLREQ") (3 "KILLED") (4 "NOT_STARTED") @@ -474,18 +490,19 @@ (6 "LAUNCHED") (7 "REMOTEHOSTSTART") (8 "RUNNING") )) +;; BBnote: *common:std-statuses* dashboard filter control and test control status buttons defined here; used in set-fields-panel and dboard:make-controls (define *common:std-statuses* '(;; (0 "DELETED") (1 "n/a") (2 "PASS") - (3 "CHECK") - (4 "SKIP") - (5 "WARN") - (6 "WAIVED") + (3 "SKIP") + (4 "WARN") + (5 "WAIVED") + (6 "CHECK") (7 "STUCK/DEAD") (8 "FAIL") (9 "ABORT"))) (define *common:ended-states* ;; states which indicate the test is stopped and will not proceed @@ -492,12 +509,13 @@ '("COMPLETED" "ARCHIVED" "KILLED" "KILLREQ" "STUCK" "INCOMPLETE")) (define *common:badly-ended-states* ;; these roll up as CHECK, i.e. results need to be checked '("KILLED" "KILLREQ" "STUCK" "INCOMPLETE" "DEAD")) +;; BBnote: *common:running-states* used from db:set-state-status-and-roll-up-items (define *common:running-states* ;; test is either running or can be run - '("RUNNING" "REMOTEHOSTSTART" "LAUNCHED")) + '("RUNNING" "REMOTEHOSTSTART" "LAUNCHED" "STARTED")) (define *common:cant-run-states* ;; These are stopping conditions that prevent a test from being run '("COMPLETED" "KILLED" "UNKNOWN" "INCOMPLETE" "ARCHIVED")) (define *common:not-started-ok-statuses* ;; if not one of these statuses when in not_started state treat as dead @@ -588,20 +606,26 @@ (pathname-file *toppath*) #f))) ;; (pathname-file (current-directory))))) (define common:get-area-name common:get-testsuite-name) -(define (common:get-db-tmp-area) +(define (common:get-db-tmp-area . junk) (if *db-cache-path* *db-cache-path* - (if *toppath* - (let ((dbpath (create-directory (conc "/tmp/" (current-user-name) - "/megatest_localdb/" - (common:get-testsuite-name) "/" - (string-translate *toppath* "/" ".")) #t))) - (set! *db-cache-path* dbpath) - dbpath) + (if *toppath* ;; common:get-create-writeable-dir + (handle-exceptions + exn + (begin + (debug:print-error 0 *default-log-port* "Couldn't create path to " dbdir) + (exit 1)) + (let ((dbpath (common:get-create-writeable-dir + (list (conc "/tmp/" (current-user-name) + "/megatest_localdb/" + (common:get-testsuite-name) "/" + (string-translate *toppath* "/" ".")))))) ;; #t)))) + (set! *db-cache-path* dbpath) + dbpath)) #f))) (define (common:get-area-path-signature) (message-digest-string (md5-primitive) *toppath*)) @@ -618,23 +642,10 @@ ;; (let ((ohh (common:on-homehost?)) ;; (srv (args:get-arg "-server"))) ;; (and ohh srv))) ;; (debug:print-info 0 *default-log-port* "common:run-sync? ohh=" ohh ", srv=" srv) - -;;;; run-ids -;; if #f use *db-local-sync* : or 'local-sync-flags -;; if #t use timestamps : or 'timestamps -(define (common:sync-to-megatest.db dbstruct) - (let ((start-time (current-seconds)) - (res (db:multi-db-sync dbstruct 'new2old))) - (let ((sync-time (- (current-seconds) start-time))) - (debug:print-info 3 *default-log-port* "Sync of newdb to olddb completed in " sync-time " seconds pid="(current-process-id)) - (if (common:low-noise-print 30 "sync new to old") - (debug:print-info 0 *default-log-port* "Sync of newdb to olddb completed in " sync-time " seconds pid="(current-process-id)))) - res)) - (define *wdnum* 0) (define *wdnum*mutex (make-mutex)) @@ -661,102 +672,34 @@ (< duration-since-last-sync sync-cool-off-duration)) (thread-sleep! (- sync-cool-off-duration duration-since-last-sync))) (if (not *time-to-exit*) (let ((golden-mtdb-mtime (file-modification-time golden-mtpath)) (tmp-mtdb-mtime (file-modification-time tmp-mtpath))) - (if (> golden-mtdb-mtime tmp-mtdb-mtime) - (let ((res (db:multi-db-sync dbstruct 'old2new))) - (debug:print-info 13 *default-log-port* "rosync called, " res " records transferred."))) + (if (> golden-mtdb-mtime tmp-mtdb-mtime) + (if (< golden-mtdb-mtime (- (current-seconds) 3)) ;; file has NOT been touched in past three seconds, this way multiple servers won't fight to sync back + (let ((res (db:multi-db-sync dbstruct 'old2new))) + (debug:print-info 13 *default-log-port* "rosync called, " res " records transferred.")))) (loop (current-seconds))) #t))) (debug:print-info 0 *default-log-port* "Exiting readonly-watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id)" mtpath="golden-mtpath))) - - -(define (common:writable-watchdog dbstruct) - (thread-sleep! 0.05) ;; delay for startup - (let ((legacy-sync (common:run-sync?)) - (debug-mode (debug:debug-mode 1)) - (last-time (current-seconds)) - (this-wd-num (begin (mutex-lock! *wdnum*mutex) (let ((x *wdnum*)) (set! *wdnum* (add1 *wdnum*)) (mutex-unlock! *wdnum*mutex) x)))) - (debug:print-info 2 *default-log-port* "Periodic sync thread started.") - (debug:print-info 3 *default-log-port* "watchdog starting. legacy-sync is " legacy-sync" pid="(current-process-id)" this-wd-num="this-wd-num) - (if (and legacy-sync (not *time-to-exit*)) - (let* (;;(dbstruct (db:setup)) - (mtdb (dbr:dbstruct-mtdb dbstruct)) - (mtpath (db:dbdat-get-path mtdb))) - (debug:print-info 0 *default-log-port* "Server running, periodic sync started.") - (let loop () - ;; sync for filesystem local db writes - ;; - (mutex-lock! *db-multi-sync-mutex*) - (let* ((need-sync (>= *db-last-access* *db-last-sync*)) ;; no sync since last write - (sync-in-progress *db-sync-in-progress*) - (should-sync (and (not *time-to-exit*) - (> (- (current-seconds) *db-last-sync*) 5))) ;; sync every five seconds minimum - (start-time (current-seconds)) - (mt-mod-time (file-modification-time mtpath)) - (recently-synced (< (- start-time mt-mod-time) 4)) - (will-sync (and (or need-sync should-sync) - (not sync-in-progress) - (not recently-synced)))) - (debug:print-info 13 *default-log-port* "WD writable-watchdog top of loop. need-sync="need-sync" sync-in-progress="sync-in-progress" should-sync="should-sync" start-time="start-time" mt-mod-time="mt-mod-time" recently-synced="recently-synced" will-sync="will-sync) - ;; (if recently-synced (debug:print-info 0 *default-log-port* "Skipping sync due to recently-synced flag=" recently-synced)) - ;; (debug:print-info 0 *default-log-port* "need-sync: " need-sync " sync-in-progress: " sync-in-progress " should-sync: " should-sync " will-sync: " will-sync) - (if will-sync (set! *db-sync-in-progress* #t)) - (mutex-unlock! *db-multi-sync-mutex*) - (if will-sync - (let ((res (common:sync-to-megatest.db dbstruct))) ;; did we sync any data? If so need to set the db touched flag to keep the server alive - (if (> res 0) ;; some records were transferred, keep the db alive - (begin - (mutex-lock! *heartbeat-mutex*) - (set! *db-last-access* (current-seconds)) - (mutex-unlock! *heartbeat-mutex*) - (debug:print-info 0 *default-log-port* "sync called, " res " records transferred.")) - (debug:print-info 2 *default-log-port* "sync called but zero records transferred")))) - (if will-sync - (begin - (mutex-lock! *db-multi-sync-mutex*) - (set! *db-sync-in-progress* #f) - (set! *db-last-sync* start-time) - (mutex-unlock! *db-multi-sync-mutex*))) - (if (and debug-mode - (> (- start-time last-time) 60)) - (begin - (set! last-time start-time) - (debug:print-info 4 *default-log-port* "timestamp -> " (seconds->time-string (current-seconds)) ", time since start -> " (seconds->hr-min-sec (- (current-seconds) *time-zero*)))))) - - ;; keep going unless time to exit - ;; - (if (not *time-to-exit*) - (let delay-loop ((count 0)) - ;;(debug:print-info 13 *default-log-port* "delay-loop top; count="count" pid="(current-process-id)" this-wd-num="this-wd-num" *time-to-exit*="*time-to-exit*) - - (if (and (not *time-to-exit*) - (< count 4)) ;; was 11, changing to 4. - (begin - (thread-sleep! 1) - (delay-loop (+ count 1)))) - (if (not *time-to-exit*) (loop)))) - (if (common:low-noise-print 30) - (debug:print-info 0 *default-log-port* "Exiting watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id)" this-wd-num="this-wd-num))))))) - ;; TODO: for multiple areas, we will have multiple watchdogs; and multiple threads to manage (define (common:watchdog) (debug:print-info 13 *default-log-port* "common:watchdog entered.") - (if (common:on-homehost?) - (let ((dbstruct (db:setup))) - (debug:print-info 13 *default-log-port* "after db:setup with dbstruct="dbstruct) - (cond - ((dbr:dbstruct-read-only dbstruct) - (debug:print-info 13 *default-log-port* "loading read-only watchdog") - (common:readonly-watchdog dbstruct)) - (else - (debug:print-info 13 *default-log-port* "loading writable-watchdog.") - (common:writable-watchdog dbstruct))) - (debug:print-info 13 *default-log-port* "watchdog done.")) - (debug:print-info 13 *default-log-port* "no need for watchdog on non-homehost"))) + (if (launch:setup) + (if (common:on-homehost?) + (let ((dbstruct (db:setup #t))) + (debug:print-info 13 *default-log-port* "after db:setup with dbstruct=" dbstruct) + (cond + ((dbr:dbstruct-read-only dbstruct) + (debug:print-info 13 *default-log-port* "loading read-only watchdog") + (common:readonly-watchdog dbstruct)) + (else + (debug:print-info 13 *default-log-port* "loading writable-watchdog.") + (server:writable-watchdog dbstruct))) + (debug:print-info 13 *default-log-port* "watchdog done.")) + (debug:print-info 13 *default-log-port* "no need for watchdog on non-homehost")))) (define (std-exit-procedure) (on-exit (lambda () 0)) ;;(debug:print-info 13 *default-log-port* "std-exit-procedure called; *time-to-exit*="*time-to-exit*) @@ -776,14 +719,15 @@ (begin (sqlite3:interrupt! db) (sqlite3:finalize! db #t) ;; (vector-set! *task-db* 0 #f) (set! *task-db* #f))))) - (if (and *runremote* - (remote-conndat *runremote*)) - (begin - (http-client#close-all-connections!))) ;; for http-client + (http-client#close-all-connections!) + ;; (if (and *runremote* + ;; (remote-conndat *runremote*)) + ;; (begin + ;; (http-client#close-all-connections!))) ;; for http-client (if (not (eq? *default-log-port* (current-error-port))) (close-output-port *default-log-port*)) (set! *default-log-port* (current-error-port))) "Cleanup db exit thread")) (th2 (make-thread (lambda () (debug:print 4 *default-log-port* "Attempting clean exit. Please be patient and wait a few seconds...") @@ -858,19 +802,19 @@ #f (let loop ((hed (car cmds)) (tal (cdr cmds))) (let ((res (with-input-from-pipe (conc "which " hed) read-line))) (if (and (string? res) - (file-exists? res)) + (common:file-exists? res)) res (if (null? tal) #f (loop (car tal)(cdr tal)))))))) (define (common:get-install-area) (let ((exe-path (car (argv)))) - (if (file-exists? exe-path) + (if (common:file-exists? exe-path) (handle-exceptions exn #f (pathname-directory (pathname-directory @@ -886,12 +830,14 @@ (tal (cdr dirs))) (let ((res (or (and (directory? hed) (file-write-access? hed) hed) (handle-exceptions - exn - #f + exn + (begin + (debug:print-info 0 *default-log-port* "could not create " hed ", this might cause problems down the road.") + #f) (create-directory hed #t))))) (if (and (string? res) (directory? res)) res (if (null? tal) @@ -969,10 +915,14 @@ ;; (tags-testpatt (if tagexpr (string-join (runs:get-tests-matching-tags tagexpr) ",") #f)) (testpatt-key (if (args:get-arg "--modepatt") (args:get-arg "--modepatt") "TESTPATT")) (args-testpatt (or (args:get-arg "-testpatt") (args:get-arg "-runtests") "%")) (rtestpatt (if rconf (runconfigs-get rconf testpatt-key) #f))) (cond + ((args:get-arg "--modepatt") ;; modepatt is a forced setting, when set it MUST refer to an existing PATT in the runconfig + (if rconf + (runconfigs-get rconf testpatt-key) + #f)) ;; We do NOT fall back to "%" ;; (tags-testpatt ;; (debug:print-info 0 *default-log-port* "-tagexpr "tagexpr" selects testpatt "tags-testpatt) ;; tags-testpatt) ((and (equal? args-testpatt "%") rtestpatt) (debug:print-info 0 *default-log-port* "testpatt defined in "testpatt-key" from runconfigs: " rtestpatt) @@ -1000,30 +950,44 @@ ;;;; TODO: catch permission denied exceptions and emit appropriate warnings, eg: system error while trying to access file: "/nfs/pdx/disks/icf_env_disk001/bjbarcla/gwa/issues/mtdev/randy-slow/reproduce/q... (common:false-on-exception (lambda () (directory-exists? path-string)) message: (conc "Unable to access path: " path-string) )) +;; does the directory exist and do we have write access? +;; +;; returns the directory or #f +;; +(define (common:directory-writable? path-string) + (handle-exceptions + exn + #f + (if (and (directory-exists? path-string) + (file-write-access? path-string)) + path-string + #f))) (define (common:get-linktree) (or (getenv "MT_LINKTREE") - (or (and *configdat* - (configf:lookup *configdat* "setup" "linktree")) + (if *configdat* + (configf:lookup *configdat* "setup" "linktree") (if *toppath* (conc *toppath* "/lt") - (if (file-exists? "megatest.config") ;; we are in the toppath (new area, mtutils compatible) - (conc (current-directory) "/lt") - #f))))) + #f)))) (define (common:args-get-runname) (let ((res (or (args:get-arg "-runname") (args:get-arg ":runname") (getenv "MT_RUNNAME")))) ;; (if res (set-environment-variable "MT_RUNNAME" res)) ;; not sure if this is a good idea. side effect and all ... res)) + +(define (common:get-fields cfgdat) + (let ((fields (hash-table-ref/default cfgdat "fields" '()))) + (map car fields))) (define (common:args-get-target #!key (split #f)(exit-if-bad #f)) - (let* ((keys (if (hash-table? *configdat*) (keys:config-get-fields *configdat*) '())) + (let* ((keys (if (hash-table? *configdat*) (common:get-fields *configdat*) '())) (numkeys (length keys)) (target (or (args:get-arg "-reqtarg") (args:get-arg "-target") (getenv "MT_TARGET"))) (tlist (if target (string-split target "/" #t) '())) @@ -1041,10 +1005,20 @@ (begin (debug:print-error 0 *default-log-port* "Invalid target, spaces or blanks not allowed \"" target "\", target should be: " (string-intersperse keys "/") ", have " tlist " for elements") (if exit-if-bad (exit 1)) #f) #f)))) + +;; looking only (at least for now) at the MT_ variables craft the full testname +;; +(define (common:get-full-test-name) + (if (getenv "MT_TEST_NAME") + (if (and (getenv "MT_ITEMPATH") + (not (equal? (getenv "MT_ITEMPATH") ""))) + (getenv "MT_TEST_NAME") + (conc (getenv "MT_TEST_NAME") "/" (getenv "MT_ITEMPATH"))) + #f)) ;; logic for getting homehost. Returns (host . at-home) ;; IF *toppath* is not set, wait up to five seconds trying every two seconds ;; (this is to accomodate the watchdog) ;; @@ -1079,11 +1053,11 @@ (begin (mutex-unlock! *homehost-mutex*) (debug:print 0 *default-log-port* "ERROR: Failed to read .homehost file after trying five times. Giving up and exiting, message: " ((condition-property-accessor 'exn 'message) exn)) (exit 1))) (let ((hhf (conc *toppath* "/.homehost"))) - (if (file-exists? hhf) + (if (common:file-exists? hhf) (with-input-from-file hhf read-line) (if (file-write-access? *toppath*) (begin (with-output-to-file hhf (lambda () @@ -1107,14 +1081,24 @@ #f))) ;; do we honor the caches of the config files? ;; (define (common:use-cache?) - (not (or (args:get-arg "-no-cache") - (and *configdat* - (equal? (configf:lookup *configdat* "setup" "use-cache") "no"))))) - + (let ((res #t)) ;; priority by order of evaluation + (if *configdat* ;; sillyness here. can't use setup/use-cache to know if we can use the cached files! + (if (equal? (configf:lookup *configdat* "setup" "use-cache") "no") + (set! res #f) + (if (equal? (configf:lookup *configdat* "setup" "use-cache") "yes") + (set! res #t)))) + (if (args:get-arg "-no-cache")(set! res #f)) ;; overrides setting in "setup" + (if (getenv "MT_USE_CACHE") + (if (equal? (getenv "MT_USE_CACHE") "yes") + (set! res #t) + (if (equal? (getenv "MT_USE_CACHE") "no") + (set! res #f)))) ;; overrides -no-cache switch + res)) + ;; force use of server? ;; (define (common:force-server?) (let* ((force-setting (configf:lookup *configdat* "server" "force")) (force-type (if force-setting (string->symbol force-setting) #f)) @@ -1131,17 +1115,10 @@ (begin (debug:print-info 0 *default-log-port* "forcing use of server, force setting is \"" force-setting "\".") #t) #f))) -;; do we honor the caches of the config files? -;; -(define (common:use-cache?) - (not (or (args:get-arg "-no-cache") - (and *configdat* - (equal? (configf:lookup *configdat* "setup" "use-cache") "no"))))) - ;;====================================================================== ;; M I S C L I S T S ;;====================================================================== ;; items in lista are matched value and position in listb @@ -1509,31 +1486,40 @@ (set! best-load load) (set! best-host hostname))))) hosts) best-host)) - - - -(define (common:wait-for-cpuload maxload numcpus waitdelay #!key (count 1000) (msg #f)(remote-host #f)) +(define (common:wait-for-cpuload maxload-in numcpus-in waitdelay #!key (count 1000) (msg #f)(remote-host #f)) (let* ((loadavg (common:get-cpu-load remote-host)) + (numcpus (if (< 1 numcpus-in) ;; not possible + (common:get-num-cpus remote-host) + numcpus-in)) + (maxload (max maxload-in 0.5)) ;; so maxload must be greater than 0.5 for now BUG - FIXME? (first (car loadavg)) (next (cadr loadavg)) - (adjload (* maxload numcpus)) + (adjload (* maxload (max 1 numcpus))) ;; possible bug where numcpus (or could be maxload) is zero, crude fallback is to at least use 1 (loadjmp (- first next))) (cond ((and (> first adjload) (> count 0)) - (debug:print-info 0 *default-log-port* "waiting " waitdelay " seconds due to load " first " exceeding max of " adjload (if msg msg "")) + (debug:print-info 0 *default-log-port* "waiting " waitdelay " seconds due to load " first " exceeding max of " adjload " " (if msg msg "")) (thread-sleep! waitdelay) (common:wait-for-cpuload maxload numcpus waitdelay count: (- count 1))) ((and (> loadjmp numcpus) (> count 0)) (debug:print-info 0 *default-log-port* "waiting " waitdelay " seconds due to load jump " loadjmp " > numcpus " numcpus (if msg msg "")) (thread-sleep! waitdelay) (common:wait-for-cpuload maxload numcpus waitdelay count: (- count 1)))))) +(define (common:wait-for-homehost-load maxload msg) + (let* ((hh-dat (if (common:on-homehost?) ;; if we are on the homehost then pass in #f so the calls are local. + #f + (common:get-homehost))) + (hh (if hh-dat (car hh-dat) #f)) + (numcpus (common:get-num-cpus hh))) + (common:wait-for-normalized-load maxload msg: msg remote-host: hh))) + (define (common:get-num-cpus remote-host) (let ((proc (lambda () (let loop ((numcpu 0) (inl (read-line))) (if (eof-object? inl) @@ -1550,11 +1536,11 @@ ;; wait for normalized cpu load to drop below maxload ;; (define (common:wait-for-normalized-load maxload #!key (msg #f)(remote-host #f)) (let ((num-cpus (common:get-num-cpus remote-host))) - (common:wait-for-cpuload maxload num-cpus 15 msg: msg))) + (common:wait-for-cpuload maxload num-cpus 15 msg: msg remote-host: remote-host))) (define (get-uname . params) (let* ((uname-res (process:cmd-run->list (conc "uname " (if (null? params) "-a" (car params))))) (uname #f)) (if (null? (car uname-res)) @@ -1708,11 +1694,12 @@ (let* ((key (car keyval)) (val (cdr keyval)) (delim (if (string-search whitesp val) "\"" ""))) - (print (if (member key ignorevars) + (print (if (or (member key ignorevars) + (string-search whitesp key)) "# setenv " "setenv ") key " " delim (mungeval val) delim))) envvars))) (with-output-to-file (conc fname ".sh") @@ -1722,10 +1709,11 @@ (val (cdr keyval)) (delim (if (string-search whitesp val) "\"" ""))) (print (if (or (member key ignorevars) + (string-search whitesp key) (string-search ":" key)) ;; internal only values to be skipped. "# export " "export ") key "=" delim (mungeval val) delim))) envvars))))) @@ -1739,11 +1727,11 @@ (let* ((var (car p)) (val (cadr p)) (prv (get-environment-variable var))) (set! res (cons (list var prv) res)) (if val - (setenv var (->string val)) + (safe-setenv var (->string val)) (unsetenv var)))) lst) res) '())) @@ -2091,21 +2079,29 @@ (number->string x 16)) (map string->number (string-split instr))) "/")) -(define (common:faux-lock keyname) - (if (rmt:get-var keyname) - #f +(define (common:faux-lock keyname #!key (wait-time 8)) + (if (rmt:no-sync-get/default keyname #f) ;; do not be tempted to compare to pid. locking is a one-shot action, if already locked for this pid it doesn't actually count + (if (> wait-time 0) + (begin + (thread-sleep! 1) + (if (eq? wait-time 1) ;; only one second left, steal the lock + (begin + (debug:print-info 0 *default-log-port* "stealing lock for " keyname) + (common:faux-unlock keyname force: #t))) + (common:faux-lock keyname wait-time: (- wait-time 1))) + #f) (begin - (rmt:set-var keyname (conc (current-process-id))) - (equal? (conc (current-process-id)) (conc (rmt:get-var keyname)))))) + (rmt:no-sync-set keyname (conc (current-process-id))) + (equal? (conc (current-process-id)) (conc (rmt:no-sync-get/default keyname #f)))))) (define (common:faux-unlock keyname #!key (force #f)) - (if (or force (equal? (conc (current-process-id)) (conc (rmt:get-var keyname)))) + (if (or force (equal? (conc (current-process-id)) (conc (rmt:no-sync-get/default keyname #f)))) (begin - (if (rmt:get-var keyname) (rmt:del-var keyname)) + (if (rmt:no-sync-get/default keyname #f) (rmt:no-sync-del! keyname)) #t) #f)) (define (common:in-running-test?) @@ -2120,93 +2116,81 @@ ((equal? status "KILLREQ") "purple") ((equal? status "RUNNING") "blue") ((equal? status "ABORT") "brown") (else "black"))) -;;====================================================================== -;; N A N O M S G C L I E N T -;;====================================================================== - -(define (server:get-best-guess-address hostname) - (let ((res #f)) - (for-each - (lambda (adr) - (if (not (eq? (u8vector-ref adr 0) 127)) - (set! res adr))) - ;; NOTE: This can fail when there is no mention of the host in /etc/hosts. FIXME - (vector->list (hostinfo-addresses (hostname->hostinfo hostname)))) - (string-intersperse - (map number->string - (u8vector->list - (if res res (hostname->ip hostname)))) "."))) - - -(define (common:send-dboard-main-changed) - (let* ((dashboard-ips (mddb:get-dashboards))) - (for-each - (lambda (ipadr) - (let* ((soc (common:open-nm-req (conc "tcp://" ipadr))) - (msg (conc "main " *toppath*)) - (res (common:nm-send-receive-timeout soc msg))) - (if (not res) ;; couldn't reach that dashboard - remove it from db - (print "ERROR: couldn't reach dashboard " ipadr)) - res)) - dashboard-ips))) - - -;;====================================================================== -;; D A S H B O A R D D B -;;====================================================================== - -(define (mddb:open-db) - (let* ((db (open-database (conc (get-environment-variable "HOME") "/.dashboard.db")))) - (set-busy-handler! db (busy-timeout 10000)) - (for-each - (lambda (qry) - (exec (sql db qry))) - (list - "CREATE TABLE IF NOT EXISTS vars (id INTEGER PRIMARY KEY,key TEXT, val TEXT, CONSTRAINT varsconstraint UNIQUE (key));" - "CREATE TABLE IF NOT EXISTS dashboards ( - id INTEGER PRIMARY KEY, - pid INTEGER, - username TEXT, - hostname TEXT, - ipaddr TEXT, - portnum INTEGER, - start_time TIMESTAMP DEFAULT (strftime('%s','now')), - CONSTRAINT hostport UNIQUE (hostname,portnum) - );" - )) - db)) - -;; register a dashboard -;; -(define (mddb:register-dashboard port) - (let* ((pid (current-process-id)) - (hostname (get-host-name)) - (ipaddr (server:get-best-guess-address hostname)) - (username (current-user-name)) ;; (car userinfo))) - (db (mddb:open-db))) - (print "Register monitor, pid: " pid ", hostname: " hostname ", port: " port ", username: " username) - (exec (sql db "INSERT OR REPLACE INTO dashboards (pid,username,hostname,ipaddr,portnum) VALUES (?,?,?,?,?);") - pid username hostname ipaddr port) - (close-database db))) - -;; unregister a monitor -;; -(define (mddb:unregister-dashboard host port) - (let* ((db (mddb:open-db))) - (print "Register unregister monitor, host:port=" host ":" port) - (exec (sql db "DELETE FROM dashboards WHERE hostname=? AND portnum=?;") host port) - (close-database db))) - -;; get registered dashboards -;; -(define (mddb:get-dashboards) - (let ((db (mddb:open-db))) - (query fetch-column - (sql db "SELECT ipaddr || ':' || portnum FROM dashboards;")))) +;; ;;====================================================================== +;; ;; N A N O M S G C L I E N T +;; ;;====================================================================== +;; +;; +;; +;; (define (common:send-dboard-main-changed) +;; (let* ((dashboard-ips (mddb:get-dashboards))) +;; (for-each +;; (lambda (ipadr) +;; (let* ((soc (common:open-nm-req (conc "tcp://" ipadr))) +;; (msg (conc "main " *toppath*)) +;; (res (common:nm-send-receive-timeout soc msg))) +;; (if (not res) ;; couldn't reach that dashboard - remove it from db +;; (print "ERROR: couldn't reach dashboard " ipadr)) +;; res)) +;; dashboard-ips))) +;; +;; +;; ;;====================================================================== +;; ;; D A S H B O A R D D B +;; ;;====================================================================== +;; +;; (define (mddb:open-db) +;; (let* ((db (open-database (conc (get-environment-variable "HOME") "/.dashboard.db")))) +;; (set-busy-handler! db (busy-timeout 10000)) +;; (for-each +;; (lambda (qry) +;; (exec (sql db qry))) +;; (list +;; "CREATE TABLE IF NOT EXISTS vars (id INTEGER PRIMARY KEY,key TEXT, val TEXT, CONSTRAINT varsconstraint UNIQUE (key));" +;; "CREATE TABLE IF NOT EXISTS dashboards ( +;; id INTEGER PRIMARY KEY, +;; pid INTEGER, +;; username TEXT, +;; hostname TEXT, +;; ipaddr TEXT, +;; portnum INTEGER, +;; start_time TIMESTAMP DEFAULT (strftime('%s','now')), +;; CONSTRAINT hostport UNIQUE (hostname,portnum) +;; );" +;; )) +;; db)) +;; +;; ;; register a dashboard +;; ;; +;; (define (mddb:register-dashboard port) +;; (let* ((pid (current-process-id)) +;; (hostname (get-host-name)) +;; (ipaddr (server:get-best-guess-address hostname)) +;; (username (current-user-name)) ;; (car userinfo))) +;; (db (mddb:open-db))) +;; (print "Register monitor, pid: " pid ", hostname: " hostname ", port: " port ", username: " username) +;; (exec (sql db "INSERT OR REPLACE INTO dashboards (pid,username,hostname,ipaddr,portnum) VALUES (?,?,?,?,?);") +;; pid username hostname ipaddr port) +;; (close-database db))) +;; +;; ;; unregister a monitor +;; ;; +;; (define (mddb:unregister-dashboard host port) +;; (let* ((db (mddb:open-db))) +;; (print "Register unregister monitor, host:port=" host ":" port) +;; (exec (sql db "DELETE FROM dashboards WHERE hostname=? AND portnum=?;") host port) +;; (close-database db))) +;; +;; ;; get registered dashboards +;; ;; +;; (define (mddb:get-dashboards) +;; (let ((db (mddb:open-db))) +;; (query fetch-column +;; (sql db "SELECT ipaddr || ':' || portnum FROM dashboards;")))) ;;====================================================================== ;; T E S T L A U N C H I N G P E R I T E M W I T H H O S T T Y P E S ;;====================================================================== ;; @@ -2271,12 +2255,113 @@ ;; (define (common:load-views-config) (let* ((view-cfgdat (make-hash-table)) (home-cfgfile (conc (get-environment-variable "HOME") "/.mtviews.config")) (mthome-cfgfile (conc *toppath* "/.mtviews.config"))) - (if (file-exists? mthome-cfgfile) + (if (common:file-exists? mthome-cfgfile) (read-config mthome-cfgfile view-cfgdat #t)) ;; we load the home dir file AFTER the MTRAH file so the user can clobber settings when running the dashboard in read-only areas - (if (file-exists? home-cfgfile) + (if (common:file-exists? home-cfgfile) (read-config home-cfgfile view-cfgdat #t)) view-cfgdat)) + +;;====================================================================== +;; Manage pkts, used in servers, tests and likely other contexts so put +;; in common +;;====================================================================== + +(define common:pkt-spec + '((server . ((action . a) + (pid . d) + (ipaddr . i) + (port . p))) + + (test . ((cpuuse . c) + (diskuse . d) + (item-path . i) + (runname . r) + (state . s) + (target . t) + (status . u))))) + +(define (common:get-pkts-dirs mtconf use-lt) + (let* ((pktsdirs-str (or (configf:lookup mtconf "setup" "pktsdirs") + (and use-lt + (conc *toppath* "/lt/.pkts")))) + (pktsdirs (if pktsdirs-str + (string-split pktsdirs-str " ") + #f))) + pktsdirs)) + +;; use-lt is use linktree "lt" link to find pkts dir +(define (common:with-queue-db mtconf proc #!key (use-lt #f)(toppath-in #f)) + (let* ((pktsdirs (common:get-pkts-dirs mtconf use-lt)) + (pktsdir (if pktsdirs (car pktsdirs) #f)) + (toppath (or (configf:lookup mtconf "scratchdat" "toppath") + toppath-in)) + (pdbpath (or (configf:lookup mtconf "setup" "pdbpath") pktsdir))) + (cond + ((not (and pktsdir toppath pdbpath)) + (debug:print 0 *default-log-port* "ERROR: settings are missing in your megatest.config for area management.") + (debug:print 0 *default-log-port* " you need to have pktsdir in the [setup] section.")) + ((not (common:file-exists? pktsdir)) + (debug:print 0 *default-log-port* "ERROR: pkts directory not found " pktsdir)) + ((not (equal? (file-owner pktsdir)(current-effective-user-id))) + (debug:print 0 *default-log-port* "ERROR: directory " pktsdir " is not owned by " (current-effective-user-name))) + (else + (let* ((pdb (open-queue-db pdbpath "pkts.db" + schema: '("CREATE TABLE groups (id INTEGER PRIMARY KEY,groupname TEXT, CONSTRAINT group_constraint UNIQUE (groupname));")))) + (proc pktsdirs pktsdir pdb) + (dbi:close pdb)))))) + +(define (common:load-pkts-to-db mtconf) + (common:with-queue-db + mtconf + (lambda (pktsdirs pktsdir pdb) + (for-each + (lambda (pktsdir) ;; look at all + (cond + ((not (common:file-exists? pktsdir)) + (debug:print 0 *default-log-port* "ERROR: packets directory " pktsdir " does not exist.")) + ((not (directory? pktsdir)) + (debug:print 0 *default-log-port* "ERROR: packets directory path " pktsdir " is not a directory.")) + ((not (file-read-access? pktsdir)) + (debug:print 0 *default-log-port* "ERROR: packets directory path " pktsdir " is not readable.")) + (else + (debug:print-info 0 *default-log-port* "Loading packets found in " pktsdir) + (let ((pkts (glob (conc pktsdir "/*.pkt")))) + (for-each + (lambda (pkt) + (let* ((uuid (cadr (string-match ".*/([0-9a-f]+).pkt" pkt))) + (exists (lookup-by-uuid pdb uuid #f))) + (if (not exists) + (let* ((pktdat (string-intersperse + (with-input-from-file pkt read-lines) + "\n")) + (apkt (pkt->alist pktdat)) + (ptype (alist-ref 'T apkt))) + (add-to-queue pdb pktdat uuid (or ptype 'cmd) #f 0) + (debug:print 4 *default-log-port* "Added " uuid " of type " ptype " to queue")) + (debug:print 4 *default-log-port* "pkt: " uuid " exists, skipping...") + ))) + pkts))))) + pktsdirs)))) + +(define (common:get-pkt-alists pkts) + (map (lambda (x) + (alist-ref 'apkt x)) ;; 'pkta pulls out the alist from the read pkt + pkts)) + +;; given list of pkts (alist mode) return list of D cards as Unix epoch, sorted descending +;; also delete duplicates by target i.e. (car pkt) +;; +(define (common:get-pkt-times pkts) + (delete-duplicates + (sort + (map (lambda (x) + `(,(alist-ref 't x) . ,(string->number (alist-ref 'D x)))) + pkts) + (lambda (a b)(> (cdr a)(cdr b)))) ;; sort descending + (lambda (a b)(equal? (car a)(car b))))) ;; remove duplicates by target + + Index: configf.scm ================================================================== --- configf.scm +++ configf.scm @@ -23,18 +23,18 @@ ;; return list (path fullpath configname) (define (find-config configname #!key (toppath #f)) (if toppath (let ((cfname (conc toppath "/" configname))) - (if (file-exists? cfname) + (if (common:file-exists? cfname) (list toppath cfname configname) (list #f #f #f))) (let* ((cwd (string-split (current-directory) "/"))) (let loop ((dir cwd)) (let* ((path (conc "/" (string-intersperse dir "/"))) (fullpath (conc path "/" configname))) - (if (file-exists? fullpath) + (if (common:file-exists? fullpath) (list path fullpath configname) (let ((remcwd (take dir (- (length dir) 1)))) (if (null? remcwd) (list #f #f #f) ;; #f #f) (loop remcwd))))))))) @@ -228,11 +228,11 @@ (define (read-config path ht allow-system #!key (environ-patt #f) (curr-section #f) (sections #f) (settings (make-hash-table)) (keep-filenames #f) (post-section-procs '()) (apply-wildcards #t)) (debug:print 9 *default-log-port* "START: " path) (if (and (not (port? path)) - (not (file-exists? path))) ;; for case where we are handed a port + (not (common:file-exists? path))) ;; for case where we are handed a port (begin (debug:print-info 1 *default-log-port* "read-config - file not found " path " current path: " (current-directory)) ;; WARNING: This is a risky change but really, we should not return an empty hash table if no file read? #f) ;; (if (not ht)(make-hash-table) ht)) (let ((inp (if (string? path) @@ -281,11 +281,11 @@ (common:nice-path (conc (if curr-conf-dir curr-conf-dir ".") "/" include-file))))) - (if (file-exists? full-conf) + (if (common:file-exists? full-conf) (begin ;; (push-directory conf-dir) (debug:print 9 *default-log-port* "Including: " full-conf) (read-config full-conf res allow-system environ-patt: environ-patt curr-section: curr-section-name sections: sections settings: settings keep-filenames: keep-filenames) ;; (pop-directory) @@ -297,11 +297,11 @@ (configf:script-rx ( x include-script params);; handle-exceptions ;; exn ;; (begin ;; (debug:print '(0 2 9) #f "INFO: include from script " include-script " failed.") ;; (loop (configf:read-line inp res (calc-allow-system allow-system curr-section-name sections) settings) curr-section-name #f #f)) - (if (and (file-exists? include-script)(file-execute-access? include-script)) + (if (and (common:file-exists? include-script)(file-execute-access? include-script)) (let* ((new-inp-port (open-input-pipe (conc include-script " " params)))) (debug:print '(2 9) *default-log-port* "Including from script output: " include-script) ;; (print "We got here, calling read-config next. Port is: " new-inp-port) (read-config new-inp-port res allow-system environ-patt: environ-patt curr-section: curr-section-name sections: sections settings: settings keep-filenames: keep-filenames) (close-input-port new-inp-port) @@ -320,10 +320,11 @@ (proc curr-section-name section-name res path)))) post-section-procs) ;; after gathering the vars for a section and if apply-wildcards is true and if there is a wildcard in the section name process wildcards ;; NOTE: we are processing the curr-section-name, NOT section-name. (process-wildcards res curr-section-name) + (if (not (hash-table-ref/default res section-name #f))(hash-table-set! res section-name '())) ;; ensure that mere mention of a section is not lost (loop (configf:read-line inp res (calc-allow-system allow-system curr-section-name sections) settings) ;; if we have the sections list then force all settings into "" and delete it later? ;; (if (or (not sections) ;; (member section-name sections)) ;; section-name "") ;; stick everything into "". NOPE: We need new strategy. Put stuff in correct sections and then delete all sections later. @@ -400,11 +401,11 @@ (let* ((curr-dir (current-directory)) (configinfo (find-config fname toppath: given-toppath)) (toppath (car configinfo)) (configfile (cadr configinfo)) (set-fields (lambda (curr-section next-section ht path) - (let ((field-names (if ht (keys:config-get-fields ht) '())) + (let ((field-names (if ht (common:get-fields ht) '())) (target (or (getenv "MT_TARGET")(args:get-arg "-reqtarg")(args:get-arg "-target")))) (debug:print-info 9 *default-log-port* "set-fields with field-names=" field-names " target=" target " curr-section=" curr-section " next-section=" next-section " path=" path " ht=" ht) (if (not (null? field-names))(keys:target-set-args field-names target #f)))))) (if toppath (change-directory toppath)) (if (and toppath pathenvvar)(setenv pathenvvar toppath)) @@ -423,12 +424,34 @@ (cadr match) #f)) )) #f)) +;; use to have definitive setting: +;; [foo] +;; var yes +;; +;; (configf:var-is? cfgdat "foo" "var" "yes") => #t +;; +(define (configf:var-is? cfgdat section var expected-val) + (equal? (configf:lookup cfgdat section var) expected-val)) + (define configf:lookup config-lookup) (define configf:read-file read-config) + +;; safely look up a value that is expected to be a number, return +;; a default (#f unless provided) +;; +(define (configf:lookup-number cfdat section varname #!key (default #f)) + (let* ((val (configf:lookup *configdat* section varname)) + (res (if val + (string->number (string-substitute "\\s+" "" val #t)) + #f))) + (cond + (res res) + (val (debug:print 0 *default-log-port* "ERROR: no number found for [" section "], " varname ", got: " val)) + (else default)))) (define (configf:section-vars cfgdat section) (let ((sectdat (hash-table-ref/default cfgdat section '()))) (if (null? sectdat) '() @@ -487,11 +510,11 @@ (if (null? tal) newres (loop (car tal)(cdr tal) newres)))))) (define (configf:file->list fname) - (if (file-exists? fname) + (if (common:file-exists? fname) (let ((inp (open-input-file fname))) (let loop ((inl (read-line inp)) (res '())) (if (eof-object? inl) (begin @@ -591,11 +614,11 @@ ;; reads a refdb into an assoc array of assoc arrays ;; returns (list dat msg) (define (configf:read-refdb refdb-path) (let ((sheets-file (conc refdb-path "/sheet-names.cfg"))) - (if (not (file-exists? sheets-file)) + (if (not (common:file-exists? sheets-file)) (list #f (conc "ERROR: no refdb found at " refdb-path)) (if (not (file-read-access? sheets-file)) (list #f (conc "ERROR: refdb file not readable at " refdb-path)) (let* ((sheets (with-input-from-file sheets-file (lambda () @@ -663,36 +686,33 @@ #f (configf:alist->config (with-input-from-file fname read)))) (define (configf:write-alist cdat fname) - (if (common:faux-lock fname) - (let* ((dat (configf:config->alist cdat)) - (res - (begin - (with-output-to-file fname ;; first write out the file - (lambda () - (pp dat))) - - (if (common:file-exists? fname) ;; now verify it is readable - (if (configf:read-alist fname) - #t ;; data is good. - (begin - (handle-exceptions - exn - #f - (debug:print 0 *default-log-port* "WARNING: content " dat " for cache " fname " is not readable. Deleting generated file.") - (delete-file fname)) - #f)) - #f)))) - - (common:faux-unlock fname) - res) - (begin - (debug:print 0 *default-log-port* "WARNING: could not get faux-lock on " fname) - #f))) - + (if (not (common:faux-lock fname)) + (debug:print 0 *default-log-port* "INFO: Could not get lock on " fname)) + (let* ((dat (configf:config->alist cdat)) + (res + (begin + (with-output-to-file fname ;; first write out the file + (lambda () + (pp dat))) + + (if (common:file-exists? fname) ;; now verify it is readable + (if (configf:read-alist fname) + #t ;; data is good. + (begin + (handle-exceptions + exn + #f + (debug:print 0 *default-log-port* "WARNING: content " dat " for cache " fname " is not readable. Deleting generated file.") + (delete-file fname)) + #f)) + #f)))) + (common:faux-unlock fname) + res)) + ;; convert hierarchial list to ini format ;; (define (configf:config->ini data) (map (lambda (section) Index: dashboard-tests.scm ================================================================== --- dashboard-tests.scm +++ dashboard-tests.scm @@ -234,11 +234,11 @@ ;; if there is a submegatest create a button to launch dashboard in that area ;; (define (submegatest-panel dbstruct keydat testdat runname testconfig) (let* ((subarea (configf:lookup testconfig "setup" "submegatest")) - (area-exists (and subarea (file-exists? subarea)))) + (area-exists (and subarea (common:file-exists? subarea)))) ;; (debug:print-info 0 *default-log-port* "Megatest subarea=" subarea ", area-exists=" area-exists) (if subarea (iup:frame #:title "Megatest Run Info" ; #:expand "YES" (iup:button @@ -458,11 +458,11 @@ "/")) (item-path (db:test-get-item-path testdat)) ;; this next block was added to fix a bug where variables were ;; needed. Revisit this. (runconfig (let ((runconfigf (conc *toppath* "/runconfigs.config"))) ;; no rush but it would be good to convert this call to use runconfig:read - (if (file-exists? runconfigf) + (if (common:file-exists? runconfigf) (handle-exceptions exn #f ;; do nothing, just keep on trucking .... (setup-env-defaults runconfigf run-id (make-hash-table) keydat environ-patt: keystring)) (make-hash-table)))) @@ -472,18 +472,18 @@ (handle-exceptions exn ;; NOTE: I've no idea why this was written this way. Research, study and fix needed! (tests:get-testconfig (db:test-get-testname testdat) (db:test-get-item-path testdat) test-registry #f) (tests:get-testconfig (db:test-get-testname testdat) item-path test-registry #t)))) (viewlog (lambda (x) - (if (file-exists? logfile) + (if (common:file-exists? logfile) ;(system (conc "firefox " logfile "&")) (dcommon:run-html-viewer logfile) (message-window (conc "File " logfile " not found"))))) (view-a-log (lambda (lfile) (let ((lfilename (conc rundir "/" lfile))) ;; (print "lfilename: " lfilename) - (if (file-exists? lfilename) + (if (common:file-exists? lfilename) ;(system (conc "firefox " logfile "&")) (dcommon:run-html-viewer lfilename) (message-window (conc "File " lfilename " not found")))))) (xterm (lambda (x) (if (directory-exists? rundir) @@ -496,11 +496,11 @@ "MT_.*")) (message-window (conc "Directory " rundir " not found"))))) (widgets (make-hash-table)) (refreshdat (lambda () (let* ((curr-mod-time (file-modification-time db-path)) - ;; (max ..... (if (file-exists? testdat-path) + ;; (max ..... (if (common:file-exists? testdat-path) ;; (file-modification-time testdat-path) ;; (begin ;; (set! testdat-path (conc rundir "/testdat.db")) ;; 0)))) (need-update (or (and (>= curr-mod-time db-mod-time) Index: dashboard.scm ================================================================== --- dashboard.scm +++ dashboard.scm @@ -85,10 +85,11 @@ "-v" "-q" "-use-db-cache" "-skip-version-check" "-repl" + "-rh5.11" ;; fix to allow running on rh5.11 ) args:arg-hash 0)) (if (not (null? remargs)) @@ -105,10 +106,17 @@ ;; (if (not (launch:setup)) (begin (print "Failed to find megatest.config, exiting") (exit 1))) + +;; deal with RH 5.11 gtk lib or iup lib missing detachbox feature +;; first check for the switch +;; +(if (or (args:get-arg "-rh5.11") + (configf:lookup *configdat* "dashboard" "no-detachbox")) + (set! iup:detachbox iup:vbox)) (if (not (common:on-homehost?)) (begin (debug:print 0 *default-log-port* "WARNING: Current policy requires running dashboard on homehost: " (common:get-homehost)))) @@ -204,11 +212,13 @@ ((allruns-by-id (make-hash-table)) : hash-table) ;; hash of run-id -> dboard:rundat records ((done-runs '()) : list) ;; list of runs already drawn ((not-done-runs '()) : list) ;; list of runs not yet drawn (header #f) ;; header for decoding the run records (keys #f) ;; keys for this run (i.e. target components) - ((numruns (string->number (or (args:get-arg "-cols") "10"))) : number) ;; + ((numruns (string->number (or (args:get-arg "-cols") + (configf:lookup *configdat* "dashboard" "cols") + "8"))) : number) ;; ((tot-runs 0) : number) ((last-data-update 0) : number) ;; last time the data in allruns was updated ((last-runs-update 0) : number) ;; last time we pulled the runs info to update the tree (runs-mutex (make-mutex)) ;; use to prevent parallel access to draw objects ((run-update-times (make-hash-table)) : hash-table) ;; update times indexed by run-id @@ -222,11 +232,11 @@ (runs-matrix #f) ;; used in newdashboard ((start-run-offset 0) : number) ;; left-right slider value ((start-test-offset 0) : number) ;; up-down slider value ((runs-btn-height (or (configf:lookup *configdat* "dashboard" "btn-height") "x16")) : string) ;; was 12 ((runs-btn-fontsz (or (configf:lookup *configdat* "dashboard" "btn-fontsz") "10")) : string) ;; was 8 - ((runs-cell-width (or (configf:lookup *configdat* "dashboard" "cell-width") "60")) : string) ;; was 50 + ((runs-cell-width (or (configf:lookup *configdat* "dashboard" "cell-width") "50")) : string) ;; was 50 ((all-test-names '()) : list) ;; Canvas and drawing data (cnv #f) (cnv-obj #f) @@ -1085,10 +1095,11 @@ (dboard:tabdat-filters-changed-set! tabdat #t))) (define (update-search commondat tabdat x val) (hash-table-set! (dboard:tabdat-searchpatts tabdat) x val) (dboard:tabdat-filters-changed-set! tabdat #t) + (mark-for-update tabdat) (set-bg-on-filter commondat tabdat)) ;; force ALL updates to zero (effectively) ;; (define (mark-for-update tabdat) @@ -1376,11 +1387,11 @@ ;; Target, testpatt, state and status input boxes ;; (iup:vbox ;; Command to run, placed over the top of the canvas (dcommon:command-action-selector commondat tabdat tab-num: tab-num) - (dboard:runs-tree-browser commondat tabdat) + (dboard:runs-tree-browser commondat tabdat) (dcommon:command-runname-selector commondat tabdat tab-num: tab-num) (dcommon:command-testname-selector commondat tabdat update-keyvals)) ;; key-listboxes)) (dcommon:command-tests-tasks-canvas tabdat test-records sorted-testnames tests-draw-state)))) (tb (dboard:tabdat-runs-tree tabdat))) @@ -1397,14 +1408,20 @@ ;; (let ((logs-tb (iup:textbox #:expand "YES" ;; #:multiline "YES"))) ;; (dboard:tabdat-logs-textbox-set! tabdat logs-tb) ;; logs-tb)) +;; browse runs as a tree. Used in both "Runs" tab and +;; in the runs control panel. +;; (define (dboard:runs-tree-browser commondat tabdat) - (let* ((txtbox (iup:textbox #:action (lambda (val a b) + (let* ( + (txtbox (iup:textbox #:action (lambda (val a b) (debug:catch-and-dump (lambda () + ;; for the Runs view we put the list of keyvals into tabdat target + ;; for the Run Controls we put then update the run-command (if b (dboard:tabdat-target-set! tabdat (string-split b "/"))) (dashboard:update-run-command tabdat)) "command-testname-selector tb action")) #:value (dboard:test-patt->lines (dboard:tabdat-test-patts-use tabdat)) @@ -1414,11 +1431,11 @@ (tb (iup:treebox #:value 0 #:name "Runs" #:expand "YES" - #:addexpanded "NO" + #:addexpanded "YES" #:size "10x" #:selection-cb (lambda (obj id state) (debug:catch-and-dump (lambda () @@ -1440,11 +1457,14 @@ (debug:print-error 5 *default-log-port* "tree-path->run-id returned non-number " run-id)))) "treebox")) ;; (print "path: " (tree:node->path obj id) " run-id: " run-id) ))) (dboard:tabdat-runs-tree-set! tabdat tb) - (iup:vbox tb txtbox))) + (iup:detachbox + (iup:vbox + tb + txtbox)))) ;;====================================================================== ;; R U N C O N T R O L S ;;====================================================================== ;; @@ -1509,11 +1529,11 @@ (iup:vbox (iup:split #:orientation "HORIZONTAL" #:value 800 (let* ((cnv-obj (iup:canvas - ;; #:size "500x400" + ;; #:size "250x250" ;; "500x400" #:expand "YES" #:scrollbar "YES" #:posx "0.5" #:posy "0.5" #:action (make-canvas-action @@ -1554,15 +1574,15 @@ (let* ((hb1 (iup:hbox)) (graph-cell-table (dboard:tabdat-graph-cell-table tabdat)) (changed #f) (graph-matrix (iup:matrix #:alignment1 "ALEFT" - #:expand "YES" ;; "HORIZONTAL" + ;; #:expand "YES" ;; "HORIZONTAL" #:scrollbar "YES" #:numcol 10 #:numlin 20 - #:numcol-visible (min 8) + #:numcol-visible 5 ;; (min 8) #:numlin-visible 1 #:click-cb (lambda (obj row col status) (let* ((graph-cell (conc row ":" col)) @@ -1865,11 +1885,11 @@ (define (dashboard:summary commondat tabdat #!key (tab-num #f)) (let* ((rawconfig (read-config (conc *toppath* "/megatest.config") #f #f)) ;; changed to #f since I want #{} to be expanded by [system ...] to NOT be expanded. WAS: 'return-string))) (changed #f)) (iup:vbox (iup:split - #:value 500 + #:value 300 (iup:frame #:title "General Info" (iup:vbox (iup:hbox (iup:label "Area Path") @@ -1902,11 +1922,11 @@ (let* ((success #t) ;; at any stage of the process set this flag to #f to skip downstream steps. Intention here is to recover gracefully if user provided tabs fail to load. (source (configf:lookup views-cfgdat view-name "source")) (viewgen (configf:lookup views-cfgdat view-name "viewgen")) (updater (configf:lookup views-cfgdat view-name "updater")) (result-child #f)) - (if (and (file-exists? source) + (if (and (common:file-exists? source) (file-read-access? source)) (handle-exceptions exn (begin (print-call-chain) @@ -2048,11 +2068,11 @@ (let* ((update-mutex (dboard:commondat-update-mutex commondat)) (tb (iup:treebox #:value 0 #:name "Runs" #:expand "YES" - #:addexpanded "NO" + #:addexpanded "YES" #:selection-cb (lambda (obj id state) (debug:catch-and-dump (lambda () ;; (print "obj: " obj ", id: " id ", state: " state) @@ -2159,10 +2179,28 @@ ;;====================================================================== ;; R U N S ;;====================================================================== +(define (dboard:squarify toggles size) + (let loop ((hed (car toggles)) + (tal (cdr toggles)) + (cur '()) + (res '())) + (let* ((ovrflo (>= (length cur) size)) + (newcur (if ovrflo + (list hed) + (cons hed cur))) + (newres (if ovrflo + (cons cur res) + res))) + (if (null? tal) + (if ovrflo + newres + (cons newcur res)) + (loop (car tal)(cdr tal) newcur newres))))) + (define (dboard:make-controls commondat tabdat #!key (extra-widget #f) ) (let ((btn-fontsz (dboard:tabdat-runs-btn-fontsz tabdat))) (iup:hbox (iup:vbox (iup:frame @@ -2223,17 +2261,17 @@ (mark-for-update tabdat)))) (default-cmd (car (list-ref *tests-sort-type-index* *tests-sort-reverse*)))) (iuplistbox-fill-list sort-lb cmds-list selected-item: default-cmd) - (set! hide-empty (iup:button "HideEmpty" - ;; #:expand HORIZONTAL" - #:expand "NO" #:size "80x15" - #:action (lambda (obj) - (dboard:tabdat-hide-empty-runs-set! tabdat (not (dboard:tabdat-hide-empty-runs tabdat))) - (iup:attribute-set! obj "TITLE" (if (dboard:tabdat-hide-empty-runs tabdat) "+HideE" "-HideE")) - (mark-for-update tabdat)))) + ;; (set! hide-empty (iup:button "HideEmpty" + ;; ;; #:expand HORIZONTAL" + ;; #:expand "NO" #:size "80x15" + ;; #:action (lambda (obj) + ;; (dboard:tabdat-hide-empty-runs-set! tabdat (not (dboard:tabdat-hide-empty-runs tabdat))) + ;; (iup:attribute-set! obj "TITLE" (if (dboard:tabdat-hide-empty-runs tabdat) "+HideE" "-HideE")) + ;; (mark-for-update tabdat)))) (set! hide (iup:button "Hide" #:expand "NO" #:size "40x15" ;; #:expand "HORIZONTAL" #:action (lambda (obj) (dboard:tabdat-hide-not-hide-set! tabdat #t) ;; (not (dboard:tabdat-hide-not-hide tabdat))) ;; (iup:attribute-set! obj "TITLE" (if (dboard:tabdat-hide-not-hide tabdat) "HideTests" "NotHide")) @@ -2263,54 +2301,80 @@ ))) - (iup:frame - #:title "state/status filter" - (iup:vbox - (apply - iup:hbox - (map (lambda (status) - (iup:toggle (conc status " ") - #:fontsize btn-fontsz ;; "10" - #:expand "HORIZONTAL" - #:action (lambda (obj val) - (mark-for-update tabdat) - (if (eq? val 1) - (hash-table-set! (dboard:tabdat-status-ignore-hash tabdat) status #t) - (hash-table-delete! (dboard:tabdat-status-ignore-hash tabdat) status)) - (set-bg-on-filter commondat tabdat)))) - (map cadr *common:std-statuses*))) ;; '("PASS" "FAIL" "WARN" "CHECK" "WAIVED" "STUCK/DEAD" "n/a" "SKIP"))) - (apply - iup:hbox - (map (lambda (state) - (iup:toggle (conc state " ") - #:fontsize btn-fontsz - #:expand "HORIZONTAL" - #:action (lambda (obj val) - (mark-for-update tabdat) - (if (eq? val 1) - (hash-table-set! (dboard:tabdat-state-ignore-hash tabdat) state #t) - (hash-table-delete! (dboard:tabdat-state-ignore-hash tabdat) state)) - (set-bg-on-filter commondat tabdat)))) - (map cadr *common:std-states*))) ;; '("RUNNING" "COMPLETED" "INCOMPLETE" "LAUNCHED" "NOT_STARTED" "KILLED" "DELETED"))) - (iup:valuator #:valuechanged_cb (lambda (obj) - (let ((val (inexact->exact (round (/ (string->number (iup:attribute obj "VALUE")) 10)))) - (oldmax (string->number (iup:attribute obj "MAX"))) - (maxruns (dboard:tabdat-tot-runs tabdat))) - (dboard:tabdat-start-run-offset-set! tabdat val) - (mark-for-update tabdat) - (debug:print 6 *default-log-port* "(dboard:tabdat-start-run-offset tabdat) " (dboard:tabdat-start-run-offset tabdat) " maxruns: " maxruns ", val: " val " oldmax: " oldmax) - (iup:attribute-set! obj "MAX" (* maxruns 10)))) - #:expand "HORIZONTAL" - #:max (* 10 (max (hash-table-size (dboard:tabdat-allruns-by-id tabdat)) 10)) - #:min 0 - #:step 0.01))) - ;;(iup:button "inc rows" #:action (lambda (obj)(dboard:tabdat-num-tests-set! tabdat (+ (dboard:tabdat-num-tests tabdat) 1)))) - ;(iup:button "dec rows" #:action (lambda (obj)(dboard:tabdat-num-tests-set! tabdat (if (> (dboard:tabdat-num-tests tabdat) 0)(- (dboard:tabdat-num-tests tabdat) 1) 0)))) - ))) + (let* ((status-toggles (map (lambda (status) + (iup:toggle (conc status) + #:fontsize 8 ;; btn-fontsz ;; "10" + ;; #:expand "HORIZONTAL" + #:action (lambda (obj val) + (mark-for-update tabdat) + (if (eq? val 1) + (hash-table-set! (dboard:tabdat-status-ignore-hash tabdat) status #t) + (hash-table-delete! (dboard:tabdat-status-ignore-hash tabdat) status)) + (set-bg-on-filter commondat tabdat)))) + (map cadr *common:std-statuses*))) ;; '("PASS" "FAIL" "WARN" "CHECK" "WAIVED" "STUCK/DEAD" "n/a" "SKIP"))) + (state-toggles (map (lambda (state) + (iup:toggle (conc state) + #:fontsize 8 ;; btn-fontsz + ;; #:expand "HORIZONTAL" + #:action (lambda (obj val) + (mark-for-update tabdat) + (if (eq? val 1) + (hash-table-set! (dboard:tabdat-state-ignore-hash tabdat) state #t) + (hash-table-delete! (dboard:tabdat-state-ignore-hash tabdat) state)) + (set-bg-on-filter commondat tabdat)))) + (map cadr *common:std-states*))) ;; '("RUNNING" "COMPLETED" "INCOMPLETE" "LAUNCHED" "NOT_STARTED" "KILLED" "DELETED"))) + (num-toggle-cols (inexact->exact (round (/ (max (length status-toggles)(length state-toggles)) 3))))) + (iup:vbox + (iup:hbox + (iup:frame + #:title "states" + (apply + iup:hbox + (map (lambda (colgrp) + (apply iup:vbox colgrp)) + (dboard:squarify state-toggles 3)))) + (iup:frame + #:title "statuses" + (apply + iup:hbox + (map (lambda (colgrp) + (apply iup:vbox colgrp)) + (dboard:squarify status-toggles 3))))) + ;; + ;; (iup:frame + ;; #:title "state/status filter" + ;; (iup:vbox + ;; (apply + ;; iup:hbox + ;; (map + ;; (lambda (status-toggle state-toggle) + ;; (iup:vbox + ;; status-toggle + ;; state-toggle)) + ;; status-toggles state-toggles)) + + ;; horizontal slider was here + + ))))) + +(define (dashboard:runs-horizontal-slider tabdat ) + (iup:valuator #:valuechanged_cb (lambda (obj) + (let ((val (inexact->exact (round (/ (string->number (iup:attribute obj "VALUE")) 10)))) + (oldmax (string->number (iup:attribute obj "MAX"))) + (maxruns (dboard:tabdat-tot-runs tabdat))) + (dboard:tabdat-start-run-offset-set! tabdat val) + (mark-for-update tabdat) + (debug:print 6 *default-log-port* "(dboard:tabdat-start-run-offset tabdat) " (dboard:tabdat-start-run-offset tabdat) " maxruns: " maxruns ", val: " val " oldmax: " oldmax) + (iup:attribute-set! obj "MAX" (* maxruns 10)))) + #:expand "HORIZONTAL" + #:max (* 10 (max (hash-table-size (dboard:tabdat-allruns-by-id tabdat)) 10)) + #:min 0 + #:step 0.01)) + (define (dashboard:popup-menu run-id test-id target runname test-name testpatt item-test-path test-info) (iup:menu (iup:menu-item "Test Control Panel" @@ -2506,10 +2570,13 @@ (map (lambda (x) (let ((res (iup:hbox #:expand "HORIZONTAL" (iup:label x #:size (conc 40 btn-height) #:fontsize btn-fontsz #:expand "NO") ;; "HORIZONTAL") (iup:textbox #:size (conc 35 btn-height) #:fontsize btn-fontsz #:value "%" #:expand "NO" ;; "HORIZONTAL" #:action (lambda (obj unk val) + ;; each field (field name is "x" var) live updates + ;; the search filter as it is typed + (dboard:tabdat-target-set! runs-dat #f) ;; ensure the fields text boxes are used and not the info from the tree (mark-for-update runs-dat) (update-search commondat runs-dat x val)))))) (set! i (+ i 1)) res)) keynames))))) @@ -2630,20 +2697,23 @@ #:title (conc "Megatest dashboard " (current-user-name) ":" *toppath*) #:menu (dcommon:main-menu) (let* ((runs-view (iup:vbox (iup:split #:orientation "VERTICAL" ;; "HORIZONTAL" - #:value 150 + #:value 100 (dboard:runs-tree-browser commondat runs-dat) (iup:split + #:value 100 ;; left most block, including row names (apply iup:vbox lftlst) ;; right hand block, including cells (iup:vbox + #:expand "YES" ;; the header (apply iup:hbox (reverse hdrlst)) - (apply iup:hbox (reverse bdylst))))) + (apply iup:hbox (reverse bdylst)) + (dashboard:runs-horizontal-slider runs-dat)))) controls )) (views-cfgdat (common:load-views-config)) (additional-tabnames '()) (tab-start-num 5) ;; DON'T FORGET TO UPDATE THIS WHEN CHANGING THE STANDARD TABS BELOW @@ -2688,10 +2758,11 @@ runs-view (dashboard:runs-summary commondat onerun-dat tab-num: 2) ;; (dashboard:new-view db data new-view-dat tab-num: 3) (dashboard:run-controls commondat runcontrols-dat tab-num: 3) (dashboard:run-times commondat runtimes-dat tab-num: 4) + ;; (dashboard:runs-summary commondat onerun-dat tab-num: 4) additional-views))) ;; (set! (iup:callback tabs tabchange-cb:) (lambda (a b c)(print "SWITCHED TO TAB: " a " " b " " c))) (iup:attribute-set! tabs "TABTITLE0" "Summary") (iup:attribute-set! tabs "TABTITLE1" "Runs") (iup:attribute-set! tabs "TABTITLE2" "Run Summary") @@ -2758,11 +2829,11 @@ (glob (conc dbdir "/*.db*")))))) (define (dashboard:monitor-changed? commondat tabdat) (let* ((run-update-time (current-seconds)) (monitor-db-path (dboard:tabdat-monitor-db-path tabdat)) - (monitor-modtime (if (and monitor-db-path (file-exists? monitor-db-path)) + (monitor-modtime (if (and monitor-db-path (common:file-exists? monitor-db-path)) (file-modification-time monitor-db-path) -1))) (if (and (eq? (dboard:commondat-curr-tab-num commondat) 0) (or (> monitor-modtime *last-monitor-update-time*) (> (- run-update-time *last-monitor-update-time*) 5))) ;; update every 1/2 minute just in case @@ -3475,11 +3546,11 @@ ;; The heavy lifting starts here ;;====================================================================== (define (main) (let ((mtdb-path (conc *toppath* "/megatest.db"))) ;; - (if (and (file-exists? mtdb-path) + (if (and (common:file-exists? mtdb-path) (file-write-access? mtdb-path)) (if (not (args:get-arg "-skip-version-check")) (common:exit-on-version-changed))) (let* ((commondat (dboard:commondat-make))) ;; Move this stuff to db.scm? I'm not sure that is the right thing to do... @@ -3534,12 +3605,12 @@ (thread-start! th2) (thread-join! th2))))) ;; ease debugging by loading ~/.dashboardrc (let ((debugcontrolf (conc (get-environment-variable "HOME") "/.dashboardrc"))) - (if (file-exists? debugcontrolf) + (if (common:file-exists? debugcontrolf) (load debugcontrolf))) (if (args:get-arg "-repl") (repl) (main)) Index: datashare.scm ================================================================== --- datashare.scm +++ datashare.scm @@ -226,11 +226,11 @@ (if (and path (directory? path) (file-read-access? path)) (let* ((dbpath (conc path "/datashare.db")) (writeable (file-write-access? dbpath)) - (dbexists (file-exists? dbpath)) + (dbexists (common:file-exists? dbpath)) (handler (make-busy-timeout 136000))) (handle-exceptions exn (begin (debug:print 2 *default-log-port* "ERROR: problem accessing db " dbpath @@ -413,11 +413,11 @@ paths)) ;; remove existing link and if possible ... ;; create path to next of tip of target, create link back to source (define (datashare:build-dir-make-link source target) - (if (file-exists? target)(datashare:backup-move target)) + (if (common:file-exists? target)(datashare:backup-move target)) (create-directory (pathname-directory target) #t) (create-symbolic-link source target)) (define (datashare:backup-move path) (let* ((trashdir (conc (pathname-directory path) "/.trash")) @@ -518,11 +518,11 @@ (define (datashare:path->lst path) (string-split path "/")) (define (datashare:pathdat-apply-heuristics configdat path) (cond - ((file-exists? path) "found") + ((common:file-exists? path) "found") (else (conc path " not installed")))) (define (datashare:get-view configdat) (iup:vbox (iup:hbox @@ -692,11 +692,11 @@ (define (datashare:find name paths) (if (null? paths) #f (let loop ((hed (car paths)) (tal (cdr paths))) - (if (file-exists? (conc hed "/" name)) + (if (common:file-exists? (conc hed "/" name)) hed (if (null? tal) #f (loop (car tal)(cdr tal))))))) @@ -706,11 +706,11 @@ (define (datashare:load-config exe-dir exe-name) (let* ((fname (conc exe-dir "/." exe-name ".config"))) (ini:property-separator-patt " * *") (ini:property-separator #\space) - (if (file-exists? fname) + (if (common:file-exists? fname) ;; (ini:read-ini fname) (read-config fname #f #t) (make-hash-table)))) (define (datashare:process-action configdat action . args) @@ -785,11 +785,11 @@ versions) (sqlite3:finalize! db))))) ;; ease debugging by loading ~/.dashboardrc - REMOVE FROM PRODUCTION! (let ((debugcontrolf (conc (get-environment-variable "HOME") "/.datasharerc"))) - (if (file-exists? debugcontrolf) + (if (common:file-exists? debugcontrolf) (load debugcontrolf))) (define (main) (let* ((args (argv)) (prog (car args)) Index: db.scm ================================================================== --- db.scm +++ db.scm @@ -188,19 +188,11 @@ ;; NB// #f => return dbdir only ;; (was planned to be; zeroth db with name=main.db) ;; ;; If run-id is #f return to create and retrieve the path where the db will live. ;; -(define (db:dbfile-path . junk) ;; run-id) - (let* ((dbdir (common:get-db-tmp-area))) - (handle-exceptions - exn - (begin - (debug:print-error 0 *default-log-port* "Couldn't create path to " dbdir) - (exit 1)) - (if (not (directory? dbdir))(create-directory dbdir #t))) - dbdir)) +(define db:dbfile-path common:get-db-tmp-area) (define (db:set-sync db) (let ((syncprag (configf:lookup *configdat* "setup" "sychronous"))) (sqlite3:execute db (conc "PRAGMA synchronous = " (or syncprag 0) ";")))) @@ -212,20 +204,20 @@ (define (db:lock-create-open fname initproc) (let* ((parent-dir (or (pathname-directory fname)(current-directory))) ;; no parent? go local (raw-fname (pathname-file fname)) (dir-writable (file-write-access? parent-dir)) - (file-exists (file-exists? fname)) + (file-exists (common:file-exists? fname)) (file-write (if file-exists (file-write-access? fname) dir-writable ))) ;;(mutex-lock! *db-open-mutex*) ;; tried this mutex, not clear it helped. (if file-write ;; dir-writable (condition-case (let* ((lockfname (conc fname ".lock")) (readyfname (conc parent-dir "/.ready-" raw-fname)) - (readyexists (file-exists? readyfname))) + (readyexists (common:file-exists? readyfname))) (if (not readyexists) (common:simple-file-lock-and-wait lockfname)) (let ((db (sqlite3:open-database fname))) (sqlite3:set-busy-handler! db (make-busy-timeout 136000)) (sqlite3:execute db "PRAGMA synchronous = 0;") @@ -232,11 +224,11 @@ (if (not file-exists) (begin (if (and (configf:lookup *configdat* "setup" "use-wal") (string-match "^/tmp/.*" fname)) ;; this is a file in /tmp (sqlite3:execute db "PRAGMA journal_mode=WAL;") - (print "Creating " fname " in NON-WAL mode.")) + (debug:print 2 *default-log-port* "Creating " fname " in NON-WAL mode.")) (initproc db))) (if (not readyexists) (begin (common:simple-file-release-lock lockfname) (with-output-to-file @@ -272,11 +264,11 @@ ;; ;; This routine creates the db. It is only called if the db is not already opened ;; ;; ;; (define (db:open-rundb dbstruct run-id #!key (attemptnum 0)(do-not-open #f)) ;; (conc *toppath* "/megatest.db") (car *configinfo*))) ;; (let* ((dbfile (db:dbfile-path run-id)) ;; (conc toppath "/db/" run-id ".db")) -;; (dbexists (file-exists? dbfile)) +;; (dbexists (common:file-exists? dbfile)) ;; (db (db:lock-create-open dbfile (lambda (db) ;; (handle-exceptions ;; exn ;; (begin ;; ;; (release-dot-lock dbpath) @@ -307,19 +299,19 @@ ;; (db:sync-tables db:sync-tests-only *megatest-db* db) ;; db)) ;; This routine creates the db if not already present. It is only called if the db is not already opened ;; -(define (db:open-db dbstruct #!key (areapath #f)) ;; TODO: actually use areapath +(define (db:open-db dbstruct #!key (areapath #f)(do-sync #t)) ;; TODO: actually use areapath (let ((tmpdb-stack (dbr:dbstruct-dbstack dbstruct))) ;; RA => Returns the first reference in dbstruct (if (stack? tmpdb-stack) (db:get-db tmpdb-stack) ;; get previously opened db (will create new db handle if all in the stack are already used (let* ((dbpath (db:dbfile-path )) ;; path to tmp db area - (dbexists (file-exists? dbpath)) + (dbexists (common:file-exists? dbpath)) (tmpdbfname (conc dbpath "/megatest.db")) - (dbfexists (file-exists? tmpdbfname)) ;; (conc dbpath "/megatest.db"))) - (mtdbexists (file-exists? (conc *toppath* "/megatest.db"))) + (dbfexists (common:file-exists? tmpdbfname)) ;; (conc dbpath "/megatest.db"))) + (mtdbexists (common:file-exists? (conc *toppath* "/megatest.db"))) (mtdb (db:open-megatest-db)) (mtdbpath (db:dbdat-get-path mtdb)) (tmpdb (db:open-megatest-db path: dbpath)) ;; lock-create-open dbpath db:initialize-main-db)) (refndb (db:open-megatest-db path: dbpath name: "megatest_ref.db")) @@ -337,13 +329,14 @@ (dbr:dbstruct-tmpdb-set! dbstruct tmpdb) (dbr:dbstruct-dbstack-set! dbstruct (make-stack)) ;; BB: why a stack? Why would the number of db's be indeterminate? Is this a legacy of 1.db 2.db .. ? (stack-push! (dbr:dbstruct-dbstack dbstruct) tmpdb) ;; olddb is already a (cons db path) (dbr:dbstruct-refndb-set! dbstruct refndb) ;; (mutex-unlock! *rundb-mutex*) - (if (or (not dbfexists) - (and modtimedelta - (> modtimedelta 10))) ;; if db in tmp is over ten seconds older than the file in MTRA then do a sync back + (if (and (or (not dbfexists) + (and modtimedelta + (> modtimedelta 10))) ;; if db in tmp is over ten seconds older than the file in MTRA then do a sync back + do-sync) (begin (debug:print 4 *default-log-port* "filling db " (db:dbdat-get-path tmpdb) " with data \n from " (db:dbdat-get-path mtdb) " mod time delta: " modtimedelta) (db:sync-tables (db:sync-all-tables-list dbstruct) #f mtdb refndb tmpdb) (debug:print-info 13 *default-log-port* "db:sync-all-tables-list done.") ) @@ -353,23 +346,22 @@ ;; Make the dbstruct, setup up auxillary db's and call for main db at least once ;; ;; called in http-transport and replicated in rmt.scm for *local* access. ;; -(define (db:setup #!key (areapath #f)) +(define (db:setup do-sync #!key (areapath #f)) ;; - (cond (*dbstruct-db* *dbstruct-db*);; TODO: when multiple areas are supported, this optimization will be a hazard (else ;;(common:on-homehost?) (debug:print-info 13 *default-log-port* "db:setup entered (first time, not cached.)") (let* ((dbstruct (make-dbr:dbstruct))) (when (not *toppath*) (debug:print-info 13 *default-log-port* "in db:setup, *toppath* not set; calling launch:setup") (launch:setup areapath: areapath)) (debug:print-info 13 *default-log-port* "Begin db:open-db") - (db:open-db dbstruct areapath: areapath) + (db:open-db dbstruct areapath: areapath do-sync: do-sync) (debug:print-info 13 *default-log-port* "Done db:open-db") (set! *dbstruct-db* dbstruct) ;;(debug:print-info 13 *default-log-port* "new dbstruct = "(dbr:dbstruct->alist dbstruct)) dbstruct)))) ;; (else @@ -381,11 +373,11 @@ ;; NOTE: returns a dbdat not a dbstruct! ;; (define (db:open-megatest-db #!key (path #f)(name #f)) (let* ((dbdir (or path *toppath*)) (dbpath (conc dbdir "/" (or name "megatest.db"))) - (dbexists (file-exists? dbpath)) + (dbexists (common:file-exists? dbpath)) (db (db:lock-create-open dbpath (lambda (db) (db:initialize-main-db db) ;;(db:initialize-run-id-db db) ))) @@ -410,10 +402,25 @@ (mutex-lock! *db-multi-sync-mutex*) (set! *db-last-sync* start-t) (set! *db-last-access* start-t) (mutex-unlock! *db-multi-sync-mutex*) (stack-push! (dbr:dbstruct-dbstack dbstruct) tmpdb))) + +(define (db:safely-close-sqlite3-db db #!key (try-num 3)) + (if (<= try-num 0) + #f + (handle-exceptions + exn + (begin + (thread-sleep! 3) + (sqlite3:interrupt! db) + (db:safely-close-sqlite3-db db try-num: (- try-num 1))) + (if (sqlite3:database? db) + (begin + (sqlite3:finalize! db) + #t) + #f)))) ;; close all opened run-id dbs (define (db:close-all dbstruct) (if (dbr:dbstruct? dbstruct) (handle-exceptions @@ -425,15 +432,16 @@ (let ((tdbs (map db:dbdat-get-db (stack->list (dbr:dbstruct-dbstack dbstruct)))) (mdb (db:dbdat-get-db (dbr:dbstruct-mtdb dbstruct))) (rdb (db:dbdat-get-db (dbr:dbstruct-refndb dbstruct)))) (map (lambda (db) - (if (sqlite3:database? db) - (sqlite3:finalize! db))) + (db:safely-close-sqlite3-db db)) +;; (if (sqlite3:database? db) +;; (sqlite3:finalize! db))) tdbs) - (if (sqlite3:database? mdb) (sqlite3:finalize! mdb)) - (if (sqlite3:database? rdb) (sqlite3:finalize! rdb)))))) + (db:safely-close-sqlite3-db mdb) ;; (if (sqlite3:database? mdb) (sqlite3:finalize! mdb)) + (db:safely-close-sqlite3-db rdb))))) ;; (if (sqlite3:database? rdb) (sqlite3:finalize! rdb)))))) ;; (let ((locdbs (dbr:dbstruct-locdbs dbstruct))) ;; (if (hash-table? locdbs) ;; (for-each (lambda (run-id) ;; (db:close-run-db dbstruct run-id)) @@ -537,11 +545,11 @@ (tmpname (conc fname "." (current-process-id))) (tmpjnl (conc fnamejnl "." (current-process-id)))) (debug:print-error 0 *default-log-port* "" fname " appears corrupted. Making backup \"old/" fname "\"") (system (conc "cd " dbdir ";mkdir -p old;cat " fname " > old/" tmpname)) (system (conc "rm -f " dbpath)) - (if (file-exists? fnamejnl) + (if (common:file-exists? fnamejnl) (begin (debug:print-error 0 *default-log-port* "" fnamejnl " found, moving it to old dir as " tmpjnl) (system (conc "cd " dbdir ";mkdir -p old;cat " fnamejnl " > old/" tmpjnl)) (system (conc "rm -f " dbdir "/" fnamejnl)))) ;; attempt to recreate database @@ -613,11 +621,11 @@ exn (begin (debug:print 0 *default-log-port* "EXCEPTION: database probably overloaded or unreadable in db:sync-tables.") (print-call-chain (current-error-port)) (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) - (print "exn=" (condition->list exn)) + (debug:print 5 *default-log-port* "exn=" (condition->list exn)) (debug:print 0 *default-log-port* " status: " ((condition-property-accessor 'sqlite3 'status) exn)) (debug:print 0 *default-log-port* " src db: " (db:dbdat-get-path fromdb)) (for-each (lambda (dbdat) (let ((dbpath (db:dbdat-get-path dbdat))) (debug:print 0 *default-log-port* " dbpath: " dbpath) @@ -661,26 +669,42 @@ (numrecs (make-hash-table)) (start-time (current-milliseconds)) (tot-count 0)) (for-each ;; table (lambda (tabledat) - (let* ((tablename (car tabledat)) - (fields (cdr tabledat)) - (use-last-update (if last-update - (if (pair? last-update) - (member (car last-update) ;; last-update field name - (map car fields)) - (begin - (debug:print 0 *default-log-port* "ERROR: parameter last-update for db:sync-tables must be a pair, received: " last-update) ;; found in fields - #f)) - #f)) + (let* ((tablename (car tabledat)) + (fields (cdr tabledat)) + (has-last-update (member "last_update" fields)) + (use-last-update (cond + ((and has-last-update + (member "last_update" fields)) + #t) ;; if given a number, just use it for all fields + ((number? last-update) #f) ;; if not matched first entry then ignore last-update for this table + ((and (pair? last-update) + (member (car last-update) ;; last-update field name + (map car fields))) #t) + (last-update + (debug:print 0 *default-log-port* "ERROR: parameter last-update for db:sync-tables must be a pair or a number, received: " last-update) ;; found in fields + #f) + (else + #f))) + (last-update-value (if use-last-update ;; no need to check for has-last-update - it is already accounted for + (if (number? last-update) + last-update + (cdr last-update)) + #f)) + (last-update-field (if use-last-update + (if (number? last-update) + "last_update" + (car last-update)) + #f)) (num-fields (length fields)) (field->num (make-hash-table)) (num->field (apply vector (map car fields))) (full-sel (conc "SELECT " (string-intersperse (map car fields) ",") " FROM " tablename (if use-last-update ;; apply last-update criteria - (conc " " (car last-update) ">=" (cdr last-update)) + (conc " WHERE " last-update-field " >= " last-update-value) "") ";")) (full-ins (conc "INSERT OR REPLACE INTO " tablename " ( " (string-intersperse (map car fields) ",") " ) " " VALUES ( " (string-intersperse (make-list num-fields "?") ",") " );")) (fromdat '()) @@ -834,12 +858,50 @@ (sqlite3:execute maindb "CREATE TRIGGER IF NOT EXISTS update_run_stats_trigger AFTER UPDATE ON run_stats FOR EACH ROW BEGIN UPDATE run_stats SET last_update=(strftime('%s','now')) WHERE id=old.id; - END;")) + END;") + (sqlite3:execute maindb "CREATE TABLE IF NOT EXISTS test_rundat ( + id INTEGER PRIMARY KEY, + test_id INTEGER, + update_time TIMESTAMP, + cpuload INTEGER DEFAULT -1, + diskfree INTEGER DEFAULT -1, + diskusage INTGER DEFAULT -1, + run_duration INTEGER DEFAULT 0);")) +(define (db:adj-target db) + (let ((fields (configf:get-section *configdat* "fields")) + (field-num 0)) + ;; because we will be refreshing the keys table it is best to clear it here + (sqlite3:execute db "DELETE FROM keys;") + (for-each + (lambda (field) + (let ((column (car field)) + (spec (cadr field))) + (handle-exceptions + exn + (if (string-match ".*duplicate.*" ((condition-property-accessor 'exn 'message) exn)) + (debug:print 0 *default-log-port* "Target field " column " already exists in the runs table") + (db:general-sqlite-error-dump exn "alter table runs ..." #f "none")) + ;; Add the column if needed + (sqlite3:execute + db + (conc "ALTER TABLE runs ADD COLUMN " column " " spec))) + ;; correct the entry in the keys column + (sqlite3:execute + db + "INSERT INTO keys (id,fieldname,fieldtype) VALUES (?,?,?);" + field-num column spec) + ;; fill in blanks (not allowed as it would be part of the path + (sqlite3:execute + db + (conc "UPDATE runs SET " column "='x' WHERE " column "='';")) + (set! field-num (+ field-num 1)))) + fields))) + (define *global-db-store* (make-hash-table)) (define (db:get-access-mode) (if (args:get-arg "-use-db-cache") 'cached 'rmt)) @@ -857,11 +919,11 @@ (define (db:cache-for-read-only source target #!key (use-last-update #f)) (if (and (hash-table-ref/default *global-db-store* target #f) (>= (file-modification-time target)(file-modification-time source))) (hash-table-ref *global-db-store* target) (let* ((toppath (launch:setup)) - (targ-db-last-mod (if (file-exists? target) + (targ-db-last-mod (if (common:file-exists? target) (file-modification-time target) 0)) (cache-db (or (hash-table-ref/default *global-db-store* target #f) (db:open-megatest-db path: target))) (source-db (db:open-megatest-db path: source)) @@ -871,41 +933,41 @@ (db:sync-tables (db:sync-main-list source-db) last-update source-db cache-db) (db:sync-tables db:sync-tests-only last-update source-db cache-db) (hash-table-set! *global-db-store* target cache-db) cache-db))) -;; call a proc with a cached db -;; -(define (db:call-with-cached-db proc . params) - ;; first cache the db in /tmp - (let* ((cname-part (conc "megatest_cache/" (common:get-testsuite-name))) - (fname (conc (common:get-area-path-signature) ".db")) - (cache-dir (common:get-create-writeable-dir - (list (conc "/tmp/" (current-user-name) "/" cname-part) - (conc "/tmp/" (current-user-name) "-" cname-part) - (conc "/tmp/" (current-user-name) "_" cname-part)))) - (megatest-db (conc *toppath* "/megatest.db"))) - ;; (debug:print-info 0 *default-log-port* "Using cache dir " cache-dir) - (if (not cache-dir) - (begin - (debug:print 0 *default-log-port* "ERROR: Failed to find an area to write the cache db") - (exit 1)) - (let* ((th1 (make-thread - (lambda () - (if (and (file-exists? megatest-db) - (file-write-access? megatest-db)) - (begin - (common:sync-to-megatest.db 'timestamps) ;; internally mutexes on *db-local-sync* - (debug:print-info 2 *default-log-port* "Done syncing to megatest.db")))) - "call-with-cached-db sync-to-megatest.db")) - (cache-db (db:cache-for-read-only - megatest-db - (conc cache-dir "/" fname) - use-last-update: #t))) - (thread-start! th1) - (apply proc cache-db params) - )))) +;; ;; call a proc with a cached db +;; ;; +;; (define (db:call-with-cached-db proc . params) +;; ;; first cache the db in /tmp +;; (let* ((cname-part (conc "megatest_cache/" (common:get-testsuite-name))) +;; (fname (conc (common:get-area-path-signature) ".db")) +;; (cache-dir (common:get-create-writeable-dir +;; (list (conc "/tmp/" (current-user-name) "/" cname-part) +;; (conc "/tmp/" (current-user-name) "-" cname-part) +;; (conc "/tmp/" (current-user-name) "_" cname-part)))) +;; (megatest-db (conc *toppath* "/megatest.db"))) +;; ;; (debug:print-info 0 *default-log-port* "Using cache dir " cache-dir) +;; (if (not cache-dir) +;; (begin +;; (debug:print 0 *default-log-port* "ERROR: Failed to find an area to write the cache db") +;; (exit 1)) +;; (let* ((th1 (make-thread +;; (lambda () +;; (if (and (common:file-exists? megatest-db) +;; (file-write-access? megatest-db)) +;; (begin +;; (db:sync-to-megatest.db dbstruct 'timestamps) ;; internally mutexes on *db-local-sync* +;; (debug:print-info 2 *default-log-port* "Done syncing to megatest.db")))) +;; "call-with-cached-db sync-to-megatest.db")) +;; (cache-db (db:cache-for-read-only +;; megatest-db +;; (conc cache-dir "/" fname) +;; use-last-update: #t))) +;; (thread-start! th1) +;; (apply proc cache-db params) +;; )))) ;; options: ;; ;; 'killservers - kills all servers ;; 'dejunk - removes junk records @@ -915,147 +977,100 @@ ;; 'closeall - close all opened dbs ;; 'schema - attempt to apply schema changes ;; run-ids: '(1 2 3 ...) or #f (for all) ;; (define (db:multi-db-sync dbstruct . options) - (if (not (launch:setup)) - (debug:print 0 *default-log-port* "ERROR: not able to setup up for megatest.") - (let* ((mtdb (dbr:dbstruct-mtdb dbstruct)) - (tmpdb (db:get-db dbstruct)) - (refndb (dbr:dbstruct-refndb dbstruct)) - (allow-cleanup #t) ;; (if run-ids #f #t)) - (servers (server:get-list *toppath*)) ;; (tasks:get-all-servers (db:delay-if-busy tdbdat))) - (data-synced 0)) ;; count of changed records (I hope) - - ;; kill servers - (if (member 'killservers options) - (for-each - (lambda (server) - (match-let (((mod-time host port start-time pid) server)) - (if (and host pid) - (tasks:kill-server host pid)))) - servers)) - - ;; clear out junk records - ;; - (if (member 'dejunk options) - (begin - (db:delay-if-busy mtdb) ;; ok to delay on mtdb - (db:clean-up mtdb) - (db:clean-up tmpdb) - (db:clean-up refndb))) - - ;; adjust test-ids to fit into proper range - ;; - ;; (if (member 'adj-testids options) - ;; (begin - ;; (db:delay-if-busy mtdb) - ;; (db:prep-megatest.db-for-migration mtdb))) - - ;; sync runs, test_meta etc. - ;; - (if (member 'old2new options) - ;; (begin - (set! data-synced - (+ (db:sync-tables (db:sync-all-tables-list dbstruct) #f mtdb tmpdb refndb) - data-synced))) - ;; (db:sync-main-list mtdb) mtdb (db:get-db dbstruct #f)) -;; (for-each -;; (lambda (run-id) -;; (db:delay-if-busy mtdb) -;; (let ((testrecs (db:get-all-tests-info-by-run-id mtdb run-id))) -;; ;; (dbstruct (if toppath (make-dbr:dbstruct path: toppath local: #t) #f))) -;; (debug:print 0 *default-log-port* "INFO: Propagating " (length testrecs) " records for run-id=" run-id " to run specific db") -;; (db:replace-test-records dbstruct run-id testrecs) -;; (sqlite3:finalize! (db:dbdat-get-db (dbr:dbstruct-rundb dbstruct))))) -;; run-ids))) - - ;; now ensure all newdb data are synced to megatest.db - ;; do not use the run-ids list passed in to the function - ;; - (if (member 'new2old options) - (set! data-synced - (+ (db:sync-tables (db:sync-all-tables-list dbstruct) #f tmpdb refndb mtdb) - data-synced))) - - - - (if (member 'schema options) - (begin - (db:patch-schema-maindb (db:dbdat-get-db mtdb)) - (db:patch-schema-maindb (db:dbdat-get-db tmpdb)) - (db:patch-schema-maindb (db:dbdat-get-db refndb)) - (db:patch-schema-rundb (db:dbdat-get-db mtdb)) - (db:patch-schema-rundb (db:dbdat-get-db tmpdb)) - (db:patch-schema-rundb (db:dbdat-get-db refndb)))) - - ;; (let* ((maindb (make-dbr:dbstruct path: toppath local: #t)) - ;; (src-run-ids (if run-ids run-ids (db:get-all-run-ids (db:dbdat-get-db (db:get-db maindb 0))))) - ;; (all-run-ids (sort (delete-duplicates (cons 0 src-run-ids)) <)) - ;; (count 1) - ;; (total (length all-run-ids)) - ;; (dead-runs '())) - ;; ;; first fix schema if needed - ;; (map - ;; (lambda (th) - ;; (thread-join! th)) - ;; (map - ;; (lambda (run-id) - ;; (thread-start! - ;; (make-thread - ;; (lambda () - ;; (let* ((fromdb (if toppath (make-dbr:dbstruct path: toppath local: #t) #f)) -;; (if (member 'schema options) - ;; (if (eq? run-id 0) - ;; (let ((maindb (db:dbdat-get-db (db:get-db fromdb #f)))) - ;; (db:patch-schema-maindb run-id maindb)) - ;; (db:patch-schema-rundb run-id frundb))) - ;; (set! count (+ count 1)) - ;; (debug:print 0 *default-log-port* "Finished patching schema for " (if (eq? run-id 0) " main.db " (conc run-id ".db")) ", " count " of " total))))) - ;; all-run-ids)) - ;; ;; Then sync and fix db's - ;; (set! count 0) - ;; (process-fork - ;; (lambda () - ;; (map - ;; (lambda (th) - ;; (thread-join! th)) - ;; (map - ;; (lambda (run-id) - ;; (thread-start! - ;; (make-thread - ;; (lambda () - ;; (let* ((fromdb (if toppath (make-dbr:dbstruct path: toppath local: #t) #f)) - ;; (frundb (db:dbdat-get-db (db:get-db fromdb run-id)))) - ;; (if (eq? run-id 0) - ;; (let ((maindb (db:dbdat-get-db (db:get-db fromdb #f)))) -;; (db:sync-tables (db:sync-main-list dbstruct) #f (db:get-db fromdb #f) mtdb) - ;; (set! dead-runs (db:clean-up-maindb (db:get-db fromdb #f)))) - ;; (begin - ;; ;; NB// must sync first to ensure deleted tests get marked as such in megatest.db -;; (db:sync-tables db:sync-tests-only #f (db:get-db fromdb run-id) mtdb) - ;; (db:clean-up-rundb (db:get-db fromdb run-id))))) - ;; (set! count (+ count 1)) - ;; (debug:print 0 *default-log-port* "Finished clean up of " - ;; (if (eq? run-id 0) - ;; " main.db " (conc run-id ".db")) ", " count " of " total))))) - ;; all-run-ids)))) - - ;; removed deleted runs -;; (let ((dbdir (tasks:get-task-db-path))) -;; (for-each (lambda (run-id) -;; (let ((fullname (conc dbdir "/" run-id ".db"))) -;; (if (file-exists? fullname) -;; (begin -;; (debug:print 0 *default-log-port* "Removing database file for deleted run " fullname) -;; (delete-file fullname))))) -;; dead-runs)))) -;; - ;; (db:close-all dbstruct) - ;; (sqlite3:finalize! mdb) - (stack-push! (dbr:dbstruct-dbstack dbstruct) tmpdb) - data-synced))) + ;; (if (not (launch:setup)) + ;; (debug:print 0 *default-log-port* "ERROR: not able to setup up for megatest.") + (let* ((mtdb (dbr:dbstruct-mtdb dbstruct)) + (tmpdb (db:get-db dbstruct)) + (refndb (dbr:dbstruct-refndb dbstruct)) + (allow-cleanup #t) ;; (if run-ids #f #t)) + (servers (server:get-list *toppath*)) ;; (tasks:get-all-servers (db:delay-if-busy tdbdat))) + (data-synced 0)) ;; count of changed records (I hope) + + (for-each + (lambda (option) + + (case option + ;; kill servers + ((killservers) + (for-each + (lambda (server) + (match-let (((mod-time host port start-time pid) server)) + (if (and host pid) + (tasks:kill-server host pid)))) + servers)) + + ;; clear out junk records + ;; + ((dejunk) + (db:delay-if-busy mtdb) ;; ok to delay on mtdb + (db:clean-up mtdb) + (db:clean-up tmpdb) + (db:clean-up refndb)) + + ;; sync runs, test_meta etc. + ;; + ((old2new) + (set! data-synced + (+ (db:sync-tables (db:sync-all-tables-list dbstruct) #f mtdb tmpdb refndb) + data-synced))) + + ;; now ensure all newdb data are synced to megatest.db + ;; do not use the run-ids list passed in to the function + ;; + ((new2old) + (set! data-synced + (+ (db:sync-tables (db:sync-all-tables-list dbstruct) #f tmpdb refndb mtdb) + data-synced))) + + ((adj-target) + (db:adj-target (db:dbdat-get-db mtdb)) + (db:adj-target (db:dbdat-get-db tmpdb)) + (db:adj-target (db:dbdat-get-db refndb))) + + ((schema) + (db:patch-schema-maindb (db:dbdat-get-db mtdb)) + (db:patch-schema-maindb (db:dbdat-get-db tmpdb)) + (db:patch-schema-maindb (db:dbdat-get-db refndb)) + (db:patch-schema-rundb (db:dbdat-get-db mtdb)) + (db:patch-schema-rundb (db:dbdat-get-db tmpdb)) + (db:patch-schema-rundb (db:dbdat-get-db refndb)))) + + (stack-push! (dbr:dbstruct-dbstack dbstruct) tmpdb)) + options) + data-synced)) + +(define (db:tmp->megatest.db-sync dbstruct last-update) + (let* ((mtdb (dbr:dbstruct-mtdb dbstruct)) + (tmpdb (db:get-db dbstruct)) + (refndb (dbr:dbstruct-refndb dbstruct))) + (db:sync-tables (db:sync-all-tables-list dbstruct) last-update tmpdb refndb mtdb))) + +;;;; run-ids +;; if #f use *db-local-sync* : or 'local-sync-flags +;; if #t use timestamps : or 'timestamps +(define (db:sync-to-megatest.db dbstruct #!key (no-sync-db #f)) + (let* ((start-time (current-seconds)) + (last-update (if no-sync-db + (db:no-sync-get/default no-sync-db "LAST_UPDATE" 0) + 0)) ;; (or (db:get-var dbstruct "LAST_UPDATE") 0)) + (sync-needed (> (- start-time last-update) 6)) + (res (if sync-needed ;; don't sync if a sync already occurred in the past 6 seconds + (begin + (if no-sync-db + (db:no-sync-set no-sync-db "LAST_UPDATE" start-time)) + (db:tmp->megatest.db-sync dbstruct last-update)) + 0)) + (sync-time (- (current-seconds) start-time))) + (debug:print-info 3 *default-log-port* "Sync of newdb to olddb completed in " sync-time " seconds pid="(current-process-id)) + (if (common:low-noise-print 30 "sync new to old") + (if sync-needed + (debug:print-info 0 *default-log-port* "Sync of " res " records from newdb to olddb completed in " sync-time " seconds pid="(current-process-id)) + (debug:print-info 0 *default-log-port* "No sync needed, last updated " (- start-time last-update) " seconds ago"))) + res)) ;; keeping it around for debugging purposes only (define (open-run-close-no-exception-handling proc idb . params) (debug:print-info 11 *default-log-port* "open-run-close-no-exception-handling START given a db=" (if idb "yes " "no ") ", params=" params) (print "I don't work anymore. open-run-close-no-exception-handling needs fixing or removing...") @@ -1084,11 +1099,11 @@ ((busy) (thread-sleep! sleep-time)) (else (debug:print 0 *default-log-port* "EXCEPTION: database probably overloaded or unreadable.") (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) - (print "exn=" (condition->list exn)) + (debug:print 5 *default-log-port* "exn=" (condition->list exn)) (debug:print 0 *default-log-port* " status: " ((condition-property-accessor 'sqlite3 'status) exn)) (print-call-chain (current-error-port)) (thread-sleep! sleep-time) (debug:print-info 0 *default-log-port* "trying db call one more time....this may never recover, if necessary kill process " (current-process-id) " on host " (get-host-name) " to clean up"))) (apply open-run-close-exception-handling proc idb params)) @@ -1105,17 +1120,17 @@ (launch:setup)) ;; added because Elena was getting stack dump because *configinfo* below was #f. (let* ((configdat (car *configinfo*)) ;; tut tut, global warning... (keys (keys:config-get-fields configdat)) (havekeys (> (length keys) 0)) (keystr (keys->keystr keys)) - (fieldstr (keys->key/field keys)) + (fieldstr (keys:make-key/field-string configdat)) (db (db:dbdat-get-db dbdat))) (for-each (lambda (key) (let ((keyn key)) (if (member (string-downcase keyn) (list "runname" "state" "status" "owner" "event_time" "comment" "fail_count" - "pass_count")) + "pass_count" "contour")) (begin (print "ERROR: your key cannot be named " keyn " as this conflicts with the same named field in the runs table, you must remove your megatest.db and /.db before trying again.") (exit 1))))) keys) (sqlite3:with-transaction @@ -1448,11 +1463,11 @@ ;; L O G G I N G D B ;;====================================================================== (define (open-logging-db) (let* ((dbpath (conc (if *toppath* (conc *toppath* "/") "") "logging.db")) ;; fname) - (dbexists (file-exists? dbpath)) + (dbexists (common:file-exists? dbpath)) (db (sqlite3:open-database dbpath)) (handler (make-busy-timeout (if (args:get-arg "-override-timeout") (string->number (args:get-arg "-override-timeout")) 136000)))) ;; 136000))) (sqlite3:set-busy-handler! db handler) @@ -1604,11 +1619,11 @@ ;; ;; (db:delay-if-busy dbdat) (let* (;; (min-incompleted (filter (lambda (x) ;; (let* ((testpath (cadr x)) ;; (tdatpath (conc testpath "/testdat.db")) - ;; (dbexists (file-exists? tdatpath))) + ;; (dbexists (common:file-exists? tdatpath))) ;; (or (not dbexists) ;; if no file then something wrong - mark as incomplete ;; (> (- (current-seconds)(file-modification-time tdatpath)) 600)))) ;; no change in 10 minutes to testdat.db - she's dead Jim ;; incompleted)) (min-incompleted-ids (map car incompleted)) ;; do 'em all (all-ids (append min-incompleted-ids (map car oldlaunched)))) @@ -1671,10 +1686,14 @@ "DELETE FROM tests WHERE run_id NOT IN (SELECT DISTINCT id FROM runs);" ;; delete all runs that are state='deleted' "DELETE FROM runs WHERE state='deleted';" ;; delete empty runs "DELETE FROM runs WHERE id NOT IN (SELECT DISTINCT r.id FROM runs AS r INNER JOIN tests AS t ON t.run_id=r.id);" + ;; remove orphaned test_rundat entries + "DELETE FROM test_rundat where test_id NOT IN (SELECT id FROM tests);" + ;; + "DELETE FROM test_steps WHERE test_id NOT IN (SELECT id FROM tests);" )))) ;; (db:delay-if-busy dbdat) (sqlite3:with-transaction db (lambda () @@ -1822,10 +1841,60 @@ (define (db:del-var dbstruct var) (db:with-db dbstruct #f #t (lambda (db) (sqlite3:execute db "DELETE FROM metadat WHERE var=?;" var)))) + +;;====================================================================== +;; no-sync.db - small bits of data to be shared between servers +;;====================================================================== + +(define (db:open-no-sync-db) + (let* ((dbpath (db:dbfile-path)) + (dbname (conc dbpath "/no-sync.db")) + (db (sqlite3:open-database dbname))) + (sqlite3:set-busy-handler! db (make-busy-timeout 136000)) + (sqlite3:execute db "PRAGMA synchronous = 0;") + (sqlite3:execute db "CREATE TABLE IF NOT EXISTS no_sync_metadat (var TEXT,val TEXT, CONSTRAINT no_sync_metadat_constraint UNIQUE (var));") + db)) + +;; if we are not a server create a db handle. this is not finalized +;; so watch for problems. I'm still not clear if it is needed to manually +;; finalize sqlite3 dbs with the sqlite3 egg. +;; +(define (db:no-sync-db db-in) + (if db-in + db-in + (let ((db (db:open-no-sync-db))) + (set! *no-sync-db* db) + db))) + +(define (db:no-sync-set db var val) + (sqlite3:execute (db:no-sync-db db) "INSERT OR REPLACE INTO no_sync_metadat (var,val) VALUES (?,?);" var val)) + +(define (db:no-sync-del! db var) + (sqlite3:execute (db:no-sync-db db) "DELETE FROM no_sync_metadat WHERE var=?;" var)) + +(define (db:no-sync-get/default db var default) + (let ((res default)) + (sqlite3:for-each-row + (lambda (val) + (set! res val)) + (db:no-sync-db db) + "SELECT val FROM no_sync_metadat WHERE var=?;" + var) + (if res + (let ((newres (if (string? res) + (string->number res) + #f))) + (if newres + newres + res)) + res))) + +(define (db:no-sync-close-db db) + (db:safely-close-sqlite3-db db)) ;; use a global for some primitive caching, it is just silly to ;; re-read the db over and over again for the keys since they never ;; change @@ -1848,14 +1917,19 @@ ;; look up values in a header/data structure (define (db:get-value-by-header row header field) (if (or (null? header) (not row)) #f (let loop ((hed (car header)) - (tal (cdr header)) - (n 0)) - (if (equal? hed field) - (vector-ref row n) + (tal (cdr header)) + (n 0)) + (if (equal? hed field) + (handle-exceptions + exn + (begin + (debug:print 0 *default-log-port* "WARNING: attempt to read non-existant field, row=" row " header=" header " field=" field) + #f) + (vector-ref row n)) (if (null? tal) #f (loop (car tal)(cdr tal)(+ n 1))))))) ;; Accessors for the header/data structure ;; get rows and header from (define (db:get-header vec)(vector-ref vec 0)) @@ -3077,12 +3151,12 @@ (configf:lookup dat entry-name "message") ;; 4 ;; Comment (configf:lookup dat entry-name "exit-status") ;; 5 ;; Status "logpro" ;; 6 ;; Type )))) (let* ((value (or (configf:lookup dat entry-name "measured") "n/a")) - (expected (or (configf:lookup dat entry-name "expected") "n/a")) - (tolerance (or (configf:lookup dat entry-name "tolerance") "n/a")) + (expected (or (configf:lookup dat entry-name "expected") 0.0)) + (tolerance (or (configf:lookup dat entry-name "tolerance") 0.0)) (comment (or (configf:lookup dat entry-name "comment") (configf:lookup dat entry-name "desc") "n/a")) (status (or (configf:lookup dat entry-name "status") "n/a")) (type (or (configf:lookup dat entry-name "expected") "n/a"))) (set! res (append @@ -3183,10 +3257,25 @@ (lambda (id test_id category variable value expected tol units comment status type) (set! res (cons (vector id test_id category variable value expected tol units comment status type) res))) db "SELECT id,test_id,category,variable,value,expected,tol,units,comment,status,type FROM test_data WHERE test_id=? AND category LIKE ? ORDER BY category,variable;" test-id categorypatt) (reverse res))))) + +;; This routine moved from tdb.scm, tdb:read-test-data +;; +(define (db:read-test-data* dbstruct run-id test-id categorypatt varpatt) + (let* ((res '())) + (db:with-db + dbstruct #f #f + (lambda (db) + (sqlite3:for-each-row + (lambda (id test_id category variable value expected tol units comment status type) + (set! res (cons (vector id test_id category variable value expected tol units comment status type) res))) + db + "SELECT id,test_id,category,variable,value,expected,tol,units,comment,status,type FROM test_data WHERE test_id=? AND category LIKE ? AND variable LIKE ? ORDER BY category,variable;" test-id categorypatt varpatt) + (reverse res))))) + ;;====================================================================== ;; Misc. test related queries ;;====================================================================== @@ -3302,20 +3391,24 @@ ;; ;; if test-name is an integer work off that instead of test-name test-path ;; (define (db:set-state-status-and-roll-up-items dbstruct run-id test-name item-path state status comment) ;; establish info on incoming test followed by info on top level test + ;; BBnote - for mode itemwait, linkage between upstream test & matching item status is propagated to run queue in db:prereqs-not-met + (let* ((testdat (if (number? test-name) (db:get-test-info-by-id dbstruct run-id test-name) ;; test-name is actually a test-id (db:get-test-info dbstruct run-id test-name item-path))) (test-id (db:test-get-id testdat)) (test-name (if (number? test-name) (db:test-get-testname testdat) test-name)) (item-path (db:test-get-item-path testdat)) (tl-testdat (db:get-test-info dbstruct run-id test-name "")) - (tl-test-id (db:test-get-id tl-testdat))) + (tl-test-id (if tl-testdat + (db:test-get-id tl-testdat) + #f))) (if (member state '("LAUNCHED" "REMOTEHOSTSTART")) (db:general-call dbstruct 'set-test-start-time (list test-id))) (mutex-lock! *db-transaction-mutex*) (db:with-db dbstruct #f #f @@ -3339,41 +3432,55 @@ ;; (non-completes (filter (lambda (x) ;; (not (equal? (dbr:counts-state x) "COMPLETED"))) ;; state-status-counts)) (all-curr-states (common:special-sort ;; worst -> best (sort of) (delete-duplicates - (cons state (map dbr:counts-state state-status-counts))) + (if (not (equal? state "DELETED")) + (cons state (map dbr:counts-state state-status-counts)) + (map dbr:counts-state state-status-counts))) *common:std-states* >)) (all-curr-statuses (common:special-sort ;; worst -> best (delete-duplicates - (cons status (map dbr:counts-status state-status-counts))) + (if (not (equal? state "DELETED")) + (cons status (map dbr:counts-status state-status-counts)) + (map dbr:counts-status state-status-counts))) *common:std-statuses* >)) (non-completes (filter (lambda (x) (not (equal? x "COMPLETED"))) all-curr-states)) + (num-non-completes (length non-completes)) + (newstate (cond - ((> (length non-completes) 0) ;; + ((> running 0) + "RUNNING") ;; anything running, call the situation running + ((> bad-not-started 0) ;; we have an ugly situation, it is completed in the sense we cannot do more. + "COMPLETED") + ((> num-non-completes 0) ;; (car non-completes)) ;; (remove (lambda (x)(equal? "COMPLETED" x)) all-curr-states))) + ;; only rollup DELETED if all DELETED (else (car all-curr-states)))) ;; (if (> running 0) ;; "RUNNING" ;; (if (> bad-not-started 0) ;; "COMPLETED" ;; (car all-curr-states)))) - (newstatus (if (> bad-not-started 0) - "CHECK" + (newstatus (if (or (> bad-not-started 0) + (and (equal? newstate "NOT_STARTED") + (> num-non-completes 0))) + "STARTED" (car all-curr-statuses)))) ;; (print "bad-not-supported: " bad-not-support " all-curr-states: " all-curr-states " all-curr-statuses: " all-curr-states) ;; " newstate: " newstate " newstatus: " newstatus) ;; NB// Pass the db so it is part of the transaction - (db:test-set-state-status db run-id tl-test-id newstate newstatus #f))))))) + (if tl-test-id + (db:test-set-state-status db run-id tl-test-id newstate newstatus #f)))))))) (mutex-unlock! *db-transaction-mutex*) (if (and test-id state status (equal? status "AUTO")) (db:test-data-rollup dbstruct run-id test-id status)) tr-res))))) - +;; BBnote: db:get-all-state-status-counts-for-test returns dbr:counts object aggregating state and status of items of a given test, *not including rollup state/status* (define (db:get-all-state-status-counts-for-test dbstruct run-id test-name item-path) (db:with-db dbstruct #f #f (lambda (db) (sqlite3:map-row @@ -3719,11 +3826,11 @@ exn (begin (debug:print-info 0 *default-log-port* "WARNING: failed to test for existance of " dbfj) (thread-sleep! 1) (db:delay-if-busy count (- count 1))) - (file-exists? dbfj)) + (common:file-exists? dbfj)) (case count ((6) (thread-sleep! 0.2) (db:delay-if-busy count: 5)) ((5) @@ -3894,10 +4001,11 @@ ;; mode 'itemmatch or 'itemwait means that tests items must be COMPLETED and (PASS|WARN|WAIVED|CHECK) [[ NB// NOT IMPLEMENTED YET ]] ;; mode 'exclusive means this test/item cannot run if the same test/item is LAUNCHED,REMOTEHOSTSTART or RUNNING ;; ;; (define (db:get-prereqs-not-met dbstruct run-id waitons ref-item-path mode) (define (db:get-prereqs-not-met dbstruct run-id waitons ref-test-name ref-item-path mode itemmaps) ;; #!key (mode '(normal))(itemmap #f)) + ;; BBnote - rollup of an itemized test's overall state/status done in db:set-state-status-and-roll-up-items (append (if (member 'exclusive mode) (let ((running-tests (db:get-tests-for-run dbstruct #f ;; run-id of #f means for all runs. (if (string=? ref-item-path "") ;; testpatt @@ -3935,15 +4043,15 @@ (let ((tests (db:get-tests-for-run-state-status dbstruct run-id waitontest-name)) (ever-seen #f) (parent-waiton-met #f) (item-waiton-met #f)) (for-each - (lambda (test) + (lambda (test) ;; BB- this is the upstream test ;; (if (equal? waitontest-name (db:test-get-testname test)) ;; by defintion this had better be true ... (let* ((state (db:test-get-state test)) (status (db:test-get-status test)) - (item-path (db:test-get-item-path test)) + (item-path (db:test-get-item-path test)) ;; BB- this is the upstream itempath (is-completed (equal? state "COMPLETED")) (is-running (equal? state "RUNNING")) (is-killed (equal? state "KILLED")) (is-ok (member status '("PASS" "WARN" "CHECK" "WAIVED" "SKIP"))) ;; testname-b path-a path-b @@ -3964,12 +4072,12 @@ ((and (not (null? (lset-intersection eq? mode '(itemmatch itemwait)))) ;; how is that different from (member mode '(itemmatch itemwait)) ????? ;; (not (equal? item-path "")) ;; this applies to both top level (to allow launching of next batch) and items same-itempath) (if (and is-completed is-ok) (set! item-waiton-met #t)) - (if (and (equal? item-path "") - (or is-completed is-running));; this is the parent, set it to run if completed or running + (if (and (equal? item-path "") ;; if upstream rollup test is completed, parent-waiton-met is set + (or is-completed is-running));; this is the parent, set it to run if completed or running ;; BB1 (set! parent-waiton-met #t))) ;; normal checking of parent items, any parent or parent item not ok blocks running ((and is-completed (or is-ok (member 'toplevel mode)) ;; toplevel does not block on FAIL @@ -4083,12 +4191,12 @@ (testname (vector-ref vb (+ 2 numkeys))) (item-path (vector-ref vb (+ 3 numkeys))) (final-log (vector-ref vb (+ 7 numkeys))) (run-dir (vector-ref vb (+ 18 numkeys))) (log-fpath (conc run-dir "/" final-log))) ;; (string-intersperse keyvals "/") "/" testname "/" item-path "/" - (debug:print 4 *default-log-port* "log: " log-fpath " exists: " (file-exists? log-fpath)) - (vector-set! vb (+ 7 numkeys) (if (file-exists? log-fpath) + (debug:print 4 *default-log-port* "log: " log-fpath " exists: " (common:file-exists? log-fpath)) + (vector-set! vb (+ 7 numkeys) (if (common:file-exists? log-fpath) (let ((newpath (conc pathmod "/" (string-intersperse keyvals "/") "/" runname "/" testname "/" (if (string=? item-path "") "" (conc "/" item-path)) final-log))) Index: dcommon.scm ================================================================== --- dcommon.scm +++ dcommon.scm @@ -19,11 +19,11 @@ (declare (unit dcommon)) (declare (uses megatest-version)) (declare (uses gutils)) (declare (uses db)) -(declare (uses synchash)) +;; (declare (uses synchash)) (include "common_records.scm") (include "db_records.scm") (include "key_records.scm") (include "run_records.scm") @@ -85,185 +85,185 @@ ;; 3. Add extraction of filters to synchash calls ;; ;; NOTE: Used in newdashboard ;; ;; Mode is 'full or 'incremental for full refresh or incremental refresh -(define (dcommon:run-update keys data runname keypatts testpatt states statuses mode window-id) - (let* (;; count and offset => #f so not used - ;; the synchash calls modify the "data" hash - (changed #f) - (get-runs-sig (conc (client:get-signature) " get-runs")) - (get-tests-sig (conc (client:get-signature) " get-tests")) - (get-details-sig (conc (client:get-signature) " get-test-details")) - - ;; test-ids to get and display are indexed on window-id in curr-test-ids hash - (test-ids (hash-table-values (dboard:tabdat-curr-test-ids data))) - ;; run-id is #f in next line to send the query to server 0 - (run-changes (synchash:client-get 'db:get-runs get-runs-sig (length keypatts) data #f runname #f #f keypatts)) - (tests-detail-changes (if (not (null? test-ids)) - (synchash:client-get 'db:get-test-info-by-ids get-details-sig 0 data #f test-ids) - '())) - - ;; Now can calculate the run-ids - (run-hash (hash-table-ref/default data get-runs-sig #f)) - (run-ids (if run-hash (filter number? (hash-table-keys run-hash)) '())) - - (all-test-changes (let ((res (make-hash-table))) - (for-each (lambda (run-id) - (if (> run-id 0) - (hash-table-set! res run-id (synchash:client-get 'db:get-tests-for-run-mindata get-tests-sig 0 data run-id 1 testpatt states statuses #f)))) - run-ids) - res)) - (runs-hash (hash-table-ref/default data get-runs-sig #f)) - (header (hash-table-ref/default runs-hash "header" #f)) - (run-ids (sort (filter number? (hash-table-keys runs-hash)) - (lambda (a b) - (let* ((record-a (hash-table-ref runs-hash a)) - (record-b (hash-table-ref runs-hash b)) - (time-a (db:get-value-by-header record-a header "event_time")) - (time-b (db:get-value-by-header record-b header "event_time"))) - (> time-a time-b))) - )) - (runid-to-col (hash-table-ref *cachedata* "runid-to-col")) - (testname-to-row (hash-table-ref *cachedata* "testname-to-row")) - (colnum 1) - (rownum 0) - (cellname (conc rownum ":" colnum))) ;; rownum = 0 is the header -;; (debug:print 0 *default-log-port* "test-ids " test-ids ", tests-detail-changes " tests-detail-changes) - - ;; tests related stuff - ;; (all-testnames (delete-duplicates (map db:test-get-testname test-changes)))) - - ;; Given a run-id and testname/item_path calculate a cell R:C - - ;; NOTE: Also build the test tree browser and look up table - ;; - ;; Each run is unique on its keys and runname or run-id, store in hash on colnum - (for-each (lambda (run-id) - (let* ((run-record (hash-table-ref/default runs-hash run-id #f)) - (key-vals (map (lambda (key)(db:get-value-by-header run-record header key)) - keys)) - (run-name (db:get-value-by-header run-record header "runname")) - (col-name (conc (string-intersperse key-vals "\n") "\n" run-name)) - (run-path (append key-vals (list run-name)))) - (hash-table-set! (dboard:tabdat-run-keys data) run-id run-path) - ;; modify cell - but only if changed - (set! changed (dcommon:modifiy-if-different (dboard:tabdat-runs-matrix data) cellname col-name changed)) - (hash-table-set! runid-to-col run-id (list colnum run-record)) - ;; Here we update the tests treebox and tree keys - (tree:add-node (dboard:tabdat-tests-tree data) "Runs" (append key-vals (list run-name)) - userdata: (conc "run-id: " run-id)) - (set! colnum (+ colnum 1)))) - run-ids) - - ;; Scan all tests to be displayed and organise all the test names, respecting what is in the hash table - ;; Do this analysis in the order of the run-ids, the most recent run wins - (for-each (lambda (run-id) - (let* ((run-path (hash-table-ref (dboard:tabdat-run-keys data) run-id)) - (test-changes (hash-table-ref all-test-changes run-id)) - (new-test-dat (car test-changes)) - (removed-tests (cadr test-changes)) - (tests (sort (map cadr (filter (lambda (testrec) - (eq? run-id (db:mintest-get-run_id (cadr testrec)))) - new-test-dat)) - (lambda (a b) - (let ((time-a (db:mintest-get-event_time a)) - (time-b (db:mintest-get-event_time b))) - (> time-a time-b))))) - ;; test-changes is a list of (( id record ) ... ) - ;; Get list of test names sorted by time, remove tests - (test-names (delete-duplicates (map (lambda (t) - (let ((i (db:mintest-get-item_path t)) - (n (db:mintest-get-testname t))) - (if (string=? i "") - (conc " " i) - n))) - tests))) - (colnum (car (hash-table-ref runid-to-col run-id)))) - ;; for each test name get the slot if it exists and fill in the cell - ;; or take the next slot and fill in the cell, deal with items in the - ;; run view panel? The run view panel can have a tree selector for - ;; browsing the tests/items - - ;; SWITCH THIS TO USING CHANGED TESTS ONLY - (for-each (lambda (test) - (let* ((test-id (db:mintest-get-id test)) - (state (db:mintest-get-state test)) - (status (db:mintest-get-status test)) - (testname (db:mintest-get-testname test)) - (itempath (db:mintest-get-item_path test)) - (fullname (conc testname "/" itempath)) - (dispname (if (string=? itempath "") testname (conc " " itempath))) - (rownum (hash-table-ref/default testname-to-row fullname #f)) - (test-path (append run-path (if (equal? itempath "") - (list testname) - (list testname itempath)))) - (tb (dboard:tabdat-tests-tree data))) - (print "INFONOTE: run-path: " run-path) - (tree:add-node (dboard:tabdat-tests-tree data) "Runs" - test-path - userdata: (conc "test-id: " test-id)) - (let ((node-num (tree:find-node tb (cons "Runs" test-path))) - (color (car (gutils:get-color-for-state-status state status)))) - (debug:print 0 *default-log-port* "node-num: " node-num ", color: " color) - - (set! changed (dcommon:modifiy-if-different - tb - (conc "COLOR" node-num) - color changed)) - - ;; (iup:attribute-set! tb (conc "COLOR" node-num) color) - ) - (hash-table-set! (dboard:tabdat-path-test-ids data) test-path test-id) - (if (not rownum) - (let ((rownums (hash-table-values testname-to-row))) - (set! rownum (if (null? rownums) - 1 - (+ 1 (common:max rownums)))) - (hash-table-set! testname-to-row fullname rownum) - ;; create the label - (set! changed (dcommon:modifiy-if-different - (dboard:tabdat-runs-matrix data) - (conc rownum ":" 0) - dispname - changed)) - ;; (iup:attribute-set! (dboard:tabdat-runs-matrix data) - ;; (conc rownum ":" 0) dispname) - )) - ;; set the cell text and color - ;; (debug:print 2 *default-log-port* "rownum:colnum=" rownum ":" colnum ", state=" status) - (set! changed (dcommon:modifiy-if-different - (dboard:tabdat-runs-matrix data) - (conc rownum ":" colnum) - (if (member state '("ARCHIVED" "COMPLETED")) - status - state) - changed)) - ;; (iup:attribute-set! (dboard:tabdat-runs-matrix data) - ;; (conc rownum ":" colnum) - ;; (if (member state '("ARCHIVED" "COMPLETED")) - ;; status - ;; state)) - (set! changed (dcommon:modifiy-if-different - (dboard:tabdat-runs-matrix data) - (conc "BGCOLOR" rownum ":" colnum) - (car (gutils:get-color-for-state-status state status)) - changed)) - ;; (iup:attribute-set! (dboard:tabdat-runs-matrix data) - ;; (conc "BGCOLOR" rownum ":" colnum) - ;; (car (gutils:get-color-for-state-status state status))) - )) - tests))) - run-ids) - - (let ((updater (hash-table-ref/default (dboard:commondat-updaters commondat) window-id #f))) - (if updater (updater (hash-table-ref/default data get-details-sig #f)))) - - (if changed (iup:attribute-set! (dboard:tabdat-runs-matrix data) "REDRAW" "ALL")) - ;; (debug:print 2 *default-log-port* "run-changes: " run-changes) - ;; (debug:print 2 *default-log-port* "test-changes: " test-changes) - (list run-changes all-test-changes))) +;; (define (dcommon:run-update keys data runname keypatts testpatt states statuses mode window-id) +;; (let* (;; count and offset => #f so not used +;; ;; the synchash calls modify the "data" hash +;; (changed #f) +;; (get-runs-sig (conc (client:get-signature) " get-runs")) +;; (get-tests-sig (conc (client:get-signature) " get-tests")) +;; (get-details-sig (conc (client:get-signature) " get-test-details")) +;; +;; ;; test-ids to get and display are indexed on window-id in curr-test-ids hash +;; (test-ids (hash-table-values (dboard:tabdat-curr-test-ids data))) +;; ;; run-id is #f in next line to send the query to server 0 +;; (run-changes (synchash:client-get 'db:get-runs get-runs-sig (length keypatts) data #f runname #f #f keypatts)) +;; (tests-detail-changes (if (not (null? test-ids)) +;; (synchash:client-get 'db:get-test-info-by-ids get-details-sig 0 data #f test-ids) +;; '())) +;; +;; ;; Now can calculate the run-ids +;; (run-hash (hash-table-ref/default data get-runs-sig #f)) +;; (run-ids (if run-hash (filter number? (hash-table-keys run-hash)) '())) +;; +;; (all-test-changes (let ((res (make-hash-table))) +;; (for-each (lambda (run-id) +;; (if (> run-id 0) +;; (hash-table-set! res run-id (synchash:client-get 'db:get-tests-for-run-mindata get-tests-sig 0 data run-id 1 testpatt states statuses #f)))) +;; run-ids) +;; res)) +;; (runs-hash (hash-table-ref/default data get-runs-sig #f)) +;; (header (hash-table-ref/default runs-hash "header" #f)) +;; (run-ids (sort (filter number? (hash-table-keys runs-hash)) +;; (lambda (a b) +;; (let* ((record-a (hash-table-ref runs-hash a)) +;; (record-b (hash-table-ref runs-hash b)) +;; (time-a (db:get-value-by-header record-a header "event_time")) +;; (time-b (db:get-value-by-header record-b header "event_time"))) +;; (> time-a time-b))) +;; )) +;; (runid-to-col (hash-table-ref *cachedata* "runid-to-col")) +;; (testname-to-row (hash-table-ref *cachedata* "testname-to-row")) +;; (colnum 1) +;; (rownum 0) +;; (cellname (conc rownum ":" colnum))) ;; rownum = 0 is the header +;; ;; (debug:print 0 *default-log-port* "test-ids " test-ids ", tests-detail-changes " tests-detail-changes) +;; +;; ;; tests related stuff +;; ;; (all-testnames (delete-duplicates (map db:test-get-testname test-changes)))) +;; +;; ;; Given a run-id and testname/item_path calculate a cell R:C +;; +;; ;; NOTE: Also build the test tree browser and look up table +;; ;; +;; ;; Each run is unique on its keys and runname or run-id, store in hash on colnum +;; (for-each (lambda (run-id) +;; (let* ((run-record (hash-table-ref/default runs-hash run-id #f)) +;; (key-vals (map (lambda (key)(db:get-value-by-header run-record header key)) +;; keys)) +;; (run-name (db:get-value-by-header run-record header "runname")) +;; (col-name (conc (string-intersperse key-vals "\n") "\n" run-name)) +;; (run-path (append key-vals (list run-name)))) +;; (hash-table-set! (dboard:tabdat-run-keys data) run-id run-path) +;; ;; modify cell - but only if changed +;; (set! changed (dcommon:modifiy-if-different (dboard:tabdat-runs-matrix data) cellname col-name changed)) +;; (hash-table-set! runid-to-col run-id (list colnum run-record)) +;; ;; Here we update the tests treebox and tree keys +;; (tree:add-node (dboard:tabdat-tests-tree data) "Runs" (append key-vals (list run-name)) +;; userdata: (conc "run-id: " run-id)) +;; (set! colnum (+ colnum 1)))) +;; run-ids) +;; +;; ;; Scan all tests to be displayed and organise all the test names, respecting what is in the hash table +;; ;; Do this analysis in the order of the run-ids, the most recent run wins +;; (for-each (lambda (run-id) +;; (let* ((run-path (hash-table-ref (dboard:tabdat-run-keys data) run-id)) +;; (test-changes (hash-table-ref all-test-changes run-id)) +;; (new-test-dat (car test-changes)) +;; (removed-tests (cadr test-changes)) +;; (tests (sort (map cadr (filter (lambda (testrec) +;; (eq? run-id (db:mintest-get-run_id (cadr testrec)))) +;; new-test-dat)) +;; (lambda (a b) +;; (let ((time-a (db:mintest-get-event_time a)) +;; (time-b (db:mintest-get-event_time b))) +;; (> time-a time-b))))) +;; ;; test-changes is a list of (( id record ) ... ) +;; ;; Get list of test names sorted by time, remove tests +;; (test-names (delete-duplicates (map (lambda (t) +;; (let ((i (db:mintest-get-item_path t)) +;; (n (db:mintest-get-testname t))) +;; (if (string=? i "") +;; (conc " " i) +;; n))) +;; tests))) +;; (colnum (car (hash-table-ref runid-to-col run-id)))) +;; ;; for each test name get the slot if it exists and fill in the cell +;; ;; or take the next slot and fill in the cell, deal with items in the +;; ;; run view panel? The run view panel can have a tree selector for +;; ;; browsing the tests/items +;; +;; ;; SWITCH THIS TO USING CHANGED TESTS ONLY +;; (for-each (lambda (test) +;; (let* ((test-id (db:mintest-get-id test)) +;; (state (db:mintest-get-state test)) +;; (status (db:mintest-get-status test)) +;; (testname (db:mintest-get-testname test)) +;; (itempath (db:mintest-get-item_path test)) +;; (fullname (conc testname "/" itempath)) +;; (dispname (if (string=? itempath "") testname (conc " " itempath))) +;; (rownum (hash-table-ref/default testname-to-row fullname #f)) +;; (test-path (append run-path (if (equal? itempath "") +;; (list testname) +;; (list testname itempath)))) +;; (tb (dboard:tabdat-tests-tree data))) +;; (print "INFONOTE: run-path: " run-path) +;; (tree:add-node (dboard:tabdat-tests-tree data) "Runs" +;; test-path +;; userdata: (conc "test-id: " test-id)) +;; (let ((node-num (tree:find-node tb (cons "Runs" test-path))) +;; (color (car (gutils:get-color-for-state-status state status)))) +;; (debug:print 0 *default-log-port* "node-num: " node-num ", color: " color) +;; +;; (set! changed (dcommon:modifiy-if-different +;; tb +;; (conc "COLOR" node-num) +;; color changed)) +;; +;; ;; (iup:attribute-set! tb (conc "COLOR" node-num) color) +;; ) +;; (hash-table-set! (dboard:tabdat-path-test-ids data) test-path test-id) +;; (if (not rownum) +;; (let ((rownums (hash-table-values testname-to-row))) +;; (set! rownum (if (null? rownums) +;; 1 +;; (+ 1 (common:max rownums)))) +;; (hash-table-set! testname-to-row fullname rownum) +;; ;; create the label +;; (set! changed (dcommon:modifiy-if-different +;; (dboard:tabdat-runs-matrix data) +;; (conc rownum ":" 0) +;; dispname +;; changed)) +;; ;; (iup:attribute-set! (dboard:tabdat-runs-matrix data) +;; ;; (conc rownum ":" 0) dispname) +;; )) +;; ;; set the cell text and color +;; ;; (debug:print 2 *default-log-port* "rownum:colnum=" rownum ":" colnum ", state=" status) +;; (set! changed (dcommon:modifiy-if-different +;; (dboard:tabdat-runs-matrix data) +;; (conc rownum ":" colnum) +;; (if (member state '("ARCHIVED" "COMPLETED")) +;; status +;; state) +;; changed)) +;; ;; (iup:attribute-set! (dboard:tabdat-runs-matrix data) +;; ;; (conc rownum ":" colnum) +;; ;; (if (member state '("ARCHIVED" "COMPLETED")) +;; ;; status +;; ;; state)) +;; (set! changed (dcommon:modifiy-if-different +;; (dboard:tabdat-runs-matrix data) +;; (conc "BGCOLOR" rownum ":" colnum) +;; (car (gutils:get-color-for-state-status state status)) +;; changed)) +;; ;; (iup:attribute-set! (dboard:tabdat-runs-matrix data) +;; ;; (conc "BGCOLOR" rownum ":" colnum) +;; ;; (car (gutils:get-color-for-state-status state status))) +;; )) +;; tests))) +;; run-ids) +;; +;; (let ((updater (hash-table-ref/default (dboard:commondat-updaters commondat) window-id #f))) +;; (if updater (updater (hash-table-ref/default data get-details-sig #f)))) +;; +;; (if changed (iup:attribute-set! (dboard:tabdat-runs-matrix data) "REDRAW" "ALL")) +;; ;; (debug:print 2 *default-log-port* "run-changes: " run-changes) +;; ;; (debug:print 2 *default-log-port* "test-changes: " test-changes) +;; (list run-changes all-test-changes))) (define (dcommon:runsdat-get-col-num dat target runname force-set) (let* ((runs-index (dboard:runsdat-runs-index dat)) (col-name (conc target "/" runname)) (res (hash-table-ref/default runs-index col-name #f))) @@ -491,11 +491,11 @@ (define (dcommon:section-matrix rawconfig sectionname varcolname valcolname #!key (title #f)) (let* ((curr-row-num 1) (key-vals (configf:section-vars rawconfig sectionname)) (section-matrix (iup:matrix #:alignment1 "ALEFT" - #:expand "YES" ;; "HORIZONTAL" + ;; #:expand "YES" ;; "HORIZONTAL" #:numcol 1 #:numlin (length key-vals) #:numcol-visible 1 #:numlin-visible (min 10 (length key-vals)) #:scrollbar "YES"))) @@ -1185,11 +1185,11 @@ (* scalef 0.01) (* scalef -0.01)))) (if the-cnv (dashboard:draw-tests the-cnv last-xadj last-yadj tests-draw-state sorted-testnames test-records)) )) - ;; #:size "50x50" + ;; #:size "250x250" #:expand "YES" #:scrollbar "YES" #:posx "0.5" #:posy "0.5" #:button-cb (lambda (obj btn pressed x y status) Index: docs/Makefile ================================================================== --- docs/Makefile +++ docs/Makefile @@ -13,5 +13,7 @@ fossil add html/* megatest.pdf : megatest.lyx lyx -e pdf2 megatest.lyx +pkts.pdf : pkts.dot + dot -Tpdf pkts.dot -o pkts.pdf Index: docs/manual/megatest_manual.html ================================================================== --- docs/manual/megatest_manual.html +++ docs/manual/megatest_manual.html @@ -801,18 +801,17 @@

Megatest Design Philosophy

-

Megatest is intended to provide the minimum needed resources to make -writing a suite of tests and tasks for implementing continuous build -for software, design engineering or process control (via owlfs for -example) without being specialized for any specific problem -space. Megatest in of itself does not know what constitutes a PASS or -FAIL of a test or task. In most cases megatest is best used in -conjunction with logpro or a similar tool to parse, analyze and decide -on the test outcome.

+

Megatest is a distributed system intended to provide the minimum needed +resources to make writing a suite of tests and tasks for implementing +continuous build for software, design engineering or process control (via +owlfs for example) without being specialized for any specific problem +space. Megatest in of itself does not know what constitutes a PASS or FAIL +of a test or task. In most cases megatest is best used in conjunction with +logpro or a similar tool to parse, analyze and decide on the test outcome.

  • Self-checking -Repeatable strive for directed or self-checking test as opposed to delta based tests @@ -876,24 +875,24 @@

    Goals

    1. Reduce load on the file system. Sqlite3 files on network filesystem can be - a burden. + a burden. [DONE]

    2. Reduce number of servers and frequency of start/stop. This is mostly an - issue of clutter but also a reduction in "moving parts". + issue of clutter but also a reduction in "moving parts". [DONE]

    3. Coalesce activities to a single home host where possible. Give the user feedback that they have started the dashboard on a host other than the - home host. + home host. [DONE]

    4. Reduce number of processes involved in managing running tests. @@ -905,35 +904,35 @@

      Changes Needed

      1. ACID compliant db will be on /tmp and synced to megatest.db with a five - second max delay. + second max delay. [DONE]

      2. Read/writes to db for processes on homehost will go direct to /tmp - megatest.db file. + megatest.db file. [DONE]

      3. Read/wites fron non-homehost processes will go through one server. Bulk reads (e.g. for dashboard or list-runs) will be cached on the current host - in /tmp and synced from the home megatest.db in the testsuite area. + in /tmp and synced from the home megatest.db in the testsuite area. [DONE]

      4. -Db syncs rely on the target db file timestame minus some margin. +Db syncs rely on the target db file timestame minus some margin. [DONE]

      5. Since bulk reads do not use the server we can switch to simple RPC for the - network transport. + network transport. [DONE]

      6. Test running manager process extended to manage multiple running tests. @@ -947,31 +946,32 @@

        ww05 - migrate to inmem-db

        1. -Switch to inmem db with fast sync to on disk db’s [DONE] +Switch to inmem db with fast sync to on disk db’s [DONE]

        2. Server polls tasks table for next action

          1. -Task table used for tracking runner process [DONE] +Task table used for tracking runner process [Replaced by mtutil]

          2. -Task table used for jobs to run +Task table used for jobs to run [Replaced by mtutil]

          3. -Task table used for queueing runner actions (remove runs, cleanRunExecute, etc) +Task table used for queueing runner actions (remove runs, + cleanRunExecute, etc) [Replaced by mtutil]

        @@ -1415,15 +1415,35 @@
        [items]
         A a b c
         B d e f

      Then the config file would effectively appear to contain an items section -exactly like the output from the script. This is extremely useful when -dynamically creating items, itemstables and other config structures. You can -see the expansion of the call by looking in the cached files (look in your -linktree for megatest.config and runconfigs.config cache files and in your -test run areas for the expanded and cached testconfig).

      +exactly like the output from the script. This is useful when dynamically +creating items, itemstables and other config structures. You can see the +expansion of the call by looking in the cached files (look in your linktree +for megatest.config and runconfigs.config cache files and in your test run +areas for the expanded and cached testconfig).

    +

    Wildcards and regexes in Targets

    +
    +
    +
    [a/2/b]
    +VAR1 VAL1
    +
    +[a/%/b]
    +VAR1 VAL2
    +
    +

    Will result in:

    +
    +
    +
    [a/2/b]
    +VAR1 VAL2
    +
    +

    Can use either wildcard of "%" or a regular expression:

    +
    +
    +
    [/abc.*def/]
    +

    Disk Space Checks

    Some parameters you can put in the [setup] section of megatest.config:

    @@ -1489,10 +1509,19 @@
    In megatest.config
    [setup]
     reruns 5
    +
    +

    Replace the default blacklisted environment variables with user supplied +list.

    +

    Default list: USER HOME DISPLAY LS_COLORS XKEYSYMDB EDITOR MAKEFLAGS MAKEF MAKEOVERRIDES

    +
    Add a "bad" variable "PROMPT" to the variables that will be commented out

    in the megatest.sh and megatest.csh files:

    +
    +
    +
    [setup]
    +blacklistvars USER HOME DISPLAY LS_COLORS XKEYSYMDB EDITOR MAKEFLAGS PROMPT
    Run time limit
    @@ -1893,11 +1922,11 @@
    $MT_MEGATEST -env2file .ezsteps/${stepname}

    Triggers

    -

    In your testconfig triggers can be specified

    +

    In your testconfig or megatest.config triggers can be specified

    [triggers]
     
     # Call script running.sh when test goes to state=RUNNING, status=PASS
    @@ -1907,11 +1936,11 @@
     RUNNING/ running.sh
     
     # Call script onpass.sh any time status goes to PASS
     PASS/ onpass.sh
    -

    Scripts called will have; test-id test-rundir trigger, added to the commandline.

    +

    Scripts called will have; test-id test-rundir trigger test-name item-path state status event-time, added to the commandline.

    HINT

    To start an xterm (useful for debugging), use a command line like the following:

    [triggers]
    @@ -1923,10 +1952,78 @@
     Note
     
     There is a trailing space after the --
     
     
    +

    There are a number of environment variables available to the trigger script +but since triggers can be called in various contexts not all variables are +available at all times. The trigger script should check for the variable and +fail gracefully if it doesn’t exist.

    + + +++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    Table 4. Environment variables visible to the trigger script
    Variable Purpose

    MT_TEST_RUN_DIR

    The directory where Megatest ran this test

    MT_CMDINFO

    Encoded command data for the test

    MT_DEBUG_MODE

    Used to pass the debug mode to nested calls to Megatest

    MT_RUN_AREA_HOME

    Megatest home area

    MT_TESTSUITENAME

    The name of this testsuite or area

    MT_TEST_NAME

    The name of this test

    MT_ITEM_INFO

    The variable and values for the test item

    MT_MEGATEST

    Which Megatest binary is being used by this area

    MT_TARGET

    The target variable values, separated by /

    MT_LINKTREE

    The base of the link tree where all run tests can be found

    MT_ITEMPATH

    The values of the item path variables, separated by /

    MT_RUNNAME

    The name of the run

    Override the Toplevel HTML File

    Megatest generates a simple html file summary for top level tests of iterated tests. The generation can be overridden. NOTE: the output of @@ -2011,11 +2108,11 @@

    These routines can be called from the megatest repl.

    - + @@ -2063,10 +2160,10 @@

    Index: docs/manual/megatest_manual.txt ================================================================== --- docs/manual/megatest_manual.txt +++ docs/manual/megatest_manual.txt @@ -24,18 +24,17 @@ qualification. Megatest Design Philosophy -------------------------- -Megatest is intended to provide the minimum needed resources to make -writing a suite of tests and tasks for implementing continuous build -for software, design engineering or process control (via owlfs for -example) without being specialized for any specific problem -space. Megatest in of itself does not know what constitutes a PASS or -FAIL of a test or task. In most cases megatest is best used in -conjunction with logpro or a similar tool to parse, analyze and decide -on the test outcome. +Megatest is a distributed system intended to provide the minimum needed +resources to make writing a suite of tests and tasks for implementing +continuous build for software, design engineering or process control (via +owlfs for example) without being specialized for any specific problem +space. Megatest in of itself does not know what constitutes a PASS or FAIL +of a test or task. In most cases megatest is best used in conjunction with +logpro or a similar tool to parse, analyze and decide on the test outcome. * Self-checking -Repeatable strive for directed or self-checking test as opposed to delta based tests * Traceable - environment variables, host OS and other possibly influential Index: docs/manual/reference.txt ================================================================== --- docs/manual/reference.txt +++ docs/manual/reference.txt @@ -67,11 +67,13 @@ VAR1 VAL2 ------------------------- Can use either wildcard of "%" or a regular expression: +------------------------- [/abc.*def/] +------------------------- Disk Space Checks ^^^^^^^^^^^^^^^^^ Some parameters you can put in the [setup] section of megatest.config: @@ -143,10 +145,22 @@ .In megatest.config ------------------ [setup] reruns 5 ------------------ + +Replace the default blacklisted environment variables with user supplied +list. + +Default list: USER HOME DISPLAY LS_COLORS XKEYSYMDB EDITOR MAKEFLAGS MAKEF MAKEOVERRIDES + +.Add a "bad" variable "PROMPT" to the variables that will be commented out +in the megatest.sh and megatest.csh files: +----------------- +[setup] +blacklistvars USER HOME DISPLAY LS_COLORS XKEYSYMDB EDITOR MAKEFLAGS PROMPT +----------------- Run time limit ++++++++++++++ ----------------- @@ -468,11 +482,11 @@ ---------------------------- Triggers ~~~~~~~~ -In your testconfig triggers can be specified +In your testconfig or megatest.config triggers can be specified ----------------- [triggers] # Call script running.sh when test goes to state=RUNNING, status=PASS @@ -483,11 +497,11 @@ # Call script onpass.sh any time status goes to PASS PASS/ onpass.sh ----------------- -Scripts called will have; test-id test-rundir trigger, added to the commandline. +Scripts called will have; test-id test-rundir trigger test-name item-path state status event-time, added to the commandline. HINT To start an xterm (useful for debugging), use a command line like the following: @@ -495,10 +509,33 @@ [triggers] COMPLETED/ xterm -e bash -s -- ----------------- NOTE: There is a trailing space after the -- + +There are a number of environment variables available to the trigger script +but since triggers can be called in various contexts not all variables are +available at all times. The trigger script should check for the variable and +fail gracefully if it doesn't exist. + +.Environment variables visible to the trigger script +[width="90%",cols="^,2m",frame="topbot",options="header"] +|====================== +|Variable | Purpose +| MT_TEST_RUN_DIR | The directory where Megatest ran this test +| MT_CMDINFO | Encoded command data for the test +| MT_DEBUG_MODE | Used to pass the debug mode to nested calls to Megatest +| MT_RUN_AREA_HOME | Megatest home area +| MT_TESTSUITENAME | The name of this testsuite or area +| MT_TEST_NAME | The name of this test +| MT_ITEM_INFO | The variable and values for the test item +| MT_MEGATEST | Which Megatest binary is being used by this area +| MT_TARGET | The target variable values, separated by '/' +| MT_LINKTREE | The base of the link tree where all run tests can be found +| MT_ITEMPATH | The values of the item path variables, separated by '/' +| MT_RUNNAME | The name of the run +|====================== Override the Toplevel HTML File ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ADDED docs/pkts.dot Index: docs/pkts.dot ================================================================== --- /dev/null +++ docs/pkts.dot @@ -0,0 +1,59 @@ +digraph megatest_pkts { + ranksep=0.05 + // rankdir=LR + +node [shape=box,style=filled]; + + "SENSORS" [ label = "{ Sensor Processing | { file | git | fossil | script }}" + shape = "record"; ]; + + "RUNS" [ label = "{ Runs Processing | { launch | clean | re-run | archive } | { dispatcher }}"; + shape = "record"; ]; + + "WORK" [ label = "{ Work Items | { start task | task competed }}"; + shape = "record"; ]; + + "USERREQ" [ label = "{ User Requests (Unix and Web) | { launch | clean | re-run | archive }}"; + shape = "record"; ]; + + "MTAREA1" [ label = "{ Megatest Area 1 | { parallel job\nmanagement | test\nmanagement | data\nrollup }}"; + shape = "record"; ]; + + "MTAREA2" [ label = "{ Megatest Area 2 | { parallel job\nmanagement | test\nmanagement | data\nrollup }}"; + shape = "record"; ]; + + "MTAREA3" [ label = "More Megatest Areas ... "; + shape = "record"; ]; + + "PGDB" [ label = "postgres database"; + shape = "cylinder"; ]; + + "WEBAPP" [ label = "{ Web View | { Runs | Contours | Control | Time View }}"; + shape = "record"; ]; + + // "WEBCTRL" [ label = "{ Web View \n(control) }"; + // shape = "record"; ]; + + "SENSORS" -> "SPKTS"; + "RUNS" -> "run pkts"; + "run pkts" -> "RUNS"; + "WORK" -> "work pkts"; + "work pkts" -> "RUNS"; + "USERREQ" -> "user request pkts"; + "SPKTS" -> "RUNS"; + "user request pkts" -> "RUNS"; + "RUNS" -> "MTAREA1" -> "PGDB"; + "RUNS" -> "MTAREA2" -> "PGDB"; + "RUNS" -> "MTAREA3" -> "PGDB"; + "PGDB" -> "WEBAPP"; + // "WEBCTRL" -> "run pkts"; + + subgraph cluster_pkts { + label="Packets"; + "SPKTS" [ label = "Sensor Packets" ]; + "run pkts"; + "work pkts"; + "user request pkts"; + } +} + ADDED docs/pkts.pdf Index: docs/pkts.pdf ================================================================== --- /dev/null +++ docs/pkts.pdf cannot compute difference between binary files Index: docs/plan.txt ================================================================== --- docs/plan.txt +++ docs/plan.txt @@ -8,44 +8,45 @@ Goals ^^^^^ . Reduce load on the file system. Sqlite3 files on network filesystem can be - a burden. + a burden. [green]#[DONE]# . Reduce number of servers and frequency of start/stop. This is mostly an - issue of clutter but also a reduction in "moving parts". + issue of clutter but also a reduction in "moving parts". [green]#[DONE]# . Coalesce activities to a single home host where possible. Give the user feedback that they have started the dashboard on a host other than the - home host. + home host. [green]#[DONE]# . Reduce number of processes involved in managing running tests. Changes Needed ^^^^^^^^^^^^^^ . ACID compliant db will be on /tmp and synced to megatest.db with a five - second max delay. + second max delay. [green]#[DONE]# . Read/writes to db for processes on homehost will go direct to /tmp - megatest.db file. + megatest.db file. [green]#[DONE]# . Read/wites fron non-homehost processes will go through one server. Bulk reads (e.g. for dashboard or list-runs) will be cached on the current host - in /tmp and synced from the home megatest.db in the testsuite area. -. Db syncs rely on the target db file timestame minus some margin. + in /tmp and synced from the home megatest.db in the testsuite area. [green]#[DONE]# +. Db syncs rely on the target db file timestame minus some margin. [green]#[DONE]# . Since bulk reads do not use the server we can switch to simple RPC for the - network transport. + network transport. [green]#[DONE]# . Test running manager process extended to manage multiple running tests. Current Items ~~~~~~~~~~~~~ ww05 - migrate to inmem-db ^^^^^^^^^^^^^^^^^^^^^^^^^^ -. Switch to inmem db with fast sync to on disk db's [DONE] +. Switch to inmem db with fast sync to on disk db's [green]#[DONE]# . Server polls tasks table for next action -.. Task table used for tracking runner process [DONE] -.. Task table used for jobs to run -.. Task table used for queueing runner actions (remove runs, cleanRunExecute, etc) +.. Task table used for tracking runner process [red]#[Replaced by mtutil]# +.. Task table used for jobs to run [red]#[Replaced by mtutil]# +.. Task table used for queueing runner actions (remove runs, + cleanRunExecute, etc) [red]#[Replaced by mtutil#] // ww32 // ~~~~ // ADDED emergency-patch-1.scm Index: emergency-patch-1.scm ================================================================== --- /dev/null +++ emergency-patch-1.scm @@ -0,0 +1,203 @@ + + +;; These are called by the server on recipt of /api calls +;; - keep it simple, only return the actual result of the call, i.e. no meta info here +;; +;; - returns #( flag result ) +;; +(define (api:execute-requests dbstruct dat) + (handle-exceptions + exn + (let ((call-chain (get-call-chain))) + (debug:print 0 *default-log-port* "WARNING: api:execute-requests received an exception from peer, dat=" dat) + (print-call-chain (current-error-port)) + (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) + (vector #f (vector exn call-chain dat))) ;; return some stuff for debug if an exception happens + (cond + ((not (vector? dat)) ;; it is an error to not receive a vector + (vector #f (vector #f "remote must be called with a vector"))) + ((> *api-process-request-count* 20) ;; 20) + (debug:print 0 *default-log-port* "WARNING: api:execute-requests received an overloaded message.") + (set! *server-overloaded* #t) + (vector #f (vector #f 'overloaded))) ;; the inner vector is what gets returned. nope, don't know why. please refactor! + (else + (let* ((cmd-in (vector-ref dat 0)) + (cmd (if (symbol? cmd-in) + cmd-in + (string->symbol cmd-in))) + (params (vector-ref dat 1)) + (start-t (current-milliseconds)) + (readonly-mode (dbr:dbstruct-read-only dbstruct)) + (readonly-command (member cmd api:read-only-queries)) + (writecmd-in-readonly-mode (and readonly-mode (not readonly-command))) + (res + (if writecmd-in-readonly-mode + (conc "attempt to run write command "cmd" on a read-only database") + (case cmd + ;;=============================================== + ;; READ/WRITE QUERIES + ;;=============================================== + + ((get-keys-write) (db:get-keys dbstruct)) ;; force a dummy "write" query to force server; for debug in -repl + + ;; SERVERS + ((start-server) (apply server:kind-run params)) + ((kill-server) (set! *server-run* #f)) + + ;; TESTS + + ;;((test-set-state-status-by-id) (apply mt:test-set-state-status-by-id dbstruct params)) + ;;BB - commented out above because it was calling below, eventually, incorrectly (dbstruct passed to mt:test-set-state-status-by-id, which previosly did more, but now only passes thru to db:set-state-status-and-roll-up-items. + ((test-set-state-status-by-id) + + ;; (define (db:set-state-status-and-roll-up-items dbstruct run-id test-name item-path state status comment) + (db:set-state-status-and-roll-up-items + dbstruct + (list-ref params 0) ; run-id + (list-ref params 1) ; test-name + #f ; item-path + (list-ref params 2) ; state + (list-ref params 3) ; status + (list-ref params 4) ; comment + )) + + ((delete-test-records) (apply db:delete-test-records dbstruct params)) + ((delete-old-deleted-test-records) (apply db:delete-old-deleted-test-records dbstruct params)) + ((test-set-state-status) (apply db:test-set-state-status dbstruct params)) + ((test-set-top-process-pid) (apply db:test-set-top-process-pid dbstruct params)) + ((set-state-status-and-roll-up-items) (apply db:set-state-status-and-roll-up-items dbstruct params)) + ((top-test-set-per-pf-counts) (apply db:top-test-set-per-pf-counts dbstruct params)) + ((test-set-archive-block-id) (apply db:test-set-archive-block-id dbstruct params)) + + ;; RUNS + ((register-run) (apply db:register-run dbstruct params)) + ((set-tests-state-status) (apply db:set-tests-state-status dbstruct params)) + ((delete-run) (apply db:delete-run dbstruct params)) + ((lock/unlock-run) (apply db:lock/unlock-run dbstruct params)) + ((update-run-event_time) (apply db:update-run-event_time dbstruct params)) + ((update-run-stats) (apply db:update-run-stats dbstruct params)) + ((set-var) (apply db:set-var dbstruct params)) + ((del-var) (apply db:del-var dbstruct params)) + + ;; STEPS + ((teststep-set-status!) (apply db:teststep-set-status! dbstruct params)) + + ;; TEST DATA + ((test-data-rollup) (apply db:test-data-rollup dbstruct params)) + ((csv->test-data) (apply db:csv->test-data dbstruct params)) + + ;; MISC + ((sync-inmem->db) (let ((run-id (car params))) + (db:sync-touched dbstruct run-id force-sync: #t))) + ((mark-incomplete) (apply db:find-and-mark-incomplete dbstruct params)) + + ;; TESTMETA + ((testmeta-add-record) (apply db:testmeta-add-record dbstruct params)) + ((testmeta-update-field) (apply db:testmeta-update-field dbstruct params)) + ((get-tests-tags) (db:get-tests-tags dbstruct)) + + ;; TASKS + ((tasks-add) (apply tasks:add dbstruct params)) + ((tasks-set-state-given-param-key) (apply tasks:set-state-given-param-key dbstruct params)) + ((tasks-get-last) (apply tasks:get-last dbstruct params)) + + ;; NO SYNC DB + ((no-sync-set) (apply db:no-sync-set *no-sync-db* params)) + ((no-sync-get/default) (apply db:no-sync-get/default *no-sync-db* params)) + ((no-sync-del!) (apply db:no-sync-del! *no-sync-db* params)) + + ;; ARCHIVES + ;; ((archive-get-allocations) + ((archive-register-disk) (apply db:archive-register-disk dbstruct params)) + ((archive-register-block-name)(apply db:archive-register-block-name dbstruct params)) + ((archive-allocate-testsuite/area-to-block)(apply db:archive-allocate-testsuite/area-to-block dbstruct block-id testsuite-name areakey)) + + ;;====================================================================== + ;; READ ONLY QUERIES + ;;====================================================================== + + ;; KEYS + ((get-key-val-pairs) (apply db:get-key-val-pairs dbstruct params)) + ((get-keys) (db:get-keys dbstruct)) + ((get-key-vals) (apply db:get-key-vals dbstruct params)) + ((get-target) (apply db:get-target dbstruct params)) + ((get-targets) (db:get-targets dbstruct)) + + ;; ARCHIVES + ((test-get-archive-block-info) (apply db:test-get-archive-block-info dbstruct params)) + + ;; TESTS + ((test-toplevel-num-items) (apply db:test-toplevel-num-items dbstruct params)) + ((get-test-info-by-id) (apply db:get-test-info-by-id dbstruct params)) + ((test-get-rundir-from-test-id) (apply db:test-get-rundir-from-test-id dbstruct params)) + ((get-count-tests-running-for-testname) (apply db:get-count-tests-running-for-testname dbstruct params)) + ((get-count-tests-running) (apply db:get-count-tests-running dbstruct params)) + ((get-count-tests-running-in-jobgroup) (apply db:get-count-tests-running-in-jobgroup dbstruct params)) + ;; ((delete-test-step-records) (apply db:delete-test-step-records dbstruct params)) + ((get-previous-test-run-record) (apply db:get-previous-test-run-record dbstruct params)) + ((get-matching-previous-test-run-records)(apply db:get-matching-previous-test-run-records dbstruct params)) + ((test-get-logfile-info) (apply db:test-get-logfile-info dbstruct params)) + ((test-get-records-for-index-file) (apply db:test-get-records-for-index-file dbstruct params)) + ((get-testinfo-state-status) (apply db:get-testinfo-state-status dbstruct params)) + ((test-get-top-process-pid) (apply db:test-get-top-process-pid dbstruct params)) + ((test-get-paths-matching-keynames-target-new) (apply db:test-get-paths-matching-keynames-target-new dbstruct params)) + ((get-prereqs-not-met) (apply db:get-prereqs-not-met dbstruct params)) + ((get-count-tests-running-for-run-id) (apply db:get-count-tests-running-for-run-id dbstruct params)) + ((synchash-get) (apply synchash:server-get dbstruct params)) + ((get-raw-run-stats) (apply db:get-raw-run-stats dbstruct params)) + + ;; RUNS + ((get-run-info) (apply db:get-run-info dbstruct params)) + ((get-run-status) (apply db:get-run-status dbstruct params)) + ((set-run-status) (apply db:set-run-status dbstruct params)) + ((get-tests-for-run) (apply db:get-tests-for-run dbstruct params)) + ((get-test-id) (apply db:get-test-id dbstruct params)) + ((get-tests-for-run-mindata) (apply db:get-tests-for-run-mindata dbstruct params)) + ((get-runs) (apply db:get-runs dbstruct params)) + ((get-num-runs) (apply db:get-num-runs dbstruct params)) + ((get-all-run-ids) (db:get-all-run-ids dbstruct)) + ((get-prev-run-ids) (apply db:get-prev-run-ids dbstruct params)) + ((get-run-ids-matching-target) (apply db:get-run-ids-matching-target dbstruct params)) + ((get-runs-by-patt) (apply db:get-runs-by-patt dbstruct params)) + ((get-run-name-from-id) (apply db:get-run-name-from-id dbstruct params)) + ((get-main-run-stats) (apply db:get-main-run-stats dbstruct params)) + ((get-var) (apply db:get-var dbstruct params)) + ((get-run-stats) (apply db:get-run-stats dbstruct params)) + + ;; STEPS + ((get-steps-data) (apply db:get-steps-data dbstruct params)) + ((get-steps-for-test) (apply db:get-steps-for-test dbstruct params)) + + ;; TEST DATA + ((read-test-data) (apply db:read-test-data dbstruct params)) + ((read-test-data*) (apply db:read-test-data* dbstruct params)) + + ;; MISC + ((get-latest-host-load) (apply db:get-latest-host-load dbstruct params)) + ((have-incompletes?) (apply db:have-incompletes? dbstruct params)) + ((login) (apply db:login dbstruct params)) + ((general-call) (let ((stmtname (car params)) + (run-id (cadr params)) + (realparams (cddr params))) + (db:general-call dbstruct stmtname realparams))) + ((sdb-qry) (apply sdb:qry params)) + ((ping) (current-process-id)) + ((get-changed-record-ids) (apply db:get-changed-record-ids dbstruct params)) + + ;; TESTMETA + ((testmeta-get-record) (apply db:testmeta-get-record dbstruct params)) + + ;; TASKS + ((find-task-queue-records) (apply tasks:find-task-queue-records dbstruct params)) + (else + (debug:print 0 *default-log-port* "ERROR: bad api call " cmd) + (conc "ERROR: BAD api call " cmd)))))) + + ;; save all stats + (let ((delta-t (- (current-milliseconds) + start-t))) + (hash-table-set! *db-api-call-time* cmd + (cons delta-t (hash-table-ref/default *db-api-call-time* cmd '())))) + (if writecmd-in-readonly-mode + (vector #f res) + (vector #t res))))))) ADDED emergency-patch-2.scm Index: emergency-patch-2.scm ================================================================== --- /dev/null +++ emergency-patch-2.scm @@ -0,0 +1,311 @@ +(include "common_records.scm") +(include "key_records.scm") +(include "db_records.scm") +(include "run_records.scm") +(include "test_records.scm") + +(define (common:wait-for-cpuload maxload numcpus waitdelay #!key (count 1000) (msg #f)(remote-host #f)) + (let* ((loadavg (common:get-cpu-load remote-host)) + (first (car loadavg)) + (next (cadr loadavg)) + (adjload (* maxload numcpus)) + (loadjmp (- first next))) + (cond + ((and (> first adjload) + (> count 0)) + (debug:print-info 0 *default-log-port* "waiting " waitdelay " seconds due to load " first " exceeding max of " adjload " " (if msg msg "")) + (thread-sleep! waitdelay) + (common:wait-for-cpuload maxload numcpus waitdelay count: (- count 1))) + ((and (> loadjmp numcpus) + (> count 0)) + (debug:print-info 0 *default-log-port* "waiting " waitdelay " seconds due to load jump " loadjmp " > numcpus " numcpus (if msg msg "")) + (thread-sleep! waitdelay) + (common:wait-for-cpuload maxload numcpus waitdelay count: (- count 1)))))) + +(define (common:wait-for-homehost-load maxload msg) + (let* ((hh-dat (if (common:on-homehost?) ;; if we are on the homehost then pass in #f so the calls are local. + #f + (common:get-homehost))) + (hh (if hh-dat (car hh-dat) #f)) + (numcpus (common:get-num-cpus hh))) + (common:wait-for-normalized-load maxload msg: msg remote-host: hh))) + +;; wait for normalized cpu load to drop below maxload +;; +(define (common:wait-for-normalized-load maxload #!key (msg #f)(remote-host #f)) + (let ((num-cpus (common:get-num-cpus remote-host))) + (common:wait-for-cpuload maxload num-cpus 15 msg: msg remote-host: remote-host))) + +;; hed tal reg reruns reglen regfull test-record runname test-name item-path jobgroup max-concurrent-jobs run-id waitons item-path testmode test-patts required-tests test-registry registry-mutex flags keyvals run-info newtal all-tests-registry itemmaps) +(define (runs:process-expanded-tests runsdat testdat) + ;; unroll the contents of runsdat and testdat (due to ongoing refactoring). + (let* ((hed (runs:testdat-hed testdat)) + (tal (runs:testdat-tal testdat)) + (reg (runs:testdat-reg testdat)) + (reruns (runs:testdat-reruns testdat)) + (test-name (runs:testdat-test-name testdat)) + (item-path (runs:testdat-item-path testdat)) + (jobgroup (runs:testdat-jobgroup testdat)) + (waitons (runs:testdat-waitons testdat)) + (item-path (runs:testdat-item-path testdat)) + (testmode (runs:testdat-testmode testdat)) + (newtal (runs:testdat-newtal testdat)) + (itemmaps (runs:testdat-itemmaps testdat)) + (test-record (runs:testdat-test-record testdat)) + (prereqs-not-met (runs:testdat-prereqs-not-met testdat)) + + (reglen (runs:dat-reglen runsdat)) + (regfull (runs:dat-regfull runsdat)) + (runname (runs:dat-runname runsdat)) + (max-concurrent-jobs (runs:dat-max-concurrent-jobs runsdat)) + (run-id (runs:dat-run-id runsdat)) + (test-patts (runs:dat-test-patts runsdat)) + (required-tests (runs:dat-required-tests runsdat)) + (test-registry (runs:dat-test-registry runsdat)) + (registry-mutex (runs:dat-registry-mutex runsdat)) + (flags (runs:dat-flags runsdat)) + (keyvals (runs:dat-keyvals runsdat)) + (run-info (runs:dat-run-info runsdat)) + (all-tests-registry (runs:dat-all-tests-registry runsdat)) + (run-limits-info (runs:dat-can-run-more-tests runsdat)) + ;; (runs:can-run-more-tests run-id jobgroup max-concurrent-jobs)) ;; look at the test jobgroup and tot jobs running + (have-resources (car run-limits-info)) + (num-running (list-ref run-limits-info 1)) + (num-running-in-jobgroup(list-ref run-limits-info 2)) + (max-concurrent-jobs (list-ref run-limits-info 3)) + (job-group-limit (list-ref run-limits-info 4)) + ;; (prereqs-not-met (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps)) + ;; (prereqs-not-met (mt:lazy-get-prereqs-not-met run-id waitons item-path mode: testmode itemmap: itemmap)) + (fails (if (list? prereqs-not-met) + (runs:calc-fails prereqs-not-met) + (begin + (debug:print-error 0 *default-log-port* "prereqs-not-met is not a list! " prereqs-not-met) + '()))) + (non-completed (filter (lambda (x) ;; remove hed from not completed list, duh, of course it is not completed! + (not (equal? x hed))) + (runs:calc-not-completed prereqs-not-met))) + (loop-list (list hed tal reg reruns)) + ;; configure the load runner + (numcpus (common:get-num-cpus #f)) + (maxload (string->number (or (configf:lookup *configdat* "jobtools" "maxload") "3.0"))) ;; use a non-number string to disable + (maxhomehostload (string->number (or (configf:lookup *configdat* "jobtools" "maxhomehostload") "1.2"))) ;; use a non-number string to disable + (waitdelay (string->number (or (configf:lookup *configdat* "jobtools" "waitdelay") "60")))) + (debug:print-info 4 *default-log-port* "have-resources: " have-resources " prereqs-not-met: (" + (string-intersperse + (map (lambda (t) + (if (vector? t) + (conc (db:test-get-state t) "/" (db:test-get-status t)) + (conc " WARNING: t is not a vector=" t ))) + prereqs-not-met) + ", ") ") fails: " fails + "\nregistered? " (hash-table-ref/default test-registry (db:test-make-full-name test-name item-path) #f)) + + + + (if (and (not (null? prereqs-not-met)) + (runs:lownoise (conc "waiting on tests " prereqs-not-met hed) 60)) + (debug:print-info 2 *default-log-port* "waiting on tests; " (string-intersperse (runs:mixed-list-testname-and-testrec->list-of-strings prereqs-not-met) ", "))) + + ;; Don't know at this time if the test have been launched at some time in the past + ;; i.e. is this a re-launch? + (debug:print-info 4 *default-log-port* "run-limits-info = " run-limits-info) + + (cond + + ;; Check item path against item-patts, + ;; + ((not (tests:match test-patts (tests:testqueue-get-testname test-record) item-path required: required-tests)) ;; This test/itempath is not to be run + ;; else the run is stuck, temporarily or permanently + ;; but should check if it is due to lack of resources vs. prerequisites + (debug:print-info 1 *default-log-port* "Skipping " (tests:testqueue-get-testname test-record) " " item-path " as it doesn't match " test-patts) + (if (or (not (null? tal))(not (null? reg))) + (list (runs:queue-next-hed tal reg reglen regfull) + (runs:queue-next-tal tal reg reglen regfull) + (runs:queue-next-reg tal reg reglen regfull) + reruns) + #f)) + + ;; Register tests + ;; + ((not (hash-table-ref/default test-registry (db:test-make-full-name test-name item-path) #f)) + (debug:print-info 4 *default-log-port* "Pre-registering test " test-name "/" item-path " to create placeholder" ) + ;; always do firm registration now in v1.60 and greater ;; (eq? *transport-type* 'fs) ;; no point in parallel registration if use fs + (let register-loop ((numtries 15)) + (rmt:register-test run-id test-name item-path) + (if (rmt:get-test-id run-id test-name item-path) + (hash-table-set! test-registry (db:test-make-full-name test-name item-path) 'done) + (if (> numtries 0) + (begin + (thread-sleep! 0.5) + (register-loop (- numtries 1))) + (debug:print-error 0 *default-log-port* "failed to register test " (db:test-make-full-name test-name item-path))))) + (if (not (eq? (hash-table-ref/default test-registry (db:test-make-full-name test-name "") #f) 'done)) + (begin + (rmt:register-test run-id test-name "") + (if (rmt:get-test-id run-id test-name "") + (hash-table-set! test-registry (db:test-make-full-name test-name "") 'done)))) + (runs:shrink-can-run-more-tests-count runsdat) ;; DELAY TWEAKER (still needed?) + (if (and (null? tal)(null? reg)) + (list hed tal (append reg (list hed)) reruns) + (list (runs:queue-next-hed tal reg reglen regfull) + (runs:queue-next-tal tal reg reglen regfull) + ;; NB// Here we are building reg as we register tests + ;; if regfull we must pop the front item off reg + (if regfull + (append (cdr reg) (list hed)) + (append reg (list hed))) + reruns))) + + ;; At this point hed test registration must be completed. + ;; + ((eq? (hash-table-ref/default test-registry (db:test-make-full-name test-name item-path) #f) + 'start) + (debug:print-info 0 *default-log-port* "Waiting on test registration(s): " + (string-intersperse + (filter (lambda (x) + (eq? (hash-table-ref/default test-registry x #f) 'start)) + (hash-table-keys test-registry)) + ", ")) + (thread-sleep! 0.051) + (list hed tal reg reruns)) + + ;; If no resources are available just kill time and loop again + ;; + ((not have-resources) ;; simply try again after waiting a second + (if (runs:lownoise "no resources" 60) + (debug:print-info 1 *default-log-port* "no resources to run new tests, waiting ...")) + ;; Have gone back and forth on this but db starvation is an issue. + ;; wait one second before looking again to run jobs. + (thread-sleep! 1) + ;; could have done hed tal here but doing car/cdr of newtal to rotate tests + (list (car newtal)(cdr newtal) reg reruns)) + + ;; This is the final stage, everything is in place so launch the test + ;; + ((and have-resources + (or (null? prereqs-not-met) + (and (member 'toplevel testmode) ;; 'toplevel) + (null? non-completed) + (not (member 'exclusive testmode))))) + ;; (hash-table-delete! *max-tries-hash* (db:test-make-full-name test-name item-path)) + ;; we are going to reset all the counters for test retries by setting a new hash table + ;; this means they will increment only when nothing can be run + (set! *max-tries-hash* (make-hash-table)) + ;; well, first lets see if cpu load throttling is enabled. If so wait around until the + ;; average cpu load is under the threshold before continuing + (if maxload ;; only gate if maxload is specified + (common:wait-for-cpuload maxload numcpus waitdelay)) + (if maxhomehostload + (common:wait-for-homehost-load maxhomehostload (conc "Waiting for homehost load to drop below normalized value of " maxhomehostload))) + + (run:test run-id run-info keyvals runname test-record flags #f test-registry all-tests-registry) + (runs:incremental-print-results run-id) + (hash-table-set! test-registry (db:test-make-full-name test-name item-path) 'running) + (runs:shrink-can-run-more-tests-count runsdat) ;; DELAY TWEAKER (still needed?) + ;; (thread-sleep! *global-delta*) + (if (or (not (null? tal))(not (null? reg))) + (list (runs:queue-next-hed tal reg reglen regfull) + (runs:queue-next-tal tal reg reglen regfull) + (runs:queue-next-reg tal reg reglen regfull) + reruns) + #f)) + + ;; must be we have unmet prerequisites + ;; + (else + (debug:print 4 *default-log-port* "FAILS: " fails) + ;; If one or more of the prereqs-not-met are FAIL then we can issue + ;; a message and drop hed from the items to be processed. + ;; (runs:mixed-list-testname-and-testrec->list-of-strings prereqs-not-met) + (if (and (not (null? prereqs-not-met)) + (runs:lownoise (conc "waiting on tests " prereqs-not-met hed) 60)) + (debug:print-info 1 *default-log-port* "waiting on tests; " (string-intersperse + (runs:mixed-list-testname-and-testrec->list-of-strings + prereqs-not-met) ", "))) + (if (or (null? fails) + (member 'toplevel testmode)) + (begin + ;; couldn't run, take a breather + (if (runs:lownoise "Waiting for more work to do..." 60) + (debug:print-info 0 *default-log-port* "Waiting for more work to do...")) + (thread-sleep! 1) + (list (car newtal)(cdr newtal) reg reruns)) + ;; the waiton is FAIL so no point in trying to run hed ever again + (if (or (not (null? reg))(not (null? tal))) + (if (vector? hed) + (begin + (debug:print 1 *default-log-port* "WARNING: Dropping test " test-name "/" item-path + " from the launch list as it has prerequistes that are FAIL") + (let ((test-id (rmt:get-test-id run-id hed ""))) + (if test-id (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_FAIL" "Failed to run due to failed prerequisites"))) + (runs:shrink-can-run-more-tests-count runsdat) ;; DELAY TWEAKER (still needed?) + ;; (thread-sleep! *global-delta*) + ;; This next is for the items + (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "BLOCKED" #f) + (hash-table-set! test-registry (db:test-make-full-name test-name item-path) 'removed) + (list (runs:queue-next-hed tal reg reglen regfull) + (runs:queue-next-tal tal reg reglen regfull) + (runs:queue-next-reg tal reg reglen regfull) + reruns ;; WAS: (cons hed reruns) ;; but that makes no sense? + )) + (let ((nth-try (hash-table-ref/default test-registry hed 0))) + (cond + ((member "RUNNING" (map db:test-get-state prereqs-not-met)) + (if (runs:lownoise (conc "possible RUNNING prerequistes " hed) 60) + (debug:print 0 *default-log-port* "WARNING: test " hed " has possible RUNNING prerequisites, don't give up on it yet.")) + (thread-sleep! 4) + (list (runs:queue-next-hed newtal reg reglen regfull) + (runs:queue-next-tal newtal reg reglen regfull) + (runs:queue-next-reg newtal reg reglen regfull) + reruns)) + ((or (not nth-try) + (and (number? nth-try) + (< nth-try 10))) + (hash-table-set! test-registry hed (if (number? nth-try) + (+ nth-try 1) + 0)) + (if (runs:lownoise (conc "not removing test " hed) 60) + (debug:print 1 *default-log-port* "WARNING: not removing test " hed " from queue although it may not be runnable due to FAILED prerequisites")) + ;; may not have processed correctly. Could be a race condition in your test implementation? Dropping test " hed) ;; " as it has prerequistes that are FAIL. (NOTE: hed is not a vector)") + (runs:shrink-can-run-more-tests-count runsdat) ;; DELAY TWEAKER (still needed?) + ;; (list hed tal reg reruns) + ;; (list (car newtal)(cdr newtal) reg reruns) + ;; (hash-table-set! test-registry hed 'removed) + (list (runs:queue-next-hed newtal reg reglen regfull) + (runs:queue-next-tal newtal reg reglen regfull) + (runs:queue-next-reg newtal reg reglen regfull) + reruns)) + ((symbol? nth-try) + (if (eq? nth-try 'removed) ;; removed is removed - drop it NOW + (if (null? tal) + #f ;; yes, really + (list (car tal)(cdr tal) reg reruns)) + (begin + (if (runs:lownoise (conc "FAILED prerequisites or other issue" hed) 60) + (debug:print 0 *default-log-port* "WARNING: test " hed " has FAILED prerequisites or other issue. Internal state " nth-try " will be overridden and we'll retry.")) + (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "KEEP_TRYING" #f) + (hash-table-set! test-registry hed 0) + (list (runs:queue-next-hed newtal reg reglen regfull) + (runs:queue-next-tal newtal reg reglen regfull) + (runs:queue-next-reg newtal reg reglen regfull) + reruns)))) + (else + (if (runs:lownoise (conc "FAILED prerequitests and we tried" hed) 60) + (debug:print 0 *default-log-port* "WARNING: test " hed " has FAILED prerequitests and we've tried at least 10 times to run it. Giving up now.")) + ;; (debug:print 0 *default-log-port* " prereqs: " prereqs-not-met) + (hash-table-set! test-registry hed 'removed) + (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "TEN_STRIKES" #f) + ;; I'm unclear on if this roll up is needed - it may be the root cause of the "all set to FAIL" bug. + (rmt:set-state-status-and-roll-up-items run-id test-name item-path #f "FAIL" #f) ;; treat as FAIL + (list (if (null? tal)(car newtal)(car tal)) + tal + reg + reruns))))) + ;; can't drop this - maybe running? Just keep trying + (let ((runable-tests (runs:runable-tests prereqs-not-met))) + (if (null? runable-tests) + #f ;; I think we are truly done here + (list (runs:queue-next-hed newtal reg reglen regfull) + (runs:queue-next-tal newtal reg reglen regfull) + (runs:queue-next-reg newtal reg reglen regfull) + reruns))))))))) Index: env.scm ================================================================== --- env.scm +++ env.scm @@ -12,11 +12,11 @@ (declare (unit env)) (use sql-de-lite) ;; srfi-1 posix regex regex-case srfi-69 srfi-18 call-with-environment-variables) (define (env:open-db fname) - (let* ((db-exists (file-exists? fname)) + (let* ((db-exists (common:file-exists? fname)) (db (open-database fname))) (if (not db-exists) (begin (exec (sql db "CREATE TABLE envvars ( id INTEGER PRIMARY KEY, Index: ezsteps.scm ================================================================== --- ezsteps.scm +++ ezsteps.scm @@ -8,12 +8,11 @@ ;; implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR ;; PURPOSE. ;; strftime('%m/%d/%Y %H:%M:%S','now','localtime') -(use sqlite3 srfi-1 posix regex regex-case srfi-69 dot-locking directory-utils) -(import (prefix sqlite3 sqlite3:)) +(use srfi-1 posix regex srfi-69 directory-utils) (declare (unit ezsteps)) (declare (uses db)) (declare (uses common)) (declare (uses items)) @@ -37,19 +36,19 @@ (test-id (db:test-get-id testdat)) (run-id (db:test-get-run_id testdat)) (test-name (db:test-get-testname testdat)) (kill-job #f)) ;; for future use (on re-factoring with launch.scm code (let loop ((count 5)) - (if (file-exists? test-run-dir) + (if (common:file-exists? test-run-dir) (push-directory test-run-dir) (if (> count 0) (begin (debug:print 0 *default-log-port* "WARNING: ezsteps attempting to run but test run directory " test-run-dir " is not there. Waiting and trying again " count " more times") (sleep 3) (loop (- count 1)))))) (debug:print-info 0 *default-log-port* "Running in directory " test-run-dir) - (if (not (file-exists? ".ezsteps"))(create-directory ".ezsteps")) + (if (not (common:file-exists? ".ezsteps"))(create-directory ".ezsteps")) ;; if ezsteps was defined then we are sure to have at least one step but check anyway (if (not (> (length ezstepslst) 0)) (message-window "ERROR: You can only re-run steps defined via ezsteps") (begin (let loop ((ezstep (car ezstepslst)) @@ -75,11 +74,11 @@ (loop (car tal)(cdr tal) stepname #f)))) (debug:print 4 *default-log-port* "ezsteps:\n stepname: " stepname " stepinfo: " stepinfo " stepparts: " stepparts " stepparms: " stepparms " stepcmd: " stepcmd) - (if (file-exists? (conc stepname ".logpro"))(set! logpro-used #t)) + (if (common:file-exists? (conc stepname ".logpro"))(set! logpro-used #t)) ;; call the command using mt_ezstep (set! script (conc "mt_ezstep " stepname " " (if prevstep prevstep "-") " " stepcmd)) (debug:print 4 *default-log-port* "script: " script) ADDED file-tail.scm Index: file-tail.scm ================================================================== --- /dev/null +++ file-tail.scm @@ -0,0 +1,76 @@ + +(use (prefix sqlite3 sqlite3:) posix typed-records) + +(define (open-tail-db ) + (let* ((basedir (create-directory (conc "/tmp/" (current-user-name)))) + (dbpath (conc basedir "/megatest_logs.db")) + (dbexists (common:file-exists? dbpath)) + (db (sqlite3:open-database dbpath)) + (handler (sqlite3:make-busy-timeout 136000))) + (sqlite3:set-busy-handler! db handler) + (sqlite3:execute db "PRAGMA synchronous = 0;") + (if (not dbexists) + (begin + (sqlite3:execute db "CREATE TABLE IF NOT EXISTS log_files (id INTEGER PRIMARY KEY,filename TEXT,event_time TIMESTAMP DEFAULT (strftime('%s','now')));") + (sqlite3:execute db "CREATE TABLE IF NOT EXISTS log_data (id INTEGER PRIMARY KEY,fid INTEGER,line TEXT,event_time TIMESTAMP DEFAULT (strftime('%s','now')));") + )) + db)) + +(define (tail-write db fid lines) + (sqlite3:with-transaction + db + (lambda () + (for-each + (lambda (line) + (sqlite3:execute db "INSERT INTO log_data (fid,line) VALUES (?,?);" fid line)) + lines)))) + +(define (tail-get-fid db fname) + (let ((fid (handle-exceptions + exn + #f + (sqlite3:first-result db "SELECT id FROM log_files WHERE filename=?;" fname)))) + (if fid + fid + (begin + (sqlite3:execute db "INSERT INTO log_files (filename) VALUES (?);" fname) + (tail-get-fid db fname))))) + +(define (file-tail fname #!key (db-in #f)) + (let* ((inp (open-input-file fname)) + (db (or db-in (open-tail-db))) + (fid (tail-get-fid db fname))) + (let loop ((inl (read-line inp)) + (lines '()) + (lastwr (current-seconds))) + (if (eof-object? inl) + (let ((timed-out (> (- (current-seconds) lastwr) 60))) + (if timed-out (tail-write db fid (reverse lines))) + (sleep 1) + (if timed-out + (loop (read-line inp) '() (current-seconds)) + (loop (read-line inp) lines lastwr))) + (let* ((savelines (> (length lines) 19))) + ;; (print inl) + (if savelines (tail-write db fid (reverse lines))) + (loop (read-line inp) + (if savelines + '() + (cons inl lines)) + (if savelines + (current-seconds) + lastwr))))))) + +;; offset -20 means get last 20 lines +;; +(define (tail-get-lines db fid offset count) + (if (> offset 0) + (map-row (lambda (id line) + (vector id line)) + db + "SELECT id,line FROM log_data WHERE fid=? OFFSET ? LIMIT ?;" fid offset count) + (reverse ;; get N from the end + (map-row (lambda (id line) + (vector id line)) + db + "SELECT id,line FROM log_data WHERE fid=? ORDER BY id DESC LIMIT ?;" fid (abs offset))))) Index: filedb.scm ================================================================== --- filedb.scm +++ filedb.scm @@ -16,11 +16,11 @@ (include "fdb_records.scm") ;; (include "settings.scm") (define (filedb:open-db dbpath) (let* ((fdb (make-filedb:fdb)) - (dbexists (file-exists? dbpath)) + (dbexists (common:file-exists? dbpath)) (db (sqlite3:open-database dbpath))) (filedb:fdb-set-db! fdb db) (filedb:fdb-set-dbpath! fdb dbpath) (filedb:fdb-set-pathcache! fdb (make-hash-table)) (filedb:fdb-set-idcache! fdb (make-hash-table)) Index: genexample.scm ================================================================== --- genexample.scm +++ genexample.scm @@ -56,11 +56,11 @@ (begin (print "The path " path " does not exist or is not a directory. Attempting to create it now") (create-directory path #t))) ;; First check that the directory is empty! - (if (and (file-exists? path) + (if (and (common:file-exists? path) (not (null? (glob (conc path "/*"))))) (begin (print "WARNING: directory " path " is not empty, are you sure you want to continue?") (display "Enter y/n: ") (if (equal? "y" (read-line)) @@ -210,22 +210,22 @@ (scripts '()) (items '()) (rel-path #f)) (cond - ((file-exists? "megatest.config") (set! rel-path "./")) - ((file-exists? "../megatest.config") (set! rel-path "../")) - ((file-exists? "../../megatest.config") (set! rel-path "../../")) - ((file-exists? "../../../megatest.config")(set! rel-path "../../../"))) ;; good enough dang it. + ((common:file-exists? "megatest.config") (set! rel-path "./")) + ((common:file-exists? "../megatest.config") (set! rel-path "../")) + ((common:file-exists? "../../megatest.config") (set! rel-path "../../")) + ((common:file-exists? "../../../megatest.config")(set! rel-path "../../../"))) ;; good enough dang it. ;; Don't gather data or continue if a) megatest.config can't be found or b) testconfig already exists (if (not rel-path) (begin (print "ERROR: I could not find megatest.config, please run -create-test in the top dir of your megatest area") (exit 1))) - (if (file-exists? (conc rel-path "tests/" testname "/testconfig")) + (if (common:file-exists? (conc rel-path "tests/" testname "/testconfig")) (begin (print "WARNING: You already have a testconfig in " rel-path "tests/" testname ", do you want to clobber your files?") (display "Enter y/n: ") (if (not (equal? "y" (read-line))) (begin Index: gutils.scm ================================================================== --- gutils.scm +++ gutils.scm @@ -20,24 +20,35 @@ (let* ((c1 (map string->number (string-split color1))) (c2 (map string->number (string-split color2))) (delta (map (lambda (a b)(abs (- a b))) c1 c2))) (null? (filter (lambda (x)(> x 3)) delta)))) +(define gutils:colors + '((PASS . "70 249 73") + (FAIL . "253 33 49") + (SKIP . "230 230 0"))) + +(define (gutils:get-color-spec effective-state) + (or (alist-ref effective-state gutils:colors) + (alist-ref 'FAIL gutils:colors))) + +;; BBnote - state status dashboard button color / text defined here (define (gutils:get-color-for-state-status state status);; #!key (get-label #f)) ;; ((if get-label cadr car) (case (string->symbol state) ((COMPLETED) ;; ARCHIVED) (case (string->symbol status) ((PASS) (list "70 249 73" status)) ((WARN WAIVED) (list "255 172 13" status)) - ((SKIP) (list "230 230 0" status)) + ((SKIP) (list (gutils:get-color-spec 'SKIP) status)) + ((ABORT) (list "198 36 166" status)) (else (list "253 33 49" status)))) ((ARCHIVED) (case (string->symbol status) ((PASS) (list "70 170 73" status)) ((WARN WAIVED) (list "200 130 13" status)) - ((SKIP) (list "180 180 0" status)) + ((SKIP) (list (gutils:get-color-spec 'SKIP) status)) (else (list "180 33 49" status)))) ;; (if (equal? status "PASS") ;; '("70 249 73" "PASS") ;; (if (or (equal? status "WARN") ;; (equal? status "WAIVED")) @@ -44,14 +55,16 @@ ;; (list "255 172 13" status) ;; (list "223 33 49" status)))) ;; greenish orangeish redish ((LAUNCHED) (list "101 123 142" state)) ((CHECK) (list "255 100 50" state)) ((REMOTEHOSTSTART) (list "50 130 195" state)) - ((RUNNING) (list "9 131 232" state)) + ((RUNNING STARTED) (list "9 131 232" state)) ((KILLREQ) (list "39 82 206" state)) ((KILLED) (list "234 101 17" state)) - ((NOT_STARTED) (list "240 240 240" state)) + ((NOT_STARTED) (case (string->symbol status) + ((CHECK STARTED)(list (gutils:get-color-spec 'SKIP) state)) + (else (list "240 240 240" state)))) ;; for xor mode below ;; ((CLEAN) (case (string->symbol status) ((CLEAN-FAIL CLEAN-CHECK CLEAN-ABORT) (list "200 130 13" status)) ;; orange requested for these Index: http-transport.scm ================================================================== --- http-transport.scm +++ http-transport.scm @@ -8,12 +8,11 @@ ;; implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR ;; PURPOSE. (require-extension (srfi 18) extras tcp s11n) -(use srfi-1 posix regex regex-case srfi-69 hostinfo md5 message-digest posix-extras) ;; sqlite3 -;; (import (prefix sqlite3 sqlite3:)) +(use srfi-1 posix regex regex-case srfi-69 hostinfo md5 message-digest posix-extras) (use spiffy uri-common intarweb http-client spiffy-request-vars intarweb spiffy-directory-listing) ;; Configurations for server (tcp-buffer-size 2048) @@ -109,21 +108,24 @@ (http-transport:try-start-server ipaddrstr start-port))) ;; This is recursively run by http-transport:run until sucessful ;; (define (http-transport:try-start-server ipaddrstr portnum) - (let ((config-hostname (configf:lookup *configdat* "server" "hostname"))) + (let ((config-hostname (configf:lookup *configdat* "server" "hostname")) + (config-use-proxy (equal? (configf:lookup *configdat* "client" "use-http_proxy") "yes"))) + (if (not config-use-proxy) + (determine-proxy (constantly #f))) (debug:print-info 0 *default-log-port* "http-transport:try-start-server time=" (seconds->time-string (current-seconds)) " ipaddrsstr=" ipaddrstr " portnum=" portnum " config-hostname=" config-hostname) (handle-exceptions exn (begin (print-error-message exn) (if (< portnum 64000) (begin (debug:print 0 *default-log-port* "WARNING: attempt to start server failed. Trying again ...") (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) - (debug:print 0 *default-log-port* "exn=" (condition->list exn)) + (debug:print 5 *default-log-port* "exn=" (condition->list exn)) (portlogger:open-run-close portlogger:set-failed portnum) (debug:print 0 *default-log-port* "WARNING: failed to start on portnum: " portnum ", trying next port") (thread-sleep! 0.1) ;; get_next_port goes here @@ -292,12 +294,17 @@ (server-dat (if runremote (remote-conndat runremote) #f))) ;; (hash-table-ref/default *runremote* run-id #f))) (if (vector? server-dat) (let ((api-dat (http-transport:server-dat-get-api-uri server-dat))) - (close-connection! api-dat) - #t) + (handle-exceptions + exn + (begin + (print-call-chain *default-log-port*) + (debug:print-error 0 *default-log-port* " closing connection failed with error: " ((condition-property-accessor 'exn 'message) exn))) + (close-connection! api-dat) + #t)) #f))) (define (make-http-transport:server-dat)(make-vector 6)) (define (http-transport:server-dat-get-iface vec) (vector-ref vec 0)) @@ -380,11 +387,11 @@ (start-time (current-milliseconds))) ;; Use this opportunity to sync the tmp db to megatest.db (if (not server-going) ;; *dbstruct-db* (begin (debug:print 0 *default-log-port* "SERVER: dbprep") - (set! *dbstruct-db* (db:setup)) ;; run-id)) + (set! *dbstruct-db* (db:setup #t)) ;; run-id)) (set! server-going #t) (debug:print 0 *default-log-port* "SERVER: running, megatest version: " (common:get-full-version)) ;; NOTE: the server is NOT yet marked as running in the log. We do that in the keep-running routine. (thread-start! *watchdog*))) ;; when things go wrong we don't want to be doing the various queries too often @@ -424,11 +431,11 @@ (if (common:low-noise-print 60 "dbstats") (begin (debug:print 0 *default-log-port* "Server stats:") (db:print-current-query-stats))) (let* ((hrs-since-start (/ (- (current-seconds) server-start-time) 3600)) - (adjusted-timeout (if (> hrs-since-start 1) + (adjusted-timeout (if (> hrs-since-start 1) ;; never used! (- server-timeout (inexact->exact (round (* hrs-since-start 60)))) ;; subtract 60 seconds per hour server-timeout))) (if (common:low-noise-print 120 "server timeout") (debug:print-info 0 *default-log-port* "Adjusted server timeout: " adjusted-timeout)) (cond @@ -440,11 +447,12 @@ (debug:print-info 0 *default-log-port* "Server continuing, seconds since last db access: " (- (current-seconds) last-access)) (let ((curr-time (current-seconds))) (handle-exceptions exn (debug:print 0 *default-log-port* "ERROR: Failed to change timestamp on log file " server-log-file ". Are you out of space on that disk?") - (change-file-times server-log-file curr-time curr-time)))) + (if (not *server-overloaded*) + (change-file-times server-log-file curr-time curr-time))))) (loop 0 server-state bad-sync-count (current-milliseconds))) (else (debug:print-info 0 *default-log-port* "Server timed out. seconds since last db access: " (- (current-seconds) last-access)) (http-transport:server-shutdown port))))))) @@ -483,17 +491,16 @@ ;; all routes though here end in exit ... ;; ;; start_server? ;; (define (http-transport:launch) - ;; (if (args:get-arg "-daemonize") - ;; (begin - ;; (daemon:ize) - ;; (if *alt-log-file* ;; we should re-connect to this port, I think daemon:ize disrupts it - ;; (begin - ;; (current-error-port *alt-log-file*) - ;; (current-output-port *alt-log-file*))))) + ;; lets not even bother to start if there are already three or more server files ready to go + (let* ((num-alive (server:get-num-alive (server:get-list *toppath*)))) + (if (> num-alive 3) + (begin + (debug:print 0 *default-log-port* "ERROR: Aborting server start because there are already " num-alive " possible servers either running or starting up") + (exit)))) (let* ((th2 (make-thread (lambda () (debug:print-info 0 *default-log-port* "Server run thread started") (http-transport:run (if (args:get-arg "-server") (args:get-arg "-server") Index: items.scm ================================================================== --- items.scm +++ items.scm @@ -132,12 +132,12 @@ (set! itemstable (map (lambda (item) (if (procedure? (cadr item)) (list (car item)((cadr item))) ;; evaluate the proc item)) itemstable)) - (if (and have-items (null? items)) (debug:print-error 0 *default-log-port* "[items] section in testconfig but no entries defined")) - (if (and have-itable (null? itemstable))(debug:print-error 0 *default-log-port* "[itemstable] section in testconfig but no entries defined")) + (if (and have-items (null? items)) (debug:print 0 *default-log-port* "WARNING:[items] section in testconfig but no entries defined")) + (if (and have-itable (null? itemstable))(debug:print 0 *default-log-port* "WARNNG:[itemstable] section in testconfig but no entries defined")) (if (or (not (null? items))(not (null? itemstable))) (append (item-assoc->item-list items) (item-table->item-list itemstable)) '(())))) Index: key_records.scm ================================================================== --- key_records.scm +++ key_records.scm @@ -10,13 +10,13 @@ ;;====================================================================== (define-inline (keys->valslots keys) ;; => ?,?,? .... (string-intersperse (map (lambda (x) "?") keys) ",")) -(define-inline (keys->key/field keys . additional) - (string-join (map (lambda (k)(conc k " TEXT")) - (append keys additional)) ",")) +;; (define-inline (keys->key/field keys . additional) +;; (string-join (map (lambda (k)(conc k " TEXT")) +;; (append keys additional)) ",")) (define-inline (item-list->path itemdat) (if (list? itemdat) (string-intersperse (map cadr itemdat) "/") "")) Index: keys.scm ================================================================== --- keys.scm +++ keys.scm @@ -64,9 +64,13 @@ ;;====================================================================== ;; config file related routines ;;====================================================================== -(define (keys:config-get-fields confdat) - (let ((fields (hash-table-ref/default confdat "fields" '()))) - (map car fields))) +(define keys:config-get-fields common:get-fields) +(define (keys:make-key/field-string confdat) + (let ((fields (configf:get-section confdat "fields"))) + (string-join + (map (lambda (field)(conc (car field) " " (cadr field))) + fields) + ","))) Index: launch.scm ================================================================== --- launch.scm +++ launch.scm @@ -21,13 +21,10 @@ (declare (unit launch)) (declare (uses common)) (declare (uses configf)) (declare (uses db)) -;; (declare (uses sdb)) -(declare (uses tdb)) -;; (declare (uses filedb)) (include "common_records.scm") (include "key_records.scm") (include "db_records.scm") @@ -61,11 +58,11 @@ ;; return (conc status ": " comment) from the final section so that ;; the comment can be set in the step record in launch.scm ;; (define (launch:load-logpro-dat run-id test-id stepname) (let ((cname (conc stepname ".dat"))) - (if (file-exists? cname) + (if (common:file-exists? cname) (let* ((dat (read-config cname #f #f)) (csvr (db:logpro-dat->csv dat stepname)) (csvt (let-values (((fmt-cell fmt-record fmt-csv) (make-format ","))) (fmt-csv (map list->csv-record csvr)))) (status (configf:lookup dat "final" "exit-status")) @@ -90,11 +87,11 @@ (script "") ; "#!/bin/bash\n") ;; yep, we depend on bin/bash FIXME!!!\ (logpro-file (conc stepname ".logpro")) (html-file (conc stepname ".html")) (dat-file (conc stepname ".dat")) (tconfig-logpro (configf:lookup testconfig "logpro" stepname)) - (logpro-used (file-exists? logpro-file))) + (logpro-used (common:file-exists? logpro-file))) (if (and tconfig-logpro (not logpro-used)) ;; no logpro file found but have a defn in the testconfig (begin (with-output-to-file logpro-file @@ -109,11 +106,11 @@ " stepparms: " stepparms " stepcmd: " stepcmd) ;; ;; first source the previous environment ;; (let ((prev-env (conc ".ezsteps/" prevstep (if (string-search (regexp "csh") ;; (get-environment-variable "SHELL")) ".csh" ".sh")))) - ;; (if (and prevstep (file-exists? prev-env)) + ;; (if (and prevstep (common:file-exists? prev-env)) ;; (set! script (conc script "source " prev-env)))) ;; call the command using mt_ezstep ;; (set! script (conc "mt_ezstep " stepname " " (if prevstep prevstep "x") " " stepcmd)) @@ -128,11 +125,11 @@ (with-output-to-file "Makefile.ezsteps" (lambda () (print stepname ".log :") (print "\t" cmd) - (if (file-exists? (conc stepname ".logpro")) + (if (common:file-exists? (conc stepname ".logpro")) (print "\tlogpro " stepname ".logpro " stepname ".html < " stepname ".log")) (print) (print stepname " : " stepname ".log") (print)) #:append) @@ -172,11 +169,11 @@ (logfna (if logpro-used (conc stepname ".html") "")) (comment #f)) (if logpro-used (let ((datfile (conc stepname ".dat"))) ;; load the .dat file into the test_data table if it exists - (if (file-exists? datfile) + (if (common:file-exists? datfile) (set! comment (launch:load-logpro-dat run-id test-id stepname))) (rmt:test-set-log! run-id test-id (conc stepname ".html")))) (rmt:teststep-set-status! run-id test-id stepname "end" exinfo comment logfna)) ;; set the test final status (let* ((process-exit-status (launch:einf-exit-code exit-info)) ;; (vector-ref exit-info 2)) @@ -296,11 +293,11 @@ ;; after all that, still no testconfig? Time to abort (if (not testconfig) (begin (debug:print-error 0 *default-log-port* "Failed to resolve megatest.config, runconfigs.config and testconfig issues. Giving up now") (exit 1))) - (if (not (file-exists? ".ezsteps"))(create-directory ".ezsteps")) + (if (not (common:file-exists? ".ezsteps"))(create-directory ".ezsteps")) ;; if ezsteps was defined then we are sure to have at least one step but check anyway (if (not (> (length ezstepslst) 0)) (debug:print-error 0 *default-log-port* "ezsteps defined but ezstepslst is zero length") (let loop ((ezstep (car ezstepslst)) (tal (cdr ezstepslst)) @@ -308,20 +305,21 @@ ;; check exit-info (vector-ref exit-info 1) (if (launch:einf-exit-status exit-info) ;; (vector-ref exit-info 1) (let ((logpro-used (launch:runstep ezstep run-id test-id exit-info m tal testconfig)) (stepname (car ezstep))) ;; if logpro-used read in the stepname.dat file - (if (and logpro-used (file-exists? (conc stepname ".dat"))) + (if (and logpro-used (common:file-exists? (conc stepname ".dat"))) (launch:load-logpro-dat run-id test-id stepname)) (if (steprun-good? logpro-used (launch:einf-exit-code exit-info)) (if (not (null? tal)) (loop (car tal) (cdr tal) stepname)) (debug:print 4 *default-log-port* "WARNING: step " (car ezstep) " failed. Stopping"))) (debug:print 4 *default-log-port* "WARNING: a prior step failed, stopping at " ezstep))))))) (define (launch:monitor-job run-id test-id item-path fullrunscript ezsteps test-name tconfigreg exit-info m work-area runtlim misc-flags) - (let* ((start-seconds (current-seconds)) + (let* ((update-period (string->number (or (configf:lookup *configdat* "setup" "test-stats-update-period") "30"))) + (start-seconds (current-seconds)) (calc-minutes (lambda () (inexact->exact (round (- (current-seconds) @@ -330,30 +328,38 @@ ;; (tests:set-full-meta-info #f test-id run-id (calc-minutes) work-area) ;; (tests:set-full-meta-info test-id run-id (calc-minutes) work-area) (tests:set-full-meta-info #f test-id run-id (calc-minutes) work-area 10) (let loop ((minutes (calc-minutes)) (cpu-load (alist-ref 'adj-core-load (common:get-normalized-cpu-load #f))) - (disk-free (get-df (current-directory)))) - (let ((new-cpu-load (let* ((load (alist-ref 'adj-core-load (common:get-normalized-cpu-load #f))) - (delta (abs (- load cpu-load)))) - (if (> delta 0.1) ;; don't bother updating with small changes - load - #f))) - (new-disk-free (let* ((df (get-df (current-directory))) - (delta (abs (- df disk-free)))) - (if (> delta 200) ;; ignore changes under 200 Meg - df - #f)))) + (disk-free (get-df (current-directory))) + (last-sync (current-seconds))) + (let* ((over-time (> (current-seconds) (+ last-sync update-period))) + (new-cpu-load (let* ((load (alist-ref 'adj-core-load (common:get-normalized-cpu-load #f))) + (delta (abs (- load cpu-load)))) + (if (> delta 0.1) ;; don't bother updating with small changes + load + #f))) + (new-disk-free (let* ((df (if over-time ;; only get df every 30 seconds + (get-df (current-directory)) + disk-free)) + (delta (abs (- df disk-free)))) + (if (and (> df 0) + (> (/ delta df) 0.1)) ;; (> delta 200) ;; ignore changes under 200 Meg + df + #f))) + (do-sync (or new-cpu-load new-disk-free over-time))) + (debug:print 4 *default-log-port* "cpu: " new-cpu-load " disk: " new-disk-free " last-sync: " last-sync " do-sync: " do-sync) (set! kill-job? (or (test-get-kill-request run-id test-id) ;; run-id test-name itemdat)) (and runtlim (let* ((run-seconds (- (current-seconds) start-seconds)) (time-exceeded (> run-seconds runtlim))) (if time-exceeded (begin (debug:print-info 0 *default-log-port* "KILLING TEST DUE TO TIME LIMIT EXCEEDED! Runtime=" run-seconds " seconds, limit=" runtlim) #t) #f))))) - (tests:update-central-meta-info run-id test-id new-cpu-load new-disk-free (calc-minutes) #f #f) + (if do-sync + (tests:update-central-meta-info run-id test-id new-cpu-load new-disk-free (calc-minutes) #f #f)) (if kill-job? (begin (mutex-lock! m) ;; NOTE: The pid can change as different steps are run. Do we need handshaking between this ;; section and the runit section? Or add a loop that tries three times with a 1/4 second @@ -397,11 +403,14 @@ (exit))) (if (hash-table-ref/default misc-flags 'keep-going #f) (begin (thread-sleep! 3) ;; (+ 3 (random 6))) ;; add some jitter to the call home time to spread out the db accesses (if (hash-table-ref/default misc-flags 'keep-going #f) ;; keep originals for cpu-load and disk-free unless they change more than the allowed delta - (loop (calc-minutes) (or new-cpu-load cpu-load) (or new-disk-free disk-free))))))) + (loop (calc-minutes) + (or new-cpu-load cpu-load) + (or new-disk-free disk-free) + (if do-sync (current-seconds) last-sync))))))) (tests:update-central-meta-info run-id test-id (get-cpu-load) (get-df (current-directory))(calc-minutes) #f #f))) ;; NOTE: Checking twice for keep-going is intentional (define (launch:execute encoded-cmd) (let* ((cmdinfo (common:read-encoded-string encoded-cmd)) @@ -440,11 +449,11 @@ (fullrunscript (if (not runscript) #f (if (substring-index "/" runscript) runscript ;; use unadultered if contains slashes (let ((fulln (conc testpath "/" runscript))) - (if (and (file-exists? fulln) + (if (and (common:file-exists? fulln) (file-execute-access? fulln)) fulln runscript))))) ;; assume it is on the path ) ;; (rollup-status 0) @@ -505,11 +514,11 @@ (debug:print 0 *default-log-port* "Failed to create directory " logdir " expect problems, message: " ((condition-property-accessor 'exn 'message) exn)) (create-directory logdir #t))))) ;; NFS might not have propagated the directory meta data to the run host - give it time if needed (let loop ((count 0)) - (if (or (file-exists? top-path) + (if (or (common:file-exists? top-path) (> count 10)) (change-directory top-path) (begin (debug:print 0 *default-log-port* "INFO: Not starting job yet - directory " top-path " not found") (thread-sleep! 10) @@ -540,11 +549,15 @@ ;; Do not run the test if it is REMOVING, RUNNING, KILLREQ or REMOTEHOSTSTART, ;; Mark the test as REMOTEHOSTSTART *IMMEDIATELY* ;; (let* ((test-info (rmt:get-test-info-by-id run-id test-id)) - (test-host (db:test-get-host test-info)) + (test-host (if test-info + (db:test-get-host test-info) + (begin + (debug:print 0 *default-log-port* "ERROR: failed to find a record for test-id " test-id ", exiting.") + (exit)))) (test-pid (db:test-get-process_id test-info))) (cond ((member (db:test-get-state test-info) '("INCOMPLETE" "KILLED" "UNKNOWN" "KILLREQ" "STUCK")) ;; prior run of this test didn't complete, go ahead and try to rerun (debug:print 0 *default-log-port* "INFO: test is INCOMPLETE or KILLED, treat this execute call as a rerun request") ;; (tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a") @@ -566,11 +579,11 @@ (debug:print 2 *default-log-port* "Exectuing " test-name " (id: " test-id ") on " (get-host-name)) (set! keys (rmt:get-keys)) ;; (runs:set-megatest-env-vars run-id inkeys: keys inkeyvals: keyvals) ;; these may be needed by the launching process ;; one of these is defunct/redundant ... - (if (not (launch:setup force: #t)) + (if (not (launch:setup force-reread: #t)) (begin (debug:print 0 *default-log-port* "Failed to setup, exiting") ;; (sqlite3:finalize! db) ;; (sqlite3:finalize! tdb) (exit 1))) @@ -597,11 +610,11 @@ (list "default" target))) ;;(bb-check-path msg: "launch:execute post block 1") ;; NFS might not have propagated the directory meta data to the run host - give it time if needed (let loop ((count 0)) - (if (or (file-exists? work-area) + (if (or (common:file-exists? work-area) (> count 10)) (change-directory work-area) (begin (debug:print 0 *default-log-port* "INFO: Not starting job yet - directory " work-area " not found") (thread-sleep! 10) @@ -656,11 +669,14 @@ ;;(bb-check-path msg: "launch:execute post block 41") (runs:set-megatest-env-vars run-id inkeys: keys inkeyvals: keyvals) ;;(bb-check-path msg: "launch:execute post block 42") (set-item-env-vars itemdat) ;;(bb-check-path msg: "launch:execute post block 43") - (save-environment-as-files "megatest") + (let ((blacklist (configf:lookup *configdat* "setup" "blacklistvars"))) + (if blacklist + (save-environment-as-files "megatest" ignorevars: (string-split blacklist)) + (save-environment-as-files "megatest"))) ;;(bb-check-path msg: "launch:execute post block 44") ;; open-run-close not needed for test-set-meta-info ;; (tests:set-full-meta-info #f test-id run-id 0 work-area) ;; (tests:set-full-meta-info test-id run-id 0 work-area) (tests:set-full-meta-info #f test-id run-id 0 work-area 10) @@ -668,11 +684,11 @@ ;; (thread-sleep! 0.3) ;; NFS slowness has caused grief here (if (args:get-arg "-xterm") (set! fullrunscript "xterm") (if (and fullrunscript - (file-exists? fullrunscript) + (common:file-exists? fullrunscript) (not (file-execute-access? fullrunscript))) (system (conc "chmod ug+x " fullrunscript)))) ;; We are about to actually kick off the test ;; so this is a good place to remove the records for @@ -741,12 +757,15 @@ (rmt:update-run-stats run-id (rmt:get-raw-run-stats run-id))) (mutex-unlock! m) (debug:print 2 *default-log-port* "Output from running " fullrunscript ", pid " (launch:einf-pid exit-info) " in work area " work-area ":\n====\n exit code " (launch:einf-exit-code exit-info) "\n" "====\n") (if (not (launch:einf-exit-status exit-info)) - (exit 4))))))) + (exit 4)))) + ))) +;; DO NOT USE - caching of configs is handled in launch:setup now. +;; (define (launch:cache-config) ;; if we have a linktree and -runtests and -target and the directory exists dump the config ;; to megatest-(current-seconds).cfg and symlink it to megatest.cfg (if (and *configdat* (or (args:get-arg "-run") @@ -758,22 +777,22 @@ (args:get-arg ":runname") (getenv "MT_RUNNAME"))) (fulldir (conc linktree "/" target "/" runname))) - (if (and linktree (file-exists? linktree)) ;; can't proceed without linktree + (if (and linktree (common:file-exists? linktree)) ;; can't proceed without linktree (begin (debug:print-info 0 *default-log-port* "Have -run with target=" target ", runname=" runname ", fulldir=" fulldir ", testpatt=" (or (args:get-arg "-testpatt") "%")) - (if (not (file-exists? fulldir)) + (if (not (common:file-exists? fulldir)) (create-directory fulldir #t)) ;; need to protect with exception handler (if (and target runname - (file-exists? fulldir)) + (common:file-exists? fulldir)) (let ((tmpfile (conc fulldir "/.megatest.cfg." (current-seconds))) (targfile (conc fulldir "/.megatest.cfg-" megatest-version "-" megatest-fossil-hash)) (rconfig (conc fulldir "/.runconfig." megatest-version "-" megatest-fossil-hash))) - (if (file-exists? rconfig) ;; only cache megatest.config AFTER runconfigs has been cached + (if (common:file-exists? rconfig) ;; only cache megatest.config AFTER runconfigs has been cached (begin (debug:print-info 0 *default-log-port* "Caching megatest.config in " tmpfile) (if (not (common:in-running-test?)) (configf:write-alist *configdat* tmpfile)) (system (conc "ln -sf " tmpfile " " targfile)))) @@ -794,61 +813,92 @@ ;; side effects: ;; sets; *configdat* (megatest.config info) ;; *runconfigdat* (runconfigs.config info) ;; *configstatus* (status of the read data) ;; -(define (launch:setup #!key (force #f) (areapath #f)) +(define (launch:setup #!key (force-reread #f) (areapath #f)) (mutex-lock! *launch-setup-mutex*) (if (and *toppath* - (eq? *configstatus* 'fulldata)) ;; got it all + (eq? *configstatus* 'fulldata) (not force-reread)) ;; got it all (begin - (debug:print 0 *default-log-port* "NOTE: skipping launch:setup-body call since we have fulldata") + (debug:print 2 *default-log-port* "NOTE: skipping launch:setup-body call since we have fulldata") (mutex-unlock! *launch-setup-mutex*) *toppath*) - (let ((res (launch:setup-body force: force areapath: areapath))) + (let ((res (launch:setup-body force-reread: force-reread areapath: areapath))) (mutex-unlock! *launch-setup-mutex*) res))) + +;; return paths depending on what info is available. +;; +(define (launch:get-cache-file-paths areapath toppath target mtconfig) + (let* ((use-cache (common:use-cache?)) + (runname (common:args-get-runname)) + (linktree (common:get-linktree)) + (testname (common:get-full-test-name)) + (rundir (if (and runname target linktree) + (common:directory-writable? (conc linktree "/" target "/" runname)) + #f)) + (testdir (if (and rundir testname) + (common:directory-writable? (conc rundir "/" testname)) + #f)) + (cachedir (or testdir rundir)) + (mtcachef (and cachedir (conc cachedir "/" ".megatest.cfg-" megatest-version "-" megatest-fossil-hash))) + (rccachef (and cachedir (conc cachedir "/" ".runconfigs.cfg-" megatest-version "-" megatest-fossil-hash)))) + (debug:print-info 6 *default-log-port* + "runname=" runname + "\n linktree=" linktree + "\n testname=" testname + "\n rundir=" rundir + "\n testdir=" testdir + "\n cachedir=" cachedir + "\n mtcachef=" mtcachef + "\n rccachef=" rccachef) + (cons mtcachef rccachef))) (define (launch:setup-body #!key (force-reread #f) (areapath #f)) (if (and (eq? *configstatus* 'fulldata) *toppath* (not force-reread)) ;; no need to reprocess *toppath* ;; return toppath - (let* ((use-cache (common:use-cache?)) + (let* ((use-cache (common:use-cache?)) ;; BB- use-cache checks *configdat* for use-cache setting. We do not have *configdat*. Bootstrapping problem here. (toppath (or *toppath* areapath (getenv "MT_RUN_AREA_HOME"))) ;; preserve toppath - - (runname (common:args-get-runname)) (target (common:args-get-target)) - (linktree (common:get-linktree)) - (contour #f) ;; NOT READY FOR THIS (args:get-arg "-contour")) (sections (if target (list "default" target) #f)) ;; for runconfigs (mtconfig (or (args:get-arg "-config") "megatest.config")) ;; allow overriding megatest.config - (rundir (if (and runname target linktree) - (conc linktree (if contour (conc "/" contour) "") "/" target "/" runname) - #f)) - - (mtcachef (and rundir (conc rundir "/" ".megatest.cfg-" megatest-version "-" megatest-fossil-hash))) - (rccachef (and rundir (conc rundir "/" ".runconfigs.cfg-" megatest-version "-" megatest-fossil-hash))) - (cancreate (and rundir (common:file-exists? rundir)(file-write-access? rundir) (not (common:in-running-test?))))) - ;; (cxt (hash-table-ref/default *contexts* toppath #f))) - - ;; create our cxt for this area if it doesn't already exist - ;; (if (not cxt)(hash-table-set! *contexts* toppath (make-cxt))) - - ;; (print "runname: " runname " target: " target " mtcachef: " mtcachef " rccachef: " rccachef) + (cachefiles (launch:get-cache-file-paths areapath toppath target mtconfig)) + ;; checking for null cachefiles should not be necessary, I was seeing error car of '(), might be a chicken bug or a red herring ... + (mtcachef (if (null? cachefiles) + #f + (car cachefiles))) ;; (and cachedir (conc cachedir "/" ".megatest.cfg-" megatest-version "-" megatest-fossil-hash))) + (rccachef (if (null? cachefiles) + #f + (cdr cachefiles)))) ;; (and cachedir (conc cachedir "/" ".runconfigs.cfg-" megatest-version "-" megatest-fossil-hash))) + ;; (cancreate (and cachedir (common:file-exists? cachedir)(file-write-access? cachedir) (not (common:in-running-test?))))) (set! *toppath* toppath) ;; This is needed when we are running as a test using CMDINFO as a datasource + ;;(BB> "launch:setup-body -- cachefiles="cachefiles) (cond ;; if mtcachef exists just read it, however we need to assume toppath is available in $MT_RUN_AREA_HOME - ((and mtcachef (common:file-exists? mtcachef) (get-environment-variable "MT_RUN_AREA_HOME") use-cache) - (set! *configdat* (configf:read-alist mtcachef)) + ((and (not force-reread) + mtcachef rccachef + use-cache + (get-environment-variable "MT_RUN_AREA_HOME") + (common:file-exists? mtcachef) + (common:file-exists? rccachef)) + ;;(BB> "launch:setup-body -- cond branch 1 - use-cache") + (set! *configdat* (configf:read-alist mtcachef)) + ;;(BB> "launch:setup-body -- 1 set! *configdat*="*configdat*) (set! *runconfigdat* (configf:read-alist rccachef)) (set! *configinfo* (list *configdat* (get-environment-variable "MT_RUN_AREA_HOME"))) (set! *configstatus* 'fulldata) (set! *toppath* (get-environment-variable "MT_RUN_AREA_HOME")) *toppath*) + ;; there are no existing cached configs, do full reads of the configs and cache them ;; we have all the info needed to fully process runconfigs and megatest.config - (mtcachef + ((and ;; (not force-reread) ;; force-reread is irrelevant in the AND, could however OR it? + mtcachef + rccachef) ;; BB- why are we doing this without asking if caching is desired? + ;;(BB> "launch:setup-body -- cond branch 2") (let* ((first-pass (find-and-read-config ;; NB// sets MT_RUN_AREA_HOME as side effect mtconfig environ-patt: "env-override" given-toppath: toppath pathenvvar: "MT_RUN_AREA_HOME")) @@ -863,11 +913,13 @@ *runconfigdat* #t sections: sections)))) (set! *runconfigdat* first-rundat) (if first-pass ;; (begin + ;;(BB> "launch:setup-body -- \"first-pass\"=first-pass") (set! *configdat* (car first-pass)) + ;;(BB> "launch:setup-body -- 2 set! *configdat*="*configdat*) (set! *configinfo* first-pass) (set! *toppath* (or toppath (cadr first-pass))) ;; use the gathered data unless already have it (set! toppath *toppath*) (if (not *toppath*) (begin @@ -889,26 +941,33 @@ (runconfigdat (begin ;; this read of the runconfigs will see any adjustments made by re-reading megatest.config (for-each (lambda (kt) (setenv (car kt) (cadr kt))) key-vals) (read-config (conc toppath "/runconfigs.config") *runconfigdat* #t ;; consider using runconfig:read some day ... - sections: sections)))) - (if cancreate (configf:write-alist runconfigdat rccachef)) + sections: sections))) + (cachefiles (launch:get-cache-file-paths areapath toppath target mtconfig)) + (mtcachef (car cachefiles)) + (rccachef (cdr cachefiles))) + (if rccachef (configf:write-alist runconfigdat rccachef)) + (if mtcachef (configf:write-alist *configdat* mtcachef)) (set! *runconfigdat* runconfigdat) - (if cancreate (configf:write-alist *configdat* mtcachef)) - (if cancreate (set! *configstatus* 'fulldata)))) + (if (and rccachef mtcachef) (set! *configstatus* 'fulldata)))) ;; no configs found? should not happen but let's try to recover gracefully, return an empty hash-table (set! *configdat* (make-hash-table)) ))) + ;; else read what you can and set the flag accordingly + ;; here we don't have either mtconfig or rccachef (else + ;;(BB> "launch:setup-body -- cond branch 3 - else") (let* ((cfgdat (find-and-read-config (or (args:get-arg "-config") "megatest.config") environ-patt: "env-override" given-toppath: (get-environment-variable "MT_RUN_AREA_HOME") pathenvvar: "MT_RUN_AREA_HOME"))) - (if cfgdat + + (if (and cfgdat (list? cfgdat) (> (length cfgdat) 0) (hash-table? (car cfgdat))) (let* ((toppath (or (get-environment-variable "MT_RUN_AREA_HOME")(cadr cfgdat))) (rdat (read-config (conc toppath ;; convert this to use runconfig:read! "/runconfigs.config") *runconfigdat* #t sections: sections))) (set! *configinfo* cfgdat) (set! *configdat* (car cfgdat)) @@ -916,10 +975,12 @@ (set! *toppath* toppath) (set! *configstatus* 'partial)) (begin (debug:print-error 0 *default-log-port* "No " mtconfig " file found. Giving up.") (exit 2)))))) + ;; COND ends here. + ;; additional house keeping (let* ((linktree (common:get-linktree))) (if linktree (begin (if (not (common:file-exists? linktree)) @@ -935,11 +996,11 @@ exn (begin (debug:print-error 0 *default-log-port* "Something went wrong when trying to create link to linktree at " *toppath*) (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))) (let ((tlink (conc *toppath* "/lt"))) - (if (not (file-exists? tlink)) + (if (not (common:file-exists? tlink)) (create-symbolic-link linktree tlink))))) (begin (debug:print-error 0 *default-log-port* "linktree not defined in [setup] section of megatest.config") ))) (if (and *toppath* @@ -947,14 +1008,22 @@ (begin (setenv "MT_RUN_AREA_HOME" *toppath*) (setenv "MT_TESTSUITENAME" (common:get-testsuite-name))) (begin (debug:print-error 0 *default-log-port* "failed to find the top path to your Megatest area.") - ;;(exit 1) (set! *toppath* #f) ;; force it to be false so we return #f - #f - )) + #f)) + + ;; one more attempt to cache the configs for future reading + (let* ((cachefiles (launch:get-cache-file-paths areapath toppath target mtconfig)) + (mtcachef (car cachefiles)) + (rccachef (cdr cachefiles))) + (if (and rccachef *runconfigdat* (not (common:file-exists? rccachef))) (configf:write-alist *runconfigdat* rccachef)) + (if (and mtcachef *configdat* (not (common:file-exists? mtcachef))) (configf:write-alist *configdat* mtcachef)) + (if (and rccachef mtcachef *runconfigdat* *configdat*) + (set! *configstatus* 'fulldata))) + ;; if have -append-config then read and append here (let ((cfname (args:get-arg "-append-config"))) (if (and cfname (file-read-access? cfname)) (read-config cfname *configdat* #t))) ;; values are added to the hash, no need to do anything special. @@ -1065,11 +1134,11 @@ (begin (debug:print-error 0 *default-log-port* " Failed to remove symlink " lnkpath ((condition-property-accessor 'exn 'message) exn) ", exiting") (exit 1)) (delete-file lnkpath))) - (if (not (or (file-exists? lnkpath) + (if (not (or (common:file-exists? lnkpath) (symbolic-link? lnkpath))) (handle-exceptions exn (begin (debug:print-error 0 *default-log-port* " Failed to create symlink " lnkpath ((condition-property-accessor 'exn 'message) exn) ", exiting") @@ -1090,11 +1159,11 @@ (db:test-get-rundir testinfo) ;; ) ;; ) #f))) (hash-table-set! *toptest-paths* testname curr-test-path) ;; NB// Was this for the test or for the parent in an iterated test? (rmt:general-call 'test-set-rundir-shortdir run-id lnkpath - (if (file-exists? lnkpath) + (if (common:file-exists? lnkpath) ;; (resolve-pathname lnkpath) (common:nice-path lnkpath) lnkpath) testname "" run-id) ;; (rmt:general-call 'test-set-rundir run-id lnkpath testname "") ;; toptest-path) @@ -1129,11 +1198,11 @@ exn (begin (debug:print-error 0 *default-log-port* " Failed to re-create link " lnktarget ((condition-property-accessor 'exn 'message) exn) ", exiting") (exit)) (if (symbolic-link? lnktarget) (delete-file lnktarget)) - (if (not (file-exists? lnktarget)) (create-symbolic-link test-path lnktarget))))) + (if (not (common:file-exists? lnktarget)) (create-symbolic-link test-path lnktarget))))) (if (not (directory? test-path)) (create-directory test-path #t)) ;; this is a hack, I don't know why out of the blue this path does not exist sometimes (if (and test-src-path (directory? test-path)) @@ -1169,14 +1238,15 @@ (define (launch-test test-id run-id run-info keyvals runname test-conf test-name test-path itemdat params) (mutex-lock! *launch-setup-mutex*) ;; setting variables and processing the testconfig is NOT thread-safe, reuse the launch-setup mutex (let* ((item-path (item-list->path itemdat)) (contour #f)) ;; NOT READY FOR THIS (args:get-arg "-contour"))) (let loop ((delta (- (current-seconds) *last-launch*)) - (launch-delay (string->number (or (configf:lookup *configdat* "setup" "launch-delay") "5")))) + (launch-delay (configf:lookup-number *configdat* "setup" "launch-delay" default: 1))) (if (> launch-delay delta) (begin - (debug:print-info 0 *default-log-port* "Delaying launch of " test-name " for " (- launch-delay delta) " seconds") + (if (common:low-noise-print 1200 "test launch delay") ;; every two hours or so remind the user about launch delay. + (debug:print-info 0 *default-log-port* "NOTE: test launches are delayed by " launch-delay " seconds. See megatest.config launch-delay setting to adjust.")) ;; launch of " test-name " for " (- launch-delay delta) " seconds")) (thread-sleep! (- launch-delay delta)) (loop (- (current-seconds) *last-launch*) launch-delay)))) (change-directory *toppath*) (alist->env-vars ;; consolidate this code with the code in megatest.scm for "-execute", *maybe* - the longer they are set the longer each launch takes (must be non-overlapping with the vars) (append @@ -1295,11 +1365,11 @@ (list 'mt-bindir-path mt-bindir-path)))))))) ;; clean out step records from previous run if they exist ;; (rmt:delete-test-step-records run-id test-id) ;; if the dir does not exist we may have a itempath where individual variables are a path, launch anyway - (if (file-exists? work-area) + (if (common:file-exists? work-area) (change-directory work-area)) ;; so that log files from the launch process don't clutter the test dir (cond ;; ((and launcher hosts) ;; must be using ssh hostname ;; (set! fullcmd (append launcher (car hosts)(list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param))) ;; (set! fullcmd (append launcher (car hosts)(list remote-megatest test-sig "-execute" cmdparms)))) Index: lock-queue.scm ================================================================== --- lock-queue.scm +++ lock-queue.scm @@ -5,12 +5,11 @@ ;; ;; This program is distributed WITHOUT ANY WARRANTY; without even the ;; implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR ;; PURPOSE. -(use sqlite3 srfi-18) -(import (prefix sqlite3 sqlite3:)) +(use (prefix sqlite3 sqlite3:) srfi-18) (declare (unit lock-queue)) (declare (uses common)) (declare (uses tasks)) @@ -33,11 +32,11 @@ (let ((fname (lock-queue:db-dat-get-path dbdat))) (system (conc "rm -f " fname "*")))) (define (lock-queue:open-db fname #!key (count 10)) (let* ((actualfname (conc fname ".lockdb")) - (dbexists (file-exists? actualfname)) + (dbexists (common:file-exists? actualfname)) (db (sqlite3:open-database actualfname)) (handler (make-busy-timeout 136000))) (if dbexists (vector db actualfname) (begin @@ -164,12 +163,12 @@ ;; If we've tried ten times and failed there is a serious problem ;; try to remove the lock db and allow it to be recreated (handle-exceptions exn #f - (if (file-exists? journal)(delete-file journal)) - (if (file-exists? fname) (delete-file fname)) + (if (common:file-exists? journal)(delete-file journal)) + (if (common:file-exists? fname) (delete-file fname)) #f)))) (sqlite3:execute (lock-queue:db-dat-get-db dbdat) "DELETE FROM runlocks WHERE test_id=?;" test-id) (sqlite3:finalize! (lock-queue:db-dat-get-db dbdat))))) (define (lock-queue:steal-lock dbdat test-id #!key (count 10)) Index: megatest-version.scm ================================================================== --- megatest-version.scm +++ megatest-version.scm @@ -1,7 +1,7 @@ ;; Always use two or four digit decimal ;; 1.01, 1.02...1.10,1.11,1.1101 ... 1.99,2.00.. (declare (unit megatest-version)) -(define megatest-version 1.6404) +(define megatest-version 1.6502) Index: megatest.config ================================================================== --- megatest.config +++ megatest.config @@ -1,22 +1,54 @@ -[fields] -a text -b text -c text + +## commented out due to a bug in v1.6501 in mtutil +## [fields] +## a text +## b text +## c text +usercode .mtutil.scm +areafilter area-to-run +targtrans generic-target-translator +runtrans generic-runname-translator [setup] -pktsdirs /tmp/pkts /some/other/source +pktsdirs /tmp/mt_pkts /some/other/source [areas] # path-to-area map-target-script(future, optional) -fullrun path=tests/fullrun +# someqa path=../megatestqa/someqa; targtrans=somefunc; areafilter=area-to-run +fullrun path=tests/fullrun; # targtrans is name of scheme proc stored in .mtutil.scm, which lives in PWD where mtutil is run +# the target translator can return: a/target OR (list/of targets/to apply/run) OR #f i.e. run nothing # ext-tests path=ext-tests; targtrans=prefix-contour; -ext-tests path=ext-tests +ext path=ext-tests [contours] # mode-patt/tag-expr -quick selector=QUICKPATT/quick -full areas=fullrun,ext-tests; selector=MAXPATT/ -all areas=fullrun,ext-tests -snazy areas=%; selector=QUICKPATT/ +quick areas=ext; selector=/QUICKPATT +quick2 areafn=check-area; selector=/QUICKPATT +# quick areas=fullrun,ext-tests; selector=QUICKPATT/quick +# full areas=fullrun,ext-tests; selector=MAXPATT/ +# short areas=fullrun,ext-tests; selector=MAXPATT/ +# all areas=fullrun,ext-tests +# snazy selector=QUICKPATT/ + +[nopurpose] + +[access] +ext matt:admin mattw:owner + +[accesstypes] +admin run rerun resume remove set-ss +owner run rerun resume remove +badguy set-ss + +[setup] +maxload 1.2 + +[listeners] +localhost:12345 contact=matt@kiatoa.com +localhost:54321 contact=matt@kiatoa.com + +[listener] +script nbfake echo + Index: megatest.scm ================================================================== --- megatest.scm +++ megatest.scm @@ -11,20 +11,18 @@ ;; (include "megatest-version.scm") ;; fake out readline usage of toplevel-command (define (toplevel-command . a) #f) -(use sqlite3 srfi-1 posix regex regex-case srfi-69 base64 readline apropos json http-client directory-utils rpc typed-records;; (srfi 18) extras) - http-client srfi-18 extras format) ;; zmq extras) +(use (prefix sqlite3 sqlite3:) srfi-1 posix regex regex-case srfi-69 (prefix base64 base64:) + readline apropos json http-client directory-utils typed-records + http-client srfi-18 extras format) ;; Added for csv stuff - will be removed ;; (use sparse-vectors) -(import (prefix sqlite3 sqlite3:)) -(import (prefix base64 base64:)) -(import (prefix rpc rpc:)) (require-library mutils) ;; (use zmq) (declare (uses common)) @@ -54,11 +52,11 @@ (include "db_records.scm") (include "run_records.scm") (include "megatest-fossil-hash.scm") (let ((debugcontrolf (conc (get-environment-variable "HOME") "/.megatestrc"))) - (if (file-exists? debugcontrolf) + (if (common:file-exists? debugcontrolf) (load debugcontrolf))) ;; Disabled help items ;; -rollup : (currently disabled) fill run (set by :runname) with latest test(s) ;; from prior runs with same keys @@ -99,10 +97,11 @@ -runname : required, name for this particular test run -state : Applies to runs, tests or steps depending on context -status : Applies to runs, tests or steps depending on context --modepatt key : load testpatt from in runconfigs instead of default TESTPATT if -testpatt and -tagexpr are not specified -tagexpr tag1,tag2%,.. : select tests with tags matching expression + Test helpers (for use inside tests) -step stepname -test-status : set the state and status of a test (use :state and :status) -setlog logfname : set the path/filename to the final log relative to the test @@ -133,17 +132,18 @@ -list-disks : list the disks available for storing runs -list-targets : list the targets in runconfigs.config -list-db-targets : list the target combinations used in the db -show-config : dump the internal representation of the megatest.config file -show-runconfig : dump the internal representation of the runconfigs.config file - -dumpmode MODE : dump in MODE format instead of sexpr, MODE=json,ini,sexp etc. + -dumpmode MODE : dump in MODE format instead of sexpr, MODE=json,ini,sexp etc. (add -debug 0,9 to see which file contributes each line) -show-cmdinfo : dump the command info for a test (run in test environment) -section sectionName -var varName : for config and runconfig lookup value for sectionName varName -since N : get list of runs changed since time N (Unix seconds) -fields fieldspec : fields to include in json dump; runs:id,runame+tests:testname+steps -sort fieldname : in -list-runs sort tests by this field + -testdata-csv [categorypatt/]varpatt : dump testdata for given category Misc -start-dir path : switch to this directory before running megatest -contour cname : add a level of hierarcy to the linktree and run paths -rebuild-db : bring the database schema up to date @@ -228,10 +228,11 @@ ":state" "-state" ":status" "-status" "-list-runs" + "-testdata-csv" "-testpatt" "--modepatt" "-tagexpr" "-itempatt" "-setlog" @@ -372,11 +373,11 @@ (debug:print-error 0 *default-log-port* "Unrecognised arguments: " (string-intersperse (if (list? remargs) remargs (argv)) " "))) ;; before doing anything else change to the start-dir if provided ;; (if (args:get-arg "-start-dir") - (if (file-exists? (args:get-arg "-start-dir")) + (if (common:file-exists? (args:get-arg "-start-dir")) (let ((fullpath (common:real-path (args:get-arg "-start-dir")))) (setenv "PWD" fullpath) (change-directory fullpath)) (begin (debug:print-error 0 *default-log-port* "non-existant start dir " (args:get-arg "-start-dir") " specified, exiting.") @@ -389,25 +390,35 @@ ;; The watchdog is to keep an eye on things like db sync etc. ;; ;; TODO: for multiple areas, we will have multiple watchdogs; and multiple threads to manage -(define *watchdog* (make-thread common:watchdog "Watchdog thread")) +(define *watchdog* (make-thread + (lambda () + (handle-exceptions + exn + (begin + (print-call-chain) + (print " message: " ((condition-property-accessor 'exn 'message) exn))) + (common:watchdog))) + "Watchdog thread")) ;;(if (not (args:get-arg "-server")) ;; (thread-start! *watchdog*)) ;; if starting a server; wait till we get to running state before kicking off watchdog (let* ((no-watchdog-args '("-list-runs" + "-testdata-csv" "-list-servers" "-server" "-list-disks" "-list-targets" "-show-runconfig" ;;"-list-db-targets" "-show-runconfig" "-show-config" - "-show-cmdinfo")) + "-show-cmdinfo" + "-cleanup-db")) (no-watchdog-args-vals (filter (lambda (x) x) (map args:get-arg no-watchdog-args))) (start-watchdog (null? no-watchdog-args-vals))) ;;(BB> "no-watchdog-args="no-watchdog-args "no-watchdog-args-vals="no-watchdog-args-vals) (if start-watchdog @@ -424,18 +435,21 @@ (exn () (debug:print-error 0 *default-log-port* "Could not open log file for write: "logpath) (define *didsomething* #t) (exit 1)))) - +;; this segment will run launch:setup only if -log is not set. This is fairly safe as servers are not +;; manually started and thus should never be started in a non-megatest area. Thus no need to handle situation +;; where (launch:setup) returns #f? +;; (if (or (args:get-arg "-log")(args:get-arg "-server")) ;; redirect the log always when a server (handle-exceptions exn (begin - (print "ERROR: Failed to switch to log output. " ((conition-property-accessor 'exn 'message) exn)) + (print "ERROR: Failed to switch to log output. " ((condition-property-accessor 'exn 'message) exn)) ) - (let* ((tl (or (args:get-arg "-log")(launch:setup))) ;; run launch:setup if -server + (let* ((tl (or (args:get-arg "-log")(launch:setup))) ;; run launch:setup if -server, ensure we do NOT run launch:setup if -log specified (logf (or (args:get-arg "-log") ;; use -log unless we are a server, then craft a logfile name (conc tl "/logs/server-" (current-process-id) "-" (get-host-name) ".log"))) (oup (open-logfile logf))) (if (not (args:get-arg "-log")) (hash-table-set! args:arg-hash "-log" logf)) ;; fake out future queries of -log @@ -453,11 +467,11 @@ (let* ((htmlviewercmd (or (configf:lookup *configdat* "setup" "htmlviewercmd") (common:which '("firefox" "arora")))) (install-home (common:get-install-area)) (manual-html (conc install-home "/share/docs/megatest_manual.html"))) (if (and install-home - (file-exists? manual-html)) + (common:file-exists? manual-html)) (system (conc "(" htmlviewercmd " " manual-html " ) &")) (system (conc "(" htmlviewercmd " http://www.kiatoa.com/cgi-bin/fossils/megatest/doc/tip/docs/manual/megatest_manual.html ) &"))) (exit))) (if (args:get-arg "-version") @@ -494,10 +508,24 @@ ;; for some switches always print the command to stderr ;; (if (args:any? "-run" "-runall" "-remove-runs" "-set-state-status") (debug:print 0 *default-log-port* (string-intersperse (argv) " "))) + +;; some switches imply homehost. Exit here if not on homehost +;; +(let ((homehost-required (list "-cleanup-db" "-server"))) + (if (apply args:any? homehost-required) + (if (not (common:on-homehost?)) + (for-each + (lambda (switch) + (if (args:get-arg switch) + (begin + (debug:print 0 *default-log-port* "ERROR: you must be on the homehost to run with " switch + ", you can move homehost by removing the .homehost file but this will disrupt any runs in progress.") + (exit 1)))) + homehost-required)))) ;;====================================================================== ;; Misc setup stuff ;;====================================================================== @@ -534,11 +562,15 @@ ;; handle a clean-cache request as early as possible ;; (if (args:get-arg "-clean-cache") (let ((toppath (launch:setup))) (set! *didsomething* #t) ;; suppress the help output. - (runs:clean-cache (getenv "MT_TARGET")(args:get-arg "-runname") toppath))) + (runs:clean-cache (or (getenv "MT_TARGET") + (args:get-arg "-target") + (args:get-arg "-remtarg")) + (args:get-arg "-runname") + toppath))) (if (args:get-arg "-env2file") (begin (save-environment-as-files (args:get-arg "-env2file")) (set! *didsomething* #t))) @@ -690,11 +722,11 @@ (else (loop row (+ col 1) (append curr-row (list val)) result))))))))) (hash-table-keys results)))) ((sqlite3) (let* ((db-file (or out-file (pathname-file input-db))) - (db-exists (file-exists? db-file)) + (db-exists (common:file-exists? db-file)) (db (sqlite3:open-database db-file))) (if (not db-exists)(sqlite3:execute db "CREATE TABLE data (sheet,section,var,val);")) (configf:map-all-hier-alist data (lambda (sheetname sectionname varname val) @@ -838,11 +870,11 @@ (let* ((rundir (if (and (getenv "MT_LINKTREE")(getenv "MT_TARGET")(getenv "MT_RUNNAME")) (conc (getenv "MT_LINKTREE") "/" (getenv "MT_TARGET") "/" (getenv "MT_RUNNAME")) #f)) (cfgf (if rundir (conc rundir "/.runconfig." megatest-version "-" megatest-fossil-hash) #f))) (if (and cfgf - (file-exists? cfgf) + (common:file-exists? cfgf) (file-write-access? cfgf) (common:use-cache?)) (configf:read-alist cfgf) (let* ((keys (rmt:get-keys)) (target (common:args-get-target)) @@ -861,12 +893,13 @@ (file-write-access? rundir)) (begin (if (not (common:in-running-test?)) (configf:write-alist data cfgf)) ;; force re-read of megatest.config - this resolves circular references between megatest.config - (launch:setup force: #t) - (launch:cache-config))) ;; we can safely cache megatest.config since we have a valid runconfig + (launch:setup force-reread: #t) + ;; (launch:cache-config) ;; there are two independent config cache locations, turning this one off for now. MRW. + )) ;; we can safely cache megatest.config since we have a valid runconfig data)))) (if (args:get-arg "-show-runconfig") (let ((tl (launch:setup))) (push-directory *toppath*) @@ -1023,10 +1056,114 @@ (if indx (if (>= indx (vector-length datavec)) #f ;; index too high, should raise an error I suppose (vector-ref datavec indx)) #f))) + + + + + +(when (args:get-arg "-testdata-csv") + (if (launch:setup) + (let* ((keys (rmt:get-keys)) ;; (db:get-keys dbstruct)) + (runpatt (or (args:get-arg "-runname") "%")) + (testpatt (common:args-get-testpatt #f)) + (datapatt (args:get-arg "-testdata-csv")) + (match-data (string-match "^([^/]+)/(.*)" (args:get-arg "-testdata-csv"))) + (categorypatt (if match-data (list-ref match-data 1) "%")) + (setvarpatt (if match-data + (list-ref match-data 2) + (args:get-arg "-testdata-csv"))) + (runsdat (rmt:get-runs-by-patt keys (or runpatt "%") + (common:args-get-target) #f #f '("id" "runname" "state" "status" "owner" "event_time" "comment") 0)) + (header (db:get-header runsdat)) + (access-mode (db:get-access-mode)) + (testpatt (common:args-get-testpatt #f)) + (fields-spec (if (args:get-arg "-fields") + (extract-fields-constraints (args:get-arg "-fields")) + (list (cons "runs" (append keys (list "id" "runname" "state" "status" "owner" "event_time" "comment" "fail_count" "pass_count"))) + (cons "tests" db:test-record-fields) ;; "id" "testname" "test_path") + (list "steps" "id" "stepname")))) + (tests-spec (let ((t (alist-ref "tests" fields-spec equal?))) + (if (and t (null? t)) ;; all fields + db:test-record-fields + t))) + (adj-tests-spec (delete-duplicates (if tests-spec (cons "id" tests-spec) db:test-record-fields))) + (test-field-index (make-hash-table)) + (runs (db:get-rows runsdat)) + ) + (if (and tests-spec (not (null? tests-spec))) ;; do some validation and processing of the test-spec + (let ((invalid-tests-spec (filter (lambda (x)(not (member x db:test-record-fields))) tests-spec))) + (if (null? invalid-tests-spec) + ;; generate the lookup map test-field-name => index-number + (let loop ((hed (car adj-tests-spec)) + (tal (cdr adj-tests-spec)) + (idx 0)) + (hash-table-set! test-field-index hed idx) + (if (not (null? tal))(loop (car tal)(cdr tal)(+ idx 1)))) + (begin + (debug:print-error 0 *default-log-port* "Invalid test fields specified: " (string-intersperse invalid-tests-spec ", ")) + (exit))))) + (let* ((table-header (string-split "target,run,test,itempath,category,var,value,comment" ",")) + (table-rows + (apply append (map + (lambda (run) + (let* ((target (string-intersperse (map (lambda (x) + (db:get-value-by-header run header x)) + keys) "/")) + (statuses (string-split (or (args:get-arg "-status") "") ",")) + (run-id (db:get-value-by-header run header "id")) + (runname (db:get-value-by-header run header "runname")) + (states (string-split (or (args:get-arg "-state") "") ",")) + (tests (if tests-spec + (db:dispatch-query access-mode rmt:get-tests-for-run db:get-tests-for-run run-id testpatt states statuses #f #f #f 'testname 'asc ;; (db:get-tests-for-run dbstruct run-id testpatt '() '() #f #f #f 'testname 'asc + ;; use qryvals if test-spec provided + (if tests-spec + (string-intersperse adj-tests-spec ",") + ;; db:test-record-fields + #f) + #f + 'normal) + '()))) + (apply append + (map + (lambda (test) + (let* ( + (test-id (if (member "id" tests-spec)(get-value-by-fieldname test test-field-index "id" ) #f)) ;; (db:test-get-id test)) + (testname (if (member "testname" tests-spec)(get-value-by-fieldname test test-field-index "testname" ) #f)) ;; (db:test-get-testname test)) + (itempath (if (member "item_path" tests-spec)(get-value-by-fieldname test test-field-index "item_path" ) #f)) ;; (db:test-get-item-path test)) + (fullname (conc testname + (if (equal? itempath "") + "" + (conc "/" itempath )))) + (testdat-raw (map vector->list (rmt:read-test-data* run-id test-id categorypatt setvarpatt))) + (testdat (filter + (lambda (x) + (not (equal? "logpro" + (list-ref x 10)))) + testdat-raw))) + (map + (lambda (item) + (receive (id test_id category + variable value expected + tol units comment status type) + (apply values item) + (list target runname testname itempath category variable value comment))) + testdat))) + tests)))) + runs)))) + (print (string-join table-header ",")) + (for-each (lambda(table-row) + (print (string-join (map ->string table-row) ","))) + + + table-rows)))) + (set! *didsomething* #t) + (set! *time-to-exit* #t)) + + ;; NOTE: list-runs and list-db-targets operate on local db!!! ;; ;; IDEA: megatest list -runname blah% ... ;; @@ -1160,11 +1297,11 @@ (lambda (test) (common:debug-handle-exceptions #f exn (begin (debug:print-error 0 *default-log-port* "Bad data in test record? " test) - (print "exn=" (condition->list exn)) + (debug:print-error 5 *default-log-port* "exn=" (condition->list exn)) (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) (print-call-chain (current-error-port))) (let* ((test-id (if (member "id" tests-spec)(get-value-by-fieldname test test-field-index "id" ) #f)) ;; (db:test-get-id test)) (testname (if (member "testname" tests-spec)(get-value-by-fieldname test test-field-index "testname" ) #f)) ;; (db:test-get-testname test)) (itempath (if (member "item_path" tests-spec)(get-value-by-fieldname test test-field-index "item_path" ) #f)) ;; (db:test-get-item-path test)) @@ -1403,61 +1540,63 @@ (if (or (args:get-arg "-runall") (args:get-arg "-run") (args:get-arg "-rerun-clean") (args:get-arg "-rerun-all") (args:get-arg "-runtests")) - (general-run-call - "-runall" - "run all tests" - (lambda (target runname keys keyvals) - (if (args:get-arg "-rerun-clean") ;; first set states/statuses correct - (let ((states (or (configf:lookup *configdat* "validvalues" "cleanrerun-states") - "KILLREQ,KILLED,UNKNOWN,INCOMPLETE,STUCK,NOT_STARTED")) - (statuses (or (configf:lookup *configdat* "validvalues" "cleanrerun-statuses") - "FAIL,INCOMPLETE,ABORT,CHECK"))) - (hash-table-set! args:arg-hash "-preclean" #t) - (runs:operate-on 'set-state-status - target - (common:args-get-runname) ;; (or (args:get-arg "-runname")(args:get-arg ":runname")) - "%" ;; (common:args-get-testpatt #f) ;; (args:get-arg "-testpatt") - state: states - ;; status: statuses - new-state-status: "NOT_STARTED,n/a") - (runs:clean-cache target runname *toppath*) - (runs:operate-on 'set-state-status - target - (common:args-get-runname) ;; (or (args:get-arg "-runname")(args:get-arg ":runname")) - "%" ;; (common:args-get-testpatt #f) ;; (args:get-arg "-testpatt") - ;; state: states - status: statuses - new-state-status: "NOT_STARTED,n/a"))) - ;; RERUN ALL - (if (args:get-arg "-rerun-all") ;; first set states/statuses correct - (begin - (hash-table-set! args:arg-hash "-preclean" #t) - (runs:operate-on 'set-state-status - target - (common:args-get-runname) ;; (or (args:get-arg "-runname")(args:get-arg ":runname")) - "%" ;; (common:args-get-testpatt #f) ;; (args:get-arg "-testpatt") - state: #f - ;; status: statuses - new-state-status: "NOT_STARTED,n/a") - (runs:clean-cache target runname *toppath*) - (runs:operate-on 'set-state-status - target - (common:args-get-runname) ;; (or (args:get-arg "-runname")(args:get-arg ":runname")) - "%" ;; (common:args-get-testpatt #f) ;; (args:get-arg "-testpatt") - ;; state: states - status: #f - new-state-status: "NOT_STARTED,n/a"))) - (runs:run-tests target - runname - #f ;; (common:args-get-testpatt #f) - ;; (or (args:get-arg "-testpatt") - ;; "%") - user - args:arg-hash)))) + (let ((need-clean (or (args:get-arg "-rerun-clean") + (args:get-arg "-rerun-all")))) + (general-run-call + "-runall" + "run all tests" + (lambda (target runname keys keyvals) + (if (args:get-arg "-rerun-clean") ;; first set states/statuses correct + (let ((states (or (configf:lookup *configdat* "validvalues" "cleanrerun-states") + "KILLREQ,KILLED,UNKNOWN,INCOMPLETE,STUCK,NOT_STARTED")) + (statuses (or (configf:lookup *configdat* "validvalues" "cleanrerun-statuses") + "FAIL,INCOMPLETE,ABORT,CHECK"))) + (hash-table-set! args:arg-hash "-preclean" #t) + (runs:operate-on 'set-state-status + target + (common:args-get-runname) ;; (or (args:get-arg "-runname")(args:get-arg ":runname")) + "%" ;; (common:args-get-testpatt #f) ;; (args:get-arg "-testpatt") + state: states + ;; status: statuses + new-state-status: "NOT_STARTED,n/a") + (runs:clean-cache target runname *toppath*) + (runs:operate-on 'set-state-status + target + (common:args-get-runname) ;; (or (args:get-arg "-runname")(args:get-arg ":runname")) + "%" ;; (common:args-get-testpatt #f) ;; (args:get-arg "-testpatt") + ;; state: states + status: statuses + new-state-status: "NOT_STARTED,n/a"))) + ;; RERUN ALL + (if (args:get-arg "-rerun-all") ;; first set states/statuses correct + (begin + (hash-table-set! args:arg-hash "-preclean" #t) + (runs:operate-on 'set-state-status + target + (common:args-get-runname) ;; (or (args:get-arg "-runname")(args:get-arg ":runname")) + "%" ;; (common:args-get-testpatt #f) ;; (args:get-arg "-testpatt") + state: #f + ;; status: statuses + new-state-status: "NOT_STARTED,n/a") + (runs:clean-cache target runname *toppath*) + (runs:operate-on 'set-state-status + target + (common:args-get-runname) ;; (or (args:get-arg "-runname")(args:get-arg ":runname")) + "%" ;; (common:args-get-testpatt #f) ;; (args:get-arg "-testpatt") + ;; state: states + status: #f + new-state-status: "NOT_STARTED,n/a"))) + (runs:run-tests target + runname + #f ;; (common:args-get-testpatt #f) + ;; (or (args:get-arg "-testpatt") + ;; "%") + user + args:arg-hash))))) ;;====================================================================== ;; run one test ;;====================================================================== @@ -1559,11 +1698,11 @@ (let* ((keys (rmt:get-keys)) ;; db:test-get-paths must not be run remote (paths (tests:test-get-paths-matching keys target (args:get-arg "-test-files")))) (set! *didsomething* #t) (for-each (lambda (path) - (if (file-exists? path) + (if (common:file-exists? path) (print path))) paths))) ;; else do a general-run-call (general-run-call "-test-files" @@ -1849,20 +1988,22 @@ (if (not (launch:setup)) (begin (debug:print 0 *default-log-port* "Failed to setup, exiting") (exit 1))) ;; keep this one local - (open-run-close patch-db #f) + ;; (open-run-close patch-db #f) + (let ((dbstruct (db:setup #f areapath: *toppath*))) + (common:cleanup-db dbstruct full: #t)) (set! *didsomething* #t))) (if (args:get-arg "-cleanup-db") (begin (if (not (launch:setup)) (begin (debug:print 0 *default-log-port* "Failed to setup, exiting") (exit 1))) - (let ((dbstruct (db:setup *toppath*))) + (let ((dbstruct (db:setup #f areapath: *toppath*))) (common:cleanup-db dbstruct)) (set! *didsomething* #t))) (if (args:get-arg "-mark-incompletes") (begin @@ -1917,11 +2058,11 @@ (args:get-arg "-repl") (args:get-arg "-load")) (let* ((toppath (launch:setup)) (dbstruct (if (and toppath (common:on-homehost?)) - (db:setup) + (db:setup #t) #f))) ;; make-dbr:dbstruct path: toppath local: (args:get-arg "-local")) #f))) (if *toppath* (cond ((getenv "MT_RUNSCRIPT") ;; How to run megatest scripts @@ -2006,11 +2147,11 @@ ;; ;; ;; redo me (set! *didsomething* #t))) (if (args:get-arg "-import-megatest.db") (begin (db:multi-db-sync - (db:setup) + (db:setup #f) 'killservers 'dejunk 'adj-testids 'old2new ;; 'new2old @@ -2018,11 +2159,11 @@ (set! *didsomething* #t))) (if (args:get-arg "-sync-to-megatest.db") (begin (db:multi-db-sync - (db:setup) + (db:setup #f) 'new2old ) (set! *didsomething* #t))) (if (args:get-arg "-sync-to") ADDED minimt/Makefile Index: minimt/Makefile ================================================================== --- /dev/null +++ minimt/Makefile @@ -0,0 +1,12 @@ +minimt : minimt.scm db.scm setup.scm direct.scm + csc minimt.scm + +run : minimt + export PATH="$(PWD)":$(PATH) ; minimt runrun foo/bar run1 + +runseq : clean run + sleep 5;tail -F runtest/*log + +clean : + rm -rf runtest/* + ADDED minimt/db.scm Index: minimt/db.scm ================================================================== --- /dev/null +++ minimt/db.scm @@ -0,0 +1,173 @@ +;; pretend to be a simplified Megatest + +(use sql-de-lite defstruct) + +;; init the db - NOTE: takes a db NOT a dbconn +;; +(define (init-db db) + (with-transaction + db + (lambda () + (for-each + (lambda (qrystr) + (exec (sql db qrystr))) + '("CREATE TABLE IF NOT EXISTS runs + (id INTEGER PRIMARY KEY, + target TEXT NOT NULL, + run_name TEXT NOT NULL, + state TEXT NOT NULL, + status TEXT NOT NULL, + CONSTRAINT runs_constraint UNIQUE (run_name));" + "CREATE TABLE IF NOT EXISTS tests + (id INTEGER PRIMARY KEY, + run_id INTEGER NOT NULL, + test_name TEXT NOT NULL, + state TEXT NOT NULL, + status TEXT NOT NULL, + start_time INTEGER DEFAULT (strftime('%s','now')), + end_time INTEGER DEFAULT -1, + CONSTRAINT tests_constraint UNIQUE (run_id,test_name));" + "CREATE TABLE IF NOT EXISTS steps + (id INTEGER PRIMARY KEY, + test_id INTEGER NOT NULL, + step_name TEXT NOT NULL, + state TEXT NOT NULL, + status TEXT NOT NULL, + CONSTRAINT step_constraint UNIQUE (test_id,step_name));"))))) + +(defstruct dbconn-dat + dbh ;; the database handle + writeable ;; do we have write access? + path ;; where the db lives + name ;; name of the db + ) + +;; open the database, return a dbconn struct +(define (open-create-db path fname init) + (let* ((fullname (conc path "/" fname)) + (already-exists (file-exists? fullname)) + (write-access (and (file-write-access? path) + (or (not already-exists) + (and already-exists + (file-write-access? fullname))))) + (db (if (or already-exists write-access) + (open-database fullname) + (begin + (print "FATAL: No existing db and no write access thus cannot create " fullname) ;; no db and no write access cannot proceed. + (exit 1)))) + (dbconn (make-dbconn-dat))) + (set-busy-handler! db (busy-timeout 120000)) ;; set a busy timeout + (exec (sql db "PRAGMA synchronous=0;")) + (if (and init write-access (not already-exists)) + (init db)) + (dbconn-dat-dbh-set! dbconn db) + (dbconn-dat-writeable-set! dbconn write-access) + (dbconn-dat-path-set! dbconn path) + (dbconn-dat-name-set! dbconn fname) + dbconn)) + +(define-inline (get-db dbconn) + (dbconn-dat-dbh dbconn)) + +;; RUNS + +;; create a run +(define (create-run dbconn target run-name) + (exec (sql (get-db dbconn) "INSERT INTO runs (run_name,target,state,status) VALUES (?,?,'NEW','na');") + run-name target)) + +;; get a run id +(define (get-run-id dbconn target run-name) + (first-column (query fetch (sql (get-db dbconn) "SELECT id FROM runs WHERE target=? AND run_name=?;") + target run-name))) + +;; TESTS + +(defstruct test-dat + id + run-id + test-name + state + status) + +;; create a test +(define (create-test dbconn run-id test-name) + (exec (sql (get-db dbconn) "INSERT INTO tests (run_id,test_name,state,status) VALUES (?,?,'NOT_STARTED','na');") + run-id test-name)) + +;; get a test id +(define (get-test-id dbconn run-id test-name) + (first-column (query fetch (sql (get-db dbconn) "SELECT id FROM tests WHERE run_id=? AND test_name=?;") + run-id test-name))) + +(define-inline (test-row->test-dat row) + (make-test-dat + id: (list-ref row 0) + run-id: (list-ref row 1) + test-name: (list-ref row 2) + state: (list-ref row 3) + status: (list-ref row 4))) + +;; get the data for given test-id +(define (test-get-record dbconn test-id) + (let* ((row (query fetch-row (sql (get-db dbconn) "SELECT id,run_id,test_name,state,status FROM tests WHERE test_id=?;") + test-id))) + (test-row->test-dat row))) + +;; get a bunch of tests data +(define (test-get-tests dbconn run-ids test-name-patt) + (let* ((rows (query fetch-rows + (sql (get-db dbconn) + (conc "SELECT id,run_id,test_name,state,status FROM tests WHERE test_name LIKE ? AND run_id IN (" + (string-intersperse (map conc run-ids) ",") ");")) + test-name-patt))) + (map test-row->test-dat rows))) + +(define (test-set-state-status dbconn test-id new-state new-status) + (exec (sql (get-db dbconn) "UPDATE tests SET state=?,status=?,end_time=? WHERE id=?;") + new-state new-status (current-seconds) test-id)) + +;; STEPS + +;; create a step +(define (create-step dbconn test-id step-name) + (exec (sql (get-db dbconn) "INSERT INTO steps (test_id,step_name,state,status) VALUES (?,?,'NOT_STARTED','na');") + test-id step-name)) + +;; get a step id +(define (get-step-id dbconn test-id step-name) + (first-column (query fetch (sql (get-db dbconn) "SELECT id FROM steps WHERE test_id=? AND step_name=?;") + test-id step-name))) + +(define (step-set-state-status dbconn step-id new-state new-status) + (exec (sql (get-db dbconn) "UPDATE steps SET state=?,status=? WHERE id=?;") + new-state new-status step-id)) + +;;====================================================================== +;; Statistics gathering +;;====================================================================== + +(define *stats* (make-hash-table)) + +(define (update-stats key duration) + (let ((rec (or (hash-table-ref/default *stats* key #f) + (let ((new (vector 0 0 0))) + (hash-table-set! *stats* key new) + new)))) + (vector-set! rec 0 (+ (vector-ref rec 0) 1)) ;; num calls + (vector-set! rec 1 (+ (vector-ref rec 1) duration)) ;; total duration + (if (> duration (vector-ref rec 2) ) + (vector-set! rec 2 duration)))) + +(define (statwrap name proc) + (lambda params + (let ((start-time (current-milliseconds)) + (res (apply proc params))) + (update-stats name (- (current-milliseconds) start-time)) + res))) + +(define (print-stats statdat) + (hash-table-for-each + statdat + (lambda (key val) + (print key " count: " (vector-ref val 0) " avg: " (/ (vector-ref val 1)(vector-ref val 0)) " max: " (vector-ref val 2))))) ADDED minimt/direct.scm Index: minimt/direct.scm ================================================================== --- /dev/null +++ minimt/direct.scm @@ -0,0 +1,11 @@ +;; direct API, call the db calls directly +(define rmt:create-run (statwrap 'create-run create-run)) +(define rmt:create-step (statwrap 'create-step create-step)) +(define rmt:create-test (statwrap 'create-test create-test)) +(define rmt:get-test-id (statwrap 'get-test-id get-test-id)) +(define rmt:get-run-id (statwrap 'get-run-id get-run-id)) +(define rmt:open-create-db (statwrap 'open open-create-db)) +(define rmt:step-set-state-status (statwrap 'step-set-state-status step-set-state-status)) +(define rmt:test-set-state-status (statwrap 'test-set-state-status test-set-state-status)) +(define rmt:test-get-tests (statwrap 'test-get-tests test-get-tests)) + ADDED minimt/minimt.scm Index: minimt/minimt.scm ================================================================== --- /dev/null +++ minimt/minimt.scm @@ -0,0 +1,86 @@ +(use posix) + +(include "db.scm") + +;; define following in setup.scm +;; *remotehost* => host for "tests" +;; *homehost* => host for servers +;; *homepath* => directory from which to run +;; *numtests* => how many tests to simulate for each run +;; *numruns* => how many runs to simulate +;; +(include "setup.scm") + +(include "direct.scm") ;; direct db calls + +;; RUN A TEST +(define (run-test dbconn run-id test-name) + (rmt:create-test dbconn run-id test-name) + (let ((test-id (rmt:get-test-id dbconn run-id test-name))) + (rmt:test-set-state-status dbconn test-id "LAUNCHED" "na") + (thread-sleep! *launchdelay*) + (rmt:test-set-state-status dbconn test-id "RUNNING" "na") + (let loop ((step-num 0)) + (let ((step-name (conc "step" step-num))) + (rmt:create-step dbconn test-id step-name) + (let ((step-id (get-step-id dbconn test-id step-name))) + (rmt:step-set-state-status dbconn step-id "START" -1) + (thread-sleep! *stepdelay*) + (rmt:step-set-state-status dbconn step-id "END" 0) + (print" STEP: " step-name " done."))) + (if (< step-num *numsteps*) + (loop (+ step-num 1)))) + ;; we will do a large but bogus read to simulate the logic in Megatest + (rmt:test-get-tests dbconn `(,run-id) "%") + (rmt:test-set-state-status dbconn test-id "COMPLETED" (if (> (random 10) 2) "PASS" "FAIL")) + (print "TEST: " test-name " done.") + (print "Stats:") + (print-stats *stats*) + test-id)) + +;; RUN A RUN +(define (run-run dbconn target run-name num-tests) + (rmt:create-run dbconn target run-name) + (let ((run-id (rmt:get-run-id dbconn target run-name))) + (let loop ((test-num 0)) + (system (conc "NBFAKE_LOG=test-" test-num "-run-id-" run-id ".log NBFAKE_HOST=" *remotehost* " nbfake minimt runtest " run-id " test-" test-num)) + (if (< test-num num-tests) + (loop (+ test-num 1)))))) + +;; Do what is asked +(let ((args (cdr (argv)))) + (if (< (length args) 1) + (print + "Usage: minimt [options]" " + runtest run-id testname + runrun target runname") + (let ((cmd (car args)) + (dbconn (rmt:open-create-db *homepath* "mt.db" init-db))) + (thread-sleep! 0.5) ;; be sure the db is written out to disk? Should really not be needed. + (change-directory *homepath*) + (case (string->symbol cmd) + ((runtest) + (let ((run-id (string->number (cadr args))) + (test-name (caddr args))) + (print "Launching test " test-name " for run-id " run-id) + (run-test dbconn run-id test-name))) + ((runrun) + (let ((target (cadr args)) + (run-name (caddr args))) + (run-run dbconn target run-name *numtests*) + (print "Use: sqlite3 runtest/mt.db 'select max(end_time)-min(start_time) from tests;' to see the total run time") + )) + ((runall) + (for-each + (lambda (target) + (let loop ((run-num 0)) + (thread-sleep! *rundelay*) + (system (conc "NBFAKE_LOG=run-" target "-" run-num ".log nbfake minimt runrun " target " run-" run-num)) + (if (< run-num *numruns*) + (loop (+ run-num 1))))) + *targets*)) + ((server) + (start-server dbconn)) + (else + (print "Command: " cmd " not recognised. Run without params to see help."))) + (close-database (dbconn-dat-dbh dbconn))))) ADDED minimt/queued.scm Index: minimt/queued.scm ================================================================== --- /dev/null +++ minimt/queued.scm @@ -0,0 +1,209 @@ + +(use nanomsg defstruct srfi-18) + +;;====================================================================== +;; Commands +;;====================================================================== + +(define *commands* (make-hash-table)) + +(defstruct cmd + key + proc + ctype ;; command type; 'r (read), 'w (write) or 't (transaction) + ) + +(define (register-command key ctype proc) + (hash-table-set! *commands* + key + (make-cmd key: key ctype: ctype proc: proc))) + +(define (get-proc key) + (cmd-proc (hash-table-ref key *commands*))) + +(for-each + (lambda (dat) + (apply register-command dat)) + `( (create-run w ,create-run) + (create-step w ,create-step) + (create-test w ,create-test) + (get-test-id r ,get-test-id) + (get-run-id r ,get-run-id) + ;; (open-db w ,open-create-db) + (step-set-ss w ,step-set-state-status) + (test-set-ss w ,test-set-state-status) + (test-get-tests r ,test-get-tests) )) + +;;====================================================================== +;; Server/client stuff +;;====================================================================== + +(define-inline (encode data) + (with-output-to-string + (lambda () + (write data)))) + +(define-inline (decode data) + (with-input-from-string + data + (lambda () + (read)))) + +;;====================================================================== +;; Command queue +;;====================================================================== + +(defstruct qitem + command + params + host-port) + +(define *cmd-queue* '()) +(define *queue-mutex* (make-mutex)) + +(define (queue-push cmddat) + (mutex-lock! *queue-mutex*) + (set! *cmd-queue* (cons cmddat *cmd-queue*)) + (mutex-unlock! *queue-mutex*)) + +;; get all the cmds of type ctype and return them, also remove them from the queue +(define (queue-take ctype) + (mutex-lock! *queue-mutex*) + (let ((res (filter (lambda (x)(eq? (cmd-ctype x) ctype)) *cmd-queue*)) + (rem (filter (lambda (x)(not (eq? (cmd-ctype x) ctype))) *cmd-queue*))) + (set! *queue* rem) + (mutex-unlock! *queue-mutex*) + res)) + +(define (queue-process-commands dbconn commands) + (for-each + (lambda (qitem) + (let ((soc (request-connect (qitem-host-port qitem))) ;; we will be sending the data back to host-port via soc + (cmd (hash-table-ref/default *commands* (qitem-command qitem) #f))) + (if cmd + (let* ((res (apply (get-proc cmd) dbconn (qitem-params qitem))) + (pkg (encode `((r . ,res))))) + (nn-send soc pkg) + (if (not (eq? (nn-recv soc)) "ok") + (print "Client failed to receive properly the data from " cmd " request")))))) + commands)) + +;; the continuously running queue processor +;; +(define ((queue-processor dbconn)) + (let loop () + (queue-process-commands dbconn (queue-take 'r)) ;; reads first, probably largest numbers of them + (queue-process-commands dbconn (queue-take 'w)) ;; writes next + (queue-process-commands dbconn (queue-take 't)) ;; lastly process transactions + (thread-sleep! 0.2) ;; open up the db for any other processes to access + (loop))) + +;;====================================================================== +;; Client stuff +;;====================================================================== + +;; client struct +(defstruct client + host-port + socket + last-access) + +(define *clients* (make-hash-table)) ;; host:port -> client struct +(define *client-mutex* (make-mutex)) + +;; add a channel or return existing channel, this is a normal req +;; +(define (request-connect host-port) + (mutex-lock! *client-mutex*) + (let* ((curr (hash-table-ref/default *clients* host-port #f))) + (if curr + (begin + (mutex-unlock! *client-mutex*) + curr) + (let ((req (nn-socket 'req))) + (nn-connect req host-port) ;; "inproc://test") + (hash-table-set! *clients* host-port req) + (mutex-unlock! *client-mutex*) + req)))) + +;; open up a channel to the server and send a package of info for the server to act on +;; host-port needs to be found and provided +;; +(define (generic-db-access host-port) + (let* ((soc (request-connect host-port)) + ;; NEED *MY* host/port also to let the server know where to send the results + ))) + + +(define (client-send-receive soc msg) + (nn-send soc msg) + (nn-recv soc)) + +;;====================================================================== +;; Server +;;====================================================================== + +(defstruct srvdat + host + port + soc) + +;; remember, everyone starts a server, both client and the actual server alike. +;; clients start a server for the server to return results to. +;; +(define (start-raw-server #!key (given-host-name #f)) + (let ((srvdat (let loop ((portnum 10000)) + (handle-exceptions + exn + (if (< portnum 64000) + (loop (+ portnum 1)) + #f) + (let* ((rep (nn-socket 'rep))) + (nn-bind rep (conc "tcp://*:" portnum)) ;; "inproc://test") + (make-srvdat port: portnum soc: rep))))) + (host-name (or give-host-name (get-host-name))) + (soc (srvdat-soc srvdat))) + (srvdat-host-set! srvdat host-name) + srvdat)) + +;; The actual *server* side server +;; +(define (start-server dbconn #!key (given-host-name #f)) + (let* ((srvdat (start-raw-server given-host-name: given-host-name)) + (host-name (srvdat-host srvdat)) + (soc (srvdat-soc srvdat))) + + ;; start the queue processor + (thread-start! (queue-processory dbconn) "Queue processor") + ;; msg is an alist + ;; 'r host:port <== where to return the data + ;; 'p params <== data to apply the command to + ;; 'e j|s|l <== encoding of the params. default is s (sexp), if not specified is assumed to be default + ;; 'c command <== look up the function to call using this key + ;; + (let loop ((msg-in (nn-recv soc))) + (if (not (equal? msg-in "quit")) + (let* ((dat (decode msg-in)) + (host-port (alist-ref 'r dat)) ;; this is for the reverse req rep where the server is a client of the original client + (params (alist-ref 'p dat)) + (command (let ((c (alist-ref 'c dat)))(if c (string->symbol c) #f))) + (all-good (and host-port params command (hash-table-exists? *commands* command)))) + (if all-good + (let ((cmddat (make-qitem + command: command + host-port: host-port + params: params))) + (queue-push cmddat) ;; put request into the queue + (nn-send soc "queued")) ;; reply with "queued" + (print "ERROR: BAD request " dat)) + (loop (nn-recv soc))))) + (nn-close soc))) + +;;====================================================================== +;; Gasket layer +;;====================================================================== + +(define rmt:open-create-db open-create-db) +(define (rmt:create-run . params) + + ADDED minimt/setup.scm Index: minimt/setup.scm ================================================================== --- /dev/null +++ minimt/setup.scm @@ -0,0 +1,17 @@ +(define *remotehost* "orion") +(define *homehost* "zeus") +(define *homepath* "/nfs/phoebe/disk1/home/mfs_matt/data/megatest/minimt/runtest") +(define *numsteps* 20) +(define *numtests* 20) +(define *numruns* 5) +(define *targets* '("targ1")) +(define *testdelay* 0) +(define *rundelay* 0) +(define *launchdelay* 0) +(define *stepdelay* 0) + +(use trace) +(trace-call-sites #t) +(trace +;; open-create-db + ) Index: mlaunch.scm ================================================================== --- mlaunch.scm +++ mlaunch.scm @@ -15,12 +15,11 @@ ;; take jobs from the given queue and keep launching them keeping ;; the cpu load at the targeted level ;; ;;====================================================================== -(use sqlite3 srfi-1 posix regex regex-case srfi-69 dot-locking format) -(import (prefix sqlite3 sqlite3:)) +(use sqlite3 srfi-1 posix regex regex-case srfi-69 format) (declare (unit mlaunch)) (declare (uses db)) (declare (uses common)) Index: mt.scm ================================================================== --- mt.scm +++ mt.scm @@ -127,47 +127,90 @@ (cons testn res))))))))) ;;====================================================================== ;; T R I G G E R S ;;====================================================================== + +(define (mt:run-trigger cmd test-id test-rundir trigger logname test-name item-path event-time actual-state actual-status) + ;; Putting the commandline into ( )'s means no control over the shell. + ;; stdout and stderr will be caught in the NBFAKE or mt_launch.log files + ;; or equivalent. No need to do this. Just run it? + (let* ((fullcmd (conc "nbfake " + cmd " " + test-id " " + test-rundir " " + trigger " " + test-name " " + item-path " " ;; has / prepended to deal with toplevel tests + actual-state " " + actual-status " " + event-time + )) + (prev-nbfake-log (get-environment-variable "NBFAKE_LOG"))) + (setenv "NBFAKE_LOG" (conc (cond + ((and (directory-exists? test-rundir) + (file-write-access? test-rundir)) + test-rundir) + ((and (directory-exists? *toppath*) + (file-write-access? *toppath*)) + *toppath*) + (else (conc "/tmp/" (current-user-name)))) + "/" logname)) + (debug:print-info 0 *default-log-port* "TRIGGERED on " trigger ", running command " fullcmd " output at " (get-environment-variable "NBFAKE_LOG")) + ;; (call-with-environment-variables + ;; `(("NBFAKE_LOG" . ,(conc test-rundir "/" logname))) + ;; (lambda () + (process-run fullcmd) + (if prev-nbfake-log + (setenv "NBFAKE_LOG" prev-nbfake-log) + (unsetenv "NBFAKE_LOG")) + )) ;; )) (define (mt:process-triggers dbstruct run-id test-id newstate newstatus) - (let* ((test-dat (db:get-test-info-by-id dbstruct run-id test-id))) - (if test-dat - (let* ((test-rundir ;; (rmt:sdb-qry 'getstr ;; (filedb:get-path *fdb* - (db:test-get-rundir test-dat)) ;; ) ;; ) - (test-name (db:test-get-testname test-dat)) - (tconfig #f) - (state (if newstate newstate (db:test-get-state test-dat))) - (status (if newstatus newstatus (db:test-get-status test-dat)))) - (if (and test-name - test-rundir ;; #f means no dir set yet - (file-exists? test-rundir) - (directory? test-rundir)) - (call-with-environment-variables - (list (cons "MT_TEST_NAME" test-name) - (cons "MT_TEST_RUN_DIR" test-rundir) - (cons "MT_ITEMPATH" (db:test-get-item-path test-dat))) - (lambda () - (push-directory test-rundir) - (set! tconfig (mt:lazy-read-test-config test-name)) - (for-each (lambda (trigger) - (let ((cmd (configf:lookup tconfig "triggers" trigger)) - (logf (conc test-rundir "/last-trigger.log"))) - (if cmd - ;; Putting the commandline into ( )'s means no control over the shell. - ;; stdout and stderr will be caught in the NBFAKE or mt_launch.log files - ;; or equivalent. No need to do this. Just run it? - (let ((fullcmd (conc cmd " " test-id " " test-rundir " " trigger "&"))) - (debug:print-info 0 *default-log-port* "TRIGGERED on " trigger ", running command " fullcmd) - (process-run fullcmd))))) - (list - (conc state "/" status) - (conc state "/") - (conc "/" status))) - (pop-directory)) - )))))) + (if test-id + (let* ((test-dat (db:get-test-info-by-id dbstruct run-id test-id))) + (if test-dat + (let* ((test-rundir (db:test-get-rundir test-dat)) ;; ) ;; ) + (test-name (db:test-get-testname test-dat)) + (item-path (db:test-get-item-path test-dat)) + (duration (db:test-get-run_duration test-dat)) + (comment (db:test-get-comment test-dat)) + (event-time (db:test-get-event_time test-dat)) + (tconfig #f) + (state (if newstate newstate (db:test-get-state test-dat))) + (status (if newstatus newstatus (db:test-get-status test-dat)))) + ;; (mutex-lock! *triggers-mutex*) + (if (and test-name + test-rundir) ;; #f means no dir set yet + ;; (common:file-exists? test-rundir) + ;; (directory? test-rundir)) + (call-with-environment-variables + (list (cons "MT_TEST_NAME" (or test-name "no such test")) + (cons "MT_TEST_RUN_DIR" (or test-rundir "no test directory yet")) + (cons "MT_ITEMPATH" (or item-path ""))) + (lambda () + (if (directory-exists? test-rundir) + (push-directory test-rundir) + (push-directory *toppath*)) + (set! tconfig (mt:lazy-read-test-config test-name)) + (for-each (lambda (trigger) + (let* ((munged-trigger (string-translate trigger "/ " "--")) + (logname (conc "last-trigger-" munged-trigger ".log"))) + ;; first any triggers from the testconfig + (let ((cmd (configf:lookup tconfig "triggers" trigger))) + (if cmd (mt:run-trigger cmd test-id test-rundir trigger (conc "tconfig-" logname) test-name item-path event-time state status))) + ;; next any triggers from megatest.config + (let ((cmd (configf:lookup *configdat* "triggers" trigger))) + (if cmd (mt:run-trigger cmd test-id test-rundir trigger (conc "mtconfig-" logname) test-name item-path event-time state status))))) + (list + (conc state "/" status) + (conc state "/") + (conc "/" status))) + (pop-directory)) + )) + ;; (mutex-unlock! *triggers-mutex*) + ))))) ;;====================================================================== ;; S T A T E A N D S T A T U S F O R T E S T S ;;====================================================================== @@ -206,11 +249,11 @@ (let ((test-dirs (tests:get-tests-search-path *configdat*))) (let loop ((hed (car test-dirs)) (tal (cdr test-dirs))) ;; Setting MT_LINKTREE here is almost certainly unnecessary. (let ((tconfig-file (conc hed "/" test-name "/testconfig"))) - (if (and (file-exists? tconfig-file) + (if (and (common:file-exists? tconfig-file) (file-read-access? tconfig-file)) (let ((link-tree-path (common:get-linktree)) ;; (configf:lookup *configdat* "setup" "linktree")) (old-link-tree (get-environment-variable "MT_LINKTREE"))) (if link-tree-path (setenv "MT_LINKTREE" link-tree-path)) (let ((newtcfg (read-config tconfig-file #f #f))) ;; NOTE: Does NOT run [system ...] Index: mtut.scm ================================================================== --- mtut.scm +++ mtut.scm @@ -12,12 +12,13 @@ ;; fake out readline usage of toplevel-command (define (toplevel-command . a) #f) (use srfi-1 posix srfi-69 readline ;; regex regex-case srfi-69 apropos json http-client directory-utils rpc typed-records;; (srfi 18) extras) - srfi-18 extras format pkts pkts regex regex-case - (prefix dbi dbi:)) ;; zmq extras) + srfi-18 extras format pkts regex regex-case + (prefix dbi dbi:) + nanomsg) (declare (uses common)) (declare (uses megatest-version)) (declare (uses margs)) (declare (uses configf)) @@ -25,26 +26,71 @@ (include "megatest-fossil-hash.scm") (require-library stml) -(define *target-mappers* (make-hash-table)) ;; '()) -(define *runname-mappers* (make-hash-table)) ;; '()) +;; stuff for the mapper and checker functions +;; +(define *target-mappers* (make-hash-table)) +(define *runname-mappers* (make-hash-table)) +(define *area-checkers* (make-hash-table)) +;; helpers for mappers/checkers +(define (add-target-mapper name proc) + (hash-table-set! *target-mappers* name proc)) +(define (add-runname-mapper name proc) + (hash-table-set! *runname-mappers* name proc)) +(define (add-area-checker name proc) + (hash-table-set! *area-checkers* name proc)) + +;; given a runkey, xlatr-key and other info return one of the following: +;; list of targets, null list to skip processing +;; +(define (map-targets mtconf aval-alist runkey area contour #!key (xlatr-key-in #f)) + (let* ((xlatr-key (or xlatr-key-in + (conf-get/default mtconf aval-alist 'targtrans))) + (proc (hash-table-ref/default *target-mappers* xlatr-key #f))) + (if proc + (begin + (print "Using target mapper: " xlatr-key) + (handle-exceptions + exn + (begin + (print "FAILED TO RUN TARGET MAPPER FOR " area ", called " xlatr-key) + (print " function is: " (hash-table-ref/default *target-mappers* xlatr-key #f ) ) + (print " message: " ((condition-property-accessor 'exn 'message) exn)) + runkey) + (proc runkey area contour))) + (begin + (if xlatr-key + (print "ERROR: Failed to find named target translator " xlatr-key ", using original target.")) + `(,runkey))))) ;; no proc then use runkey + +;; given mtconf and areaconf extract a translator/filter, first look at areaconf +;; then if not found look at default +;; +(define (conf-get/default mtconf areaconf keyname #!key (default #f)) + (let ((res (or (alist-ref keyname areaconf) + (configf:lookup mtconf "default" (conc keyname)) + default))) + (if res + (string->symbol res) + res))) + ;; this needs some thought regarding security implications. ;; ;; i. Check that owner of the file and calling user are same? ;; ii. Check that we are in a legal megatest area? ;; iii. Have some form of authentication or record of the md5sum or similar of the file? ;; iv. Use compiled version in preference to .scm version. Thus there is a manual "blessing" ;; required to use .mtutil.scm. ;; -(if (file-exists? "megatest.config") - (if (file-exists? ".mtutil.so") +(if (common:file-exists? "megatest.config") + (if (common:file-exists? ".mtutil.so") (load ".mtutil.so") - (if (file-exists? ".mtutil.scm") - (load ".mtutil.scm")))) + (if (common:file-exists? ".mtutil.scm") + (load ".mtutil.scm")))) ;; Disabled help items ;; -rollup : (currently disabled) fill run (set by :runname) with latest test(s) ;; from prior runs with same keys ;; Contour actions @@ -56,55 +102,60 @@ mtutil, part of the Megatest tool suite, documentation at http://www.kiatoa.com/fossils/megatest version " megatest-version " license GPL, Copyright Matt Welland 2006-2017 Usage: mtutil action [options] - -h : this help - -manual : show the Megatest user manual - -version : print megatest version (currently " megatest-version ") - -Actions: - run : initiate runs - remove : remove runs - rerun : register action for processing - set-ss : set state/status - archive : compress and move test data to archive disk - kill : stop tests or entire runs - db : database utilities + -h : this help + -manual : show the Megatest user manual + -version : print megatest version (currently " megatest-version ") + +Actions: + run : initiate runs + remove : remove runs + rerun : register action for processing + set-ss : set state/status + archive : compress and move test data to archive disk + kill : stop tests or entire runs + db : database utilities + areas, contours, setup : show areas, contours or setup section from megatest.config Contour actions: - process : runs import, rungen and dispatch - -Selectors - -immediate : apply this action immediately, default is to queue up actions - -area areapatt1,area2... : apply this action only to the specified areas - -target key1/key2/... : run for key1, key2, etc. - -test-patt p1/p2,p3/... : % is wildcard - -run-name : required, name for this particular test run - -contour contourname : run all targets for contourname, requires -run-name, -target - -state-status c/p,c/f : Specify a list of state and status patterns - -tag-expr tag1,tag2%,.. : select tests with tags matching expression - -mode-patt key : load testpatt from in runconfigs instead of default TESTPATT - if -testpatt and -tagexpr are not specified - -new state/status : specify new state/status for set-ss - -Misc - -start-dir path : switch to this directory before running mtutil - -set-vars V1=1,V2=2 : Add environment variables to a run NB// these are - overwritten by values set in config files. - -log logfile : send stdout and stderr to logfile - -repl : start a repl (useful for extending megatest) - -load file.scm : load and run file.scm - -debug N|N,M,O... : enable debug messages 0-N or N and M and O ... - -Utility - db pgschema : emit postgresql schema; do \"mtutil db pgschema | psql -d mydb\" + process : runs import, rungen and dispatch + +Trigger propagation actions: + tsend a=b,c=d... : send trigger info to all recpients in the [listeners] section + tlisten -port N : listen for trigger info on port N + +Selectors + -immediate : apply this action immediately, default is to queue up actions + -area areapatt1,area2... : apply this action only to the specified areas + -target key1/key2/... : run for key1, key2, etc. + -test-patt p1/p2,p3/... : % is wildcard + -run-name : required, name for this particular test run + -contour contourname : run all targets for contourname, requires -run-name, -target + -state-status c/p,c/f : Specify a list of state and status patterns + -tag-expr tag1,tag2%,.. : select tests with tags matching expression + -mode-patt key : load testpatt from in runconfigs instead of default TESTPATT + if -testpatt and -tagexpr are not specified + -new state/status : specify new state/status for set-ss + +Misc + -start-dir path : switch to this directory before running mtutil + -set-vars V1=1,V2=2 : Add environment variables to a run NB// these are + overwritten by values set in config files. + -log logfile : send stdout and stderr to logfile + -repl : start a repl (useful for extending megatest) + -load file.scm : load and run file.scm + -debug N|N,M,O... : enable debug messages 0-N or N and M and O ... + +Utility + db pgschema : emit postgresql schema; do \"mtutil db pgschema | psql -d mydb\" Examples: # Start a megatest run in the area \"mytests\" -mtutil -area mytests -action run -target v1.63/aa3e -mode-patt MYPATT -tag-expr quick +mtutil run -area mytests -target v1.63/aa3e -mode-patt MYPATT -tag-expr quick # Start a contour mtutil run -contour quick -target v1.63/aa3e Called as " (string-intersperse (argv) " ") " @@ -111,17 +162,20 @@ Version " megatest-version ", built from " megatest-fossil-hash )) ;; args and pkt key specs ;; (define *arg-keys* + ;; used keys + ;; a - action '( ("-area" . G) ;; maps to group ("-contour" . c) ("-append-config" . d) ("-state" . e) ("-item-patt" . i) ("-sync-to" . k) + ("-new" . l) ;; l (see below) is new-ss ("-run-name" . n) ("-mode-patt" . o) ("-test-patt" . p) ;; idea, enhance margs ("-test-patt" "-testpatt") => yields one value in "-test-patt" ("-status" . s) ("-target" . t) @@ -152,11 +206,35 @@ ;; alist to map actions to old megatest commands (define *action-keys* '((run . "-run") (sync . "") (archive . "-archive") - (set-ss . "-set-state-status"))) + (set-ss . "-set-state-status") + (remove . "-remove-runs"))) + +;; Card types: +;; +;; A action +;; U username (Unix) +;; D timestamp +;; T card type + +;; utilitarian alist for standard cards +;; +(define *additional-cards* + '( + ;; Standard Cards + (A . action ) + (D . timestamp ) + (T . cardtype ) + (U . user ) ;; username + (Z . shar1sum ) + + ;; Extras + (a . runkey ) ;; needed for matching up pkts with target derived from runkey + ;; (l . new-ss ) ;; new state/status + )) ;; inlst is an alternative input ;; (define (lookup-param-by-key key #!key (inlst #f)) (fold (lambda (a res) @@ -179,11 +257,12 @@ (or (alist-ref (string->symbol param) '((-tag-expr . "-tagexpr") (-mode-patt . "--modepatt") (-run-name . "-runname") (-test-patt . "-testpatt") - (-msg . "-m"))) + (-msg . "-m") + (-new . "-set-state-status"))) param)) (define (val->alist val) (let ((val-list (string-split-fields ";\\s*" val #:infix))) (if val-list @@ -210,11 +289,11 @@ (print "ERROR: failed to create directory " dest-dir " message: " ((condition-property-accessor 'exn 'message) exn)) (create-directory dest-dir #t)) (handle-exceptions exn (print "ERROR: failed to clone or sync 1ossil " url " message: " ((condition-property-accessor 'exn 'message) exn)) - (if (file-exists? targ-file) + (if (common:file-exists? targ-file) (system (conc "fossil pull --once " url " -R " targ-file)) (system (conc "fossil clone " url " " targ-file)) )))) (define (fossil:last-change-node-and-time fossils-dir fossil-name branch) @@ -259,22 +338,14 @@ (loop (get-line) date node time)))) (else ;; no more datat and last node on branch not found (close-input-port timeline-port) (values (common:date-time->seconds (conc date " " time)) node)))))) - ;;====================================================================== ;; GLOBALS ;;====================================================================== -;; Card types: -;; -;; a action -;; u username (Unix) -;; D timestamp -;; T card type - ;; process args (define *action* (if (> (length (argv)) 1) (cadr (argv)) #f)) (define remargs (args:get-args @@ -300,11 +371,12 @@ (if (and (not (null? remargs)) (not (or (args:get-arg "-runstep") (args:get-arg "-envcap") (args:get-arg "-envdelta") - (member *action* '("db")) ;; very loose checks on db. + (member *action* '("db" "tsend" "tlisten")) ;; very loose checks on db and tsend/listen + (equal? *action* "show") ;; just keep going if list ))) (debug:print-error 0 *default-log-port* "Unrecognised arguments: " (string-intersperse (if (list? remargs) remargs (argv)) " "))) (if (or (args:any? "-h" "help" "-help" "--help") (member *action* '("-h" "-help" "--help" "help"))) @@ -311,71 +383,69 @@ (begin (print help) (exit 1))) ;;====================================================================== -;; pkts -;;====================================================================== - -(define (with-queue-db mtconf proc) - (let* ((pktsdirs (configf:lookup mtconf "setup" "pktsdirs")) - (pktsdir (if pktsdirs (car (string-split pktsdirs " ")) #f)) - (toppath (configf:lookup mtconf "dyndat" "toppath")) - (pdbpath (or (configf:lookup mtconf "setup" "pdbpath") pktsdir))) - (if (not (and pktsdir toppath pdbpath)) - (begin - (print "ERROR: settings are missing in your megatest.config for area management.") - (print " you need to have pktsdir in the [setup] section.")) - (let* ((pdb (open-queue-db pdbpath "pkts.db" - schema: '("CREATE TABLE groups (id INTEGER PRIMARY KEY,groupname TEXT, CONSTRAINT group_constraint UNIQUE (groupname));")))) - (proc pktsdirs pktsdir pdb) - (dbi:close pdb))))) - -(define (load-pkts-to-db mtconf) - (with-queue-db - mtconf - (lambda (pktsdirs pktsdir pdb) - (for-each - (lambda (pktsdir) ;; look at all - (if (and (file-exists? pktsdir) - (directory? pktsdir) - (file-read-access? pktsdir)) - (let ((pkts (glob (conc pktsdir "/*.pkt")))) - (for-each - (lambda (pkt) - (let* ((uuid (cadr (string-match ".*/([0-9a-f]+).pkt" pkt))) - (exists (lookup-by-uuid pdb uuid #f))) - (if (not exists) - (let* ((pktdat (string-intersperse - (with-input-from-file pkt read-lines) - "\n")) - (apkt (pkt->alist pktdat)) - (ptype (alist-ref 'T apkt))) - (add-to-queue pdb pktdat uuid (or ptype 'cmd) #f 0) - (debug:print 4 *default-log-port* "Added " uuid " of type " ptype " to queue")) - (debug:print 4 *default-log-port* "pkt: " uuid " exists, skipping...") - ))) - pkts)))) - (string-split pktsdirs))))) - -(define (get-pkt-alists pkts) - (map (lambda (x) - (alist-ref 'apkt x)) ;; 'pkta pulls out the alist from the read pkt - pkts)) - -;; given list of pkts (alist mode) return list of D cards as Unix epoch, sorted descending -;; also delete duplicates by target i.e. (car pkt) -(define (get-pkt-times pkts) - (delete-duplicates - (sort - (map (lambda (x) - `(,(alist-ref 't x) . ,(string->number (alist-ref 'D x)))) - pkts) - (lambda (a b)(> (cdr a)(cdr b)))) ;; sort descending - (lambda (a b)(equal? (car a)(car b))))) ;; remove duplicates by target - -;;====================================================================== +;; Nanomsg transport +;;====================================================================== + +(define-inline (encode data) + (with-output-to-string + (lambda () + (write data)))) + +(define-inline (decode data) + (with-input-from-string + data + (lambda () + (read)))) + +;;start a server, returns the connection +;; +(define (start-nn-server portnum) + (let ((rep (nn-socket 'rep))) + (handle-exceptions + exn + (let ((emsg ((condition-property-accessor 'exn 'message) exn))) + (print "ERROR: Failed to start server \"" emsg "\"") + (exit 1)) + (nn-bind rep (conc "tcp://*:" portnum))) + rep)) + +;; open connection to server, send message, close connection +;; +(define (open-send-close-nn host-port msg #!key (timeout 3)) ;; default timeout is 3 seconds + (let ((req (nn-socket 'req)) + (uri (conc "tcp://" host-port)) + (res #f)) + (handle-exceptions + exn + (let ((emsg ((condition-property-accessor 'exn 'message) exn))) + (print "ERROR: Failed to connect/send to " uri " message was \"" emsg "\"") + #f) + (nn-connect req uri) + (nn-send req msg) + ;; NEED timer here! + (let* ((th1 (make-thread (lambda () + (let ((resp (nn-recv req))) + (nn-close req) + (set! res (if (equal? resp "ok") + #t + #f)))) + "recv thread")) + (th2 (make-thread (lambda () + (thread-sleep! timeout) + (thread-terminate! th1)) + "timer thread"))) + (thread-start! th1) + (thread-start! th2) + (thread-join! th1) + res)))) + +;;====================================================================== + + ;; Runs ;;====================================================================== ;; make a runname ;; @@ -386,24 +456,34 @@ ;; collect, translate, collate and assemble a pkt from the command-line ;; ;; sched => force the run start time to be recorded as sched Unix ;; epoch. This aligns times properly for triggers in some cases. ;; -(define (command-line->pkt action args-alist sched-in) +;; extra-dat format is ( 'x xval 'y yval .... ) +;; +(define (command-line->pkt action args-alist sched-in #!key (extra-dat '())(area-path #f)(new-ss #f)) (let* ((sched (cond ((vector? sched-in)(local-time->seconds sched-in)) ;; we recieved a time ((number? sched-in) sched-in) (else (current-seconds)))) (args-data (if args-alist (if (hash-table? args-alist) ;; seriously? (hash-table->alist args-alist) args-alist) (hash-table->alist args:arg-hash))) ;; if no args-alist then we assume this is a call driven directly by commandline - (alldat (apply append (list 'T "cmd" - 'a action - 'U (current-user-name) - 'D sched) + (alldat (apply append + (list 'A action + 'U (current-user-name) + 'D sched) + (if area-path + (list 'S area-path) ;; the area-path is mapped to the start-dir + '()) + (if (list? extra-dat) + extra-dat + (begin + (debug:print 0 *default-log-port* "ERROR: command-line->pkt received bad extra-dat " extra-dat) + '())) (map (lambda (x) (let* ((param (car x)) (value (cdr x)) (pmeta (assoc param *arg-keys*)) ;; translate the card key to a megatest switch or parameter (smeta (assoc param *switch-keys*)) ;; first lookup the key in arg-keys or switch-keys @@ -412,12 +492,12 @@ #f))) (if (or pmeta smeta) ;; construct the switch/param pair. (list meta value) '()))) (filter cdr args-data))))) -;; (print "Alldat: " alldat -;; " args-data: " args-data) + (print "Alldat: " alldat + " args-data: " args-data) (add-z-card (apply construct-sdat alldat)))) (define (simple-setup start-dir-in) (let* ((start-dir (or start-dir-in ".")) @@ -427,15 +507,15 @@ ;; environ-patt: "env-override" given-toppath: start-dir ;; pathenvvar: "MT_RUN_AREA_HOME" )) (mtconf (if mtconfdat (car mtconfdat) #f))) - ;; we set some dynamic data in a section called "dyndata" + ;; we set some dynamic data in a section called "scratchdata" (if mtconf (begin - (configf:section-var-set! mtconf "dyndat" "toppath" start-dir))) - ;; (print "TOPPATH: " (configf:lookup mtconf "dyndat" "toppath")) + (configf:section-var-set! mtconf "scratchdat" "toppath" start-dir))) + ;; (print "TOPPATH: " (configf:lookup mtconf "scratchdat" "toppath")) mtconfdat)) ;; NEED TIMESTAMP ON PKTS for efficient loading of packets into db. @@ -446,16 +526,19 @@ ;; ii. Pass the pkt keys and values to this proc and go from there. ;; iii. Maybe have an abstraction alist with meaningful names for the pkt keys ;; ;; Override the run start time record with sched. Usually #f is fine. ;; -(define (create-run-pkt mtconf action area runkey runname mode-patt tag-expr pktsdir reason contour sched dbdest append-conf runtrans) +(define (create-run-pkt mtconf action area runkey target runname mode-patt + tag-expr pktsdir reason contour sched dbdest append-conf + runtrans) (let* ((good-val (lambda (inval)(and inval (string? inval)(not (string-null? inval))))) (area-dat (val->alist (or (configf:lookup mtconf "areas" area) ""))) (area-path (alist-ref 'path area-dat)) - (area-xlatr (alist-ref 'targtrans area-dat)) - (new-runname (let* ((callname (if (string? runtrans)(string->symbol runtrans) #f)) + ;; (area-xlatr (alist-ref 'targtrans area-dat)) + ;; (xlatr-key (if area-xlatr (string->symbol area-xlatr) #f)) + (new-runname (let* ((callname (if (string? runtrans)(string->symbol runtrans) #f)) (mapper (if callname (hash-table-ref/default *runname-mappers* callname #f) #f))) ;; (print "callname=" callname " runtrans=" runtrans " mapper=" mapper) (if (and callname (not (equal? callname "auto")) (not mapper)) @@ -471,28 +554,11 @@ (print "(mapper " (string-intersperse (list runkey runname area area-path reason contour mode-patt) ", ") ")") (mapper runkey runname area area-path reason contour mode-patt)) (case callname ((auto) runname) (else runtrans))))) - (new-target (if area-xlatr - (let ((xlatr-key (string->symbol area-xlatr))) - (if (hash-table-exists? *target-mappers* xlatr-key) - (begin - (print "Using target mapper: " area-xlatr) - (handle-exceptions - exn - (begin - (print "FAILED TO RUN TARGET MAPPER FOR " area ", called " area-xlatr) - (print " function is: " (hash-table-ref/default *target-mappers* xlatr-key #f ) ) - (print " message: " ((condition-property-accessor 'exn 'message) exn)) - runkey) - ((hash-table-ref *target-mappers* xlatr-key) - runkey new-runname area area-path reason contour mode-patt))) - (begin - (print "ERROR: Failed to find named target translator " xlatr-key ", using original target.") - runkey))) - runkey)) + (new-target target) ;; I believe we will want target manipulation here .. (map-targets xlatr-key runkey area contour)) (actual-action (if action (if (equal? action "sync-prepend") "sync" action) "run"))) ;; this has gotten a bit ugly. Need a function to handle actions processing. @@ -499,11 +565,11 @@ ;; some hacks to remove switches not needed in certain cases (case (string->symbol (or action "run")) ((sync sync-prepend) (set! new-target #f) (set! runame #f))) - (print "area-path: " area-path " area-xlatr: " area-xlatr " orig-target: " runkey " new-target: " new-target) + ;; (print "area-path: " area-path " orig-target: " runkey " new-target: " new-target) (let-values (((uuid pkt) (command-line->pkt actual-action (append `(("-start-dir" . ,area-path) @@ -523,23 +589,50 @@ (equal? action "run")) `(("-preclean" . " ") ("-rerun-all" . " ")) ;; if run we *always* want preclean set, use single space as placeholder '()) ) - sched))) + sched + extra-dat: `(a ,runkey) ;; we need the run key for marking the run as launched + ))) (with-output-to-file (conc pktsdir "/" uuid ".pkt") (lambda () (print pkt)))))) + +;; look for areas=a1,a2,a3 OR areafn=somefuncname +;; +(define (val-alist->areas val-alist) + (let ((areas-string (alist-ref 'areas val-alist)) + (areas-procname (alist-ref 'areafn val-alist))) + (if areas-procname ;; areas-procname take precedence + areas-procname + (string-split (or areas-string "") ",")))) + +;; area - the current area under consideration +;; areas - the list of allowed areas from the contour spec -OR- +;; if it is a string then it is the function to use to +;; lookup in *area-checkers* +;; +(define (area-allowed? area areas runkey contour mode-patt) + (cond + ((not areas) #t) ;; no spec + ((string? areas) ;; + (let ((check-fn (hash-table-ref/default *area-checkers* (string->symbol areas) #f))) + (if check-fn + (check-fn area runkey contour mode-patt) + #f))) + ((list? areas)(member area areas)) + (else #f))) ;; shouldn't get here ;; (use trace)(trace create-run-pkt) ;; collect all needed data and create run pkts for contours with changed inputs ;; (define (generate-run-pkts mtconf toppath) (let ((std-runname (conc "sched" (time->string (seconds->local-time (current-seconds)) "%M%H%d")))) - (with-queue-db + (common:with-queue-db mtconf (lambda (pktsdirs pktsdir pdb) (let* ((rgconfdat (find-and-read-config (conc toppath "/runconfigs.config"))) (rgconf (car rgconfdat)) (all-areas (map car (configf:get-section mtconf "areas"))) @@ -564,26 +657,35 @@ (optional (if (> len-key 3)(cadddr keyparts) #f)) ;; (val-list (string-split-fields ";\\s*" val #:infix)) ;; (string-split val)) ;; runname-rule params (val-alist (val->alist val)) (runname (make-runname "" "")) (runtrans (alist-ref 'runtrans val-alist)) + + ;; these may or may not be defined and not all are used in each handler type in the case below + (run-name (alist-ref 'run-name val-alist)) + (target (alist-ref 'target val-alist)) + (crontab (alist-ref 'cron val-alist)) + (areas (val-alist->areas val-alist)) ;; areas can be a single string (a reference to call an areas function), or a list of area names. + (dbdest (alist-ref 'dbdest val-alist)) + (appendconf (alist-ref 'appendconf val-alist)) + (file-globs (alist-ref 'glob val-alist)) (runstarts (find-pkts pdb '(runstart) `((o . ,contour) (t . ,runkey)))) - (rspkts (get-pkt-alists runstarts)) + (rspkts (common:get-pkt-alists runstarts)) ;; starttimes is for run start times and is used to know when the last run was launched - (starttimes (get-pkt-times rspkts)) ;; sort by age (youngest first) and delete duplicates by target - (last-run (if (null? starttimes) ;; if '() then it has never been run, else get the max - 0 - (apply max (map cdr starttimes)))) + (starttimes (common:get-pkt-times rspkts)) ;; sort by age (youngest first) and delete duplicates by target + (last-run (if (null? starttimes) ;; if '() then it has never been run, else get the max + 0 + (apply max (map cdr starttimes)))) ;; synctimes is for figuring out the last time a sync was done - (syncstarts (find-pkts pdb '(syncstart) '())) ;; no qualifiers, a sync does all tarets etc. - (sspkts (get-pkt-alists syncstarts)) - (synctimes (get-pkt-times sspkts)) - (last-sync (if (null? synctimes) ;; if '() then it has never been run, else get the max - 0 - (apply max (map cdr synctimes)))) + (syncstarts (find-pkts pdb '(syncstart) '())) ;; no qualifiers, a sync does all tarets etc. + (sspkts (common:get-pkt-alists syncstarts)) + (synctimes (common:get-pkt-times sspkts)) + (last-sync (if (null? synctimes) ;; if '() then it has never been run, else get the max + 0 + (apply max (map cdr synctimes)))) ) (let ((delta (lambda (x) (round (/ (- (current-seconds) x) 60))))) (print "runkey: " runkey ", ruletype: " ruletype ", action: " action ", last-run: " last-run " time since; last-run: " (delta last-run) ", last-sync: " (delta last-sync))) @@ -597,41 +699,53 @@ (case (string->symbol (or ruletype "no-such-rule")) ((no-such-rule) (print "ERROR: no such rule for " sense)) + ;; Handle crontab like rules + ;; ((scheduled) (if (not (alist-ref 'cron val-alist)) ;; gotta have cron spec (print "ERROR: bad sense spec \"" (string-intersperse sense " ") "\" params: " val-alist) - (let* ((run-name (alist-ref 'run-name val-alist)) - (target (alist-ref 'target val-alist)) - (crontab (alist-ref 'cron val-alist)) + (let* ( ;; (action (alist-ref 'action val-alist)) - (cron-safe-string (string-translate (string-intersperse (string-split (alist-ref 'cron val-alist)) "-") "*" "X")) + (cron-safe-string (string-translate (string-intersperse (string-split crontab) "-") "*" "X")) (runname std-runname)) ;; (conc "sched" (time->string (seconds->local-time (current-seconds)) "%M%H%d"))))) ;; (print "last-run: " last-run " need-run: " need-run) ;; (if need-run (case (string->symbol action) ((sync sync-prepend) (if (common:extended-cron crontab #f last-sync) (push-run-spec torun contour runkey `((message . ,(conc ruletype ":sync-" cron-safe-string)) (action . ,action) - (dbdest . ,(alist-ref 'dbdest val-alist)) - (append . ,(alist-ref 'appendconf val-alist)))))) + (dbdest . ,dbdest) + (append . ,appendconf) + (areas . ,areas))))) ((run) (if (common:extended-cron crontab #f last-run) (push-run-spec torun contour runkey - `((message . ,(conc ruletype ":" cron-safe-string)) - (runname . ,runname) + `((message . ,(conc ruletype ":" cron-safe-string)) + (runname . ,runname) + (runtrans . ,runtrans) + (action . ,action) + (areas . ,areas) + (target . ,target))))) + ((remove) + (push-run-spec torun contour runkey + `((message . ,(conc ruletype ":" cron-safe-string)) + (runname . ,runname) (runtrans . ,runtrans) - (action . ,action) - (target . ,target))))) + (action . ,action) + (areas . ,areas) + (target . ,target)))) (else (print "ERROR: action \"" action "\" has no scheduled handler") ))))) + ;; script based sensors + ;; ((script) ;; syntax is a little different here. It is a list of commands to run, "scriptname = extra_parameters;scriptname = ..." ;; where scriptname may be repeated multiple times. The script must return unix-epoch of last change, new-target-name and new-run-name ;; the script is called like this: scriptname contour runkey std-runname action extra_param1 extra_param2 ... (for-each @@ -664,19 +778,23 @@ (if need-run (let* ((key-msg `((message . ,(conc ruletype ":" message)) (runname . ,runname) (runtrans . ,runtrans) (action . ,action) - (target . ,new-target)))) + (areas . ,areas) + (target . ,new-target) ;; overriding with result from runing the script + ))) (print "key-msg: " key-msg) (push-run-spec torun contour (if optional ;; we need to be able to differentiate same contour, different behavior. (conc runkey ":" optional) ;; NOTE: NOT COMPLETELY IMPLEMENTED. DO NOT USE runkey) key-msg))))))) val-alist)) ;; iterate over the param split by ;\s* + ;; fossil scm based triggers + ;; ((fossil) (for-each (lambda (fspec) (print "fspec: " fspec) (let* ((url (symbol->string (car fspec))) ;; THIS COULD BE TROUBLE. Add option to reading line to return as string. @@ -688,65 +806,73 @@ (fossil:clone-or-sync url fname fdir) ;; ) (let-values (((datetime node) (fossil:last-change-node-and-time fdir fname branch))) (if (null? starttimes) (push-run-spec torun contour runkey - `((message . ,(conc "fossil:" branch "-neverrun")) - (runname . ,(conc runname "-" node)) + `((message . ,(conc "fossil:" branch "-neverrun")) + (runname . ,(conc runname "-" node)) (runtrans . ,runtrans) - (target . ,runkey))) + (areas . ,areas) + ;; (target . ,runkey) + )) (if (> datetime last-run) ;; change time is greater than last-run time (push-run-spec torun contour runkey - `((message . ,(conc "fossil:" branch "-" node)) - (runname . ,(conc runname "-" node)) + `((message . ,(conc "fossil:" branch "-" node)) + (runname . ,(conc runname "-" node)) (runtrans . ,runtrans) - (target . ,runkey))))) + (areas . ,areas) + ;; (target . ,runkey) + )))) (print "Got datetime=" datetime " node=" node)))) val-alist)) - + + ;; sensor looking for one or more files newer than reference + ;; ((file file-or) ;; one or more files must be newer than the reference - (let* ((file-globs (alist-ref 'glob val-alist)) - (youngestdat (common:get-youngest (common:bash-glob file-globs))) + (let* ((youngestdat (common:get-youngest (common:bash-glob file-globs))) (youngestmod (car youngestdat))) ;; (print "youngestmod: " youngestmod " starttimes: " starttimes) (if (null? starttimes) ;; this target has never been run (push-run-spec torun contour runkey - `((message . "file:neverrun") - (action . ,action) + `((message . "file:neverrun") + (action . ,action) (runtrans . ,runtrans) - (target . ,runkey) - (runname . ,runname))) + ;; (target . ,runkey) + (areas . ,areas) + (runname . ,runname))) ;; (for-each ;; (lambda (starttime) ;; look at the time the last run was kicked off for this contour ;; (if (> youngestmod (cdr starttime)) ;; (begin ;; (print "starttime younger than youngestmod: " starttime " Youngestmod: " youngestmod) (if (> youngestmod last-run) (push-run-spec torun contour runkey - `((message . ,(conc ruletype ":" (cadr youngestdat))) - (action . ,action) - (target . ,runkey) + `((message . ,(conc ruletype ":" (cadr youngestdat))) + (action . ,action) + ;; (target . ,runkey) (runtrans . ,runtrans) - (runname . ,runname) + (areas . ,areas) + (runname . ,runname) )))))) - ;; starttimes)) + ;; all globbed files must be newer than the reference + ;; ((file-and) ;; all files must be newer than the reference - (let* ((file-globs (alist-ref 'glob val-alist)) - (youngestdat (common:get-youngest file-globs)) + (let* ((youngestdat (common:get-youngest file-globs)) (youngestmod (car youngestdat)) (success #t)) ;; any cases of not true, set flag to #f for AND ;; (print "youngestmod: " youngestmod " starttimes: " starttimes) (if (null? starttimes) ;; this target has never been run (push-run-spec torun contour runkey - `((message . "file:neverrun") - (runname . ,runname) + `((message . "file:neverrun") + (runname . ,runname) (runtrans . ,runtrans) - (target . ,runkey) - (action . ,action))) + (areas . ,areas) + ;; (target . ,runkey) + (action . ,action))) ;; NB// I think this is wrong. It should be looking at last-run only. - (if (> youngestmod last-run) + (if (> youngestmod last-run) ;; WAIT!! Shouldn't file-and be looking at the *oldest* file (thus all are younger than ...) ;; (for-each ;; (lambda (starttime) ;; look at the time the last run was kicked off for this contour ;; (if (< youngestmod (cdr starttime)) ;; (set! success #f))) @@ -753,82 +879,98 @@ ;; starttimes)) ;; (if success ;; (begin ;; (print "starttime younger than youngestmod: " starttime " Youngestmod: " youngestmod) (push-run-spec torun contour runkey - `((message . ,(conc ruletype ":" (cadr youngestdat))) - (runname . ,runname) + `((message . ,(conc ruletype ":" (cadr youngestdat))) + (runname . ,runname) (runtrans . ,runtrans) - (target . ,runkey) - (action . ,action) + ;; (target . ,runkey) + (areas . ,areas) + (action . ,action) )))))) (else (print "ERROR: unrecognised rule \"" ruletype))))) keydats))) ;; sense rules (hash-table-keys rgconf)) ;; now have to run populated (for-each (lambda (contour) - (print "contour: " contour) - (let* ((val (or (configf:lookup mtconf "contours" contour) "")) - (val-alist (val->alist val)) - (areas (string-split (or (alist-ref 'areas val-alist) "") ",")) - (selector (alist-ref 'selector val-alist)) - (mode-tag (and selector (string-split-fields "/" selector #:infix))) - (mode-patt (and mode-tag (if (eq? (length mode-tag) 2)(cadr mode-tag) #f))) - (tag-expr (and mode-tag (if (null? mode-tag) #f (car mode-tag))))) + (let* ((cval (or (configf:lookup mtconf "contours" contour) "")) + (cval-alist (val->alist cval)) ;; BEWARE ... NOT the same val-alist as above! + (areas (val-alist->areas cval-alist)) + (selector (alist-ref 'selector cval-alist)) + (mode-tag (and selector (string-split-fields "/" selector #:infix))) + (mode-patt (and mode-tag (if (eq? (length mode-tag) 2)(cadr mode-tag) #f))) + (tag-expr (and mode-tag (if (null? mode-tag) #f (car mode-tag))))) + (print "contour: " contour " areas=" areas " cval=" cval) (for-each - (lambda (runkeydatset) + (lambda (runkeydatset) ;; (print "runkeydatset: ")(pp runkeydatset) (let ((runkey (car runkeydatset)) (runkeydats (cadr runkeydatset))) (for-each (lambda (runkeydat) (for-each (lambda (area) - (let ((runname (alist-ref 'runname runkeydat)) - (runtrans (alist-ref 'runtrans runkeydat)) - (reason (alist-ref 'message runkeydat)) - (sched (alist-ref 'sched runkeydat)) - (action (alist-ref 'action runkeydat)) - (dbdest (alist-ref 'dbdest runkeydat)) - (append (alist-ref 'append runkeydat)) - (target (or (alist-ref 'target runkeydat) runkey))) ;; override with target if forced - (print "Have: runkey=" runkey " contour=" contour " area=" area " action=" action " tag-expr=" tag-expr " mode-patt=" mode-patt " target=" target) - (if (case (or (and action (string->symbol action)) 'noaction) ;; ensure we have the needed data to run this action - ((noaction) #f) - ((run) (and runname reason)) - ((sync sync-prepend) (and reason dbdest)) - (else #f)) - ;; instead of unwrapping the runkeydat alist, pass it directly to create-run-pkt - (create-run-pkt mtconf action area runkey runname mode-patt tag-expr pktsdir reason contour sched dbdest append runtrans) - (print "ERROR: Missing info to make a " action " call: runkey=" runkey " contour=" contour " area=" area " tag-expr=" tag-expr " mode-patt=" mode-patt " dbdest=" dbdest) - ))) - all-areas)) + (if (area-allowed? area areas runkey contour mode-patt) ;; is this area to be handled (from areas=a,b,c OR using areafn=abcfn and *area-checks* ...) + (let* ((aval (or (configf:lookup mtconf "areas" area) "")) + (aval-alist (val->alist aval)) + (runname (alist-ref 'runname runkeydat)) + (runtrans (alist-ref 'runtrans runkeydat)) + + (reason (alist-ref 'message runkeydat)) + (sched (alist-ref 'sched runkeydat)) + (action (alist-ref 'action runkeydat)) + (dbdest (alist-ref 'dbdest runkeydat)) + (append (alist-ref 'append runkeydat)) + (targets (or (alist-ref 'target runkeydat) + (map-targets mtconf aval-alist runkey area contour)))) ;; override with target if forced + ;; NEED TO EXPAND RUNKEY => ALL TARGETS MAPPED AND THEN FOREACH .... + (for-each + (lambda (target) + (print "Creating pkt for runkey=" runkey " target=" target " contour=" contour " area=" area " action=" action " tag-expr=" tag-expr " mode-patt=" mode-patt) + (if (case (or (and action (string->symbol action)) 'noaction) ;; ensure we have the needed data to run this action + ((noaction) #f) + ((run) (and runname reason)) + ((sync sync-prepend) (and reason dbdest)) + (else #f)) + ;; instead of unwrapping the runkeydat alist, pass it directly to create-run-pkt + (create-run-pkt mtconf action area runkey target runname mode-patt + tag-expr pktsdir reason contour sched dbdest append + runtrans) + (print "ERROR: Missing info to make a " action " call: runkey=" runkey " contour=" contour " area=" area " tag-expr=" tag-expr " mode-patt=" mode-patt " dbdest=" dbdest) + )) + targets)) + (print "NOTE: skipping " runkeydat " for area \"" area "\", not in " areas))) + all-areas)) runkeydats))) (let ((res (configf:get-section torun contour))) ;; each contour / target ;; (print "res=" res) res)))) (hash-table-keys torun))))))) (define (pkt->cmdline pkta) - (let ((action (or (lookup-action-by-key (alist-ref 'a pkta)) "noaction"))) + (let* ((action (or (lookup-action-by-key (alist-ref 'A pkta)) "noaction")) + (action-param (case (string->symbol action) + ((-set-state-status) (conc (alist-ref 'l pkta) " ")) + (else "")))) (fold (lambda (a res) (let* ((key (car a)) ;; get the key name (val (cdr a)) (par (or (lookup-param-by-key key) ;; need to check also if it is a switch (lookup-param-by-key key inlst: *switch-keys*)))) ;; (print "key: " key " val: " val " par: " par) (if par (conc res " " (param-translate par) " " val) - (if (member key '(a Z U D T)) ;; a is the action + (if (alist-ref key *additional-cards*) ;; these cards do not translate to parameters or switches res (begin (print "ERROR: Unknown key in packet \"" key "\" with value \"" val "\"") res))))) (conc "megatest " (if (not (member action '("sync"))) - (conc action " ") + (conc action " " action-param) "")) pkta))) ;; (use trace)(trace pkt->cmdline) @@ -851,12 +993,16 @@ #f (create-directory "logs") #t) #t) "logs" - "/tmp"))) - (with-queue-db + "/tmp")) + (cpuload (alist-ref 'adj-proc-load (common:get-normalized-cpu-load #f))) + (maxload (string->number (or (configf:lookup mtconf "setup" "maxload") + (configf:lookup mtconf "jobtools" "maxload") ;; respect value used by Megatest calls + "1.1")))) + (common:with-queue-db mtconf (lambda (pktsdirs pktsdir pdb) (let* ((rgconfdat (find-and-read-config (conc toppath "/runconfigs.config"))) (rgconf (car rgconfdat)) (areas (configf:get-section mtconf "areas")) @@ -865,85 +1011,214 @@ (torun (make-hash-table)) ;; target => ( ... info ... ) (rgentargs (hash-table-keys rgconf))) ;; these are the targets registered for automatically triggering (for-each (lambda (pktdat) (let* ((pkta (alist-ref 'apkt pktdat)) - (action (alist-ref 'a pkta)) + (action (alist-ref 'A pkta)) (cmdline (pkt->cmdline pkta)) (uuid (alist-ref 'Z pkta)) + (user (alist-ref 'U pkta)) + (area (alist-ref 'G pkta)) (logf (conc logdir "/" uuid "-run.log")) (fullcmd (conc "NBFAKE_LOG=" logf " nbfake " cmdline))) - (print "RUNNING: " fullcmd) - (system fullcmd) - (mark-processed pdb (list (alist-ref 'id pktdat))) - (let-values (((ack-uuid ack-pkt) - (add-z-card - (construct-sdat 'P uuid - 'T (case (string->symbol action) - ((run) "runstart") - ((sync) "syncstart") ;; example of translating run -> runstart - (else action)) - 'c (alist-ref 'o pkta) ;; THIS IS WRONG! SHOULD BE 'c - 't (alist-ref 't pkta))))) - (write-pkt pktsdir ack-uuid ack-pkt)))) + (if (check-access user mtconf action area) + (if (and (> cpuload maxload) + (member action '("run" "archive"))) ;; do not run archive or run if load is over the specified limit + (print "WARNING: cpuload too high, skipping processing of " uuid) + (begin + (print "RUNNING: " fullcmd) + (system fullcmd) ;; replace with process ... + (mark-processed pdb (list (alist-ref 'id pktdat))) + (let-values (((ack-uuid ack-pkt) + (add-z-card + (construct-sdat 'P uuid + 'T (case (string->symbol action) + ((run) "runstart") + ((sync) "syncstart") ;; example of translating run -> runstart + (else action)) + 'c (alist-ref 'o pkta) ;; THIS IS WRONG! SHOULD BE 'c + 't (alist-ref 't pkta))))) + (write-pkt pktsdir ack-uuid ack-pkt)))) + (begin ;; access denied! Mark as such + (mark-processed pdb (list (alist-ref 'id pktdat))) + (let-values (((ack-uuid ack-pkt) + (add-z-card + (construct-sdat 'P uuid + 'T "access-denied" + 'c (alist-ref 'o pkta) ;; THIS IS WRONG! SHOULD BE 'c + 't (alist-ref 't pkta))))) + (write-pkt pktsdir ack-uuid ack-pkt)))))) pkts)))))) - + +(define (check-access user mtconf action area) + ;; NOTE: Need control over defaults. E.g. default might be no access + (let* ((access-ctrl (hash-table-exists? mtconf "access")) ;; if there is an access section the default is to REQUIRE enablement/access + (access-list (map (lambda (x) + (string-split x ":")) + (string-split (or (configf:lookup mtconf "access" area) ;; userid:rightstype userid2:rightstype2 ... + (if access-ctrl + "*:none" ;; nobody has access by default + "*:all"))))) + (access-types-dat (configf:get-section mtconf "accesstypes"))) + (debug:print 0 *default-log-port* "Checking access in " access-list " with access-ctrl " access-ctrl " for area " area) + (if access-ctrl + (let* ((user-access (or (assoc user access-list) + (assoc "*" access-list))) + (access-type (cadr user-access)) + (access-types (let ((res (alist-ref access-type access-types-dat equal?))) + (if res (car res) res))) + (allowed-actions (string-split (or access-types "")))) + (print "Got " allowed-actions " for user " user " where access-types=" access-types " access-type=" access-type) + (cond + ((and access-types (member action allowed-actions)) + ;; (print "Access granted for " user " for " action) + #t) + (else + ;; (print "Access denied for " user " for " action) + #f)))))) + (define (get-pkts-dir mtconf) (let ((pktsdirs (configf:lookup mtconf "setup" "pktsdirs")) (pktsdir (if pktsdirs (car (string-split pktsdirs " ")) #f))) pktsdir)) (let ((debugcontrolf (conc (get-environment-variable "HOME") "/.mtutilrc"))) - (if (file-exists? debugcontrolf) + (if (common:file-exists? debugcontrolf) (load debugcontrolf))) (if *action* (case (string->symbol *action*) - ((run remove rerun set-ss archive kill) + ((run remove rerun set-ss archive kill list) (let* ((mtconfdat (simple-setup (args:get-arg "-start-dir"))) (mtconf (car mtconfdat)) + (area (args:get-arg "-area")) ;; look up the area to dispatch to from [areas] section + (areasec (if area (configf:lookup mtconf "areas" area) #f)) + (areadat (if areasec (val->alist areasec) #f)) + (area-path (if areadat (alist-ref 'path areadat) #f)) (pktsdirs (configf:lookup mtconf "setup" "pktsdirs")) (pktsdir (if pktsdirs (car (string-split pktsdirs " ")) #f)) - (adjargs (hash-table-copy args:arg-hash))) + (adjargs (hash-table-copy args:arg-hash)) + (new-ss (args:get-arg "-new"))) + ;; check a few things + (cond + ((and area (not area-path)) + (print "ERROR: the specified area was not found in the [areas] table. Area name=" area) + (exit 1)) + ((not area) + (print "ERROR: no area specified. Use -area ") + (exit 1)) + (else + (let ((user (current-user-name))) + (if (check-access user mtconf *action* area);; check rights + (print "Access granted for " *action* " action by " user) + (begin + (print "Access denied for " *action* " action by " user) + (exit 1)))))) + ;; (for-each ;; (lambda (key) ;; (if (not (member key *legal-params*)) ;; (hash-table-delete! adjargs key))) ;; we need to delete any params intended for mtutil ;; (hash-table-keys adjargs)) (let-values (((uuid pkt) - (command-line->pkt *action* adjargs #f))) + (command-line->pkt *action* adjargs #f area-path: area-path new-ss: new-ss))) (write-pkt pktsdir uuid pkt)))) ((dispatch import rungen process) (let* ((mtconfdat (simple-setup (args:get-arg "-start-dir"))) (mtconf (car mtconfdat)) - (toppath (configf:lookup mtconf "dyndat" "toppath"))) + (toppath (configf:lookup mtconf "scratchdat" "toppath"))) (case (string->symbol *action*) ((process) (begin - (load-pkts-to-db mtconf) + (common:load-pkts-to-db mtconf) (generate-run-pkts mtconf toppath) - (load-pkts-to-db mtconf) + (common:load-pkts-to-db mtconf) (dispatch-commands mtconf toppath))) - ((import) (load-pkts-to-db mtconf)) ;; import pkts + ((import) (common:load-pkts-to-db mtconf)) ;; import pkts ((rungen) (generate-run-pkts mtconf toppath)) ((dispatch) (dispatch-commands mtconf toppath))))) + ;; misc + ((show) + (if (> (length remargs) 0) + (let* ((mtconfdat (simple-setup (args:get-arg "-start-dir"))) + (mtconf (car mtconfdat)) + (sect-dat (configf:get-section mtconf (car remargs)))) + (if sect-dat + (for-each + (lambda (entry) + (if (> (length entry) 1) + (print (car entry) " " (cadr entry)) + (print (car entry)))) + sect-dat) + (print "No section \"" (car remargs) "\" found"))) + (print "ERROR: list requires section parameter; areas, setup or contours"))) + ((gendot) + (let* ((mtconfdat (simple-setup (args:get-arg "-start-dir"))) + (mtconf (car mtconfdat))) + (common:with-queue-db + mtconf + (lambda (pktsdirs pktsdir conn) + ;; pktspec display-fields + (make-report "out.dot" conn + '((cmd . ((parent . P) + (user . M) + (target . t))) + (runstart . ((parent . P) + (target . t))) + (runtype . ((parent . P)))) ;; pktspec + '(P U t) ;; + ))))) ;; no ptypes listed (ptypes are strings of pkt types to read from db ((db) (if (null? remargs) (print "ERROR: missing sub command for db command") (let ((subcmd (car remargs))) (case (string->symbol subcmd) ((pgschema) (let* ((install-home (common:get-install-area)) (schema-file (conc install-home "/share/db/mt-pg.sql"))) - (if (file-exists? schema-file) + (if (common:file-exists? schema-file) (system (conc "/bin/cat " schema-file))))) ((sqlite3schema) (let* ((install-home (common:get-install-area)) (schema-file (conc install-home "/share/db/mt-sqlite3.sql"))) - (if (file-exists? schema-file) + (if (common:file-exists? schema-file) (system (conc "/bin/cat " schema-file))))) ((junk) - (rmt:get-keys)))))))) + (rmt:get-keys)))))) + ((tsend) + (if (null? remargs) + (print "ERROR: missing data to send to trigger listeners") + (let* ((msg (car remargs)) + (mtconfdat (simple-setup (args:get-arg "-start-dir"))) + (mtconf (car mtconfdat)) + (listeners (configf:get-section mtconf "listeners")) + (prev-seen (make-hash-table))) ;; catch duplicates + (for-each + (lambda (listener) + (let ((host-port (car listener)) + (remdat (cdr listener))) + (print "sending " msg " to " host-port) + (open-send-close-nn host-port msg timeout: 2))) + listeners)))) + ((tlisten) + (if (null? remargs) + (print "ERROR: useage for tlisten is \"mtutil tlisten portnum\"") + (let ((portnum (string->number (car remargs)))) + (if (not portnum) + (print "ERROR: the portnumber parameter must be a number, you gave: " (car remargs)) + (let* ((rep (start-nn-server portnum)) + (mtconfdat (simple-setup (args:get-arg "-start-dir"))) + (mtconf (car mtconfdat)) + (script (configf:lookup mtconf "listener" "script"))) + (print "Listening on port " portnum " for messages") + (let loop ((instr (nn-recv rep))) + (print "received " instr ", running \"" script " " instr "\"") + (system (conc script " " instr)) + (nn-send rep "ok") + (loop (nn-recv rep)))))))) + + )) ;; the end + ;; If HTTP_HOST is defined then we must be in the cgi environment ;; so run stml and exit ;; (if (get-environment-variable "HTTP_HOST") @@ -963,5 +1238,11 @@ (install-history-file (get-environment-variable "HOME") ".mtutil_history") ;; [homedir] [filename] [nlines]) (current-input-port (make-readline-port "mtutil> ")) (if (args:get-arg "-repl") (repl) (load (args:get-arg "-load"))))) + +#| +(define mtconf (car (simple-setup #f))) +(define dat (with-queue-db mtconf (lambda (conn)(get-pkts conn '())))) +(pp (pkts#flatten-all dat '((cmd . ((parent . P)(url . M)))(runtype . ((parent . P)))) 'id 'group-id 'uuid 'parent 'pkt-type 'pkt 'processed)) +|# Index: newdashboard.scm ================================================================== --- newdashboard.scm +++ newdashboard.scm @@ -84,11 +84,11 @@ (print help) (exit))) ;; ease debugging by loading ~/.dashboardrc (let ((debugcontrolf (conc (get-environment-variable "HOME") "/.dashboardrc"))) - (if (file-exists? debugcontrolf) + (if (common:file-exists? debugcontrolf) (load debugcontrolf))) (debug:setup) (define *tim* (iup:timer)) @@ -375,11 +375,11 @@ #f)) (define (test-panel window-id) (let* ((curr-row-num 0) (viewlog (lambda (x) - (if (file-exists? logfile) + (if (common:file-exists? logfile) ;(system (conc "firefox " logfile "&")) (iup:send-url logfile) (message-window (conc "File " logfile " not found"))))) (xterm (lambda (x) (if (directory-exists? rundir) @@ -730,14 +730,15 @@ (lambda (x) ;; Want to dedicate no more than 50% of the time to this so skip if ;; 2x delta time has not passed since last query (if (< nextmintime (current-milliseconds)) (let* ((starttime (current-milliseconds)) - (changes (dcommon:run-update keys data runname keypatts testpatt states statuses 'full my-window-id)) + ;; (changes (dcommon:run-update keys data runname keypatts testpatt states statuses 'full my-window-id)) (endtime (current-milliseconds))) (set! nextmintime (+ endtime (* 2 (- endtime starttime)))) - (debug:print 11 *default-log-port* "CHANGE(S): " (car changes) "...")) + ;; (debug:print 11 *default-log-port* "CHANGE(S): " (car changes) "...")) + ) (debug:print-info 11 *default-log-port* "Server overloaded")))))) ;; (dboard:data-updaters-set! *data* (make-hash-table)) (newdashboard #f) ;; *dbstruct-local*) (iup:main-loop) Index: ods.scm ================================================================== --- ods.scm +++ ods.scm @@ -197,11 +197,11 @@ ;; '( (sheet1 (r1c1 r1c2 r1c3 ...) ;; (r2c1 r2c3 r2c3 ...) ) ;; (sheet2 ( ... ) ;; ( ... ) ) ) (define (ods:list->ods path fname data) - (if (not (file-exists? path)) + (if (not (common:file-exists? path)) (print "ERROR: path to create ods data must pre-exist") (begin (with-output-to-file (conc path "/content.xml") (lambda () (ods:construct-dir path) Index: portlogger.scm ================================================================== --- portlogger.scm +++ portlogger.scm @@ -19,11 +19,11 @@ ;; lsof -i (define (portlogger:open-db fname) (let* ((avail (tasks:wait-on-journal fname 5 remove: #t)) ;; wait up to about 10 seconds for the journal to go away - (exists (file-exists? fname)) + (exists (common:file-exists? fname)) (db (if avail (sqlite3:open-database fname) (begin (system (conc "rm -f " fname)) (sqlite3:open-database fname)))) @@ -56,12 +56,12 @@ exn (begin ;; (release-dot-lock fname) (debug:print-error 0 *default-log-port* "portlogger:open-run-close failed. " proc " " params) (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) - (debug:print 0 *default-log-port* "exn=" (condition->list exn)) - (if (file-exists? fname)(delete-file fname)) ;; brutally get rid of it + (debug:print 5 *default-log-port* "exn=" (condition->list exn)) + (if (common:file-exists? fname)(delete-file fname)) ;; brutally get rid of it (print-call-chain (current-error-port))) (let* (;; (lock (obtain-dot-lock fname 2 9 10)) (db (portlogger:open-db fname)) (res (apply proc db params))) (sqlite3:finalize! db) @@ -103,11 +103,11 @@ (handle-exceptions exn (begin (debug:print 0 *default-log-port* "EXCEPTION: portlogger database probably overloaded or unreadable. If you see this message again remove /tmp/.$USER-portlogger.db") (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) - (debug:print 0 *default-log-port* "exn=" (condition->list exn)) + (debug:print 5 *default-log-port* "exn=" (condition->list exn)) (print-call-chain (current-error-port)) (debug:print 0 *default-log-port* "Continuing anyway.") #f) (sqlite3:fold-row (lambda (var curr) @@ -128,11 +128,11 @@ (handle-exceptions exn (begin (debug:print 0 *default-log-port* "EXCEPTION: portlogger database probably overloaded or unreadable. If you see this message again remove /tmp/.$USER-portlogger.db") (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) - (debug:print 0 *default-log-port* "exn=" (condition->list exn)) + (debug:print 5 *default-log-port* "exn=" (condition->list exn)) (print-call-chain (current-error-port)) (debug:print 0 *default-log-port* "Continuing anyway.")) (portlogger:take-port db portnum)) portnum)) @@ -158,11 +158,11 @@ (handle-exceptions exn (begin (debug:print 0 *default-log-port* "EXCEPTION: portlogger database at " dbfname " probably overloaded or unreadable. Try removing it.") (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) - (print "exn=" (condition->list exn)) + (debug:print 5 *default-log-port* "exn=" (condition->list exn)) (debug:print 0 *default-log-port* " status: " ((condition-property-accessor 'sqlite3 'status) exn)) (print-call-chain (current-error-port)) #f) (case (string->symbol (car args)) ;; commands with two or more params ((take)(portlogger:take-port db (string->number (cadr args)))) Index: process.scm ================================================================== --- process.scm +++ process.scm @@ -13,11 +13,10 @@ ;; Process convience utils ;;====================================================================== (use regex) (declare (unit process)) -;;(declare (uses common)) (define (process:conservative-read port) (let loop ((res "")) (if (not (eof-object? (peek-char port))) (loop (conc res (read-char port))) @@ -53,11 +52,11 @@ (handle-exceptions exn (begin (print "ERROR: Failed to run command: " cmd " " (string-intersperse params " ")) (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) - (print "exn=" (condition->list exn)) + (debug:print 5 *default-log-port* "exn=" (condition->list exn)) #f) (let-values (((fh fho pid) (if (null? params) (process cmd) (process cmd params)))) (let loop ((curr (read-line fh)) @@ -143,11 +142,11 @@ (define (process:alive? pid) (handle-exceptions exn ;; possibly pid is a process not a child, look in /proc to see if it is running still - (file-exists? (conc "/proc/" pid)) + (common:file-exists? (conc "/proc/" pid)) (let-values (((rpid exit-type exit-signal)(process-wait pid #t))) (and (number? rpid) (equal? rpid pid))))) (define (process:alive-on-host? host pid) Index: rmt.scm ================================================================== --- rmt.scm +++ rmt.scm @@ -11,13 +11,11 @@ (use format typed-records) ;; RADT => purpose of json format?? (declare (unit rmt)) (declare (uses api)) -(declare (uses tdb)) (declare (uses http-transport)) -;;(declare (uses nmsg-transport)) (include "common_records.scm") ;; ;; THESE ARE ALL CALLED ON THE CLIENT SIDE!!! ;; @@ -48,13 +46,19 @@ ;; RA => e.g. usage (rmt:send-receive 'get-var #f (list varname)) ;; (define (rmt:send-receive cmd rid params #!key (attemptnum 1)(area-dat #f)) ;; start attemptnum at 1 so the modulo below works as expected + ;;DOT digraph megatest_state_status { + ;;DOT ranksep=0; + ;;DOT // rankdir=LR; + ;;DOT node [shape="box"]; + ;;DOT "rmt:send-receive" -> MUTEXLOCK; + ;;DOT { edge [style=invis];"case 1" -> "case 2" -> "case 3" -> "case 4" -> "case 5" -> "case 6" -> "case 7" -> "case 8" -> "case 9" -> "case 10" -> "case 11"; } ;; do all the prep locked under the rmt-mutex (mutex-lock! *rmt-mutex*) - + ;; 1. check if server is started IFF cmd is a write OR if we are not on the homehost, store in runremote ;; 2. check the age of the connections. refresh the connection if it is older than timeout-20 seconds. ;; 3. do the query, if on homehost use local access ;; (let* ((start-time (current-seconds)) ;; snapshot time so all use cases get same value @@ -71,114 +75,160 @@ (remote-ro-mode-set! runremote ro-mode) (remote-ro-mode-checked-set! runremote #t) ro-mode) ro-mode))))) - ;; ensure we have a record for our connection for given area - (if (not runremote) ;; can remove this one. should never get here. - (begin - (set! *runremote* (make-remote)) - (set! runremote *runremote*))) ;; new runremote will come from this on next iteration - ;; ensure we have a homehost record - (if (not (pair? (remote-hh-dat runremote))) ;; not on homehost - (thread-sleep! 0.1) ;; since we shouldn't get here, delay a little - (remote-hh-dat-set! runremote (common:get-homehost))) - - ;;(print "BB> readonly-mode is "readonly-mode" dbfile is "dbfile) + ;; DOT INIT_RUNREMOTE; // leaving off - doesn't really add to the clarity + ;; DOT MUTEXLOCK -> INIT_RUNREMOTE [label="no remote?"]; + ;; DOT INIT_RUNREMOTE -> MUTEXLOCK; + ;; ensure we have a record for our connection for given area + (if (not runremote) ;; can remove this one. should never get here. + (begin + (set! *runremote* (make-remote)) + (set! runremote *runremote*))) ;; new runremote will come from this on next iteration + + ;; DOT SET_HOMEHOST; // leaving off - doesn't really add to the clarity + ;; DOT MUTEXLOCK -> SET_HOMEHOST [label="no homehost?"]; + ;; DOT SET_HOMEHOST -> MUTEXLOCK; + ;; ensure we have a homehost record + (if (not (pair? (remote-hh-dat runremote))) ;; not on homehost + (thread-sleep! 0.1) ;; since we shouldn't get here, delay a little + (remote-hh-dat-set! runremote (common:get-homehost))) + + ;;(print "BB> readonly-mode is "readonly-mode" dbfile is "dbfile) (cond + ;;DOT EXIT; + ;;DOT MUTEXLOCK -> EXIT [label="> 15 attempts"]; {rank=same "case 1" "EXIT" } ;; give up if more than 15 attempts ((> attemptnum 15) (debug:print 0 *default-log-port* "ERROR: 15 tries to start/connect to server. Giving up.") (exit 1)) - ;; readonly mode, read request- handle it - case 20 + ;;DOT CASE2 [label="local\nreadonly\nquery"]; + ;;DOT MUTEXLOCK -> CASE2; {rank=same "case 2" CASE2} + ;;DOT CASE2 -> "rmt:open-qry-close-locally"; + ;; readonly mode, read request- handle it - case 2 ((and readonly-mode (member cmd api:read-only-queries)) (mutex-unlock! *rmt-mutex*) - (debug:print-info 12 *default-log-port* "rmt:send-receive, case 20") + (debug:print-info 12 *default-log-port* "rmt:send-receive, case 2") (rmt:open-qry-close-locally cmd 0 params) ) + ;;DOT CASE3 [label="write in\nread-only mode"]; + ;;DOT MUTEXLOCK -> CASE3 [label="readonly\nmode?"]; {rank=same "case 3" CASE3} + ;;DOT CASE3 -> "#f"; ;; readonly mode, write request. Do nothing, return #f (readonly-mode (mutex-unlock! *rmt-mutex*) - (debug:print-info 12 *default-log-port* "rmt:send-receive, case 21") + (debug:print-info 12 *default-log-port* "rmt:send-receive, case 3") (debug:print 0 *default-log-port* "WARNING: write transaction requested on a readonly area. cmd="cmd" params="params) - #f - ) - - ;; reset the connection if it has been unused too long - ((and runremote - (remote-conndat runremote) - (let ((expire-time (+ (- start-time (remote-server-timeout runremote))(random 10)))) ;; Subtract or add the random value? Seems like it should be substract but Neither fixes the "WARNING: failure in with-input-from-request to #.\n message: Server closed connection before sending response" - (< (http-transport:server-dat-get-last-access (remote-conndat runremote)) expire-time))) - (debug:print-info 0 *default-log-port* "Connection to " (remote-server-url runremote) " expired due to no accesses, forcing new connection.") - (remote-conndat-set! runremote #f) ;; invalidate the connection, thus forcing a new connection. - (mutex-unlock! *rmt-mutex*) - (rmt:send-receive cmd rid params attemptnum: attemptnum)) + #f) + + ;; This block was for pre-emptively resetting the connection if there had been no communication for some time. + ;; I don't think it adds any value. If the server is not there, just fail and start a new connection. + ;; also, the expire-time calculation might not be correct. We want, time-since-last-server-access > (server:get-timeout) + ;; + ;; ;;DOT CASE4 [label="reset\nconnection"]; + ;; ;;DOT MUTEXLOCK -> CASE4 [label="have connection,\nlast_access > expire_time"]; {rank=same "case 4" CASE4} + ;; ;;DOT CASE4 -> "rmt:send-receive"; + ;; ;; reset the connection if it has been unused too long + ;; ((and runremote + ;; (remote-conndat runremote) + ;; (let ((expire-time (+ (- start-time (remote-server-timeout runremote))(random 10)))) ;; Subtract or add the random value? Seems like it should be substract but Neither fixes the "WARNING: failure in with-input-from-request to #.\n message: Server closed connection before sending response" + ;; (< (http-transport:server-dat-get-last-access (remote-conndat runremote)) expire-time))) + ;; (debug:print-info 0 *default-log-port* "Connection to " (remote-server-url runremote) " expired due to no accesses, forcing new connection.") + ;; (http-transport:close-connections area-dat: runremote) + ;; (remote-conndat-set! runremote #f) ;; invalidate the connection, thus forcing a new connection. + ;; (mutex-unlock! *rmt-mutex*) + ;; (rmt:send-receive cmd rid params attemptnum: attemptnum)) + + ;;DOT CASE5 [label="local\nread"]; + ;;DOT MUTEXLOCK -> CASE5 [label="server not required,\non homehost,\nread-only query"]; {rank=same "case 5" CASE5}; + ;;DOT CASE5 -> "rmt:open-qry-close-locally"; ;; on homehost and this is a read - ((and (not (remote-force-server runremote)) ;; honor forced use of server - (cdr (remote-hh-dat runremote)) ;; on homehost - (member cmd api:read-only-queries)) ;; this is a read + ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required + (cdr (remote-hh-dat runremote)) ;; on homehost + (member cmd api:read-only-queries)) ;; this is a read (mutex-unlock! *rmt-mutex*) - (debug:print-info 12 *default-log-port* "rmt:send-receive, case 3") + (debug:print-info 12 *default-log-port* "rmt:send-receive, case 5") (rmt:open-qry-close-locally cmd 0 params)) + ;;DOT CASE6 [label="init\nremote"]; + ;;DOT MUTEXLOCK -> CASE6 [label="on homehost,\nwrite query,\nhave server,\ncan't reach it"]; {rank=same "case 6" CASE6}; + ;;DOT CASE6 -> "rmt:send-receive"; ;; on homehost and this is a write, we already have a server, but server has died ((and (cdr (remote-hh-dat runremote)) ;; on homehost (not (member cmd api:read-only-queries)) ;; this is a write (remote-server-url runremote) ;; have a server (not (server:ping (remote-server-url runremote)))) ;; server has died. NOTE: this is not a cheap call! Need better approach. (set! *runremote* (make-remote)) + (remote-force-server-set! runremote (common:force-server?)) (mutex-unlock! *rmt-mutex*) - (debug:print-info 12 *default-log-port* "rmt:send-receive, case 4.1") + (debug:print-info 12 *default-log-port* "rmt:send-receive, case 6") (rmt:send-receive cmd rid params attemptnum: attemptnum)) + ;;DOT CASE7 [label="homehost\nwrite"]; + ;;DOT MUTEXLOCK -> CASE7 [label="server not required,\non homehost,\na write,\nhave a server"]; {rank=same "case 7" CASE7}; + ;;DOT CASE7 -> "rmt:open-qry-close-locally"; ;; on homehost and this is a write, we already have a server - ((and (not (remote-force-server runremote)) ;; honor forced use of server + ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required (cdr (remote-hh-dat runremote)) ;; on homehost (not (member cmd api:read-only-queries)) ;; this is a write (remote-server-url runremote)) ;; have a server (mutex-unlock! *rmt-mutex*) - (debug:print-info 12 *default-log-port* "rmt:send-receive, case 4") + (debug:print-info 12 *default-log-port* "rmt:send-receive, case 4.1") (rmt:open-qry-close-locally cmd 0 params)) + ;;DOT CASE8 [label="force\nserver"]; + ;;DOT MUTEXLOCK -> CASE8 [label="server not required,\nhave homehost info,\nno connection yet,\nnot a read-only query"]; {rank=same "case 8" CASE8}; + ;;DOT CASE8 -> "rmt:open-qry-close-locally"; ;; on homehost, no server contact made and this is a write, passively start a server - ((and (not (remote-force-server runremote)) ;; honor forced use of server - (cdr (remote-hh-dat runremote)) ;; new - (not (remote-server-url runremote)) - (not (member cmd api:read-only-queries))) - (debug:print-info 12 *default-log-port* "rmt:send-receive, case 5") + ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required + (cdr (remote-hh-dat runremote)) ;; have homehost + (not (remote-server-url runremote)) ;; no connection yet + (not (member cmd api:read-only-queries))) ;; not a read-only query + (debug:print-info 12 *default-log-port* "rmt:send-receive, case 8") (let ((server-url (server:check-if-running *toppath*))) ;; (server:read-dotserver->url *toppath*))) ;; (server:check-if-running *toppath*))) ;; Do NOT want to run server:check-if-running - very expensive to do for every write call (if server-url (remote-server-url-set! runremote server-url) ;; the string can be consumed by the client setup if needed (if (common:force-server?) (server:start-and-wait *toppath*) (server:kind-run *toppath*)))) (remote-force-server-set! runremote (common:force-server?)) (mutex-unlock! *rmt-mutex*) - (debug:print-info 12 *default-log-port* "rmt:send-receive, case 5.1") + (debug:print-info 12 *default-log-port* "rmt:send-receive, case 8.1") (rmt:open-qry-close-locally cmd 0 params)) + ;;DOT CASE9 [label="force server\nnot on homehost"]; + ;;DOT MUTEXLOCK -> CASE9 [label="no connection\nand either require server\nor not on homehost"]; {rank=same "case 9" CASE9}; + ;;DOT CASE9 -> "start\nserver" -> "rmt:send-receive"; ((or (and (remote-force-server runremote) ;; we are forcing a server and don't yet have a connection to one (not (remote-conndat runremote))) (and (not (cdr (remote-hh-dat runremote))) ;; not on a homehost (not (remote-conndat runremote)))) ;; and no connection - (debug:print-info 12 *default-log-port* "rmt:send-receive, case 6 hh-dat: " (remote-hh-dat runremote) " conndat: " (remote-conndat runremote)) + (debug:print-info 12 *default-log-port* "rmt:send-receive, case 9, hh-dat: " (remote-hh-dat runremote) " conndat: " (remote-conndat runremote)) (mutex-unlock! *rmt-mutex*) (if (not (server:check-if-running *toppath*)) ;; who knows, maybe one has started up? (server:start-and-wait *toppath*)) - (remote-force-server-set! runremote (common:force-server?)) (remote-conndat-set! runremote (rmt:get-connection-info *toppath*)) ;; calls client:setup which calls client:setup-http (rmt:send-receive cmd rid params attemptnum: attemptnum)) ;; TODO: add back-off timeout as + + ;;DOT CASE10 [label="on homehost"]; + ;;DOT MUTEXLOCK -> CASE10 [label="server not required,\non homehost"]; {rank=same "case 10" CASE10}; + ;;DOT CASE10 -> "rmt:open-qry-close-locally"; ;; all set up if get this far, dispatch the query ((and (not (remote-force-server runremote)) (cdr (remote-hh-dat runremote))) ;; we are on homehost (mutex-unlock! *rmt-mutex*) - (debug:print-info 12 *default-log-port* "rmt:send-receive, case 7") + (debug:print-info 12 *default-log-port* "rmt:send-receive, case 10") (rmt:open-qry-close-locally cmd (if rid rid 0) params)) + ;;DOT CASE11 [label="send_receive"]; + ;;DOT MUTEXLOCK -> CASE11 [label="else"]; {rank=same "case 11" CASE11}; + ;;DOT CASE11 -> "rmt:send-receive" [label="call failed"]; + ;;DOT CASE11 -> "RESULT" [label="call succeeded"]; ;; not on homehost, do server query (else (mutex-unlock! *rmt-mutex*) (debug:print-info 12 *default-log-port* "rmt:send-receive, case 9") (mutex-lock! *rmt-mutex*) @@ -202,24 +252,28 @@ (eq? (vector-length res) 2) (eq? (vector-ref res 1) 'overloaded)) ;; since we are looking at the data to carry the error we'll use a fairly obtuse combo to minimise the chances of some sort of collision. (let ((wait-delay (+ attemptnum (* attemptnum 10)))) (debug:print 0 *default-log-port* "WARNING: server is overloaded. Delaying " wait-delay " seconds and trying call again.") (mutex-lock! *rmt-mutex*) + (http-transport:close-connections area-dat: runremote) (set! *runremote* #f) ;; force starting over (mutex-unlock! *rmt-mutex*) (thread-sleep! wait-delay) (rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1))) res) ;; All good, return res (begin (debug:print 0 *default-log-port* "WARNING: communication failed. Trying again, try num: " attemptnum) (remote-conndat-set! runremote #f) + (http-transport:close-connections area-dat: runremote) (remote-server-url-set! runremote #f) (debug:print-info 12 *default-log-port* "rmt:send-receive, case 9.1") - (if (not (server:check-if-running *toppath*)) - (server:start-and-wait *toppath*)) + ;; (if (not (server:check-if-running *toppath*)) + ;; (server:start-and-wait *toppath*)) (rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1))))))))) + ;;DOT } + ;; (define (rmt:update-db-stats run-id rawcmd params duration) ;; (mutex-lock! *db-stats-mutex*) ;; (handle-exceptions ;; exn ;; (begin @@ -275,11 +329,11 @@ res)) (define (rmt:open-qry-close-locally cmd run-id params #!key (remretries 5)) (let* ((qry-is-write (not (member cmd api:read-only-queries))) (db-file-path (db:dbfile-path)) ;; 0)) - (dbstruct-local (db:setup)) ;; make-dbr:dbstruct path: dbdir local: #t))) + (dbstruct-local (db:setup #t)) ;; make-dbr:dbstruct path: dbdir local: #t))) (read-only (not (file-write-access? db-file-path))) (start (current-milliseconds)) (resdat (if (not (and read-only qry-is-write)) (let ((v (api:execute-requests dbstruct-local (vector (symbol->string cmd) params)))) (handle-exceptions ;; there has been a long history of receiving strange errors from values returned by the client when things go wrong.. @@ -474,16 +528,16 @@ (define (rmt:set-tests-state-status run-id testnames currstate currstatus newstate newstatus) (rmt:send-receive 'set-tests-state-status run-id (list run-id testnames currstate currstatus newstate newstatus))) (define (rmt:get-tests-for-run run-id testpatt states statuses offset limit not-in sort-by sort-order qryvals last-update mode) - (if (number? run-id) - (rmt:send-receive 'get-tests-for-run run-id (list run-id testpatt states statuses offset limit not-in sort-by sort-order qryvals last-update mode)) - (begin - (debug:print-error 0 *default-log-port* "rmt:get-tests-for-run called with bad run-id=" run-id) - (print-call-chain (current-error-port)) - '()))) + ;; (if (number? run-id) + (rmt:send-receive 'get-tests-for-run run-id (list run-id testpatt states statuses offset limit not-in sort-by sort-order qryvals last-update mode))) + ;; (begin + ;; (debug:print-error 0 *default-log-port* "rmt:get-tests-for-run called with bad run-id=" run-id) + ;; (print-call-chain (current-error-port)) + ;; '()))) ;; get stuff via synchash (define (rmt:synchash-get run-id proc synckey keynum params) (rmt:send-receive 'synchash-get run-id (list run-id proc synckey keynum params))) @@ -755,14 +809,13 @@ ;; T E S T D A T A ;;====================================================================== (define (rmt:read-test-data run-id test-id categorypatt #!key (work-area #f)) (rmt:send-receive 'read-test-data run-id (list run-id test-id categorypatt))) -;; (let ((tdb (rmt:open-test-db-by-test-id run-id test-id work-area: work-area))) -;; (if tdb -;; (tdb:read-test-data tdb test-id categorypatt) -;; '()))) +(define (rmt:read-test-data* run-id test-id categorypatt varpatt #!key (work-area #f)) + (rmt:send-receive 'read-test-data* run-id (list run-id test-id categorypatt varpatt))) + (define (rmt:testmeta-add-record testname) (rmt:send-receive 'testmeta-add-record #f (list testname))) (define (rmt:testmeta-get-record testname) @@ -790,10 +843,23 @@ (define (rmt:tasks-set-state-given-param-key param-key new-state) (rmt:send-receive 'tasks-set-state-given-param-key #f (list param-key new-state))) (define (rmt:tasks-get-last target runname) (rmt:send-receive 'tasks-get-last #f (list target runname))) + +;;====================================================================== +;; N O S Y N C D B +;;====================================================================== + +(define (rmt:no-sync-set var val) + (rmt:send-receive 'no-sync-set #f `(,var ,val))) + +(define (rmt:no-sync-get/default var default) + (rmt:send-receive 'no-sync-get/default #f `(,var ,default))) + +(define (rmt:no-sync-del! var) + (rmt:send-receive 'no-sync-del! #f `(,var))) ;;====================================================================== ;; A R C H I V E S ;;====================================================================== Index: rpctest/rpctest-continuous-client.scm ================================================================== --- rpctest/rpctest-continuous-client.scm +++ rpctest/rpctest-continuous-client.scm @@ -17,11 +17,11 @@ (print "Operation: " operation ", param: " param) ;; have a pool of db's to pick from (define *dbpool* '()) (define *pool-mutex* (make-mutex)) -1 + (define (get-db) (mutex-lock! *pool-mutex*) (if (null? *dbpool*) (begin (mutex-unlock! *pool-mutex*) Index: runconfig.scm ================================================================== --- runconfig.scm +++ runconfig.scm @@ -73,11 +73,11 @@ (let ((runconfigf (conc *toppath* "/runconfigs.config")) (targ (or (common:args-get-target) targ-from-db (get-environment-variable "MT_TARGET")))) (pop-directory) - (if (file-exists? runconfigf) + (if (common:file-exists? runconfigf) (setup-env-defaults runconfigf run-id #t keyvals environ-patt: (conc "(default" (if targ (conc "|" targ ")") ")"))) Index: runconfigs.config ================================================================== --- runconfigs.config +++ runconfigs.config @@ -7,13 +7,13 @@ # # example of a cron entry to run sync using db spec pgdb, with pgdb setting in file local.config # [a/b/c] -all:scheduled:sync cron= 0/5 * * * *;dbdest=pgdb;appendconf=/mfs/matt/.sysmaint/local.config -quick:scheduled:sync cron= 0/5 * * * *;dbdest=pgdb;appendconf=/mfs/matt/.sysmaint/local.config -fast:scheduled:sync-prepend cron= 0/1 * * * *;dbdest=pgdb;appendconf=/mfs/matt/.sysmaint/local.config +all:scheduled:sync cron= 0/5 * * * *;dbdest=pgdb;appendconf=/nfs/phoebe/disk1/home/mfs/matt/.sysmaint/local.config +# quick:scheduled:sync cron= 0/5 * * * *;dbdest=pgdb;appendconf=/nfs/phoebe/disk1/home/mfs/matt/.sysmaint/local.config +# fast:scheduled:sync-prepend cron= 0/1 * * * *;dbdest=pgdb;appendconf=/mfs/matt/.sysmaint/local.config [scriptinc ./gentargets.sh #{getenv USER}] # [v1.23/45/67] # tip will be replaced with hashkey? @@ -25,24 +25,25 @@ # [v1.63/tip/dev] # file: files changes since last run trigger new run # script: script is called with unix seconds as last parameter (other parameters are preserved) # # contour:sensetype:action params data -quick:file:run runtrans=auto; glob=/home/matt/data/megatest/*.scm -snazy:file:run runtrans=corporate-ww; glob=/home/matt/data/megatest/*.scm -short:file:run runtrans=short; glob=/home/matt/data/megatest/*.scm +# commented out for debug +quick:file:run runtrans=auto; glob=/home/matt/data/megatest/*.scm foo.touchme +# snazy:file:run runtrans=corporate-ww; glob=/home/matt/data/megatest/*.scm +# short:file:run runtrans=short; glob=/home/matt/data/megatest/*.scm # script returns change-time (unix epoch), new-target-name, run-name # # quick:script:run checkfossil = http://www.kiatoa.com/fossils/megatest v1.63;\ # checkfossil = http://www.kiatoa.com/fossils/megatest_qa trunk -# fossil based trigger -# -quick:fossil:run http://www.kiatoa.com/fossils/megatest=v1.63;\ - http://www.kiatoa.com/fossils/megatest_qa=trunk;\ - http://www.kiatoa.com/fossils/megatest=v1.64 +# # fossil based trigger +# # +# quick:fossil:run http://www.kiatoa.com/fossils/megatest=v1.63;\ +# http://www.kiatoa.com/fossils/megatest_qa=trunk;\ +# http://www.kiatoa.com/fossils/megatest=v1.64 # field allowed values # ----- -------------- # minute 0-59 # hour 0-23 Index: runs.scm ================================================================== --- runs.scm +++ runs.scm @@ -8,13 +8,12 @@ ;; implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR ;; PURPOSE. ;; strftime('%m/%d/%Y %H:%M:%S','now','localtime') -(use sqlite3 srfi-1 posix regex regex-case srfi-69 dot-locking (srfi 18) +(use (prefix sqlite3 sqlite3:) srfi-1 posix regex regex-case srfi-69 (srfi 18) posix-extras directory-utils pathname-expand typed-records format) -(import (prefix sqlite3 sqlite3:)) (declare (unit runs)) (declare (uses db)) (declare (uses common)) (declare (uses items)) @@ -21,11 +20,10 @@ (declare (uses runconfig)) (declare (uses tests)) (declare (uses server)) (declare (uses mt)) (declare (uses archive)) -(declare (uses keys)) ;; (declare (uses filedb)) (include "common_records.scm") (include "key_records.scm") (include "db_records.scm") @@ -96,18 +94,28 @@ (msg ((condition-property-accessor 'exn 'message) exn))) (if (< count 5) (begin ;; this call is colliding, do some crude stuff to fix it. (debug:print 0 *default-log-port* "ERROR: *configdat* was inaccessible! This should never happen. Retry #" count) (launch:setup force-reread: #t) - (fatal-loop (+ count 1))) + (fatal-loop (+ count 1))) (begin (debug:print 0 *default-log-port* "FATAL: *configdat* was inaccessible! This should never happen. Retried " count " times. Message: " msg) (debug:print 0 *default-log-port* "Call chain:") (with-output-to-port *default-log-port* - (lambda ()(pp call-chain))) + + (lambda () + (print "*configdat* is >>"*configdat*"<<") + (pp *configdat*) + (pp call-chain))) + (exit 1)))) ;;(bb-check-path msg: "runs:set-megatest-env-vars block 1.5") + (when (or (not *configdat*) (not (hash-table? *configdat*))) + (debug:print 0 *default-log-port* "WARNING: *configdat* was inaccessible! This should never happen. Brute force reread.") + ;;(BB> "ERROR: *configdat* was inaccessible! This should never happen. Brute force reread.") + (thread-sleep! 2) ;; assuming nfs lag. + (launch:setup force-reread: #t)) (alist->env-vars (hash-table-ref/default *configdat* "env-override" '())))) ;;;; environment is tainted HERE in this let block. ;;(bb-check-path msg: "runs:set-megatest-env-vars block 2") ;; Lets use this as an opportunity to put MT_RUNNAME in the environment (let ((runname (if inrunname inrunname (rmt:get-run-name-from-id run-id)))) (if runname @@ -218,11 +226,49 @@ " in jobgroup \"" jobgroup "\" exceeds limit of " job-group-limit)) #t) (else #f)))) (list (not can-not-run-more) num-running num-running-in-jobgroup max-concurrent-jobs job-group-limit))))) - +(define (runs:run-pre-hook run-id) + (let* ((run-pre-hook (configf:lookup *configdat* "runs" "pre-hook")) + (existing-tests (if run-pre-hook + (rmt:get-tests-for-run run-id "%" '() '() ;; run-id testpatt states statuses + #f #f ;; offset limit + #f ;; not-in + #f ;; sort-by + #f ;; sort-order + #f ;; get full data (not 'shortlist) + 0 ;; (runs:gendat-inc-results-last-update *runs:general-data*) ;; last update time + 'dashboard) + '())) + (log-dir (conc *toppath* "/logs")) + (log-file (conc "pre-hook-" (string-translate (getenv "MT_TARGET") "/" "-") "-" (getenv "MT_RUNNAME") ".log")) + (full-log-fname (conc log-dir "/" log-file))) + (if run-pre-hook + (if (null? existing-tests) + (let* ((use-log-dir (if (not (directory-exists? log-dir)) + (handle-exceptions + exn + (begin + (debug:print 0 *default-log-port* "WARNING: Failed to create " log-dir) + #f) + (create-directory log-dir #t) + #t) + #t)) + (start-time (current-seconds)) + (actual-logf (if use-log-dir full-log-fname log-file))) + (handle-exceptions + exn + (begin + (print-call-chain *default-log-port*) + (debug:print 0 *default-log-port* "Message: " ((condition-property-accessor 'exn 'message) exn)) + (debug:print 0 *default-log-port* "ERROR: failed to run pre-hook " run-pre-hook ", check the log " log-file)) + (debug:print-info 0 *default-log-port* "running run-pre-hook: \"" run-pre-hook "\", log is " actual-logf) + (system (conc run-pre-hook " >> " actual-logf " 2>&1")) + (debug:print-info 0 *default-log-port* "pre-hook \"" run-pre-hook "\" took " (- (current-seconds) start-time) " seconds to run."))) + (debug:print 0 *default-log-port* "Skipping pre-hook call \"" run-pre-hook "\" as there are existing tests for this run."))))) + ;; test-names: Comma separated patterns same as test-patts but used in selection ;; of tests to run. The item portions are not respected. ;; FIXME: error out if /patt specified ;; (define (runs:run-tests target runname test-patts user flags #!key (run-count 1)) ;; test-names @@ -284,18 +330,24 @@ ;; force the starting of a server (debug:print 0 *default-log-port* "waiting on server...") (server:start-and-wait *toppath*) (runs:set-megatest-env-vars run-id inkeys: keys inrunname: runname) ;; these may be needed by the launching process - (set! runconf (if (file-exists? runconfigf) + (set! runconf (if (common:file-exists? runconfigf) (setup-env-defaults runconfigf run-id *already-seen-runconfig-info* keyvals target) (begin (debug:print 0 *default-log-port* "WARNING: You do not have a run config file: " runconfigf) #f))) (if (not test-patts) ;; first time in - adjust testpatt (set! test-patts (common:args-get-testpatt runconf))) + ;; if test-patts is #f at this point there is something wrong and we need to bail out + (if (not test-patts) + (begin + (debug:print 0 *default-log-port* "WARNING: there is no test pattern for this run. Exiting now.") + (exit 0))) + (if (args:get-arg "-tagexpr") (begin (set! allowed-tests (string-join (runs:get-tests-matching-tags (args:get-arg "-tagexpr")) ",")) (debug:print-info 0 *default-log-port* "filtering initial test list with tagexpr: " (args:get-arg "-tagexpr") " => " allowed-tests) ));; tests will be ANDed with this list @@ -361,10 +413,14 @@ (string-split (or (configf:lookup *configdat* "setup" "allow-auto-rerun") ""))))) ;; Ensure all tests are registered in the test_meta table (runs:update-all-test_meta #f) + ;; run the run prehook if there are no tests yet run for this run: + ;; + (runs:run-pre-hook run-id) + ;; now add non-directly referenced dependencies (i.e. waiton) ;;====================================================================== ;; refactoring this block into tests:get-full-data ;; ;; What happended, this code is now duplicated in tests!? @@ -457,11 +513,17 @@ (let ((reglen (configf:lookup *configdat* "setup" "runqueue"))) (if (> (length (hash-table-keys test-records)) 0) (let* ((keep-going #t) (run-queue-retries 5) (th1 (make-thread (lambda () - (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests (any->number reglen) all-tests-registry)) + (handle-exceptions + exn + (begin + (print-call-chain) + (print " message: " ((condition-property-accessor 'exn 'message) exn))) + (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests + (any->number reglen) all-tests-registry))) ;; (handle-exceptions ;; exn ;; (begin ;; (print-call-chain (current-error-port)) ;; (debug:print-error 0 *default-log-port* "failure in runs:run-tests-queue thread, error: " ((condition-property-accessor 'exn 'message) exn)) @@ -778,12 +840,13 @@ (not (equal? x hed))) (runs:calc-not-completed prereqs-not-met))) (loop-list (list hed tal reg reruns)) ;; configure the load runner (numcpus (common:get-num-cpus #f)) - (maxload (string->number (or (configf:lookup *configdat* "jobtools" "maxload") "3"))) - (waitdelay (string->number (or (configf:lookup *configdat* "jobtools" "waitdelay") "60")))) + (maxload (string->number (or (configf:lookup *configdat* "jobtools" "maxload") "3.0"))) ;; use a non-number string to disable + (maxhomehostload (string->number (or (configf:lookup *configdat* "jobtools" "maxhomehostload") "1.2"))) ;; use a non-number string to disable + (waitdelay (string->number (or (configf:lookup *configdat* "jobtools" "waitdelay") "60")))) (debug:print-info 4 *default-log-port* "have-resources: " have-resources " prereqs-not-met: (" (string-intersperse (map (lambda (t) (if (vector? t) (conc (db:test-get-state t) "/" (db:test-get-status t)) @@ -883,12 +946,15 @@ ;; we are going to reset all the counters for test retries by setting a new hash table ;; this means they will increment only when nothing can be run (set! *max-tries-hash* (make-hash-table)) ;; well, first lets see if cpu load throttling is enabled. If so wait around until the ;; average cpu load is under the threshold before continuing - (if (configf:lookup *configdat* "jobtools" "maxload") ;; only gate if maxload is specified - (common:wait-for-cpuload maxload numcpus waitdelay)) + (if maxload ;; only gate if maxload is specified + (common:wait-for-cpuload maxload numcpus waitdelay)) + (if maxhomehostload + (common:wait-for-homehost-load maxhomehostload (conc "Waiting for homehost load to drop below normalized value of " maxhomehostload))) + (run:test run-id run-info keyvals runname test-record flags #f test-registry all-tests-registry) (runs:incremental-print-results run-id) (hash-table-set! test-registry (db:test-make-full-name test-name item-path) 'running) (runs:shrink-can-run-more-tests-count runsdat) ;; DELAY TWEAKER (still needed?) ;; (thread-sleep! *global-delta*) @@ -1105,15 +1171,12 @@ (sorted-test-names (tests:sort-by-priority-and-waiton test-records)) (test-registry (make-hash-table)) (registry-mutex (make-mutex)) (num-retries 0) (max-retries (config-lookup *configdat* "setup" "maxretries")) - (max-concurrent-jobs (let ((mcj (config-lookup *configdat* "setup" "max_concurrent_jobs"))) - (if (and mcj (string->number mcj)) - (string->number mcj) - 1))) ;; length of the register queue ahead - (reglen (if (number? reglen-in) reglen-in 1)) + (max-concurrent-jobs (configf:lookup-number *configdat* "setup" "max_concurrent_jobs" default: 50)) + (reglen (if (number? reglen-in) reglen-in 1)) (last-time-incomplete (- (current-seconds) 900)) ;; force at least one clean up cycle (last-time-some-running (current-seconds)) ;; (tdbdat (tasks:open-db)) (runsdat (make-runs:dat ;; hed: hed @@ -1209,11 +1272,14 @@ ))) (runs:dat-regfull-set! runsdat regfull) ;; every 15 minutes verify the server is there for this run (if (and (common:low-noise-print 240 "try start server" run-id) - (not (server:check-if-running *toppath*))) + (not (or (and *runremote* + (remote-server-url *runremote*) + (server:ping (remote-server-url *runremote*))) + (server:check-if-running *toppath*)))) (server:kind-run *toppath*)) (if (> num-running 0) (set! last-time-some-running (current-seconds))) @@ -1523,11 +1589,11 @@ (thread-sleep! 1) (loop))))) (if (not testdat) ;; should NOT happen (debug:print-error 0 *default-log-port* "failed to get test record for test-id " test-id)) (set! test-id (db:test-get-id testdat)) - (if (file-exists? test-path) + (if (common:file-exists? test-path) (change-directory test-path) (begin (debug:print-error 0 *default-log-port* "test run path not created before attempting to run the test. Perhaps you are running -remove-runs at the same time?") (change-directory *toppath*))) (case (if force ;; (args:get-arg "-force") @@ -1703,11 +1769,10 @@ (readonly-mode (not (file-write-access? dbfile)))) (when (and readonly-mode (member action write-access-actions)) (debug:print-error 0 *default-log-port* "megatest.db is readonly. Cannot proceed with action ["action"] in which write-access isrequired .") (exit 1))) - (debug:print-info 4 *default-log-port* "runs:operate-on => Header: " header " action: " action " new-state-status: " new-state-status) (if (> 2 (length state-status)) (begin (debug:print-error 0 *default-log-port* "the parameter to -set-state-status is a comma delimited string. E.g. COMPLETED,FAIL") @@ -1854,20 +1919,21 @@ ((archive) (if (and run-dir (not toplevel-with-children)) (let ((ddir (conc run-dir "/"))) (case (string->symbol (args:get-arg "-archive")) ((save save-remove keep-html) - (if (file-exists? ddir) + (if (common:file-exists? ddir) (debug:print-info 0 *default-log-port* "Estimating disk space usage for " test-fulln ": " (common:get-disk-space-used ddir))))))) (if (not (null? tal)) (loop (car tal)(cdr tal)))) ))) ) (if worker-thread (thread-join! worker-thread)))))) ;; remove the run if zero tests remain (if (eq? action 'remove-runs) - (let ((remtests (mt:get-tests-for-run (db:get-value-by-header run header "id") #f '("DELETED") '("n/a") not-in: #t))) + (let* ((run-id (db:get-value-by-header run header "id")) ;; NB// masks run-id from above? + (remtests (mt:get-tests-for-run run-id #f '("DELETED") '("n/a") not-in: #t))) (if (null? remtests) ;; no more tests remaining (let* ((dparts (string-split lasttpath "/")) (runpath (conc "/" (string-intersperse (take dparts (- (length dparts) 1)) "/")))) @@ -1887,11 +1953,11 @@ ) #t) (define (runs:remove-test-directory test mode) ;; remove-data-only) (let* ((run-dir (db:test-get-rundir test)) ;; run dir is from the link tree - (real-dir (if (file-exists? run-dir) + (real-dir (if (common:file-exists? run-dir) ;; (resolve-pathname run-dir) (common:nice-path run-dir) #f))) (case mode ((remove-data-only)(mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "CLEANING" "LOCKED" #f)) @@ -1898,14 +1964,14 @@ ((remove-all) (mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "REMOVING" "LOCKED" #f)) ((archive-remove) (mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "ARCHIVE_REMOVING" #f #f))) (debug:print-info 1 *default-log-port* "Attempting to remove " (if real-dir (conc " dir " real-dir " and ") "") " link " run-dir) (if (and real-dir (> (string-length real-dir) 5) - (file-exists? real-dir)) ;; bad heuristic but should prevent /tmp /home etc. + (common:file-exists? real-dir)) ;; bad heuristic but should prevent /tmp /home etc. (begin ;; let* ((realpath (resolve-pathname run-dir))) (debug:print-info 1 *default-log-port* "Recursively removing " real-dir) - (if (file-exists? real-dir) + (if (common:file-exists? real-dir) (runs:safe-delete-test-dir real-dir) (debug:print 0 *default-log-port* "WARNING: test dir " real-dir " appears to not exist or is not readable"))) (if real-dir (debug:print 0 *default-log-port* "WARNING: directory " real-dir " does not exist") (debug:print 0 *default-log-port* "WARNING: no real directory corrosponding to link " run-dir ", nothing done"))) @@ -1954,11 +2020,12 @@ (let (;; (db #f) (keys #f)) (if (launch:setup) (begin (full-runconfigs-read) ;; cache the run config - (launch:cache-config)) ;; do not cache here - need to be sure runconfigs is processed + ;; (launch:cache-config) ;; there are two independent config cache locations, turning this one off for now. MRW. + ) ;; do not cache here - need to be sure runconfigs is processed (begin (debug:print 0 *default-log-port* "Failed to setup, exiting") (exit 1))) @@ -2115,11 +2182,11 @@ (define (runs:clean-cache target runname toppath) (if target (if runname (let* ((linktree (common:get-linktree)) ;; (if toppath (configf:lookup *configdat* "setup" "linktree"))) (runtop (conc linktree "/" target "/" runname)) - (files (if (file-exists? runtop) + (files (if (common:file-exists? runtop) (append (glob (conc runtop "/.megatest*")) (glob (conc runtop "/.runconfig*"))) '()))) (if (null? files) (debug:print-info 0 *default-log-port* "No cached megatest or runconfigs files found. None removed.") Index: sdb.scm ================================================================== --- sdb.scm +++ sdb.scm @@ -22,11 +22,11 @@ (declare (unit sdb)) ;; (define (sdb:open fname) (let* ((dbpath (pathname-directory fname)) - (dbexists (let ((fe (file-exists? fname))) + (dbexists (let ((fe (common:file-exists? fname))) (if fe fe (begin (create-directory dbpath #t) #f)))) Index: server.scm ================================================================== --- server.scm +++ server.scm @@ -1,7 +1,7 @@ -;; Copyright 2006-2012, Matthew Welland. +;; Copyright 2006-2017, Matthew Welland. ;; ;; This program is made available under the GNU GPL version 2.0 or ;; greater. See the accompanying file COPYING for details. ;; ;; This program is distributed WITHOUT ANY WARRANTY; without even the @@ -8,24 +8,22 @@ ;; implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR ;; PURPOSE. (require-extension (srfi 18) extras tcp s11n) -(use srfi-1 posix regex regex-case srfi-69 hostinfo md5 message-digest directory-utils posix-extras matchable) -;; (use zmq) +(use srfi-1 posix regex regex-case srfi-69 hostinfo md5 message-digest directory-utils posix-extras matchable + ) (use spiffy uri-common intarweb http-client spiffy-request-vars) (declare (unit server)) (declare (uses common)) (declare (uses db)) (declare (uses tasks)) ;; tasks are where stuff is maintained about what is running. -(declare (uses synchash)) +;; (declare (uses synchash)) (declare (uses http-transport)) -(declare (uses rpc-transport)) -;;(declare (uses nmsg-transport)) (declare (uses launch)) (declare (uses daemon)) (include "common_records.scm") (include "db_records.scm") @@ -34,10 +32,16 @@ (if (not hostport) #f (conc "http://" (car hostport) ":" (cadr hostport)))) (define *server-loop-heart-beat* (current-seconds)) + +;;====================================================================== +;; P K T S S T U F F +;;====================================================================== + +;; ??? ;;====================================================================== ;; S E R V E R ;;====================================================================== @@ -206,11 +210,25 @@ (if (null? tal) (if (and limit (> (length new-res) limit)) new-res ;; (take new-res limit) <= need intelligent sorting before this will work new-res) - (loop (car tal)(cdr tal) new-res))))))))) + (loop (car tal)(cdr tal) new-res))))))))) + +(define (server:get-num-alive srvlst) + (let ((num-alive 0)) + (for-each + (lambda (server) + (match-let (((mod-time host port start-time pid) + server)) + (let* ((uptime (- (current-seconds) mod-time)) + (runtime (if start-time + (- mod-time start-time) + 0))) + (if (< uptime 5)(set! num-alive (+ num-alive 1)))))) + srvlst) + num-alive)) ;; given a list of servers get a list of valid servers, i.e. at least ;; 10 seconds old, has started and is less than 1 hour old and is ;; active (i.e. mod-time < 10 seconds ;; @@ -435,5 +453,92 @@ ;; (* 3 24 60 60) ;; default to three days ;;(* 60 60 1) ;; default to one hour (* 60 5) ;; default to five minutes ))) +(define (server:get-best-guess-address hostname) + (let ((res #f)) + (for-each + (lambda (adr) + (if (not (eq? (u8vector-ref adr 0) 127)) + (set! res adr))) + ;; NOTE: This can fail when there is no mention of the host in /etc/hosts. FIXME + (vector->list (hostinfo-addresses (hostname->hostinfo hostname)))) + (string-intersperse + (map number->string + (u8vector->list + (if res res (hostname->ip hostname)))) "."))) + +;; moving this here as it needs access to db and cannot be in common. +;; +(define (server:writable-watchdog dbstruct) + (thread-sleep! 0.05) ;; delay for startup + (let ((legacy-sync (common:run-sync?)) + (debug-mode (debug:debug-mode 1)) + (last-time (current-seconds)) + (no-sync-db (db:open-no-sync-db)) + (this-wd-num (begin (mutex-lock! *wdnum*mutex) (let ((x *wdnum*)) (set! *wdnum* (add1 *wdnum*)) (mutex-unlock! *wdnum*mutex) x)))) + (set! *no-sync-db* no-sync-db) ;; make the no sync db available to api calls + (debug:print-info 2 *default-log-port* "Periodic sync thread started.") + (debug:print-info 3 *default-log-port* "watchdog starting. legacy-sync is " legacy-sync" pid="(current-process-id)" this-wd-num="this-wd-num) + (if (and legacy-sync (not *time-to-exit*)) + (let* (;;(dbstruct (db:setup)) + (mtdb (dbr:dbstruct-mtdb dbstruct)) + (mtpath (db:dbdat-get-path mtdb))) + (debug:print-info 0 *default-log-port* "Server running, periodic sync started.") + (let loop () + ;; sync for filesystem local db writes + ;; + (mutex-lock! *db-multi-sync-mutex*) + (let* ((need-sync (>= *db-last-access* *db-last-sync*)) ;; no sync since last write + (sync-in-progress *db-sync-in-progress*) + (should-sync (and (not *time-to-exit*) + (> (- (current-seconds) *db-last-sync*) 5))) ;; sync every five seconds minimum + (start-time (current-seconds)) + (mt-mod-time (file-modification-time mtpath)) + (recently-synced (< (- start-time mt-mod-time) 4)) + (will-sync (and (or need-sync should-sync) + (not sync-in-progress) + (not recently-synced)))) + (debug:print-info 13 *default-log-port* "WD writable-watchdog top of loop. need-sync="need-sync" sync-in-progress="sync-in-progress" should-sync="should-sync" start-time="start-time" mt-mod-time="mt-mod-time" recently-synced="recently-synced" will-sync="will-sync) + ;; (if recently-synced (debug:print-info 0 *default-log-port* "Skipping sync due to recently-synced flag=" recently-synced)) + ;; (debug:print-info 0 *default-log-port* "need-sync: " need-sync " sync-in-progress: " sync-in-progress " should-sync: " should-sync " will-sync: " will-sync) + (if will-sync (set! *db-sync-in-progress* #t)) + (mutex-unlock! *db-multi-sync-mutex*) + (if will-sync + (let ((res (db:sync-to-megatest.db dbstruct no-sync-db: no-sync-db))) ;; did we sync any data? If so need to set the db touched flag to keep the server alive + (if (> res 0) ;; some records were transferred, keep the db alive + (begin + (mutex-lock! *heartbeat-mutex*) + (set! *db-last-access* (current-seconds)) + (mutex-unlock! *heartbeat-mutex*) + (debug:print-info 0 *default-log-port* "sync called, " res " records transferred.")) + (debug:print-info 2 *default-log-port* "sync called but zero records transferred")))) + (if will-sync + (begin + (mutex-lock! *db-multi-sync-mutex*) + (set! *db-sync-in-progress* #f) + (set! *db-last-sync* start-time) + (mutex-unlock! *db-multi-sync-mutex*))) + (if (and debug-mode + (> (- start-time last-time) 60)) + (begin + (set! last-time start-time) + (debug:print-info 4 *default-log-port* "timestamp -> " (seconds->time-string (current-seconds)) ", time since start -> " (seconds->hr-min-sec (- (current-seconds) *time-zero*)))))) + + ;; keep going unless time to exit + ;; + (if (not *time-to-exit*) + (let delay-loop ((count 0)) + ;;(debug:print-info 13 *default-log-port* "delay-loop top; count="count" pid="(current-process-id)" this-wd-num="this-wd-num" *time-to-exit*="*time-to-exit*) + + (if (and (not *time-to-exit*) + (< count 6)) ;; was 11, changing to 4. + (begin + (thread-sleep! 1) + (delay-loop (+ count 1)))) + (if (not *time-to-exit*) (loop)))) + ;; time to exit, close the no-sync db here + (db:no-sync-close-db no-sync-db) + (if (common:low-noise-print 30) + (debug:print-info 0 *default-log-port* "Exiting watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id)" this-wd-num="this-wd-num))))))) + Index: tasks.scm ================================================================== --- tasks.scm +++ tasks.scm @@ -36,24 +36,24 @@ (handle-exceptions exn (begin (print-call-chain (current-error-port)) (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) - (debug:print 0 *default-log-port* " exn=" (condition->list exn)) + (debug:print 5 *default-log-port* " exn=" (condition->list exn)) (debug:print 0 *default-log-port* "tasks:wait-on-journal failed. Continuing on, you can ignore this call-chain") #t) ;; if stuff goes wrong just allow it to move on - (let loop ((journal-exists (file-exists? fullpath)) + (let loop ((journal-exists (common:file-exists? fullpath)) (count n)) ;; wait ten times ... (if journal-exists (begin (if (and waiting-msg (eq? (modulo n 30) 0)) (debug:print 0 *default-log-port* waiting-msg)) (if (> count 0) (begin (thread-sleep! 1) - (loop (file-exists? fullpath) + (loop (common:file-exists? fullpath) (- count 1))) (begin (debug:print 0 *default-log-port* "ERROR: removing the journal file " fullpath ", this is not good. Look for disk full, write access and other issues.") (if remove (system (conc "rm -rf " fullpath))) #f))) @@ -87,21 +87,21 @@ exn (if (> numretries 0) (begin (print-call-chain (current-error-port)) (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) - (debug:print 0 *default-log-port* " exn=" (condition->list exn)) + (debug:print 5 *default-log-port* " exn=" (condition->list exn)) (thread-sleep! 1) (tasks:open-db numretries (- numretries 1))) (begin (print-call-chain (current-error-port)) (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) - (debug:print 0 *default-log-port* " exn=" (condition->list exn)))) + (debug:print 5 *default-log-port* " exn=" (condition->list exn)))) (let* ((dbpath (db:dbfile-path )) ;; (tasks:get-task-db-path)) (dbfile (conc dbpath "/monitor.db")) (avail (tasks:wait-on-journal dbpath 10)) ;; wait up to about 10 seconds for the journal to go away - (exists (file-exists? dbpath)) + (exists (common:file-exists? dbpath)) (write-access (file-write-access? dbpath)) (mdb (cond ;; what the hek is *toppath* doing here? ((and (string? *toppath*)(file-write-access? *toppath*)) (sqlite3:open-database dbfile)) ((file-read-access? dbpath) (sqlite3:open-database dbfile)) @@ -183,15 +183,27 @@ ;; no elegance here ... ;; (define (tasks:kill-server hostname pid #!key (kill-switch "")) (debug:print-info 0 *default-log-port* "Attempting to kill server process " pid " on host " hostname) (setenv "TARGETHOST" hostname) - (setenv "TARGETHOST_LOGF" "server-kills.log") - (system (conc "nbfake kill "kill-switch" "pid)) + (let* ((logdir (if (directory-exists? "logs") + "logs/" + "")) + (logfile (if logdir (conc "logs/server-"pid"-"hostname".log") #f)) + (gzfile (if logfile (conc logfile ".gz")))) + (setenv "TARGETHOST_LOGF" (conc logdir "server-kills.log")) + + (system (conc "nbfake kill "kill-switch" "pid)) - (unsetenv "TARGETHOST_LOGF") - (unsetenv "TARGETHOST")) + (when logfile + (thread-sleep! 0.5) + (if (common:file-exists? gzfile) (delete-file gzfile)) + (system (conc "gzip " logfile)) + + (unsetenv "TARGETHOST_LOGF") + (unsetenv "TARGETHOST")))) + ;;====================================================================== ;; M O N I T O R S ;;====================================================================== ADDED tcmt.scm Index: tcmt.scm ================================================================== --- /dev/null +++ tcmt.scm @@ -0,0 +1,172 @@ +;; Copyright 2006-2017, Matthew Welland. +;; +;; This program is made available under the GNU GPL version 2.0 or +;; greater. See the accompanying file COPYING for details. +;; +;; This program is distributed WITHOUT ANY WARRANTY; without even the +;; implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR +;; PURPOSE. +;; +;;====================================================================== +;; +;; Wrapper to enable running Megatest flows under teamcity +;; +;; 1. Run the megatest process and pass it all the needed parameters +;; 2. Every five seconds check for state/status changes and print the info +;; + +(use srfi-1 posix srfi-69 srfi-18 regex) + +(declare (uses margs)) +(declare (uses rmt)) +(declare (uses common)) +(declare (uses megatest-version)) + +(include "megatest-fossil-hash.scm") +(include "db_records.scm") + +(define origargs (cdr (argv))) +(define remargs (args:get-args + (argv) + `( "-target" + "-reqtarg" + "-runname" + ) + `("-tc-repl" + ) + args:arg-hash + 0)) + +;; ##teamcity[testStarted name='suite.testName'] +;; ##teamcity[testStdOut name='suite.testName' out='text'] +;; ##teamcity[testStdErr name='suite.testName' out='error text'] +;; ##teamcity[testFailed name='suite.testName' message='failure message' details='message and stack trace'] +;; ##teamcity[testFinished name='suite.testName' duration='50'] +;; + +(define (print-changes-since data run-ids last-update tsname target runname) + (let ((now (current-seconds))) + (handle-exceptions + exn + (begin (print-call-chain) (print "Error message: " ((condition-property-accessor 'exn 'message) exn))) + (for-each + (lambda (run-id) + (let* ((tests (rmt:get-tests-for-run run-id "%" '() '() #f #f #f #f #f #f last-update #f))) + ;; (print "DEBUG: got tests=" tests) + (for-each + (lambda (testdat) + (let* ((testn (db:test-get-fullname testdat)) + (testname (db:test-get-testname testdat)) + (itempath (db:test-get-item-path testdat)) + (tctname (if (string=? itempath "") testname (conc testname "." (string-translate itempath "/" ".")))) + (state (db:test-get-state testdat)) + (status (db:test-get-status testdat)) + (duration (or (any->number (db:test-get-run_duration testdat)) 0)) + (comment (db:test-get-comment testdat)) + (logfile (db:test-get-final_logf testdat)) + (prevstat (hash-table-ref/default data testn #f)) + (newstat (if (equal? state "RUNNING") + "RUNNING" + (if (equal? state "COMPLETED") + status + "UNK"))) + (cmtstr (if comment + (conc " message='" comment "' ") + " ")) + (details (if (string-match ".*html$" logfile) + (conc " details='" *toppath* "/lt/" target "/" runname "/" testname (if (equal? itempath "") "/" (conc "/" itempath "/")) logfile "' ") + ""))) + + ;; (print "DEBUG: testn=" testn " state=" state " status=" status " prevstat=" prevstat " newstat=" newstat) + (if (or (not prevstat) + (not (equal? prevstat newstat))) + (begin + (case (string->symbol newstat) + ((UNK) ) ;; do nothing + ((RUNNING) (print "##teamcity[testStarted name='" tctname "']")) + ((PASS SKIP) (print "##teamcity[testFinished name='" tctname "' duration='" (* 1e3 duration) "'" cmtstr details " ]")) + (else + (print "##teamcity[testFailed name='" tctname "' " cmtstr details " ]"))) + (flush-output) + (hash-table-set! data testn newstat))))) + tests))) + run-ids)) + now)) + +(define (monitor pid) + (let ((run-ids #f) + (testdat (make-hash-table)) + (keys #f) + (last-update 0) + (target (or (args:get-arg "-target") + (args:get-arg "-reqtarg"))) + (runname (args:get-arg "-runname")) + (tsname #f)) + (if (and target runname) + (begin + (launch:setup) + (set! keys (rmt:get-keys)))) + (set! tsname (common:get-testsuite-name)) + (print "TCMT: for testsuite=" tsname " found runname=" runname ", target=" target ", keys=" keys " and successfully ran launch:setup.") + (let loop () + (handle-exceptions + exn + ;; (print "Process done.") + (begin (print-call-chain) (print "Error message: " ((condition-property-accessor 'exn 'message) exn))) + (let-values (((pidres exittype exitstatus) + (process-wait pid #t))) + (if (and keys + (or (not run-ids) + (null? run-ids))) + (let* ((runs (rmt:get-runs-by-patt keys + runname + target + #f ;; offset + #f ;; limit + #f ;; fields + 0 ;; last-update + )) + (header (db:get-header runs)) + (rows (db:get-rows runs)) + (run-ids-in (map (lambda (row) + (db:get-value-by-header row header "id")) + rows))) + (set! run-ids run-ids-in))) + ;; (print "TCMT: pidres=" pidres " exittype=" exittype " exitstatus=" exitstatus " run-ids=" run-ids) + (if keys + (set! last-update (print-changes-since testdat run-ids last-update tsname target runname))) + (if (eq? pidres 0) + (begin + (thread-sleep! 3) + (loop)) + (begin + ;; (print "TCMT: pidres=" pidres " exittype=" exittype " exitstatus=" exitstatus " run-ids=" run-ids) + (print "TCMT: All done.") + ))))))) + +;; (if (not (eq? pidres 0)) ;; (not exitstatus)) +;; (begin +;; (thread-sleep! 3) +;; (loop)) +;; (print "Process: megatest " (string-intersperse origargs " ") " is done."))))) +(define (main) + (let* ((mt-done #f) + (pid #f) + (th1 (make-thread (lambda () + (print "Running megatest " (string-intersperse origargs " ")) + (set! pid (process-run "megatest" origargs))) + "Megatest job")) + (th2 (make-thread (lambda () + (monitor pid)) + "Monitor job"))) + (thread-start! th1) + (thread-sleep! 1) ;; give the process time to get going + (thread-start! th2) + (thread-join! th2))) + +(if (args:get-arg "-tc-repl") + (repl) + (main)) + +;; (process-wait) + Index: tdb.scm ================================================================== --- tdb.scm +++ tdb.scm @@ -50,11 +50,11 @@ (debug:print-info 11 *default-log-port* "open-test-db " work-area) (if (and work-area (directory? work-area) (file-read-access? work-area)) (let* ((dbpath (conc work-area "/testdat.db")) - (dbexists (file-exists? dbpath)) + (dbexists (common:file-exists? dbpath)) (work-area-writeable (file-write-access? work-area)) (db (handle-exceptions ;; open the db if area writeable or db pre-existing. open in-mem otherwise. if exception, open in-mem exn (begin (print-call-chain (current-error-port)) Index: tests.scm ================================================================== --- tests.scm +++ tests.scm @@ -61,16 +61,16 @@ (define (tests:get-valid-tests test-registry tests-paths) (if (null? tests-paths) test-registry (let loop ((hed (car tests-paths)) (tal (cdr tests-paths))) - (if (file-exists? hed) + (if (common:file-exists? hed) (for-each (lambda (test-path) (let* ((tname (last (string-split test-path "/"))) (tconfig (conc test-path "/testconfig"))) (if (and (not (hash-table-ref/default test-registry tname #f)) - (file-exists? tconfig)) + (common:file-exists? tconfig)) (hash-table-set! test-registry tname test-path)))) (glob (conc hed "/*")))) (if (null? tal) test-registry (loop (car tal)(cdr tal)))))) @@ -305,11 +305,11 @@ (db:test-get-rundir prev-testdat)) ;; ) (waivers (if testconfig (configf:section-vars testconfig "waivers") '())) (waiver-rx (regexp "^(\\S+)\\s+(.*)$")) (diff-rule "diff %file1% %file2%") (logpro-rule "diff %file1% %file2% | logpro %waivername%.logpro %waivername%.html")) - (if (not (file-exists? test-rundir)) + (if (not (common:file-exists? test-rundir)) (begin (debug:print-error 0 *default-log-port* "test run directory is gone, cannot propagate waiver") #f) (begin (push-directory test-rundir) @@ -322,11 +322,11 @@ (wparts (if waiver (string-match waiver-rx waiver) #f)) (waiver-rule (if wparts (cadr wparts) #f)) (waiver-glob (if wparts (caddr wparts) #f)) (logpro-file (if waiver (let ((fname (conc hed ".logpro"))) - (if (file-exists? fname) + (if (common:file-exists? fname) fname (begin (debug:print 0 *default-log-port* "INFO: No logpro file " fname " falling back to diff") #f))) #f)) @@ -420,19 +420,19 @@ ;; (sqlite3:execute db "UPDATE tests SET first_warn=? WHERE run_id=? AND testname=? AND item_path=?;" val run-id test-name item-path))) (let ((category (hash-table-ref/default otherdat ":category" "")) (variable (hash-table-ref/default otherdat ":variable" "")) (value (hash-table-ref/default otherdat ":value" #f)) - (expected (hash-table-ref/default otherdat ":expected" #f)) - (tol (hash-table-ref/default otherdat ":tol" #f)) + (expected (hash-table-ref/default otherdat ":expected" "n/a")) + (tol (hash-table-ref/default otherdat ":tol" "n/a")) (units (hash-table-ref/default otherdat ":units" "")) (type (hash-table-ref/default otherdat ":type" "")) (dcomment (hash-table-ref/default otherdat ":comment" ""))) (debug:print 4 *default-log-port* "category: " category ", variable: " variable ", value: " value ", expected: " expected ", tol: " tol ", units: " units) - (if (and value expected tol) ;; all three required + (if (and value) ;; require only value; BB was- all three required (let ((dat (conc category "," variable "," value "," expected "," tol "," @@ -439,11 +439,13 @@ units "," dcomment ",," ;; extra comma for status type ))) ;; This was run remote, don't think that makes sense. Perhaps not, but that is the easiest path for the moment. (rmt:csv->test-data run-id test-id - dat)))) + dat) + (thread-sleep! 10) ;; add 10 second delay before quit incase rmt needs time to start a server. + ))) ;; need to update the top test record if PASS or FAIL and this is a subtest ;;;;;; (if (not (equal? item-path "")) ;;;;;; (rmt:set-state-status-and-roll-up-items run-id test-name item-path state status #f) ;;;;;) @@ -839,11 +841,11 @@ '() (lambda (x p) (let* ((targ-path (string-intersperse p "/")) (full-path (conc linktree "/" targ-path)) (run-name (car (reverse p)))) - (if (and (file-exists? full-path) + (if (and (common:file-exists? full-path) (directory? full-path) (file-write-access? full-path)) (s:a run-name 'href (conc targ-path "/run-summary.html")) (begin (debug:print 0 *default-log-port* "INFO: Can't create " targ-path "/run-summary.html") @@ -878,11 +880,11 @@ path-parts)) test-dats)) (tests-htree (common:list->htree tests-tree-dat)) (html-dir (conc linktree "/" (string-intersperse run-dir "/"))) (html-path (conc html-dir "/run-summary.html")) - (oup (if (and (file-exists? html-dir) + (oup (if (and (common:file-exists? html-dir) (directory? html-dir) (file-write-access? html-dir)) (open-output-file html-path) #f))) ;; (print "run-dir: " run-dir ", tests-tree-dat: " tests-tree-dat) @@ -906,21 +908,21 @@ (item-path ;; (if (> (length p) 2) ;; test-name + run-name (string-intersperse p "/")) (full-targ (conc html-dir "/" targ-path)) (std-file (conc full-targ "/test-summary.html")) (alt-file (conc full-targ "/megatest-rollup-" test-name ".html")) - (html-file (if (file-exists? alt-file) + (html-file (if (common:file-exists? alt-file) alt-file std-file)) (run-name (car (reverse p)))) - (if (and (not (file-exists? full-targ)) + (if (and (not (common:file-exists? full-targ)) (directory? full-targ) (file-write-access? full-targ)) (tests:summarize-test run-id (rmt:get-test-id run-id test-name item-path))) - (if (file-exists? full-targ) + (if (common:file-exists? full-targ) (s:a run-name 'href html-file) (begin (debug:print 0 *default-log-port* "ERROR: can't access " full-targ) (conc "No summary for " run-name))))) )))))) @@ -1129,11 +1131,11 @@ ;; Gather data from test/task specifications ;;====================================================================== ;; (define (tests:get-valid-tests testsdir test-patts) ;; #!key (test-names '())) ;; (let ((tests (glob (conc testsdir "/tests/*")))) ;; " (string-translate patt "%" "*"))))) -;; (set! tests (filter (lambda (test)(file-exists? (conc test "/testconfig"))) tests)) +;; (set! tests (filter (lambda (test)(common:file-exists? (conc test "/testconfig"))) tests)) ;; (delete-duplicates ;; (filter (lambda (testname) ;; (tests:match test-patts testname #f)) ;; (map (lambda (testp) ;; (last (string-split testp "/"))) @@ -1146,14 +1148,15 @@ (getenv "MT_TEST_NAME") (getenv "MT_ITEMPATH")) (conc (getenv "MT_LINKTREE") "/" (getenv "MT_TARGET") "/" (getenv "MT_RUNNAME") "/" - (getenv "MT_TEST_NAME") "/" - (if (or (getenv "MT_ITEMPATH") - (not (string=? "" (getenv "MT_ITEMPATH")))) - (conc "/" (getenv "MT_ITEMPATH")))) + (getenv "MT_TEST_NAME") + (if (and (getenv "MT_ITEMPATH") + (not (string=? "" (getenv "MT_ITEMPATH")))) + (conc "/" (getenv "MT_ITEMPATH")) + "")) #f)) ;; if .testconfig exists in test directory read and return it ;; else if have cached copy in *testconfigs* return it IFF there is a section "have fulldata" ;; else read the testconfig file @@ -1163,11 +1166,11 @@ (let* ((use-cache (common:use-cache?)) (cache-path (tests:get-test-path-from-environment)) (cache-file (and cache-path (conc cache-path "/.testconfig"))) (cache-exists (and cache-file (not force-create) ;; if force-create then pretend there is no cache to read - (file-exists? cache-file))) + (common:file-exists? cache-file))) (cached-dat (if (and (not force-create) cache-exists use-cache) (handle-exceptions exn @@ -1187,11 +1190,11 @@ (let* ((treg (or test-registry (tests:get-all))) (test-path (or (hash-table-ref/default treg test-name #f) (conc *toppath* "/tests/" test-name))) (test-configf (conc test-path "/testconfig")) - (testexists (and (file-exists? test-configf)(file-read-access? test-configf))) + (testexists (and (common:file-exists? test-configf)(file-read-access? test-configf))) (tcfg (if testexists (read-config test-configf #f system-allowed environ-patt: (if system-allowed "pre-launch-env-vars" #f)) @@ -1356,11 +1359,11 @@ ;; (define (tests:lazy-dot testrecords outtype sizex sizey) (let ((dfile (conc "/tmp/." (current-user-name) "-" (server:mk-signature) ".dot")) (fname (conc "/tmp/." (current-user-name) "-" (server:mk-signature) ".dotdat"))) (tests:write-dot-file testrecords dfile sizex sizey) - (if (file-exists? fname) + (if (common:file-exists? fname) (let ((res (with-input-from-file fname (lambda () (read-lines))))) (system (conc "env -i PATH=$PATH dot -T " outtype " < " dfile " > " fname "&")) res) @@ -1559,11 +1562,11 @@ (tests:set-full-meta-info db test-id run-id minutes work-area (- remtries 1))) (let ((err-status ((condition-property-accessor 'sqlite3 'status #f) exn))) (debug:print-error 0 *default-log-port* "tried for over a minute to update meta info and failed. Giving up") (debug:print 0 *default-log-port* "EXCEPTION: database probably overloaded or unreadable.") (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) - (print "exn=" (condition->list exn)) + (debug:print 5 *default-log-port* "exn=" (condition->list exn)) (debug:print 0 *default-log-port* " status: " ((condition-property-accessor 'sqlite3 'status) exn)) (print-call-chain (current-error-port)))) (tests:update-testdat-meta-info db test-id work-area cpuload diskfree minutes) ))) Index: tests/Makefile ================================================================== --- tests/Makefile +++ tests/Makefile @@ -173,11 +173,11 @@ cd mintest;$(MEGATEST) -server - -debug $(DEBUG) > server.log 2> server.log & sleep 3 cd mintest;$(DASHBOARD) -rows 18 & cleanprep : ../*.scm Makefile */*.config build - mkdir -p fullrun/tmp/mt_runs fullrun/tmp/mt_links /tmp/$(USER)/adisk1 + mkdir -p fullrun/tmp/mt_runs fullrun/tmp/mt_links /tmp/$(USER)/adisk1 fullrun/logs rm -f */logging.db touch cleanprep fullprep : cleanprep cd fullrun;$(MEGATEST) -remove-runs :runname $(RUNNAME)% -target %/%/% -testpatt %/% Index: tests/fdktestqa/testqa/megatest.config ================================================================== --- tests/fdktestqa/testqa/megatest.config +++ tests/fdktestqa/testqa/megatest.config @@ -1,12 +1,14 @@ [setup] testcopycmd cp --remove-destination -rlv TEST_SRC_PATH/. TEST_TARG_PATH/. >> TEST_TARG_PATH/mt_launch.log 2>> TEST_TARG_PATH/mt_launch.log # launchwait no -launch-delay 0 +launch-delay 0.1 [server] -runtime 180 +# runtime 180 +# timeout is in hours, this is how long the server will stay alive when not being used. +timeout 0.1 # All these are overridden in ../fdk.config # [jobtools] # launcher nbfake # launcher bsub -q priority -o $MT_TEST_RUN_DIR/openlava.log Index: tests/fdktestqa/testqa/tests/bigrun2/testconfig ================================================================== --- tests/fdktestqa/testqa/tests/bigrun2/testconfig +++ tests/fdktestqa/testqa/tests/bigrun2/testconfig @@ -9,11 +9,11 @@ mode itemwait itemmap .*/ # Iteration for your tests are controlled by the items section [items] -NUMBER #{scheme (string-intersperse (map (lambda (x)(conc "blah/" x)) \ +NUMBER #{scheme (string-intersperse (map (lambda (x)(conc (if (getenv "USEBLAH") "blah/" "") x)) \ (map number->string (sort (let loop ((a 0)(res '())) \ (if (<= a (or (any->number (get-environment-variable "NUMTESTS")) 2500)) \ (loop (+ a 1)(cons a res)) res)) <))) " ")} Index: tests/fullrun/megatest.config ================================================================== --- tests/fullrun/megatest.config +++ tests/fullrun/megatest.config @@ -47,11 +47,13 @@ waivercommentpatt ^WW\d+ [a-z].* incomplete-timeout 1 # wait 0.5 seconds between launching every process # -launch-delay 0.5 +# launch-delay 0.5 +launch-delay 0 + # wait for runs to completely complete. yes, anything else is no run-wait yes # If set to "default" the old code is used. Otherwise defaults to 200 or uses Index: tests/fullrun/runconfigs.config ================================================================== --- tests/fullrun/runconfigs.config +++ tests/fullrun/runconfigs.config @@ -1,10 +1,10 @@ [default] SOMEVAR This should show up in SOMEVAR3 VARNOVAL VARNOVAL_WITHSPACE -QUICK % +QUICKPATT test_mt_vars,test2,priority_9 # target based getting of config file, look at afs.config and nfs.config [include #{getenv fsname}.config] [include #{getenv MT_RUN_AREA_HOME}/common_runconfigs.config] Index: tests/tests.scm ================================================================== --- tests/tests.scm +++ tests/tests.scm @@ -17,10 +17,26 @@ (import srfi-18) ;; (require-extension zmq) ;; (import zmq) (define test-work-dir (current-directory)) + +;; given list of lists +;; ( ( msg expected param1 param2 ...) +;; ( ... ) ) +;; apply test to all +;; +(define (test-batch proc pname inlst #!key (post-proc #f)) + (for-each + (lambda (spec) + (let ((msg (conc pname " " (car spec))) + (result (cadr spec)) + (params (cddr spec))) + (if post-proc + (test msg result (post-proc (apply proc params))) + (test msg result (apply proc params))))) + inlst)) ;; read in all the _record files (let ((files (glob "*_records.scm"))) (for-each (lambda (file) Index: tests/unittests/all-rmt.scm ================================================================== --- tests/unittests/all-rmt.scm +++ tests/unittests/all-rmt.scm @@ -28,13 +28,25 @@ ;; DEF (rmt:kill-server run-id) ;; DEF (rmt:start-server run-id) (test #f '(#t "successful login")(rmt:login #f)) ;; DEF (rmt:login-no-auto-client-setup connection-info) (test #f #t (pair? (rmt:get-latest-host-load (get-host-name)))) + +;; get-latest-host-load does a lookup in the db, it won't return a useful value unless +;; a test ran recently on host +(test-batch rmt:get-latest-host-load + "rmt:get-latest-host-load" + (list (list "localhost" #t (get-host-name)) + (list "not-a-host" #t "not-a-host" )) + post-proc: pair?) + (test #f #t (list? (rmt:get-changed-record-ids 0))) + (test #f #f (begin (runs:update-all-test_meta #f) #f)) + (test #f '("test1" "test2")(sort (alist-ref "tagtwo" (hash-table->alist (rmt:get-tests-tags)) equal?) string<=)) + (test #f '() (rmt:get-key-val-pairs 0)) (test #f '("SYSTEM" "RELEASE") (rmt:get-keys)) (test #f '("SYSTEM" "RELEASE") (rmt:get-keys-write)) ;; dummy query to force server start (test #f '() (rmt:get-key-vals 1)) (test #f (vector '("SYSTEM" "RELEASE") '()) (rmt:get-targets)) @@ -82,11 +94,27 @@ (test #f '()(rmt:get-prev-run-ids 1)) (test #f #t (begin (rmt:lock/unlock-run 1 #t #f "mikey") #t)) (test #f "JUSTFINE" (rmt:get-run-status 1)) (test #f #t (begin (rmt:set-run-status 1 "NOTFINE" msg: "A message") #t)) (test #f #t (begin (rmt:update-run-event_time 1) #t)) + ;; (rmt:get-runs-by-patt keys runnamepatt targpatt offset limit fields last-runs-update) ;; fields of #f uses default +;; +(let ((keys (rmt:get-keys)) + (rnp "%") ;; run name patt + (tpt "%/%")) ;; target patt + (test-batch rmt:get-runs-by-patt + "rmt:get-runs-by-patt" + (list (list "t=0" #t keys rnp tpt #f #f #f 0) + (list "t=current" #f keys rnp tpt #f #f #f (+ 100 (current-seconds))) ;; should be no records from the future + ) + post-proc: (lambda (res) + ;; (print "rmt:get-runs-by-patt returned: " res) + (and (vector? res) + (let ((rows (vector-ref res 1))) + (> (length rows) 0)))))) + ;; (rmt:find-and-mark-incomplete run-id ovr-deadtime) ;; (rmt:get-main-run-stats run-id) ;; (rmt:get-var varname) ;; (rmt:set-var varname value) ;; (rmt:find-and-mark-incomplete-all-runs #!key (ovr-deadtime #f)) Index: tests/unittests/tests.scm ================================================================== --- tests/unittests/tests.scm +++ tests/unittests/tests.scm @@ -12,67 +12,70 @@ ;; (non-completed (runs:calc-not-completed prereqs-not-met)) ;; (runnables (runs:calc-runnable prereqs-not-met))) ;; ;; ;; - (define user (current-user-name)) (define runname "mytestrun") (define keys (rmt:get-keys)) (define runinfo #f) (define keyvals '(("SYSTEM" "abc")("RELEASE" "def"))) (define header (list "SYSTEM" "RELEASE" "id" "runname" "state" "status" "owner" "event_time")) +(define contour #f) (define run-id 1) - +(define new-comment #f) ;; Create a run -(test #f 1 (rmt:register-run keyvals runname "new" "n/a" user)) +(test #f 1 (rmt:register-run keyvals runname "new" "n/a" user contour)) (test #f #t (rmt:general-call 'register-test run-id run-id "test-one" "")) (test #f #t (rmt:general-call 'register-test run-id run-id "test-two" "")) (test #f #t (rmt:general-call 'register-test run-id run-id "test-three" "")) (test #f #t (rmt:general-call 'register-test run-id run-id "test-four" "")) -(rmt:test-set-state-status-by-id run-id (rmt:get-test-id run-id "test-one" "") "COMPLETED" "FAIL" "") -(rmt:test-set-state-status-by-id run-id (rmt:get-test-id run-id "test-two" "") "COMPLETED" "PASS" "") -(rmt:test-set-state-status-by-id run-id (rmt:get-test-id run-id "test-three" "") "RUNNING" "n/a" "") -(rmt:test-set-state-status-by-id run-id (rmt:get-test-id run-id "test-four" "") "COMPLETED" "WARN" "") + +;; (define (rmt:test-set-state-status-by-id run-id test-id newstate newstatus newcomment) +(rmt:test-set-state-status-by-id run-id (rmt:get-test-id run-id "test-one" "") "COMPLETED" "FAIL" new-comment) +(rmt:test-set-state-status-by-id run-id (rmt:get-test-id run-id "test-two" "") "COMPLETED" "PASS" new-comment) +(rmt:test-set-state-status-by-id run-id (rmt:get-test-id run-id "test-three" "") "RUNNING" "n/a" new-comment) +(rmt:test-set-state-status-by-id run-id (rmt:get-test-id run-id "test-four" "") "COMPLETED" "WARN" new-comment) -(print "MODE=not in") -(test #f '() +(test "MODE=not in" + '() (filter (lambda (y) (equal? y "FAIL")) ;; any FAIL in the output list? (map (lambda (x)(vector-ref x 4)) (rmt:get-tests-for-run run-id "%/%" '() '("FAIL") #f #f #t 'event_time "DESC" 'shortlist 0 'dashboard)))) -(print "MODE=in") -(test #f '("FAIL") +(test "MODE=in" + '("FAIL") (map (lambda (x)(vector-ref x 4)) (rmt:get-tests-for-run run-id "%/%" '() '("FAIL") #f #f #f 'event_time "DESC" 'shortlist 0 'dashboard))) (set! *verbosity* 1) -(print "MODE=in, state in RUNNING") ;; (set! *verbosity* 8) -(test #f '("RUNNING") +(test "MODE=in, state in RUNNING" '("RUNNING") (map (lambda (x)(vector-ref x 3)) (rmt:get-tests-for-run run-id "%/%" '("RUNNING") '() #f #f #f 'event_time "DESC" 'shortlist 0 'dashboard))) (set! *verbosity* 1) -(print "MODE=in, state in RUNNING and status IN WARN") ;; (set! *verbosity* 8) -(test #f '(("RUNNING" . "n/a") ("COMPLETED" . "WARN")) - (map - (lambda (x) - (cons (vector-ref x 3)(vector-ref x 4))) - (rmt:get-tests-for-run run-id "%/%" '("RUNNING") '("WARN") #f #f #f 'event_time "DESC" 'shortlist 0 'dashboard))) +;;(define (rmt:get-tests-for-run run-id testpatt states statuses offset limit not-in sort-by sort-order qryvals last-update mode) +(test + "MODE=in, state in RUNNING and status IN WARN" + '(("COMPLETED" . "WARN") ("RUNNING" . "n/a") ) + (map + (lambda (x) + (cons (vector-ref x 3)(vector-ref x 4))) + (rmt:get-tests-for-run run-id "%/%" '("RUNNING") '("WARN") #f #f #f 'event_time "DESC" 'shortlist 0 'dashboard))) (set! *verbosity* 1) -(print "MODE=not in, state in RUNNING and status IN WARN") (set! *verbosity* 8) -(test #f '(("DELETED" . "n/a") ("COMPLETED" . "PASS") ("COMPLETED" . "FAIL")) +(test "MODE=not in, state in RUNNING and status IN WARN" + '(("COMPLETED" . "PASS") ("COMPLETED" . "FAIL")) (map (lambda (x) (cons (vector-ref x 3)(vector-ref x 4))) (rmt:get-tests-for-run run-id "%/%" '("RUNNING") '("WARN") #f #f #t 'event_time "DESC" 'shortlist 0 'dashboard))) (set! *verbosity* 1) Index: tree.scm ================================================================== --- tree.scm +++ tree.scm @@ -22,11 +22,11 @@ (declare (uses launch)) (declare (uses megatest-version)) (declare (uses gutils)) (declare (uses db)) (declare (uses server)) -(declare (uses synchash)) +;; (declare (uses synchash)) (declare (uses dcommon)) (include "common_records.scm") (include "db_records.scm") (include "key_records.scm") Index: utils/mk_wrapper ================================================================== --- utils/mk_wrapper +++ utils/mk_wrapper @@ -1,13 +1,13 @@ #!/bin/bash prefix=$1 cmd=$2 target=$3 +cfgfile="$prefix/bin/.$(lsb_release -sr)/cfg.sh" if [ "$LD_LIBRARY_PATH" != "" ];then - cfgfile="$prefix/bin/.$(lsb_release -sr)/cfg.sh" echo "INFO: Using LD_LIBRARY_PATH=$LD_LIBRARY_PATH" >&2 ( cat << __EOF if [ "\$LD_LIBRARY_PATH" != "" ];then export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:\$LD_LIBRARY_PATH else @@ -18,15 +18,10 @@ echo else echo "INFO: LD_LIBRARY_PATH not set" >&2 fi -# echo "#!/bin/bash" > $target -# if [ "$LD_LIBRARY_PATH" != "" ];then -# echo "source $prefix/bin/.\$(lsb_release -sr)/cfg.sh" >> $target -# fi -# echo "exec $prefix/bin/.\$(lsb_release -sr)/$cmd \"\$@\"" >> $target echo "#!/bin/bash" > $target if [[ $cmd =~ dboard ]]; then cat >> $target <<'EOF' @@ -57,8 +52,19 @@ fi EOF fi +cat >> $target << EOF +if [[ \$(ulimit -a | grep 'open files' | awk '{print \$4}') -gt 10000 ]];then ulimit -n 10000;fi +EOF + +# echo "#!/bin/bash" > $target +# echo "exec $prefix/bin/.\$(lsb_release -sr)/$cmd \"\$@\"" >> $target + echo "lsbr=\$(lsb_release -sr)" >> $target -echo "if [[ -e \$lsbr ]];then source \$lsbr;fi" >> $target +if [ "$LD_LIBRARY_PATH" != "" ];then + echo "source $prefix/bin/.\$lsbr/cfg.sh" >> $target +fi + +# echo "if [[ -e \$lsbr ]];then source \$lsbr;fi" >> $target echo "exec $prefix/bin/.\$lsbr/$cmd \"\$@\"" >> $target
    Table 4. API Keys Related CallsTable 5. API Keys Related Calls