Index: Makefile ================================================================== --- Makefile +++ Makefile @@ -24,14 +24,15 @@ fossil timeline -n 350 -t ci -F "%h,%a,%b,%t,\"%c\"" > recent-commits.csv SHELL=/bin/bash PREFIX=$(PWD) +# CSCOPTS=-lfa2 -specialize -inline-global CSCOPTS= INSTALL=install SRCFILES = common.scm items.scm launch.scm ods.scm runconfig.scm \ - server.scm configf.scm db.scm keys.scm margs.scm \ + server.scm configf.scm db.scm keys.scm \ process.scm runs.scm tasks.scm tests.scm genexample.scm \ tdb.scm mt.scm \ ezsteps.scm rmt.scm api.scm \ subrun.scm archive.scm env.scm \ diff-report.scm cgisetup/models/pgdb.scm @@ -39,28 +40,27 @@ # module source files MSRCFILES = dbfile.scm debugprint.scm mtargs.scm commonmod.scm dbmod.scm \ tcp-transportmod.scm rmtmod.scm portlogger.scm transport-mode.scm : transport-mode.scm.template - @if [[ -e transport-mode.scm ]];then \ - echo "WARNING: transport-mode.scm.template is newer than transport-mode.scm"; else \ - cp transport-mode.scm.template transport-mode.scm; fi + cp transport-mode.scm.template transport-mode.scm dashboard-transport-mode.scm : dashboard-transport-mode.scm.template - @if [[ -e dashboard-transport-mode.scm ]];then \ - echo "WARNING: dashboard-transport-mode.scm.template is newer than dashboard-transport-mode.scm"; else \ - cp dashboard-transport-mode.scm.template dashboard-transport-mode.scm; fi + cp dashboard-transport-mode.scm.template dashboard-transport-mode.scm -megatest.scm : transport-mode.scm -dashboard.scm : dashboard-transport-mode.scm +mtest : transport-mode.scm +dboard : dashboard-transport-mode.scm # dbmod.import.o is just a hack here mofiles/portlogger.o : mofiles/dbmod.o mofiles/dbfile.o : \ - mofiles/debugprint.o mofiles/commonmod.o + mofiles/debugprint.o mofiles/commonmod.o +mofiles/dbmod.o : mofiles/dbfile.o + +mofiles/commonmod.o : mofiles/debugprint.o configf.o : commonmod.import.o mofiles/dbfile.o : mofiles/debugprint.o mofiles/rmtmod.o mofiles/dbmod.o : mofiles/dbfile.o mofiles/commonmod.o mofiles/debugprint.o db.o : mofiles/dbmod.o mofiles/dbfile.o mofiles/debugprint.o : mofiles/mtargs.o @@ -340,10 +340,13 @@ $(PREFIX)/bin/mt-new-to-old.sh : utils/mt-new-to-old.sh $(INSTALL) $< $@ chmod a+x $@ +$(PREFIX)/bin/convert-db.sh : utils/convert-db.sh + $(INSTALL) $< $@ + chmod a+x $@ deploytarg/nbfake : utils/nbfake $(INSTALL) $< $@ chmod a+x $@ @@ -386,10 +389,11 @@ install : $(PREFIX)/bin/.$(ARCHSTR) $(PREFIX)/bin/.$(ARCHSTR)/mtest $(PREFIX)/bin/megatest \ $(PREFIX)/bin/.$(ARCHSTR)/dboard $(PREFIX)/bin/dashboard $(HELPERS) $(PREFIX)/bin/nbfake \ $(PREFIX)/bin/nbfind $(PREFIX)/bin/mtrunner $(PREFIX)/bin/viewscreen $(PREFIX)/bin/mt_xterm \ $(PREFIX)/bin/mt-old-to-new.sh $(PREFIX)/bin/mt-new-to-old.sh \ + $(PREFIX)/bin/convert-db.sh $(PREFIX)/bin/convert-db.sh \ $(PREFIX)/share/docs/megatest_manual.html $(PREFIX)/bin/remrun \ $(PREFIX)/share/docs/megatest_manual.html $(PREFIX)/bin/remrun $(PREFIX)/bin/mtutil \ $(PREFIX)/share/db/mt-pg.sql \ $(PREFIX)/share/js/jquery-3.1.0.slim.min.js \ $(PREFIX)/bin/.$(ARCHSTR)/lib/libpangox-1.0.so \ Index: TODO ================================================================== --- TODO +++ TODO @@ -16,10 +16,51 @@ # along with Megatest. If not, see . TODO ==== +23WW48 +. Add calls-per-minute to db access stats +. Find out why start-server calls are taking 250ms and fix +. Allow two or three servers to run for any given db +. Update avg call count/sec every 30 sec in no-sync +. get server uses no-sync process info to decide which server to suggest +. Use process table to decide who will do sync back +. Fix metadat being synced over and over + +23WW47 +. Finding server +.. look at .servinfo for likely prime main +.. ask the .servinfo prime main for real prime main +.. save prime main (for how long, 10 seconds or 10 minutes?) + +. Starting prime main +.. get servinfo files - START +.. no files? create my servinfo file, goto START +.. have files? am I the prime main according to servinfo files? +.. no, I'm not the prime main, ping prime main +.. ping is good, prime main exists, register self as server if on same host as prime main DONE +.. no pirng response, remove the .servinfo file - goto START +.. if I am prime main according to .servinfo files, register directly in no-sync + +. Starting non-main +.. get servinfo files +.. no files? launch server for main.db +.. have files? pick out prime main +.. register self as server with prime main + +23WW46 - v1.80 branch +. Use file semaphore to kill tests, eliminate db load of the KILLREQ query +. Merge this change to revolution branch +23WW45 - the revolution branch +. Add "fast" db start option (no handshaking over NFS) +. Add server-ro to server types (just "server" is fine for read/write). +. [DONE] Create pause-server and resume-server calls +. Create rsync or cp sync to MTRAH function +. Change rmt:send-receive to divert calls to read-only server when possible +. [DONE] Change start server to call main.db server for 1..N.db servers, block until server is read for use. + 23WW21 . Dashboard needs its own cache db in /tmp 23WW07 . Remove use of *dbstruct-dbs* Index: api.scm ================================================================== --- api.scm +++ api.scm @@ -35,11 +35,12 @@ (use srfi-69 srfi-18 posix matchable - s11n) + s11n + typed-records) ;; allow these queries through without starting a server ;; (define api:read-only-queries '(get-key-val-pairs @@ -95,10 +96,13 @@ login tasks-get-last testmeta-get-record have-incompletes? get-changed-record-ids + get-all-runids + get-changed-record-test-ids + get-changed-record-run-ids get-run-record-ids get-not-completed-cnt)) (define api:write-queries '( @@ -149,82 +153,20 @@ tasks-set-state-given-param-key )) (define *db-write-mutexes* (make-hash-table)) (define *server-signature* #f) -;; ;; These are called by the server on recipt of /api calls -;; ;; - keep it simple, only return the actual result of the call, i.e. no meta info here -;; ;; -;; ;; - returns #( flag result ) -;; ;; -;; (define (api:execute-requests dbstruct dat) -;; (if (> *api-process-request-count* 50) -;; (begin -;; (if (common:low-noise-print 30 "too many threads") -;; (debug:print 0 *default-log-port* "WARNING: "*api-process-request-count*" threads, potential overload, adding 0.5 sec delay.")) -;; ;; (thread-sleep! 0.5) ;; take a nap - no, the napping is moved to the clients via tt:backoff-incr -;; )) -;; (cond -;; ((not (vector? dat)) ;; it is an error to not receive a vector -;; (vector #f (vector #f "remote must be called with a vector"))) -;; (else -;; (let* ((cmd-in (vector-ref dat 0)) -;; (cmd (if (symbol? cmd-in) -;; cmd-in -;; (string->symbol cmd-in))) -;; (params (vector-ref dat 1)) -;; (run-id (if (null? params) -;; 0 -;; (car params))) -;; (write-mutex (if (hash-table-exists? *db-write-mutexes* run-id) -;; (hash-table-ref *db-write-mutexes* run-id) -;; (let* ((newmutex (make-mutex))) -;; (hash-table-set! *db-write-mutexes* run-id newmutex) -;; newmutex))) -;; (start-t (current-milliseconds)) -;; (readonly-mode (dbr:dbstruct-read-only dbstruct)) -;; (readonly-command (member cmd api:read-only-queries)) -;; (writecmd-in-readonly-mode (and readonly-mode (not readonly-command)))) -;; (if (not readonly-command) -;; (mutex-lock! write-mutex)) -;; (let* ((tmppath (dbr:dbstruct-tmppath dbstruct)) -;; (clean-run-id (cond -;; ((number? run-id) run-id) -;; ((equal? run-id #f) "main") -;; (else "other"))) -;; (crumbfile (dbfile:wait-for-qif tmppath clean-run-id (cons cmd params))) -;; (res -;; (if writecmd-in-readonly-mode -;; (conc "attempt to run write command "cmd" on a read-only database") -;; (api:dispatch-request dbstruct cmd run-id params)))) -;; (delete-file* crumbfile) -;; (if (not readonly-command) -;; (mutex-unlock! write-mutex)) -;; -;; ;; save all stats -;; (let ((delta-t (- (current-milliseconds) -;; start-t)) -;; (modified-cmd (if (eq? cmd 'general-call) -;; (string->symbol (conc "general-call-" (car params))) -;; cmd))) -;; (hash-table-set! *db-api-call-time* modified-cmd -;; (cons delta-t (hash-table-ref/default *db-api-call-time* modified-cmd '())))) -;; (if writecmd-in-readonly-mode -;; (begin -;; #;(common:telemetry-log (conc "api-out:"(->string cmd)) -;; payload: `((params . ,params) -;; (ok-res . #t))) -;; (vector #f res)) -;; (begin -;; #;(common:telemetry-log (conc "api-out:"(->string cmd)) -;; payload: `((params . ,params) -;; (ok-res . #f))) -;; (vector #t res)))))))) (define *api-threads* '()) -(define (api:register-thread th-in) - (set! *api-threads* (cons (cons th-in (current-seconds)) *api-threads*))) +(define (api:register-thread th-in command) + (set! *api-threads* (cons (list th-in (current-seconds) command) *api-threads*))) + +(define (api:get-thread-command th-in) + (let ((thread-data (assoc th-in *api-threads*))) + (if thread-data + (third thread-data) ; Assuming the command is the third element in the list + #f))) ; Return #f if the thread is not found (define (api:unregister-thread th-in) (set! *api-threads* (filter (lambda (thdat) (not (eq? th-in (car thdat)))) *api-threads*))) @@ -234,11 +176,34 @@ (not (member (thread-state (car thdat)) '(terminated dead)))) *api-threads*))) (define (api:get-count-threads-alive) (length *api-threads*)) - + +(define (api:get-threads) + (map (lambda (thdat) + (let ((thread (first thdat)) + (timestamp (second thdat)) + (command (third thdat))) + (format "\nThread: ~a, age: ~a, Command: ~a" thread (- (current-seconds) timestamp) command))) + *api-threads*)) + + +(define *api:last-stats-print* 0) +(define *api-print-db-stats-mutex* (make-mutex)) +(define (api:print-db-stats) + (debug:print-info 0 *default-log-port* "Started periodic db stats printer") + (let loop () + (mutex-lock! *api-print-db-stats-mutex*) + (if (> (- (current-seconds) *api:last-stats-print*) 15) + (begin + (rmt:print-db-stats) + (set! *api:last-stats-print* (current-seconds)))) + (mutex-unlock! *api-print-db-stats-mutex*) + (thread-sleep! 5) + (loop))) + ;; indat is (cmd run-id params meta) ;; ;; WARNING: Do not print anything in the lambda of this function as it ;; reads/writes to current in/out port @@ -246,86 +211,111 @@ (define (api:tcp-dispatch-request-make-handler dbstruct) ;; cmd run-id params) (assert *toppath* "FATAL: api:tcp-dispatch-request-make-handler called but *toppath* not set.") (if (not *server-signature*) (set! *server-signature* (tt:mk-signature *toppath*))) (lambda (indat) - (api:register-thread (current-thread)) - (let* (;; (indat (deserialize)) - (newcount (+ *api-process-request-count* 1)) - (numthreads (api:get-count-threads-alive)) - (delay-wait (if (> newcount 10) - (- newcount 10) - 0)) - (normal-proc (lambda (cmd run-id params) - (case cmd - ((ping) *server-signature*) - (else - (api:dispatch-request dbstruct cmd run-id params)))))) - (set! *api-process-request-count* newcount) - (set! *db-last-access* (current-seconds)) - (if (not (eq? newcount numthreads)) - (begin - (api:remove-dead-or-terminated) - (let ((threads-now (api:get-count-threads-alive))) - (debug:print 0 *default-log-port* "WARNING: newcount="newcount", numthreads="numthreads", remaining="threads-now) - (set! newcount threads-now)))) - (match indat - ((cmd run-id params meta) - (let* ((db-ok (let* ((dbfname (dbmod:run-id->dbfname run-id)) - (ok (equal? dbfname (dbr:dbstruct-dbfname dbstruct)))) - (case cmd - ((ping) #t) ;; we are fine - (else - (if (not ok)(debug:print 0 *default-log-port* "ERROR: "cmd", run-id "run-id", not correct for dbfname "(dbr:dbstruct-dbfname dbstruct))) - (assert ok "FATAL: database file and run-id not aligned."))))) - (ttdat *server-info*) - (server-state (tt-state ttdat)) - (status (cond - ((> newcount 3) 'busy) - ;; ((> newcount 5) 'loaded) ;; this gets transmitted to the client which calls tt:backoff-incr to slow stuff down. - (else 'ok))) - (errmsg (case status - ((busy) (conc "Server overloaded, "newcount" threads in flight")) - ((loaded) (conc "Server loaded, "newcount" threads in flight")) - (else #f))) - (result (case status - ((busy) - (if (eq? cmd 'ping) - (normal-proc cmd run-id params) - ;; newcount must be greater than 5 for busy - (* 1 (- newcount 3)) ;; was 15 - )) ;; (- newcount 29)) ;; call back in as many seconds - ((loaded) -;; (if (eq? (rmt:transport-mode) 'tcp) -;; (thread-sleep! 0.5)) - (normal-proc cmd run-id params)) - (else - (normal-proc cmd run-id params)))) - (meta (case cmd - ((ping) `((sstate . ,server-state))) - (else `((wait . ,delay-wait))))) - (payload (list status errmsg result meta))) - (set! *api-process-request-count* (- *api-process-request-count* 1)) - ;; (serialize payload) - (api:unregister-thread (current-thread)) - payload)) - (else - (assert #f "FATAL: failed to deserialize indat "indat)))))) - + (api:register-thread (current-thread) (car indat)) + (let* ((result + (let* ((numthreads (api:get-count-threads-alive)) + (delay-wait (if (> numthreads 10) + (- numthreads 10) + 0)) + (normal-proc (lambda (cmd run-id params) + (case cmd + ((ping) *server-signature*) + (else + (api:dispatch-request dbstruct cmd run-id params)))))) + (set! *api-process-request-count* numthreads) + (set! *db-last-access* (current-seconds)) +;; (if (not (eq? numthreads numthreads)) +;; (begin +;; (api:remove-dead-or-terminated) +;; (let ((threads-now (api:get-count-threads-alive))) +;; (debug:print 0 *default-log-port* "WARNING: numthreads="numthreads", numthreads="numthreads", remaining="threads-now) +;; (set! numthreads threads-now)))) + (match indat + ((cmd run-id params meta) + (let* ((start-t (current-milliseconds)) + (db-ok (let* ((dbfname (dbmod:run-id->dbfname run-id)) + (ok (equal? dbfname (dbr:dbstruct-dbfname dbstruct)))) + (case cmd + ((ping) #t) ;; we are fine + (else + (assert ok "FATAL: database file and run-id not aligned."))))) + (ttdat *server-info*) + (server-state (tt-state ttdat)) + (maxthreads 20) ;; make this a parameter? + (status (cond + ((> numthreads maxthreads) + (let* ((testsuite (common:get-testsuite-name)) + (mtexe (common:find-local-megatest)) + (proc (lambda () + ;; we are overloaded, try to start another server + (debug:print 0 *default-log-port* "Too many threads running, starting another server") + (tt:server-process-run *toppath* testsuite mtexe run-id)))) + (set! *server-start-requests* (cons proc *server-start-requests*))) + ;; 'busy + 'loaded ;; not ideal since the client will not backoff + ) + (else 'ok))) + (errmsg (case status + ((busy) (conc "Server overloaded, "numthreads" threads in flight, current cmd: " cmd "\n current threads: " (api:get-threads))) + ((loaded) (conc "Server loaded, "numthreads" threads in flight")) + (else #f))) + (result (case status + ((busy) + (if (eq? cmd 'ping) + (normal-proc cmd run-id params) + ;; numthreads must be greater than 5 for busy + (* 0.1 (- numthreads maxthreads)) ;; was 15 + )) ;; (- numthreads 29)) ;; call back in as many seconds + ((loaded) + ;; (if (eq? (rmt:transport-mode) 'tcp) + ;; (thread-sleep! 0.5)) + (normal-proc cmd run-id params)) + (else + (normal-proc cmd run-id params)))) + (meta (case cmd + ((ping) `((sstate . ,server-state)(sload . ,numthreads))) + (else `((wait . ,delay-wait))))) + (payload (list status errmsg result meta))) + ;; (cmd run-id params meta) + (db:add-stats cmd run-id params (- (current-milliseconds) start-t)) + payload)) + (else + (assert #f "FATAL: failed to deserialize indat "indat)))))) + ;; (set! *api-process-request-count* (- *api-process-request-count* 1)) + ;; (serialize payload) + + (api:unregister-thread (current-thread)) + result))) + +(define *api-halt-writes* #f) (define (api:dispatch-request dbstruct cmd run-id params) (if (not *no-sync-db*) (db:open-no-sync-db)) + (let* ((start-time (current-milliseconds))) + (if (member cmd api:write-queries) + (let loop () + (if *api-halt-writes* + (begin + (thread-sleep! 0.2) + (if (< (- (current-milliseconds) start-time) + 5000) ;; hope it don't take more than five seconds to sync + (loop-time) + #;(debug:print 0 *default-log-port* "ERROR: writes halted for more than 5 seconds, sync might be taking too long")))))) + (db:add-stats 'api-write-blocking-for-sync run-id params (- (current-milliseconds) start-time))) (case cmd ;;=============================================== ;; READ/WRITE QUERIES ;;=============================================== ((get-keys-write) (db:get-keys dbstruct)) ;; force a dummy "write" query to force server; for debug in -repl ;; SERVERS - ((start-server) (apply server:kind-run params)) + ((start-server) (apply tt:server-process-run params)) ((kill-server) (set! *server-run* #f)) ;; TESTS ;;((test-set-state-status-by-id) (apply mt:test-set-state-status-by-id dbstruct params)) @@ -400,10 +390,16 @@ ;; NO SYNC DB ((no-sync-set) (apply db:no-sync-set *no-sync-db* params)) ((no-sync-get/default) (apply db:no-sync-get/default *no-sync-db* params)) ((no-sync-del!) (apply db:no-sync-del! *no-sync-db* params)) ((no-sync-get-lock) (apply db:no-sync-get-lock *no-sync-db* params)) + + ;; NO SYNC DB PROCESSES + ((register-process) (apply dbfile:register-process *no-sync-db* params)) + ((set-process-done) (apply dbfile:set-process-done *no-sync-db* params)) + ((set-process-status) (apply dbfile:set-process-status *no-sync-db* params)) + ((get-process-options) (apply dbfile:get-process-options *no-sync-db* params)) ;; ARCHIVES ;; ((archive-get-allocations) ((archive-register-disk) (apply db:archive-register-disk dbstruct params)) ((archive-register-block-name)(apply db:archive-register-block-name dbstruct params)) @@ -491,53 +487,18 @@ (realparams (cddr params))) (db:general-call dbstruct run-id stmtname realparams))) ((sdb-qry) (apply sdb:qry params)) ((ping) (current-process-id)) ((get-changed-record-ids) (apply db:get-changed-record-ids dbstruct params)) - ((get-run-record-ids) (apply db:get-run-record-ids dbstruct params)) + ((get-changed-record-test-ids) (apply db:get-changed-record-test-ids dbstruct params)) + ((get-changed-record-run-ids) (apply db:get-changed-record-run-ids dbstruct params)) + ((get-run-record-ids) (apply db:get-run-record-ids dbstruct params)) + ((get-all-runids) (apply db:get-all-runids dbstruct)) ;; TESTMETA ((testmeta-get-record) (apply db:testmeta-get-record dbstruct params)) ;; TASKS ((find-task-queue-records) (apply tasks:find-task-queue-records dbstruct params)) (else (debug:print 0 *default-log-port* "ERROR: bad api call " cmd) (conc "ERROR: BAD api call " cmd)))) -;; http-server send-response -;; api:process-request -;; db:* -;; -;; NB// Runs on the server as part of the server loop -;; -(define (api:process-request dbstruct $) ;; the $ is the request vars proc - (debug:print 4 *default-log-port* "server-id:" *server-id*) - (let* ((cmd ($ 'cmd)) - (paramsj ($ 'params)) - (key ($ 'key)) - (params (db:string->obj paramsj transport: 'http))) ;; incoming data from the POST (or is it a GET?) - (debug:print 4 *default-log-port* "cmd:" cmd " with params " params "key " key) - (if (equal? key *server-id*) - (begin - (set! *api-process-request-count* (+ *api-process-request-count* 1)) - (let* ((resdat (api:execute-requests dbstruct (vector cmd params))) ;; process the request, resdat = #( flag result ) - (success (vector-ref resdat 0)) - (res (vector-ref resdat 1))) ;; (vector flag payload), get the payload, ignore the flag (why?) - (debug:print 4 *default-log-port* "res:" res) - (if (not success) - (debug:print 0 *default-log-port* "ERROR: success flag is #f for " cmd " with params " params)) - (if (> *api-process-request-count* *max-api-process-requests*) - (set! *max-api-process-requests* *api-process-request-count*)) - (set! *api-process-request-count* (- *api-process-request-count* 1)) - ;; This can be here but needs controls to ensure it doesn't run more than every 4 seconds - ;; (rmt:dat->json-str - ;; (if (or (string? res) - ;; (list? res) - ;; (number? res) - ;; (boolean? res)) - ;; res - ;; (list "ERROR, not string, list, number or boolean" 1 cmd params res))))) - (db:obj->string res transport: 'http))) - (begin - (debug:print 0 *default-log-port* "Server refused to process request. Server id mismatch. recived " key " expected: " *server-id* ".\nOther arguments recived: cmd=" cmd " params = " params) - (db:obj->string (conc "Server refused to process request server-id mismatch: " key ", " *server-id*) transport: 'http))))) - Index: archive.scm ================================================================== --- archive.scm +++ archive.scm @@ -359,11 +359,11 @@ (archive-dir (if archive-info (cdr archive-info) #f)) (archive-id (if archive-info (car archive-info) -1)) (home-host (server:choose-server *toppath* 'homehost)) (archive-time (seconds->std-time-str (current-seconds))) (archive-staging-db (conc *toppath* "/.db-snapshot/archive_" archive-time)) - (tmp-db-path (conc (common:get-db-tmp-area) "/megatest.db")) + (tmp-db-path (conc (dbfile:make-tmpdir-name *toppath* "") "/megatest.db")) (dbfile (conc archive-staging-db "/megatest.db"))) (create-directory archive-staging-db #t) (let-values (((pid-val exit-status exit-code) (run-n-wait rsync-exe params: (list "-v" (conc (car home-host) ":"tmp-db-path) archive-staging-db) print-cmd: print-prefix))) (if (eq? exit-code 0) (case archiver @@ -407,11 +407,11 @@ (bup-restore-params (list "-d" archive-path "restore" "-C" *toppath* archive-internal-path))) (debug:print-info 2 *default-log-port* "Restoring archived data to " *toppath* " from archive in " archive-path " ... " archive-internal-path) (run-n-wait bup-exe params: bup-restore-params print-cmd: "Running:")) (sleep 2) (db:multi-db-sync - (db:setup #t) ;; (db:setup-db *dbstruct-dbs* *toppath* #f) + (db:setup) ;; (db:setup-db *dbstruct-dbs* *toppath* #f) 'killservers ;'dejunk ;'adj-testids 'old2new ) Index: cgisetup/models/pgdb.scm ================================================================== --- cgisetup/models/pgdb.scm +++ cgisetup/models/pgdb.scm @@ -18,10 +18,11 @@ ;;====================================================================== (declare (unit pgdb)) (declare (uses configf)) +(declare (uses mtargs)) ;; I don't know how to mix compilation units and modules, so no module here. ;; ;; (module pgdb ;; ( @@ -31,10 +32,11 @@ ;; (import scheme) ;; (import data-structures) ;; (import chicken) (use typed-records (prefix dbi dbi:)) +(import (prefix mtargs args:)) ;; given a configdat lookup the connection info and open the db ;; (define (pgdb:open configdat #!key (dbname #f)(dbispec #f)) (let ((pgconf (or dbispec Index: common.scm ================================================================== --- common.scm +++ common.scm @@ -21,10 +21,11 @@ (declare (unit common)) (declare (uses commonmod)) (declare (uses rmtmod)) (declare (uses debugprint)) (declare (uses mtargs)) + (use srfi-1 data-structures posix regex-case (prefix base64 base64:) format dot-locking csv-xml z3 udp ;; sql-de-lite hostinfo md5 message-digest typed-records directory-utils stack matchable regex posix (srfi 18) extras ;; tcp @@ -37,33 +38,32 @@ (import commonmod debugprint rmtmod (prefix mtargs args:)) - + +(define (remove-server-files directory-path) + (let ((files (glob (string-append directory-path "/server*")))) + (for-each delete-file* files))) (include "common_records.scm") - - -;; (require-library margs) -;; (include "margs.scm") - -;; (define old-exit exit) -;; -;; (define (exit . code) -;; (if (null? code) -;; (old-exit) -;; (old-exit code))) +(define (remove-files filespec) + (let ((files (glob filespec))) + (for-each delete-file* files))) (define (stop-the-train) (thread-start! (make-thread (lambda () (let loop () (if (and *toppath* (file-exists? (conc *toppath*"/stop-the-train"))) (let* ((msg (conc "ERROR: found file "*toppath*"/stop-the-train, exiting immediately"))) ;; yes, print to current-output-port AND *default-log-port*, annoying but necessary I think (print msg) + (remove-server-files (conc *toppath* "/logs")) (debug:print 0 *default-log-port* msg) + (remove-files (conc *toppath* "/logs/server*")) + (remove-files (conc *toppath* "/.servinfo/*")) + (remove-files (conc *toppath* "/.mtdb/*lock")) (exit 1))) (thread-sleep! 5) (loop)))))) ;; execute thunk, return value. If exception thrown, trap exception, return #f, and emit nonfatal condition note to *default-log-port* . @@ -157,14 +157,10 @@ (define *time-zero* (current-seconds)) ;; for the watchdog (define *on-exit-procs* '()) ;; add procs to this list to be executed on exit (define *default-area-tag* "local") ;; DATABASE -;; (define *dbstruct-dbs* #f) ;; used to cache the dbstruct in db:setup. Goal is to remove this. -;; db stats -(define *db-stats* (make-hash-table)) ;; hash of vectors < count duration-total > -(define *db-stats-mutex* (make-mutex)) ;; db access (define *db-last-access* (current-seconds)) ;; last db access, used in server ;; (define *db-write-access* #t) ;; db sync ;; (define *db-last-sync* 0) ;; last time the sync to megatest.db happened @@ -185,11 +181,10 @@ ;; (define *max-cache-size* 0) (define *logged-in-clients* (make-hash-table)) (define *server-id* #f) (define *server-info* #f) ;; good candidate for easily convert to non-global (define *time-to-exit* #f) -(define *server-run* #t) (define *run-id* #f) (define *server-kind-run* (make-hash-table)) (define *home-host* #f) ;; (define *total-non-write-delay* 0) (define *heartbeat-mutex* (make-mutex)) @@ -251,11 +246,11 @@ (define *common:this-exe-fullpath* (common:get-this-exe-fullpath)) (define *common:this-exe-dir* (pathname-directory *common:this-exe-fullpath*)) (define *common:this-exe-name* (pathname-strip-directory *common:this-exe-fullpath*)) (define (common:get-sync-lock-filepath) - (let* ((tmp-area (common:get-db-tmp-area)) + (let* ((tmp-area (common:make-tmpdir-name *toppath* "")) (lockfile (conc tmp-area "/megatest.db.lock"))) lockfile)) (define *common:logpro-exit-code->status-sym-alist* '( ( 0 . pass ) @@ -405,21 +400,19 @@ (define (common:version-changed?) (not (equal? (common:get-last-run-version) (common:version-signature)))) -;; From 1.70 to 1.80, db's are compatible. - +;; From 1.70 to 1.81, db's are compatible. +;; +;; BUG: This logic is almost certainly not quite correct. +;; (define (common:api-changed?) - (let* ( - (megatest-major-version (substring (->string megatest-version) 0 4)) - (run-major-version (substring (conc (common:get-last-run-version)) 0 4)) - ) - (and (not (equal? megatest-major-version "1.80")) - (not (equal? megatest-major-version megatest-run-version))) - ) -) + (let* ((megatest-major-version (substring (->string megatest-version) 0 4)) + (run-major-version (substring (conc (common:get-last-run-version)) 0 4))) + (and (not (member megatest-major-version '("1.81" "1.80"))) + (not (equal? megatest-major-version run-major-version))))) ;;====================================================================== ;; Move me elsewhere ... ;; RADT => Why do we meed the version check here, this is called only if version misma ;; @@ -433,12 +426,11 @@ 'adj-target 'new2old '(dejunk) )) ((tcp nfs) - (debug:print 0 *default-log-port* "WARNING: cleanup-db NOT implemented yet for tcp and nfs.") - #;(apply db:multi-db-sync + (apply db:multi-db-sync dbstruct 'schema 'killservers 'adj-target 'new2old @@ -622,11 +614,11 @@ (common:on-homehost?)) (if (common:api-changed?) (let* ((mtconf (conc (get-environment-variable "MT_RUN_AREA_HOME") "/megatest.config")) (dbfile (conc (get-environment-variable "MT_RUN_AREA_HOME") ".mtdb/main.db")) (read-only (not (file-write-access? dbfile))) - (dbstruct (db:setup #t))) ;; (db:setup-db *dbstruct-dbs* *toppath* #f))) ;; #t))) + (dbstruct (db:setup))) ;; (db:setup-db *dbstruct-dbs* *toppath* #f))) ;; #t))) (debug:print 0 *default-log-port* "WARNING: Version mismatch!\n" " expected: " (common:version-signature) "\n" " got: " (common:get-last-run-version)) (cond @@ -937,21 +929,10 @@ (debug:print-error 0 *default-log-port* "Unable to find megatest home directory.") #f) (loop (pathname-directory thepath))))) )) - -(define (common:db-tmp-area-path) - (conc "/tmp/" - (current-user-name) - "/megatest_localdb/" - (common:get-testsuite-name) - "/" - (string-translate *toppath* "/" ".") - ) -) - ;;====================================================================== ;; redefine for future cleanup (converge on area-name, the more generic ;; (define common:get-area-name common:get-testsuite-name) @@ -971,11 +952,11 @@ (list (conc "/tmp/" (current-user-name) "/megatest_localdb/" tsname "/" (string-translate toppath "/" ".")) (conc "/tmp/" (current-process-id) ;; just in case we have an issue with the dir by own user name - "/megatest_localdb/" + "/"(current-user-name) "/megatest_localdb/" tsname (string-translate toppath "/" ".")) )))) (set! *db-cache-path* dbpath) ;; ensure megatest area has .mtdb @@ -1548,11 +1529,11 @@ ;; (define (common:lazy-modification-time fpath) (handle-exceptions exn (begin - (debug:print 0 *default-log-port* "Failed to get modifcation time for " fpath ", treating it as zero. exn=" exn) + (debug:print 2 *default-log-port* "Failed to get modification time for " fpath ", treating it as zero. exn=" exn) 0) (if (file-exists? fpath) (file-modification-time fpath) 0))) @@ -1664,12 +1645,12 @@ ;; (let loop ((x 0)) ;; (print x "," (common:get-delay x 1)) ;; (if (< x 2) ;; (loop (+ x 0.1))))) -(define (get-cpu-load #!key (remote-host #f)) - (car (common:get-cpu-load remote-host))) +;; (define (get-cpu-load #!key (remote-host #f)) +;; (car (common:get-cpu-load remote-host))) ;;====================================================================== ;; (let* ((load-res (process:cmd-run->list "uptime")) ;; (load-rx (regexp "load average:\\s+(\\d+)")) ;; (cpu-load #f)) @@ -1681,18 +1662,18 @@ ;; (set! cpu-load newval)))))) ;; (car load-res)) ;; cpu-load)) ;;====================================================================== -;; get values from cached info from dropping file in logs dir +;; get values from cached info from dropping file in .sysdata dir ;; e.g. key is host and dtype is normalized-load ;; (define (common:get-cached-info key dtype #!key (age 10)) (if *toppath* (let* ((fullpath (conc *toppath* "/.sysdata/" key "-" dtype ".log")) (delfile (lambda (exn) - (debug:print-info 1 *default-log-port* " removing bad file " fullpath ", exn=" exn) + (debug:print-info 2 *default-log-port* " removing bad file " fullpath ", exn=" exn) (delete-file* fullpath) #f))) (if (and (file-exists? fullpath) (file-read-access? fullpath)) (handle-exceptions @@ -1739,20 +1720,10 @@ (debug:print 0 *default-log-port* "failed to write file " fullpath ", exn=" exn) #f) (with-output-to-file fullpath (lambda ()(pp dat))))) #f)) -(define (common:raw-get-remote-host-load-orig remote-host) - (handle-exceptions - exn - (begin - (debug:print 0 *default-log-port* "failed to ssh to " remote-host " and get loadavg. exn=" exn) - #f) ;; more specific handling of errors needed - (with-input-from-pipe - (conc "ssh " remote-host " cat /proc/loadavg") - (lambda ()(list (read)(read)(read)))))) - (define (common:raw-get-remote-host-load remote-host) (let* ((inp #f)) (handle-exceptions exn (begin @@ -1773,11 +1744,12 @@ (begin (debug:print 0 *default-log-port* "failed to ssh or read loadavg from host " remote-host ", exn=" exn) '(-99 -99 -99)) (let* ((actual-hostname (or remote-host (get-host-name) "localhost"))) (or (common:get-cached-info actual-hostname "cpu-load") - (let ((result (if remote-host + (let ((result (if (and remote-host + (not (equal? remote-host (get-host-name)))) (map (lambda (res) (if (eof-object? res) 9e99 res)) (common:raw-get-remote-host-load remote-host)) (with-input-from-file "/proc/loadavg" (lambda ()(list (read)(read)(read))))))) @@ -2036,11 +2008,12 @@ #f) ;; if zero return #f so caller knows that things are not working (loop (if (string-match "^processor\\s+:\\s+\\d+$" inl) (+ numcpu 1) numcpu) (read-line)))))) - (result (if remote-host + (result (if (and remote-host + (not (equal? remote-host (get-host-name)))) (common:generic-ssh (conc "ssh " remote-host " cat /proc/cpuinfo") proc -1) (with-input-from-file "/proc/cpuinfo" proc)))) (if (and (number? result) @@ -2303,11 +2276,11 @@ (define (common:check-db-dir-space) (let* ((required (string->number ;; default is 1GB (or actually a billion bytes) This is the number of 1 kB blocks. (or (configf:lookup *configdat* "setup" "dbdir-space-required") "1000000"))) - (dbdir (common:get-db-tmp-area)) ;; (db:get-dbdir)) + (dbdir (common:make-tmpdir-name *toppath* "")) ;; (db:get-dbdir)) (tdbspace (common:check-space-in-dir dbdir required)) (mdbspace (common:check-space-in-dir *toppath* required))) (sort (list tdbspace mdbspace) (lambda (a b) (< (cadr a)(cadr b)))))) Index: commonmod.scm ================================================================== --- commonmod.scm +++ commonmod.scm @@ -64,10 +64,11 @@ chicken.condition chicken.file chicken.file.posix chicken.io chicken.pathname + chicken.port chicken.process chicken.process-context chicken.process-context.posix chicken.sort chicken.string @@ -83,10 +84,12 @@ srfi-1 srfi-18 srfi-69 typed-records system-information + + debugprint ))) ;;====================================================================== ;; CONTENTS ;; @@ -160,23 +163,34 @@ '()))) ;; should it return empty list or #f to indicate not set? (define (get-section cfgdat section) (hash-table-ref/default cfgdat section '())) + +(define (common:make-tmpdir-name areapath tmpadj) + (let* ((area (pathname-file areapath)) + (dname (conc "/tmp/"(current-user-name)"/megatest_localdb/" area "/" (string-translate areapath "/" ".") tmpadj "/.mtdb"))) + (unless (directory-exists? dname) + (create-directory dname #t)) + dname)) ;; dot-locking egg seems not to work, using this for now ;; if lock is older than expire-time then remove it and try again ;; to get the lock ;; (define (common:simple-file-lock fname #!key (expire-time 300)) - (let ((fmod-time (handle-exceptions - ext - (current-seconds) - (file-modification-time fname)))) - (if (file-exists? fname) ;; (common:file-exists? fname) + (let* ((lock-exists (file-exists? fname)) + (fmod-time (if lock-exists + (current-seconds) + (handle-exceptions + ext + (current-seconds) + (file-modification-time fname))))) + (if lock-exists (if (> (- (current-seconds) fmod-time) expire-time) (begin + (debug:print-info 1 *default-log-port* "Removing stale lock "fname) (handle-exceptions exn #f (delete-file* fname)) (common:simple-file-lock fname expire-time: expire-time)) #f) (let ((key-string (conc (get-host-name) "-" (current-process-id)))) (with-output-to-file fname @@ -283,13 +297,16 @@ (filter (lambda (x) (not (string-match "^\\s*" x))) val-list)) '()))) -(define (get-cpu-load) - (let* ((load-info (with-input-from-file "/proc/loadavg" read-lines))) - (map string->number (string-split load-info)))) +(define (commonmod:get-cpu-load) + (let* ((load-info (with-input-from-file "/proc/loadavg" read-lines)) + (res (map string->number (string-split (car load-info))))) + (if (null? res) + #f ;; something is wrong + (car res)))) (define *current-host-cores* #f) (define (get-current-host-cores) (or *current-host-cores* @@ -311,11 +328,11 @@ (string->number (read-line))))) ;; get the normalized (i.e. load / numcpus) for *this* host ;; (define (get-normalized-cpu-load) - (/ (get-cpu-load)(get-current-host-cores))) + (/ (commonmod:get-cpu-load)(get-current-host-cores))) ;;====================================================================== ;; testsuite and area utilites ;;====================================================================== @@ -395,14 +412,13 @@ ((d) 86400) ((w) 604800) ((M) 2628000) ;; aproximately one month ((y) 31536000) (else - 0))))))) - ;; (print "ERROR: can't parse timestring "tstr", component "part) - ;; can't (yet) use debugprint. rely on -show-config for user to find errors - ))) + 0))))) + (debug:print 0 *default-log-port* "ERROR: can't parse timestring "tstr", component "part", string: "(cadr match)))) + (debug:print 0 *default-log-port* "ERROR: can't parse timestring "tstr", component "part)))) parts) time-secs)) (define (seconds->hr-min-sec secs) (let* ((hrs (quotient secs 3600)) Index: configf.scm ================================================================== --- configf.scm +++ configf.scm @@ -31,10 +31,15 @@ (declare (uses mtargs)) (declare (uses mtargs.import)) (declare (uses common)) (declare (uses commonmod)) (declare (uses commonmod.import)) +(declare (uses dbfile)) +(declare (uses dbfile.import)) +(declare (uses dbmod)) +(declare (uses dbmod.import)) + (import commonmod (prefix mtargs args:) debugprint) (include "common_records.scm") Index: dashboard-tests.scm ================================================================== --- dashboard-tests.scm +++ dashboard-tests.scm @@ -23,10 +23,11 @@ ;;====================================================================== (declare (unit dashboard-tests)) (declare (uses common)) (declare (uses commonmod)) +(declare (uses dcommon)) (declare (uses db)) (declare (uses gutils)) (declare (uses rmt)) (declare (uses ezsteps)) (declare (uses subrun)) @@ -94,11 +95,12 @@ "Current state: " "Current status: " "Test comment: " "Test id: " "Test date: ")) - (list (iup:label "" #:expand "VERTICAL")))) + (list (iup:label "" #:expand "VERTICAL" + )))) (apply iup:vbox ; #:expand "YES" (list (store-label "testname" (iup:label (db:test-get-testname testdat) #:expand "HORIZONTAL") (lambda (testdat)(db:test-get-testname testdat))) @@ -162,11 +164,12 @@ (list "Author: " "Owner: " "Reviewed: " "Tags: " "Description: ")) - (list (iup:label "" #:expand "VERTICAL")))) + (list (iup:label "" #:expand "VERTICAL" + )))) (apply iup:vbox ; #:expand "YES" (list (store-meta "author" (iup:label (db:testmeta-get-author testmeta) #:expand "HORIZONTAL") (lambda (testmeta)(db:testmeta-get-author testmeta))) @@ -178,11 +181,12 @@ (lambda (testmeta)(db:testmeta-get-reviewed testmeta))) (store-meta "tags" (iup:label (db:testmeta-get-tags testmeta) #:expand "HORIZONTAL") (lambda (testmeta)(db:testmeta-get-tags testmeta))) (store-meta "description" - (iup:label (test-meta-panel-get-description testmeta) #:size "x50"); #:expand "HORIZONTAL") + (iup:label (test-meta-panel-get-description testmeta) ;; #:wordwrap "YES" ;; #:size "x50" + ) ;; #:expand "HORIZONTAL") (lambda (testmeta) (test-meta-panel-get-description testmeta))) ))))) @@ -206,16 +210,21 @@ (list (iup:label "runname ") (iup:label "run-id") (iup:label "run-date")))) (apply iup:vbox (append (map (lambda (keyval) - (iup:label (cadr keyval) #:expand "HORIZONTAL")) + (iup:vbox + (iup:label (cadr keyval) #:expand "HORIZONTAL") + ;; (iup:label "" #:expand "BOTH") + ) + ) keydat) (list (iup:label runname) (iup:label (conc run-id)) (iup:label (seconds->year-work-week/day-time event_time)) - (iup:label "" #:expand "VERTICAL")))))))) + (iup:label "" ;;#:expand "VERTICAL" + )))))))) ;;====================================================================== ;; Host info panel ;;====================================================================== (define (host-info-panel testdat store-label) @@ -231,11 +240,12 @@ "CPU Load: " "Run duration: " "Logfile: " "Top process id: " "Uname -a: ")) - (iup:label "" #:expand "VERTICAL"))) + (iup:label "" ;; #:expand "VERTICAL" + ))) (apply iup:vbox ; #:expand "YES" (list ;; NOTE: Yes, the host can change! (store-label "HostName" (iup:label ;; (sdb:qry 'getstr @@ -267,18 +277,20 @@ ;; (define (submegatest-panel dbstruct keydat testdat runname testconfig) (let* ((test-run-dir (db:test-get-rundir testdat)) (subarea (subrun:get-runarea test-run-dir)) (area-exists (and subarea (common:file-exists? subarea silent: #t)))) - (if subarea - (iup:frame - #:title "Megatest Run Info" ; #:expand "YES" + (iup:frame + #:title "Megatest Run Info" ;; #:expand "HORIZONTAL" + (if subarea (iup:button "Launch Dashboard" #:action (lambda (obj) - (subrun:launch-dashboard test-run-dir)))) - (iup:vbox)))) + (subrun:launch-dashboard test-run-dir))) + (iup:vbox + (iup:label "Not a subrun..." #:expand "HORIZONTAL") + ))))) ;; use a global for setting the buttons colors ;; state status teststeps (define *state-status* (vector #f #f #f)) (define (update-state-status-buttons testdat) @@ -463,11 +475,11 @@ ;;====================================================================== ;; ;;====================================================================== (define (dashboard-tests:examine-test run-id test-id) ;; run-id run-key origtest) - (let* ((db-path (db:dbfile-path)) ;; (conc (configf:lookup *configdat* "setup" "linktree") "/db/" run-id ".db")) + (let* ((db-path (common:make-tmpdir-name *toppath* "")) ;; (conc (configf:lookup *configdat* "setup" "linktree") "/db/" run-id ".db")) (dbstruct #f) ;; NOT USED (testdat (rmt:get-test-info-by-id run-id test-id)) ;; (db:get-test-info-by-id dbstruct run-id test-id)) (db-mod-time 0) ;; (file-modification-time db-path)) (last-update 0) ;; (current-seconds)) (request-update #t)) @@ -670,10 +682,19 @@ (conc "megatest -target " keystring " -runname " runname " -run -testpatt " (conc testname "/" (if (equal? item-path "") "%" item-path)) " -clean-cache" + )))) + (rerun-clean (lambda (x) + (iup:attribute-set! + command-text-box "VALUE" + (conc "megatest -target " keystring " -runname " runname + " -rerun-clean -testpatt " (conc testname "/" (if (equal? item-path "") + "%" + item-path)) + " -clean-cache" )))) (remove-test (lambda (x) (iup:attribute-set! command-text-box "VALUE" (conc "megatest -remove-runs -target " keystring " -runname " runname @@ -718,31 +739,45 @@ (else ;; (test-set-status! db run-id test-name state status itemdat) (set! self ; (iup:dialog #:close_cb (lambda (a)(exit)) ; #:expand "YES" #:title testfullname - (iup:vbox ; #:expand "YES" - ;; The run and test info - (iup:hbox ; #:expand "YES" - (run-info-panel dbstruct keydat testdat runname) - (test-info-panel testdat store-label widgets) - (test-meta-panel testmeta store-meta)) + (iup:vbox (iup:hbox - (host-info-panel testdat store-label) - (submegatest-panel dbstruct keydat testdat runname testconfig)) + (iup:vbox ; #:expand "YES" + ;; The run and test info + (iup:hbox ; #:expand "YES" + (run-info-panel dbstruct keydat testdat runname) + (test-info-panel testdat store-label widgets)) + (host-info-panel testdat store-label)) + (iup:vbox + (test-meta-panel testmeta store-meta) + (submegatest-panel dbstruct keydat testdat runname testconfig))) ;; The controls - (iup:frame #:title "Actions" + (iup:hbox ;; frame #:title "Actions" (iup:vbox - (iup:hbox - (iup:button "View Log" #:action viewlog #:size "80x") - (iup:button "Start Xterm" #:action xterm #:size "80x") - (iup:button "Run Test" #:action run-test #:size "80x") - (iup:button "Clean Test" #:action remove-test #:size "80x") - (iup:button "CleanRunExecute!" #:action clean-run-execute #:size "80x") - (iup:button "Kill All Jobs" #:action kill-jobs #:size "80x") - (iup:button "Archive Test" #:action archive-test #:size "80x") - (iup:button "Close" #:action (lambda (x)(exit)) #:size "80x")) + (iup:hbox + (iup:frame + #:title "Immediate" + (iup:hbox + (iup:button "Start Xterm" #:action xterm #:size "80x") + (iup:button "CleanRunExecute!" #:action clean-run-execute #:size "80x") + (iup:button "View Log" #:action viewlog #:size "80x"))) + (iup:frame + #:title "Command line" + (iup:hbox + (iup:button "Run Test" #:action run-test #:size "80x") + (iup:button "Rerun-clean" #:action rerun-clean #:size "80x") + (iup:button "Clean Test" #:action remove-test #:size "80x") + (iup:button "Kill All Jobs" #:action kill-jobs #:size "80x"))) + (iup:label "" #:expand "HORIZONTAL") + (iup:frame + #:title "Other" + (iup:hbox + ;; (iup:button "Archive Test" #:action archive-test #:size "80x") + (iup:button "Close" #:action (lambda (x)(exit)) #:size "80x") + ))) (apply iup:hbox (list command-text-box command-launch-button)))) (set-fields-panel dbstruct run-id test-id testdat) (let ((tabs Index: dashboard-transport-mode.scm.template ================================================================== --- dashboard-transport-mode.scm.template +++ dashboard-transport-mode.scm.template @@ -13,10 +13,10 @@ ;; (dbfile:sync-method 'none) ;; (dbfile:cache-method 'none) ;; (rmt:transport-mode 'nfs) ;; uncomment this block to test with tcp and cachedb -(dbfile:sync-method 'attach) ;; original was causing crash on start. +(dbfile:sync-method 'none) ;; original was causing crash on start. (dbfile:cache-method 'none) (rmt:transport-mode 'nfs) Index: dashboard.scm ================================================================== --- dashboard.scm +++ dashboard.scm @@ -36,12 +36,14 @@ (declare (uses dashboard-context-menu)) (declare (uses vg)) (declare (uses subrun)) (declare (uses mt)) (declare (uses dbmod)) -(declare (uses rmtmod)) (declare (uses dbfile)) +(declare (uses dbfile.import)) +(declare (uses rmtmod)) +(declare (uses rmtmod.import)) (declare (uses commonmod)) (declare (uses commonmod.import)) (use format) @@ -74,10 +76,12 @@ ;; executables such as dashboard and mtutil ;; (include "dashboard-transport-mode.scm") (dbfile:db-init-proc db:initialize-main-db) (set! rmtmod:send-receive rmt:send-receive) + +(debug:print-info 0 *default-log-port* "transport-mode="(rmt:transport-mode)) (define help (conc "Megatest Dashboard, documentation at http://www.kiatoa.com/fossils/megatest version " megatest-version " license GPL, Copyright (C) Matt Welland 2012-2017 @@ -118,10 +122,11 @@ 0)) (if (args:get-arg "-mode") (let* ((mode (string->symbol (args:get-arg "-mode")))) (rmt:transport-mode mode))) +;; (rmt:transport-mode 'tcp)) (if (args:get-arg "-test") ;; need to use tcp for test control panel (rmt:transport-mode 'tcp)) ;; RA => Might require revert for filters @@ -138,33 +143,37 @@ ;; (hash-table-set! args:arg-hash "-use-db-cache" #t)));;;) ;;) ;; data common to all tabs goes here ;; -(defstruct dboard:commondat - ((curr-tab-num 0) : number) - please-update - tabdats - update-mutex - updaters - updating - uidat ;; needs to move to tabdat at some time - hide-not-hide-tabs - target - ) - -(define (dboard:commondat-make) - (make-dboard:commondat - curr-tab-num: 0 - tabdats: (make-hash-table) - please-update: #t - update-mutex: (make-mutex) - updaters: (make-hash-table) - updating: #f - hide-not-hide-tabs: #f - target: "" - )) +;; Moved to dcommon.scm +;; +;; (defstruct dboard:commondat +;; ((curr-tab-num 0) : number) +;; please-update +;; tabdats +;; update-mutex +;; updaters +;; updating +;; uidat ;; needs to move to tabdat at some time +;; hide-not-hide-tabs +;; target +;; ) +;; +;; (define (dboard:commondat-make) +;; (make-dboard:commondat +;; curr-tab-num: 0 +;; tabdats: (make-hash-table) +;; please-update: #t +;; update-mutex: (make-mutex) +;; updaters: (make-hash-table) +;; updating: #f +;; hide-not-hide-tabs: #f +;; target: "" +;; )) + +(set! *journal-stats-enable* #f) ;;====================================================================== ;; buttons color using image ;;====================================================================== @@ -207,39 +216,17 @@ ;; (iup:attribute-set! img1 "2" "255 0 0") (hash-table-set! images name img1) name))) -;; RA => returns the tabdat stored at hashkey passed in commondat-tabdats table (e.g. 0 gives summary) -;; -(define (dboard:common-get-tabdat commondat #!key (tab-num #f)) - (let* ((tnum (or tab-num - (dboard:commondat-curr-tab-num commondat) - 0)) ;; tab-num value is curr-tab-num value in passed commondat - (ht (dboard:commondat-tabdats commondat)) - (res (hash-table-ref/default ht tnum #f))) - (or res - (let ((new-tabdat (dboard:tabdat-make-data))) - (hash-table-set! ht tnum new-tabdat) - new-tabdat)))) - -;; RA => sets the tabdat passed to the hashkey at commondat:tabdats hash table -;; -(define (dboard:common-set-tabdat! commondat tabnum tabdat) - (hash-table-set! - (dboard:commondat-tabdats commondat) - tabnum - tabdat)) - ;; gets and calls updater list based on curr-tab-num ;; (define (dboard:common-run-curr-updaters commondat #!key (tab-num #f)) ;; (sync-db-to-tmp (dboard:common-get-tabdat commondat tab-num: tab-num)) ;; no longer applies ;; maybe need sleep here? - (if (dboard:common-get-tabdat commondat tab-num: tab-num) ;; only update if there is a tabdat (let* ((tnum (or tab-num (dboard:commondat-curr-tab-num commondat))) (updaters (hash-table-ref/default (dboard:commondat-updaters commondat) tnum '()))) @@ -401,12 +388,12 @@ (dboard:setup-tabdat dat) (dboard:setup-num-rows dat) dat)) (define (dboard:setup-tabdat tabdat) - (dboard:tabdat-dbdir-set! tabdat (db:dbfile-path)) ;; (conc (configf:lookup *configdat* "setup" "linktree") "/.db")) - (dboard:tabdat-dbfpath-set! tabdat (db:dbfile-path)) + (dboard:tabdat-dbdir-set! tabdat (common:make-tmpdir-name *toppath* "")) ;; (conc (configf:lookup *configdat* "setup" "linktree") "/.db")) + (dboard:tabdat-dbfpath-set! tabdat (common:make-tmpdir-name *toppath* "")) (dboard:tabdat-monitor-db-path-set! tabdat (conc (dboard:tabdat-dbdir tabdat) "/monitor.db")) ;; HACK ALERT: this is a hack, please fix. (dboard:tabdat-ro-set! tabdat (not (file-read-access? (dboard:tabdat-dbfpath tabdat)))) @@ -670,11 +657,11 @@ ;; (define (dboard:get-tests-for-run-duplicate tabdat run-id run testnamepatt key-vals) (let* ((start-time (current-seconds)) (access-mode (dboard:tabdat-access-mode tabdat)) (num-to-get (string->number (or (configf:lookup *configdat* "setup" "num-tests-to-get") - "200"))) + "1000"))) (states (hash-table-keys (dboard:tabdat-state-ignore-hash tabdat))) (statuses (hash-table-keys (dboard:tabdat-status-ignore-hash tabdat))) (do-not-use-db-file-timestamps #f) ;; (configf:lookup *configdat* "setup" "do-not-use-db-file-timestamps")) ;; this still hosts runs-summary-tab (do-not-use-query-timestamps #t) ;; (configf:lookup *configdat* "setup" "do-not-use-query-timestamps")) ;; this no longer troubles runs-summary-tab (sort-info (get-curr-sort)) @@ -697,12 +684,12 @@ (last-db-time (if do-not-use-db-file-timestamps 0 (dboard:rundat-last-db-time run-dat))) (db-path (or (dboard:rundat-db-path run-dat) (let* ((db-dir (get-environment-variable "MT_RUN_AREA_HOME")) ;; (common:get-db-tmp-area)) - (db-pth (conc db-dir "/.mtdb/main.db"))) - (dboard:rundat-db-path-set! run-dat db-pth) + (db-pth (conc db-dir "/.mtdb/*.db"))) + (dboard:rundat-db-path-set! run-dat db-pth) ;; this is just a cache of the path db-pth))) (db-mod-time (common:lazy-sqlite-db-modification-time db-path)) (db-modified (>= db-mod-time last-db-time)) (multi-get (> (dboard:rundat-run-data-offset run-dat) 0)) ;; multi-get in progress (tmptests (if (or do-not-use-db-file-timestamps @@ -748,16 +735,16 @@ (for-each (lambda (tdat) (let ((test-id (db:test-get-id tdat)) (state (db:test-get-state tdat))) - (dboard:rundat-data-changed-set! run-dat #t) - (if (equal? state "DELETED") - (hash-table-delete! tests-ht test-id) - (hash-table-set! tests-ht test-id tdat)))) - tmptests) - + (dboard:rundat-data-changed-set! run-dat #t) + (if (equal? state "DELETED") + (hash-table-delete! tests-ht test-id) + (hash-table-set! tests-ht test-id tdat)))) + tmptests) + tests-ht)) ;; tmptests - new tests data ;; prev-tests - old tests data ;; @@ -852,10 +839,16 @@ (loop run tal new-res newmaxtests) ;; not done getting data for this run (loop (car tal)(cdr tal) new-res newmaxtests))))))) (dboard:tabdat-filters-changed-set! tabdat #f) (dboard:update-tree tabdat runs-hash header tb))) + +(define *dashboard-last-run-id-update* (make-hash-table)) ;; id => seconds + +(define (dboard:clear-run-id-update-hash) + (hash-table-clear! *dashboard-last-run-id-update*)) + ;; this calls dboard:get-tests-for-run-duplicate for each run ;; ;; create a virtual table of all the tests ;; keypatts: ( (KEY1 "abc%def")(KEY2 "%") ) ;; @@ -888,63 +881,82 @@ (dboard:tabdat-item-test-names-set! tabdat '()) (hash-table-clear! (dboard:tabdat-allruns-by-id tabdat))) (let loop ((run (car runs)) (tal (cdr runs)) (res '()) - (maxtests 0)) + (maxtests 0) + (cont-run #f)) (let* ((run-id (db:get-value-by-header run header "id")) + (recently-done (< (- (current-seconds) + (hash-table-ref/default *dashboard-last-run-id-update* run-id 0)) 1)) (run-struct (hash-table-ref/default (dboard:tabdat-allruns-by-id tabdat) run-id #f)) ;; (last-update (if run-struct (dboard:rundat-last-update run-struct) 0)) (key-vals (rmt:get-key-vals run-id)) - (tests-ht (dboard:get-tests-for-run-duplicate tabdat run-id run testnamepatt key-vals)) + (tests-ht (let* ((tht (if (and recently-done run-struct) + (let ((rht (dboard:rundat-tests run-struct))) ;; (dboard:tabdat-allruns-by-id tabdat))) + (or rht + (dboard:get-tests-for-run-duplicate tabdat run-id run testnamepatt key-vals))) + (dboard:get-tests-for-run-duplicate tabdat run-id run testnamepatt key-vals)))) + (assert (hash-table? tht) "FATAL: But here tht should be a hash-table") + tht)) ;; GET RID OF dboard:get-tests-dat - it is superceded by dboard:get-tests-for-run-duplicate ;; dboard:get-tests-for-run-duplicate - returns a hash table ;; (dboard:get-tests-dat tabdat run-id last-update)) (all-test-ids (hash-table-keys tests-ht)) - (num-tests (length all-test-ids))) - ;; (print "run-struct: " run-struct) - ;; NOTE: bubble-up also sets the global (dboard:tabdat-item-test-names tabdat) - ;; (tests (bubble-up tmptests priority: bubble-type)) - ;; NOTE: 11/01/2013 This routine is *NOT* getting called excessively. - ;; (debug:print 0 *default-log-port* "Getting data for run " run-id " with key-vals=" key-vals) - ;; Not sure this is needed? - (let* ((newmaxtests (max num-tests maxtests)) - ;; (last-update (- (current-seconds) 10)) - (run-struct (or run-struct - (dboard:rundat-make-init - run: run - tests: tests-ht - key-vals: key-vals))) - (new-res (if (null? all-test-ids) - res - (delete-duplicates - (cons run-struct res) - (lambda (a b) - (eq? (db:get-value-by-header (dboard:rundat-run a) header "id") - (db:get-value-by-header (dboard:rundat-run b) header "id")))))) - (elapsed-time (- (current-seconds) start-time))) - (if (null? all-test-ids) + (num-tests (length all-test-ids)) + ;; (print "run-struct: " run-struct) + ;; NOTE: bubble-up also sets the global (dboard:tabdat-item-test-names tabdat) + ;; (tests (bubble-up tmptests priority: bubble-type)) + ;; NOTE: 11/01/2013 This routine is *NOT* getting called excessively. + ;; (debug:print 0 *default-log-port* "Getting data for run " run-id " with key-vals=" key-vals) + ;; Not sure this is needed? + (newmaxtests (max num-tests maxtests)) + ;; (last-update (- (current-seconds) 10)) + (run-struct (or run-struct + (dboard:rundat-make-init + run: run + tests: tests-ht + key-vals: key-vals))) + (new-res (if (null? all-test-ids) + res + (delete-duplicates + (cons run-struct res) + (lambda (a b) + (eq? (db:get-value-by-header (dboard:rundat-run a) header "id") + (db:get-value-by-header (dboard:rundat-run b) header "id")))))) + (elapsed-time (- (current-seconds) start-time))) + (if (null? all-test-ids) (hash-table-delete! (dboard:tabdat-allruns-by-id tabdat) run-id) (hash-table-set! (dboard:tabdat-allruns-by-id tabdat) run-id run-struct)) - (if (or (null? tal) - (> elapsed-time 2)) ;; stop loading data after 5 seconds, on the next call more data *should* be loaded since get-tests-for-run uses last update - (begin - (when (> elapsed-time 2) - (debug:print 0 *default-log-port* "NOTE: updates are taking a long time, " elapsed-time "s elapsed.") - (let* ((old-val (iup:attribute *tim* "TIME")) - (new-val (number->string (inexact->exact (floor (* 2 (string->number old-val))))))) - (if (< (string->number new-val) 5000) - (begin - (debug:print 0 *default-log-port* "NOTE: increasing poll interval from "old-val" to "new-val) - (iup:attribute-set! *tim* "TIME" new-val))))) - (dboard:tabdat-allruns-set! tabdat new-res) - maxtests) - (if (> (dboard:rundat-run-data-offset run-struct) 0) - (loop run tal new-res newmaxtests) ;; not done getting data for this run - (loop (car tal)(cdr tal) new-res newmaxtests))))))) - (dboard:tabdat-filters-changed-set! tabdat #f) - (dboard:update-tree tabdat runs-hash header tb))) + + (if (or (null? tal) + (> elapsed-time 2)) ;; stop loading data after 5 + ;; seconds, on the next call + ;; more data *should* be + ;; loaded since + ;; get-tests-for-run uses last + ;; update + (begin + (when (> elapsed-time 2) + (debug:print 2 *default-log-port* "NOTE: updates are taking a long time, " elapsed-time "s elapsed.") + (let* ((old-val (iup:attribute *tim* "TIME")) + (new-val (number->string (inexact->exact (floor (* 2 (string->number old-val))))))) + (if (< (string->number new-val) 5000) + (begin + (debug:print 2 *default-log-port* "NOTE: increasing poll interval from "old-val" to "new-val) + (iup:attribute-set! *tim* "TIME" new-val))))) + (dboard:tabdat-allruns-set! tabdat new-res) + maxtests) + (if (> (dboard:rundat-run-data-offset run-struct) 0) + (begin + (thread-sleep! 0.2) ;; let the gui re-draw + (loop run tal new-res newmaxtests #t)) ;; not done getting data for this run + (begin + (hash-table-set! *dashboard-last-run-id-update* run-id (current-seconds)) + (loop (car tal)(cdr tal) new-res newmaxtests #f))))))) + (dboard:tabdat-filters-changed-set! tabdat #f) + (dboard:update-tree tabdat runs-hash header tb))) (define *collapsed* (make-hash-table)) (define (toggle-hide lnum uidat) ; fulltestname) (let* ((btn (vector-ref (dboard:uidat-get-lftcol uidat) lnum)) @@ -1152,15 +1164,15 @@ (drop (dboard:tabdat-all-test-names tabdat) (dboard:tabdat-start-test-offset tabdat)) '()))) (append xl (make-list (- (dboard:tabdat-num-tests tabdat) (length xl)) "")))) (update-labels uidat (dboard:tabdat-all-test-names tabdat)) - (for-each + (for-each ;;run (lambda (rundat) - ;; if rundat is junk clobber it with a decent placeholder (if (or (not rundat) ;; handle padded runs (not (dboard:rundat-run rundat))) + ;; Need to put an empty column in to erase previous contents. (set! rundat (dboard:rundat-make-init key-vals: (map (lambda (x) "")(dboard:tabdat-keys tabdat))))) (let* ((run (dboard:rundat-run rundat)) (testsdat-by-name (dboard:rundat-tests-by-name rundat)) (key-val-dat (dboard:rundat-key-vals rundat)) @@ -1167,23 +1179,22 @@ (run-id (db:get-value-by-header run (dboard:tabdat-header tabdat) "id")) (key-vals (append key-val-dat (list (let ((x (db:get-value-by-header run (dboard:tabdat-header tabdat) "runname"))) (if (string? x) x ""))))) (run-key (string-intersperse key-vals "\n"))) - + ;; fill in the run header key values ;; - (let ((rown 0) + (let ((rown 0) (headercol (vector-ref tableheader coln))) (for-each (lambda (kval) (let* ((labl (vector-ref headercol rown))) (if (not (equal? kval (iup:attribute labl "TITLE"))) (iup:attribute-set! (vector-ref headercol rown) "TITLE" kval)) (set! rown (+ rown 1)))) key-vals)) - - ;; For this run now fill in the buttons for each test + ;; For this run now fill in the buttons for each test ;; (let ((rown 0) (columndat (vector-ref table coln))) (for-each (lambda (testname) @@ -1196,17 +1207,12 @@ ;; testsdat))) (if (not matching) (vector -1 -1 "" "" "" 0 "" "" 0 "" "" "" 0 "" "") ;; (car matching)))) matching))) - (testname (db:test-get-testname testdat)) - (itempath (db:test-get-item-path testdat)) - (testfullname (test:test-get-fullname testdat)) (teststatus (db:test-get-status testdat)) (teststate (db:test-get-state testdat)) - ;;(teststart (db:test-get-event_time test)) - ;;(runtime (db:test-get-run_duration test)) (buttontxt (cond ((member teststate '("COMPLETED" "ARCHIVED")) teststatus) ((and (equal? teststate "NOT_STARTED") (member teststatus '("ZERO_ITEMS" "BLOCKED" "PREQ_FAIL" "PREQ_DISCARDED" "TIMED_OUT" "KEEP_TRYING" "TEN_STRIKES"))) teststatus) @@ -1401,11 +1407,15 @@ tp))) (states (dboard:tabdat-states tabdat)) (statuses (dboard:tabdat-statuses tabdat)) (target (let ((targ-list (dboard:tabdat-target tabdat))) (if targ-list (string-intersperse targ-list "/") "no-target-selected"))) - (run-name (dboard:tabdat-run-name tabdat)) + (run-name (let ((run-input (dboard:tabdat-run-name tabdat)) + ) + (if (equal? run-input "") + "no-runname-specified" + run-input))) (states-str (if (or (not states) (null? states)) "" (conc " -state " (string-intersperse states ",")))) (statuses-str (if (or (not statuses) @@ -2403,20 +2413,21 @@ ) )) "runs-summary-click-callback")))) (runs-summary-updater (lambda () - (mutex-lock! update-mutex) + ;; (mutex-lock! update-mutex) (if (or (dashboard:database-changed? commondat tabdat context-key: 'runs-summary-updater) (dboard:tabdat-view-changed tabdat)) (debug:catch-and-dump (lambda () ;; check that run-matrix is initialized before calling the updater (if run-matrix (dashboard:runs-summary-updater commondat tabdat tb cell-lookup run-matrix))) "dashboard:runs-summary-updater") ) - (mutex-unlock! update-mutex))) + #;(mutex-unlock! update-mutex) + )) (runs-summary-control-panel (dashboard:runs-summary-control-panel tabdat)) ) (dboard:commondat-add-updater commondat runs-summary-updater tab-num: tab-num) (dboard:tabdat-runs-tree-set! tabdat tb) (iup:vbox @@ -2459,11 +2470,11 @@ (iup:vbox (iup:textbox #:size "120x15" #:fontsize "10" #:value "%" #:expand "NO" #:action (lambda (obj unk val) (debug:catch-and-dump - (lambda () + (lambda ()57 (mark-for-update tabdat) (update-search commondat tabdat "test-name" val)) "make-controls"))) (iup:hbox (iup:button "Quit" #:action (lambda (obj) @@ -2478,10 +2489,11 @@ (dboard:tabdat-allruns-by-id-set! tabdat (make-hash-table)) (dboard:tabdat-done-runs-set! tabdat '()) (dboard:tabdat-not-done-runs-set! tabdat '()) (dboard:tabdat-view-changed-set! tabdat #t) (dboard:commondat-please-update-set! commondat #t) + (dboard:clear-run-id-update-hash) (mark-for-update tabdat)) #:expand "NO" #:size "40x15") (iup:button "Collapse" #:action (lambda (obj) (debug:catch-and-dump (lambda () @@ -3111,11 +3123,11 @@ (debug:print 2 *default-log-port* "WARNING: error in accessing databases in get-youngest-run-db-mod-time: " ((condition-property-accessor 'exn 'message) exn) " db-dir="dbdir ", exn=" exn) (current-seconds)) ;; something went wrong - just print an error and return current-seconds (common:max (map (lambda (filen) (file-modification-time filen)) - (glob (conc dbdir "/*.db*")))))) + (cons (conc dbdir "/main.db") (glob (conc dbdir "/?.db"))))))) (define (dashboard:monitor-changed? commondat tabdat) (let* ((run-update-time (current-seconds)) (monitor-db-path (dboard:tabdat-monitor-db-path tabdat)) (monitor-modtime (if (and monitor-db-path (common:file-exists? monitor-db-path)) @@ -3136,11 +3148,11 @@ (hash-table-set! (dboard:tabdat-last-db-update tabdat) context newtime)) ;; (define (dashboard:database-changed? commondat tabdat #!key (context-key 'default)) (let* ((run-update-time (current-seconds)) - (dbdir *toppath*) + (dbdir (conc *toppath* "/.mtdb")) (modtime (dashboard:get-youngest-run-db-mod-time dbdir)) (recalc (dashboard:recalc modtime (dboard:commondat-please-update commondat) (dboard:get-last-db-update tabdat context-key)))) (if recalc @@ -3343,14 +3355,14 @@ (vch (dboard:tabdat-view-changed tabdat))) (if (and cnv dwg vch) (begin (vg:drawing-xoff-set! dwg (dboard:tabdat-xadj tabdat)) (vg:drawing-yoff-set! dwg (dboard:tabdat-yadj tabdat)) - (mutex-lock! mtx) + ;; (mutex-lock! mtx) (canvas-clear! cnv) (vg:draw dwg tabdat) - (mutex-unlock! mtx) + ;; (mutex-unlock! mtx) (dboard:tabdat-view-changed-set! tabdat #f))))) ;; doesn't work. ;; ;;(define (gotoescape tabdat escape) @@ -3630,17 +3642,17 @@ (graph-uly (- (calc-y 0) canvas-margin)) (sec-per-50pt (/ 50 timescale)) ) ;; (print "timeoffset: " timeoffset " timescale: " timescale " run-duration: " (seconds->hr-min-sec run-duration) " width: " width " sec-per-50pt: " sec-per-50pt) ;; (print "timescale: " timescale " timeoffset: " timeoffset " sizex: " sizex " originx: " originx) - (mutex-lock! mtx) + ;; (mutex-lock! mtx) (vg:add-comp-to-lib runslib run-full-name runcomp) ;; Have to keep moving the instantiated box as it is anchored at the lower left ;; this should have worked for x in next statement? (maptime run-start) ;; add 60 to make room for the graph (vg:instantiate drawing "runslib" run-full-name run-full-name 8 (- (calc-y curr-run-start-row) (+ 5 graph-height run-to-run-margin))) - (mutex-unlock! mtx) + ;; (mutex-unlock! mtx) ;; (set! run-start-row (+ max-row 2)) ;; (dboard:tabdat-start-row-set! tabdat (+ new-run-start-row 1)) ;; get tests in list sorted by event time ascending (let testsloop ((test-ids (car hierdat)) ;; loop on tests (NOTE: not items!) (tests-tal (cdr hierdat)) @@ -3741,13 +3753,13 @@ (outln (vg:make-rect-obj -5 lly ulx uly text: run-full-name line-color: (vg:rgb->number 255 0 255 a: 128)))) ; (vg:components-get-extents d1 c1))) ;; this is the box around the run - (mutex-lock! mtx) + ;; (mutex-lock! mtx) (vg:add-obj-to-comp runcomp outln) - (mutex-unlock! mtx) + ;; (mutex-unlock! mtx) ;; this is where we have enough info to place the graph (dboard:graph commondat tabdat tab-num -5 (+ uly 10) ulx (+ uly graph-height 3) run-start run-end timescale maptime run-full-name canvas-margin) (dboard:tabdat-max-row-set! tabdat (+ (dboard:tabdat-max-row tabdat)(quotient (+ graph-height 40 3) row-height))) ;; (vg:instance-move drawing run-full-name 0 (dboard:tabdat-max-row tabdat)) )) @@ -3827,11 +3839,12 @@ (let* ((mtdb-path (conc *toppath* "/.mtdb/main.db")) (target (args:get-arg "-target")) (commondat (dboard:commondat-make))) (if target (begin - (args:remove-arg-from-ht "-target") + (hash-table-delete! args:arg-hash "-target") ;; workaround for the following commented out function + ;; (args:remove-arg-from-ht "-target") This function is in mtargs/mtargs.scm, but it's in an egg that is not in the current build of chicken 4.10 (dboard:commondat-target-set! commondat target) ) ) (if (not (launch:setup)) @@ -3886,22 +3899,22 @@ ;; tab-num: 2) (iup:callback-set! *tim* "ACTION_CB" (lambda (time-obj) (let ((update-is-running #f)) - (mutex-lock! (dboard:commondat-update-mutex commondat)) - (set! update-is-running (dboard:commondat-updating commondat)) - (if (not update-is-running) - (dboard:commondat-updating-set! commondat #t)) - (mutex-unlock! (dboard:commondat-update-mutex commondat)) - (if (not update-is-running) ;; we know that the update was not running and we now have a lock on doing an update + ;; (mutex-lock! (dboard:commondat-update-mutex commondat)) + (set! update-is-running (dboard:commondat-updating commondat)) + (if (not update-is-running) + (dboard:commondat-updating-set! commondat #t)) + ;; (mutex-unlock! (dboard:commondat-update-mutex commondat)) + (if (not update-is-running) ;; we know that the update was not running and we now have a lock on doing an update (begin (dboard:common-run-curr-updaters commondat) ;; (dashboard:run-update commondat) - (mutex-lock! (dboard:commondat-update-mutex commondat)) + ;; (mutex-lock! (dboard:commondat-update-mutex commondat)) (dboard:commondat-updating-set! commondat #f) - (mutex-unlock! (dboard:commondat-update-mutex commondat))) - )) + ;; (mutex-unlock! (dboard:commondat-update-mutex commondat)) + ))) 1)))) ;; (debug:print 0 *default-log-port* "Starting updaters") (let ((th1 (make-thread (lambda () (thread-sleep! 1) (dboard:common-run-curr-updaters commondat 0) ;; force update of summary tab @@ -3922,11 +3935,11 @@ (define (sync-db-to-tmp tabdat) (let* ((db-file "./.mtdb/main.db")) (if (and (not (file-write-access? db-file)) ( > (current-seconds) (+ last-copy-time 5))) (begin - (db:multi-db-sync (db:setup #f) 'old2new) + (db:multi-db-sync (db:setup) 'old2new) (set! last-copy-time (current-seconds)) ) ) ) ) @@ -3943,11 +3956,14 @@ (exit 1)))) '("MT_RUN_AREA_HOME" "MT_MEGATEST" "MT_CMDINFO" "MT_TEST_RUN_DIR" "MT_LINKTREE" "MT_TESTSUITENAME")) ) ) -(setenv "MT_RUN_AREA_HOME" (get-environment-variable "PWD")) +;; This is NOT good +;; (setenv "MT_RUN_AREA_HOME" (get-environment-variable "PWD")) +;; This should be OK but it really should not be necessary +(setenv "MT_RUN_AREA_HOME" (current-directory)) (if (not (null? remargs)) (if remargs (begin (debug:print 0 *default-log-port* "Unrecognised arguments: " (string-intersperse remargs " ")) Index: db.scm ================================================================== --- db.scm +++ db.scm @@ -131,15 +131,15 @@ (debug:print-error 0 *default-log-port* " query " stmt " failed, params: " params ", error: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn) (print-call-chain (current-error-port)) default))) (apply sqlite3:first-result db stmt params))) -(define (db:setup do-sync) +(define (db:setup) (assert *toppath* "FATAL: db:setup called before launch:setup has been run.") - (let* ((tmpdir (common:get-db-tmp-area))) + (let* ((tmpdir (common:make-tmpdir-name *toppath* ""))) (if (not *dbstruct-dbs*) - (dbfile:setup do-sync *toppath* tmpdir) + (dbfile:setup (conc *toppath* "/.mtdb") tmpdir) *dbstruct-dbs*))) ;; moved from dbfile ;; ;; ADD run-id SUPPORT @@ -267,17 +267,10 @@ ", arguments: " ((condition-property-accessor 'exn 'arguments) exn) ", location: " ((condition-property-accessor 'exn 'location) exn) )) -;; NB// #f => return dbdir only -;; (was planned to be; zeroth db with name=main.db) -;; -;; If run-id is #f return to create and retrieve the path where the db will live. -;; -(define db:dbfile-path common:get-db-tmp-area) - (define (db:set-sync db) (let ((syncprag (configf:lookup *configdat* "setup" "sychronous"))) (sqlite3:execute db (conc "PRAGMA synchronous = " (or syncprag 0) ";")))) @@ -467,11 +460,11 @@ (get-mtime shm-file)))) ;; (define (db:all-db-sync dbstruct) ;; (let* ((dbdat (db:open-db dbstruct #f db:initialize-main-db)) ;; (data-synced 0) ;; count of changed records -;; (tmp-area (common:get-db-tmp-area)) +;; (tmp-area (common:make-tmpdir-name *toppath*)) ;; (dbfiles (glob (conc tmp-area"/.mtdb/*.db"))) ;; (sync-durations (make-hash-table)) ;; (no-sync-db (db:open-no-sync-db))) ;; (for-each ;; (lambda (file) ;; tmp db file @@ -528,23 +521,63 @@ ;; (if dbdat (dbfile:add-dbdat dbstruct #f dbdat)) ;; ) ;; #t) (define (db:kill-servers) - (let* ((servers (server:choose-server *toppath* 'all-valid))) ;; (server:get-list *toppath*)) - (for-each - (lambda (server) - (handle-exceptions - exn - (begin - (debug:print-info 0 *default-log-port* "Unable to get host and/or port from " server ", exn=" exn) - #f) - (match-let (((mod-time host port start-time server-id pid) server)) - (if (and host pid) - (tasks:kill-server host pid))))) - servers) - (delete-file* (common:get-sync-lock-filepath)))) + (let* ((tl (launch:setup)) ;; need this to initialize *toppath* + (servdir (conc *toppath* "/.servinfo")) + (servfiles (glob (conc servdir "/*:*.db"))) + (fmtstr "~10a~22a~10a~25a~25a~8a\n") + (dbfiles (append (glob (conc *toppath* "/.mtdb/main.db")) (glob (conc *toppath* "/.mtdb/?.db"))(glob (conc *toppath* "/.mtdb/??.db")))) + (ttdat (make-tt areapath: *toppath*)) + ) + (format #t fmtstr "DB" "host:port" "PID" "age" "last mod" "state") + (for-each + (lambda (dbfile) + (let* ( + (dbfname (conc (pathname-file dbfile) ".db")) + (sfiles (tt:find-server *toppath* dbfname)) + ) + (for-each + (lambda (sfile) + (let ( + (sinfos (tt:get-server-info-sorted ttdat dbfname)) + ) + (for-each + (lambda (sinfo) + (let* ( + (db (list-ref sinfo 5)) + (pid (list-ref sinfo 4)) + (host (list-ref sinfo 0)) + (port (list-ref sinfo 1)) + (server-id (list-ref sinfo 3)) + (age (seconds->hr-min-sec (- (current-seconds) (list-ref sinfo 2)))) + (last-mod (seconds->string (list-ref sinfo 2))) + (killed (system (conc "ssh " host " kill " pid " > /dev/null"))) + (dummy2 (sleep 1)) + (state (if (> (system (conc "ssh " host " ps " pid " > /dev/null")) 0) "dead" "alive")) + ) + (format #t fmtstr db (conc host ":" port) pid age last-mod state) + (system (conc "rm " sfile)) + ) + ) + sinfos + ) + ) + ) + sfiles + ) + ) + ) + dbfiles + ) + ;; remove this db, because otherwise metadata contains records for old servers, and this causes a problem with db:no-sync-get-lock-with-id. + (if (file-exists? (conc *toppath* "/.mtdb/no-sync.db")) + (delete-file (conc *toppath* "/.mtdb/no-sync.db")) + ) + ) +) ;; options: ;; ;; 'killservers - kills all servers ;; 'dejunk - removes junk records @@ -556,33 +589,34 @@ ;; run-ids: '(1 2 3 ...) or #f (for all) ;; (define (db:multi-db-sync dbstruct . options) (let* (;; (dbdat (db:open-db dbstruct #f dbfile:db-init-proc)) (data-synced 0) ;; count of changed records - (tmp-area (common:get-db-tmp-area)) + (tmp-area (common:make-tmpdir-name *toppath* "")) (old2new (member 'old2new options)) (dejunk (member 'dejunk options)) (killservers (member 'killservers options)) (src-area (if old2new *toppath* tmp-area)) - (dest-area (if old2new tmp-area *toppath*)) - (dbfiles (if old2new (glob (conc *toppath* "/.mtdb/*.db")) (glob (conc tmp-area "/.mtdt/*.db")))) + (dest-area (if old2new tmp-area (conc *toppath* "/.mtdb"))) + (dbfiles (if old2new (glob (conc *toppath* "/.mtdb/*.db")) + (glob (conc tmp-area "/*.db")))) (keys (db:get-keys dbstruct)) (sync-durations (make-hash-table))) ;; kill servers - (if killservers (db:kill-servers)) + ;; (if killservers (db:kill-servers)) (if (not dbfiles) (debug:print-error 0 *default-log-port* "no dbfiles found in " (conc *toppath* "/.mtdb")) (for-each (lambda (srcfile) (debug:print-info 3 *default-log-port* "file: " srcfile) (let* ((fname (conc (pathname-file srcfile) ".db")) (basename (pathname-file srcfile)) (run-id (if (string= basename "main") #f (string->number basename))) - (destfile (conc dest-area "/.mtdb/" fname)) - (dest-directory (conc dest-area "/.mtdb/")) + (destfile (conc dest-area "/" fname)) + (dest-directory dest-area) (time1 (file-modification-time srcfile)) (time2 (if (file-exists? destfile) (begin (debug:print-info 2 *default-log-port* "destfile " destfile " exists") (file-modification-time destfile)) @@ -602,33 +636,37 @@ #t) ((and changed *time-to-exit*) ;; last sync #t) (else #f)))) - (if (or dejunk do-cp) + + (if (or dejunk do-cp) (let* ((start-time (current-milliseconds)) - ;; subdb is misnamed - should be dbdat (I think...) - (subdb (dbfile:open-db dbstruct run-id dbfile:db-init-proc)) - ;; (or (dbfile:get-subdb dbstruct run-id) - ;; (dbfile:init-subdb dbstruct run-id dbfile:db-init-proc))) + (subdb (or (dbfile:get-subdb dbstruct run-id) (dbfile:init-subdb dbstruct run-id dbfile:db-init-proc))) + (dbdat (or (dbfile:get-dbdat dbstruct run-id) (dbfile:open-db dbstruct run-id dbfile:db-init-proc))) (mtdb (dbr:subdb-mtdbdat subdb)) ;; ;; BUG: -mrw- I think this next line is wrong. run-id should be the path to .mtdb/.db ;; (tmpdb (dbfile:open-db dbstruct run-id dbfile:db-init-proc))) + (if dejunk + (begin + (debug:print 0 *default-log-port* "Cleaning tmp DB") + (db:clean-up run-id tmpdb) + (debug:print 0 *default-log-port* "Cleaning nfs DB") + (db:clean-up run-id mtdb) + ) + ) (debug:print-info 2 *default-log-port* "delta syncing file: " srcfile ", time diff: " (- time1 time2) " seconds") (if old2new (begin - (if dejunk (db:clean-up run-id mtdb)) (db:sync-tables (db:sync-all-tables-list - dbstruct (db:get-keys dbstruct)) #f mtdb tmpdb)) (begin - (if dejunk (db:clean-up run-id tmpdb)) - (db:sync-tables (db:sync-all-tables-list dbstruct (db:get-keys dbstruct)) #f tmpdb mtdb))) + (db:sync-tables (db:sync-all-tables-list (db:get-keys dbstruct)) #f tmpdb mtdb))) (hash-table-set! sync-durations (conc srcfile ".db") (- (current-milliseconds) start-time))) (debug:print-info 2 *default-log-port* "skipping delta sync. " srcfile " is up to date")))) dbfiles)) data-synced)) @@ -640,11 +678,11 @@ (for-each (lambda (subdb) (let* ((mtdb (dbr:subdb-mtdb subdb)) (tmpdb (db:get-subdb dbstruct run-id)) (refndb (dbr:subdb-refndb subdb)) - (newres (db:sync-tables (db:sync-all-tables-list dbstruct (db:get-keys dbstruct)) last-update tmpdb refndb mtdb))) + (newres (db:sync-tables (db:sync-all-tables-list (db:get-keys dbstruct)) last-update tmpdb refndb mtdb))) ;; (stack-push! (dbr:subdb-dbstack subdb) tmpdb) ;; BUG: verify this is really needed (dbfile:add-dbdat dbstruct run-id tmpdb) (set! res (cons newres res)))) subdbs) @@ -1148,16 +1186,19 @@ ;; 2. Look at run records ;; a. If have tests that are not deleted, set state='unknown' ;; b. .... ;; (define (db:clean-up run-id dbdat) - (debug:print 2 *default-log-port* "db:clean-up") - - (if run-id - (db:clean-up-rundb dbdat) - (db:clean-up-maindb dbdat) + (begin + (debug:print 0 *default-log-port* "Cleaning run DB " run-id) + (db:clean-up-rundb dbdat run-id) + ) + (begin + (debug:print 0 *default-log-port* "Cleaning main DB ") + (db:clean-up-maindb dbdat) + ) ) ) ;; Clean out old junk and vacuum the database @@ -1169,38 +1210,42 @@ ;; b. If test dir gone, delete the test record ;; 2. Look at run records ;; a. If have tests that are not deleted, set state='unknown' ;; b. .... ;; -(define (db:clean-up-rundb dbdat) +(define (db:clean-up-rundb dbdat run-id) ;; (debug:print 0 *default-log-port* "WARNING: db clean up not fully ported to v1.60, cleanup action will be on megatest.db") (let* ((db (dbr:dbdat-dbh dbdat)) - (count-stmt (sqlite3:prepare db "SELECT (SELECT count(id) FROM tests);")) + (test-count-stmt (sqlite3:prepare db "SELECT (SELECT count(id) FROM tests);")) + (step-count-stmt (sqlite3:prepare db "SELECT (SELECT count(id) FROM test_steps);")) (statements (map (lambda (stmt) (sqlite3:prepare db stmt)) (list - ;; delete all tests that belong to runs that are 'deleted' - ;; (conc "DELETE FROM tests WHERE run_id NOT IN (" (string-intersperse (map conc valid-runs) ",") ");") - ;; delete all tests that are 'DELETED' "DELETE FROM tests WHERE state='DELETED';" + "DELETE FROM test_steps WHERE status = 'DELETED';" + "DELETE FROM tests WHERE run_id IN (SELECT id FROM runs WHERE state = 'deleted');" )))) - ;; (db:delay-if-busy dbdat) (sqlite3:with-transaction db (lambda () (sqlite3:for-each-row (lambda (tot) - (debug:print-info 0 *default-log-port* "Records count before clean: " tot)) - count-stmt) + (debug:print-info 0 *default-log-port* "Test records count before clean: " tot)) + test-count-stmt) + (sqlite3:for-each-row (lambda (tot) + (debug:print-info 0 *default-log-port* "Test_step records count before clean: " tot)) + step-count-stmt) (map sqlite3:execute statements) (sqlite3:for-each-row (lambda (tot) - (debug:print-info 0 *default-log-port* "Records count after clean: " tot)) - count-stmt))) + (debug:print-info 0 *default-log-port* "Test records count after clean: " tot)) + test-count-stmt) + (sqlite3:for-each-row (lambda (tot) + (debug:print-info 0 *default-log-port* "Test_step records count after clean: " tot)) + step-count-stmt))) (map sqlite3:finalize! statements) - (sqlite3:finalize! count-stmt) - ;; (db:find-and-mark-incomplete db) - ;; (db:delay-if-busy dbdat) + (sqlite3:finalize! test-count-stmt) + (sqlite3:finalize! step-count-stmt) (sqlite3:execute db "VACUUM;"))) ;; Clean out old junk and vacuum the database ;; ;; Ultimately do something like this: @@ -1234,15 +1279,15 @@ ;; (db:delay-if-busy dbdat) (sqlite3:with-transaction db (lambda () (sqlite3:for-each-row (lambda (tot) - (debug:print-info 0 *default-log-port* "Records count before clean: " tot)) + (debug:print-info 0 *default-log-port* "Run records count before clean: " tot)) count-stmt) (map sqlite3:execute statements) (sqlite3:for-each-row (lambda (tot) - (debug:print-info 0 *default-log-port* "Records count after clean: " tot)) + (debug:print-info 0 *default-log-port* "Run records count after clean: " tot)) count-stmt))) (map sqlite3:finalize! statements) (sqlite3:finalize! count-stmt) ;; (db:find-and-mark-incomplete db) ;; (db:delay-if-busy dbdat) @@ -1253,11 +1298,11 @@ ;; no-sync.db - small bits of data to be shared between servers ;;====================================================================== (define (db:get-dbsync-path) (case (rmt:transport-mode) - ((http)(common:get-db-tmp-area)) + ((http)(common:make-tmpdir-name *toppath* "")) ((tcp) (conc *toppath*"/.mtdb")) ((nfs) (conc *toppath*"/.mtdb")) (else "/tmp/dunno-this-gonna-exist"))) ;; This is needed for api.scm @@ -1418,62 +1463,85 @@ #f (simple-run-id (car runs))))) ;; called with run-id=#f so will operate on main.db ;; -(define (db:insert-run dbstruct target runname run-meta) +(define (db:insert-run dbstruct run-id target runname run-meta) (let* ((keys (db:get-keys dbstruct)) (runs (db:simple-get-runs dbstruct runname #f #f target #f))) ;; runpatt count offset target last-update ;; need to insert run based on target and runname (let* ((targvals (string-split target "/")) (keystr (string-intersperse keys ",")) (key?str (string-intersperse (make-list (length targvals) "?") ",")) - (qrystr (conc "INSERT INTO runs (runname,"keystr") VALUES (?,"key?str")")) + (qrystr (conc "INSERT INTO runs (id,runname,"keystr") VALUES (?,?,"key?str")")) (get-var (lambda (db qrystr) (let* ((res #f)) (sqlite3:for-each-row (lambda row (set res (car row))) - db qrystr runname) + db qrystr run-id runname) res)))) (if (null? runs) - (db:create-initial-run-record dbstruct runname target)) - (let* ((run-id (db:get-run-id dbstruct runname target))) - (db:with-db + (begin + (db:create-initial-run-record dbstruct run-id runname target) + ) + ) + (let* () + ;;(debug:print 0 *default-log-port* "db:insert-run: Calling db:with-db to update the run record") + (debug:print 0 *default-log-port* "db:insert-run: runid = " run-id) +#; (db:with-db dbstruct #f #t (lambda (dbdat db) + (debug:print 0 *default-log-port* "In the lambda proc for " dbdat " " db) (for-each (lambda (keyval) + (debug:print 0 *default-log-port* "In the lambda proc for " keyval) (let* ((fieldname (car keyval)) (getqry (conc "SELECT "fieldname" FROM runs WHERE id=?;")) (setqry (conc "UPDATE runs SET "fieldname"=? WHERE id=?;")) (val (cdr keyval)) (valnum (if (number? val) val (if (string? val) (string->number val) #f)))) + (debug:print 0 *default-log-port* "fieldname " fieldname " val " val " valnum " valnum) (if (not (member fieldname (cons "runname" keys))) ;; don't attempt to tweak these (let* ((curr-val (get-var db getqry)) (have-it (or (equal? curr-val val) (equal? curr-val valnum)))) + (debug:print 0 *default-log-port* "have-it = " have-it) (if (not have-it) - (sqlite3:execute db setqry (or valnum val) run-id)))))) + (begin + (debug:print 0 *default-log-port* "Do sqlite3:execute") + ;; (sqlite3:execute db setqry (or valnum val) run-id) + ) + ) + ) + ) + (debug:print 0 *default-log-port* "Done with update") + ) + (debug:print 0 *default-log-port* "next keyval") + ) run-meta))) run-id)))) -(define (db:create-initial-run-record dbstruct runname target) +(define (db:create-initial-run-record dbstruct run-id runname target) (let* ((keys (db:get-keys dbstruct)) (targvals (string-split target "/")) (keystr (string-intersperse keys ",")) - (key?str (string-intersperse (make-list (length targvals) "?") ",")) - (qrystr (conc "INSERT INTO runs (runname,"keystr") VALUES (?,"key?str")"))) + (key?str (string-intersperse (make-list (length targvals) "?") ",")) ;; a string with the same length as targvals, where each element is "?" and interspersed with commas. + (qrystr (conc "INSERT INTO runs (id,runname,"keystr") VALUES (?,?,"key?str")"))) + (debug:print 0 *default-log-port* "db:create-initial-run-record") + (debug:print 0 *default-log-port* "qrystr = " qrystr) + (db:with-db - dbstruct #f #t + dbstruct #f #t ;; run-id writable (lambda (dbdat db) - (apply sqlite3:execute db qrystr runname targvals))))) + (debug:print 0 *default-log-port* "lambda proc: dbdat: " dbdat " db: " db) + (apply sqlite3:execute db qrystr run-id runname targvals))))) (define (db:insert-test dbstruct run-id test-rec) (let* ((testname (alist-ref "testname" test-rec equal?)) (item-path (alist-ref "item_path" test-rec equal?)) (id (db:get-test-id dbstruct run-id testname item-path)) @@ -1483,11 +1551,11 @@ (conc (car dat)"=?")) fieldvals) ",")" WHERE id=?;")) (insqry (conc "INSERT INTO tests ("(string-intersperse (map (lambda (x) (car x)) fieldvals) ",") ") VALUES ("(string-intersperse (make-list (length fieldvals) "?") ",")");"))) - (debug:print 0 *default-log-port* "id: "id"\nset: "setqry"\ninsqry: "insqry) + ;; (debug:print 0 *default-log-port* "id: "id"\nset: "setqry"\ninsqry: "insqry) (db:with-db dbstruct run-id #t (lambda (dbdat db) (if id @@ -1579,18 +1647,18 @@ ;; TODO: Switch this to use max(update_time) from each run db? Then if using a server there is no disk traffic (using cachedb db) ??? ;; ;; NOTE: This DOESN'T (necessarily) get the real run ids, but the number of the .db!! (define (db:get-changed-run-ids since-time) - (let* ((dbdir (db:dbfile-path)) ;; (configf:lookup *configdat* "setup" "dbdir")) - (alldbs (glob (conc dbdir "/.mtdb/[0-9]*.db*"))) + (let* ((dbdir (common:make-tmpdir-name *toppath* "")) ;; (configf:lookup *configdat* "setup" "dbdir")) + (alldbs (glob (conc *toppath* "/.mtdb/[0-9]*.db*"))) (changed (filter (lambda (dbfile) (> (file-modification-time dbfile) since-time)) alldbs))) (delete-duplicates (map (lambda (dbfile) - (let* ((res (string-match ".*\\/(\\d\\d)\\.db*" dbfile))) + (let* ((res (string-match ".*\\/(\\d+)\\.db*" dbfile))) (if res (string->number (cadr res)) (begin (debug:print 2 *default-log-port* "WARNING: Failed to process " dbfile " for run-id") 0)))) @@ -2233,21 +2301,24 @@ qry run-id (or last-update 0)))))) (define (db:get-testinfo-state-status dbstruct run-id test-id) - (let ((res #f)) - (db:with-db dbstruct run-id #f - (lambda (dbdat db) - (sqlite3:for-each-row - (lambda (run-id testname item-path state status) - ;; id,run_id,testname,state,status,event_time,host,cpuload,diskfree,uname,rundir,item_path,run_duration,final_logf,comment - (set! res (vector test-id run-id testname state status -1 "" -1 -1 "" "-" item-path -1 "-" "-"))) - db - "SELECT run_id,testname,item_path,state,status FROM tests WHERE id=? and run_id=?;" - test-id run-id))) - res)) + (db:with-db + dbstruct run-id #f + (lambda (dbdat db) + (let* ((res #f) + (stmth (db:get-cache-stmth dbdat db "SELECT run_id,testname,item_path,state,status FROM tests WHERE id=? and run_id=?;"))) + (sqlite3:for-each-row + (lambda (run-id testname item-path state status) + ;; id,run_id,testname,state,status,event_time,host,cpuload,diskfree,uname,rundir,item_path,run_duration,final_logf,comment + (set! res (vector test-id run-id testname state status -1 "" -1 -1 "" "-" item-path -1 "-" "-"))) + ;; db + ;; "SELECT run_id,testname,item_path,state,status FROM tests WHERE id=? and run_id=?;" + stmth + test-id run-id) + res)))) ;; get a useful subset of the tests data (used in dashboard ;; use db:mintest-get-{id ,run_id,testname ...} ;; (define (db:get-tests-for-run-mindata dbstruct run-id testpatt states statuses not-in) @@ -2278,25 +2349,37 @@ dbstruct run-id #t (lambda (dbdat db) (sqlite3:execute db "UPDATE tests SET state='DELETED',status='n/a',comment='' WHERE id=?;" test-id)))) ;; -(define (db:delete-old-deleted-test-records dbstruct) - (let ((targtime (- (current-seconds) - (or (configf:lookup-number *configdat* "setup" "keep-deleted-records") - (* 30 24 60 60))))) ;; one month in the past +(define (db:delete-old-deleted-test-records dbstruct run-id) + (let* ((targtime (- (current-seconds) + (or (configf:lookup-number *configdat* "setup" "keep-deleted-records") + (* 7 24 60 60)))) ;; cleanup if over one week old + (mtdbfile (dbmod:run-id->full-dbfname dbstruct run-id)) + (qry1 "DELETE FROM test_steps WHERE test_id IN (SELECT id FROM tests WHERE state='DELETED' AND event_timealist res))))) +;; testmeta doesn't change, we can cache it for up too an hour + +(define *db:testmeta-cache* (make-hash-table)) +(define *db:testmeta-last-update* 0) + ;; read the record given a testname (define (db:testmeta-get-record dbstruct testname) - (let ((res #f)) - (db:with-db - dbstruct - #f - #f - (lambda (dbdat db) - (sqlite3:for-each-row - (lambda (id testname author owner description reviewed iterated avg_runtime avg_disk tags jobgroup) - (set! res (vector id testname author owner description reviewed iterated avg_runtime avg_disk tags jobgroup))) - db - "SELECT id,testname,author,owner,description,reviewed,iterated,avg_runtime,avg_disk,tags,jobgroup FROM test_meta WHERE testname=?;" - testname) - res)))) + (if (and (< (- (current-seconds) *db:testmeta-last-update*) 600) + (hash-table-exists? *db:testmeta-cache* testname)) + (hash-table-ref *db:testmeta-cache* testname) + (let ((res #f)) + (db:with-db + dbstruct + #f + #f + (lambda (dbdat db) + (sqlite3:for-each-row + (lambda (id testname author owner description reviewed iterated avg_runtime avg_disk tags jobgroup) + (set! res (vector id testname author owner description reviewed iterated avg_runtime avg_disk tags jobgroup))) + db + "SELECT id,testname,author,owner,description,reviewed,iterated,avg_runtime,avg_disk,tags,jobgroup FROM test_meta WHERE testname=?;" + testname))) + (hash-table-set! *db:testmeta-cache* testname res) + (set! *db:testmeta-last-update* (current-seconds)) + res))) ;; create a new record for a given testname (define (db:testmeta-add-record dbstruct testname) (db:with-db dbstruct #f #t (lambda (dbdat db) @@ -4014,11 +4105,11 @@ (delete-duplicates result))))) ;;====================================================================== ;; To sync individual run ;;====================================================================== -(define (db:get-run-record-ids dbstruct target run keynames test-patt) +(define (db:get-run-record-ids dbstruct target run keynames) (let* ((backcons (lambda (lst item)(cons item lst))) (all_tests '()) (keystr (string-intersperse (map (lambda (key val) (conc key " like '" val "'")) @@ -4025,39 +4116,19 @@ keynames (string-split target "/")) " AND ") ) (run-qry (conc "SELECT id FROM runs WHERE " keystr " and runname='" run"'")) - (test-qry (conc "SELECT id FROM tests WHERE run_id in (" run-qry ") and testname like '" test-patt "'")) + ; (test-qry (conc "SELECT id FROM tests WHERE run_id in (" run-qry ") and testname like '" test-patt "'")) (run_ids (db:with-db dbstruct #f #f (lambda (dbdat db) (sqlite3:fold-row backcons '() db run-qry)) ) ) - ) - (for-each - (lambda (run_id) - (set! all_tests - (append - (map (lambda (x) (cons x run_id)) - (db:with-db dbstruct run_id #f - (lambda (dbdat db) - (sqlite3:fold-row backcons '() db (conc "SELECT id FROM tests WHERE run_id in (" run_id ") and testname like '" test-patt "'")) - ) - ) - ) all_tests - ) - ) - ) - run_ids - ) - `((runs . ,run_ids) - (tests . ,all_tests) - ) - - ) + ) + run_ids) ) ;;====================================================================== ;; Just for sync, procedures to make sync easy ;;====================================================================== @@ -4069,11 +4140,11 @@ ;; The function takes two arguments: dbstruct, which represents the database structure, and since-time, which is a timestamp indicating the time of the last update. ;; The function first defines a few helper functions, including backcons, which takes a list and an item and adds the item to the front of the list. ;; It then initializes several variables to empty lists: all_tests, all_test_steps, all_test_data, all_run_ids, and all_test_ids. ;; The function then retrieves a list of IDs for runs that have been changed since since-time using the db:get-changed-run-ids function. -;; It then filters the full list of run IDs to only include those that match the changed run IDs based on their modulo 100. +;; It then filters the full list of run IDs to only include those that match the changed run IDs based on their modulo (num-run-dbs). ;; For each changed run ID, the function retrieves a list of test IDs, test step IDs, and test data IDs that have been updated since since-time. ;; It appends these IDs to the appropriate lists (all_tests, all_test_steps, and all_test_data) using the append and map functions. ;; The function then retrieves a list of run stat IDs that have been updated since since-time. ;; Finally, the function returns a list of associations between record types and their corresponding IDs: runs, tests, test_steps, test_data, and run_stats. ;; @@ -4086,11 +4157,11 @@ (db:with-db dbstruct #f #f (lambda (dbdat db) (sqlite3:fold-row backcons '() db "SELECT id FROM runs")) ) ) - (changed_run_ids (filter (lambda (run) (member (modulo run 100) changed_run_dbs)) all_run_ids)) + (changed_run_ids (filter (lambda (run) (member (modulo run (num-run-dbs)) changed_run_dbs)) all_run_ids)) (run_ids (db:with-db dbstruct #f #f (lambda (dbdat db) (sqlite3:fold-row backcons '() db "SELECT id FROM runs WHERE last_update>=?" since-time)) ) @@ -4119,10 +4190,38 @@ (tests . ,all_tests) ) ) ) + + +(define (db:get-changed-record-test-ids dbstruct since-time run-id) + (let* ((backcons (lambda (lst item)(cons item lst))) + (all-tests (db:with-db dbstruct run-id #f + (lambda (dbdat db) + (sqlite3:fold-row backcons '() db "SELECT id FROM tests WHERE run_id=? and last_update>=?" run-id since-time))))) + + all-tests)) + +(define (db:get-changed-record-run-ids dbstruct since-time) + ;; no transaction, allow the db to be accessed between the big queries + (let* ((backcons (lambda (lst item)(cons item lst))) + (run_ids (db:with-db dbstruct #f #f + (lambda (dbdat db) + (sqlite3:fold-row backcons '() db "SELECT id FROM runs WHERE last_update>=?" since-time))))) + (debug:print 2 *default-log-port* "run_ids = " run_ids) + run_ids) +) + +(define (db:get-all-runids dbstruct) + (let* ((backcons (lambda (lst item)(cons item lst))) + (all_run_ids (db:with-db dbstruct #f #f + (lambda (dbdat db) + (sqlite3:fold-row backcons '() db "SELECT id FROM runs"))))) + +all_run_ids)) + ;;====================================================================== ;; Extract ods file from the db ;;====================================================================== ;; NOT REWRITTEN YET!!!!! @@ -4307,11 +4406,11 @@ )))) ;; sync for filesystem local db writes ;; (define (db:run-lock-and-sync no-sync-db) - (let* ((tmp-area (common:get-db-tmp-area)) + (let* ((tmp-area (common:make-tmpdir-name *toppath* "")) (dbfiles (glob (conc tmp-area"/.mtdb/*.db"))) (sync-durations (make-hash-table))) ;; (debug:print-info 0 *default-log-port* "lock-and-sync, dbfiles: "dbfiles) (for-each (lambda (file) @@ -4363,11 +4462,11 @@ (sync-stale-seconds (configf:lookup-number *configdat* "server" "sync-stale-seconds" default: 300)) (debug-mode (debug:debug-mode 1)) (last-time (current-seconds)) ;; last time through the sync loop (no-sync-db (db:open-no-sync-db)) (sync-duration 0) ;; run time of the sync in milliseconds - (tmp-area (common:get-db-tmp-area))) + (tmp-area (common:make-tmpdir-name *toppath* ""))) ;; Sync moved to http-transport keep-running loop (debug:print-info 2 *default-log-port* "Periodic copy-based sync thread started. syncer is copy-sync, tmp-area is " tmp-area) (debug:print-info 3 *default-log-port* "watchdog starting. syncer is copy-sync pid="(current-process-id));; " this-wd-num="this-wd-num) (if (and legacy-sync (not *time-to-exit*)) @@ -4471,11 +4570,11 @@ (for-each (lambda (subdb) (let* (;;(dbstruct (db:setup)) (mtdb (dbr:subdb-mtdb subdb)) (mtpath (db:dbdat-get-path mtdb)) - (tmp-area (common:get-db-tmp-area)) + (tmp-area (common:make-tmpdir-name *toppath* "")) (res (db:sync-to-megatest.db dbstruct no-sync-db: no-sync-db))) ;; did we sync any data? If so need to set the db touched flag to keep the server alive (set! sync-duration (- (current-milliseconds) sync-start)) (if (> res 0) ;; some records were transferred, keep the db alive (begin (mutex-lock! *heartbeat-mutex*) @@ -4518,11 +4617,10 @@ ;; ;; time to exit, close the no-sync db here ;; (db:no-sync-close-db no-sync-db stmt-cache) (if (common:low-noise-print 30) (debug:print-info 0 *default-log-port* "Exiting watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id) )))) )) - (define (std-exit-procedure) ;;(common:telemetry-log-close) (on-exit (lambda () 0)) ;; why is this here? ;;(debug:print-info 13 *default-log-port* "std-exit-procedure called; *time-to-exit*="*time-to-exit*) Index: dbfile.scm ================================================================== --- dbfile.scm +++ dbfile.scm @@ -16,39 +16,86 @@ ;; You should have received a copy of the GNU General Public License ;; along with Megatest. If not, see . ;;====================================================================== -(use srfi-18) +(use srfi-18 posix hostinfo) (declare (unit dbfile)) (declare (uses debugprint)) (declare (uses commonmod)) (module dbfile * - - (import scheme - chicken +(import scheme) + +(cond-expand + (chicken-4 + + (import chicken data-structures extras matchable (prefix sqlite3 sqlite3:) - posix typed-records + posix posix-extras typed-records srfi-18 srfi-1 srfi-69 stack files ports + hostinfo + + commonmod + debugprint + ) + ) + (chicken-5 + (import (prefix sqlite3 sqlite3:) + ;; data-structures + ;; extras + ;; files + ;; posix + ;; posix-extras + chicken.base + chicken.condition + chicken.file + chicken.file.posix + chicken.format + chicken.io + chicken.pathname + chicken.port + chicken.process + chicken.process-context + chicken.process-context.posix + chicken.sort + chicken.string + chicken.time + chicken.time.posix + matchable + md5 + message-digest + pathname-expand + regex + regex-case + srfi-1 + srfi-18 + srfi-69 + typed-records + stack + system-information + commonmod debugprint ) - + (define file-write-access? file-writable?) + (define file-move move-file) + )) + ;; parameters ;; (define dbfile:testsuite-name (make-parameter #f)) (define keep-age-param (make-parameter 10)) ;; qif file age, if over move to attic @@ -121,10 +168,50 @@ (define-record simple-run target id runname state status owner event_time) (define-record-printer (simple-run x out) (fprintf out "#,(simple-run ~S ~S ~S ~S)" (simple-run-target x) (simple-run-id x) (simple-run-runname x) (time->string (seconds->local-time (simple-run-event_time x) )))) +;; args is hash table of string to value +;; +(define (get-purpose args) + (let* ((get-arg (lambda (key) + (hash-table-ref/default args key #f))) + (get-switch (lambda keys + (fold + (lambda (key res) + (if (hash-table-ref/default args key #f) + (or key res) + res)) + #f + keys))) + (action (get-switch "-server" "-execute" "-run" "-rerun"))) + (cond + (action + (substring action 1 (string-length action))) + (else + "nopurpose")))) + +;; megatest process tracking + +(defstruct procinf + (start (current-seconds)) + (end -1) + (host (get-host-name)) ;; why is this not being recognised? + (pid (current-process-id)) + (port -1) + (cwd (current-directory)) + (load #f) + (purpose #f) ;; get-purpose needed to be run in megatest.scm + (dbname #f) + (mtbin (car (argv))) + (mtversion #f) + (status "running") + + + ) + +(define *procinf* (make-procinf)) (define *dbstruct-dbs* #f) (define *db-open-mutex* (make-mutex)) (define *db-access-mutex* (make-mutex)) ;; used in common.scm (define *no-sync-db* #f) (define *db-sync-in-progress* #f) @@ -201,11 +288,12 @@ #f ) ) (define (dbfile:make-tmpdir-name areapath tmpadj) - (let* ((dname (conc "/tmp/"(current-user-name)"/" (string-translate areapath "/" ".") tmpadj))) + (let* ((area (pathname-file areapath)) + (dname (conc "/tmp/"(current-user-name)"/megatest_localdb/" area "/" (string-translate areapath "/" ".") tmpadj "/.mtdb"))) (unless (directory-exists? dname) (create-directory dname #t)) dname)) (define (dbfile:run-id->path apath run-id) @@ -226,18 +314,18 @@ (define (dbfile:run-id->dbfname run-id) (conc (dbfile:run-id->dbnum run-id)".db")) ;; the path in MTRAH with the filename (define (dbfile:run-id->dbname run-id) - (conc ".mtdb/"(dbfile:run-id->dbfname run-id))) + (conc (dbfile:run-id->dbfname run-id))) ;; Make the dbstruct, setup up auxillary db's and call for main db at least once ;; ;; called in http-transport and replicated in rmt.scm for *local* access. ;; -(define (dbfile:setup do-sync areapath tmppath) +(define (dbfile:setup areapath tmppath) (cond (*dbstruct-dbs* (dbfile:print-err "WARNING: dbfile:setup called when *dbstruct-dbs* is already initialized") *dbstruct-dbs*) ;; TODO: when multiple areas are supported, this optimization will be a hazard (else @@ -317,11 +405,12 @@ (if dbdat dbdat (let* ((tmppath (dbr:dbstruct-tmppath dbstruct)) (tmpdbpath (dbfile:run-id->path tmppath run-id)) (dbdat (dbfile:open-sqlite3-db tmpdbpath init-proc sync-mode: 0 journal-mode: "WAL"))) - ;; the following line short-circuits the "one db handle per thread" model + + ;; the following line short-circuits the "one db handle per thread" model ;; ;; (dbfile:add-dbdat dbstruct run-id dbdat) ;; dbdat)))))) @@ -362,21 +451,22 @@ (with-output-to-port (current-error-port) (lambda () (apply print params)))) -(define (dbfile:cautious-open-database fname init-proc sync-mode journal-mode #!optional (tries-left 500)) +(define (dbfile:cautious-open-database fname init-proc sync-mode journal-mode + #!key (tries-left 500)(force-init #f)) (let* ((busy-file (conc fname "-journal")) (delay-time (* (- 51 tries-left) 1.1)) (write-access (file-write-access? fname)) (dir-access (file-write-access? (pathname-directory fname))) (retry (lambda () (thread-sleep! delay-time) (if (> tries-left 0) (dbfile:cautious-open-database fname init-proc sync-mode journal-mode - (- tries-left 1)))))) + tries-left: (- tries-left 1)))))) (assert (>= tries-left 0) (conc "FATAL: too many attempts in dbfile:cautious-open-database of "fname", giving up.")) (if (and (file-write-access? fname) (file-exists? busy-file)) (begin @@ -386,11 +476,11 @@ (thread-sleep! 1) (if (eq? tries-left 2) (begin (dbfile:print-err "INFO: forcing journal rollup "busy-file) (dbfile:brute-force-salvage-db fname))) - (dbfile:cautious-open-database fname init-proc sync-mode journal-mode (- tries-left 1))) + (dbfile:cautious-open-database fname init-proc sync-mode journal-mode tries-left: (- tries-left 1))) (let* ((result (condition-case (if dir-access (dbfile:with-simple-file-lock (conc fname ".lock") @@ -400,21 +490,23 @@ (sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 30000)) (if sync-mode (sqlite3:execute db (conc "PRAGMA synchronous = "sync-mode";"))) (if journal-mode (sqlite3:execute db (conc "PRAGMA journal_mode = "journal-mode";"))) - (if (and init-proc (not db-exists)) + (if (and init-proc (or force-init + (not db-exists))) (init-proc db)) - db))) + db)) + expire-time: 30) (begin (if (file-exists? fname ) (let ((db (sqlite3:open-database fname))) ;; pragmas synchronous not needed because this db is used read-only ;; (sqlite3:execute db (conc "PRAGMA synchronous = "mode";") (sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 30000)) ;; read-only but still need timeout db ) - (print "file doesn't exist: " fname)))) + (print "cautious-open-database: file doesn't exist: " fname)))) (exn (io-error) (dbfile:print-err exn "ERROR: i/o error with " fname ". Check permissions, disk space etc. and try again.") (retry)) (exn (corrupt) (dbfile:print-err exn "ERROR: database " fname " is corrupt. Repair it to proceed.") @@ -444,11 +536,11 @@ ;; NOTE: this is already protected by mutex *no-sync-db-mutex* ;; (define (dbfile:raw-open-no-sync-db dbpath) (if (not (file-exists? dbpath)) (create-directory dbpath #t)) - (debug:print-info 0 *default-log-port* "Opening "dbpath"/no-sync.db") + (debug:print-info 2 *default-log-port* "(dbfile:raw-open-no-sync-db: Opening "dbpath"/no-sync.db") (let* ((dbname (conc dbpath "/no-sync.db")) (db-exists (file-exists? dbname)) (init-proc (lambda (db) (sqlite3:with-transaction db @@ -465,22 +557,124 @@ val TEXT, CONSTRAINT no_sync_metadat_constraint UNIQUE (var));" "CREATE TABLE IF NOT EXISTS no_sync_locks (key TEXT, val TEXT, - CONSTRAINT no_sync_metadat_constraint UNIQUE (key));")))))) + CONSTRAINT no_sync_metadat_constraint UNIQUE (key));" + "CREATE TABLE IF NOT EXISTS processes + (id INTEGER PRIMARY KEY, + host TEXT, + port INTEGER, + pid INTEGER, + starttime INTEGER, + endtime INTEGER, + status TEXT, + purpose TEXT, + dbname TEXT, + mtversion TEXT, + reason TEXT DEFAULT 'none', + CONSTRAINT no_sync_processes UNIQUE (host,pid));" + )))))) (on-tmp (equal? (car (string-split dbpath "/")) "tmp")) (db (if on-tmp - (dbfile:cautious-open-database dbname init-proc 0 "WAL") - (dbfile:cautious-open-database dbname init-proc 0 #f) + (dbfile:cautious-open-database dbname init-proc 1 "WAL" force-init: #t) ;; WAL MODE should use syncronous=1 + ;; (dbfile:cautious-open-database dbname init-proc 0 #f force-init: #t) + (dbfile:cautious-open-database dbname init-proc 0 "MEMORY" force-init: #t) ;; Journal mode = memory is fastest? ;; (sqlite3:open-database dbname) ))) - (if on-tmp ;; done in cautious-open-database - (begin - (sqlite3:execute db "PRAGMA synchronous = 0;") - (sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 136000)))) + ;; (if on-tmp ;; done in cautious-open-database + ;; (begin + ;; (sqlite3:execute db "PRAGMA synchronous = 0;") ;; why was this here when is is handled by cautious-open-database? + (sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 136000)) ;; )) db)) + +;; mtest processes registry calls + +(define (dbfile:insert-or-update-process nsdb dat) + (let* ((host (procinf-host dat)) + (pid (procinf-pid dat)) + (curr-info (dbfile:get-process-info nsdb host pid))) + (if curr-info ;; record exists, do update + (match curr-info + ((host port pid starttime endtime status purpose dbname mtversion) + (sqlite3:execute + nsdb + "UPDATE processes SET port=?,starttime=?,endtime=?,status=?, + purpose=?,dbname=?,mtversion=? + WHERE host=? AND pid=?;" + (or (procinf-port dat) port) + (or (procinf-start dat) starttime) + (or (procinf-end dat) endtime) + (or (procinf-status dat) status) + (or (procinf-purpose dat) purpose) + (or (procinf-dbname dat) dbname) + (or (procinf-mtversion dat) mtversion) + host pid)) + (else + #f ;; what to do? + )) + (dbfile:register-process + nsdb + (procinf-host dat) + (procinf-port dat) + (procinf-pid dat) + (procinf-start dat) + (procinf-end dat) + (procinf-status dat) + (procinf-purpose dat) + (procinf-dbname dat) + (procinf-mtversion dat))))) + + +(define (dbfile:register-process nsdb host port pid starttime endtime status purpose dbname mtversion) + (sqlite3:execute nsdb "INSERT INTO processes (host,port,pid,starttime,endtime,status,purpose,dbname,mtversion) VALUES (?,?,?,?,?,?,?,?,?);" + host port pid starttime endtime status purpose dbname mtversion)) + +(define (dbfile:set-process-status nsdb host pid newstatus) + (sqlite3:execute nsdb "UPDATE processes SET status=? WHERE host=? AND pid=?;" newstatus host pid)) + +;; as sorted should be stable. can use to choose "winner" +;; +(define (dbfile:get-process-options nsdb purpose dbname) + (sqlite3:fold-row + ;; host port pid starttime status mtversion + (lambda (res . row) + (cons row res)) + '() + nsdb + "SELECT host,port,pid,starttime,endtime,status,mtversion FROM processes WHERE purpose=? AND dbname LIKE ? AND status IN ('running','alive') ORDER BY starttime ASC,host,port;" + purpose dbname)) + +(define (dbfile:get-process-info nsdb host pid) + (let ((res (sqlite3:fold-row + ;; host port pid starttime status mtversion + (lambda (res . row) + (cons row res)) + '() + nsdb + "SELECT host,port,pid,starttime,endtime,status,purpose,dbname,mtversion FROM processes WHERE host=? AND pid=?;" + host pid))) + (if (null? res) + #f + (car res)))) + +(define (dbfile:row->procinf row) + (match row + ((host port pid starttime endtime status mtversion) + (make-procinf host: host port: port pid: pid starttime: starttime endtime: endtime status: status mtversion: mtversion)) + (else + (debug:print 0 *default-log-port* "ERROR: row "row" did not match host,port,pid,starttime,endtime,status,mtversion") + #f))) + +(define (dbfile:set-process-done nsdb host pid reason) + (sqlite3:execute nsdb "UPDATE processes SET status='done',endtime=?,reason=? WHERE host=? AND pid=?;" (current-seconds) reason host pid) + (dbfile:cleanup-old-entries nsdb)) + +(define (dbfile:cleanup-old-entries nsdb) + (sqlite3:execute nsdb "DELETE FROM process WHERE status='done' AND endtimetimestamp, identifier ((timestamp . ident) (cons (equal? ident identifier) timestamp)) - (else (cons #f 'malformed-lock))) ;; lock malformed + (else + (debug:print 2 *default-log-port* "db:no-sync-get-lock-with-id: malformed lock") + (cons #f 'malformed-lock) + ) + ) ;; lock malformed (let ((curr-sec (current-seconds)) (lock-value (if identifier (conc (current-seconds)"+"identifier) (current-seconds)))) (sqlite3:execute db "INSERT OR REPLACE INTO no_sync_metadat (var,val) VALUES(?,?);" keyname lock-value) @@ -1372,11 +1572,11 @@ #f (with-input-from-file fname (lambda () (equal? key-string (read-line))))) (begin - (dbfile:print-err "dbfile:simple-file-lock created " fname " but it was gone 3 seconds later") + (dbfile:print-err "dbfile:simple-file-lock created " fname " but it was gone 0.25 seconds later") #f ) ) ) ) @@ -1438,7 +1638,15 @@ ;; (db:hoh-set! stmt-cache db stmt newstmth) (hash-table-set! stmt-cache stmt newstmth) newstmth)))) (mutex-unlock! *get-cache-stmth-mutex*) result)) + +;; (define *mutex-stmth-call* (make-mutex)) +;; +;; (define (db:with-mutex-for-stmth proc) +;; (mutex-lock! *mutex-stmth-call*) +;; (let* ((res (proc))) +;; (mutex-unlock! *mutex-stmth-call*) +;; res)) ) Index: dbmod.scm ================================================================== --- dbmod.scm +++ dbmod.scm @@ -227,12 +227,12 @@ (lambda (last-update) (if *sync-in-progress* (debug:print 3 *default-log-port* "WARNING: overlapping calls to sync to disk") (let* ((syncer-logfile (conc areapath"/logs/"dbfname"-syncer.log")) (sync-cmd (if (eq? syncdir 'todisk) - (conc "(NBFAKE_LOG="syncer-logfile" nbfake megatest -db2db -from "tmpdb" -to "dbfullname" -period 5 -timeout 10 > /dev/null 2&>1)&") - (conc "(NBFAKE_LOG="syncer-logfile" nbfake megatest -db2db -from "dbfullname" -to "tmpdb" -period 5 -timeout 10 > /dev/null 2&>1)&"))) + (conc "NBFAKE_QUIET=yes NBFAKE_LOG="syncer-logfile" nbfake megatest -db2db -from "tmpdb" -to "dbfullname" -period 5 -timeout 10") + (conc "NBFAKE_QUIET=yes NBFAKE_LOG="syncer-logfile" nbfake megatest -db2db -from "dbfullname" -to "tmpdb" -period 5 -timeout 10"))) (synclock-file (conc dbfullname".lock")) (syncer-running-file (conc dbfullname"-sync-running")) (synclock-mod-time (if (file-exists? synclock-file) (handle-exceptions exn