Index: Makefile
==================================================================
--- Makefile
+++ Makefile
@@ -24,14 +24,15 @@
fossil timeline -n 350 -t ci -F "%h,%a,%b,%t,\"%c\"" > recent-commits.csv
SHELL=/bin/bash
PREFIX=$(PWD)
+# CSCOPTS=-lfa2 -specialize -inline-global
CSCOPTS=
INSTALL=install
SRCFILES = common.scm items.scm launch.scm ods.scm runconfig.scm \
- server.scm configf.scm db.scm keys.scm margs.scm \
+ server.scm configf.scm db.scm keys.scm \
process.scm runs.scm tasks.scm tests.scm genexample.scm \
tdb.scm mt.scm \
ezsteps.scm rmt.scm api.scm \
subrun.scm archive.scm env.scm \
diff-report.scm cgisetup/models/pgdb.scm
@@ -39,28 +40,27 @@
# module source files
MSRCFILES = dbfile.scm debugprint.scm mtargs.scm commonmod.scm dbmod.scm \
tcp-transportmod.scm rmtmod.scm portlogger.scm
transport-mode.scm : transport-mode.scm.template
- @if [[ -e transport-mode.scm ]];then \
- echo "WARNING: transport-mode.scm.template is newer than transport-mode.scm"; else \
- cp transport-mode.scm.template transport-mode.scm; fi
+ cp transport-mode.scm.template transport-mode.scm
dashboard-transport-mode.scm : dashboard-transport-mode.scm.template
- @if [[ -e dashboard-transport-mode.scm ]];then \
- echo "WARNING: dashboard-transport-mode.scm.template is newer than dashboard-transport-mode.scm"; else \
- cp dashboard-transport-mode.scm.template dashboard-transport-mode.scm; fi
+ cp dashboard-transport-mode.scm.template dashboard-transport-mode.scm
-megatest.scm : transport-mode.scm
-dashboard.scm : dashboard-transport-mode.scm
+mtest : transport-mode.scm
+dboard : dashboard-transport-mode.scm
# dbmod.import.o is just a hack here
mofiles/portlogger.o : mofiles/dbmod.o
mofiles/dbfile.o : \
- mofiles/debugprint.o mofiles/commonmod.o
+ mofiles/debugprint.o mofiles/commonmod.o
+mofiles/dbmod.o : mofiles/dbfile.o
+
+mofiles/commonmod.o : mofiles/debugprint.o
configf.o : commonmod.import.o
mofiles/dbfile.o : mofiles/debugprint.o
mofiles/rmtmod.o mofiles/dbmod.o : mofiles/dbfile.o mofiles/commonmod.o mofiles/debugprint.o
db.o : mofiles/dbmod.o mofiles/dbfile.o
mofiles/debugprint.o : mofiles/mtargs.o
@@ -340,10 +340,13 @@
$(PREFIX)/bin/mt-new-to-old.sh : utils/mt-new-to-old.sh
$(INSTALL) $< $@
chmod a+x $@
+$(PREFIX)/bin/convert-db.sh : utils/convert-db.sh
+ $(INSTALL) $< $@
+ chmod a+x $@
deploytarg/nbfake : utils/nbfake
$(INSTALL) $< $@
chmod a+x $@
@@ -386,10 +389,11 @@
install : $(PREFIX)/bin/.$(ARCHSTR) $(PREFIX)/bin/.$(ARCHSTR)/mtest $(PREFIX)/bin/megatest \
$(PREFIX)/bin/.$(ARCHSTR)/dboard $(PREFIX)/bin/dashboard $(HELPERS) $(PREFIX)/bin/nbfake \
$(PREFIX)/bin/nbfind $(PREFIX)/bin/mtrunner $(PREFIX)/bin/viewscreen $(PREFIX)/bin/mt_xterm \
$(PREFIX)/bin/mt-old-to-new.sh $(PREFIX)/bin/mt-new-to-old.sh \
+ $(PREFIX)/bin/convert-db.sh $(PREFIX)/bin/convert-db.sh \
$(PREFIX)/share/docs/megatest_manual.html $(PREFIX)/bin/remrun \
$(PREFIX)/share/docs/megatest_manual.html $(PREFIX)/bin/remrun $(PREFIX)/bin/mtutil \
$(PREFIX)/share/db/mt-pg.sql \
$(PREFIX)/share/js/jquery-3.1.0.slim.min.js \
$(PREFIX)/bin/.$(ARCHSTR)/lib/libpangox-1.0.so \
Index: TODO
==================================================================
--- TODO
+++ TODO
@@ -16,10 +16,51 @@
# along with Megatest. If not, see .
TODO
====
+23WW48
+. Add calls-per-minute to db access stats
+. Find out why start-server calls are taking 250ms and fix
+. Allow two or three servers to run for any given db
+. Update avg call count/sec every 30 sec in no-sync
+. get server uses no-sync process info to decide which server to suggest
+. Use process table to decide who will do sync back
+. Fix metadat being synced over and over
+
+23WW47
+. Finding server
+.. look at .servinfo for likely prime main
+.. ask the .servinfo prime main for real prime main
+.. save prime main (for how long, 10 seconds or 10 minutes?)
+
+. Starting prime main
+.. get servinfo files - START
+.. no files? create my servinfo file, goto START
+.. have files? am I the prime main according to servinfo files?
+.. no, I'm not the prime main, ping prime main
+.. ping is good, prime main exists, register self as server if on same host as prime main DONE
+.. no pirng response, remove the .servinfo file - goto START
+.. if I am prime main according to .servinfo files, register directly in no-sync
+
+. Starting non-main
+.. get servinfo files
+.. no files? launch server for main.db
+.. have files? pick out prime main
+.. register self as server with prime main
+
+23WW46 - v1.80 branch
+. Use file semaphore to kill tests, eliminate db load of the KILLREQ query
+. Merge this change to revolution branch
+23WW45 - the revolution branch
+. Add "fast" db start option (no handshaking over NFS)
+. Add server-ro to server types (just "server" is fine for read/write).
+. [DONE] Create pause-server and resume-server calls
+. Create rsync or cp sync to MTRAH function
+. Change rmt:send-receive to divert calls to read-only server when possible
+. [DONE] Change start server to call main.db server for 1..N.db servers, block until server is read for use.
+
23WW21
. Dashboard needs its own cache db in /tmp
23WW07
. Remove use of *dbstruct-dbs*
Index: api.scm
==================================================================
--- api.scm
+++ api.scm
@@ -35,11 +35,12 @@
(use srfi-69
srfi-18
posix
matchable
- s11n)
+ s11n
+ typed-records)
;; allow these queries through without starting a server
;;
(define api:read-only-queries
'(get-key-val-pairs
@@ -95,10 +96,13 @@
login
tasks-get-last
testmeta-get-record
have-incompletes?
get-changed-record-ids
+ get-all-runids
+ get-changed-record-test-ids
+ get-changed-record-run-ids
get-run-record-ids
get-not-completed-cnt))
(define api:write-queries
'(
@@ -149,82 +153,20 @@
tasks-set-state-given-param-key
))
(define *db-write-mutexes* (make-hash-table))
(define *server-signature* #f)
-;; ;; These are called by the server on recipt of /api calls
-;; ;; - keep it simple, only return the actual result of the call, i.e. no meta info here
-;; ;;
-;; ;; - returns #( flag result )
-;; ;;
-;; (define (api:execute-requests dbstruct dat)
-;; (if (> *api-process-request-count* 50)
-;; (begin
-;; (if (common:low-noise-print 30 "too many threads")
-;; (debug:print 0 *default-log-port* "WARNING: "*api-process-request-count*" threads, potential overload, adding 0.5 sec delay."))
-;; ;; (thread-sleep! 0.5) ;; take a nap - no, the napping is moved to the clients via tt:backoff-incr
-;; ))
-;; (cond
-;; ((not (vector? dat)) ;; it is an error to not receive a vector
-;; (vector #f (vector #f "remote must be called with a vector")))
-;; (else
-;; (let* ((cmd-in (vector-ref dat 0))
-;; (cmd (if (symbol? cmd-in)
-;; cmd-in
-;; (string->symbol cmd-in)))
-;; (params (vector-ref dat 1))
-;; (run-id (if (null? params)
-;; 0
-;; (car params)))
-;; (write-mutex (if (hash-table-exists? *db-write-mutexes* run-id)
-;; (hash-table-ref *db-write-mutexes* run-id)
-;; (let* ((newmutex (make-mutex)))
-;; (hash-table-set! *db-write-mutexes* run-id newmutex)
-;; newmutex)))
-;; (start-t (current-milliseconds))
-;; (readonly-mode (dbr:dbstruct-read-only dbstruct))
-;; (readonly-command (member cmd api:read-only-queries))
-;; (writecmd-in-readonly-mode (and readonly-mode (not readonly-command))))
-;; (if (not readonly-command)
-;; (mutex-lock! write-mutex))
-;; (let* ((tmppath (dbr:dbstruct-tmppath dbstruct))
-;; (clean-run-id (cond
-;; ((number? run-id) run-id)
-;; ((equal? run-id #f) "main")
-;; (else "other")))
-;; (crumbfile (dbfile:wait-for-qif tmppath clean-run-id (cons cmd params)))
-;; (res
-;; (if writecmd-in-readonly-mode
-;; (conc "attempt to run write command "cmd" on a read-only database")
-;; (api:dispatch-request dbstruct cmd run-id params))))
-;; (delete-file* crumbfile)
-;; (if (not readonly-command)
-;; (mutex-unlock! write-mutex))
-;;
-;; ;; save all stats
-;; (let ((delta-t (- (current-milliseconds)
-;; start-t))
-;; (modified-cmd (if (eq? cmd 'general-call)
-;; (string->symbol (conc "general-call-" (car params)))
-;; cmd)))
-;; (hash-table-set! *db-api-call-time* modified-cmd
-;; (cons delta-t (hash-table-ref/default *db-api-call-time* modified-cmd '()))))
-;; (if writecmd-in-readonly-mode
-;; (begin
-;; #;(common:telemetry-log (conc "api-out:"(->string cmd))
-;; payload: `((params . ,params)
-;; (ok-res . #t)))
-;; (vector #f res))
-;; (begin
-;; #;(common:telemetry-log (conc "api-out:"(->string cmd))
-;; payload: `((params . ,params)
-;; (ok-res . #f)))
-;; (vector #t res))))))))
(define *api-threads* '())
-(define (api:register-thread th-in)
- (set! *api-threads* (cons (cons th-in (current-seconds)) *api-threads*)))
+(define (api:register-thread th-in command)
+ (set! *api-threads* (cons (list th-in (current-seconds) command) *api-threads*)))
+
+(define (api:get-thread-command th-in)
+ (let ((thread-data (assoc th-in *api-threads*)))
+ (if thread-data
+ (third thread-data) ; Assuming the command is the third element in the list
+ #f))) ; Return #f if the thread is not found
(define (api:unregister-thread th-in)
(set! *api-threads* (filter (lambda (thdat)
(not (eq? th-in (car thdat))))
*api-threads*)))
@@ -234,11 +176,34 @@
(not (member (thread-state (car thdat)) '(terminated dead))))
*api-threads*)))
(define (api:get-count-threads-alive)
(length *api-threads*))
-
+
+(define (api:get-threads)
+ (map (lambda (thdat)
+ (let ((thread (first thdat))
+ (timestamp (second thdat))
+ (command (third thdat)))
+ (format "\nThread: ~a, age: ~a, Command: ~a" thread (- (current-seconds) timestamp) command)))
+ *api-threads*))
+
+
+(define *api:last-stats-print* 0)
+(define *api-print-db-stats-mutex* (make-mutex))
+(define (api:print-db-stats)
+ (debug:print-info 0 *default-log-port* "Started periodic db stats printer")
+ (let loop ()
+ (mutex-lock! *api-print-db-stats-mutex*)
+ (if (> (- (current-seconds) *api:last-stats-print*) 15)
+ (begin
+ (rmt:print-db-stats)
+ (set! *api:last-stats-print* (current-seconds))))
+ (mutex-unlock! *api-print-db-stats-mutex*)
+ (thread-sleep! 5)
+ (loop)))
+
;; indat is (cmd run-id params meta)
;;
;; WARNING: Do not print anything in the lambda of this function as it
;; reads/writes to current in/out port
@@ -246,86 +211,111 @@
(define (api:tcp-dispatch-request-make-handler dbstruct) ;; cmd run-id params)
(assert *toppath* "FATAL: api:tcp-dispatch-request-make-handler called but *toppath* not set.")
(if (not *server-signature*)
(set! *server-signature* (tt:mk-signature *toppath*)))
(lambda (indat)
- (api:register-thread (current-thread))
- (let* (;; (indat (deserialize))
- (newcount (+ *api-process-request-count* 1))
- (numthreads (api:get-count-threads-alive))
- (delay-wait (if (> newcount 10)
- (- newcount 10)
- 0))
- (normal-proc (lambda (cmd run-id params)
- (case cmd
- ((ping) *server-signature*)
- (else
- (api:dispatch-request dbstruct cmd run-id params))))))
- (set! *api-process-request-count* newcount)
- (set! *db-last-access* (current-seconds))
- (if (not (eq? newcount numthreads))
- (begin
- (api:remove-dead-or-terminated)
- (let ((threads-now (api:get-count-threads-alive)))
- (debug:print 0 *default-log-port* "WARNING: newcount="newcount", numthreads="numthreads", remaining="threads-now)
- (set! newcount threads-now))))
- (match indat
- ((cmd run-id params meta)
- (let* ((db-ok (let* ((dbfname (dbmod:run-id->dbfname run-id))
- (ok (equal? dbfname (dbr:dbstruct-dbfname dbstruct))))
- (case cmd
- ((ping) #t) ;; we are fine
- (else
- (if (not ok)(debug:print 0 *default-log-port* "ERROR: "cmd", run-id "run-id", not correct for dbfname "(dbr:dbstruct-dbfname dbstruct)))
- (assert ok "FATAL: database file and run-id not aligned.")))))
- (ttdat *server-info*)
- (server-state (tt-state ttdat))
- (status (cond
- ((> newcount 3) 'busy)
- ;; ((> newcount 5) 'loaded) ;; this gets transmitted to the client which calls tt:backoff-incr to slow stuff down.
- (else 'ok)))
- (errmsg (case status
- ((busy) (conc "Server overloaded, "newcount" threads in flight"))
- ((loaded) (conc "Server loaded, "newcount" threads in flight"))
- (else #f)))
- (result (case status
- ((busy)
- (if (eq? cmd 'ping)
- (normal-proc cmd run-id params)
- ;; newcount must be greater than 5 for busy
- (* 1 (- newcount 3)) ;; was 15
- )) ;; (- newcount 29)) ;; call back in as many seconds
- ((loaded)
-;; (if (eq? (rmt:transport-mode) 'tcp)
-;; (thread-sleep! 0.5))
- (normal-proc cmd run-id params))
- (else
- (normal-proc cmd run-id params))))
- (meta (case cmd
- ((ping) `((sstate . ,server-state)))
- (else `((wait . ,delay-wait)))))
- (payload (list status errmsg result meta)))
- (set! *api-process-request-count* (- *api-process-request-count* 1))
- ;; (serialize payload)
- (api:unregister-thread (current-thread))
- payload))
- (else
- (assert #f "FATAL: failed to deserialize indat "indat))))))
-
+ (api:register-thread (current-thread) (car indat))
+ (let* ((result
+ (let* ((numthreads (api:get-count-threads-alive))
+ (delay-wait (if (> numthreads 10)
+ (- numthreads 10)
+ 0))
+ (normal-proc (lambda (cmd run-id params)
+ (case cmd
+ ((ping) *server-signature*)
+ (else
+ (api:dispatch-request dbstruct cmd run-id params))))))
+ (set! *api-process-request-count* numthreads)
+ (set! *db-last-access* (current-seconds))
+;; (if (not (eq? numthreads numthreads))
+;; (begin
+;; (api:remove-dead-or-terminated)
+;; (let ((threads-now (api:get-count-threads-alive)))
+;; (debug:print 0 *default-log-port* "WARNING: numthreads="numthreads", numthreads="numthreads", remaining="threads-now)
+;; (set! numthreads threads-now))))
+ (match indat
+ ((cmd run-id params meta)
+ (let* ((start-t (current-milliseconds))
+ (db-ok (let* ((dbfname (dbmod:run-id->dbfname run-id))
+ (ok (equal? dbfname (dbr:dbstruct-dbfname dbstruct))))
+ (case cmd
+ ((ping) #t) ;; we are fine
+ (else
+ (assert ok "FATAL: database file and run-id not aligned.")))))
+ (ttdat *server-info*)
+ (server-state (tt-state ttdat))
+ (maxthreads 20) ;; make this a parameter?
+ (status (cond
+ ((> numthreads maxthreads)
+ (let* ((testsuite (common:get-testsuite-name))
+ (mtexe (common:find-local-megatest))
+ (proc (lambda ()
+ ;; we are overloaded, try to start another server
+ (debug:print 0 *default-log-port* "Too many threads running, starting another server")
+ (tt:server-process-run *toppath* testsuite mtexe run-id))))
+ (set! *server-start-requests* (cons proc *server-start-requests*)))
+ ;; 'busy
+ 'loaded ;; not ideal since the client will not backoff
+ )
+ (else 'ok)))
+ (errmsg (case status
+ ((busy) (conc "Server overloaded, "numthreads" threads in flight, current cmd: " cmd "\n current threads: " (api:get-threads)))
+ ((loaded) (conc "Server loaded, "numthreads" threads in flight"))
+ (else #f)))
+ (result (case status
+ ((busy)
+ (if (eq? cmd 'ping)
+ (normal-proc cmd run-id params)
+ ;; numthreads must be greater than 5 for busy
+ (* 0.1 (- numthreads maxthreads)) ;; was 15
+ )) ;; (- numthreads 29)) ;; call back in as many seconds
+ ((loaded)
+ ;; (if (eq? (rmt:transport-mode) 'tcp)
+ ;; (thread-sleep! 0.5))
+ (normal-proc cmd run-id params))
+ (else
+ (normal-proc cmd run-id params))))
+ (meta (case cmd
+ ((ping) `((sstate . ,server-state)(sload . ,numthreads)))
+ (else `((wait . ,delay-wait)))))
+ (payload (list status errmsg result meta)))
+ ;; (cmd run-id params meta)
+ (db:add-stats cmd run-id params (- (current-milliseconds) start-t))
+ payload))
+ (else
+ (assert #f "FATAL: failed to deserialize indat "indat))))))
+ ;; (set! *api-process-request-count* (- *api-process-request-count* 1))
+ ;; (serialize payload)
+
+ (api:unregister-thread (current-thread))
+ result)))
+
+(define *api-halt-writes* #f)
(define (api:dispatch-request dbstruct cmd run-id params)
(if (not *no-sync-db*)
(db:open-no-sync-db))
+ (let* ((start-time (current-milliseconds)))
+ (if (member cmd api:write-queries)
+ (let loop ()
+ (if *api-halt-writes*
+ (begin
+ (thread-sleep! 0.2)
+ (if (< (- (current-milliseconds) start-time)
+ 5000) ;; hope it don't take more than five seconds to sync
+ (loop-time)
+ #;(debug:print 0 *default-log-port* "ERROR: writes halted for more than 5 seconds, sync might be taking too long"))))))
+ (db:add-stats 'api-write-blocking-for-sync run-id params (- (current-milliseconds) start-time)))
(case cmd
;;===============================================
;; READ/WRITE QUERIES
;;===============================================
((get-keys-write) (db:get-keys dbstruct)) ;; force a dummy "write" query to force server; for debug in -repl
;; SERVERS
- ((start-server) (apply server:kind-run params))
+ ((start-server) (apply tt:server-process-run params))
((kill-server) (set! *server-run* #f))
;; TESTS
;;((test-set-state-status-by-id) (apply mt:test-set-state-status-by-id dbstruct params))
@@ -400,10 +390,16 @@
;; NO SYNC DB
((no-sync-set) (apply db:no-sync-set *no-sync-db* params))
((no-sync-get/default) (apply db:no-sync-get/default *no-sync-db* params))
((no-sync-del!) (apply db:no-sync-del! *no-sync-db* params))
((no-sync-get-lock) (apply db:no-sync-get-lock *no-sync-db* params))
+
+ ;; NO SYNC DB PROCESSES
+ ((register-process) (apply dbfile:register-process *no-sync-db* params))
+ ((set-process-done) (apply dbfile:set-process-done *no-sync-db* params))
+ ((set-process-status) (apply dbfile:set-process-status *no-sync-db* params))
+ ((get-process-options) (apply dbfile:get-process-options *no-sync-db* params))
;; ARCHIVES
;; ((archive-get-allocations)
((archive-register-disk) (apply db:archive-register-disk dbstruct params))
((archive-register-block-name)(apply db:archive-register-block-name dbstruct params))
@@ -491,53 +487,18 @@
(realparams (cddr params)))
(db:general-call dbstruct run-id stmtname realparams)))
((sdb-qry) (apply sdb:qry params))
((ping) (current-process-id))
((get-changed-record-ids) (apply db:get-changed-record-ids dbstruct params))
- ((get-run-record-ids) (apply db:get-run-record-ids dbstruct params))
+ ((get-changed-record-test-ids) (apply db:get-changed-record-test-ids dbstruct params))
+ ((get-changed-record-run-ids) (apply db:get-changed-record-run-ids dbstruct params))
+ ((get-run-record-ids) (apply db:get-run-record-ids dbstruct params))
+ ((get-all-runids) (apply db:get-all-runids dbstruct))
;; TESTMETA
((testmeta-get-record) (apply db:testmeta-get-record dbstruct params))
;; TASKS
((find-task-queue-records) (apply tasks:find-task-queue-records dbstruct params))
(else
(debug:print 0 *default-log-port* "ERROR: bad api call " cmd)
(conc "ERROR: BAD api call " cmd))))
-;; http-server send-response
-;; api:process-request
-;; db:*
-;;
-;; NB// Runs on the server as part of the server loop
-;;
-(define (api:process-request dbstruct $) ;; the $ is the request vars proc
- (debug:print 4 *default-log-port* "server-id:" *server-id*)
- (let* ((cmd ($ 'cmd))
- (paramsj ($ 'params))
- (key ($ 'key))
- (params (db:string->obj paramsj transport: 'http))) ;; incoming data from the POST (or is it a GET?)
- (debug:print 4 *default-log-port* "cmd:" cmd " with params " params "key " key)
- (if (equal? key *server-id*)
- (begin
- (set! *api-process-request-count* (+ *api-process-request-count* 1))
- (let* ((resdat (api:execute-requests dbstruct (vector cmd params))) ;; process the request, resdat = #( flag result )
- (success (vector-ref resdat 0))
- (res (vector-ref resdat 1))) ;; (vector flag payload), get the payload, ignore the flag (why?)
- (debug:print 4 *default-log-port* "res:" res)
- (if (not success)
- (debug:print 0 *default-log-port* "ERROR: success flag is #f for " cmd " with params " params))
- (if (> *api-process-request-count* *max-api-process-requests*)
- (set! *max-api-process-requests* *api-process-request-count*))
- (set! *api-process-request-count* (- *api-process-request-count* 1))
- ;; This can be here but needs controls to ensure it doesn't run more than every 4 seconds
- ;; (rmt:dat->json-str
- ;; (if (or (string? res)
- ;; (list? res)
- ;; (number? res)
- ;; (boolean? res))
- ;; res
- ;; (list "ERROR, not string, list, number or boolean" 1 cmd params res)))))
- (db:obj->string res transport: 'http)))
- (begin
- (debug:print 0 *default-log-port* "Server refused to process request. Server id mismatch. recived " key " expected: " *server-id* ".\nOther arguments recived: cmd=" cmd " params = " params)
- (db:obj->string (conc "Server refused to process request server-id mismatch: " key ", " *server-id*) transport: 'http)))))
-
Index: archive.scm
==================================================================
--- archive.scm
+++ archive.scm
@@ -359,11 +359,11 @@
(archive-dir (if archive-info (cdr archive-info) #f))
(archive-id (if archive-info (car archive-info) -1))
(home-host (server:choose-server *toppath* 'homehost))
(archive-time (seconds->std-time-str (current-seconds)))
(archive-staging-db (conc *toppath* "/.db-snapshot/archive_" archive-time))
- (tmp-db-path (conc (common:get-db-tmp-area) "/megatest.db"))
+ (tmp-db-path (conc (dbfile:make-tmpdir-name *toppath* "") "/megatest.db"))
(dbfile (conc archive-staging-db "/megatest.db")))
(create-directory archive-staging-db #t)
(let-values (((pid-val exit-status exit-code) (run-n-wait rsync-exe params: (list "-v" (conc (car home-host) ":"tmp-db-path) archive-staging-db) print-cmd: print-prefix)))
(if (eq? exit-code 0)
(case archiver
@@ -407,11 +407,11 @@
(bup-restore-params (list "-d" archive-path "restore" "-C" *toppath* archive-internal-path)))
(debug:print-info 2 *default-log-port* "Restoring archived data to " *toppath* " from archive in " archive-path " ... " archive-internal-path)
(run-n-wait bup-exe params: bup-restore-params print-cmd: "Running:"))
(sleep 2)
(db:multi-db-sync
- (db:setup #t) ;; (db:setup-db *dbstruct-dbs* *toppath* #f)
+ (db:setup) ;; (db:setup-db *dbstruct-dbs* *toppath* #f)
'killservers
;'dejunk
;'adj-testids
'old2new
)
Index: cgisetup/models/pgdb.scm
==================================================================
--- cgisetup/models/pgdb.scm
+++ cgisetup/models/pgdb.scm
@@ -18,10 +18,11 @@
;;======================================================================
(declare (unit pgdb))
(declare (uses configf))
+(declare (uses mtargs))
;; I don't know how to mix compilation units and modules, so no module here.
;;
;; (module pgdb
;; (
@@ -31,10 +32,11 @@
;; (import scheme)
;; (import data-structures)
;; (import chicken)
(use typed-records (prefix dbi dbi:))
+(import (prefix mtargs args:))
;; given a configdat lookup the connection info and open the db
;;
(define (pgdb:open configdat #!key (dbname #f)(dbispec #f))
(let ((pgconf (or dbispec
Index: common.scm
==================================================================
--- common.scm
+++ common.scm
@@ -21,10 +21,11 @@
(declare (unit common))
(declare (uses commonmod))
(declare (uses rmtmod))
(declare (uses debugprint))
(declare (uses mtargs))
+
(use srfi-1 data-structures posix regex-case (prefix base64 base64:)
format dot-locking csv-xml z3 udp ;; sql-de-lite
hostinfo md5 message-digest typed-records directory-utils stack
matchable regex posix (srfi 18) extras ;; tcp
@@ -37,33 +38,32 @@
(import commonmod
debugprint
rmtmod
(prefix mtargs args:))
-
+
+(define (remove-server-files directory-path)
+ (let ((files (glob (string-append directory-path "/server*"))))
+ (for-each delete-file* files)))
(include "common_records.scm")
-
-
-;; (require-library margs)
-;; (include "margs.scm")
-
-;; (define old-exit exit)
-;;
-;; (define (exit . code)
-;; (if (null? code)
-;; (old-exit)
-;; (old-exit code)))
+(define (remove-files filespec)
+ (let ((files (glob filespec)))
+ (for-each delete-file* files)))
(define (stop-the-train)
(thread-start! (make-thread (lambda ()
(let loop ()
(if (and *toppath*
(file-exists? (conc *toppath*"/stop-the-train")))
(let* ((msg (conc "ERROR: found file "*toppath*"/stop-the-train, exiting immediately")))
;; yes, print to current-output-port AND *default-log-port*, annoying but necessary I think
(print msg)
+ (remove-server-files (conc *toppath* "/logs"))
(debug:print 0 *default-log-port* msg)
+ (remove-files (conc *toppath* "/logs/server*"))
+ (remove-files (conc *toppath* "/.servinfo/*"))
+ (remove-files (conc *toppath* "/.mtdb/*lock"))
(exit 1)))
(thread-sleep! 5)
(loop))))))
;; execute thunk, return value. If exception thrown, trap exception, return #f, and emit nonfatal condition note to *default-log-port* .
@@ -157,14 +157,10 @@
(define *time-zero* (current-seconds)) ;; for the watchdog
(define *on-exit-procs* '()) ;; add procs to this list to be executed on exit
(define *default-area-tag* "local")
;; DATABASE
-;; (define *dbstruct-dbs* #f) ;; used to cache the dbstruct in db:setup. Goal is to remove this.
-;; db stats
-(define *db-stats* (make-hash-table)) ;; hash of vectors < count duration-total >
-(define *db-stats-mutex* (make-mutex))
;; db access
(define *db-last-access* (current-seconds)) ;; last db access, used in server
;; (define *db-write-access* #t)
;; db sync
;; (define *db-last-sync* 0) ;; last time the sync to megatest.db happened
@@ -185,11 +181,10 @@
;; (define *max-cache-size* 0)
(define *logged-in-clients* (make-hash-table))
(define *server-id* #f)
(define *server-info* #f) ;; good candidate for easily convert to non-global
(define *time-to-exit* #f)
-(define *server-run* #t)
(define *run-id* #f)
(define *server-kind-run* (make-hash-table))
(define *home-host* #f)
;; (define *total-non-write-delay* 0)
(define *heartbeat-mutex* (make-mutex))
@@ -251,11 +246,11 @@
(define *common:this-exe-fullpath* (common:get-this-exe-fullpath))
(define *common:this-exe-dir* (pathname-directory *common:this-exe-fullpath*))
(define *common:this-exe-name* (pathname-strip-directory *common:this-exe-fullpath*))
(define (common:get-sync-lock-filepath)
- (let* ((tmp-area (common:get-db-tmp-area))
+ (let* ((tmp-area (common:make-tmpdir-name *toppath* ""))
(lockfile (conc tmp-area "/megatest.db.lock")))
lockfile))
(define *common:logpro-exit-code->status-sym-alist*
'( ( 0 . pass )
@@ -405,21 +400,19 @@
(define (common:version-changed?)
(not (equal? (common:get-last-run-version)
(common:version-signature))))
-;; From 1.70 to 1.80, db's are compatible.
-
+;; From 1.70 to 1.81, db's are compatible.
+;;
+;; BUG: This logic is almost certainly not quite correct.
+;;
(define (common:api-changed?)
- (let* (
- (megatest-major-version (substring (->string megatest-version) 0 4))
- (run-major-version (substring (conc (common:get-last-run-version)) 0 4))
- )
- (and (not (equal? megatest-major-version "1.80"))
- (not (equal? megatest-major-version megatest-run-version)))
- )
-)
+ (let* ((megatest-major-version (substring (->string megatest-version) 0 4))
+ (run-major-version (substring (conc (common:get-last-run-version)) 0 4)))
+ (and (not (member megatest-major-version '("1.81" "1.80")))
+ (not (equal? megatest-major-version run-major-version)))))
;;======================================================================
;; Move me elsewhere ...
;; RADT => Why do we meed the version check here, this is called only if version misma
;;
@@ -433,12 +426,11 @@
'adj-target
'new2old
'(dejunk)
))
((tcp nfs)
- (debug:print 0 *default-log-port* "WARNING: cleanup-db NOT implemented yet for tcp and nfs.")
- #;(apply db:multi-db-sync
+ (apply db:multi-db-sync
dbstruct
'schema
'killservers
'adj-target
'new2old
@@ -622,11 +614,11 @@
(common:on-homehost?))
(if (common:api-changed?)
(let* ((mtconf (conc (get-environment-variable "MT_RUN_AREA_HOME") "/megatest.config"))
(dbfile (conc (get-environment-variable "MT_RUN_AREA_HOME") ".mtdb/main.db"))
(read-only (not (file-write-access? dbfile)))
- (dbstruct (db:setup #t))) ;; (db:setup-db *dbstruct-dbs* *toppath* #f))) ;; #t)))
+ (dbstruct (db:setup))) ;; (db:setup-db *dbstruct-dbs* *toppath* #f))) ;; #t)))
(debug:print 0 *default-log-port*
"WARNING: Version mismatch!\n"
" expected: " (common:version-signature) "\n"
" got: " (common:get-last-run-version))
(cond
@@ -937,21 +929,10 @@
(debug:print-error 0 *default-log-port* "Unable to find megatest home directory.")
#f)
(loop (pathname-directory thepath)))))
))
-
-(define (common:db-tmp-area-path)
- (conc "/tmp/"
- (current-user-name)
- "/megatest_localdb/"
- (common:get-testsuite-name)
- "/"
- (string-translate *toppath* "/" ".")
- )
-)
-
;;======================================================================
;; redefine for future cleanup (converge on area-name, the more generic
;;
(define common:get-area-name common:get-testsuite-name)
@@ -971,11 +952,11 @@
(list (conc "/tmp/" (current-user-name)
"/megatest_localdb/"
tsname "/"
(string-translate toppath "/" "."))
(conc "/tmp/" (current-process-id) ;; just in case we have an issue with the dir by own user name
- "/megatest_localdb/"
+ "/"(current-user-name) "/megatest_localdb/"
tsname
(string-translate toppath "/" "."))
))))
(set! *db-cache-path* dbpath)
;; ensure megatest area has .mtdb
@@ -1548,11 +1529,11 @@
;;
(define (common:lazy-modification-time fpath)
(handle-exceptions
exn
(begin
- (debug:print 0 *default-log-port* "Failed to get modifcation time for " fpath ", treating it as zero. exn=" exn)
+ (debug:print 2 *default-log-port* "Failed to get modification time for " fpath ", treating it as zero. exn=" exn)
0)
(if (file-exists? fpath)
(file-modification-time fpath)
0)))
@@ -1664,12 +1645,12 @@
;; (let loop ((x 0))
;; (print x "," (common:get-delay x 1))
;; (if (< x 2)
;; (loop (+ x 0.1)))))
-(define (get-cpu-load #!key (remote-host #f))
- (car (common:get-cpu-load remote-host)))
+;; (define (get-cpu-load #!key (remote-host #f))
+;; (car (common:get-cpu-load remote-host)))
;;======================================================================
;; (let* ((load-res (process:cmd-run->list "uptime"))
;; (load-rx (regexp "load average:\\s+(\\d+)"))
;; (cpu-load #f))
@@ -1681,18 +1662,18 @@
;; (set! cpu-load newval))))))
;; (car load-res))
;; cpu-load))
;;======================================================================
-;; get values from cached info from dropping file in logs dir
+;; get values from cached info from dropping file in .sysdata dir
;; e.g. key is host and dtype is normalized-load
;;
(define (common:get-cached-info key dtype #!key (age 10))
(if *toppath*
(let* ((fullpath (conc *toppath* "/.sysdata/" key "-" dtype ".log"))
(delfile (lambda (exn)
- (debug:print-info 1 *default-log-port* " removing bad file " fullpath ", exn=" exn)
+ (debug:print-info 2 *default-log-port* " removing bad file " fullpath ", exn=" exn)
(delete-file* fullpath)
#f)))
(if (and (file-exists? fullpath)
(file-read-access? fullpath))
(handle-exceptions
@@ -1739,20 +1720,10 @@
(debug:print 0 *default-log-port* "failed to write file " fullpath ", exn=" exn)
#f)
(with-output-to-file fullpath (lambda ()(pp dat)))))
#f))
-(define (common:raw-get-remote-host-load-orig remote-host)
- (handle-exceptions
- exn
- (begin
- (debug:print 0 *default-log-port* "failed to ssh to " remote-host " and get loadavg. exn=" exn)
- #f) ;; more specific handling of errors needed
- (with-input-from-pipe
- (conc "ssh " remote-host " cat /proc/loadavg")
- (lambda ()(list (read)(read)(read))))))
-
(define (common:raw-get-remote-host-load remote-host)
(let* ((inp #f))
(handle-exceptions
exn
(begin
@@ -1773,11 +1744,12 @@
(begin
(debug:print 0 *default-log-port* "failed to ssh or read loadavg from host " remote-host ", exn=" exn)
'(-99 -99 -99))
(let* ((actual-hostname (or remote-host (get-host-name) "localhost")))
(or (common:get-cached-info actual-hostname "cpu-load")
- (let ((result (if remote-host
+ (let ((result (if (and remote-host
+ (not (equal? remote-host (get-host-name))))
(map (lambda (res)
(if (eof-object? res) 9e99 res))
(common:raw-get-remote-host-load remote-host))
(with-input-from-file "/proc/loadavg"
(lambda ()(list (read)(read)(read)))))))
@@ -2036,11 +2008,12 @@
#f) ;; if zero return #f so caller knows that things are not working
(loop (if (string-match "^processor\\s+:\\s+\\d+$" inl)
(+ numcpu 1)
numcpu)
(read-line))))))
- (result (if remote-host
+ (result (if (and remote-host
+ (not (equal? remote-host (get-host-name))))
(common:generic-ssh
(conc "ssh " remote-host " cat /proc/cpuinfo")
proc -1)
(with-input-from-file "/proc/cpuinfo" proc))))
(if (and (number? result)
@@ -2303,11 +2276,11 @@
(define (common:check-db-dir-space)
(let* ((required (string->number
;; default is 1GB (or actually a billion bytes) This is the number of 1 kB blocks.
(or (configf:lookup *configdat* "setup" "dbdir-space-required")
"1000000")))
- (dbdir (common:get-db-tmp-area)) ;; (db:get-dbdir))
+ (dbdir (common:make-tmpdir-name *toppath* "")) ;; (db:get-dbdir))
(tdbspace (common:check-space-in-dir dbdir required))
(mdbspace (common:check-space-in-dir *toppath* required)))
(sort (list tdbspace mdbspace) (lambda (a b)
(< (cadr a)(cadr b))))))
Index: commonmod.scm
==================================================================
--- commonmod.scm
+++ commonmod.scm
@@ -64,10 +64,11 @@
chicken.condition
chicken.file
chicken.file.posix
chicken.io
chicken.pathname
+ chicken.port
chicken.process
chicken.process-context
chicken.process-context.posix
chicken.sort
chicken.string
@@ -83,10 +84,12 @@
srfi-1
srfi-18
srfi-69
typed-records
system-information
+
+ debugprint
)))
;;======================================================================
;; CONTENTS
;;
@@ -160,23 +163,34 @@
'()))) ;; should it return empty list or #f to indicate not set?
(define (get-section cfgdat section)
(hash-table-ref/default cfgdat section '()))
+
+(define (common:make-tmpdir-name areapath tmpadj)
+ (let* ((area (pathname-file areapath))
+ (dname (conc "/tmp/"(current-user-name)"/megatest_localdb/" area "/" (string-translate areapath "/" ".") tmpadj "/.mtdb")))
+ (unless (directory-exists? dname)
+ (create-directory dname #t))
+ dname))
;; dot-locking egg seems not to work, using this for now
;; if lock is older than expire-time then remove it and try again
;; to get the lock
;;
(define (common:simple-file-lock fname #!key (expire-time 300))
- (let ((fmod-time (handle-exceptions
- ext
- (current-seconds)
- (file-modification-time fname))))
- (if (file-exists? fname) ;; (common:file-exists? fname)
+ (let* ((lock-exists (file-exists? fname))
+ (fmod-time (if lock-exists
+ (current-seconds)
+ (handle-exceptions
+ ext
+ (current-seconds)
+ (file-modification-time fname)))))
+ (if lock-exists
(if (> (- (current-seconds) fmod-time) expire-time)
(begin
+ (debug:print-info 1 *default-log-port* "Removing stale lock "fname)
(handle-exceptions exn #f (delete-file* fname))
(common:simple-file-lock fname expire-time: expire-time))
#f)
(let ((key-string (conc (get-host-name) "-" (current-process-id))))
(with-output-to-file fname
@@ -283,13 +297,16 @@
(filter (lambda (x)
(not (string-match "^\\s*" x)))
val-list))
'())))
-(define (get-cpu-load)
- (let* ((load-info (with-input-from-file "/proc/loadavg" read-lines)))
- (map string->number (string-split load-info))))
+(define (commonmod:get-cpu-load)
+ (let* ((load-info (with-input-from-file "/proc/loadavg" read-lines))
+ (res (map string->number (string-split (car load-info)))))
+ (if (null? res)
+ #f ;; something is wrong
+ (car res))))
(define *current-host-cores* #f)
(define (get-current-host-cores)
(or *current-host-cores*
@@ -311,11 +328,11 @@
(string->number (read-line)))))
;; get the normalized (i.e. load / numcpus) for *this* host
;;
(define (get-normalized-cpu-load)
- (/ (get-cpu-load)(get-current-host-cores)))
+ (/ (commonmod:get-cpu-load)(get-current-host-cores)))
;;======================================================================
;; testsuite and area utilites
;;======================================================================
@@ -395,14 +412,13 @@
((d) 86400)
((w) 604800)
((M) 2628000) ;; aproximately one month
((y) 31536000)
(else
- 0)))))))
- ;; (print "ERROR: can't parse timestring "tstr", component "part)
- ;; can't (yet) use debugprint. rely on -show-config for user to find errors
- )))
+ 0)))))
+ (debug:print 0 *default-log-port* "ERROR: can't parse timestring "tstr", component "part", string: "(cadr match))))
+ (debug:print 0 *default-log-port* "ERROR: can't parse timestring "tstr", component "part))))
parts)
time-secs))
(define (seconds->hr-min-sec secs)
(let* ((hrs (quotient secs 3600))
Index: configf.scm
==================================================================
--- configf.scm
+++ configf.scm
@@ -31,10 +31,15 @@
(declare (uses mtargs))
(declare (uses mtargs.import))
(declare (uses common))
(declare (uses commonmod))
(declare (uses commonmod.import))
+(declare (uses dbfile))
+(declare (uses dbfile.import))
+(declare (uses dbmod))
+(declare (uses dbmod.import))
+
(import commonmod
(prefix mtargs args:)
debugprint)
(include "common_records.scm")
Index: dashboard-tests.scm
==================================================================
--- dashboard-tests.scm
+++ dashboard-tests.scm
@@ -23,10 +23,11 @@
;;======================================================================
(declare (unit dashboard-tests))
(declare (uses common))
(declare (uses commonmod))
+(declare (uses dcommon))
(declare (uses db))
(declare (uses gutils))
(declare (uses rmt))
(declare (uses ezsteps))
(declare (uses subrun))
@@ -94,11 +95,12 @@
"Current state: "
"Current status: "
"Test comment: "
"Test id: "
"Test date: "))
- (list (iup:label "" #:expand "VERTICAL"))))
+ (list (iup:label "" #:expand "VERTICAL"
+ ))))
(apply iup:vbox ; #:expand "YES"
(list
(store-label "testname"
(iup:label (db:test-get-testname testdat) #:expand "HORIZONTAL")
(lambda (testdat)(db:test-get-testname testdat)))
@@ -162,11 +164,12 @@
(list "Author: "
"Owner: "
"Reviewed: "
"Tags: "
"Description: "))
- (list (iup:label "" #:expand "VERTICAL"))))
+ (list (iup:label "" #:expand "VERTICAL"
+ ))))
(apply iup:vbox ; #:expand "YES"
(list
(store-meta "author"
(iup:label (db:testmeta-get-author testmeta) #:expand "HORIZONTAL")
(lambda (testmeta)(db:testmeta-get-author testmeta)))
@@ -178,11 +181,12 @@
(lambda (testmeta)(db:testmeta-get-reviewed testmeta)))
(store-meta "tags"
(iup:label (db:testmeta-get-tags testmeta) #:expand "HORIZONTAL")
(lambda (testmeta)(db:testmeta-get-tags testmeta)))
(store-meta "description"
- (iup:label (test-meta-panel-get-description testmeta) #:size "x50"); #:expand "HORIZONTAL")
+ (iup:label (test-meta-panel-get-description testmeta) ;; #:wordwrap "YES" ;; #:size "x50"
+ ) ;; #:expand "HORIZONTAL")
(lambda (testmeta)
(test-meta-panel-get-description testmeta)))
)))))
@@ -206,16 +210,21 @@
(list (iup:label "runname ")
(iup:label "run-id")
(iup:label "run-date"))))
(apply iup:vbox
(append (map (lambda (keyval)
- (iup:label (cadr keyval) #:expand "HORIZONTAL"))
+ (iup:vbox
+ (iup:label (cadr keyval) #:expand "HORIZONTAL")
+ ;; (iup:label "" #:expand "BOTH")
+ )
+ )
keydat)
(list (iup:label runname)
(iup:label (conc run-id))
(iup:label (seconds->year-work-week/day-time event_time))
- (iup:label "" #:expand "VERTICAL"))))))))
+ (iup:label "" ;;#:expand "VERTICAL"
+ ))))))))
;;======================================================================
;; Host info panel
;;======================================================================
(define (host-info-panel testdat store-label)
@@ -231,11 +240,12 @@
"CPU Load: "
"Run duration: "
"Logfile: "
"Top process id: "
"Uname -a: "))
- (iup:label "" #:expand "VERTICAL")))
+ (iup:label "" ;; #:expand "VERTICAL"
+ )))
(apply iup:vbox ; #:expand "YES"
(list
;; NOTE: Yes, the host can change!
(store-label "HostName"
(iup:label ;; (sdb:qry 'getstr
@@ -267,18 +277,20 @@
;;
(define (submegatest-panel dbstruct keydat testdat runname testconfig)
(let* ((test-run-dir (db:test-get-rundir testdat))
(subarea (subrun:get-runarea test-run-dir))
(area-exists (and subarea (common:file-exists? subarea silent: #t))))
- (if subarea
- (iup:frame
- #:title "Megatest Run Info" ; #:expand "YES"
+ (iup:frame
+ #:title "Megatest Run Info" ;; #:expand "HORIZONTAL"
+ (if subarea
(iup:button
"Launch Dashboard"
#:action (lambda (obj)
- (subrun:launch-dashboard test-run-dir))))
- (iup:vbox))))
+ (subrun:launch-dashboard test-run-dir)))
+ (iup:vbox
+ (iup:label "Not a subrun..." #:expand "HORIZONTAL")
+ )))))
;; use a global for setting the buttons colors
;; state status teststeps
(define *state-status* (vector #f #f #f))
(define (update-state-status-buttons testdat)
@@ -463,11 +475,11 @@
;;======================================================================
;;
;;======================================================================
(define (dashboard-tests:examine-test run-id test-id) ;; run-id run-key origtest)
- (let* ((db-path (db:dbfile-path)) ;; (conc (configf:lookup *configdat* "setup" "linktree") "/db/" run-id ".db"))
+ (let* ((db-path (common:make-tmpdir-name *toppath* "")) ;; (conc (configf:lookup *configdat* "setup" "linktree") "/db/" run-id ".db"))
(dbstruct #f) ;; NOT USED
(testdat (rmt:get-test-info-by-id run-id test-id)) ;; (db:get-test-info-by-id dbstruct run-id test-id))
(db-mod-time 0) ;; (file-modification-time db-path))
(last-update 0) ;; (current-seconds))
(request-update #t))
@@ -670,10 +682,19 @@
(conc "megatest -target " keystring " -runname " runname
" -run -testpatt " (conc testname "/" (if (equal? item-path "")
"%"
item-path))
" -clean-cache"
+ ))))
+ (rerun-clean (lambda (x)
+ (iup:attribute-set!
+ command-text-box "VALUE"
+ (conc "megatest -target " keystring " -runname " runname
+ " -rerun-clean -testpatt " (conc testname "/" (if (equal? item-path "")
+ "%"
+ item-path))
+ " -clean-cache"
))))
(remove-test (lambda (x)
(iup:attribute-set!
command-text-box "VALUE"
(conc "megatest -remove-runs -target " keystring " -runname " runname
@@ -718,31 +739,45 @@
(else
;; (test-set-status! db run-id test-name state status itemdat)
(set! self ;
(iup:dialog #:close_cb (lambda (a)(exit)) ; #:expand "YES"
#:title testfullname
- (iup:vbox ; #:expand "YES"
- ;; The run and test info
- (iup:hbox ; #:expand "YES"
- (run-info-panel dbstruct keydat testdat runname)
- (test-info-panel testdat store-label widgets)
- (test-meta-panel testmeta store-meta))
+ (iup:vbox
(iup:hbox
- (host-info-panel testdat store-label)
- (submegatest-panel dbstruct keydat testdat runname testconfig))
+ (iup:vbox ; #:expand "YES"
+ ;; The run and test info
+ (iup:hbox ; #:expand "YES"
+ (run-info-panel dbstruct keydat testdat runname)
+ (test-info-panel testdat store-label widgets))
+ (host-info-panel testdat store-label))
+ (iup:vbox
+ (test-meta-panel testmeta store-meta)
+ (submegatest-panel dbstruct keydat testdat runname testconfig)))
;; The controls
- (iup:frame #:title "Actions"
+ (iup:hbox ;; frame #:title "Actions"
(iup:vbox
- (iup:hbox
- (iup:button "View Log" #:action viewlog #:size "80x")
- (iup:button "Start Xterm" #:action xterm #:size "80x")
- (iup:button "Run Test" #:action run-test #:size "80x")
- (iup:button "Clean Test" #:action remove-test #:size "80x")
- (iup:button "CleanRunExecute!" #:action clean-run-execute #:size "80x")
- (iup:button "Kill All Jobs" #:action kill-jobs #:size "80x")
- (iup:button "Archive Test" #:action archive-test #:size "80x")
- (iup:button "Close" #:action (lambda (x)(exit)) #:size "80x"))
+ (iup:hbox
+ (iup:frame
+ #:title "Immediate"
+ (iup:hbox
+ (iup:button "Start Xterm" #:action xterm #:size "80x")
+ (iup:button "CleanRunExecute!" #:action clean-run-execute #:size "80x")
+ (iup:button "View Log" #:action viewlog #:size "80x")))
+ (iup:frame
+ #:title "Command line"
+ (iup:hbox
+ (iup:button "Run Test" #:action run-test #:size "80x")
+ (iup:button "Rerun-clean" #:action rerun-clean #:size "80x")
+ (iup:button "Clean Test" #:action remove-test #:size "80x")
+ (iup:button "Kill All Jobs" #:action kill-jobs #:size "80x")))
+ (iup:label "" #:expand "HORIZONTAL")
+ (iup:frame
+ #:title "Other"
+ (iup:hbox
+ ;; (iup:button "Archive Test" #:action archive-test #:size "80x")
+ (iup:button "Close" #:action (lambda (x)(exit)) #:size "80x")
+ )))
(apply
iup:hbox
(list command-text-box command-launch-button))))
(set-fields-panel dbstruct run-id test-id testdat)
(let ((tabs
Index: dashboard-transport-mode.scm.template
==================================================================
--- dashboard-transport-mode.scm.template
+++ dashboard-transport-mode.scm.template
@@ -13,10 +13,10 @@
;; (dbfile:sync-method 'none)
;; (dbfile:cache-method 'none)
;; (rmt:transport-mode 'nfs)
;; uncomment this block to test with tcp and cachedb
-(dbfile:sync-method 'attach) ;; original was causing crash on start.
+(dbfile:sync-method 'none) ;; original was causing crash on start.
(dbfile:cache-method 'none)
(rmt:transport-mode 'nfs)
Index: dashboard.scm
==================================================================
--- dashboard.scm
+++ dashboard.scm
@@ -36,12 +36,14 @@
(declare (uses dashboard-context-menu))
(declare (uses vg))
(declare (uses subrun))
(declare (uses mt))
(declare (uses dbmod))
-(declare (uses rmtmod))
(declare (uses dbfile))
+(declare (uses dbfile.import))
+(declare (uses rmtmod))
+(declare (uses rmtmod.import))
(declare (uses commonmod))
(declare (uses commonmod.import))
(use format)
@@ -74,10 +76,12 @@
;; executables such as dashboard and mtutil
;;
(include "dashboard-transport-mode.scm")
(dbfile:db-init-proc db:initialize-main-db)
(set! rmtmod:send-receive rmt:send-receive)
+
+(debug:print-info 0 *default-log-port* "transport-mode="(rmt:transport-mode))
(define help (conc
"Megatest Dashboard, documentation at http://www.kiatoa.com/fossils/megatest version " megatest-version
" license GPL, Copyright (C) Matt Welland 2012-2017
@@ -118,10 +122,11 @@
0))
(if (args:get-arg "-mode")
(let* ((mode (string->symbol (args:get-arg "-mode"))))
(rmt:transport-mode mode)))
+;; (rmt:transport-mode 'tcp))
(if (args:get-arg "-test") ;; need to use tcp for test control panel
(rmt:transport-mode 'tcp))
;; RA => Might require revert for filters
@@ -138,33 +143,37 @@
;; (hash-table-set! args:arg-hash "-use-db-cache" #t)));;;)
;;)
;; data common to all tabs goes here
;;
-(defstruct dboard:commondat
- ((curr-tab-num 0) : number)
- please-update
- tabdats
- update-mutex
- updaters
- updating
- uidat ;; needs to move to tabdat at some time
- hide-not-hide-tabs
- target
- )
-
-(define (dboard:commondat-make)
- (make-dboard:commondat
- curr-tab-num: 0
- tabdats: (make-hash-table)
- please-update: #t
- update-mutex: (make-mutex)
- updaters: (make-hash-table)
- updating: #f
- hide-not-hide-tabs: #f
- target: ""
- ))
+;; Moved to dcommon.scm
+;;
+;; (defstruct dboard:commondat
+;; ((curr-tab-num 0) : number)
+;; please-update
+;; tabdats
+;; update-mutex
+;; updaters
+;; updating
+;; uidat ;; needs to move to tabdat at some time
+;; hide-not-hide-tabs
+;; target
+;; )
+;;
+;; (define (dboard:commondat-make)
+;; (make-dboard:commondat
+;; curr-tab-num: 0
+;; tabdats: (make-hash-table)
+;; please-update: #t
+;; update-mutex: (make-mutex)
+;; updaters: (make-hash-table)
+;; updating: #f
+;; hide-not-hide-tabs: #f
+;; target: ""
+;; ))
+
+(set! *journal-stats-enable* #f)
;;======================================================================
;; buttons color using image
;;======================================================================
@@ -207,39 +216,17 @@
;; (iup:attribute-set! img1 "2" "255 0 0")
(hash-table-set! images name img1)
name)))
-;; RA => returns the tabdat stored at hashkey passed in commondat-tabdats table (e.g. 0 gives summary)
-;;
-(define (dboard:common-get-tabdat commondat #!key (tab-num #f))
- (let* ((tnum (or tab-num
- (dboard:commondat-curr-tab-num commondat)
- 0)) ;; tab-num value is curr-tab-num value in passed commondat
- (ht (dboard:commondat-tabdats commondat))
- (res (hash-table-ref/default ht tnum #f)))
- (or res
- (let ((new-tabdat (dboard:tabdat-make-data)))
- (hash-table-set! ht tnum new-tabdat)
- new-tabdat))))
-
-;; RA => sets the tabdat passed to the hashkey at commondat:tabdats hash table
-;;
-(define (dboard:common-set-tabdat! commondat tabnum tabdat)
- (hash-table-set!
- (dboard:commondat-tabdats commondat)
- tabnum
- tabdat))
-
;; gets and calls updater list based on curr-tab-num
;;
(define (dboard:common-run-curr-updaters commondat #!key (tab-num #f))
;; (sync-db-to-tmp (dboard:common-get-tabdat commondat tab-num: tab-num)) ;; no longer applies
;; maybe need sleep here?
-
(if (dboard:common-get-tabdat commondat tab-num: tab-num) ;; only update if there is a tabdat
(let* ((tnum (or tab-num (dboard:commondat-curr-tab-num commondat)))
(updaters (hash-table-ref/default (dboard:commondat-updaters commondat)
tnum
'())))
@@ -401,12 +388,12 @@
(dboard:setup-tabdat dat)
(dboard:setup-num-rows dat)
dat))
(define (dboard:setup-tabdat tabdat)
- (dboard:tabdat-dbdir-set! tabdat (db:dbfile-path)) ;; (conc (configf:lookup *configdat* "setup" "linktree") "/.db"))
- (dboard:tabdat-dbfpath-set! tabdat (db:dbfile-path))
+ (dboard:tabdat-dbdir-set! tabdat (common:make-tmpdir-name *toppath* "")) ;; (conc (configf:lookup *configdat* "setup" "linktree") "/.db"))
+ (dboard:tabdat-dbfpath-set! tabdat (common:make-tmpdir-name *toppath* ""))
(dboard:tabdat-monitor-db-path-set! tabdat (conc (dboard:tabdat-dbdir tabdat) "/monitor.db"))
;; HACK ALERT: this is a hack, please fix.
(dboard:tabdat-ro-set! tabdat (not (file-read-access? (dboard:tabdat-dbfpath tabdat))))
@@ -670,11 +657,11 @@
;;
(define (dboard:get-tests-for-run-duplicate tabdat run-id run testnamepatt key-vals)
(let* ((start-time (current-seconds))
(access-mode (dboard:tabdat-access-mode tabdat))
(num-to-get (string->number (or (configf:lookup *configdat* "setup" "num-tests-to-get")
- "200")))
+ "1000")))
(states (hash-table-keys (dboard:tabdat-state-ignore-hash tabdat)))
(statuses (hash-table-keys (dboard:tabdat-status-ignore-hash tabdat)))
(do-not-use-db-file-timestamps #f) ;; (configf:lookup *configdat* "setup" "do-not-use-db-file-timestamps")) ;; this still hosts runs-summary-tab
(do-not-use-query-timestamps #t) ;; (configf:lookup *configdat* "setup" "do-not-use-query-timestamps")) ;; this no longer troubles runs-summary-tab
(sort-info (get-curr-sort))
@@ -697,12 +684,12 @@
(last-db-time (if do-not-use-db-file-timestamps
0
(dboard:rundat-last-db-time run-dat)))
(db-path (or (dboard:rundat-db-path run-dat)
(let* ((db-dir (get-environment-variable "MT_RUN_AREA_HOME")) ;; (common:get-db-tmp-area))
- (db-pth (conc db-dir "/.mtdb/main.db")))
- (dboard:rundat-db-path-set! run-dat db-pth)
+ (db-pth (conc db-dir "/.mtdb/*.db")))
+ (dboard:rundat-db-path-set! run-dat db-pth) ;; this is just a cache of the path
db-pth)))
(db-mod-time (common:lazy-sqlite-db-modification-time db-path))
(db-modified (>= db-mod-time last-db-time))
(multi-get (> (dboard:rundat-run-data-offset run-dat) 0)) ;; multi-get in progress
(tmptests (if (or do-not-use-db-file-timestamps
@@ -748,16 +735,16 @@
(for-each
(lambda (tdat)
(let ((test-id (db:test-get-id tdat))
(state (db:test-get-state tdat)))
- (dboard:rundat-data-changed-set! run-dat #t)
- (if (equal? state "DELETED")
- (hash-table-delete! tests-ht test-id)
- (hash-table-set! tests-ht test-id tdat))))
- tmptests)
-
+ (dboard:rundat-data-changed-set! run-dat #t)
+ (if (equal? state "DELETED")
+ (hash-table-delete! tests-ht test-id)
+ (hash-table-set! tests-ht test-id tdat))))
+ tmptests)
+
tests-ht))
;; tmptests - new tests data
;; prev-tests - old tests data
;;
@@ -852,10 +839,16 @@
(loop run tal new-res newmaxtests) ;; not done getting data for this run
(loop (car tal)(cdr tal) new-res newmaxtests)))))))
(dboard:tabdat-filters-changed-set! tabdat #f)
(dboard:update-tree tabdat runs-hash header tb)))
+
+(define *dashboard-last-run-id-update* (make-hash-table)) ;; id => seconds
+
+(define (dboard:clear-run-id-update-hash)
+ (hash-table-clear! *dashboard-last-run-id-update*))
+
;; this calls dboard:get-tests-for-run-duplicate for each run
;;
;; create a virtual table of all the tests
;; keypatts: ( (KEY1 "abc%def")(KEY2 "%") )
;;
@@ -888,63 +881,82 @@
(dboard:tabdat-item-test-names-set! tabdat '())
(hash-table-clear! (dboard:tabdat-allruns-by-id tabdat)))
(let loop ((run (car runs))
(tal (cdr runs))
(res '())
- (maxtests 0))
+ (maxtests 0)
+ (cont-run #f))
(let* ((run-id (db:get-value-by-header run header "id"))
+ (recently-done (< (- (current-seconds)
+ (hash-table-ref/default *dashboard-last-run-id-update* run-id 0)) 1))
(run-struct (hash-table-ref/default (dboard:tabdat-allruns-by-id tabdat) run-id #f))
;; (last-update (if run-struct (dboard:rundat-last-update run-struct) 0))
(key-vals (rmt:get-key-vals run-id))
- (tests-ht (dboard:get-tests-for-run-duplicate tabdat run-id run testnamepatt key-vals))
+ (tests-ht (let* ((tht (if (and recently-done run-struct)
+ (let ((rht (dboard:rundat-tests run-struct))) ;; (dboard:tabdat-allruns-by-id tabdat)))
+ (or rht
+ (dboard:get-tests-for-run-duplicate tabdat run-id run testnamepatt key-vals)))
+ (dboard:get-tests-for-run-duplicate tabdat run-id run testnamepatt key-vals))))
+ (assert (hash-table? tht) "FATAL: But here tht should be a hash-table")
+ tht))
;; GET RID OF dboard:get-tests-dat - it is superceded by dboard:get-tests-for-run-duplicate
;; dboard:get-tests-for-run-duplicate - returns a hash table
;; (dboard:get-tests-dat tabdat run-id last-update))
(all-test-ids (hash-table-keys tests-ht))
- (num-tests (length all-test-ids)))
- ;; (print "run-struct: " run-struct)
- ;; NOTE: bubble-up also sets the global (dboard:tabdat-item-test-names tabdat)
- ;; (tests (bubble-up tmptests priority: bubble-type))
- ;; NOTE: 11/01/2013 This routine is *NOT* getting called excessively.
- ;; (debug:print 0 *default-log-port* "Getting data for run " run-id " with key-vals=" key-vals)
- ;; Not sure this is needed?
- (let* ((newmaxtests (max num-tests maxtests))
- ;; (last-update (- (current-seconds) 10))
- (run-struct (or run-struct
- (dboard:rundat-make-init
- run: run
- tests: tests-ht
- key-vals: key-vals)))
- (new-res (if (null? all-test-ids)
- res
- (delete-duplicates
- (cons run-struct res)
- (lambda (a b)
- (eq? (db:get-value-by-header (dboard:rundat-run a) header "id")
- (db:get-value-by-header (dboard:rundat-run b) header "id"))))))
- (elapsed-time (- (current-seconds) start-time)))
- (if (null? all-test-ids)
+ (num-tests (length all-test-ids))
+ ;; (print "run-struct: " run-struct)
+ ;; NOTE: bubble-up also sets the global (dboard:tabdat-item-test-names tabdat)
+ ;; (tests (bubble-up tmptests priority: bubble-type))
+ ;; NOTE: 11/01/2013 This routine is *NOT* getting called excessively.
+ ;; (debug:print 0 *default-log-port* "Getting data for run " run-id " with key-vals=" key-vals)
+ ;; Not sure this is needed?
+ (newmaxtests (max num-tests maxtests))
+ ;; (last-update (- (current-seconds) 10))
+ (run-struct (or run-struct
+ (dboard:rundat-make-init
+ run: run
+ tests: tests-ht
+ key-vals: key-vals)))
+ (new-res (if (null? all-test-ids)
+ res
+ (delete-duplicates
+ (cons run-struct res)
+ (lambda (a b)
+ (eq? (db:get-value-by-header (dboard:rundat-run a) header "id")
+ (db:get-value-by-header (dboard:rundat-run b) header "id"))))))
+ (elapsed-time (- (current-seconds) start-time)))
+ (if (null? all-test-ids)
(hash-table-delete! (dboard:tabdat-allruns-by-id tabdat) run-id)
(hash-table-set! (dboard:tabdat-allruns-by-id tabdat) run-id run-struct))
- (if (or (null? tal)
- (> elapsed-time 2)) ;; stop loading data after 5 seconds, on the next call more data *should* be loaded since get-tests-for-run uses last update
- (begin
- (when (> elapsed-time 2)
- (debug:print 0 *default-log-port* "NOTE: updates are taking a long time, " elapsed-time "s elapsed.")
- (let* ((old-val (iup:attribute *tim* "TIME"))
- (new-val (number->string (inexact->exact (floor (* 2 (string->number old-val)))))))
- (if (< (string->number new-val) 5000)
- (begin
- (debug:print 0 *default-log-port* "NOTE: increasing poll interval from "old-val" to "new-val)
- (iup:attribute-set! *tim* "TIME" new-val)))))
- (dboard:tabdat-allruns-set! tabdat new-res)
- maxtests)
- (if (> (dboard:rundat-run-data-offset run-struct) 0)
- (loop run tal new-res newmaxtests) ;; not done getting data for this run
- (loop (car tal)(cdr tal) new-res newmaxtests)))))))
- (dboard:tabdat-filters-changed-set! tabdat #f)
- (dboard:update-tree tabdat runs-hash header tb)))
+
+ (if (or (null? tal)
+ (> elapsed-time 2)) ;; stop loading data after 5
+ ;; seconds, on the next call
+ ;; more data *should* be
+ ;; loaded since
+ ;; get-tests-for-run uses last
+ ;; update
+ (begin
+ (when (> elapsed-time 2)
+ (debug:print 2 *default-log-port* "NOTE: updates are taking a long time, " elapsed-time "s elapsed.")
+ (let* ((old-val (iup:attribute *tim* "TIME"))
+ (new-val (number->string (inexact->exact (floor (* 2 (string->number old-val)))))))
+ (if (< (string->number new-val) 5000)
+ (begin
+ (debug:print 2 *default-log-port* "NOTE: increasing poll interval from "old-val" to "new-val)
+ (iup:attribute-set! *tim* "TIME" new-val)))))
+ (dboard:tabdat-allruns-set! tabdat new-res)
+ maxtests)
+ (if (> (dboard:rundat-run-data-offset run-struct) 0)
+ (begin
+ (thread-sleep! 0.2) ;; let the gui re-draw
+ (loop run tal new-res newmaxtests #t)) ;; not done getting data for this run
+ (begin
+ (hash-table-set! *dashboard-last-run-id-update* run-id (current-seconds))
+ (loop (car tal)(cdr tal) new-res newmaxtests #f)))))))
+ (dboard:tabdat-filters-changed-set! tabdat #f)
+ (dboard:update-tree tabdat runs-hash header tb)))
(define *collapsed* (make-hash-table))
(define (toggle-hide lnum uidat) ; fulltestname)
(let* ((btn (vector-ref (dboard:uidat-get-lftcol uidat) lnum))
@@ -1152,15 +1164,15 @@
(drop (dboard:tabdat-all-test-names tabdat)
(dboard:tabdat-start-test-offset tabdat))
'())))
(append xl (make-list (- (dboard:tabdat-num-tests tabdat) (length xl)) ""))))
(update-labels uidat (dboard:tabdat-all-test-names tabdat))
- (for-each
+ (for-each ;;run
(lambda (rundat)
- ;; if rundat is junk clobber it with a decent placeholder
(if (or (not rundat) ;; handle padded runs
(not (dboard:rundat-run rundat)))
+ ;; Need to put an empty column in to erase previous contents.
(set! rundat (dboard:rundat-make-init
key-vals: (map (lambda (x) "")(dboard:tabdat-keys tabdat)))))
(let* ((run (dboard:rundat-run rundat))
(testsdat-by-name (dboard:rundat-tests-by-name rundat))
(key-val-dat (dboard:rundat-key-vals rundat))
@@ -1167,23 +1179,22 @@
(run-id (db:get-value-by-header run (dboard:tabdat-header tabdat) "id"))
(key-vals (append key-val-dat
(list (let ((x (db:get-value-by-header run (dboard:tabdat-header tabdat) "runname")))
(if (string? x) x "")))))
(run-key (string-intersperse key-vals "\n")))
-
+
;; fill in the run header key values
;;
- (let ((rown 0)
+ (let ((rown 0)
(headercol (vector-ref tableheader coln)))
(for-each (lambda (kval)
(let* ((labl (vector-ref headercol rown)))
(if (not (equal? kval (iup:attribute labl "TITLE")))
(iup:attribute-set! (vector-ref headercol rown) "TITLE" kval))
(set! rown (+ rown 1))))
key-vals))
-
- ;; For this run now fill in the buttons for each test
+ ;; For this run now fill in the buttons for each test
;;
(let ((rown 0)
(columndat (vector-ref table coln)))
(for-each
(lambda (testname)
@@ -1196,17 +1207,12 @@
;; testsdat)))
(if (not matching)
(vector -1 -1 "" "" "" 0 "" "" 0 "" "" "" 0 "" "")
;; (car matching))))
matching)))
- (testname (db:test-get-testname testdat))
- (itempath (db:test-get-item-path testdat))
- (testfullname (test:test-get-fullname testdat))
(teststatus (db:test-get-status testdat))
(teststate (db:test-get-state testdat))
- ;;(teststart (db:test-get-event_time test))
- ;;(runtime (db:test-get-run_duration test))
(buttontxt (cond
((member teststate '("COMPLETED" "ARCHIVED")) teststatus)
((and (equal? teststate "NOT_STARTED")
(member teststatus '("ZERO_ITEMS" "BLOCKED" "PREQ_FAIL" "PREQ_DISCARDED" "TIMED_OUT" "KEEP_TRYING" "TEN_STRIKES")))
teststatus)
@@ -1401,11 +1407,15 @@
tp)))
(states (dboard:tabdat-states tabdat))
(statuses (dboard:tabdat-statuses tabdat))
(target (let ((targ-list (dboard:tabdat-target tabdat)))
(if targ-list (string-intersperse targ-list "/") "no-target-selected")))
- (run-name (dboard:tabdat-run-name tabdat))
+ (run-name (let ((run-input (dboard:tabdat-run-name tabdat))
+ )
+ (if (equal? run-input "")
+ "no-runname-specified"
+ run-input)))
(states-str (if (or (not states)
(null? states))
""
(conc " -state " (string-intersperse states ","))))
(statuses-str (if (or (not statuses)
@@ -2403,20 +2413,21 @@
)
)) "runs-summary-click-callback"))))
(runs-summary-updater
(lambda ()
- (mutex-lock! update-mutex)
+ ;; (mutex-lock! update-mutex)
(if (or (dashboard:database-changed? commondat tabdat context-key: 'runs-summary-updater)
(dboard:tabdat-view-changed tabdat))
(debug:catch-and-dump
(lambda () ;; check that run-matrix is initialized before calling the updater
(if run-matrix
(dashboard:runs-summary-updater commondat tabdat tb cell-lookup run-matrix)))
"dashboard:runs-summary-updater")
)
- (mutex-unlock! update-mutex)))
+ #;(mutex-unlock! update-mutex)
+ ))
(runs-summary-control-panel (dashboard:runs-summary-control-panel tabdat))
)
(dboard:commondat-add-updater commondat runs-summary-updater tab-num: tab-num)
(dboard:tabdat-runs-tree-set! tabdat tb)
(iup:vbox
@@ -2459,11 +2470,11 @@
(iup:vbox
(iup:textbox #:size "120x15" #:fontsize "10" #:value "%"
#:expand "NO"
#:action (lambda (obj unk val)
(debug:catch-and-dump
- (lambda ()
+ (lambda ()57
(mark-for-update tabdat)
(update-search commondat tabdat "test-name" val))
"make-controls")))
(iup:hbox
(iup:button "Quit" #:action (lambda (obj)
@@ -2478,10 +2489,11 @@
(dboard:tabdat-allruns-by-id-set! tabdat (make-hash-table))
(dboard:tabdat-done-runs-set! tabdat '())
(dboard:tabdat-not-done-runs-set! tabdat '())
(dboard:tabdat-view-changed-set! tabdat #t)
(dboard:commondat-please-update-set! commondat #t)
+ (dboard:clear-run-id-update-hash)
(mark-for-update tabdat))
#:expand "NO" #:size "40x15")
(iup:button "Collapse" #:action (lambda (obj)
(debug:catch-and-dump
(lambda ()
@@ -3111,11 +3123,11 @@
(debug:print 2 *default-log-port* "WARNING: error in accessing databases in get-youngest-run-db-mod-time: "
((condition-property-accessor 'exn 'message) exn) " db-dir="dbdir ", exn=" exn)
(current-seconds)) ;; something went wrong - just print an error and return current-seconds
(common:max (map (lambda (filen)
(file-modification-time filen))
- (glob (conc dbdir "/*.db*"))))))
+ (cons (conc dbdir "/main.db") (glob (conc dbdir "/?.db")))))))
(define (dashboard:monitor-changed? commondat tabdat)
(let* ((run-update-time (current-seconds))
(monitor-db-path (dboard:tabdat-monitor-db-path tabdat))
(monitor-modtime (if (and monitor-db-path (common:file-exists? monitor-db-path))
@@ -3136,11 +3148,11 @@
(hash-table-set! (dboard:tabdat-last-db-update tabdat) context newtime))
;;
(define (dashboard:database-changed? commondat tabdat #!key (context-key 'default))
(let* ((run-update-time (current-seconds))
- (dbdir *toppath*)
+ (dbdir (conc *toppath* "/.mtdb"))
(modtime (dashboard:get-youngest-run-db-mod-time dbdir))
(recalc (dashboard:recalc modtime
(dboard:commondat-please-update commondat)
(dboard:get-last-db-update tabdat context-key))))
(if recalc
@@ -3343,14 +3355,14 @@
(vch (dboard:tabdat-view-changed tabdat)))
(if (and cnv dwg vch)
(begin
(vg:drawing-xoff-set! dwg (dboard:tabdat-xadj tabdat))
(vg:drawing-yoff-set! dwg (dboard:tabdat-yadj tabdat))
- (mutex-lock! mtx)
+ ;; (mutex-lock! mtx)
(canvas-clear! cnv)
(vg:draw dwg tabdat)
- (mutex-unlock! mtx)
+ ;; (mutex-unlock! mtx)
(dboard:tabdat-view-changed-set! tabdat #f)))))
;; doesn't work.
;;
;;(define (gotoescape tabdat escape)
@@ -3630,17 +3642,17 @@
(graph-uly (- (calc-y 0) canvas-margin))
(sec-per-50pt (/ 50 timescale))
)
;; (print "timeoffset: " timeoffset " timescale: " timescale " run-duration: " (seconds->hr-min-sec run-duration) " width: " width " sec-per-50pt: " sec-per-50pt)
;; (print "timescale: " timescale " timeoffset: " timeoffset " sizex: " sizex " originx: " originx)
- (mutex-lock! mtx)
+ ;; (mutex-lock! mtx)
(vg:add-comp-to-lib runslib run-full-name runcomp)
;; Have to keep moving the instantiated box as it is anchored at the lower left
;; this should have worked for x in next statement? (maptime run-start)
;; add 60 to make room for the graph
(vg:instantiate drawing "runslib" run-full-name run-full-name 8 (- (calc-y curr-run-start-row) (+ 5 graph-height run-to-run-margin)))
- (mutex-unlock! mtx)
+ ;; (mutex-unlock! mtx)
;; (set! run-start-row (+ max-row 2))
;; (dboard:tabdat-start-row-set! tabdat (+ new-run-start-row 1))
;; get tests in list sorted by event time ascending
(let testsloop ((test-ids (car hierdat)) ;; loop on tests (NOTE: not items!)
(tests-tal (cdr hierdat))
@@ -3741,13 +3753,13 @@
(outln (vg:make-rect-obj -5 lly ulx uly
text: run-full-name
line-color: (vg:rgb->number 255 0 255 a: 128))))
; (vg:components-get-extents d1 c1)))
;; this is the box around the run
- (mutex-lock! mtx)
+ ;; (mutex-lock! mtx)
(vg:add-obj-to-comp runcomp outln)
- (mutex-unlock! mtx)
+ ;; (mutex-unlock! mtx)
;; this is where we have enough info to place the graph
(dboard:graph commondat tabdat tab-num -5 (+ uly 10) ulx (+ uly graph-height 3) run-start run-end timescale maptime run-full-name canvas-margin)
(dboard:tabdat-max-row-set! tabdat (+ (dboard:tabdat-max-row tabdat)(quotient (+ graph-height 40 3) row-height)))
;; (vg:instance-move drawing run-full-name 0 (dboard:tabdat-max-row tabdat))
))
@@ -3827,11 +3839,12 @@
(let* ((mtdb-path (conc *toppath* "/.mtdb/main.db"))
(target (args:get-arg "-target"))
(commondat (dboard:commondat-make)))
(if target
(begin
- (args:remove-arg-from-ht "-target")
+ (hash-table-delete! args:arg-hash "-target") ;; workaround for the following commented out function
+ ;; (args:remove-arg-from-ht "-target") This function is in mtargs/mtargs.scm, but it's in an egg that is not in the current build of chicken 4.10
(dboard:commondat-target-set! commondat target)
)
)
(if (not (launch:setup))
@@ -3886,22 +3899,22 @@
;; tab-num: 2)
(iup:callback-set! *tim*
"ACTION_CB"
(lambda (time-obj)
(let ((update-is-running #f))
- (mutex-lock! (dboard:commondat-update-mutex commondat))
- (set! update-is-running (dboard:commondat-updating commondat))
- (if (not update-is-running)
- (dboard:commondat-updating-set! commondat #t))
- (mutex-unlock! (dboard:commondat-update-mutex commondat))
- (if (not update-is-running) ;; we know that the update was not running and we now have a lock on doing an update
+ ;; (mutex-lock! (dboard:commondat-update-mutex commondat))
+ (set! update-is-running (dboard:commondat-updating commondat))
+ (if (not update-is-running)
+ (dboard:commondat-updating-set! commondat #t))
+ ;; (mutex-unlock! (dboard:commondat-update-mutex commondat))
+ (if (not update-is-running) ;; we know that the update was not running and we now have a lock on doing an update
(begin
(dboard:common-run-curr-updaters commondat) ;; (dashboard:run-update commondat)
- (mutex-lock! (dboard:commondat-update-mutex commondat))
+ ;; (mutex-lock! (dboard:commondat-update-mutex commondat))
(dboard:commondat-updating-set! commondat #f)
- (mutex-unlock! (dboard:commondat-update-mutex commondat)))
- ))
+ ;; (mutex-unlock! (dboard:commondat-update-mutex commondat))
+ )))
1))))
;; (debug:print 0 *default-log-port* "Starting updaters")
(let ((th1 (make-thread (lambda ()
(thread-sleep! 1)
(dboard:common-run-curr-updaters commondat 0) ;; force update of summary tab
@@ -3922,11 +3935,11 @@
(define (sync-db-to-tmp tabdat)
(let* ((db-file "./.mtdb/main.db"))
(if (and (not (file-write-access? db-file)) ( > (current-seconds) (+ last-copy-time 5)))
(begin
- (db:multi-db-sync (db:setup #f) 'old2new)
+ (db:multi-db-sync (db:setup) 'old2new)
(set! last-copy-time (current-seconds))
)
)
)
)
@@ -3943,11 +3956,14 @@
(exit 1))))
'("MT_RUN_AREA_HOME" "MT_MEGATEST" "MT_CMDINFO" "MT_TEST_RUN_DIR" "MT_LINKTREE" "MT_TESTSUITENAME"))
)
)
-(setenv "MT_RUN_AREA_HOME" (get-environment-variable "PWD"))
+;; This is NOT good
+;; (setenv "MT_RUN_AREA_HOME" (get-environment-variable "PWD"))
+;; This should be OK but it really should not be necessary
+(setenv "MT_RUN_AREA_HOME" (current-directory))
(if (not (null? remargs))
(if remargs
(begin
(debug:print 0 *default-log-port* "Unrecognised arguments: " (string-intersperse remargs " "))
Index: db.scm
==================================================================
--- db.scm
+++ db.scm
@@ -131,15 +131,15 @@
(debug:print-error 0 *default-log-port* " query " stmt " failed, params: " params ", error: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn)
(print-call-chain (current-error-port))
default)))
(apply sqlite3:first-result db stmt params)))
-(define (db:setup do-sync)
+(define (db:setup)
(assert *toppath* "FATAL: db:setup called before launch:setup has been run.")
- (let* ((tmpdir (common:get-db-tmp-area)))
+ (let* ((tmpdir (common:make-tmpdir-name *toppath* "")))
(if (not *dbstruct-dbs*)
- (dbfile:setup do-sync *toppath* tmpdir)
+ (dbfile:setup (conc *toppath* "/.mtdb") tmpdir)
*dbstruct-dbs*)))
;; moved from dbfile
;;
;; ADD run-id SUPPORT
@@ -267,17 +267,10 @@
", arguments: " ((condition-property-accessor 'exn 'arguments) exn)
", location: " ((condition-property-accessor 'exn 'location) exn)
))
-;; NB// #f => return dbdir only
-;; (was planned to be; zeroth db with name=main.db)
-;;
-;; If run-id is #f return to create and retrieve the path where the db will live.
-;;
-(define db:dbfile-path common:get-db-tmp-area)
-
(define (db:set-sync db)
(let ((syncprag (configf:lookup *configdat* "setup" "sychronous")))
(sqlite3:execute db (conc "PRAGMA synchronous = " (or syncprag 0) ";"))))
@@ -467,11 +460,11 @@
(get-mtime shm-file))))
;; (define (db:all-db-sync dbstruct)
;; (let* ((dbdat (db:open-db dbstruct #f db:initialize-main-db))
;; (data-synced 0) ;; count of changed records
-;; (tmp-area (common:get-db-tmp-area))
+;; (tmp-area (common:make-tmpdir-name *toppath*))
;; (dbfiles (glob (conc tmp-area"/.mtdb/*.db")))
;; (sync-durations (make-hash-table))
;; (no-sync-db (db:open-no-sync-db)))
;; (for-each
;; (lambda (file) ;; tmp db file
@@ -528,23 +521,63 @@
;; (if dbdat (dbfile:add-dbdat dbstruct #f dbdat))
;; )
;; #t)
(define (db:kill-servers)
- (let* ((servers (server:choose-server *toppath* 'all-valid))) ;; (server:get-list *toppath*))
- (for-each
- (lambda (server)
- (handle-exceptions
- exn
- (begin
- (debug:print-info 0 *default-log-port* "Unable to get host and/or port from " server ", exn=" exn)
- #f)
- (match-let (((mod-time host port start-time server-id pid) server))
- (if (and host pid)
- (tasks:kill-server host pid)))))
- servers)
- (delete-file* (common:get-sync-lock-filepath))))
+ (let* ((tl (launch:setup)) ;; need this to initialize *toppath*
+ (servdir (conc *toppath* "/.servinfo"))
+ (servfiles (glob (conc servdir "/*:*.db")))
+ (fmtstr "~10a~22a~10a~25a~25a~8a\n")
+ (dbfiles (append (glob (conc *toppath* "/.mtdb/main.db")) (glob (conc *toppath* "/.mtdb/?.db"))(glob (conc *toppath* "/.mtdb/??.db"))))
+ (ttdat (make-tt areapath: *toppath*))
+ )
+ (format #t fmtstr "DB" "host:port" "PID" "age" "last mod" "state")
+ (for-each
+ (lambda (dbfile)
+ (let* (
+ (dbfname (conc (pathname-file dbfile) ".db"))
+ (sfiles (tt:find-server *toppath* dbfname))
+ )
+ (for-each
+ (lambda (sfile)
+ (let (
+ (sinfos (tt:get-server-info-sorted ttdat dbfname))
+ )
+ (for-each
+ (lambda (sinfo)
+ (let* (
+ (db (list-ref sinfo 5))
+ (pid (list-ref sinfo 4))
+ (host (list-ref sinfo 0))
+ (port (list-ref sinfo 1))
+ (server-id (list-ref sinfo 3))
+ (age (seconds->hr-min-sec (- (current-seconds) (list-ref sinfo 2))))
+ (last-mod (seconds->string (list-ref sinfo 2)))
+ (killed (system (conc "ssh " host " kill " pid " > /dev/null")))
+ (dummy2 (sleep 1))
+ (state (if (> (system (conc "ssh " host " ps " pid " > /dev/null")) 0) "dead" "alive"))
+ )
+ (format #t fmtstr db (conc host ":" port) pid age last-mod state)
+ (system (conc "rm " sfile))
+ )
+ )
+ sinfos
+ )
+ )
+ )
+ sfiles
+ )
+ )
+ )
+ dbfiles
+ )
+ ;; remove this db, because otherwise metadata contains records for old servers, and this causes a problem with db:no-sync-get-lock-with-id.
+ (if (file-exists? (conc *toppath* "/.mtdb/no-sync.db"))
+ (delete-file (conc *toppath* "/.mtdb/no-sync.db"))
+ )
+ )
+)
;; options:
;;
;; 'killservers - kills all servers
;; 'dejunk - removes junk records
@@ -556,33 +589,34 @@
;; run-ids: '(1 2 3 ...) or #f (for all)
;;
(define (db:multi-db-sync dbstruct . options)
(let* (;; (dbdat (db:open-db dbstruct #f dbfile:db-init-proc))
(data-synced 0) ;; count of changed records
- (tmp-area (common:get-db-tmp-area))
+ (tmp-area (common:make-tmpdir-name *toppath* ""))
(old2new (member 'old2new options))
(dejunk (member 'dejunk options))
(killservers (member 'killservers options))
(src-area (if old2new *toppath* tmp-area))
- (dest-area (if old2new tmp-area *toppath*))
- (dbfiles (if old2new (glob (conc *toppath* "/.mtdb/*.db")) (glob (conc tmp-area "/.mtdt/*.db"))))
+ (dest-area (if old2new tmp-area (conc *toppath* "/.mtdb")))
+ (dbfiles (if old2new (glob (conc *toppath* "/.mtdb/*.db"))
+ (glob (conc tmp-area "/*.db"))))
(keys (db:get-keys dbstruct))
(sync-durations (make-hash-table)))
;; kill servers
- (if killservers (db:kill-servers))
+ ;; (if killservers (db:kill-servers))
(if (not dbfiles)
(debug:print-error 0 *default-log-port* "no dbfiles found in " (conc *toppath* "/.mtdb"))
(for-each
(lambda (srcfile)
(debug:print-info 3 *default-log-port* "file: " srcfile)
(let* ((fname (conc (pathname-file srcfile) ".db"))
(basename (pathname-file srcfile))
(run-id (if (string= basename "main") #f (string->number basename)))
- (destfile (conc dest-area "/.mtdb/" fname))
- (dest-directory (conc dest-area "/.mtdb/"))
+ (destfile (conc dest-area "/" fname))
+ (dest-directory dest-area)
(time1 (file-modification-time srcfile))
(time2 (if (file-exists? destfile)
(begin
(debug:print-info 2 *default-log-port* "destfile " destfile " exists")
(file-modification-time destfile))
@@ -602,33 +636,37 @@
#t)
((and changed *time-to-exit*) ;; last sync
#t)
(else
#f))))
- (if (or dejunk do-cp)
+
+ (if (or dejunk do-cp)
(let* ((start-time (current-milliseconds))
- ;; subdb is misnamed - should be dbdat (I think...)
- (subdb (dbfile:open-db dbstruct run-id dbfile:db-init-proc))
- ;; (or (dbfile:get-subdb dbstruct run-id)
- ;; (dbfile:init-subdb dbstruct run-id dbfile:db-init-proc)))
+ (subdb (or (dbfile:get-subdb dbstruct run-id) (dbfile:init-subdb dbstruct run-id dbfile:db-init-proc)))
+ (dbdat (or (dbfile:get-dbdat dbstruct run-id) (dbfile:open-db dbstruct run-id dbfile:db-init-proc)))
(mtdb (dbr:subdb-mtdbdat subdb))
;;
;; BUG: -mrw- I think this next line is wrong. run-id should be the path to .mtdb/.db
;;
(tmpdb (dbfile:open-db dbstruct run-id dbfile:db-init-proc)))
+ (if dejunk
+ (begin
+ (debug:print 0 *default-log-port* "Cleaning tmp DB")
+ (db:clean-up run-id tmpdb)
+ (debug:print 0 *default-log-port* "Cleaning nfs DB")
+ (db:clean-up run-id mtdb)
+ )
+ )
(debug:print-info 2 *default-log-port* "delta syncing file: " srcfile ", time diff: " (- time1 time2) " seconds")
(if old2new
(begin
- (if dejunk (db:clean-up run-id mtdb))
(db:sync-tables (db:sync-all-tables-list
- dbstruct
(db:get-keys dbstruct))
#f mtdb tmpdb))
(begin
- (if dejunk (db:clean-up run-id tmpdb))
- (db:sync-tables (db:sync-all-tables-list dbstruct (db:get-keys dbstruct)) #f tmpdb mtdb)))
+ (db:sync-tables (db:sync-all-tables-list (db:get-keys dbstruct)) #f tmpdb mtdb)))
(hash-table-set! sync-durations (conc srcfile ".db") (- (current-milliseconds) start-time)))
(debug:print-info 2 *default-log-port* "skipping delta sync. " srcfile " is up to date"))))
dbfiles))
data-synced))
@@ -640,11 +678,11 @@
(for-each
(lambda (subdb)
(let* ((mtdb (dbr:subdb-mtdb subdb))
(tmpdb (db:get-subdb dbstruct run-id))
(refndb (dbr:subdb-refndb subdb))
- (newres (db:sync-tables (db:sync-all-tables-list dbstruct (db:get-keys dbstruct)) last-update tmpdb refndb mtdb)))
+ (newres (db:sync-tables (db:sync-all-tables-list (db:get-keys dbstruct)) last-update tmpdb refndb mtdb)))
;; (stack-push! (dbr:subdb-dbstack subdb) tmpdb)
;; BUG: verify this is really needed
(dbfile:add-dbdat dbstruct run-id tmpdb)
(set! res (cons newres res))))
subdbs)
@@ -1148,16 +1186,19 @@
;; 2. Look at run records
;; a. If have tests that are not deleted, set state='unknown'
;; b. ....
;;
(define (db:clean-up run-id dbdat)
- (debug:print 2 *default-log-port* "db:clean-up")
-
-
(if run-id
- (db:clean-up-rundb dbdat)
- (db:clean-up-maindb dbdat)
+ (begin
+ (debug:print 0 *default-log-port* "Cleaning run DB " run-id)
+ (db:clean-up-rundb dbdat run-id)
+ )
+ (begin
+ (debug:print 0 *default-log-port* "Cleaning main DB ")
+ (db:clean-up-maindb dbdat)
+ )
)
)
;; Clean out old junk and vacuum the database
@@ -1169,38 +1210,42 @@
;; b. If test dir gone, delete the test record
;; 2. Look at run records
;; a. If have tests that are not deleted, set state='unknown'
;; b. ....
;;
-(define (db:clean-up-rundb dbdat)
+(define (db:clean-up-rundb dbdat run-id)
;; (debug:print 0 *default-log-port* "WARNING: db clean up not fully ported to v1.60, cleanup action will be on megatest.db")
(let* ((db (dbr:dbdat-dbh dbdat))
- (count-stmt (sqlite3:prepare db "SELECT (SELECT count(id) FROM tests);"))
+ (test-count-stmt (sqlite3:prepare db "SELECT (SELECT count(id) FROM tests);"))
+ (step-count-stmt (sqlite3:prepare db "SELECT (SELECT count(id) FROM test_steps);"))
(statements
(map (lambda (stmt)
(sqlite3:prepare db stmt))
(list
- ;; delete all tests that belong to runs that are 'deleted'
- ;; (conc "DELETE FROM tests WHERE run_id NOT IN (" (string-intersperse (map conc valid-runs) ",") ");")
- ;; delete all tests that are 'DELETED'
"DELETE FROM tests WHERE state='DELETED';"
+ "DELETE FROM test_steps WHERE status = 'DELETED';"
+ "DELETE FROM tests WHERE run_id IN (SELECT id FROM runs WHERE state = 'deleted');"
))))
- ;; (db:delay-if-busy dbdat)
(sqlite3:with-transaction
db
(lambda ()
(sqlite3:for-each-row (lambda (tot)
- (debug:print-info 0 *default-log-port* "Records count before clean: " tot))
- count-stmt)
+ (debug:print-info 0 *default-log-port* "Test records count before clean: " tot))
+ test-count-stmt)
+ (sqlite3:for-each-row (lambda (tot)
+ (debug:print-info 0 *default-log-port* "Test_step records count before clean: " tot))
+ step-count-stmt)
(map sqlite3:execute statements)
(sqlite3:for-each-row (lambda (tot)
- (debug:print-info 0 *default-log-port* "Records count after clean: " tot))
- count-stmt)))
+ (debug:print-info 0 *default-log-port* "Test records count after clean: " tot))
+ test-count-stmt)
+ (sqlite3:for-each-row (lambda (tot)
+ (debug:print-info 0 *default-log-port* "Test_step records count after clean: " tot))
+ step-count-stmt)))
(map sqlite3:finalize! statements)
- (sqlite3:finalize! count-stmt)
- ;; (db:find-and-mark-incomplete db)
- ;; (db:delay-if-busy dbdat)
+ (sqlite3:finalize! test-count-stmt)
+ (sqlite3:finalize! step-count-stmt)
(sqlite3:execute db "VACUUM;")))
;; Clean out old junk and vacuum the database
;;
;; Ultimately do something like this:
@@ -1234,15 +1279,15 @@
;; (db:delay-if-busy dbdat)
(sqlite3:with-transaction
db
(lambda ()
(sqlite3:for-each-row (lambda (tot)
- (debug:print-info 0 *default-log-port* "Records count before clean: " tot))
+ (debug:print-info 0 *default-log-port* "Run records count before clean: " tot))
count-stmt)
(map sqlite3:execute statements)
(sqlite3:for-each-row (lambda (tot)
- (debug:print-info 0 *default-log-port* "Records count after clean: " tot))
+ (debug:print-info 0 *default-log-port* "Run records count after clean: " tot))
count-stmt)))
(map sqlite3:finalize! statements)
(sqlite3:finalize! count-stmt)
;; (db:find-and-mark-incomplete db)
;; (db:delay-if-busy dbdat)
@@ -1253,11 +1298,11 @@
;; no-sync.db - small bits of data to be shared between servers
;;======================================================================
(define (db:get-dbsync-path)
(case (rmt:transport-mode)
- ((http)(common:get-db-tmp-area))
+ ((http)(common:make-tmpdir-name *toppath* ""))
((tcp) (conc *toppath*"/.mtdb"))
((nfs) (conc *toppath*"/.mtdb"))
(else "/tmp/dunno-this-gonna-exist")))
;; This is needed for api.scm
@@ -1418,62 +1463,85 @@
#f
(simple-run-id (car runs)))))
;; called with run-id=#f so will operate on main.db
;;
-(define (db:insert-run dbstruct target runname run-meta)
+(define (db:insert-run dbstruct run-id target runname run-meta)
(let* ((keys (db:get-keys dbstruct))
(runs (db:simple-get-runs dbstruct runname #f #f target #f))) ;; runpatt count offset target last-update
;; need to insert run based on target and runname
(let* ((targvals (string-split target "/"))
(keystr (string-intersperse keys ","))
(key?str (string-intersperse (make-list (length targvals) "?") ","))
- (qrystr (conc "INSERT INTO runs (runname,"keystr") VALUES (?,"key?str")"))
+ (qrystr (conc "INSERT INTO runs (id,runname,"keystr") VALUES (?,?,"key?str")"))
(get-var (lambda (db qrystr)
(let* ((res #f))
(sqlite3:for-each-row
(lambda row
(set res (car row)))
- db qrystr runname)
+ db qrystr run-id runname)
res))))
(if (null? runs)
- (db:create-initial-run-record dbstruct runname target))
- (let* ((run-id (db:get-run-id dbstruct runname target)))
- (db:with-db
+ (begin
+ (db:create-initial-run-record dbstruct run-id runname target)
+ )
+ )
+ (let* ()
+ ;;(debug:print 0 *default-log-port* "db:insert-run: Calling db:with-db to update the run record")
+ (debug:print 0 *default-log-port* "db:insert-run: runid = " run-id)
+#; (db:with-db
dbstruct
#f #t
(lambda (dbdat db)
+ (debug:print 0 *default-log-port* "In the lambda proc for " dbdat " " db)
(for-each
(lambda (keyval)
+ (debug:print 0 *default-log-port* "In the lambda proc for " keyval)
(let* ((fieldname (car keyval))
(getqry (conc "SELECT "fieldname" FROM runs WHERE id=?;"))
(setqry (conc "UPDATE runs SET "fieldname"=? WHERE id=?;"))
(val (cdr keyval))
(valnum (if (number? val)
val
(if (string? val)
(string->number val)
#f))))
+ (debug:print 0 *default-log-port* "fieldname " fieldname " val " val " valnum " valnum)
(if (not (member fieldname (cons "runname" keys))) ;; don't attempt to tweak these
(let* ((curr-val (get-var db getqry))
(have-it (or (equal? curr-val val)
(equal? curr-val valnum))))
+ (debug:print 0 *default-log-port* "have-it = " have-it)
(if (not have-it)
- (sqlite3:execute db setqry (or valnum val) run-id))))))
+ (begin
+ (debug:print 0 *default-log-port* "Do sqlite3:execute")
+ ;; (sqlite3:execute db setqry (or valnum val) run-id)
+ )
+ )
+ )
+ )
+ (debug:print 0 *default-log-port* "Done with update")
+ )
+ (debug:print 0 *default-log-port* "next keyval")
+ )
run-meta)))
run-id))))
-(define (db:create-initial-run-record dbstruct runname target)
+(define (db:create-initial-run-record dbstruct run-id runname target)
(let* ((keys (db:get-keys dbstruct))
(targvals (string-split target "/"))
(keystr (string-intersperse keys ","))
- (key?str (string-intersperse (make-list (length targvals) "?") ","))
- (qrystr (conc "INSERT INTO runs (runname,"keystr") VALUES (?,"key?str")")))
+ (key?str (string-intersperse (make-list (length targvals) "?") ",")) ;; a string with the same length as targvals, where each element is "?" and interspersed with commas.
+ (qrystr (conc "INSERT INTO runs (id,runname,"keystr") VALUES (?,?,"key?str")")))
+ (debug:print 0 *default-log-port* "db:create-initial-run-record")
+ (debug:print 0 *default-log-port* "qrystr = " qrystr)
+
(db:with-db
- dbstruct #f #t
+ dbstruct #f #t ;; run-id writable
(lambda (dbdat db)
- (apply sqlite3:execute db qrystr runname targvals)))))
+ (debug:print 0 *default-log-port* "lambda proc: dbdat: " dbdat " db: " db)
+ (apply sqlite3:execute db qrystr run-id runname targvals)))))
(define (db:insert-test dbstruct run-id test-rec)
(let* ((testname (alist-ref "testname" test-rec equal?))
(item-path (alist-ref "item_path" test-rec equal?))
(id (db:get-test-id dbstruct run-id testname item-path))
@@ -1483,11 +1551,11 @@
(conc (car dat)"=?"))
fieldvals)
",")" WHERE id=?;"))
(insqry (conc "INSERT INTO tests ("(string-intersperse (map (lambda (x) (car x)) fieldvals) ",")
") VALUES ("(string-intersperse (make-list (length fieldvals) "?") ",")");")))
- (debug:print 0 *default-log-port* "id: "id"\nset: "setqry"\ninsqry: "insqry)
+ ;; (debug:print 0 *default-log-port* "id: "id"\nset: "setqry"\ninsqry: "insqry)
(db:with-db
dbstruct
run-id #t
(lambda (dbdat db)
(if id
@@ -1579,18 +1647,18 @@
;; TODO: Switch this to use max(update_time) from each run db? Then if using a server there is no disk traffic (using cachedb db) ???
;;
;; NOTE: This DOESN'T (necessarily) get the real run ids, but the number of the .db!!
(define (db:get-changed-run-ids since-time)
- (let* ((dbdir (db:dbfile-path)) ;; (configf:lookup *configdat* "setup" "dbdir"))
- (alldbs (glob (conc dbdir "/.mtdb/[0-9]*.db*")))
+ (let* ((dbdir (common:make-tmpdir-name *toppath* "")) ;; (configf:lookup *configdat* "setup" "dbdir"))
+ (alldbs (glob (conc *toppath* "/.mtdb/[0-9]*.db*")))
(changed (filter (lambda (dbfile)
(> (file-modification-time dbfile) since-time))
alldbs)))
(delete-duplicates
(map (lambda (dbfile)
- (let* ((res (string-match ".*\\/(\\d\\d)\\.db*" dbfile)))
+ (let* ((res (string-match ".*\\/(\\d+)\\.db*" dbfile)))
(if res
(string->number (cadr res))
(begin
(debug:print 2 *default-log-port* "WARNING: Failed to process " dbfile " for run-id")
0))))
@@ -2233,21 +2301,24 @@
qry
run-id
(or last-update 0))))))
(define (db:get-testinfo-state-status dbstruct run-id test-id)
- (let ((res #f))
- (db:with-db dbstruct run-id #f
- (lambda (dbdat db)
- (sqlite3:for-each-row
- (lambda (run-id testname item-path state status)
- ;; id,run_id,testname,state,status,event_time,host,cpuload,diskfree,uname,rundir,item_path,run_duration,final_logf,comment
- (set! res (vector test-id run-id testname state status -1 "" -1 -1 "" "-" item-path -1 "-" "-")))
- db
- "SELECT run_id,testname,item_path,state,status FROM tests WHERE id=? and run_id=?;"
- test-id run-id)))
- res))
+ (db:with-db
+ dbstruct run-id #f
+ (lambda (dbdat db)
+ (let* ((res #f)
+ (stmth (db:get-cache-stmth dbdat db "SELECT run_id,testname,item_path,state,status FROM tests WHERE id=? and run_id=?;")))
+ (sqlite3:for-each-row
+ (lambda (run-id testname item-path state status)
+ ;; id,run_id,testname,state,status,event_time,host,cpuload,diskfree,uname,rundir,item_path,run_duration,final_logf,comment
+ (set! res (vector test-id run-id testname state status -1 "" -1 -1 "" "-" item-path -1 "-" "-")))
+ ;; db
+ ;; "SELECT run_id,testname,item_path,state,status FROM tests WHERE id=? and run_id=?;"
+ stmth
+ test-id run-id)
+ res))))
;; get a useful subset of the tests data (used in dashboard
;; use db:mintest-get-{id ,run_id,testname ...}
;;
(define (db:get-tests-for-run-mindata dbstruct run-id testpatt states statuses not-in)
@@ -2278,25 +2349,37 @@
dbstruct run-id #t
(lambda (dbdat db)
(sqlite3:execute db "UPDATE tests SET state='DELETED',status='n/a',comment='' WHERE id=?;" test-id))))
;;
-(define (db:delete-old-deleted-test-records dbstruct)
- (let ((targtime (- (current-seconds)
- (or (configf:lookup-number *configdat* "setup" "keep-deleted-records")
- (* 30 24 60 60))))) ;; one month in the past
+(define (db:delete-old-deleted-test-records dbstruct run-id)
+ (let* ((targtime (- (current-seconds)
+ (or (configf:lookup-number *configdat* "setup" "keep-deleted-records")
+ (* 7 24 60 60)))) ;; cleanup if over one week old
+ (mtdbfile (dbmod:run-id->full-dbfname dbstruct run-id))
+ (qry1 "DELETE FROM test_steps WHERE test_id IN (SELECT id FROM tests WHERE state='DELETED' AND event_time);")
+ (qry2 "DELETE FROM test_data WHERE test_id IN (SELECT id FROM tests WHERE state='DELETED' AND event_time);")
+ (qry3 "DELETE FROM tests WHERE state='DELETED' AND event_time;")
+ (delproc (lambda (db)
+ (sqlite3:with-transaction
+ db
+ (lambda ()
+ (sqlite3:execute db qry1 targtime)
+ (sqlite3:execute db qry2 targtime)
+ (sqlite3:execute db qry3 targtime))))))
+ ;; first the /tmp db
(db:with-db
dbstruct
- 0
+ run-id
#t
(lambda (dbdat db)
- (sqlite3:with-transaction
- db
- (lambda ()
- (sqlite3:execute db "DELETE FROM test_steps WHERE test_id IN (SELECT id FROM tests WHERE state='DELETED' AND event_time);" targtime)
- (sqlite3:execute db "DELETE FROM test_data WHERE test_id IN (SELECT id FROM tests WHERE state='DELETED' AND event_time);" targtime)
- (sqlite3:execute db "DELETE FROM tests WHERE state='DELETED' AND event_time;" targtime)))))))
+ (delproc db)))
+ (if (and (file-exists? mtdbfile)
+ (file-write-access? mtdbfile))
+ (let* ((db (sqlite3:open-database mtdbfile)))
+ (delproc db)
+ (sqlite3:finalize! db)))))
;; set tests with state currstate and status currstatus to newstate and newstatus
;; use currstate = #f and or currstatus = #f to apply to any state or status respectively
;; WARNING: SQL injection risk. NB// See new but not yet used "faster" version below
;;
@@ -2359,20 +2442,18 @@
)
;; NEW BEHAVIOR: Count tests running in all runs!
;;
(define (db:get-count-tests-running dbstruct run-id) ;; fastmode)
- (let* ((qry ;; (if fastmode
- ;; "SELECT count(id) FROM tests WHERE state in ('RUNNING','LAUNCHED','REMOTEHOSTSTART') AND NOT (uname = 'n/a' AND item_path = '') LIMIT 1;"
- "SELECT count(id) FROM tests WHERE state in ('RUNNING','LAUNCHED','REMOTEHOSTSTART') AND NOT (uname = 'n/a' AND item_path = '');")) ;; )
- (db:with-db
- dbstruct
- run-id
- #f
- (lambda (dbdat db)
- (let* ((stmth (db:get-cache-stmth dbdat db qry)))
- (sqlite3:first-result stmth))))))
+ (let* ((qry "SELECT count(id) FROM tests WHERE state in ('RUNNING','LAUNCHED','REMOTEHOSTSTART') AND NOT (uname = 'n/a' AND item_path = '');"))
+ (db:with-db
+ dbstruct
+ run-id
+ #f
+ (lambda (dbdat db)
+ (let* ((stmth (db:get-cache-stmth dbdat db qry)))
+ (sqlite3:first-result stmth))))))
;; NEW BEHAVIOR: Count tests running in only one run!
;;
(define (db:get-count-tests-actually-running dbstruct run-id)
(db:with-db
@@ -2638,18 +2719,18 @@
(db:with-db
dbstruct
run-id
#f
(lambda (dbdat db)
- (let ((res (cons #f #f)))
-;; (stmth (db:get-cache-stmth dbdat db "SELECT state,status FROM tests WHERE id=?;")))
- (sqlite3:for-each-row ;; attemptnum added to hold pid of top process (not Megatest) controlling a test
- (lambda (state status)
- (cons state status))
- db
- "SELECT state,status FROM tests WHERE id=? AND run_id=?;" ;; stmth try not compiling this one - yes, this fixed the bind issue
- test-id run-id)
+ (let ((res (cons #f #f))
+ (stmth (db:get-cache-stmth dbdat db "SELECT state,status FROM tests WHERE id=? AND run_id=?;")))
+ (sqlite3:for-each-row ;; attemptnum added to hold pid of top process (not Megatest) controlling a test
+ (lambda (state status)
+ (cons state status))
+ ;; db
+ stmth ;;"SELECT state,status FROM tests WHERE id=? AND run_id=?;" ;; stmth try not compiling this one - yes, this fixed the bind issue
+ test-id run-id)
res))))
;; Use db:test-get* to access
;; Get test data using test_ids. NB// Only works within a single run!!
;;
@@ -3729,25 +3810,35 @@
tags)))
db
"SELECT testname,tags FROM test_meta")
(hash-table->alist res)))))
+;; testmeta doesn't change, we can cache it for up too an hour
+
+(define *db:testmeta-cache* (make-hash-table))
+(define *db:testmeta-last-update* 0)
+
;; read the record given a testname
(define (db:testmeta-get-record dbstruct testname)
- (let ((res #f))
- (db:with-db
- dbstruct
- #f
- #f
- (lambda (dbdat db)
- (sqlite3:for-each-row
- (lambda (id testname author owner description reviewed iterated avg_runtime avg_disk tags jobgroup)
- (set! res (vector id testname author owner description reviewed iterated avg_runtime avg_disk tags jobgroup)))
- db
- "SELECT id,testname,author,owner,description,reviewed,iterated,avg_runtime,avg_disk,tags,jobgroup FROM test_meta WHERE testname=?;"
- testname)
- res))))
+ (if (and (< (- (current-seconds) *db:testmeta-last-update*) 600)
+ (hash-table-exists? *db:testmeta-cache* testname))
+ (hash-table-ref *db:testmeta-cache* testname)
+ (let ((res #f))
+ (db:with-db
+ dbstruct
+ #f
+ #f
+ (lambda (dbdat db)
+ (sqlite3:for-each-row
+ (lambda (id testname author owner description reviewed iterated avg_runtime avg_disk tags jobgroup)
+ (set! res (vector id testname author owner description reviewed iterated avg_runtime avg_disk tags jobgroup)))
+ db
+ "SELECT id,testname,author,owner,description,reviewed,iterated,avg_runtime,avg_disk,tags,jobgroup FROM test_meta WHERE testname=?;"
+ testname)))
+ (hash-table-set! *db:testmeta-cache* testname res)
+ (set! *db:testmeta-last-update* (current-seconds))
+ res)))
;; create a new record for a given testname
(define (db:testmeta-add-record dbstruct testname)
(db:with-db dbstruct #f #t
(lambda (dbdat db)
@@ -4014,11 +4105,11 @@
(delete-duplicates result)))))
;;======================================================================
;; To sync individual run
;;======================================================================
-(define (db:get-run-record-ids dbstruct target run keynames test-patt)
+(define (db:get-run-record-ids dbstruct target run keynames)
(let* ((backcons (lambda (lst item)(cons item lst)))
(all_tests '())
(keystr (string-intersperse
(map (lambda (key val)
(conc key " like '" val "'"))
@@ -4025,39 +4116,19 @@
keynames
(string-split target "/"))
" AND ")
)
(run-qry (conc "SELECT id FROM runs WHERE " keystr " and runname='" run"'"))
- (test-qry (conc "SELECT id FROM tests WHERE run_id in (" run-qry ") and testname like '" test-patt "'"))
+ ; (test-qry (conc "SELECT id FROM tests WHERE run_id in (" run-qry ") and testname like '" test-patt "'"))
(run_ids
(db:with-db dbstruct #f #f
(lambda (dbdat db)
(sqlite3:fold-row backcons '() db run-qry))
)
)
- )
- (for-each
- (lambda (run_id)
- (set! all_tests
- (append
- (map (lambda (x) (cons x run_id))
- (db:with-db dbstruct run_id #f
- (lambda (dbdat db)
- (sqlite3:fold-row backcons '() db (conc "SELECT id FROM tests WHERE run_id in (" run_id ") and testname like '" test-patt "'"))
- )
- )
- ) all_tests
- )
- )
- )
- run_ids
- )
- `((runs . ,run_ids)
- (tests . ,all_tests)
- )
-
- )
+ )
+ run_ids)
)
;;======================================================================
;; Just for sync, procedures to make sync easy
;;======================================================================
@@ -4069,11 +4140,11 @@
;; The function takes two arguments: dbstruct, which represents the database structure, and since-time, which is a timestamp indicating the time of the last update.
;; The function first defines a few helper functions, including backcons, which takes a list and an item and adds the item to the front of the list.
;; It then initializes several variables to empty lists: all_tests, all_test_steps, all_test_data, all_run_ids, and all_test_ids.
;; The function then retrieves a list of IDs for runs that have been changed since since-time using the db:get-changed-run-ids function.
-;; It then filters the full list of run IDs to only include those that match the changed run IDs based on their modulo 100.
+;; It then filters the full list of run IDs to only include those that match the changed run IDs based on their modulo (num-run-dbs).
;; For each changed run ID, the function retrieves a list of test IDs, test step IDs, and test data IDs that have been updated since since-time.
;; It appends these IDs to the appropriate lists (all_tests, all_test_steps, and all_test_data) using the append and map functions.
;; The function then retrieves a list of run stat IDs that have been updated since since-time.
;; Finally, the function returns a list of associations between record types and their corresponding IDs: runs, tests, test_steps, test_data, and run_stats.
;;
@@ -4086,11 +4157,11 @@
(db:with-db dbstruct #f #f
(lambda (dbdat db)
(sqlite3:fold-row backcons '() db "SELECT id FROM runs"))
)
)
- (changed_run_ids (filter (lambda (run) (member (modulo run 100) changed_run_dbs)) all_run_ids))
+ (changed_run_ids (filter (lambda (run) (member (modulo run (num-run-dbs)) changed_run_dbs)) all_run_ids))
(run_ids
(db:with-db dbstruct #f #f
(lambda (dbdat db)
(sqlite3:fold-row backcons '() db "SELECT id FROM runs WHERE last_update>=?" since-time))
)
@@ -4119,10 +4190,38 @@
(tests . ,all_tests)
)
)
)
+
+
+(define (db:get-changed-record-test-ids dbstruct since-time run-id)
+ (let* ((backcons (lambda (lst item)(cons item lst)))
+ (all-tests (db:with-db dbstruct run-id #f
+ (lambda (dbdat db)
+ (sqlite3:fold-row backcons '() db "SELECT id FROM tests WHERE run_id=? and last_update>=?" run-id since-time)))))
+
+ all-tests))
+
+(define (db:get-changed-record-run-ids dbstruct since-time)
+ ;; no transaction, allow the db to be accessed between the big queries
+ (let* ((backcons (lambda (lst item)(cons item lst)))
+ (run_ids (db:with-db dbstruct #f #f
+ (lambda (dbdat db)
+ (sqlite3:fold-row backcons '() db "SELECT id FROM runs WHERE last_update>=?" since-time)))))
+ (debug:print 2 *default-log-port* "run_ids = " run_ids)
+ run_ids)
+)
+
+(define (db:get-all-runids dbstruct)
+ (let* ((backcons (lambda (lst item)(cons item lst)))
+ (all_run_ids (db:with-db dbstruct #f #f
+ (lambda (dbdat db)
+ (sqlite3:fold-row backcons '() db "SELECT id FROM runs")))))
+
+all_run_ids))
+
;;======================================================================
;; Extract ods file from the db
;;======================================================================
;; NOT REWRITTEN YET!!!!!
@@ -4307,11 +4406,11 @@
))))
;; sync for filesystem local db writes
;;
(define (db:run-lock-and-sync no-sync-db)
- (let* ((tmp-area (common:get-db-tmp-area))
+ (let* ((tmp-area (common:make-tmpdir-name *toppath* ""))
(dbfiles (glob (conc tmp-area"/.mtdb/*.db")))
(sync-durations (make-hash-table)))
;; (debug:print-info 0 *default-log-port* "lock-and-sync, dbfiles: "dbfiles)
(for-each
(lambda (file)
@@ -4363,11 +4462,11 @@
(sync-stale-seconds (configf:lookup-number *configdat* "server" "sync-stale-seconds" default: 300))
(debug-mode (debug:debug-mode 1))
(last-time (current-seconds)) ;; last time through the sync loop
(no-sync-db (db:open-no-sync-db))
(sync-duration 0) ;; run time of the sync in milliseconds
- (tmp-area (common:get-db-tmp-area)))
+ (tmp-area (common:make-tmpdir-name *toppath* "")))
;; Sync moved to http-transport keep-running loop
(debug:print-info 2 *default-log-port* "Periodic copy-based sync thread started. syncer is copy-sync, tmp-area is " tmp-area)
(debug:print-info 3 *default-log-port* "watchdog starting. syncer is copy-sync pid="(current-process-id));; " this-wd-num="this-wd-num)
(if (and legacy-sync (not *time-to-exit*))
@@ -4471,11 +4570,11 @@
(for-each
(lambda (subdb)
(let* (;;(dbstruct (db:setup))
(mtdb (dbr:subdb-mtdb subdb))
(mtpath (db:dbdat-get-path mtdb))
- (tmp-area (common:get-db-tmp-area))
+ (tmp-area (common:make-tmpdir-name *toppath* ""))
(res (db:sync-to-megatest.db dbstruct no-sync-db: no-sync-db))) ;; did we sync any data? If so need to set the db touched flag to keep the server alive
(set! sync-duration (- (current-milliseconds) sync-start))
(if (> res 0) ;; some records were transferred, keep the db alive
(begin
(mutex-lock! *heartbeat-mutex*)
@@ -4518,11 +4617,10 @@
;; ;; time to exit, close the no-sync db here
;; (db:no-sync-close-db no-sync-db stmt-cache)
(if (common:low-noise-print 30)
(debug:print-info 0 *default-log-port* "Exiting watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id) ))))
))
-
(define (std-exit-procedure)
;;(common:telemetry-log-close)
(on-exit (lambda () 0)) ;; why is this here?
;;(debug:print-info 13 *default-log-port* "std-exit-procedure called; *time-to-exit*="*time-to-exit*)
Index: dbfile.scm
==================================================================
--- dbfile.scm
+++ dbfile.scm
@@ -16,39 +16,86 @@
;; You should have received a copy of the GNU General Public License
;; along with Megatest. If not, see .
;;======================================================================
-(use srfi-18)
+(use srfi-18 posix hostinfo)
(declare (unit dbfile))
(declare (uses debugprint))
(declare (uses commonmod))
(module dbfile
*
-
- (import scheme
- chicken
+(import scheme)
+
+(cond-expand
+ (chicken-4
+
+ (import chicken
data-structures
extras
matchable
(prefix sqlite3 sqlite3:)
- posix typed-records
+ posix posix-extras typed-records
srfi-18
srfi-1
srfi-69
stack
files
ports
+ hostinfo
+
+ commonmod
+ debugprint
+ )
+ )
+ (chicken-5
+ (import (prefix sqlite3 sqlite3:)
+ ;; data-structures
+ ;; extras
+ ;; files
+ ;; posix
+ ;; posix-extras
+ chicken.base
+ chicken.condition
+ chicken.file
+ chicken.file.posix
+ chicken.format
+ chicken.io
+ chicken.pathname
+ chicken.port
+ chicken.process
+ chicken.process-context
+ chicken.process-context.posix
+ chicken.sort
+ chicken.string
+ chicken.time
+ chicken.time.posix
+ matchable
+ md5
+ message-digest
+ pathname-expand
+ regex
+ regex-case
+ srfi-1
+ srfi-18
+ srfi-69
+ typed-records
+ stack
+ system-information
+
commonmod
debugprint
)
-
+ (define file-write-access? file-writable?)
+ (define file-move move-file)
+ ))
+
;; parameters
;;
(define dbfile:testsuite-name (make-parameter #f))
(define keep-age-param (make-parameter 10)) ;; qif file age, if over move to attic
@@ -121,10 +168,50 @@
(define-record simple-run target id runname state status owner event_time)
(define-record-printer (simple-run x out)
(fprintf out "#,(simple-run ~S ~S ~S ~S)"
(simple-run-target x) (simple-run-id x) (simple-run-runname x) (time->string (seconds->local-time (simple-run-event_time x) ))))
+;; args is hash table of string to value
+;;
+(define (get-purpose args)
+ (let* ((get-arg (lambda (key)
+ (hash-table-ref/default args key #f)))
+ (get-switch (lambda keys
+ (fold
+ (lambda (key res)
+ (if (hash-table-ref/default args key #f)
+ (or key res)
+ res))
+ #f
+ keys)))
+ (action (get-switch "-server" "-execute" "-run" "-rerun")))
+ (cond
+ (action
+ (substring action 1 (string-length action)))
+ (else
+ "nopurpose"))))
+
+;; megatest process tracking
+
+(defstruct procinf
+ (start (current-seconds))
+ (end -1)
+ (host (get-host-name)) ;; why is this not being recognised?
+ (pid (current-process-id))
+ (port -1)
+ (cwd (current-directory))
+ (load #f)
+ (purpose #f) ;; get-purpose needed to be run in megatest.scm
+ (dbname #f)
+ (mtbin (car (argv)))
+ (mtversion #f)
+ (status "running")
+
+
+ )
+
+(define *procinf* (make-procinf))
(define *dbstruct-dbs* #f)
(define *db-open-mutex* (make-mutex))
(define *db-access-mutex* (make-mutex)) ;; used in common.scm
(define *no-sync-db* #f)
(define *db-sync-in-progress* #f)
@@ -201,11 +288,12 @@
#f
)
)
(define (dbfile:make-tmpdir-name areapath tmpadj)
- (let* ((dname (conc "/tmp/"(current-user-name)"/" (string-translate areapath "/" ".") tmpadj)))
+ (let* ((area (pathname-file areapath))
+ (dname (conc "/tmp/"(current-user-name)"/megatest_localdb/" area "/" (string-translate areapath "/" ".") tmpadj "/.mtdb")))
(unless (directory-exists? dname)
(create-directory dname #t))
dname))
(define (dbfile:run-id->path apath run-id)
@@ -226,18 +314,18 @@
(define (dbfile:run-id->dbfname run-id)
(conc (dbfile:run-id->dbnum run-id)".db"))
;; the path in MTRAH with the filename
(define (dbfile:run-id->dbname run-id)
- (conc ".mtdb/"(dbfile:run-id->dbfname run-id)))
+ (conc (dbfile:run-id->dbfname run-id)))
;; Make the dbstruct, setup up auxillary db's and call for main db at least once
;;
;; called in http-transport and replicated in rmt.scm for *local* access.
;;
-(define (dbfile:setup do-sync areapath tmppath)
+(define (dbfile:setup areapath tmppath)
(cond
(*dbstruct-dbs*
(dbfile:print-err "WARNING: dbfile:setup called when *dbstruct-dbs* is already initialized")
*dbstruct-dbs*) ;; TODO: when multiple areas are supported, this optimization will be a hazard
(else
@@ -317,11 +405,12 @@
(if dbdat
dbdat
(let* ((tmppath (dbr:dbstruct-tmppath dbstruct))
(tmpdbpath (dbfile:run-id->path tmppath run-id))
(dbdat (dbfile:open-sqlite3-db tmpdbpath init-proc sync-mode: 0 journal-mode: "WAL")))
- ;; the following line short-circuits the "one db handle per thread" model
+
+ ;; the following line short-circuits the "one db handle per thread" model
;;
;; (dbfile:add-dbdat dbstruct run-id dbdat)
;;
dbdat))))))
@@ -362,21 +451,22 @@
(with-output-to-port
(current-error-port)
(lambda ()
(apply print params))))
-(define (dbfile:cautious-open-database fname init-proc sync-mode journal-mode #!optional (tries-left 500))
+(define (dbfile:cautious-open-database fname init-proc sync-mode journal-mode
+ #!key (tries-left 500)(force-init #f))
(let* ((busy-file (conc fname "-journal"))
(delay-time (* (- 51 tries-left) 1.1))
(write-access (file-write-access? fname))
(dir-access (file-write-access? (pathname-directory fname)))
(retry (lambda ()
(thread-sleep! delay-time)
(if (> tries-left 0)
(dbfile:cautious-open-database fname init-proc
sync-mode journal-mode
- (- tries-left 1))))))
+ tries-left: (- tries-left 1))))))
(assert (>= tries-left 0) (conc "FATAL: too many attempts in dbfile:cautious-open-database of "fname", giving up."))
(if (and (file-write-access? fname)
(file-exists? busy-file))
(begin
@@ -386,11 +476,11 @@
(thread-sleep! 1)
(if (eq? tries-left 2)
(begin
(dbfile:print-err "INFO: forcing journal rollup "busy-file)
(dbfile:brute-force-salvage-db fname)))
- (dbfile:cautious-open-database fname init-proc sync-mode journal-mode (- tries-left 1)))
+ (dbfile:cautious-open-database fname init-proc sync-mode journal-mode tries-left: (- tries-left 1)))
(let* ((result (condition-case
(if dir-access
(dbfile:with-simple-file-lock
(conc fname ".lock")
@@ -400,21 +490,23 @@
(sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 30000))
(if sync-mode
(sqlite3:execute db (conc "PRAGMA synchronous = "sync-mode";")))
(if journal-mode
(sqlite3:execute db (conc "PRAGMA journal_mode = "journal-mode";")))
- (if (and init-proc (not db-exists))
+ (if (and init-proc (or force-init
+ (not db-exists)))
(init-proc db))
- db)))
+ db))
+ expire-time: 30)
(begin
(if (file-exists? fname )
(let ((db (sqlite3:open-database fname)))
;; pragmas synchronous not needed because this db is used read-only
;; (sqlite3:execute db (conc "PRAGMA synchronous = "mode";")
(sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 30000)) ;; read-only but still need timeout
db )
- (print "file doesn't exist: " fname))))
+ (print "cautious-open-database: file doesn't exist: " fname))))
(exn (io-error)
(dbfile:print-err exn "ERROR: i/o error with " fname ". Check permissions, disk space etc. and try again.")
(retry))
(exn (corrupt)
(dbfile:print-err exn "ERROR: database " fname " is corrupt. Repair it to proceed.")
@@ -444,11 +536,11 @@
;; NOTE: this is already protected by mutex *no-sync-db-mutex*
;;
(define (dbfile:raw-open-no-sync-db dbpath)
(if (not (file-exists? dbpath))
(create-directory dbpath #t))
- (debug:print-info 0 *default-log-port* "Opening "dbpath"/no-sync.db")
+ (debug:print-info 2 *default-log-port* "(dbfile:raw-open-no-sync-db: Opening "dbpath"/no-sync.db")
(let* ((dbname (conc dbpath "/no-sync.db"))
(db-exists (file-exists? dbname))
(init-proc (lambda (db)
(sqlite3:with-transaction
db
@@ -465,22 +557,124 @@
val TEXT,
CONSTRAINT no_sync_metadat_constraint UNIQUE (var));"
"CREATE TABLE IF NOT EXISTS no_sync_locks
(key TEXT,
val TEXT,
- CONSTRAINT no_sync_metadat_constraint UNIQUE (key));"))))))
+ CONSTRAINT no_sync_metadat_constraint UNIQUE (key));"
+ "CREATE TABLE IF NOT EXISTS processes
+ (id INTEGER PRIMARY KEY,
+ host TEXT,
+ port INTEGER,
+ pid INTEGER,
+ starttime INTEGER,
+ endtime INTEGER,
+ status TEXT,
+ purpose TEXT,
+ dbname TEXT,
+ mtversion TEXT,
+ reason TEXT DEFAULT 'none',
+ CONSTRAINT no_sync_processes UNIQUE (host,pid));"
+ ))))))
(on-tmp (equal? (car (string-split dbpath "/")) "tmp"))
(db (if on-tmp
- (dbfile:cautious-open-database dbname init-proc 0 "WAL")
- (dbfile:cautious-open-database dbname init-proc 0 #f)
+ (dbfile:cautious-open-database dbname init-proc 1 "WAL" force-init: #t) ;; WAL MODE should use syncronous=1
+ ;; (dbfile:cautious-open-database dbname init-proc 0 #f force-init: #t)
+ (dbfile:cautious-open-database dbname init-proc 0 "MEMORY" force-init: #t) ;; Journal mode = memory is fastest?
;; (sqlite3:open-database dbname)
)))
- (if on-tmp ;; done in cautious-open-database
- (begin
- (sqlite3:execute db "PRAGMA synchronous = 0;")
- (sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 136000))))
+ ;; (if on-tmp ;; done in cautious-open-database
+ ;; (begin
+ ;; (sqlite3:execute db "PRAGMA synchronous = 0;") ;; why was this here when is is handled by cautious-open-database?
+ (sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 136000)) ;; ))
db))
+
+;; mtest processes registry calls
+
+(define (dbfile:insert-or-update-process nsdb dat)
+ (let* ((host (procinf-host dat))
+ (pid (procinf-pid dat))
+ (curr-info (dbfile:get-process-info nsdb host pid)))
+ (if curr-info ;; record exists, do update
+ (match curr-info
+ ((host port pid starttime endtime status purpose dbname mtversion)
+ (sqlite3:execute
+ nsdb
+ "UPDATE processes SET port=?,starttime=?,endtime=?,status=?,
+ purpose=?,dbname=?,mtversion=?
+ WHERE host=? AND pid=?;"
+ (or (procinf-port dat) port)
+ (or (procinf-start dat) starttime)
+ (or (procinf-end dat) endtime)
+ (or (procinf-status dat) status)
+ (or (procinf-purpose dat) purpose)
+ (or (procinf-dbname dat) dbname)
+ (or (procinf-mtversion dat) mtversion)
+ host pid))
+ (else
+ #f ;; what to do?
+ ))
+ (dbfile:register-process
+ nsdb
+ (procinf-host dat)
+ (procinf-port dat)
+ (procinf-pid dat)
+ (procinf-start dat)
+ (procinf-end dat)
+ (procinf-status dat)
+ (procinf-purpose dat)
+ (procinf-dbname dat)
+ (procinf-mtversion dat)))))
+
+
+(define (dbfile:register-process nsdb host port pid starttime endtime status purpose dbname mtversion)
+ (sqlite3:execute nsdb "INSERT INTO processes (host,port,pid,starttime,endtime,status,purpose,dbname,mtversion) VALUES (?,?,?,?,?,?,?,?,?);"
+ host port pid starttime endtime status purpose dbname mtversion))
+
+(define (dbfile:set-process-status nsdb host pid newstatus)
+ (sqlite3:execute nsdb "UPDATE processes SET status=? WHERE host=? AND pid=?;" newstatus host pid))
+
+;; as sorted should be stable. can use to choose "winner"
+;;
+(define (dbfile:get-process-options nsdb purpose dbname)
+ (sqlite3:fold-row
+ ;; host port pid starttime status mtversion
+ (lambda (res . row)
+ (cons row res))
+ '()
+ nsdb
+ "SELECT host,port,pid,starttime,endtime,status,mtversion FROM processes WHERE purpose=? AND dbname LIKE ? AND status IN ('running','alive') ORDER BY starttime ASC,host,port;"
+ purpose dbname))
+
+(define (dbfile:get-process-info nsdb host pid)
+ (let ((res (sqlite3:fold-row
+ ;; host port pid starttime status mtversion
+ (lambda (res . row)
+ (cons row res))
+ '()
+ nsdb
+ "SELECT host,port,pid,starttime,endtime,status,purpose,dbname,mtversion FROM processes WHERE host=? AND pid=?;"
+ host pid)))
+ (if (null? res)
+ #f
+ (car res))))
+
+(define (dbfile:row->procinf row)
+ (match row
+ ((host port pid starttime endtime status mtversion)
+ (make-procinf host: host port: port pid: pid starttime: starttime endtime: endtime status: status mtversion: mtversion))
+ (else
+ (debug:print 0 *default-log-port* "ERROR: row "row" did not match host,port,pid,starttime,endtime,status,mtversion")
+ #f)))
+
+(define (dbfile:set-process-done nsdb host pid reason)
+ (sqlite3:execute nsdb "UPDATE processes SET status='done',endtime=?,reason=? WHERE host=? AND pid=?;" (current-seconds) reason host pid)
+ (dbfile:cleanup-old-entries nsdb))
+
+(define (dbfile:cleanup-old-entries nsdb)
+ (sqlite3:execute nsdb "DELETE FROM process WHERE status='done' AND endtime;" (- (current-seconds) (* 3600 48))))
+
+;; other no-sync functions
(define (dbfile:with-no-sync-db dbpath proc)
(mutex-lock! *no-sync-db-mutex*)
(let* ((already-open *no-sync-db*)
(db (or already-open (dbfile:raw-open-no-sync-db dbpath)))
@@ -541,20 +735,26 @@
;; fails returns (#f lock-creation-time identifier)
;; succeeds (returns (#t lock-creation-time identifier)
;; use (db:no-sync-del! db keyname) to release the lock
;;
(define (db:no-sync-get-lock-with-id db keyname identifier)
+ (debug:print 2 *default-log-port* "db:no-sync-get-lock-with-id: db: " db " keyname: " keyname " identifier: " identifier)
(sqlite3:with-transaction
db
(lambda ()
(condition-case
(let* ((curr-val (db:no-sync-get/default db keyname #f)))
+ (debug:print 2 *default-log-port* "db:no-sync-get-lock-with-id: curr-val: " curr-val)
(if curr-val
(match (db:extract-time-identifier curr-val) ;; result->timestamp, identifier
((timestamp . ident)
(cons (equal? ident identifier) timestamp))
- (else (cons #f 'malformed-lock))) ;; lock malformed
+ (else
+ (debug:print 2 *default-log-port* "db:no-sync-get-lock-with-id: malformed lock")
+ (cons #f 'malformed-lock)
+ )
+ ) ;; lock malformed
(let ((curr-sec (current-seconds))
(lock-value (if identifier
(conc (current-seconds)"+"identifier)
(current-seconds))))
(sqlite3:execute db "INSERT OR REPLACE INTO no_sync_metadat (var,val) VALUES(?,?);" keyname lock-value)
@@ -1372,11 +1572,11 @@
#f
(with-input-from-file fname
(lambda ()
(equal? key-string (read-line)))))
(begin
- (dbfile:print-err "dbfile:simple-file-lock created " fname " but it was gone 3 seconds later")
+ (dbfile:print-err "dbfile:simple-file-lock created " fname " but it was gone 0.25 seconds later")
#f
)
)
)
)
@@ -1438,7 +1638,15 @@
;; (db:hoh-set! stmt-cache db stmt newstmth)
(hash-table-set! stmt-cache stmt newstmth)
newstmth))))
(mutex-unlock! *get-cache-stmth-mutex*)
result))
+
+;; (define *mutex-stmth-call* (make-mutex))
+;;
+;; (define (db:with-mutex-for-stmth proc)
+;; (mutex-lock! *mutex-stmth-call*)
+;; (let* ((res (proc)))
+;; (mutex-unlock! *mutex-stmth-call*)
+;; res))
)
Index: dbmod.scm
==================================================================
--- dbmod.scm
+++ dbmod.scm
@@ -227,12 +227,12 @@
(lambda (last-update)
(if *sync-in-progress*
(debug:print 3 *default-log-port* "WARNING: overlapping calls to sync to disk")
(let* ((syncer-logfile (conc areapath"/logs/"dbfname"-syncer.log"))
(sync-cmd (if (eq? syncdir 'todisk)
- (conc "(NBFAKE_LOG="syncer-logfile" nbfake megatest -db2db -from "tmpdb" -to "dbfullname" -period 5 -timeout 10 > /dev/null 2&>1)&")
- (conc "(NBFAKE_LOG="syncer-logfile" nbfake megatest -db2db -from "dbfullname" -to "tmpdb" -period 5 -timeout 10 > /dev/null 2&>1)&")))
+ (conc "NBFAKE_QUIET=yes NBFAKE_LOG="syncer-logfile" nbfake megatest -db2db -from "tmpdb" -to "dbfullname" -period 5 -timeout 10")
+ (conc "NBFAKE_QUIET=yes NBFAKE_LOG="syncer-logfile" nbfake megatest -db2db -from "dbfullname" -to "tmpdb" -period 5 -timeout 10")))
(synclock-file (conc dbfullname".lock"))
(syncer-running-file (conc dbfullname"-sync-running"))
(synclock-mod-time (if (file-exists? synclock-file)
(handle-exceptions
exn