Index: api.scm ================================================================== --- api.scm +++ api.scm @@ -403,10 +403,16 @@ ;; NO SYNC DB ((no-sync-set) (apply db:no-sync-set *no-sync-db* params)) ((no-sync-get/default) (apply db:no-sync-get/default *no-sync-db* params)) ((no-sync-del!) (apply db:no-sync-del! *no-sync-db* params)) ((no-sync-get-lock) (apply db:no-sync-get-lock *no-sync-db* params)) + + ;; NO SYNC DB PROCESSES + ((register-process) (apply dbfile:register-process *no-sync-db* params)) + ((set-process-done) (apply dbfile:set-process-done *no-sync-db* params)) + ((set-process-status) (apply dbfile:set-process-status *no-sync-db* params)) + ((get-process-options) (apply dbfile:get-process-options *no-sync-db* params)) ;; ARCHIVES ;; ((archive-get-allocations) ((archive-register-disk) (apply db:archive-register-disk dbstruct params)) ((archive-register-block-name)(apply db:archive-register-block-name dbstruct params)) Index: dbfile.scm ================================================================== --- dbfile.scm +++ dbfile.scm @@ -16,11 +16,11 @@ ;; You should have received a copy of the GNU General Public License ;; along with Megatest. If not, see <http://www.gnu.org/licenses/>. ;;====================================================================== -(use srfi-18) +(use srfi-18 posix hostinfo) (declare (unit dbfile)) (declare (uses debugprint)) (declare (uses commonmod)) @@ -32,18 +32,19 @@ data-structures extras matchable (prefix sqlite3 sqlite3:) - posix typed-records + posix posix-extras typed-records srfi-18 srfi-1 srfi-69 stack files ports + hostinfo commonmod debugprint ) @@ -121,10 +122,50 @@ (define-record simple-run target id runname state status owner event_time) (define-record-printer (simple-run x out) (fprintf out "#,(simple-run ~S ~S ~S ~S)" (simple-run-target x) (simple-run-id x) (simple-run-runname x) (time->string (seconds->local-time (simple-run-event_time x) )))) +;; args is hash table of string to value +;; +(define (get-purpose args) + (let* ((get-arg (lambda (key) + (hash-table-ref/default args key #f))) + (get-switch (lambda keys + (fold + (lambda (key res) + (if (hash-table-ref/default args key #f) + (or key res) + res)) + #f + keys))) + (action (get-switch "-server" "-execute" "-run" "-rerun"))) + (cond + (action + (substring action 1 (string-length action))) + (else + "nopurpose")))) + +;; megatest process tracking + +(defstruct procinf + (start (current-seconds)) + (end -1) + (host (get-host-name)) ;; why is this not being recognised? + (pid (current-process-id)) + (port -1) + (cwd (current-directory)) + (load #f) + (purpose #f) ;; get-purpose needed to be run in megatest.scm + (dbname #f) + (mtbin (car (argv))) + (mtversion #f) + (status "running") + + + ) + +(define *procinf* (make-procinf)) (define *dbstruct-dbs* #f) (define *db-open-mutex* (make-mutex)) (define *db-access-mutex* (make-mutex)) ;; used in common.scm (define *no-sync-db* #f) (define *db-sync-in-progress* #f) @@ -362,21 +403,22 @@ (with-output-to-port (current-error-port) (lambda () (apply print params)))) -(define (dbfile:cautious-open-database fname init-proc sync-mode journal-mode #!optional (tries-left 500)) +(define (dbfile:cautious-open-database fname init-proc sync-mode journal-mode + #!key (tries-left 500)(force-init #f)) (let* ((busy-file (conc fname "-journal")) (delay-time (* (- 51 tries-left) 1.1)) (write-access (file-write-access? fname)) (dir-access (file-write-access? (pathname-directory fname))) (retry (lambda () (thread-sleep! delay-time) (if (> tries-left 0) (dbfile:cautious-open-database fname init-proc sync-mode journal-mode - (- tries-left 1)))))) + tries-left: (- tries-left 1)))))) (assert (>= tries-left 0) (conc "FATAL: too many attempts in dbfile:cautious-open-database of "fname", giving up.")) (if (and (file-write-access? fname) (file-exists? busy-file)) (begin @@ -386,11 +428,11 @@ (thread-sleep! 1) (if (eq? tries-left 2) (begin (dbfile:print-err "INFO: forcing journal rollup "busy-file) (dbfile:brute-force-salvage-db fname))) - (dbfile:cautious-open-database fname init-proc sync-mode journal-mode (- tries-left 1))) + (dbfile:cautious-open-database fname init-proc sync-mode journal-mode tries-left: (- tries-left 1))) (let* ((result (condition-case (if dir-access (dbfile:with-simple-file-lock (conc fname ".lock") @@ -400,11 +442,12 @@ (sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 30000)) (if sync-mode (sqlite3:execute db (conc "PRAGMA synchronous = "sync-mode";"))) (if journal-mode (sqlite3:execute db (conc "PRAGMA journal_mode = "journal-mode";"))) - (if (and init-proc (not db-exists)) + (if (and init-proc (or force-init + (not db-exists))) (init-proc db)) db))) (begin (if (file-exists? fname ) (let ((db (sqlite3:open-database fname))) @@ -465,22 +508,113 @@ val TEXT, CONSTRAINT no_sync_metadat_constraint UNIQUE (var));" "CREATE TABLE IF NOT EXISTS no_sync_locks (key TEXT, val TEXT, - CONSTRAINT no_sync_metadat_constraint UNIQUE (key));")))))) + CONSTRAINT no_sync_metadat_constraint UNIQUE (key));" + "CREATE TABLE IF NOT EXISTS processes + (id INTEGER PRIMARY KEY, + host TEXT, + port INTEGER, + pid INTEGER, + starttime INTEGER, + endtime INTEGER, + status TEXT, + purpose TEXT, + dbname TEXT, + mtversion TEXT, + reason TEXT DEFAULT 'none', + CONSTRAINT no_sync_processes UNIQUE (host,pid));" + )))))) (on-tmp (equal? (car (string-split dbpath "/")) "tmp")) (db (if on-tmp - (dbfile:cautious-open-database dbname init-proc 0 "WAL") - (dbfile:cautious-open-database dbname init-proc 0 #f) + (dbfile:cautious-open-database dbname init-proc 0 "WAL" force-init: #t) + (dbfile:cautious-open-database dbname init-proc 0 #f force-init: #t) ;; (sqlite3:open-database dbname) ))) (if on-tmp ;; done in cautious-open-database (begin (sqlite3:execute db "PRAGMA synchronous = 0;") (sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 136000)))) db)) + +;; mtest processes registry calls + +(define (dbfile:insert-or-update-process nsdb dat) + (let* ((host (procinf-host dat)) + (pid (procinf-pid dat)) + (curr-info (dbfile:get-process-info nsdb host pid))) + (if curr-info ;; record exists, do update + (match curr-info + ((host port pid starttime endtime status purpose dbname mtversion) + (sqlite3:execute + nsdb + "UPDATE processes SET port=?,starttime=?,endtime=?,status=?, + purpose=?,dbname=?,mtversion=? + WHERE host=? AND pid=?;" + (or (procinf-port dat) port) + (or (procinf-start dat) starttime) + (or (procinf-end dat) endtime) + (or (procinf-status dat) status) + (or (procinf-purpose dat) purpose) + (or (procinf-dbname dat) dbname) + (or (procinf-mtversion dat) mtversion) + host pid)) + (else + #f ;; what to do? + )) + (dbfile:register-process + nsdb + (procinf-host dat) + (procinf-port dat) + (procinf-pid dat) + (procinf-start dat) + (procinf-end dat) + (procinf-status dat) + (procinf-purpose dat) + (procinf-dbname dat) + (procinf-mtversion dat))))) + + +(define (dbfile:register-process nsdb host port pid starttime endtime status purpose dbname mtversion) + (sqlite3:execute nsdb "INSERT INTO processes (host,port,pid,starttime,endtime,status,purpose,dbname,mtversion) VALUES (?,?,?,?,?,?,?,?,?);" + host port pid starttime endtime status purpose dbname mtversion)) + +(define (dbfile:set-process-status nsdb host pid newstatus) + (sqlite3:execute nsdb "UPDATE processes SET status=? WHERE host=? AND pid=?;" newstatus host pid)) + +(define (dbfile:get-process-options nsdb purpose dbname) + (sqlite3:fold-row + ;; host port pid starttime status mtversion + (lambda (res . row) + (cons row res)) + '() + nsdb + "SELECT host,port,pid,starttime,endtime,status,mtversion FROM processes WHERE purpose=? AND dbname LIKE ? AND status='alive';" + purpose dbname)) + +(define (dbfile:get-process-info nsdb host pid) + (let ((res (sqlite3:fold-row + ;; host port pid starttime status mtversion + (lambda (res . row) + (cons row res)) + '() + nsdb + "SELECT host,port,pid,starttime,endtime,status,purpose,dbname,mtversion FROM processes WHERE host=? AND pid=?;" + host pid))) + (if (null? res) + #f + (car res)))) + +(define (dbfile:set-process-done nsdb host pid reason) + (sqlite3:execute nsdb "UPDATE processes SET status='ended',endtime=?,reason=? WHERE host=? AND pid=?;" (current-seconds) reason host pid) + (dbfile:cleanup-old-entries nsdb)) + +(define (dbfile:cleanup-old-entries nsdb) + (sqlite3:execute nsdb "DELETE FROM process WHERE status='ended' AND endtime<?;" (- (current-seconds) (* 3600 48)))) + +;; other no-sync functions (define (dbfile:with-no-sync-db dbpath proc) (mutex-lock! *no-sync-db-mutex*) (let* ((already-open *no-sync-db*) (db (or already-open (dbfile:raw-open-no-sync-db dbpath))) Index: launch.scm ================================================================== --- launch.scm +++ launch.scm @@ -32,13 +32,13 @@ (declare (uses ezsteps)) ;; (declare (uses dbmod)) (declare (uses dbfile)) (declare (uses mtargs)) -(use regex regex-case base64 sqlite3 srfi-18 directory-utils posix-extras z3 - call-with-environment-variables csv) -(use typed-records pathname-expand matchable) +(use regex regex-case base64 sqlite3 srfi-18 directory-utils posix posix-extras z3 + call-with-environment-variables csv hostinfo + typed-records pathname-expand matchable) (import (prefix base64 base64:) (prefix sqlite3 sqlite3:) (prefix mtargs args:) ) Index: megatest.scm ================================================================== --- megatest.scm +++ megatest.scm @@ -541,10 +541,15 @@ ;; immediately set MT_TARGET if -reqtarg or -target are available ;; (let ((targ (or (args:get-arg "-reqtarg")(args:get-arg "-target")))) (if targ (setenv "MT_TARGET" targ))) +;; set the purpose field in procinf + +(procinf-purpose-set! *procinf* (get-purpose args:arg-hash)) +(procinf-mtversion-set! *procinf* megatest-version) + ;; The watchdog is to keep an eye on things like db sync etc. ;; ;; TODO: for multiple areas, we will have multiple watchdogs; and multiple threads to manage ;;(define *watchdog* (make-thread Index: rmt.scm ================================================================== --- rmt.scm +++ rmt.scm @@ -726,10 +726,24 @@ (rmt:send-receive 'no-sync-del! #f `(,var))) (define (rmt:no-sync-get-lock keyname) (rmt:send-receive 'no-sync-get-lock #f `(,keyname))) +;; process registration + +(define (rmt:register-process host port pid starttime status purpose dbname mtversion) + (rmt:send-receive 'register-process #f (list host port pid starttime status purpose dbname mtversion))) + +(define (rmt:set-process-done host pid reason) + (rmt:send-receive 'set-process-done #f (list host pid reason))) + +(define (rmt:set-process-status host pid newstatus) + (rmt:send-receive 'set-process-status #f (list host pid newstatus))) + +(define (rmt:get-process-options purpose dbname) + (rmt:get-process-options 'get-process-options #f (list purpose dbname))) + ;;====================================================================== ;; A R C H I V E S ;;====================================================================== (define (rmt:archive-get-allocations testname itempath dneeded) Index: tcp-transportmod.scm ================================================================== --- tcp-transportmod.scm +++ tcp-transportmod.scm @@ -482,13 +482,39 @@ (run-thread (make-thread (lambda () (tt:keep-running ttdat dbfname dbstruct))))) (thread-start! tcp-thread) (thread-start! run-thread) - (thread-join! run-thread) ;; run thread will exit on timeout or other conditions - (debug:print 0 *default-log-port* "Exiting now.") - (exit)))))) + + (let* ((areapath (tt-areapath ttdat)) + (nosyncdbpath (conc areapath"/.mtdb"))) + ;; this didn't seem to work, is port not available yet? + (let loop ((count 0)) + (if (tt-port ttdat) + (begin + (procinf-port-set! *procinf* (tt-port ttdat)) + (procinf-dbname-set! *procinf* dbfname) + (dbfile:with-no-sync-db + nosyncdbpath + (lambda (nsdb) + (dbfile:insert-or-update-process nsdb *procinf*)))) + (if (< count 5) + (begin + (thread-sleep! 0.5) + (loop (+ count 1))) + (debug:print 0 *default-log-port* "ERROR: (tt-port ttdat) no port set!")))) + + (thread-join! run-thread) ;; run thread will exit on timeout or other conditions + ;; replace with call to (dbfile:set-process-done nsdb host pid reason) + (procinf-status-set! *procinf* "done") + (procinf-end-set! *procinf* (current-seconds)) + (dbfile:with-no-sync-db + nosyncdbpath + (lambda (nsdb) + (dbfile:insert-or-update-process nsdb *procinf*))) + (debug:print 0 *default-log-port* "Exiting now.") + (exit))))))) (define (tt:keep-running ttdat dbfname dbstruct) ;; verfiy conn for ready ;; listener socket has been started by this stage ;; wait for a port before creating the registration file