Index: api.scm
==================================================================
--- api.scm
+++ api.scm
@@ -403,10 +403,16 @@
     ;; NO SYNC DB
     ((no-sync-set)               (apply db:no-sync-set         *no-sync-db* params))
     ((no-sync-get/default)       (apply db:no-sync-get/default *no-sync-db* params))
     ((no-sync-del!)              (apply db:no-sync-del!        *no-sync-db* params))
     ((no-sync-get-lock)          (apply db:no-sync-get-lock    *no-sync-db* params))
+
+    ;; NO SYNC DB PROCESSES
+    ((register-process)          (apply dbfile:register-process *no-sync-db* params))
+    ((set-process-done)          (apply dbfile:set-process-done *no-sync-db* params))
+    ((set-process-status)        (apply dbfile:set-process-status *no-sync-db* params))
+    ((get-process-options)       (apply dbfile:get-process-options *no-sync-db* params))
     
     ;; ARCHIVES
     ;; ((archive-get-allocations)   
     ((archive-register-disk)     (apply db:archive-register-disk dbstruct params))
     ((archive-register-block-name)(apply db:archive-register-block-name dbstruct params))

Index: dbfile.scm
==================================================================
--- dbfile.scm
+++ dbfile.scm
@@ -16,11 +16,11 @@
 ;;     You should have received a copy of the GNU General Public License
 ;;     along with Megatest.  If not, see <http://www.gnu.org/licenses/>.
 
 ;;======================================================================
 
-(use srfi-18)
+(use srfi-18 posix hostinfo)
 
 (declare (unit dbfile))
 (declare (uses debugprint))
 (declare (uses commonmod))
 
@@ -32,18 +32,19 @@
 	  data-structures
 	  extras
 	  matchable
   
 	  (prefix sqlite3 sqlite3:)
-	  posix typed-records
+	  posix posix-extras typed-records
 
 	  srfi-18
 	  srfi-1
 	  srfi-69
 	  stack
 	  files
 	  ports
+	  hostinfo
 	  
 	  commonmod
 	  debugprint
 	  )
 
@@ -121,10 +122,50 @@
 (define-record simple-run target id runname state status owner event_time)
 (define-record-printer (simple-run x out)
   (fprintf out "#,(simple-run ~S ~S ~S ~S)"
 	   (simple-run-target x) (simple-run-id x) (simple-run-runname x) (time->string (seconds->local-time (simple-run-event_time x) ))))
 
+;; args is hash table of string to value
+;;
+(define (get-purpose args)
+  (let* ((get-arg    (lambda (key)
+		       (hash-table-ref/default args key #f)))
+	 (get-switch (lambda keys
+		       (fold
+			(lambda (key res)
+			  (if (hash-table-ref/default args key #f)
+			      (or key res)
+			      res))
+			#f
+			keys)))
+	 (action     (get-switch "-server" "-execute" "-run" "-rerun")))
+    (cond
+     (action
+      (substring action 1 (string-length action)))
+     (else
+      "nopurpose"))))
+
+;; megatest process tracking
+
+(defstruct procinf
+  (start      (current-seconds))
+  (end        -1)
+  (host       (get-host-name)) ;; why is this not being recognised?
+  (pid        (current-process-id))
+  (port       -1)
+  (cwd        (current-directory))
+  (load       #f)
+  (purpose    #f) ;; get-purpose needed to be run in megatest.scm
+  (dbname     #f)
+  (mtbin      (car (argv)))
+  (mtversion  #f)
+  (status     "running")
+  
+  
+  )
+
+(define *procinf* (make-procinf))
 (define *dbstruct-dbs* #f)
 (define *db-open-mutex* (make-mutex))
 (define *db-access-mutex* (make-mutex)) ;; used in common.scm
 (define *no-sync-db*   #f)
 (define *db-sync-in-progress* #f)
@@ -362,21 +403,22 @@
   (with-output-to-port
       (current-error-port)
     (lambda ()
       (apply print params))))
 
-(define (dbfile:cautious-open-database fname init-proc sync-mode journal-mode #!optional (tries-left 500))
+(define (dbfile:cautious-open-database fname init-proc sync-mode journal-mode
+				       #!key (tries-left 500)(force-init #f))
   (let* ((busy-file  (conc fname "-journal"))
 	 (delay-time (* (- 51 tries-left) 1.1))
       	 (write-access (file-write-access? fname))
          (dir-access (file-write-access? (pathname-directory fname)))
          (retry      (lambda ()
 		       (thread-sleep! delay-time)
 		       (if (> tries-left 0)
 			   (dbfile:cautious-open-database fname init-proc
 							  sync-mode journal-mode
-							  (- tries-left 1))))))
+							  tries-left: (- tries-left 1))))))
     (assert (>= tries-left 0) (conc "FATAL: too many attempts in dbfile:cautious-open-database of "fname", giving up."))
     
     (if (and (file-write-access? fname)
 	     (file-exists? busy-file))
 	(begin
@@ -386,11 +428,11 @@
 	  (thread-sleep! 1)
 	  (if (eq? tries-left 2)
 	      (begin
 	  	(dbfile:print-err "INFO: forcing journal rollup "busy-file)
 	  	(dbfile:brute-force-salvage-db fname)))
-	  (dbfile:cautious-open-database fname init-proc sync-mode journal-mode (- tries-left 1)))
+	  (dbfile:cautious-open-database fname init-proc sync-mode journal-mode tries-left: (- tries-left 1)))
 	
 	(let* ((result (condition-case
 		        (if dir-access
 			    (dbfile:with-simple-file-lock
 			     (conc fname ".lock")
@@ -400,11 +442,12 @@
                                  (sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 30000))
 				 (if sync-mode
 				     (sqlite3:execute db (conc "PRAGMA synchronous = "sync-mode";")))
 				 (if journal-mode
 				     (sqlite3:execute db (conc "PRAGMA journal_mode = "journal-mode";")))
-				 (if (and init-proc (not db-exists))
+				 (if (and init-proc (or force-init
+							(not db-exists)))
 				     (init-proc db))
 				 db)))
                             (begin
 			      (if (file-exists? fname )
                                   (let ((db (sqlite3:open-database fname)))
@@ -465,22 +508,113 @@
                                  val TEXT,
                                    CONSTRAINT no_sync_metadat_constraint UNIQUE (var));"
 			    "CREATE TABLE IF NOT EXISTS no_sync_locks 
                                 (key TEXT,
                                  val TEXT,
-                                   CONSTRAINT no_sync_metadat_constraint UNIQUE (key));"))))))
+                                   CONSTRAINT no_sync_metadat_constraint UNIQUE (key));"
+			    "CREATE TABLE IF NOT EXISTS processes
+                                (id INTEGER PRIMARY KEY,
+                                 host TEXT,
+                                 port INTEGER,
+                                 pid INTEGER,
+                                 starttime INTEGER,
+                                 endtime INTEGER,
+                                 status TEXT,
+                                 purpose TEXT,
+                                 dbname TEXT,
+                                 mtversion TEXT,
+                                 reason TEXT DEFAULT 'none',
+                                   CONSTRAINT no_sync_processes UNIQUE (host,pid));"
+			    ))))))
 	 (on-tmp      (equal? (car (string-split dbpath "/")) "tmp"))
 	 (db        (if on-tmp
-			(dbfile:cautious-open-database dbname init-proc 0 "WAL")
-			(dbfile:cautious-open-database dbname init-proc 0 #f)
+			(dbfile:cautious-open-database dbname init-proc 0 "WAL" force-init: #t)
+			(dbfile:cautious-open-database dbname init-proc 0 #f    force-init: #t)
 			;; (sqlite3:open-database dbname)
 			)))
     (if on-tmp	      ;; done in cautious-open-database
 	(begin
 	  (sqlite3:execute db "PRAGMA synchronous = 0;")
 	  (sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 136000))))
     db))
+
+;; mtest processes registry calls
+
+(define (dbfile:insert-or-update-process nsdb dat)
+  (let* ((host      (procinf-host dat))
+	 (pid       (procinf-pid  dat))
+	 (curr-info (dbfile:get-process-info nsdb host pid)))
+    (if curr-info ;; record exists, do update
+	(match curr-info
+	  ((host port pid starttime endtime status purpose dbname mtversion)
+	   (sqlite3:execute
+	    nsdb
+	    "UPDATE processes SET port=?,starttime=?,endtime=?,status=?,
+                                  purpose=?,dbname=?,mtversion=?
+              WHERE host=? AND pid=?;"
+	    (or (procinf-port      dat) port)
+	    (or (procinf-start     dat) starttime)
+	    (or (procinf-end       dat) endtime)
+	    (or (procinf-status    dat) status)
+	    (or (procinf-purpose   dat) purpose)
+	    (or (procinf-dbname    dat) dbname)
+	    (or (procinf-mtversion dat) mtversion)
+	    host pid))
+	  (else
+	   #f ;; what to do?
+	   ))
+	(dbfile:register-process
+	 nsdb
+	 (procinf-host      dat)
+	 (procinf-port      dat)
+	 (procinf-pid       dat)
+	 (procinf-start     dat)
+	 (procinf-end       dat)
+	 (procinf-status    dat)
+	 (procinf-purpose   dat)
+	 (procinf-dbname    dat)
+	 (procinf-mtversion dat)))))
+	  
+
+(define (dbfile:register-process nsdb host port pid starttime endtime status purpose dbname mtversion)
+  (sqlite3:execute nsdb "INSERT INTO processes (host,port,pid,starttime,endtime,status,purpose,dbname,mtversion) VALUES (?,?,?,?,?,?,?,?,?);"
+		   host port pid starttime endtime status purpose dbname mtversion))
+
+(define (dbfile:set-process-status nsdb host pid newstatus)
+  (sqlite3:execute nsdb "UPDATE processes SET status=? WHERE host=? AND pid=?;" newstatus host pid))
+
+(define (dbfile:get-process-options nsdb purpose dbname)
+  (sqlite3:fold-row
+   ;; host port pid starttime status mtversion
+   (lambda (res . row)
+     (cons row res))
+   '()
+   nsdb
+   "SELECT host,port,pid,starttime,endtime,status,mtversion FROM processes WHERE purpose=? AND dbname LIKE ? AND status='alive';"
+   purpose dbname))
+
+(define (dbfile:get-process-info nsdb host pid)
+  (let ((res (sqlite3:fold-row
+	      ;; host port pid starttime status mtversion
+	      (lambda (res . row)
+		(cons row res))
+	      '()
+	      nsdb
+	      "SELECT host,port,pid,starttime,endtime,status,purpose,dbname,mtversion FROM processes WHERE host=? AND pid=?;"
+	      host pid)))
+    (if (null? res)
+	#f
+	(car res))))
+
+(define (dbfile:set-process-done nsdb host pid reason)
+  (sqlite3:execute nsdb "UPDATE processes SET status='ended',endtime=?,reason=? WHERE host=? AND pid=?;" (current-seconds) reason host pid)
+  (dbfile:cleanup-old-entries nsdb))
+
+(define (dbfile:cleanup-old-entries nsdb)
+  (sqlite3:execute nsdb "DELETE FROM process WHERE status='ended' AND endtime<?;" (- (current-seconds) (* 3600 48))))
+
+;; other no-sync functions
 
 (define (dbfile:with-no-sync-db dbpath proc)
   (mutex-lock! *no-sync-db-mutex*)
   (let* ((already-open *no-sync-db*)
 	 (db  (or already-open (dbfile:raw-open-no-sync-db dbpath)))

Index: launch.scm
==================================================================
--- launch.scm
+++ launch.scm
@@ -32,13 +32,13 @@
 (declare (uses ezsteps))
 ;; (declare (uses dbmod))
 (declare (uses dbfile))
 (declare (uses mtargs))
 
-(use regex regex-case base64 sqlite3 srfi-18 directory-utils posix-extras z3
-     call-with-environment-variables csv)
-(use typed-records pathname-expand matchable)
+(use regex regex-case base64 sqlite3 srfi-18 directory-utils posix posix-extras z3
+     call-with-environment-variables csv hostinfo 
+     typed-records pathname-expand matchable)
 
 (import (prefix base64 base64:)
 	(prefix sqlite3 sqlite3:)
 	(prefix mtargs args:)
 )

Index: megatest.scm
==================================================================
--- megatest.scm
+++ megatest.scm
@@ -541,10 +541,15 @@
 ;; immediately set MT_TARGET if -reqtarg or -target are available
 ;;
 (let ((targ (or (args:get-arg "-reqtarg")(args:get-arg "-target"))))
   (if targ (setenv "MT_TARGET" targ)))
 
+;; set the purpose field in procinf
+
+(procinf-purpose-set! *procinf* (get-purpose args:arg-hash))
+(procinf-mtversion-set! *procinf* megatest-version)
+
 ;; The watchdog is to keep an eye on things like db sync etc.
 ;;
 
 ;; TODO: for multiple areas, we will have multiple watchdogs; and multiple threads to manage
 ;;(define *watchdog* (make-thread

Index: rmt.scm
==================================================================
--- rmt.scm
+++ rmt.scm
@@ -726,10 +726,24 @@
   (rmt:send-receive 'no-sync-del! #f `(,var)))
 
 (define (rmt:no-sync-get-lock keyname)
   (rmt:send-receive 'no-sync-get-lock #f `(,keyname)))
 
+;; process registration
+
+(define (rmt:register-process host port pid starttime status purpose dbname mtversion)
+  (rmt:send-receive 'register-process #f (list host port pid starttime status purpose dbname mtversion)))
+
+(define (rmt:set-process-done host pid reason)
+  (rmt:send-receive 'set-process-done #f (list host pid reason)))
+
+(define (rmt:set-process-status host pid newstatus)
+  (rmt:send-receive 'set-process-status #f (list host pid newstatus)))
+
+(define (rmt:get-process-options purpose dbname)
+  (rmt:get-process-options 'get-process-options #f (list purpose dbname)))
+
 ;;======================================================================
 ;; A R C H I V E S
 ;;======================================================================
 
 (define (rmt:archive-get-allocations  testname itempath dneeded)

Index: tcp-transportmod.scm
==================================================================
--- tcp-transportmod.scm
+++ tcp-transportmod.scm
@@ -482,13 +482,39 @@
 		 (run-thread (make-thread
 			      (lambda ()
 				(tt:keep-running ttdat dbfname dbstruct)))))
 	    (thread-start! tcp-thread)
 	    (thread-start! run-thread)
-	    (thread-join! run-thread) ;; run thread will exit on timeout or other conditions
-            (debug:print 0 *default-log-port* "Exiting now.")
-	    (exit))))))
+
+	    (let* ((areapath     (tt-areapath ttdat))
+		   (nosyncdbpath (conc areapath"/.mtdb")))
+	      ;; this didn't seem to work, is port not available yet?
+	      (let loop ((count 0))
+		(if (tt-port ttdat)
+		    (begin
+		      (procinf-port-set! *procinf* (tt-port ttdat))
+		      (procinf-dbname-set! *procinf* dbfname)
+		      (dbfile:with-no-sync-db
+		       nosyncdbpath
+		       (lambda (nsdb)
+			 (dbfile:insert-or-update-process nsdb *procinf*))))
+		    (if (< count 5)
+			(begin
+			  (thread-sleep! 0.5)
+			  (loop (+ count 1)))
+			(debug:print 0 *default-log-port* "ERROR: (tt-port ttdat) no port set!"))))
+	    
+	      (thread-join! run-thread) ;; run thread will exit on timeout or other conditions
+	      ;; replace with call to (dbfile:set-process-done nsdb host pid reason)
+	      (procinf-status-set! *procinf* "done")
+	      (procinf-end-set! *procinf* (current-seconds))
+	      (dbfile:with-no-sync-db
+	       nosyncdbpath
+	       (lambda (nsdb)
+		 (dbfile:insert-or-update-process nsdb *procinf*)))
+              (debug:print 0 *default-log-port* "Exiting now.")
+	      (exit)))))))
 
 (define (tt:keep-running ttdat dbfname dbstruct)
   ;; verfiy conn for ready
   ;; listener socket has been started by this stage
   ;; wait for a port before creating the registration file