Megatest

Check-in [c75b58ded7]
Login
Overview
Comment:Refactored to move most of the work to scan and recover dead tests out of the servers. This commit also fixes dependencies in the Makefile.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.80
Files: files | file ages | folders
SHA1: c75b58ded796b22be29e2c051bbf95043e60f16c
User & Date: matt on 2023-04-12 06:38:01
Other Links: branch diff | manifest | tags
Context
2023-04-12
06:59
Fix random issue with directory creation when parallel startup causes collisions check-in: 07ee6da7ba user: matt tags: v1.80
06:38
Refactored to move most of the work to scan and recover dead tests out of the servers. This commit also fixes dependencies in the Makefile. check-in: c75b58ded7 user: matt tags: v1.80
2023-04-11
21:32
Throttle writes more than reads using mutex, avoid doing sync when writes are happening using mutex. This is a monte carlo approach. Might need tweaking. check-in: b5df408582 user: matt tags: v1.80
Changes

Modified Makefile from [9d72ba0941] to [3b43ac8a9b].

14
15
16
17
18
19
20
21

22
23
24
25
26
27
28
14
15
16
17
18
19
20

21
22
23
24
25
26
27
28







-
+







# 
#     You should have received a copy of the GNU General Public License
#     along with Megatest.  If not, see <http://www.gnu.org/licenses/>.

# make install CSCOPTS='-accumulate-profile -profile-name $(PWD)/profile-ww$(shell date +%V.%u)'
# rm <files>.o ; make install CSCOPTS='-profile' ; ... ;  chicken-profile | less

all : $(PREFIX)/bin/.$(ARCHSTR) mtest dboard mtut tcmt unitdeps.pdf
all : $(PREFIX)/bin/.$(ARCHSTR) mtest dboard mtut 

SHELL=/bin/bash
PREFIX=$(PWD)
CSCOPTS=
INSTALL=install
SRCFILES = common.scm items.scm launch.scm ods.scm runconfig.scm	\
           server.scm configf.scm db.scm keys.scm margs.scm		\
49
50
51
52
53
54
55
56

57
58
59
60
61
62
63
49
50
51
52
53
54
55

56
57
58
59
60
61
62
63







-
+







mofiles/portlogger.o : mofiles/dbmod.o

mofiles/dbfile.o     : \
       mofiles/debugprint.o mofiles/commonmod.o

configf.o : commonmod.import.o
mofiles/dbfile.o : mofiles/debugprint.o
mofiles/rmtmod.o mofiles/dbmod.o : mofiles/dbfile.o
mofiles/rmtmod.o mofiles/dbmod.o : mofiles/dbfile.o mofiles/commonmod.o mofiles/debugprint.o
db.o : mofiles/dbmod.o mofiles/dbfile.o
mofiles/debugprint.o : mofiles/mtargs.o
mofiles/tcp-transportmod.o : mofiles/portlogger.o

# ftail.scm rmtmod.scm commonmod.scm removed
# MSRCFILES = ducttape-lib.scm pkts.scm stml2.scm cookie.scm mutils.scm	\
#             mtargs.scm commonmod.scm dbmod.scm adjutant.scm ulex.scm	\
83
84
85
86
87
88
89

90
91
92
93
94
95
96
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97







+







#	csc $(CSCOPTS) -I mofiles -I $* -J -c $< -o $*.o
#	cp $*.o mofiles/$*.o
#	@touch $*.import.scm # ensure it is touched after the .o is made

%.import.scm mofiles/%.o : %.scm
	@mkdir -p mofiles
	csc $(CSCOPTS) -J -c $< -o mofiles/$*.o
	@if [[ -e $*.import.scm ]];then touch $*.import.scm;fi # ensure it is touched after the .o is made

ADTLSCR=mt_laststep mt_runstep mt_ezstep
HELPERS=$(addprefix $(PREFIX)/bin/,$(ADTLSCR))
DEPLOYHELPERS=$(addprefix deploytarg/,$(ADTLSCR))
MTESTHASH=$(shell fossil info|grep checkout:| awk '{print $$2}')

ifeq ($(MTESTHASH),)
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154






























155
156
157
158
159
160
161
119
120
121
122
123
124
125






























126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162







-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+







	csc $(CSCOPTS) $(OFILES) dashboard.scm $(GOFILES) $(MOFILES) $(MOIMPFILES) -o dboard

mtut: $(OFILES) $(MOFILES) $(MOIMPFILES) megatest-fossil-hash.scm mtut.scm megatest-version.scm 
	csc $(CSCOPTS) $(OFILES) $(MOFILES)  $(MOIMPFILES) mtut.scm -o mtut

# include makefile.inc

TCMTOBJS = \
	api.o \
	archive.o \
	cgisetup/models/pgdb.o \
	common.o \
	configf.o \
	db.o \
	env.o \
	items.o \
	keys.o \
	launch.o \
	margs.o \
	mt.o \
	ods.o \
	process.o \
	rmt.o \
	runconfig.o \
	runs.o \
	server.o \
	tasks.o \
	tdb.o \
	tests.o \
	subrun.o \
        ezsteps.o

#        mofiles/rmtmod.o \
#        mofiles/commonmod.o \

tcmt : $(TCMTOBJS) tcmt.scm megatest-version.scm $(MOFILES) $(MOIMPFILES)
	csc $(CSCOPTS) $(TCMTOBJS) $(MOFILES) $(MOIMPFILES) tcmt.scm -o tcmt
# TCMTOBJS = \
# 	api.o \
# 	archive.o \
# 	cgisetup/models/pgdb.o \
# 	common.o \
# 	configf.o \
# 	db.o \
# 	env.o \
# 	items.o \
# 	keys.o \
# 	launch.o \
# 	margs.o \
# 	mt.o \
# 	ods.o \
# 	process.o \
# 	rmt.o \
# 	runconfig.o \
# 	runs.o \
# 	server.o \
# 	tasks.o \
# 	tdb.o \
# 	tests.o \
# 	subrun.o \
#         ezsteps.o
# 
# #        mofiles/rmtmod.o \
# #        mofiles/commonmod.o \
# 
# tcmt : $(TCMTOBJS) tcmt.scm megatest-version.scm $(MOFILES) $(MOIMPFILES)
# 	csc $(CSCOPTS) $(TCMTOBJS) $(MOFILES) $(MOIMPFILES) tcmt.scm -o tcmt

# install documentation to $(PREFIX)/docs
# DOES NOT REBUILD DOCS
#
$(PREFIX)/share/docs/megatest_manual.html : docs/manual/megatest_manual.html
	mkdir -p $(PREFIX)/share/docs
	$(INSTALL) docs/manual/megatest_manual.html $(PREFIX)/share/docs/megatest_manual.html
169
170
171
172
173
174
175
176

177
178
179
180
181
182
183
170
171
172
173
174
175
176

177
178
179
180
181
182
183
184







-
+







$(PREFIX)/share/db/mt-pg.sql : mt-pg.sql
	mkdir -p $(PREFIX)/share/db
	$(INSTALL) mt-pg.sql $(PREFIX)/share/db/mt-pg.sql

# Special dependencies for the includes
$(MOFILE) $(MOIMPFILES) : megatest-fossil-hash.scm

mofiles/commonmod.o : megatest-fossil-hash.scm
mofiles/commonmod.o : mofiles/debugprint.o megatest-fossil-hash.scm
common.o : mofiles/commonmod.o

# mofiles/dbmod.o : mofiles/configfmod.o

# commonmod.o dashboard.o megatest.o tcmt.o apimod.o : megatest-fossil-hash.scm

tests.o db.o launch.o runs.o dashboard-tests.o				\
274
275
276
277
278
279
280
281
282
283
284
285
286






287
288
289
290
291
292
293
275
276
277
278
279
280
281






282
283
284
285
286
287
288
289
290
291
292
293
294







-
-
-
-
-
-
+
+
+
+
+
+








$(PREFIX)/bin/mtexec : $(PREFIX)/bin/.$(ARCHSTR)/mtexec utils/mk_wrapper
	utils/mk_wrapper $(PREFIX) mtexec $(PREFIX)/bin/mtexec
	chmod a+x $(PREFIX)/bin/mtexec

# tcmt

$(PREFIX)/bin/.$(ARCHSTR)/tcmt : tcmt
	$(INSTALL) tcmt $(PREFIX)/bin/.$(ARCHSTR)/tcmt

$(PREFIX)/bin/tcmt : $(PREFIX)/bin/.$(ARCHSTR)/tcmt utils/mk_wrapper
	utils/mk_wrapper $(PREFIX) tcmt $(PREFIX)/bin/tcmt
	chmod a+x $(PREFIX)/bin/tcmt
# $(PREFIX)/bin/.$(ARCHSTR)/tcmt : tcmt
# 	$(INSTALL) tcmt $(PREFIX)/bin/.$(ARCHSTR)/tcmt
# 
# $(PREFIX)/bin/tcmt : $(PREFIX)/bin/.$(ARCHSTR)/tcmt utils/mk_wrapper
# 	utils/mk_wrapper $(PREFIX) tcmt $(PREFIX)/bin/tcmt
# 	chmod a+x $(PREFIX)/bin/tcmt

$(PREFIX)/bin/mt_laststep : utils/mt_laststep
	$(INSTALL) $< $@
	chmod a+x $@

$(PREFIX)/bin/mt_runstep : utils/mt_runstep
	$(INSTALL) $< $@
377
378
379
380
381
382
383
384

385
386
387
388
389
390

391
392
393
394
395
396
397
378
379
380
381
382
383
384

385
386
387
388
389
390

391
392
393
394
395
396
397
398







-
+





-
+








install : $(PREFIX)/bin/.$(ARCHSTR) $(PREFIX)/bin/.$(ARCHSTR)/mtest $(PREFIX)/bin/megatest \
          $(PREFIX)/bin/.$(ARCHSTR)/dboard $(PREFIX)/bin/dashboard $(HELPERS) $(PREFIX)/bin/nbfake \
	  $(PREFIX)/bin/nbfind $(PREFIX)/bin/mtrunner $(PREFIX)/bin/viewscreen $(PREFIX)/bin/mt_xterm \
          $(PREFIX)/bin/mt-old-to-new.sh $(PREFIX)/bin/mt-new-to-old.sh \
	  $(PREFIX)/share/docs/megatest_manual.html $(PREFIX)/bin/remrun \
	  $(PREFIX)/share/docs/megatest_manual.html $(PREFIX)/bin/remrun $(PREFIX)/bin/mtutil \
	  $(PREFIX)/bin/tcmt $(PREFIX)/share/db/mt-pg.sql \
	  $(PREFIX)/share/db/mt-pg.sql \
          $(PREFIX)/share/js/jquery-3.1.0.slim.min.js \
          $(PREFIX)/bin/.$(ARCHSTR)/lib/libpangox-1.0.so \
          $(PREFIX)/bin/.$(ARCHSTR)/lib/libpangox-1.0.so.0 \
          $(PREFIX)/bin/.$(ARCHSTR)/lib/libxcb-xlib.so.0

#         $(PREFIX)/bin/.$(ARCHSTR)/ndboard
#         $(PREFIX)/bin/.$(ARCHSTR)/ndboard $(PREFIX)/bin/tcmt 
#         $(PREFIX)/bin/.$(ARCHSTR)/mtexec $(PREFIX)/bin/mtexec $(PREFIX)/bin/serialize-env \
# $(PREFIX)/bin/newdashboard

$(PREFIX)/bin/.$(ARCHSTR) : 
	mkdir -p $(PREFIX)/bin/.$(ARCHSTR)
	mkdir -p $(PREFIX)/bin/.$(ARCHSTR)/lib

Modified api.scm from [b07878d20a] to [9008afe383].

30
31
32
33
34
35
36

37
38
39
40
41
42
43
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44







+







(import commonmod)
(import dbmod)
(import dbfile)
(import debugprint)
(import tcp-transportmod)

(use srfi-69
     srfi-18
     posix
     matchable
     s11n)

;; allow these queries through without starting a server
;;
(define api:read-only-queries
343
344
345
346
347
348
349


350


351
352
353
354
355
356
357
344
345
346
347
348
349
350
351
352

353
354
355
356
357
358
359
360
361







+
+
-
+
+







    ;; TEST DATA
    ((test-data-rollup)             (apply db:test-data-rollup dbstruct params))
    ((csv->test-data)               (apply db:csv->test-data dbstruct params))

    ;; MISC
    ((sync-inmem->db)               (let ((run-id (car params)))
                                      (db:sync-touched dbstruct run-id db:initialize-main-db force-sync: #t)))
    ((get-toplevels-and-incompletes) (apply db:get-toplevels-and-incompletes dbstruct params))
    ((mark-incomplete)              #f);;(thread-start! (make-thread (lambda () ;; no need to block on this one
    ((mark-incomplete)              (apply db:find-and-mark-incomplete dbstruct params))
					;;			  (apply db:find-and-mark-incomplete dbstruct params)
					;;			  #t)))) 
    ((create-all-triggers)          (db:create-all-triggers dbstruct))
    ((drop-all-triggers)            (db:drop-all-triggers dbstruct)) 

    ;; TESTMETA
    ((testmeta-add-record)       (apply db:testmeta-add-record dbstruct params))
    ((testmeta-update-field)     (apply db:testmeta-update-field dbstruct params))
    ((get-tests-tags)            (db:get-tests-tags dbstruct))

Modified common.scm from [dad201907b] to [ec316c51cd].

2044
2045
2046
2047
2048
2049
2050
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2044
2045
2046
2047
2048
2049
2050

















2051
2052
2053
2054
2055
2056
2057







-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-







			      (if (and (number? result)
				       (> result 0))
				  (common:write-cached-info actual-host "num-cpus" result))
			      result))))
	  (hash-table-set! *numcpus-cache* actual-host numcpus)
	  numcpus))))

(define (common:generic-ssh ssh-command proc default #!optional (msg-proc #f))
  (let ((inp #f))
    (handle-exceptions
	exn
      (begin
	(close-input-port inp)
	(if msg-proc
	    (msg-proc)
	    (debug:print 0 *default-log-port* "Command: \""ssh-command"\" failed. exn="exn))
	default)
      (set! inp (open-input-pipe ssh-command))
      (with-input-from-port inp
	(lambda ()
	  (let ((res (proc)))
	    (close-input-port inp)
	    res))))))

;;======================================================================
;; wait for normalized cpu load to drop below maxload
;;
(define (common:wait-for-normalized-load maxnormload msg remote-host #!optional (rem-tries 5))
  (let ((num-cpus (common:get-num-cpus remote-host)))
    (if num-cpus
	(common:wait-for-cpuload maxnormload num-cpus 15 msg: msg remote-host: remote-host)

Modified commonmod.scm from [19a34cf301] to [a0125e2f23].

15
16
17
18
19
20
21
22

23
24
25
26
27
28
29
30
31
32
33

34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50




51
52
53
54
55
56
57
15
16
17
18
19
20
21

22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50

51
52
53
54
55
56
57
58
59
60
61







-
+











+
















-
+
+
+
+







;; 
;;     You should have received a copy of the GNU General Public License
;;     along with Megatest.  If not, see <http://www.gnu.org/licenses/>.

;;======================================================================

(declare (unit commonmod))
;; (declare (uses debugprint))
(declare (uses debugprint))

(use srfi-69)

(module commonmod
	*

(import scheme)
(cond-expand
 (chicken-4
  
  (import chicken
	  ports
	  
	  (prefix sqlite3 sqlite3:)
	  data-structures
	  extras
	  files
	  matchable
	  md5
	  message-digest
	  pathname-expand
	  posix
	  posix-extras
	  regex
	  regex-case
	  srfi-1
	  srfi-18
	  srfi-69
	  typed-records)
	  typed-records

	  debugprint
	  )
  (use srfi-69))
 (chicken-5
  (import (prefix sqlite3 sqlite3:)
	  ;; data-structures
	  ;; extras
	  ;; files
	  ;; posix
695
696
697
698
699
700
701
702
















703
























699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722

723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746








+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
				      (else exe)))))
			  '("../../" "../")))))
    (if (null? res)
	(begin
	  ;; (debug:print 0 *default-log-port* "Failed to find this executable! Using what can be found on the path")
	  progname)
	(car res))))

(define (common:generic-ssh ssh-command proc default #!optional (msg-proc #f))
  (let ((inp #f))
    (handle-exceptions
	exn
      (begin
	(close-input-port inp)
	(if msg-proc
	    (msg-proc)
	    (debug:print 0 *default-log-port* "Command: \""ssh-command"\" failed. exn="exn))
	default)
      (set! inp (open-input-pipe ssh-command))
      (with-input-from-port inp
	(lambda ()
	  (let ((res (proc)))
	    (close-input-port inp)
	    res))))))
)

;; this is a close duplicate of:
;;    process:alist-on-host?
;;    process:alive
;;
(define (commonmod:is-test-alive host pid)
  (let* ((same-host (equal? host (get-host-name)))
	 (cmd (conc 
	       (if same-host "" (conc "ssh "host" "))
	       "pstree -A "pid)))
    (if (and host pid
	     (not (equal? host "n/a")))
	
	(let* ((output (if same-host
			   (with-input-from-pipe cmd read-lines)
			   (common:generic-ssh cmd read-lines '())))) ;; (with-input-from-pipe cmd read-lines)))
	  (debug:print 2 *default-log-port* "Running " cmd " received " output)
	  (if (eq? (length output) 0)
	      #f
	      #t))
	#t))) ;; assuming bad query is about a live test is likely not the right thing to do?


)

Modified dashboard.scm from [b98c976a8a] to [7114e0e406].

20
21
22
23
24
25
26

27
28
29
30
31
32
33
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34







+








(declare (uses common))
(declare (uses mtargs))
(declare (uses mtargs.import))
(declare (uses keys))
(declare (uses items))
(declare (uses debugprint))
(declare (uses debugprint.import))
(declare (uses db))
(declare (uses configf))
(declare (uses process))
(declare (uses launch))
(declare (uses runs))
(declare (uses dashboard-tests))
(declare (uses tree))
70
71
72
73
74
75
76

77
78
79
80
81
82
83
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85







+







(include "vg_records.scm")

;; set some parameters here - these need to be put in something that can be loaded from other
;; executables such as dashboard and mtutil
;;
(include "dashboard-transport-mode.scm")
(dbfile:db-init-proc db:initialize-main-db)
(set! rmtmod:send-receive rmt:send-receive)

(define help (conc 
	      "Megatest Dashboard, documentation at http://www.kiatoa.com/fossils/megatest version " megatest-version 
              " license GPL, Copyright (C) Matt Welland 2012-2017

Usage: dashboard [options]
  -h                    : this help

Modified db.scm from [18cf960c23] to [3353e0f2ec].

1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1109
1110
1111
1112
1113
1114
1115

























































































1116
1117
1118
1119
1120
1121
1122







-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-







;;        (debug:print-info 18 *default-log-port* "Found " (length oldlaunched) " old LAUNCHED items, " (length toplevels) " old LAUNCHED toplevel tests and " (length incompleted) " tests marked RUNNING but apparently dead.")
;;        (if (and (null? incompleted)
;;                 (null? oldlaunched)
;;                 (null? toplevels))
;;            #f
;;            #t)))))

(define (db:get-status-from-final-status-file run-dir)
  (let ((infile (conc run-dir "/.final-status")))
    ;; first verify we are able to write the output file
    (if (not (file-read-access? infile))
        (begin 
	  (debug:print 2 *default-log-port* "ERROR: cannot read " infile)
          (debug:print 2 *default-log-port* "ERROR: run-dir is " run-dir)
          #f
          )
        (with-input-from-file infile read-lines)
	)))

;;  select end_time-now from
;;      (select testname,item_path,event_time+run_duration as
;;                          end_time,strftime('%s','now') as now from tests where state in
;;      ('RUNNING','REMOTEHOSTSTART','LAUNCHED'));
;;
;; NOT EASY TO MIGRATE TO db{file,mod}
;;
(define (db:find-and-mark-incomplete dbstruct run-id ovr-deadtime)
  (let* ((incompleted '())
	 (oldlaunched '())
	 (toplevels   '())
          ;; The default running-deadtime is 720 seconds = 12 minutes.
          ;; "(running-deadtime-default (+ server-start-allowance (* 2 launch-monitor-period)))" = 200 + (2 * (200 + 30 + 30))
         (deadtime-trim (or ovr-deadtime (configf:lookup-number *configdat* "setup" "deadtime")))
         (server-start-allowance 200)
         (server-overloaded-budget 200)
         (launch-monitor-off-time (or (configf:lookup-number *configdat* "setup" "test-stats-update-period") 30))
         (launch-monitor-on-time-budget 30)
         (launch-monitor-period (+ launch-monitor-off-time launch-monitor-on-time-budget server-overloaded-budget))
         (remotehoststart-deadtime-default (+ server-start-allowance server-overloaded-budget 30))
         (remotehoststart-deadtime (or deadtime-trim remotehoststart-deadtime-default))
         (running-deadtime-default (+ server-start-allowance (* 2 launch-monitor-period)))
         (running-deadtime (or deadtime-trim running-deadtime-default))) ;; two minutes (30 seconds between updates, this leaves 3x grace period)

    (debug:print-info 4  *default-log-port* "running-deadtime = " running-deadtime)
    (debug:print-info 4  *default-log-port* "deadtime-trim = " deadtime-trim)

    (let* ((dat (db:get-toplevels-and-incompletes dbstruct run-id running-deadtime remotehoststart-deadtime)))
      (set! oldlaunched (list-ref dat 1))
      (set! toplevels   (list-ref dat 2))
      (set! incompleted (list-ref dat 0)))

    (debug:print-info 18 *default-log-port* "Found " (length oldlaunched) " old LAUNCHED items, "
		      (length toplevels) " old LAUNCHED toplevel tests and "
		      (length incompleted) " tests marked RUNNING but apparently dead.")
  
    ;; These are defunct tests, do not do all the overhead of set-state-status. Force them to INCOMPLETE.
    ;;
    ;; (db:delay-if-busy dbdat)
    (let* ((min-incompleted-ids (map car incompleted)) ;; do 'em all
	   (all-ids             (append min-incompleted-ids (map car oldlaunched))))
      (if (> (length all-ids) 0)
	  (begin
	    ;; (launch:is-test-alive "localhost" 435)
	    (debug:print 0 *default-log-port* "WARNING: Marking test(s); " (string-intersperse (map conc all-ids) ", ")
			 " as DEAD")
	    (for-each
             (lambda (test-id)
               (let* ((tinfo   (db:get-test-info-by-id dbstruct run-id test-id))
		      (run-dir (db:test-get-rundir     tinfo))
		      (host    (db:test-get-host       tinfo))
		      (pid     (db:test-get-process_id tinfo))
		      (result (db:get-status-from-final-status-file run-dir)))
		 (if (and (list? result) (> (length result) 1) (equal? "PASS" (cadr result)) (equal? "COMPLETED" (car result))) 
		     (begin
		       (debug:print 0 *default-log-port* "INFO: test " test-id " actually passed, so marking PASS not DEAD")
		       (db:set-state-status-and-roll-up-items
			dbstruct run-id test-id 'foo "COMPLETED" "PASS"
			"Test stopped responding but it has PASSED; marking it PASS in the DB."))
		     (let ((is-alive (and (not (eq? pid 0))  ;; 0 is default in re-used field "attemptnum" where pid stored.
					  (launch:is-test-alive host pid))))
		       (if is-alive
			   (debug:print 0 *default-log-port* "INFO: test " test-id " on host " host
					" has a process on pid " pid ", NOT setting to DEAD.")
			   (begin
			     (debug:print 0 *default-log-port* "INFO: test " test-id
					  " final state/status is not COMPLETED/PASS. It is " result)
			     (db:set-state-status-and-roll-up-items
			      dbstruct run-id test-id 'foo "COMPLETED" "DEAD"
			      "Test stopped responding while in RUNNING or REMOTEHOSTSTART; presumed dead.")))))))
	     ;; call end of eud of run detection for posthook - from merge, is it needed?
	     ;; (launch:end-of-run-check run-id)
	     all-ids)
	    ;;call end of eud of run detection for posthook
	    (launch:end-of-run-check run-id)
	    )))))

;; BUG: Probably broken - does not explicitly use run-id in the query
;;
(define (db:top-test-set-per-pf-counts dbstruct run-id test-name)
  (db:general-call dbstruct run-id 'top-test-set-per-pf-counts (list test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name)))

;; Clean out old junk and vacuum the database
;;

Modified db_records.scm from [6eb715496b] to [92d912cdd4].

14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
14
15
16
17
18
19
20










21
22
23
24
25
26
27







-
-
-
-
-
-
-
-
-
-







;; 
;;     You should have received a copy of the GNU General Public License
;;     along with Megatest.  If not, see <http://www.gnu.org/licenses/>.

;;======================================================================
;; dbstruct
;;======================================================================


;; Returns the database for a particular run-id fron the dbstruct:localdbs
;;
(define (dbr:dbstruct-localdb v run-id)
  (hash-table-ref/default (dbr:dbstruct-locdbs v) run-id #f))

(define (dbr:dbstruct-localdb-set! v run-id db)
  (hash-table-set! (dbr:dbstruct-locdbs v) run-id db))


(define (make-db:test)(make-vector 20))
(define-inline (db:test-get-id           vec) (vector-ref vec 0))
(define-inline (db:test-get-run_id       vec) (vector-ref vec 1))
(define-inline (db:test-get-testname     vec) (vector-ref vec 2))
(define-inline (db:test-get-state        vec) (vector-ref vec 3))
(define-inline (db:test-get-status       vec) (vector-ref vec 4))

Modified launch.scm from [d5ebd1ded6] to [73ea6dbf9b].

838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
838
839
840
841
842
843
844





















845
846
847
848
849
850
851







-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-







	     (output (with-input-from-pipe cmd read-lines)))
	(debug:print 2 *default-log-port* "Running " cmd " received " output)
	(if (eq? (length output) 0)
	   #f
	   #t))
      #t))

;; this is a close duplicate of:
;;    process:alist-on-host?
;;    process:alive
;;
(define (launch:is-test-alive host pid)
  (let* ((same-host (equal? host (get-host-name)))
	 (cmd (conc 
	       (if same-host "" (conc "ssh "host" "))
	       "pstree -A "pid)))
    (if (and host pid
	     (not (equal? host "n/a")))
	
	(let* ((output (if same-host
			   (with-input-from-pipe cmd read-lines)
			   (common:generic-ssh cmd read-lines '())))) ;; (with-input-from-pipe cmd read-lines)))
	  (debug:print 2 *default-log-port* "Running " cmd " received " output)
	  (if (eq? (length output) 0)
	      #f
	      #t))
	#t))) ;; assuming bad query is about a live test is likely not the right thing to do?

(define (launch:kill-tests-if-dead run-id)
  (let* ((running-tests (rmt:get-tests-for-run run-id "%" `("RUNNING" "LAUNCHED" "REMOTEHOSTSTART") `() #f #f #f #f #f #f #f #f)))
       (let loop ((running-test (car running-tests))
			     (tal    (cdr running-tests))
			     (kill-cnt 0))
		       (let* ((test-name (vector-ref running-test 2))
                 (item-path (vector-ref running-test 11))

Modified megatest.scm from [6f4fa87a54] to [5b9e4f0ac8].

23
24
25
26
27
28
29


30
31
32
33
34
35
36
37
38
39
40
41
42
23
24
25
26
27
28
29
30
31
32
33
34
35


36
37
38
39
40
41
42







+
+




-
-







(define (toplevel-command . a) #f)

(declare (uses common))
;; (declare (uses megatest-version))
;; (declare (uses margs))
;; (declare (uses mtargs))
;; (declare (uses mtargs.import))
(declare (uses debugprint))
(declare (uses debugprint.import))
(declare (uses commonmod))
(declare (uses commonmod.import))
(declare (uses mtargs))
(declare (uses mtargs.import))
(declare (uses debugprint))
(declare (uses debugprint.import))
(declare (uses runs))
(declare (uses launch))
(declare (uses server))
(declare (uses tests))
(declare (uses genexample))
;; (declare (uses daemon))

Modified mtargs.scm from [1e6b59e54f] to [22cf29b180].

15
16
17
18
19
20
21


22
23
15
16
17
18
19
20
21
22
23
24
25







+
+


;; 
;;     You should have received a copy of the GNU General Public License
;;     along with Megatest.  If not, see <http://www.gnu.org/licenses/>.

;;======================================================================

(declare (unit mtargs))

(use srfi-69)

(include "mtargs/mtargs.scm")

Modified rmt.scm from [35e00a1063] to [0af3ea0170].

312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
312
313
314
315
316
317
318






























































319
320
321
322
323
324
325







-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-







  (rmt:send-receive 'get-run-times #f (list runpatt targetpatt ))) 


;;======================================================================
;;  T E S T S
;;======================================================================

;; Just some syntatic sugar
(define (rmt:register-test run-id test-name item-path)
  (assert (number? run-id) "FATAL: Run id required.")
  (rmt:general-call 'register-test run-id run-id test-name item-path))

(define (rmt:get-test-id run-id testname item-path)
  (assert (number? run-id) "FATAL: Run id required.")
  (rmt:send-receive 'get-test-id run-id (list run-id testname item-path)))

(define (rmt:get-test-info-by-id run-id test-id)
  (if (number? test-id)
      (rmt:send-receive 'get-test-info-by-id run-id (list run-id test-id))
      (begin
	(debug:print 0 *default-log-port* "WARNING: Bad data handed to rmt:get-test-info-by-id run-id=" run-id ", test-id=" test-id)
	(print-call-chain (current-error-port))
	#f)))

(define (rmt:get-test-state-status-by-id run-id test-id)
  (rmt:send-receive 'get-test-state-status-by-id run-id (list run-id test-id)))

(define (rmt:test-get-rundir-from-test-id run-id test-id)
  (rmt:send-receive 'test-get-rundir-from-test-id run-id (list run-id test-id)))

(define (rmt:open-test-db-by-test-id run-id test-id #!key (work-area #f))
  (assert (number? run-id) "FATAL: Run id required.")
  (let* ((test-path (if (string? work-area)
			work-area
			(rmt:test-get-rundir-from-test-id run-id test-id))))
    (debug:print 3 *default-log-port* "TEST PATH: " test-path)
    (open-test-db test-path)))

;; WARNING: This currently bypasses the transaction wrapped writes system
(define (rmt:test-set-state-status-by-id run-id test-id newstate newstatus newcomment)
  (assert (number? run-id) "FATAL: Run id required.")
  (rmt:send-receive 'test-set-state-status-by-id run-id (list run-id test-id newstate newstatus newcomment)))

(define (rmt:set-tests-state-status run-id testnames currstate currstatus newstate newstatus)
  (assert (number? run-id) "FATAL: Run id required.")
  (rmt:send-receive 'set-tests-state-status run-id (list run-id testnames currstate currstatus newstate newstatus)))

(define (rmt:get-tests-for-run run-id testpatt states statuses offset limit not-in sort-by sort-order qryvals last-update mode)
  (assert (number? run-id) "FATAL: Run id required.")
  ;; (if (number? run-id)
  (rmt:send-receive 'get-tests-for-run run-id (list run-id testpatt states statuses offset limit not-in sort-by sort-order qryvals last-update mode)))
  ;;    (begin
  ;;	(debug:print-error 0 *default-log-port* "rmt:get-tests-for-run called with bad run-id=" run-id)
  ;;	(print-call-chain (current-error-port))
  ;;	'())))

(define (rmt:get-tests-for-run-state-status run-id testpatt last-update)
  (assert (number? run-id) "FATAL: Run id required.")
  (rmt:send-receive 'get-tests-for-run-state-status run-id (list run-id testpatt last-update)))

;; get stuff via synchash 
(define (rmt:synchash-get run-id proc synckey keynum params)
  (assert (number? run-id) "FATAL: Run id required.")
  (rmt:send-receive 'synchash-get run-id (list run-id proc synckey keynum params)))

(define (rmt:get-tests-for-run-mindata run-id testpatt states status not-in)
  (assert (number? run-id) "FATAL: Run id required.")
  (rmt:send-receive 'get-tests-for-run-mindata run-id (list run-id testpatt states status not-in)))
  
;; IDEA: Threadify these - they spend a lot of time waiting ...
;;
(define (rmt:get-tests-for-runs-mindata run-ids testpatt states status not-in)
  (let ((multi-run-mutex (make-mutex))
	(run-id-list (if run-ids
			 run-ids
			 (rmt:get-all-run-ids)))
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
437
438
439
440
441
442
443






444
445
446
447
448
449
450







-
-
-
-
-
-







  (assert (number? run-id) "FATAL: Run id required.")
  (rmt:send-receive 'get-count-tests-running-for-testname run-id (list run-id testname)))

(define (rmt:get-count-tests-running-in-jobgroup run-id jobgroup)
  (assert (number? run-id) "FATAL: Run id required.")
  (rmt:send-receive 'get-count-tests-running-in-jobgroup run-id (list run-id jobgroup)))

;; state and status are extra hints not usually used in the calculation
;;
(define (rmt:set-state-status-and-roll-up-items run-id test-name item-path state status comment)
  (assert (number? run-id) "FATAL: Run id required.")
  (rmt:send-receive 'set-state-status-and-roll-up-items run-id (list run-id test-name item-path state status comment)))

(define (rmt:set-state-status-and-roll-up-run run-id state status)
  (assert (number? run-id) "FATAL: Run id required.")
  (rmt:send-receive 'set-state-status-and-roll-up-run run-id (list run-id state status)))


(define (rmt:update-pass-fail-counts run-id test-name)
  (assert (number? run-id) "FATAL: Run id required.")
829
830
831
832
833
834
835










761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777







+
+
+
+
+
+
+
+
+
+
	   (if runremote
	       (begin
		 (tt-ro-mode-set! runremote ro-mode)
		 (tt-ro-mode-checked-set! runremote #t)
		 ro-mode)
	       ro-mode))))))

;;======================================================================
;; Maintenance
;;======================================================================

(define (rmt:find-and-mark-incomplete run-id ovr-deadtime)
  (let* ((cfg-deadtime             (configf:lookup-number *configdat* "setup" "deadtime"))
	 (test-stats-update-period (configf:lookup-number *configdat* "setup" "test-stats-update-period")))
   (rmt:find-and-mark-incomplete-engine run-id ovr-deadtime cfg-deadtime test-stats-update-period)
   ;;call end of eud of run detection for posthook
   (launch:end-of-run-check run-id)))

Modified rmtmod.scm from [5ddda54a47] to [c4f748fe17].

15
16
17
18
19
20
21

22
23
24
25
26
27
28
29
30
31
32
33
34
35

36
37
38
39


40
41
42
43
44
45
46
15
16
17
18
19
20
21
22
23
24

25
26
27
28
29
30
31
32
33
34

35
36
37
38
39
40
41
42
43
44
45
46
47
48







+


-










-
+




+
+







;; 
;;     You should have received a copy of the GNU General Public License
;;     along with Megatest.  If not, see <http://www.gnu.org/licenses/>.

;;======================================================================

(declare (unit rmtmod))
(declare (uses debugprint))
(declare (uses commonmod))
(declare (uses dbfile))    ;; needed for records
(declare (uses debugprint))

;; (declare (uses apimod))
;; (declare (uses apimod.import))
;; (declare (uses ulex))

;; (include "ulex/ulex.scm")

(module rmtmod
	*
	
(import scheme chicken data-structures extras matchable)
(import scheme chicken data-structures extras matchable srfi-69)
(import (prefix sqlite3 sqlite3:) posix typed-records srfi-18)
(import commonmod dbfile debugprint) ;; (prefix commonmod cmod:))
;; (import apimod)
;; (import (prefix ulex ulex:))

(include "db_records.scm")

(defstruct alldat
  (areapath #f)
  (ulexdat  #f)
  )

;; hold the send-receive proc in this parameter
107
108
109
110
111
112
113



114




115



116






























































































































































109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127

128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285







+
+
+

+
+
+
+

+
+
+
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+

(define (rmt:insert-test run-id test-rec)
  (let* ((testname  (alist-ref "testname" test-rec equal?))
	 (item-path (alist-ref "item_path" test-rec equal?)))
    (debug:print 0 *default-log-port* "   Insert test in run "run-id": "testname"/"item-path)
    (rmtmod:send-receive 'insert-test run-id test-rec)))

;;======================================================================
;;  T E S T S
;;======================================================================

;; Just some syntatic sugar
(define (rmt:register-test run-id test-name item-path)
  (assert (number? run-id) "FATAL: Run id required.")
  (rmt:general-call 'register-test run-id run-id test-name item-path))

(define (rmt:get-test-id run-id testname item-path)
  (assert (number? run-id) "FATAL: Run id required.")
  (rmtmod:send-receive 'get-test-id run-id (list run-id testname item-path)))
)

(define (rmt:get-test-info-by-id run-id test-id)
  (if (number? test-id)
      (rmtmod:send-receive 'get-test-info-by-id run-id (list run-id test-id))
      (begin
	(debug:print 0 *default-log-port* "WARNING: Bad data handed to rmt:get-test-info-by-id run-id=" run-id ", test-id=" test-id)
	(print-call-chain (current-error-port))
	#f)))

(define (rmt:get-test-state-status-by-id run-id test-id)
  (rmtmod:send-receive 'get-test-state-status-by-id run-id (list run-id test-id)))

(define (rmt:test-get-rundir-from-test-id run-id test-id)
  (rmtmod:send-receive 'test-get-rundir-from-test-id run-id (list run-id test-id)))

;; (define (rmt:open-test-db-by-test-id run-id test-id #!key (work-area #f))
;;   (assert (number? run-id) "FATAL: Run id required.")
;;   (let* ((test-path (if (string? work-area)
;; 			work-area
;; 			(rmt:test-get-rundir-from-test-id run-id test-id))))
;;     (debug:print 3 *default-log-port* "TEST PATH: " test-path)
;;     (open-test-db test-path)))

;; WARNING: This currently bypasses the transaction wrapped writes system
(define (rmt:test-set-state-status-by-id run-id test-id newstate newstatus newcomment)
  (assert (number? run-id) "FATAL: Run id required.")
  (rmtmod:send-receive 'test-set-state-status-by-id run-id (list run-id test-id newstate newstatus newcomment)))

(define (rmt:set-tests-state-status run-id testnames currstate currstatus newstate newstatus)
  (assert (number? run-id) "FATAL: Run id required.")
  (rmtmod:send-receive 'set-tests-state-status run-id (list run-id testnames currstate currstatus newstate newstatus)))

(define (rmt:get-tests-for-run run-id testpatt states statuses offset limit not-in sort-by sort-order qryvals last-update mode)
  (assert (number? run-id) "FATAL: Run id required.")
  ;; (if (number? run-id)
  (rmtmod:send-receive 'get-tests-for-run run-id (list run-id testpatt states statuses offset limit not-in sort-by sort-order qryvals last-update mode)))
  ;;    (begin
  ;;	(debug:print-error 0 *default-log-port* "rmt:get-tests-for-run called with bad run-id=" run-id)
  ;;	(print-call-chain (current-error-port))
  ;;	'())))

(define (rmt:get-tests-for-run-state-status run-id testpatt last-update)
  (assert (number? run-id) "FATAL: Run id required.")
  (rmtmod:send-receive 'get-tests-for-run-state-status run-id (list run-id testpatt last-update)))

;; get stuff via synchash 
(define (rmt:synchash-get run-id proc synckey keynum params)
  (assert (number? run-id) "FATAL: Run id required.")
  (rmtmod:send-receive 'synchash-get run-id (list run-id proc synckey keynum params)))

(define (rmt:get-tests-for-run-mindata run-id testpatt states status not-in)
  (assert (number? run-id) "FATAL: Run id required.")
  (rmtmod:send-receive 'get-tests-for-run-mindata run-id (list run-id testpatt states status not-in)))
  
;; state and status are extra hints not usually used in the calculation
;;
(define (rmt:set-state-status-and-roll-up-items run-id test-name item-path state status comment)
  (assert (number? run-id) "FATAL: Run id required.")
  (rmtmod:send-receive 'set-state-status-and-roll-up-items run-id (list run-id test-name item-path state status comment)))


;;======================================================================
;; Maintenance
;;======================================================================


(define (rmt:get-toplevels-and-incompletes run-id running-deadtime remotehoststart-deadtime)
  (rmtmod:send-receive 'get-toplevels-and-incompletes run-id (list run-id running-deadtime remotehoststart-deadtime)))

(define (rmt:get-status-from-final-status-file run-dir)
  (let ((infile (conc run-dir "/.final-status")))
    ;; first verify we are able to write the output file
    (if (not (file-read-access? infile))
        (begin 
	  (debug:print 2 *default-log-port* "ERROR: cannot read " infile)
          (debug:print 2 *default-log-port* "ERROR: run-dir is " run-dir)
          #f
          )
        (with-input-from-file infile read-lines)
	)))
  
;;  select end_time-now from
;;      (select testname,item_path,event_time+run_duration as
;;                          end_time,strftime('%s','now') as now from tests where state in
;;      ('RUNNING','REMOTEHOSTSTART','LAUNCHED'));
;;
;; NOT EASY TO MIGRATE TO db{file,mod}
;;
(define (rmt:find-and-mark-incomplete-engine run-id ovr-deadtime cfg-deadtime test-stats-update-period)
  (let* ((incompleted '())
	 (oldlaunched '())
	 (toplevels   '())
          ;; The default running-deadtime is 720 seconds = 12 minutes.
          ;; "(running-deadtime-default (+ server-start-allowance (* 2 launch-monitor-period)))" = 200 + (2 * (200 + 30 + 30))
         (deadtime-trim (or ovr-deadtime cfg-deadtime))
         (server-start-allowance 200)
         (server-overloaded-budget 200)
         (launch-monitor-off-time (or test-stats-update-period 30))
         (launch-monitor-on-time-budget 30)
         (launch-monitor-period (+ launch-monitor-off-time launch-monitor-on-time-budget server-overloaded-budget))
         (remotehoststart-deadtime-default (+ server-start-allowance server-overloaded-budget 30))
         (remotehoststart-deadtime (or deadtime-trim remotehoststart-deadtime-default))
         (running-deadtime-default (+ server-start-allowance (* 2 launch-monitor-period)))
         (running-deadtime (or deadtime-trim running-deadtime-default))) ;; two minutes (30 seconds between updates, this leaves 3x grace period)

    (debug:print-info 4  *default-log-port* "running-deadtime = " running-deadtime)
    (debug:print-info 4  *default-log-port* "deadtime-trim = " deadtime-trim)

    (let* ((dat (rmt:get-toplevels-and-incompletes run-id running-deadtime remotehoststart-deadtime)))
      (set! oldlaunched (list-ref dat 1))
      (set! toplevels   (list-ref dat 2))
      (set! incompleted (list-ref dat 0)))

    (debug:print-info 18 *default-log-port* "Found " (length oldlaunched) " old LAUNCHED items, "
		      (length toplevels) " old LAUNCHED toplevel tests and "
		      (length incompleted) " tests marked RUNNING but apparently dead.")
  
    ;; These are defunct tests, do not do all the overhead of set-state-status. Force them to INCOMPLETE.
    ;;
    ;; (db:delay-if-busy dbdat)
    (let* ((min-incompleted-ids (map car incompleted)) ;; do 'em all
	   (all-ids             (append min-incompleted-ids (map car oldlaunched))))
      (if (> (length all-ids) 0)
	  (begin
	    ;; (launch:is-test-alive "localhost" 435)
	    (debug:print 0 *default-log-port* "WARNING: Marking test(s); " (string-intersperse (map conc all-ids) ", ")
			 " as DEAD")
	    (for-each
             (lambda (test-id)
               (let* ((tinfo   (rmt:get-test-info-by-id run-id test-id))
		      (run-dir (db:test-get-rundir     tinfo))
		      (host    (db:test-get-host       tinfo))
		      (pid     (db:test-get-process_id tinfo))
		      (result (rmt:get-status-from-final-status-file run-dir)))
		 (if (and (list? result) (> (length result) 1) (equal? "PASS" (cadr result)) (equal? "COMPLETED" (car result))) 
		     (begin
		       (debug:print 0 *default-log-port* "INFO: test " test-id " actually passed, so marking PASS not DEAD")
		       (rmt:set-state-status-and-roll-up-items
			run-id test-id 'foo "COMPLETED" "PASS"
			"Test stopped responding but it has PASSED; marking it PASS in the DB."))
		     (let ((is-alive (and (not (eq? pid 0))  ;; 0 is default in re-used field "attemptnum" where pid stored.
					  (commonmod:is-test-alive host pid))))
		       (if is-alive
			   (debug:print 0 *default-log-port* "INFO: test " test-id " on host " host
					" has a process on pid " pid ", NOT setting to DEAD.")
			   (begin
			     (debug:print 0 *default-log-port* "INFO: test " test-id
					  " final state/status is not COMPLETED/PASS. It is " result)
			     (rmt:set-state-status-and-roll-up-items
			      run-id test-id 'foo "COMPLETED" "DEAD"
			      "Test stopped responding while in RUNNING or REMOTEHOSTSTART; presumed dead.")))))))
	     ;; call end of eud of run detection for posthook - from merge, is it needed?
	     ;; (launch:end-of-run-check run-id)
	     all-ids)
	    )))))


)