Megatest

Changes On Branch 82da6b65d8f80157
Login

Changes In Branch v1.65-multi-db Through [82da6b65d8] Excluding Merge-Ins

This is equivalent to a diff from b5a0ecc65a to 82da6b65d8

2019-02-03
20:46
Ripped db.scm to shreds, converted to module. check-in: 439caadb72 user: matt tags: v1.65-multi-db
20:18
Added db stuff into mtserver check-in: 82da6b65d8 user: matt tags: v1.65-multi-db
19:47
Tore it all apart. Can Humpty be put back together again? check-in: e660d445be user: matt tags: v1.65-multi-db
2019-02-02
19:31
Merged v1.65 changes Leaf check-in: 1d106be172 user: matt tags: v1.65-intra-waiton
06:00
Updated portlogger to check on the port availability check-in: 60b0f467ff user: matt tags: v1.65-multi-db
2019-01-28
09:42
Moved boxes w/2 and h/2 in flow view. check-in: b5a0ecc65a user: mrwellan tags: v1.65-intra-waiton, v1.6521
2019-01-27
13:49
Merged in handy changes from v1.65 check-in: 1254c88236 user: matt tags: v1.65-intra-waiton

Modified Makefile from [8e9ef04054] to [171a28d49f].

25
26
27
28
29
30
31
32

33
34
35




36
37
38
39
40
41
42
43
44
45
46

47
48
49
50
51
52
53
25
26
27
28
29
30
31

32
33
34

35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57







-
+


-
+
+
+
+











+







   ods.scm runconfig.scm server.scm configf.scm \
   db.scm keys.scm margs.scm megatest-version.scm \
   process.scm runs.scm tasks.scm tests.scm genexample.scm \
   http-transport.scm filedb.scm tdb.scm \
   client.scm mt.scm \
   ezsteps.scm lock-queue.scm sdb.scm \
   rmt.scm api.scm subrun.scm \
   portlogger.scm archive.scm env.scm diff-report.scm cgisetup/models/pgdb.scm 
   archive.scm env.scm diff-report.scm cgisetup/models/pgdb.scm 

# module source files
MSRCFILES = ftail.scm
MSRCFILES = ftail.scm portlogger.scm nmsg-transport.scm

# files needed for mtserve
MTSERVEFILES = common.scm megatest-version.scm margs.scm server.scm db.scm keys.scm ods.scm

# Eggs to install (straightforward ones)
EGGS=matchable readline apropos base64 regex-literals format regex-case test coops trace csv \
dot-locking posix-utils posix-extras directory-utils hostinfo tcp-server rpc csv-xml fmt \
json md5 awful http-client spiffy uri-common intarweb spiffy-request-vars \
spiffy-directory-listing ssax sxml-serializer sxml-modifications iup canvas-draw sqlite3

GUISRCF  = dashboard-context-menu.scm dashboard-tests.scm dashboard-guimonitor.scm gutils.scm dcommon.scm tree.scm vg.scm

OFILES   = $(SRCFILES:%.scm=%.o)
GOFILES  = $(GUISRCF:%.scm=%.o)
MTSERVEOFILES = $(MTSERVEFILES:%.scm=%.o)

MOFILES = $(addprefix mofiles/,$(MSRCFILES:%.scm=%.o))

mofiles/%.o : %.scm
	mkdir -p mofiles
	csc $(CSCOPTS) -J -c $< -o mofiles/$*.o

66
67
68
69
70
71
72
73
74

75
76

77
78



79
80
81
82
83
84
85
86
87
88
89


90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120



121
122
123


124
125
126
127
128
129
130
70
71
72
73
74
75
76


77
78

79
80
81
82
83
84
85
86
87
88
89
90
91
92
93


94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114

115
116

117
118
119
120
121
122
123

124
125
126
127


128
129
130
131
132
133
134
135
136







-
-
+

-
+


+
+
+









-
-
+
+



















-


-







-
+
+
+

-
-
+
+







# BASH_MACHTYPE=$(shell bash -c "echo \$$MACHTYPE")
# ARCHSTR=$(BASH_MACHTYPE)_$(shell lsb_release -sr)
ARCHSTR=$(shell if [[ -e /usr/bin/sw_vers ]]; then /usr/bin/sw_vers -productVersion; else lsb_release -sr; fi)
# ARCHSTR=$(shell bash -c "echo \$$MACHTYPE")

PNGFILES = $(shell cd docs/manual;ls *png)

#all : $(PREFIX)/bin/.$(ARCHSTR) mtest dboard mtut ndboard
all : $(PREFIX)/bin/.$(ARCHSTR) mtest dboard mtut
all : $(PREFIX)/bin/.$(ARCHSTR) mtest dboard mtut mtserve

mtest: $(OFILES) readline-fix.scm megatest.o $(MOFILES) mofiles/ftail.o
mtest: $(OFILES) readline-fix.scm megatest.o $(MOFILES)
	csc $(CSCOPTS) $(OFILES) $(MOFILES) megatest.o -o mtest

mtserve: $(MTSERVEOFILES) readline-fix.scm mtserve.o $(MOFILES)
	csc $(CSCOPTS) $(MTSERVEOFILES) $(MOFILES) mtserve.o -o mtserve

showmtesthash:
	@echo $(MTESTHASH)

dboard : $(OFILES) $(GOFILES) dashboard.scm $(MOFILES)
	csc $(CSCOPTS) $(OFILES) dashboard.scm $(GOFILES) $(MOFILES) -o dboard

ndboard : newdashboard.scm $(OFILES) $(GOFILES)
	csc $(CSCOPTS) $(OFILES) $(GOFILES) newdashboard.scm -o ndboard

mtut: $(OFILES) megatest-fossil-hash.scm mtut.scm
	csc $(CSCOPTS) $(OFILES) mtut.scm -o mtut
mtut: $(OFILES) megatest-fossil-hash.scm mtut.scm $(MOFILES)
	csc $(CSCOPTS) $(OFILES) $(MOFILES) mtut.scm -o mtut

TCMTOBJS = \
	api.o \
	archive.o \
	cgisetup/models/pgdb.o \
	client.o \
	common.o \
	configf.o \
	db.o \
	env.o \
	http-transport.o \
	items.o \
	keys.o \
	launch.o \
	lock-queue.o \
	margs.o \
	mt.o \
	megatest-version.o \
	ods.o \
	portlogger.o \
	process.o \
	rmt.o \
	rpc-transport.o \
	runconfig.o \
	runs.o \
	server.o \
	tasks.o \
	tdb.o \
	tests.o \
	subrun.o \

 
#	rpc-transport.o \
#	portlogger.o \

tcmt : $(TCMTOBJS) tcmt.scm
	csc $(CSCOPTS) $(TCMTOBJS) tcmt.scm -o tcmt
tcmt : $(TCMTOBJS) tcmt.scm $(MOFILES)
	csc $(CSCOPTS) $(TCMTOBJS) $(MOFILES) tcmt.scm -o tcmt

# install documentation to $(PREFIX)/docs
# DOES NOT REBUILD DOCS
#
$(PREFIX)/share/docs/megatest_manual.html : docs/manual/megatest_manual.html
	mkdir -p $(PREFIX)/share/docs
	$(INSTALL) docs/manual/megatest_manual.html $(PREFIX)/share/docs/megatest_manual.html
154
155
156
157
158
159
160





161
162
163
164
165
166
167
168
169
170
171





172
173
174
175
176
177










178
179
180
181
182
183
184
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180


181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208







+
+
+
+
+









-
-
+
+
+
+
+






+
+
+
+
+
+
+
+
+
+







tests.o tasks.o dashboard-tasks.o : task_records.scm
runs.o : test_records.scm
megatest.o : megatest-fossil-hash.scm
rmt.scm client.scm common.scm configf.scm dashboard-guimonitor.scm dashboard-tests.scm dashboard.scm db.scm dcommon.scm ezsteps.scm fs-transport.scm http-transport.scm index-tree.scm items.scm keys.scm launch.scm megatest.scm monitor.scm mt.scm newdashboard.scm runconfig.scm runs.scm server.scm tdb.scm tests.scm tree.scm : common_records.scm
common_records.scm : altdb.scm
vg.o dashboard.o : vg_records.scm
dcommon.o : run_records.scm

# module deps
http-transport.o : mofiles/portlogger.o
megatest.o rnt.o : mofiles/nmsg-transport.o

# Temporary while transitioning to new routine
# runs.o : run-tests-queue-classic.scm  run-tests-queue-new.scm

megatest-fossil-hash.scm : $(SRCFILES) megatest.scm *_records.scm
	echo "(define megatest-fossil-hash \"$(MTESTHASH)\")" > megatest-fossil-hash.new
	if ! diff -q megatest-fossil-hash.new megatest-fossil-hash.scm ; then echo copying .new to .scm;cp -f megatest-fossil-hash.new megatest-fossil-hash.scm;fi

$(OFILES) $(GOFILES) : common_records.scm 

%.o : %.scm $(MOFILES)
	csc $(CSCOPTS) -c $< $(MOFILES)
# %.o : %.scm $(MOFILES)
#	csc $(CSCOPTS) -c $< $(MOFILES)

%.o : %.scm
	csc $(CSCOPTS) -c $<

$(PREFIX)/bin/.$(ARCHSTR)/mtest : mtest utils/mk_wrapper
	@echo Installing to PREFIX=$(PREFIX)
	$(INSTALL) mtest $(PREFIX)/bin/.$(ARCHSTR)/mtest
	utils/mk_wrapper $(PREFIX) mtest $(PREFIX)/bin/megatest
	chmod a+x $(PREFIX)/bin/megatest

$(PREFIX)/bin/.$(ARCHSTR)/mtserve : mtserve utils/mk_wrapper
	@echo Installing to PREFIX=$(PREFIX)
	$(INSTALL) mtserve $(PREFIX)/bin/.$(ARCHSTR)/mtserve
	utils/mk_wrapper $(PREFIX) mtserve $(PREFIX)/bin/mtserver
	chmod a+x $(PREFIX)/bin/mtserver

$(PREFIX)/bin/mtserver : $(PREFIX)/bin/.$(ARCHSTR)/mtserve utils/mk_wrapper
	utils/mk_wrapper $(PREFIX) mtserve $(PREFIX)/bin/mtserver
	chmod a+x $(PREFIX)/bin/mtserver

$(PREFIX)/bin/.$(ARCHSTR)/ndboard : ndboard
	$(INSTALL) ndboard $(PREFIX)/bin/.$(ARCHSTR)/ndboard

$(PREFIX)/bin/newdashboard : $(PREFIX)/bin/.$(ARCHSTR)/ndboard utils/mk_wrapper
	utils/mk_wrapper $(PREFIX) ndboard $(PREFIX)/bin/newdashboard
	chmod a+x $(PREFIX)/bin/newdashboard
276
277
278
279
280
281
282
283

284
285
286
287
288
289
290
300
301
302
303
304
305
306

307
308
309
310
311
312
313
314







-
+







$(PREFIX)/bin/.$(ARCHSTR)/dboard : dboard $(FILES) utils/mk_wrapper
	utils/mk_wrapper $(PREFIX) dboard $(PREFIX)/bin/dashboard
	chmod a+x $(PREFIX)/bin/dashboard
	$(INSTALL) dboard $(PREFIX)/bin/.$(ARCHSTR)/dboard

install : $(PREFIX)/bin/.$(ARCHSTR) $(PREFIX)/bin/.$(ARCHSTR)/mtest $(PREFIX)/bin/megatest \
          $(PREFIX)/bin/.$(ARCHSTR)/dboard $(PREFIX)/bin/dashboard $(HELPERS) $(PREFIX)/bin/nbfake \
          $(PREFIX)/bin/serialize-env \
          $(PREFIX)/bin/serialize-env $(PREFIX)/bin/mtserver \
	  $(PREFIX)/bin/nbfind $(PREFIX)/bin/loadrunner $(PREFIX)/bin/viewscreen $(PREFIX)/bin/mt_xterm \
	  $(PREFIX)/share/docs/megatest_manual.html $(PREFIX)/bin/remrun \
	  $(PREFIX)/share/docs/megatest_manual.html $(PREFIX)/bin/remrun $(PREFIX)/bin/mtutil \
	  $(PREFIX)/bin/tcmt $(PREFIX)/share/db/mt-pg.sql \
          $(PREFIX)/share/js/jquery-3.1.0.slim.min.js 
#         $(PREFIX)/bin/.$(ARCHSTR)/ndboard

301
302
303
304
305
306
307
308

309
310
311
312
313
314
315
325
326
327
328
329
330
331

332
333
334
335
336
337
338
339







-
+







	mkdir -p ext-tests
	cd ext-tests;fossil open --nested $(MTQA_FOSSIL)

$(MTQA_FOSSIL) :
	fossil clone https://www.kiatoa.com/fossils/megatest_qa $(MTQA_FOSSIL)

clean : 
	rm -f $(OFILES) $(GOFILES) $(MOFILES) $(TCMTOBJS) $(PREFIX)/megatest $(PREFIX)/dashboard mtest mtutil dboard dboard.o megatest.o dashboard.o megatest-fossil-hash.* altdb.scm mofiles/*.o vg.o
	rm -f $(OFILES) $(GOFILES) $(MOFILES) $(TCMTOBJS) $(PREFIX)/megatest $(PREFIX)/dashboard mtest mtutil dboard dboard.o megatest.o dashboard.o megatest-fossil-hash.* altdb.scm mofiles/*.o vg.o *import.scm

#======================================================================
# Make the records files
#======================================================================

# vg_records.scm : records.sh
#	./records.sh
400
401
402
403
404
405
406
407
408


409
410
411
412
413
414
415
424
425
426
427
428
429
430


431
432
433
434
435
436
437
438
439







-
-
+
+







	if  csi -ne '(use mysql-client)';then \
           echo "(use mysql-client)(hash-table-set! *available-db* 'mysql #t)" >> altdb.scm; \
	fi
	if csi -ne '(use postgresql)';then \
	   echo "(use postgresql)(hash-table-set! *available-db* 'postgresql #t)" >> altdb.scm;\
	fi

portlogger-example : portlogger-example.scm api.o archive.o client.o common.o configf.o dashboard-tests.o dashboard-context-menu.o db.o dcommon.o ezsteps.o filedb.o genexample.o gutils.o http-transport.o items.o keys.o launch.o lock-queue.o margs.o megatest-version.o mt.o ods.o portlogger.o process.o rmt.o rpc-transport.o runconfig.o runs.o sdb.o server.o synchash.o tasks.o tdb.o tests.o tree.o
	csc $(CSCOPTS) portlogger-example.scm api.o archive.o client.o common.o configf.o dashboard-tests.o dashboard-context-menu.o db.o dcommon.o ezsteps.o filedb.o genexample.o gutils.o http-transport.o items.o keys.o launch.o lock-queue.o margs.o megatest-version.o mt.o ods.o portlogger.o process.o rmt.o rpc-transport.o runconfig.o runs.o sdb.o server.o synchash.o tasks.o tdb.o tests.o tree.o
portlogger-example : portlogger-example.scm portlogger.o
	csc $(CSCOPTS) portlogger-example.scm portlogger.o

# create a pdf dot graphviz diagram from notations in rmt.scm
rmt.pdf : rmt.scm
	grep ';;DOT' rmt.scm | sed -e 's/.*;;DOT //' > rmt.dot;dot -Tpdf rmt.dot -o rmt.pdf

buildmanual:
	cd docs/manual && make

Modified api.scm from [1541791de9] to [b22a7f13c0].

14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86

87
88
89
90

91
92
93
94

95
96
97
98
99
100
101
102
103
104

105
106
107
108
109
110
111
112

113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210

211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306

307
308
309
310
311

312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332

333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
14
15
16
17
18
19
20


21



22



























































23




24




25










26








27























































































28










29


30























































































31





32





33














34






35













































-
-

-
-
-

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
-
-
-
-
+
-
-
-
-
+
-
-
-
-
-
-
-
-
-
-
+
-
-
-
-
-
-
-
-
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-

-
-
-
-
-
-
-
-
-
-
+
-
-

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-

-
-
-
-
-
+
-
-
-
-
-
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-

-
-
-
-
-
-
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
;;     GNU General Public License for more details.
;; 
;;     You should have received a copy of the GNU General Public License
;;     along with Megatest.  If not, see <http://www.gnu.org/licenses/>.
;;
;;======================================================================

(use srfi-69 posix)

(declare (unit api))
(declare (uses rmt))
(declare (uses db))
(declare (uses tasks))

;; allow these queries through without starting a server
;;
(define api:read-only-queries
  '(get-key-val-pairs
    get-var
    get-keys
    get-key-vals
    test-toplevel-num-items
    get-test-info-by-id
    get-steps-info-by-id
    get-data-info-by-id
    test-get-rundir-from-test-id
    get-count-tests-running-for-testname
    get-count-tests-running
    get-count-tests-running-in-jobgroup
    get-previous-test-run-record
    get-matching-previous-test-run-records
    test-get-logfile-info
    test-get-records-for-index-file
    get-testinfo-state-status
    test-get-top-process-pid
    test-get-paths-matching-keynames-target-new
    get-prereqs-not-met
    get-count-tests-running-for-run-id
    get-run-info
    get-run-status
    get-run-stats
    get-run-times
    get-targets
    get-target
    ;; register-run
    get-tests-tags
    get-test-times
    get-tests-for-run
    get-test-id
    get-tests-for-runs-mindata
    get-tests-for-run-mindata
    get-run-name-from-id
    get-runs
    simple-get-runs
    get-num-runs
    get-runs-cnt-by-patt
    get-all-run-ids
    get-prev-run-ids
    get-run-ids-matching-target
    get-runs-by-patt
    get-steps-data
    get-steps-for-test
    read-test-data
    read-test-data*
    login
    tasks-get-last
    testmeta-get-record
    have-incompletes?
    synchash-get
    get-changed-record-ids
		get-run-record-ids 
    ))

(module
(define api:write-queries
  '(
    get-keys-write ;; dummy "write" query to force server start

    api
    ;; SERVERS
    start-server
    kill-server

    (
    ;; TESTS
    test-set-state-status-by-id
    delete-test-records
    delete-old-deleted-test-records
    test-set-state-status
    test-set-top-process-pid
    set-state-status-and-roll-up-items
    update-pass-fail-counts
    top-test-set-per-pf-counts ;; (db:top-test-set-per-pf-counts (db:get-db *db* 5) 5 "runfirst")

     *
    ;; RUNS
    register-run
    set-tests-state-status
    delete-run
    lock/unlock-run
    update-run-event_time
    mark-incomplete

     )
    ;; STEPS
    teststep-set-status!

    ;; TEST DATA
    test-data-rollup
    csv->test-data

    ;; MISC
    sync-inmem->db

    ;; TESTMETA
    testmeta-add-record
    testmeta-update-field

    ;; TASKS
    tasks-add
    tasks-set-state-given-param-key
    ))

;; These are called by the server on recipt of /api calls
;;    - keep it simple, only return the actual result of the call, i.e. no meta info here
;;
;;    - returns #( flag result )
;;
(define (api:execute-requests dbstruct dat)
  (handle-exceptions
   exn
   (let ((call-chain (get-call-chain)))
     (debug:print 0 *default-log-port* "WARNING: api:execute-requests received an exception from peer, dat=" dat)
     (print-call-chain (current-error-port))
     (debug:print 0 *default-log-port* " message: "  ((condition-property-accessor 'exn 'message) exn))       
     (vector #f (vector exn call-chain dat))) ;; return some stuff for debug if an exception happens
   (cond
    ((not (vector? dat))                    ;; it is an error to not receive a vector
     (vector #f (vector #f "remote must be called with a vector")))
    ((> *api-process-request-count* 20) ;; 20)
     (debug:print 0 *default-log-port* "WARNING: api:execute-requests received an overloaded message.")
     (set! *server-overloaded* #t)
     (vector #f (vector #f 'overloaded))) ;; the inner vector is what gets returned. nope, don't know why. please refactor!
    (else  
     (let* ((cmd-in            (vector-ref dat 0))
            (cmd               (if (symbol? cmd-in)
				   cmd-in
				   (string->symbol cmd-in)))
            (params            (vector-ref dat 1))
            (start-t           (current-milliseconds))
            (readonly-mode     (dbr:dbstruct-read-only dbstruct))
            (readonly-command  (member cmd api:read-only-queries))
            (writecmd-in-readonly-mode (and readonly-mode (not readonly-command)))
            (res    
             (if writecmd-in-readonly-mode
                 (conc "attempt to run write command "cmd" on a read-only database")
                 (case cmd
                   ;;===============================================
                   ;; READ/WRITE QUERIES
                   ;;===============================================

                   ((get-keys-write)                        (db:get-keys dbstruct)) ;; force a dummy "write" query to force server; for debug in -repl
                   
                   ;; SERVERS
                   ((start-server)                    (apply server:kind-run params))
                   ((kill-server)                     (set! *server-run* #f))

                   ;; TESTS

                   ;;((test-set-state-status-by-id)     (apply mt:test-set-state-status-by-id dbstruct params))
                   ;;BB - commented out above because it was calling below, eventually, incorrectly (dbstruct passed to mt:test-set-state-status-by-id, which previosly did more, but now only passes thru to db:set-state-status-and-roll-up-items.
                   ((test-set-state-status-by-id)

                    ;; (define (db:set-state-status-and-roll-up-items dbstruct run-id test-name item-path state status comment)
                    (db:set-state-status-and-roll-up-items
                     dbstruct
                     (list-ref params 0) ; run-id
                     (list-ref params 1) ; test-name
                     #f                  ; item-path
                     (list-ref params 2) ; state
                     (list-ref params 3) ; status
                     (list-ref params 4) ; comment
                     ))
                   
                   ((delete-test-records)             (apply db:delete-test-records dbstruct params))
                   ((delete-old-deleted-test-records) (apply db:delete-old-deleted-test-records dbstruct params))
                   ((test-set-state-status)           (apply db:test-set-state-status dbstruct params))
                   ((test-set-top-process-pid)        (apply db:test-set-top-process-pid dbstruct params))
                   ((set-state-status-and-roll-up-items) (apply db:set-state-status-and-roll-up-items dbstruct params))
                   ((top-test-set-per-pf-counts)      (apply db:top-test-set-per-pf-counts dbstruct params))
                   ((test-set-archive-block-id)       (apply db:test-set-archive-block-id dbstruct params))

                   ;; RUNS
                   ((register-run)                 (apply db:register-run dbstruct params))
                   ((set-tests-state-status)       (apply db:set-tests-state-status dbstruct params))
                   ((delete-run)                   (apply db:delete-run dbstruct params))
                   ((lock/unlock-run)              (apply db:lock/unlock-run dbstruct params))
                   ((update-run-event_time)        (apply db:update-run-event_time dbstruct params))
                   ((update-run-stats)             (apply db:update-run-stats dbstruct params))
                   ((set-var)                      (apply db:set-var dbstruct params))
                   ((del-var)                      (apply db:del-var dbstruct params))

(import scheme posix chicken data-structures ports)
                   ;; STEPS
                   ((teststep-set-status!)         (apply db:teststep-set-status! dbstruct params))

                   ;; TEST DATA
                   ((test-data-rollup)             (apply db:test-data-rollup dbstruct params))
                   ((csv->test-data)               (apply db:csv->test-data dbstruct params))

                   ;; MISC
                   ((sync-inmem->db)               (let ((run-id (car params)))
                                                     (db:sync-touched dbstruct run-id force-sync: #t)))
                   ((mark-incomplete)              (apply db:find-and-mark-incomplete dbstruct params))

                   ;; TESTMETA
                   ((testmeta-add-record)       (apply db:testmeta-add-record dbstruct params))
                   ((testmeta-update-field)     (apply db:testmeta-update-field dbstruct params))
                   ((get-tests-tags)            (db:get-tests-tags dbstruct))

                   ;; TASKS
                   ((tasks-add)                 (apply tasks:add dbstruct params))   
                   ((tasks-set-state-given-param-key) (apply tasks:set-state-given-param-key dbstruct params))
                   ((tasks-get-last)            (apply tasks:get-last dbstruct params))

		   ;; NO SYNC DB
		   ((no-sync-set)               (apply db:no-sync-set         *no-sync-db* params))
		   ((no-sync-get/default)       (apply db:no-sync-get/default *no-sync-db* params))
		   ((no-sync-del!)              (apply db:no-sync-del!        *no-sync-db* params))
		   ((no-sync-get-lock)          (apply db:no-sync-get-lock    *no-sync-db* params))
		 
                   ;; ARCHIVES
                   ;; ((archive-get-allocations)   
                   ((archive-register-disk)     (apply db:archive-register-disk dbstruct params))
                   ((archive-register-block-name)(apply db:archive-register-block-name dbstruct params))
                   ((archive-allocate-testsuite/area-to-block)(apply db:archive-allocate-testsuite/area-to-block dbstruct block-id testsuite-name areakey))

                   ;;======================================================================
                   ;; READ ONLY QUERIES
                   ;;======================================================================

                   ;; KEYS
                   ((get-key-val-pairs)               (apply db:get-key-val-pairs dbstruct params))
                   ((get-keys)                        (db:get-keys dbstruct))
                   ((get-key-vals)                    (apply db:get-key-vals dbstruct params))
                   ((get-target)                      (apply db:get-target dbstruct params))
                   ((get-targets)                     (db:get-targets dbstruct))

                   ;; ARCHIVES
                   ((test-get-archive-block-info)     (apply db:test-get-archive-block-info dbstruct params))
                   
                   ;; TESTS
                   ((test-toplevel-num-items)         (apply db:test-toplevel-num-items dbstruct params))
                   ((get-test-info-by-id)	       (apply db:get-test-info-by-id dbstruct params))
                   ((test-get-rundir-from-test-id)    (apply db:test-get-rundir-from-test-id dbstruct params))
                   ((get-count-tests-running-for-testname) (apply db:get-count-tests-running-for-testname dbstruct params))
                   ((get-count-tests-running)         (apply db:get-count-tests-running dbstruct params))
                   ((get-count-tests-running-in-jobgroup) (apply db:get-count-tests-running-in-jobgroup dbstruct params))
                   ;; ((delete-test-step-records)        (apply db:delete-test-step-records dbstruct params))
                   ((get-previous-test-run-record)    (apply db:get-previous-test-run-record dbstruct params))
                   ((get-matching-previous-test-run-records)(apply db:get-matching-previous-test-run-records dbstruct params))
                   ((test-get-logfile-info)           (apply db:test-get-logfile-info dbstruct params))
                   ((test-get-records-for-index-file)  (apply db:test-get-records-for-index-file dbstruct params))
                   ((get-testinfo-state-status)       (apply db:get-testinfo-state-status dbstruct params))
                   ((test-get-top-process-pid)        (apply db:test-get-top-process-pid dbstruct params))
                   ((test-get-paths-matching-keynames-target-new) (apply db:test-get-paths-matching-keynames-target-new dbstruct params))
                   ((get-prereqs-not-met)             (apply db:get-prereqs-not-met dbstruct params))
                   ((get-count-tests-running-for-run-id) (apply db:get-count-tests-running-for-run-id dbstruct params))
                   ((synchash-get)                    (apply synchash:server-get dbstruct params))
                   ((get-raw-run-stats)               (apply db:get-raw-run-stats dbstruct params))
		   ((get-test-times)                  (apply db:get-test-times dbstruct params))

                   ;; RUNS
                   ((get-run-info)                 (apply db:get-run-info dbstruct params))
                   ((get-run-status)               (apply db:get-run-status dbstruct params))
                   ((set-run-status)               (apply db:set-run-status dbstruct params))
                   ((get-tests-for-run)            (apply db:get-tests-for-run dbstruct params))
                   ((get-test-id)                  (apply db:get-test-id dbstruct params))
                   ((get-tests-for-run-mindata)    (apply db:get-tests-for-run-mindata dbstruct params))
                   ((get-tests-for-runs-mindata)   (apply db:get-tests-for-runs-mindata dbstruct params))
                   ((get-runs)                     (apply db:get-runs dbstruct params))
                   ((simple-get-runs)              (apply db:simple-get-runs dbstruct params))
                   ((get-num-runs)                 (apply db:get-num-runs dbstruct params))
                   ((get-runs-cnt-by-patt)         (apply db:get-runs-cnt-by-patt dbstruct params))
                   ((get-all-run-ids)              (db:get-all-run-ids dbstruct))
                   ((get-prev-run-ids)             (apply db:get-prev-run-ids dbstruct params))
                   ((get-run-ids-matching-target)  (apply db:get-run-ids-matching-target dbstruct params))
                   ((get-runs-by-patt)             (apply db:get-runs-by-patt dbstruct params))
                   ((get-run-name-from-id)         (apply db:get-run-name-from-id dbstruct params))
                   ((get-main-run-stats)           (apply db:get-main-run-stats dbstruct params))
                   ((get-var)                      (apply db:get-var dbstruct params))
                   ((get-run-stats)                (apply db:get-run-stats dbstruct params))
                   ((get-run-times)                (apply db:get-run-times dbstruct params)) 

                   ;; STEPS
                   ((get-steps-data)               (apply db:get-steps-data dbstruct params))
                   ((get-steps-for-test)           (apply db:get-steps-for-test dbstruct params))
		   ((get-steps-info-by-id)         (apply db:get-steps-info-by-id dbstruct params))

(define (api:execute-requests . args) #t)
                   ;; TEST DATA
                   ((read-test-data)               (apply db:read-test-data dbstruct params))
                   ((read-test-data*)              (apply db:read-test-data* dbstruct params))
                   ((get-data-info-by-id)          (apply db:get-data-info-by-id dbstruct params)) 

(define (api:process-request  . args) #t)
                   ;; MISC
                   ((get-latest-host-load)         (apply db:get-latest-host-load dbstruct params))
                   ((have-incompletes?)            (apply db:have-incompletes? dbstruct params))
                   ((login)                        (apply db:login dbstruct params))
                   ((general-call)                 (let ((stmtname   (car params))
                                                         (run-id     (cadr params))
                                                         (realparams (cddr params)))
                                                     (db:general-call dbstruct stmtname realparams)))
                   ((sdb-qry)                      (apply sdb:qry params))
                   ((ping)                         (current-process-id))
		   						 ((get-changed-record-ids)       (apply db:get-changed-record-ids dbstruct params))
		   						 ((get-run-record-ids) 					 (apply db:get-run-record-ids dbstruct params))	
                   ;; TESTMETA
                   ((testmeta-get-record)       (apply db:testmeta-get-record dbstruct params))

                   ;; TASKS 
                   ((find-task-queue-records)   (apply tasks:find-task-queue-records dbstruct params))
		   (else
		    (debug:print 0 *default-log-port* "ERROR: bad api call " cmd)
		    (conc "ERROR: BAD api call " cmd))))))
       
)
       ;; save all stats
       (let ((delta-t (- (current-milliseconds)
			 start-t)))
	 (hash-table-set! *db-api-call-time* cmd
			  (cons delta-t (hash-table-ref/default *db-api-call-time* cmd '()))))
       (if writecmd-in-readonly-mode
	   (vector #f res)
           (vector #t res)))))))

;; http-server  send-response
;;                 api:process-request
;;                    db:*
;;
;; NB// Runs on the server as part of the server loop
;;
(define (api:process-request dbstruct $) ;; the $ is the request vars proc
  (set! *api-process-request-count* (+ *api-process-request-count* 1))
  (let* ((cmd     ($ 'cmd))
	 (paramsj ($ 'params))
	 (params  (db:string->obj paramsj transport: 'http)) ;; incoming data from the POST (or is it a GET?)
	 (resdat  (api:execute-requests dbstruct (vector cmd params))) ;; process the request, resdat = #( flag result )
	 (success (vector-ref resdat 0))
	 (res     (vector-ref resdat 1))) ;; (vector flag payload), get the payload, ignore the flag (why?)
    (if (not success)
	(debug:print 0 *default-log-port* "ERROR: success flag is #f for " cmd " with params " params))
    (if (> *api-process-request-count* *max-api-process-requests*)
	(set! *max-api-process-requests* *api-process-request-count*))
    (set! *api-process-request-count* (- *api-process-request-count* 1))
    ;; This can be here but needs controls to ensure it doesn't run more than every 4 seconds
    ;; (rmt:dat->json-str
    ;;  (if (or (string? res)
    ;;          (list?   res)
    ;;          (number? res)
    ;;          (boolean? res))
    ;;      res 
    ;;      (list "ERROR, not string, list, number or boolean" 1 cmd params res)))))
    (db:obj->string res transport: 'http)))

Modified client.scm from [3b6119f752] to [58115c88ed].

29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50








51
52
53
54
55
56
57








58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73


















74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121















































29
30
31
32
33
34
35















36
37
38
39
40
41
42
43







44
45
46
47
48
49
50
51
















52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
















































70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116







-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
(declare (uses common))
(declare (uses db))
(declare (uses tasks)) ;; tasks are where stuff is maintained about what is running.

(include "common_records.scm")
(include "db_records.scm")

;; client:get-signature
(define (client:get-signature)
  (if *my-client-signature* *my-client-signature*
      (let ((sig (conc (get-host-name) " " (current-process-id))))
	(set! *my-client-signature* sig)
	*my-client-signature*)))

;; Not currently used! But, I think it *should* be used!!!
(define (client:logout serverdat)
  (let ((ok (and (socket? serverdat)
		 (cdb:logout serverdat *toppath* (client:get-signature)))))
    ok))

(define (client:connect iface port)
  (case (server:get-transport)
;; ;; Not currently used! But, I think it *should* be used!!!
;; (define (client:logout serverdat)
;;   (let ((ok (and (socket? serverdat)
;; 		 (cdb:logout serverdat *toppath* (client:get-signature)))))
;;     ok))
;; 
;; (define (client:connect iface port)
;;   (case (server:get-transport)
    ((rpc)  (rpc:client-connect  iface port))
    ((http) (http:client-connect iface port))
    ((zmq)  (zmq:client-connect  iface port))
    (else   (rpc:client-connect  iface port))))

(define (client:setup areapath #!key (remaining-tries 100) (failed-connects 0))
  (case (server:get-transport)
;;     ((http) (http:client-connect iface port))
;;     ((zmq)  (zmq:client-connect  iface port))
;;     (else   (begin
;; 	      (debug:print 0 *default-log-port* "ERROR: no such transport " (server:get-transport) ", exiting now.")
;; 	      (exit 1)))))
;; 
;; (define (client:setup areapath #!key (remaining-tries 100) (failed-connects 0))
;;   (case (server:get-transport)
    ((rpc) (rpc-transport:client-setup remaining-tries: remaining-tries failed-connects: failed-connects)) ;;(client:setup-rpc run-id))
    ((http)(client:setup-http areapath remaining-tries: remaining-tries failed-connects: failed-connects))
    (else  (rpc-transport:client-setup remaining-tries: remaining-tries failed-connects: failed-connects)))) ;; (client:setup-rpc run-id))))

;; Do all the connection work, look up the transport type and set up the
;; connection if required.
;;
;; There are two scenarios. 
;;   1. We are a test manager and we received *transport-type* and *runremote* via cmdline
;;   2. We are a run tests, list runs or other interactive process and we must figure out
;;      *transport-type* and *runremote* from the monitor.db
;;
;; client:setup
;;
;; lookup_server, need to remove *runremote* stuff
;;
;;     ((http)(client:setup-http areapath remaining-tries: remaining-tries failed-connects: failed-connects))
;;     (else  (begin
;; 	     (debug:print 0 *default-log-port* "ERROR: no such transport " (server:get-transport) ", exiting now.")
;; 	     (exit 1)))))
;; 
;; ;; Do all the connection work, look up the transport type and set up the
;; ;; connection if required.
;; ;;
;; ;; There are two scenarios. 
;; ;;   1. We are a test manager and we received *transport-type* and *runremote* via cmdline
;; ;;   2. We are a run tests, list runs or other interactive process and we must figure out
;; ;;      *transport-type* and *runremote* from the monitor.db
;; ;;
;; ;; client:setup
;; ;;
;; ;; lookup_server, need to remove *runremote* stuff
;; ;;
;; 

(define (client:setup-http areapath #!key (remaining-tries 100) (failed-connects 0)(area-dat #f))
  (debug:print-info 2 *default-log-port* "client:setup remaining-tries=" remaining-tries)
  (server:start-and-wait areapath)
  (if (<= remaining-tries 0)
      (begin
	(debug:print-error 0 *default-log-port* "failed to start or connect to server")
	(exit 1))
      ;;
      ;; Alternatively here, we can get the list of candidate servers and work our way
      ;; through them searching for a good one.
      ;;
      (let* ((server-dat (server:get-rand-best areapath)) ;; (server:get-first-best areapath))
	     (runremote  (or area-dat *runremote*)))
	(if (not server-dat) ;; no server found
	    (client:setup-http areapath remaining-tries: (- remaining-tries 1))
	    (let ((host  (cadr  server-dat))
		  (port  (caddr server-dat)))
	      (debug:print-info 4 *default-log-port* "client:setup server-dat=" server-dat ", remaining-tries=" remaining-tries)
	      (if (and (not area-dat)
		       (not *runremote*))
		  (set! *runremote* (make-remote)))
	      (if (and host port)
		  (let* ((start-res (case *transport-type*
				      ((http)(http-transport:client-connect host port))))
			 (ping-res  (case *transport-type* 
				      ((http)(rmt:login-no-auto-client-setup start-res)))))
		    (if (and start-res
			     ping-res)
			(let ((runremote (or area-dat *runremote*))) ;; it might have been generated only a few statements ago
			  (remote-conndat-set! runremote start-res) ;; (hash-table-set! runremote run-id start-res)
			  (debug:print-info 2 *default-log-port* "connected to " (http-transport:server-dat-make-url start-res))
			  start-res)
			(begin    ;; login failed but have a server record, clean out the record and try again
			  (debug:print-info 0 *default-log-port* "client:setup, login failed, will attempt to start server ... start-res=" start-res ", server-dat=" server-dat) ;; had runid.  Fixes part of Randy;s ticket 1405717332
			  (case *transport-type* 
			    ((http)(http-transport:close-connections)))
			  (remote-conndat-set! runremote #f)  ;; (hash-table-delete! runremote run-id)
			  (thread-sleep! 1)
			  (client:setup-http areapath remaining-tries: (- remaining-tries 1))
			  )))
		  (begin    ;; no server registered
		    ;; (server:kind-run areapath)
		    (server:start-and-wait areapath)
		    (debug:print-info 0 *default-log-port* "client:setup, no server registered, remaining-tries=" remaining-tries)
		    (thread-sleep! 1) ;; (+ 5 (random (- 20 remaining-tries))))  ;; give server a little time to start up, randomize a little to avoid start storms.
		    (client:setup-http areapath remaining-tries: (- remaining-tries 1)))))))))

;; (define (client:setup-http areapath #!key (remaining-tries 100) (failed-connects 0)(area-dat #f))
;;   (debug:print-info 2 *default-log-port* "client:setup remaining-tries=" remaining-tries)
;;   (server:start-and-wait areapath)
;;   (if (<= remaining-tries 0)
;;       (begin
;; 	(debug:print-error 0 *default-log-port* "failed to start or connect to server")
;; 	(exit 1))
;;       ;;
;;       ;; Alternatively here, we can get the list of candidate servers and work our way
;;       ;; through them searching for a good one.
;;       ;;
;;       (let* ((server-dat (server:get-rand-best areapath)) ;; (server:get-first-best areapath))
;; 	     (runremote  (or area-dat *runremote*)))
;; 	(if (not server-dat) ;; no server found
;; 	    (client:setup-http areapath remaining-tries: (- remaining-tries 1))
;; 	    (let ((host  (cadr  server-dat))
;; 		  (port  (caddr server-dat)))
;; 	      (debug:print-info 4 *default-log-port* "client:setup server-dat=" server-dat ", remaining-tries=" remaining-tries)
;; 	      (if (and (not area-dat)
;; 		       (not *runremote*))
;; 		  (set! *runremote* (make-remote)))
;; 	      (if (and host port)
;; 		  (let* ((start-res (case *transport-type*
;; 				      ((http)(http-transport:client-connect host port))))
;; 			 (ping-res  (case *transport-type* 
;; 				      ((http)(rmt:login-no-auto-client-setup start-res)))))
;; 		    (if (and start-res
;; 			     ping-res)
;; 			(let ((runremote (or area-dat *runremote*))) ;; it might have been generated only a few statements ago
;; 			  (remote-conndat-set! runremote start-res) ;; (hash-table-set! runremote run-id start-res)
;; 			  (debug:print-info 2 *default-log-port* "connected to " (http-transport:server-dat-make-url start-res))
;; 			  start-res)
;; 			(begin    ;; login failed but have a server record, clean out the record and try again
;; 			  (debug:print-info 0 *default-log-port* "client:setup, login failed, will attempt to start server ... start-res=" start-res ", server-dat=" server-dat) ;; had runid.  Fixes part of Randy;s ticket 1405717332
;; 			  (case *transport-type* 
;; 			    ((http)(http-transport:close-connections)))
;; 			  (remote-conndat-set! runremote #f)  ;; (hash-table-delete! runremote run-id)
;; 			  (thread-sleep! 1)
;; 			  (client:setup-http areapath remaining-tries: (- remaining-tries 1))
;; 			  )))
;; 		  (begin    ;; no server registered
;; 		    ;; (server:kind-run areapath)
;; 		    (server:start-and-wait areapath)
;; 		    (debug:print-info 0 *default-log-port* "client:setup, no server registered, remaining-tries=" remaining-tries)
;; 		    (thread-sleep! 1) ;; (+ 5 (random (- 20 remaining-tries))))  ;; give server a little time to start up, randomize a little to avoid start storms.
;; 		    (client:setup-http areapath remaining-tries: (- remaining-tries 1)))))))))
;; 

Modified common.scm from [f3a824a935] to [04f4dea505].

1133
1134
1135
1136
1137
1138
1139
1140

1141
1142
1143
1144
1145
1146
1147
1133
1134
1135
1136
1137
1138
1139

1140
1141
1142
1143
1144
1145
1146
1147







-
+







          (conc (getenv "MT_TEST_NAME") "/" (getenv "MT_ITEMPATH")))
      #f))

;; logic for getting homehost. Returns (host . at-home)
;; IF *toppath* is not set, wait up to five seconds trying every two seconds
;; (this is to accomodate the watchdog)
;;
(define (common:get-homehost #!key (trynum 5))
#;(define (common:get-homehost #!key (trynum 5))
  ;; called often especially at start up. use mutex to eliminate collisions
  (mutex-lock! *homehost-mutex*)
  (cond
   (*home-host*
    (mutex-unlock! *homehost-mutex*)
    *home-host*)
   ((not *toppath*)
1185
1186
1187
1188
1189
1190
1191
1192

1193
1194
1195
1196
1197
1198
1199
1185
1186
1187
1188
1189
1190
1191

1192
1193
1194
1195
1196
1197
1198
1199







-
+







			 (equal? homehost bestadrs))))
      (set! *home-host* (cons homehost at-home))
      (mutex-unlock! *homehost-mutex*)
      *home-host*))))

;; am I on the homehost?
;;
(define (common:on-homehost?)
#;(define (common:on-homehost?)
  (let ((hh (common:get-homehost)))
    (if hh
	(cdr hh)
	#f)))

;; do we honor the caches of the config files?
;;

Modified db.scm from [decb5b7df2] to [deea30d44d].

29
30
31
32
33
34
35
36
37


38
39
40
41
42
43
44
29
30
31
32
33
34
35


36
37
38
39
40
41
42
43
44







-
-
+
+







(import (prefix sqlite3 sqlite3:))
(import (prefix base64 base64:))

(declare (unit db))
(declare (uses common))
(declare (uses keys))
(declare (uses ods))
(declare (uses client))
(declare (uses mt))
;; (declare (uses client))
;; (declare (uses mt))

(include "common_records.scm")
(include "db_records.scm")
(include "key_records.scm")
(include "run_records.scm")

(define *rundb-mutex* (make-mutex)) ;; prevent problems opening/closing rundb's
1330
1331
1332
1333
1334
1335
1336
1337






1338
1339
1340
1341
1342
1343
1344
1330
1331
1332
1333
1334
1335
1336

1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349







-
+
+
+
+
+
+







                     comment      TEXT      DEFAULT '',
                     event_time   TIMESTAMP DEFAULT (strftime('%s','now')),
                     fail_count   INTEGER   DEFAULT 0,
                     pass_count   INTEGER   DEFAULT 0,
                     archived     INTEGER   DEFAULT 0, -- 0=no, > 1=archive block id where test data can be found
                     last_update  INTEGER DEFAULT (strftime('%s','now')),
                        CONSTRAINT testsconstraint UNIQUE (run_id, testname, item_path));")
	(sqlite3:execute db "CREATE INDEX IF NOT EXISTS tests_index ON tests (run_id, testname, item_path, uname);")
	;; deprecated -- (sqlite3:execute db "CREATE INDEX IF NOT EXISTS tests_index ON tests (run_id, testname, item_path, uname);")
        
        (sqlite3:execute db "CREATE INDEX IF NOT EXISTS tests_run_id_index ON tests (run_id);")  ;; new
        (sqlite3:execute db "CREATE INDEX IF NOT EXISTS tests_testname_index ON tests (testname,item_path);") ;; new
        (sqlite3:execute db "CREATE INDEX IF NOT EXISTS tests_state_status_index ON tests (state, status); ") ;; new
        
	(sqlite3:execute db "CREATE TRIGGER  IF NOT EXISTS update_tests_trigger AFTER UPDATE ON tests
                             FOR EACH ROW
                               BEGIN 
                                 UPDATE tests SET last_update=(strftime('%s','now'))
                                   WHERE id=old.id;
                               END;")
	(sqlite3:execute db "CREATE TABLE IF NOT EXISTS test_steps 
2662
2663
2664
2665
2666
2667
2668
2669

2670
2671

2672
2673
2674
2675
2676
2677
2678
2667
2668
2669
2670
2671
2672
2673

2674
2675

2676
2677
2678
2679
2680
2681
2682
2683







-
+

-
+







  (let* ((keyvals (db:get-key-vals dbstruct run-id))
	 (thekey  (string-intersperse (map (lambda (x)(if x x "-na-")) keyvals) "/")))
    thekey))

;; Get run-ids for runs with same target but different runnames and NOT run-id
;;
(define (db:get-prev-run-ids dbstruct run-id)
  (let* ((keyvals (rmt:get-key-val-pairs run-id))
  (let* ((keyvals (db:get-key-val-pairs run-id))
	 (kvalues (map cadr keyvals))
	 (keys    (rmt:get-keys))
	 (keys    (db:get-keys))
	 (qrystr  (string-intersperse (map (lambda (x)(conc x "=?")) keys) " AND ")))
    (let ((prev-run-ids '()))
      (if (null? keyvals)
          '()
          (begin
            (db:with-db dbstruct #f #f ;; #f means work with the zeroth db - i.e. the runs db
                        (lambda (db)
2900
2901
2902
2903
2904
2905
2906
2907


2908
2909
2910
2911
2912
2913
2914
2905
2906
2907
2908
2909
2910
2911

2912
2913
2914
2915
2916
2917
2918
2919
2920







-
+
+







	    (sqlite3:execute db qry
			     (or newstate  currstate "NOT_STARTED")
			     (or newstatus currstate "UNKNOWN")
			     run-id testname)))
	 (if test-id
	     (begin
	       (set! test-ids (cons test-id test-ids))
	       (mt:process-triggers dbstruct run-id test-id newstate newstatus)))))
	       #;(mt:process-triggers dbstruct run-id test-id newstate newstatus) ;; WARNING: trigger processing used to happen here!
	       ))))
     testnames)
    test-ids))

;; ;; speed up for common cases with a little logic
;; ;; NB// Ultimately this will be deprecated in deference to mt:test-set-state-status-by-id
;;
;;      NOTE: run-id is not used
2927
2928
2929
2930
2931
2932
2933
2934


2935
2936
2937
2938
2939
2940
2941
2933
2934
2935
2936
2937
2938
2939

2940
2941
2942
2943
2944
2945
2946
2947
2948







-
+
+







      ((and newstate newstatus)
       (sqlite3:execute db "UPDATE tests SET state=?,status=? WHERE id=?;" newstate newstatus test-id))
      (else
       (if newstate   (sqlite3:execute db "UPDATE tests SET state=?   WHERE id=?;" newstate   test-id))
       (if newstatus  (sqlite3:execute db "UPDATE tests SET status=?  WHERE id=?;" newstatus  test-id))
       (if newcomment (sqlite3:execute db "UPDATE tests SET comment=? WHERE id=?;" newcomment ;; (sdb:qry 'getid newcomment)
				       test-id))))))
  (mt:process-triggers dbstruct run-id test-id newstate newstatus))
  #;(mt:process-triggers dbstruct run-id test-id newstate newstatus) ;; WARNING: Trigger processing used to happen here!
  )

;; NEW BEHAVIOR: Count tests running in all runs!
;;
(define (db:get-count-tests-running dbstruct run-id)
  (db:with-db
   dbstruct
   run-id

Modified http-transport.scm from [da311848d8] to [b6b8ef7c43].

38
39
40
41
42
43
44





45
46
47
48
49
50
51
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56







+
+
+
+
+







(declare (uses portlogger))
(declare (uses rmt))

(include "common_records.scm")
(include "db_records.scm")
(include "js-path.scm")

(import portlogger)
(portlogger:set-default-log-port! *default-log-port*)
(portlogger:set-configdat! *configdat*)
(portlogger:set-printers! debug:print debug:print-error)

(require-library stml)
(define (http-transport:make-server-url hostport)
  (if (not hostport)
      #f
      (conc "http://" (car hostport) ":" (cadr hostport))))

(define *server-loop-heart-beat* (current-seconds))

Modified iup-test/Makefile from [2356e68571] to [a0f27c5501].

1

2
3
4
5

1
2
3
4
5
-
+




LIBSRC = "<$PATH>/chicken-4.10.0-patch"
LIBSRC = "PATH/chicken-4.10.0-patch"


sample : sample.c
	gcc -I$(LIBSRC)/include/ -L$(LIBSRC)/lib -L$(LIBSRC)/lib64 -liup -liupimglib -o sample sample.c

Modified launch.scm from [5b95a5518e] to [6dd1993f7c].

608
609
610
611
612
613
614
615
616
617

618
619
620
621
622
623
624
608
609
610
611
612
613
614



615
616
617
618
619
620
621
622







-
-
-
+







	      (debug:print 0 *default-log-port* "INFO: test is INCOMPLETE or KILLED, treat this execute call as a rerun request")
	      ;; (tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a")
	      (rmt:test-set-state-status run-id test-id "REMOTEHOSTSTART" "n/a" #f)
	      ) ;; prime it for running
	     ((member (db:test-get-state test-info) '("RUNNING" "REMOTEHOSTSTART"))
	      (if (process:alive-on-host? test-host test-pid)
		  (debug:print-error 0 *default-log-port* "test state is "  (db:test-get-state test-info) " and process " test-pid " is still running on host " test-host ", cannot proceed")
		  ;; (tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a")
		  (rmt:test-set-state-status run-id test-id "REMOTEHOSTSTART" "n/a" #f)
		  ))
		  (exit)))
	     ((not (member (db:test-get-state test-info) '("REMOVING" "REMOTEHOSTSTART" "RUNNING" "KILLREQ")))
	      ;; (tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a")
	      (rmt:test-set-state-status run-id test-id "REMOTEHOSTSTART" "n/a" #f)
	      )
	     (else ;; (member (db:test-get-state test-info) '("REMOVING" "REMOTEHOSTSTART" "RUNNING" "KILLREQ"))
	      (debug:print-error 0 *default-log-port* "test state is " (db:test-get-state test-info) ", cannot proceed")
	      (exit))))

Modified megatest-version.scm from [1094c4a239] to [58d639f18b].

16
17
18
19
20
21
22
23

16
17
18
19
20
21
22

23







-
+
;;     along with Megatest.  If not, see <http://www.gnu.org/licenses/>.

;; Always use two or four digit decimal
;; 1.01, 1.02...1.10,1.11,1.1101 ... 1.99,2.00..

(declare (unit megatest-version))

(define megatest-version 1.6521)
(define megatest-version 1.6523)

Modified megatest.scm from [cecad5eaf2] to [acbd9fac72].

20
21
22
23
24
25
26
27

28
29
30
31
32
33
34
20
21
22
23
24
25
26

27
28
29
30
31
32
33
34







-
+







;; (include "megatest-version.scm")

;; fake out readline usage of toplevel-command
(define (toplevel-command . a) #f)

(use (prefix sqlite3 sqlite3:) srfi-1 posix regex regex-case srfi-69 (prefix base64 base64:)
     readline apropos json http-client directory-utils typed-records
     http-client srfi-18 extras format)
     http-client srfi-18 extras format (prefix pkts pkts:))

;; Added for csv stuff - will be removed
;;
(use sparse-vectors)

(require-library mutils)

51
52
53
54
55
56
57




58
59
60
61
62
63
64
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68







+
+
+
+







(declare (uses mt))
(declare (uses api))
(declare (uses tasks)) ;; only used for debugging.
(declare (uses env))
(declare (uses diff-report))
(declare (uses ftail))
(import ftail)
(declare (uses portlogger))
(import portlogger)
(declare (uses nmsg-transport))
(import (prefix nmsg-transport nmsg:))

(define *db* #f) ;; this is only for the repl, do not use in general!!!!

(include "common_records.scm")
(include "key_records.scm")
(include "db_records.scm")
(include "run_records.scm")
108
109
110
111
112
113
114


115
116
117
118
119
120
121
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127







+
+







  -version                : print megatest version (currently " megatest-version ")

Launching and managing runs
  -run                    : run all tests or as specified by -testpatt
  -remove-runs            : remove the data for a run, requires -runname and -testpatt
                            Optionally use :state and :status, use -keep-records to remove only
                            the run data.
  -kill-runs               : kill existing run(s) (all incomplete tests killed)
  -kill-rerun              : kill an existing run (all incomplete tests killed and run is rerun)
  -set-state-status X,Y   : set state to X and status to Y, requires controls per -remove-runs
  -rerun FAIL,WARN...     : force re-run for tests with specificed status(s)
  -rerun-clean            : set all tests not COMPLETED+PASS,WARN,WAIVED to NOT_STARTED,n/a
                            and then run the specified testpatt with -preclean
  -rerun-all              : set all tests to NOT_STARTED,n/a and run with -preclean
  -lock                   : lock run specified by target and runname
  -unlock                 : unlock run specified by target and runname
289
290
291
292
293
294
295

296
297
298
299
300
301
302
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309







+







			"-itempatt"
			"-setlog"
			"-set-toplog"
			"-runstep"
			"-logpro"
			"-m"
			"-rerun"

			"-days"
			"-rename-run"
			"-to"
			;; values and messages
			":category"
			":variable"
			":value"
401
402
403
404
405
406
407


408
409
410
411
412
413
414
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423







+
+








			;; queries
			"-test-paths" ;; get path(s) to a test, ordered by youngest first

			"-runall"    ;; run all tests, respects -testpatt, defaults to %
			"-run"       ;; alias for -runall
			"-remove-runs"
                        "-kill-runs"
                        "-kill-rerun"
                        "-keep-records" ;; use with -remove-runs to remove only the run data
			"-rebuild-db"
			"-cleanup-db"
			"-rollup"
			"-update-meta"
			"-create-megatest-area"
			"-mark-incompletes"
569
570
571
572
573
574
575
576

577
578
579
580
581

582
583
584
585
586
587
588
578
579
580
581
582
583
584

585
586
587
588
589

590
591
592
593
594
595
596
597







-
+




-
+







					    (printf "Sending signal/term to ~A\n" pid)
					    (process-signal pid signal/term))))))
		       (process:children #f))
		      (original-exit exit-code)))))

;; for some switches always print the command to stderr
;;
(if (args:any? "-run" "-runall" "-remove-runs" "-set-state-status")
(if (args:any? "-run" "-runall" "-remove-runs" "-set-state-status" "-kill-runs" "-kill-rerun")
    (debug:print 0 *default-log-port* (string-intersperse (argv) " ")))

;; some switches imply homehost. Exit here if not on homehost
;;
(let ((homehost-required  (list "-cleanup-db" "-server")))
#;(let ((homehost-required  (list "-cleanup-db" "-server")))
  (if (apply args:any? homehost-required)
      (if (not (common:on-homehost?))
	  (for-each
	   (lambda (switch)
	     (if (args:get-arg switch)
		 (begin
		   (debug:print 0 *default-log-port* "ERROR: you must be on the homehost to run with " switch
1036
1037
1038
1039
1040
1041
1042
1043

1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063




































1064
1065
1066
1067
1068
1069
1070
1045
1046
1047
1048
1049
1050
1051

1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115







-
+




















+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+







     ((not target)
      (debug:print-error 0 *default-log-port* "Missing required parameter for " action ", you must specify -target or -reqtarg")
      (exit 1))
     ((not (or (args:get-arg ":runname")
	       (args:get-arg "-runname")))
      (debug:print-error 0 *default-log-port* "Missing required parameter for " action ", you must specify the run name pattern with -runname patt")
      (exit 2))
     ((not (args:get-arg "-testpatt"))
     ((not (or (args:get-arg "-testpatt") (eq? action 'kill-runs)))
      (debug:print-error 0 *default-log-port* "Missing required parameter for " action ", you must specify the test pattern with -testpatt")
      (exit 3))
     (else
      (if (not (car *configinfo*))
	  (begin
	    (debug:print-error 0 *default-log-port* "Attempted " action "on test(s) but run area config file not found")
	    (exit 1))
	  ;; put test parameters into convenient variables
	  (begin
	    ;; check for correct version, exit with message if not correct
	    (common:exit-on-version-changed)
	    (runs:operate-on  action
			      target
			      (common:args-get-runname)  ;; (or (args:get-arg "-runname")(args:get-arg ":runname"))
			      (common:args-get-testpatt #f) ;; (args:get-arg "-testpatt")
			      state: (common:args-get-state)
			      status: (common:args-get-status)
			      new-state-status: (args:get-arg "-set-state-status")
                              mode: mode)))
      (set! *didsomething* #t)))))

(if (args:get-arg "-kill-runs")
    (general-run-call 
     "-kill-runs"
     "kill runs"
     (lambda (target runname keys keyvals)
       (operate-on 'kill-runs mode: #f)
       )))

(if (args:get-arg "-kill-rerun")
    (let* ((target-patt (args:get-arg "-target"))
           (runname-patt (args:get-arg "-runname")))
      (cond ((not target-patt)
             (debug:print-error 0 *default-log-port* "Missing target, must specify target for -kill-rerun with -target <target name>")
             (exit 1))
            ((not runname-patt)
             (debug:print-error 0 *default-log-port* "Missing runname, must specify runname for -kill-rerun with -runname <run name>")
             (exit 1))
            ((string-search "[ ,%]" target-patt)
             (debug:print-error 0 *default-log-port* "Invalid target ["target-patt"], must specify exact target (no wildcards) for -kill-rerun with -target <target name>")
             (exit 1))
            ((string-search "[ ,%]" runname-patt)
             (debug:print-error 0 *default-log-port* "Invalid runname ["runname-patt"], must specify exact runname (no wildcards) for -kill-rerun with -runname <runname name>")
             (exit 1))
            (else
             (general-run-call 
              "-kill-runs"
              "kill runs"
              (lambda (target runname keys keyvals)
                (operate-on 'kill-runs mode: #f)
                ))
      
             (thread-sleep! 15))
            ;; fall thru and let "-run" loop fire
            )))


(if (args:get-arg "-remove-runs")
    (general-run-call 
     "-remove-runs"
     "remove runs"
     (lambda (target runname keys keyvals)
       (operate-on 'remove-runs mode: (if (args:get-arg "-keep-records")
1620
1621
1622
1623
1624
1625
1626
1627


1628
1629
1630
1631
1632
1633
1634

1635
1636
1637
1638
1639
1640
1641
1665
1666
1667
1668
1669
1670
1671

1672
1673
1674
1675
1676
1677
1678
1679

1680
1681
1682
1683
1684
1685
1686
1687







-
+
+






-
+







;;   process deferred tasks per above steps

;; run all tests are are Not COMPLETED and PASS or CHECK
(if (or (args:get-arg "-runall")
	(args:get-arg "-run")
	(args:get-arg "-rerun-clean")
	(args:get-arg "-rerun-all")
	(args:get-arg "-runtests"))
	(args:get-arg "-runtests")
        (args:get-arg "-kill-rerun"))
    (let ((need-clean (or (args:get-arg "-rerun-clean")
                          (args:get-arg "-rerun-all"))))
      (general-run-call 
       "-runall"
       "run all tests"
       (lambda (target runname keys keyvals)
         (if (args:get-arg "-rerun-clean") ;; first set states/statuses correct
         (if (or (args:get-arg "-kill-rerun") (args:get-arg "-rerun-clean")) ;; first set states/statuses correct
	     ;; For rerun-clean do we or do we not support the testpatt?
             (let ((states   (or (configf:lookup *configdat* "validvalues" "cleanrerun-states")
                                 "KILLREQ,KILLED,UNKNOWN,INCOMPLETE,STUCK,NOT_STARTED"))
                   (statuses (or (configf:lookup *configdat* "validvalues" "cleanrerun-statuses")
                                 "FAIL,INCOMPLETE,ABORT,CHECK,DEAD,PREQ_FAIL,PREQ_DISCARDED")))
               (hash-table-set! args:arg-hash "-preclean" #t)
               (runs:operate-on 'set-state-status
2148
2149
2150
2151
2152
2153
2154
2155

2156
2157
2158
2159
2160
2161
2162
2194
2195
2196
2197
2198
2199
2200

2201
2202
2203
2204
2205
2206
2207
2208







-
+







    (exit 0)))

(if (or (getenv "MT_RUNSCRIPT")
	(args:get-arg "-repl")
	(args:get-arg "-load"))
    (let* ((toppath (launch:setup))
	   (dbstruct (if (and toppath
                              (common:on-homehost?))
                              #;(common:on-homehost?))
                         (db:setup #t)
                         #f))) ;; make-dbr:dbstruct path: toppath local: (args:get-arg "-local")) #f)))
      (if *toppath*
	  (cond
	   ((getenv "MT_RUNSCRIPT")
	    ;; How to run megatest scripts
	    ;;
2172
2173
2174
2175
2176
2177
2178

2179
2180
2181
2182
2183
2184
2185
2218
2219
2220
2221
2222
2223
2224
2225
2226
2227
2228
2229
2230
2231
2232







+







	   (else
	    (begin
	      (set! *db* dbstruct)
	      (import extras) ;; might not be needed
	      ;; (import csi)
	      (import readline)
	      (import apropos)
	      (import portlogger)
	      ;; (import (prefix sqlite3 sqlite3:)) ;; doesn't work ...

	      (if *use-new-readline*
		  (begin
		    (install-history-file (get-environment-variable "HOME") ".megatest_history") ;;  [homedir] [filename] [nlines])
		    (current-input-port (make-readline-port "megatest> ")))
		  (begin

Modified mtut.scm from [848d0d5914] to [da1a65f5ac].

138
139
140
141
142
143
144


145
146
147
148
149
150
151
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153







+
+







  -version                   : print megatest version (currently " megatest-version ")
			     
Run management:		     
   run                       : initiate or resume a run, already completed and in-progress
                               tests are not affected.
   rerun-clean               : clean and rerun all not completed pass/fail tests
   rerun-all                 : clean and rerun entire run
   kill-run                  : kill all tests in run
   kill-rerun                : kill all tests in run and restart non-completed tests
   remove                    : remove runs
   set-ss                    : set state/status
   archive                   : compress and move test data to archive disk
   kill                      : stop tests or entire runs
   db                        : database utilities

Queries:
245
246
247
248
249
250
251


252
253
254
255
256
257
258
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262







+
+







    ))

;; alist to map actions to old megatest commands
(define *action-keys*
  '((run         . "-run")
    (rerun-clean . "-rerun-clean")
    (rerun-all   . "-rerun-all")
    (kill-run    . "-kill-runs")
    (kill-rerun  . "-kill-rerun")
    (sync        . "")
    (archive     . "-archive")
    (set-ss      . "-set-state-status")
    (remove      . "-remove-runs")))

;; manually keep this list updated from the keys to
;; the case *action* near the end of this file.
490
491
492
493
494
495
496


497
498
499
500
501
502
503
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509







+
+








(define-inline (decode data)
  (with-input-from-string
      data
    (lambda ()
      (read))))

;; moved to portlogger - TODO: remove from here and get from portlogger
;;
(define (is-port-in-use port-num)
 (let* ((ret #f))
     (let-values (((inp oup pid)
                (process "netstat" (list  "-tulpn" ))))
      (let loop ((inl (read-line inp)))
        (if (not (eof-object? inl))
            (begin 
1441
1442
1443
1444
1445
1446
1447
1448

1449
1450
1451
1452
1453
1454
1455
1447
1448
1449
1450
1451
1452
1453

1454
1455
1456
1457
1458
1459
1460
1461







-
+







	;    (hash-table-set! args:arg-hash "-log" logf)) ;; fake out future queries of -log
	(print *default-log-port* "Sending log output to " logf)
	(set! *default-log-port* oup)
)))

(if *action*
    (case (string->symbol *action*)
      ((run remove rerun rerun-clean rerun-all set-ss archive kill list)
      ((run remove rerun rerun-clean rerun-all set-ss archive kill list kill-run kill-rerun)
       (let* ((mtconfdat (simple-setup (args:get-arg "-start-dir")))
	      (mtconf    (car mtconfdat))
	      (area      (args:get-arg "-area")) ;; look up the area to dispatch to from [areas] section
	      (areasec   (if area (configf:lookup mtconf "areas" area) #f))
	      (areadat   (if areasec (common:val->alist areasec) #f))
	      (area-path (if areadat (alist-ref 'path areadat) #f))
	      (pktsdirs  (configf:lookup mtconf "setup" "pktsdirs"))

Added nmsg-transport.scm version [69e3d806c5].

































































































































1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+

;; Copyright 2006-2012, Matthew Welland.
;; 
;; This file is part of Megatest.
;; 
;;     Megatest is free software: you can redistribute it and/or modify
;;     it under the terms of the GNU General Public License as published by
;;     the Free Software Foundation, either version 3 of the License, or
;;     (at your option) any later version.
;; 
;;     Megatest is distributed in the hope that it will be useful,
;;     but WITHOUT ANY WARRANTY; without even the implied warranty of
;;     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;;     GNU General Public License for more details.
;; 
;;     You should have received a copy of the GNU General Public License
;;     along with Megatest.  If not, see <http://www.gnu.org/licenses/>.

;;======================================================================
;; Support routines for nmsg usage.
;;   This should be reusable, non-megatest specific stuff
;;======================================================================

(declare (unit nmsg-transport))

(module
    nmsg-transport
    (
     nmsg:start-server
     nmsg:open-send-close
     nmsg:open-send-receive
     )

(import scheme posix chicken data-structures ports)

(use pkts)
(use nanomsg srfi-18)

;;start a server, returns the connection
;;
(define (nmsg:start-server portnum )
  (let ((rep (nn-socket 'rep)))
    (handle-exceptions
     exn
     (let ((emsg ((condition-property-accessor 'exn 'message) exn)))
       (print "ERROR: Failed to start server \"" emsg "\"")
       #f)
     (nn-bind rep (conc "tcp://*:" portnum)))
    rep))

;; open connection to server, send message, close connection
;;
;;  to take an action on failure use proc which is called with the error info
;;    (proc exn errormsg)
;;
(define (nmsg:open-send-close host-port msg attrib #!key (timeout 3)(proc #f)) ;; default timeout is 3 seconds
  (let ((req  (nn-socket 'req))
        (uri  (conc "tcp://" host-port))
        (res  #f)
        ;; (contacts (alist-ref 'contact attrib))
        (mode (alist-ref 'mode attrib))) 
    (handle-exceptions
     exn
     (let ((emsg ((condition-property-accessor 'exn 'message) exn)))
       ;; Send notification
       (if proc (proc exn emsg))
       #f)
     (nn-connect req uri)
     (print "Connected to the server " )
     (nn-send req msg)
     (print "Request Sent")  
     (let* ((th1  (make-thread (lambda ()
                                 (let ((resp (nn-recv req)))
                                   (nn-close req)
                                   (set! res (if (equal? resp "ok")
                                                 #t
                                                 #f))))
                               "recv thread"))
            (th2 (make-thread (lambda ()
                                (thread-sleep! timeout)
                                (thread-terminate! th1))
                             "timer thread")))
       (thread-start! th1)
       (thread-start! th2)
       (thread-join! th1)
       res))))

;; default timeout is 3 seconds
;;
(define (nmsg:open-send-receive host-port msg attrib #!key (timeout 3)(proc #f)) 
  (let ((req  (nn-socket 'req))
        (uri  (conc "tcp://" host-port))
        (res  #f)
        (mode (alist-ref 'mode attrib))) 
    (handle-exceptions
     exn
     (let ((emsg ((condition-property-accessor 'exn 'message) exn)))
       ;; Send notification      
       (if proc (proc exn emsg))
       #f)
     (nn-connect req uri)
     (print "Connected to the server " )
     (nn-send req msg)
     (print "Request Sent")  
     ;; receive code here
     ;;(print (nn-recv req))
     (let* ((th1  (make-thread (lambda ()
                                 (let ((resp (nn-recv req)))
                                   (nn-close req)
                                   (print resp)
                                   (set! res (if (equal? resp "ok")
                                                 #t
                                                 #f))))
                               "recv thread"))
            (th2 (make-thread (lambda ()
                                (thread-sleep! timeout)
                                (thread-terminate! th1))
                             "timer thread")))
       (thread-start! th1)
       (thread-start! th2)
       (thread-join! th1)
       res))))

;; get a signature for identifing this process
(define (nmsg:get-process-signature)
  (conc (get-host-name) " " (current-process-id)))

)

Modified portlogger-example.scm from [79b0759ae8] to [075b5430bd].

13
14
15
16
17
18
19








20
21
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29







+
+
+
+
+
+
+
+


;;     GNU General Public License for more details.
;; 
;;     You should have received a copy of the GNU General Public License
;;     along with Megatest.  If not, see <http://www.gnu.org/licenses/>.


(declare (uses portlogger))
(import portlogger)
(use trace (prefix sqlite3 sqlite3:))
(trace
 portlogger:open-db
 portlogger:take-port
 portlogger:open-run-close
 sqlite3:execute
 )

(print (apply portlogger:main (cdr (argv))))

Modified portlogger.scm from [8b8ee119e5] to [6ef6750d8e].

13
14
15
16
17
18
19

20
21
22
23
























24

25
26




27


28




29





30
31
32
33


34
35
36
37
38
39

40
41
42
43
44
45
46
47
48
49
50

51
52
53
54
55
56
57
58
59
60
61
62
63

64
65
66
67
68
69
70
71

72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87



88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105

106
107
108
109
110
111
112
13
14
15
16
17
18
19
20




21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46


47
48
49
50
51
52
53

54
55
56
57
58
59
60
61
62
63
64
65


66
67
68
69
70
71
72

73
74








75

76

77
78
79
80
81
82
83
84
85
86
87

88
89
90
91
92
93
94
95

96
97
98
99
100
101
102
103
104
105
106
107
108
109



110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129

130
131
132
133
134
135
136
137







+
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+

+
-
-
+
+
+
+

+
+
-
+
+
+
+

+
+
+
+
+


-
-
+
+





-
+

-
-
-
-
-
-
-
-

-
+
-











-
+







-
+













-
-
-
+
+
+

















-
+







;;     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;;     GNU General Public License for more details.
;; 
;;     You should have received a copy of the GNU General Public License
;;     along with Megatest.  If not, see <http://www.gnu.org/licenses/>.
;;

(declare (unit portlogger))
(require-extension (srfi 18) extras tcp s11n)

(use sqlite3 srfi-1 posix srfi-69 hostinfo dot-locking z3)
(import (prefix sqlite3 sqlite3:))

(module
 portlogger
 (portlogger:set-configdat!
  portlogger:set-printers!
  portlogger:set-default-log-port!
  portlogger:open-db
  portlogger:open-run-close
  portlogger:take-port
  portlogger:get-prev-used-port
  portlogger:find-port
  portlogger:set-port
  portlogger:release-port
  portlogger:set-failed
  portlogger:is-port-in-use
  portlogger:main
)
 
(import scheme posix chicken data-structures ports)

(require-extension (srfi 18) extras tcp s11n)
(use srfi-1 posix srfi-69 hostinfo dot-locking z3 regex)
(use (prefix sqlite3 sqlite3:))
(use (prefix mtconfigf configf:))

;; lsof -i
(declare (unit portlogger))
(declare (uses db))

(define *configdat* #f)
(define (portlogger:set-configdat! cfgdat)
  (set! *configdat* cfgdat))

(define (debug:print level port . params)
  (with-output-to-port
;; lsof -i
      port
    (lambda ()(apply print params))))
(define debug:print-error debug:print)
(define *default-log-port* (current-error-port))

(define (portlogger:set-printers! pdebug pdebugerr)
  (set! debug:print pdebug)
  (set! debug:print-error pdebugerr))
(define (portlogger:set-default-log-port! port)
  (set! *default-log-port* port))

(define (portlogger:open-db fname)
  (let* ((avail    (tasks:wait-on-journal fname 5 remove: #t)) ;; wait up to about 10 seconds for the journal to go away
	 (exists   (common:file-exists? fname))
  (let* ((avail    #t) ;; for now - assume wait on journal not needed (tasks:wait-on-journal fname 5 remove: #t)) ;; wait up to about 10 seconds for the journal to go away
	 (exists   (file-exists? fname))
	 (db       (if avail 
		       (sqlite3:open-database fname)
		       (begin
			 (system (conc "rm -f " fname))
			 (sqlite3:open-database fname))))
	 (handler  (make-busy-timeout 136000))
	 (handler  (sqlite3:make-busy-timeout 136000))
	 (canwrite (file-write-access? fname)))
	 ;; (db-init  (lambda ()
	 ;;             (sqlite3:execute 
	 ;;              db
	 ;;              "CREATE TABLE IF NOT EXISTS ports (
         ;;                 port INTEGER PRIMARY KEY,
         ;;                 state TEXT DEFAULT 'not-used',
         ;;                 fail_count INTEGER DEFAULT 0,
         ;;                 update_time TIMESTAMP DEFAULT (strftime('%s','now')) );"))))
    (sqlite3:set-busy-handler! db handler)
    (db:set-sync db) ;; (sqlite3:execute db "PRAGMA synchronous = 0;")
    (sqlite3:execute db "PRAGMA synchronous = 0;")
    ;; (if (not exists) ;; needed with IF NOT EXISTS?
    (sqlite3:execute 
     db
     "CREATE TABLE IF NOT EXISTS ports (
            port INTEGER PRIMARY KEY,
            state TEXT DEFAULT 'not-used',
            fail_count INTEGER DEFAULT 0,
            update_time TIMESTAMP DEFAULT (strftime('%s','now')) );")
    db))

(define (portlogger:open-run-close proc . params)
  (let* ((fname  (conc "/tmp/." (current-user-name) "-portlogger.db"))
	 (avail  (tasks:wait-on-journal fname 10))) ;; wait up to about 10 seconds for the journal to go away
	 (avail  #t)) ;; (tasks:wait-on-journal fname 10))) ;; wait up to about 10 seconds for the journal to go away
    (handle-exceptions
     exn
     (begin
       ;; (release-dot-lock fname)
       (debug:print-error 0 *default-log-port* "portlogger:open-run-close failed. " proc " " params)
       (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
       (debug:print 5 *default-log-port* "exn=" (condition->list exn))
       (if (common:file-exists? fname)(delete-file fname)) ;; brutally get rid of it
       (if (file-exists? fname)(delete-file fname)) ;; brutally get rid of it
       (print-call-chain (current-error-port)))
     (let* (;; (lock   (obtain-dot-lock fname 2 9 10))
	    (db     (portlogger:open-db fname))
	    (res    (apply proc db params)))
       (sqlite3:finalize! db)
       ;; (release-dot-lock fname)
       res))))

;; (fold-row PROC INIT DATABASE SQL . PARAMETERS) 
(define (portlogger:take-port db portnum)
  (let* ((qry1 (sqlite3:prepare db "INSERT INTO ports (port,state) VALUES (?,?);"))
	 (qry2 (sqlite3:prepare db "UPDATE ports SET state=?,update_time=strftime('%s','now') WHERE port=?;"))
	 (qry3 (sqlite3:prepare db "SELECT state FROM ports WHERE port=?;"))
	 (res  (sqlite3:with-transaction
		db
		(lambda ()
	 (res  ;; (sqlite3:with-transaction ;; move the transaction up to the find-port call
		;; db
		;; (lambda ()
		  ;; (fold-row (lambda (var curr) (or var curr)) #f db "SELECT var FROM foo WHERE id=100;")
		  (let* ((curr #f)
			 (res  #f))
		    (set! curr (sqlite3:fold-row
				(lambda (var curr)
				  (or curr var curr))
				"not-tried"
				qry3
				portnum))
		    ;; (print "curr=" curr)
		    (set! res (case (string->symbol curr)
				((released)  (sqlite3:execute qry2 "taken" portnum) 'taken)
				((not-tried) (sqlite3:execute qry1 portnum "taken") 'taken)
				((taken)                                            'already-taken)
				((failed)                                           'failed)
				(else                                               'error)))
		    ;; (print "res=" res)
		    res)))))
		    res))) ;; ))
    (sqlite3:finalize! qry1)
    (sqlite3:finalize! qry2)
    (sqlite3:finalize! qry3)
    res))

(define (portlogger:get-prev-used-port db)
  (handle-exceptions
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
























147
148
149
150
151
152




153
154
155
156












157
158
159
160
161
162
163
147
148
149
150
151
152
153


















154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210







-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+






+
+
+
+




+
+
+
+
+
+
+
+
+
+
+
+







     (lambda (var curr)
       (or curr var curr))
     #f
     db
     "SELECT (port) FROM ports WHERE state='released' LIMIT 1;")))

(define (portlogger:find-port db)
  (let* ((lowport (let ((val (configf:lookup *configdat* "server" "lowport")))
		    (if (and val 
			     (string->number val))
			(string->number val)
			32768)))
	 (portnum (or (portlogger:get-prev-used-port db)
		      (+ lowport ;; top of registered ports is 49152 but lets use ports in the registered range
			 (random (- 64000 lowport))))))
    (handle-exceptions
     exn
     (begin
       (debug:print 0 *default-log-port* "EXCEPTION: portlogger database probably overloaded or unreadable. If you see this message again remove /tmp/.$USER-portlogger.db")
       (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
       (debug:print 5 *default-log-port* "exn=" (condition->list exn))
       (print-call-chain (current-error-port))
       (debug:print 0 *default-log-port* "Continuing anyway."))
     (portlogger:take-port db portnum))
    portnum))
  (let ((lowport (let ((val (configf:lookup *configdat* "server" "lowport")))
		   (if (and val 
			    (string->number val))
		       (string->number val)
		       32768))))
    (sqlite3:with-transaction
     db
     (lambda ()
       (let loop ((numtries 0))
	 (let* ((portnum (or (portlogger:get-prev-used-port db)
			     (+ lowport ;; top of registered ports is 49152 but let's use ports in the registered range
				(random (- 64000 lowport))))))
	   (handle-exceptions
	    exn
	    (begin
	      (debug:print 0 *default-log-port* "EXCEPTION: portlogger database probably overloaded or unreadable. If you see this message again remove /tmp/.$USER-portlogger.db")
	      (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
	      (debug:print 5 *default-log-port* "exn=" (condition->list exn))
	      (print-call-chain (current-error-port))
	      (debug:print 0 *default-log-port* "Continuing anyway."))
	    (portlogger:take-port db portnum) ;; always "take the port"
	    (if (portlogger:is-port-in-use portnum)
		portnum
		(loop (add1 numtries))))))))))

;; set port to "released", "failed" etc.
;; 
(define (portlogger:set-port db portnum value)
  (sqlite3:execute db "UPDATE ports SET state=?,update_time=strftime('%s','now') WHERE port=?;" value portnum))

;; release port
(define (portlogger:release-port db portnum)
  (sqlite3:execute db "UPDATE ports SET state=?,update_time=strftime('%s','now') WHERE port=?;" "released" portnum))

;; set port to failed (attempted to take but got error)
;;
(define (portlogger:set-failed db portnum)
  (sqlite3:execute db "UPDATE ports SET state='failed',fail_count=fail_count+1,update_time=strftime('%s','now') WHERE port=?;" portnum))

;; pulled from mtut - TODO: remove from mtut
;;
(define (portlogger:is-port-in-use port-num)
  (let-values (((inp oup pid)
                (process "netstat" (list  "-tulpn" ))))
    (let loop ((inl (read-line inp)))
      (if (not (eof-object? inl))
	  (begin 
	    (if (string-search (regexp (conc ":" port-num "\\s+")) inl)
		#t
		(loop (read-line inp))))))))

;;======================================================================
;; MAIN
;;======================================================================

(define (portlogger:main . args)
  (let* ((dbfname (conc "/tmp/." (current-user-name) "-portlogger.db"))
178
179
180
181
182
183
184
185


186
187
188
189

225
226
227
228
229
230
231

232
233
234
235
236
237
238







-
+
+




+
	     ((find)(portlogger:find-port db))
	     ((set) (let ((port  (cadr  args))
			  (state (caddr args)))
		      (portlogger:set-port db 
					   (if (number? port) port (string->number port))
					   state)
		      state))
	     ((failed)(portlogger:set-failed db (string->number (cadr args))) 'failed)))))
	     ((failed)(portlogger:set-failed db (string->number (cadr args))) 'failed)
	     (else "nosuchcommand")))))
    (sqlite3:finalize! db)
    result))
     
;; (print (apply portlogger:main (cdr (argv))))
)

Modified rmt.scm from [0a05f35135] to [75cad94866].

18
19
20
21
22
23
24
25
26
27
28
29
30
31

32
33
34
35
36
37
38
39

40
41
42
43
44
45
46
47
48
49
50
51
52
53

54
55

56
57
58
59

60
61
62
63
64
65
66
67
68

69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95

96
97

98
99
100
101
102
103
104
105

106
107
108

109
110
111
112
113
114

115
116
117
118
119
120
121
122
123
124
125

126
127
128
129
130
131
132
133
134

135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212

213
214
215
216
217
218
219
220
221
222
223
224
225
226



227
228
229


230
231


232
233
234
235
236



237
238
239
240
241


242
243

244
245

246
247
248
249
250
251
252
253
254
255



256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280

281
282
283
284
285
286
287
288
289
290
291
292
293

294
295
296


297
298
299
300
301
302
303
304
305
306
307
308
309

310
311
312
313
314
315


316
317
318
319
320
321
322
323
324
325
326
327


328
329
330
331
332
333
334

335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352


353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395


396
397
398
399
400
401

402
403
404

405
406

407
408
409
410
411

412
413
414
415

416
417
418
419
420
421

422
423
424
425

426
427
428

429
430
431
432
433
434
435
436

437
438

439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456

457
458
459
460
461
462

463
464
465

466
467
468
469

470
471
472
473
474
475
476
477
478
479
480

481
482

483
484
485
486
487
488
489
490
491
492

493
494
495
496
497
498
499
500
501
502
503
504
505
506

507
508
509
510
511
512

513
514
515


516
517

518
519
520
521


522
523
524
525
526
527
528
529
530

531
532
533
534
535

536
537
538
539
540
541
542

543
544
545

546
547
548
549
550
551
552

553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607

608
609
610
611
612
613
614
615
616
617
618


619
620

621
622
623

624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644

645
646
647


648
649

650
651
652

653
654
655
656

657
658

659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693

694
695
696
697
698
699

700
701
702


703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769

770
771
772

773
774
775

776
777
778
779
780
781
782
783

784
785
786
787
788
789
790
791
792
793
794

795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818

819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837

838
839
840
841
842
843
844
845

846
847
848

849
850
851
852
853
854
855

856
857

858
859
860

861
862
863

864
865
866

867
868
869

870
871
872

873


874
875
876
877
878
879
880
881
882
883
884
885

886
887
888

889
890
891

892
893
894
895
896
897
898

899
900
901

902
903
904

905
906
907

908
909
910
911
912
913
914

915
916
917

918
919
920

921
922
923

924
925
926

927
928
929

930
18
19
20
21
22
23
24

25
26




27



28




29














30


31




32









33



















34







35


36








37



38






39











40









41














































































42














43
44
45



46
47


48
49





50
51
52





53
54


55


56










57
58
59

























60













61



62
63













64






65
66












67
68







69


















70
71











































72
73






74



75


76





77




78






79




80



81








82


83


















84






85



86




87











88


89










90














91






92



93
94


95




96
97









98





99







100



101







102























































103











104
105


106



107





















108



109
110


111



112




113


114



































115






116



117
118



































































119



120



121








122











123
























124



















125








126



127







128


129



130



131



132



133



134

135
136












137



138



139







140



141



142



143







144



145



146



147



148



149








-


-
-
-
-
+
-
-
-

-
-
-
-
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
-
-
+
-
-
-
-
+
-
-
-
-
-
-
-
-
-
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-

-
-
-
-
-
-
-
+
-
-
+
-
-
-
-
-
-
-
-
+
-
-
-
+
-
-
-
-
-
-
+
-
-
-
-
-
-
-
-
-
-
-
+
-
-
-
-
-
-
-
-
-
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
-
-
-
+
+
-
-
+
+
-
-
-
-
-
+
+
+
-
-
-
-
-
+
+
-
-
+
-
-
+
-
-
-
-
-
-
-
-
-
-
+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
-
-
-
-
-
-
-
-
-
-
-
-
-
+
-
-
-
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
+
-
-
-
-
-
-
+
+
-
-
-
-
-
-
-
-
-
-
-
-
+
+
-
-
-
-
-
-
-
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
-
-
-
-
-
-
+
-
-
-
+
-
-
+
-
-
-
-
-
+
-
-
-
-
+
-
-
-
-
-
-
+
-
-
-
-
+
-
-
-
+
-
-
-
-
-
-
-
-
+
-
-
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
-
-
-
-
-
-
+
-
-
-
+
-
-
-
-
+
-
-
-
-
-
-
-
-
-
-
-
+
-
-
+
-
-
-
-
-
-
-
-
-
-
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
-
-
-
-
-
-
+
-
-
-
+
+
-
-
+
-
-
-
-
+
+
-
-
-
-
-
-
-
-
-
+
-
-
-
-
-
+
-
-
-
-
-
-
-
+
-
-
-
+
-
-
-
-
-
-
-
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
-
-
-
-
-
-
-
-
-
-
-
+
+
-
-
+
-
-
-
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
-
-
-
+
+
-
-
+
-
-
-
+
-
-
-
-
+
-
-
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
-
-
-
-
-
-
+
-
-
-
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
-
-
-
+
-
-
-
+
-
-
-
-
-
-
-
-
+
-
-
-
-
-
-
-
-
-
-
-
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
-
-
-
-
-
-
-
-
+
-
-
-
+
-
-
-
-
-
-
-
+
-
-
+
-
-
-
+
-
-
-
+
-
-
-
+
-
-
-
+
-
-
-
+
-
+
+
-
-
-
-
-
-
-
-
-
-
-
-
+
-
-
-
+
-
-
-
+
-
-
-
-
-
-
-
+
-
-
-
+
-
-
-
+
-
-
-
+
-
-
-
-
-
-
-
+
-
-
-
+
-
-
-
+
-
-
-
+
-
-
-
+
-
-
-
+
-
;;
;;======================================================================

(use format typed-records) ;; RADT => purpose of json format??

(declare (unit rmt))
(declare (uses api))
(declare (uses http-transport))
(include "common_records.scm")

;;
;; THESE ARE ALL CALLED ON THE CLIENT SIDE!!!
;;

(use (prefix pkts pkts:) srfi-18)
;; generate entries for ~/.megatestrc with the following
;;
;;  grep define ../rmt.scm | grep rmt: |perl -pi -e 's/\(define\s+\((\S+)\W.*$/\1/'|sort -u

;;======================================================================
;;  S U P P O R T   F U N C T I O N S
;;======================================================================

(defstruct cmdrec
;; if a server is either running or in the process of starting call client:setup
;; else return #f to let the calling proc know that there is no server available
;;
(define (rmt:get-connection-info areapath #!key (area-dat #f)) ;; TODO: push areapath down.
  (let* ((runremote (or area-dat *runremote*))
	 (cinfo     (if (remote? runremote)
			(remote-conndat runremote)
			#f)))
	  (if cinfo
	      cinfo
	      (if (server:check-if-running areapath)
		  (client:setup areapath)
		  #f))))

  cmd
(define *send-receive-mutex* (make-mutex)) ;; should have separate mutex per run-id

  (host    #f)
;; RA => e.g. usage (rmt:send-receive 'get-var #f (list varname))
;;
(define (rmt:send-receive cmd rid params #!key (attemptnum 1)(area-dat #f)) ;; start attemptnum at 1 so the modulo below works as expected

  (run-ids #f)
  ;;DOT digraph megatest_state_status {
  ;;DOT   ranksep=0;
  ;;DOT   // rankdir=LR;
  ;;DOT   node [shape="box"];
  ;;DOT "rmt:send-receive" -> MUTEXLOCK;
  ;;DOT { edge [style=invis];"case 1" -> "case 2" -> "case 3" -> "case 4" -> "case 5" -> "case 6" -> "case 7" -> "case 8" -> "case 9" -> "case 10" -> "case 11"; }
  ;; do all the prep locked under the rmt-mutex
  (mutex-lock! *rmt-mutex*)
  
  params)
  ;; 1. check if server is started IFF cmd is a write OR if we are not on the homehost, store in runremote
  ;; 2. check the age of the connections. refresh the connection if it is older than timeout-20 seconds.
  ;; 3. do the query, if on homehost use local access
  ;;
  (let* ((start-time    (current-seconds)) ;; snapshot time so all use cases get same value
         (areapath      *toppath*);; TODO - resolve from dbstruct to be compatible with multiple areas
	 (runremote     (or area-dat
			    *runremote*))
	 (readonly-mode (if (and runremote
				 (remote-ro-mode-checked runremote))
			    (remote-ro-mode runremote)
			    (let* ((dbfile  (conc *toppath* "/megatest.db"))
				   (ro-mode (not (file-write-access? dbfile)))) ;; TODO: use dbstruct or runremote to figure this out in future
			      (if runremote
				  (begin
				    (remote-ro-mode-set! runremote ro-mode)
				    (remote-ro-mode-checked-set! runremote #t)
				    ro-mode)
				  ro-mode)))))

    ;; DOT INIT_RUNREMOTE; // leaving off - doesn't really add to the clarity
    ;; DOT MUTEXLOCK -> INIT_RUNREMOTE [label="no remote?"];
    ;; DOT INIT_RUNREMOTE -> MUTEXLOCK;
    ;; ensure we have a record for our connection for given area
    (if (not runremote)                   ;; can remove this one. should never get here.         
	(begin
	  (set! *runremote* (make-remote))
;; call cmd on remote host (#f for any host)
	  (set! runremote   *runremote*))) ;; new runremote will come from this on next iteration
    
;;
    ;; DOT SET_HOMEHOST; // leaving off - doesn't really add to the clarity
    ;; DOT MUTEXLOCK -> SET_HOMEHOST [label="no homehost?"];
    ;; DOT SET_HOMEHOST -> MUTEXLOCK;
    ;; ensure we have a homehost record
    (if (not (pair? (remote-hh-dat runremote)))  ;; not on homehost
	(thread-sleep! 0.1) ;; since we shouldn't get here, delay a little
	(remote-hh-dat-set! runremote (common:get-homehost)))
    
;; example: (rmt:run 'get-runs target run-name test-patt state status)
    ;;(print "BB> readonly-mode is "readonly-mode" dbfile is "dbfile)
    (cond
     ;;DOT EXIT;
;;
     ;;DOT MUTEXLOCK -> EXIT [label="> 15 attempts"]; {rank=same "case 1" "EXIT" }
     ;; give up if more than 15 attempts
     ((> attemptnum 15)
      (debug:print 0 *default-log-port* "ERROR: 15 tries to start/connect to server. Giving up.")
      (exit 1))

(define (rmt:run cmd . params)
     ;;DOT CASE2 [label="local\nreadonly\nquery"];
     ;;DOT MUTEXLOCK -> CASE2; {rank=same "case 2" CASE2}
     ;;DOT CASE2 -> "rmt:open-qry-close-locally";
     ;; readonly mode, read request-  handle it - case 2
     ((and readonly-mode
           (member cmd api:read-only-queries)) 
      (mutex-unlock! *rmt-mutex*)
      (debug:print-info 12 *default-log-port* "rmt:send-receive, case 2")
      (rmt:open-qry-close-locally cmd 0 params)
      )

  (let ((server (rmt:get-server cmdrec))) ;; look up server 
     ;;DOT CASE3 [label="write in\nread-only mode"];
     ;;DOT MUTEXLOCK -> CASE3 [label="readonly\nmode?"]; {rank=same "case 3" CASE3}
     ;;DOT CASE3 -> "#f";
     ;; readonly mode, write request.  Do nothing, return #f
     (readonly-mode
      (mutex-unlock! *rmt-mutex*)
      (debug:print-info 12 *default-log-port* "rmt:send-receive, case 3")
      (debug:print 0 *default-log-port* "WARNING: write transaction requested on a readonly area.  cmd="cmd" params="params)
      #f)
    #f))

     ;; This block was for pre-emptively resetting the connection if there had been no communication for some time.
     ;; I don't think it adds any value. If the server is not there, just fail and start a new connection.
     ;; also, the expire-time calculation might not be correct. We want, time-since-last-server-access > (server:get-timeout)
     ;;
     ;;DOT CASE4 [label="reset\nconnection"];
     ;;DOT MUTEXLOCK -> CASE4 [label="have connection,\nlast_access > expire_time"]; {rank=same "case 4" CASE4}
     ;;DOT CASE4 -> "rmt:send-receive";
     ;; reset the connection if it has been unused too long
     ((and runremote
           (remote-conndat runremote)
	   (> (current-seconds) ;; if it has been more than server-timeout seconds since last contact, close this connection and start a new on
	      (+ (http-transport:server-dat-get-last-access (remote-conndat runremote))
		 (remote-server-timeout runremote))))
      (debug:print-info 0 *default-log-port* "Connection to " (remote-server-url runremote) " expired due to no accesses, forcing new connection.")
      (http-transport:close-connections area-dat: runremote)
      (remote-conndat-set! runremote #f) ;; invalidate the connection, thus forcing a new connection.
      (mutex-unlock! *rmt-mutex*)
      (rmt:send-receive cmd rid params attemptnum: attemptnum))
     
     ;;DOT CASE5 [label="local\nread"];
     ;;DOT MUTEXLOCK -> CASE5 [label="server not required,\non homehost,\nread-only query"]; {rank=same "case 5" CASE5};
     ;;DOT CASE5 -> "rmt:open-qry-close-locally";

     ;; on homehost and this is a read
     ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required
	   (cdr (remote-hh-dat runremote))       ;; on homehost
           (member cmd api:read-only-queries))   ;; this is a read
      (mutex-unlock! *rmt-mutex*)
      (debug:print-info 12 *default-log-port* "rmt:send-receive, case  5")
      (rmt:open-qry-close-locally cmd 0 params))

     ;;DOT CASE6 [label="init\nremote"];
     ;;DOT MUTEXLOCK -> CASE6 [label="on homehost,\nwrite query,\nhave server,\ncan't reach it"]; {rank=same "case 6" CASE6};
     ;;DOT CASE6 -> "rmt:send-receive";
     ;; on homehost and this is a write, we already have a server, but server has died
     ((and (cdr (remote-hh-dat runremote))           ;; on homehost
           (not (member cmd api:read-only-queries))  ;; this is a write
           (remote-server-url runremote)             ;; have a server
           (not (server:ping (remote-server-url runremote))))  ;; server has died. NOTE: this is not a cheap call! Need better approach.
      (set! *runremote* (make-remote))
      (remote-force-server-set! runremote (common:force-server?))
      (mutex-unlock! *rmt-mutex*)
      (debug:print-info 12 *default-log-port* "rmt:send-receive, case  6")
      (rmt:send-receive cmd rid params attemptnum: attemptnum))

     ;;DOT CASE7 [label="homehost\nwrite"];
     ;;DOT MUTEXLOCK -> CASE7 [label="server not required,\non homehost,\na write,\nhave a server"]; {rank=same "case 7" CASE7};
     ;;DOT CASE7 -> "rmt:open-qry-close-locally";
     ;; on homehost and this is a write, we already have a server
     ((and (not (remote-force-server runremote))     ;; honor forced use of server, i.e. server NOT required
	   (cdr (remote-hh-dat runremote))           ;; on homehost
           (not (member cmd api:read-only-queries))  ;; this is a write
           (remote-server-url runremote))            ;; have a server
      (mutex-unlock! *rmt-mutex*)
      (debug:print-info 12 *default-log-port* "rmt:send-receive, case  4.1")
      (rmt:open-qry-close-locally cmd 0 params))

     ;;DOT CASE8 [label="force\nserver"];
     ;;DOT MUTEXLOCK -> CASE8 [label="server not required,\nhave homehost info,\nno connection yet,\nnot a read-only query"]; {rank=same "case 8" CASE8};
     ;;DOT CASE8 -> "rmt:open-qry-close-locally";
     ;;  on homehost, no server contact made and this is a write, passively start a server 
     ((and (not (remote-force-server runremote))     ;; honor forced use of server, i.e. server NOT required
	   (cdr (remote-hh-dat runremote))           ;; have homehost
           (not (remote-server-url runremote))       ;; no connection yet
	   (not (member cmd api:read-only-queries))) ;; not a read-only query
      (debug:print-info 12 *default-log-port* "rmt:send-receive, case  8")
      (let ((server-url  (server:check-if-running *toppath*))) ;; (server:read-dotserver->url *toppath*))) ;; (server:check-if-running *toppath*))) ;; Do NOT want to run server:check-if-running - very expensive to do for every write call
	(if server-url
	    (remote-server-url-set! runremote server-url) ;; the string can be consumed by the client setup if needed
	    (if (common:force-server?)
		(server:start-and-wait *toppath*)
		(server:kind-run *toppath*))))
      (remote-force-server-set! runremote (common:force-server?))
      (mutex-unlock! *rmt-mutex*)
      (debug:print-info 12 *default-log-port* "rmt:send-receive, case  8.1")
      (rmt:open-qry-close-locally cmd 0 params))

  
     ;;DOT CASE9 [label="force server\nnot on homehost"];
     ;;DOT MUTEXLOCK -> CASE9 [label="no connection\nand either require server\nor not on homehost"]; {rank=same "case 9" CASE9};
     ;;DOT CASE9 -> "start\nserver" -> "rmt:send-receive";
     ((or (and (remote-force-server runremote)              ;; we are forcing a server and don't yet have a connection to one
	       (not (remote-conndat runremote)))
	  (and (not (cdr (remote-hh-dat runremote)))        ;; not on a homehost 
	       (not (remote-conndat runremote))))           ;; and no connection
      (debug:print-info 12 *default-log-port* "rmt:send-receive, case 9, hh-dat: " (remote-hh-dat runremote) " conndat: " (remote-conndat runremote))
      (mutex-unlock! *rmt-mutex*)
      (if (not (server:check-if-running *toppath*)) ;; who knows, maybe one has started up?
	  (server:start-and-wait *toppath*))
      (remote-conndat-set! runremote (rmt:get-connection-info *toppath*)) ;; calls client:setup which calls client:setup-http
      (rmt:send-receive cmd rid params attemptnum: attemptnum)) ;; TODO: add back-off timeout as

(define (rmt:get-connection-info . args) #t)
(define (rmt:send-receive . args) #t)
(define (rmt:print-db-stats . args) #t)
     ;;DOT CASE10 [label="on homehost"];
     ;;DOT MUTEXLOCK -> CASE10 [label="server not required,\non homehost"]; {rank=same "case 10" CASE10};
     ;;DOT CASE10 -> "rmt:open-qry-close-locally";
(define (rmt:get-max-query-average . args) #t)
(define (rmt:open-qry-close-locally . args) #t)
     ;; all set up if get this far, dispatch the query
     ((and (not (remote-force-server runremote))
(define (rmt:send-receive-no-auto-client-setup . args) #t)
(define (rmt:kill-server . args) #t)
	   (cdr (remote-hh-dat runremote))) ;; we are on homehost
      (mutex-unlock! *rmt-mutex*)
      (debug:print-info 12 *default-log-port* "rmt:send-receive, case 10")
      (rmt:open-qry-close-locally cmd (if rid rid 0) params))

(define (rmt:start-server . args) #t)
(define (rmt:login . args) #t)
(define (rmt:login-no-auto-client-setup . args) #t)
     ;;DOT CASE11 [label="send_receive"];
     ;;DOT MUTEXLOCK -> CASE11 [label="else"]; {rank=same "case 11" CASE11};
     ;;DOT CASE11 -> "rmt:send-receive" [label="call failed"];
     ;;DOT CASE11 -> "RESULT" [label="call succeeded"];
     ;; not on homehost, do server query
(define (rmt:general-call . args) #t)
(define (rmt:get-latest-host-load . args) #t)
     (else
      ;; (mutex-unlock! *rmt-mutex*)
(define (rmt:sdb-qry . args) #t)
      (debug:print-info 12 *default-log-port* "rmt:send-receive, case  9")
      ;; (mutex-lock! *rmt-mutex*)
(define (rmt:runtests . args) #t)
      (let* ((conninfo (remote-conndat runremote))
	     (dat      (case (remote-transport runremote)
			 ((http) (condition-case ;; handling here has caused a lot of problems. However it is needed to deal with attemtped communication to servers that have gone away
                                  (http-transport:client-api-send-receive 0 conninfo cmd params)
                                  ((commfail)(vector #f "communications fail"))
                                  ((exn)(vector #f "other fail" (print-call-chain)))))
			 (else
			  (debug:print 0 *default-log-port* "ERROR: transport " (remote-transport runremote) " not supported")
			  (exit))))
	     (success  (if (vector? dat) (vector-ref dat 0) #f))
(define (rmt:get-run-record-ids . args) #t)
(define (rmt:get-changed-record-ids . args) #t)
(define (rmt:get-tests-tags . args) #t)
	     (res      (if (vector? dat) (vector-ref dat 1) #f)))
	(if (and (vector? conninfo) (< 5 (vector-length conninfo)))
            (http-transport:server-dat-update-last-access conninfo) ;; refresh access time
	    (begin
              (debug:print 0 *default-log-port* "INFO: Should not get here! conninfo=" conninfo)
              (set! conninfo #f)
              (remote-conndat-set! *runremote* #f)
              (http-transport:close-connections  area-dat: runremote)))
	;; (mutex-unlock! *rmt-mutex*)
        (debug:print-info 13 *default-log-port* "rmt:send-receive, case  9. conninfo=" conninfo " dat=" dat " runremote = " runremote)
	(mutex-unlock! *rmt-mutex*)
	(if success ;; success only tells us that the transport was successful, have to examine the data to see if there was a detected issue at the other end
	    (if (and (vector? res)
		     (eq? (vector-length res) 2)
		     (eq? (vector-ref res 1) 'overloaded)) ;; since we are looking at the data to carry the error we'll use a fairly obtuse combo to minimise the chances of some sort of collision.
                ;; this is the case where the returned data is bad or the server is overloaded and we want
                ;; to ease off the queries
		(let ((wait-delay (+ attemptnum (* attemptnum 10))))
		  (debug:print 0 *default-log-port* "WARNING: server is overloaded. Delaying " wait-delay " seconds and trying call again.")
		  (mutex-lock! *rmt-mutex*)
		  (http-transport:close-connections area-dat: runremote)
		  (set! *runremote* #f) ;; force starting over
		  (mutex-unlock! *rmt-mutex*)
		  (thread-sleep! wait-delay)
		  (rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1)))
(define (rmt:get-key-val-pairs . args) #t)
		res) ;; All good, return res
	    (begin
	      (debug:print 0 *default-log-port* "WARNING: communication failed. Trying again, try num: " attemptnum)
	      (mutex-lock! *rmt-mutex*)
              (remote-conndat-set!    runremote #f)
	      (http-transport:close-connections area-dat: runremote)
	      (remote-server-url-set! runremote #f)
	      (mutex-unlock! *rmt-mutex*)
              (debug:print-info 12 *default-log-port* "rmt:send-receive, case  9.1")
	      ;; (if (not (server:check-if-running *toppath*))
	      ;; 	  (server:start-and-wait *toppath*))
	      (rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1)))))))))

(define (rmt:get-keys . args) #t)
    ;;DOT }
    
;; (define (rmt:update-db-stats run-id rawcmd params duration)
(define (rmt:get-keys-write . args) #t)
(define (rmt:get-key-vals . args) #t)
;;   (mutex-lock! *db-stats-mutex*)
;;   (handle-exceptions
;;    exn
;;    (begin
;;      (debug:print 0 *default-log-port* "WARNING: stats collection failed in update-db-stats")
;;      (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
;;      (print "exn=" (condition->list exn))
;;      #f) ;; if this fails we don't care, it is just stats
;;    (let* ((cmd      (conc "run-id=" run-id " " (if (eq? rawcmd 'general-call) (car params) rawcmd)))
;; 	  (stat-vec (hash-table-ref/default *db-stats* cmd #f)))
;;      (if (not (vector? stat-vec))
;; 	 (let ((newvec (vector 0 0)))
;; 	   (hash-table-set! *db-stats* cmd newvec)
(define (rmt:get-targets . args) #t)
;; 	   (set! stat-vec newvec)))
;;      (vector-set! stat-vec 0 (+ (vector-ref stat-vec 0) 1))
;;      (vector-set! stat-vec 1 (+ (vector-ref stat-vec 1) duration))))
;;   (mutex-unlock! *db-stats-mutex*))

(define (rmt:print-db-stats)
(define (rmt:get-target . args) #t)
(define (rmt:get-run-times . args) #t)
  (let ((fmtstr "~40a~7-d~9-d~20,2-f")) ;; "~20,2-f"
    (debug:print 18 *default-log-port* "DB Stats\n========")
    (debug:print 18 *default-log-port* (format #f "~40a~8a~10a~10a" "Cmd" "Count" "TotTime" "Avg"))
    (for-each (lambda (cmd)
		(let ((cmd-dat (hash-table-ref *db-stats* cmd)))
		  (debug:print 18 *default-log-port* (format #f fmtstr cmd (vector-ref cmd-dat 0) (vector-ref cmd-dat 1) (/ (vector-ref cmd-dat 1)(vector-ref cmd-dat 0))))))
	      (sort (hash-table-keys *db-stats*)
		    (lambda (a b)
		      (> (vector-ref (hash-table-ref *db-stats* a) 0)
			 (vector-ref (hash-table-ref *db-stats* b) 0)))))))

(define (rmt:get-max-query-average run-id)
(define (rmt:register-test . args) #t)
(define (rmt:get-test-id . args) #t)
  (mutex-lock! *db-stats-mutex*)
  (let* ((runkey (conc "run-id=" run-id " "))
	 (cmds   (filter (lambda (x)
			   (substring-index runkey x))
			 (hash-table-keys *db-stats*)))
	 (res    (if (null? cmds)
		     (cons 'none 0)
(define (rmt:get-test-info-by-id . args) #t)
		     (let loop ((cmd (car cmds))
				(tal (cdr cmds))
				(max-cmd (car cmds))
				(res 0))
		       (let* ((cmd-dat (hash-table-ref *db-stats* cmd))
			      (tot     (vector-ref cmd-dat 0))
			      (curravg (/ (vector-ref cmd-dat 1) (vector-ref cmd-dat 0))) ;; count is never zero by construction
			      (currmax (max res curravg))
			      (newmax-cmd (if (> curravg res) cmd max-cmd)))
			 (if (null? tal)
			     (if (> tot 10)
				 (cons newmax-cmd currmax)
				 (cons 'none 0))
			     (loop (car tal)(cdr tal) newmax-cmd currmax)))))))
    (mutex-unlock! *db-stats-mutex*)
    res))

(define (rmt:open-qry-close-locally cmd run-id params #!key (remretries 5))
(define (rmt:test-get-rundir-from-test-id . args) #t)
(define (rmt:open-test-db-by-test-id . args) #t)
  (let* ((qry-is-write   (not (member cmd api:read-only-queries)))
	 (db-file-path   (db:dbfile-path)) ;;  0))
	 (dbstruct-local (db:setup #t))  ;; make-dbr:dbstruct path:  dbdir local: #t)))
	 (read-only      (not (file-write-access? db-file-path)))
	 (start          (current-milliseconds))
	 (resdat         (if (not (and read-only qry-is-write))
			     (let ((v (api:execute-requests dbstruct-local (vector (symbol->string cmd) params))))
			       (handle-exceptions ;; there has been a long history of receiving strange errors from values returned by the client when things go wrong..
				exn               ;;  This is an attempt to detect that situation and recover gracefully
				(begin
				  (debug:print0 *default-log-port* "ERROR: bad data from server " v " message: "  ((condition-property-accessor 'exn 'message) exn))
				  (vector #t '())) ;; should always get a vector but if something goes wrong return a dummy
				(if (and (vector? v)
					 (> (vector-length v) 1))
				    (let ((newvec (vector (vector-ref v 0)(vector-ref v 1))))
				      newvec)           ;; by copying the vector while inside the error handler we should force the detection of a corrupted record
				    (vector #t '()))))  ;; we could also check that the returned types are valid
			     (vector #t '())))
	 (success        (vector-ref resdat 0))
	 (res            (vector-ref resdat 1))
	 (duration       (- (current-milliseconds) start)))
    (if (and read-only qry-is-write)
        (debug:print 0 *default-log-port* "ERROR: attempt to write to read-only database ignored. cmd=" cmd))
    (if (not success)
	(if (> remretries 0)
	    (begin
	      (debug:print-error 0 *default-log-port* "local query failed. Trying again.")
	      (thread-sleep! (/ (random 5000) 1000)) ;; some random delay 
	      (rmt:open-qry-close-locally cmd run-id params remretries: (- remretries 1)))
	    (begin
	      (debug:print-error 0 *default-log-port* "too many retries in rmt:open-qry-close-locally, giving up")
	      #f))
	(begin
	  ;; (rmt:update-db-stats run-id cmd params duration)
	  ;; mark this run as dirty if this was a write, the watchdog is responsible for syncing it
	  (if qry-is-write
	      (let ((start-time (current-seconds)))
		(mutex-lock! *db-multi-sync-mutex*)
/		(set! *db-last-access* start-time)  ;; THIS IS PROBABLY USELESS? (we are on a client)
                (mutex-unlock! *db-multi-sync-mutex*)))))
    res))

(define (rmt:send-receive-no-auto-client-setup connection-info cmd run-id params)
(define (rmt:test-set-state-status-by-id . args) #t)
(define (rmt:set-tests-state-status . args) #t)
  (let* ((run-id   (if run-id run-id 0))
	 (res  	   (handle-exceptions
		    exn
		    #f
		    (http-transport:client-api-send-receive run-id connection-info cmd params))))
    (if (and res (vector-ref res 0))
(define (rmt:get-tests-for-run . args) #t)
	(vector-ref res 1) ;;; YES!! THIS IS CORRECT!! CHANGE IT HERE, THEN CHANGE rmt:send-receive ALSO!!!
	#f)))

(define (rmt:synchash-get . args) #t)
;; ;; Wrap json library for strings (why the ports crap in the first place?)
;; (define (rmt:dat->json-str dat)
(define (rmt:get-tests-for-run-mindata . args) #t)
;;   (with-output-to-string 
;;     (lambda ()
;;       (json-write dat))))
;; 
;; (define (rmt:json-str->dat json-str)
(define (rmt:get-tests-for-runs-mindata . args) #t)
;;   (with-input-from-string json-str
;;     (lambda ()
;;       (json-read))))

(define (rmt:delete-test-records . args) #t)
;;======================================================================
;;
;; A C T U A L   A P I   C A L L S  
;;
;;======================================================================

(define (rmt:test-set-state-status . args) #t)
;;======================================================================
;;  S E R V E R
;;======================================================================

(define (rmt:test-toplevel-num-items . args) #t)
(define (rmt:kill-server run-id)
  (rmt:send-receive 'kill-server run-id (list run-id)))

(define (rmt:get-matching-previous-test-run-records . args) #t)
(define (rmt:start-server run-id)
  (rmt:send-receive 'start-server 0 (list run-id)))

;;======================================================================
;;  M I S C
;;======================================================================

(define (rmt:login run-id)
(define (rmt:test-get-logfile-info . args) #t)
  (rmt:send-receive 'login run-id (list *toppath* megatest-version *my-client-signature*)))

(define (rmt:test-get-records-for-index-file . args) #t)
;; This login does no retries under the hood - it acts a bit like a ping.
;; Deprecated for nmsg-transport.
;;
(define (rmt:login-no-auto-client-setup connection-info)
  (case *transport-type* ;; run-id of 0 is just a placeholder
    ((http)(rmt:send-receive-no-auto-client-setup connection-info 'login 0 (list *toppath* megatest-version *my-client-signature*)))
    ;;((nmsg)(nmsg-transport:client-api-send-receive run-id connection-info 'login (list *toppath* megatest-version run-id *my-client-signature*)))
    ))

;; hand off a call to one of the db:queries statements
;; added run-id to make looking up the correct db possible 
;;
(define (rmt:general-call stmtname run-id . params)
  (rmt:send-receive 'general-call run-id (append (list stmtname run-id) params)))


;; given a hostname, return a pair of cpu load and update time representing latest intelligence from tests running on that host
(define (rmt:get-latest-host-load hostname)
(define (rmt:get-testinfo-state-status . args) #t)
  (rmt:send-receive 'get-latest-host-load 0 (list hostname)))

;; (define (rmt:sync-inmem->db run-id)
;;   (rmt:send-receive 'sync-inmem->db run-id '()))

(define (rmt:sdb-qry qry val run-id)
(define (rmt:test-set-log! . args) #t)
  ;; add caching if qry is 'getid or 'getstr
  (rmt:send-receive 'sdb-qry run-id (list qry val)))

(define (rmt:test-set-top-process-pid . args) #t)
;; NOT COMPLETED
(define (rmt:runtests user run-id testpatt params)
  (rmt:send-receive 'runtests run-id testpatt))

(define (rmt:test-get-top-process-pid . args) #t)
(define (rmt:get-run-record-ids  target run keynames test-patt)
  (rmt:send-receive 'get-run-record-ids #f (list target run keynames test-patt)))

(define (rmt:get-changed-record-ids since-time)
  (rmt:send-receive 'get-changed-record-ids #f (list since-time)) )

;;======================================================================
;;  T E S T   M E T A 
;;======================================================================

(define (rmt:get-tests-tags)
(define (rmt:get-run-ids-matching-target . args) #t)
  (rmt:send-receive 'get-tests-tags #f '()))

(define (rmt:test-get-paths-matching-keynames-target-new . args) #t)
;;======================================================================
;;  K E Y S 
;;======================================================================

;; These require run-id because the values come from the run!
;;
(define (rmt:get-key-val-pairs run-id)
  (rmt:send-receive 'get-key-val-pairs run-id (list run-id)))

(define (rmt:get-keys)
(define (rmt:get-prereqs-not-met . args) #t)
  (if *db-keys* *db-keys* 
     (let ((res (rmt:send-receive 'get-keys #f '())))
       (set! *db-keys* res)
       res)))

(define (rmt:get-keys-write) ;; dummy query to force server start
  (let ((res (rmt:send-receive 'get-keys-write #f '())))
    (set! *db-keys* res)
    res))

;; we don't reuse run-id's (except possibly *after* a db cleanup) so it is safe
;; to cache the resuls in a hash
;;
(define (rmt:get-key-vals run-id)
(define (rmt:get-count-tests-running-for-run-id . args) #t)
  (or (hash-table-ref/default *keyvals* run-id #f)
      (let ((res (rmt:send-receive 'get-key-vals #f (list run-id))))
        (hash-table-set! *keyvals* run-id res)
        res)))

(define (rmt:get-targets)
(define (rmt:get-count-tests-running . args) #t)
  (rmt:send-receive 'get-targets #f '()))

(define (rmt:get-target run-id)
(define (rmt:get-count-tests-running-for-testname . args) #t)
(define (rmt:get-count-tests-running-in-jobgroup . args) #t)
  (rmt:send-receive 'get-target run-id (list run-id)))

(define (rmt:set-state-status-and-roll-up-items . args) #t)
(define (rmt:get-run-times runpatt targetpatt)
  (rmt:send-receive 'get-run-times #f (list runpatt targetpatt ))) 


(define (rmt:update-pass-fail-counts . args) #t)
(define (rmt:top-test-set-per-pf-counts . args) #t)
;;======================================================================
;;  T E S T S
;;======================================================================

;; Just some syntatic sugar
(define (rmt:register-test run-id test-name item-path)
  (rmt:general-call 'register-test run-id run-id test-name item-path))

(define (rmt:get-test-id run-id testname item-path)
(define (rmt:get-raw-run-stats . args) #t)
  (rmt:send-receive 'get-test-id run-id (list run-id testname item-path)))

;; run-id is NOT used
;;
(define (rmt:get-test-info-by-id run-id test-id)
(define (rmt:get-test-times . args) #t)
  (if (number? test-id)
      (rmt:send-receive 'get-test-info-by-id run-id (list run-id test-id))
      (begin
	(debug:print 0 *default-log-port* "WARNING: Bad data handed to rmt:get-test-info-by-id run-id=" run-id ", test-id=" test-id)
	(print-call-chain (current-error-port))
	#f)))

(define (rmt:get-run-info . args) #t)
(define (rmt:test-get-rundir-from-test-id run-id test-id)
  (rmt:send-receive 'test-get-rundir-from-test-id run-id (list run-id test-id)))

(define (rmt:get-num-runs . args) #t)
(define (rmt:open-test-db-by-test-id run-id test-id #!key (work-area #f))
  (let* ((test-path (if (string? work-area)
			work-area
			(rmt:test-get-rundir-from-test-id run-id test-id))))
    (debug:print 3 *default-log-port* "TEST PATH: " test-path)
    (open-test-db test-path)))

(define (rmt:get-runs-cnt-by-patt . args) #t)
;; WARNING: This currently bypasses the transaction wrapped writes system
(define (rmt:test-set-state-status-by-id run-id test-id newstate newstatus newcomment)
  (rmt:send-receive 'test-set-state-status-by-id run-id (list run-id test-id newstate newstatus newcomment)))

(define (rmt:set-tests-state-status run-id                      testnames currstate currstatus newstate newstatus)
  (rmt:send-receive 'set-tests-state-status run-id (list run-id testnames currstate currstatus newstate newstatus)))

(define (rmt:get-tests-for-run run-id testpatt states statuses offset limit not-in sort-by sort-order qryvals last-update mode)
  ;; (if (number? run-id)
  (rmt:send-receive 'get-tests-for-run run-id (list run-id testpatt states statuses offset limit not-in sort-by sort-order qryvals last-update mode)))
  ;;    (begin
  ;;	(debug:print-error 0 *default-log-port* "rmt:get-tests-for-run called with bad run-id=" run-id)
  ;;	(print-call-chain (current-error-port))
  ;;	'())))

;; get stuff via synchash 
(define (rmt:synchash-get run-id proc synckey keynum params)
  (rmt:send-receive 'synchash-get run-id (list run-id proc synckey keynum params)))

(define (rmt:get-tests-for-run-mindata run-id testpatt states status not-in)
  (rmt:send-receive 'get-tests-for-run-mindata run-id (list run-id testpatt states status not-in)))
  
;; IDEA: Threadify these - they spend a lot of time waiting ...
;;
(define (rmt:get-tests-for-runs-mindata run-ids testpatt states status not-in)
  (let ((multi-run-mutex (make-mutex))
	(run-id-list (if run-ids
			 run-ids
			 (rmt:get-all-run-ids)))
	(result      '()))
    (if (null? run-id-list)
	'()
	(let loop ((hed     (car run-id-list))
		   (tal     (cdr run-id-list))
		   (threads '()))
	  (if (> (length threads) 5)
	      (loop hed tal (filter (lambda (th)(not (member (thread-state th) '(terminated dead)))) threads))
	      (let* ((newthread (make-thread
				 (lambda ()
				   (let ((res (rmt:send-receive 'get-tests-for-run-mindata hed (list hed testpatt states status not-in))))
				     (if (list? res)
					 (begin
					   (mutex-lock! multi-run-mutex)
					   (set! result (append result res))
					   (mutex-unlock! multi-run-mutex))
					 (debug:print-error 0 *default-log-port* "get-tests-for-run-mindata failed for run-id " hed ", testpatt " testpatt ", states " states ", status " status ", not-in " not-in))))
				 (conc "multi-run-thread for run-id " hed)))
		     (newthreads (cons newthread threads)))
		(thread-start! newthread)
		(thread-sleep! 0.05) ;; give that thread some time to start
		(if (null? tal)
		    newthreads
		    (loop (car tal)(cdr tal) newthreads))))))
    result))

(define (rmt:register-run . args) #t)
;; ;; IDEA: Threadify these - they spend a lot of time waiting ...
;; ;;
;; (define (rmt:get-tests-for-runs-mindata run-ids testpatt states status not-in)
;;   (let ((run-id-list (if run-ids
;; 			 run-ids
;; 			 (rmt:get-all-run-ids))))
;;     (apply append (map (lambda (run-id)
;; 			 (rmt:send-receive 'get-tests-for-run-mindata run-id (list run-ids testpatt states status not-in)))
;; 		       run-id-list))))

(define (rmt:delete-test-records run-id test-id)
(define (rmt:get-run-name-from-id . args) #t)
(define (rmt:delete-run . args) #t)
  (rmt:send-receive 'delete-test-records run-id (list run-id test-id)))

(define (rmt:update-run-stats . args) #t)
;; This is not needed as test steps are deleted on test delete call
;;
;; (define (rmt:delete-test-step-records run-id test-id)
(define (rmt:delete-old-deleted-test-records . args) #t)
;;   (rmt:send-receive 'delete-test-step-records run-id (list run-id test-id)))

(define (rmt:test-set-state-status run-id test-id state status msg)
  (rmt:send-receive 'test-set-state-status run-id (list run-id test-id state status msg)))

(define (rmt:test-toplevel-num-items run-id test-name)
  (rmt:send-receive 'test-toplevel-num-items run-id (list run-id test-name)))

;; (define (rmt:get-previous-test-run-record run-id test-name item-path)
;;   (rmt:send-receive 'get-previous-test-run-record run-id (list run-id test-name item-path)))

(define (rmt:get-matching-previous-test-run-records run-id test-name item-path)
  (rmt:send-receive 'get-matching-previous-test-run-records run-id (list run-id test-name item-path)))

(define (rmt:test-get-logfile-info run-id test-name)
  (rmt:send-receive 'test-get-logfile-info run-id (list run-id test-name)))

(define (rmt:test-get-records-for-index-file run-id test-name)
  (rmt:send-receive 'test-get-records-for-index-file run-id (list run-id test-name)))

(define (rmt:get-testinfo-state-status run-id test-id)
(define (rmt:get-runs . args) #t)
  (rmt:send-receive 'get-testinfo-state-status run-id (list run-id test-id)))

(define (rmt:test-set-log! run-id test-id logf)
(define (rmt:simple-get-runs . args) #t)
(define (rmt:get-all-run-ids . args) #t)
  (if (string? logf)(rmt:general-call 'test-set-log run-id logf test-id)))

(define (rmt:get-prev-run-ids . args) #t)
(define (rmt:test-set-top-process-pid run-id test-id pid)
  (rmt:send-receive 'test-set-top-process-pid run-id (list run-id test-id pid)))

(define (rmt:lock/unlock-run . args) #t)
(define (rmt:test-get-top-process-pid run-id test-id)
  (rmt:send-receive 'test-get-top-process-pid run-id (list run-id test-id)))

(define (rmt:get-run-ids-matching-target keynames target res runname testpatt statepatt statuspatt)
(define (rmt:get-run-status . args) #t)
  (rmt:send-receive 'get-run-ids-matching-target #f (list keynames target res runname testpatt statepatt statuspatt)))

(define (rmt:set-run-status . args) #t)
;; NOTE: This will open and access ALL run databases. 
;;
(define (rmt:test-get-paths-matching-keynames-target-new keynames target res testpatt statepatt statuspatt runname)
  (let ((run-ids (rmt:get-run-ids-matching-target keynames target res runname testpatt statepatt statuspatt)))
    (apply append 
	   (map (lambda (run-id)
		  (rmt:send-receive 'test-get-paths-matching-keynames-target-new run-id (list run-id keynames target res testpatt statepatt statuspatt runname)))
	   run-ids))))

;; (define (rmt:get-run-ids-matching keynames target res)
;;   (rmt:send-receive #f 'get-run-ids-matching (list keynames target res)))

(define (rmt:get-prereqs-not-met run-id waitons ref-test-name ref-item-path #!key (mode '(normal))(itemmaps #f))
  (rmt:send-receive 'get-prereqs-not-met run-id (list run-id waitons ref-test-name ref-item-path mode itemmaps)))

(define (rmt:get-count-tests-running-for-run-id run-id)
  (rmt:send-receive 'get-count-tests-running-for-run-id run-id (list run-id)))

;; Statistical queries

(define (rmt:get-count-tests-running run-id)
  (rmt:send-receive 'get-count-tests-running run-id (list run-id)))

(define (rmt:get-count-tests-running-for-testname run-id testname)
  (rmt:send-receive 'get-count-tests-running-for-testname run-id (list run-id testname)))

(define (rmt:get-count-tests-running-in-jobgroup run-id jobgroup)
  (rmt:send-receive 'get-count-tests-running-in-jobgroup run-id (list run-id jobgroup)))

;; state and status are extra hints not usually used in the calculation
;;
(define (rmt:set-state-status-and-roll-up-items run-id test-name item-path state status comment)
  (rmt:send-receive 'set-state-status-and-roll-up-items run-id (list run-id test-name item-path state status comment)))

(define (rmt:update-pass-fail-counts run-id test-name)
(define (rmt:update-run-event_time . args) #t)
  (rmt:general-call 'update-pass-fail-counts run-id test-name test-name test-name))

(define (rmt:top-test-set-per-pf-counts run-id test-name)
  (rmt:send-receive 'top-test-set-per-pf-counts run-id (list run-id test-name)))

(define (rmt:get-raw-run-stats run-id)
(define (rmt:get-runs-by-patt . args) #t)
  (rmt:send-receive 'get-raw-run-stats run-id (list run-id)))

(define (rmt:get-test-times runname target)
(define (rmt:find-and-mark-incomplete . args) #t)
(define (rmt:get-main-run-stats . args) #t)
  (rmt:send-receive 'get-test-times #f (list runname target ))) 

;;======================================================================
;;  R U N S
;;======================================================================

(define (rmt:get-run-info run-id)
  (rmt:send-receive 'get-run-info run-id (list run-id)))

(define (rmt:get-num-runs runpatt)
  (rmt:send-receive 'get-num-runs #f (list runpatt)))

(define (rmt:get-runs-cnt-by-patt runpatt targetpatt keys)
  (rmt:send-receive 'get-runs-cnt-by-patt #f (list runpatt  targetpatt keys)))

;; Use the special run-id == #f scenario here since there is no run yet
(define (rmt:register-run keyvals runname state status user contour)
  (rmt:send-receive 'register-run #f (list keyvals runname state status user contour)))
    
(define (rmt:get-run-name-from-id run-id)
  (rmt:send-receive 'get-run-name-from-id run-id (list run-id)))

(define (rmt:delete-run run-id)
  (rmt:send-receive 'delete-run run-id (list run-id)))

(define (rmt:update-run-stats run-id stats)
  (rmt:send-receive 'update-run-stats #f (list run-id stats)))

(define (rmt:delete-old-deleted-test-records)
  (rmt:send-receive 'delete-old-deleted-test-records #f '()))

(define (rmt:get-runs runpatt count offset keypatts)
  (rmt:send-receive 'get-runs #f (list runpatt count offset keypatts)))

(define (rmt:simple-get-runs runpatt count offset target)
  (rmt:send-receive 'simple-get-runs #f (list runpatt count offset target)))

(define (rmt:get-all-run-ids)
  (rmt:send-receive 'get-all-run-ids #f '()))

(define (rmt:get-prev-run-ids run-id)
  (rmt:send-receive 'get-prev-run-ids #f (list run-id)))

(define (rmt:lock/unlock-run run-id lock unlock user)
  (rmt:send-receive 'lock/unlock-run #f (list run-id lock unlock user)))

;; set/get status
(define (rmt:get-run-status run-id)
  (rmt:send-receive 'get-run-status #f (list run-id)))

(define (rmt:set-run-status run-id run-status #!key (msg #f))
  (rmt:send-receive 'set-run-status #f (list run-id run-status msg)))

(define (rmt:update-run-event_time run-id)
  (rmt:send-receive 'update-run-event_time #f (list run-id)))

(define (rmt:get-runs-by-patt  keys runnamepatt targpatt offset limit fields last-runs-update  #!key  (sort-order "asc")) ;; fields of #f uses default
  (rmt:send-receive 'get-runs-by-patt #f (list keys runnamepatt targpatt offset limit fields last-runs-update sort-order)))

(define (rmt:find-and-mark-incomplete run-id ovr-deadtime)
  ;; (if (rmt:send-receive 'have-incompletes? run-id (list run-id ovr-deadtime))
  (rmt:send-receive 'mark-incomplete run-id (list run-id ovr-deadtime))) ;; )

(define (rmt:get-main-run-stats run-id)
  (rmt:send-receive 'get-main-run-stats #f (list run-id)))

(define (rmt:get-var varname)
(define (rmt:get-var . args) #t)
  (rmt:send-receive 'get-var #f (list varname)))

(define (rmt:del-var varname)
(define (rmt:del-var . args) #t)
  (rmt:send-receive 'del-var #f (list varname)))

(define (rmt:set-var varname value)
(define (rmt:set-var . args) #t)
  (rmt:send-receive 'set-var #f (list varname value)))

;;======================================================================
;; M U L T I R U N   Q U E R I E S
;;======================================================================

;; Need to move this to multi-run section and make associated changes
(define (rmt:find-and-mark-incomplete-all-runs #!key (ovr-deadtime #f))
(define (rmt:find-and-mark-incomplete-all-runs . args) #t)
  (let ((run-ids (rmt:get-all-run-ids)))
    (for-each (lambda (run-id)
	       (rmt:find-and-mark-incomplete run-id ovr-deadtime))
	     run-ids)))

;; get the previous record for when this test was run where all keys match but runname
;; returns #f if no such test found, returns a single test record if found
;; 
;; Run this at the client end since we have to connect to multiple run-id dbs
;;
(define (rmt:get-previous-test-run-record run-id test-name item-path)
(define (rmt:get-previous-test-run-record . args) #t)
  (let* ((keyvals (rmt:get-key-val-pairs run-id))
	 (keys    (rmt:get-keys))
	 (selstr  (string-intersperse  keys ","))
	 (qrystr  (string-intersperse (map (lambda (x)(conc x "=?")) keys) " AND ")))
    (if (not keyvals)
	#f
	(let ((prev-run-ids (rmt:get-prev-run-ids run-id)))
	  ;; for each run starting with the most recent look to see if there is a matching test
	  ;; if found then return that matching test record
	  (debug:print 4 *default-log-port* "selstr: " selstr ", qrystr: " qrystr ", keyvals: " keyvals ", previous run ids found: " prev-run-ids)
	  (if (null? prev-run-ids) #f
	      (let loop ((hed (car prev-run-ids))
			 (tal (cdr prev-run-ids)))
		(let ((results (rmt:get-tests-for-run hed (conc test-name "/" item-path) '() '() ;; run-id testpatt states statuses
						      #f #f #f               ;; offset limit not-in hide/not-hide
						      #f #f #f #f 'normal))) ;; sort-by sort-order qryvals last-update mode
		  (debug:print 4 *default-log-port* "Got tests for run-id " run-id ", test-name " test-name ", item-path " item-path ": " results)
		  (if (and (null? results)
			   (not (null? tal)))
		      (loop (car tal)(cdr tal))
		      (if (null? results) #f
			  (car results))))))))))

(define (rmt:get-run-stats)
(define (rmt:get-run-stats . args) #t)
  (rmt:send-receive 'get-run-stats #f '()))

;;======================================================================
;;  S T E P S
;;======================================================================

;; Getting steps is more complicated.
;;
;; If given work area 
;;  1. Find the testdat.db file
;;  2. Open the testdat.db file and do the query
;; If not given the work area
;;  1. Do a remote call to get the test path
;;  2. Continue as above
;; 
;;(define (rmt:get-steps-for-test run-id test-id)
;;  (rmt:send-receive 'get-steps-data run-id (list test-id)))

(define (rmt:teststep-set-status! run-id test-id teststep-name state-in status-in comment logfile)
(define (rmt:teststep-set-status! . args) #t)
  (let* ((state     (items:check-valid-items "state" state-in))
	 (status    (items:check-valid-items "status" status-in)))
    (if (or (not state)(not status))
	(debug:print 3 *default-log-port* "WARNING: Invalid " (if status "status" "state")
		     " value \"" (if status state-in status-in) "\", update your validvalues section in megatest.config"))
    (rmt:send-receive 'teststep-set-status! run-id (list run-id test-id teststep-name state-in status-in comment logfile))))

(define (rmt:get-steps-for-test run-id test-id)
(define (rmt:get-steps-for-test . args) #t)
  (rmt:send-receive 'get-steps-for-test run-id (list run-id test-id)))

(define (rmt:get-steps-info-by-id test-step-id)
(define (rmt:get-steps-info-by-id . args) #t)
  (rmt:send-receive 'get-steps-info-by-id #f (list test-step-id)))

;;======================================================================
;;  T E S T   D A T A 
;;======================================================================

(define (rmt:read-test-data run-id test-id categorypatt #!key (work-area #f)) 
(define (rmt:read-test-data . args) #t)
  (rmt:send-receive 'read-test-data run-id (list run-id test-id categorypatt)))
(define (rmt:read-test-data* run-id test-id categorypatt varpatt #!key (work-area #f)) 
(define (rmt:read-test-data* . args) #t)
  (rmt:send-receive 'read-test-data* run-id (list run-id test-id categorypatt varpatt)))

(define (rmt:get-data-info-by-id test-data-id)
(define (rmt:get-data-info-by-id . args) #t)
   (rmt:send-receive 'get-data-info-by-id #f (list test-data-id)))

(define (rmt:testmeta-add-record testname)
(define (rmt:testmeta-add-record . args) #t)
  (rmt:send-receive 'testmeta-add-record #f (list testname)))

(define (rmt:testmeta-get-record testname)
(define (rmt:testmeta-get-record . args) #t)
  (rmt:send-receive 'testmeta-get-record #f (list testname)))

(define (rmt:testmeta-update-field test-name fld val)
(define (rmt:testmeta-update-field . args) #t)
  (rmt:send-receive 'testmeta-update-field #f (list test-name fld val)))

(define (rmt:test-data-rollup run-id test-id status)
(define (rmt:test-data-rollup . args) #t)
  (rmt:send-receive 'test-data-rollup run-id (list run-id test-id status)))
(define (rmt:csv->test-data . args) #t)
(define (rmt:tasks-find-task-queue-records . args) #t)

(define (rmt:csv->test-data run-id test-id csvdata)
  (rmt:send-receive 'csv->test-data run-id (list run-id test-id csvdata)))

;;======================================================================
;;  T A S K S
;;======================================================================

(define (rmt:tasks-find-task-queue-records target run-name test-patt state-patt action-patt)
  (rmt:send-receive 'find-task-queue-records #f (list target run-name test-patt state-patt action-patt)))

(define (rmt:tasks-add action owner target runname testpatt params)
(define (rmt:tasks-add . args) #t)
  (rmt:send-receive 'tasks-add #f (list action owner target runname testpatt params)))

(define (rmt:tasks-set-state-given-param-key param-key new-state)
(define (rmt:tasks-set-state-given-param-key . args) #t)
  (rmt:send-receive 'tasks-set-state-given-param-key #f (list  param-key new-state)))

(define (rmt:tasks-get-last target runname)
(define (rmt:tasks-get-last . args) #t)
  (rmt:send-receive 'tasks-get-last #f (list target runname)))

;;======================================================================
;; N O   S Y N C   D B 
;;======================================================================

(define (rmt:no-sync-set var val)
(define (rmt:no-sync-set . args) #t)
  (rmt:send-receive 'no-sync-set #f `(,var ,val)))

(define (rmt:no-sync-get/default var default)
(define (rmt:no-sync-get/default . args) #t)
  (rmt:send-receive 'no-sync-get/default #f `(,var ,default)))

(define (rmt:no-sync-del! var)
(define (rmt:no-sync-del! . args) #t)
  (rmt:send-receive 'no-sync-del! #f `(,var)))

(define (rmt:no-sync-get-lock keyname)
(define (rmt:no-sync-get-lock . args) #t)
  (rmt:send-receive 'no-sync-get-lock #f `(,keyname)))

;;======================================================================
;; A R C H I V E S
;;======================================================================

(define (rmt:archive-get-allocations  testname itempath dneeded)
(define (rmt:archive-get-allocations . args) #t)
  (rmt:send-receive 'archive-get-allocations #f (list testname itempath dneeded)))

(define (rmt:archive-register-block-name bdisk-id archive-path)
(define (rmt:archive-register-block-name . args) #t)
  (rmt:send-receive 'archive-register-block-name #f (list bdisk-id archive-path)))

(define (rmt:archive-allocate-testsuite/area-to-block block-id testsuite-name areakey)
(define (rmt:archive-allocate-testsuite/area-to-block . args) #t)
  (rmt:send-receive 'archive-allocate-test-to-block #f (list  block-id testsuite-name areakey)))

(define (rmt:archive-register-disk bdisk-name bdisk-path df)
(define (rmt:archive-register-disk . args) #t)
  (rmt:send-receive 'archive-register-disk #f (list bdisk-name bdisk-path df)))

(define (rmt:test-set-archive-block-id run-id test-id archive-block-id)
(define (rmt:test-set-archive-block-id . args) #t)
  (rmt:send-receive 'test-set-archive-block-id run-id (list run-id test-id archive-block-id)))

(define (rmt:test-get-archive-block-info archive-block-id)
(define (rmt:test-get-archive-block-info . args) #t)
  (rmt:send-receive 'test-get-archive-block-info #f (list archive-block-id)))

Deleted rpc-transport.scm version [dd887f94ec].

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237













































































































































































































































-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-

;; Copyright 2006-2012, Matthew Welland.
;; 
;; This file is part of Megatest.
;; 
;;     Megatest is free software: you can redistribute it and/or modify
;;     it under the terms of the GNU General Public License as published by
;;     the Free Software Foundation, either version 3 of the License, or
;;     (at your option) any later version.
;; 
;;     Megatest is distributed in the hope that it will be useful,
;;     but WITHOUT ANY WARRANTY; without even the implied warranty of
;;     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;;     GNU General Public License for more details.
;; 
;;     You should have received a copy of the GNU General Public License
;;     along with Megatest.  If not, see <http://www.gnu.org/licenses/>.
;;

(require-extension (srfi 18) extras tcp s11n rpc)
(import (prefix rpc rpc:))

(use sqlite3 srfi-1 posix regex regex-case srfi-69 hostinfo md5 message-digest)
(import (prefix sqlite3 sqlite3:))

(declare (unit rpc-transport))

(declare (uses common))
(declare (uses db))
(declare (uses tests))
(declare (uses tasks)) ;; tasks are where stuff is maintained about what is running.

(include "common_records.scm")
(include "db_records.scm")

;; procstr is the name of the procedure to be called as a string
(define (rpc-transport:autoremote procstr params)
  (handle-exceptions
   exn
   (begin
     (debug:print 1 *default-log-port* "Remote failed for " proc " " params)
     (apply (eval (string->symbol procstr)) params))
   ;; (if *runremote*
   ;;    (apply (eval (string->symbol (conc "remote:" procstr))) params)
   (apply (eval (string->symbol procstr)) params)))

;; all routes though here end in exit ...
;;
;; start_server? 
;;
(define (rpc-transport:launch run-id)
  (let* ((tdbdat (tasks:open-db)))
    (BB> "rpc-transport:launch fired for run-id="run-id)
    (set! *run-id*   run-id)
    (if (args:get-arg "-daemonize")
        (daemon:ize))
    (if (server:check-if-running run-id)
        (begin
          (debug:print 0 *default-log-port* "INFO: Server for run-id " run-id " already running")
          (exit 0)))
    (let loop ((server-id (tasks:server-lock-slot (db:delay-if-busy tdbdat) run-id))
               (remtries  4))
      (if (not server-id)
          (if (> remtries 0)
              (begin
                (thread-sleep! 2)
                (loop (tasks:server-lock-slot (db:delay-if-busy tdbdat) run-id)
                      (- remtries 1)))
              (begin
                ;; since we didn't get the server lock we are going to clean up and bail out
                (debug:print-info 2 *default-log-port* "INFO: server pid=" (current-process-id) ", hostname=" (get-host-name) " not starting due to other candidates ahead in start queue")
                (tasks:server-delete-records-for-this-pid (db:delay-if-busy tdbdat) " rpc-transport:launch")))
          (begin
            (rpc-transport:run (if (args:get-arg "-server")(args:get-arg "-server") "-") run-id server-id)
            (exit))))))

(define (rpc-transport:run hostn run-id server-id)
  (debug:print 2 *default-log-port* "Attempting to start the rpc server ...")
   ;; (trace rpc:publish-procedure!)

  (rpc:publish-procedure! 'server:login server:login)
  (rpc:publish-procedure! 'testing (lambda () "Just testing"))

  (let* ((db              #f)
	 (hostname        (get-host-name))
	 (ipaddrstr       (let ((ipstr (if (string=? "-" hostn)
					   ;; (string-intersperse (map number->string (u8vector->list (hostname->ip hostname))) ".")
					   (server:get-best-guess-address hostname)
					   #f)))
			    (if ipstr ipstr hostn))) ;; hostname))) 
	 (start-port      (open-run-close tasks:server-get-next-port tasks:open-db))
	 (link-tree-path  (configf:lookup *configdat* "setup" "linktree"))
	 (rpc:listener    (rpc-transport:find-free-port-and-open (rpc:default-server-port)))
	 (th1             (make-thread
			   (lambda ()
			     ((rpc:make-server rpc:listener) #t))
			   "rpc:server"))
			   ;; (cute (rpc:make-server rpc:listener) "rpc:server")
			   ;; 'rpc:server))
	 (hostname        (if (string=? "-" hostn)
			      (get-host-name) 
			      hostn))
	 (ipaddrstr       (if (string=? "-" hostn)
			      (server:get-best-guess-address hostname) ;; (string-intersperse (map number->string (u8vector->list (hostname->ip hostname))) ".")
			      #f))
	 (portnum         (rpc:default-server-port))
	 (host:port       (conc (if ipaddrstr ipaddrstr hostname) ":" portnum))
	 (tdb             (tasks:open-db)))
    (thread-start! th1)
    (set! db *dbstruct-db*)
    (open-run-close tasks:server-set-interface-port 
		    tasks:open-db 
		    server-id 
		    ipaddrstr portnum)
    (debug:print 0 *default-log-port* "Server started on " host:port)
    
    ;; (trace rpc:publish-procedure!)
    ;; (rpc:publish-procedure! 'server:login server:login)
    ;; (rpc:publish-procedure! 'testing (lambda () "Just testing"))

    ;;======================================================================
    ;;	  ;; end of publish-procedure section
    ;;======================================================================
    ;;
    (on-exit (lambda ()
	       (open-run-close tasks:server-set-state! tasks:open-db server-id "stopped")))

    (set! *rpc:listener* rpc:listener)
    (tasks:server-set-state! tdb server-id "running")
    (set! *dbstruct-db*  (db:setup run-id))
    ;; if none running or if > 20 seconds since 
    ;; server last used then start shutdown
    (let loop ((count 0))
      (thread-sleep! 5) ;; no need to do this very often
      (let ((numrunning -1)) ;; (db:get-count-tests-running db)))
	(if (or (> numrunning 0)
		(> (+ *db-last-access* 60)(current-seconds)))
	    (begin
	      (debug:print-info 0 *default-log-port* "Server continuing, tests running: " numrunning ", seconds since last db access: " (- (current-seconds) *db-last-access*))
	      (loop (+ 1 count)))
	    (begin
	      (debug:print-info 0 *default-log-port* "Starting to shutdown the server side")
	      (open-run-close tasks:server-delete-record tasks:open-db server-id " rpc-transport:try-start-server stop")
	      (thread-sleep! 10)
	      (debug:print-info 0 *default-log-port* "Max cached queries was " *max-cache-size*)
	      (debug:print-info 0 *default-log-port* "Server shutdown complete. Exiting")
	      ))))))

(define (rpc-transport:find-free-port-and-open port)
  (handle-exceptions
   exn
	  (begin
     (print "Failed to bind to port " (rpc:default-server-port) ", trying next port")
     (rpc-transport:find-free-port-and-open (+ port 1)))
   (rpc:default-server-port port)
   (tcp-read-timeout 240000)
   (tcp-listen (rpc:default-server-port) 10000)))

(define (rpc-transport:ping run-id host port)
  (handle-exceptions
   exn
   (begin
     (print "SERVER_NOT_FOUND")
     (exit 1))
   (let ((login-res ((rpc:procedure 'server:login host port) *toppath*)))
     (if (and (list? login-res)
	      (car login-res))
	 (begin
	   (print "LOGIN_OK")
	   (exit 0))
	 (begin
	   (print "LOGIN_FAILED")
	   (exit 1))))))

(define (rpc-transport:client-setup run-id #!key (remtries 10))
  (if *runremote*
      (begin
	(debug:print-error 0 *default-log-port* "Attempt to connect to server but already connected")
	#f)
      (let* ((host-info (hash-table-ref/default *runremote* run-id #f))) ;; (open-run-close db:get-var #f "SERVER"))
	(if host-info
	    (let ((iface    (car host-info))
		  (port     (cadr host-info))
		  (ping-res ((rpc:procedure 'server:login host port) *toppath*)))
	      (if ping-res
		  (let ((server-dat (list iface port #f #f #f)))
		    (hash-table-set! *runremote* run-id server-dat)
		    server-dat)
		  (begin
		    (server:try-running *toppath*)
		    (thread-sleep! 2)
		    (rpc-transport:client-setup run-id (- remtries 1)))))
 	    (let* ((server-db-info (open-run-close tasks:get-server tasks:open-db run-id)))
 	      (debug:print-info 0 *default-log-port* "client:setup server-dat=" server-dat ", remaining-tries=" remaining-tries)
	      (if server-db-info
 		  (let* ((iface     (tasks:hostinfo-get-interface server-db-info))
 			 (port      (tasks:hostinfo-get-port      server-db-info))
			 (server-dat (list iface port #f #f #f))
 			 (ping-res  ((rpc:procedure 'server:login host port) *toppath*)))
 		    (if start-res
 			(begin
 			  (hash-table-set! *runremote* run-id server-dat)
			  server-dat)
			(begin
			  (server:try-running *toppath*)
			  (thread-sleep! 2)
			  (rpc-transport:client-setup run-id (- remtries 1)))))
		  (begin
		    (server:try-running *toppath*)
		    (thread-sleep! 2)
		    (rpc-transport:client-setup run-id (- remtries 1)))))))))
;; 
;; 	     (port     (if (and hostinfo (> (length hostdat) 1))(cadr hostdat) #f)))
;; 	(if (and port
;; 		 (string->number port))
;; 	    (let ((portn (string->number port)))
;; 	      (debug:print-info 2 *default-log-port* "Setting up to connect to host " host ":" port)
;; 	      (handle-exceptions
;; 	       exn
;; 	       (begin
;; 		 (debug:print-error 0 *default-log-port* "Failed to open a connection to the server at host: " host " port: " port)
;; 		 (debug:print 0 *default-log-port* "   EXCEPTION: " ((condition-property-accessor 'exn 'message) exn))
;; 		 ;; (open-run-close 
;; 		 ;;  (lambda (db . param) 
;; 		 ;;    (sqlite3:execute db "DELETE FROM metadat WHERE var='SERVER'"))
;; 		 ;;  #f)
;; 		 (set! *runremote* #f))
;; 	       (if (and (not (args:get-arg "-server")) ;; no point in the server using the server using the server
;; 			((rpc:procedure 'server:login host portn) *toppath*))
;; 		   (begin
;; 		     (debug:print-info 2 *default-log-port* "Logged in and connected to " host ":" port)
;; 		     (set! *runremote* (vector host portn)))
;; 		   (begin
;; 		     (debug:print-info 2 *default-log-port* "Failed to login or connect to " host ":" port)
;; 		     (set! *runremote* #f)))))
;; 	    (debug:print-info 2 *default-log-port* "no server available")))))

Modified runs.scm from [ec89ed5162] to [0d98a3ef41].

2042
2043
2044
2045
2046
2047
2048
2049

2050
2051
2052
2053
2054
2055
2056
2042
2043
2044
2045
2046
2047
2048

2049
2050
2051
2052
2053
2054
2055
2056







-
+







	 (states       (if state  (string-split state  ",") '()))
	 (statuses     (if status (string-split status ",") '()))
	 (state-status (if (string? new-state-status) (string-split new-state-status ",") '(#f #f)))
	 (rp-mutex     (make-mutex))
	 (bup-mutex    (make-mutex))
         (keep-records (args:get-arg "-keep-records"))) ;; used in conjunction with -remove-runs to keep the records, TODO: consolidate this with "mode".

    (let* ((write-access-actions '(remove-runs set-state-status archive run-wait))
    (let* ((write-access-actions '(remove-runs set-state-status archive run-wait kill-runs))
           (dbfile             (conc  *toppath* "/megatest.db"))
           (readonly-mode      (not (file-write-access? dbfile))))
      (when (and readonly-mode
                 (member action write-access-actions))
        (debug:print-error 0 *default-log-port* "megatest.db is readonly.  Cannot proceed with action ["action"] in which write-access isrequired .")
        (exit 1)))
    
2079
2080
2081
2082
2083
2084
2085




2086
2087
2088
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094

2095
2096
2097
2098
2099
2100
2101







+
+
+
+





-







			       '()))
		(lasttpath "/does/not/exist/I/hope")
		(worker-thread #f))
	   (debug:print-info 4 *default-log-port* "runs:operate-on run=" run ", header=" header)
	   (if (not (null? tests))
	       (begin
		 (case action
                   ((kill-runs)
                    (tasks:kill-runner target run-name "%")
                    (debug:print 1 *default-log-port* "Killing tests for run: " runkey " " (db:get-value-by-header run header "runname"))
                    )
		   ((remove-runs)
		    ;; (if (tasks:need-server run-id)(tasks:start-and-wait-for-server tdbdat run-id 10))
		    ;; seek and kill in flight -runtests with % as testpatt here
		    ;; (if (equal? testpatt "%")
		    (tasks:kill-runner target run-name testpatt)
		    
		    ;; (debug:print 0 *default-log-port* "not attempting to kill any run launcher processes as testpatt is " testpatt))
		    (debug:print 1 *default-log-port* "Removing tests for run: " runkey " " (db:get-value-by-header run header "runname")))
		   ((set-state-status)
		    ;; (if (tasks:need-server run-id)(tasks:start-and-wait-for-server tdbdat run-id 10))
		    (debug:print 1 *default-log-port* "Modifying state and staus for tests for run: " runkey " " (db:get-value-by-header run header "runname")))
		   ((print-run)
		    (debug:print 1 *default-log-port* "Printing info for run " runkey ", run=" run ", tests=" tests ", header=" header)
2192
2193
2194
2195
2196
2197
2198
2199

2200
2201
2202
2203
2204
2205
2206
2195
2196
2197
2198
2199
2200
2201

2202
2203
2204
2205
2206
2207
2208
2209







-
+







                                      ((started)
                                       ;; if last visit was within last second, sleep 1 second
                                       (if (< (- now last-visit) 1.0)
                                           (thread-sleep! 1.0))
                                       (hash-table-set! backgrounded-remove-last-visit test-fulln (current-seconds))
                                       ;; send to back of line, loop
                                       (let ((newtal (append tal (list test))))
                                        (loop (car newtal)(cdr newtal)))
                                         (loop (car newtal)(cdr newtal)))
                                       )
                                      ((done)
                                       ;; drop this one; if remaining, loop, else finish
                                       (hash-table-set! backgrounded-remove-last-visit test-fulln (current-seconds))
                                       (let ((subrun-remove-succeeded (hash-table-ref/default backgrounded-remove-result test-fulln 'exception)))
                                         (cond
                                          ((eq? subrun-remove-succeeded 'exception)
2249
2250
2251
2252
2253
2254
2255






















2256
2257
2258
2259
2260
2261
2262
2252
2253
2254
2255
2256
2257
2258
2259
2260
2261
2262
2263
2264
2265
2266
2267
2268
2269
2270
2271
2272
2273
2274
2275
2276
2277
2278
2279
2280
2281
2282
2283
2284
2285
2286
2287







+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+







                                            (loop new-test-dat tal)
                                            (loop (car tal)(append tal (list new-test-dat)))))
                                      (begin
                                        (runs:remove-test-directory new-test-dat mode) ;; 'remove-all)
                                        (if (not (null? tal))
                                            (loop (car tal)(cdr tal)))))))
				(rmt:update-run-stats run-id (rmt:get-raw-run-stats run-id)))
                               ((kill-runs)
                                ;; RUNNING -> KILLREQ
                                ;; LAUNCHED,RUNNING,REMOTEHOSTSTART -> NOT STARTED
                                (cond
                                 ((and has-subrun (member test-state (list "RUNNING" "LAUNCHED" "REMOTEHOSTSTART" "KILLREQ")))
                                  (common:send-thunk-to-background-thread
                                   (lambda ()
                                     (let* ((subrun-remove-succeeded
                                             (subrun:kill-subrun run-dir keep-records)))
                                       #t)))
                                  (if (not (null? tal))
				    (loop (car tal)(cdr tal)))
                                  )
                                 ((member test-state (list "RUNNING" "LAUNCHED" "REMOTEHOSTSTART" "KILLREQ"))
                                  (debug:print 1 *default-log-port* "INFO: issuing killreq to test "test-fulln)
                                  (mt:test-set-state-status-by-id run-id (db:test-get-id test) "KILLREQ" "n/a" #f)
                                  (if (not (null? tal))
				      (loop (car tal)(cdr tal))))
                                 (else
                                  (if (not (null? tal))
				      (loop (car tal)(cdr tal)))
                                  )))
			       ((set-state-status)
                                (let* ((new-state (car state-status))
                                       (new-status (cadr state-status))
                                       (test-id (db:test-get-id test))
                                       (test-run-dir (db:test-get-rundir new-test-dat))
                                       (has-subrun (and (subrun:subrun-test-initialized? test-run-dir)
                                                      (not (subrun:subrun-removed? test-run-dir)))))

Modified server.scm from [8c943654ab] to [9ce5da2a1c].

13
14
15
16
17
18
19









20
21
22
23

24
25
26
27
28
29
30
31
32
33
34
35








36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55

56
57

58
59






60
61
62
63
64

65
66

67
68
69












70
71
72
73
74








75
76
77

78
79
80
81
82
83
84
85
86
87








88
89
90
91
92




93
94

95
96
97
98
99



100
101
102




103




104
105






106
107
108







109
110
111
112
113

114
115
116
117
118
119
120








121
122
123
124
125
126
127
128
129
130
131
132










133
134
135

136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
































155
156
157

158
159
160






161
162
163
164
165
166
167
168
169
170
171












172
173
174
175
176
177





178
179

180
181
182
183
184
185
186

187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238













































































239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485


























































































































































































































































486
487
488
489
490
491
492
493
494
495
496
497












498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559






























































560
561
562
563
564
565
566
567
568
569

570
571
572
573



574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620

























































13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31

32
33
34
35
36
37
38
39





40
41
42
43
44
45
46
47
48
49
50
51







52
53
54
55
56


57

58
59
60
61


62
63
64
65
66
67



68

69

70
71



72
73
74
75
76
77
78
79
80
81
82
83
84




85
86
87
88
89
90
91
92



93










94
95
96
97
98
99
100
101





102
103
104
105


106





107
108
109
110


111
112
113
114
115
116
117
118
119


120
121
122
123
124
125



126
127
128
129
130
131
132





133







134
135
136
137
138
139
140
141












142
143
144
145
146
147
148
149
150
151



152



















153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184



185



186
187
188
189
190
191











192
193
194
195
196
197
198
199
200
201
202
203






204
205
206
207
208


209







210




















































211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287























































































































































































































































288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537












538
539
540
541
542
543
544
545
546
547
548
549






























































550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611










612




613
614
615















































616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672







+
+
+
+
+
+
+
+
+



-
+







-
-
-
-
-
+
+
+
+
+
+
+
+




-
-
-
-
-
-
-





-
-

-
+


+
-
-
+
+
+
+
+
+
-
-
-

-
+
-

+
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+

-
-
-
-
+
+
+
+
+
+
+
+
-
-
-
+
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
-
-
-
-
-
+
+
+
+
-
-
+
-
-
-
-
-
+
+
+

-
-
+
+
+
+

+
+
+
+
-
-
+
+
+
+
+
+
-
-
-
+
+
+
+
+
+
+
-
-
-
-
-
+
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
-
-
-
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
+
-
-
-
+
+
+
+
+
+
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
-
-
-
+
+
+
+
+
-
-
+
-
-
-
-
-
-
-
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
-
-
-
-
-
-
-
+
-
-
-
-
+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
;;     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;;     GNU General Public License for more details.
;; 
;;     You should have received a copy of the GNU General Public License
;;     along with Megatest.  If not, see <http://www.gnu.org/licenses/>.
;;

;;======================================================================
;;
;; This is the Megatest specific stuff for starting and maintaining a
;; server. Stuff that talks to the server should go in client.scm.
;; General nanomsg stuff (not Megatest specific) should go in the
;; nmsg-transport.scm file.
;;
;;======================================================================

(require-extension (srfi 18) extras tcp s11n)

(use srfi-1 posix regex regex-case srfi-69 hostinfo md5 message-digest
     directory-utils posix-extras matchable)
     directory-utils posix-extras matchable typed-records)

(use spiffy uri-common intarweb http-client spiffy-request-vars)

(declare (unit server))

(declare (uses common))
(declare (uses db))
(declare (uses tasks)) ;; tasks are where stuff is maintained about what is running.
;; (declare (uses synchash))
(declare (uses http-transport))
;;(declare (uses rpc-transport))
(declare (uses launch))

;; Basic stuff for safely kicking off a server
(declare (uses portlogger))
(import portlogger)
(declare (uses nmsg-transport))
(import nmsg-transport)

;; Might want to bring the daemonizing back
;; (declare (uses daemon))

(include "common_records.scm")
(include "db_records.scm")

(define (server:make-server-url hostport)
  (if (not hostport)
      #f
      (conc "http://" (car hostport) ":" (cadr hostport))))

(define  *server-loop-heart-beat* (current-seconds))

;;======================================================================
;; P K T S   S T U F F 
;;======================================================================

;; ???

;;======================================================================
;; P K T S   S T U F F 
;;  N A N O M S G   B A S E D   S E R V E R
;;======================================================================

(defstruct nmsg
;; ???

  (conn  #f)
  (hosts (make-hash-table))
  pkt
  pktspec
  (mutex    (make-mutex))
  )
;;======================================================================
;; S E R V E R
;;======================================================================

;; Call this to start the actual server
;; make it a global? Well, it is local to nmsg module
;;

(define *nmsg-conndat* (make-nmsg))
;; all routes though here end in exit ...
;;
;; start_server
(nmsg-pktspec-set! *nmsg-conndat*
		   `((server (hostname . h)
			     (port     . p)
			     (pid      . i)
			     )))
;; get a port
;; start the nmsg server
;; look for other servers
;; contact other servers and compile list of servers
;; there are two types of server
;;     main servers - dashboards, runners and dedicated servers - need pkt
;;     passive servers - test executers, step calls, list-runs - no pkt
;;
(define (server:launch run-id transport-type)
  (case transport-type
    ((http)(http-transport:launch))
    ;;((nmsg)(nmsg-transport:launch run-id))
(define (server:start-nmsg #!key (force-server-type #f))
  (mutex-lock! (nmsg-mutex *nmsg-conndat*))
  (let* ((server-type  (or force-server-type
			   (if (args:any? "-run" "-server")
			       'main
			       'passive)))
	 (port-num     (portlogger:open-run-close portlogger:find-port))
	 (nmsg-conn    (nmsg:start-server port-num))
    ;;((rpc)  (rpc-transport:launch run-id))
    (else (debug:print-error 0 *default-log-port* "unknown server type " transport-type))))

	 (pktspec      (nmsg-pktspec *nmsg-conndat*))
;;======================================================================
;; S E R V E R   U T I L I T I E S 
;;======================================================================

;; Get the transport
(define (server:get-transport)
  (if *transport-type*
      *transport-type*
      (let ((ttype (string->symbol
		    (or (args:get-arg "-transport")
	 (pktdir       (conc (get-environment-variable "MT_RUN_AREA_HOME")
			     "/.server-pkts")))
    (if (not (directory? pktdir))(create-directory pktdir))
    ;; server is started, now create pkt if needed
    (if (eq? server-type 'main)
	(nmsg-pkt-set! *nmsg-conndat* 
		       (pkts:write-alist->pkt
			pktdir 
			(configf:lookup *configdat* "server" "transport")
			"rpc"))))
	(set! *transport-type* ttype)
	ttype)))
	    
			`((hostname . ,(get-host-name))
			  (port     . ,port-num)
			  (pid      . ,(current-process-id)))
			pktspec: pktspec
;; Generate a unique signature for this server
(define (server:mk-signature)
			ptype:   'server)))
  (message-digest-string (md5-primitive) 
			 (with-output-to-string
			   (lambda ()
			     (write (list (current-directory)
					  (argv)))))))
    (nmsg-conn-set! *nmsg-conndat* nmsg-conn)
    (mutex-unlock! (nmsg-mutex *nmsg-conndat*))
    ))

;; When using zmq this would send the message back (two step process)
;; with spiffy or rpc this simply returns the return data to be returned
;; 
;; 
;; ;; Call this to start the actual server
;; ;;
;; 
;; ;; all routes though here end in exit ...
;; ;;
;; ;; start_server
;; ;;
(define (server:reply return-addr query-sig success/fail result)
  (debug:print-info 11 *default-log-port* "server:reply return-addr=" return-addr ", result=" result)
;; (define (server:launch run-id transport-type)
;;   (case transport-type
;;     ((http)(http-transport:launch))
;;     ;;((nmsg)(nmsg-transport:launch run-id))
;;     ;;((rpc)  (rpc-transport:launch run-id))
;;     (else (debug:print-error 0 *default-log-port* "unknown server type " transport-type))))
  ;; (send-message pubsock target send-more: #t)
  ;; (send-message pubsock 
  (case (server:get-transport)
;; 
;; ;;======================================================================
;; ;; S E R V E R   U T I L I T I E S 
;; ;;======================================================================
;; 
;; ;; Get the transport
;; (define (server:get-transport)
    ((rpc)  (db:obj->string (vector success/fail query-sig result)))
    ((http) (db:obj->string (vector success/fail query-sig result)))
    ((fs)   result)
    (else 
     (debug:print-error 0 *default-log-port* "unrecognised transport type: " *transport-type*)
;;   (if *transport-type*
     result)))

;; Given a run id start a server process    ### NOTE ### > file 2>&1 
;; if the run-id is zero and the target-host is set 
;; try running on that host
;;   incidental: rotate logs in logs/ dir.
;;
;;       *transport-type*
;;       (let ((ttype (string->symbol
;; 		    (or (args:get-arg "-transport")
;; 			(configf:lookup *configdat* "server" "transport")
;; 			"rpc"))))
;; 	(set! *transport-type* ttype)
;; 	ttype)))
;; 	    
(define  (server:run areapath) ;; areapath is *toppath* for a given testsuite area
  (let* ((curr-host   (get-host-name))
         ;; (attempt-in-progress (server:start-attempted? areapath))
         ;; (dot-server-url (server:check-if-running areapath))
	 (curr-ip     (server:get-best-guess-address curr-host))
	 (curr-pid    (current-process-id))
	 (homehost    (common:get-homehost)) ;; configf:lookup *configdat* "server" "homehost" ))
	 (target-host (car homehost))
	 (testsuite   (common:get-testsuite-name))
	 (logfile     (conc areapath "/logs/server.log")) ;; -" curr-pid "-" target-host ".log"))
	 (cmdln (conc (common:get-megatest-exe)
		      " -server " (or target-host "-") (if (equal? (configf:lookup *configdat* "server" "daemonize") "yes")
;; ;; Generate a unique signature for this server
;; (define (server:mk-signature)
;;   (message-digest-string (md5-primitive) 
;; 			 (with-output-to-string
;; 			   (lambda ()
;; 			     (write (list (current-directory)
;; 					  (argv)))))))
;; 
;; ;; When using zmq this would send the message back (two step process)
;; ;; with spiffy or rpc this simply returns the return data to be returned
							   " -daemonize "
							   "")
		      ;; " -log " logfile
;; ;; 
		      " -m testsuite:" testsuite)) ;; (conc " >> " logfile " 2>&1 &")))))
	 (log-rotate  (make-thread common:rotate-logs  "server run, rotate logs thread"))
         (load-limit  (configf:lookup-number *configdat* "jobtools" "max-server-start-load" default: 3.0)))
    ;; we want the remote server to start in *toppath* so push there
    (push-directory areapath)
    (debug:print 0 *default-log-port* "INFO: Trying to start server (" cmdln ") ...")
    (thread-start! log-rotate)
    
    ;; host.domain.tld match host?
    (if (and target-host 
	     ;; look at target host, is it host.domain.tld or ip address and does it 
	     ;; match current ip or hostname
	     (not (string-match (conc "("curr-host "|" curr-host"\\..*)") target-host))
	     (not (equal? curr-ip target-host)))
	(begin
	  (debug:print-info 0 *default-log-port* "Starting server on " target-host ", logfile is " logfile)
	  (setenv "TARGETHOST" target-host)))
      
    (setenv "TARGETHOST_LOGF" logfile)
;; (define (server:reply return-addr query-sig success/fail result)
;;   (debug:print-info 11 *default-log-port* "server:reply return-addr=" return-addr ", result=" result)
;;   ;; (send-message pubsock target send-more: #t)
;;   ;; (send-message pubsock 
;;   (case (server:get-transport)
;;     ((rpc)  (db:obj->string (vector success/fail query-sig result)))
;;     ((http) (db:obj->string (vector success/fail query-sig result)))
;;     ((fs)   result)
;;     (else 
;;      (debug:print-error 0 *default-log-port* "unrecognised transport type: " *transport-type*)
;;      result)))
;; 
;; ;; Given a run id start a server process    ### NOTE ### > file 2>&1 
;; ;; if the run-id is zero and the target-host is set 
;; ;; try running on that host
;; ;;   incidental: rotate logs in logs/ dir.
;; ;;
;; (define  (server:run areapath) ;; areapath is *toppath* for a given testsuite area
;;   (let* ((curr-host   (get-host-name))
;;          ;; (attempt-in-progress (server:start-attempted? areapath))
;;          ;; (dot-server-url (server:check-if-running areapath))
;; 	 (curr-ip     (server:get-best-guess-address curr-host))
;; 	 (curr-pid    (current-process-id))
;; 	 (homehost    (common:get-homehost)) ;; configf:lookup *configdat* "server" "homehost" ))
;; 	 (target-host (car homehost))
;; 	 (testsuite   (common:get-testsuite-name))
;; 	 (logfile     (conc areapath "/logs/server.log")) ;; -" curr-pid "-" target-host ".log"))
;; 	 (cmdln (conc (common:get-megatest-exe)
;; 		      " -server " (or target-host "-") (if (equal? (configf:lookup *configdat* "server" "daemonize") "yes")
;; 							   " -daemonize "
;; 							   "")
;; 		      ;; " -log " logfile
    (thread-sleep! (/ (random 5000) 1000)) ;; add about a random (up to 5 seconds) initial delay. It seems pretty common that many running tests request a server at the same time
    (common:wait-for-normalized-load load-limit " delaying server start due to load" target-host) ;; do not try starting servers on an already overloaded machine, just wait forever
    (system (conc "nbfake " cmdln))
;; 		      " -m testsuite:" testsuite)) ;; (conc " >> " logfile " 2>&1 &")))))
    (unsetenv "TARGETHOST_LOGF")
    (if (get-environment-variable "TARGETHOST")(unsetenv "TARGETHOST"))
    (thread-join! log-rotate)
;; 	 (log-rotate  (make-thread common:rotate-logs  "server run, rotate logs thread"))
;;          (load-limit  (configf:lookup-number *configdat* "jobtools" "max-server-start-load" default: 3.0)))
;;     ;; we want the remote server to start in *toppath* so push there
;;     (push-directory areapath)
;;     (debug:print 0 *default-log-port* "INFO: Trying to start server (" cmdln ") ...")
;;     (thread-start! log-rotate)
    (pop-directory)))

;; given a path to a server log return: host port startseconds
;;
(define (server:logf-get-start-info logf)
  (let ((rx (regexp "^SERVER STARTED: (\\S+):(\\d+) AT ([\\d\\.]+)"))) ;; SERVER STARTED: host:port AT timesecs
    (handle-exceptions
	exn
	(list #f #f #f) ;; no idea what went wrong, call it a bad server
      (with-input-from-file
	  logf
;;     
;;     ;; host.domain.tld match host?
;;     (if (and target-host 
;; 	     ;; look at target host, is it host.domain.tld or ip address and does it 
;; 	     ;; match current ip or hostname
;; 	     (not (string-match (conc "("curr-host "|" curr-host"\\..*)") target-host))
;; 	     (not (equal? curr-ip target-host)))
;; 	(begin
;; 	  (debug:print-info 0 *default-log-port* "Starting server on " target-host ", logfile is " logfile)
;; 	  (setenv "TARGETHOST" target-host)))
;;       
;;     (setenv "TARGETHOST_LOGF" logfile)
	(lambda ()
	  (let loop ((inl  (read-line))
		     (lnum 0))
	    (if (not (eof-object? inl))
		(let ((mlst (string-match rx inl)))
		  (if (not mlst)
;;     (thread-sleep! (/ (random 5000) 1000)) ;; add about a random (up to 5 seconds) initial delay. It seems pretty common that many running tests request a server at the same time
;;     (common:wait-for-normalized-load load-limit " delaying server start due to load" target-host) ;; do not try starting servers on an already overloaded machine, just wait forever
;;     (system (conc "nbfake " cmdln))
;;     (unsetenv "TARGETHOST_LOGF")
;;     (if (get-environment-variable "TARGETHOST")(unsetenv "TARGETHOST"))
		      (if (< lnum 500) ;; give up if more than 500 lines of server log read
			  (loop (read-line)(+ lnum 1))
;;     (thread-join! log-rotate)
			  (list #f #f #f))
		      (let ((dat  (cdr mlst)))
			(list (car dat) ;; host
			      (string->number (cadr dat)) ;; port
			      (string->number (caddr dat))))))
		(list #f #f #f))))))))

;;     (pop-directory)))
;; get a list of servers with all relevant data
;; ( mod-time host port start-time pid )
;;
(define (server:get-list areapath #!key (limit #f))
  (let ((fname-rx    (regexp "^(|.*/)server-(\\d+)-(\\S+).log$"))
	(day-seconds (* 24 60 60)))
    ;; if the directory exists continue to get the list
    ;; otherwise attempt to create the logs dir and then
    ;; continue
    (if (if (directory-exists? (conc areapath "/logs"))
	    '()
	    (if (file-write-access? areapath)
		(begin
		  (condition-case
		      (create-directory (conc areapath "/logs") #t)
		    (exn (i/o file)(debug:print 0 *default-log-port* "ERROR: Cannot create directory at " (conc areapath "/logs")))
		    (exn ()(debug:print 0 *default-log-port* "ERROR: Unknown error attemtping to get server list.")))
		  (directory-exists? (conc areapath "/logs")))
		'()))
	(let* ((server-logs   (glob (conc areapath "/logs/server-*.log")))
	       (num-serv-logs (length server-logs)))
	  (if (null? server-logs)
	      '()
	      (let loop ((hed  (car server-logs))
			 (tal  (cdr server-logs))
			 (res '()))
		(let* ((mod-time  (handle-exceptions
				      exn
				      (current-seconds) ;; 0
				    (file-modification-time hed))) ;; default to *very* old so log gets ignored if deleted
		       (down-time (- (current-seconds) mod-time))
		       (serv-dat  (if (or (< num-serv-logs 10)
				  	  (< down-time 900)) ;; day-seconds))
				      (server:logf-get-start-info hed)
				      '())) ;; don't waste time processing server files not touched in the 15 minutes if there are more than ten servers to look at
		       (serv-rec (cons mod-time serv-dat))
		       (fmatch   (string-match fname-rx hed))
		       (pid      (if fmatch (string->number (list-ref fmatch 2)) #f))
		       (new-res  (if (null? serv-dat)
				     res
				     (cons (append serv-rec (list pid)) res))))
		(if (null? tal)
		    (if (and limit
			     (> (length new-res) limit))
			new-res ;; (take new-res limit)  <= need intelligent sorting before this will work
			new-res)
		    (loop (car tal)(cdr tal) new-res)))))))))

(define (server:get-num-alive srvlst)
  (let ((num-alive 0))
    (for-each
     (lambda (server)
;; 
;; ;; given a path to a server log return: host port startseconds
;; ;;
;; (define (server:logf-get-start-info logf)
;;   (let ((rx (regexp "^SERVER STARTED: (\\S+):(\\d+) AT ([\\d\\.]+)"))) ;; SERVER STARTED: host:port AT timesecs
;;     (handle-exceptions
;; 	exn
;; 	(list #f #f #f) ;; no idea what went wrong, call it a bad server
;;       (with-input-from-file
;; 	  logf
;; 	(lambda ()
;; 	  (let loop ((inl  (read-line))
;; 		     (lnum 0))
;; 	    (if (not (eof-object? inl))
;; 		(let ((mlst (string-match rx inl)))
;; 		  (if (not mlst)
;; 		      (if (< lnum 500) ;; give up if more than 500 lines of server log read
;; 			  (loop (read-line)(+ lnum 1))
;; 			  (list #f #f #f))
;; 		      (let ((dat  (cdr mlst)))
;; 			(list (car dat) ;; host
;; 			      (string->number (cadr dat)) ;; port
;; 			      (string->number (caddr dat))))))
;; 		(list #f #f #f))))))))
;; 
;; ;; get a list of servers with all relevant data
;; ;; ( mod-time host port start-time pid )
;; ;;
;; (define (server:get-list areapath #!key (limit #f))
;;   (let ((fname-rx    (regexp "^(|.*/)server-(\\d+)-(\\S+).log$"))
;; 	(day-seconds (* 24 60 60)))
;;     ;; if the directory exists continue to get the list
;;     ;; otherwise attempt to create the logs dir and then
;;     ;; continue
;;     (if (if (directory-exists? (conc areapath "/logs"))
;; 	    '()
;; 	    (if (file-write-access? areapath)
;; 		(begin
;; 		  (condition-case
;; 		      (create-directory (conc areapath "/logs") #t)
;; 		    (exn (i/o file)(debug:print 0 *default-log-port* "ERROR: Cannot create directory at " (conc areapath "/logs")))
;; 		    (exn ()(debug:print 0 *default-log-port* "ERROR: Unknown error attemtping to get server list.")))
;; 		  (directory-exists? (conc areapath "/logs")))
;; 		'()))
;; 	(let* ((server-logs   (glob (conc areapath "/logs/server-*.log")))
;; 	       (num-serv-logs (length server-logs)))
;; 	  (if (null? server-logs)
;; 	      '()
;; 	      (let loop ((hed  (car server-logs))
;; 			 (tal  (cdr server-logs))
;; 			 (res '()))
;; 		(let* ((mod-time  (handle-exceptions
;; 				      exn
;; 				      (current-seconds) ;; 0
;; 				    (file-modification-time hed))) ;; default to *very* old so log gets ignored if deleted
;; 		       (down-time (- (current-seconds) mod-time))
;; 		       (serv-dat  (if (or (< num-serv-logs 10)
;; 				  	  (< down-time 900)) ;; day-seconds))
;; 				      (server:logf-get-start-info hed)
;; 				      '())) ;; don't waste time processing server files not touched in the 15 minutes if there are more than ten servers to look at
;; 		       (serv-rec (cons mod-time serv-dat))
;; 		       (fmatch   (string-match fname-rx hed))
;; 		       (pid      (if fmatch (string->number (list-ref fmatch 2)) #f))
;; 		       (new-res  (if (null? serv-dat)
;; 				     res
;; 				     (cons (append serv-rec (list pid)) res))))
;; 		(if (null? tal)
;; 		    (if (and limit
;; 			     (> (length new-res) limit))
;; 			new-res ;; (take new-res limit)  <= need intelligent sorting before this will work
;; 			new-res)
;; 		    (loop (car tal)(cdr tal) new-res)))))))))
;; 
;; (define (server:get-num-alive srvlst)
;;   (let ((num-alive 0))
;;     (for-each
;;      (lambda (server)
       (match-let (((mod-time host port start-time pid)
		    server))
	 (let* ((uptime  (- (current-seconds) mod-time))
		(runtime (if start-time
			     (- mod-time start-time)
			     0)))
	   (if (< uptime 5)(set! num-alive (+ num-alive 1))))))
     srvlst)
    num-alive))

;; given a list of servers get a list of valid servers, i.e. at least
;; 10 seconds old, has started and is less than 1 hour old and is
;; active (i.e. mod-time < 10 seconds
;;
;; mod-time host port start-time pid
;;
;; sort by start-time descending. I.e. get the oldest first. Young servers will thus drop off
;; and servers should stick around for about two hours or so.
;;
(define (server:get-best srvlst)
  (let* ((nums (server:get-num-servers))
	 (now  (current-seconds))
	 (slst (sort
		(filter (lambda (rec)
			  (if (and (list? rec)
				   (> (length rec) 2))
			      (let ((start-time (list-ref rec 3))
				    (mod-time   (list-ref rec 0)))
				;; (print "start-time: " start-time " mod-time: " mod-time)
				(and start-time mod-time
				     (> (- now start-time) 0)    ;; been running at least 0 seconds
				     (< (- now mod-time)   16)   ;; still alive - file touched in last 16 seconds
				     (< (- now start-time)       
					(+ (- (string->number (or (configf:lookup *configdat* "server" "runtime") "3600"))
					      180)
					   (random 360))) ;; under one hour running time +/- 180
				     ))
			      #f))
			srvlst)
		(lambda (a b)
		  (< (list-ref a 3)
		     (list-ref b 3))))))
    (if (> (length slst) nums)
	(take slst nums)
	slst)))

(define (server:get-first-best areapath)
  (let ((srvrs (server:get-best (server:get-list areapath))))
    (if (and srvrs
	     (not (null? srvrs)))
	(car srvrs)
	#f)))

(define (server:get-rand-best areapath)
  (let ((srvrs (server:get-best (server:get-list areapath))))
    (if (and (list? srvrs)
	     (not (null? srvrs)))
	(let* ((len (length srvrs))
	       (idx (random len)))
	  (list-ref srvrs idx))
	#f)))


(define (server:record->url servr)
  (match-let (((mod-time host port start-time pid)
	       servr))
    (if (and host port)
	(conc host ":" port)
	#f)))

(define (server:get-client-signature) ;; BB> why is this proc named "get-"?  it returns nothing -- set! has not return value.
  (if *my-client-signature* *my-client-signature*
      (let ((sig (server:mk-signature)))
        (set! *my-client-signature* sig)
        *my-client-signature*)))

;; kind start up of servers, wait 40 seconds before allowing another server for a given
;; run-id to be launched
(define (server:kind-run areapath)
  (if (not (server:check-if-running areapath)) ;; why try if there is already a server running?
      (let* ((last-run-dat (hash-table-ref/default *server-kind-run* areapath '(0 0))) ;; callnum, whenrun
	     (call-num     (car last-run-dat))
	     (when-run     (cadr last-run-dat))
	     (run-delay    (+ (case call-num
				((0)    0)
				((1)   20)
				((2)  300)
				(else 600))
			      (random 5)))   ;; add a small random number just in case a lot of jobs hit the work hosts simultaneously
	     (lock-file    (conc areapath "/logs/server-start.lock")))
	(if	(> (- (current-seconds) when-run) run-delay)
		(begin
		  (common:simple-file-lock-and-wait lock-file expire-time: 15)
		  (server:run areapath)
		  (thread-sleep! 5) ;; don't release the lock for at least a few seconds
		  (common:simple-file-release-lock lock-file)))
	(hash-table-set! *server-kind-run* areapath (list (+ call-num 1)(current-seconds))))))

(define (server:start-and-wait areapath #!key (timeout 60))
  (let ((give-up-time (+ (current-seconds) timeout)))
    (let loop ((server-url (server:check-if-running areapath))
	       (try-num    0))
      (if (or server-url
	      (> (current-seconds) give-up-time)) ;; server-url will be #f if no server available.
	  server-url
	  (let ((num-ok (length (server:get-best (server:get-list areapath)))))
	    (if (and (> try-num 0)  ;; first time through simply wait a little while then try again
		     (< num-ok 1))  ;; if there are no decent candidates for servers then try starting a new one
		(server:kind-run areapath))
	    (thread-sleep! 5)
	    (loop (server:check-if-running areapath)
		  (+ try-num 1)))))))

(define server:try-running server:run) ;; there is no more per-run servers ;; REMOVE ME. BUG.

(define (server:get-num-servers #!key (numservers 2))
  (let ((ns (string->number
	     (or (configf:lookup *configdat* "server" "numservers") "notanumber"))))
    (or ns numservers)))

;; no longer care if multiple servers are started by accident. older servers will drop off in time.
;;
(define (server:check-if-running areapath) ;;  #!key (numservers "2"))
  (let* ((ns            (server:get-num-servers))
	 (servers       (server:get-best (server:get-list areapath))))
    ;; (print "servers: " servers " ns: " ns)
    (if (or (and servers
		 (null? servers))
	    (not servers)
	    (and (list? servers)
		 (< (length servers) (random ns)))) ;; somewhere between 0 and numservers
        #f
        (let loop ((hed (car servers))
                   (tal (cdr servers)))
          (let ((res (server:check-server hed)))
            (if res
                res
                (if (null? tal)
                    #f
                    (loop (car tal)(cdr tal)))))))))

;; ping the given server
;;
(define (server:check-server server-record)
  (let* ((server-url (server:record->url server-record))
         (res        (case *transport-type*
                       ((http)(server:ping server-url))
                       ;; ((nmsg)(nmsg-transport:ping (tasks:hostinfo-get-interface server)
                       )))
    (if res
        server-url
	#f)))

(define (server:kill servr)
  (match-let (((mod-time hostname port start-time pid)
	       servr))
    (tasks:kill-server hostname pid)))

;; called in megatest.scm, host-port is string hostname:port
;;
;; NOTE: This is NOT called directly from clients as not all transports support a client running
;;       in the same process as the server.
;;
(define (server:ping host-port-in #!key (do-exit #f))
  (let ((host:port (if (not host-port-in) ;; use read-dotserver to find
		       #f ;; (server:check-if-running *toppath*)
		;; (if (number? host-port-in) ;; we were handed a server-id
		;; 	   (let ((srec (tasks:get-server-by-id (db:delay-if-busy (tasks:open-db)) host-port-in)))
		;; 	     ;; (print "srec: " srec " host-port-in: " host-port-in)
		;; 	     (if srec
		;; 		 (conc (vector-ref srec 3) ":" (vector-ref srec 4))
		;; 		 (conc "no such server-id " host-port-in)))
		       host-port-in))) ;; )
    (let* ((host-port (if host:port
			  (let ((slst (string-split   host:port ":")))
			    (if (eq? (length slst) 2)
				(list (car slst)(string->number (cadr slst)))
				#f))
			  #f)))
;;	   (toppath       (launch:setup)))
      ;; (print "host-port=" host-port)
      (if (not host-port)
	  (begin
	    (if host-port-in
		(debug:print 0 *default-log-port*  "ERROR: bad host:port"))
	    (if do-exit (exit 1))
	    #f)
	  (let* ((iface      (car host-port))
		 (port       (cadr host-port))
		 (server-dat (http-transport:client-connect iface port))
		 (login-res  (rmt:login-no-auto-client-setup server-dat)))
	    (if (and (list? login-res)
		     (car login-res))
		(begin
		  ;; (print "LOGIN_OK")
		  (if do-exit (exit 0))
		  #t)
		(begin
		  ;; (print "LOGIN_FAILED")
		  (if do-exit (exit 1))
		  #f)))))))

;; run ping in separate process, safest way in some cases
;;
(define (server:ping-server ifaceport)
  (with-input-from-pipe 
   (conc (common:get-megatest-exe) " -ping " ifaceport)
   (lambda ()
     (let loop ((inl (read-line))
		(res "NOREPLY"))
       (if (eof-object? inl)
	   (case (string->symbol res)
	     ((NOREPLY)  #f)
	     ((LOGIN_OK) #t)
	     (else       #f))
	   (loop (read-line) inl))))))

;; NOT USED (well, ok, reference in rpc-transport but otherwise not used).
;;
(define (server:login toppath)
  (lambda (toppath)
    (set! *db-last-access* (current-seconds)) ;; might not be needed.
    (if (equal? *toppath* toppath)
	#t
	#f)))

;; timeout is hms string: 1h 5m 3s, default is 1 minute
;;
(define (server:expiration-timeout)
  (let ((tmo (configf:lookup *configdat* "server" "timeout")))
    (if (and (string? tmo)
	     (common:hms-string->seconds tmo)) ;; BUG: hms-string->seconds is broken, if given "10" returns 0. Also, it doesn't belong in this logic unless the string->number is changed below
        (* 3600 (string->number tmo))
	60)))

(define (server:get-best-guess-address hostname)
  (let ((res #f))
    (for-each 
     (lambda (adr)
       (if (not (eq? (u8vector-ref adr 0) 127))
	   (set! res adr)))
     ;; NOTE: This can fail when there is no mention of the host in /etc/hosts. FIXME
     (vector->list (hostinfo-addresses (hostname->hostinfo hostname))))
    (string-intersperse 
     (map number->string
	  (u8vector->list
	   (if res res (hostname->ip hostname)))) ".")))
;;        (match-let (((mod-time host port start-time pid)
;; 		    server))
;; 	 (let* ((uptime  (- (current-seconds) mod-time))
;; 		(runtime (if start-time
;; 			     (- mod-time start-time)
;; 			     0)))
;; 	   (if (< uptime 5)(set! num-alive (+ num-alive 1))))))
;;      srvlst)
;;     num-alive))
;; 
;; ;; given a list of servers get a list of valid servers, i.e. at least
;; ;; 10 seconds old, has started and is less than 1 hour old and is
;; ;; active (i.e. mod-time < 10 seconds
;; ;;
;; ;; mod-time host port start-time pid
;; ;;
;; ;; sort by start-time descending. I.e. get the oldest first. Young servers will thus drop off
;; ;; and servers should stick around for about two hours or so.
;; ;;
;; (define (server:get-best srvlst)
;;   (let* ((nums (server:get-num-servers))
;; 	 (now  (current-seconds))
;; 	 (slst (sort
;; 		(filter (lambda (rec)
;; 			  (if (and (list? rec)
;; 				   (> (length rec) 2))
;; 			      (let ((start-time (list-ref rec 3))
;; 				    (mod-time   (list-ref rec 0)))
;; 				;; (print "start-time: " start-time " mod-time: " mod-time)
;; 				(and start-time mod-time
;; 				     (> (- now start-time) 0)    ;; been running at least 0 seconds
;; 				     (< (- now mod-time)   16)   ;; still alive - file touched in last 16 seconds
;; 				     (< (- now start-time)       
;; 					(+ (- (string->number (or (configf:lookup *configdat* "server" "runtime") "3600"))
;; 					      180)
;; 					   (random 360))) ;; under one hour running time +/- 180
;; 				     ))
;; 			      #f))
;; 			srvlst)
;; 		(lambda (a b)
;; 		  (< (list-ref a 3)
;; 		     (list-ref b 3))))))
;;     (if (> (length slst) nums)
;; 	(take slst nums)
;; 	slst)))
;; 
;; (define (server:get-first-best areapath)
;;   (let ((srvrs (server:get-best (server:get-list areapath))))
;;     (if (and srvrs
;; 	     (not (null? srvrs)))
;; 	(car srvrs)
;; 	#f)))
;; 
;; (define (server:get-rand-best areapath)
;;   (let ((srvrs (server:get-best (server:get-list areapath))))
;;     (if (and (list? srvrs)
;; 	     (not (null? srvrs)))
;; 	(let* ((len (length srvrs))
;; 	       (idx (random len)))
;; 	  (list-ref srvrs idx))
;; 	#f)))
;; 
;; 
;; (define (server:record->url servr)
;;   (match-let (((mod-time host port start-time pid)
;; 	       servr))
;;     (if (and host port)
;; 	(conc host ":" port)
;; 	#f)))
;; 
;; (define (server:get-client-signature) ;; BB> why is this proc named "get-"?  it returns nothing -- set! has not return value.
;;   (if *my-client-signature* *my-client-signature*
;;       (let ((sig (server:mk-signature)))
;;         (set! *my-client-signature* sig)
;;         *my-client-signature*)))
;; 
;; ;; kind start up of servers, wait 40 seconds before allowing another server for a given
;; ;; run-id to be launched
;; (define (server:kind-run areapath)
;;   (if (not (server:check-if-running areapath)) ;; why try if there is already a server running?
;;       (let* ((last-run-dat (hash-table-ref/default *server-kind-run* areapath '(0 0))) ;; callnum, whenrun
;; 	     (call-num     (car last-run-dat))
;; 	     (when-run     (cadr last-run-dat))
;; 	     (run-delay    (+ (case call-num
;; 				((0)    0)
;; 				((1)   20)
;; 				((2)  300)
;; 				(else 600))
;; 			      (random 5)))   ;; add a small random number just in case a lot of jobs hit the work hosts simultaneously
;; 	     (lock-file    (conc areapath "/logs/server-start.lock")))
;; 	(if	(> (- (current-seconds) when-run) run-delay)
;; 		(begin
;; 		  (common:simple-file-lock-and-wait lock-file expire-time: 15)
;; 		  (server:run areapath)
;; 		  (thread-sleep! 5) ;; don't release the lock for at least a few seconds
;; 		  (common:simple-file-release-lock lock-file)))
;; 	(hash-table-set! *server-kind-run* areapath (list (+ call-num 1)(current-seconds))))))
;; 
;; (define (server:start-and-wait areapath #!key (timeout 60))
;;   (let ((give-up-time (+ (current-seconds) timeout)))
;;     (let loop ((server-url (server:check-if-running areapath))
;; 	       (try-num    0))
;;       (if (or server-url
;; 	      (> (current-seconds) give-up-time)) ;; server-url will be #f if no server available.
;; 	  server-url
;; 	  (let ((num-ok (length (server:get-best (server:get-list areapath)))))
;; 	    (if (and (> try-num 0)  ;; first time through simply wait a little while then try again
;; 		     (< num-ok 1))  ;; if there are no decent candidates for servers then try starting a new one
;; 		(server:kind-run areapath))
;; 	    (thread-sleep! 5)
;; 	    (loop (server:check-if-running areapath)
;; 		  (+ try-num 1)))))))
;; 
;; (define server:try-running server:run) ;; there is no more per-run servers ;; REMOVE ME. BUG.
;; 
;; (define (server:get-num-servers #!key (numservers 2))
;;   (let ((ns (string->number
;; 	     (or (configf:lookup *configdat* "server" "numservers") "notanumber"))))
;;     (or ns numservers)))
;; 
;; ;; no longer care if multiple servers are started by accident. older servers will drop off in time.
;; ;;
;; (define (server:check-if-running areapath) ;;  #!key (numservers "2"))
;;   (let* ((ns            (server:get-num-servers))
;; 	 (servers       (server:get-best (server:get-list areapath))))
;;     ;; (print "servers: " servers " ns: " ns)
;;     (if (or (and servers
;; 		 (null? servers))
;; 	    (not servers)
;; 	    (and (list? servers)
;; 		 (< (length servers) (random ns)))) ;; somewhere between 0 and numservers
;;         #f
;;         (let loop ((hed (car servers))
;;                    (tal (cdr servers)))
;;           (let ((res (server:check-server hed)))
;;             (if res
;;                 res
;;                 (if (null? tal)
;;                     #f
;;                     (loop (car tal)(cdr tal)))))))))
;; 
;; ;; ping the given server
;; ;;
;; (define (server:check-server server-record)
;;   (let* ((server-url (server:record->url server-record))
;;          (res        (case *transport-type*
;;                        ((http)(server:ping server-url))
;;                        ;; ((nmsg)(nmsg-transport:ping (tasks:hostinfo-get-interface server)
;;                        )))
;;     (if res
;;         server-url
;; 	#f)))
;; 
;; ;; DO STUFF HERE - BUG
;; (define (server:kill servr)
;;   (match-let (((mod-time hostname port start-time pid)
;; 	       servr))
;;     #;(tasks:kill-server hostname pid)
;;     #f
;;     ))
;; 
;; ;; called in megatest.scm, host-port is string hostname:port
;; ;;
;; ;; NOTE: This is NOT called directly from clients as not all transports support a client running
;; ;;       in the same process as the server.
;; ;;
;; (define (server:ping host-port-in #!key (do-exit #f))
;;   (let ((host:port (if (not host-port-in) ;; use read-dotserver to find
;; 		       #f ;; (server:check-if-running *toppath*)
;; 		;; (if (number? host-port-in) ;; we were handed a server-id
;; 		;; 	   (let ((srec (tasks:get-server-by-id (db:delay-if-busy (tasks:open-db)) host-port-in)))
;; 		;; 	     ;; (print "srec: " srec " host-port-in: " host-port-in)
;; 		;; 	     (if srec
;; 		;; 		 (conc (vector-ref srec 3) ":" (vector-ref srec 4))
;; 		;; 		 (conc "no such server-id " host-port-in)))
;; 		       host-port-in))) ;; )
;;     (let* ((host-port (if host:port
;; 			  (let ((slst (string-split   host:port ":")))
;; 			    (if (eq? (length slst) 2)
;; 				(list (car slst)(string->number (cadr slst)))
;; 				#f))
;; 			  #f)))
;; ;;	   (toppath       (launch:setup)))
;;       ;; (print "host-port=" host-port)
;;       (if (not host-port)
;; 	  (begin
;; 	    (if host-port-in
;; 		(debug:print 0 *default-log-port*  "ERROR: bad host:port"))
;; 	    (if do-exit (exit 1))
;; 	    #f)
;; 	  (let* ((iface      (car host-port))
;; 		 (port       (cadr host-port))
;; 		 (server-dat (http-transport:client-connect iface port))
;; 		 (login-res  (rmt:login-no-auto-client-setup server-dat)))
;; 	    (if (and (list? login-res)
;; 		     (car login-res))
;; 		(begin
;; 		  ;; (print "LOGIN_OK")
;; 		  (if do-exit (exit 0))
;; 		  #t)
;; 		(begin
;; 		  ;; (print "LOGIN_FAILED")
;; 		  (if do-exit (exit 1))
;; 		  #f)))))))
;; 
;; ;; run ping in separate process, safest way in some cases
;; ;;
;; (define (server:ping-server ifaceport)
;;   (with-input-from-pipe 
;;    (conc (common:get-megatest-exe) " -ping " ifaceport)
;;    (lambda ()
;;      (let loop ((inl (read-line))
;; 		(res "NOREPLY"))
;;        (if (eof-object? inl)
;; 	   (case (string->symbol res)
;; 	     ((NOREPLY)  #f)
;; 	     ((LOGIN_OK) #t)
;; 	     (else       #f))
;; 	   (loop (read-line) inl))))))
;; 
;; ;; NOT USED (well, ok, reference in rpc-transport but otherwise not used).
;; ;;
;; (define (server:login toppath)
;;   (lambda (toppath)
;;     (set! *db-last-access* (current-seconds)) ;; might not be needed.
;;     (if (equal? *toppath* toppath)
;; 	#t
;; 	#f)))
;; 
;; ;; timeout is hms string: 1h 5m 3s, default is 1 minute
;; ;;
;; (define (server:expiration-timeout)
;;   (let ((tmo (configf:lookup *configdat* "server" "timeout")))
;;     (if (and (string? tmo)
;; 	     (common:hms-string->seconds tmo)) ;; BUG: hms-string->seconds is broken, if given "10" returns 0. Also, it doesn't belong in this logic unless the string->number is changed below
;;         (* 3600 (string->number tmo))
;; 	60)))
;; 
;; (define (server:get-best-guess-address hostname)
;;   (let ((res #f))
;;     (for-each 
;;      (lambda (adr)
;;        (if (not (eq? (u8vector-ref adr 0) 127))
;; 	   (set! res adr)))
;;      ;; NOTE: This can fail when there is no mention of the host in /etc/hosts. FIXME
;;      (vector->list (hostinfo-addresses (hostname->hostinfo hostname))))
;;     (string-intersperse 
;;      (map number->string
;; 	  (u8vector->list
;; 	   (if res res (hostname->ip hostname)))) ".")))

;; moving this here as it needs access to db and cannot be in common.
;;
(define (server:writable-watchdog dbstruct)
  (thread-sleep! 0.05) ;; delay for startup
  (let ((legacy-sync  (common:run-sync?))
        (sync-stale-seconds (configf:lookup-number *configdat* "server" "sync-stale-seconds" default: 300))
	(debug-mode   (debug:debug-mode 1))
	(last-time    (current-seconds))
	(no-sync-db   (db:open-no-sync-db))
        (sync-duration 0) ;; run time of the sync in milliseconds
        ;;(this-wd-num  (begin (mutex-lock! *wdnum*mutex) (let ((x *wdnum*)) (set! *wdnum* (add1 *wdnum*)) (mutex-unlock! *wdnum*mutex) x)))
;; 
;; ;; moving this here as it needs access to db and cannot be in common.
;; ;;
;; (define (server:writable-watchdog dbstruct)
;;   (thread-sleep! 0.05) ;; delay for startup
;;   (let ((legacy-sync  (common:run-sync?))
;;         (sync-stale-seconds (configf:lookup-number *configdat* "server" "sync-stale-seconds" default: 300))
;; 	(debug-mode   (debug:debug-mode 1))
;; 	(last-time    (current-seconds))
;; 	(no-sync-db   (db:open-no-sync-db))
;;         (sync-duration 0) ;; run time of the sync in milliseconds
;;         ;;(this-wd-num  (begin (mutex-lock! *wdnum*mutex) (let ((x *wdnum*)) (set! *wdnum* (add1 *wdnum*)) (mutex-unlock! *wdnum*mutex) x)))
        )
    (set! *no-sync-db* no-sync-db) ;; make the no sync db available to api calls
    (debug:print-info 2 *default-log-port* "Periodic sync thread started.")
    (debug:print-info 3 *default-log-port* "watchdog starting. legacy-sync is " legacy-sync" pid="(current-process-id)  );;  " this-wd-num="this-wd-num)
    (if (and legacy-sync (not *time-to-exit*))
	(let* (;;(dbstruct (db:setup))
	       (mtdb       (dbr:dbstruct-mtdb dbstruct))
	       (mtpath     (db:dbdat-get-path mtdb))
	       (tmp-area   (common:get-db-tmp-area))
	       (start-file (conc tmp-area "/.start-sync"))
	       (end-file   (conc tmp-area "/.end-sync")))
	  (debug:print-info 0 *default-log-port* "Server running, periodic sync started.")
	  (let loop ()
	    ;; sync for filesystem local db writes
	    ;;
	    (mutex-lock! *db-multi-sync-mutex*)
	    (let* ((need-sync        (>= *db-last-access* *db-last-sync*)) ;; no sync since last write
		   (sync-in-progress *db-sync-in-progress*)
                   (min-intersync-delay (configf:lookup-number *configdat* "server" "minimum-intersync-delay" default: 5))
		   (should-sync      (and (not *time-to-exit*)
                                          (> (- (current-seconds) *db-last-sync*) min-intersync-delay))) ;; sync every five seconds minimum, deprecated logic, can probably be removed
		   (start-time       (current-seconds))
                   (cpu-load-adj     (alist-ref 'adj-proc-load (common:get-normalized-cpu-load #f)))
		   (mt-mod-time      (file-modification-time mtpath))
		   (last-sync-start  (if (common:file-exists? start-file)
					 (file-modification-time start-file)
					 0))
		   (last-sync-end    (if (common:file-exists? end-file)
					 (file-modification-time end-file)
					 10))
                   (sync-period      (+ 3 (* cpu-load-adj 30))) ;; as adjusted load increases increase the sync period
		   (recently-synced  (and (< (- start-time mt-mod-time) sync-period) ;; not useful if sync didn't modify megatest.db!
					  (< mt-mod-time last-sync-start)))
		   (sync-done        (<= last-sync-start last-sync-end))
                   (sync-stale       (> start-time (+ last-sync-start sync-stale-seconds)))
		   (will-sync        (and (not *time-to-exit*)       ;; do not start a sync if we are in the process of exiting
                                          (or need-sync should-sync)
					  (or sync-done sync-stale)
					  (not sync-in-progress)
					  (not recently-synced))))
              (debug:print-info 13 *default-log-port* "WD writable-watchdog top of loop.  need-sync="need-sync" sync-in-progress=" sync-in-progress
				" should-sync="should-sync" start-time="start-time" mt-mod-time="mt-mod-time" recently-synced="recently-synced" will-sync="will-sync
				" sync-done=" sync-done " sync-period=" sync-period)
              (if (and (> sync-period 5)
                       (common:low-noise-print 30 "sync-period"))
                  (debug:print-info 0 *default-log-port* "Increased sync period due to long sync times, sync took: " sync-period " seconds."))
	      ;; (if recently-synced (debug:print-info 0 *default-log-port* "Skipping sync due to recently-synced flag=" recently-synced))
	      ;; (debug:print-info 0 *default-log-port* "need-sync: " need-sync " sync-in-progress: " sync-in-progress " should-sync: " should-sync " will-sync: " will-sync)
	      (if will-sync (set! *db-sync-in-progress* #t))
	      (mutex-unlock! *db-multi-sync-mutex*)
	      (if will-sync
                  (let (;; (max-sync-duration  (configf:lookup-number *configdat* "server" "max-sync-duration")) ;; KEEPING THIS AVAILABLE BUT SHOULD NOT USE, I'M PRETTY SURE IT DOES NOT WORK!
                        (sync-start         (current-milliseconds)))
		    (with-output-to-file start-file (lambda ()(print (current-process-id))))
		    
		    ;; put lock here
		    
                    ;; (if (or (not max-sync-duration)
                    ;;        (< sync-duration max-sync-duration)) ;; NOTE: db:sync-to-megatest.db keeps track of time of last sync and syncs incrementally
                        (let ((res        (db:sync-to-megatest.db dbstruct no-sync-db: no-sync-db))) ;; did we sync any data? If so need to set the db touched flag to keep the server alive
                          (set! sync-duration (- (current-milliseconds) sync-start))
                          (if (> res 0) ;; some records were transferred, keep the db alive
;;         )
;;     (set! *no-sync-db* no-sync-db) ;; make the no sync db available to api calls
;;     (debug:print-info 2 *default-log-port* "Periodic sync thread started.")
;;     (debug:print-info 3 *default-log-port* "watchdog starting. legacy-sync is " legacy-sync" pid="(current-process-id)  );;  " this-wd-num="this-wd-num)
;;     (if (and legacy-sync (not *time-to-exit*))
;; 	(let* (;;(dbstruct (db:setup))
;; 	       (mtdb       (dbr:dbstruct-mtdb dbstruct))
;; 	       (mtpath     (db:dbdat-get-path mtdb))
;; 	       (tmp-area   (common:get-db-tmp-area))
;; 	       (start-file (conc tmp-area "/.start-sync"))
;; 	       (end-file   (conc tmp-area "/.end-sync")))
;; 	  (debug:print-info 0 *default-log-port* "Server running, periodic sync started.")
;; 	  (let loop ()
;; 	    ;; sync for filesystem local db writes
;; 	    ;;
;; 	    (mutex-lock! *db-multi-sync-mutex*)
;; 	    (let* ((need-sync        (>= *db-last-access* *db-last-sync*)) ;; no sync since last write
;; 		   (sync-in-progress *db-sync-in-progress*)
;;                    (min-intersync-delay (configf:lookup-number *configdat* "server" "minimum-intersync-delay" default: 5))
;; 		   (should-sync      (and (not *time-to-exit*)
;;                                           (> (- (current-seconds) *db-last-sync*) min-intersync-delay))) ;; sync every five seconds minimum, deprecated logic, can probably be removed
;; 		   (start-time       (current-seconds))
;;                    (cpu-load-adj     (alist-ref 'adj-proc-load (common:get-normalized-cpu-load #f)))
;; 		   (mt-mod-time      (file-modification-time mtpath))
;; 		   (last-sync-start  (if (common:file-exists? start-file)
;; 					 (file-modification-time start-file)
;; 					 0))
;; 		   (last-sync-end    (if (common:file-exists? end-file)
;; 					 (file-modification-time end-file)
;; 					 10))
;;                    (sync-period      (+ 3 (* cpu-load-adj 30))) ;; as adjusted load increases increase the sync period
;; 		   (recently-synced  (and (< (- start-time mt-mod-time) sync-period) ;; not useful if sync didn't modify megatest.db!
;; 					  (< mt-mod-time last-sync-start)))
;; 		   (sync-done        (<= last-sync-start last-sync-end))
;;                    (sync-stale       (> start-time (+ last-sync-start sync-stale-seconds)))
;; 		   (will-sync        (and (not *time-to-exit*)       ;; do not start a sync if we are in the process of exiting
;;                                           (or need-sync should-sync)
;; 					  (or sync-done sync-stale)
;; 					  (not sync-in-progress)
;; 					  (not recently-synced))))
;;               (debug:print-info 13 *default-log-port* "WD writable-watchdog top of loop.  need-sync="need-sync" sync-in-progress=" sync-in-progress
;; 				" should-sync="should-sync" start-time="start-time" mt-mod-time="mt-mod-time" recently-synced="recently-synced" will-sync="will-sync
;; 				" sync-done=" sync-done " sync-period=" sync-period)
;;               (if (and (> sync-period 5)
;;                        (common:low-noise-print 30 "sync-period"))
;;                   (debug:print-info 0 *default-log-port* "Increased sync period due to long sync times, sync took: " sync-period " seconds."))
;; 	      ;; (if recently-synced (debug:print-info 0 *default-log-port* "Skipping sync due to recently-synced flag=" recently-synced))
;; 	      ;; (debug:print-info 0 *default-log-port* "need-sync: " need-sync " sync-in-progress: " sync-in-progress " should-sync: " should-sync " will-sync: " will-sync)
;; 	      (if will-sync (set! *db-sync-in-progress* #t))
;; 	      (mutex-unlock! *db-multi-sync-mutex*)
;; 	      (if will-sync
;;                   (let (;; (max-sync-duration  (configf:lookup-number *configdat* "server" "max-sync-duration")) ;; KEEPING THIS AVAILABLE BUT SHOULD NOT USE, I'M PRETTY SURE IT DOES NOT WORK!
;;                         (sync-start         (current-milliseconds)))
;; 		    (with-output-to-file start-file (lambda ()(print (current-process-id))))
;; 		    
;; 		    ;; put lock here
;; 		    
;;                     ;; (if (or (not max-sync-duration)
;;                     ;;        (< sync-duration max-sync-duration)) ;; NOTE: db:sync-to-megatest.db keeps track of time of last sync and syncs incrementally
;;                         (let ((res        (db:sync-to-megatest.db dbstruct no-sync-db: no-sync-db))) ;; did we sync any data? If so need to set the db touched flag to keep the server alive
;;                           (set! sync-duration (- (current-milliseconds) sync-start))
;;                           (if (> res 0) ;; some records were transferred, keep the db alive
                              (begin
                                (mutex-lock! *heartbeat-mutex*)
                                (set! *db-last-access* (current-seconds))
                                (mutex-unlock! *heartbeat-mutex*)
                                (debug:print-info 0 *default-log-port* "sync called, " res " records transferred."))
                              (debug:print-info 2 *default-log-port* "sync called but zero records transferred")))))
;;                         ;; TODO: factor this next routine out into a function
;;                         (with-input-from-pipe ;; this should not block other threads but need to verify this
;;                          (conc "megatest -sync-to-megatest.db -m testsuite:" (common:get-area-name) ":" *toppath*)
;;                          (lambda ()
;;                               (begin
;;                            (let loop ((inl (read-line))
;;                                       (res #f))
;;                              (if (eof-object? inl)
;;                                  (begin
;;                                 (mutex-lock! *heartbeat-mutex*)
;;                                 (set! *db-last-access* (current-seconds))
;;                                 (mutex-unlock! *heartbeat-mutex*)
;;                                    (set! sync-duration (- (current-milliseconds) sync-start))
;;                                    (cond
;;                                     ((not res)
;;                                      (debug:print 0 *default-log-port* "ERROR: sync from /tmp db to megatest.db appears to have failed. Recommended that you stop your runs and run \"megatest -cleanup-db\""))
;;                                     ((> res 0)
;;                                      (mutex-lock! *heartbeat-mutex*)
;;                                      (set! *db-last-access* (current-seconds))
;;                                      (mutex-unlock! *heartbeat-mutex*))))
;;                                  (let ((num-synced (let ((matches (string-match "^Synced (\\d+).*$" inl)))
;;                                                      (if matches
;;                                                          (string->number (cadr matches))
;;                                                          #f))))
;;                                    (loop (read-line)
;;                                          (or num-synced res))))))))))
	      (if will-sync
		  (begin
		    (mutex-lock! *db-multi-sync-mutex*)
		    (set! *db-sync-in-progress* #f)
		    (set! *db-last-sync* start-time)
		    (with-output-to-file end-file (lambda ()(print (current-process-id))))

		    ;; release lock here

		    (mutex-unlock! *db-multi-sync-mutex*)))
	      (if (and debug-mode
		       (> (- start-time last-time) 60))
		  (begin
		    (set! last-time start-time)
		    (debug:print-info 4 *default-log-port* "timestamp -> " (seconds->time-string (current-seconds)) ", time since start -> " (seconds->hr-min-sec (- (current-seconds) *time-zero*))))))
	    
	    ;; keep going unless time to exit
	    ;;
	    (if (not *time-to-exit*)
		(let delay-loop ((count 0))
                  ;;(debug:print-info 13 *default-log-port* "delay-loop top; count="count" pid="(current-process-id)" this-wd-num="this-wd-num" *time-to-exit*="*time-to-exit*)
                                                            
		  (if (and (not *time-to-exit*)
			   (< count 6)) ;; was 11, changing to 4. 
		      (begin
			(thread-sleep! 1)
			(delay-loop (+ count 1))))
		  (if (not *time-to-exit*) (loop))))
	    ;; time to exit, close the no-sync db here
	    (db:no-sync-close-db no-sync-db)
	    (if (common:low-noise-print 30)
		(debug:print-info 0 *default-log-port* "Exiting watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id) ))))))) ;;" this-wd-num="this-wd-num)))))))

;;                                 (debug:print-info 0 *default-log-port* "sync called, " res " records transferred."))
;;                               (debug:print-info 2 *default-log-port* "sync called but zero records transferred")))))
;; ;;                         ;; TODO: factor this next routine out into a function
;; ;;                         (with-input-from-pipe ;; this should not block other threads but need to verify this
;; ;;                          (conc "megatest -sync-to-megatest.db -m testsuite:" (common:get-area-name) ":" *toppath*)
;; ;;                          (lambda ()
;; ;;                            (let loop ((inl (read-line))
;; ;;                                       (res #f))
;; ;;                              (if (eof-object? inl)
;; ;;                                  (begin
;; ;;                                    (set! sync-duration (- (current-milliseconds) sync-start))
;; ;;                                    (cond
;; ;;                                     ((not res)
;; ;;                                      (debug:print 0 *default-log-port* "ERROR: sync from /tmp db to megatest.db appears to have failed. Recommended that you stop your runs and run \"megatest -cleanup-db\""))
;; ;;                                     ((> res 0)
;; ;;                                      (mutex-lock! *heartbeat-mutex*)
;; ;;                                      (set! *db-last-access* (current-seconds))
;; ;;                                      (mutex-unlock! *heartbeat-mutex*))))
;; ;;                                  (let ((num-synced (let ((matches (string-match "^Synced (\\d+).*$" inl)))
;; ;;                                                      (if matches
;; ;;                                                          (string->number (cadr matches))
;; ;;                                                          #f))))
;; ;;                                    (loop (read-line)
;; ;;                                          (or num-synced res))))))))))
;; 	      (if will-sync
;; 		  (begin
;; 		    (mutex-lock! *db-multi-sync-mutex*)
;; 		    (set! *db-sync-in-progress* #f)
;; 		    (set! *db-last-sync* start-time)
;; 		    (with-output-to-file end-file (lambda ()(print (current-process-id))))
;; 
;; 		    ;; release lock here
;; 
;; 		    (mutex-unlock! *db-multi-sync-mutex*)))
;; 	      (if (and debug-mode
;; 		       (> (- start-time last-time) 60))
;; 		  (begin
;; 		    (set! last-time start-time)
;; 		    (debug:print-info 4 *default-log-port* "timestamp -> " (seconds->time-string (current-seconds)) ", time since start -> " (seconds->hr-min-sec (- (current-seconds) *time-zero*))))))
;; 	    
;; 	    ;; keep going unless time to exit
;; 	    ;;
;; 	    (if (not *time-to-exit*)
;; 		(let delay-loop ((count 0))
;;                   ;;(debug:print-info 13 *default-log-port* "delay-loop top; count="count" pid="(current-process-id)" this-wd-num="this-wd-num" *time-to-exit*="*time-to-exit*)
;;                                                             
;; 		  (if (and (not *time-to-exit*)
;; 			   (< count 6)) ;; was 11, changing to 4. 
;; 		      (begin
;; 			(thread-sleep! 1)
;; 			(delay-loop (+ count 1))))
;; 		  (if (not *time-to-exit*) (loop))))
;; 	    ;; time to exit, close the no-sync db here
;; 	    (db:no-sync-close-db no-sync-db)
;; 	    (if (common:low-noise-print 30)
;; 		(debug:print-info 0 *default-log-port* "Exiting watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id) ))))))) ;;" this-wd-num="this-wd-num)))))))
;; 

Modified subrun.scm from [bb7061fde4] to [ad3bd444b3].

116
117
118
119
120
121
122









123
124
125
126
127
128
129
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138







+
+
+
+
+
+
+
+
+







              (subrun:exec-sub-megatest test-run-dir action-switches-str "remove")))
        (if remove-result
            (begin
              (subrun:set-subrun-removed test-run-dir)
              #t)
            #f))
      #t))

(define (subrun:kill-subrun test-run-dir )
  (if (and (not (subrun:subrun-removed? test-run-dir)) (subrun:subrun-test-initialized? test-run-dir))
      (let* ((action-switches-str
              (conc "-kill-runs" ))
             (kill-result
              (subrun:exec-sub-megatest test-run-dir action-switches-str "kill")))
        kill-result)
      #t))

(define (subrun:launch-cmd test-run-dir)
  (if (subrun:subrun-removed? test-run-dir)
      (subrun:unset-subrun-removed test-run-dir))      

  (let* ((log-prefix "run")
         (switches (subrun:selector+log-switches test-run-dir log-prefix))

Added utils/get-procedures.sh version [aa92973588].






1
2
3
4
5
+
+
+
+
+
#!/bin/bash

fname=$1

grep '(define (' $fname | tr '()' '  '|awk '{print $2}'