Megatest

Changes On Branch f803cb519f1bffc0
Login

Changes In Branch v1.80 Through [f803cb519f] Excluding Merge-Ins

This is equivalent to a diff from abb42df5ef to f803cb519f

2023-03-01
10:22
Added template for dashboard-transport-mode.scm check-in: 50f20490df user: mrwellan tags: v1.80
07:41
ATTACH sync working ok. check-in: f803cb519f user: matt tags: v1.80
2023-02-28
20:27
ATTACH Sync not working. check-in: 692ebd2f40 user: mrwellan tags: v1.80
2022-12-02
11:57
new version branch check-in: 6cb6675102 user: mmgraham tags: v1.80
2022-11-23
20:16
Merged in nohomehost since multi-area dashboard will depend on nohomehost Leaf check-in: aac724292e user: matt tags: v1.70-ndboard
2022-11-22
09:06
Turn the handler for opening server info files back on since those files can disappear without warning. Closed-Leaf check-in: abb42df5ef user: matt tags: v1.70-nohomehost
08:59
Some more tweaks and output reduction. Still get crashes due to db lock but system seems to keep going pretty well. This is with 300 tests running on one machine. check-in: 4dcb84418f user: matt tags: v1.70-nohomehost

Modified Makefile from [2a2c13125c] to [36df27952a].

26
27
28
29
30
31
32
33


34
35
36









37
38

39
40
41
42
43
44
45
26
27
28
29
30
31
32

33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56







-
+
+



+
+
+
+
+
+
+
+
+


+







           process.scm runs.scm tasks.scm tests.scm genexample.scm	\
           http-transport.scm tdb.scm client.scm mt.scm	\
           ezsteps.scm lock-queue.scm rmt.scm api.scm		\
           subrun.scm portlogger.scm archive.scm env.scm		\
           diff-report.scm cgisetup/models/pgdb.scm

# module source files
MSRCFILES = dbfile.scm debugprint.scm mtargs.scm commonmod.scm dbmod.scm
MSRCFILES = dbfile.scm debugprint.scm mtargs.scm commonmod.scm dbmod.scm \
            tcp-transportmod.scm

all : $(PREFIX)/bin/.$(ARCHSTR) mtest dboard mtut tcmt

transport-mode.scm : transport-mode.scm.template
	cp transport-mode.scm.template transport-mode.scm

dashboard-transport-mode.scm : transport-mode.scm.template
	cp transport-mode.scm.template dashboard-transport-mode.scm

megatest.scm : transport-mode.scm
dashboard.scm : dashboard-transport-mode.scm

# dbmod.import.o is just a hack here
mofiles/dbfile.o     : mofiles/debugprint.o mofiles/commonmod.o dbmod.import.o
configf.o : commonmod.import.o
db.o : dbmod.import.o
mofiles/debugprint.o : mofiles/mtargs.o

# ftail.scm rmtmod.scm commonmod.scm removed
# MSRCFILES = ducttape-lib.scm pkts.scm stml2.scm cookie.scm mutils.scm	\
#             mtargs.scm commonmod.scm dbmod.scm adjutant.scm ulex.scm	\
#             rmtmod.scm apimod.scm
94
95
96
97
98
99
100
101
102


103
104
105
106
107
108
109
105
106
107
108
109
110
111


112
113
114
115
116
117
118
119
120







-
-
+
+








showmtesthash:
	@echo $(MTESTHASH)

dboard : $(OFILES) $(GOFILES) dashboard.scm $(MOFILES) $(MOIMPFILES) megatest-version.scm megatest-fossil-hash.scm
	csc $(CSCOPTS) $(OFILES) dashboard.scm $(GOFILES) $(MOFILES) $(MOIMPFILES) -o dboard

mtut: $(OFILES) $(MOFILES) megatest-fossil-hash.scm mtut.scm megatest-version.scm
	csc $(CSCOPTS) $(OFILES) $(MOFILES) mtut.scm -o mtut
mtut: $(OFILES) $(MOFILES) $(MOIMPFILES) megatest-fossil-hash.scm mtut.scm megatest-version.scm 
	csc $(CSCOPTS) $(OFILES) $(MOFILES)  $(MOIMPFILES) mtut.scm -o mtut

# include makefile.inc

TCMTOBJS = \
	api.o \
	archive.o \
	cgisetup/models/pgdb.o \
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373

374
375

376
377
378
379
380
381
382
368
369
370
371
372
373
374

375
376
377
378
379
380
381
382
383
384
385

386
387
388
389
390
391
392
393







-









+

-
+







	if [[ $(ARCHSTR) == 12.5 ]]; then \
	mkdir -p $(PREFIX)/bin/.$(ARCHSTR)/lib; \
	$(INSTALL) lib/libxcb-xlib.so.0 $(PREFIX)/bin/.$(ARCHSTR)/lib/libxcb-xlib.so.0; \
        fi

install : $(PREFIX)/bin/.$(ARCHSTR) $(PREFIX)/bin/.$(ARCHSTR)/mtest $(PREFIX)/bin/megatest \
          $(PREFIX)/bin/.$(ARCHSTR)/dboard $(PREFIX)/bin/dashboard $(HELPERS) $(PREFIX)/bin/nbfake \
          $(PREFIX)/bin/.$(ARCHSTR)/mtexec $(PREFIX)/bin/mtexec $(PREFIX)/bin/serialize-env \
	  $(PREFIX)/bin/nbfind $(PREFIX)/bin/mtrunner $(PREFIX)/bin/viewscreen $(PREFIX)/bin/mt_xterm \
          $(PREFIX)/bin/mt-old-to-new.sh $(PREFIX)/bin/mt-new-to-old.sh \
	  $(PREFIX)/share/docs/megatest_manual.html $(PREFIX)/bin/remrun \
	  $(PREFIX)/share/docs/megatest_manual.html $(PREFIX)/bin/remrun $(PREFIX)/bin/mtutil \
	  $(PREFIX)/bin/tcmt $(PREFIX)/share/db/mt-pg.sql \
          $(PREFIX)/share/js/jquery-3.1.0.slim.min.js \
          $(PREFIX)/bin/.$(ARCHSTR)/lib/libpangox-1.0.so \
          $(PREFIX)/bin/.$(ARCHSTR)/lib/libpangox-1.0.so.0 \
          $(PREFIX)/bin/.$(ARCHSTR)/lib/libxcb-xlib.so.0

#         $(PREFIX)/bin/.$(ARCHSTR)/ndboard

#         $(PREFIX)/bin/.$(ARCHSTR)/mtexec $(PREFIX)/bin/mtexec $(PREFIX)/bin/serialize-env \
# $(PREFIX)/bin/newdashboard

$(PREFIX)/bin/.$(ARCHSTR) : 
	mkdir -p $(PREFIX)/bin/.$(ARCHSTR)
	mkdir -p $(PREFIX)/bin/.$(ARCHSTR)/lib

test: tests/tests.scm

Modified TODO from [da5eae4898] to [fa3d981ca6].

14
15
16
17
18
19
20



21
22
23
24
25
26
27
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30







+
+
+







# 
#     You should have received a copy of the GNU General Public License
#     along with Megatest.  If not, see <http://www.gnu.org/licenses/>.

TODO
====

23WW07
. Remove use of *dbstruct-dbs*

WW15
. fill newview matrix with data, filter pipeline gui elements
. improve [script], especially indent handling

WW16
. split db into megatest.db (runs etc.) db/<something>.db
. release basic newview implementation

Modified api.scm from [e629c948c8] to [edf4eba965].

16
17
18
19
20
21
22
23
24
25
26
27
28
29
30

31
32
33






34
35
36
37
38
39
40
41
42
43

44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65


66
67
68
69
70
71
72
16
17
18
19
20
21
22


23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69


70
71
72
73
74
75
76
77
78







-
-






+



+
+
+
+
+
+










+




















-
-
+
+







;;     GNU General Public License for more details.
;; 
;;     You should have received a copy of the GNU General Public License
;;     along with Megatest.  If not, see <http://www.gnu.org/licenses/>.
;;
;;======================================================================

(use srfi-69 posix)

(declare (unit api))
(declare (uses rmt))
(declare (uses db))
(declare (uses dbmod))
(declare (uses dbfile))
(declare (uses tasks))
(declare (uses tcp-transportmod))

(import dbmod)
(import dbfile)
(import tcp-transportmod)

(use srfi-69
     posix
     matchable
     s11n)

;; allow these queries through without starting a server
;;
(define api:read-only-queries
  '(get-key-val-pairs
    get-var
    get-keys
    get-key-vals
    test-toplevel-num-items
    get-test-info-by-id
    get-test-state-status-by-id
    get-steps-info-by-id
    get-data-info-by-id
    test-get-rundir-from-test-id
    get-count-tests-running-for-testname
    get-count-tests-running
    get-count-tests-running-in-jobgroup
    get-previous-test-run-record
    get-matching-previous-test-run-records
    test-get-logfile-info
    test-get-records-for-index-file
    get-testinfo-state-status
    test-get-top-process-pid
    test-get-paths-matching-keynames-target-new
    get-prereqs-not-met
    get-count-tests-running-for-run-id
    get-run-info
    get-run-status
    get-run-state
    get-run-stats
    get-run-times
    get-targets
    get-target
    get-target
    get-targets
    ;; register-run
    get-tests-tags
    get-test-times
    get-tests-for-run
    get-tests-for-run-state-status
    get-test-id
    get-tests-for-runs-mindata
139
140
141
142
143
144
145
146

147
148
149
150
151
152
153
154
155
156
157
158
159






160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200














































































































201
202
203
204
205
206





207
208

209
210
211
212



213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232



















233
234
235
236
237
238
239
240
241
242
243
244
245












246
247
248
249
250
251
252
253







254
255
256
257
258
259
260






261
262
263
264
265




266
267
268
269
270




271
272
273
274
275
276
277
278
279
280
281
282











283
284
285
286



287
288
289
290
291
292
293






294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318

























319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344


























345
346
347
348
349




350
351
352
353
354




355
356
357
358
359
360
361
362
363
364
365
366
367
368
369














370
371
372
373
374
375





376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
145
146
147
148
149
150
151

152
153
154
155
156
157
158
159






160
161
162
163
164
165
166
167
168
169
170
171
172


































173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283





284
285
286
287
288
289

290
291



292
293
294
295



















296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315












316
317
318
319
320
321
322
323
324
325
326
327
328







329
330
331
332
333
334
335
336






337
338
339
340
341
342
343




344
345
346
347
348




349
350
351
352
353











354
355
356
357
358
359
360
361
362
363
364
365



366
367
368
369






370
371
372
373
374
375
376
























377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402

























403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429




430
431
432
433
434




435
436
437
438
439














440
441
442
443
444
445
446
447
448
449
450
451
452
453
454





455
456
457
458
459






















460
461
462
463
464
465
466







-
+







-
-
-
-
-
-
+
+
+
+
+
+







-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+

-
-
-
-
-
+
+
+
+
+

-
+

-
-
-
+
+
+

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+

-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+

-
-
-
-
-
-
-
+
+
+
+
+
+
+

-
-
-
-
-
-
+
+
+
+
+
+

-
-
-
-
+
+
+
+

-
-
-
-
+
+
+
+

-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+

-
-
-
+
+
+

-
-
-
-
-
-
+
+
+
+
+
+

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+

-
-
-
-
+
+
+
+

-
-
-
-
+
+
+
+

-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+

-
-
-
-
-
+
+
+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-








    ;; TASKS
    tasks-add
    tasks-set-state-given-param-key
    ))

(define *db-write-mutexes* (make-hash-table))

(define *server-signature* #f)
;; These are called by the server on recipt of /api calls
;;    - keep it simple, only return the actual result of the call, i.e. no meta info here
;;
;;    - returns #( flag result )
;;
(define (api:execute-requests dbstruct dat)
  (db:open-no-sync-db) ;; sets *no-sync-db*
;;   (handle-exceptions
;;    exn
;;    (let ((call-chain (get-call-chain)))
;;      (debug:print 0 *default-log-port* "WARNING: api:execute-requests received an exception from peer, dat=" dat ", exn=" exn)
;;      (print-call-chain (current-error-port))
;;      (debug:print 0 *default-log-port* " message: "  ((condition-property-accessor 'exn 'message) exn))       
  ;;   (handle-exceptions
  ;;    exn
  ;;    (let ((call-chain (get-call-chain)))
  ;;      (debug:print 0 *default-log-port* "WARNING: api:execute-requests received an exception from peer, dat=" dat ", exn=" exn)
  ;;      (print-call-chain (current-error-port))
  ;;      (debug:print 0 *default-log-port* " message: "  ((condition-property-accessor 'exn 'message) exn))       
  ;;      (vector #f (vector exn call-chain dat))) ;; return some stuff for debug if an exception happens
  (if (> *api-process-request-count* 200)
      (begin
	(if (common:low-noise-print 30 "too many threads")
	    (debug:print 0 *default-log-port* "WARNING: "*api-process-request-count*" threads, potential overload, adding 0.5 sec delay."))
	(thread-sleep! 0.5) ;; take a nap
	))
   (cond
    ((not (vector? dat))                    ;; it is an error to not receive a vector
     (vector #f (vector #f "remote must be called with a vector")))
    #;((> *api-process-request-count* 200) ;; 20)
     (debug:print 0 *default-log-port* "WARNING: api:execute-requests received an overloaded message.")
     (set! *server-overloaded* #t)
     (vector #f (vector #f 'overloaded))) ;; the inner vector is what gets returned. nope, don't know why. please refactor!
    (else  
     (let* ((cmd-in            (vector-ref dat 0))
            (cmd               (if (symbol? cmd-in)
				   cmd-in
				   (string->symbol cmd-in)))
            (params            (vector-ref dat 1))
	    (run-id            (if (null? params)
				   0
				   (car params)))
	    (write-mutex       (if (hash-table-exists? *db-write-mutexes* run-id)
				   (hash-table-ref *db-write-mutexes* run-id)
				   (let* ((newmutex (make-mutex)))
				     (hash-table-set! *db-write-mutexes* run-id newmutex)
				     newmutex)))
            (start-t           (current-milliseconds))
            (readonly-mode     (dbr:dbstruct-read-only dbstruct))
            (readonly-command  (member cmd api:read-only-queries))
            (writecmd-in-readonly-mode (and readonly-mode (not readonly-command))))
       (if (not readonly-command)
	   (mutex-lock! write-mutex))
       (let* ((res    
               (if writecmd-in-readonly-mode
                   (conc "attempt to run write command "cmd" on a read-only database")
                   (case cmd
                     ;;===============================================
                     ;; READ/WRITE QUERIES
                     ;;===============================================
  (cond
   ((not (vector? dat))                    ;; it is an error to not receive a vector
    (vector #f (vector #f "remote must be called with a vector")))
   #;((> *api-process-request-count* 200) ;; 20)
   (debug:print 0 *default-log-port* "WARNING: api:execute-requests received an overloaded message.")
   (set! *server-overloaded* #t)
   (vector #f (vector #f 'overloaded))) ;; the inner vector is what gets returned. nope, don't know why. please refactor!
   (else  
    (let* ((cmd-in            (vector-ref dat 0))
           (cmd               (if (symbol? cmd-in)
				  cmd-in
				  (string->symbol cmd-in)))
           (params            (vector-ref dat 1))
	   (run-id            (if (null? params)
				  0
				  (car params)))
	   (write-mutex       (if (hash-table-exists? *db-write-mutexes* run-id)
				  (hash-table-ref *db-write-mutexes* run-id)
				  (let* ((newmutex (make-mutex)))
				    (hash-table-set! *db-write-mutexes* run-id newmutex)
				    newmutex)))
           (start-t           (current-milliseconds))
           (readonly-mode     (dbr:dbstruct-read-only dbstruct))
           (readonly-command  (member cmd api:read-only-queries))
           (writecmd-in-readonly-mode (and readonly-mode (not readonly-command))))
      (if (not readonly-command)
	  (mutex-lock! write-mutex))
      (let* ((tmppath    (dbr:dbstruct-tmppath  dbstruct))
	     (clean-run-id (cond
			    ((number? run-id)   run-id)
			    ((equal? run-id #f) "main")
			    (else               "other")))
	     (crumbfile  (dbfile:wait-for-qif tmppath clean-run-id (cons cmd params)))
	     (res    
              (if writecmd-in-readonly-mode
                  (conc "attempt to run write command "cmd" on a read-only database")
		  (api:dispatch-request dbstruct cmd run-id params))))
	(delete-file* crumbfile)
	(if (not readonly-command)
	    (mutex-unlock! write-mutex))
	
	;; save all stats
	(let ((delta-t (- (current-milliseconds)
			  start-t))
	      (modified-cmd (if (eq? cmd 'general-call)
				(string->symbol (conc "general-call-" (car params)))
				cmd)))
	  (hash-table-set! *db-api-call-time* modified-cmd
			   (cons delta-t (hash-table-ref/default *db-api-call-time* modified-cmd '()))))
	(if writecmd-in-readonly-mode
            (begin
              #;(common:telemetry-log (conc "api-out:"(->string cmd))
              payload: `((params . ,params)
              (ok-res . #t)))
	      (vector #f res))
            (begin
              #;(common:telemetry-log (conc "api-out:"(->string cmd))
              payload: `((params . ,params)
              (ok-res . #f)))
              (vector #t res))))))))

;; indat is (cmd run-id params meta)
;;
;; WARNING: Do not print anything in the lambda of this function as it
;;          reads/writes to current in/out port
;;
(define (api:tcp-dispatch-request-make-handler dbstruct) ;; cmd run-id params)
  (assert *toppath* "FATAL: api:tcp-dispatch-request-make-handler called but *toppath* not set.")
  (if (not *server-signature*)
      (set! *server-signature* (tt:mk-signature *toppath*)))
  (lambda ()
    (let* ((indat      (deserialize))
	   (newcount   (+ *api-process-request-count* 1))
	   (delay-wait (if (> newcount 10)
			   (- newcount 10)
			   0)))
      (set! *api-process-request-count* newcount)
      (set! *db-last-access* (current-seconds))
      (match indat
	((cmd run-id params meta)
	 (let* ((status  (cond
			  ((> newcount 30) 'busy)
			  ((> newcount 15) 'loaded)
			  (else 'ok)))
		(errmsg  (case status
			   ((busy)   (conc "Server overloaded, "newcount" threads in flight"))
			   ((loaded) (conc "Server loaded, "newcount" threads in flight"))
			   (else     #f)))
		(result  (case status
			   ((busy)  (- newcount 29))
			   ((loaded) #f)
			   (else
			    (case cmd
			      ((ping) *server-signature*)
			      (else
			       (api:dispatch-request dbstruct cmd run-id params))))))
		(meta   `((wait . ,delay-wait)))
		(payload (list status errmsg result meta)))
	   (set! *api-process-request-count* (- *api-process-request-count* 1))
	   (serialize payload)))
	(else
	 (assert #f "FATAL: failed to deserialize indat "indat))))))
       

(define (api:dispatch-request dbstruct cmd run-id params)
  (db:open-no-sync-db)
  (case cmd
    ;;===============================================
    ;; READ/WRITE QUERIES
    ;;===============================================

                     ((get-keys-write)                        (db:get-keys dbstruct)) ;; force a dummy "write" query to force server; for debug in -repl
                     
                     ;; SERVERS
                     ((start-server)                    (apply server:kind-run params))
                     ((kill-server)                     (set! *server-run* #f))
    ((get-keys-write)                        (db:get-keys dbstruct)) ;; force a dummy "write" query to force server; for debug in -repl
    
    ;; SERVERS
    ((start-server)                    (apply server:kind-run params))
    ((kill-server)                     (set! *server-run* #f))

                     ;; TESTS
    ;; TESTS

                     ;;((test-set-state-status-by-id)     (apply mt:test-set-state-status-by-id dbstruct params))
                     ;;BB - commented out above because it was calling below, eventually, incorrectly (dbstruct passed to mt:test-set-state-status-by-id, which previosly did more, but now only passes thru to db:set-state-status-and-roll-up-items.
                     ((test-set-state-status-by-id)
    ;;((test-set-state-status-by-id)     (apply mt:test-set-state-status-by-id dbstruct params))
    ;;BB - commented out above because it was calling below, eventually, incorrectly (dbstruct passed to mt:test-set-state-status-by-id, which previosly did more, but now only passes thru to db:set-state-status-and-roll-up-items.
    ((test-set-state-status-by-id)

                      ;; (define (db:set-state-status-and-roll-up-items dbstruct run-id test-name item-path state status comment)
                      (db:set-state-status-and-roll-up-items
                       dbstruct
                       (list-ref params 0) ; run-id
                       (list-ref params 1) ; test-name
                       #f                  ; item-path
                       (list-ref params 2) ; state
                       (list-ref params 3) ; status
                       (list-ref params 4) ; comment
                       ))
                     
                     ((delete-test-records)             (apply db:delete-test-records dbstruct params))
                     ((delete-old-deleted-test-records) (apply db:delete-old-deleted-test-records dbstruct params))
                     ((test-set-state-status)           (apply db:test-set-state-status dbstruct params))
                     ((test-set-top-process-pid)        (apply db:test-set-top-process-pid dbstruct params))
                     ((set-state-status-and-roll-up-items) (apply db:set-state-status-and-roll-up-items dbstruct params))
                     ((set-state-status-and-roll-up-run) (apply db:set-state-status-and-roll-up-run dbstruct params)) 
                     ((top-test-set-per-pf-counts)      (apply db:top-test-set-per-pf-counts dbstruct params))
                     ((test-set-archive-block-id)       (apply db:test-set-archive-block-id dbstruct params))
     ;; (define (db:set-state-status-and-roll-up-items dbstruct run-id test-name item-path state status comment)
     (db:set-state-status-and-roll-up-items
      dbstruct
      (list-ref params 0) ; run-id
      (list-ref params 1) ; test-name
      #f                  ; item-path
      (list-ref params 2) ; state
      (list-ref params 3) ; status
      (list-ref params 4) ; comment
      ))
    
    ((delete-test-records)             (apply db:delete-test-records dbstruct params))
    ((delete-old-deleted-test-records) (apply db:delete-old-deleted-test-records dbstruct params))
    ((test-set-state-status)           (apply db:test-set-state-status dbstruct params))
    ((test-set-top-process-pid)        (apply db:test-set-top-process-pid dbstruct params))
    ((set-state-status-and-roll-up-items) (apply db:set-state-status-and-roll-up-items dbstruct params))
    ((set-state-status-and-roll-up-run) (apply db:set-state-status-and-roll-up-run dbstruct params)) 
    ((top-test-set-per-pf-counts)      (apply db:top-test-set-per-pf-counts dbstruct params))
    ((test-set-archive-block-id)       (apply db:test-set-archive-block-id dbstruct params))

                     ;; RUNS
                     ((register-run)                 (apply db:register-run dbstruct params))
                     ((set-tests-state-status)       (apply db:set-tests-state-status dbstruct params))
                     ((delete-run)                   (apply db:delete-run dbstruct params))
                     ((lock/unlock-run)              (apply db:lock/unlock-run dbstruct params))
                     ((update-run-event_time)        (apply db:update-run-event_time dbstruct params))
                     ((update-run-stats)             (apply db:update-run-stats dbstruct params))
                     ((set-var)                      (apply db:set-var dbstruct params))
                     ((inc-var)                      (apply db:inc-var dbstruct params))
		     ((dec-var)                      (apply db:dec-var dbstruct params))
                     ((del-var)                      (apply db:del-var dbstruct params))
		     ((add-var)                      (apply db:add-var dbstruct params))
    ;; RUNS
    ((register-run)                 (apply db:register-run dbstruct params))
    ((set-tests-state-status)       (apply db:set-tests-state-status dbstruct params))
    ((delete-run)                   (apply db:delete-run dbstruct params))
    ((lock/unlock-run)              (apply db:lock/unlock-run dbstruct params))
    ((update-run-event_time)        (apply db:update-run-event_time dbstruct params))
    ((update-run-stats)             (apply db:update-run-stats dbstruct params))
    ((set-var)                      (apply db:set-var dbstruct params))
    ((inc-var)                      (apply db:inc-var dbstruct params))
    ((dec-var)                      (apply db:dec-var dbstruct params))
    ((del-var)                      (apply db:del-var dbstruct params))
    ((add-var)                      (apply db:add-var dbstruct params))

                     ;; STEPS
                     ((teststep-set-status!)         (apply db:teststep-set-status! dbstruct params))
                     ((delete-steps-for-test!)       (apply db:delete-steps-for-test! dbstruct params))
                     
                     ;; TEST DATA
                     ((test-data-rollup)             (apply db:test-data-rollup dbstruct params))
                     ((csv->test-data)               (apply db:csv->test-data dbstruct params))
    ;; STEPS
    ((teststep-set-status!)         (apply db:teststep-set-status! dbstruct params))
    ((delete-steps-for-test!)       (apply db:delete-steps-for-test! dbstruct params))
    
    ;; TEST DATA
    ((test-data-rollup)             (apply db:test-data-rollup dbstruct params))
    ((csv->test-data)               (apply db:csv->test-data dbstruct params))

                     ;; MISC
                     ((sync-inmem->db)               (let ((run-id (car params)))
                                                       (db:sync-touched dbstruct run-id db:initialize-main-db force-sync: #t)))
                     ((mark-incomplete)              (apply db:find-and-mark-incomplete dbstruct params))
                     ((create-all-triggers)          (db:create-all-triggers dbstruct))
                     ((drop-all-triggers)            (db:drop-all-triggers dbstruct)) 
    ;; MISC
    ((sync-inmem->db)               (let ((run-id (car params)))
                                      (db:sync-touched dbstruct run-id db:initialize-main-db force-sync: #t)))
    ((mark-incomplete)              (apply db:find-and-mark-incomplete dbstruct params))
    ((create-all-triggers)          (db:create-all-triggers dbstruct))
    ((drop-all-triggers)            (db:drop-all-triggers dbstruct)) 

                     ;; TESTMETA
                     ((testmeta-add-record)       (apply db:testmeta-add-record dbstruct params))
                     ((testmeta-update-field)     (apply db:testmeta-update-field dbstruct params))
                     ((get-tests-tags)            (db:get-tests-tags dbstruct))
    ;; TESTMETA
    ((testmeta-add-record)       (apply db:testmeta-add-record dbstruct params))
    ((testmeta-update-field)     (apply db:testmeta-update-field dbstruct params))
    ((get-tests-tags)            (db:get-tests-tags dbstruct))

                     ;; TASKS
                     ((tasks-add)                 (apply tasks:add dbstruct params))   
                     ((tasks-set-state-given-param-key) (apply tasks:set-state-given-param-key dbstruct params))
                     ((tasks-get-last)            (apply tasks:get-last dbstruct params))
    ;; TASKS
    ((tasks-add)                 (apply tasks:add dbstruct params))   
    ((tasks-set-state-given-param-key) (apply tasks:set-state-given-param-key dbstruct params))
    ((tasks-get-last)            (apply tasks:get-last dbstruct params))

		     ;; NO SYNC DB
		     ((no-sync-set)               (apply db:no-sync-set         *no-sync-db* params))
		     ((no-sync-get/default)       (apply db:no-sync-get/default *no-sync-db* params))
		     ((no-sync-del!)              (apply db:no-sync-del!        *no-sync-db* params))
		     ((no-sync-get-lock)          (apply db:no-sync-get-lock    *no-sync-db* params))
		     
                     ;; ARCHIVES
                     ;; ((archive-get-allocations)   
                     ((archive-register-disk)     (apply db:archive-register-disk dbstruct params))
                     ((archive-register-block-name)(apply db:archive-register-block-name dbstruct params))
                     ;; ((archive-allocate-testsuite/area-to-block)(apply db:archive-allocate-testsuite/area-to-block dbstruct block-id testsuite-name areakey))
    ;; NO SYNC DB
    ((no-sync-set)               (apply db:no-sync-set         *no-sync-db* params))
    ((no-sync-get/default)       (apply db:no-sync-get/default *no-sync-db* params))
    ((no-sync-del!)              (apply db:no-sync-del!        *no-sync-db* params))
    ((no-sync-get-lock)          (apply db:no-sync-get-lock    *no-sync-db* params))
    
    ;; ARCHIVES
    ;; ((archive-get-allocations)   
    ((archive-register-disk)     (apply db:archive-register-disk dbstruct params))
    ((archive-register-block-name)(apply db:archive-register-block-name dbstruct params))
    ;; ((archive-allocate-testsuite/area-to-block)(apply db:archive-allocate-testsuite/area-to-block dbstruct block-id testsuite-name areakey))

                     ;;======================================================================
                     ;; READ ONLY QUERIES
                     ;;======================================================================
    ;;======================================================================
    ;; READ ONLY QUERIES
    ;;======================================================================

                     ;; KEYS
                     ((get-key-val-pairs)               (apply db:get-key-val-pairs dbstruct params))
                     ((get-keys)                        (db:get-keys dbstruct))
                     ((get-key-vals)                    (apply db:get-key-vals dbstruct params))
                     ((get-target)                      (apply db:get-target dbstruct params))
                     ((get-targets)                     (db:get-targets dbstruct))
    ;; KEYS
    ((get-key-val-pairs)               (apply db:get-key-val-pairs dbstruct params))
    ((get-keys)                        (db:get-keys dbstruct))
    ((get-key-vals)                    (apply db:get-key-vals dbstruct params))
    ((get-target)                      (apply db:get-target dbstruct params))
    ((get-targets)                     (db:get-targets dbstruct))

                     ;; ARCHIVES
                     ((test-get-archive-block-info)     (apply db:test-get-archive-block-info dbstruct params))
                     
                     ;; TESTS
                     ((test-toplevel-num-items)         (apply db:test-toplevel-num-items dbstruct params))
                     ((get-test-info-by-id)	       (apply db:get-test-info-by-id dbstruct params))
                     ((test-get-rundir-from-test-id)    (apply db:test-get-rundir-from-test-id dbstruct params))
                     ((get-count-tests-running-for-testname) (apply db:get-count-tests-running-for-testname dbstruct params))
                     ((get-count-tests-running)         (apply db:get-count-tests-running dbstruct params))
                     ((get-count-tests-running-in-jobgroup) (apply db:get-count-tests-running-in-jobgroup dbstruct params))
                     ;; ((delete-test-step-records)        (apply db:delete-test-step-records dbstruct params))
                     ;; ((get-previous-test-run-record)    (apply db:get-previous-test-run-record dbstruct params))
                     ((get-matching-previous-test-run-records)(apply db:get-matching-previous-test-run-records dbstruct params))
                     ((test-get-logfile-info)           (apply db:test-get-logfile-info dbstruct params))
                     ((test-get-records-for-index-file)  (apply db:test-get-records-for-index-file dbstruct params))
                     ((get-testinfo-state-status)       (apply db:get-testinfo-state-status dbstruct params))
                     ((test-get-top-process-pid)        (apply db:test-get-top-process-pid dbstruct params))
                     ((test-get-paths-matching-keynames-target-new) (apply db:test-get-paths-matching-keynames-target-new dbstruct params))
                     ((get-prereqs-not-met)             (apply db:get-prereqs-not-met dbstruct params))
                     ((get-count-tests-running-for-run-id) (apply db:get-count-tests-running-for-run-id dbstruct params))
                     ((get-not-completed-cnt)           (apply db:get-not-completed-cnt  dbstruct params)) 
                     ;; ((synchash-get)                    (apply synchash:server-get dbstruct params))
                     ((get-raw-run-stats)               (apply db:get-raw-run-stats dbstruct params))
		     ((get-test-times)                  (apply db:get-test-times dbstruct params))
    ;; ARCHIVES
    ((test-get-archive-block-info)     (apply db:test-get-archive-block-info dbstruct params))
    
    ;; TESTS
    ((test-toplevel-num-items)         (apply db:test-toplevel-num-items dbstruct params))
    ((get-test-info-by-id)	       (apply db:get-test-info-by-id dbstruct params))
    ((get-test-state-status-by-id)     (apply db:get-test-state-status-by-id dbstruct params))
    ((test-get-rundir-from-test-id)    (apply db:test-get-rundir-from-test-id dbstruct params))
    ((get-count-tests-running-for-testname) (apply db:get-count-tests-running-for-testname dbstruct params))
    ((get-count-tests-running)         (apply db:get-count-tests-running dbstruct params))
    ((get-count-tests-running-in-jobgroup) (apply db:get-count-tests-running-in-jobgroup dbstruct params))
    ;; ((delete-test-step-records)        (apply db:delete-test-step-records dbstruct params))
    ;; ((get-previous-test-run-record)    (apply db:get-previous-test-run-record dbstruct params))
    ((get-matching-previous-test-run-records)(apply db:get-matching-previous-test-run-records dbstruct params))
    ((test-get-logfile-info)           (apply db:test-get-logfile-info dbstruct params))
    ((test-get-records-for-index-file)  (apply db:test-get-records-for-index-file dbstruct params))
    ((get-testinfo-state-status)       (apply db:get-testinfo-state-status dbstruct params))
    ((test-get-top-process-pid)        (apply db:test-get-top-process-pid dbstruct params))
    ((test-get-paths-matching-keynames-target-new) (apply db:test-get-paths-matching-keynames-target-new dbstruct params))
    ((get-prereqs-not-met)             (apply db:get-prereqs-not-met dbstruct params))
    ((get-count-tests-running-for-run-id) (apply db:get-count-tests-running-for-run-id dbstruct params))
    ((get-not-completed-cnt)           (apply db:get-not-completed-cnt  dbstruct params)) 
    ;; ((synchash-get)                    (apply synchash:server-get dbstruct params))
    ((get-raw-run-stats)               (apply db:get-raw-run-stats dbstruct params))
    ((get-test-times)                  (apply db:get-test-times dbstruct params))

                     ;; RUNS
                     ((get-run-info)                 (apply db:get-run-info dbstruct params))
                     ((get-run-status)               (apply db:get-run-status dbstruct params))
                     ((get-run-state)                (apply db:get-run-state dbstruct params))
                     ((set-run-status)               (apply db:set-run-status dbstruct params))
                     ((set-run-state-status)  			 (apply db:set-run-state-status dbstruct params))
                     ((update-tesdata-on-repilcate-db) (apply db:update-tesdata-on-repilcate-db  dbstruct params)) 
                     ((get-tests-for-run)            (apply db:get-tests-for-run dbstruct params))
                     ((get-tests-for-run-state-status) (apply db:get-tests-for-run-state-status dbstruct params))
                     ((get-test-id)                  (apply db:get-test-id dbstruct params))
                     ((get-tests-for-run-mindata)    (apply db:get-tests-for-run-mindata dbstruct params))
                     ;; ((get-tests-for-runs-mindata)   (apply db:get-tests-for-runs-mindata dbstruct params))
                     ((get-runs)                     (apply db:get-runs dbstruct params))
                     ((simple-get-runs)              (apply db:simple-get-runs dbstruct params))
                     ((get-num-runs)                 (apply db:get-num-runs dbstruct params))
                     ((get-runs-cnt-by-patt)         (apply db:get-runs-cnt-by-patt dbstruct params))
                     ((get-all-run-ids)              (db:get-all-run-ids dbstruct))
                     ((get-prev-run-ids)             (apply db:get-prev-run-ids dbstruct params))
                     ((get-run-ids-matching-target)  (apply db:get-run-ids-matching-target dbstruct params))
                     ((get-runs-by-patt)             (apply db:get-runs-by-patt dbstruct params))
                     ((get-run-name-from-id)         (apply db:get-run-name-from-id dbstruct params))
                     ((get-main-run-stats)           (apply db:get-main-run-stats dbstruct params))
                     ((get-var)                      (apply db:get-var dbstruct params))
                     ((get-run-stats)                (apply db:get-run-stats dbstruct params))
                     ((get-run-times)                (apply db:get-run-times dbstruct params)) 
    ;; RUNS
    ((get-run-info)                 (apply db:get-run-info dbstruct params))
    ((get-run-status)               (apply db:get-run-status dbstruct params))
    ((get-run-state)                (apply db:get-run-state dbstruct params))
    ((get-run-state-status)         (apply db:get-run-state-status dbstruct params))
    ((set-run-status)               (apply db:set-run-status dbstruct params))
    ((set-run-state-status)  			 (apply db:set-run-state-status dbstruct params))
    ((update-tesdata-on-repilcate-db) (apply db:update-tesdata-on-repilcate-db  dbstruct params)) 
    ((get-tests-for-run)            (apply db:get-tests-for-run dbstruct params))
    ((get-tests-for-run-state-status) (apply db:get-tests-for-run-state-status dbstruct params))
    ((get-test-id)                  (apply db:get-test-id dbstruct params))
    ((get-tests-for-run-mindata)    (apply db:get-tests-for-run-mindata dbstruct params))
    ;; ((get-tests-for-runs-mindata)   (apply db:get-tests-for-runs-mindata dbstruct params))
    ((get-runs)                     (apply db:get-runs dbstruct params))
    ((simple-get-runs)              (apply db:simple-get-runs dbstruct params))
    ((get-num-runs)                 (apply db:get-num-runs dbstruct params))
    ((get-runs-cnt-by-patt)         (apply db:get-runs-cnt-by-patt dbstruct params))
    ((get-all-run-ids)              (db:get-all-run-ids dbstruct))
    ((get-prev-run-ids)             (apply db:get-prev-run-ids dbstruct params))
    ((get-run-ids-matching-target)  (apply db:get-run-ids-matching-target dbstruct params))
    ((get-runs-by-patt)             (apply db:get-runs-by-patt dbstruct params))
    ((get-run-name-from-id)         (apply db:get-run-name-from-id dbstruct params))
    ((get-main-run-stats)           (apply db:get-main-run-stats dbstruct params))
    ((get-var)                      (apply db:get-var dbstruct params))
    ((get-run-stats)                (apply db:get-run-stats dbstruct params))
    ((get-run-times)                (apply db:get-run-times dbstruct params)) 

                     ;; STEPS
                     ((get-steps-data)               (apply db:get-steps-data dbstruct params))
                     ((get-steps-for-test)           (apply db:get-steps-for-test dbstruct params))
		     ((get-steps-info-by-id)         (apply db:get-steps-info-by-id dbstruct params))
    ;; STEPS
    ((get-steps-data)               (apply db:get-steps-data dbstruct params))
    ((get-steps-for-test)           (apply db:get-steps-for-test dbstruct params))
    ((get-steps-info-by-id)         (apply db:get-steps-info-by-id dbstruct params))

                     ;; TEST DATA
                     ((read-test-data)               (apply db:read-test-data dbstruct params))
                     ((read-test-data-varpatt)       (apply db:read-test-data-varpatt dbstruct params))
                     ((get-data-info-by-id)          (apply db:get-data-info-by-id dbstruct params)) 
    ;; TEST DATA
    ((read-test-data)               (apply db:read-test-data dbstruct params))
    ((read-test-data-varpatt)       (apply db:read-test-data-varpatt dbstruct params))
    ((get-data-info-by-id)          (apply db:get-data-info-by-id dbstruct params)) 

                     ;; MISC
                     ((get-latest-host-load)         (apply db:get-latest-host-load dbstruct params))
                     ((have-incompletes?)            (apply db:have-incompletes? dbstruct params))
                     ((login)                        (apply db:login dbstruct params))
                     ((general-call)                 (let ((stmtname   (car params))
                                                           (run-id     (cadr params))
                                                           (realparams (cddr params)))
                                                       (db:general-call dbstruct run-id stmtname realparams)))
                     ((sdb-qry)                      (apply sdb:qry params))
                     ((ping)                         (current-process-id))
		     ((get-changed-record-ids)       (apply db:get-changed-record-ids dbstruct params))
		     ((get-run-record-ids) 	   (apply db:get-run-record-ids dbstruct params))	
                     ;; TESTMETA
                     ((testmeta-get-record)       (apply db:testmeta-get-record dbstruct params))
    ;; MISC
    ((get-latest-host-load)         (apply db:get-latest-host-load dbstruct params))
    ((have-incompletes?)            (apply db:have-incompletes? dbstruct params))
    ((login)                        (apply db:login dbstruct params))
    ((general-call)                 (let ((stmtname   (car params))
                                          (run-id     (cadr params))
                                          (realparams (cddr params)))
                                      (db:general-call dbstruct run-id stmtname realparams)))
    ((sdb-qry)                      (apply sdb:qry params))
    ((ping)                         (current-process-id))
    ((get-changed-record-ids)       (apply db:get-changed-record-ids dbstruct params))
    ((get-run-record-ids) 	   (apply db:get-run-record-ids dbstruct params))	
    ;; TESTMETA
    ((testmeta-get-record)       (apply db:testmeta-get-record dbstruct params))

                     ;; TASKS 
                     ((find-task-queue-records)   (apply tasks:find-task-queue-records dbstruct params))
		     (else
		      (debug:print 0 *default-log-port* "ERROR: bad api call " cmd)
		      (conc "ERROR: BAD api call " cmd))))))
    ;; TASKS 
    ((find-task-queue-records)   (apply tasks:find-task-queue-records dbstruct params))
    (else
     (debug:print 0 *default-log-port* "ERROR: bad api call " cmd)
     (conc "ERROR: BAD api call " cmd))))
	 (if (not readonly-command)
	     (mutex-unlock! write-mutex))
	 
	 ;; save all stats
	 (let ((delta-t (- (current-milliseconds)
			   start-t))
	       (modified-cmd (if (eq? cmd 'general-call)
				 (string->symbol (conc "general-call-" (car params)))
				 cmd)))
	   (hash-table-set! *db-api-call-time* modified-cmd
			    (cons delta-t (hash-table-ref/default *db-api-call-time* modified-cmd '()))))
	 (if writecmd-in-readonly-mode
             (begin
               #;(common:telemetry-log (conc "api-out:"(->string cmd))
               payload: `((params . ,params)
               (ok-res . #t)))
	       (vector #f res))
             (begin
               #;(common:telemetry-log (conc "api-out:"(->string cmd))
               payload: `((params . ,params)
               (ok-res . #f)))
               (vector #t res))))))))

;; http-server  send-response
;;                 api:process-request
;;                    db:*
;;
;; NB// Runs on the server as part of the server loop
;;

Modified archive.scm from [25e6383e3d] to [220e8f084a].

19
20
21
22
23
24
25


26
27
28
29
30
31
32
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34







+
+







;;  strftime('%m/%d/%Y %H:%M:%S','now','localtime')

(use (prefix sqlite3 sqlite3:) srfi-1 posix regex regex-case srfi-69 format md5 message-digest srfi-18)

(declare (unit archive))
(declare (uses db))
(declare (uses common))
(declare (uses commonmod))
(import commonmod)

(include "common_records.scm")
(include "db_records.scm")

;;======================================================================
;; 
;;======================================================================

Added artifacts.scm version [2d0631d1c4].

























1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
;;======================================================================
;; Copyright 2019, Matthew Welland.
;; 
;; This file is part of Megatest.
;; 
;;     Megatest is free software: you can redistribute it and/or modify
;;     it under the terms of the GNU General Public License as published by
;;     the Free Software Foundation, either version 3 of the License, or
;;     (at your option) any later version.
;; 
;;     Megatest is distributed in the hope that it will be useful,
;;     but WITHOUT ANY WARRANTY; without even the implied warranty of
;;     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;;     GNU General Public License for more details.
;; 
;;     You should have received a copy of the GNU General Public License
;;     along with Megatest.  If not, see <http://www.gnu.org/licenses/>.

;;======================================================================

(declare (unit artifacts))

(include "artifacts/artifacts.scm")

Added artifacts/README version [f734b55b91].


1
+
NOTE: keep megatest/artifacts/ in sync with datastore/artifacts

Added artifacts/artifacts.meta version [9ccf727941].






















1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
;; -*- scheme -*-
(
; Your egg's license:
(license "BSD")

; Pick one from the list of categories (see below) for your egg and enter it
; here.
(category db)

; A list of eggs pkts depends on.  If none, you can omit this declaration
; altogether. If you are making an egg for chicken 3 and you need to use
; procedures from the `files' unit, be sure to include the `files' egg in the
; `needs' section (chicken versions < 3.4.0 don't provide the `files' unit).
; `depends' is an alias to `needs'.
;; (needs (autoload "3.0"))

; A list of eggs required for TESTING ONLY.  See the `Tests' section.
(test-depends test)

(author "Matt Welland")
(synopsis "A sha1-chain based datastore similar to the data format in fossil scm, consisting of artifacts of single line cards."))

Added artifacts/artifacts.release-info version [fbbc2937bb].




1
2
3
+
+
+
(repo fossil "http://www.kiatoa.com/cgi-bin/fossils/{egg-name}")
(uri zip "http://www.kiatoa.com/cgi-bin/fossils/{egg-name}/zip/{egg-name}.zip?uuid={egg-release}")
(release "1.0")

Added artifacts/artifacts.scm version [b5b4746c14].

























































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
;; Copyright 2006-2017, Matthew Welland.
;; 
;; This file is part of artifacts
;; 
;;     Pkts is free software: you can redistribute it and/or modify
;;     it under the terms of the GNU General Public License as published by
;;     the Free Software Foundation, either version 3 of the License, or
;;     (at your option) any later version.
;; 
;;     Pkts is distributed in the hope that it will be useful,
;;     but WITHOUT ANY WARRANTY; without even the implied warranty of
;;     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;;     GNU General Public License for more details.
;; 
;;     You should have received a copy of the GNU General Public License
;;     along with Pkts.  If not, see <http://www.gnu.org/licenses/>.
;;

;; CARDS:
;;
;; A card is a line of text, the first two characters are a letter followed by a
;;   space. The letter is the card type.
;;
;; artifact:
;;
;; An artifact is a sorted list of cards with a final card Z that contains the shar1 hash
;;   of all of the preceding cards.
;;
;; AARTIFACT:
;;
;;  An alist mapping card types to card data
;;      '((T . "artifacttype")
;;        (a . "some content"))
;;
;; EARTIFACT:
;;
;;  Extended packet using friendly keys. Must use a artifactspec to convert to/from eartifacts
;;    '((ptype . "artifacttype")
;;      (adata . "some content))
;;
;; DARTIFACT:
;;
;; artifacts pulled from the database have this format:
;;
;;((aartifact (Z . "7de89c09ac024b3832c93e16cd78d11e2e28733b")     <= this is a the alist
;;       (t . "v1.63/tip/dev")
;;       (c . "QUICKPATT")
;;       (T . "runstart")
;;       (P . "354eeb67120a921e3e33155ecab1b522ac10b6bd")
;;       (D . "1488995096.0"))
;;  (id . 8)
;;  (group-id . 0)
;;  (uuid . "7de89c09ac024b3832c93e16cd78d11e2e28733b")
;;  (parent . "")
;;  (artifact-type . "runstart")
;;  (artifact . "D 1488995096.0\nP 354eeb67120a921e3e33155ecab1b522ac10b6bd\nT runstart\nc QUICKPATT\nt v1.63/tip/dev\nZ 7de89c09ac024b3832c93e16cd78d11e2e28733b"))
;;
;; artifactspec is alist of alists mapping types and nicekeys to keys
;;
;; '((posting . ((title . t)
;;               (url   . u)
;;               (blurb . b)))
;;   (comment . ((comment . c)
;;               (score   . s))))

;; Reserved cards:
;;   P      : artifact parent
;;   R      : reference artifact containing mapping of short string -> sha1sum strings
;;   T      : artifact type
;;   D      : current time from (current-time), unless provided
;;   Z      : shar1 hash of the packet

;; Example usage:
;;
;; Create a artifact:
;;
;; (use artifacts)
;; (define-values (uuid artifact)
;;     (alist->artifact
;;       '((fruit . "apple") (meat . "beef"))  ;; this is the data to convert
;;       '((foods (fruit . f) (meat . m)))     ;; this is the artifact spec
;;       ptype:
;;       'foods))
;;
;; Add to artifact queue:
;;
;; (define db (open-queue-db "/tmp/artifacts" "artifacts.db"))
;; (add-to-queue db artifact uuid 'foods #f 0) ;; no parent and use group_id of 0
;;
;; Retrieve the packet from the db and extract a value:
;;
;; (alist-ref
;;    'meat
;;    (dartifact->alist
;;         (car (get-dartifacts db #f 0 #f))
;;        '((foods (fruit . f)
;;                 (meat . m)))))
;; => "beef"
;;

(module artifacts
(
;; cards, util and misc
;; sort-cards
;; calc-sha1
;;
;; low-level constructor procs, exposed only for development/testing, will be removed
construct-sdat
construct-artifact     
card->type/value  
add-z-card

;; queue database procs
open-queue-db
add-to-queue
create-and-queue
;; lookup-by-uuid
lookup-by-id
get-dartifacts
get-not-processed-artifacts
get-related
find-artifacts
process-artifacts
get-descendents
get-ancestors
get-artifacts
;; get-last-descendent
;; with-queue-db
;; load-artifacts-to-db

;; procs that operate directly on artifacts, sdat, aartifacts, dartifacts etc.
artifact->alist    ;; artifact -> aartifact (i.e. alist)
artifact->sdat     ;; artifact -> '("a aval" "b bval" ...)
sdat->alist   ;; '("a aval" "b bval"...) -> ((a . "aval")(b . "bval") ...)
dblst->dartifacts  ;; convert list of tuples from queue db into dartifacts
dartifact->alist   ;; flatten a dartifact into an alist containing all db fields and the artifact alist
dartifacts->alists ;; apply dartifact->alist to a list of alists using a artifact-spec
alist->artifact    ;; returns two values uuid, artifact
get-value     ;; looks up a value given a key in a dartifact
flatten-all   ;; merge the list of values from a query which includes a artifact into a flat alist <== really useful!
check-artifact

;; artifact alists
write-alist->artifact
read-artifact->alist

;; archive database
;; archive-open-db
;; write-archive-artifacts
;; archive-artifacts
;; mark-processed

;; artifactsdb
artifactdb-conn     ;; useful
artifactdb-fname
artifactsdb-open
artifactsdb-close
artifactsdb-add-record
;; temporary
artifactdb-artifactspec

;; utility procs
increment-string ;; used to get indexes for strings in ref artifacts
make-report      ;; make a .dot file
calc-sha1
uuid-first-two-letters
uuid-remaining-letters

;; file and directory utils
multi-glob
capture-dir
file-get-sha1
check-same
link-or-copy
same-partition?
link-if-same-partition
archive-copy
write-to-archive
artifact-rollup
read-artifacts-into-hash
hash-of-artifacts->bundle
archive-dest

;; pathname-full-filename

;; minimal artifact functions
minimal-artifact-read
minimal-artifact->alist
afact-get-D
afact-get-Z
afact-get-T
afact-get
afact-get-number/default


;; bundles
write-bundle
read-bundle

;; new artifacts db
with-todays-adb
get-all-artifacts
refresh-artifacts-db

)

(import (chicken base) scheme (chicken process) (chicken time posix)
	(chicken io) (chicken file) (chicken pathname)
        chicken.process-context.posix (chicken string)
	(chicken time) (chicken sort) (chicken file posix) (chicken condition) srfi-1
	regex srfi-13 srfi-69 (chicken port) (chicken process-context)
	crypt sha1 matchable message-digest sqlite3 typed-records
	directory-utils
	scsh-process)

;;======================================================================
;; DATA MANIPULATION UTILS
;;======================================================================

(define-inline (unescape-data data)
  (string-translate* data '(("\\n" . "\n") ("\\\\" . "\\"))))

(define-inline (escape-data data)
  (string-translate* data '(("\n" . "\\n") ("\\" . "\\\\"))))

(define-inline (make-card type data)
  (conc type " " (escape-data (->string data))))

;; reverse an alist for doing artifactkey -> external key conversions
;;
(define-inline (reverse-aspec aspec)
  (map (lambda (dat)
	 (cons (cdr dat)(car dat)))
       aspec))

;; add a card to the list of cards, sdat
;; if type is #f return only sdat
;; if data is #f return only sdat
;;
(define-inline (add-card sdat type data)
  (if (and type data)
      (cons (make-card type data) sdat)
      sdat))

;;======================================================================
;; STRING AS FUNKY NUMBER
;;======================================================================

;; NOTE: PTDZ are removed as they are reserved. NB// the R card is not used in a
;;       ref, instead the P parent card is used.
;;       Question: Why does it matter to remove PTDZ?
;;                 To make the ref easier to use the ref strings will be the keys
;;                 so we cannot have overlap with any actual keys. But this is a
;;                 bit silly. What we need to do instead is reject keys of length
;;                 one where the char is in PTDZ
;;
;; This is basically base92
;;
(define string-num-chars (string->list "!#$%&'()*+,-./0123456789:;<=>?@ABCEFGHIJKLMNOQRSUVWXY[\\]^_abcdefghijklmnopqrstuvwxyz{|}~"))
;; "0123456789abcdefghijklmnopqrstuvwxyzABCEFGHIJKLMNOQSUVWXY!#$%&'()*+,-./[]:;<=>?\\^_{}|"))

(define (char-incr inchar)
  (let* ((carry     #f)
	 (next-char (let ((rem (member inchar string-num-chars)))
		      (if (eq? (length rem) 1) ;; we are at the last character in our string-num-chars list
			  (begin
			    (set! carry #t)
			    (car string-num-chars))
			  (cadr rem)))))
    (values next-char carry)))
    
(define (increment-string str)
  (if (string-null? str)
      "0"
      (let ((strlst (reverse (string->list str)))) ;; need to process the string from the lsd
	(list->string
	 (let loop ((hed (car strlst))
		    (tal (cdr strlst))
		    (res '()))
	   (let-values (((newhed carry)(char-incr hed)))
	     ;; (print "newhed: " newhed " carry: " carry " tal: " tal)
	     (let ((newres (cons newhed res)))
	       (if carry ;; we'll have to propagate the carry
		   (if (null? tal) ;; at the end, tack on "0" (which is really a "1")
		       (cons (car string-num-chars) newres)
		       (loop (car tal)(cdr tal) newres))
		   (append (reverse tal) newres)))))))))
    
;;======================================================================
;; P K T S D B   I N T E R F A C E
;;
;; INTEGER, REAL, TEXT
;;======================================================================
;;
;; spec
;;  ( (tablename1 . (field1name L1 TYPE)
;;                  (field2name L2 TYPE) ... )
;;    (tablename2 ... ))
;;
;;  Example: (tests (testname n TEXT)
;;                  (rundir   r TEXT)
;;                   ... )
;;
;; artifact keys are taken from the first letter, if that is not unique
;; then look at the next letter and so on
;;

;; simplify frequent need to get one result with default
;;
(define (get-one db default qry . params)
  (apply fold-row
   car
   default
   db
   qry
   params))

(define (get-rows db qry . params)
  (apply fold-row
   cons
   db
   qry
   params))
  
;; use this struct to hold the artifactspec and the db handle
;;
(defstruct artifactdb
  (fname       #f)
  (artifactsdb-spec #f)
  (artifactspec     #f)  ;; cache the artifactspec
  (field-keys  #f)  ;; cache the field->key mapping (field1 . k1) ...
  (key-fields  #f)  ;; cache the key->field mapping
  (conn        #f)
  )

;; WARNING: There is a simplification in the artifactsdb spec w.r.t. artifactspec.
;;          The field specs are the cdr of the table list - not a full
;;          list. The extra list level in artifactspec is gratuitous and should
;;          be removed.
;;
(define (artifactsdb-spec->artifactspec tables-spec)
  (map (lambda (tablespec)
	 (list (car tablespec)
	       (map (lambda (field-spec)
		      (cons (car field-spec)(cadr field-spec)))
		    (cdr tablespec))))
       tables-spec))

(define (artifactsdb-open dbfname artifactsdb-spec)
  (let* ((pdb      (make-artifactdb))
	 (dbexists (file-exists? dbfname))
	 (db       (open-database dbfname)))
    (artifactdb-artifactsdb-spec-set! pdb artifactsdb-spec)
    (artifactdb-artifactspec-set!     pdb (artifactsdb-spec->artifactspec artifactsdb-spec))
    (artifactdb-fname-set!       pdb dbfname)
    (artifactdb-conn-set!        pdb db)
    (if (not dbexists)
	(artifactsdb-init pdb))
    pdb))

(define (artifactsdb-init artifactsdb)
  (let* ((db          (artifactdb-conn artifactsdb))
	 (artifactsdb-spec (artifactdb-artifactsdb-spec artifactsdb)))
    ;; create a table for the artifacts themselves
    (execute db "CREATE TABLE IF NOT EXISTS artifacts (id INTEGER PRIMARY KEY, zkey TEXT, record_id INTEGER, artifact TEXT);")
    (for-each
     (lambda (table)
       (let* ((table-name (car table))
	      (fields     (cdr table))
	      (stmt (conc "CREATE TABLE IF NOT EXISTS "
			  table-name
			  " (id INTEGER PRIMARY KEY,"
			  (string-intersperse
			   (map (lambda (fieldspec)
				  (conc (car fieldspec) " "
					(caddr fieldspec)))
				fields)
			   ",")
			  ");")))
	 (execute db stmt)))
     artifactsdb-spec)))

;; create artifact from the data and insert into artifacts table
;; 
;; data is assoc list of (field . value) ...
;; tablename is a symbol matching the table name
;;
(define (artifactsdb-add-record artifactsdb tablename data #!optional (parent #f))
  (let*-values (((zkey artifact) (alist->artifact data (artifactdb-artifactspec artifactsdb) ptype: tablename)))
    ;; have the data as alist so insert it into appropriate table also
    (let* ((db        (artifactdb-conn artifactsdb)))
      ;; TODO: Address collisions
      (execute db "INSERT INTO artifacts (zkey,artifact,record_id) VALUES (?,?,?);"
		zkey artifact -1)
      (let* (;; (artifactid     (artifactsdb-artifactkey->artifactid artifactsdb artifactkey))
	     (record-id (artifactsdb-insert artifactsdb tablename data)))
	(execute db "UPDATE artifacts SET record_id=? WHERE zkey=?;"
		  record-id zkey)
      ))))

;; 
(define (artifactsdb-insert artifactsdb tablename data)
  (let* ((db (artifactdb-conn artifactsdb))
	 (stmt (conc "INSERT INTO " tablename
		     " (" (string-intersperse (map conc (map car data)) ",")
		     ") VALUES ('"
		     ;; TODO: Add lookup of data type and do not
		     ;;       wrap integers with quotes
		     (string-intersperse (map conc (map cdr data)) "','")
		     "');")))
    (print "stmt: " stmt)
    (execute db stmt)
    ;; lookup the record-id and return it
    
    ))
    
(define (artifactsdb-close artifactsdb)
  (finalize! (artifactdb-conn artifactsdb)))

;; (let loop ((s "0")(n 0))(print s)(if (< n 5000)(loop (increment-string s)(+ n 1))))

;;======================================================================
;; CARDS, MISC and UTIL
;;======================================================================

;; given string (likely multi-line) "dat" return shar1 hash
;;
(define (calc-sha1 instr)
  (message-digest-string
   (sha1-primitive)
   instr))

;; given a single card return its type and value
;;
(define (card->type/value card)
  (let ((ctype (substring card 0 1))
	(cval  (substring card 2 (string-length card))))
    (values (string->symbol ctype) cval)))

;;======================================================================
;; SDAT procs
;;  sdat is legacy/internal usage. Intention is to remove sdat calls from
;;  the exposed calls.
;;======================================================================

;; sort list of cards
;;
(define-inline (sort-cards sdat)
  (sort sdat string<=?))

;; artifact rules
;; 1. one card per line
;; 2. at least one card
;; 3. no blank lines

;; given sdat, a list of cards return uuid, packet (as sdat)
;;
(define (add-z-card sdat)
  (let* ((sorted-sdat (sort-cards sdat))
	 (dat         (string-intersperse sorted-sdat "\n"))
	 (uuid        (calc-sha1 dat)))
    (values
     uuid
     (conc
      dat
      "\nZ "
      uuid))))

(define (check-artifact artifact)
  (handle-exceptions
      exn
      #f ;; anything goes wrong - call it a crappy artifact
    (let* ((sdat (string-split artifact "\n"))
	   (rdat (reverse sdat)) ;; reversed
	   (zdat (car rdat))
	   (Z    (cadr (string-split zdat)))
	   (cdat (string-intersperse (reverse (cdr rdat)) "\n")))
      (equal? Z (calc-sha1 cdat)))))

;;======================================================================
;; AARTIFACTs
;;======================================================================

;; convert a sdat (list of cards) to an alist
;;
(define (sdat->alist sdat)
  (let loop ((hed (car sdat))
	     (tal (cdr sdat))
	     (res '()))
    (let-values (( (ctype cval)(card->type/value hed) ))
      ;; if this card is not one of the common ones tack it on to rem
      (let* ((oldval (alist-ref ctype res))
	     (newres (cons (cons ctype
				 (if oldval ;; list or string
				     (if (list? oldval)
					 (cons cval oldval)
					 (cons cval (list oldval)))
				     cval))
			   res)))
	(if (null? tal)
	    newres
	    (loop (car tal)(cdr tal) newres))))))

;;((aartifact (Z . "7de89c09ac024b3832c93e16cd78d11e2e28733b")     <= this is a the alist
;;       (t . "v1.63/tip/dev")
;;       (c . "QUICKPATT")
;;       (T . "runstart")
;;       (P . "354eeb67120a921e3e33155ecab1b522ac10b6bd")
;;       (D . "1488995096.0"))
;;  (id . 8)
;;  (group-id . 0)
;;  (uuid . "7de89c09ac024b3832c93e16cd78d11e2e28733b")
;;  (parent . "")
;;  (artifact-type . "runstart")
;;  (artifact . "D 1488995096.0\nP 354eeb67120a921e3e33155ecab1b522ac10b6bd\nT runstart\nc QUICKPATT\nt v1.63/tip/dev\nZ 7de89c09ac024b3832c93e16cd78d11e2e28733b"))
;;
;; artifactspec is alist of alists mapping types and nicekeys to keys
;;
;; '((posting . ((title . t)
;;               (url   . u)
;;               (blurb . b)))
;;   (comment . ((comment . c)
;;               (score   . s))))

;; DON'T USE? 
;;
(define (get-value field dartifact . spec-in)
  (if (null? spec-in)
      (alist-ref field dartifact)
      (let* ((spec  (car spec-in))
	     (aartifact  (alist-ref 'aartifact dartifact))) ;; get the artifact alist
	(if (and aartifact spec)
	    (let* ((ptype (alist-ref 'artifact-type dartifact))
		   (pspec (alist-ref (string->symbol ptype) spec))) ;; do we have a spec for this type of artifact
	      (and pspec
		  (let* ((key (alist-ref field pspec)))
		    (and key (alist-ref key aartifact)))))
	    #f))))

;; convert a dartifact to a pure alist given a artifactspec
;; this flattens out the alist to include the data from
;; the queue database record
;;
(define (dartifact->alist dartifact artifactspec)
  (let* ((aartifact       (alist-ref 'aartifact dartifact))
	 (artifact-type   (or (alist-ref 'artifact-type dartifact) ;; artifact-type is from the database field artifact_type
			 (alist-ref 'T aartifact)))
	 (artifact-fields (alist-ref (string->symbol artifact-type) artifactspec))
	 (rev-fields (if artifact-fields
			 (reverse-aspec artifact-fields)
			 '())))
    (append (map (lambda (entry)
		   (let* ((artifact-key (car entry))
			  (new-key (or (alist-ref artifact-key rev-fields) artifact-key)))
		     `(,new-key . ,(cdr entry))))
		 aartifact)
	    dartifact)))

;; convert a list of dartifacts into a list of alists using artifact-spec
;;
(define (dartifacts->alists dartifacts artifact-spec)
   (map (lambda (x)
	  (dartifact->alist x artifact-spec))
	dartifacts))

;; Generic flattener, make the tuple and artifact into a single flat alist
;;
;; qry-result-spec is a list of symbols corresponding to each field
;;
(define (flatten-all inlst artifactspec . qry-result-spec)
  (map
   (lambda (tuple)
     (dartifact->alist
      (apply dblst->dartifacts tuple qry-result-spec)
      artifactspec))
   inlst))

;; call like this:
;;  (construct-sdat 'a "a data" 'S "S data" ...)
;; returns list of cards
;;  ( "A a value" "D 12345678900" ...)
;;
(define (construct-sdat . alldat)
  (let ((have-D-card #f)) ;; flag
    (if (even? (length alldat))
	(let loop ((type (car alldat))
		   (data (cadr alldat))
		   (tail (cddr alldat))
		   (res  '()))
	  (if (eq? type 'D)(set! have-D-card #t))
	  (if (null? tail)
	      (if have-D-card ;; return the constructed artifact, add a D card if none found
		  (add-card res type data)
		  (add-card 
		   (add-card res 'D (current-seconds))
		   type data))
	      (loop (car tail)
		    (cadr tail)
		    (cddr tail)
		    (add-card res type data))))
	#f))) ;; #f means it failed to create the sdat

(define (construct-artifact . alldat)
  (add-z-card
   (apply construct-sdat alldat)))

;;======================================================================
;; CONVERTERS
;;======================================================================

(define (artifact->sdat artifact)
  (map unescape-data (string-split artifact "\n")))

;; given a pure artifact return an alist
;;
(define (artifact->alist artifact #!key (artifactspec #f))
  (let ((sdat (cond
	       ((string? artifact)  (artifact->sdat artifact))
	       ((list? artifact)    artifact)
	       (else #f))))
    (if artifact
	(if artifactspec
	    (dartifact->alist (list (cons 'aartifact (sdat->alist sdat))) artifactspec)
	    (sdat->alist sdat))
	#f)))

;; convert an alist to an sdat
;;  in: '((a . "blah")(b . "foo"))
;; out: '("a blah" "b foo")
;;
(define (alist->sdat adat)
  (map (lambda (dat)
	 (conc (car dat) " " (cdr dat)))
       adat))

;; adat is the incoming alist, aspec is the mapping
;; from incoming key to the artifact key (usually one
;; letter to keep data tight) see the artifactspec at the
;; top of this file
;;
;; NOTE: alists can contain multiple instances of the same key (supported fine by artifacts)
;;       but you (obviously I suppose) cannot use alist-ref to access those entries.
;;
(define (alist->artifact adat aspec #!key (ptype #f)(no-d #f))
  (let* ((artifact-type (or ptype
			    (alist-ref 'T adat) ;; can provide in the incoming alist
			    #f))
	 (artifact-spec (if artifact-type            ;; alist of external-key -> key
			    (or (alist-ref artifact-type aspec) '())
			    (if (null? aspec)
				'()
				(cdar aspec)))) ;; default to first one if nothing specified
	 (new-alist (map (lambda (dat)
			   (let* ((key    (car dat))
				  (val    (cdr dat))
				  (newkey (or (alist-ref key artifact-spec)
					      key)))
			     (cons newkey (escape-data (conc val))))) ;; convert all incoming data (symbols, numbers etc.) to a string and then escape newlines.
			 adat))
	 (new-with-type (if (alist-ref 'T new-alist)
			    new-alist
			    (cons `(T . ,artifact-type) new-alist)))
	 (with-d-card   (if (or no-d ;; no timestamp wanted
				(alist-ref 'D new-with-type))
			    new-with-type
			    (cons `(D . ,(current-seconds))
				  new-with-type))))
    (add-z-card
     (alist->sdat with-d-card))))

;;======================================================================
;;  D B   Q U E U E   I N T E R F A C E
;;======================================================================

;; artifacts (
;;   id SERIAL PRIMARY KEY,
;;   uuid TEXT NOT NULL,
;;   parent_uuid TEXT default '',
;;   artifact_type INTEGER DEFAULT 0,
;;   group_id INTEGER NOT NULL,
;;   artifact TEXT NOT NULL

;; schema is list of SQL statements - can be used to extend db with more tables
;;
(define (open-queue-db dbpath dbfile #!key (schema '()))
  (let* ((dbfname  (conc dbpath "/" dbfile))
	 (dbexists (if (file-exists? dbfname) #t (begin (create-directory dbpath #t) #f)))
	 (db       (open-database dbfname)))
    ;; (set-busy-handler! (dbi:db-conn db) (busy-timeout 10000))
    (if (not dbexists) ;; NOTE: In the archive we allow duplicates and other messiness. 
	(for-each
	 (lambda (stmt)
	   (execute db stmt))
	 (cons "CREATE TABLE IF NOT EXISTS artifacts
                          (id           INTEGER PRIMARY KEY,
                           group_id     INTEGER NOT NULL,
                           uuid         TEXT NOT NULL,
                           parent_uuid  TEXT TEXT DEFAULT '',
                           artifact_type     TEXT NOT NULL,
                           artifact          TEXT NOT NULL,
                           processed    INTEGER DEFAULT 0)"
		   schema))) ;; 0=not processed, 1=processed, 2... for expansion
    db))

(define (add-to-queue db artifact uuid artifact-type parent-uuid group-id)
  (execute db "INSERT INTO artifacts (uuid,parent_uuid,artifact_type,artifact,group_id)
                   VALUES(?,?,?,?,?);" ;; $1,$2,$3,$4,$5);"
	    uuid
	    (if parent-uuid parent-uuid "");; use null string as placeholder for no parent uuid.
	    (if artifact-type (conc artifact-type) "") 
	    artifact
	    group-id))

;; given all needed parameters create a artifact and store it in the queue
;;  procs is an alist that maps artifact-type to a function that takes a list of artifact params
;;  in data and returns the uuid and artifact
;;
(define (create-and-queue conn procs artifact-type parent-uuid group-id data)
  (let ((proc (alist-ref artifact-type procs)))
    (if proc
	(let-values (( (uuid artifact) (proc data) ))
	  (add-to-queue conn artifact uuid artifact-type parent-uuid group-id)
	  uuid)
	#f)))

;; given uuid get artifact, if group-id is specified use it (reduces probablity of
;;     being messed up by a uuid collision)
;;
(define (lookup-by-uuid db artifact-uuid group-id)
  (if group-id
      (get-one db "SELECT artifact FROM artifacts WHERE group_id=? AND uuid=?;" group-id artifact-uuid)
      (get-one db "SELECT artifact FROM artifacts WHERE uuid=?;" artifact-uuid)))
      
;; find a packet by its id
;;
(define (lookup-by-id db id)
  (get-one db "SELECT artifact FROM artifacts WHERE id=?;" id))


;;======================================================================
;;  P R O C E S S   P K T S
;;======================================================================

;; given a list of field values pulled from the queue db generate a list
;; of dartifact's
;;
(define (dblst->dartifacts lst . altmap)
  (let* ((maplst (if (null? altmap)
		     '(id group-id uuid parent artifact-type artifact processed)
		     altmap))
	 (res (map cons maplst lst))) ;; produces list of pairs, i.e an alist
    (cons `(aartifact . ,(artifact->alist (alist-ref 'artifact res)))
	  res)))

;; NB// ptypes is a list of symbols, '() or #f find all types
;;
(define (get-dartifacts db ptypes group-id parent-uuid #!key (uuid #f))
  (let* ((ptype-qry (if (and ptypes
			     (not (null? ptypes)))
			(conc " IN ('" (string-intersperse (map conc ptypes) "','") "')")
			(conc " LIKE '%' ")))
	 (rows      (get-rows
		     db
		     (conc
		      "SELECT id,group_id,uuid,parent_uuid,artifact_type,artifact,processed FROM artifacts
                         WHERE artifact_type " ptype-qry " AND group_id=?
                         AND processed=0 "
			 (if parent-uuid (conc "AND parent_uuid='" parent-uuid "' ") "")
			 (if uuid        (conc "AND        uuid='"        uuid "' ") "")
			 "ORDER BY id DESC;")
		     group-id)))
    (map dblst->dartifacts (map vector->list rows))))

;; get N artifacts not yet processed for group-id
;;
(define (get-not-processed-artifacts db group-id artifact-type limit offset)
  (map dblst->dartifacts
       (map vector->list
	    (get-rows
	     db
	     "SELECT id,group_id,uuid,parent_uuid,artifact_type,artifact,processed FROM artifacts
                WHERE artifact_type = ? AND group_id = ? AND processed=0
                LIMIT ? OFFSET ?;"
	     (conc artifact-type) ;; convert symbols to string
	     group-id
	     limit
	     offset
	     ))))

;; given a uuid, get not processed child artifacts 
;;
(define (get-related db group-id uuid)
  (map dblst->dartifacts
       (get-rows
	db
	"SELECT id,group_id,uuid,parent_uuid,artifact_type,artifact,processed FROM artifacts
           WHERE parent_uuid=? AND group_id=? AND processed=0;"
	uuid group-id)))

;; generic artifact processor
;;
;; find all packets in group-id of type in ptypes and apply proc to artifactdat
;;
(define (process-artifacts conn group-id ptypes parent-uuid proc)
  (let* ((artifacts (get-dartifacts conn ptypes group-id parent-uuid)))
    (map proc artifacts)))

;; criteria is an alist ((k . valpatt) ...)
;;   - valpatt is a regex
;;   - ptypes is a list of types (symbols expected)
;;   match-type: 'any or 'all
;;
(define (find-artifacts db ptypes criteria #!key (processed #f)(match-type 'any)(artifact-spec #f)) ;; processed=#f, don't use, else use
  (let* ((artifacts (get-dartifacts db ptypes 0 #f))
	 (match-rules (lambda (artifactdat) ;; returns a list of matching rules
			(filter (lambda (c)
				  ;; (print "c: " c)
				  (let* ((ctype (car c)) ;; card type
					 (rx    (cdr c)) ;; card pattern
					 ;; (t     (alist-ref 'artifact-type artifactdat))
					 (artifact   (alist-ref 'artifact artifactdat))
					 (aartifact  (artifact->alist artifact))
					 (cdat  (alist-ref ctype aartifact)))
				    ;; (print "cdat: " cdat) ;; " aartifact: " aartifact)
				    (if cdat
					(string-match rx cdat)
					#f)))
				criteria)))
	 (res         (filter (lambda (artifactdat)
				(if (null? criteria) ;; looking for all artifacts
				    #t
				    (case match-type
				      ((any)(not (null? (match-rules artifactdat))))
				      ((all)(eq? (length (match-rules artifactdat))(length criteria)))
				      (else
				       (print "ERROR: bad match type " match-type ", expecting any or all.")))))
			      artifacts)))
    (if artifact-spec
	(dartifacts->alists res artifact-spec)
	res)))

;; get descendents of parent-uuid
;;
;; NOTE: Should be doing something like the following:
;;
;; given a uuid, get not processed child artifacts 
;; processed:
;;    #f => get all
;;     0 => get not processed
;;     1 => get processed
;;
(define (get-ancestors db group-id uuid #!key (processed #f))
  (map dblst->dartifacts
       (map vector->list
	    (get-rows
	     db
	     (conc
	      "SELECT id,group_id,uuid,parent_uuid,artifact_type,artifact,processed 
                FROM artifacts
                 WHERE uuid IN 
                     (WITH RECURSIVE
                       tree(uuid,parent_uuid)
                        AS
                        (
                           SELECT uuid, parent_uuid
                           FROM artifacts
                           WHERE uuid = ?
                           UNION ALL
                           SELECT t.uuid, t.parent_uuid
                           FROM artifacts t
                           JOIN tree ON t.uuid = tree.parent_uuid
                        )
	              SELECT uuid FROM tree)
	    AND group_id=?" (if processed (conc " AND processed=" processed) "") ";")
	     uuid group-id))))

;; Untested
;;
(define (get-descendents db group-id uuid #!key (processed #f))
  (map dblst->dartifacts
       (map vector->list
	    (get-rows
	     db
	     (conc
	      "SELECT id,group_id,uuid,parent_uuid,artifact_type,artifact,processed 
                FROM artifacts
                 WHERE uuid IN 
                     (WITH RECURSIVE
                       tree(uuid,parent_uuid)
                        AS
                        (
                           SELECT uuid, parent_uuid
                           FROM artifacts
                           WHERE uuid = ?
                           UNION ALL
                           SELECT t.uuid, t.parent_uuid
                           FROM artifacts t
                           JOIN tree ON t.parent_uuid = tree.uuid
                        )
	              SELECT uuid FROM tree)
	    AND group_id=?" (if processed (conc " AND processed=" processed) "") ";")
	     uuid group-id))))

;; look up descendents based on given info unless passed in a list via inlst
;;
;; (define (get-last-descendent db group-id uuid #!key (processed #f)(inlst #f))
;;   (let ((descendents (or inlst (get-descendents db group-id uuid processed: processed))))
;;     (if (null? descendents)
;; 	#f
;; 	(last descendents))))

;;======================================================================
;;  A R C H I V E S - always to a sqlite3 db 
;;======================================================================

;; open an archive db
;; path: archive-dir/<year>/month.db
;;
#;(define (archive-open-db archive-dir)
  (let* ((curr-time (seconds->local-time (current-seconds)))
	 (dbpath    (conc archive-dir "/" (time->string curr-time "%Y")))
	 (dbfile    (conc dbpath "/" (time->string curr-time "%m") ".db"))
	 (dbexists (if (file-exists? dbfile) #t (begin (create-directory dbpath #t) #f))))
    (let ((db (open-database dbfile)))
      ;; (set-busy-handler! db (busy-timeout 10000))
      (if (not dbexists) ;; NOTE: In the archive we allow duplicates and other messiness. 
	  (execute db "CREATE TABLE IF NOT EXISTS artifacts
                          (id           INTEGER,
                           group_id     INTEGER,
                           uuid         TEXT,
                           parent_uuid  TEXT,
                           artifact_type     TEXT,
                           artifact          TEXT,
                           processed    INTEGER DEFAULT 0)"))
      db)))

;; turn on transactions! otherwise this will be painfully slow
;;
#;(define (write-archive-artifacts src-db db artifact-ids)
  (let ((artifacts (get-rows
	       src-db
	       (conc "SELECT id,group_id,uuid,parent_uuid,artifact_type,artifact FROM artifacts WHERE id IN ("
		     (string-intersperse (map conc artifact-ids) ",") ")"))))
    ;; (dbi:with-transaction
    ;;  db
     (lambda ()
       (for-each
	(lambda (artifact)
	  (apply execute  db "INSERT INTO artifacts (id,group_id,uuid,parent_uuid,artifact_type,artifact)
                               VALUES (?,?,?,?,?,?)"
		 artifact))
	artifacts)))) ;; )

;; given a list of uuids and lists of uuids move all to
;; the sqlite3 db for the current archive period
;;
#;(define (archive-artifacts conn artifact-ids archive-dir)
  (let ((db (archive-open-db archive-dir)))
    (write-archive-artifacts conn db artifact-ids)
    (finalize! db))
  ;; (pg:with-transaction
  ;;  conn
  ;; (lambda ()
     (for-each
      (lambda (id)
	(get-one
	 conn
	 "DELETE FROM artifacts WHERE id=?" id))
      artifact-ids)) ;; ))

;; given a list of ids mark all as processed
;;
(define (mark-processed conn artifact-ids)
  ;; (pg:with-transaction
   ;; conn
   ;; (lambda ()
     (for-each
      (lambda (id)
	(get-one
	 conn
	 "UPDATE artifacts SET processed=1 WHERE id=?;" id))
      artifact-ids)) ;; x))

;; a generic artifact getter, gets from the artifacts db
;;
(define (get-artifacts conn ptypes)
  (let* ((ptypes-str    (if (null? ptypes)
			    ""
			    (conc " WHERE artifact_type IN ('" (string-intersperse ptypes ",") "') ")))
	 (qry-str       (conc "SELECT id,group_id,uuid,parent_uuid,artifact_type,artifact,processed FROM artifacts" ptypes-str)))
    (map vector->list (get-rows conn qry-str))))

;; make a report of the artifacts in the db
;; ptypes of '() gets all artifacts
;; display-fields
;;
(define (make-report dest conn artifactspec display-fields . ptypes)
  (let* (;; (conn          (dbi:db-conn (s:db)))
	 (all-rows      (get-artifacts conn ptypes))
	 (all-artifacts      (flatten-all
			 all-rows
			 artifactspec
			 'id 'group-id 'uuid 'parent 'artifact-type 'artifact 'processed))
	 (by-uuid       (let ((ht (make-hash-table)))
			  (for-each
			   (lambda (artifact)
			     (let ((uuid (alist-ref 'uuid artifact)))
			       (hash-table-set! ht uuid artifact)))
			   all-artifacts)
			  ht))
	 (by-parent     (let ((ht (make-hash-table)))
			  (for-each
			   (lambda (artifact)
			     (let ((parent (alist-ref 'parent artifact)))
			       (hash-table-set! ht parent (cons artifact (hash-table-ref/default ht parent '())))))
			   all-artifacts)
			    ht))
	 (oup           (if dest (open-output-file dest) (current-output-port))))
    
    (with-output-to-port
	oup
      (lambda ()
	(print "digraph megatest_state_status {
  // ranksep=0.05
  rankdir=LR;
  node [shape=\"box\"];
")
	;; first all the names
	(for-each
	 (lambda (artifact)
	   (let* ((uuid        (alist-ref 'uuid artifact))
		  (shortuuid   (substring uuid 0 4))
		  (type        (alist-ref 'artifact-type artifact))
		  (processed   (alist-ref 'processed artifact)))
	     
	     (print "\"" uuid "\" [label=\"" shortuuid ", ("
		    type ", "
		    (if processed "processed" "not processed") ")")
	     (for-each
	      (lambda (key-field)
		(let ((val (alist-ref key-field artifact)))
		  (if val
		      (print key-field "=" val))))
	      display-fields)
	     (print "\" ];")))
	 all-artifacts)
	;; now for parent-child relationships
	(for-each
	 (lambda (artifact)
	   (let ((uuid   (alist-ref 'uuid artifact))
		 (parent (alist-ref 'parent artifact)))
	     (if (not (equal? parent ""))
		 (print "\"" parent "\" -> \"" uuid"\";"))))
	 all-artifacts)

	(print "}")
	))
    (if dest
	(begin
	  (close-output-port oup)
	  (system "dot -Tpdf out.dot -o out.pdf")))
    
    ))

;;======================================================================
;; Read ref artifacts into a vector < laststr hash table > 
;;======================================================================



;;======================================================================
;; Read/write packets to files (convience functions)
;;======================================================================

;; write alist to a artifact file
;;
(define (write-alist->artifact targdir dat #!key (artifactspec '())(ptype #f))
  (let-values (((uuid artifact)(alist->artifact dat artifactspec ptype: ptype)))
    (with-output-to-file (conc targdir "/" uuid ".artifact")
      (lambda ()
	(print artifact)))
    uuid)) ;; return the uuid

;; read artifact into alist
;;
(define (read-artifact->alist artifact-file #!key (artifactspec #f))
  (artifact->alist (with-input-from-file
		  artifact-file
		read-string)
	      artifactspec: artifactspec))

;;======================================================================
;; File utils, stuff useful for file management
;;======================================================================

(define (file-get-sha1 fname)
  (let* ((sha1-res (run/strings (sha1sum ,fname))))
    (car (string-split (car sha1-res)))))

(define (link-or-copy srcf destf)
  (or (handle-exceptions
       exn
       #f
       (file-link srcf destf))
      (if (file-exists? destf)
	  (print "NOTE: destination already exists, skipping copy.")
	  (copy-file srcf destf))))

;; (define (files-diff file1 file2)
;;   (let* ((diff-res (with-input-from-port
;; 		       (run/port (diff "-q" ,file1 ,file2))
;; 		     (lambda ()
;; 		       (let* ((res (read-line)))
;; 			 (read-lines)
;; 			 res)))))
;;     (car (string-split sha1-res))))
;; 


(define (check-same file1 file2)
  (cond
   ((not (and (file-exists? file1)(file-exists? file2))) #f)
   ((not (equal? (file-size file1)(file-size file2))) #f)
   (else
    (let-values (((status run-ok process-id)
		  (run (diff "-q" ,file1 ,file2))))
      status))))

(define *pcache* (make-hash-table))
(define (get-device dir)
  (let ((indat (or (hash-table-ref/default *pcache* dir #f)
		   (let* ((inp (open-input-pipe (conc "df --output=source \""dir"\"")))
			  (res (read-lines inp)))
		     (close-input-port inp)
		     (hash-table-set! *pcache* dir res)
		     res))))
    (cadr indat)))

(define (same-partition? dir1 dir2)
  (equal? (get-device dir1)(get-device dir2)))

(define (link-if-same-partition file1 file2)
  (let* ((dir1 (pathname-directory file1))
	 (dir2 (pathname-directory file2))
	 (f1   (pathname-file file1))
	 (f2   (pathname-file file2)))
    (if (same-partition? dir1 dir2)
	(let* ((tmpname (conc "."f2"-"(current-seconds))))
	  ;; this steps needs to be executed as actual user    
	  (move-file file2 (conc dir1 "/" tmpname))
	  (file-link file1 file2)
	  (delete-file (conc dir1 "/" tmpname))))))

(define (uuid-first-two-letters sha1sum)
  (substring sha1sum 0 2))

(define (uuid-remaining-letters sha1sum)
  (let ((slen (string-length sha1sum)))
    (substring sha1sum 2 slen)))

(define (archive-dest destd sha1sum)
  (let* ((subdir         (uuid-first-two-letters sha1sum)) ;; (substring sha1sum 0 2))
	 ;; (slen           (string-length sha1sum))
	 (rem            sha1sum #;(uuid-remaining-letters sha1sum)) ;; (substring sha1sum 3 slen))
	 (full-dest-dir  (conc destd"/"subdir))
	 (full-dest-file (conc full-dest-dir"/"rem)))
    (if (not (directory-exists? full-dest-dir))
	(create-directory full-dest-dir #t))
    full-dest-file))

(define (write-to-archive data destd #!optional (nextnum #f))
  (let* ((sha1sum    (calc-sha1 data))
	 (full-dest  (conc (archive-dest destd sha1sum)
			   (if nextnum (conc "."nextnum) ""))))
    (if (file-exists? full-dest)
	(if (equal? (string-intersperse (with-input-from-file full-dest read-lines) "\n")
		    data)
	    (begin
	      ;; (print "INFO: data already exists in "full-dest" and is identical")
	      sha1sum)
	    (let ((nextnum (if nextnum (+ nextnum 1) 0)))
	      (print "WARN: data already exists in "full-dest" but is different! Trying again...")
	      (write-to-archive data destd nextnum)))
	(begin
	  (with-output-to-file
	      full-dest
	    (lambda ()
	      (print data)))
	  sha1sum)))) ;; BUG? Does print munge data?

;; copy srcf with sha1sum aabc... to aa/bc...
;;
(define (archive-copy srcf destd sha1sum)
  (let* ((full-dest-file (archive-dest destd sha1sum)))
    (let loop ((trynum 0))
      (let ((dest-name (if (> trynum 0)
			   (conc full-dest-file"-"trynum)
			   full-dest-file)))
	(cond
	 ((not (file-exists? srcf)) #f) ;; this should be an error?
	 ((and (file-exists? srcf)
	       (file-exists? dest-name))
	  (if (check-same srcf dest-name)
	      (link-if-same-partition dest-name srcf)
	      (loop (+ trynum 1)))) ;; collisions are rare, this protects against them
	 ((not (file-exists? dest-name))
	  (link-or-copy srcf dest-name))
	 (else #f))))))

;; multi-glob
(define (multi-glob globstrs inpath)
  ;; (print "multi-glob: "globstrs", "inpath)
  (if (equal? inpath "")
      globstrs
      (let* ((parts     (string-split inpath "/" #t))
	     (nextpart  (car parts))
	     (remaining (string-intersperse (cdr parts) "/")))
	(if (and (equal? nextpart "") ;; this must be a leading / meaning root directory
	         (null? globstrs))
	    (multi-glob '("/") remaining)
	    (begin
	      ;; (print "nextpart="nextpart", remaining="remaining)
	      (apply append
              (map (lambda (gstr)
                      (let* ((pathstr  (conc gstr"/"nextpart))
		   	     (pathstrs (glob pathstr)))
		          ;; (print "pathstr="pathstr)
		          (multi-glob pathstrs remaining)))
	               globstrs)))))))
	     

;; perm[/user:group]:
;;   DDD - octal perm (future expansion)
;;   -   - use umask/defacto perms (i.e. don't actively do anything)
;;   x   - mark as executable
;;
;; Cards:
;;   file:      f perm fname
;;   directory: d perm fname artifactid
;;   link:      l perm lname destpath
;;
;;   NOTE: cards are kept as (C . "value")
;;
;; given a directory path, ignore list and artifact store (hash-table):
;;  1. create sha1 tree at dest (e.g. aa/b3a7 ...)
;;  2. create artifact for each dir
;;    - cards for all files
;;    - cards for files that are symlinks or executables
;;  3. return (artifactid . artifact)
;;
;; NOTES:
;;   Use destdir of #f to not create sha1 tree
;;   Hard links will be used if srcdir and destdir appear to be same partion
;;
;; (alist->artifact adat aspec #!key (ptype #f))
;;
;;
;; (load "../../artifacts/artifacts.scm")(import big-chicken srfi-69 artifacts)(define dirdat (make-hash-table))
;; (capture-dir ".." ".." "/tmp/junk" '() dirdat)
;;
;; [procedure] (file-type FILE [LINK [ERROR]])
;; Returns the file-type for FILE, which should be a filename, a file-descriptor or a port object. If LINK is given and true, symbolic-links are not followed:
;; 
;;  regular-file
;;  directory
;;  fifo
;;  socket
;;  symbolic-link
;;  character-device
;;  block-device
;; Note that not all types are supported on every platform. If ERROR is given and false, then file-type returns #f if the file does not exist; otherwise, it signals an error.
;; 
;; 
(define (capture-dir curr-dir src-dir dest-dir ignore-list artifacts all-seen)
  (let* ((dir-dat (directory-fold
		   (lambda (fname res) ;; res is a list of artifact cards
		     (let* ((fullname   (conc curr-dir"/"fname)))
		       ;; (print "INFO: processing "fullname)
		       (if (hash-table-ref/default all-seen fullname #f) ;; something circular going on
			   (begin
			     (print "WARNING: possible circular link(s) "fullname)
			     res)
			   (let* ((ftype (file-type fullname #t #f)))
			     (hash-table-set! all-seen fullname ftype)
			     (cons
			      (case ftype ;; get the card
			       ((directory) ;; (directory? fullname)
				(let* ((new-curr-dir (conc curr-dir"/"fname))
				       (new-src-dir  (conc src-dir"/"fname)))
				  (let* ((dir-dat (capture-dir new-curr-dir new-src-dir
							       dest-dir ignore-list artifacts all-seen))
					 (a-id (car dir-dat))
					 (artf (cdr dir-dat)))
				    (hash-table-set! artifacts a-id artf)
				    (cons 'd (conc "- "a-id" "fname))))) ;; the card
			       ((symbolic-link) ;; (symbolic-link? fullname)
				(let ((ldest (read-symbolic-link fullname)))
				  (cons 'l (conc "- "fname"/"ldest)))) ;; delimit link name from dest with /
			       ((regular-file) ;; must be a file
				(let* ((start      (current-seconds))
				       (sha1sum    (file-get-sha1 fullname))
				       (perms      (if (file-executable? fullname) "x" "-")))
				  (let ((runtime (- (current-seconds) start)))
				    (if (> runtime 1)
					(print "INFO: file "fullname" took "runtime" seconds to calculate sha1.")))
				  (if dest-dir
				      (archive-copy fullname dest-dir sha1sum))
				  (cons 'f (conc perms " "sha1sum" "fname))))
			       (else
				(print "WARNING: file "fullname" of type "ftype" is NOT supported and will converted to empty file.")
				(let* ((sha1sum (write-to-archive "" dest-dir)))
				  (cons 'f (conc "- "sha1sum" "fname)))))
			      res)))))
		       '() src-dir #:dotfiles? #t))) ;; => (values srcdir_artifact sub_artifacts_list)
    ;; (print "dir-dat: " dir-dat)
    (let-values (((a-id artf)
		  (alist->artifact dir-dat '() ptype: 'd no-d: #t)))
      (hash-table-set! artifacts a-id artf)
      (cons a-id artf))))

;; maybe move this into artifacts?
;;
;; currently moves *.artifact into a bundle and moves the artifacts into attic
;; future: move artifacts under 1 meg in size into bundle up to 10 meg in size
;;
(define (artifact-rollup bundle-dir) ;; cfg storepath)
  ;; (let* ((bundle-dir (calc-bundle-dir cfg storepath)))
    (let* ((bundles   (glob (conc bundle-dir"/*.bundle")))
	   (artifacts (glob (conc bundle-dir"/*.artifact"))))
      (if (> (length artifacts) 30) ;; rollup only if > 30 artifacts
	  ;; if we have unbundled artifacts, bundle them
	  (let* ((ht     (read-artifacts-into-hash #f artifacts: artifacts))
		 (bundle (hash-of-artifacts->bundle ht)))
	    (write-bundle bundle bundle-dir)
	    (create-directory (conc bundle-dir"/attic") #t)
	    (for-each
	     (lambda (full-fname)
	       (let* ((fname   (pathname-strip-directory full-fname))
		      (newname (conc bundle-dir"/attic/"fname)))
		 (move-file full-fname newname #t)))
	     artifacts)
	    (conc "bundled "(length artifacts)))
	  "not enough artifacts to bundle")))

;; if destfile is a directory then calculate the sha1sum of the bundle and store it
;; by <sha1sum>.bundle
;;
;; incoming dat is pure text (bundle already sorted and appended:
;;
(define (write-bundle bdl-data destdir)
  (let* ((bdl-uuid  (calc-sha1 bdl-data)))
    (with-output-to-file
	(conc destdir"/"bdl-uuid".bundle")
      (lambda ()
	(print bdl-data)))))

;; minimal (and hopefully fast) artifact reader
;; TODO: Add check of shar sum.
;;
(define (minimal-artifact-read fname)
  (let* ((indat (with-input-from-file fname read-lines)))
    (if (null? indat)
	(values #f (conc "did not find an artifact in "fname))
	(let* ((zcard (last indat))
	       (cardk (substring zcard 0 1))
	       (cardv (substring zcard 2 (string-length zcard))))
	  (if (equal? cardk "Z")
	      (values cardv (string-intersperse indat "\n"))
	      (values #f (conc fname" is not a valid artifact")))))))

;; read artifacts from directory into hash
;; NOTE: support for max-count not implemented yet
;;
(define (read-artifacts-into-hash dir #!key (artifacts #f) (max-count #f)(ht #f))
  (let* ((artifacts (or artifacts
			(glob (conc dir"/*.artifact"))))
	 (ht        (or ht (make-hash-table))))
    (for-each
     (lambda (fname)
       (let-values (((uuid afct)
		     (minimal-artifact-read fname)))
	 (hash-table-set! ht uuid afct)))
     artifacts)
    ht))

;; ht is:
;;   uuid => artifact text
;; use write-bundle to put result into a bundle file
;;
(define (hash-of-artifacts->bundle ht)    
  (fold (lambda (k res)
	  (let* ((v (hash-table-ref ht k)))
	    (if res
		(conc res"\n"v)
		v)))
	#f
	(sort (hash-table-keys ht) string<=?)))

;; minimal artifact to alist
;;
(define (minimal-artifact->alist afact)
  (let* ((lines   (string-split afact "\n")))
    (map (lambda (a)
	   (let* ((key (string->symbol (substring a 0 1)))
		  (sl  (string-length a))
		  (val (if (> sl 2)
			   (substring a 2 sl)
			   "")))
	     (cons key val)))
	 lines)))

;; some accessors for common cards
(define (afact-get-D afact)
  (let ((dval (alist-ref 'D afact)))
    (if dval
	(string->number dval)
	#f)))

(define (afact-get-T afact) ;; get the artifact type as a symbol
  (let ((val (alist-ref 'T afact)))
    (if val
	(string->symbol val)
	val)))

(define (afact-get-Z afact)
  (alist-ref 'Z afact))

(define (afact-get afact key default)
  (or (alist-ref key afact)
      default))

(define (afact-get-number/default afact key default)
  (let ((val (alist-ref key afact)))
    (if val
	(or (string->number val) default) ;; seems wrong
	default)))

;; bundles are never big and reading into memory for processing is fine
;;
(define (read-bundle srcfile #!optional (mode 'uuid-raw))
  (let* ((indat (with-input-from-file srcfile read-lines)))
    (let loop ((tail indat)
	       (dat '())  ;; artifact being extracted
	       (res '())) ;; list of artifacts
      (if (null? tail)
	  (reverse res) ;; last dat should be empty list
	  (let* ((curr-line (car tail)))
	    (let-values (((ctype cdata)
			  (card->type/value curr-line)))
	      (let* ((is-z-card (eq? 'Z ctype))
		     (new-dat   (cons (case mode
					((uuid-raw) curr-line)
					(else       (cons ctype cdata)))
				      dat)))
		(if is-z-card
		    (loop (cdr tail) ;; done with this artifact
			  '()
			  (cons (case mode
				  ((uuid-raw) (cons cdata (string-intersperse (reverse new-dat) "\n")))
				  (else       (reverse new-dat)))
				res))
		    (loop (cdr tail)
			  new-dat
			  res)))))))))


;; find all .bundle and .artifacts files in bundle-dir
;; and inport them into sqlite handle adb
;; 
(define (refresh-artifacts-db adb bundle-dir)
  (let* ((bundles   (glob (conc bundle-dir"/*.bundle")))
	 (artifacts (glob (conc bundle-dir"/*.artifact")))
	 (uuids     (get-all-uuids adb 'hash)))
    (with-transaction
     adb
     (lambda ()
       (for-each
	(lambda (bundle-file)
	  ;; (print "Importing artifacts from "bundle-file)
	  (let* ((bdat (read-bundle bundle-file 'uuid-raw))
		 (count 0)
		 (inc   (lambda ()(set! count (+ count 1)))))
	    (for-each
	     (lambda (adat)
	       (match
		adat
		((zval . artifact)
		 (if (not (hash-table-exists? uuids zval))
		     (begin
		       ;; (print "INFO: importing new artifact "zval" from bundle "bundle-file)
		       (inc)
		       (execute adb "INSERT INTO artifacts (uuid, artifact) VALUES (?,?);"
				zval artifact)
		       (hash-table-set! uuids zval #t))))
		(else
		 (print "ERROR: Bad artifact data "adat))))
	     bdat)
	    (print "INFO: imported "count" artifacts from "bundle-file)))
	bundles)
       (for-each
	(lambda (artifact-file)
	  ;; (print "Importing artifact from "artifact-file)
	  (let-values (((uuid artifact) (minimal-artifact-read artifact-file)))
	    (if uuid
		(if (not (hash-table-exists? uuids uuid))
		    (begin
		      ;; (print "INFO: importing new artifact "uuid" from "artifact-file)
		      (execute adb "INSERT INTO artifacts (uuid, artifact) VALUES (?,?);"
			       uuid artifact)
		      (hash-table-set! uuids uuid #t)))
		(print "Bad artifact in "artifact-file))))
	artifacts)))))

;;======================================================================
;;  Artifacts db cache
;;======================================================================

;; artifacts
;;   id SERIAL PRIMARY KEY,
;;   uuid TEXT NOT NULL,
;;   artifact TEXT NOT NULL
;;
;; parents
;;   id INTEGER REFERENCES artids.id,  -- 
;;   parent_id  REFERENCES artids.id
;;
;; schema is list of SQL statements - can be used to extend db with more tables
;;
(define (open-artifacts-db dbpath dbfile #!key (schema '()))
  (let* ((dbfname  (conc dbpath "/" dbfile))
	 (dbexists (if (file-exists? dbfname) #t (begin (create-directory dbpath #t) #f)))
	 (adb      (open-database dbfname)))
    (set-busy-handler! adb (make-busy-timeout 10000))
    (execute adb "PRAGMA synchronous = 0;")
    (if (not dbexists)
	(with-transaction
	 adb
	 (lambda ()
	   (for-each
	    (lambda (stmt)
	      (execute adb stmt))
	    (append `("CREATE TABLE IF NOT EXISTS artifacts
                          (id           INTEGER PRIMARY KEY,
                           uuid         TEXT NOT NULL,
                           artifact     TEXT NOT NULL)"
		      
		      "CREATE TABLE IF NOT EXISTS parents
                          (id INTEGER REFERENCES artifacts(id) NOT NULL,
                           parent_id INTEGER REFERENCES artifacts(id) NOT NULL)")
		    schema)))))
    adb))

(define (generate-year-month-name #!optional (seconds #f))
  (let* ((curr-time (seconds->local-time (or seconds (current-seconds)))))
    (time->string curr-time "%Y%m")))

;; I don't like this function. TODO: remove the
;; mode and option to return ht. Use instead the
;; get-all-artifacts below
;;
(define (get-all-uuids adb #!optional (mode #f))
  (let* ((res (fold-row
	       (lambda (res uuid)
		 (cons uuid res))
	       '()
	       adb
	       "SELECT uuid FROM artifacts;")))
    (case mode
      ((hash)
       (let* ((ht (make-hash-table)))
	 (for-each
	  (lambda (uuid)
	    (hash-table-set! ht uuid #t))
	  res)
	 ht))
      (else res))))

;; returns raw artifacts (i.e. NOT alists but instead plain text)
(define (get-all-artifacts adb)
  (let* ((ht  (make-hash-table)))
    (for-each-row
     (lambda (id uuid artifact)
       (hash-table-set! ht uuid `(,id ,uuid ,artifact)))
     adb
     "SELECT id,uuid,artifact FROM artifacts;")
    ht))

;; given a bundle-dir copy or create to /tmp and open
;; the YYMM.db file and hand the handle to the given proc
;; NOTE: we operate in /tmp/ to accomodate users on NFS
;; where slamming Unix locks at an NFS filer can cause
;; locking fails. Eventually this /tmp behavior will be
;; configurable.
;;
(define (with-todays-adb bundle-dir proc)
  (let* ((dbname   (conc (generate-year-month-name) ".db"))
	 (destname (conc bundle-dir"/"dbname))
	 (tmparea  (conc "/tmp/"(current-user-name)"-"(calc-sha1 bundle-dir)))
	 (tmpname  (conc tmparea"/"dbname))
	 (lockfile (conc destname".update-in-progress")))
    ;; (print "with-todays-adb, bundle-dir: "bundle-dir", dbname: "dbname", destname: "destname",\n    tmparea: " tmparea", lockfile: "lockfile)
    (if (not (file-exists? tmparea))(create-directory tmparea #t))
    (let loop ((count 0))
      (if (file-exists? lockfile)
	  (if (< count 30) ;; aproximately 30 seconds
	      (begin
		(sleep 1)
		(loop (+ 1 count)))
	      (print "ERROR: "lockfile" exists, proceeding anyway"))
	  (if (file-exists? destname)
	      (begin
		(copy-file destname tmpname #t)
		(copy-file destname lockfile #t)))))
    (let* ((adb  (open-artifacts-db tmparea dbname))
	   (res  (proc adb)))
      (finalize! adb)
      (copy-file tmpname destname #t)
      (delete-file* lockfile)
      res)))

) ;; module artifacts

;; ATTIC

Added artifacts/artifacts.setup version [bf666feb42].












1
2
3
4
5
6
7
8
9
10
11
+
+
+
+
+
+
+
+
+
+
+
;; Copyright 2007-2017, Matthew Welland.
;;
;;  This program is made available under the GNU GPL version 2.0 or
;;  greater. See the accompanying file COPYING for details.
;;
;;  This program is distributed WITHOUT ANY WARRANTY; without even the
;;  implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
;;  PURPOSE.

;;;; pkts.setup
(standard-extension 'pkts "1.0")

Added artifacts/artifactsrec.scm version [28997466b3].





































































































































































































1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
(define-syntax define-record-type
  (syntax-rules ()
    ((define-record-type type
       (constructor constructor-tag ...)
       predicate
       (field-tag accessor . more) ...)
     (begin
       (define type
         (make-record-type 'type '(field-tag ...)))
       (define constructor
         (record-constructor type '(constructor-tag ...)))
       (define predicate
         (record-predicate type))
       (define-record-field type field-tag accessor . more)
       ...))))

; An auxilliary macro for define field accessors and modifiers.
; This is needed only because modifiers are optional.

(define-syntax define-record-field
  (syntax-rules ()
    ((define-record-field type field-tag accessor)
     (define accessor (record-accessor type 'field-tag)))
    ((define-record-field type field-tag accessor modifier)
     (begin
       (define accessor (record-accessor type 'field-tag))
       (define modifier (record-modifier type 'field-tag))))))

; Record types

; We define the following procedures:
; 
; (make-record-type <type-name <field-names>)    -> <record-type>
; (record-constructor <record-type<field-names>) -> <constructor>
; (record-predicate <record-type>)               -> <predicate>
; (record-accessor <record-type <field-name>)    -> <accessor>
; (record-modifier <record-type <field-name>)    -> <modifier>
;   where
; (<constructor> <initial-value> ...)         -> <record>
; (<predicate> <value>)                       -> <boolean>
; (<accessor> <record>)                       -> <value>
; (<modifier> <record> <value>)         -> <unspecific>

; Record types are implemented using vector-like records.  The first
; slot of each record contains the record's type, which is itself a
; record.

(define (record-type record)
  (record-ref record 0))

;----------------
; Record types are themselves records, so we first define the type for
; them.  Except for problems with circularities, this could be defined as:
;  (define-record-type :record-type
;    (make-record-type name field-tags)
;    record-type?
;    (name record-type-name)
;    (field-tags record-type-field-tags))
; As it is, we need to define everything by hand.

(define :record-type (make-record 3))
(record-set! :record-type 0 :record-type)	; Its type is itself.
(record-set! :record-type 1 ':record-type)
(record-set! :record-type 2 '(name field-tags))

; Now that :record-type exists we can define a procedure for making more
; record types.

(define (make-record-type name field-tags)
  (let ((new (make-record 3)))
    (record-set! new 0 :record-type)
    (record-set! new 1 name)
    (record-set! new 2 field-tags)
    new))

; Accessors for record types.

(define (record-type-name record-type)
  (record-ref record-type 1))

(define (record-type-field-tags record-type)
  (record-ref record-type 2))

;----------------
; A utility for getting the offset of a field within a record.

(define (field-index type tag)
  (let loop ((i 1) (tags (record-type-field-tags type)))
    (cond ((null? tags)
           (error "record type has no such field" type tag))
          ((eq? tag (car tags))
           i)
          (else
           (loop (+ i 1) (cdr tags))))))

;----------------
; Now we are ready to define RECORD-CONSTRUCTOR and the rest of the
; procedures used by the macro expansion of DEFINE-RECORD-TYPE.

(define (record-constructor type tags)
  (let ((size (length (record-type-field-tags type)))
        (arg-count (length tags))
        (indexes (map (lambda (tag)
                        (field-index type tag))
                      tags)))
    (lambda args
      (if (= (length args)
             arg-count)
          (let ((new (make-record (+ size 1))))
            (record-set! new 0 type)
            (for-each (lambda (arg i)
			(record-set! new i arg))
                      args
                      indexes)
            new)
          (error "wrong number of arguments to constructor" type args)))))

(define (record-predicate type)
  (lambda (thing)
    (and (record? thing)
         (eq? (record-type thing)
              type))))

(define (record-accessor type tag)
  (let ((index (field-index type tag)))
    (lambda (thing)
      (if (and (record? thing)
               (eq? (record-type thing)
                    type))
          (record-ref thing index)
          (error "accessor applied to bad value" type tag thing)))))

(define (record-modifier type tag)
  (let ((index (field-index type tag)))
    (lambda (thing value)
      (if (and (record? thing)
               (eq? (record-type thing)
                    type))
          (record-set! thing index value)
          (error "modifier applied to bad value" type tag thing)))))

Records

; This implements a record abstraction that is identical to vectors,
; except that they are not vectors (VECTOR? returns false when given a
; record and RECORD? returns false when given a vector).  The following
; procedures are provided:
;   (record? <value>)                -> <boolean>
;   (make-record <size>)             -> <record>
;   (record-ref <record> <index>)    -> <value>
;   (record-set! <record> <index> <value>) -> <unspecific>
;
; These can implemented in R5RS Scheme as vectors with a distinguishing
; value at index zero, providing VECTOR? is redefined to be a procedure
; that returns false if its argument contains the distinguishing record
; value.  EVAL is also redefined to use the new value of VECTOR?.

; Define the marker and redefine VECTOR? and EVAL.

(define record-marker (list 'record-marker))

(define real-vector? vector?)

(define (vector? x)
  (and (real-vector? x)
       (or (= 0 (vector-length x))
	   (not (eq? (vector-ref x 0)
		record-marker)))))

; This won't work if ENV is the interaction environment and someone has
; redefined LAMBDA there.

(define eval
  (let ((real-eval eval))
    (lambda (exp env)
      ((real-eval `(lambda (vector?) ,exp))
       vector?))))

; Definitions of the record procedures.

(define (record? x)
  (and (real-vector? x)
       (< 0 (vector-length x))
       (eq? (vector-ref x 0)
            record-marker)))

(define (make-record size)
  (let ((new (make-vector (+ size 1))))
    (vector-set! new 0 record-marker)
    new))

(define (record-ref record index)
  (vector-ref record (+ index 1)))

(define (record-set! record index value)
  (vector-set! record (+ index 1) value))

Added artifacts/tests/run.scm version [957c7c2ae2].












































































































































1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
(use test)

;; (use (prefix pkts pkts:))
(use pkts (prefix dbi dbi:))
;; (use trace)(trace sdat->alist pkt->alist)

(if (file-exists? "queue.db")(delete-file "queue.db"))

(test-begin "pkts and pkt archives")

;;======================================================================
;; Basic pkt creation, parsing and conversion routines
;;======================================================================

(test-begin "basic packets")
(test #f '(A "This is a packet") (let-values (((t v)
					       (card->type/value "A This is a packet")))
				   (list t v)))
(test #f "A A\nZ 664449e7299e0065a3e25c138ccef2df13ba291e"
      (let-values (((uuid res)
		    (add-z-card '("A A"))))
	res))
(test #f '("CC C++" "D 1486332719.0" "a A" "b C")(sort (construct-sdat 'b "C" 'a "A" 'CC "C++" 'D 1486332719.0)
						       string<=?))
(define pkt-example #f)
(test #f "CC C++\nD 1486332719.0\na A\nb C\nZ 263eb3b6193de7fe65b1ded5bcda513e8b4d6b84"
      (let-values (((uuid res)
		    (construct-pkt 'b "C" 'a "A" 'CC "C++" 'D 1486332719.0)))
	(set! pkt-example (cons uuid res))
	res))
(test-end "basic packets")

;;======================================================================
;; Sqlite and postgresql based queue of pkts
;;======================================================================

(test-begin "pkt queue")
(define db #f)
(test #f 'sqlite3 (let ((dbh (open-queue-db "." "queue.db")))
		    (set! db dbh)
		    (dbi:db-dbtype dbh)))
(test #f (cdr pkt-example)
      (begin
	(add-to-queue db (cdr pkt-example)(car pkt-example) 'basic #f 0)
	(lookup-by-uuid db (car pkt-example) 0)))
(test #f (cdr pkt-example)
      (lookup-by-id db 1))
(test #f 1 (length (find-pkts db '(basic) '())))

(test-end "pkt queue")


;;======================================================================
;; Process groups of pkts
;;======================================================================

(test-begin "lists of packets")
(test #f '((apkt . #f) (id . 1) (group-id . 2) (uuid . 3) (parent . 4) (pkt-type . 5)) ;; ((id . 1) (group-id . 2) (uuid . 3) (parent . 4) (pkt-type . 5))
      (dblst->dpkts '(1 2 3 4 5)))
(test #f '(((apkt (Z . "263eb3b6193de7fe65b1ded5bcda513e8b4d6b84") (b . "C") (a . "A") (D . "1486332719.0") (C . " C++")) (id . 1) (group-id . 0) (uuid . "263eb3b6193de7fe65b1ded5bcda513e8b4d6b84") (parent . "") (pkt-type . "basic") (pkt . "CC C++\nD 1486332719.0\na A\nb C\nZ 263eb3b6193de7fe65b1ded5bcda513e8b4d6b84") (processed . 0)))
      ;; '(((apkt (Z . "263eb3b6193de7fe65b1ded5bcda513e8b4d6b84") (b . "C") (a . "A") (D . "1486332719.0") (C . " C++")) (id . 1) (group-id . 0) (uuid . "263eb3b6193de7fe65b1ded5bcda513e8b4d6b84") (parent . "") (pkt-type . "basic") (pkt . "CC C++\nD 1486332719.0\na A\nb C\nZ 263eb3b6193de7fe65b1ded5bcda513e8b4d6b84")))
      ;; '(((id . 1) (group-id . 0) (uuid . "263eb3b6193de7fe65b1ded5bcda513e8b4d6b84") (parent . "") (pkt-type . "basic") (pkt . "CC C++\nD 1486332719.0\na A\nb C\nZ 263eb3b6193de7fe65b1ded5bcda513e8b4d6b84")))
      (get-dpkts db '(basic) 0 #f))
(test #f '(((apkt (Z . "263eb3b6193de7fe65b1ded5bcda513e8b4d6b84") (b . "C") (a . "A") (D . "1486332719.0") (C . " C++")) (id . 1) (group-id . 0) (uuid . "263eb3b6193de7fe65b1ded5bcda513e8b4d6b84") (parent . "") (pkt-type . "basic") (pkt . "CC C++\nD 1486332719.0\na A\nb C\nZ 263eb3b6193de7fe65b1ded5bcda513e8b4d6b84") (processed . 0)))
      ;; '(((apkt (Z . "263eb3b6193de7fe65b1ded5bcda513e8b4d6b84") (b . "C") (a . "A") (D . "1486332719.0") (C . " C++")) (id . 1) (group-id . 0) (uuid . "263eb3b6193de7fe65b1ded5bcda513e8b4d6b84") (parent . "") (pkt-type . "basic") (pkt . "CC C++\nD 1486332719.0\na A\nb C\nZ 263eb3b6193de7fe65b1ded5bcda513e8b4d6b84")))
      ;; '(((id . 1) (group-id . 0) (uuid . "263eb3b6193de7fe65b1ded5bcda513e8b4d6b84") (parent . "") (pkt-type . "basic") (pkt . "CC C++\nD 1486332719.0\na A\nb C\nZ 263eb3b6193de7fe65b1ded5bcda513e8b4d6b84")))
      (get-not-processed-pkts db 0 'basic 1000 0))
(test-end "lists of packets")

(test-begin "pkts as alists")
(define pktspec '((posting . ((title . t)   ;; NOTE: same as ((posting (title . t)(url . u)(blub . b)) ... 
			      (url   . u)
			      (blurb . b)))
		  (comment . ((comment . c)
			      (score   . s)))
		  (basic   . ((b-field . b)
			      (a-field . a)))))
(define pktlst (find-pkts db '(basic) '()))
(define dpkt (car pktlst))
(test #f "A" (get-value 'a-field dpkt pktspec))

(test #f "C" (alist-ref 'b-field (dpkt->alist dpkt pktspec)))

(define basic-spec '((nada (foo . b)(bar . f))(basic (foo . f)(bar . b))))
(define test-pkt   '((foo . "fooval")(bar . "barval")))
(let*-values (((u p)  (alist->pkt test-pkt basic-spec ptype: 'basic))
		((apkt) (pkt->alist p))
		((bpkt) (pkt->alist p pktspec: basic-spec)))
    (test #f "fooval" (alist-ref 'f apkt))
    (test #f "fooval" (alist-ref 'foo bpkt))
    (test #f #f       (alist-ref 'f   bpkt)))

(test-end "pkts as alists")

(test-begin "descendents and ancestors")

(define (get-uuid pkt)(alist-ref 'uuid pkt))

;; add a child to 263e
(let-values (((uuid pkt)
	      (construct-pkt 'x "X" 'y "Y" 'P "263eb3b6193de7fe65b1ded5bcda513e8b4d6b84"
			     'D "1486332719.0")))
  (add-to-queue db pkt uuid 'basic "263eb3b6193de7fe65b1ded5bcda513e8b4d6b84" 0))

(test #f '("263eb3b6193de7fe65b1ded5bcda513e8b4d6b84" "818fe30988c9673441b8f203972a8bda6af682f8")
      (map (lambda (x)(alist-ref 'uuid x))
	   (get-descendents
	    db 0
	    "263eb3b6193de7fe65b1ded5bcda513e8b4d6b84")))

(test #f '("263eb3b6193de7fe65b1ded5bcda513e8b4d6b84" "818fe30988c9673441b8f203972a8bda6af682f8")
      (map (lambda (x)(alist-ref 'uuid x))
	   (get-ancestors
	    db 0
	    "818fe30988c9673441b8f203972a8bda6af682f8")))

(test-end "descendents and ancestors")

(test-end "pkts and pkt archives")

(test-begin "pktsdb")

(define spec '((tests (testname n TEXT)
		      (testpath p TEXT)
		      (duration d INTEGER))))
;; (define pktsdb (make-pktdb))
;; (pktdb-pktsdb-spec-set! pktsdb spec)

(define pktsdb #f)

(test #f #t (dbi:database? (let ((pdb (pktsdb-open "test.db" spec)))
			     (set! pktsdb pdb)
			     (pktdb-conn pdb))))
;; (pp (pktdb-pktspec pktsdb))
(test #f #t (pktsdb-add-record pktsdb 'tests '((testname . "test1"))))

(pktsdb-close pktsdb)

(test-end "pktsdb")

Modified client.scm from [2a6738b25e] to [cc83111095].

25
26
27
28
29
30
31









32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64










65

66
67

68
69
70
71

72
73
74
75
76
77
78
79
80
81
82
83


84


85

86
87
88
89
90
91
92
93







94
95
96
97



98
99
100


101
102

103
104
105


106
107
108
109



110
111

112
113

114
115
116
117
118

119
120
121
122
123
124
125

126
127
128






















25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57



58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80

81
82

83
84
85
86

87
88
89
90
91
92
93
94
95
96
97


98
99
100
101
102

103
104
105
106





107
108
109
110
111
112
113
114
115


116
117
118
119


120
121


122



123
124




125
126
127
128

129


130



131

132
133
134
135
136
137
138

139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164







+
+
+
+
+
+
+
+
+

















-
-
-













+
+
+
+
+
+
+
+
+
+
-
+

-
+



-
+










-
-
+
+

+
+
-
+



-
-
-
-
-
+
+
+
+
+
+
+


-
-
+
+
+

-
-
+
+
-
-
+
-
-
-
+
+
-
-
-
-
+
+
+

-
+
-
-
+
-
-
-

-
+






-
+



+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
     spiffy-request-vars uri-common intarweb directory-utils)

(declare (unit client))

(declare (uses common))
(declare (uses db))
(declare (uses tasks)) ;; tasks are where stuff is maintained about what is running.
(declare (uses commonmod))
(import commonmod)

(module client
*

)

(import client)

(include "common_records.scm")
(include "db_records.scm")

;; client:get-signature
(define (client:get-signature)
  (if *my-client-signature* *my-client-signature*
      (let ((sig (conc (get-host-name) " " (current-process-id))))
	(set! *my-client-signature* sig)
	*my-client-signature*)))

;; Not currently used! But, I think it *should* be used!!!
#;(define (client:logout serverdat)
  (let ((ok (and (socket? serverdat)
		 (cdb:logout serverdat *toppath* (client:get-signature)))))
    ok))

(define (client:setup areapath #!key (remaining-tries 100) (failed-connects 0))
  (client:setup-http areapath remaining-tries: remaining-tries failed-connects: failed-connects))

;; Do all the connection work, look up the transport type and set up the
;; connection if required.
;;
;; There are two scenarios. 
;;   1. We are a test manager and we received *transport-type* and *runremote* via cmdline
;;   2. We are a run tests, list runs or other interactive process and we must figure out
;;      *transport-type* and *runremote* from the monitor.db
;;
;; client:setup
;;
;; lookup_server, need to remove *runremote* stuff
;;
 
;;(define (http-transport:server-dat-make-url runremote)
(define (client:get-url runremote)
  (if (and (remote-iface runremote)
	   (remote-port  runremote))
      (conc "http://" 
	    (remote-iface runremote)
	    ":"
	    (remote-port  runremote))
      #f))

(define (client:setup-http areapath #!key (remaining-tries 100) (failed-connects 0)(area-dat #f))
(define (client:setup areapath runremote #!key (remaining-tries 100) (failed-connects 0)(area-dat #f))
  (mutex-lock! *rmt-mutex*)
  (let ((res (client:setup-http-baby areapath remaining-tries: remaining-tries failed-connects: failed-connects area-dat: area-dat)))
  (let ((res (client:setup-http areapath runremote remaining-tries: remaining-tries failed-connects: failed-connects area-dat: area-dat)))
    (mutex-unlock! *rmt-mutex*)
    res))

(define (client:setup-http-baby areapath #!key (remaining-tries 100) (failed-connects 0)(area-dat #f))
(define (client:setup-http areapath runremote #!key (remaining-tries 100) (failed-connects 0))
  (debug:print-info 2 *default-log-port* "client:setup remaining-tries=" remaining-tries)
  (server:start-and-wait areapath)
  (if (<= remaining-tries 0)
      (begin
	(debug:print-error 0 *default-log-port* "failed to start or connect to server")
	(exit 1))
      ;;
      ;; Alternatively here, we can get the list of candidate servers and work our way
      ;; through them searching for a good one.
      ;;
      (let* ((server-dat (server:choose-server areapath 'best))
	     (runremote  (or area-dat *runremote*)))
      (let* ((server-dat (server:choose-server areapath 'best))) ;; list host port start-time server-id pid
;;	     (runremote  (or area-dat *runremote*)))
	(if (not server-dat) ;; no server found
	    (begin
	      (if (< remaining-tries 99)(thread-sleep! 1)) ;; obviously it needs time
	    (client:setup-http-baby areapath remaining-tries: (- remaining-tries 1))
	      (client:setup-http areapath runremote remaining-tries: (- remaining-tries 1)))
	    (match server-dat
	      ((host port start-time server-id pid)
	       (debug:print-info 4 *default-log-port* "client:setup server-dat=" server-dat ", remaining-tries=" remaining-tries)
	       (if (and (not area-dat)
			(not *runremote*))
                   (begin       
		     (set! *runremote* (make-remote))
                     (let* ((server-info (remote-server-info *runremote*))) 
	       (if (not runremote)
                   (begin
		     ;; Here we are creating a runremote where there was none or it was clobbered with #f
		     ;;
		     (set! runremote (make-and-init-remote))
                     (let* ((server-info (server:check-if-running areapath)))
		       (remote-server-info-set! runremote server-info)
                       (if server-info
                           (begin
                             (remote-server-url-set! *runremote* (server:record->url server-info))
                             (remote-server-id-set! *runremote* (server:record->id server-info)))))))
                             (remote-server-url-set! runremote (server:record->url server-info))
                             (remote-server-id-set! runremote (server:record->id server-info)))))))
	       ;; at this point we have a runremote
	       (if (and host port server-id)
		   (let* ((start-res (http-transport:client-connect host port server-id))
			  (ping-res  (rmt:login-no-auto-client-setup start-res)))
		   (let* ((nada     (client:connect host port server-id runremote))
			  (ping-res (rmt:login-no-auto-client-setup runremote)))
		     (if (and start-res
			      ping-res)
		     (if ping-res
			 (let ((runremote (or area-dat *runremote*))) ;; it might have been generated only a few statements ago
			   (if runremote
			       (begin
			 (if runremote
			     (begin
				 (remote-conndat-set! runremote start-res) ;; (hash-table-set! runremote run-id start-res)
				 (debug:print-info 2 *default-log-port* "connected to " (http-transport:server-dat-make-url start-res))
				 start-res)
			       (client:setup-http-baby areapath remaining-tries: (- remaining-tries 1))))
			       (debug:print-info 2 *default-log-port* "connected to " (client:get-url runremote))
			       runremote)
			     (client:setup-http areapath runremote remaining-tries: (- remaining-tries 1)))
			 (begin    ;; login failed but have a server record, clean out the record and try again
			   (debug:print-info 0 *default-log-port* "client:setup, login unsuccessful, will attempt to start server ... start-res=" start-res ", server-dat=" server-dat) ;; had runid.  Fixes part of Randy;s ticket 1405717332
			   (debug:print-info 0 *default-log-port* "client:setup, login unsuccessful, will attempt to start server ... ping-res=" ping-res ", server-dat=" server-dat) ;; had runid.  Fixes part of Randy;s ticket 1405717332
			   (case *transport-type* 
			     ((http)(http-transport:close-connections)))
			   (http-transport:close-connections runremote)
                           (if *runremote* 
			       (remote-conndat-set! runremote #f)  ;; (hash-table-delete! runremote run-id)
                               )
			   (thread-sleep! 1)
			   (client:setup-http-baby areapath remaining-tries: (- remaining-tries 1))
			   (client:setup-http areapath runremote remaining-tries: (- remaining-tries 1))
			   )))
		   (begin    ;; no server registered
		     ;; (server:kind-run areapath)
		     (server:start-and-wait areapath)
		     (debug:print-info 0 *default-log-port* "client:setup, no server registered, remaining-tries=" remaining-tries)
		     (thread-sleep! 1) ;; (+ 5 (random (- 20 remaining-tries))))  ;; give server a little time to start up, randomize a little to avoid start storms.
		     (client:setup-http-baby areapath remaining-tries: (- remaining-tries 1)))))
		     (client:setup-http areapath runremote remaining-tries: (- remaining-tries 1)))))
	      (else
	       (debug:print 0 *default-log-port* "ERROR: malformed server-dat="server-dat)))))))

;;
;; connect - stored in remote-condat
;;
;; (define (http-transport:client-connect iface port server-id runremote)
(define (client:connect iface port server-id runremote-in)
  (let* ((runremote (or runremote-in
			(make-runremote))))
    (debug:print-info 2 *default-log-port* "Connecting to server at "iface":"port", id "server-id)
    (let* ((api-url      (conc "http://" iface ":" port "/api"))
	   (api-uri      (uri-reference (conc "http://" iface ":" port "/api")))
	   (api-req      (make-request method: 'POST uri: api-uri)))
      ;;	 (server-dat   (vector iface port api-uri api-url api-req (current-seconds) server-id)))
      (remote-iface-set!   runremote iface)
      (remote-port-set!    runremote port)
      (remote-server-id-set! runremote server-id)
      (remote-connect-time-set! runremote (current-seconds))
      (remote-last-access-set! runremote (current-seconds))
      (remote-api-url-set! runremote api-url)
      (remote-api-uri-set! runremote api-uri)
      (remote-api-req-set! runremote api-req)
      runremote)))

Modified common.scm from [5559976353] to [1402f87c8b].

22
23
24
25
26
27
28


29
30
31
32
33
34
35
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37







+
+







     format dot-locking csv-xml z3 udp ;; sql-de-lite
     hostinfo md5 message-digest typed-records directory-utils stack
     matchable regex posix (srfi 18) extras ;; tcp 
     (prefix nanomsg nmsg:)
     (prefix sqlite3 sqlite3:)
     pkts (prefix dbi dbi:)
     )
(use posix-extras pathname-expand files)


(declare (unit common))
(declare (uses commonmod))
(import commonmod)

(include "common_records.scm")

208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
210
211
212
213
214
215
216


217
218
219
220
221
222
223







-
-







(define *launch-setup-mutex* (make-mutex))     ;; need to be able to call launch:setup often so mutex it and re-call the real deal only if *toppath* not set
(define *homehost-mutex*     (make-mutex))

;; Miscellaneous
(define *triggers-mutex*     (make-mutex))     ;; block overlapping processing of triggers
(define *numcpus-cache* (make-hash-table))

(use posix-extras pathname-expand files)

;; this plugs a hole in posix-extras in recent chicken versions > 4.9)
(let-values (( (chicken-release-number chicken-major-version)
               (apply values
                      (map string->number
                           (take
                            (string-split (chicken-version) ".")
                            2)))))
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
247
248
249
250
251
252
253























254
255
256
257
258
259
260







-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-







(define *common:this-exe-name*     (pathname-strip-directory *common:this-exe-fullpath*))

(define (common:get-sync-lock-filepath)
  (let* ((tmp-area     (common:get-db-tmp-area))
         (lockfile     (conc tmp-area "/megatest.db.sync-lock")))
    lockfile))

;;======================================================================
;; when called from a wrapper I need sometimes to find the calling
;; wrapper, this is for dashboard to find the correct megatest.
;;
(define (common:find-local-megatest #!optional (progname "megatest"))
  (let ((res (filter file-exists?
		     (map (lambda (updir)
			    (let* ((lm  (car (argv)))
				   (dir (pathname-directory lm))
				   (exe (pathname-strip-directory lm)))
			      (conc (if dir (conc dir "/") "")
				    (case (string->symbol exe)
				      ((dboard)    (conc updir progname))
				      ((mtest)     (conc updir progname))
				      ((dashboard) progname)
				      (else exe)))))
			  '("../../" "../")))))
    (if (null? res)
	(begin
	  (debug:print 0 *default-log-port* "Failed to find this executable! Using what can be found on the path")
	  progname)
	(car res))))

(define *common:logpro-exit-code->status-sym-alist*
  '( ( 0 . pass )
     ( 1 . fail )
     ( 2 . warn )
     ( 3 . check )
     ( 4 . waived )
     ( 5 . abort )
313
314
315
316
317
318
319

320





321
322
323
324
325
326
327

328
329
330



331
332
333
334
335








336
337
338
339
340
341
342
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309

310
311


312
313
314

315
316
317

318
319
320
321
322
323
324
325
326
327
328
329
330
331
332







+

+
+
+
+
+






-
+

-
-
+
+
+
-



-
+
+
+
+
+
+
+
+







    ((abort) "ABORT")
    ((skip) "SKIP")
    (else "FAIL")))

(define (common:logpro-exit-code->test-status exit-code)
  (status-sym->string (common:logpro-exit-code->status-sym exit-code)))

;; 
(defstruct remote

  ;; transport to be used
  ;; http              - use http-transport
  ;; http-read-cached  - use http-transport for writes but in-mem cached for reads
  (rmode            'http)
  (hh-dat            (let ((res (or (server:choose-server *toppath* 'homehost)
				    (cons #f #f))))
		       (assert (pair? res)(conc "FATAL: hh-dat should be a pair, got "res))
		       res))
  (server-url        #f) ;; (server:check-if-running *toppath*) #f))
  (server-id         #f)
  (server-info       (if *toppath* (server:check-if-running *toppath*) #f))
  (server-info       #f) ;; (if *toppath* (server:check-if-running *toppath*) #f))
  (last-server-check 0)  ;; last time we checked to see if the server was alive
  (connect-time      (current-seconds))
  (conndat           #f)
  (connect-time      (current-seconds)) ;; when we first connected
  (last-access       (current-seconds)) ;; last time we talked to server
  ;; (conndat           #f) ;; iface port api-uri api-url api-req seconds server-id
  (transport         *transport-type*)
  (server-timeout    (server:expiration-timeout))
  (force-server      #f)
  (ro-mode           #f)  
  (ro-mode-checked   #f)) ;; flag that indicates we have checked for ro-mode
  (ro-mode-checked   #f) ;; flag that indicates we have checked for ro-mode

  ;; conndat stuff
  (iface             #f) ;; TODO: Consolidate this data with server-url and server-info above
  (port              #f)
  (api-url           #f)
  (api-uri           #f)
  (api-req           #f))

;; launching and hosts
(defstruct host
  (reachable    #f)
  (last-update  0)
  (last-used    0)
  (last-cpuload 1))
406
407
408
409
410
411
412



413

414
415







416
417
418
419
420
421


422
423
424
425
426
427
428
429


















430
431
432
433
434
435
436
396
397
398
399
400
401
402
403
404
405
406
407


408
409
410
411
412
413
414
415
416
417
418
419
420
421
422








423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447







+
+
+

+
-
-
+
+
+
+
+
+
+






+
+
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+







(define (common:version-db-delta)
  (- megatest-version (common:get-last-run-version-number)))

(define (common:version-changed?)
  (not (equal? (common:get-last-run-version)
               (common:version-signature))))


;; From 1.70 to 1.80, db's are compatible.

(define (common:api-changed?)
  (let* (
  (not (equal? (substring (->string megatest-version) 0 4)
               (substring (conc (common:get-last-run-version)) 0 4))))
    (megatest-major-version (substring (->string megatest-version) 0 4))
    (run-major-version (substring (conc (common:get-last-run-version)) 0 4))
   )
   (and (not (equal? megatest-major-version "1.80"))
     (not (equal? megatest-major-version megatest-run-version)))
  )
)

;;======================================================================
;; Move me elsewhere ...
;; RADT => Why do we meed the version check here, this is called only if version misma
;;
(define (common:cleanup-db dbstruct #!key (full #f))
  (case (rmt:transport-mode)
    ((http)
  (apply db:multi-db-sync 
   dbstruct
   'schema
   'killservers
   'adj-target
   'new2old
   '(dejunk)
  )
     (apply db:multi-db-sync 
	    dbstruct
	    'schema
	    'killservers
	    'adj-target
	    'new2old
	    '(dejunk)
	    ))
    ((tcp nfs)
     (debug:print 0 *default-log-port* "WARNING: cleanup-db NOT implemented yet for tcp and nfs.")
     #;(apply db:multi-db-sync 
	    dbstruct
	    'schema
	    'killservers
	    'adj-target
	    'new2old
	    '(dejunk)
	    )))
  (if (common:api-changed?)
      (common:set-last-run-version)))

(define (common:snapshot-file filepath #!key (subdir  ".") )
  (if (file-exists? filepath)
      (let* ((age-sec  (lambda (file)
                         (if (file-exists? file)
518
519
520
521
522
523
524
525

526
527
528
529
530
531
532
529
530
531
532
533
534
535

536
537
538
539
540
541
542
543







-
+







;;          logs directory you wish to log-rotate.
;;
(define (common:rotate-logs)
  (let* ((all-files (make-hash-table))
	 (stats     (make-hash-table))
	 (inc-stat  (lambda (key)
		      (hash-table-set! stats key (+ (hash-table-ref/default stats key 0) 1))))
	(max-allowed (string->number (or (configf:lookup *configdat* "setup" "max-logfiles") "300")))) ;; name -> age
	(max-allowed (string->number (or (configf:lookup *configdat* "setup" "max-logfiles") "600")))) ;; name -> age
    (if (not (directory-exists? "logs"))(create-directory "logs"))
    (directory-fold 
     (lambda (file rem)
       (handle-exceptions
	exn
	(begin
	  (debug:print-info 2 *default-log-port* "unable to rotate log " file ", probably handled by another process, this is safe to ignore. exn=" exn)
597
598
599
600
601
602
603

604

605
606
607

608
609
610
611
612
613
614
608
609
610
611
612
613
614
615

616
617
618

619
620
621
622
623
624
625
626







+
-
+


-
+







	    (debug:print-info 0 *default-log-port* "Deleted " (length files) " files from logs, keeping " max-allowed " files."))))))

;;======================================================================
;; Force a megatest cleanup-db if version is changed and skip-version-check not specified
;; Do NOT check if not on homehost!
;;
(define (common:exit-on-version-changed)
  (if (and *toppath*              ;; do nothing if *toppath* not yet provided
  (if (common:on-homehost?)
	   (common:on-homehost?))
      (if (common:api-changed?)
	  (let* ((mtconf (conc (get-environment-variable "MT_RUN_AREA_HOME") "/megatest.config"))
                 (dbfile  (conc (get-environment-variable "MT_RUN_AREA_HOME") "/megatest.db"))
                 (dbfile  (conc (get-environment-variable "MT_RUN_AREA_HOME") ".megatest/main.db"))
                 (read-only (not (file-write-access? dbfile)))
                 (dbstruct (db:setup #t))) ;; (db:setup-db *dbstruct-dbs* *toppath* #f))) ;;  #t)))
	    (debug:print 0 *default-log-port*
			 "WARNING: Version mismatch!\n"
			 "   expected: " (common:version-signature) "\n"
			 "   got:      " (common:get-last-run-version))
            (cond
624
625
626
627
628
629
630
631

632
633
634

635
636
637
638
639
640
641
636
637
638
639
640
641
642

643
644
645

646
647
648
649
650
651
652
653







-
+


-
+







                 (print-call-chain (current-error-port))
                 (exit 1))
               (common:cleanup-db dbstruct)))
             ((not (common:file-exists? mtconf))
              (debug:print 0 *default-log-port* "   megatest.config does not exist in this area.  Cannot proceed with megatest version migration.")
              (exit 1))
             ((not (common:file-exists? dbfile))
              (debug:print 0 *default-log-port* "   megatest.db does not exist in this area.  Cannot proceed with megatest version migration.")
              (debug:print 0 *default-log-port* "   .megatest/main.db does not exist in this area.  Cannot proceed with megatest version migration.")
              (exit 1))
             ((not (eq? (current-user-id)(file-owner mtconf)))
              (debug:print 0 *default-log-port* "   You do not own megatest.db in this area.  Cannot proceed with megatest version migration.")
              (debug:print 0 *default-log-port* "   You do not own .megatest/main.db in this area.  Cannot proceed with megatest version migration.")
              (exit 1))
             (read-only
              (debug:print 0 *default-log-port* "   You have read-only access to this area.  Cannot proceed with megatest version migration.")
              (exit 1))
             (else
              (debug:print 0 *default-log-port* " to switch versions you can run: \"megatest -cleanup-db\"")
              (exit 1)))))))
708
709
710
711
712
713
714


715
716
717
718
719
720
721
722
723









724
725
726
727
728
729
730
731
732
733
720
721
722
723
724
725
726
727
728









729
730
731
732
733
734
735
736
737



738
739
740
741
742
743
744







+
+
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
-
-
-







    (if dat
	dat
	""))))

(define (common:alist-ref/default key alist default)
  (or (alist-ref key alist) default))

;; moved into commonmod
;;
(define (common:low-noise-print waitval . keys)
  (let* ((key      (string-intersperse (map conc keys) "-" ))
	 (lasttime (hash-table-ref/default *common:denoise* key 0))
	 (currtime (current-seconds)))
    (if (> (- currtime lasttime) waitval)
	(begin
	  (hash-table-set! *common:denoise* key currtime)
	  #t)
	#f)))
;; (define (common:low-noise-print waitval . keys)
;;   (let* ((key      (string-intersperse (map conc keys) "-" ))
;; 	 (lasttime (hash-table-ref/default *common:denoise* key 0))
;; 	 (currtime (current-seconds)))
;;     (if (> (- currtime lasttime) waitval)
;; 	(begin
;; 	  (hash-table-set! *common:denoise* key currtime)
;; 	  #t)
;; 	#f)))

(define (common:get-megatest-exe)
  (or (getenv "MT_MEGATEST") "megatest"))

(define (common:read-encoded-string instr)
  (handle-exceptions
   exn
   (handle-exceptions
    exn
    (begin
944
945
946
947
948
949
950

951

952
953
954
955
956

957
958
959
960

961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984

985
986


987
988
989
990
991
992
993
994
995
996
955
956
957
958
959
960
961
962

963
964
965
966
967

968
969
970
971

972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988



989
990
991
992
993
994


995
996



997
998
999
1000
1001
1002
1003







+
-
+




-
+



-
+
















-
-
-





+
-
-
+
+
-
-
-







      *db-cache-path*
      (if *toppath* ;; common:get-create-writeable-dir
	  (handle-exceptions
	      exn
	      (begin
		(debug:print-error 0 *default-log-port* "Couldn't create path to " *db-cache-path* ", exn=" exn)
		(exit 1))
	      (let* ((toppath (common:real-path *toppath*))
	      (let* ((tsname (common:get-testsuite-name))
		     (tsname (common:get-testsuite-name))
		     (dbpath (common:get-create-writeable-dir
			      (list (conc "/tmp/" (current-user-name)
					  "/megatest_localdb/"
					  tsname "/"
					  (string-translate *toppath* "/" "."))
					  (string-translate toppath "/" "."))
				    (conc "/tmp/" (current-process-id) ;; just in case we have an issue with the dir by own user name
					  "/megatest_localdb/"
					  tsname
					  (string-translate *toppath* "/" "."))
					  (string-translate toppath "/" "."))
				    ))))
		(set! *db-cache-path* dbpath)
		;; ensure megatest area has .megatest
		(let ((dbarea (conc *toppath* "/.megatest")))
		  (if (not (file-exists? dbarea))
		      (create-directory dbarea)))
		;; ensure tmp area has .megatest
		(let ((dbarea (conc dbpath "/.megatest")))
		  (if (not (file-exists? dbarea))
		      (create-directory dbarea)))
		dbpath))
	  #f)))

(define (common:get-area-path-signature)
  (message-digest-string (md5-primitive) *toppath*))

(define (common:get-signature str)
  (message-digest-string (md5-primitive) str))

;;======================================================================
;; E X I T   H A N D L I N G
;;======================================================================

(define (common:run-sync?)
  (and *toppath*               ;; gate if called before *toppath* is set
    (and (common:on-homehost?)
	 (args:get-arg "-server")))
       (common:on-homehost?)
       (args:get-arg "-server")))

(define (common:human-time)
  (time->string (seconds->local-time (current-seconds)) "%Y-%m-%d %H:%M:%S"))


(define (std-signal-handler signum)
  ;; (signal-mask! signum)
  (set! *time-to-exit* #t) 
  ;;(debug:print-info 13 *default-log-port* "got signal "signum)
  (debug:print-error 0 *default-log-port* "Received signal " signum " aaa exiting promptly")
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1053
1054
1055
1056
1057
1058
1059
















1060
1061
1062
1063
1064
1065
1066







-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-







;;======================================================================
;; '(print (string-intersperse (map cadr (hash-table-ref/default (read-config "megatest.config" \#f \#t) "disks" '"'"'("none" ""))) "\n"))'
(define (common:get-disks #!key (configf #f))
  (hash-table-ref/default 
   (or configf (read-config "megatest.config" #f #t))
   "disks" '("none" "")))

;;======================================================================
;; return first command that exists, else #f
;;
(define (common:which cmds)
  (if (null? cmds)
      #f
      (let loop ((hed (car cmds))
		 (tal (cdr cmds)))
	(let ((res (with-input-from-pipe (conc "which " hed) read-line)))
	  (if (and (string? res)
		   (common:file-exists? res))
	      res
	      (if (null? tal)
		  #f
		  (loop (car tal)(cdr tal))))))))
  
(define (common:get-install-area)
  (let ((exe-path (car (argv))))
    (if (common:file-exists? exe-path)
	(handle-exceptions
	 exn
	 #f
	 (pathname-directory
1343
1344
1345
1346
1347
1348
1349
1350

1351
1352
1353
1354
1355
1356
1357
1334
1335
1336
1337
1338
1339
1340

1341
1342
1343
1344
1345
1346
1347
1348







-
+







					#t
					#f))
			  (else
			   (debug:print 0 *default-log-port* "ERROR: Bad server force setting " force-setting ", forcing server.")
			   #t)))) ;; default to requiring server
    (if force-result
	(begin
	  (debug:print-info 0 *default-log-port* "forcing use of server, force setting is \"" force-setting "\".")
	  (debug:print-info 0 *default-log-port* "ATTENTION! Forcing use of server, force setting is \"" force-setting "\".")
	  #t)
	#f)))

;;======================================================================
;; M I S C   L I S T S
;;======================================================================

1596
1597
1598
1599
1600
1601
1602




















1603
1604
1605
1606
1607
1608
1609
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620







+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+







      (begin
	(debug:print-error 0 *default-log-port* "command \"/bin/readlink -f " path "\" failed. exn=" exn)
	path) ;; just give up
    (with-input-from-pipe
	(conc "/bin/readlink -f " path)
      (lambda ()
	(read-line)))))

;; for reasons I don't understand multiple calls to real-path in parallel threads
;; must be protected by mutexes
;;
(define (common:real-path inpath)
  ;; (process:cmd-run-with-stderr->list "readlink" "-f" inpath)) ;; cmd . params)
  ;; (let-values 
  ;;  (((inp oup pid) (process "readlink" (list "-f" inpath))))
  ;;  (with-input-from-port inp
  ;;    (let loop ((inl (read-line))
  ;;       	(res #f))
  ;;      (print "inl=" inl)
  ;;      (if (eof-object? inl)
  ;;          (begin
  ;;            (close-input-port inp)
  ;;            (close-output-port oup)
  ;;            ;; (process-wait pid)
  ;;            res)
  ;;          (loop (read-line) inl))))))
  (with-input-from-pipe (conc "readlink -f " inpath) read-line))

;;======================================================================
;; returns *effective load* (not normalized)
;;
(define (common:get-intercept onemin fivemin)
  (if (< onemin fivemin) ;; load is decreasing, just use the onemin load
      onemin
1987
1988
1989
1990
1991
1992
1993









1994
1995
1996
1997
1998
1999








2000
2001
2002
2003
2004
2005
2006
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013






2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028







+
+
+
+
+
+
+
+
+
-
-
-
-
-
-
+
+
+
+
+
+
+
+







		  (begin ;; found a host, return it
		    (debug:print 0 *default-log-port* "INFO: Found host: " new-best " load: " load " last-used: " delta " seconds ago, with job-rate: " job-rate)
		    (host-last-used-set! rec curr-time)
		    new-best)
		  (if (null? tal) #f (loop (car tal)(cdr tal) best-host)))))))))

(define (common:wait-for-homehost-load maxnormload msg)
  (let loop ((start-time (current-seconds))) ;; we saw some instances of this being called before *toppath* was set. This might be an early setup race. This delay should help but it is impossible to test...
    (if (not *toppath*)
	(begin
	  (debug:print 0 *default-log-port* "ERROR: common:wait-for-homehost-load called before *toppath* set.")
	  (thread-sleep! 30)
	  (if (< (- (current-seconds) start-time) 300)
	      (loop start-time)))))
  (case (rmt:transport-mode)
    ((http)
  (let* ((hh-dat (if (common:on-homehost?) ;; if we are on the homehost then pass in #f so the calls are local.
                     #f
                     (server:choose-server *toppath* 'homehost)))
         (hh     (if hh-dat (car hh-dat) #f)))
    (common:wait-for-normalized-load maxnormload msg hh)))

     (let* ((hh-dat (if (common:on-homehost?) ;; if we are on the homehost then pass in #f so the calls are local.
			#f
			(server:choose-server *toppath* 'homehost)))
            (hh     (if hh-dat (car hh-dat) #f)))
       (common:wait-for-normalized-load maxnormload msg hh)))
    (else
     (common:wait-for-normalized-load maxnormload msg (get-host-name)))))
    
(define (common:get-num-cpus remote-host)
  (let* ((actual-host (or remote-host (get-host-name))))
    ;; hosts had better not be changing the number of cpus too often!
    (or (hash-table-ref/default *numcpus-cache* actual-host #f)
	(let* ((numcpus (or (common:get-cached-info actual-host "num-cpus" age: (+ 2592000 (random 3600)))
			    (let* ((proc   (lambda ()
					     (let loop ((numcpu 0)
2216
2217
2218
2219
2220
2221
2222
2223
2224
2225
2226
2227
2228
2229
2230
2231
2232
2233
2234
2235
2236
2237
2238
2239
2240
2241
2242
2243
2244
2245
2246
2247
2248
2249
2238
2239
2240
2241
2242
2243
2244




















2245
2246
2247
2248
2249
2250
2251







-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-







(define (get-uname . params)
  (let* ((uname-res (process:cmd-run->list (conc "uname " (if (null? params) "-a" (car params)))))
	 (uname #f))
    (if (null? (car uname-res))
	"unknown"
	(caar uname-res))))

;; for reasons I don't understand multiple calls to real-path in parallel threads
;; must be protected by mutexes
;;
(define (common:real-path inpath)
  ;; (process:cmd-run-with-stderr->list "readlink" "-f" inpath)) ;; cmd . params)
  ;; (let-values 
  ;;  (((inp oup pid) (process "readlink" (list "-f" inpath))))
  ;;  (with-input-from-port inp
  ;;    (let loop ((inl (read-line))
  ;;       	(res #f))
  ;;      (print "inl=" inl)
  ;;      (if (eof-object? inl)
  ;;          (begin
  ;;            (close-input-port inp)
  ;;            (close-output-port oup)
  ;;            ;; (process-wait pid)
  ;;            res)
  ;;          (loop (read-line) inl))))))
  (with-input-from-pipe (conc "readlink -f " inpath) read-line))

;;======================================================================
;; D I S K   S P A C E 
;;======================================================================

(define (common:get-disk-space-used fpath)
  (with-input-from-pipe (conc "/usr/bin/du -s " fpath) read))

2617
2618
2619
2620
2621
2622
2623
2624
2625
2626
2627
2628
2629
2630
2631
2632
2633
2634
2635
2636
2637
2638
2639
2640
2641
2642
2643
2644
2645
2646
2647
2648
2649
2650
2651
2652
2653
2654
2655
2656
2657
2658
2659
2660
2661
2662
2663
2664
2665
2666
2667
2668
2669
2670
2671
2672
2673
2674
2675
2676
2677
2678
2679
2680
2681
2682
2683
2684
2685
2686
2687
2688
2689
2690
2691
2692
2693
2694
2695
2696
2697
2698
2699
2700
2701
2702
2703
2704
2705
2706
2707
2708
2709
2710
2711
2712
2713
2714
2715
2716
2717
2718
2719
2720
2721
2722
2723
2724
2725
2726
2727
2728
2729
2730
2731
2732
2733
2734
2735
2736
2737
2738
2739
2740
2741
2742
2743
2744
2745
2746
2747
2748
2749
2750
2751
2752
2753
2754
2755
2756
2757
2758
2759
2760
2761
2762
2763
2764
2765
2766
2767
2768
2769
2770
2771
2772
2773
2774
2775
2776
2777
2778
2779
2780
2781
2782
2783
2784
2785
2786
2787
2788
2789
2790
2791
2792
2793
2794
2795
2796
2797
2798
2799
2800
2801
2802
2803
2804
2805
2806
2807
2808
2809
2810
2811
2812
2813
2814
2815
2816
2817
2818
2819
2820
2821
2822
2823
2824
2825
2826
2827
2828
2829
2830
2831
2832
2833
2834
2835
2836
2837
2838
2839
2840
2841
2842
2843
2844
2845
2846
2847
2848
2849
2850
2851
2852
2853
2854
2855
2856
2857
2858
2859
2860
2861
2862
2863
2864
2865
2866
2867
2868
2869
2870
2871
2872
2873
2874
2875
2876
2877
2878
2879
2880
2881
2882
2883
2884
2885
2886
2887
2888
2889
2890
2891
2892
2893
2894
2895
2896
2897
2898
2899
2900
2901
2902
2903
2904
2905
2906
2907
2908
2909
2910
2911
2619
2620
2621
2622
2623
2624
2625

























































































































































































































































































2626
2627
2628
2629
2630
2631
2632







-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-







                       (conc "viewscreen " cmd))))
    (debug:print-info 02 *default-log-port* "Running command: " fullcmd)
    (cond
     (with-vars     (common:without-vars  fullcmd))
     (with-orig-env (common:with-orig-env fullcmd))
     (else          (common:without-vars  fullcmd "MT_.*")))))
		  
;;======================================================================
;; T I M E   A N D   D A T E
;;======================================================================

;;======================================================================
;; Convert strings like "5s 2h 3m" => 60x60x2 + 3x60 + 5
(define (common:hms-string->seconds tstr)
  (let ((parts     (string-split-fields "\\w+" tstr))
	(time-secs 0)
	;; s=seconds, m=minutes, h=hours, d=days, M=months, y=years, w=weeks
	(trx       (regexp "(\\d+)([smhdMyw])")))
    (for-each (lambda (part)
		(let ((match  (string-match trx part)))
		  (if match
		      (let ((val (string->number (cadr match)))
			    (unt (caddr match)))
			(if val 
			    (set! time-secs (+ time-secs (* val
							    (case (string->symbol unt)
							      ((s) 1)
							      ((m) 60) ;; minutes
							      ((h) 3600)
							      ((d) 86400)
							      ((w) 604800)
							      ((M) 2628000) ;; aproximately one month
							      ((y) 31536000)
							      (else #f))))))))))
	      parts)
    time-secs))
		       
(define (seconds->hr-min-sec secs)
  (let* ((hrs (quotient secs 3600))
	 (min (quotient (- secs (* hrs 3600)) 60))
	 (sec (- secs (* hrs 3600)(* min 60))))
    (conc (if (> hrs 0)(conc hrs "hr ") "")
	  (if (> min 0)(conc min "m ")  "")
	  sec "s")))

(define (seconds->time-string sec)
  (time->string 
   (seconds->local-time sec) "%H:%M:%S"))

(define (seconds->work-week/day-time sec)
  (time->string
   (seconds->local-time sec) "ww%V.%u %H:%M"))

(define (seconds->work-week/day sec)
  (time->string
   (seconds->local-time sec) "ww%V.%u"))

(define (seconds->year-work-week/day sec)
  (time->string
   (seconds->local-time sec) "%yww%V.%w"))

(define (seconds->year-work-week/day-time sec)
  (time->string
   (seconds->local-time sec) "%Yww%V.%w %H:%M"))

(define (seconds->year-week/day-time sec)
  (time->string
   (seconds->local-time sec) "%Yw%V.%w %H:%M"))

(define (seconds->quarter sec)
  (case (string->number
	 (time->string 
	  (seconds->local-time sec)
	  "%m"))
    ((1 2 3) 1)
    ((4 5 6) 2)
    ((7 8 9) 3)
    ((10 11 12) 4)
    (else #f)))

;;======================================================================
;; basic ISO8601 format (e.g. "2017-02-28 06:02:54") date time => Unix epoch
;;
(define (common:date-time->seconds datetime)
  (local-time->seconds (string->time datetime "%Y-%m-%d %H:%M:%S")))

;;======================================================================
;; given span of seconds tstart to tend
;; find start time to mark and mark delta
;;
(define (common:find-start-mark-and-mark-delta tstart tend)
  (let* ((deltat   (- (max tend (+ tend 10)) tstart)) ;; can't handle runs of less than 4 seconds. Pad it to 10 seconds ...
	 (result   #f)
	 (min      60)
	 (hr       (* 60 60))
	 (day      (* 24 hr))
	 (yr       (* 365 day)) ;; year
	 (mo       (/ yr 12))
	 (wk       (* day 7)))
    (for-each
     (lambda (max-blks)
       (for-each
	(lambda (span) ;; 5 2 1
	  (if (not result)
	      (for-each 
	       (lambda (timeunit timesym) ;; year month day hr min sec
		 (if (not result)
		     (let* ((time-blk (* span timeunit))
			    (num-blks (quotient deltat time-blk)))
		       (if (and (> num-blks 4)(< num-blks max-blks))
			   (let ((first (* (quotient tstart time-blk) time-blk)))
			     (set! result (list span timeunit time-blk first timesym))
			     )))))
	       (list yr mo wk day hr min 1)
	       '(     y  mo w  d   h  m   s))))
	(list 8 6 5 2 1)))
     '(5 10 15 20 30 40 50 500))
    (if values
	(apply values result)
	(values 0 day 1 0 'd))))

;;======================================================================
;; given x y lim return the cron expansion
;;
(define (common:expand-cron-slash x y lim)
  (let loop ((curr x)
	     (res  `()))
    (if (< curr lim)
	(loop (+ curr y) (cons curr res))
	(reverse res))))

;;======================================================================
;; expand a complex cron string to a list of cron strings
;;
;;  x/y   => x, x+y, x+2y, x+3y while x+Ny<max_for_field
;;  a,b,c => a, b ,c
;;
;;   NOTE: with flatten a lot of the crud below can be factored down.
;;
(define (common:cron-expand cron-str)
  (if (list? cron-str)
      (flatten
       (fold (lambda (x res)
	       (if (list? x)
		   (let ((newres (map common:cron-expand x)))
		     (append x newres))
		   (cons x res)))
	     '()
	     cron-str)) ;; (map common:cron-expand cron-str))
      (let ((cron-items (string-split cron-str))
	    (slash-rx   (regexp "(\\d+)/(\\d+)"))
	    (comma-rx   (regexp ".*,.*"))
	    (max-vals   '((min        . 60)
			  (hour       . 24)
			  (dayofmonth . 28) ;;; BUG!!!! This will be a bug for some combinations
			  (month      . 12)
			  (dayofweek  . 7))))
	(if (< (length cron-items) 5) ;; bad spec
	    cron-str ;; `(,cron-str)              ;; just return the string, something downstream will fix it
	    (let loop ((hed  (car cron-items))
		       (tal  (cdr cron-items))
		       (type 'min)
		       (type-tal '(hour dayofmonth month dayofweek))
		       (res  '()))
	      (regex-case
		  hed
		(slash-rx ( _ base incr ) (let* ((basen          (string->number base))
						 (incrn          (string->number incr))
						 (expanded-vals  (common:expand-cron-slash basen incrn (alist-ref type max-vals)))
						 (new-list-crons (fold (lambda (x myres)
									 (cons (conc (if (null? res)
											 ""
											 (conc (string-intersperse res " ") " "))
										     x " " (string-intersperse tal " "))
									       myres))
								       '() expanded-vals)))
					    ;; (print "new-list-crons: " new-list-crons)
					    ;; (fold (lambda (x res)
					    ;; 	    (if (list? x)
					    ;; 		(let ((newres (map common:cron-expand x)))
					    ;; 		  (append x newres))
					    ;; 		(cons x res)))
					    ;; 	  '()
					    (flatten (map common:cron-expand new-list-crons))))
		;;					    (map common:cron-expand (map common:cron-expand new-list-crons))))
		(else (if (null? tal)
			  cron-str
			  (loop (car tal)(cdr tal)(car type-tal)(cdr type-tal)(append res (list hed)))))))))))

;;======================================================================
;; given a cron string and the last time event was processed return #t to run or #f to not run
;;
;;  min    hour   dayofmonth month  dayofweek
;; 0-59    0-23   1-31       1-12   0-6          ### NOTE: dayofweek does not include 7
;;
;;  #t => yes, run the job
;;  #f => no, do not run the job
;;
(define (common:cron-event cron-str now-seconds-in last-done) ;; ref-seconds = #f is NOW.
  (let* ((cron-items     (map string->number (string-split cron-str)))
	 (now-seconds    (or now-seconds-in (current-seconds)))
	 (now-time       (seconds->local-time now-seconds))
	 (last-done-time (seconds->local-time last-done))
	 (all-times      (make-hash-table)))
    ;; (print "cron-items: " cron-items "(length cron-items): " (length cron-items))
    (if (not (eq? (length cron-items) 5)) ;; don't even try to figure out junk strings
	#f
	(match-let (((     cmin chour cdayofmonth cmonth    cdayofweek)
		     cron-items)
		    ;; 0     1    2        3         4    5      6
		    ((nsec nmin nhour ndayofmonth nmonth nyr ndayofweek n7 n8 n9)
		     (vector->list now-time))
		    ((lsec lmin lhour ldayofmonth lmonth lyr ldayofweek l7 l8 l9)
		     (vector->list last-done-time)))
	  ;; create all possible time slots
	  ;; remove invalid slots due to (for example) day of week
	  ;; get the start and end entries for the ref-seconds (current) time
	  ;; if last-done > ref-seconds => this is an ERROR!
	  ;; does the last-done time fall in the legit region?
	  ;;    yes => #f  do not run again this command
	  ;;    no  => #t  ok to run the command
	  (for-each ;; month
	   (lambda (month)
	     (for-each ;; dayofmonth
	      (lambda (dom)
		(for-each
		 (lambda (hr) ;; hour
		   (for-each
		    (lambda (minute) ;; minute
		      (let ((copy-now (apply vector (vector->list now-time))))
			(vector-set! copy-now 0 0) ;; force seconds to zero
			(vector-set! copy-now 1 minute)
			(vector-set! copy-now 2 hr)
			(vector-set! copy-now 3 dom)  ;; dom is already corrected for zero referenced
			(vector-set! copy-now 4 month)
			(let* ((copy-now-secs (local-time->seconds copy-now))
			       (new-copy      (seconds->local-time copy-now-secs))) ;; remake the time vector
			  (if (or (not cdayofweek)
				  (equal? (vector-ref new-copy 6)
					  cdayofweek)) ;; if the day is specified and a match OR if the day is NOT specified
			      (if (or (not cdayofmonth)
				      (equal? (vector-ref new-copy 3)
					      (+ 1 cdayofmonth))) ;; if the month is specified and a match OR if the month is NOT specified
				  (hash-table-set! all-times copy-now-secs new-copy))))))
		    (if cmin
			`(,cmin)  ;; if given cmin, have to use it
			(list (- nmin 1) nmin (+ nmin 1))))) ;; minute
		 (if chour
		     `(,chour)
		     (list (- nhour 1) nhour (+ nhour 1))))) ;; hour
	      (if cdayofmonth
		  `(,cdayofmonth)
		  (list (- ndayofmonth 1) ndayofmonth (+ ndayofmonth 1)))))
	   (if cmonth
	       `(,cmonth)
	       (list (- nmonth 1) nmonth (+ nmonth 1))))
	  (let ((before #f)
		(is-in  #f))
	    (for-each
	     (lambda (moment)
	       (if (and before
			(<= before now-seconds)
			(>= moment now-seconds))
		   (begin
		     ;; (print)
		     ;; (print "Before: " (time->string (seconds->local-time before)))
		     ;; (print "Now:    " (time->string (seconds->local-time now-seconds)))
		     ;; (print "After:  " (time->string (seconds->local-time moment)))
		     ;; (print "Last:   " (time->string (seconds->local-time last-done)))
		     (if (<  last-done before)
			 (set! is-in before))
		     ))
	       (set! before moment))
	     (sort (hash-table-keys all-times) <))
	    is-in)))))

(define (common:extended-cron  cron-str now-seconds-in last-done)
  (let ((expanded-cron (common:cron-expand cron-str)))
    (if (string? expanded-cron)
	(common:cron-event expanded-cron now-seconds-in last-done)
	(let loop ((hed (car expanded-cron))
		   (tal (cdr expanded-cron)))
	  (if (common:cron-event hed now-seconds-in last-done)
	      #t
	      (if (null? tal)
		  #f
		  (loop (car tal)(cdr tal))))))))

;;======================================================================
;; C O L O R S
;;======================================================================
      
(define (common:name->iup-color name)
  (case (string->symbol (string-downcase name))
    ((red)    "223 33 49")

Modified commonmod.scm from [2570fcf4eb] to [09e197a941].

15
16
17
18
19
20
21

22
23
24
25
26
27
28
29
30
31
32






















33
34
35
36
37
38
39
15
16
17
18
19
20
21
22
23
24
25
26
27
28





29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57







+






-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+







;; 
;;     You should have received a copy of the GNU General Public License
;;     along with Megatest.  If not, see <http://www.gnu.org/licenses/>.

;;======================================================================

(declare (unit commonmod))
;; (declare (uses debugprint))

(use srfi-69)

(module commonmod
	*

(import scheme chicken data-structures extras files)
(import (prefix sqlite3 sqlite3:)
	posix typed-records srfi-18 srfi-69
	md5 message-digest
	regex srfi-1)
(import scheme
	chicken

	(prefix sqlite3 sqlite3:)
	data-structures
	extras
	files
	matchable
	md5
	message-digest
	pathname-expand
	posix
	posix-extras
	regex
	regex-case
	srfi-1
	srfi-18
	srfi-69
	typed-records

	;; debugprint
	)

;;======================================================================
;; CONTENTS
;;
;;  config file utils
;;  misc conversion, data manipulation functions
;;  testsuite and area utilites
134
135
136
137
138
139
140












































141
142
143
144
145
146
147
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209







+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+







      #f ;; I don't really care why this failed (at least for now)
    (delete-file* fname)))

;;======================================================================
;; misc conversion, data manipulation functions
;;======================================================================

;;======================================================================
;; return first command that exists, else #f
;;
(define (common:which cmds)
  (if (null? cmds)
      #f
      (let loop ((hed (car cmds))
		 (tal (cdr cmds)))
	(let ((res (with-input-from-pipe (conc "which " hed) read-line)))
	  (if (and (string? res)
		   (file-exists? res))
	      res
	      (if (null? tal)
		  #f
		  (loop (car tal)(cdr tal))))))))
  
(define (common:get-megatest-exe)
  (let* ((mtexe (or (get-environment-variable "MT_MEGATEST")
		    (common:which '("megatest"))
		    "megatest")))
    (if (file-exists? mtexe)
	(realpath mtexe)
	mtexe)))

(define (common:get-megatest-exe-dir)
  (let* ((mtexe (common:get-megatest-exe)))
    (pathname-directory mtexe)))

;; more generic and comprehensive version of get-megatest-exe
;;
(define (common:get-mtexe)
  (let* ((mtpathdir  (common:get-megatest-exe-dir)))
    (or (common:get-megatest-exe)
	(if mtpathdir
	    (conc mtpathdir"/megatest")
	    #f)
	"megatest")))

(define (common:get-megatest-exe-path)
  (let* ((mtpathdir (common:get-megatest-exe-dir)))
    (conc mtpathdir":"(get-environment-variable "PATH") ":.")))

(define (realpath x) (resolve-pathname  (pathname-expand (or x "/dev/null")) ))

;; if it looks like a number -> convert it to a number, else return it
;;
(define (lazy-convert inval)
  (let* ((as-num (if (string? inval)(string->number inval) #f)))
    (or as-num inval)))

;; to '((a . 1)(b . 2)(c . "a silly thing")(d . ""))
159
160
161
162
163
164
165






























166
167
168
169
170
171
172
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264







+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+







			     (if convert (lazy-convert inval) inval))))
		   (else f))))
	     (filter (lambda (x)
		       (not (string-match "^\\s*" x)))
		     val-list))
	'())))

(define (get-cpu-load)
  (let* ((load-info (with-input-from-file "/proc/loadavg" read-lines)))
    (map string->number (string-split load-info))))

(define *current-host-cores* #f)

(define (get-current-host-cores)
  (or *current-host-cores*
      (let ((cpu-info (with-input-from-file "/proc/cpuinfo" read-lines)))
	(let loop ((lines cpu-info))
	  (if (null? lines)
	      1 ;; gotta be at least one!
	      (let* ((inl (car lines))
		     (tail (cdr lines))
		     (parts (string-split inl)))
		(match parts
		  (("cpu" "cores" ":" num) (string->number num))
		  (else (loop tail)))))))))

(define (number-of-processes-running processname)
  (with-input-from-pipe
   (conc "ps -def | egrep \""processname"\" |wc -l")
   (lambda ()
     (string->number (read-line)))))

;; get the normalized (i.e. load / numcpus) for *this* host
;;
(define (get-normalized-cpu-load)
  (/ (get-cpu-load)(get-current-host-cores)))

;;======================================================================
;; testsuite and area utilites
;;======================================================================

(define (get-testsuite-name toppath configdat)
  (or (lookup configdat "setup" "area-name")
      (lookup configdat "setup" "testsuite")
206
207
208
209
210
211
212


































































































































































































































































































213
214
215
216
217


218
219
220
221
222
223








































224
225
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601






602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643







+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+





+
+
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+


;;
(define (get-cfg-areas cfgdat)
  (let ((adat (get-section cfgdat "areas")))
    (map (lambda (entry)
	   `(,(car entry) . 
	     ,(val->alist (cadr entry))))
	 adat)))

;;======================================================================
;; time utils
;;======================================================================

(define (common:human-time)
  (time->string (seconds->local-time (current-seconds)) "%Y-%m-%d %H:%M:%S"))

;;======================================================================
;; T I M E   A N D   D A T E
;;======================================================================

;;======================================================================
;; Convert strings like "5s 2h 3m" => 60x60x2 + 3x60 + 5
(define (common:hms-string->seconds tstr)
  (let ((parts     (string-split-fields "\\w+" tstr))
	(time-secs 0)
	;; s=seconds, m=minutes, h=hours, d=days, M=months, y=years, w=weeks
	(trx       (regexp "(\\d+)([smhdMyw])")))
    (for-each (lambda (part)
		(let ((match  (string-match trx part)))
		  (if match
		      (let ((val (string->number (cadr match)))
			    (unt (caddr match)))
			(if val 
			    (set! time-secs (+ time-secs (* val
							    (case (string->symbol unt)
							      ((s) 1)
							      ((m) 60) ;; minutes
							      ((h) 3600)
							      ((d) 86400)
							      ((w) 604800)
							      ((M) 2628000) ;; aproximately one month
							      ((y) 31536000)
							      (else #f))))))))))
	      parts)
    time-secs))
		       
(define (seconds->hr-min-sec secs)
  (let* ((hrs (quotient secs 3600))
	 (min (quotient (- secs (* hrs 3600)) 60))
	 (sec (- secs (* hrs 3600)(* min 60))))
    (conc (if (> hrs 0)(conc hrs "hr ") "")
	  (if (> min 0)(conc min "m ")  "")
	  sec "s")))

(define (seconds->time-string sec)
  (time->string 
   (seconds->local-time sec) "%H:%M:%S"))

(define (seconds->work-week/day-time sec)
  (time->string
   (seconds->local-time sec) "ww%V.%u %H:%M"))

(define (seconds->work-week/day sec)
  (time->string
   (seconds->local-time sec) "ww%V.%u"))

(define (seconds->year-work-week/day sec)
  (time->string
   (seconds->local-time sec) "%yww%V.%w"))

(define (seconds->year-work-week/day-time sec)
  (time->string
   (seconds->local-time sec) "%Yww%V.%w %H:%M"))

(define (seconds->year-week/day-time sec)
  (time->string
   (seconds->local-time sec) "%Yw%V.%w %H:%M"))

(define (seconds->quarter sec)
  (case (string->number
	 (time->string 
	  (seconds->local-time sec)
	  "%m"))
    ((1 2 3) 1)
    ((4 5 6) 2)
    ((7 8 9) 3)
    ((10 11 12) 4)
    (else #f)))

;;======================================================================
;; basic ISO8601 format (e.g. "2017-02-28 06:02:54") date time => Unix epoch
;;
(define (common:date-time->seconds datetime)
  (local-time->seconds (string->time datetime "%Y-%m-%d %H:%M:%S")))

;;======================================================================
;; given span of seconds tstart to tend
;; find start time to mark and mark delta
;;
(define (common:find-start-mark-and-mark-delta tstart tend)
  (let* ((deltat   (- (max tend (+ tend 10)) tstart)) ;; can't handle runs of less than 4 seconds. Pad it to 10 seconds ...
	 (result   #f)
	 (min      60)
	 (hr       (* 60 60))
	 (day      (* 24 hr))
	 (yr       (* 365 day)) ;; year
	 (mo       (/ yr 12))
	 (wk       (* day 7)))
    (for-each
     (lambda (max-blks)
       (for-each
	(lambda (span) ;; 5 2 1
	  (if (not result)
	      (for-each 
	       (lambda (timeunit timesym) ;; year month day hr min sec
		 (if (not result)
		     (let* ((time-blk (* span timeunit))
			    (num-blks (quotient deltat time-blk)))
		       (if (and (> num-blks 4)(< num-blks max-blks))
			   (let ((first (* (quotient tstart time-blk) time-blk)))
			     (set! result (list span timeunit time-blk first timesym))
			     )))))
	       (list yr mo wk day hr min 1)
	       '(     y  mo w  d   h  m   s))))
	(list 8 6 5 2 1)))
     '(5 10 15 20 30 40 50 500))
    (if values
	(apply values result)
	(values 0 day 1 0 'd))))

;;======================================================================
;; given x y lim return the cron expansion
;;
(define (common:expand-cron-slash x y lim)
  (let loop ((curr x)
	     (res  `()))
    (if (< curr lim)
	(loop (+ curr y) (cons curr res))
	(reverse res))))

;;======================================================================
;; expand a complex cron string to a list of cron strings
;;
;;  x/y   => x, x+y, x+2y, x+3y while x+Ny<max_for_field
;;  a,b,c => a, b ,c
;;
;;   NOTE: with flatten a lot of the crud below can be factored down.
;;
(define (common:cron-expand cron-str)
  (if (list? cron-str)
      (flatten
       (fold (lambda (x res)
	       (if (list? x)
		   (let ((newres (map common:cron-expand x)))
		     (append x newres))
		   (cons x res)))
	     '()
	     cron-str)) ;; (map common:cron-expand cron-str))
      (let ((cron-items (string-split cron-str))
	    (slash-rx   (regexp "(\\d+)/(\\d+)"))
	    (comma-rx   (regexp ".*,.*"))
	    (max-vals   '((min        . 60)
			  (hour       . 24)
			  (dayofmonth . 28) ;;; BUG!!!! This will be a bug for some combinations
			  (month      . 12)
			  (dayofweek  . 7))))
	(if (< (length cron-items) 5) ;; bad spec
	    cron-str ;; `(,cron-str)              ;; just return the string, something downstream will fix it
	    (let loop ((hed  (car cron-items))
		       (tal  (cdr cron-items))
		       (type 'min)
		       (type-tal '(hour dayofmonth month dayofweek))
		       (res  '()))
	      (regex-case
		  hed
		(slash-rx ( _ base incr ) (let* ((basen          (string->number base))
						 (incrn          (string->number incr))
						 (expanded-vals  (common:expand-cron-slash basen incrn (alist-ref type max-vals)))
						 (new-list-crons (fold (lambda (x myres)
									 (cons (conc (if (null? res)
											 ""
											 (conc (string-intersperse res " ") " "))
										     x " " (string-intersperse tal " "))
									       myres))
								       '() expanded-vals)))
					    ;; (print "new-list-crons: " new-list-crons)
					    ;; (fold (lambda (x res)
					    ;; 	    (if (list? x)
					    ;; 		(let ((newres (map common:cron-expand x)))
					    ;; 		  (append x newres))
					    ;; 		(cons x res)))
					    ;; 	  '()
					    (flatten (map common:cron-expand new-list-crons))))
		;;					    (map common:cron-expand (map common:cron-expand new-list-crons))))
		(else (if (null? tal)
			  cron-str
			  (loop (car tal)(cdr tal)(car type-tal)(cdr type-tal)(append res (list hed)))))))))))

;;======================================================================
;; given a cron string and the last time event was processed return #t to run or #f to not run
;;
;;  min    hour   dayofmonth month  dayofweek
;; 0-59    0-23   1-31       1-12   0-6          ### NOTE: dayofweek does not include 7
;;
;;  #t => yes, run the job
;;  #f => no, do not run the job
;;
(define (common:cron-event cron-str now-seconds-in last-done) ;; ref-seconds = #f is NOW.
  (let* ((cron-items     (map string->number (string-split cron-str)))
	 (now-seconds    (or now-seconds-in (current-seconds)))
	 (now-time       (seconds->local-time now-seconds))
	 (last-done-time (seconds->local-time last-done))
	 (all-times      (make-hash-table)))
    ;; (print "cron-items: " cron-items "(length cron-items): " (length cron-items))
    (if (not (eq? (length cron-items) 5)) ;; don't even try to figure out junk strings
	#f
	(match-let (((     cmin chour cdayofmonth cmonth    cdayofweek)
		     cron-items)
		    ;; 0     1    2        3         4    5      6
		    ((nsec nmin nhour ndayofmonth nmonth nyr ndayofweek n7 n8 n9)
		     (vector->list now-time))
		    ((lsec lmin lhour ldayofmonth lmonth lyr ldayofweek l7 l8 l9)
		     (vector->list last-done-time)))
	  ;; create all possible time slots
	  ;; remove invalid slots due to (for example) day of week
	  ;; get the start and end entries for the ref-seconds (current) time
	  ;; if last-done > ref-seconds => this is an ERROR!
	  ;; does the last-done time fall in the legit region?
	  ;;    yes => #f  do not run again this command
	  ;;    no  => #t  ok to run the command
	  (for-each ;; month
	   (lambda (month)
	     (for-each ;; dayofmonth
	      (lambda (dom)
		(for-each
		 (lambda (hr) ;; hour
		   (for-each
		    (lambda (minute) ;; minute
		      (let ((copy-now (apply vector (vector->list now-time))))
			(vector-set! copy-now 0 0) ;; force seconds to zero
			(vector-set! copy-now 1 minute)
			(vector-set! copy-now 2 hr)
			(vector-set! copy-now 3 dom)  ;; dom is already corrected for zero referenced
			(vector-set! copy-now 4 month)
			(let* ((copy-now-secs (local-time->seconds copy-now))
			       (new-copy      (seconds->local-time copy-now-secs))) ;; remake the time vector
			  (if (or (not cdayofweek)
				  (equal? (vector-ref new-copy 6)
					  cdayofweek)) ;; if the day is specified and a match OR if the day is NOT specified
			      (if (or (not cdayofmonth)
				      (equal? (vector-ref new-copy 3)
					      (+ 1 cdayofmonth))) ;; if the month is specified and a match OR if the month is NOT specified
				  (hash-table-set! all-times copy-now-secs new-copy))))))
		    (if cmin
			`(,cmin)  ;; if given cmin, have to use it
			(list (- nmin 1) nmin (+ nmin 1))))) ;; minute
		 (if chour
		     `(,chour)
		     (list (- nhour 1) nhour (+ nhour 1))))) ;; hour
	      (if cdayofmonth
		  `(,cdayofmonth)
		  (list (- ndayofmonth 1) ndayofmonth (+ ndayofmonth 1)))))
	   (if cmonth
	       `(,cmonth)
	       (list (- nmonth 1) nmonth (+ nmonth 1))))
	  (let ((before #f)
		(is-in  #f))
	    (for-each
	     (lambda (moment)
	       (if (and before
			(<= before now-seconds)
			(>= moment now-seconds))
		   (begin
		     ;; (print)
		     ;; (print "Before: " (time->string (seconds->local-time before)))
		     ;; (print "Now:    " (time->string (seconds->local-time now-seconds)))
		     ;; (print "After:  " (time->string (seconds->local-time moment)))
		     ;; (print "Last:   " (time->string (seconds->local-time last-done)))
		     (if (<  last-done before)
			 (set! is-in before))
		     ))
	       (set! before moment))
	     (sort (hash-table-keys all-times) <))
	    is-in)))))

(define (common:extended-cron  cron-str now-seconds-in last-done)
  (let ((expanded-cron (common:cron-expand cron-str)))
    (if (string? expanded-cron)
	(common:cron-event expanded-cron now-seconds-in last-done)
	(let loop ((hed (car expanded-cron))
		   (tal (cdr expanded-cron)))
	  (if (common:cron-event hed now-seconds-in last-done)
	      #t
	      (if (null? tal)
		  #f
		  (loop (car tal)(cdr tal))))))))



;;======================================================================
;; misc stuff
;;======================================================================

(define (common:get-signature str)
  (message-digest-string (md5-primitive) str))
;; (define (debug:print . params) #f)
;; (define (debug:print-info . params) #f)
;; 
;; (define (set-functions dbgp dbgpinfo)
;;   (set! debug:print dbgp)
;;   (set! debug:print-info dbgpinfo))

;;======================================================================
;; hash of hashs
;;======================================================================

(define (db:hoh-set! dat key1 key2 val)
  (let* ((subhash (hash-table-ref/default dat key1 #f)))
    (if subhash
	(hash-table-set! subhash key2 val)
	(begin
	  (hash-table-set! dat key1 (make-hash-table))
	  (db:hoh-set! dat key1 key2 val)))))

(define (db:hoh-get dat key1 key2)
  (let* ((subhash (hash-table-ref/default dat key1 #f)))
    (and subhash
	 (hash-table-ref/default subhash key2 #f))))

;;======================================================================
;; when called from a wrapper I need sometimes to find the calling
;; wrapper, this is for dashboard to find the correct megatest.
;;
(define (common:find-local-megatest #!optional (progname "megatest"))
  (let ((res (filter file-exists?
		     (map (lambda (updir)
			    (let* ((lm  (car (argv)))
				   (dir (pathname-directory lm))
				   (exe (pathname-strip-directory lm)))
			      (conc (if dir (conc dir "/") "")
				    (case (string->symbol exe)
				      ((dboard)    (conc updir progname))
				      ((mtest)     (conc updir progname))
				      ((dashboard) progname)
				      (else exe)))))
			  '("../../" "../")))))
    (if (null? res)
	(begin
	  ;; (debug:print 0 *default-log-port* "Failed to find this executable! Using what can be found on the path")
	  progname)
	(car res))))

)

Modified configf.scm from [6390e213ef] to [1768130e73].

23
24
25
26
27
28
29



30
31
32
33
34
35
36
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39







+
+
+







;;======================================================================

(use regex regex-case matchable) ;;  directory-utils)
(declare (unit configf))
(declare (uses process))
(declare (uses env))
(declare (uses keys))
(declare (uses commonmod))
(declare (uses commonmod.import))
(import commonmod)

(include "common_records.scm")

;; return list (path fullpath configname)
(define (find-config configname #!key (toppath #f))
  (if toppath
      (let ((cfname (conc toppath "/" configname)))
95
96
97
98
99
100
101


102
103
104
105
106
107
108
109
110
111
112
113
114
115
116

117
118
119
120
121
122
123
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120

121
122
123
124
125
126
127
128







+
+














-
+







;; read a line and process any #{ ... } constructs

(define configf:var-expand-regex (regexp "^(.*)#\\{(scheme|system|shell|getenv|get|runconfigs-get|rget|scm|sh|rp|gv|g|mtrah)\\s+([^\\}\\{]*)\\}(.*)"))

(define (configf:system ht cmd)
  (system cmd)
  )

(define configf:imports "(import commonmod)")

(define (configf:process-line l ht allow-system #!key (linenum #f))
  (let loop ((res l))
    (if (string? res)
	(let ((matchdat (string-search configf:var-expand-regex res)))
	  (if matchdat
	      (let* ((prestr  (list-ref matchdat 1))
		     (cmdtype (list-ref matchdat 2)) ;; eval, system, shell, getenv
		     (cmd     (list-ref matchdat 3))
		     (poststr (list-ref matchdat 4))
		     (result  #f)
		     (start-time (current-seconds))
		     (cmdsym  (string->symbol cmdtype))
		     (fullcmd (case cmdsym
				((scheme scm) (conc "(lambda (ht)" cmd ")"))
				((scheme scm) (conc "(lambda (ht)(begin " configf:imports cmd "))"))
				((system)     (conc "(lambda (ht)(configf:system ht \"" cmd "\"))"))
				((shell sh)   (conc "(lambda (ht)(string-translate (shell \""  cmd "\") \"\n\" \" \"))"))
				((realpath rp)(conc "(lambda (ht)(common:nice-path \"" cmd "\"))"))
				((getenv gv)  (conc "(lambda (ht)(get-environment-variable \"" cmd "\"))"))
				((mtrah)      (conc "(lambda (ht)"
                                                    "    (let ((extra \"" cmd "\"))"
						    "       (conc (or *toppath* (get-environment-variable \"MT_RUN_AREA_HOME\"))"

Modified dashboard-context-menu.scm from [83fc3e6c83] to [9827592e3b].

30
31
32
33
34
35
36

37
38
39
40
41
42
43
44
45
46
47


48
49
50
51
52
53
54
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57







+











+
+







(use canvas-draw)

(use srfi-1 posix regex regex-case srfi-69)
(use (prefix sqlite3 sqlite3:))

(declare (unit dashboard-context-menu))
(declare (uses common))
(declare (uses commonmod))
(declare (uses db))
(declare (uses gutils))
(declare (uses rmt))
(declare (uses ezsteps))
;; (declare (uses sdb))
;; (declare (uses filedb))
(declare (uses subrun))

(include "common_records.scm")
(include "db_records.scm")
(include "run_records.scm")

(import commonmod)

(define (dboard:launch-testpanel run-id test-id)
  (let* ((dboardexe (common:find-local-megatest "dashboard"))
         (cmd (conc dboardexe
                    " -test " run-id "," test-id
                    " &")))
    (system cmd)))

Modified dashboard-guimonitor.scm from [60455a8a12] to [76c7fb97a3].

32
33
34
35
36
37
38


39
40
41
42
43
44
45
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47







+
+







(import (prefix sqlite3 sqlite3:))

(declare (unit dashboard-guimonitor))
(declare (uses common))
(declare (uses keys))
(declare (uses db))
(declare (uses tasks))
(declare (uses commonmod))
(import commonmod)

(include "common_records.scm")
(include "db_records.scm")
(include "run_records.scm")
(include "task_records.scm")

(define (control-panel db tdb keys)

Modified dashboard-tests.scm from [b934cba7e8] to [124658aa85].

29
30
31
32
33
34
35

36
37
38
39
40
41
42


43
44
45
46
47
48
49
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52







+







+
+







(use canvas-draw)

(use srfi-1 posix regex regex-case srfi-69)
(use (prefix sqlite3 sqlite3:))

(declare (unit dashboard-tests))
(declare (uses common))
(declare (uses commonmod))
(declare (uses db))
(declare (uses gutils))
(declare (uses rmt))
(declare (uses ezsteps))
;; (declare (uses sdb))
;; (declare (uses filedb))
(declare (uses subrun))

(import commonmod)

(include "common_records.scm")
(include "db_records.scm")
(include "run_records.scm")

;;======================================================================
;; C O M M O N
457
458
459
460
461
462
463
464

465
466
467
468
469
470
471
472
460
461
462
463
464
465
466

467

468
469
470
471
472
473
474







-
+
-









;;======================================================================
;;
;;======================================================================
(define (dashboard-tests:examine-test run-id test-id) ;; run-id run-key origtest)
  (let* ((db-path       (db:dbfile-path)) ;; (conc (configf:lookup *configdat* "setup" "linktree") "/db/" run-id ".db"))
	 (dbstruct      #f) ;; NOT ACTUALLY USED (db:setup)) ;; (make-dbr:dbstruct path:  (db:dbfile-path #f) ;; (configf:lookup *configdat* "setup" "linktree") 
	 (dbstruct      #f) ;; NOT USED
			    ;;		   local: #t))
	 (testdat        (rmt:get-test-info-by-id run-id test-id)) ;; (db:get-test-info-by-id dbstruct run-id test-id))
	 (db-mod-time   0) ;; (file-modification-time db-path))
	 (last-update   0) ;; (current-seconds))
	 (request-update #t))
    (if (not testdat)
	(begin
	  (debug:print 2 *default-log-port* "ERROR: No test data found for test " test-id ", exiting")

Modified dashboard.scm from [4ad343f07e] to [b72cad9255].

42
43
44
45
46
47
48


49






50
51
52
53
54
55
56
57
58




59
60
61
62
63
64
65
42
43
44
45
46
47
48
49
50

51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76







+
+
-
+
+
+
+
+
+









+
+
+
+







(declare (uses dashboard-tests))
(declare (uses tree))
(declare (uses dcommon))
(declare (uses dashboard-context-menu))
(declare (uses vg))
(declare (uses subrun))
(declare (uses mt))
(declare (uses dbmod))
;; (declare (uses dbmemmod))
(declare (uses dbfile))        
(declare (uses dbfile))
(declare (uses commonmod))
(declare (uses commonmod.import))
(import commonmod)

(import dbmod dbfile)

(include "common_records.scm")
(include "db_records.scm")
(include "run_records.scm")
(include "task_records.scm")
(include "megatest-version.scm")
(include "megatest-fossil-hash.scm")
(include "vg_records.scm")

;; set some parameters here - these need to be put in something that can be loaded from other
;; executables such as dashboard and mtutil
;;
(include "dashboard-transport-mode.scm")
(dbfile:db-init-proc db:initialize-main-db)

(define help (conc 
	      "Megatest Dashboard, documentation at http://www.kiatoa.com/fossils/megatest version " megatest-version 
              " license GPL, Copyright (C) Matt Welland 2012-2017

Usage: dashboard [options]

Modified db.scm from [e18bcc992d] to [e3db33630a].

20
21
22
23
24
25
26












27
28
29
30
31
32
33
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45







+
+
+
+
+
+
+
+
+
+
+
+








;;======================================================================
;; Database access
;;======================================================================

;; dbstruct vector containing all the relevant dbs like main.db, megatest.db, run.db etc

(declare (unit db))
(declare (uses common))
(declare (uses debugprint))
(declare (uses dbmod))
(declare (uses dbfile))
(declare (uses keys))
(declare (uses ods))
(declare (uses client))
(declare (uses mt))
(declare (uses commonmod))
(import commonmod)

(use (srfi 18)
     extras
     tcp
     stack
     (prefix sqlite3 sqlite3:)
     srfi-1
     posix
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66

67
68
69
70
71
72
73
74
75
76
77
78







79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
54
55
56
57
58
59
60










61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106









107
108
109
110
111
112
113







-
-
-
-
-
-
-
-
-
-








+












+
+
+
+
+
+
+


















-
-
-
-
-
-
-
-
-







     format
     dot-locking
     z3
     typed-records
     matchable
     files)

(declare (unit db))
(declare (uses common))
(declare (uses dbmod))
;; (declare (uses debugprint))
(declare (uses dbfile))
(declare (uses keys))
(declare (uses ods))
(declare (uses client))
(declare (uses mt))

(include "common_records.scm")
(include "db_records.scm")
(include "key_records.scm")
(include "run_records.scm")

(define *number-of-writes* 0)
(define *number-non-write-queries* 0)

(import debugprint)
(import dbmod)
(import dbfile)

;; record for keeping state,status and count for doing roll-ups in
;; iterated tests
;;
(defstruct dbr:counts
  (state #f)
  (status #f)
  (count  0)) 


(define (db:with-db dbstruct run-id r/w proc . params)
  (case (rmt:transport-mode)
    ((http)(dbfile:with-db dbstruct run-id r/w proc params))
    ((tcp) (dbmod:with-db dbstruct run-id r/w proc params))
    ((nfs) (dbmod:with-db dbstruct run-id r/w proc params))
    (else (assert #f "FATAL: db:with-db called with non-existant transport mode"))))

;;======================================================================
;; hash of hashs
;;======================================================================


(define (db:hoh-set! dat key1 key2 val)
  (let* ((subhash (hash-table-ref/default dat key1 #f)))
    (if subhash
	(hash-table-set! subhash key2 val)
	(begin
	  (hash-table-set! dat key1 (make-hash-table))
	  (db:hoh-set! dat key1 key2 val)))))

(define (db:hoh-get dat key1 key2)
  (let* ((subhash (hash-table-ref/default dat key1 #f)))
    (and subhash
	 (hash-table-ref/default subhash key2 #f))))

(define (db:get-cache-stmth dbdat run-id db stmt)
  (let* (;; (dbdat       (dbfile:get-dbdat dbstruct run-id))
	 (stmt-cache  (dbr:dbdat-stmt-cache dbdat))
	 (stmth       (db:hoh-get stmt-cache db stmt)))
    (or stmth
	(let* ((newstmth (sqlite3:prepare db stmt)))
	  (db:hoh-set! stmt-cache db stmt newstmth)
	  newstmth))))

;;======================================================================
;; SQLITE3 HELPERS
;;======================================================================

(define (db:general-sqlite-error-dump exn stmt . params)
  (let ((err-status ((condition-property-accessor 'sqlite3 'status #f) exn))) ;; RADT ... how does this work?
    ;; check for (exn sqlite3) ((condition-property-accessor 'exn 'message) exn)
131
132
133
134
135
136
137









































































138
139
140
141
142
143
144
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218







+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+








(define (db:setup do-sync)
  (assert *toppath* "FATAL: db:setup called before launch:setup has been run.")
  (let* ((tmpdir (common:get-db-tmp-area)))
    (if (not *dbstruct-dbs*)
	(dbfile:setup do-sync *toppath* tmpdir)
	*dbstruct-dbs*)))

;; moved from dbfile
;;
;; ADD run-id SUPPORT
;;
(define (db:create-all-triggers dbstruct)
  (db:with-db
   dbstruct #f #f
   (lambda (dbdat db)
     (db:create-triggers db))))

(define (db:create-triggers db)
    (for-each (lambda (key)
              (sqlite3:execute db (cadr key)))
          db:trigger-list))

(define (db:drop-all-triggers dbstruct)
  (db:with-db
   dbstruct #f #f
   (lambda (dbdat db)
     (db:drop-triggers db))))

(define (db:have-incompletes? dbstruct run-id ovr-deadtime)
  (let* ((incompleted '())
	 (oldlaunched '())
	 (toplevels   '())
	 ;; (deadtime-str (configf:lookup *configdat* "setup" "deadtime"))
	 (deadtime  (or ovr-deadtime 72000))) ;; twenty hours
    (db:with-db
     dbstruct run-id #f
     (lambda (dbdat db)
       
       ;; in RUNNING or REMOTEHOSTSTART for more than 10 minutes
       ;;
       ;; HOWEVER: this code in run:test seems to work fine
       ;;              (> (- (current-seconds)(+ (db:test-get-event_time testdat)
       ;;                     (db:test-get-run_duration testdat)))
       ;;                    600) 
       ;; (db:delay-if-busy dbdat)
       (sqlite3:for-each-row 
        (lambda (test-id run-dir uname testname item-path)
          (if (and (equal? uname "n/a")
                   (equal? item-path "")) ;; this is a toplevel test
              ;; what to do with toplevel? call rollup?
              (begin
                (set! toplevels   (cons (list test-id run-dir uname testname item-path run-id) toplevels)))
                ;; (print-info "Found old toplevel test in RUNNING state, test-id=" test-id))
              (set! incompleted (cons (list test-id run-dir uname testname item-path run-id) incompleted))))
        (db:get-cache-stmth dbdat db
        "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > (run_duration + ?) AND state IN ('RUNNING','REMOTEHOSTSTART');")
        run-id deadtime)

       ;; in LAUNCHED for more than one day. Could be long due to job queues TODO/BUG: Need override for this in config
       ;;
       ;; (db:delay-if-busy dbdat)
       (sqlite3:for-each-row
        (lambda (test-id run-dir uname testname item-path)
          (if (and (equal? uname "n/a")
                   (equal? item-path "")) ;; this is a toplevel test
              ;; what to do with toplevel? call rollup?
              (set! toplevels   (cons (list test-id run-dir uname testname item-path run-id) toplevels))
              (set! oldlaunched (cons (list test-id run-dir uname testname item-path run-id) oldlaunched))))
        (db:get-cache-stmth dbdat db
        "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > 86400 AND state IN ('LAUNCHED');")
        run-id)
       
       ;; (print-info "Found " (length oldlaunched) " old LAUNCHED items, " (length toplevels) " old LAUNCHED toplevel tests and " (length incompleted) " tests marked RUNNING but apparently dead.")
       (if (and (null? incompleted)
                (null? oldlaunched)
                (null? toplevels))
           #f
           #t)))))


;; looks up subdb and returns it, if not found then set up
;; and then return it.
;;
#;(define (db:get-db dbstruct run-id)
  (let* ((res (dbfile:get-subdb dbstruct run-id)))
    (if res
357
358
359
360
361
362
363

364
365
366




367
368
369
370
371
372
373
431
432
433
434
435
436
437
438



439
440
441
442
443
444
445
446
447
448
449







+
-
-
-
+
+
+
+







;; return the target db handle so it can be used
;;
(define (db:cache-for-read-only source target #!key (use-last-update #f))
  (if (and (hash-table-ref/default *global-db-store* target #f)
	   (>= (file-modification-time target)(file-modification-time source)))
      (hash-table-ref *global-db-store* target)
      (let* ((toppath   (launch:setup))
	     (targ-db-last-mod (db:get-sqlite3-mod-time target))
	     (targ-db-last-mod (if (common:file-exists? target)
				   (file-modification-time target)
				   0))
;;	      (if (common:file-exists? target)
;; BUG: This needs to include wal mode stuff .shm etc.
;;				   (file-modification-time target)
;;				   0))
	     (cache-db  (or (hash-table-ref/default *global-db-store* target #f)
			    (db:open-megatest-db path: target)))
	     (source-db (db:open-megatest-db path: source))
	     (curr-time (current-seconds))
	     (res      '())
	     (last-update (if use-last-update (cons "last_update" targ-db-last-mod) #f)))
	(db:sync-tables (db:sync-main-list source-db) last-update source-db cache-db)
403
404
405
406
407
408
409
410
411
412












413
414
415
416
417
418
419
420
421

422
423
424
425
426
427
428
429
430
431
432
433
434






















435
436
437
438
439
440
441
479
480
481
482
483
484
485



486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505

506
507












508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536







-
-
-
+
+
+
+
+
+
+
+
+
+
+
+








-
+

-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+







;; 			  megatest-db
;; 			  (conc cache-dir "/" fname)
;; 			  use-last-update: #t)))
;; 	  (thread-start! th1)
;; 	  (apply proc cache-db params)
;; 	  ))))




(define (db:get-sqlite3-mod-time fname)
  (let* ((wal-file (conc fname "-wal"))
	 (shm-file (conc fname "-shm"))
	 (get-mtime (lambda (f)
		      (if (and (file-exists? f)
			       (file-read-access? f))
			  (file-modification-time f)
			  0))))
    (max (get-mtime fname)
	 (get-mtime wal-file)
	 (get-mtime shm-file))))
	 
(define (db:all-db-sync dbstruct)
  (let* ((dbdat (db:open-db dbstruct #f db:initialize-main-db))
	 (data-synced       0) ;; count of changed records
    (tmp-area       (common:get-db-tmp-area))
    (dbfiles        (glob (conc tmp-area"/.megatest/*.db")))
    (sync-durations (make-hash-table))
    (no-sync-db        (db:open-no-sync-db)))
    (for-each
     (lambda (file)
     (lambda (file) ;; tmp db file
       (debug:print-info 3 *default-log-port* "file: " file)
       (let* ((fname       (conc (pathname-file file) ".db"))
	      (fulln       (conc *toppath*"/.megatest/"fname))
	      (time1       (if (file-exists? file)
			       (file-modification-time file)
			       (begin
				 (debug:print-info 2 *default-log-port* "Sync - I do not see file "file)
			   1)))
	      (time2       (if (file-exists? fulln)
			       (file-modification-time fulln)
			       (begin
				 (debug:print-info 2 *default-log-port* "Sync - I do not see file "fulln)
				 0)))
       (let* ((fname       (conc (pathname-file file) ".db")) ;; fname is tmp db file
              (wal-file (conc fname "-wal"))
              (shm-file (conc fname "-shm"))
	      (fulln       (conc *toppath*"/.megatest/"fname)) ;; fulln is nfs db name
              (wal-time     (if (file-exists? wal-file)             
			       (file-modification-time wal-file)
                               0))
              (shm-time     (if (file-exists? shm-file)             
			       (file-modification-time shm-file)
                               0))
	      (time1        (db:get-sqlite3-mod-time file))
;;	       (if (file-exists? file)              ;; time1 is the max itime of the tmp db, -wal and -shm files.
;;			       (max (file-modification-time file) wal-time shm-time)
;;			       (begin
;;				 (debug:print-info 2 *default-log-port* "Sync - I do not see file "file)
;;			   1)))
	      (time2       (db:get-sqlite3-mod-time fulln))
;;	      (if (file-exists? fulln)             ;; time2 is nfs file time
;;			       (file-modification-time fulln)
;;			       (begin
;;				 (debug:print-info 2 *default-log-port* "Sync - I do not see file "fulln)
;;				 0)))
	      (changed      (> (- time1 time2) (+ (random 5) 1)))  ;; it has been at some few seconds since last synced
	      (changed10    (> (- time1 time2) 10)) ;; it has been at least ten seconds since sync'd
	      (jfile-exists (file-exists? (conc file"-journal"))) ;; i.e. are we busy?
	      (do-cp        (cond
			     ((not (file-exists? fulln)) ;; shouldn't happen, but this might recover
			      (cons #t (conc "File "fulln" not found! Copying "fname" to "fulln)))
			     ((and (not jfile-exists) changed)
454
455
456
457
458
459
460

461


462
463















464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481




482
483
484
485
486
487





488
489
490


491
492
493
494
495
496
497
498
499
500
501
502
503
504

505
506
507
508
509
510
511
512
513










514
515
516
517
518
519
520





521
522
523
524
525




526
527
528
529
530
531
532
533








534
535
536
537
538
539
540






541
542
543
544
545
546
547











548
549

550
551
552
553
554
555
556
557
558










559
560
561
562


563
564
565
566

567
568

569
570
571
572
573
574
575
576
577
578
579
580

581
582
583
584

585
586
587
588
589
590
591
549
550
551
552
553
554
555
556

557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589




590
591
592
593






594
595
596
597
598
599


600
601














602









603
604
605
606
607
608
609
610
611
612







613
614
615
616
617





618
619
620
621
622







623
624
625
626
627
628
629
630







631
632
633
634
635
636







637
638
639
640
641
642
643
644
645
646
647


648









649
650
651
652
653
654
655
656
657
658




659
660




661


662


663
664
665
666
667
668
669
670


671
672
673
674
675
676
677
678
679
680
681
682
683







+
-
+
+


+
+
+
+
+
+
+
+
+
+
+
+
+
+
+














-
-
-
-
+
+
+
+
-
-
-
-
-
-
+
+
+
+
+

-
-
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
-
-
-
-
-
-
-
+
+
+
+
+
-
-
-
-
-
+
+
+
+

-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
-
-
-
-
-
-
-
+
+
+
+
+
+
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
-
-
+
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
-
-
-
-
+
+
-
-
-
-
+
-
-
+
-
-








-
-
+




+







				 fname", delta: " (- time1 time2) " seconds, reason: "(cdr do-cp))
	       (db:lock-and-delta-sync no-sync-db dbstruct fname runid (db:get-keys dbstruct) db:initialize-main-db)
	       (hash-table-set! sync-durations (conc fname".db")
				(- (current-milliseconds) start-time)))
	     (debug:print-info 3 *default-log-port* "skipping sync. " file " is up to date")
         )))
     dbfiles)
    ;; WHY does the dbdat need to be added back?
    (if dbdat (dbfile:add-dbdat dbstruct #f dbdat)))
    (if dbdat (dbfile:add-dbdat dbstruct #f dbdat))
    )
  #t)

(define (db:kill-servers)
  (let* ((servers (server:choose-server *toppath* 'all-valid))) ;; (server:get-list *toppath*))
    (for-each
     (lambda (server)
       (handle-exceptions
           exn
         (begin 
           (debug:print-info 0 *default-log-port*  "Unable to get host and/or port from " server ", exn=" exn)     
           #f)
	 (match-let (((mod-time host port start-time server-id pid) server))
		    (if (and host pid)
			(tasks:kill-server host pid)))))
     servers)
    (delete-file* (common:get-sync-lock-filepath))))

;; options:
;;
;;  'killservers  - kills all servers
;;  'dejunk       - removes junk records
;;  'adj-testids  - move test-ids into correct ranges
;;  'old2new      - sync megatest.db to /tmp/.../megatest.db and /tmp/.../megatest_ref.db
;;  'new2old      - sync /tmp/.../megatest.db to megatest.db and /tmp/.../megatest_ref.db (and update data_synced)
;;  'closeall     - close all opened dbs
;;  'schema       - attempt to apply schema changes
;;  run-ids: '(1 2 3 ...) or #f (for all)
;;
(define (db:multi-db-sync dbstruct . options)
  (let* (;; (dbdat       (db:open-db dbstruct #f dbfile:db-init-proc))
	 (data-synced 0) ;; count of changed records
    (tmp-area       (common:get-db-tmp-area))
    (old2new (member 'old2new options))
    (dejunk (member 'dejunk options))
    (killservers (member 'killservers options))
	 (tmp-area       (common:get-db-tmp-area))
	 (old2new (member 'old2new options))
	 (dejunk (member 'dejunk options))
	 (killservers (member 'killservers options))
    (servers (server:choose-server *toppath* 'all-valid)) ;; (server:get-list *toppath*))
    (src-area (if old2new *toppath* tmp-area))
    (dest-area (if old2new tmp-area *toppath*))
    (dbfiles        (if old2new (glob (conc *toppath* "/.megatest/*.db")) (glob (conc tmp-area "/.megatest/*.db"))))
    (keys (db:get-keys dbstruct))
    (sync-durations (make-hash-table)))
	 (src-area (if old2new *toppath* tmp-area))
	 (dest-area (if old2new tmp-area *toppath*))
	 (dbfiles        (if old2new (glob (conc *toppath* "/.megatest/*.db")) (glob (conc tmp-area "/.megatest/*.db"))))
	 (keys (db:get-keys dbstruct))
	 (sync-durations (make-hash-table)))


    (if killservers
    ;; kill servers
    (if killservers (db:kill-servers))
      (begin
       	  (for-each
	   (lambda (server)
             (handle-exceptions
             exn
             (begin 
               (debug:print-info 0 *default-log-port*  "Unable to get host and/or port from " server ", exn=" exn)     
               #f)
	     (match-let (((mod-time host port start-time server-id pid) server))
	       (if (and host pid)
		   (tasks:kill-server host pid)))))
	   servers)
          (delete-file* (common:get-sync-lock-filepath))
      )
    
    )
    (for-each
     (lambda (srcfile)
       (debug:print-info 3 *default-log-port* "file: " srcfile)
       (let* ((fname (conc (pathname-file srcfile) ".db"))
              (basename (pathname-file srcfile))
              (run-id (if (string= basename "main") #f (string->number basename)))
	      (destfile (conc dest-area "/.megatest/" fname))
              (dest-directory  (conc dest-area "/.megatest/"))
    (if (not dbfiles)
	(debug:print-error 0 *default-log-port* "no dbfiles found in " (conc *toppath* "/.megatest"))
	(for-each
	 (lambda (srcfile)
	   (debug:print-info 3 *default-log-port* "file: " srcfile)
	   (let* ((fname    (conc (pathname-file srcfile) ".db"))
		  (basename (pathname-file srcfile))
		  (run-id   (if (string= basename "main") #f (string->number basename)))
		  (destfile (conc dest-area "/.megatest/" fname))
		  (dest-directory  (conc dest-area "/.megatest/"))
              (dummy (debug:print-info 2 *default-log-port* "destfile = " destfile))
              (dummy2 (debug:print-info 2 *default-log-port* "dejunk = " dejunk))
	      (time1 (file-modification-time srcfile))
              (time2 (if (file-exists? destfile)
                         (begin
                            (debug:print-info 2 *default-log-port* "destfile " destfile " exists")
			    (file-modification-time destfile)
		  (time1    (file-modification-time srcfile))
		  (time2    (if (file-exists? destfile)
				(begin
				  (debug:print-info 2 *default-log-port* "destfile " destfile " exists")
				  (file-modification-time destfile))
                         )
			 (begin
			   (debug:print-info 0 *default-log-port* "Sync - I do not see file " destfile)
			   0)))
	      (changed ( < (- time2 time1) 6.0)) ;; dest db not updated within last 6 seconds
				(begin
				  (debug:print-info 0 *default-log-port* "Sync - I do not see file " destfile)
				  0)))
		  (changed   ( < (- time2 time1) 6.0)) ;; dest db not updated within last 6 seconds

      (do-cp (cond
		      ((not (file-exists? destfile)) ;; shouldn't happen, but this might recover
		       (debug:print-info 2 *default-log-port* "File " destfile " not found. Copying "srcfile" to "destfile)
                       (system (conc "/bin/mkdir -p " dest-directory))
                       (system (conc "/bin/cp " srcfile " " destfile))
		       #t)
		      (changed ;; (and changed
		  (do-cp     (cond
			      ((not (file-exists? destfile)) ;; shouldn't happen, but this might recover
			       (debug:print-info 2 *default-log-port* "File " destfile " not found. Copying "srcfile" to "destfile)
			       ;; TODO: Need to fix this for WAL mod. Can't just copy.
			       (system (conc "/bin/mkdir -p " dest-directory))
			       (system (conc "/bin/cp " srcfile " " destfile))
			       #t)
			      (changed ;; (and changed
		       ;; (> (- (current-seconds) time1) 3)) ;; if file is changed and three seconds have passed.
		       #t)
		      ((and changed *time-to-exit*) ;; last sync
		       #t)
		      (else
		       #f))))
          (if (or dejunk do-cp)
			       #t)
			      ((and changed *time-to-exit*) ;; last sync
			       #t)
			      (else
			       #f))))
             (if (or dejunk do-cp)
	     (let* (
                    (start-time (current-milliseconds))

                    (subdb (or (dbfile:get-subdb dbstruct run-id) (dbfile:init-subdb dbstruct run-id dbfile:db-init-proc)))
                    (mtdb      (dbr:subdb-mtdbdat subdb))
                    (tmpdb     (dbfile:open-db dbstruct run-id dbfile:db-init-proc))

		 (let* ((start-time (current-milliseconds))
			;; subdb is misnamed - should be dbdat (I think...)
			(subdb    (dbfile:open-db dbstruct run-id dbfile:db-init-proc))
			;;        (or (dbfile:get-subdb dbstruct run-id)
			;;            (dbfile:init-subdb dbstruct run-id dbfile:db-init-proc)))
			(mtdb      (dbr:subdb-mtdbdat subdb))
			;;
			;; BUG: -mrw- I think this next line is wrong. run-id should be the path to .megatest/<runid>.db
			;; 
			(tmpdb     (dbfile:open-db dbstruct run-id dbfile:db-init-proc)))
		   
                    )
	       (debug:print-info 2 *default-log-port* "delta syncing file: " srcfile ", time diff: " (- time1 time2) " seconds")
		   (debug:print-info 2 *default-log-port* "delta syncing file: " srcfile ", time diff: " (- time1 time2) " seconds")

               (if old2new
                 (begin
                   (if dejunk (db:clean-up run-id mtdb))
		   (db:sync-tables (db:sync-all-tables-list dbstruct (db:get-keys dbstruct)) #f mtdb tmpdb)
                 )
                 (begin
                   (if dejunk (db:clean-up run-id tmpdb))
		   (db:sync-tables (db:sync-all-tables-list dbstruct (db:get-keys dbstruct)) #f tmpdb mtdb)
		   (if old2new
                       (begin
			 (if dejunk (db:clean-up run-id mtdb))
			 (db:sync-tables (db:sync-all-tables-list
					  dbstruct
					  (db:get-keys dbstruct))
					 #f mtdb tmpdb))
                       (begin
			 (if dejunk (db:clean-up run-id tmpdb))
			 (db:sync-tables (db:sync-all-tables-list dbstruct (db:get-keys dbstruct)) #f tmpdb mtdb)))
                 )
               )
	       (hash-table-set! sync-durations (conc srcfile ".db") (- (current-milliseconds) start-time)))
	     (debug:print-info 2 *default-log-port* "skipping delta sync. " srcfile " is up to date")
		   (hash-table-set! sync-durations (conc srcfile ".db") (- (current-milliseconds) start-time)))
		 (debug:print-info 2 *default-log-port* "skipping delta sync. " srcfile " is up to date"))))
          )
       )
     )
     dbfiles
	 dbfiles))
    )
    data-synced
    data-synced))
  )
)

;; Sync all changed db's
;;
(define (db:tmp->megatest.db-sync dbstruct run-id last-update)
  (let* ((subdbs (hash-table-values (dbr:dbstruct-subdbs dbstruct)))
	 (res    '()))
    (for-each
     (lambda (subdb)
       (let* ((dbname (db:run-id->dbname run-id))
	      (mtdb   (dbr:subdb-mtdb subdb))
       (let* ((mtdb   (dbr:subdb-mtdb subdb))
	      (tmpdb  (db:get-subdb dbstruct run-id))
	      (refndb (dbr:subdb-refndb subdb))
	      (newres (db:sync-tables (db:sync-all-tables-list dbstruct (db:get-keys dbstruct)) last-update tmpdb refndb mtdb)))
	 ;; (stack-push! (dbr:subdb-dbstack subdb) tmpdb)
	 ;; BUG: verify this is really needed
	 (dbfile:add-dbdat dbstruct run-id tmpdb)
	 (set! res (cons newres res))))
     subdbs)
    res))

;;;; run-ids
;;    if #f use *db-local-sync* : or 'local-sync-flags
838
839
840
841
842
843
844
845


846
847
848
849
850
851
852
853


854
855
856
857
858
859
860
930
931
932
933
934
935
936

937
938
939
940
941
942
943
944
945

946
947
948
949
950
951
952
953
954







-
+
+







-
+
+







	(sqlite3:execute db "CREATE TABLE IF NOT EXISTS test_rundat (
                              id           INTEGER PRIMARY KEY,
                              test_id      INTEGER,
                              update_time  TIMESTAMP,
                              cpuload      INTEGER DEFAULT -1,
                              diskfree     INTEGER DEFAULT -1,
                              diskusage    INTGER DEFAULT -1,
                              run_duration INTEGER DEFAULT 0);")
                              run_duration INTEGER DEFAULT 0,
 last_update INTEGER DEFAULT (strftime('%s','now')));")
	(sqlite3:execute db "CREATE TABLE IF NOT EXISTS archives (
                              id           INTEGER PRIMARY KEY,
                              test_id      INTEGER,
                              state        TEXT DEFAULT 'new',
                              status       TEXT DEFAULT 'n/a',
                              archive_type TEXT DEFAULT 'bup',
                              du           INTEGER,
                              archive_path TEXT);")))
                              archive_path TEXT,
 last_update INTEGER DEFAULT (strftime('%s','now')));")))
        (db:create-triggers db)    
     db)) ;; )

;;======================================================================
;; A R C H I V E S
;;======================================================================

884
885
886
887
888
889
890

891
892
893
894
895
896
897
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992







+







	 db 
	 (conc
	  "SELECT d.id,d.archive_area_name,disk_path,last_df,last_df_time FROM archive_disks AS d
             INNER JOIN archive_blocks AS b ON d.id=b.archive_disk_id
             WHERE b.id IN (" (string-intersperse (map conc res) ",") ") AND
         last_df > ?;")
	 dneeded))
    ;; BUG: Verfify this is really needed
    (dbfile:add-dbdat dbstruct #f dbdat)
    blocks))
    
;; returns id of the record, register a disk allocated to archiving and record it's last known
;; available space
;;
(define (db:archive-register-disk dbstruct bdisk-name bdisk-path df)
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048






















































1049
1050
1051
1052
1053
1054
1055
1083
1084
1085
1086
1087
1088
1089






















































1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150







-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+







;; D B   U T I L S
;;======================================================================

;;======================================================================
;; M A I N T E N A N C E
;;======================================================================

(define (db:have-incompletes? dbstruct run-id ovr-deadtime)
  (let* ((incompleted '())
	 (oldlaunched '())
	 (toplevels   '())
	 (deadtime-str (configf:lookup *configdat* "setup" "deadtime"))
	 (deadtime     (if (and deadtime-str
				(string->number deadtime-str))
			   (string->number deadtime-str)
			   72000))) ;; twenty hours
    (db:with-db
     dbstruct run-id #f
     (lambda (dbdat db)
       (if (number? ovr-deadtime)(set! deadtime ovr-deadtime))
       
       ;; in RUNNING or REMOTEHOSTSTART for more than 10 minutes
       ;;
       ;; HOWEVER: this code in run:test seems to work fine
       ;;              (> (- (current-seconds)(+ (db:test-get-event_time testdat)
       ;;                     (db:test-get-run_duration testdat)))
       ;;                    600) 
       ;; (db:delay-if-busy dbdat)
       (sqlite3:for-each-row 
        (lambda (test-id run-dir uname testname item-path)
          (if (and (equal? uname "n/a")
                   (equal? item-path "")) ;; this is a toplevel test
              ;; what to do with toplevel? call rollup?
              (begin
                (set! toplevels   (cons (list test-id run-dir uname testname item-path run-id) toplevels))
                (debug:print-info 0 *default-log-port* "Found old toplevel test in RUNNING state, test-id=" test-id))
              (set! incompleted (cons (list test-id run-dir uname testname item-path run-id) incompleted))))
        db
        "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > (run_duration + ?) AND state IN ('RUNNING','REMOTEHOSTSTART');"
        run-id deadtime)

       ;; in LAUNCHED for more than one day. Could be long due to job queues TODO/BUG: Need override for this in config
       ;;
       ;; (db:delay-if-busy dbdat)
       (sqlite3:for-each-row
        (lambda (test-id run-dir uname testname item-path)
          (if (and (equal? uname "n/a")
                   (equal? item-path "")) ;; this is a toplevel test
              ;; what to do with toplevel? call rollup?
              (set! toplevels   (cons (list test-id run-dir uname testname item-path run-id) toplevels))
              (set! oldlaunched (cons (list test-id run-dir uname testname item-path run-id) oldlaunched))))
        db
        "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > 86400 AND state IN ('LAUNCHED');"
        run-id)
       
       (debug:print-info 18 *default-log-port* "Found " (length oldlaunched) " old LAUNCHED items, " (length toplevels) " old LAUNCHED toplevel tests and " (length incompleted) " tests marked RUNNING but apparently dead.")
       (if (and (null? incompleted)
                (null? oldlaunched)
                (null? toplevels))
           #f
           #t)))))
;; (define (db:have-incompletes? dbstruct run-id ovr-deadtime)
;;   (let* ((incompleted '())
;; 	 (oldlaunched '())
;; 	 (toplevels   '())
;; 	 (deadtime-str (configf:lookup *configdat* "setup" "deadtime"))
;; 	 (deadtime     (if (and deadtime-str
;; 				(string->number deadtime-str))
;; 			   (string->number deadtime-str)
;; 			   72000))) ;; twenty hours
;;     (db:with-db
;;      dbstruct run-id #f
;;      (lambda (dbdat db)
;;        (if (number? ovr-deadtime)(set! deadtime ovr-deadtime))
;;        
;;        ;; in RUNNING or REMOTEHOSTSTART for more than 10 minutes
;;        ;;
;;        ;; HOWEVER: this code in run:test seems to work fine
;;        ;;              (> (- (current-seconds)(+ (db:test-get-event_time testdat)
;;        ;;                     (db:test-get-run_duration testdat)))
;;        ;;                    600) 
;;        ;; (db:delay-if-busy dbdat)
;;        (sqlite3:for-each-row 
;;         (lambda (test-id run-dir uname testname item-path)
;;           (if (and (equal? uname "n/a")
;;                    (equal? item-path "")) ;; this is a toplevel test
;;               ;; what to do with toplevel? call rollup?
;;               (begin
;;                 (set! toplevels   (cons (list test-id run-dir uname testname item-path run-id) toplevels))
;;                 (debug:print-info 0 *default-log-port* "Found old toplevel test in RUNNING state, test-id=" test-id))
;;               (set! incompleted (cons (list test-id run-dir uname testname item-path run-id) incompleted))))
;;         (db:get-cache-stmth dbdat db
;;         "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > (run_duration + ?) AND state IN ('RUNNING','REMOTEHOSTSTART');")
;;         run-id deadtime)
;; 
;;        ;; in LAUNCHED for more than one day. Could be long due to job queues TODO/BUG: Need override for this in config
;;        ;;
;;        ;; (db:delay-if-busy dbdat)
;;        (sqlite3:for-each-row
;;         (lambda (test-id run-dir uname testname item-path)
;;           (if (and (equal? uname "n/a")
;;                    (equal? item-path "")) ;; this is a toplevel test
;;               ;; what to do with toplevel? call rollup?
;;               (set! toplevels   (cons (list test-id run-dir uname testname item-path run-id) toplevels))
;;               (set! oldlaunched (cons (list test-id run-dir uname testname item-path run-id) oldlaunched))))
;;         (db:get-cache-stmth dbdat db
;;         "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > 86400 AND state IN ('LAUNCHED');")
;;         run-id)
;;        
;;        (debug:print-info 18 *default-log-port* "Found " (length oldlaunched) " old LAUNCHED items, " (length toplevels) " old LAUNCHED toplevel tests and " (length incompleted) " tests marked RUNNING but apparently dead.")
;;        (if (and (null? incompleted)
;;                 (null? oldlaunched)
;;                 (null? toplevels))
;;            #f
;;            #t)))))

(define (db:get-status-from-final-status-file run-dir)
  (let ((infile (conc run-dir "/.final-status")))
    ;; first verify we are able to write the output file
    (if (not (file-read-access? infile))
        (begin 
	  (debug:print 2 *default-log-port* "ERROR: cannot read " infile)
1084
1085
1086
1087
1088
1089
1090
1091

1092
1093
1094
1095
1096

1097
1098
1099
1100
1101

1102
1103
1104
1105
1106
1107
1108
1179
1180
1181
1182
1183
1184
1185

1186
1187
1188
1189
1190

1191
1192
1193
1194
1195

1196
1197
1198
1199
1200
1201
1202
1203







-
+




-
+




-
+







    (debug:print-info 4  *default-log-port* "running-deadtime = " running-deadtime)
    (debug:print-info 4  *default-log-port* "deadtime-trim = " deadtime-trim)

    (db:with-db 
     dbstruct run-id #f
     (lambda (dbdat db)
       (let* ((stmth1 (db:get-cache-stmth
		       dbdat run-id db
		       dbdat db
		       "SELECT id,rundir,uname,testname,item_path,event_time,run_duration FROM tests 
                           WHERE run_id=? AND (strftime('%s','now') - event_time) > (run_duration + ?)
                                          AND state IN ('RUNNING');"))
	      (stmth2 (db:get-cache-stmth
		       dbdat run-id db
		       dbdat db
		       "SELECT id,rundir,uname,testname,item_path,event_time,run_duration FROM tests 
                           WHERE run_id=? AND (strftime('%s','now') - event_time) > (run_duration + ?)
                                          AND state IN ('REMOTEHOSTSTART');"))
	      (stmth3 (db:get-cache-stmth
		       dbdat run-id db
		       dbdat db
		       "SELECT id,rundir,uname,testname,item_path FROM tests
                           WHERE run_id=? AND (strftime('%s','now') - event_time) > 86400
                                          AND state IN ('LAUNCHED');")))
	 ;; in RUNNING or REMOTEHOSTSTART for more than 10 minutes
	 ;;
	 ;; HOWEVER: this code in run:test seems to work fine
	 ;;              (> (- (current-seconds)(+ (db:test-get-event_time testdat)
1363
1364
1365
1366
1367
1368
1369
1370

1371
1372
1373
1374
1375

1376
1377
1378
1379
1380

1381
1382
1383
1384
1385







1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397













1398
1399
1400

1401
1402
1403
1404

1405
1406
1407
1408
1409
1410
1411
1458
1459
1460
1461
1462
1463
1464

1465
1466
1467
1468
1469

1470
1471
1472
1473
1474

1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487












1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502

1503
1504
1505
1506

1507
1508
1509
1510
1511
1512
1513
1514







-
+




-
+




-
+





+
+
+
+
+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+


-
+



-
+







;; 	(begin
;; 	  (debug:print-info 4 *default-log-port* "launch throttle factor=" *global-delta*)
;; 	  (set! *last-global-delta-printed* *global-delta*)))

(define (db:set-var dbstruct var val)
  (db:with-db dbstruct #f #t 
	      (lambda (dbdat db)
		(sqlite3:execute db "INSERT OR REPLACE INTO metadat (var,val) VALUES (?,?);" var val))))
		(sqlite3:execute  (db:get-cache-stmth dbdat db "INSERT OR REPLACE INTO metadat (var,val) VALUES (?,?);") var val))))

(define (db:add-var dbstruct var val)
  (db:with-db dbstruct #f #t 
	      (lambda (dbdat db)
		(sqlite3:execute db "UPDATE metadat SET val=val+? WHERE var=?;" val var))))
		(sqlite3:execute  (db:get-cache-stmth dbdat db "UPDATE metadat SET val=val+? WHERE var=?;") val var))))

(define (db:del-var dbstruct var)
  (db:with-db dbstruct #f #t 
	      (lambda (dbdat db)
		(sqlite3:execute db "DELETE FROM metadat WHERE var=?;" var))))
		(sqlite3:execute  (db:get-cache-stmth dbdat db "DELETE FROM metadat WHERE var=?;") var))))

;;======================================================================
;; no-sync.db - small bits of data to be shared between servers
;;======================================================================

(define (db:get-dbsync-path)
  (case (rmt:transport-mode)
    ((http)(common:get-db-tmp-area))
    ((tcp) (conc *toppath*"/.megatest"))
    ((nfs) (conc *toppath*"/.megatest"))
    (else "/tmp/dunno-this-gonna-exist")))

(define (db:no-sync-db db-in)
  (if db-in
      db-in
      (if *no-sync-db*
	  *no-sync-db*
	  (begin
	    (mutex-lock! *db-access-mutex*)
	    (let ((dbpath (common:get-db-tmp-area))
		  (db     (dbfile:open-no-sync-db dbpath)))
	      (set! *no-sync-db* db)
	      (mutex-unlock! *db-access-mutex*)
	      db)))))
 ;; (define (db:no-sync-db db-in)
 ;;   (if db-in
 ;;       db-in
 ;;       (if *no-sync-db*
 ;; 	  *no-sync-db*
 ;; 	  (begin
 ;; 	    (mutex-lock! *db-access-mutex*)
 ;; 	    (let ((dbpath (db:get-dbsync-path))
 ;; 		  (db     (dbfile:open-no-sync-db dbpath)))
 ;; 	      (assert (sqlite3:database? db) "FATAL: db:no-sync-db failed to open a database")
 ;; 	      (set! *no-sync-db* db)
 ;; 	      (mutex-unlock! *db-access-mutex*)
 ;; 	      db)))))

(define (with-no-sync-db proc)
  (let* ((db  (db:no-sync-db *no-sync-db*)))
  (let* ((db  (db:open-no-sync-db)))
    (proc db)))

(define (db:open-no-sync-db)
  (dbfile:open-no-sync-db (db:dbfile-path)))
  (dbfile:open-no-sync-db (db:get-dbsync-path)))

(define (db:no-sync-close-db db stmt-cache)
  (db:safely-close-sqlite3-db db stmt-cache))


;; use a global for some primitive caching, it is just silly to
;; re-read the db over and over again for the keys since they never
1940
1941
1942
1943
1944
1945
1946

1947
1948
1949
1950
1951
1952
1953
1954
1955









1956
1957
1958
1959
1960
1961
1962
2043
2044
2045
2046
2047
2048
2049
2050









2051
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062
2063
2064
2065
2066







+
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+







			(if offset (conc " OFFSET " offset) "")
			";"))
    (debug:print-info 4 *default-log-port* "runs:get-runs-by-patt qry=" qry-str " " runnamepatt)
    ;(print "runs:get-runs-by-patt qry=" qry-str " " runnamepatt)

    (vector header 
            (reverse
             (db:with-db
             (db:with-db dbstruct #f #f ;; reads db, does not write to it.
                         (lambda (dbdat db)
                           (sqlite3:fold-row
                            (lambda (res . r)
                              (cons (list->vector r) res))
                            '()
                            db
                            qry-str
                            runnamepatt)))))))
	      dbstruct #f #f ;; reads db, does not write to it.
              (lambda (dbdat db)
                (sqlite3:fold-row
                 (lambda (res . r)
                   (cons (list->vector r) res))
                 '()
                 db
                 qry-str
                 runnamepatt)))))))

;; use (get-value-by-header (db:get-header runinfo)(db:get-rows runinfo))
;; NOTE: Does NOT return a list of rows (or one row) for the first slot of the vector
;;       this is inconsistent with get-runs but it makes some sense.
;;
(define (db:get-run-info dbstruct run-id)
  ;;(if (hash-table-ref/default *run-info-cache* run-id #f)
2027
2028
2029
2030
2031
2032
2033





2034
2035
2036
2037
2038
2039
2040


2041
2042
2043
2044
2045
2046
2047
2048
2049

2050
2051


2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062

2063
2064
















2065
2066
2067
2068
2069
2070
2071
2131
2132
2133
2134
2135
2136
2137
2138
2139
2140
2141
2142
2143
2144
2145
2146



2147
2148

2149
2150
2151
2152
2153
2154
2155
2156
2157


2158
2159
2160
2161
2162
2163
2164
2165
2166
2167
2168
2169
2170
2171


2172
2173
2174
2175
2176
2177
2178
2179
2180
2181
2182
2183
2184
2185
2186
2187
2188
2189
2190
2191
2192
2193
2194







+
+
+
+
+




-
-
-
+
+
-








+
-
-
+
+











+
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+







  (db:with-db
   dbstruct #f #f
   (lambda (dbdat db)
       (if msg
         (sqlite3:execute db "UPDATE runs SET status=?,comment=? WHERE id=?;" status msg run-id)
         (sqlite3:execute db "UPDATE runs SET status=? WHERE id=?;" status run-id)))))

(define (db:set-run-state-status-db dbdat db run-id state status )
  (sqlite3:execute
   (db:get-cache-stmth
    dbdat db "UPDATE runs SET status=?,state=? WHERE id=?;") status state run-id))

(define (db:set-run-state-status dbstruct run-id state status )
  (db:with-db
   dbstruct #f #f
   (lambda (dbdat db)
          (sqlite3:execute db "UPDATE runs SET status=?,state=? WHERE id=?;" status state run-id))))


     (db:set-run-state-status-db dbdat db run-id state status))))
     

(define (db:get-run-status dbstruct run-id)
  (let ((res "n/a"))
    (db:with-db
     dbstruct #f #f
     (lambda (dbdat db)
       (sqlite3:for-each-row 
	(lambda (status)
	  (set! res status))
	(db:get-cache-stmth
	db
	"SELECT status FROM runs WHERE id=?;" 
	 dbdat db
	 "SELECT status FROM runs WHERE id=?;" )
	run-id)
       res))))

(define (db:get-run-state dbstruct run-id)
  (let ((res "n/a"))
    (db:with-db
     dbstruct #f #f
     (lambda (dbdat db)
       (sqlite3:for-each-row 
	(lambda (status)
	  (set! res status))
	(db:get-cache-stmth
	db
	"SELECT state FROM runs WHERE id=?;" 
	 dbdat db
	 "SELECT state FROM runs WHERE id=?;" )
	run-id)
       res))))

(define (db:get-run-state-status dbstruct run-id)
  (let ((res (cons "n/a" "n/a")))
    (db:with-db
     dbstruct #f #f
     (lambda (dbdat db)
       (sqlite3:for-each-row 
	(lambda (state status)
	  (set! res (cons state status)))
	(db:get-cache-stmth
	 dbdat db
	 "SELECT state,status FROM runs WHERE id=?;" )
	run-id)
       res))))


;;======================================================================
;; K E Y S
;;======================================================================
2304
2305
2306
2307
2308
2309
2310
2311
2312


2313
2314
2315
2316
2317
2318
2319
2427
2428
2429
2430
2431
2432
2433


2434
2435
2436
2437
2438
2439
2440
2441
2442







-
-
+
+







    (db:with-db dbstruct run-id #f
		(lambda (dbdat db)
		  (sqlite3:for-each-row
		   (lambda (run-id testname item-path state status)
		     ;; id,run_id,testname,state,status,event_time,host,cpuload,diskfree,uname,rundir,item_path,run_duration,final_logf,comment
		     (set! res (vector test-id run-id testname state status -1 "" -1 -1 "" "-" item-path -1 "-" "-")))
		   db 
		   "SELECT run_id,testname,item_path,state,status FROM tests WHERE id=?;" 
		   test-id)))
		   "SELECT run_id,testname,item_path,state,status FROM tests WHERE id=? and run_id=?;" 
		   test-id run-id)))
    res))

;; get a useful subset of the tests data (used in dashboard
;; use db:mintest-get-{id ,run_id,testname ...}
;;
(define (db:get-tests-for-run-mindata dbstruct run-id testpatt states statuses not-in)
  (db:get-tests-for-run dbstruct run-id testpatt states statuses #f #f not-in #f #f "id,run_id,testname,state,status,event_time,item_path" 0 #f))
2400
2401
2402
2403
2404
2405
2406
2407

2408
2409



2410
2411
2412
2413
2414
2415
2416
2417
2418
2419
2420
2421













2422
2423
2424
2425
2426
2427
2428
2429
2430
2431
2432
2433
2434

2435
2436
2437
2438
2439
2440
2441
2523
2524
2525
2526
2527
2528
2529

2530

2531
2532
2533
2534












2535
2536
2537
2538
2539
2540
2541
2542
2543
2544
2545
2546
2547
2548
2549
2550
2551
2552
2553
2554
2555
2556
2557
2558
2559

2560
2561
2562
2563
2564
2565
2566
2567







-
+
-

+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+












-
+







;; ;; NB// Ultimately this will be deprecated in deference to mt:test-set-state-status-by-id
;;
;;      NOTE: run-id is not used
;; ;;
(define (db:test-set-state-status dbstruct run-id test-id newstate newstatus newcomment)
  (db:with-db
   dbstruct
   run-id
   run-id #f
   #t
   (lambda (dbdat db)
     (db:test-set-state-status-db db run-id test-id newstate newstatus newcomment))))

(define (db:test-set-state-status-db db run-id test-id newstate newstatus newcomment)
     (cond
      ((and newstate newstatus newcomment)
       (sqlite3:execute db "UPDATE tests SET state=?,status=?,comment=? WHERE id=?;" newstate newstatus newcomment ;; (sdb:qry 'getid newcomment)
			test-id))
      ((and newstate newstatus)
       (sqlite3:execute db "UPDATE tests SET state=?,status=? WHERE id=?;" newstate newstatus test-id))
      (else
       (if newstate   (sqlite3:execute db "UPDATE tests SET state=?   WHERE id=?;" newstate   test-id))
       (if newstatus  (sqlite3:execute db "UPDATE tests SET status=?  WHERE id=?;" newstatus  test-id))
       (if newcomment (sqlite3:execute db "UPDATE tests SET comment=? WHERE id=?;" newcomment ;; (sdb:qry 'getid newcomment)
				       test-id))))))
  (mt:process-triggers dbstruct run-id test-id newstate newstatus))
  (cond
   ((and newstate newstatus newcomment)
    (sqlite3:execute db "UPDATE tests SET state=?,status=?,comment=? WHERE id=?;" newstate newstatus newcomment ;; (sdb:qry 'getid newcomment)
		     test-id))
   ((and newstate newstatus)
    (sqlite3:execute db "UPDATE tests SET state=?,status=? WHERE id=?;" newstate newstatus test-id))
   (else
    (if newstate   (sqlite3:execute db "UPDATE tests SET state=?   WHERE id=?;" newstate   test-id))
    (if newstatus  (sqlite3:execute db "UPDATE tests SET status=?  WHERE id=?;" newstatus  test-id))
    (if newcomment (sqlite3:execute db "UPDATE tests SET comment=? WHERE id=?;" newcomment ;; (sdb:qry 'getid newcomment)
				    test-id))))
  ;; (mt:process-triggers dbstruct run-id test-id newstate newstatus)) ;; NOTE: Moved into calling function
  )

;; NEW BEHAVIOR: Count tests running in all runs!
;;
(define (db:get-count-tests-running dbstruct run-id) ;; fastmode)
  (let* ((qry ;; (if fastmode
		;;   "SELECT count(id) FROM tests WHERE state in ('RUNNING','LAUNCHED','REMOTEHOSTSTART') AND NOT (uname = 'n/a' AND item_path = '') LIMIT 1;"
		  "SELECT count(id) FROM tests WHERE state in ('RUNNING','LAUNCHED','REMOTEHOSTSTART') AND NOT (uname = 'n/a' AND item_path = '');")) ;; )
  (db:with-db
   dbstruct
   run-id
   #f
   (lambda (dbdat db)
     (let* ((stmth (db:get-cache-stmth dbdat run-id db qry)))
     (let* ((stmth (db:get-cache-stmth dbdat db qry)))
       (sqlite3:first-result stmth))))))

;; NEW BEHAVIOR: Count tests running in only one run!
;;
(define (db:get-count-tests-actually-running dbstruct run-id)
  (db:with-db
   dbstruct
2457
2458
2459
2460
2461
2462
2463
2464

2465
2466
2467
2468
2469
2470
2471
2472
2473
2474
2475
2476
2477

2478
2479
2480
2481
2482
2483
2484
2583
2584
2585
2586
2587
2588
2589

2590
2591
2592
2593
2594
2595
2596
2597
2598
2599
2600
2601
2602

2603
2604
2605
2606
2607
2608
2609
2610







-
+












-
+







		 ;;  "SELECT count(id) FROM tests WHERE state in ('RUNNING','LAUNCHED','REMOTEHOSTSTART') AND run_id=? LIMIT 1;"
		  "SELECT count(id) FROM tests WHERE state in ('RUNNING','LAUNCHED','REMOTEHOSTSTART') AND run_id=?;")) ;; )
    (db:with-db
     dbstruct
     run-id
     #f
     (lambda (dbdat db)
       (let* ((stmth (db:get-cache-stmth dbdat run-id db qry)))
       (let* ((stmth (db:get-cache-stmth dbdat db qry)))
	 (sqlite3:first-result stmth run-id))))))

;; For a given testname how many items are running? Used to determine
;; probability for regenerating html
;;
(define (db:get-count-tests-running-for-testname dbstruct run-id testname)
  (db:with-db
   dbstruct
   run-id
   #f
   (lambda (dbdat db)
     (let* ((stmt "SELECT count(id) FROM tests WHERE state in ('RUNNING','LAUNCHED','REMOTEHOSTSTART') AND run_id=? AND NOT (uname = 'n/a' AND item_path = '') AND testname=?;")
	    (stmth (db:get-cache-stmth dbdat run-id db stmt)))
	    (stmth (db:get-cache-stmth dbdat db stmt)))
       (sqlite3:first-result
	stmth run-id testname)))))

(define (db:get-not-completed-cnt dbstruct run-id)
(db:with-db
   dbstruct
   run-id
2670
2671
2672
2673
2674
2675
2676
2677

2678
2679
2680
2681
2682
2683
2684
2685
2686
2687
2688
2689
2690
2691


















2692
2693
2694
2695
2696
2697
2698
2796
2797
2798
2799
2800
2801
2802

2803
2804
2805
2806
2807
2808
2809
2810
2811
2812
2813
2814
2815


2816
2817
2818
2819
2820
2821
2822
2823
2824
2825
2826
2827
2828
2829
2830
2831
2832
2833
2834
2835
2836
2837
2838
2839
2840







-
+












-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+







  (let* ((run-ids (db:get-all-run-ids mtdb)))
    (for-each 
     (lambda (run-id)
       (let ((testrecs (db:get-all-tests-info-by-run-id mtdb run-id)))
	 (db:prep-megatest.db-adj-test-ids (dbr:dbdat-dbh mtdb) run-id testrecs)))
     run-ids)))

;; Get test data using test_id, run-id is not used
;; Get test data using test_id
;; 
(define (db:get-test-info-by-id dbstruct run-id test-id)
  (db:with-db
   dbstruct
   run-id
   #f
   (lambda (dbdat db)
     (let ((res #f))
       (sqlite3:for-each-row ;; attemptnum added to hold pid of top process (not Megatest) controlling a test
	(lambda (id run-id testname state status event-time host cpuload diskfree uname rundir-id item-path run_duration final-logf-id comment short-dir-id attemptnum archived last-update)
	  ;;                0    1       2      3      4        5       6      7        8     9     10      11          12          13           14         15          16
	  (set! res (vector id run-id testname state status event-time host cpuload diskfree uname rundir-id item-path run_duration final-logf-id comment short-dir-id attemptnum archived last-update)))
	db
	(conc "SELECT " db:test-record-qry-selector " FROM tests WHERE id=?;")
	(db:get-cache-stmth dbdat db
			    (conc "SELECT " db:test-record-qry-selector " FROM tests WHERE id=?;"))
	test-id)
       res))))

;; Get test state, status using test_id
;; 
(define (db:get-test-state-status-by-id dbstruct run-id test-id)
  (db:with-db
   dbstruct
   run-id
   #f
   (lambda (dbdat db)
     (let ((res (cons #f #f)))
       (sqlite3:for-each-row ;; attemptnum added to hold pid of top process (not Megatest) controlling a test
	(lambda (state status)
	  (cons state status))
	(db:get-cache-stmth dbdat db "SELECT state,status FROM tests WHERE id=?;")
	test-id)
       res))))

;; Use db:test-get* to access
;; Get test data using test_ids. NB// Only works within a single run!!
;;
(define (db:get-test-info-by-ids dbstruct run-id test-ids)
2713
2714
2715
2716
2717
2718
2719



2720
2721
2722
2723
2724
2725
2726
2727








2728
2729
2730
2731
2732
2733
2734
2855
2856
2857
2858
2859
2860
2861
2862
2863
2864








2865
2866
2867
2868
2869
2870
2871
2872
2873
2874
2875
2876
2877
2878
2879







+
+
+
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+








(define (db:get-test-info dbstruct run-id test-name item-path)
  (db:with-db
   dbstruct
   run-id
   #f
   (lambda (dbdat db)
     (db:get-test-info-db db run-id test-name item-path))))

(define (db:get-test-info-db db run-id test-name item-path)
     (let ((res #f))
       (sqlite3:for-each-row
	(lambda (a . b)
	  (set! res (apply vector a b)))
	db
	(conc "SELECT " db:test-record-qry-selector " FROM tests WHERE testname=? AND item_path=? AND run_id=?;")
	test-name item-path run-id)
       res))))
  (let ((res #f))
    (sqlite3:for-each-row
     (lambda (a . b)
       (set! res (apply vector a b)))
     db
     (conc "SELECT " db:test-record-qry-selector " FROM tests WHERE testname=? AND item_path=? AND run_id=?;")
     test-name item-path run-id)
    res))

(define (db:test-get-rundir-from-test-id dbstruct run-id test-id)
  (db:with-db
   dbstruct
   run-id
   #f
   (lambda (dbdat db)
2841
2842
2843
2844
2845
2846
2847
2848

2849
2850
2851
2852
2853
2854
2855
2986
2987
2988
2989
2990
2991
2992

2993
2994
2995
2996
2997
2998
2999
3000







-
+







(define (db:get-data-info-by-id dbstruct run-id test-data-id)
  (let* ((stmt        "SELECT id,test_id, category, variable, value, expected, tol, units, comment, status, type, last_update FROM test_data WHERE id=? ORDER BY id ASC;")) ;; event_time DESC,id ASC;
    (db:with-db
     dbstruct
     run-id
     #f
     (lambda (dbdat db)
       (let* ((stmth (db:get-cache-stmth dbdat #f db stmt))
       (let* ((stmth (db:get-cache-stmth dbdat db stmt))
	      (res   (sqlite3:fold-row
		      (lambda (res id test-id  category variable value expected tol units comment status type last-update)
			(vector id test-id  category variable value expected tol units comment status type last-update))
		      (vector #f #f #f #f #f #f #f #f #f #f #f #f)
		      stmth
		      test-data-id)))
        res)))))
3175
3176
3177
3178
3179
3180
3181
3182



3183
3184
3185
3186
3187
3188
3189
3190
3191
3192
3193
3194

3195
3196
3197
3198
3199






3200
3201
3202
3203
3204
3205





3206
3207
3208


3209
3210
3211
3212


3213
3214
3215
3216
3217
3218
3219
3320
3321
3322
3323
3324
3325
3326

3327
3328
3329
3330
3331
3332
3333
3334
3335
3336
3337
3338
3339
3340

3341
3342




3343
3344
3345
3346
3347
3348
3349





3350
3351
3352
3353
3354



3355
3356
3357
3358
3359
3360
3361
3362
3363
3364
3365
3366
3367
3368
3369







-
+
+
+











-
+

-
-
-
-
+
+
+
+
+
+

-
-
-
-
-
+
+
+
+
+
-
-
-
+
+




+
+







	 (test-name    (if (number? test-name)
			   (db:test-get-testname testdat)
			   test-name))
	 (item-path    (db:test-get-item-path testdat))
         (tl-testdat   (db:get-test-info dbstruct run-id test-name ""))
         (tl-test-id   (if tl-testdat
			   (db:test-get-id tl-testdat)
			   #f)))
			   #f))
	 (new-state-eh #f)
	 (new-status-eh #f))
    (if (member state '("LAUNCHED" "REMOTEHOSTSTART")) 
	(db:general-call dbstruct run-id 'set-test-start-time (list test-id)))
    (mutex-lock! *db-transaction-mutex*)
    (db:with-db
     dbstruct run-id #f
     (lambda (dbdat db)
       (let ((tr-res
              (sqlite3:with-transaction
               db
               (lambda ()
                 ;; NB// Pass the db so it is part fo the transaction
                 (db:test-set-state-status db run-id test-id state status comment) ;; this call sets the item state/status
                 (db:test-set-state-status-db db run-id test-id state status comment) ;; this call sets the item state/status
                 (if (not (equal? item-path "")) ;; only roll up IF incoming test is an item
                     (let* ((state-status-counts  (db:get-all-state-status-counts-for-test db run-id test-name item-path state status)) ;; item-path is used to exclude current state/status of THIS test
			    (state-statuses        (db:roll-up-rules state-status-counts state status))
                          (newstate (car state-statuses))
                          (newstatus (cadr state-statuses)))
                     (let* ((state-status-counts (db:get-all-state-status-counts-for-test db run-id test-name item-path state status)) ;; item-path is used to exclude current state/status of THIS test
			    (state-statuses      (db:roll-up-rules state-status-counts state status))
                            (newstate            (car state-statuses))
                            (newstatus           (cadr state-statuses)))
		       (set! new-state-eh newstate)
		       (set! new-status-eh newstatus)
                       (debug:print 4 *default-log-port* "BB> tl-test-id="tl-test-id" ; "test-name":"item-path" newstate="newstate" newstatus="newstatus" len(sscs)="(length state-status-counts)  " state-status-counts: "
							(apply conc
                  (map (lambda (x)
                     (conc
                     		(with-output-to-string (lambda () (pp (dbr:counts->alist x)))) " | "))
                              state-status-counts))); end debug:print
				    (apply conc
					   (map (lambda (x)
						  (conc
                     				   (with-output-to-string (lambda () (pp (dbr:counts->alist x)))) " | "))
						state-status-counts))); end debug:print
   
                       (if tl-test-id
			   (db:test-set-state-status db run-id tl-test-id newstate newstatus #f)) ;; we are still in the transaction - must access the db and not the dbstruct
		       (if tl-test-id
			   (db:test-set-state-status-db db run-id tl-test-id newstate newstatus #f)) ;; we are still in the transaction - must access the db and not the dbstruct
		       ))))))
         (mutex-unlock! *db-transaction-mutex*)
         (if (and test-id state status (equal? status "AUTO")) 
             (db:test-data-rollup dbstruct run-id test-id status))
	 (if new-state-eh ;; moved from db:test-set-state-status
	      (mt:process-triggers dbstruct run-id test-id new-state-eh new-status-eh))
         tr-res)))))

(define (db:roll-up-rules state-status-counts state status)
  (let* ((running     (length (filter (lambda (x)
					(member (dbr:counts-state x) *common:running-states*))
				      state-status-counts)))
	 (bad-not-started      (length (filter (lambda (x)
3273
3274
3275
3276
3277
3278
3279
3280

3281
3282
3283
3284
3285

3286
3287
3288
3289

3290
3291
3292
3293
3294
3295
3296
3297
3298







3299
3300





3301
3302
3303
3304
3305
3306
3307


3308
3309
3310

3311
3312
3313
3314
3315
3316
3317
3318
3319






3320
3321
3322
3323


3324
3325

3326
3327
3328
3329
3330
3331
3332
3333
3334
3335
3336
3337
3338
3339
3340
3341
3342
3343
3344
3345
3346
3423
3424
3425
3426
3427
3428
3429

3430
3431
3432
3433
3434

3435
3436
3437
3438

3439









3440
3441
3442
3443
3444
3445
3446

3447
3448
3449
3450
3451
3452
3453
3454
3455
3456
3457


3458
3459
3460
3461

3462









3463
3464
3465
3466
3467
3468

3469


3470
3471
3472

3473
3474
3475
3476
3477
3478
3479
3480
3481
3482
3483
3484
3485
3486

3487
3488
3489
3490
3491
3492
3493







-
+




-
+



-
+
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
-

+
+
+
+
+





-
-
+
+


-
+
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
-

-
-
+
+

-
+













-







    (db:with-db
     dbstruct run-id #f
     (lambda (dbdat db)
       (let ((tr-res
              (sqlite3:with-transaction
               db
               (lambda ()
                   (let* ((state-status-counts (db:get-all-state-status-counts-for-run db run-id))
                   (let* ((state-status-counts (db:get-all-state-status-counts-for-run-db dbdat db run-id))
			  (state-statuses      (db:roll-up-rules state-status-counts #f #f ))
                          (newstate            (car state-statuses))
                          (newstatus           (cadr state-statuses))) 
		     (if (or (not (eq? newstate curr-state)) (not (eq?  newstatus curr-status)))
			 (db:set-run-state-status db run-id newstate newstatus )))))))
			 (db:set-run-state-status-db dbdat db run-id newstate newstatus )))))))
         (mutex-unlock! *db-transaction-mutex*)
         tr-res))))

(define (db:get-all-state-status-counts-for-run dbstruct run-id)
(define (db:get-all-state-status-counts-for-run-db dbdat db run-id)
 (let* ((test-count-recs (db:with-db
                          dbstruct #f #f
                          (lambda (dbdat db)
                            (sqlite3:map-row
                             (lambda (state status count)
                               (make-dbr:counts state: state status: status count: count))
                             db
                             "SELECT state,status,count(id) FROM tests WHERE run_id=?  GROUP BY state,status;"
                             run-id )))))
  (sqlite3:map-row
   (lambda (state status count)
     (make-dbr:counts state: state status: status count: count))
   (db:get-cache-stmth
    dbdat db
    "SELECT state,status,count(id) FROM tests WHERE run_id=?  GROUP BY state,status;")
   run-id ))
   test-count-recs))

(define (db:get-all-state-status-counts-for-run dbstruct run-id)
  (db:with-db
   dbstruct #f #f
   (lambda (dbdat db)
     (db:get-all-state-status-counts-for-run-db dbdat db run-id))))

;; BBnote: db:get-all-state-status-counts-for-test returns dbr:counts object aggregating state and status of items of a given test, *not including rollup state/status*
;;
;; NOTE: This is called within a transaction
;;
(define (db:get-all-state-status-counts-for-test dbstruct run-id test-name item-path item-state-in item-status-in)
  (let* ((test-info   (db:get-test-info dbstruct run-id test-name item-path))
(define (db:get-all-state-status-counts-for-test db run-id test-name item-path item-state-in item-status-in)
  (let* ((test-info   (db:get-test-info-db db run-id test-name item-path))
         (item-state  (or item-state-in (db:test-get-state test-info))) 
         (item-status (or item-status-in (db:test-get-status test-info)))
         (other-items-count-recs (db:with-db
         (other-items-count-recs (sqlite3:map-row
                                  dbstruct run-id #f
                                  (lambda (dbdat db)
                                    (sqlite3:map-row
                                     (lambda (state status count)
                                       (make-dbr:counts state: state status: status count: count))
                                     db
                                     ;; ignore current item because we have changed its value in the current transation so this select will see the old value.
                                     "SELECT state,status,count(id) FROM tests WHERE run_id=? AND testname=? AND item_path != '' AND item_path !=? GROUP BY state,status;"
                                     run-id test-name item-path))))
                                  (lambda (state status count)
                                    (make-dbr:counts state: state status: status count: count))
                                  db
                                  ;; ignore current item because we have changed its value in the current transation so this select will see the old value.
                                  "SELECT state,status,count(id) FROM tests WHERE run_id=? AND testname=? AND item_path != '' AND item_path !=? GROUP BY state,status;"
                                  run-id test-name item-path))

         ;; add current item to tally outside of sql query
         (match-countrec-lambda (lambda (countrec) 
                                  (and (equal? (dbr:counts-state  countrec) item-state)
	 (match-countrec-lambda (lambda (countrec) 
				  (and (equal? (dbr:counts-state  countrec) item-state)
                                       (equal? (dbr:counts-status countrec) item-status))))

	 
         (already-have-count-rec-list
          (filter match-countrec-lambda other-items-count-recs)) ;; will have either 0 or 1 count recs depending if another item shares this item's state/status
         
         (updated-count-rec    (if (null? already-have-count-rec-list)
                                   (make-dbr:counts state: item-state status: item-status count: 1)
                                   (let* ((our-count-rec (car already-have-count-rec-list))
                                          (new-count (add1 (dbr:counts-count our-count-rec))))
                                     (make-dbr:counts state: item-state status: item-status count: new-count))))

         (nonmatch-countrec-lambda (lambda (countrec) (not (match-countrec-lambda countrec))))
         
         (unrelated-rec-list   
          (filter nonmatch-countrec-lambda other-items-count-recs)))
    
    (cons updated-count-rec unrelated-rec-list)))

;; (define (db:get-all-item-states db run-id test-name)
;;   (sqlite3:map-row 
;;    (lambda (a) a)
;;    db
;;    "SELECT DISTINCT state FROM tests WHERE item_path != '' AND state != 'DELETED' AND run_id=? AND testname=?"
4610
4611
4612
4613
4614
4615
4616



4617
4618


4619
4620

4621

4622
4623
4624
4625
4626
4627
4628
4757
4758
4759
4760
4761
4762
4763
4764
4765
4766
4767

4768
4769
4770
4771
4772

4773
4774
4775
4776
4777
4778
4779
4780







+
+
+

-
+
+


+
-
+







				  (let ((db (cdr *task-db*)))
				    (if (sqlite3:database? db)
					(begin
					  (sqlite3:interrupt! db)
					  (sqlite3:finalize! db #t)
					  ;; (vector-set! *task-db* 0 #f)
					  (set! *task-db* #f)))))
			      (if (and *no-sync-db*
				       (sqlite3:database? *no-sync-db*))
				  (sqlite3:finalize! *no-sync-db* #t))
			      (if (and (not (args:get-arg "-server"))
				       *runremote*)
				       *runremote*
				       (eq? (rmt:transport-mode) 'http))
				  (begin
				    (debug:print-info 0 *default-log-port* "Closing all client connections...")
				    (http-transport:close-connections *runremote*)
				    (http-client#close-all-connections!)))
				    #;(http-client#close-all-connections!)))
                              ;; (if (and *runremote*
                              ;;          (remote-conndat *runremote*))
                              ;;     (begin
                              ;;       (http-client#close-all-connections!))) ;; for http-client
                              (if (not (eq? *default-log-port* (current-error-port)))
                                  (close-output-port *default-log-port*))
			      (set! *default-log-port* (current-error-port))) "Cleanup db exit thread"))

Modified dbfile.scm from [d24edd08a7] to [a3424eedef].

35
36
37
38
39
40
41

42
43




44






45
46
47
48
49
50
51
52
53
54
55
56
57
58









59
60
61
62
63
64
65
35
36
37
38
39
40
41
42
43
44
45
46
47
48

49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84







+


+
+
+
+
-
+
+
+
+
+
+














+
+
+
+
+
+
+
+
+







	posix typed-records srfi-18 srfi-1
	srfi-69
	stack
	files
	ports

	commonmod
	;; debugprint
	)

(define keep-age-param (make-parameter 10)) ;; qif file age, if over move to attic
(define num-run-dbs    (make-parameter 10))     ;; number of db's in .megatest
(define dbfile:testsuite-name (make-parameter #f))
(define dbfile:sync-method (make-parameter 'attach)) ;; 'attach or 'original
;; (import debugprint)

;; 'original     - use old condition code
;; 'suicide-mode - create mtrah/stop-the-train with info on what went wrong
;; else use no condition code (should be production mode)
;;
(define no-condition-db-with-db (make-parameter 'suicide-mode))

;;======================================================================
;;  R E C O R D S
;;======================================================================

;; a single Megatest area with it's multiple dbs is
;; managed in a dbstruct
;;
(defstruct dbr:dbstruct
  (areapath  #f)
  (homehost  #f)
  (tmppath   #f)
  (read-only #f)
  (subdbs (make-hash-table))
  ;;
  ;; for the inmem approach (see dbmod.scm)
  ;; this is one db per server
  (inmem     #f)  ;; handle for the in memory copy
  (dbfile    #f)  ;; path to the db file on disk
  (ondiskdb  #f)  ;; handle for the on-disk file
  (dbdat     #f)  ;; create a dbdat for the downstream calls such as db:with-db
  (last-update 0)
  (sync-proc #f)
  )

;; NOTE: Need one dbr:subdb per main.db, 1.db ...
;;
(defstruct dbr:subdb
  (dbname      #f) ;; .megatest/1.db
  (mtdbfile    #f) ;; mtrah/.megatest/1.db
90
91
92
93
94
95
96

97
98
99
100
101
102
103
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123







+







(define *db-sync-in-progress* #f)
(define *db-with-db-mutex*    (make-mutex))
(define *max-api-process-requests* 0)
(define *api-process-request-count* 0)
(define *db-write-access*     #t)
(define *db-last-sync*        0)                 ;; last time the sync to megatest.db happened
(define *db-multi-sync-mutex* (make-mutex))      ;; protect access to *db-sync-in-progress*, *db-last-sync*
(define *db-last-access*      (current-seconds))

(define (db:generic-error-printout exn . message)
  (print-call-chain (current-error-port))
  (apply dbfile:print-err message)
  (dbfile:print-err
    ", error: "     ((condition-property-accessor 'exn 'message)   exn)
    ", arguments: " ((condition-property-accessor 'exn 'arguments) exn)
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195







196
197
198
199
200
201

202
203
204
205
206
207
208
209
210
211
212
213
214

215
216
217
218
219
220
221
222
223
224
225
226
227
228
229




230
231
232

233
234
235
236
237
238
239
240
241
242
243
244
245
246
247






248
249
250
251
252
253
254
175
176
177
178
179
180
181





























182
183
184
185
186
187
188
189
190
191
192
193
194
195
196



197

198
199
200
201
202
203
204
205
206
207
208

209
210


211
212
213
214
215
216
217
218
219
220
221

222
223
224
225
226
227

228
229
230
231
232
233
234
235
236
237
238
239
240
241


242
243
244
245
246
247
248
249
250
251
252
253
254







-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-





+
+
+
+
+
+
+



-
-
-
+
-











-
+

-
-











-
+
+
+
+


-
+













-
-
+
+
+
+
+
+







	   subdbs)
           #t
          )
          #f
  )
)

;; ;; set up a single db (e.g. main.db, 1.db ... etc.)
;; ;;
;; (define (db:setup-db dbstruct areapath run-id)
;;   (let* ((dbname   (db:run-id->dbname run-id))
;; 	 (dbstruct (hash-table-ref/default dbstructs dbname #f)))
;;     (if dbstruct
;; 	dbstruct
;; 	(let* ((dbstruct-new (make-dbr:dbstruct)))
;; 	  (db:open-db dbstruct-new run-id areapath: areapath do-sync: #t)
;; 	  (hash-table-set! dbstructs dbname dbstruct-new)
;; 	  dbstruct-new))))
    
;; ; Returns the dbdat for a particular dbfile inside the area
;; ;;
;; (define (dbr:dbstruct-get-dbdat dbstruct dbfile)
;;   (hash-table-ref/default (dbr:dbstruct-dbdats dbstruct) dbfile #f))
;; 
;; (define (dbr:dbstruct-dbdat-put! dbstruct dbfile db)
;;   (hash-table-set! (dbr:dbstruct-dbdats dbstruct) dbfile db))
;; 
;; (define (db:run-id->first-num run-id)
;;   (let* ((s (number->string run-id))
;; 	 (l (string-length s)))
;;     (substring s (- l 1) l)))

;; 1234 => 4/1234.db
;;   #f => 0/main.db
;;   (abandoned the idea of num/db)
;; 
(define (dbfile:run-id->path apath run-id)
  (conc apath"/"(dbfile:run-id->dbname run-id)))

(define (db:dbname->path apath dbname)
  (conc apath"/"dbname))

(define (dbfile:run-id->dbnum run-id)
  (cond
   ((number? run-id)
    (modulo run-id (num-run-dbs)))
   ((not run-id) "main")   ;; 0 or main?
   (else run-id)))

;; POTENTIAL BUG: this implementation could produce a db file if run-id is neither #f or a number
(define (dbfile:run-id->dbname run-id)
  (cond
   ((number? run-id) (conc ".megatest/" (modulo run-id 100) ".db"))
   ((not run-id)     (conc ".megatest/main.db"))
  (conc ".megatest/"(dbfile:run-id->dbnum run-id)".db"))
   (else             run-id)))

;; Make the dbstruct, setup up auxillary db's and call for main db at least once
;;
;; called in http-transport and replicated in rmt.scm for *local* access. 
;;
(define (dbfile:setup do-sync areapath tmppath)
  (cond
   (*dbstruct-dbs*
    (dbfile:print-err "WARNING: dbfile:setup called when *dbstruct-dbs* is already initialized")
    *dbstruct-dbs*) ;; TODO: when multiple areas are supported, this optimization will be a hazard
   (else
    (let* ((dbstruct (make-dbr:dbstruct)))
    (let* ((dbstruct (make-dbr:dbstruct areapath: areapath tmppath: tmppath)))
      (set! *dbstruct-dbs* dbstruct)
      (dbr:dbstruct-areapath-set! dbstruct areapath)
      (dbr:dbstruct-tmppath-set!  dbstruct tmppath)
      dbstruct))))

(define (dbfile:get-subdb dbstruct run-id)
  (let* ((dbfname (dbfile:run-id->dbname run-id)))
    (hash-table-ref/default (dbr:dbstruct-subdbs dbstruct) dbfname #f)))

(define (dbfile:set-subdb dbstruct run-id subdb)
  (hash-table-set! (dbr:dbstruct-subdbs dbstruct) (dbfile:run-id->dbname run-id) subdb))

;; (define *dbfile:num-handles-in-use* 0)

;; Get/open a database
;; Get/open a database.
;;
;;    NOTE: most usage should call dbfile:open-db to get a dbdat
;;
;;    if run-id => get run specific db
;;    if #f     => get main db
;;    if run-id is a string treat it as a filename
;;    if run-id is a string treat it as a filename - DON'T use this - we'll get rid of it.
;;    if db already open - return inmem
;;    if db not open, open inmem, rundb and sync then return inmem
;;    inuse gets set automatically for rundb's
;;
(define (dbfile:get-dbdat dbstruct run-id)
  (let* ((subdb (dbfile:get-subdb dbstruct run-id)))
    (if (stack-empty? (dbr:subdb-dbstack subdb))
	#f
	(begin
	  (stack-pop! (dbr:subdb-dbstack subdb))))))

;; return a previously opened db handle to the stack of available handles
(define (dbfile:add-dbdat dbstruct run-id dbdat)
  (let* ((subdb (dbfile:get-subdb dbstruct run-id)))
    (stack-push! (dbr:subdb-dbstack subdb) dbdat)
  (let* ((subdb (dbfile:get-subdb dbstruct run-id))
	 (dbstk (dbr:subdb-dbstack subdb))
	 (count (stack-count dbstk)))
    (if (> count 15)
	(dbfile:print-err "WARNING: stack for "run-id".db is "count"."))
    (stack-push! dbstk dbdat)
    dbdat))

;; set up a subdb
;;
(define (dbfile:init-subdb dbstruct run-id init-proc)
  (let* ((dbname    (dbfile:run-id->dbname run-id))
	 (areapath  (dbr:dbstruct-areapath dbstruct))
324
325
326
327
328
329
330
331

332
333

334
335
336
337
338
339
340
341

342
343
344
345
346
347
348
349
350
351
352
353
354
355
356

357
358
359
360
361
362
363
324
325
326
327
328
329
330

331
332

333
334
335
336
337
338
339
340

341
342
343
344
345
346
347
348
349
350
351
352
353
354
355

356
357
358
359
360
361
362
363







-
+

-
+







-
+














-
+







  (exit 1))
    
(define (dbfile:print-err . params)
  (with-output-to-port
      (current-error-port)
    (lambda ()
      (apply print params))))
    

(define (dbfile:cautious-open-database fname init-proc sync-mode journal-mode #!optional (tries-left 500))
  (let* ((busy-file  (conc fname"-journal"))
  (let* ((busy-file  (conc fname "-journal"))
	 (delay-time (* (- 51 tries-left) 1.1))
      	 (write-access (file-write-access? fname))
         (dir-access (file-write-access? (pathname-directory fname)))
         (retry      (lambda ()
		       (thread-sleep! delay-time)
		       (if (> tries-left 0)
			   (dbfile:cautious-open-database fname init-proc
							  sync-mode: sync-mode journal-mode: journal-mode
							  sync-mode journal-mode
							  (- tries-left 1))))))
    (assert (>= tries-left 0) (conc "FATAL: too many attempts in dbfile:cautious-open-database of "fname", giving up."))
    
    (if (and (file-write-access? fname)
	     (file-exists? busy-file))
	(begin
	  (if (common:low-noise-print 120 busy-file)
	      (dbfile:print-err "INFO: dbfile:cautious-open-database: journal file "
				busy-file" exists, trying again in few seconds."))
	  (thread-sleep! 1)
	  (if (eq? tries-left 2)
	      (begin
	  	(dbfile:print-err "INFO: forcing journal rollup "busy-file)
	  	(dbfile:brute-force-salvage-db fname)))
	  (dbfile:cautious-open-database fname init-proc sync-mode: sync-mode journal-mode: journal-mode (- tries-left 1)))
	  (dbfile:cautious-open-database fname init-proc sync-mode journal-mode (- tries-left 1)))
	
	(let* ((result (condition-case
		        (if dir-access
			    (dbfile:with-simple-file-lock
			     (conc fname ".lock")
			     (lambda ()
			       (let* ((db-exists (file-exists? fname))
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420


421
422
423








424
425
426
427
428
429
430
400
401
402
403
404
405
406

407
408
409
410
411
412
413
414
415
416
417
418
419
420
421



422
423
424
425
426
427
428
429
430
431
432
433
434
435
436







-













+
+
-
-
-
+
+
+
+
+
+
+
+







  (let* ((backupfname (conc fname"-"(current-process-id)".bak"))
	 (cmd (conc "cp "fname" "backupfname";mv "fname" "(conc fname ".delme;")
		    "cp "backupfname" "fname)))
    (dbfile:print-err "WARNING: attempting recovery of file "fname" by running commands:\n"
		      "  "cmd)
    (system cmd)))


(define (dbfile:open-no-sync-db dbpath)
  (if *no-sync-db*
      *no-sync-db*
      (begin
	(if (not (file-exists? dbpath))
	    (create-directory dbpath #t))
	(let* ((dbname    (conc dbpath "/no-sync.db"))
	       (db-exists (file-exists? dbname))
	       (init-proc (lambda (db)
			    (if (not db-exists)
				(begin
				  (sqlite3:execute db "CREATE TABLE IF NOT EXISTS no_sync_metadat (var TEXT,val TEXT, CONSTRAINT no_sync_metadat_constraint UNIQUE (var));"))
				)))
	       (on-tmp      (equal? (car (string-split dbpath "/")) "tmp"))
	       (db        (if on-tmp
	       (db        (dbfile:cautious-open-database dbname init-proc 0 "WAL"))) ;; (sqlite3:open-database dbname)))
	  ;; (sqlite3:execute db "PRAGMA synchronous = 0;")
	  ;; (sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 136000)) ;; done in cautious-open-database
			      (dbfile:cautious-open-database dbname init-proc 0 "WAL")
			      (dbfile:cautious-open-database dbname init-proc 0 #f)
			      ;; (sqlite3:open-database dbname)
			      )))
	  (if on-tmp	      ;; done in cautious-open-database
	      (begin
		(sqlite3:execute db "PRAGMA synchronous = 0;")
		(sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 136000))))
	  (set! *no-sync-db* db)
	  db))))

(define (db:no-sync-set db var val)
  (sqlite3:execute db "INSERT OR REPLACE INTO no_sync_metadat (var,val) VALUES (?,?);" var val))

(define (db:no-sync-del! db var)
511
512
513
514
515
516
517

518

519
520
521
522
523
524
525
517
518
519
520
521
522
523
524

525
526
527
528
529
530
531
532







+
-
+







	  (dbfile:print-err "INFO: db:lock-and-delta-sync copying db "runid" at "(current-seconds))
	  (set! *db-sync-in-progress* #t)
	  (db:sync-touched dbstruct runid keys dbinit)
	  (set! *db-sync-in-progress* #f)
	  (delete-file* lock-file)
	  #t)
        (begin
	  (if (common:low-noise-print 120 (conc "no lock "from-db-file))
          (dbfile:print-err "INFO: could not get lock for " from-db-file ", sync likely in progress.")
	      (dbfile:print-err "INFO: could not get lock for " from-db-file ", sync likely in progress."))
	  #f
	  ))))

;; ;; Get a lock from the no-sync-db for the from-db, then delta sync the from-db to the to-db, otherwise return #f
;; ;;
;; (define (db:lock-and-delta-sync-orig no-sync-db dbstruct from-db-file runid keys dbinit)
;;   (assert (not *db-sync-in-progress*) "FATAL: db:lock-and-sync called while a sync is in progress.")
554
555
556
557
558
559
560
561

562
563
564
565
566
567
568
561
562
563
564
565
566
567

568
569
570
571
572
573
574
575







-
+







         (tmpdbfile (dbr:subdb-tmpdbfile subdb))
	 (mtdb      (dbr:subdb-mtdbdat subdb))
         (tmpdb     (db:open-db dbstruct run-id dbinit)) ;; sqlite3-db tmpdbfile #f))
	 (start-t   (current-seconds)))
    (mutex-lock! *db-multi-sync-mutex*)
    (let ((update_info (cons "last_update" (if force-sync 0 *db-last-sync*) )))
      (mutex-unlock! *db-multi-sync-mutex*)
      (db:sync-tables (db:sync-all-tables-list dbstruct keys) update_info tmpdb mtdb))
      (db:sync-tables (db:sync-all-tables-list keys) update_info tmpdb mtdb))
    (mutex-lock! *db-multi-sync-mutex*)
    (set! *db-last-sync* start-t)
    (set! *db-last-access* start-t)
    (mutex-unlock! *db-multi-sync-mutex*)
    (dbfile:add-dbdat dbstruct run-id tmpdb)
  #t))

617
618
619
620
621
622
623
624
625


626
627
628
629
630
631
632
624
625
626
627
628
629
630


631
632
633
634
635
636
637
638
639







-
-
+
+







	 '("comment"        #f)
	 '("status"         #f)
	 '("type"           #f)
         '("last_update"    #f))))

;; needs db to get keys, this is for syncing all tables
;;
(define (db:sync-main-list dbstruct keys)
  (let ((keys  keys)) ;; (db:get-keys dbstruct)))
(define (db:sync-main-list keys)
  (let ((keys  keys))
    (list
     (list "keys"
	   '("id"        #f)
	   '("fieldname" #f)
	   '("fieldtype" #f))
     (list "metadat" '("var" #f) '("val" #f))
     (append (list "runs" 
656
657
658
659
660
661
662
663

664















665
666


667
668
669
670
671
672
673
663
664
665
666
667
668
669

670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686


687
688
689
690
691
692
693
694
695







-
+

+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
+
+







	   '("owner"          #f)
	   '("description"    #f)
	   '("reviewed"       #f)
	   '("iterated"       #f)
	   '("avg_runtime"    #f)
	   '("avg_disk"       #f)
	   '("tags"           #f)
	   '("jobgroup"       #f)))))
	   '("jobgroup"       #f))


     (list "tasks_queue"
           '("id"            #f)
           '("action"        #f)
           '("owner"         #f) 
           '("state"         #f)
           '("target"        #f)
           '("name"          #f)
           '("testpatt"      #f)
           '("keylock"       #f)
           '("params"        #f)
           '("creation_time" #f)
           '("execution_time" #f))
     )))

(define (db:sync-all-tables-list dbstruct keys)
  (append (db:sync-main-list dbstruct keys)
(define (db:sync-all-tables-list keys)
  (append (db:sync-main-list keys)
	  db:sync-tests-only))

;; tbls is ( ("tablename" ( "field1" [#f|proc1] ) ( "field2" [#f|proc2] ) .... ) )
;; db's are dbdat's
;;
;; if last-update specified ("field-name" . time-in-seconds)
;;    then sync only records where field-name >= time-in-seconds
868
869
870
871
872
873
874
875


876
877
878
879
880
881
882
890
891
892
893
894
895
896

897
898
899
900
901
902
903
904
905







-
+
+







	     (append (list todb) slave-dbs)
           )
          )
        )
	tbls)
       (let* ((runtime      (- (current-milliseconds) start-time))
	      (should-print (or ;; (debug:debug-mode 12)
				(common:low-noise-print 120 "db sync" (> runtime 500))))) ;; low and high sync times treated as separate.
			     (common:low-noise-print 120 "db sync")
			     (> runtime 500)))) ;; low and high sync times treated as separate.
	 (for-each 
	  (lambda (dat)
	    (let ((tblname (car dat))
		  (count   (cdr dat)))
	      (set! tot-count (+ tot-count count))
              )) 
	  (sort (hash-table->alist numrecs)(lambda (a b)(> (cdr a)(cdr b))))))
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
936
937
938
939
940
941
942




















943
944
945
946
947
948
949







-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-







                               END;" )
	   (list "update_test_data_trigger"  "CREATE TRIGGER  IF NOT EXISTS update_test_data_trigger AFTER UPDATE ON test_data
                             FOR EACH ROW
                               BEGIN 
                                 UPDATE test_data SET last_update=(strftime('%s','now'))
                                   WHERE id=old.id;
                               END;" )))
;;
;; ADD run-id SUPPORT
;;
(define (db:create-all-triggers dbstruct)
  (db:with-db
   dbstruct #f #f
   (lambda (dbdat db)
     (db:create-triggers db))))

(define (db:create-triggers db)
    (for-each (lambda (key)
              (sqlite3:execute db (cadr key)))
          db:trigger-list))

(define (db:drop-all-triggers dbstruct)
  (db:with-db
   dbstruct #f #f
   (lambda (dbdat db)
     (db:drop-triggers db))))

(define (db:is-trigger-dropped db tbl-name)
  (let* ((trigger-name (if (equal? tbl-name "test_steps")
			   "update_teststeps_trigger" 
                           (conc "update_" tbl-name "_trigger")))
	 (res          #f))
    (sqlite3:for-each-row
     (lambda (name)
980
981
982
983
984
985
986




987
988
989
990
991
992























































993
994
995
996



997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008





1009
1010
1011
1012








1013
1014
1015
1016
1017
1018
1019


1020
1021
1022
1023






1024
1025
1026
1027

1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040






















1041
1042
1043
1044
1045
1046
1047
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057

1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077




1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091

1092
1093
1094



1095
1096
1097
1098
1099
1100




1101













1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130







+
+
+
+






+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+



-
+
+
+












+
+
+
+
+
-
-
-
-
+
+
+
+
+
+
+
+






-
+
+

-
-
-
+
+
+
+
+
+
-
-
-
-
+
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+







;;======================================================================

;; call with dbinit=db:initialize-main-db
;;
(define (db:open-db dbstruct run-id dbinit)
  ;; (mutex-lock! *db-open-mutex*)
  (let* ((dbdat (dbfile:open-db dbstruct run-id dbinit)))
              #;(case (rmt:transport-mode)
		  ((http) (dbfile:open-db dbstruct run-id dbinit))
		  ((tcp)  (dbmod:open-db  dbstruct run-id dbinit))
		  (else (assert #f "FATAL: rmt:transport-node not correct value"(rmt:transport-mode))))
    (set! *db-write-access* (not (dbr:dbdat-read-only dbdat)))
    ;; (mutex-unlock! *db-open-mutex*)
    dbdat))

(define dbfile:db-init-proc (make-parameter #f))

;; in xmaxima this gives a curve close to what I want:
;;    plot2d ((exp(x/1.2)-1)/300, [x, 0, 10])$
;;    plot2d ((exp(x/1.5)-1)/40, [x, 0, 10])$
;;    plot2d ((exp(x/5)-1)/40, [x, 0, 20])$
(define (dbfile:droop x)
  (/ (- (exp (/ x 5)) 1) 40))
  ;; (* numqrys (/ 1 (qif-slope))))

;; create a dropping near the db file in a qif dir
;; use count of such files to gate queries (queries in flight)
;;
(define (dbfile:wait-for-qif fname run-id params)
  (let* ((thedir  (pathname-directory fname))
	 (dbnum   (dbfile:run-id->dbnum run-id))
	 (destdir (conc thedir"/qif-"dbnum))
	 (uniqn   (get-area-path-signature (conc dbnum params)))
	 (crumbn  (conc destdir"/"(current-seconds)"-"uniqn"."(current-process-id))))
    (if (not (file-exists? destdir))(create-directory (conc destdir"/attic") #t))
    (let loop ((count 0))
      (let* ((currlks (glob (conc destdir"/*")))
	     (numqrys (length currlks))
	     (delayval (cond ;; do a droopish curve
			((> numqrys 25)
			 (for-each
			  (lambda (f)
			    (if (> (- (current-seconds)
				      (handle-exceptions
					  exn
					(current-seconds) ;; file is likely gone, just fake out
					(file-modification-time f)))
				   (keep-age-param))
				(let* ((basedir (pathname-directory f))
				       (filen   (pathname-file f))
				       (destf   (conc basedir"/attic/"filen)))
				  (dbfile:print-err "Moving qif file "f" older than 10 seconds to "destf)
				  ;; (delete-file* f)
				  (handle-exceptions
				      exn
				    #t
				    (file-move f destf #t)))))
			  currlks)
			 4)
			((> numqrys 0)  (dbfile:droop numqrys)) ;; slope of 1/100
			(else #f))))
	(if (and delayval
		 (< count 5))
	    (begin
	      (thread-sleep! delayval)
	      (loop (+ count 1))))))
    (with-output-to-file crumbn
      (lambda ()
	(print fname" run-id="run-id" params="params)
	))
    crumbn))

;; (db:with-db dbstruct run-id sqlite3:exec "select blah fgrom blaz;")
;; r/w is a flag to indicate if the db is modified by this query #t = yes, #f = no
;;
(define (db:with-db dbstruct run-id r/w proc . params)
(define (dbfile:with-db dbstruct run-id r/w proc params)
  (assert dbstruct "FATAL: db:with-db called with dbstruct "#f)
  (assert (dbr:dbstruct? dbstruct) "FATAL: dbstruct is "dbstruct)
  (let* ((use-mutex (> *api-process-request-count* 25)) ;; risk of db corruption
	 (have-struct (dbr:dbstruct? dbstruct))
         (dbdat     (if have-struct                ;; this stuff just allows us to call with a db handle directly
			(db:open-db dbstruct run-id (dbfile:db-init-proc)) ;; (dbfile:get-subdb dbstruct run-id)
			#f))
	 (db        (if have-struct                ;; this stuff just allows us to call with a db handle directly
			(dbr:dbdat-dbh dbdat)
			dbstruct))
	 (fname     (if dbdat
			(dbr:dbdat-dbfile dbdat)
			"nofilenameavailable"))
	 (jfile     (conc fname"-journal"))
	 (qryproc   (lambda ()
		      (if use-mutex (mutex-lock! *db-with-db-mutex*))
		      (let ((res (apply proc dbdat db params))) ;; the actual call is here.
			(if use-mutex (mutex-unlock! *db-with-db-mutex*))
			;; (if (vector? dbstruct)(db:done-with dbstruct run-id r/w))
	 #;(subdb     (if have-struct
			(dbfile:get-subdb dbstruct run-id)
			#f))
	 ) ;; was 25
			(if dbdat
			    (dbfile:add-dbdat dbstruct run-id dbdat))
			;; (delete-file* crumbfile)
			res)))
	 (stop-train (conc (dbr:dbstruct-areapath dbstruct)"/stop-the-train")))

    (assert (sqlite3:database? db) "FATAL: db:with-db, db is not a database, db="db
	    ", fname="fname)
    (if (file-exists? jfile)
	(begin
	  (dbfile:print-err "INFO: "jfile" exists, delaying to reduce database load")
	  (thread-sleep! 0.2)))
    (if (and use-mutex
	     (common:low-noise-print 120 "over-50-parallel-api-requests"))
	(dbfile:print-err *api-process-request-count* " parallel api requests being processed in process "
	(dbfile:print-err *api-process-request-count*
			  " parallel api requests being processed in process "
			  (current-process-id))) ;;  ", throttling access"))
    (condition-case
	(begin
	  (if use-mutex (mutex-lock! *db-with-db-mutex*))
    (case (no-condition-db-with-db)
      ((production)(qryproc))
      ((suicide-mode)
       (handle-exceptions
	exn
	(with-output-to-file stop-train
	  (let ((res (apply proc dbdat db params))) ;; the actual call is here.
	    (if use-mutex (mutex-unlock! *db-with-db-mutex*))
	    ;; (if (vector? dbstruct)(db:done-with dbstruct run-id r/w))
	    (if dbdat
	  (lambda ()
		(dbfile:add-dbdat dbstruct run-id dbdat))
	    res))
      (exn (io-error)
	   (db:generic-error-printout exn "ERROR: i/o error with " fname ". Check permissions, disk space etc. and try again."))
      (exn (corrupt)
	   (db:generic-error-printout exn "ERROR: database " fname " is corrupt. Repair it to proceed."))
      (exn (busy)
	   (db:generic-error-printout exn "ERROR: database " fname
				      " is locked. Try copying to another location, remove original and copy back."))
      (exn (permission)(db:generic-error-printout exn "ERROR: database " fname " has some permissions problem."))
      (exn ()
	   (db:generic-error-printout exn "ERROR: Unknown error with database " fname " message: "
				      ((condition-property-accessor 'exn 'message) exn))))))
	    (db:generic-error-printout exn "Stop train mode, run-id: "run-id
				       " params: "params" proc: "proc)))
	(qryproc)))
      (else
       (condition-case
	(qryproc)
	(exn (io-error)
	     (db:generic-error-printout exn "ERROR: i/o error with "fname
					". Check permissions, disk space etc. and try again."))
	(exn (corrupt)
	     (db:generic-error-printout exn "ERROR: database "fname
					" is corrupt. Repair it to proceed."))
	(exn (busy)
	     (db:generic-error-printout exn "ERROR: database "fname
					" is locked. Try copying to another location,"
					" remove original and copy back."))
	(exn (permission)(db:generic-error-printout exn "ERROR: database "fname
						    " has some permissions problem."))
	(exn ()
	     (db:generic-error-printout exn "ERROR: Unknown error with database "fname
					" message: "
					((condition-property-accessor 'exn 'message) exn))))))))

;;======================================================================
;; another attempt at a transactionized queue
;;======================================================================

;; ;; ;; (define *transaction-queues* (make-hash-table))
;; ;; ;; 
1164
1165
1166
1167
1168
1169
1170










1171




1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263

1264
1265
1266
1267







+
+
+
+
+
+
+
+
+
+
-
+
+
+
+
  (let ((gotlock (dbfile:simple-file-lock-and-wait fname expire-time: expire-time)))
    (if gotlock
	(let ((res (proc)))
	  (dbfile:simple-file-release-lock fname)
	  res)
	(assert #t "FATAL: simple file lock never got a lock."))))
  
(define (db:get-cache-stmth dbdat db stmt)
  (let* (;; (dbdat       (dbfile:get-dbdat dbstruct run-id))
	 (stmt-cache  (dbr:dbdat-stmt-cache dbdat))
	 ;; (stmth       (db:hoh-get stmt-cache db stmt))
	 (stmth       (hash-table-ref/default stmt-cache stmt #f)))
    (or stmth
	(let* ((newstmth (sqlite3:prepare db stmt)))
	  ;; (db:hoh-set! stmt-cache db stmt newstmth)
	  (hash-table-set! stmt-cache stmt newstmth)
	  newstmth))))
)



)

Modified dbmod.scm from [043beb90c3] to [cad5788f26].

15
16
17
18
19
20
21



22
23
24
25
26
27
28
29

























































































































30



31
32








33
34
35





36
37





38
39

40

































































41
42
43
44
45
46
47
48
49
50
51
52
53
54




































































































































































55
56
15
16
17
18
19
20
21
22
23
24
25
26
27
28




29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153


154
155
156
157
158
159
160
161



162
163
164
165
166
167
168
169
170
171
172
173
174

175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241














242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407







+
+
+




-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+

+
+
+
-
-
+
+
+
+
+
+
+
+
-
-
-
+
+
+
+
+


+
+
+
+
+

-
+

+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+


;; 
;;     You should have received a copy of the GNU General Public License
;;     along with Megatest.  If not, see <http://www.gnu.org/licenses/>.

;;======================================================================

(declare (unit dbmod))
(declare (uses dbfile))
(declare (uses commonmod))
(declare (uses debugprint))

(module dbmod
	*
	
(import scheme chicken data-structures extras)
(import (prefix sqlite3 sqlite3:)
	posix typed-records srfi-18
	srfi-69)
(import scheme
	chicken
	data-structures
	extras

	(prefix sqlite3 sqlite3:)
	posix
	typed-records
	srfi-1
	srfi-18
	srfi-69

	commonmod
	dbfile
	debugprint
	)

;; NOTE: This returns only the name "1.db", "main.db", not the path
;;
(define (dbmod:run-id->dbfname run-id)
  (conc (dbfile:run-id->dbnum run-id)".db"))

(define (dbmod:get-dbdir dbstruct)
  (let* ((areapath (dbr:dbstruct-areapath dbstruct))
	 (dbdir    (conc areapath"/.megatest")))
    (if (and (file-write-access? areapath)
	     (not (file-exists? dbdir)))
	(create-directory dbdir))
    dbdir))

(define (dbmod:run-id->full-dbfname dbstruct run-id)
  (conc (dbmod:get-dbdir dbstruct run-id)"/"(dbmod:run-id->dbfname run-id)))

;;======================================================================
;; Read-only inmem cached direct from disk method
;;======================================================================

(define *dbmod:nfs-db-handles* (make-hash-table)) ;; dbfname -> dbstruct

(define (dbmod:nfs-get-dbstruct run-id keys init-proc areapath)
  (assert areapath "FATAL: dbmod:nfs-get-dbstruct called without areapath set.")
  (let* ((dbfname  (dbmod:run-id->dbfname run-id))
	 (dbstruct (hash-table-ref/default *dbmod:nfs-db-handles* dbfname #f)))
    (if dbstruct
	(let* ((last-update (dbr:dbstruct-last-update dbstruct))
	       (curr-secs   (current-seconds)))
	  (if (> (- curr-secs last-update) 2)
	      (begin
		((dbr:dbstruct-sync-proc dbstruct) last-update)
		(dbr:dbstruct-last-update-set! dbstruct curr-secs)))
	  dbstruct)
	(let* ((newdbstruct (dbmod:open-dbmoddb areapath run-id dbfname init-proc keys syncdir: 'fromdisk)))
	  (hash-table-set! *dbmod:nfs-db-handles* dbfname newdbstruct)
	  newdbstruct))))

;;======================================================================
;; The inmem one-db file per server method goes in here
;;======================================================================

(define (dbmod:with-db dbstruct run-id r/w proc params)
  (let* ((dbdat  (dbmod:open-db dbstruct run-id (dbfile:db-init-proc)))
	 (dbh    (dbr:dbdat-dbh dbdat))
	 (dbfile (dbr:dbdat-dbfile dbdat)))
    (apply proc dbdat dbh params)))

(define (dbmod:open-inmem-db initproc)
  (let* ((db      (sqlite3:open-database ":memory:"))
	 (handler (sqlite3:make-busy-timeout 3600)))
    (sqlite3:set-busy-handler! db handler)
    (initproc db)
    db))

(define (dbmod:open-db dbstruct run-id dbinit)
  (or (dbr:dbstruct-dbdat dbstruct)
      (let* ((dbdat (make-dbr:dbdat
		     dbfile: (dbr:dbstruct-dbfile dbstruct)
		     dbh:    (dbr:dbstruct-inmem  dbstruct)
		     )))
	(dbr:dbstruct-dbdat-set! dbstruct dbdat)
	dbdat)))

;; Open the inmem db and the on-disk db
;; populate the inmem db with data
;;
;; Updates fields in dbstruct
;; Returns dbstruct
;;
;; * This routine creates the db if not found
;; * Probably can get rid of the dbstruct-in
;; 
(define (dbmod:open-dbmoddb areapath run-id dbfname-in init-proc keys
			    #!key (dbstruct-in #f)
			    (syncdir 'todisk))
  (let* ((dbstruct     (or dbstruct-in (make-dbr:dbstruct areapath: areapath)))
	 (dbfname      (or dbfname-in (dbmod:run-id->dbfname run-id)))
	 (dbpath       (dbmod:get-dbdir dbstruct))             ;; directory where all the .db files are kept
	 (dbfullname   (conc dbpath"/"dbfname)) ;; (dbmod:run-id->full-dbfname dbstruct run-id))
	 (dbexists     (file-exists? dbfullname))
	 (inmem        (dbmod:open-inmem-db init-proc))
	 (write-access (file-write-access? dbpath))
	 (db           (dbfile:with-simple-file-lock
			(conc dbfullname".lock")
			(lambda ()
			  (let* ((db      (sqlite3:open-database dbfullname))
				 (handler (sqlite3:make-busy-timeout 136000)))
			    (sqlite3:set-busy-handler! db handler)
			    (if write-access
				(init-proc db))
			    db))))
	 (tables       (db:sync-all-tables-list keys)))
    (dbr:dbstruct-inmem-set!    dbstruct inmem)
    (dbr:dbstruct-ondiskdb-set! dbstruct db)
    (dbr:dbstruct-dbfile-set!   dbstruct dbfullname)
    (dbr:dbstruct-sync-proc-set! dbstruct
				 (lambda (last-update)
				   (sync-gasket tables last-update inmem db
						dbfullname syncdir)))
    ;; (dbmod:sync-tables tables #f db inmem)
    (sync-gasket tables #f inmem db dbfullname 'fromdest)
    (dbr:dbstruct-last-update-set! dbstruct (current-seconds)) ;; should this be offset back in time by one second?
    dbstruct))

;;    (if (eq? syncdir 'todisk) ;; sync to disk normally, sync from in dashboard
;;        (dbmod:sync-tables tables last-update inmem db)
;;        (dbmod:sync-tables tables last-update db inmem))))
(define (db:run-id->dbname run-id)
  (cond

;; direction: 'fromdest 'todest
;;
(define (sync-gasket tables last-update inmem dbh dbfname direction)
  (case (dbfile:sync-method)
    ((attach)
     (dbmod:attach-sync tables inmem dbfname direction))
    (else
   ((number? run-id)(conc run-id ".db"))
   ((not run-id)    "main.db")
   (else            run-id)))
     (case direction
       ((todest)
	(dbmod:sync-tables tables last-update inmem dbh))
       (else
	(dbmod:sync-tables tables last-update dbh inmem))))))


(define (dbmod:close-db dbstruct)
  ;; do final sync to disk file
  ;; (do-sync ...)
  (sqlite3:finalize! (dbr:dbstruct-ondiskdb dbstruct)))

;;======================================================================
;; hash of hashs
;; Sync db
;;======================================================================

(define (dbmod:calc-use-last-update has-last-update fields last-update)
  (cond
   ((and has-last-update
	 (member "last_update" fields))
    #t) ;; if given a number, just use it for all fields
   ((number? last-update) #f) ;; if not matched first entry then ignore last-update for this table
   ((and (pair? last-update)
	 (member (car last-update)    ;; last-update field name
		 (map car fields)))
    #t)
   ((and last-update (not (pair? last-update)) (not (number? last-update)))
    (debug:print 0 *default-log-port* "ERROR: parameter last-update for db:sync-tables must be a pair or a number, received: " last-update);; found in fields
    #f)
   (else
    #f)))

;; tbls is ( ("tablename" ( "field1" [#f|proc1] ) ( "field2" [#f|proc2] ) .... ) )
;; dbs are sqlite3 db handles
;;
;; if last-update specified ("field-name" . time-in-seconds)
;;    then sync only records where field-name >= time-in-seconds
;;    IFF field-name exists
;;
;; Use (db:sync-all-tables-list keys) to get the tbls input
;;
(define (dbmod:sync-tables tbls last-update fromdb todb)
  (let ((stmts       (make-hash-table)) ;; table-field => stmt
	(all-stmts   '())              ;; ( ( stmt1 value1 ) ( stml2 value2 ))
	(numrecs     (make-hash-table))
	(start-time  (current-milliseconds))
	(tot-count   0))
    (for-each ;; table
     (lambda (tabledat)
       (let* ((tablename        (car tabledat))
	      (fields           (cdr tabledat))
	      (has-last-update  (member "last_update" fields))
	      (use-last-update  (dbmod:calc-use-last-update has-last-update fields last-update))
	      (last-update-value (if use-last-update ;; no need to check for has-last-update - it is already accounted for
				     (if (number? last-update)
					 last-update
					 (cdr last-update))
				     #f))
	      (last-update-field (if use-last-update
				     (if (number? last-update)
					 "last_update"
					 (car last-update))
				     #f))
	      (num-fields (length fields))
	      (field->num (make-hash-table))
	      (num->field (apply vector (map car fields))) ;; BBHERE
	      (full-sel   (conc "SELECT " (string-intersperse (map car fields) ",") 
				" FROM " tablename (if use-last-update ;; apply last-update criteria
						       (conc " WHERE " last-update-field " >= " last-update-value)
						       "")
				";"))
	      (full-ins   (conc "INSERT OR REPLACE INTO " tablename " ( " (string-intersperse (map car fields) ",") " ) "
				" VALUES ( " (string-intersperse (make-list num-fields "?") ",") " );"))
	      (fromdat    '())
	      (fromdats   '())
	      (totrecords 0)
	      (batch-len  100) ;; (string->number (or (configf:lookup *configdat* "sync" "batchsize") "100")))
	      (todat      (make-hash-table))
	      (count      0)
              (field-names (map car fields)))


(define (db:hoh-set! dat key1 key2 val)
  (let* ((subhash (hash-table-ref/default dat key1 #f)))
    (if subhash
	(hash-table-set! subhash key2 val)
	(begin
	  (hash-table-set! dat key1 (make-hash-table))
	  (db:hoh-set! dat key1 key2 val)))))

(define (db:hoh-get dat key1 key2)
  (let* ((subhash (hash-table-ref/default dat key1 #f)))
    (and subhash
	 (hash-table-ref/default subhash key2 #f))))
	 
	 ;; set up the field->num table
	 (for-each
	  (lambda (field)
	    (hash-table-set! field->num field count)
	    (set! count (+ count 1)))
	  fields)
	 
	 ;; read the source table
         ;; store a list of all rows in the table in fromdat, up to batch-len.
         ;; Then add fromdat to the fromdats list, clear fromdat and repeat.
	 (sqlite3:for-each-row
	  (lambda (a . b)
	       (set! fromdat (cons (apply vector a b) fromdat))
	       (if (> (length fromdat) batch-len)
		   (begin
		     (set! fromdats (cons fromdat fromdats))
		     (set! fromdat  '())
		     (set! totrecords (+ totrecords 1)))))
	  fromdb
	  full-sel)
	 
         ;; Count less than batch-len as a record
         (if (> (length fromdat) 0)
             (set! totrecords (+ totrecords 1)))
	 
	 ;; tack on remaining records in fromdat
	 (if (not (null? fromdat))
	     (set! fromdats (cons fromdat fromdats)))
	 
	 (sqlite3:for-each-row
	  (lambda (a . b)
	    (hash-table-set! todat a (apply vector a b)))
	  todb
	  full-sel)
	 
	 ;; first pass implementation, just insert all changed rows
	 (let* ((db                 todb)
                (drp-trigger        (if (member "last_update" field-names)
					(db:drop-trigger db tablename) 
					#f))
		(has-last-update    (member "last_update" field-names))
                (is-trigger-dropped (if has-last-update
                                        (db:is-trigger-dropped db tablename)
					#f)) 
		(stmth  (sqlite3:prepare db full-ins))
                (changed-rows 0))
	   (for-each
	    (lambda (fromdat-lst)
	      (sqlite3:with-transaction
	       db
	       (lambda ()
		 (for-each ;; 
		  (lambda (fromrow)
		    (let* ((a    (vector-ref fromrow 0))
			   (curr (hash-table-ref/default todat a #f))
			   (same #t))
		      (let loop ((i 0))
			(if (or (not curr)
				(not (equal? (vector-ref fromrow i)(vector-ref curr i))))
			    (set! same #f))
			(if (and same
				 (< i (- num-fields 1)))
			    (loop (+ i 1))))
		      (if (not same)
			  (begin
			    (apply sqlite3:execute stmth (vector->list fromrow))
			    (hash-table-set! numrecs tablename (+ 1 (hash-table-ref/default numrecs tablename 0)))
                            (set! changed-rows (+ changed-rows 1))))))
		  fromdat-lst))))
	    fromdats)
	   
	   (sqlite3:finalize! stmth)
           (if (member "last_update" field-names)
               (db:create-trigger db tablename)))))
     tbls)
    (let* ((runtime      (- (current-milliseconds) start-time))
	   (should-print (or ;; (debug:debug-mode 12)
			  (common:low-noise-print 120 "db sync")
			  (> runtime 500)))) ;; low and high sync times treated as separate.
      (for-each 
       (lambda (dat)
	 (let ((tblname (car dat))
	       (count   (cdr dat)))
	   (set! tot-count (+ tot-count count)))) 
       (sort (hash-table->alist numrecs)(lambda (a b)(> (cdr a)(cdr b))))))
    tot-count))

(define (has-last-update dbh tablename)
  (let* ((has-last #f))
    (sqlite3:for-each-row
     (lambda (name)
       (if (equal? name "last_update")
	   (set! has-last #t)))
     dbh
     (conc "SELECT name FROM pragma_table_info('"tablename"') as tblInfo;"))
    has-last))

;; tbls is ( ("tablename" ( "field1" [#f|proc1] ) ( "field2" [#f|proc2] ) .... ) )
;;
;; direction = fromdest, todest
;; mode = 'full, 'incr
;;
(define (dbmod:attach-sync tables dbh destdbfile direction #!key
			   (mode 'full)
			   (no-update '("keys")) ;; do
			   )
  (debug:print 0 *default-log-port* "Doing sync "direction" "destdbfile)
  (let* ((table-names  (map car tables))
	 (dest-exists  (file-exists? destdbfile)))
    (assert dest-exists "FATAL: sync called with non-existant file, "destdbfile)
    ;; attach the destdbfile
    ;; for each table
    ;;    insert into dest.<table> select * from src.<table> where last_update>last_update
    ;; done
    (debug:print 0 *default-log-port* "Attaching "destdbfile" as auxdb")
    (sqlite3:execute dbh (conc "ATTACH '"destdbfile"' AS auxdb;"))
    (for-each
     (lambda (table)
       (let* ((tbldat (alist-ref table tables equal?))
	      (fields (map car tbldat))
	      (fields-str (string-intersperse fields ","))
	      (dir    (eq? direction 'todest))
	      (fromdb (if dir "" "auxdb."))
	      (todb   (if dir "auxdb." ""))
	      (stmt1 (conc "INSERT OR IGNORE INTO "todb table
			  " SELECT * FROM "fromdb table";"))
	      (stmt2 (conc "INSERT OR REPLACE INTO "todb table
			   " SELECT * FROM "fromdb table" WHERE "
			   fromdb table".last_update > "
			   todb table".last_update;"))
	      (stmt3 (conc "INSERT OR REPLACE INTO "todb"."table
			   " SELECT * FROM "fromdb table";"))
	      (stmt4 (conc "DELETE FROM "todb table" WHERE "fromdb
			   table ".last_update > "todb table".last_update;"))
	      (stmt5 (conc "DELETE FROM "todb table";"))
	      (stmt6 (conc "INSERT OR REPLACE INTO "todb table" ("fields-str") SELECT "fields-str" FROM "fromdb table";"))
	      (start-ms (current-milliseconds)))
	 ;; (if (not (has-last-update dbh table))
	 ;;     (sqlite3:execute dbh (conc "ALTER TABLE "table" ADD COLUMN last_update INTEGER;")))
	 ;; (if (not (has-last-update dbh (conc "auxdb."table)))
	 ;;     (sqlite3:execute dbh (conc "ALTER TABLE auxdb."table" ADD COLUMN last_update INTEGER;")))
	 (sqlite3:with-transaction
	  dbh
	  (lambda ()
	    (sqlite3:execute dbh stmt5)
	    ;; (sqlite3:execute dbh stmt4) ;; if it worked this would be better for incremental up
	    ;; (sqlite3:execute dbh stmt1)
	    (sqlite3:execute dbh stmt6)
	    ))
	 (debug:print 0 *default-log-port* "Synced table "table" in "(- (current-milliseconds) start-ms)"ms")
	 ))
     table-names)
    (sqlite3:execute dbh "DETACH auxdb;")))

;; prefix is "" or "auxdb."
;;
;; (define (dbmod:last-update-patch dbh prefix)
;;   (let ((
  
;;======================================================================
;; Moved from dbfile
;;======================================================================


)

Modified dcommon.scm from [eab340744b] to [4b1e45fd47].

25
26
27
28
29
30
31



32
33
34
35
36
37
38
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41







+
+
+







(import canvas-draw-iup)
(use regex typed-records matchable)

(declare (unit dcommon))

(declare (uses gutils))
(declare (uses db))
(declare (uses commonmod))

(import commonmod)
;; (declare (uses synchash))

(include "megatest-version.scm")
(include "common_records.scm")
(include "db_records.scm")
(include "key_records.scm")
(include "run_records.scm")
633
634
635
636
637
638
639

640

641
642
643
644
645
646
647
636
637
638
639
640
641
642
643

644
645
646
647
648
649
650
651







+
-
+







	     (max-row      (if (null? row-indices) 1 (common:max (map cadr row-indices))))
	     (max-col      (if (null? col-indices) 1 
			       (common:max (map cadr col-indices))))
	     (max-visible  (max (- (dboard:tabdat-num-tests tabdat) 15) 3))
	     (max-col-vis  (if (> max-col 10) 10 max-col))
	     (numrows      1)
	     (numcols      1))
	(if (common:low-noise-print 60 "runs-stats-update-clear")
	(iup:attribute-set! stats-matrix "CLEARVALUE" "CONTENTS")
	    (iup:attribute-set! stats-matrix "CLEARVALUE" "CONTENTS"))
	(iup:attribute-set! stats-matrix "NUMCOL" max-col )
	(iup:attribute-set! stats-matrix "NUMLIN" (if (< max-row max-visible) max-visible max-row)) ;; min of 20
	(iup:attribute-set! stats-matrix "NUMCOL_VISIBLE" max-col-vis)
	(iup:attribute-set! stats-matrix "NUMLIN_VISIBLE" (if (> max-row max-visible) max-visible max-row))
	;;(print "row-indices: " row-indices " col-indices: " col-indices)
	;; Row labels
	(for-each (lambda (ind)
702
703
704
705
706
707
708

709


710
711
712
713
714
715
716
706
707
708
709
710
711
712
713

714
715
716
717
718
719
720
721
722







+
-
+
+







				     #:numcol 7
				     #:numcol-visible 7
				     #:numlin-visible 5
				     ))
	 (colnames       (list "Id" "MTver" "Pid" "Host" "Interface:OutPort" "RunTime" "State" "RunId"))
	 (updater        (lambda ()
			   (if (dashboard:monitor-changed? commondat tabdat)
			       (let ((servers  (case (rmt:transport-mode)
			       (let ((servers  (server:choose-server *toppath* 'all-valid))) ;; (server:get-list *toppath* limit: 10)))
						 ((http)(server:choose-server *toppath* 'all-valid)) ;; (server:get-list *toppath* limit: 10)))
						 (else '()))))
				 (iup:attribute-set! servers-matrix "NUMLIN" (length servers))
				 ;; (set! colnum 0)
				 ;; (for-each (lambda (colname)
				 ;;    	 ;; (print "colnum: " colnum " colname: " colname)
				 ;;    	 (iup:attribute-set! servers-matrix (conc "0:" colnum) colname)
				 ;;    	 (set! colnum (+ 1 colnum)))
				 ;;           colnames)

Modified diff-report.scm from [f999ffe6db] to [0fbca9ae2f].

15
16
17
18
19
20
21


22
23
24
25
26
27
28
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30







+
+







;;     You should have received a copy of the GNU General Public License
;;     along with Megatest.  If not, see <http://www.gnu.org/licenses/>.
;;

(declare (unit diff-report))
(declare (uses common))
(declare (uses rmt))
(declare (uses commonmod))
(import commonmod)
         
(include "common_records.scm")
(use matchable)
(use fmt)
(use ducttape-lib)
(define css "")

Added docs/reference/coping-with-the-tcp-time-wait-state-on-busy-linux-servers.pdf version [bb7c60045d].

cannot compute difference between binary files

Added docs/reference/networking-increasing-the-max-number-of-tcp-ip-connections.pdf version [c3bfe57256].

cannot compute difference between binary files

Added docs/reference/queues-dont-fix-overload.pdf version [938610cb6e].

cannot compute difference between binary files

Modified ezsteps.scm from [aab87817a5] to [077453aa67].

25
26
27
28
29
30
31


32
33
34
35
36
37
38
39
40
41
42
43

44
45
46
47
48
49
50
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44

45
46
47
48
49
50
51
52







+
+











-
+







(declare (unit ezsteps))
(declare (uses db))
(declare (uses common))
(declare (uses items))
(declare (uses runconfig))
;; (declare (uses sdb))
;; (declare (uses filedb))
(declare (uses commonmod))
(import commonmod)

(include "common_records.scm")
(include "key_records.scm")
(include "db_records.scm")
(include "run_records.scm")


;;(rmt:get-test-info-by-id run-id test-id) -> testdat

;; TODO: deprecate me in favor of ezsteps.scm
;;
(define (launch:runstep ezstep run-id test-id exit-info m tal testconfig all-steps-dat)
(define (ezsteps:runstep ezstep run-id test-id exit-info m tal testconfig all-steps-dat)
  (let* ((stepname       (car ezstep))  ;; do stuff to run the step
	 (stepinfo       (cadr ezstep))
	;; (let ((info (cadr ezstep)))
	;; 		   (if (proc? info) "" info)))
	;; (stepproc       (let ((info (cadr ezstep)))
	;; 		   (if (proc? info) info #f)))
	 (stepparts      (string-match (regexp "^(\\{([^\\}\\{]*)\\}\\s*|)(.*)$") stepinfo))
61
62
63
64
65
66
67
68


69
70
71
72
73
74
75
63
64
65
66
67
68
69

70
71
72
73
74
75
76
77
78







-
+
+







			     (list-ref stepparts 3)
			     (conc "# error, no command for step "stepname)))
	 (script         "") ; "#!/bin/bash\n") ;; yep, we depend on bin/bash FIXME!!!\
	 (logpro-file    (conc stepname ".logpro"))
	 (html-file      (conc stepname ".html"))
	 (dat-file       (conc stepname ".dat"))
	 (tconfig-logpro (configf:lookup testconfig "logpro" stepname))
	 (logpro-used    (common:file-exists? logpro-file)))
	 (logpro-used    (common:file-exists? logpro-file))
	 (mtexepath      (common:get-megatest-exe-path)))
    (setenv "MT_STEP_NAME" stepname)
    (hash-table-set! all-steps-dat stepname `((params . ,paramparts)))
    (debug:print 0 *default-log-port* "stepparts: " stepparts ", stepparams: " stepparams
                 ", paramparts: " paramparts ", subrun: " subrun ", stepcmd: " stepcmd)
    
    (if (and tconfig-logpro
	     (not logpro-used)) ;; no logpro file found but have a defn in the testconfig
94
95
96
97
98
99
100
101

102
103
104
105
106
107
108
97
98
99
100
101
102
103

104
105
106
107
108
109
110
111







-
+







    ;; call the command using mt_ezstep
    ;; (set! script (conc "mt_ezstep " stepname " " (if prevstep prevstep "x") " " stepcmd))
    
    (debug:print 4 *default-log-port* "script: " script)
    (rmt:teststep-set-status! run-id test-id stepname "start" "-" #f #f)
    ;; now launch the actual process
    (call-with-environment-variables 
     (list (cons "PATH" (conc (get-environment-variable "PATH") ":.")))
     (list (cons "PATH" mtexepath))
     (lambda () ;; (process-run "/bin/bash" "-c" "exec ls -l /tmp/foobar > /tmp/delme-more.log 2>&1")
       (let* ((cmd (conc stepcmd " > " stepname ".log 2>&1")) ;; >outfile 2>&1 
	      (pid #f))
	 (let ((proc (lambda ()
		       (set! pid (process-run "/bin/bash" (list "-c" cmd))))))
	   (if subrun
               (begin

Modified http-transport.scm from [84bebe0a7c] to [c61c306ba3].

151
152
153
154
155
156
157
158

159
160
161
162
163
164
165
151
152
153
154
155
156
157

158
159
160
161
162
163
164
165







-
+







	(config-use-proxy (equal? (configf:lookup *configdat* "client" "use-http_proxy") "yes")))
    (if (not config-use-proxy)
	(determine-proxy (constantly #f)))
    (debug:print-info 0 *default-log-port* "http-transport:try-start-server time=" (seconds->time-string (current-seconds)) " ipaddrsstr=" ipaddrstr " portnum=" portnum " config-hostname=" config-hostname)
    (handle-exceptions
	exn
	(begin
	  (print-error-message exn)
	  ;; (print-error-message exn)
	  (if (< portnum 64000)
	      (begin 
		(debug:print 0 *default-log-port* "WARNING: attempt to start server failed. Trying again ...")
		(debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
		(debug:print 5 *default-log-port* "exn=" (condition->list exn))
		(portlogger:open-run-close portlogger:set-failed portnum)
		(debug:print 0 *default-log-port* "WARNING: failed to start on portnum: " portnum ", trying next port")
239
240
241
242
243
244
245
246

247
248
249
250

251

252
253
254
255
256

257
258
259
260
261
262
263
264
265
266
267
239
240
241
242
243
244
245

246




247

248
249
250
251


252




253
254
255
256
257
258
259







-
+
-
-
-
-
+
-
+



-
-
+
-
-
-
-








(define (http-transport:inc-requests-and-prep-to-close-all-connections)
  (mutex-lock! *http-mutex*)
  (set! *http-requests-in-progress* (+ 1 *http-requests-in-progress*)))

;; Send "cmd" with json payload "params" to serverdat and receive result
;;
(define (http-transport:client-api-send-receive run-id serverdat cmd params #!key (numretries 3)(area-dat #f))
(define (http-transport:client-api-send-receive run-id runremote cmd params #!key (numretries 3))
  (let* ((fullurl    (if (vector? serverdat)
			 (http-transport:server-dat-get-api-req serverdat)
			 (begin
			   (debug:print 0 *default-log-port* "FATAL ERROR: http-transport:client-api-send-receive called with no server info")
  (assert (remote? runremote) "FATAL: http-transport:client-api-send-receive called with serverdat="serverdat)
			   (exit 1))))
  (let* ((fullurl    (remote-api-req runremote))
	 (res        (vector #f "uninitialized"))
	 (success    #t)
	 (sparams    (db:obj->string params transport: 'http))
	 (runremote  (or area-dat *runremote*))
         (server-id   (if (vector? serverdat) 
         (server-id  (remote-server-id runremote)))
                           (http-transport:server-dat-get-server-id serverdat)
                           (begin
			     (debug:print 0 *default-log-port* "FATAL ERROR: http-transport:client-api-send-receive called with no server info")
			     (exit 1)))))
       (debug:print-info 11 *default-log-port* "cmd=" cmd " fullurl=" fullurl " server-id=" server-id " current time:" (current-seconds)) 

       ;; set up the http-client here
       (max-retry-attempts 1)
       ;; consider all requests indempotent
       (retry-request? (lambda (request)
			 #f))
284
285
286
287
288
289
290
291
292
293

294
295
296
297
298
299
300
301

302
303
304
305
306
307
308
276
277
278
279
280
281
282



283



284




285
286
287
288
289
290
291
292







-
-
-
+
-
-
-

-
-
-
-
+







                                                    (debug:print-info 0 *default-log-port* "couldn't talk to server, trying again ...")
                                                    (begin
                                                      (debug:print 0 *default-log-port* "WARNING: failure in with-input-from-request to " fullurl ".")
                                                      (debug:print 0 *default-log-port* " message: " msg ", exn=" exn)
                                                      (debug:print 0 *default-log-port* " cmd: " cmd " params: " params " key:" (or server-id "thekey"))
                                                      (debug:print 0 *default-log-port* " call-chain: " call-chain)))
						;; what if another thread is communicating ok? Can't happen due to mutex
						(set! *runremote* #f)
						(set! runremote #f)
						;; (if runremote
						(http-transport:close-connections runremote)
						;;    (remote-conndat-set! runremote #f))
						;; Killing associated server to allow clean retry.")
						;; (tasks:kill-server-run-id run-id)  ;; better to kill the server in the logic that called this routine?
						(mutex-unlock! *http-mutex*)
						;; (signal (make-composite-condition
						;;          (make-property-condition 'commfail 'message "failed to connect to server")))
						;; "communications failed"
						(close-all-connections!)
						;; (close-connection! fullurl)
						(db:obj->string #f))
					      (with-input-from-request ;; was dat
					       fullurl 
					       (list (cons 'key (or server-id   "thekey"))
						     (cons 'cmd cmd)
						     (cons 'params sparams))
					       read-string))
343
344
345
346
347
348
349
350

351
352
353

354
355
356
357


358
359
360
361
362






363
364
365
366
367

368
369
370
371
372
373
374
375
376
377

378
379
380
381
382
383
384
385
386

387
388
389
390
391
392
393

394
395
396
397
398
399
400
401
402
403
404


405
406
407
408
409
410
411
412
413
327
328
329
330
331
332
333

334



335




336
337
338




339
340
341
342
343
344



345

346










347









348







349











350
351


352
353
354
355
356
357
358







-
+
-
-
-
+
-
-
-
-
+
+

-
-
-
-
+
+
+
+
+
+
-
-
-

-
+
-
-
-
-
-
-
-
-
-
-
+
-
-
-
-
-
-
-
-
-
+
-
-
-
-
-
-
-
+
-
-
-
-
-
-
-
-
-
-
-
+
+
-
-







	     (signal (make-composite-condition
		      (make-property-condition 
		       'timeout
		       'message "nmsg-transport:client-api-send-receive-raw timed out talking to server")))))))

;; careful closing of connections stored in *runremote*
;;
(define (http-transport:close-connections #!key (area-dat #f))
(define (http-transport:close-connections runremote)
  (let* ((runremote  (or area-dat *runremote*))
	 (server-dat (if runremote
                         (remote-conndat runremote)
  (if (remote? runremote)
                         #f))) ;; (hash-table-ref/default *runremote* run-id #f)))
    (if (vector? server-dat)
	(let ((api-dat (http-transport:server-dat-get-api-uri server-dat)))
	  (handle-exceptions
      (let ((api-dat (remote-api-uri runremote)))
	(handle-exceptions
	    exn
	    (begin
	      (print-call-chain *default-log-port*)
	      (debug:print-error 0 *default-log-port* " closing connection failed with error: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn))
	    (close-connection! api-dat)
	  (begin
	    (print-call-chain *default-log-port*)
	    (debug:print-error 0 *default-log-port* " closing connection failed with error: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn))
	  (if (args:any-defined? "-server" "-execute" "-run")
	      (debug:print-info 0 *default-log-port* "Closing connections to "api-dat))
	  (if api-dat (close-connection! api-dat))
            (close-idle-connections!)
	    #t))
	#f)))


	  ;; Would it be better to set *runremote* to #f? I don't think so. But we may
(define (make-http-transport:server-dat)(make-vector 6))
(define (http-transport:server-dat-get-iface         vec)    (vector-ref  vec 0))
(define (http-transport:server-dat-get-port          vec)    (vector-ref  vec 1))
(define (http-transport:server-dat-get-api-uri       vec)    (vector-ref  vec 2))
(define (http-transport:server-dat-get-api-url       vec)    (vector-ref  vec 3))
(define (http-transport:server-dat-get-api-req       vec)    (vector-ref  vec 4))
(define (http-transport:server-dat-get-last-access   vec)    (vector-ref  vec 5))
;(define (http-transport:server-dat-get-socket        vec)    (vector-ref  vec 6))
(define (http-transport:server-dat-get-server-id     vec)    (vector-ref  vec 6))

	  ;; need to clear more of the runremote fields
(define (http-transport:server-dat-make-url vec)
  (if (and (http-transport:server-dat-get-iface vec)
	   (http-transport:server-dat-get-port  vec))
      (conc "http://" 
	    (http-transport:server-dat-get-iface vec)
	    ":"
	    (http-transport:server-dat-get-port  vec))
      #f))

	  (remote-api-url-set! runremote #f) ;; used as a flag for connection up and running
(define (http-transport:server-dat-update-last-access vec)
  (if (vector? vec)
      (vector-set! vec 5 (current-seconds))
      (begin
	(print-call-chain (current-error-port))
	(debug:print-error 0 *default-log-port* "call to http-transport:server-dat-update-last-access with non-vector!!"))))

	  
;;
;; connect
;;
(define (http-transport:client-connect iface port server-id)
  (debug:print-info 0 *default-log-port* "Connecting to client at "iface":"port", with server-id "server-id)
  (let* ((api-url      (conc "http://" iface ":" port "/api"))
	 (api-uri      (uri-reference (conc "http://" iface ":" port "/api")))
	 (api-req      (make-request method: 'POST uri: api-uri))
	 (server-dat   (vector iface port api-uri api-url api-req (current-seconds) server-id)))
    server-dat))

	  #t))
      #f))



;; run http-transport:keep-running in a parallel thread to monitor that the db is being 
;; used and to shutdown after sometime if it is not.
;;
(define (http-transport:keep-running) 
  ;; if none running or if > 20 seconds since 
  ;; server last used then start shutdown
427
428
429
430
431
432
433
434

435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463

464

465
466
467


468
469
470
471
472

473
474
475
476


477
478
479
480
481
482
483
372
373
374
375
376
377
378

379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397






398
399
400
401
402
403

404
405


406
407





408




409
410
411
412
413
414
415
416
417







-
+


















-
-
-
-
-
-





+
-
+

-
-
+
+
-
-
-
-
-
+
-
-
-
-
+
+







			  (debug:print-info 0 *default-log-port* "Waiting for server alive signature")
                          (mutex-lock! *heartbeat-mutex*)
                          (set! sdat *server-info*)
                          (mutex-unlock! *heartbeat-mutex*)
                          (if (and sdat
				   (not changed)
				   (> (- (current-seconds) start-time) 2))
			      (let* ((servinfodir (conc *toppath*"/.servinfo"))
			      (let* ((servinfodir (server:get-servinfo-dir *toppath*)) ;; (conc *toppath*"/.servinfo"))
				     (ipaddr      (car sdat))
				     (port        (cadr sdat))
				     (servinf     (conc servinfodir"/"ipaddr":"port)))
				(set! servinfofile servinf)
				(if (not (file-exists? servinfodir))
				    (create-directory servinfodir #t))
				(with-output-to-file servinf
				  (lambda ()
				    (let* ((serv-id (server:mk-signature)))
				      (set! *server-id* serv-id)
				      (print "SERVER STARTED: "ipaddr":"port" AT "(current-seconds)" server-id: "serv-id" pid: "(current-process-id))
				      (print "started: "(seconds->year-week/day-time (current-seconds))))))
				(set! *on-exit-procs* (cons
						       (lambda ()
							 (delete-file* servinf))
						       *on-exit-procs*))
				;; put data about this server into a simple flat file host.port
				(debug:print-info 0 *default-log-port* "Received server alive signature")
                                #;(common:save-pkt `((action . alive)
                                                   (T      . server)
                                                   (pid    . ,(current-process-id))
                                                   (ipaddr . ,(car sdat))
                                                   (port   . ,(cadr sdat)))
                                                 *configdat* #t)
				sdat)
                              (begin
				(debug:print-info 0 *default-log-port* "Still waiting, last-sdat=" last-sdat)
                                (sleep 4)
				(if (> (- (current-seconds) start-time) 120) ;; been waiting for two minutes
                                    (if sdat 
				    (let* ((ipaddr  (car sdat))
				      (let* ((ipaddr  (car sdat))
					   (port    (cadr sdat))
					   (servinf (conc *toppath*"/.servinfo/"ipaddr":"port)))
				      (debug:print-error 0 *default-log-port* "transport appears to have died, exiting server")
					   (servinf (conc (server:get-servinfo-dir *toppath*)"/"ipaddr":"port)))
				        (debug:print-error 0 *default-log-port* "transport appears to have died, exiting server")
				      ;; (delete-file* servinf) ;; handled by on-exit, can be removed
                                      #;(common:save-pkt `((action . died)
                                                         (T      . server)
                                                         (pid    . ,(current-process-id))
                                                         (ipaddr . ,(car sdat))
				      (exit))
                                                         (port   . ,(cadr sdat))
                                                         (msg    . "Transport died?"))
						       *configdat* #t)
				      (exit))
                                      (exit)
                                    )
				    (loop start-time
					  (equal? sdat last-sdat)
					  sdat)))))))
	 (iface       (car server-info))
         (port        (cadr server-info))
         (last-access 0)
	 (server-timeout (server:expiration-timeout))
502
503
504
505
506
507
508
509


510
511
512
513
514
515
516
436
437
438
439
440
441
442

443
444
445
446
447
448
449
450
451







-
+
+







	    (set! server-going #t)
	    (debug:print 0 *default-log-port* "SERVER: running, megatest version: " (common:get-full-version))) ;; NOTE: the server is NOT yet marked as running in the log. We do that in the keep-running routine.
	  (if (and no-sync-db
		   (common:low-noise-print 10 "sync-all")) ;; cheesy way to reduce frequency of running sync :)
              (begin
		(if (common:low-noise-print 120 "sync-all-print")
                    (debug:print 0 *default-log-port* "keep-running calling db:all-db-sync at " (time->string (seconds->local-time) "%H:%M:%S")))
		(db:all-db-sync *dbstruct-dbs*))))
		(db:all-db-sync *dbstruct-dbs*)
		)))
      
      ;; when things go wrong we don't want to be doing the various queries too often
      ;; so we strive to run this stuff only every four seconds or so.
      (let* ((sync-time (- (current-milliseconds) start-time))
	    (rem-time  (quotient (- 4000 sync-time) 1000)))
	(if (and (<= rem-time 4)
		 (>  rem-time 0))
558
559
560
561
562
563
564
565

566
567
568
569

570
571
572

573
574
575
576

577
578
579
580
581
582
583
493
494
495
496
497
498
499

500
501
502
503

504
505
506

507
508
509
510

511
512
513
514
515
516
517
518







-
+



-
+


-
+



-
+







		  (current-seconds)))
          (if (common:low-noise-print 120 "server continuing")
              (debug:print-info 0 *default-log-port* "Server continuing, seconds since last db access: " (- (current-seconds) last-access))
	      (let ((curr-time (current-seconds)))
		(handle-exceptions
		    exn
		    (debug:print 0 *default-log-port* "ERROR: Failed to change timestamp on info file " servinfofile ". Are you out of space on that disk? exn=" exn)
		    (if (and (< (- (current-seconds) server-start-time) 600) ;; run for ten minutes for experiment, 3600 thereafter
		    (if (and ;; (< (- (current-seconds) server-start-time) 600) ;; run for ten minutes for experiment, 3600 thereafter
			     (not *server-overloaded*)
			     (file-exists? servinfofile))
			(change-file-times servinfofile curr-time curr-time)))
		(if (or (common:low-noise-print 120 "start new server")
		(if (and (common:low-noise-print 120 "start new server")
			(> *api-process-request-count* 50)) ;; if this server is kind of busy start up another
		    (begin
		      (debug:print-info 0 *default-log-port* "Server is busy, parallel-api-count "*api-process-request-count*", start another if possible...")
		      (debug:print-info 0 *default-log-port* "Server is busy, api-count "*api-process-request-count*", start another if possible...")
		      (server:kind-run *toppath*)
		      (if (> *api-process-request-count* 100)
			  (begin
			    (debug:print-info 0 *default-log-port* "Server is overloaded at parallel-api-count="*api-process-request-count*", removing "servinfofile) 
			    (debug:print-info 0 *default-log-port* "Server is overloaded at api-count=" *api-process-request-count*", removing "servinfofile) 
			    (delete-file* servinfofile)))))))
          (loop 0 server-state bad-sync-count (current-milliseconds)))
         (else
          (debug:print-info 0 *default-log-port* "Server timed out. seconds since last db access: " (- (current-seconds) last-access))
          (http-transport:server-shutdown port)))))))

(define (http-transport:server-shutdown port)
619
620
621
622
623
624
625
626
627
628



629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644

645
646
647
648
649
650
651
652
653
654
655




656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674

















675
676
677
678
679
680
681
554
555
556
557
558
559
560



561
562
563
















564
565










566
567
568
569



















570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593







-
-
-
+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+

-
-
-
-
-
-
-
-
-
-
+
+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+







    (exit)))

;; all routes though here end in exit ...
;;
;; start_server? 
;;
(define (http-transport:launch)
  ;; check that a server start is in progress, pause or exit if so
  (let* ((tmp-area            (common:get-db-tmp-area))
	 (server-start        (conc tmp-area "/.server-start"))
  ;; check the .servinfo directory, are there other servers running on this
  ;; or another host?
  (let* ((server-start-is-ok  (server:minimal-check *toppath*)))
	 (server-started      (conc tmp-area "/.server-started"))
	 (start-time          (common:lazy-modification-time server-start))
	 (started-time        (common:lazy-modification-time server-started))
	 (server-starting     (< start-time started-time)) ;; if start-time is less than started-time then a server is still starting
	 (start-time-old      (> (- (current-seconds) start-time) 5))
         (cleanup-proc        (lambda (msg)
                                (let* ((serv-fname      (conc "server-" (current-process-id) "-" (get-host-name) ".log"))
                                       (full-serv-fname (conc *toppath* "/logs/" serv-fname))
                                       (new-serv-fname  (conc *toppath* "/logs/" "defunct-" serv-fname)))
                                  (debug:print 0 *default-log-port* msg)
                                  (if (common:file-exists? full-serv-fname)
                                      (system (conc "sleep 1;mv -f " full-serv-fname " " new-serv-fname))
                                      (debug:print 0 *default-log-port* "INFO: cannot move " full-serv-fname " to " new-serv-fname))
                                  (exit)))))
    #;(if (and (not start-time-old) ;; last server start try was less than five seconds ago
	     (not server-starting))
    (if (not server-start-is-ok)
	(begin
	  (cleanup-proc "NOT starting server, there is either a recently started server or a server in process of starting")
	  (exit)))
    ;; lets not even bother to start if there are already three or more server files ready to go
    #;(let* ((num-alive   (server:get-num-alive (server:get-list *toppath*))))
      (if (> num-alive 3)
          (begin
            (cleanup-proc (conc "ERROR: Aborting server start because there are already " num-alive " possible servers either running or starting up"))
            (exit))))
    #;(common:save-pkt `((action . start)
		       (T      . server)
	  (debug:print 0 *default-log-port* "ERROR: server start not ok, exiting now.")
	  (exit 1))))
    
  ;; check that a server start is in progress, pause or exit if so
		       (pid    . ,(current-process-id)))
		     *configdat* #t)
    (let* ((th2 (make-thread (lambda ()
                               (debug:print-info 0 *default-log-port* "Server run thread started")
                               (http-transport:run 
                                (if (args:get-arg "-server")
                                    (args:get-arg "-server")
                                    "-")
                                )) "Server run"))
           (th3 (make-thread (lambda ()
                               (debug:print-info 0 *default-log-port* "Server monitor thread started")
                               (http-transport:keep-running)
                               "Keep running"))))
      (thread-start! th2)
      (thread-sleep! 0.25) ;; give the server time to settle before starting the keep-running monitor.
      (thread-start! th3)
      (set! *didsomething* #t)
      (thread-join! th2)
      (exit))))
  (let* ((th2 (make-thread (lambda ()
                             (debug:print-info 0 *default-log-port* "Server run thread started")
                             (http-transport:run 
                              (if (args:get-arg "-server")
                                  (args:get-arg "-server")
                                  "-")
                              )) "Server run"))
         (th3 (make-thread (lambda ()
                             (debug:print-info 0 *default-log-port* "Server monitor thread started")
                             (http-transport:keep-running)
                             "Keep running"))))
    (thread-start! th2)
    (thread-sleep! 0.25) ;; give the server time to settle before starting the keep-running monitor.
    (thread-start! th3)
    (set! *didsomething* #t)
    (thread-join! th2)
    (exit)))

;; (define (http-transport:server-signal-handler signum)
;;   (signal-mask! signum)
;;   (handle-exceptions
;;    exn
;;    (debug:print 0 *default-log-port* " ... exiting ...")
;;    (let ((th1 (make-thread (lambda ()

Modified index-tree.scm from [10c620fbfc] to [6384bce0d0].

27
28
29
30
31
32
33


34
35
36
37
38
39
40
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42







+
+








(declare (unit tests))
(declare (uses lock-queue))
(declare (uses db))
(declare (uses common))
(declare (uses items))
(declare (uses runconfig))
(declare (uses commonmod))
(import commonmod)

(include "common_records.scm")
(include "key_records.scm")
(include "db_records.scm")
(include "run_records.scm")
(include "test_records.scm")

Modified items.scm from [16328a4b96] to [4ca6320933].

19
20
21
22
23
24
25


26
27
28
29
30
31
32
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34







+
+








;; (define itemdat '((ripeness    "green ripe overripe")
;; 		     (temperature "cool medium hot")
;; 		     (season      "summer winter fall spring")))

(declare (unit items))
(declare (uses common))
(declare (uses commonmod))
(import commonmod)

(include "common_records.scm")

;; Puts out all combinations
(define (process-itemlist hierdepth curritemkey itemlist)
  (let ((res '()))
    (if (not hierdepth)

Modified keys.scm from [9fa2c0cfa5] to [d7b8d553eb].

22
23
24
25
26
27
28


29
30
31
32
33
34
35
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37







+
+







;;======================================================================

(use sqlite3 srfi-1 posix regex regex-case srfi-69)
(import (prefix sqlite3 sqlite3:))

(declare (unit keys))
(declare (uses common))
(declare (uses commonmod))
(import commonmod)

(include "key_records.scm")
(include "common_records.scm")

(define (keys->keystr keys) ;; => key1,key2,key3,additiona1, ...
  (string-intersperse keys ","))

Modified launch.scm from [9881087e2c] to [6fda936e21].

27
28
29
30
31
32
33

34
35
36

37
38
39
40
41



42
43
44
45
46
47
48
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53







+



+





+
+
+








(import (prefix base64 base64:))
(import (prefix sqlite3 sqlite3:))

(declare (unit launch))
(declare (uses subrun))
(declare (uses common))
(declare (uses commonmod))
(declare (uses configf))
(declare (uses db))
(declare (uses ezsteps))
(declare (uses dbfile))

(include "common_records.scm")
(include "key_records.scm")
(include "db_records.scm")
(include "megatest-fossil-hash.scm")

(import commonmod
	dbfile)

;;======================================================================
;; ezsteps
;;======================================================================

;; ezsteps were going to be coded as
;; stepname[,predstep1,predstep2 ...] [{VAR1=first,second,third}] command to execute
181
182
183
184
185
186
187
188

189
190
191
192
193
194
195
186
187
188
189
190
191
192

193
194
195
196
197
198
199
200







-
+







		   (setenv "MT_STEP_NAMES" (string-intersperse all-step-names " "))
		   (let loop ((ezstep (car ezstepslst))
			     (tal    (cdr ezstepslst))
			     (prevstep #f))
                    (debug:print-info 0 *default-log-port* "Processing ezstep \"" (string-intersperse ezstep " ") "\"")
		    ;; check exit-info (vector-ref exit-info 1)
		    (if (launch:einf-exit-status exit-info) ;; (vector-ref exit-info 1)
			(let* ((logpro-used (launch:runstep ezstep run-id test-id exit-info m tal testconfig all-steps-dat))
			(let* ((logpro-used (ezsteps:runstep ezstep run-id test-id exit-info m tal testconfig all-steps-dat))
			       (stepname    (car ezstep))
			       (stepparms   (hash-table-ref all-steps-dat stepname)))
			  (setenv "MT_STEP_NAME" stepname)
			  (pp (hash-table->alist all-steps-dat))
			  ;; if logpro-used read in the stepname.dat file
			  (if (and logpro-used (common:file-exists? (conc stepname ".dat")))
			      (launch:load-logpro-dat run-id test-id stepname))
203
204
205
206
207
208
209
210

211
212
213
214
215
216
217
208
209
210
211
212
213
214

215
216
217
218
219
220
221
222







-
+







			(debug:print 0 *default-log-port* "WARNING: a prior step failed, stopping at " ezstep)))
                        (file-close status-file)
                        )

                        ))))))

(define (launch:monitor-job run-id test-id item-path fullrunscript ezsteps test-name tconfigreg exit-info m work-area runtlim misc-flags)
  (let* ((update-period (string->number (or (configf:lookup *configdat* "setup" "test-stats-update-period") "30")))
  (let* ((update-period (string->number (or (configf:lookup *configdat* "setup" "test-stats-update-period") "60")))
         (start-seconds (current-seconds))
	 (calc-minutes  (lambda ()
			  (inexact->exact 
			   (round 
			    (- 
			     (current-seconds) 
			     start-seconds)))))
237
238
239
240
241
242
243
244
245
246



247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263

264

265
266
267
268
269
270
271
242
243
244
245
246
247
248



249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269

270
271
272
273
274
275
276
277







-
-
-
+
+
+

















+
-
+







                                   (delta (abs (- df disk-free))))
                              (if (and (> df 0)
                                       (> (/ delta df) 0.1)) ;; (> delta 200) ;; ignore changes under 200 Meg
                                  df
                                  #f)))
             (do-sync       (or new-cpu-load new-disk-free over-time))

             (test-info   (rmt:get-test-info-by-id run-id test-id))
             (state       (db:test-get-state test-info))
             (status      (db:test-get-status test-info))
             (test-info   (rmt:get-test-state-status-by-id run-id test-id))
             (state       (car test-info));; (db:test-get-state test-info))
             (status      (cdr test-info));; (db:test-get-status test-info))
             (kill-reason  "no kill reason specified")
             (kill-job?    #f))
        ;; (common:telemetry-log "zombie" (conc "launch:monitor-job - decision time encountered at "(current-seconds)" with last-sync="last-sync" do-sync="do-sync" over-time="over-time" update-period="update-period))
        (cond
         ((test-get-kill-request run-id test-id)
          (set! kill-reason "KILLING TEST since received kill request (KILLREQ)")
          (set! kill-job? #t))
         ((and runtlim (> (- (current-seconds) start-seconds) runtlim))
          (set! kill-reason (conc "KILLING TEST DUE TO TIME LIMIT EXCEEDED! Runtime=" (- (current-seconds) start-seconds) " seconds, limit=" runtlim))
          (set! kill-job? #t))
         ((equal? status "DEAD")
          (tests:update-central-meta-info run-id test-id new-cpu-load new-disk-free (calc-minutes) #f #f)
          (rmt:set-state-status-and-roll-up-items run-id test-id 'foo "RUNNING" "n/a" "was marked dead; really still running.")
          ;;(set! kill-reason "KILLING TEST because it was marked as DEAD by launch:handle-zombie-tests (might indicate really overloaded server or else overzealous setup.deadtime)") ;; MARK RUNNING
          (set! kill-job? #f)))

        (debug:print 4 *default-log-port* "cpu: " new-cpu-load " disk: " new-disk-free " last-sync: " last-sync " do-sync: " do-sync)
        (if (common:low-noise-print 600 "run zombie") ;; every five minutes is plenty
        (launch:handle-zombie-tests run-id)
	    (launch:handle-zombie-tests run-id))
        (when do-sync
          ;;(with-output-to-file (conc (getenv "MT_TEST_RUN_DIR") "/last-loadinfo.log" #:append)
          ;;  (lambda () (pp (list (current-seconds) new-cpu-load new-disk-free (calc-minutes)))))
          ;; (common:telemetry-log "zombie" (conc  "launch:monitor-job - dosync started at "(current-seconds)))
          (tests:update-central-meta-info run-id test-id new-cpu-load new-disk-free (calc-minutes) #f #f)
          ;; (common:telemetry-log "zombie" (conc "launch:monitor-job - dosync finished at "(current-seconds)))
	  )
312
313
314
315
316
317
318
319

320
321
322
323
324
325
326
318
319
320
321
322
323
324

325
326
327
328
329
330
331
332







-
+







		      (tests:test-set-status! run-id test-id "KILLED"  "KILLED" (conc (args:get-arg "-m")" "kill-reason) #f)) ;; BB ADDED kill-reason -- confirm OK with Matt
		    (begin
		      (debug:print-error 0 *default-log-port* "Nothing to kill, pid1=" pid1 ", pid2=" pid2)
		      (tests:test-set-status! run-id test-id "KILLED"  "FAILED TO KILL" (conc (args:get-arg "-m")" "kill-reason) #f) ;; BB ADDED kill-reason -- confirm OK with Matt
		      )))
	      (mutex-unlock! m)
	      ;; no point in sticking around. Exit now. But run end of run before exiting?
        (launch:end-of-run-check run-id)
              (launch:end-of-run-check run-id)
	      (exit)))
	(if (hash-table-ref/default misc-flags 'keep-going #f)
	    (begin
	      (thread-sleep! 3) ;; (+ 3 (random 6))) ;; add some jitter to the call home time to spread out the db accesses
	      (if (hash-table-ref/default misc-flags 'keep-going #f)  ;; keep originals for cpu-load and disk-free unless they change more than the allowed delta
		  (loop (calc-minutes)
                        (or new-cpu-load cpu-load)
763
764
765
766
767
768
769
770

771
772
773

774

775
776
777
778
779












780
781
782
783
784
785
786
769
770
771
772
773
774
775

776
777
778

779
780
781





782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800







-
+


-
+

+
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+







;; if dead safe to mark the test as killed in the db
;; State/status table
;; new
;; 100% COMPLETED/ (PASS,FAIL,ABORT etc.) ==> COMPLETED / X where X is same as itemized rollup
;; > 3 RUNNING with not test_dead do nothing (run should already be RUNNING/ na
;; > 0 RUNNING and test_dead then send KILLREQ ==> COMPLETED
;; 0 RUNNING ==> this is actually the first condition, should not get here

(define *last-rollup* 0)
(define (launch:end-of-run-check run-id )
    (let* ((not-completed-cnt (rmt:get-not-completed-cnt run-id))  
           (running-cnt (rmt:get-count-tests-running-for-run-id run-id))
           (running-cnt       (rmt:get-count-tests-running-for-run-id run-id))
           (all-test-launched (rmt:get-var (conc "lunch-complete-" run-id)))
	   (current-state-status (rmt:get-run-state-status run-id))
           (current-state (rmt:get-run-state run-id))
           (current-status (rmt:get-run-status run-id)))
     ;;get-vars run-id to query metadata table to check if all completed. if all-test-launched = yes then only not-completed-cnt = 0 means everyting is completed if no entry found in the table do nothing 
     (debug:print 0 *default-log-port* "Running test cnt :" running-cnt)                      
     (rmt:set-state-status-and-roll-up-run  run-id current-state current-status)
           (current-state        (car current-state-status))  ;; (rmt:get-run-state run-id))
           (current-status       (cdr current-state-status))) ;; (rmt:get-run-status run-id)))
      ;;get-vars run-id to query metadata table to check if all completed. if all-test-launched = yes then only not-completed-cnt = 0 means everyting is completed if no entry found in the table do nothing 
      (debug:print 0 *default-log-port* "Running test cnt :" running-cnt)
      ;;
      ;; TODO: add a final rollup when run is done (if there isn't one already)
      ;;
      (if (or (< running-cnt 3)                              ;; have only few running
	      (> (- (current-seconds) *last-rollup*) 10))    ;; or haven't rolled up in past ten seconds
	  (begin
	    (rmt:set-state-status-and-roll-up-run  run-id current-state current-status)
	    (set! *last-rollup* (current-seconds))))
     (runs:update-junit-test-reporter-xml run-id) 
     (cond 
       ((and all-test-launched (eq? not-completed-cnt 0) (equal? all-test-launched "yes" ))
                (if (and (equal? (rmt:get-var (conc "end-of-run-" run-id)) "no") (common:simple-lock (conc "endOfRun" run-id)))
                (begin
           	(debug:print 4 *default-log-port* "look for  post hook. currseconds: " (current-seconds) " EOR " (rmt:get-var (conc "end-of-run-" run-id)))
                (debug:print 0 *default-log-port* "End of Run Detected.")
1129
1130
1131
1132
1133
1134
1135
1136




1137
1138
1139
1140
1141
1142
1143
1143
1144
1145
1146
1147
1148
1149

1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160







-
+
+
+
+







	    (begin
	      (setenv "MT_RUN_AREA_HOME" *toppath*)
	      (setenv "MT_TESTSUITENAME" (common:get-testsuite-name)))
	    (begin
	      (debug:print-error 0 *default-log-port* "failed to find the top path to your Megatest area.")
	      (set! *toppath* #f) ;; force it to be false so we return #f
	      #f))
	

	;; needed by various transport and db modules
	(dbfile:testsuite-name (common:get-testsuite-name)) ;; (get-testsuite-name *toppath* *configdat*))

        ;; one more attempt to cache the configs for future reading
        (let* ((cachefiles   (launch:get-cache-file-paths areapath toppath target mtconfig))
               (mtcachef     (car cachefiles))
               (rccachef     (cdr cachefiles)))

          ;; trap exception due to stale NFS handle -- Error: (open-output-file) cannot open file - Stale NFS file handle: "...somepath.../.runconfigs.cfg-1.6427-7d1e789cb3f62f9cde719a4865bb51b3c17ea853" - ticket 220546342
          ;; TODO - consider 1) using simple-lock to bracket cache write
1565
1566
1567
1568
1569
1570
1571
1572

1573
1574
1575
1576
1577
1578
1579
1582
1583
1584
1585
1586
1587
1588

1589
1590
1591
1592
1593
1594
1595
1596







-
+







			   (write (list (list 'testpath  test-path)
					;; (list 'transport (conc *transport-type*))
					;; (list 'serverinf *server-info*)
					#;(list 'homehost  (let* ((hhdat (server:get-homehost)))
							   (if hhdat
							       (car hhdat)
							       #f)))
					(list 'serverurl (if *runremote*
					#;(list 'serverurl (if *runremote* ;; would like to add this back ... WORK NEEDED
							     (remote-server-url *runremote*)
							     #f)) ;;
					(list 'areaname  (common:get-testsuite-name))
					(list 'toppath   *toppath*)
					(list 'work-area work-area)
					(list 'test-name test-name) 
					(list 'runscript runscript) 

Modified lock-queue.scm from [21543b63ce] to [fbbd0328d6].

17
18
19
20
21
22
23


24
25
26
27
28
29
30
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32







+
+







;;

(use (prefix sqlite3 sqlite3:) srfi-18)

(declare (unit lock-queue))
(declare (uses common))
(declare (uses tasks))
(declare (uses commonmod))
(import commonmod)

;;======================================================================
;; attempt to prevent overlapping updates of rollup files by queueing
;; update requests in an sqlite db
;;======================================================================

;;======================================================================

Modified margs.scm from [cc1616820d] to [7a5ab19394].

21
22
23
24
25
26
27










28
29
30
31
32
33
34
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44







+
+
+
+
+
+
+
+
+
+








(define args:arg-hash (make-hash-table))

(define (args:get-arg arg . default)
  (if (null? default)
      (hash-table-ref/default args:arg-hash arg #f)
      (hash-table-ref/default args:arg-hash arg (car default))))

;; get an arg as a number
(define (args:get-arg-number arg . default)
  (let* ((val-str (args:get-arg arg))
	 (val     (if val-str (string->number val-str) #f)))
    (if val
	val
	(if (null? default)
	    #f
	    default))))

(define (args:any? . args)
  (not (null? (filter (lambda (x) x)
		      (map args:get-arg args)))))

(define (args:get-arg-from ht arg . default)
  (if (null? default)

Modified megatest-version.scm from [1aac8cc071] to [8f8303d56f].

16
17
18
19
20
21
22
23

16
17
18
19
20
21
22

23







-
+
;;     along with Megatest.  If not, see <http://www.gnu.org/licenses/>.

;; Always use two or four digit decimal
;; 1.01, 1.02...1.10,1.11,1.1101 ... 1.99,2.00..

;; (declare (unit megatest-version))

(define megatest-version 1.7009)
(define megatest-version 1.8007)

Modified megatest.scm from [89decfdb89] to [a9076c4fb8].

21
22
23
24
25
26
27








28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51






52
53
54
55
56
57
58
59
60



61
62



63
64
65
66
67
68
69
70
71
72
73
74

75
76
77
78
79
80
81
82
83
84




85
86
87
88
89
90
91
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53






54
55
56
57
58
59
60
61


62
63
64
65

66
67
68
69

70
71
72
73
74
75
76
77
78
79
80
81
82
83

84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105







+
+
+
+
+
+
+
+


















-
-
-
-
-
-
+
+
+
+
+
+


-
-




-
+
+
+

-
+
+
+











-
+










+
+
+
+








;; fake out readline usage of toplevel-command
(define (toplevel-command . a) #f)

(declare (uses common))
;; (declare (uses megatest-version))
(declare (uses margs))
(declare (uses mtargs))
(declare (uses mtargs.import))
(declare (uses commonmod))
(declare (uses commonmod.import))
(declare (uses mtargs))
(declare (uses mtargs.import))
(declare (uses debugprint))
(declare (uses debugprint.import))
(declare (uses runs))
(declare (uses launch))
(declare (uses server))
(declare (uses client))
(declare (uses tests))
(declare (uses genexample))
;; (declare (uses daemon))

(declare (uses db))
;; (declare (uses dcommon))

(declare (uses tdb))
(declare (uses mt))
(declare (uses api))
(declare (uses tasks)) ;; only used for debugging.
(declare (uses env))
(declare (uses diff-report))
(declare (uses db))
(declare (uses dbmod))
(declare (uses dbmod.import))
(declare (uses commonmod))
(declare (uses commonmod.import))
(declare (uses dbfile))
(declare (uses dbfile.import))
(declare (uses dbfile))
(declare (uses dbfile.import))
(declare (uses dbmod))
(declare (uses dbmod.import))
(declare (uses tcp-transportmod))
(declare (uses tcp-transportmod.import))
;; (declare (uses debugprint))
;; (declare (uses debugprint.import))
;; (declare (uses mtargs))
;; (declare (uses mtargs.import))

;; (declare (uses ftail))
;; (import ftail)

(import dbmod
(import mtargs
        debugprint
	dbmod
	commonmod
	dbfile)
	dbfile
	tcp-transportmod
        )

(define *db* #f) ;; this is only for the repl, do not use in general!!!!

(include "common_records.scm")
(include "key_records.scm")
(include "db_records.scm")
(include "run_records.scm")
(include "megatest-fossil-hash.scm")

(use (prefix sqlite3 sqlite3:) srfi-1 posix regex regex-case srfi-69 (prefix base64 base64:)
     readline apropos json http-client directory-utils typed-records
     http-client srfi-18 extras format)
     http-client srfi-18 extras format tcp-server tcp)

;; Added for csv stuff - will be removed
;;
(use sparse-vectors)

(require-library mutils)

(define *usage-log-file* #f)    ;; put path to file for logging usage in this var in the ~/.megatestrc file
(define *usage-use-seconds* #t) ;; for Epoc seconds in usage logging change this to #t in ~/.megatestrc file

;; set some parameters here - these need to be put in something that can be loaded from other
;; executables such as dashboard and mtutil
;;
(include "transport-mode.scm")
(dbfile:db-init-proc db:initialize-main-db)

;; load the ~/.megatestrc file, put (use trace)(trace-call-sites #t)(trace function-you-want-to-trace) in this file
;;
(let ((debugcontrolf (conc (get-environment-variable "HOME") "/.megatestrc")))
  (if (common:file-exists? debugcontrolf)
      (load debugcontrolf)))
371
372
373
374
375
376
377

378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399







+







			"-override-timeout"
			"-test-files"  ;; -test-paths is for listing all
			"-load"        ;; load and exectute a scheme file
			"-section"
			"-var"
			"-dumpmode"
			"-run-id"
			"-db"
			"-ping"
			"-refdb2dat"
			"-o"
			"-log"
                        "-sync-log"
			"-since"
			"-fields"
583
584
585
586
587
588
589
590
591


592
593

594
595

596
597
598
599
600
601
602
598
599
600
601
602
603
604


605
606

607
608
609

610
611
612
613
614
615
616
617







-
-
+
+
-

+

-
+







;; this segment will run launch:setup only if -log is not set. This is fairly safe as servers are not
;; manually started and thus should never be started in a non-megatest area. Thus no need to handle situation
;; where (launch:setup) returns #f?
;;
(if (or (args:get-arg "-log")(args:get-arg "-server")) ;; redirect the log always when a server
    (handle-exceptions
	exn
	(begin
	  (print "ERROR: Failed to switch to log output. " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn)
      (begin
	(print "ERROR: Failed to switch to log output. " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn))
	  )
      (let* ((tl   (or (args:get-arg "-log")(launch:setup)))   ;; run launch:setup if -server, ensure we do NOT run launch:setup if -log specified
	     (dbname (args:get-arg "-db"))   ;; for the server logfile name
	     (logf (or (args:get-arg "-log") ;; use -log unless we are a server, then craft a logfile name
		       (conc tl "/logs/server-" (current-process-id) "-" (get-host-name) ".log")))
		       (conc tl "/logs/server-"(or dbname "unk")"-"(current-process-id) "-" (get-host-name) ".log")))
	     (oup  (open-logfile logf)))
	(if (not (args:get-arg "-log"))
	    (hash-table-set! args:arg-hash "-log" logf)) ;; fake out future queries of -log
	(debug:print-info 0 *default-log-port* "Sending log output to " logf)
	(set! *default-log-port* oup))))

(if (or (args:get-arg "-h")
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
667
668
669
670
671
672
673













674
675
676
677
678
679
680







-
-
-
-
-
-
-
-
-
-
-
-
-







		      (original-exit exit-code)))))

;; for some switches always print the command to stderr
;;
(if (args:any? "-run" "-runall" "-remove-runs" "-set-state-status" "-kill-runs" "-kill-rerun")
    (debug:print 0 *default-log-port* (string-intersperse (argv) " ")))

;; some switches imply homehost. Exit here if not on homehost
;;
(let ((homehost-required  (list "-cleanup-db")))
  (if (apply args:any? homehost-required)
      (if (not (server:choose-server *toppath* 'home?))
	  (for-each
	   (lambda (switch)
	     (if (args:get-arg switch)
		 (begin
		   (debug:print 0 *default-log-port* "ERROR: you must be on the homehost to run with " switch
				", you can move homehost by removing the .homehost file but this will disrupt any runs in progress.")
		   (exit 1))))
	   homehost-required))))

;;======================================================================
;; Misc setup stuff
;;======================================================================

(debug:setup)

932
933
934
935
936
937
938


939
940
941












942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957






958
959
960



961
962

963
964

965
966

967

968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
934
935
936
937
938
939
940
941
942



943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976



977
978
979
980

981
982

983

984
985

986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002

1003
1004
1005
1006
1007
1008
1009







+
+
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
















+
+
+
+
+
+
-
-
-
+
+
+

-
+

-
+
-

+
-
+
















-







;; Start the server - can be done in conjunction with -runall or -runtests (one day...)
;;   we start the server if not running else start the client thread
;;======================================================================

;; Server? Start up here.
;;
(if (args:get-arg "-server")
    (let* (;; (run-id     (args:get-arg "-run-id"))
	   (dbfname    (args:get-arg "-db"))
    (let ((tl        (launch:setup))
          (transport-type (string->symbol (or (args:get-arg "-transport") "http"))))
      (server:launch 0 transport-type)
	   (tl         (launch:setup))
	   (keys       (keys:config-get-fields *configdat*)))
      (case (rmt:transport-mode)
	((http)(http-transport:launch))
	((tcp)
	 (debug:print 0 *default-log-port* "INFO: Running using tcp method.")
	 (if dbfname
	     (tt:start-server tl #f dbfname api:tcp-dispatch-request-make-handler keys)
	     (begin
	       (debug:print 0 *default-log-port* "ERROR: transport mode is tcp - -db is required.")
	       (exit 1))))
	(else (debug:print 0 *default-log-port* "ERROR: rmt:transport-mode value not recognised "(rmt:transport-mode))))
      (set! *didsomething* #t)))

;; The adjutant is a bit different, it does NOT run (launch:setup) as it is not necessarily tied to
;; a specific Megatest area. Detail are being hashed out and this may change.
;;
(if (args:get-arg "-adjutant")
    (begin
      (adjutant-run)
      (set! *didsomething* #t)))

(if (or (args:get-arg "-list-servers")
        (args:get-arg "-kill-servers"))
    (let ((tl (launch:setup)))
      (if tl ;; all roads from here exit
	  (let* ((servers (server:choose-server *toppath* 'all-valid)) ;; (server:get-list *toppath*))
		 (fmtstr  "~33a~22a~20a~20a~8a\n"))
            (if (not servers)
              (begin
                (debug:print-info 1 *default-log-port* "No servers found")
                (exit)
              )
            )
	    (format #t fmtstr "ID" "host:port" "age (hms)" "Last mod" "State")
	    (format #t fmtstr "==" "=========" "=========" "========" "=====")
	    (for-each ;;  ( mod-time host port start-time pid )
       	    (format #t fmtstr "PID" "host:port" "age (hms)" "Last mod" "State")
	    (format #t fmtstr "===" "=========" "=========" "========" "=====")
	    (for-each ;;  (ip-addr port? mod-time host port start-time pid )
	     (lambda (server)
	       (let* ((mtm (any->number (car server)))
	       (let* ((mtm (any->number (caddr server)))
		      (mod (if mtm (- (current-seconds) mtm) "unk"))
		      (age (- (current-seconds)(or (any->number (list-ref server 3)) (current-seconds))))
		      (age (- (current-seconds)(or (any->number mtm) (current-seconds))))
		      (url (conc (cadr server) ":" (caddr server)))
		      (pid (list-ref server 4))
		      (url (conc (car server) ":" (cadr server)))
		      (alv (if (number? mod)(< mod 10) #f)))
		      (alv (if (number? mod)(< mod 360) #f)))
		 (format #t
			 fmtstr
			 pid
			 url
			 (seconds->hr-min-sec age)
			 (seconds->hr-min-sec mod)
			 (if alv "alive" "dead"))
		 (if (and alv
			  (args:get-arg "-kill-servers"))
		     (begin
		       (debug:print-info 0 *default-log-port* "Attempting to kill server with pid " pid)
		       (server:kill server)))))
	     (sort servers (lambda (a b)
			     (let ((ma (or (any->number (car a)) 9e9))
				   (mb (or (any->number (car b)) 9e9)))
			       (> ma mb)))))
	    ;; (debug:print-info 1 *default-log-port* "Done with listservers")
	    (set! *didsomething* #t)
	    (exit))
	  (exit))))
      ;; must do, would have to add checks to many/all calls below

;;======================================================================
;; Weird special calls that need to run *after* the server has started?
1381
1382
1383
1384
1385
1386
1387
1388
1389

1390
1391
1392
1393
1394
1395
1396
1399
1400
1401
1402
1403
1404
1405


1406
1407
1408
1409
1410
1411
1412
1413







-
-
+







;; NOTE: list-runs and list-db-targets operate on local db!!!
;;
;; IDEA: megatest list -runname blah% ...
;;
(if (or (args:get-arg "-list-runs")
	(args:get-arg "-list-db-targets"))
    (if (launch:setup)
	(let* (;; (dbstruct    (make-dbr:dbstruct path: *toppath* local: (args:get-arg "-local")))
	       (runpatt     (args:get-arg "-list-runs"))
	(let* ((runpatt     (args:get-arg "-list-runs"))
               (access-mode (db:get-access-mode))
	       (testpatt    (common:args-get-testpatt #f))
	       ;; (if (args:get-arg "-testpatt") 
	       ;;  	        (args:get-arg "-testpatt") 
	       ;;  	        "%"))
	       (keys        (rmt:get-keys)) ;; (db:get-keys dbstruct))
	       ;; (runsdat  (db:get-runs dbstruct runpatt #f #f '()))
1431
1432
1433
1434
1435
1436
1437





1438
1439
1440
1441
1442
1443
1444
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466







+
+
+
+
+







	       (tests-spec  (let ((t (alist-ref "tests" fields-spec equal?)))
			      (if (and t (null? t)) ;; all fields
				  db:test-record-fields
				  t)))
	       (adj-tests-spec (delete-duplicates (if tests-spec (cons "id" tests-spec) db:test-record-fields))) ;; '("id"))))
	       (steps-spec  (alist-ref "steps" fields-spec equal?))
	       (test-field-index (make-hash-table)))
	  (if (and (args:get-arg "-dumpmode")
		   (not (member (args:get-arg "-dumpmode") '("sexpr" "json" "ods" "list"))))
	      (begin
		(debug:print 0 *default-log-port* "ERROR: dumpmode "(args:get-arg "-dumpmode")" not recognised. Use sexpr, json, ods or list")
		(exit)))
	  (if (and tests-spec (not (null? tests-spec))) ;; do some validation and processing of the test-spec
	      (let ((invalid-tests-spec (filter (lambda (x)(not (member x db:test-record-fields))) tests-spec)))
		(if (null? invalid-tests-spec)
		    ;; generate the lookup map test-field-name => index-number
		    (let loop ((hed (car adj-tests-spec))
			       (tal (cdr adj-tests-spec))
			       (idx 0))
1486
1487
1488
1489
1490
1491
1492
1493

1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509




1510
1511
1512
1513
1514
1515
1516
1508
1509
1510
1511
1512
1513
1514

1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530

1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541







-
+















-
+
+
+
+







			;; (mutils:hierhash-set! data (db:get-value-by-header run header "status")     targetstr runname "meta" "status"     )
			;; (mutils:hierhash-set! data (db:get-value-by-header run header "state")      targetstr runname "meta" "state"      )
			;; (mutils:hierhash-set! data (conc (db:get-value-by-header run header "id"))  targetstr runname "meta" "id"         )
			;; (mutils:hierhash-set! data (db:get-value-by-header run header "event_time") targetstr runname "meta" "event_time" )
			;; (mutils:hierhash-set! data (db:get-value-by-header run header "comment")    targetstr runname "meta" "comment"    )
			;; ;; add last entry twice - seems to be a bug in hierhash?
			;; (mutils:hierhash-set! data (db:get-value-by-header run header "comment")    targetstr runname "meta" "comment"    )
		       (else
		       ((#f list)
			(if (null? runs-spec)
			    (print "Run: " targetstr "/" runname 
				   " status: " (db:get-value-by-header run header "state")
				   " run-id: " run-id ", number tests: " (length tests)
				   " event_time: " (db:get-value-by-header run header "event_time"))
			    (begin
			      (if (not (member "target" runs-spec))
			          ;; (display (conc "Target: " targetstr))
			          (display (conc "Run: " targetstr "/" runname " ")))
			      (for-each
			       (lambda (field-name)
				 (if (equal? field-name "target")
				     (display (conc "target: " targetstr " "))
				     (display (conc field-name ": " (db:get-value-by-header run header (conc field-name)) " "))))
			       runs-spec)
			      (newline)))))
			      (newline))))
		       (else
			(debug:print 0 *default-log-port* "ERROR: dumpmode "(args:get-arg "-dumpmode")" not recognised. Use sexpr, json, ods or list")
			))
		       
		     (for-each 
		      (lambda (test)
		      	(common:debug-handle-exceptions #f
			 exn
			 (begin
			   (debug:print-error 0 *default-log-port* "Bad data in test record? " test)
2052
2053
2054
2055
2056
2057
2058
2059

2060
2061
2062
2063
2064
2065
2066
2077
2078
2079
2080
2081
2082
2083

2084
2085
2086
2087
2088
2089
2090
2091







-
+







;;======================================================================

(if (args:get-arg "-extract-ods")
    (general-run-call
     "-extract-ods"
     "Make ods spreadsheet"
     (lambda (target runname keys keyvals)
       (let ((dbstruct   (make-dbr:dbstruct path: *toppath* local: #t))
       (let ((dbstruct   (make-dbr:dbstruct areapath: *toppath* local: #t))
	     (outputfile (args:get-arg "-extract-ods"))
	     (runspatt   (or (args:get-arg "-runname")(args:get-arg ":runname")))
	     (pathmod    (args:get-arg "-pathmod")))
	     ;; (keyvalalist (keys->alist keys "%")))
	 (debug:print 2 *default-log-port* "Extract ods, outputfile: " outputfile " runspatt: " runspatt " keyvals: " keyvals)
	 (db:extract-ods-file dbstruct outputfile keyvals (if runspatt runspatt "%") pathmod)
	 (db:close-all dbstruct)
2319
2320
2321
2322
2323
2324
2325






2326
2327
2328
2329
2330
2331
2332
2344
2345
2346
2347
2348
2349
2350
2351
2352
2353
2354
2355
2356
2357
2358
2359
2360
2361
2362
2363







+
+
+
+
+
+








(if (args:get-arg "-cleanup-db")
    (begin
      (if (not (launch:setup))
	  (begin
	    (debug:print 0 *default-log-port* "Failed to setup, exiting") 
	    (exit 1)))

;;      (if (not (server:choose-server *toppath* 'home?))
;;	  (begin
;;	    (debug:print 0 *default-log-port* "Servers are not running on this host or no servers alive. Cannot run cleanup-db")
;;	    (exit 1)))

      (let ((dbstructs (db:setup #f)))
        (common:cleanup-db dbstructs))
      (set! *didsomething* #t)))

(if (args:get-arg "-mark-incompletes")
    (begin
      (if (not (launch:setup))
2377
2378
2379
2380
2381
2382
2383


2384

2385
2386
2387
2388
2389
2390
2391
2408
2409
2410
2411
2412
2413
2414
2415
2416

2417
2418
2419
2420
2421
2422
2423
2424







+
+
-
+







    (exit 0)))

(if (or (getenv "MT_RUNSCRIPT")
	(args:get-arg "-repl")
	(args:get-arg "-load"))
    (let* ((toppath (launch:setup))
	   (dbstructs (if (and toppath
			       ;; NOTE: server:choose-server is starting a server
			       ;;   either add equivalent for tcp mode or ????
                               (server:choose-server toppath 'home?))
                               #;(server:choose-server toppath 'home?))
                          (db:setup #t)
                          #f))) ;; make-dbr:dbstruct path: toppath local: (args:get-arg "-local")) #f)))
      (if *toppath*
	  (cond
	   ((getenv "MT_RUNSCRIPT")
	    ;; How to run megatest scripts
	    ;;

Modified mlaunch.scm from [5bcd34288f] to [62be2ae3e1].

26
27
28
29
30
31
32


33
26
27
28
29
30
31
32
33
34
35







+
+

;;======================================================================

(use sqlite3 srfi-1 posix regex regex-case srfi-69 format)

(declare (unit mlaunch))
(declare (uses db))
(declare (uses common))
(declare (uses commonmod))
(import commonmod)

Modified monitor.scm from [3df55c85ea] to [3205ec8bdb].

21
22
23
24
25
26
27


28
29
30
31
32
33
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35







+
+






(import (prefix sqlite3 sqlite3:))

(declare (unit runs))
(declare (uses db))
(declare (uses common))
(declare (uses items))
(declare (uses runconfig))
(declare (uses commonmod))
(import commonmod)

(include "common_records.scm")
(include "key_records.scm")
(include "db_records.scm")
(include "run_records.scm")

Added mtargs/mtargs.egg version [deacd4afda].








1
2
3
4
5
6
7
+
+
+
+
+
+
+
((license "LGPL")
 (version 0.1)
 (category misc)
 (dependencies srfi-69 srfi-1)
 (author "Matt Welland")
 (synopsis "Primitive argument processor.")
 (components (extension mtargs)))

Modified mtargs/mtargs.scm from [54d4e74749] to [49b76da6ef].

16
17
18
19
20
21
22
23
24


25

26
27
28
29

30

31
32








33

34
35
36
37
38
39
40










41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
16
17
18
19
20
21
22


23
24
25
26
27
28


29
30
31


32
33
34
35
36
37
38
39
40
41
42

43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69


















70
71
72
73
74
75
76







-
-
+
+

+


-
-
+

+
-
-
+
+
+
+
+
+
+
+

+

-





+
+
+
+
+
+
+
+
+
+












-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-







;;     along with mtargs.  If not, see <http://www.gnu.org/licenses/>.


(module mtargs
    (
     arg-hash
     get-arg
     get-arg-from
     usage
     get-arg-number
     get-arg-from
     get-args
     usage
     print-args
     any-defined?
     help
     )
     ) 

(import scheme) ;; gives us cond-expand in chicken-4
(import scheme chicken data-structures extras posix ports files)
(use srfi-69 srfi-1)

(cond-expand
 (chicken-5
  (import scheme (chicken base) (chicken port) (chicken file) (chicken process-context))
  (import srfi-69 srfi-1))
 (chicken-4
  (import chicken posix srfi-69 srfi-1))
 (else))

(define usage (make-parameter print))
(define arg-hash (make-hash-table))
(define help "")

(define (get-arg arg . default)
  (if (null? default)
      (hash-table-ref/default arg-hash arg #f)
      (hash-table-ref/default arg-hash arg (car default))))

;; get an arg as a number
(define (get-arg-number arg . default)
  (let* ((val-str (get-arg arg))
	 (val     (if val-str (string->number val-str) #f)))
    (if val
	val
	(if (null? default)
	    #f
	    default))))

(define (any-defined? . args)
  (not (null? (filter (lambda (x) x)
		      (map get-arg args)))))

;; (define any any-defined?)

(define (get-arg-from ht arg . default)
  (if (null? default)
      (hash-table-ref/default ht arg #f)
      (hash-table-ref/default ht arg (car default))))

(define (usage . args)
  (if (> (length args) 0)
      (apply print "ERROR: " args))
  (if (string? help)
      (print help)
      (print "Usage: " (car (argv)) " ... "))
  (exit 0))

 ;; one-of args defined
(define (any-defined? . param)
  (let ((res #f))
    (for-each 
     (lambda (arg)
       (if (get-arg arg)(set! res #t)))
     param)
    res))

;; args: 
(define (get-args args params switches arg-hash num-needed)
  (let* ((numtargs (length args))
	 (adj-num-needed (if num-needed (+ num-needed 2) #f)))
    (if (< numtargs (if adj-num-needed adj-num-needed 2))
	(if (>= num-needed 1)
	    (usage "No arguments provided")
	    '())
92
93
94
95
96
97
98
99

100
101
102
103
104
105
106
91
92
93
94
95
96
97

98

99
100
101
102
103
104







-
+
-






	    (if (null? tail) remtargs
		(loop (car tail)(cdr tail) remtargs)))
	   (else
	    (if (null? tail)(append remtargs (list arg)) ;; return the non-used args
		(loop (car tail)(cdr tail)(append remtargs (list arg))))))))
    ))

(define (print-args remtargs arg-hash)
(define (print-args arg-hash)
  (print "ARGS: " remtargs)
  (for-each (lambda (arg)
	      (print "   " arg "   " (hash-table-ref/default arg-hash arg #f)))
	    (hash-table-keys arg-hash)))


)

Modified mtexec.scm from [6016ee8684] to [3a9610856f].

27
28
29
30
31
32
33


34
35
36
37
38
39
40
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42







+
+







     (prefix dbi dbi:)
     )

;; (declare (uses common))
(declare (uses margs))
(declare (uses configf))
;; (declare (uses rmt))
(declare (uses commonmod))
(import commonmod)

;; (use ducttape-lib)
(include "megatest-version.scm")
(include "megatest-fossil-hash.scm")

;; (require-library stml)

Modified mtut.scm from [413cf26858] to [b04fee463b].

28
29
30
31
32
33
34



35
36
37
38
39
40
41
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44







+
+
+







     (prefix sqlite3 sqlite3:)
     nanomsg)

(declare (uses common))
(declare (uses margs))
(declare (uses configf))
;; (declare (uses rmt))
(declare (uses commonmod))
(declare (uses commonmod.import))
(import commonmod)

(use ducttape-lib)

(include "megatest-fossil-hash.scm")

(require-library stml)

Modified newdashboard.scm from [a0c1909f88] to [c27106b5bc].

27
28
29
30
31
32
33


34
35
36
37
38
39
40
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42







+
+








(use sql-de-lite srfi-1 posix regex regex-case srfi-69 typed-records sparse-vectors ;; defstruct
     (prefix dbi dbi:))

(declare (uses common))
(declare (uses megatest-version))
(declare (uses margs))
(declare (uses commonmod))
(import commonmod)

;; (declare (uses launch))
;; (declare (uses gutils))
;; (declare (uses db))
;; (declare (uses server))
;; (declare (uses synchash))
(declare (uses dcommon))

Modified ods.scm from [42e94b826f] to [1b93bc9256].

15
16
17
18
19
20
21


22
23
24
25
26
27
28
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30







+
+







;;     You should have received a copy of the GNU General Public License
;;     along with Megatest.  If not, see <http://www.gnu.org/licenses/>.
;;

(use csv-xml regex)
(declare (unit ods))
(declare (uses common))
(declare (uses commonmod))
(import commonmod)

(define ods:dirs
  '("Configurations2"
    "Configurations2/toolpanel"
    "Configurations2/menubar"
    "Configurations2/toolbar"
    "Configurations2/progressbar"

Modified portlogger.scm from [36a4964f50] to [8344cdf37f].

62
63
64
65
66
67
68
69

70
71
72
73
74
75
76
62
63
64
65
66
67
68

69
70
71
72
73
74
75
76







-
+







	 (avail  (tasks:wait-on-journal fname 10))) ;; wait up to about 10 seconds for the journal to go away
    (handle-exceptions
     exn
     (begin
       ;; (release-dot-lock fname)
       (debug:print-error 0 *default-log-port* "portlogger:open-run-close failed. " proc " " params)
       (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
       (debug:print 5 *default-log-port* "exn=" (condition->list exn))
       ;; (debug:print 5 *default-log-port* "exn=" (condition->list exn))
       (if (common:file-exists? fname)(delete-file fname)) ;; brutally get rid of it
       (print-call-chain (current-error-port)))
     (let* (;; (lock   (obtain-dot-lock fname 2 9 10))
	    (db     (portlogger:open-db fname))
	    (res    (apply proc db params)))
       (sqlite3:finalize! db)
       ;; (release-dot-lock fname)

Modified rmt.scm from [91ffe1108a] to [6a03756866].

19
20
21
22
23
24
25

26



27
28
29

30
31











32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47

48
49
50



51
52
53
54
55
56





57
58
59
60
61
62
63
64
65





66
67
68
69
70
71
72
73
74

75
76
77
78
79
80
81

82
83
84
85



86




87
88
89



90
91
92
93
94
95
96
97


98
99
100
101
102
103
104
105
106
107
108



109





110
111
112
113
114
115
116
117
118
119
120
121
122
123





























124
125
126
127
128
129
130
131
132







133
134

135
136
137
138
139
140
141
142
143
144
145
146
147




148
149
150
151
152
153
154
155
156
157
158







159
160
161
162
163
164


165
166
167
168
169




170
171
172
173
174
175
176
177
178
179
180










181
182
183


184
185
186
187
188
189
190
191
192
193
194
195








196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231








232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249














250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267














268
269
270
271
272
273
274
275
276
277









278
279
280
281
282
283
284






285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316

317
318
319
320
321
322
323
324
325
326
327
328
329













330
331
332
333
334
335
336
337
338

339
340


341
342
343


344
345
346


347
348
349
350
351
352
353
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62

63



64
65
66
67





68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94

95




96
97

98
99
100
101
102
103
104
105
106
107
108
109
110



111
112
113








114
115
116
117
118
119
120
121
122
123
124
125

126
127
128
129
130
131
132
133
134














135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163









164
165
166
167
168
169
170


171













172
173
174
175
176










177
178
179
180
181
182
183
184





185
186
187




188
189
190
191











192
193
194
195
196
197
198
199
200
201



202
203












204
205
206
207
208
209
210
211
212



































213
214
215
216
217
218
219
220
221

















222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240













241
242
243
244
245
246
247
248
249
250
251
252
253
254
255









256
257
258
259
260
261
262
263
264
265






266
267
268
269
270
271























272
273
274
275
276
277
278
279

280













281
282
283
284
285
286
287
288
289
290
291
292
293



294
295
296
297
298
299
300


301
302
303


304
305



306
307
308
309
310
311
312
313
314







+

+
+
+



+


+
+
+
+
+
+
+
+
+
+
+















-
+
-
-
-
+
+
+

-
-
-
-
-
+
+
+
+
+









+
+
+
+
+








-
+
-
-
-
-


-
+




+
+
+

+
+
+
+
-
-
-
+
+
+
-
-
-
-
-
-
-
-
+
+










-
+
+
+

+
+
+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
-
-
+
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+

-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+

-
-
-
-
-
+
+

-
-
-
-
+
+
+
+
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
-
-
-
+
+
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+





-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+

-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+

-
-
-
-
-
-
+
+
+
+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-








-
+
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-






+
-
-
+
+

-
-
+
+
-
-
-
+
+







;;======================================================================

(use format typed-records) ;; RADT => purpose of json format??

(declare (unit rmt))
(declare (uses api))
(declare (uses http-transport))
(declare (uses commonmod))
(declare (uses dbfile))
;; (declare (uses dbmemmod))
(declare (uses dbmod))
(declare (uses tcp-transportmod))
(include "common_records.scm")
;; (declare (uses rmtmod))

;; used by http-transport
(import dbfile) ;; rmtmod)

(import commonmod
;; 	dbmemmod
	dbfile
	dbmod
	tcp-transportmod)

;; http - use the old http + in /tmp db
;; tcp  - use tcp transport with inmem db
;; nfs  - use direct to disk access (read-only)
;;
(define rmt:transport-mode (make-parameter 'http))
;;
;; THESE ARE ALL CALLED ON THE CLIENT SIDE!!!
;;

;; generate entries for ~/.megatestrc with the following
;;
;;  grep define ../rmt.scm | grep rmt: |perl -pi -e 's/\(define\s+\((\S+)\W.*$/\1/'|sort -u

;;======================================================================
;;  S U P P O R T   F U N C T I O N S
;;======================================================================

;; if a server is either running or in the process of starting call client:setup
;; else return #f to let the calling proc know that there is no server available
;;
(define (rmt:get-connection-info areapath #!key (area-dat #f)) ;; TODO: push areapath down.
(define (rmt:get-connection-info areapath runremote) ;; TODO: push areapath down.
  (let* ((runremote (or area-dat *runremote*))
	 (cinfo     (if (remote? runremote)
			(remote-conndat runremote)
  (let* ((cinfo     (if (and (remote? runremote)
			     (remote-api-url runremote)) ;; we have a connection
			runremote
			#f)))
	  (if cinfo
	      cinfo
	      (if (server:check-if-running areapath)
		  (client:setup areapath)
		  #f))))
    (if cinfo
	cinfo
	(if (server:check-if-running areapath)
	    (client:setup areapath runremote)
	    #f))))

(define (rmt:on-homehost? runremote)
  (let* ((hh-dat (remote-hh-dat runremote)))
    (if (pair? hh-dat)
	(cdr hh-dat)
	(begin
	  (debug:print-info 0 *default-log-port* "hh-dat="hh-dat)
	  #f))))

(define (make-and-init-remote areapath)
   (case (rmt:transport-mode)
     ((http)(make-remote))
     ((tcp) (tt:make-remote areapath))
     (else #f)))

;;======================================================================

(define *send-receive-mutex* (make-mutex)) ;; should have separate mutex per run-id

;; RA => e.g. usage (rmt:send-receive 'get-var #f (list varname))
;;
(define (rmt:send-receive cmd rid params #!key (attemptnum 1)(area-dat #f)) ;; start attemptnum at 1 so the modulo below works as expected

  (assert *toppath* "FATAL: rmt:send-receive called with *toppath* not set.")
  #;(common:telemetry-log (conc "rmt:"(->string cmd))
                        payload: `((rid . ,rid)
                                   (params . ,params)))

  (if (> attemptnum 2)
      (debug:print 0 *default-log-port* "INFO: attemptnum in rmt:send-receive is " attemptnum))
    
  
  (cond
   ((> attemptnum 2) (thread-sleep! 0.05))
   ((> attemptnum 10) (thread-sleep! 0.5))
   ((> attemptnum 20) (thread-sleep! 1)))

  ;; I'm turning this off, it may make sense to move it
  ;; into http-transport-handler
  (if (and (> attemptnum 5) (= 0 (modulo attemptnum 15)))  
      (begin
	(debug:print 0 *default-log-port* "ERROR: can't connect to server, trying to start a server.")
	(case (rmt:transport-mode)
	  ((http)
    (begin (server:run *toppath*) (thread-sleep! 3))) 
  
  
	   (server:run *toppath*)
	   (thread-sleep! 3))
	  (else
  ;;DOT digraph megatest_state_status {
  ;;DOT   ranksep=0;
  ;;DOT   // rankdir=LR;
  ;;DOT   node [shape="box"];
  ;;DOT "rmt:send-receive" -> MUTEXLOCK;
  ;;DOT { edge [style=invis];"case 1" -> "case 2" -> "case 3" -> "case 4" -> "case 5" -> "case 6" -> "case 7" -> "case 8" -> "case 9" -> "case 10" -> "case 11"; }
  ;; do all the prep locked under the rmt-mutex
  (mutex-lock! *rmt-mutex*)
	   (thread-sleep! 1) ;; for tcp the server is started by routines in tcp-transportmod. For nfs there is no server
	   ))))
  
  ;; 1. check if server is started IFF cmd is a write OR if we are not on the homehost, store in runremote
  ;; 2. check the age of the connections. refresh the connection if it is older than timeout-20 seconds.
  ;; 3. do the query, if on homehost use local access
  ;;
  (let* ((start-time    (current-seconds)) ;; snapshot time so all use cases get same value
         (areapath      *toppath*);; TODO - resolve from dbstruct to be compatible with multiple areas
	 (runremote     (or area-dat
			    *runremote*))
         (attemptnum    (+ 1 attemptnum))
	 (readonly-mode (rmtmod:calc-ro-mode runremote *toppath*)))
	 (readonly-mode (rmtmod:calc-ro-mode runremote *toppath*))
	 (testsuite     (common:get-testsuite-name))
	 (mtexe         (common:find-local-megatest)))

    (case (rmt:transport-mode)
      ((http)(http-transport-handler runremote cmd rid params attemptnum area-dat areapath readonly-mode))
      ((tcp) (tcp-transport-handler  runremote cmd rid params attemptnum area-dat areapath readonly-mode testsuite mtexe))
      ((nfs) (nfs:transport-handler  runremote cmd rid params attemptnum area-dat areapath readonly-mode testsuite mtexe))
      )))
    ;; DOT INIT_RUNREMOTE; // leaving off - doesn't really add to the clarity
    ;; DOT MUTEXLOCK -> INIT_RUNREMOTE [label="no remote?"];
    ;; DOT INIT_RUNREMOTE -> MUTEXLOCK;
    ;; ensure we have a record for our connection for given area
    (if (not runremote)                   ;; can remove this one. should never get here.         
	(begin
	  (set! *runremote* (make-remote))
          (let* ((server-info (remote-server-info *runremote*))) 
            (if server-info
		(begin
			(remote-server-url-set! *runremote* (server:record->url server-info))
			(remote-server-id-set! *runremote* (server:record->id server-info)))))  
	  (set! runremote   *runremote*))) ;; new runremote will come from this on next iteration
    

(define (nfs:transport-handler  runremote cmd run-id params attemptnum area-dat areapath readonly-mode testsuite mtexe)
  (let* ((keys     (common:get-fields *configdat*))
	 (dbstruct (dbmod:nfs-get-dbstruct run-id keys (dbfile:db-init-proc) areapath)))
    (api:dispatch-request dbstruct cmd run-id params)))
	 
(define (tcp-transport-handler runremote cmd run-id params attemptnum area-dat areapath readonly-mode testsuite mtexe)
  (if (not runremote)
      (let* ((newremote  (make-and-init-remote areapath)))
	(set! *runremote* newremote)
	(set! runremote newremote)))
  (let* ((dbfname (conc (dbfile:run-id->dbnum run-id)".db"))) ;;(dbfile:run-id->path areapath run-id)))
    (tt:handler runremote cmd run-id params attemptnum area-dat areapath readonly-mode dbfname testsuite mtexe)))
	
(define (http-transport-handler runremote cmd rid params attemptnum area-dat areapath readonly-mode)
    ;; do all the prep locked under the rmt-mutex
  (mutex-lock! *rmt-mutex*)
  
  ;; ensure we have a record for our connection for given area
  (if (not runremote)                   ;; can remove this one. should never get here.         
      (begin
	(set! *runremote* (make-and-init-remote areapath))
        (let* ((server-info (remote-server-info *runremote*))) 
          (if server-info
	      (begin
		(remote-server-url-set! *runremote* (server:record->url server-info))
		(remote-server-id-set! *runremote* (server:record->id server-info)))))  
	(set! runremote   *runremote*))) ;; new runremote will come from this on next iteration

    ;; DOT SET_HOMEHOST; // leaving off - doesn't really add to the clarity
    ;; DOT MUTEXLOCK -> SET_HOMEHOST [label="no homehost?"];
    ;; DOT SET_HOMEHOST -> MUTEXLOCK;
    ;; ensure we have a homehost record
    (if (not (pair? (remote-hh-dat runremote)))  ;; not on homehost
	(thread-sleep! 0.1) ;; since we shouldn't get here, delay a little
	(let ((hh-data (server:choose-server areapath 'homehost)))
	  (remote-hh-dat-set! runremote (or hh-data (cons #f #f)))))
    
  ;; ensure we have a homehost record
  (if (or (not (pair? (remote-hh-dat runremote)))  ;; not on homehost
	  (not (cdr (remote-hh-dat runremote))))   ;; not on homehost
      (thread-sleep! 0.1) ;; since we shouldn't get here, delay a little
      (let ((hh-data (server:choose-server areapath 'homehost)))
	(remote-hh-dat-set! runremote (or hh-data (cons #f #f)))))
  
    ;;(print "BB> readonly-mode is "readonly-mode" dbfile is "dbfile)
    (cond
  (cond
     #;((> (- (current-seconds)(remote-connect-time runremote)) 180) ;; reconnect to server every 180 seconds
      (debug:print 0 *default-log-port* "Forcing reconnect to server(s) due to 180 second timeout.")
      (set! *runremote* #f)
      ;; BUG: close-connections should go here?
      (mutex-unlock! *rmt-mutex*)
      (rmt:send-receive cmd rid params attemptnum: 1 area-dat: area-dat))
     
     ;;DOT EXIT;
     ;;DOT MUTEXLOCK -> EXIT [label="> 15 attempts"]; {rank=same "case 1" "EXIT" }
     ;; give up if more than 150 attempts
     ((> attemptnum 150)
      (debug:print 0 *default-log-port* "ERROR: 150 tries to start/connect to server. Giving up.")
      (exit 1))
   ;; give up if more than 150 attempts
   ((> attemptnum 150)
    (debug:print 0 *default-log-port* "ERROR: 150 tries to start/connect to server. Giving up.")
    (exit 1))

     ;;DOT CASE2 [label="local\nreadonly\nquery"];
     ;;DOT MUTEXLOCK -> CASE2; {rank=same "case 2" CASE2}
     ;;DOT CASE2 -> "rmt:open-qry-close-locally";
     ;; readonly mode, read request-  handle it - case 2
     ((and readonly-mode
           (member cmd api:read-only-queries)) 
      (mutex-unlock! *rmt-mutex*)
      (debug:print-info 12 *default-log-port* "rmt:send-receive, case 2")
      (rmt:open-qry-close-locally cmd 0 params)
      )
   ;; readonly mode, read request-  handle it - case 2
   ((and readonly-mode
         (member cmd api:read-only-queries)) 
    (mutex-unlock! *rmt-mutex*)
    (debug:print-info 12 *default-log-port* "rmt:send-receive, case 2")
    (rmt:open-qry-close-locally cmd 0 params)
    )

     ;;DOT CASE3 [label="write in\nread-only mode"];
     ;;DOT MUTEXLOCK -> CASE3 [label="readonly\nmode?"]; {rank=same "case 3" CASE3}
     ;;DOT CASE3 -> "#f";
     ;; readonly mode, write request.  Do nothing, return #f
     (readonly-mode (extras-readonly-mode *rmt-mutex* *default-log-port* cmd params))
   ;; readonly mode, write request.  Do nothing, return #f
   (readonly-mode (extras-readonly-mode *rmt-mutex* *default-log-port* cmd params))

     ;; This block was for pre-emptively resetting the connection if there had been no communication for some time.
     ;; I don't think it adds any value. If the server is not there, just fail and start a new connection.
     ;; also, the expire-time calculation might not be correct. We want, time-since-last-server-access > (server:get-timeout)
     ;;
   ;; This block was for pre-emptively resetting the connection if there had been no communication for some time.
   ;; I don't think it adds any value. If the server is not there, just fail and start a new connection.
   ;; also, the expire-time calculation might not be correct. We want, time-since-last-server-access > (server:get-timeout)
   ;;
     ;;DOT CASE4 [label="reset\nconnection"];
     ;;DOT MUTEXLOCK -> CASE4 [label="have connection,\nlast_access > expire_time"]; {rank=same "case 4" CASE4}
     ;;DOT CASE4 -> "rmt:send-receive";
     ;; reset the connection if it has been unused too long
     ((and runremote
           (remote-conndat runremote)
	   (> (current-seconds) ;; if it has been more than server-timeout seconds since last contact, close this connection and start a new on
	      (+ (http-transport:server-dat-get-last-access (remote-conndat runremote))
		 (remote-server-timeout runremote))))
      (debug:print-info 0 *default-log-port* "Connection to " (remote-server-url runremote) " expired due to no accesses, forcing new connection.")
      (remote-conndat-set! runremote #f) ;; invalidate the connection, thus forcing a new connection.
   ;; reset the connection if it has been unused too long
   ((and runremote
         (remote-api-url runremote)
	 (> (current-seconds) ;; if it has been more than server-timeout seconds since last contact, close this connection and start a new on
	    (+ (remote-last-access runremote)
	       (remote-server-timeout runremote))))
    (debug:print-info 0 *default-log-port* "Connection to " (remote-server-url runremote) " expired due to no accesses in " (remote-server-timeout runremote) " seconds, forcing new connection.")
    (http-transport:close-connections runremote)
    ;; moving this setting of runremote conndat to #f to inside the http-transport:close-connections
    ;; (remote-conndat-set! runremote #f) ;; invalidate the connection, thus forcing a new connection.
      (http-transport:close-connections area-dat: runremote)
      (mutex-unlock! *rmt-mutex*)
      (rmt:send-receive cmd rid params attemptnum: attemptnum))
    (mutex-unlock! *rmt-mutex*)
    (rmt:send-receive cmd rid params attemptnum: attemptnum))
     
     ;;DOT CASE5 [label="local\nread"];
     ;;DOT MUTEXLOCK -> CASE5 [label="server not required,\non homehost,\nread-only query"]; {rank=same "case 5" CASE5};
     ;;DOT CASE5 -> "rmt:open-qry-close-locally";

     ;; on homehost and this is a read
     ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required
	   (rmt:on-homehost? runremote)
           (member cmd api:read-only-queries))   ;; this is a read
      (mutex-unlock! *rmt-mutex*)
      (debug:print-info 12 *default-log-port* "rmt:send-receive, case  5")
      (rmt:open-qry-close-locally cmd 0 params))
   
   ;; on homehost and this is a read
   ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required
	 (rmt:on-homehost? runremote)
         (member cmd api:read-only-queries))   ;; this is a read
    (mutex-unlock! *rmt-mutex*)
    (debug:print-info 12 *default-log-port* "rmt:send-receive, case  5")
    (rmt:open-qry-close-locally cmd 0 params))

     ;;DOT CASE6 [label="init\nremote"];
     ;;DOT MUTEXLOCK -> CASE6 [label="on homehost,\nwrite query,\nhave server,\ncan't reach it"]; {rank=same "case 6" CASE6};
     ;;DOT CASE6 -> "rmt:send-receive";
     ;; on homehost and this is a write, we already have a server, but server has died

     ;; reinstate this keep-alive section but inject a time condition into the (add ...
     
     #;((and (cdr (remote-hh-dat runremote))           ;; on homehost
           (not (member cmd api:read-only-queries))  ;; this is a write
           (remote-server-url runremote)             ;; have a server
           (not (server:ping (remote-server-url runremote) (remote-server-id runremote))))  ;; server has died. NOTE: this is not a cheap call! Need better approach.
      (debug:print 0 *default-log-port* "WARNING: server appears to have died, trying to reconnect, case 6")
      (http-transport:close-connections area-dat: runremote) ;; make sure to clean up
      (set! *runremote* (make-remote))
      (let* ((server-info (remote-server-info *runremote*))) 
            (if server-info
		(begin
		  (remote-server-url-set! *runremote* (server:record->url server-info))
                  (remote-server-id-set! *runremote* (server:record->id server-info)))))
      (remote-force-server-set! runremote (common:force-server?))
      (mutex-unlock! *rmt-mutex*)
      (debug:print-info 12 *default-log-port* "rmt:send-receive, case  6")
      (rmt:send-receive cmd rid params attemptnum: attemptnum))

     ;;DOT CASE7 [label="homehost\nwrite"];
     ;;DOT MUTEXLOCK -> CASE7 [label="server not required,\non homehost,\na write,\nhave a server"]; {rank=same "case 7" CASE7};
     ;;DOT CASE7 -> "rmt:open-qry-close-locally";
     ;; on homehost and this is a write, we already have a server
     ((and (not (remote-force-server runremote))     ;; honor forced use of server, i.e. server NOT required
	   (cdr (remote-hh-dat runremote))           ;; on homehost
           (not (member cmd api:read-only-queries))  ;; this is a write
           (remote-server-url runremote))            ;; have a server
      (mutex-unlock! *rmt-mutex*)
      (debug:print-info 12 *default-log-port* "rmt:send-receive, case  4.1")
      (rmt:open-qry-close-locally cmd 0 params))
   ;; on homehost and this is a write, we already have a server
   ((and (not (remote-force-server runremote))     ;; honor forced use of server, i.e. server NOT required
	 (cdr (remote-hh-dat runremote))           ;; on homehost
         (not (member cmd api:read-only-queries))  ;; this is a write
         (remote-server-url runremote))            ;; have a server (needed to sync written data back)
    (mutex-unlock! *rmt-mutex*)
    (debug:print-info 12 *default-log-port* "rmt:send-receive, case  4.1")
    (rmt:open-qry-close-locally cmd 0 params))

     ;;DOT CASE8 [label="force\nserver"];
     ;;DOT MUTEXLOCK -> CASE8 [label="server not required,\nhave homehost info,\nno connection yet,\nnot a read-only query"]; {rank=same "case 8" CASE8};
     ;;DOT CASE8 -> "rmt:open-qry-close-locally";
     ;;  on homehost, no server contact made and this is a write, passively start a server 
     ((and (not (remote-force-server runremote))     ;; honor forced use of server, i.e. server NOT required
	   (cdr (remote-hh-dat runremote))           ;; have homehost
           (not (remote-server-url runremote))       ;; no connection yet
	   (not (member cmd api:read-only-queries))) ;; not a read-only query
      (debug:print-info 12 *default-log-port* "rmt:send-receive, case  8")
      (let ((server-info  (server:check-if-running *toppath*))) ;; (server:read-dotserver->url *toppath*))) ;; (server:check-if-running *toppath*))) ;; Do NOT want to run server:check-if-running - very expensive to do for every write call
	(if server-info
	    (begin
              (remote-server-url-set! runremote (server:record->url server-info)) ;; the string can be consumed by the client setup if needed
              (remote-server-id-set! runremote (server:record->id server-info)))  
	    (if (common:force-server?)
		(server:start-and-wait *toppath*)
		(server:kind-run *toppath*)))
   ;;  on homehost, no server contact made and this is a write, passively start a server 
   ((and (not (remote-force-server runremote))     ;; honor forced use of server, i.e. server NOT required
	 (cdr (remote-hh-dat runremote))           ;; have homehost
         (not (remote-server-url runremote))       ;; no connection yet
	 (not (member cmd api:read-only-queries))) ;; not a read-only query
    (debug:print-info 12 *default-log-port* "rmt:send-receive, case  8")
    (let ((server-info  (server:check-if-running *toppath*))) ;; (server:read-dotserver->url *toppath*))) ;; (server:check-if-running *toppath*))) ;; Do NOT want to run server:check-if-running - very expensive to do for every write call
      (if server-info
	  (begin
            (remote-server-url-set! runremote (server:record->url server-info)) ;; the string can be consumed by the client setup if needed
            (remote-server-id-set! runremote (server:record->id server-info)))  
	  (if (common:force-server?)
	      (server:start-and-wait *toppath*)
	      (server:kind-run *toppath*)))
      (remote-force-server-set! runremote (common:force-server?))
      (mutex-unlock! *rmt-mutex*)
      (debug:print-info 12 *default-log-port* "rmt:send-receive, case  8.1")
      (rmt:open-qry-close-locally cmd 0 params)))

     ;;DOT CASE9 [label="force server\nnot on homehost"];
     ;;DOT MUTEXLOCK -> CASE9 [label="no connection\nand either require server\nor not on homehost"]; {rank=same "case 9" CASE9};
     ;;DOT CASE9 -> "start\nserver" -> "rmt:send-receive";
     ((or (and (remote-force-server runremote)              ;; we are forcing a server and don't yet have a connection to one
	       (not (remote-conndat runremote)))
	  (and (not (cdr (remote-hh-dat runremote)))        ;; not on a homehost 
	       (not (remote-conndat runremote))))           ;; and no connection
      (debug:print-info 12 *default-log-port* "rmt:send-receive, case 9, hh-dat: " (remote-hh-dat runremote) " conndat: " (remote-conndat runremote))
      (mutex-unlock! *rmt-mutex*)
      (if (not (server:check-if-running *toppath*)) ;; who knows, maybe one has started up?
	  (server:start-and-wait *toppath*))
      (remote-conndat-set! runremote (rmt:get-connection-info *toppath*)) ;; calls client:setup which calls client:setup-http
      (rmt:send-receive cmd rid params attemptnum: attemptnum)) ;; TODO: add back-off timeout as
   ;;DOT CASE9 [label="force server\nnot on homehost"];
   ;;DOT MUTEXLOCK -> CASE9 [label="no connection\nand either require server\nor not on homehost"]; {rank=same "case 9" CASE9};
   ;;DOT CASE9 -> "start\nserver" -> "rmt:send-receive";
   ((or (and (remote-force-server runremote)              ;; we are forcing a server and don't yet have a connection to one
	     (not (remote-api-url runremote)))
	(and (not (cdr (remote-hh-dat runremote)))        ;; not on a homehost 
	     (not (remote-api-url runremote))))           ;; and no connection
    (debug:print-info 12 *default-log-port* "rmt:send-receive, case 9, hh-dat: " (remote-hh-dat runremote) " runremote: " (remote->alist runremote))
    (mutex-unlock! *rmt-mutex*)
    (if (not (server:check-if-running *toppath*)) ;; who knows, maybe one has started up?
	(server:start-and-wait *toppath*))
    ;; was: (remote-conndat-set! runremote (rmt:get-connection-info *toppath* runremote)) ;; calls client:setup which calls client:setup-http
    (set! runremote (rmt:get-connection-info *toppath* runremote)) ;; calls client:setup which calls client:setup-http
    (rmt:send-receive cmd rid params attemptnum: attemptnum)) ;; TODO: add back-off timeout as

     ;;DOT CASE10 [label="on homehost"];
     ;;DOT MUTEXLOCK -> CASE10 [label="server not required,\non homehost"]; {rank=same "case 10" CASE10};
     ;;DOT CASE10 -> "rmt:open-qry-close-locally";
     ;; all set up if get this far, dispatch the query
     ((and (not (remote-force-server runremote))
	   (cdr (remote-hh-dat runremote))) ;; we are on homehost
      (mutex-unlock! *rmt-mutex*)
      (debug:print-info 12 *default-log-port* "rmt:send-receive, case 10")
      (rmt:open-qry-close-locally cmd (if rid rid 0) params))
   ;;DOT CASE10 [label="on homehost"];
   ;;DOT MUTEXLOCK -> CASE10 [label="server not required,\non homehost"]; {rank=same "case 10" CASE10};
   ;;DOT CASE10 -> "rmt:open-qry-close-locally";
   ;; all set up if get this far, dispatch the query
   ((and (not (remote-force-server runremote))
	 (cdr (remote-hh-dat runremote))) ;; we are on homehost
    (mutex-unlock! *rmt-mutex*)
    (debug:print-info 12 *default-log-port* "rmt:send-receive, case 10")
    (rmt:open-qry-close-locally cmd (if rid rid 0) params))

     ;;DOT CASE11 [label="send_receive"];
     ;;DOT MUTEXLOCK -> CASE11 [label="else"]; {rank=same "case 11" CASE11};
     ;;DOT CASE11 -> "rmt:send-receive" [label="call failed"];
     ;;DOT CASE11 -> "RESULT" [label="call succeeded"];
     ;; not on homehost, do server query
     (else (extras-case-11 *default-log-port* runremote cmd params attemptnum rid)))))
   ;;DOT CASE11 [label="send_receive"];
   ;;DOT MUTEXLOCK -> CASE11 [label="else"]; {rank=same "case 11" CASE11};
   ;;DOT CASE11 -> "rmt:send-receive" [label="call failed"];
   ;;DOT CASE11 -> "RESULT" [label="call succeeded"];
   ;; not on homehost, do server query
   (else (extras-case-11 *default-log-port* runremote cmd params attemptnum rid))))
    ;;DOT }

;; No Title 
;; Error: (vector-ref) out of range
;; #(#<condition: (exn type)> (#("db.scm:3740: regex#regexp" #f #f) #("db.scm:3739: regex#string-substitute" #f #f) #("db.scm:3738: base64#base64-decode" #f #f) #("db.scm:3737: z3#z3:decode-buffer" #f #f) #("db.scm:3736: with-input-from-string" #f #f) #("db.scm:3741: s11n#deserialize" #f #f) #("api.scm:374: api:execute-requests" #f #f) #("api.scm:139: call-with-current-continuation" #f #f) #("api.scm:139: with-exception-handler" #f #f) #("api.scm:139: ##sys#call-with-values" #f #f) #("api.scm:158: string->symbol" #f #f) #("api.scm:160: current-milliseconds" #f #f) #("api.scm:161: dbr:dbstruct-read-only" #f #f) #("api.scm:139: k15" #f #f) #("api.scm:139: g19" #f #f) #("api.scm:142: get-call-chain" #f #f)) #("get-test-info-by-id" (1102 507299)))
;; 6
;; 
;; 	Call history:
;; 
;; 	http-transport.scm:306: thread-terminate!	  
;; 	http-transport.scm:307: debug:print-info	  
;; 	common_records.scm:235: debug:debug-mode	  
;; 	rmt.scm:259: k587	  
;; 	rmt.scm:259: g591	  
;; 	rmt.scm:276: http-transport:server-dat-update-last-access	  
;; 	http-transport.scm:364: current-seconds	  
;; 	rmt.scm:282: debug:print-info	  
;; 	common_records.scm:235: debug:debug-mode	  
;; 	rmt.scm:283: mutex-unlock!	  
;; 	rmt.scm:287: extras-transport-succeded	  	<--
;; +-----------------------------------------------------------------------------+
;; | Exit Status    : 70  
;;  

;; bunch of small functions factored out of send-receive to make debug easier
;;

(define (extras-case-11 *default-log-port* runremote cmd params attemptnum rid)
  ;; (mutex-unlock! *rmt-mutex*)
  (debug:print-info 12 *default-log-port* "rmt:send-receive, case  9")
  ;; (mutex-lock! *rmt-mutex*)
  (let* ((conninfo (remote-conndat runremote))
  (let* (;; (conninfo (remote-conndat runremote))
	 (dat-in      (case (remote-transport runremote)
		     ((http) (condition-case ;; handling here has
					     ;; caused a lot of
					     ;; problems. However it
					     ;; is needed to deal with
					     ;; attemtped
					     ;; communication to
					     ;; servers that have gone
					     ;; away
			      (http-transport:client-api-send-receive 0 conninfo cmd params)
                              ((servermismatch)  (vector #f "Server id mismatch" ))
			      ((commfail)(vector #f "communications fail"))
			      ((exn)(vector #f "other fail" (print-call-chain)))))
	 (dat-in  (condition-case ;; handling here has
			     ;; caused a lot of
			     ;; problems. However it
			     ;; is needed to deal with
			     ;; attemtped
			     ;; communication to
			     ;; servers that have gone
			     ;; away
			     (http-transport:client-api-send-receive 0 runremote cmd params)
			     ;; (http-transport:client-api-send-receive 0 conninfo cmd params runremote)
			     ((servermismatch)  (vector #f "Server id mismatch" ))
			     ((commfail)(vector #f "communications fail"))
			     ((exn)(vector #f "other fail" (print-call-chain)))))
		     (else
		      (debug:print 0 *default-log-port* "ERROR: transport " (remote-transport runremote) " not supported")
		      (exit))))
	 (dat      (if (and (vector? dat-in) ;; ... check it is a correct size
			    (> (vector-length dat-in) 1))
		       dat-in
		       (vector #f (conc "communications fail (type 2), dat-in=" dat-in))))
	 (success  (if (vector? dat) (vector-ref dat 0) #f))
	 (res      (if (vector? dat) (vector-ref dat 1) #f)))
    (if (and (remote? runremote)
    (if (and (vector? conninfo) (< 5 (vector-length conninfo)))
	(http-transport:server-dat-update-last-access conninfo) ;; refresh access time
	     (remote-api-url runremote)) ;; (and (vector? conninfo) (< 5 (vector-length conninfo)))
	(remote-last-access-set! runremote (current-seconds)) ;; refresh access time
	(begin
	  (debug:print 0 *default-log-port* "INFO: Should not get here! conninfo=" conninfo)
	  (set! conninfo #f)
	  (debug:print 0 *default-log-port* "INFO: Should not get here! runremote="(remote->alist runremote))
	  ;; (set! conninfo #f)
	  (remote-conndat-set! *runremote* #f) ;; NOTE: *runremote* is global copy of runremote. Purpose: factor out global.
	  (http-transport:close-connections  area-dat: runremote)))
    (debug:print-info 13 *default-log-port* "rmt:send-receive, case  9. conninfo=" conninfo " dat=" dat " runremote = " runremote)
	  (http-transport:close-connections runremote)))
    (debug:print-info 13 *default-log-port* "rmt:send-receive, case  9. runremote=" (remote->alist runremote) " dat=" dat " runremote = " runremote)
    (mutex-unlock! *rmt-mutex*)
    (if success ;; success only tells us that the transport was
	;; successful, have to examine the data to see if
	;; there was a detected issue at the other end
	(extras-transport-succeded *default-log-port* *rmt-mutex* attemptnum runremote res params rid cmd)
	(begin
           (debug:print-error 0 *default-log-port* " dat=" dat) 
390
391
392
393
394
395
396
397

398
399
400
401
402
403
404
351
352
353
354
355
356
357

358
359
360
361
362
363
364
365







-
+







			     (loop (car tal)(cdr tal) newmax-cmd currmax)))))))
    (mutex-unlock! *db-stats-mutex*)
    res))

(define (rmt:open-qry-close-locally cmd run-id params #!key (remretries 5))
  (let* ((qry-is-write    (not (member cmd api:read-only-queries)))
	 (db-file-path    (db:dbfile-path)) ;;  0))
	 (dbstructs-local (db:setup #t))  ;; make-dbr:dbstruct path:  dbdir local: #t)))
	 (dbstructs-local (db:setup #t))
	 (read-only       (not (file-write-access? db-file-path)))
	 (start           (current-milliseconds))
	 (resdat          (if (not (and read-only qry-is-write))
			      (let ((v (api:execute-requests dbstructs-local (vector (symbol->string cmd) params))))
			;;	(handle-exceptions ;; there has been a long history of receiving strange errors from values returned by the client when things go wrong..
			;;	 exn               ;;  This is an attempt to detect that situation and recover gracefully
			;;	 (begin
430
431
432
433
434
435
436
437

438
439
440
441
442
443
444

445
446
447
448
449
450
451
391
392
393
394
395
396
397

398
399






400
401
402
403
404
405
406
407







-
+

-
-
-
-
-
-
+







	  (if qry-is-write
	      (let ((start-time (current-seconds)))
		(mutex-lock! *db-multi-sync-mutex*)
/		(set! *db-last-access* start-time)  ;; THIS IS PROBABLY USELESS? (we are on a client)
                (mutex-unlock! *db-multi-sync-mutex*)))))
    res))

(define (rmt:send-receive-no-auto-client-setup connection-info cmd run-id params)
(define (rmt:send-receive-no-auto-client-setup runremote cmd run-id params)
  (let* ((run-id   (if run-id run-id 0))
	 (res  	   ;; (handle-exceptions
		   ;;     exn
		   ;;   (begin
		   ;;     (print "transport failed. exn=" exn)
		   ;;     #f)
		     (http-transport:client-api-send-receive run-id connection-info cmd params))) ;; )
	 (res  	   (http-transport:client-api-send-receive run-id runremote cmd params)))
    (if (and res (vector-ref res 0))
	(vector-ref res 1) ;;; YES!! THIS IS CORRECT!! CHANGE IT HERE, THEN CHANGE rmt:send-receive ALSO!!!
	#f)))

;;======================================================================
;;
;; A C T U A L   A P I   C A L L S  
468
469
470
471
472
473
474
475

476
477

478
479
480
481
482
483
484
485
486
424
425
426
427
428
429
430

431


432


433
434
435
436
437
438
439







-
+
-
-
+
-
-








(define (rmt:login run-id)
  (rmt:send-receive 'login run-id (list *toppath* megatest-version (client:get-signature))))

;; This login does no retries under the hood - it acts a bit like a ping.
;; Deprecated for nmsg-transport.
;;
(define (rmt:login-no-auto-client-setup connection-info)
(define (rmt:login-no-auto-client-setup runremote)
  (case *transport-type* ;; run-id of 0 is just a placeholder
    ((http)(rmt:send-receive-no-auto-client-setup connection-info 'login 0 (list *toppath* megatest-version (client:get-signature))))
  (rmt:send-receive-no-auto-client-setup runremote 'login 0 (list *toppath* megatest-version (client:get-signature))))
    ;;((nmsg)(nmsg-transport:client-api-send-receive run-id connection-info 'login (list *toppath* megatest-version run-id *my-client-signature*)))
    ))

;; hand off a call to one of the db:queries statements
;; added run-id to make looking up the correct db possible 
;;
(define (rmt:general-call stmtname run-id . params)
  (rmt:send-receive 'general-call run-id (append (list stmtname run-id) params)))

517
518
519
520
521
522
523

524
525
526

527
528
529
530
531
532
533
470
471
472
473
474
475
476
477
478
479

480
481
482
483
484
485
486
487







+


-
+







  (rmt:send-receive 'get-tests-tags #f '()))

;;======================================================================
;;  K E Y S 
;;======================================================================

;; These require run-id because the values come from the run!
;; however the query must still apply to main.db
;;
(define (rmt:get-key-val-pairs run-id)
  (rmt:send-receive 'get-key-val-pairs run-id (list run-id)))
  (rmt:send-receive 'get-key-val-pairs #f (list run-id)))

(define (rmt:get-keys)
  (if *db-keys* *db-keys* 
     (let ((res (rmt:send-receive 'get-keys #f '())))
       (set! *db-keys* res)
       res)))

546
547
548
549
550
551
552
553

554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581



582
583
584
585
586
587
588
500
501
502
503
504
505
506

507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525


526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543







-
+


















-
-








+
+
+







        res)))

(define (rmt:get-targets)
  (rmt:send-receive 'get-targets #f '()))

(define (rmt:get-target run-id)
  (assert (number? run-id) "FATAL: Run id required.")
  (rmt:send-receive 'get-target run-id (list run-id)))
  (rmt:send-receive 'get-target #f (list run-id)))

(define (rmt:get-run-times runpatt targetpatt)
  (rmt:send-receive 'get-run-times #f (list runpatt targetpatt ))) 


;;======================================================================
;;  T E S T S
;;======================================================================

;; Just some syntatic sugar
(define (rmt:register-test run-id test-name item-path)
  (assert (number? run-id) "FATAL: Run id required.")
  (rmt:general-call 'register-test run-id run-id test-name item-path))

(define (rmt:get-test-id run-id testname item-path)
  (assert (number? run-id) "FATAL: Run id required.")
  (rmt:send-receive 'get-test-id run-id (list run-id testname item-path)))

;; run-id is NOT used
;;
(define (rmt:get-test-info-by-id run-id test-id)
  (if (number? test-id)
      (rmt:send-receive 'get-test-info-by-id run-id (list run-id test-id))
      (begin
	(debug:print 0 *default-log-port* "WARNING: Bad data handed to rmt:get-test-info-by-id run-id=" run-id ", test-id=" test-id)
	(print-call-chain (current-error-port))
	#f)))

(define (rmt:get-test-state-status-by-id run-id test-id)
  (rmt:send-receive 'get-test-state-status-by-id run-id (list run-id test-id)))

(define (rmt:test-get-rundir-from-test-id run-id test-id)
  (rmt:send-receive 'test-get-rundir-from-test-id run-id (list run-id test-id)))

(define (rmt:open-test-db-by-test-id run-id test-id #!key (work-area #f))
  (assert (number? run-id) "FATAL: Run id required.")
  (let* ((test-path (if (string? work-area)
			work-area
827
828
829
830
831
832
833



834
835
836
837
838
839
840
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798







+
+
+







  (assert (number? run-id) "FATAL: Run id required.")
  (rmt:send-receive 'get-run-status #f (list run-id)))

(define (rmt:get-run-state run-id)
  (assert (number? run-id) "FATAL: Run id required.")
  (rmt:send-receive 'get-run-state #f (list run-id)))

(define (rmt:get-run-state-status run-id)
  (assert (number? run-id) "FATAL: Run id required.")
  (rmt:send-receive 'get-run-state-status #f (list run-id)))

(define (rmt:set-run-status run-id run-status #!key (msg #f))
  (assert (number? run-id) "FATAL: Run id required.")
  (rmt:send-receive 'set-run-status #f (list run-id run-status msg)))

(define (rmt:set-run-state-status run-id state status )
  (assert (number? run-id) "FATAL: Run id required.")
1042
1043
1044
1045
1046
1047
1048
1049
1050


1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061























1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074


1075
1076
1077
1078
1079
1080
1081
1000
1001
1002
1003
1004
1005
1006

1007
1008
1009











1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042



1043
1044
1045
1046
1047
1048
1049
1050
1051







-

+
+
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+










-
-
-
+
+







(define (rmt:test-set-archive-block-id run-id test-id archive-block-id)
  (assert (number? run-id) "FATAL: Run id required.")
  (rmt:send-receive 'test-set-archive-block-id run-id (list run-id test-id archive-block-id)))

(define (rmt:test-get-archive-block-info archive-block-id)
  (rmt:send-receive 'test-get-archive-block-info #f (list archive-block-id)))


(define (rmtmod:calc-ro-mode runremote *toppath*)
  (case (rmt:transport-mode)
    ((http)
  (if (and runremote
	   (remote-ro-mode-checked runremote))
      (remote-ro-mode runremote)
      (let* ((mtcfgfile  (conc *toppath* "/megatest.config"))
	     (ro-mode (not (file-write-access? mtcfgfile)))) ;; TODO: use dbstruct or runremote to figure this out in future
	(if runremote
	    (begin
	      (remote-ro-mode-set! runremote ro-mode)
	      (remote-ro-mode-checked-set! runremote #t)
	      ro-mode)
	    ro-mode))))
     (if (and runremote
	      (remote-ro-mode-checked runremote))
	 (remote-ro-mode runremote)
	 (let* ((mtcfgfile  (conc *toppath* "/megatest.config"))
		(ro-mode (not (file-write-access? mtcfgfile)))) ;; TODO: use dbstruct or runremote to figure this out in future
	   (if runremote
	       (begin
		 (remote-ro-mode-set! runremote ro-mode)
		 (remote-ro-mode-checked-set! runremote #t)
		 ro-mode)
	       ro-mode))))
    ((tcp)
     (if (and runremote
	      (tt-ro-mode-checked runremote))
	 (tt-ro-mode runremote)
	 (let* ((mtcfgfile  (conc *toppath* "/megatest.config"))
		(ro-mode (not (file-write-access? mtcfgfile)))) ;; TODO: use dbstruct or runremote to figure this out in future
	   (if runremote
	       (begin
		 (tt-ro-mode-set! runremote ro-mode)
		 (tt-ro-mode-checked-set! runremote #t)
		 ro-mode)
	       ro-mode))))))

(define (extras-readonly-mode rmt-mutex log-port cmd params)
  (mutex-unlock! rmt-mutex)
  (debug:print-info 12 log-port "rmt:send-receive, case 3")
  (debug:print 0 log-port "WARNING: write transaction requested on a readonly area.  cmd="cmd" params="params)
  #f)

(define (extras-transport-failed *default-log-port* *rmt-mutex* attemptnum runremote cmd rid params)
  (debug:print 0 *default-log-port* "WARNING: communication failed. Trying again, try num: " attemptnum)
  (mutex-lock! *rmt-mutex*)
  (remote-conndat-set!    runremote #f)
  (http-transport:close-connections area-dat: runremote)
  (remote-server-url-set! runremote #f)
  (http-transport:close-connections runremote)
  ;; (remote-server-url-set! runremote #f)
  (mutex-unlock! *rmt-mutex*)
  (debug:print-info 12 *default-log-port* "rmt:send-receive, case  9.1")
  (rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1)))
  
(define (extras-transport-succeded *default-log-port* *rmt-mutex* attemptnum runremote res params rid cmd)
  (if (and (vector? res)
	   (eq? (vector-length res) 2)
1094
1095
1096
1097
1098
1099
1100
1101

1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1064
1065
1066
1067
1068
1069
1070

1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082







-
+











						 ;; server is
						 ;; overloaded and we
						 ;; want to ease off
						 ;; the queries
      (let ((wait-delay (+ attemptnum (* attemptnum 10))))
	(debug:print 0 *default-log-port* "WARNING: server is overloaded. Delaying " wait-delay " seconds and trying call again.")
	(mutex-lock! *rmt-mutex*)
	(http-transport:close-connections area-dat: runremote)
	(http-transport:close-connections runremote)
	(set! *runremote* #f) ;; force starting over
	(mutex-unlock! *rmt-mutex*)
	(thread-sleep! wait-delay)
	(rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1)))
      res)) ;; All good, return res

#;(set-functions rmt:send-receive                       remote-server-url-set!
	       http-transport:close-connections	      remote-conndat-set!
	       debug:print                            debug:print-info
	       remote-ro-mode                         remote-ro-mode-set!
	       remote-ro-mode-checked-set!            remote-ro-mode-checked)

Modified runconfig.scm from [66b9c38588] to [7a53eaa476].

20
21
22
23
24
25
26


27
28
29
30
31
32
33
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35







+
+







;; to this run field1val/field2val/field3val ...
;;======================================================================

(use format directory-utils)

(declare (unit runconfig))
(declare (uses common))
(declare (uses commonmod))
(import commonmod)

(include "common_records.scm")

(define (runconfig:read fname target environ-patt)
  (let ((ht (make-hash-table)))
    (if target (hash-table-set! ht target '()))
    (read-config fname ht #t environ-patt: environ-patt sections: (if target (list "default" target) #f))))

Modified runs.scm from [52f98f2a96] to [9ed9863b9e].

20
21
22
23
24
25
26

27
28
29
30
31
32
33
34
35
36
37
38
39
40
41


42
43
44
45
46
47
48
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51







+















+
+







(use (prefix sqlite3 sqlite3:) srfi-1 posix regex regex-case srfi-69 (srfi 18) 
     posix-extras directory-utils pathname-expand typed-records format  sxml-serializer
     sxml-modifications matchable)

(declare (unit runs))
(declare (uses db))
(declare (uses common))
(declare (uses commonmod))
(declare (uses items))
(declare (uses runconfig))
(declare (uses tests))
(declare (uses server))
(declare (uses mt))
(declare (uses archive))
;; (declare (uses filedb))

(include "common_records.scm")
(include "key_records.scm")
(include "db_records.scm")
(include "run_records.scm")
(include "test_records.scm")

;; (include "debugger.scm")

(import commonmod)

;; use this struct to facilitate refactoring
;;

(defstruct runs:dat
  reglen regfull
  runname max-concurrent-jobs run-id
1277
1278
1279
1280
1281
1282
1283
1284

1285

1286
1287
1288








1289
1290
1291
1292
1293
1294
1295
1280
1281
1282
1283
1284
1285
1286

1287
1288
1289
1290
1291

1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306







-
+

+


-
+
+
+
+
+
+
+
+







			 ", "))
      (thread-sleep! 0.051)
      (list hed tal reg reruns))
     
     ;; If no resources are available just kill time and loop again
     ;;
     ((not have-resources) ;; simply try again after waiting a second
      (if (runs:lownoise "no resources" 60)
      (if (runs:lownoise "no resources" 600)
	  (debug:print-info 1 *default-log-port* "no resources to run new tests, waiting ..."))

      ;; Have gone back and forth on this but db starvation is an issue.
      ;; wait one second before looking again to run jobs.
      (thread-sleep! 0.25)
      ;; (thread-sleep! 0.25)
      
      ;; new logic.
      ;; If it has been more than 10 seconds since we were last here don't wait at all
      ;; otherwise sleep 2 seconds to give db a rest and let dashboard read data
      (if (runs:lownoise "frequent-no-resources" 10)
	  (thread-sleep! 0.25) ;; no significant delay
	  (thread-sleep! 2))
      ;; could have done hed tal here but doing car/cdr of newtal to rotate tests
      (list (car newtal)(cdr newtal) reg reruns))
     
     ;; This is the final stage, everything is in place so launch the test
     ;;
     ((and have-resources
	   (or (null? prereqs-not-met)
1773
1774
1775
1776
1777
1778
1779
1780

1781
1782
1783
1784
1785
1786
1787
1784
1785
1786
1787
1788
1789
1790

1791
1792
1793
1794
1795
1796
1797
1798







-
+







		 (let* ((jobgroup              (runs:testdat-jobgroup testdat-in))
			(can-run-more-tests    (runs:dat-can-run-more-tests runsdat))
			(last-jobs-check-time  (runs:dat-last-jobs-check-time runsdat))
			(should-check-jobs     (match can-run-more-tests
						 ((can-run-more-flag num-running nr-in-jobgroup max-concurrent-jobs . params)
						  (if (< (- max-concurrent-jobs num-running) 25)
						      (begin
							(debug:print-info 0 *default-log-port*
							(debug:print-info 2 *default-log-port*
									  "less than 20 jobs headroom, ("max-concurrent-jobs
									  "-"num-running")>20. Forcing prelaunch check.")
							#t)
						      #f))
						 (else #f)))) ;; no record yet
		   (if should-check-jobs
		       (let loop-can-run-more
1853
1854
1855
1856
1857
1858
1859

1860

1861
1862
1863
1864
1865
1866
1867
1864
1865
1866
1867
1868
1869
1870
1871

1872
1873
1874
1875
1876
1877
1878
1879







+
-
+







                                   (my-item-path (item-list->path my-itemdat))

                                   (newtestname (db:test-make-full-name hed my-item-path)))    ;; test names are unique on testname/item-path
                              (tests:testqueue-set-items!     new-test-record #f)
                              (tests:testqueue-set-itemdat!   new-test-record my-itemdat)
                              (tests:testqueue-set-item_path! new-test-record my-item-path)
                              (hash-table-set! test-records newtestname new-test-record)
			      ;; BUG: This next line sucks up a lot of horsepower
                              (set! tal (append tal (list newtestname)))))  ;; since these are itemized create new test names testname/itempath
			      (set! tal (append tal (list newtestname)))))  ;; since these are itemized create new test names testname/itempath
                          items-in-testpatt)))
          
          

	  ;; At this point we have possibly added items to tal but all must be handed off to 
	  ;; INNER COND logic. I think loop without rotating the queue 
	  ;; (loop hed tal reg reruns))

Modified server.scm from [167f3b570d] to [4124af2653].

63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
63
64
65
66
67
68
69











70
71
72
73
74
75
76







-
-
-
-
-
-
-
-
-
-
-







;;======================================================================
;; S E R V E R
;;======================================================================

;; Call this to start the actual server
;;

;; all routes though here end in exit ...
;;
;; start_server
;;
(define (server:launch run-id transport-type)
  (case transport-type
    ((http)(http-transport:launch))
    ;;((nmsg)(nmsg-transport:launch run-id))
    ;;((rpc)  (rpc-transport:launch run-id))
    (else (debug:print-error 0 *default-log-port* "unknown server type " transport-type))))

;;======================================================================
;; S E R V E R   U T I L I T I E S 
;;======================================================================

;; Get the transport
(define (server:get-transport)
  (if *transport-type*
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130














131
132
133
134
135
136
137
138
139
140
141
142
143
144
145

146
147
148
149
150
151
152
99
100
101
102
103
104
105














106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126








127
128
129
130
131
132
133
134







-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+







-
-
-
-
-
-
-
-
+








(define (server:get-server-id)
  (if *server-id* *server-id*
      (let ((sig (server:mk-signature))) ;; clients re-use the server:mk-signature logic
        (set! *server-id* sig)
        *server-id*)))

;; When using zmq this would send the message back (two step process)
;; with spiffy or rpc this simply returns the return data to be returned
;; 
(define (server:reply return-addr query-sig success/fail result)
  (debug:print-info 11 *default-log-port* "server:reply return-addr=" return-addr ", result=" result)
  ;; (send-message pubsock target send-more: #t)
  ;; (send-message pubsock 
  (case (server:get-transport)
    ((rpc)  (db:obj->string (vector success/fail query-sig result)))
    ((http) (db:obj->string (vector success/fail query-sig result)))
    ((fs)   result)
    (else 
     (debug:print-error 0 *default-log-port* "unrecognised transport type: " *transport-type*)
     result)))
;; ;; When using zmq this would send the message back (two step process)
;; ;; with spiffy or rpc this simply returns the return data to be returned
;; ;; 
;; (define (server:reply return-addr query-sig success/fail result)
;;   (debug:print-info 11 *default-log-port* "server:reply return-addr=" return-addr ", result=" result)
;;   ;; (send-message pubsock target send-more: #t)
;;   ;; (send-message pubsock 
;;   (case (server:get-transport)
;;     ((rpc)  (db:obj->string (vector success/fail query-sig result)))
;;     ((http) (db:obj->string (vector success/fail query-sig result)))
;;     ((fs)   result)
;;     (else 
;;      (debug:print-error 0 *default-log-port* "unrecognised transport type: " *transport-type*)
;;      result)))

;; Given an area path,  start a server process    ### NOTE ### > file 2>&1 
;; if the target-host is set 
;; try running on that host
;;   incidental: rotate logs in logs/ dir.
;;
(define  (server:run areapath) ;; areapath is *toppath* for a given testsuite area
  (let* (;; (curr-host   (get-host-name))
         ;; (attempt-in-progress (server:start-attempted? areapath))
         ;; (dot-server-url (server:check-if-running areapath))
	 ;; (curr-ip     (server:get-best-guess-address curr-host))
	 ;; (curr-pid    (current-process-id))
	 ;; (homehost    (server:get-homehost)) ;; configf:lookup *configdat* "server" "homehost" ))
	 ;; (target-host (car homehost))
	 (testsuite   (common:get-testsuite-name))
  (let* ((testsuite   (common:get-testsuite-name))
	 (logfile     (conc areapath "/logs/server.log")) ;; -" curr-pid "-" target-host ".log"))
	 (profile-mode (or (configf:lookup *configdat* "misc" "profilesw")
			   ""))
	 (cmdln (conc (common:get-megatest-exe)
		      " -server - ";; (or target-host "-")
		      (if (equal? (configf:lookup *configdat* "server" "daemonize") "yes")
			  " -daemonize "
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211



















212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230

















231
232
233
234
235
236
237
170
171
172
173
174
175
176

















177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197

















198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221







-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+


-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+








(define (server:logf-get-start-info logf)
  (let ((server-rx    (regexp "^SERVER STARTED: (\\S+):(\\d+) AT ([\\d\\.]+) server-id: (\\S+) pid: (\\d+)")) ;; SERVER STARTED: host:port AT timesecs server id
        (dbprep-rx    (regexp "^SERVER: dbprep"))
        (dbprep-found 0)
	(bad-dat      (list #f #f #f #f #f)))
    (handle-exceptions
	exn
      (begin
	(debug:print-info 0 *default-log-port* "Unable to get server info from " logf ", exn=" exn)
	bad-dat) ;; no idea what went wrong, call it a bad server
      (with-input-from-file
	  logf
	(lambda ()
	  (let loop ((inl  (read-line))
		     (lnum 0))
	    (if (not (eof-object? inl))
		(let ((mlst (string-match server-rx inl))
                      (dbprep (string-match dbprep-rx inl)))
                  (if dbprep (set! dbprep-found 1))
		  (if (not mlst)
		      (if (< lnum 500) ;; give up if more than 500 lines of server log read
			  (loop (read-line)(+ lnum 1))
			  (begin 
     exn
     (begin
       ;; WARNING: this is potentially dangerous to blanket ignore the errors
       (if (file-exists? logf)
	   (debug:print-info 2 *default-log-port* "Unable to get server info from "logf", exn=" exn))
       bad-dat) ;; no idea what went wrong, call it a bad server
     (with-input-from-file
	 logf
       (lambda ()
	 (let loop ((inl  (read-line))
		    (lnum 0))
	   (if (not (eof-object? inl))
	       (let ((mlst (string-match server-rx inl))
		     (dbprep (string-match dbprep-rx inl)))
		 (if dbprep (set! dbprep-found 1))
		 (if (not mlst)
		     (if (< lnum 500) ;; give up if more than 500 lines of server log read
			 (loop (read-line)(+ lnum 1))
			 (begin 
                           (debug:print-info 0 *default-log-port* "Unable to get server info from first 500 lines of " logf )
                           bad-dat))
		      (match mlst
			((_ host port start server-id pid)
			 (list host
			       (string->number port)
			       (string->number start)
			       server-id
			       (string->number pid)))
			(else
			 (debug:print 0 *current-log-port* "ERROR: did not recognise SERVER line info "mlst)
			 bad-dat))))
                (begin 
                  (if dbprep-found
                      (begin
                         (debug:print-info 2 *default-log-port* "Server is in dbprep at " (common:human-time))
                         (thread-sleep! 0.5)) ;; was 25 sec but that blocked things from starting?
                      (debug:print-info 0 *default-log-port* "Unable to get server info from " logf " at " (seconds->time-string (current-seconds))))
		  bad-dat))))))))
		     (match mlst
			    ((_ host port start server-id pid)
			     (list host
				   (string->number port)
				   (string->number start)
				   server-id
				   (string->number pid)))
			    (else
			     (debug:print 0 *current-log-port* "ERROR: did not recognise SERVER line info "mlst)
			     bad-dat))))
	       (begin 
		 (if dbprep-found
		     (begin
		       (debug:print-info 2 *default-log-port* "Server is in dbprep at " (common:human-time))
		       (thread-sleep! 0.5)) ;; was 25 sec but that blocked things from starting?
		     (debug:print-info 0 *default-log-port* "Unable to get server info from " logf " at " (seconds->time-string (current-seconds))))
		 bad-dat))))))))

;; ;; get a list of servers from the log files, with all relevant data
;; ;; ( mod-time host port start-time pid )
;; ;;
;; (define (server:get-list areapath #!key (limit #f))
;;   (let ((fname-rx    (regexp "^(|.*/)server-(\\d+)-(\\S+).log$"))
;; 	(day-seconds (* 24 60 60)))
417
418
419
420
421
422
423

424

425
426
427
428
429
430
431
432
433
434
435
436
437

438
439

440
441






























442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457














458
459
460

461
462
463
464
465


466
467
468
469
470
471
472
473
474
475
476






477
478
479
480
481
482
483
484
485
486
487
488
489


490
491
492
493

494
495
496
497



498
499
500
501
502
503

504

505
506
507
508




























509
510
511

512


513
514
515
516
517
518
519
520






521

522
523
524
525
526
527
528
401
402
403
404
405
406
407
408

409
410
411
412
413
414
415
416
417
418
419
420
421

422
423

424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488

489
490
491
492
493
494
495
496
497
498
499
500
501
502
503




504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520


521
522
523
524
525

526
527



528
529
530
531
532
533
534
535
536
537

538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574

575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590

591
592
593
594
595
596
597
598







+
-
+












-
+

-
+


+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
















+
+
+
+
+
+
+
+
+
+
+
+
+
+


-
+





+
+







-
-
-
-
+
+
+
+
+
+











-
-
+
+



-
+

-
-
-
+
+
+






+
-
+




+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+



+
-
+
+








+
+
+
+
+
+
-
+







		 (thread-sleep! ( + 1 idletime))
		 (server:wait-for-server-start-last-flag areapath)))))))

;; oldest server alive determines host then choose random of youngest
;; five servers on that host
;;
(define (server:get-servers-info areapath)
  ;; (assert *toppath* "FATAL: server:get-servers-info called before *toppath* has been set.")
  (let* ((servinfodir (conc *toppath*"/.servinfo")))
  (let* ((servinfodir (server:get-servinfo-dir areapath))) ;; (conc *toppath*"/.servinfo")))
    (if (not (file-exists? servinfodir))
	(create-directory servinfodir))
    (let* ((allfiles    (glob (conc servinfodir"/*")))
	   (res         (make-hash-table)))
      (for-each
       (lambda (f)
	 (let* ((hostport  (pathname-strip-directory f))
		(serverdat (server:logf-get-start-info f)))
	   (match serverdat
	     ((host port start server-id pid)
	      (if (and host port start server-id pid)
		  (hash-table-set! res hostport serverdat)
		  (debug:print-info 0 *default-log-port* "bad server info for "f": "serverdat)))
		  (debug:print-info 2 *default-log-port* "bad server info for "f": "serverdat)))
	     (else
	      (debug:print-info 0 *default-log-port* "bad server info for "f": "serverdat)))))
	      (debug:print-info 2 *default-log-port* "bad server info for "f": "serverdat)))))
       allfiles)
      res)))

;; check the .servinfo directory, are there other servers running on this
;; or another host?
;;
;; returns #t => ok to start another server
;;         #f => not ok to start another server
;;
(define (server:minimal-check areapath)
  (server:clean-up-old areapath)
  (let* ((srvdir      (server:get-servinfo-dir areapath)) ;; (conc areapath"/.servinfo"))
	 (servrs      (glob (conc srvdir"/*")))
	 (thishostip  (server:get-best-guess-address (get-host-name)))
	 (thisservrs  (glob (conc srvdir"/"thishostip":*")))
	 (homehostinf (server:choose-server areapath 'homehost))
	 (havehome    (car homehostinf))
	 (wearehome   (cdr homehostinf)))
    (debug:print-info 0 *default-log-port* thishostip", have homehost: "havehome", we are homehost: "wearehome
		      ", numservers: "(length thisservrs))
    (cond
     ((not havehome) #t) ;; no homehost yet, go for it
     ((and havehome wearehome (< (length thisservrs) 20)) #t) ;; we are home and less than 20 servers, ok to start another
     ((and havehome (not wearehome)) #f)     ;; we are not the home host
     ((and havehome wearehome (>= (length thisservrs) 20)) #f) ;; have enough running
     (else
      (debug:print 0 *default-log-port* "WARNING: Unrecognised scenario, servrs="servrs", thishostip="thishostip", thisservrs="thisservrs)
      #t))))
	 

(define server-last-start 0)


;; oldest server alive determines host then choose random of youngest
;; five servers on that host
;;
;; mode:
;;   best - get best server (random of newest five)
;;   home - get home host based on oldest server
;;   info - print info
(define (server:choose-server areapath #!optional (mode 'best))
  ;; age is current-starttime
  ;; find oldest alive
  ;;   1. sort by age ascending and ping until good
  ;; find alive rand from youngest
  ;;   1. sort by age descending
  ;;   2. take five
  ;;   3. check alive, discard if not and repeat
  ;; first we clean up old server files
  (assert (eq? (rmt:transport-mode) 'http) "FATAL: server:run called with rmt:transport-mode="(rmt:transport-mode))
  (server:clean-up-old areapath)
  (let* ((since-last (- (current-seconds) server-last-start))
        (server-start-delay 10))     
    (if ( < (- (current-seconds) server-last-start) 10 )
      (begin
        (debug:print 2 *default-log-port* "server:choose-server: seconds since last server start: " (- (current-seconds) server-last-start))
        (debug:print 2 *default-log-port* "server:choose-server: last server start less than " server-start-delay " seconds ago. Sleeping " server-start-delay " seconds")
        (thread-sleep! server-start-delay)
      )
      (debug:print 2 *default-log-port* "server:choose-server: seconds since last server start: " (- (current-seconds) server-last-start))
    )
  )
  (let* ((serversdat  (server:get-servers-info areapath))
	 (servkeys    (hash-table-keys serversdat))
	 (by-time-asc (if (not (null? servkeys))
	 (by-time-asc (if (not (null? servkeys)) ;; NOTE: Oldest is last
			  (sort servkeys ;; list of "host:port"
				(lambda (a b)
				  (>= (list-ref (hash-table-ref serversdat a) 2)
				      (list-ref (hash-table-ref serversdat b) 2))))
			  '())))
    (debug:print 2 *default-log-port* "server:choose-server: serversdat: " serversdat)
    (debug:print 2 *default-log-port* "server:choose-server: servkeys: " servkeys)
    (if (not (null? by-time-asc))
	(let* ((oldest     (last by-time-asc))
	       (oldest-dat (hash-table-ref serversdat oldest))
	       (host       (list-ref oldest-dat 0))
	       (all-valid  (filter (lambda (x)
				     (equal? host (list-ref (hash-table-ref serversdat x) 0)))
				   by-time-asc))
	       (best-five  (lambda ()
			     (if (> (length all-valid) 5)
				 (take all-valid 5)
				 all-valid)))
	       (best-ten  (lambda ()
			     (if (> (length all-valid) 11)
				 (take (drop-right all-valid 1) 10) ;; remove the oldest from consideration so it can age out
				 (if (> (length all-valid) 8)
				     (drop-right all-valid 1)
				     all-valid))))
	       (names->dats (lambda (names)
			      (map (lambda (x)
				     (hash-table-ref serversdat x))
				   names)))
	       (am-home?    (lambda ()
			      (let* ((currhost (get-host-name))
				     (bestadrs (server:get-best-guess-address currhost)))
				(or (equal? host currhost)
				    (equal? host bestadrs))))))
	  (case mode
	    ((info)
	     (print "oldest: "oldest-dat", selected host: "host", all-valid: "all-valid)
	     (print "youngest: "(hash-table-ref serversdat (car all-valid))))
	     (debug:print 0 *default-log-port* "oldest: "oldest-dat", selected host: "host", all-valid: "all-valid)
	     (debug:print 0 *default-log-port* "youngest: "(hash-table-ref serversdat (car all-valid))))
	    ((home)     host)
	    ((homehost) (cons host (am-home?))) ;; shut up old code
	    ((home?)    (am-home?))
	    ((best-five)(names->dats (best-five)))
	    ((best-ten)(names->dats (best-ten)))
	    ((all-valid)(names->dats all-valid))
	    ((best)     (let* ((best-five (best-five))
			       (len       (length best-five)))
			  (hash-table-ref serversdat (list-ref best-five (random len)))))
	    ((best)     (let* ((best-ten (best-ten))
			       (len       (length best-ten)))
			  (hash-table-ref serversdat (list-ref best-ten (random len)))))
	    ((count)(length all-valid))
	    (else
	     (debug:print 0 *default-log-port* "ERROR: invalid command "mode)
	     #f)))
	(begin
	  (server:run areapath)
          (set! server-last-start (current-seconds))
	  (thread-sleep! 3)
	  ;; (thread-sleep! 3)
	  (case mode
	    ((homehost) (cons #f #f))
	    (else	#f))))))

(define (server:get-servinfo-dir areapath)
  (let* ((spath (conc areapath"/.servinfo")))
    (if (not (file-exists? spath))
	(create-directory spath #t))
    spath))

(define (server:clean-up-old areapath)
  ;; any server file that has not been touched in ten minutes is effectively dead
  (let* ((sfiles (glob (conc (server:get-servinfo-dir areapath)"/*"))))
    (for-each
     (lambda (sfile)
       (let* ((modtime (handle-exceptions
			   exn
			 (begin
			   (debug:print 0 *default-log-port* "WARNING: failed to get modification file for "sfile)
			   (current-seconds))
			 (file-modification-time sfile))))
	 (if (and (number? modtime)
		  (> (- (current-seconds) modtime)
		     600))
	     (begin
	       (debug:print 0 *default-log-port* "WARNING: found old server info file "sfile", removing it.")
	       (handle-exceptions
		   exn
		 (debug:print 0 *default-log-port* "WARNING: failed to delete old server info file "sfile)
		 (delete-file sfile))))))
     sfiles)))

;; would like to eventually get rid of this
;;
(define (common:on-homehost?)
  (if (eq? (rmt:transport-mode) 'http)
  (server:choose-server *toppath* 'home?))
      (server:choose-server *toppath* 'home?)
      #t)) ;; there is no homehost for tcp and nfs is always on home so #t should work

;; kind start up of server, wait before allowing another server for a given
;; area to be launched
;;
(define (server:kind-run areapath)
  ;; look for $MT_RUN_AREA_HOME/logs/server-start-last
  ;; and wait for it to be at least <server idletime> seconds old
  ;; (server:wait-for-server-start-last-flag areapath)
  (let loop ()
    (if (> (alist-ref 'adj-proc-load (common:get-normalized-cpu-load #f)) 2)
	(begin
	  (if (common:low-noise-print 30 "our-host-load")
	      (debug:print 0 *default-log-port* "WARNING: system load is high, waiting to start server."))
	  (loop))))
  (if (< (server:choose-server areapath 'count) 10)
  (if (< (server:choose-server areapath 'count) 20)
      (server:run areapath))
  #;(if (not (server:check-if-running areapath)) ;; why try if there is already a server running?
      (let* ((lock-file    (conc areapath "/logs/server-start.lock")))
	(let* ((start-flag (conc areapath "/logs/server-start-last")))
	  (common:simple-file-lock-and-wait lock-file expire-time: 25)
	  (debug:print-info  2 *default-log-port* "server:kind-run: touching " start-flag)
	  (system (conc "touch " start-flag)) ;; lazy but safe
536
537
538
539
540
541
542

543

544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560

561
562
563
564
565
566
567
606
607
608
609
610
611
612
613

614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630

631
632
633
634
635
636
637
638







+
-
+
















-
+







(define (server:start-and-wait areapath #!key (timeout 60))
  (let ((give-up-time (+ (current-seconds) timeout)))
    (let loop ((server-info (server:check-if-running areapath))
	       (try-num    0))
      (if (or server-info
	      (> (current-seconds) give-up-time)) ;; server-url will be #f if no server available.
	  (server:record->url server-info)
	  (let* ( (servers (server:choose-server areapath 'all-valid))
	  (let ((num-ok (length (server:choose-server areapath 'all-valid))))
                (num-ok (if servers (length (server:choose-server areapath 'all-valid)) 0)))
	    (if (and (> try-num 0)  ;; first time through simply wait a little while then try again
		     (< num-ok 1))  ;; if there are no decent candidates for servers then try starting a new one
		(server:run areapath))
	    (thread-sleep! 5)
	    (loop (server:check-if-running areapath)
		  (+ try-num 1)))))))

(define (server:get-num-servers #!key (numservers 2))
  (let ((ns (string->number
	     (or (configf:lookup *configdat* "server" "numservers") "notanumber"))))
    (or ns numservers)))

;; no longer care if multiple servers are started by accident. older servers will drop off in time.
;;
(define (server:check-if-running areapath) ;;  #!key (numservers "2"))
  (let* ((ns            (server:get-num-servers)) ;; get the setting the for maximum number of servers allowed
	 (servers       (server:choose-server areapath 'best-five))) ;; (server:get-best (server:get-list areapath))))
	 (servers       (server:choose-server areapath 'best-ten))) ;; (server:get-best (server:get-list areapath))))
    (if (or (and servers
		 (null? servers))
	    (not servers))
	    ;; (and (list? servers)
	    ;;	 (< (length servers) (+ 1 (random ns))))) ;; somewhere between 1 and numservers
        #f
        (let loop ((hed (car servers))
585
586
587
588
589
590
591
592

593
594
595
596
597
598
599
600
601

602
603
604
605
606
607
608
609
610
611
612
613
614
615
616








617
618
619


620
621

622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638






















639
640
641
642
643
644
645
656
657
658
659
660
661
662

663
664
665
666
667
668
669
670
671

672















673
674
675
676
677
678
679
680



681
682


683

















684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712







-
+








-
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
-
-
-
+
+
-
-
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+








(define (server:kill servr)
  (handle-exceptions
    exn
    (begin 
      (debug:print-info 0 *default-log-port*  "Unable to get host and/or port from " servr ", exn=" exn)     
    #f)
  (match-let (((mod-time hostname port start-time server-id pid)
  (match-let (((hostname port start-time server-id pid)
	       servr))
    (tasks:kill-server hostname pid))))

;; called in megatest.scm, host-port is string hostname:port
;;
;; NOTE: This is NOT called directly from clients as not all transports support a client running
;;       in the same process as the server.
;;
(define (server:ping host-port-in server-id #!key (do-exit #f))
(define (server:ping host:port server-id #!key (do-exit #f))
  (let ((host:port (if (not host-port-in) ;; use read-dotserver to find
		       #f ;; (server:check-if-running *toppath*)
		;; (if (number? host-port-in) ;; we were handed a server-id
		;; 	   (let ((srec (tasks:get-server-by-id (db:delay-if-busy (tasks:open-db)) host-port-in)))
		;; 	     ;; (print "srec: " srec " host-port-in: " host-port-in)
		;; 	     (if srec
		;; 		 (conc (vector-ref srec 3) ":" (vector-ref srec 4))
		;; 		 (conc "no such server-id " host-port-in)))
		       host-port-in))) ;; )
    (let* ((host-port (if host:port
			  (let ((slst (string-split   host:port ":")))
			    (if (eq? (length slst) 2)
				(list (car slst)(string->number (cadr slst)))
				#f))
			  #f)))
  (let* ((host-port (cond
		     ((string? host:port)
		      (let ((slst (string-split   host:port ":")))
			(if (eq? (length slst) 2)
			    (list (car slst)(string->number (cadr slst)))
			    #f)))
		     (else
		      #f))))
;;	   (toppath       (launch:setup)))
      ;; (print "host-port=" host-port)
      (if (not host-port)
    (cond
     ((and (list? host-port)
	  (begin
	    (if host-port-in
	   (eq? (length host-port) 2))
		(debug:print 0 *default-log-port*  "ERROR: bad host:port"))
	    (if do-exit (exit 1))
	    #f)
	  (let* ((iface      (car host-port))
		 (port       (cadr host-port))
		 (server-dat (http-transport:client-connect iface port server-id))
		 (login-res  (rmt:login-no-auto-client-setup server-dat)))
	    (if (and (list? login-res)
		     (car login-res))
		(begin
		  ;; (print "LOGIN_OK")
		  (if do-exit (exit 0))
		  #t)
		(begin
		  ;; (print "LOGIN_FAILED")
		  (if do-exit (exit 1))
		  #f)))))))
      (let* ((myrunremote (make-and-init-remote *toppath*))
	     (iface       (car host-port))
	     (port        (cadr host-port))
	     (server-dat  (client:connect iface port server-id myrunremote))
	     (login-res   (rmt:login-no-auto-client-setup myrunremote)))
	(http-transport:close-connections myrunremote)
	(if (and (list? login-res)
		 (car login-res))
	    (begin
	      ;; (print "LOGIN_OK")
	      (if do-exit (exit 0))
	      #t)
	    (begin
	      ;; (print "LOGIN_FAILED")
	      (if do-exit (exit 1))
	      #f))))
     (else 
      (if host:port
	  (debug:print 0 *default-log-port*  "ERROR: bad host:port "host:port))
      (if do-exit
	  (exit 1)
	  #f)))))

;; run ping in separate process, safest way in some cases
;;
(define (server:ping-server ifaceport)
  (with-input-from-pipe 
   (conc (common:get-megatest-exe) " -ping " ifaceport)
   (lambda ()
666
667
668
669
670
671
672
673

674
675
676
677
678
679
680
733
734
735
736
737
738
739

740
741
742
743
744
745
746
747







-
+







;; Default is 60 seconds.
;;
(define (server:expiration-timeout)
  (let ((tmo (configf:lookup *configdat* "server" "timeout")))
    (if (and (string? tmo)
	     (common:hms-string->seconds tmo)) ;; BUG: hms-string->seconds is broken, if given "10" returns 0. Also, it doesn't belong in this logic unless the string->number is changed below
        (* 3600 (string->number tmo))
	60)))
	600)))

(define (server:get-best-guess-address hostname)
  (let ((res #f))
    (for-each 
     (lambda (adr)
       (if (not (eq? (u8vector-ref adr 0) 127))
	   (set! res adr)))

Modified subrun.scm from [8e4ec606e5] to [4fc2a9b685].

21
22
23
24
25
26
27

28
29
30
31
32
33
34


35
36
37
38
39
40
41
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44







+







+
+







(use (prefix sqlite3 sqlite3:) srfi-1 posix regex regex-case srfi-69 (srfi 18) 
     posix-extras directory-utils pathname-expand typed-records format
     call-with-environment-variables)
(declare (unit subrun))
;;(declare (uses runs))
(declare (uses db))
(declare (uses common))
(declare (uses commonmod))
;;(declare (uses items))
;;(declare (uses runconfig))
;;(declare (uses tests))
;;(declare (uses server))
(declare (uses mt))
;;(declare (uses archive))
;; (declare (uses filedb))

(import commonmod)

;(include "common_records.scm")
;;(include "key_records.scm")
(include "db_records.scm") ;; provides db:test-get-id
;;(include "run_records.scm")
;;(include "test_records.scm")

133
134
135
136
137
138
139
140

141
142
143
144
145
146
147
136
137
138
139
140
141
142

143
144
145
146
147
148
149
150







-
+







(define (subrun:launch-cmd test-run-dir run-mode #!optional (sub-cmd "-run")) ;; BUG: "-run" should be changed to "-rerun-clean" but current doesn't work
  (if (subrun:subrun-removed? test-run-dir)
      (subrun:unset-subrun-removed test-run-dir))      

  (let* ((log-prefix "run")
         (switches (subrun:selector+log-switches test-run-dir log-prefix))
         (run-wait (equal? run-mode "yes"))
         (cmd      (conc "megatest " sub-cmd " " switches" "
         (cmd      (conc (common:get-mtexe)" "sub-cmd" "switches" "
                         (if run-wait "-run-wait " ""))))
    cmd))


(define (subrun:sanitize-path inpath)
  (let* ((insane-pattern (irregex "[^[a-zA-Z0-9_\\-]")))
    (regex#string-substitute insane-pattern "_" inpath #t)))
230
231
232
233
234
235
236


237


238
239


240
241
242
243
244
245
246

247
248
249
250
251
252
253
233
234
235
236
237
238
239
240
241
242
243
244


245
246
247
248
249
250
251
252

253
254
255
256
257
258
259
260







+
+

+
+
-
-
+
+






-
+







            (map
             (lambda (x)
               (list (car x) (cdr x)))
             switch-alist))
           " ")))
    res))

;; NOTE: Here we run sub megatest but this is not intended for one version
;;       of megatest to test another version. Thus we propagate the 
(define (subrun:exec-sub-megatest test-run-dir action-switches-str log-prefix)
  (let* ((mtpathdir          (common:get-megatest-exe-dir))
	 (mtexe              (common:get-mtexe))
  (let* ((selector-switches  (subrun:selector+log-switches test-run-dir log-prefix))
         (cmd (conc "megatest " selector-switches " " action-switches-str ))
	 (selector-switches  (subrun:selector+log-switches test-run-dir log-prefix))
         (cmd (conc mtexe" "selector-switches" "action-switches-str ))
         (pid #f)
         (proc (lambda ()
                 (debug:print-info 0 *default-log-port* "Running sub megatest command: "cmd)
                 ;;(set! pid (process-run "/usr/bin/xterm" (list ))))))
                 (set! pid (process-run "/bin/bash" (list "-c" cmd))))))
    (call-with-environment-variables 
     (list (cons "PATH" (conc (get-environment-variable "PATH") ":.")))
     (list (cons "PATH"  (common:get-megatest-exe-path)))
     (lambda  ()
       (common:without-vars proc "^MT_.*")))
    (let processloop ((i 0))
      (let-values (((pid-val exit-status exit-code)(process-wait pid #t)))
        (if (eq? pid-val 0)
            (begin
              (thread-sleep! 2)

Modified tasks.scm from [0f38bdbcce] to [499c2cc5ba].

23
24
25
26
27
28
29


30
31
32
33
34
35
36
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38







+
+








(declare (unit tasks))
(declare (uses dbfile))
(declare (uses db))
(declare (uses rmt))
(declare (uses common))
(declare (uses pgdb))
(declare (uses commonmod))
(import commonmod)

(import dbfile)
;; (import pgdb) ;; pgdb is a module

(include "task_records.scm")
(include "db_records.scm")

Modified tcmt.scm from [6658a745e5] to [bb0554607f].

28
29
30
31
32
33
34


35
36
37
38
39
40
41
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43







+
+







(use trace)
;; (trace-call-sites #t)

(declare (uses margs))
(declare (uses rmt))
(declare (uses common))
;; (declare (uses megatest-version))
(declare (uses commonmod))
(import commonmod)

(include "megatest-version.scm")
(include "megatest-fossil-hash.scm")
(include "db_records.scm")

(define origargs (cdr (argv)))
(define remargs (args:get-args

Added tcp-transportmod.scm version [e26313be2b].
















































































































































































































































































































































































































































































































































































































































































































1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
;;======================================================================
;; Copyright 2017, Matthew Welland.
;; 
;; This file is part of Megatest.
;; 
;;     Megatest is free software: you can redistribute it and/or modify
;;     it under the terms of the GNU General Public License as published by
;;     the Free Software Foundation, either version 3 of the License, or
;;     (at your option) any later version.
;; 
;;     Megatest is distributed in the hope that it will be useful,
;;     but WITHOUT ANY WARRANTY; without even the implied warranty of
;;     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;;     GNU General Public License for more details.
;; 
;;     You should have received a copy of the GNU General Public License
;;     along with Megatest.  If not, see <http://www.gnu.org/licenses/>.

;;======================================================================

(declare (unit tcp-transportmod))
(declare (uses debugprint))
(declare (uses commonmod))
(declare (uses dbfile))
(declare (uses dbmod))

(use address-info)

(module tcp-transportmod
	*
	
  (import scheme
	  (prefix sqlite3 sqlite3:)
	  chicken
	  data-structures

	  address-info
	  directory-utils
	  extras
	  files
	  hostinfo
	  matchable
	  md5
	  message-digest
	  ports
	  posix
	  regex
	  regex-case
	  s11n
	  srfi-1
	  srfi-18
	  srfi-4
	  srfi-69
	  stack
	  typed-records
	  tcp-server
	  tcp
	  
	  debugprint
	  commonmod
	  dbfile
	  dbmod
	)

;;======================================================================
;; client
;;======================================================================

;; (define keep-age-param (make-parameter 10)) ;; qif file age, if over move to attic

;; Used ONLY for client
;;
(defstruct tt-conn
  host
  port
  host-port
  dbfname
  server-id
  server-start
  pid
)

;; Used for BOTH clients and servers
(defstruct tt
  ;; client related
  (conns (make-hash-table)) ;; dbfname -> conn

  ;; server related
  (areapath     #f)
  (host         #f)
  (port         #f)
  (conn         #f)
  (cleanup-proc #f)
  (handler      #f) ;; receives data and responds
  (socket       #f)
  (thread       #f)
  (host-port    #f)
  (cmd-thread   #f)
  (ro-mode      #f)
  (ro-mode-checked #f)
  (last-access  (current-seconds))
  (servinf-file #f)
  (last-serv-start 0)
  )

(define (tt:make-remote areapath)
  (make-tt areapath: areapath))

;; 1 ... or #f
(define (tt:valid-run-id run-id)
  (or (number? run-id)
      (not run-id)))

;; do all the busy work of finding and setting up conn for
;; connecting to a server
;; 
(define (tt:client-connect-to-server ttdat dbfname run-id testsuite)
  (assert (tt:valid-run-id run-id) "FATAL: invalid run-id "run-id)
  (let* ((conn (hash-table-ref/default (tt-conns ttdat) dbfname #f))
	 (server-start-proc (lambda ()
			      (tt:server-process-run
			       (tt-areapath ttdat)
			       testsuite ;; (dbfile:testsuite-name)
			       (common:find-local-megatest)
			       run-id))))
    (if conn
	conn ;; we are already connected to the server
	(let* ((sdat (tt:get-current-server-info ttdat dbfname)))
	  (match sdat
	    ((host port start-time server-id pid dbfname2 servinffile)
	     (assert (equal? dbfname dbfname2) "FATAL: read server info from wrong file.")
	     (let* ((host-port (conc host":"port))
		    (conn (make-tt-conn
			   host: host
			   port: port
			   host-port: host-port
			   dbfname: dbfname
			   servinf-file: servinffile
			   server-id: server-id
			   server-start: start-time
			   pid: pid)))
	       (hash-table-set! (tt-conns ttdat) dbfname conn)
	       ;; verify we can talk to this server
	       (if (tt:ping host port server-id)
		   conn
		   (let* ((curr-secs (current-seconds)))
		     ;; rm the (last server) would go here
		     (if (> (- curr-secs (tt-last-serv-start ttdat)) 10)
			 (begin
			   (tt-last-serv-start-set! ttdat curr-secs)
			   (server-start-proc))) ;; don't try and start server unless 30 sec has gone by since last attempt
		     (thread-sleep! 1)
		     (tt:client-connect-to-server ttdat dbfname run-id testsuite)))))
	    (else
	     (if (> (- (current-seconds) (tt-last-serv-start ttdat)) 5) ;; really do not want to swamp the machine with servers
		 (begin
		   (debug:print-info 0 *default-log-port* "No server found. Starting one for run-id "run-id" in dbfile "dbfname)
		   (server-start-proc)
		   (tt-last-serv-start-set! ttdat (current-seconds))))
	     (thread-sleep! 1)
	     (tt:client-connect-to-server ttdat dbfname run-id testsuite)))))))
    
(define (tt:ping host port server-id)
  (let*  ((res (tt:send-receive-direct host port `(ping #f #f #f)))) ;; please send me your server-id
    ;;
    ;; need two threads, one a 5 second timer
    ;;
    (match res
      ((status errmsg result meta)
       (if (equal? result server-id)
	   (begin
	     ;; (debug:print 0 *default-log-port* "Ping to "host":"port" successful.")
	     #t) ;; then we are good
	   (begin
	     (debug:print 0 *default-log-port* "WARNING: server-id does not match, expected: "server-id", got: "result)
	     #f)))
      (else
       ;; (debug:print 0 *default-log-port* "res not in form (status errmsg result meta), got: "res)
       #f))))

;; client side handler
;;
;;(tt:handler #<tt> get-keys #f () 2 #f "/home/matt/data/megatest/ext-tests" #f "main.db" "ext-tests" "/home/matt/data/megatest/bin/.22.04/../megatest")
;;
(define (tt:handler ttdat cmd run-id params attemptnum area-dat areapath readonly-mode dbfname testsuite mtexe)
  ;; NOTE: areapath is passed in and in tt struct. We'll use passed in value for now.
  (let* ((conn (tt:client-connect-to-server ttdat dbfname run-id testsuite))) ;; (hash-table-ref/default (tt-conns ttdat) dbfname #f)))
    (if conn
	;; have connection, call the server
	(let* ((res (tt:send-receive ttdat conn cmd run-id params)))
	  ;; res is (status errmsg result meta)
	  (match res
	    ((status errmsg result meta)
	     (if (list? meta)
		 (let* ((delay-wait (alist-ref 'delay-wait meta)))
		   (if (and (number? delay-wait)
			    (> delay-wait 0))
		       (begin
			 (debug:print 0 *default-log-port* "Server is loaded, delaying "delay-wait" seconds")
			 (thread-sleep! delay-wait)))))
	     (case status
	       ((busy) ;; result will be how long the server wants you to delay
		(debug:print 0 *default-log-port* "WARNING: server is overloaded, will try again in "result" seconds.")
		(thread-sleep! (if (number? result) result 2))
		(tt:handler  ttdat cmd run-id params (+ attemptnum 1) area-dat areapath readonly-mode dbfname testsuite mtexe))
	       ((loaded)
		(debug:print 0 *default-log-port* "WARNING: server is loaded, will try again in a 1/4 second.")
		(thread-sleep! 0.25)
		(tt:handler  ttdat cmd run-id params (+ attemptnum 1) area-dat areapath readonly-mode dbfname testsuite mtexe))
	       (else
		result)))
	    (else
	     (if (not res)
		 (let* ((host    (tt-conn-host conn))
			(port    (tt-conn-port conn))
			;; (dbfname (tt-conn-port conn)) ;; 192.168.0.127:4242-726924:4.db
			(pid     (tt-conn-pid  conn))
			(servinf (conc areapath"/.servinfo/"host":"port"-"pid":"dbfname)))
		   (hash-table-set! (tt-conns ttdat) dbfname #f)
		   (if (file-exists? servinf)
		       (begin
			 (debug:print 0 *default-log-port* "INFO: connection to server "host":"port" broken for "dbfname", attempting to remove servinfo file.")
			 (delete-file* servinf))
		       (debug:print 0 *default-log-port* "INFO: connection to server "host":"port" broken for "dbfname", but do not see servinf file "servinf))
		   (tt:handler ttdat cmd run-id params (+ attemptnum 1) area-dat areapath readonly-mode dbfname testsuite mtexe))
		 (assert #f "FATAL: tt:handler received bad data "res)))))
	(begin
	  (thread-sleep! 1) ;; give it a rest and try again
	  (tt:handler ttdat cmd run-id params attemptnum area-dat areapath readonly-mode dbfname testsuite mtexe)))))

	;; no conn yet, find and or start and find a server
;; 	(let* ((server (tt:find-server ttdat dbfname)))
;; 	  (if server
;; 	      (let* ((conn (tt:client-connect-to-server server)))
;; 		(hash-table-set! (tt-conns ttdat) dbfname conn)
;; 		(tt:handler  ttdat cmd run-id params attemptnum area-dat areapath readonly-mode
;; 			     dbfname testsuite mtexe))
;; 	      ;; no server, try to start a server process
;; 	      (begin
;; 		(tt:server-process-run areapath testsuite mtexe run-id) ;;  #!key (profile-mode "")) 
;; 		(thread-sleep! 1)
;; 		(tt:handler  ttdat cmd run-id params attemptnum area-dat areapath
;; 			     readonly-mode dbfname testsuite mtexe)))))))

(define (tt:bid-for-servership run-id)
  #f)

;; gets server info and appends path to server file
;; sorts by age, oldest first
;;
;; returns list of (host port startseconds server-id servinfofile)
;;
(define (tt:get-server-info-sorted ttdat dbfname)
  (let* ((areapath (tt-areapath ttdat))
	 (sfiles   (tt:find-server areapath dbfname))
	 (sdats    (filter car (map tt:server-get-info sfiles))) ;; first element is #f if the file disappeared while being read
	 (sorted   (sort sdats (lambda (a b)
				 (< (list-ref a 2)(list-ref b 2))))))
    sorted))
    
(define (tt:get-current-server-info ttdat dbfname)
  (assert (tt-areapath ttdat) "FATAL: areapath not set in ttdat.")
  ;;
  ;; TODO - replace most of below with tt;get-server-info-sorted
  ;;
  (let* ((areapath (tt-areapath ttdat))
	 (sfiles   (tt:find-server areapath dbfname))
	 (sdats    (filter car (map tt:server-get-info sfiles))) ;; first element is #f if the file disappeared while being read
	 (sorted   (sort sdats (lambda (a b)
				 (< (list-ref a 2)(list-ref b 2))))))
    (if (null? sorted)
	#f  ;; we'll want to wait until extra servers have exited
	(car sorted))))

(define (tt:send-receive ttdat conn cmd run-id params)
  (let* ((host-port (tt-conn-host-port conn)) ;; (conc (tt-conn-host conn)":"(tt-conn-port conn)))
	 (host      (tt-conn-host conn))
	 (port      (tt-conn-port conn))
	 (dat       (list cmd run-id params #f))) ;; no meta data yet
    (tt:send-receive-direct host port dat)))

(define (tt:send-receive-direct host port dat)
  (assert (number? port) "FATAL: tt:send-receive-direct called with port not a number "port)
  (handle-exceptions
      exn
    #f ;; Add condition-case or better handling here
    (let-values (((inp oup)(tcp-connect host port)))
      (let ((res (if (and inp oup)
		     (begin
		       (serialize dat oup)
		       (close-output-port oup)
		       (deserialize inp))
		     )))
	(close-input-port inp)
	res))))



;;======================================================================
;; server
;;======================================================================

(define (tt:sync-dbs ttdat)
  #f)

;; start the listener and start responding to requests
;;
;; NOTE: organise by dbfname, not run-id so we don't need
;;       to pull in more modules
;;
;; This is the routine called in megatest.scm to start a server
;;
;; Server viability is checked in keep-running. Blindly start and run here.
;;
(define (tt:start-server areapath run-id dbfname-in handler keys)
  (assert areapath "FATAL: areapath not provided for tt:start-server")
  ;; is there already a server for this dbfile? Then exit.
  (let* ((ttdat   (make-tt areapath: areapath))
	 (dbfname (or dbfname-in (dbmod:run-id->dbfname run-id))))
    ;; (servers (tt:find-server areapath dbfname))) ;; should use tt:get-current-server-info instead
    ;; (if (null? servers)
    (let* ((dbstruct   (dbmod:open-dbmoddb areapath run-id dbfname (dbfile:db-init-proc) keys)))
      (tt-handler-set! ttdat (handler dbstruct))
      (let* ((tcp-thread (make-thread
			  (lambda ()
			    (tt:start-tcp-server ttdat)) ;; start the tcp-server which applies handler to incoming data
			  "tcp-server-thread"))
	     (run-thread (make-thread
			  (lambda ()
			    (tt:keep-running ttdat dbfname dbstruct)))))
	(thread-start! tcp-thread)
	(thread-start! run-thread)
	(thread-join! run-thread) ;; run thread will exit on timeout or other conditions
	(exit)))
    ;;(begin
    ;; (debug:print 0 *default-log-port* "INFO: found server(s) already running for db "dbfname", "(string-intersperse servers ",")" Exiting.")
    ;; (exit)))))
    ))

(define (tt:keep-running ttdat dbfname dbstruct)
  ;; verfiy conn for ready
  ;; listener socket has been started by this stage
  ;; wait for a port before creating the registration file
  ;;
  (let* ((cleanup (lambda ()
		    (if (tt-cleanup-proc ttdat)
			((tt-cleanup-proc ttdat))))))
    (let loop ((count 0))
      (if (> count 240)
	  (begin
	    (debug:print 0 *default-log-port* "FATAL: Could not start a tcp server, giving up.")
	    (exit 1))
	  (if (not (tt-port ttdat)) ;; no connection yet
	      (begin
		(thread-sleep! 0.25)
		(loop (+ count 1))))))
    
    ;; load or reload the data into inmem db before
    ;; ((dbr:dbstruct-sync-proc dbstruct) (dbr:dbstruct-last-update dbstruct))
    ;; (dbr:dbstruct-last-update-set! dbstruct (- (current-seconds) 1))
    (tt:create-server-registration-file ttdat dbfname)
    ;; now start watching the last-access, if it hasn't been touched
    ;; in over ten seconds we exit
    (thread-sleep! 0.05) ;; any real need for delay here?
    (let loop ()
      (let* ((servers (tt:get-server-info-sorted ttdat dbfname))
	     (ok      (cond
		       ((null? servers) #f) ;; not ok
		       ((equal? (list-ref (car servers) 6) ;; compare the servinfofile
				(tt-servinf-file ttdat))
			(debug:print-info 0 *default-log-port* "Keep running, I'm the top server.")
			#t)
		       (else
			(debug:print-info 0 *default-log-port* "I'm not the lead server: "servers)
			(let* ((leadsrv (car servers)))
			  (match leadsrv
			    ((host port startseconds server-id pid dbfname servinfofile)
			     (if (tt:ping host port server-id)
				 #f ;; not the server, but all good, want to exit
				 (if (and (file-exists? servinfofile)
					  (> (- (current-seconds)(file-modification-time servinfofile)) 5))
				     (begin
				       ;; can't ping and file has been on disk 5 seconds, go ahead and try to remove it
				       (debug:print-info 0 *default-log-port* "Removing apparently dead server info file: "servinfofile)
				       (delete-file* servinfofile)
				       #t) ;; not the server but the server is not reachable
				     #t)))
			    (else ;; should never get here
			     (debug:print 0 *default-log-port* "BAD SERVER RECORD: "leadsrv)
			     (assert #f "Bad server record "leadsrv))))))))
	(if ok
	    ;; (if (> *api-process-request-count* 0) ;; have requests in flight
	    ;;	(tt-last-access-set! ttdat (current-seconds)))
	    (tt-last-access-set! ttdat *db-last-access*) ;; bit silly, just use db-last-access
	    (begin
	      (cleanup)
	      (exit)))

	(let* ((last-update (dbr:dbstruct-last-update dbstruct))
	       (curr-secs   (current-seconds)))
	  (if (> (- curr-secs last-update) 3) ;; every 3-4 seconds update the db? maybe this should be refresh the inmem?
	      (begin
		((dbr:dbstruct-sync-proc dbstruct) last-update)
		(dbr:dbstruct-last-update-set! dbstruct curr-secs))))
	  
	(if (< (- (current-seconds) (tt-last-access ttdat)) 60)
	    (begin
	      (thread-sleep! 5)
	      (loop)))))
    (cleanup)
    (debug:print 0 *default-log-port* "INFO: Server timed out, exiting.")))

  
;; ;; given an already set up uconn start the cmd-loop
;; ;;
;; (define (tt:cmd-loop ttdat)
;;   (let* ((serv-listener (-socket uconn))
;; 	 (listener      (lambda ()
;; 			  (let loop ((state 'start))
;; 			    (let-values (((inp oup)(tcp-accept serv-listener)))
;; 			      ;; (mutex-lock! *send-mutex*) ;; DOESN'T SEEM TO HELP
;; 			      (let* ((rdat  (deserialize inp)) ;; '(my-host-port qrykey cmd params)
;; 				     (resp  (ulex-handler uconn rdat)))
;; 				(serialize resp oup)
;; 				(close-input-port inp)
;; 				(close-output-port oup)
;; 				;; (mutex-unlock! *send-mutex*) ;; DOESN'T SEEM TO HELP
;; 				)
;; 			      (loop state))))))
;;     ;; start N of them
;;     (let loop ((thnum   0)
;; 	       (threads '()))
;;       (if (< thnum 100)
;; 	  (let* ((th (make-thread listener (conc "listener" thnum))))
;; 	    (thread-start! th)
;; 	    (loop (+ thnum 1)
;; 		  (cons th threads)))
;; 	  (map thread-join! threads)))))
;; 
;; 
;; 
;; (define (wait-and-close uconn)
;;   (thread-join! (udat-cmd-thread uconn))
;;   (tcp-close (udat-socket uconn)))
;; 
;; 

(define (tt:shutdown-server ttdat)
  (let* ((cleanproc (tt-cleanup-proc ttdat)))
    (if cleanproc (cleanproc))
    (tcp-close (tt-socket ttdat)) ;; close up ports here
    ))

;; (define (wait-and-close uconn)
;;   (thread-join! (tt-cmd-thread uconn))
;;   (tcp-close (tt-socket uconn)))

;; return servid
;; side-effects:
;;   ttdat-cleanup-proc is populated with function to remove the serverinfo file
(define (tt:create-server-registration-file ttdat dbfname)
  (let* ((areapath (tt-areapath ttdat))
	 (servdir  (tt:get-servinfo-dir areapath))
	 (host     (tt-host ttdat))
	 (port     (tt-port ttdat))
	 (servinf (conc servdir"/"host":"port"-"(current-process-id)":"dbfname))
	 (serv-id (tt:mk-signature areapath))
	 (clean-proc (lambda ()
		       (delete-file* servinf))))
    (assert (and host port) "FATAL: tt:create-server-registration-file called with no conn, dbfname="dbfname)
    (tt-cleanup-proc-set! ttdat clean-proc)
    (tt-servinf-file-set! ttdat servinf)
    (with-output-to-file servinf
      (lambda ()
	(print "SERVER STARTED: "host":"port" AT "(current-seconds)" server-id: "serv-id" pid: "(current-process-id)" dbfname: "dbfname)))
      serv-id))

;; find valid server
;; get servers listed, last part of name must match :<dbfname>
;; if more than one, wait one second and look again
;; future: ping oldest, if alive remove other :<dbfname> files
;;
(define (tt:find-server areapath dbfname)
  (let* ((servdir  (tt:get-servinfo-dir areapath))
	 (sfiles   (glob (conc servdir"/*:"dbfname))))
    sfiles))

;; given a path to a server info file return: host port startseconds server-id pid dbfname logf
;; example of what it's looking for in the log file:
;;     SERVER STARTED: 10.38.175.67:50216 AT 1616502350.0 server-id: 4907e90fc55c7a09694e3f658c639cf4 
;;
(define (tt:server-get-info logf)
  (let ((server-rx    (regexp "^SERVER STARTED: (\\S+):(\\d+) AT ([\\d\\.]+) server-id: (\\S+) pid: (\\d+) dbfname: (\\S+)")) ;; SERVER STARTED: host:port AT timesecs server id
        (dbprep-rx    (regexp "^SERVER: dbprep"))
        (dbprep-found 0)
	(bad-dat      (list #f #f #f #f #f #f logf)))
     (let ((fdat     (handle-exceptions
			 exn
		       (begin
			 ;; WARNING: this is potentially dangerous to blanket ignore the errors
			 (debug:print-info 0 *default-log-port* "Unable to get server info from "logf", exn=" exn)
			 '()) ;; no idea what went wrong, call it a bad server, return empty list
		       (with-input-from-file logf read-lines))))
       (if (null? fdat) ;; bad data, return bad-dat
	   bad-dat
	   (let loop ((inl  (car fdat))
		      (tail (cdr fdat))
		      (lnum 0))
	     (let ((mlst (string-match server-rx inl))
		   (dbprep (string-match dbprep-rx inl)))
	       (if dbprep (set! dbprep-found 1))
	       (if (not mlst)
		   (if (> lnum 500) ;; give up if more than 500 lines of server log read
		       bad-dat
		       (if (null? tail)
			   bad-dat
			   (loop (car tail)(cdr tail)(+ lnum 1))))
		   (match mlst ;; have a not null list
		     ((_ host port start server-id pid dbfname)
		      (list host
			    (string->number port)
			    (string->number start)
			    server-id
			    (string->number pid)
			    dbfname
			    logf))
		     (else
		      (debug:print 0 *default-log-port* "ERROR: did not recognise SERVER line info "mlst)
		      bad-dat)))))))))

;; Given an area path,  start a server process    ### NOTE ### > file 2>&1 
;; if the target-host is set 
;; try running on that host
;;   incidental: rotate logs in logs/ dir.
;;
(define  (tt:server-process-run areapath testsuite mtexe run-id #!key (profile-mode "")) ;; areapath is *toppath* for a given testsuite area
  (assert areapath  "FATAL: tt:server-process-run called without areapath defined.")
  (assert testsuite "FATAL: tt:server-process-run called without testsuite defined.")
  (assert mtexe     "FATAL: tt:server-process-run called without mtexe defined.")
  ;; mtest -server - -m testsuite:ext-tests -db 6.db
  (let* ((dbfname  (dbmod:run-id->dbfname run-id))
	 (load     (get-normalized-cpu-load))
	 (nrun     (number-of-processes-running (conc "mtest.*server.*"testsuite".*"dbfname))))
    (cond
     ((> load 2.0)
      (debug:print 0 *default-log-port* "Normalized load "load" is over the limit of 2.0. Not starting a server.")
      (thread-sleep! 1))
     ((> nrun 100)
      (debug:print 0 *default-log-port* nrun" servers running on this host, not starting another.")
      (thread-sleep! 1))
     (else
      (if (not (file-exists? (conc areapath"/logs")))
	      (create-directory (conc areapath"/logs") #t))
	  (let* ((logfile   (conc areapath "/logs/server.log")) ;; -" curr-pid "-" target-host ".log"))
		 (cmdln     (conc
			     mtexe
			     " -server - ";; (or target-host "-")
			     " -m testsuite:" testsuite
			     ;; " -run-id " (or run-id "main") ;; NO, we do NOT want to have run id as part of this
			     " -db "  dbfname ;; (dbmod:run-id->dbfname run-id)
			     " " profile-mode
			     ))) ;; (conc " >> " logfile " 2>&1 &")))))
	    ;; we want the remote server to start in *toppath* so push there
	    ;; (push-directory areapath) ;; use cd in the command line instead
	    (debug:print 0 *default-log-port* "INFO: Trying to start server in tcp mode (" cmdln ") at "(common:human-time)" for "areapath)
	    ;; (debug:print 0 *default-log-port* "INFO: starting server at " (common:human-time))
	    (setenv "NBFAKE_QUIET" "yes") ;; BUG: change to with-environment-variable ...
	    (setenv "NBFAKE_LOG" logfile)
	    (system (conc "cd "areapath" ; nbfake " cmdln))
	    (unsetenv "NBFAKE_QUIET")
	    (unsetenv "NBFAKE_LOG")
	    ;;(pop-directory)
	    )))))

;;======================================================================
;; tcp connection stuff
;;======================================================================

;; find a port and start tcp-server. This only starts the tcp portion of
;; the server, look at (tt:start-server ...) above for the entry point
;; for the entire server system
;;
(define (tt:start-tcp-server ttdat)
  (setup-listener ttdat)
  (let* ((socket   (tt-socket  ttdat))
	 (handler  (tt-handler ttdat)))
    ((make-tcp-server socket handler)
     #f ;; yes, send error messages to std-err
     )))

;; create a tcp listener and return a populated udat struct with
;; my port, address, hostname, pid etc.
;; return #f if fail to find a port to allocate.
;;
;;  if udata-in is #f create the record
;;  if there is already a serv-listener return the udata
;;
(define (setup-listener uconn #!optional (port 4242))
  (assert (tt? uconn) "FATAL: setup-listener called with wrong struct "uconn)
  (handle-exceptions
   exn
   (if (< port 65535)
       (setup-listener uconn (+ port 1))
       #f)
   (connect-listener uconn port)))

(define (connect-listener uconn port)
  ;; (tcp-listener-socket LISTENER)(socket-name so)
  ;; sockaddr-address, sockaddr-port, sockaddr->string
  (let* ((tlsn (tcp-listen port 1000 #f)) ;; (tcp-listen TCPPORT [BACKLOG [HOST]])
	 (addr  (tt:get-best-guess-address (get-host-name)))) ;; (get-my-best-address))) ;; (hostinfo-addresses (host-information (current-hostname)))
    (tt-port-set!      uconn port)
    (tt-host-set!      uconn addr)
    (tt-host-port-set! uconn (conc addr":"port))
    (tt-socket-set!    uconn tlsn)
    uconn))

;;======================================================================
;; utils
;;======================================================================

;; Generate a unique signature for this server
(define (tt:mk-signature areapath)
  (message-digest-string (md5-primitive) 
			 (with-output-to-string
			   (lambda ()
			     (write (list areapath
                                          (current-process-id)
					  (argv)))))))


(define (tt:get-best-guess-address hostname)
  (let ((res #f))
    (for-each 
     (lambda (adr)
       (if (not (eq? (u8vector-ref adr 0) 127))
	   (set! res adr)))
     ;; NOTE: This can fail when there is no mention of the host in /etc/hosts. FIXME
     (vector->list (hostinfo-addresses (hostname->hostinfo hostname))))
    (string-intersperse 
     (map number->string
	  (u8vector->list
	   (if res res (hostname->ip hostname)))) ".")))

(define (tt:get-servinfo-dir areapath)
  (let* ((spath (conc areapath"/.servinfo")))
    (if (not (file-exists? spath))
	(create-directory spath #t))
    spath))

;;======================================================================
;; network utilities
;;======================================================================

;; NOTE: Look at address-info egg as alternative to some of this

(define (rate-ip ipaddr)
  (regex-case ipaddr
    ( "^127\\..*" _ 0 )
    ( "^(10\\.0|192\\.168)\\..*" _ 1 )
    ( else 2 ) ))

;; Change this to bias for addresses with a reasonable broadcast value?
;;
(define (ip-pref-less? a b)
  (> (rate-ip a) (rate-ip b)))

(define (get-my-best-address)
  (let ((all-my-addresses (get-all-ips)))
    (cond
     ((null? all-my-addresses)
      (get-host-name))                                          ;; no interfaces?
     ((eq? (length all-my-addresses) 1)
      (car all-my-addresses))                      ;; only one to choose from, just go with it
     (else
      (car (sort all-my-addresses ip-pref-less?))))))

(define (get-all-ips-sorted)
  (sort (get-all-ips) ip-pref-less?))

(define (get-all-ips)
  (map address-info-host
       (filter (lambda (x)
		 (equal? (address-info-type x) "tcp"))
	       (address-infos (get-host-name)))))

)

Modified tdb.scm from [6edff6262d] to [0211217236].

30
31
32
33
34
35
36


37
38
39
40
41
42
43
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45







+
+







(declare (unit tdb))
(declare (uses common))
(declare (uses keys))
(declare (uses ods))
(declare (uses client))
(declare (uses mt))
(declare (uses db))
(declare (uses commonmod))
(import commonmod)

(include "common_records.scm")
(include "db_records.scm")
(include "key_records.scm")
(include "run_records.scm")

;;======================================================================

Modified tests.scm from [5c2006972a] to [b17b9fa8b7].

1964
1965
1966
1967
1968
1969
1970
1971

1972
1973

1974
1975
1976
1977
1978
1979
1980
1964
1965
1966
1967
1968
1969
1970

1971
1972

1973
1974
1975
1976
1977
1978
1979
1980







-
+

-
+







;;======================================================================
;; test steps
;;======================================================================

;; teststep-set-status! used to be here

(define (test-get-kill-request run-id test-id) ;; run-id test-name itemdat)
  (let* ((testdat   (rmt:get-test-info-by-id run-id test-id)))
  (let* ((testdat   (rmt:get-test-state-status-by-id run-id test-id)))
    (and testdat
	 (equal? (test:get-state testdat) "KILLREQ"))))
	 (equal? (car testdat) "KILLREQ"))))

(define (test:tdb-get-rundat-count tdb)
  (if tdb
      (let ((res 0))
	(sqlite3:for-each-row
	 (lambda (count)
	   (set! res count))

Added transport-mode.scm.template version [0281a19fe7].




1
2
3
+
+
+
;; 'http or 'tcp
(rmt:transport-mode 'tcp)
;; (rmt:transport-mode 'http)

Added ulex/dbmgr.scm version [afcee6ee9f].












































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
;;======================================================================
;; Copyright 2022, Matthew Welland.
;; 
;; This file is part of Megatest.
;; 
;;     Megatest is free software: you can redistribute it and/or modify
;;     it under the terms of the GNU General Public License as published by
;;     the Free Software Foundation, either version 3 of the License, or
;;     (at your option) any later version.
;; 
;;     Megatest is distributed in the hope that it will be useful,
;;     but WITHOUT ANY WARRANTY; without even the implied warranty of
;;     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;;     GNU General Public License for more details.
;; 
;;     You should have received a copy of the GNU General Public License
;;     along with Megatest.  If not, see <http://www.gnu.org/licenses/>.

;;======================================================================

(declare (unit dbmgrmod))

(declare (uses ulex))
(declare (uses apimod))
(declare (uses pkts))
(declare (uses commonmod))
(declare (uses dbmod))
(declare (uses mtargs))
(declare (uses portloggermod))
(declare (uses debugprint))

(module dbmgrmod
    *

(import scheme
	chicken.base
	chicken.condition
	chicken.file
	chicken.format
	chicken.port
	chicken.process
	chicken.process-context
	chicken.process-context.posix
	chicken.sort
	chicken.string
	chicken.time
	
	(prefix sqlite3 sqlite3:)
	matchable
	md5
	message-digest
	regex
	s11n
	srfi-1
	srfi-18
	srfi-69
	system-information
	typed-records
	
	pkts
	ulex

	commonmod
	apimod
	dbmod
	debugprint
	(prefix mtargs args:)
	portloggermod
	)

;; Configurations for server
;; (tcp-buffer-size 2048)
;; (max-connections 2048) 

;; info about me as a listener and my connections to db servers
;; stored (for now) in *db-serv-info*
;;
(defstruct servdat
  (host #f)
  (port #f)
  (uuid #f)
  (dbfile #f)
  (uconn   #f) ;; this is the listener *FOR THIS PROCESS*
  (mode    #f)
  (status 'starting)
  (trynum 0) ;; count the number of ports we've tried
  (conns  (make-hash-table)) ;; apath/dbname => conndat
  ) 

(define *db-serv-info* (make-servdat))

(define (servdat->url sdat)
  (conc (servdat-host sdat)":"(servdat-port sdat)))

;; db servers contact info
;;
(defstruct conndat
  (apath    #f)
  (dbname   #f)
  (fullname #f)
  (hostport #f)
  (ipaddr   #f)
  (port     #f)
  (srvpkt   #f)
  (srvkey   #f)
  (lastmsg  0)
  (expires  0))

(define *srvpktspec*
  `((server (host    . h)
	    (port    . p)
	    (servkey . k)
	    (pid     . i)
	    (ipaddr  . a)
	    (dbpath  . d))))

;;======================================================================
;;  S U P P O R T   F U N C T I O N S
;;======================================================================

;; set up the api proc, seems like there should be a better place for this?
;;
;; IS THIS NEEDED ANYMORE? TODO - REMOVE IF POSSIBLE
;;
;; (define api-proc (make-parameter conc))
;; (api-proc api:execute-requests)

;; do we have a connection to apath dbname and
;; is it not expired? then return it
;;
;; else setup a connection
;;
;; if that fails, return '(#f "some reason") ;; NB// convert to raising an exception
;;
(define (rmt:get-conn remdat apath dbname)
  (let* ((fullname (db:dbname->path apath dbname)))
    (hash-table-ref/default (servdat-conns remdat) fullname #f)))

(define (rmt:drop-conn remdat apath dbname)
  (let* ((fullname (db:dbname->path apath dbname)))
    (hash-table-delete! (servdat-conns remdat) fullname)))
  
(define (rmt:find-main-server uconn apath dbname)
  (let* ((pktsdir     (get-pkts-dir apath))
	 (all-srvpkts (get-all-server-pkts pktsdir *srvpktspec*))
	 (viable-srvs (get-viable-servers all-srvpkts dbname)))
    (get-the-server uconn apath viable-srvs)))


(define *connstart-mutex* (make-mutex))
(define *last-main-start* 0)

;; looks for a connection to main, returns if have and not exired
;; creates new otherwise
;; 
;; connections for other servers happens by requesting from main
;;
;; TODO: This is unnecessarily re-creating the record in the hash table
;;
(define (rmt:open-main-connection remdat apath)
  (let* ((fullpath (db:dbname->path apath ".db/main.db"))
	 (conns    (servdat-conns remdat))
	 (conn     (rmt:get-conn remdat apath ".db/main.db")) ;; (hash-table-ref/default conns fullpath #f)) ;; TODO - create call for this
	 (start-rmt:run (lambda ()
			  (let* ((th1 (make-thread (lambda ()(rmt:run (get-host-name))) "non-db mode server")))
			    (thread-start! th1)
			    (thread-sleep! 1)
			    (let loop ((count 0))
			      (assert (< count 30) "FATAL: responder failed to initialize in rmt:open-main-connection")
			      (if (or (not *db-serv-info*)
				      (not (servdat-uconn *db-serv-info*)))
				  (begin
				    (thread-sleep! 1)
				    (loop (+ count 1)))
				  (begin
				    (servdat-mode-set! *db-serv-info* 'non-db)
				    (servdat-uconn *db-serv-info*)))))))
	 (myconn    (servdat-uconn *db-serv-info*)))
    (cond
     ((not myconn)
      (start-rmt:run)
      (rmt:open-main-connection remdat apath))
     ((and conn                                             ;; conn is NOT a socket, just saying ...
	   (< (current-seconds) (conndat-expires conn)))
      #t) ;; we are current and good to go - we'll deal elsewhere with a server that was killed or died 
     ((and conn
	   (>= (current-seconds)(conndat-expires conn)))
      (debug:print-info 0 *default-log-port* "connection to "fullpath" server expired. Reconnecting.")
      (rmt:drop-conn remdat apath ".db/main.db") ;;
      (rmt:open-main-connection remdat apath))
     (else
      ;; Below we will find or create and connect to main
      (debug:print-info 0 *default-log-port* "rmt:open-main-connection - starting from scratch")
      (let* ((dbname         (db:run-id->dbname #f))
	     (the-srv        (rmt:find-main-server myconn apath dbname))
	     (start-main-srv (lambda () ;; call IF there is no the-srv found
			       (mutex-lock! *connstart-mutex*)
			       (if (> (- (current-seconds) *last-main-start*) 5) ;; at least four seconds since last attempt to start main server
				   (begin
				     (api:run-server-process apath dbname)
				     (set! *last-main-start* (current-seconds))
				     (thread-sleep! 1))
				   (thread-sleep! 0.25))
			       (mutex-unlock! *connstart-mutex*)
			       (rmt:open-main-connection remdat apath) ;; TODO: Add limit to number of tries
			       )))
	(if (not the-srv) ;; have server, try connecting to it
	    (start-main-srv)
	    (let* ((srv-addr (server-address the-srv)) ;; need serv
		   (ipaddr   (alist-ref 'ipaddr  the-srv))
		   (port     (alist-ref 'port    the-srv))
		   (srvkey   (alist-ref 'servkey the-srv))
		   (fullpath (db:dbname->path apath dbname))
		   
		   (new-the-srv (make-conndat
				 apath:   apath
				 dbname:  dbname
				 fullname: fullpath
				 hostport: srv-addr
				 ;; socket: (open-nn-connection srv-addr)  - TODO - open ulex connection?
				 ipaddr: ipaddr
				 port: port
				 srvpkt: the-srv
				 srvkey: srvkey ;; generated by rmt:get-signature on the server side
				 lastmsg: (current-seconds)
				 expires: (+ (current-seconds)
					     (server:expiration-timeout)
					     -2) ;; this needs to be gathered during the ping
				 )))
	      (hash-table-set! conns fullpath new-the-srv)))
	#t)))))

;; NB// sinfo is a servdat struct
;;
(define (rmt:general-open-connection sinfo apath dbname #!key (num-tries 5))
  (assert (not (equal? dbname ".db/main.db")) "ERROR: general-open-connection should never be called with main as the db")
  (let* ((mdbname  ".db/main.db") ;; (db:run-id->dbname #f)) TODO: put this back to the lookup when stable
	 (fullname (db:dbname->path apath dbname))
	 (conns    (servdat-conns sinfo))
	 (mconn    (rmt:get-conn sinfo apath ".db/main.db"))
	 (dconn    (rmt:get-conn sinfo apath dbname)))
    #;(if (and mconn
	     (not (debug:print-logger)))
	(begin
	  (debug:print-info 0 *default-log-port* "Turning on logging to main, look in logs dir for main log.")
	  (debug:print-logger rmt:log-to-main)))
    (cond
     ((and mconn
	   dconn
	   (< (current-seconds)(conndat-expires dconn)))
      #t) ;; good to go
     ((not mconn) ;; no channel open to main? open it...
      (rmt:open-main-connection sinfo apath)
      (rmt:general-open-connection sinfo apath dbname num-tries: (- num-tries 1)))
     ((not dconn)                 ;; no channel open to dbname?     
      (let* ((res (rmt:send-receive-real sinfo apath mdbname 'get-server `(,apath ,dbname))))
	(case res
	  ((server-started)
	   (if (> num-tries 0)
	       (begin
		 (thread-sleep! 2)
		 (rmt:general-open-connection sinfo apath dbname num-tries: (- num-tries 1)))
	       (begin
		 (debug:print-error 0 *default-log-port* "Failed to start servers needed or open channel to "apath", "dbname)
		 (exit 1))))
	  (else
	   (if (list? res) ;; server has been registered and the info was returned. pass it on.
	       (begin ;;  ("192.168.0.9" 53817
		      ;;  "5e34239f48e8973b3813221e54701a01" "24310"
		      ;;  "192.168.0.9"
		      ;;  "/home/matt/data/megatest/tests/simplerun"
		 ;;  ".db/1.db")
		 (match
		  res
		  ((host port servkey pid ipaddr apath dbname)
		   (debug:print-info 0 *default-log-port* "got "res)
		   (hash-table-set! conns
				    fullname
				    (make-conndat
				     apath: apath
				     dbname: dbname
				     hostport: (conc host":"port)
				     ;; socket: (open-nn-connection (conc host":"port)) ;; TODO - open ulex connection?
				     ipaddr: ipaddr
				     port: port
				     srvkey: servkey
				     lastmsg: (current-seconds)
				     expires: (+ (current-seconds)
						 (server:expiration-timeout)
						 -2))))
		  (else
		   (debug:print-info 0 *default-log-port* "return data from starting server did not match host port servkey pid ipaddr apath dbname " res)))
		 res)
	       (begin
		 (debug:print-info 0 *default-log-port* "Unexpected result: " res)
		 res)))))))
    #t))

;;======================================================================

;; FOR DEBUGGING SET TO #t
;; (define *localmode* #t)
(define *localmode* #f)
(define *dbstruct* (make-dbr:dbstruct))

;; Defaults to current area
;;
(define (rmt:send-receive cmd rid params #!key (attemptnum 1)(area-dat #f))
  (let* ((apath      *toppath*)
	 (sinfo      *db-serv-info*)
	 (dbname     (db:run-id->dbname rid)))
    (if *localmode*
	(api:execute-requests *dbstruct* cmd params)
	(begin
	  (rmt:open-main-connection sinfo apath)
	  (if rid (rmt:general-open-connection sinfo apath dbname))
	  #;(if (not (member cmd '(log-to-main)))
	      (debug:print-info 0 *default-log-port* "rmt:send-receive "cmd" params="params))
	  (rmt:send-receive-real sinfo apath dbname cmd params)))))

;; db is at apath/.db/dbname, rid is an intermediary solution and will be removed
;; sometime in the future
;;
(define (rmt:send-receive-real sinfo apath dbname cmd params)
  (assert (not (eq? 'primordial (thread-name (current-thread)))) "FATAL: Do not call rmt:send-receive-real in the primodial thread.")
  (let* ((cdat (rmt:get-conn sinfo apath dbname)))
    (assert cdat "FATAL: rmt:send-receive-real called without the needed channels opened")
    (let* ((uconn    (servdat-uconn sinfo)) ;; get the interface to ulex
	   ;; then send-receive using the ulex layer to host-port stored in cdat
	   (res      (send-receive uconn (conndat-hostport cdat) cmd params))
	   #;(th1      (make-thread (lambda ()
				    (set! res (send-receive uconn (conndat-hostport cdat) cmd params)))
				  "send-receive thread")))
      ;; (thread-start! th1)
      ;; (thread-join! th1)   ;; gratuitious thread stuff is so that mailbox is not used in primordial thead
      ;; since we accessed the server we can bump the expires time up
      (conndat-expires-set! cdat (+ (current-seconds)
				    (server:expiration-timeout)
				    -10)) ;; ten second margin for network time misalignments etc.
      res)))

;; db is at apath/.db/dbname, rid is an intermediary solution and will be removed
;; sometime in the future.
;;
;; Purpose - call the main.db server and request a server be started
;; for the given area path and dbname
;;

(define (rmt:print-db-stats)
  (let ((fmtstr "~40a~7-d~9-d~20,2-f")) ;; "~20,2-f"
    (debug:print 18 *default-log-port* "DB Stats, "(seconds->year-week/day-time (current-seconds))"\n=====================")
    (debug:print 18 *default-log-port* (format #f "~40a~8a~10a~10a" "Cmd" "Count" "TotTime" "Avg"))
    (for-each (lambda (cmd)
		(let ((cmd-dat (hash-table-ref *db-stats* cmd)))
		  (debug:print 18 *default-log-port* (format #f fmtstr cmd (vector-ref cmd-dat 0) (vector-ref cmd-dat 1) (/ (vector-ref cmd-dat 1)(vector-ref cmd-dat 0))))))
	      (sort (hash-table-keys *db-stats*)
		    (lambda (a b)
		      (> (vector-ref (hash-table-ref *db-stats* a) 0)
			 (vector-ref (hash-table-ref *db-stats* b) 0)))))))

(define (rmt:get-max-query-average run-id)
  (mutex-lock! *db-stats-mutex*)
  (let* ((runkey (conc "run-id=" run-id " "))
	 (cmds   (filter (lambda (x)
			   (substring-index runkey x))
			 (hash-table-keys *db-stats*)))
	 (res    (if (null? cmds)
		     (cons 'none 0)
		     (let loop ((cmd (car cmds))
				(tal (cdr cmds))
				(max-cmd (car cmds))
				(res 0))
		       (let* ((cmd-dat (hash-table-ref *db-stats* cmd))
			      (tot     (vector-ref cmd-dat 0))
			      (curravg (/ (vector-ref cmd-dat 1) (vector-ref cmd-dat 0))) ;; count is never zero by construction
			      (currmax (max res curravg))
			      (newmax-cmd (if (> curravg res) cmd max-cmd)))
			 (if (null? tal)
			     (if (> tot 10)
				 (cons newmax-cmd currmax)
				 (cons 'none 0))
			     (loop (car tal)(cdr tal) newmax-cmd currmax)))))))
    (mutex-unlock! *db-stats-mutex*)
    res))

;; host and port are used to ensure we are remove proper records
(define (rmt:server-shutdown host port)
  (let ((dbfile   (servdat-dbfile *db-serv-info*)))
    (debug:print-info 0 *default-log-port* "dbfile is "dbfile)
    (if dbfile
	(let* ((am-server  (args:get-arg "-server"))
	       (dbfile     (args:get-arg "-db"))
	       (apath      *toppath*)
	       #;(sinfo     *remotedat*)) ;; foundation for future fix
	  (if *dbstruct-db*
	      (let* ((dbdat      (db:get-dbdat *dbstruct-db* apath dbfile))
		     (db         (dbr:dbdat-db dbdat))
		     (inmem      (dbr:dbdat-db dbdat))   ;; WRONG
		     )
		;; do a final sync here
		(debug:print-info 0 *default-log-port* "Doing final sync for "apath" "dbfile" at "(current-seconds))
		(db:sync-inmem->disk *dbstruct-db* apath dbfile force-sync: #t)
		;; let's finalize here
		(debug:print-info 0 *default-log-port* "Finalizing db and inmem")
		(if (sqlite3:database? db)
		    (sqlite3:finalize! db)
		    (debug:print-info 0 *default-log-port* "in rmt:server-shutdown, db is not a database, not finalizing..."))
		(if (sqlite3:database? inmem)
		    (sqlite3:finalize! inmem)
		    (debug:print-info 0 *default-log-port* "in rmt:server-shutdown, inmem is not a database, not finalizing..."))
		(debug:print-info 0 *default-log-port* "Finalizing db and inmem complete"))
	      (debug:print-info 0 *default-log-port* "Db was never opened, no cleanup to do."))
	  (if (not am-server)
	      (debug:print-info 0 *default-log-port* "I am not a server, should NOT get here!")
	      (if (string-match ".*/main.db$" dbfile)
		  (let ((pkt-file (conc (get-pkts-dir *toppath*)
					"/" (servdat-uuid *db-serv-info*)
					".pkt")))
		    (debug:print-info 0 *default-log-port* "removing pkt "pkt-file)
		    (delete-file* pkt-file)
		    (debug:print-info 0 *default-log-port* "Releasing lock (if any) for "dbfile ", host "host", port "port)
		    (db:with-lock-db
		     (servdat-dbfile *db-serv-info*)
		     (lambda (dbh dbfile)
		       (db:release-lock dbh dbfile host port)))) ;; I'm not the server - should not have a lock to remove
		  (let* ((sdat *db-serv-info*) ;; we have a run-id server
			 (host (servdat-host sdat))
			 (port (servdat-port sdat))
			 (uuid (servdat-uuid sdat))
			 (res  (rmt:deregister-server *db-serv-info* *toppath* host port uuid dbfile)))
		    (debug:print-info 0 *default-log-port* "deregistered-server, res="res)
		    (debug:print-info 0 *default-log-port* "deregistering server "host":"port" with uuid "uuid)
		    )))))))


(define (common:run-sync?)
    ;; (and (common:on-homehost?)
  (args:get-arg "-server"))

(define *rmt:run-mutex* (make-mutex))
(define *rmt:run-flag* #f)

;; Main entry point to start a server. was start-server
(define (rmt:run hostn)
  (mutex-lock! *rmt:run-mutex*)
  (if *rmt:run-flag*
      (begin
	(debug:print-warn 0 *default-log-port* "rmt:run already running.")
	(mutex-unlock! *rmt:run-mutex*))
      (begin
	(set! *rmt:run-flag* #t)
	(mutex-unlock! *rmt:run-mutex*)
	;;  ;; Configurations for server
	;;  (tcp-buffer-size 2048)
	;;  (max-connections 2048) 
	(debug:print 2 *default-log-port* "PID: "(current-process-id)". Attempting to start the server ...")
	(if (and *db-serv-info*
		 (servdat-uconn *db-serv-info*))
	    (let* ((uconn (servdat-uconn *db-serv-info*)))
	      (wait-and-close uconn))
	    (let* ((port            (portlogger:open-run-close portlogger:find-port))
		   (handler-proc    (lambda (rem-host-port qrykey cmd params) ;;
				      (set! *db-last-access* (current-seconds))
				      (assert (list? params) "FATAL: handler called with non-list params")
				      (assert (args:get-arg "-server") "FATAL: handler called on non-server side. cmd="cmd", params="params)
				      (debug:print 0 *default-log-port* "handler call: "cmd", params="params)
				      (api:execute-requests *dbstruct-db* cmd params))))
	      ;; (api:process-request *dbstuct-db* 
	      (if (not *db-serv-info*)
		  (set! *db-serv-info* (make-servdat host: hostn port: port)))
	      (let* ((uconn (run-listener handler-proc port))
		     (rport (udat-port uconn))) ;; the real port
		(servdat-host-set! *db-serv-info* hostn)
		(servdat-port-set! *db-serv-info* rport)
		(servdat-uconn-set! *db-serv-info* uconn)
		(wait-and-close uconn)
		(db:print-current-query-stats)
		)))
	(let* ((host (servdat-host *db-serv-info*))
	       (port (servdat-port *db-serv-info*))
	       (mode (or (servdat-mode *db-serv-info*)
			 "non-db")))
	  ;; server exit stuff here
	  ;; (rmt:server-shutdown host port) - always do in on-exit
	  ;; (portlogger:open-run-close portlogger:set-port port "released") ;; moved to on-exit 
	  (debug:print-info 0 *default-log-port* "Server "host":"port" mode "mode"shutdown complete. Exiting")
	  ))))

;;======================================================================
;; S E R V E R   U T I L I T I E S 
;;======================================================================


;;======================================================================
;; NEW SERVER METHOD
;;======================================================================

;; only use for main.db - need to re-write some of this :(
;;
(define (get-lock-db sdat dbfile host port)
  (assert host "FATAL: get-lock-db called with host not set.")
  (assert port "FATAL: get-lock-db called with port not set.")
  (let* ((dbh (db:open-run-db dbfile db:initialize-db)) ;; open-run-db creates a standard db with schema used by all situations
	 (res (db:get-iam-server-lock dbh dbfile host port))
	 (uconn (servdat-uconn sdat)))
    ;; res => list then already locked, check server is responsive
    ;;     => #t then sucessfully got the lock
    ;;     => #f reserved for future use as to indicate something went wrong
    (match res
      ((owner_pid owner_host owner_port event_time)
       (if (server-ready? uconn (conc owner_host":"owner_port) "abc")
	   #f      ;; locked by someone else
	   (begin  ;; locked by someone dead and gone
	     (debug:print 0 *default-log-port* "WARNING: stale lock - have to steal it. This may fail.")
	     (db:steal-lock-db dbh dbfile port))))
      (#t  #t) ;; placeholder so that we don't touch res if it is #t
      (else (set! res #f)))
    (sqlite3:finalize! dbh)
    res))


(define (register-server pkts-dir pkt-spec host port servkey ipaddr dbpath)
  (let* ((pkt-dat `((host    . ,host)
		    (port    . ,port)
		    (servkey . ,servkey)
		    (pid     . ,(current-process-id))
		    (ipaddr  . ,ipaddr)
		    (dbpath  . ,dbpath)))
	 (uuid    (write-alist->pkt
		   pkts-dir
		   pkt-dat
		   pktspec: pkt-spec
		   ptype: 'server)))
    (debug:print 0 *default-log-port* "Server on "host":"port" registered in pkt "uuid)
    uuid))

(define (get-pkts-dir #!optional (apath #f))
  (let* ((effective-toppath (or *toppath* apath)))
    (assert effective-toppath
	    "ERROR: get-pkts-dir called without *toppath* set. Exiting.")
    (let* ((pdir (conc effective-toppath "/.meta/srvpkts")))
      (if (file-exists? pdir)
	  pdir
	  (begin
	    (handle-exceptions ;; this exception handler should NOT be needed but ...
		exn
		pdir
	      (create-directory pdir #t))
	    pdir)))))

;; given a pkts dir read 
;;
(define (get-all-server-pkts pktsdir-in pktspec)
  (let* ((pktsdir  (if (file-exists? pktsdir-in)
		       pktsdir-in
		       (begin
			 (create-directory pktsdir-in #t)
			 pktsdir-in)))
	 (all-pkt-files (glob (conc pktsdir "/*.pkt"))))
    (map (lambda (pkt-file)
	   (read-pkt->alist pkt-file pktspec: pktspec))
	 all-pkt-files)))

(define (server-address srv-pkt)
  (conc (alist-ref 'host srv-pkt) ":"
	(alist-ref 'port srv-pkt)))
	
(define (server-ready? uconn host-port key) ;; server-address is host:port
  (let* ((params `((cmd . ping)(key . ,key)))
	 (data `((cmd . ping)
		 (key . ,key)
		 (params . ,params))) ;; I don't get it.
	 (res  (send-receive uconn host-port 'ping data)))
    (if (eq? res 'ack) ;; yep, likely it is who we want on the other end
	res
	#f)))
;; (begin (debug:print-info 0 *default-log-port* "server-ready? => "res) #f))))

; from the pkts return servers associated with dbpath
;; NOTE: Only one can be alive - have to check on each
;;       in the list of pkts returned
;;
(define (get-viable-servers serv-pkts dbpath)
  (let loop ((tail serv-pkts)
	     (res  '()))
    (if (null? tail)
	res ;; NOTE: sort by age so oldest is considered first
	(let* ((spkt (car tail)))
	  (loop (cdr tail)
		(if (equal? dbpath (alist-ref 'dbpath spkt))
		    (cons spkt res)
		    res))))))

(define (remove-pkts-if-not-alive uconn serv-pkts)
  (filter (lambda (pkt)
	    (let* ((host (alist-ref 'host pkt))
		   (port (alist-ref 'port pkt))
		   (host-port (conc host":"port))
		   (key  (alist-ref 'servkey  pkt))
		   (pktz (alist-ref 'Z        pkt))
		   (res  (server-ready? uconn host-port key)))
	      (if res
		  res
		  (let* ((pktsdir (get-pkts-dir *toppath*))
			 (pktpath (conc pktsdir"/"pktz".pkt")))
		    (debug:print 0 *default-log-port* "WARNING: pkt with no server "pktpath)
		    (delete-file* pktpath)
		    #f))))
	  serv-pkts))

;; from viable servers get one that is alive and ready
;;
(define (get-the-server uconn apath serv-pkts)
  (let loop ((tail serv-pkts))
    (if (null? tail)
	#f
	(let* ((spkt  (car tail))
	       (host  (alist-ref 'ipaddr spkt))
	       (port  (alist-ref 'port spkt))
	       (host-port (conc host":"port))
	       (dbpth (alist-ref 'dbpath spkt))
	       (srvkey (alist-ref 'Z spkt)) ;; (alist-ref 'srvkey spkt))
	       (addr  (server-address spkt)))
	  (if (server-ready? uconn host-port srvkey)
	      spkt
	      (loop (cdr tail)))))))

;; am I the "first" in line server? I.e. my D card is smallest
;; use Z card as tie breaker
;;
(define (get-best-candidate serv-pkts dbpath)
  (if (null? serv-pkts)
      #f
      (let loop ((tail serv-pkts)
		 (best  (car serv-pkts)))
	(if (null? tail)
	    best
	    (let* ((candidate (car tail))
		   (candidate-bd (string->number (alist-ref 'D candidate)))
		   (best-bd      (string->number (alist-ref 'D best)))
		   ;; bigger number is younger
		   (candidate-z  (alist-ref 'Z candidate))
		   (best-z       (alist-ref 'Z best))
		   (new-best     (cond
				  ((> best-bd candidate-bd) ;; best is younger than candidate
				   candidate)
				  ((< best-bd candidate-bd) ;; candidate is younger than best
				   best)
				  (else
				   (if (string>=? best-z candidate-z)
				       best
				       candidate))))) ;; use Z card as tie breaker
	      (if (null? tail)
		  new-best
		  (loop (cdr tail) new-best)))))))
	  

;;======================================================================
;; END NEW SERVER METHOD
;;======================================================================

;; if .db/main.db check the pkts
;; 
(define (rmt:wait-for-server pkts-dir db-file server-key)
  (let* ((sdat *db-serv-info*))
    (let loop ((start-time (current-seconds))
	       (changed    #t)
	       (last-sdat  "not this"))
      (begin ;; let ((sdat #f))
	(thread-sleep! 0.01)
	(debug:print-info 0 *default-log-port* "Waiting for server alive signature")
	(mutex-lock! *heartbeat-mutex*)
	(set! sdat *db-serv-info*)
	(mutex-unlock! *heartbeat-mutex*)
	(if (and sdat
		 (not changed)
		 (> (- (current-seconds) start-time) 2))
	    (let* ((uconn (servdat-uconn sdat)))
	      (servdat-status-set! sdat 'iface-stable)
	      (debug:print-info 0 *default-log-port* "Received server alive signature, now attempting to lock in server")
	      ;; create a server pkt in *toppath*/.meta/srvpkts
	      
	      ;; TODO:
	      ;;   1. change sdat to stuct
	      ;;   2. add uuid to struct
	      ;;   3. update uuid in sdat here
	      ;;
	      (servdat-uuid-set! sdat
				 (register-server
				  pkts-dir *srvpktspec*
				  (get-host-name)
				  (servdat-port sdat) server-key
				  (servdat-host sdat) db-file))
	      ;; (set! *my-signature* (servdat-uuid sdat)) ;; replace with Z, no, stick with proper key
	      ;; now read pkts and see if we are a contender
	      (let* ((all-pkts     (get-all-server-pkts pkts-dir *srvpktspec*))
		     (viables      (get-viable-servers all-pkts db-file))
		     (alive        (remove-pkts-if-not-alive uconn viables))
		     (best-srv     (get-best-candidate alive db-file))
		     (best-srv-key (if best-srv (alist-ref 'servkey best-srv) #f))
		     (i-am-srv     (equal? best-srv-key server-key))
		     (delete-pkt   (lambda ()
				     (let* ((pktfile (conc (get-pkts-dir *toppath*)
							 "/" (servdat-uuid *db-serv-info*)
							 ".pkt")))
				       (debug:print-info 0 *default-log-port* "Attempting to remove bogus pkt file "pktfile)
				       (delete-file* pktfile))))) ;; remove immediately instead of waiting for on-exit
		(debug:print 0 *default-log-port* "best-srv-key: "best-srv-key", server-key: "server-key", i-am-srv: "i-am-srv)
		;; am I the best-srv, compare server-keys to know
		(if i-am-srv
		    (if (get-lock-db sdat db-file (servdat-host sdat)(servdat-port sdat)) ;; (db:get-iam-server-lock *dbstruct-db* *toppath* run-id)
			(begin
			  (debug:print-info 0 *default-log-port* "I'm the server!")
			  (servdat-dbfile-set! sdat db-file)
			  (servdat-status-set! sdat 'db-locked))
			(begin
			  (debug:print-info 0 *default-log-port* "I'm not the server, exiting.")
			  (bdat-time-to-exit-set! *bdat* #t)
			  (delete-pkt)
			  (thread-sleep! 0.2)
			  (exit)))
		    (begin
		      (debug:print-info 0 *default-log-port*
				   "Keys do not match "best-srv-key", "server-key", exiting.")
		      (bdat-time-to-exit-set! *bdat* #t)
		      (delete-pkt)
		      (thread-sleep! 0.2)
		      (exit)))
		sdat))
	    (begin ;; sdat not yet contains server info
	      (debug:print-info 0 *default-log-port* "Still waiting, last-sdat=" last-sdat)
	      (sleep 4)
	      (if (> (- (current-seconds) start-time) 120) ;; been waiting for two minutes
		  (begin
		    (debug:print-error 0 *default-log-port* "transport appears to have died, exiting server")
		    (exit))
		  (loop start-time
			(equal? sdat last-sdat)
			sdat))))))))

(define (rmt:register-server sinfo apath iface port server-key dbname)
  (servdat-conns sinfo) ;; just checking types
  (rmt:open-main-connection sinfo apath) ;; we need a channel to main.db
  (rmt:send-receive-real sinfo apath      ;; params: host port servkey pid ipaddr dbpath
			 (db:run-id->dbname #f)
			 'register-server `(,iface
					    ,port
					    ,server-key
					    ,(current-process-id)
					    ,iface
					    ,apath
					    ,dbname)))

(define (rmt:get-count-servers sinfo apath)
  (servdat-conns sinfo) ;; just checking types
  (rmt:open-main-connection sinfo apath) ;; we need a channel to main.db
  (rmt:send-receive-real sinfo apath      ;; params: host port servkey pid ipaddr dbpath
			 (db:run-id->dbname #f)
			 'get-count-servers `(,apath)))

(define (rmt:get-servers-info apath)
  (rmt:send-receive 'get-servers-info #f `(,apath)))

(define (rmt:deregister-server db-serv-info apath iface port server-key dbname)
  (rmt:open-main-connection db-serv-info apath) ;; we need a channel to main.db
  (rmt:send-receive-real db-serv-info apath      ;; params: host port servkey pid ipaddr dbpath
                         (db:run-id->dbname #f)
                         'deregister-server `(,iface
                                              ,port
                                              ,server-key
                                              ,(current-process-id)
                                              ,iface
                                              ,apath
                                              ,dbname)))

(define (rmt:wait-for-stable-interface #!optional (num-tries-allowed 100))
  ;; wait until *db-serv-info* stops changing
  (let* ((stime (current-seconds)))
    (let loop ((last-host  #f)
	       (last-port  #f)
	       (tries 0))
      (let* ((curr-host (and *db-serv-info* (servdat-host *db-serv-info*)))
	     (curr-port (and *db-serv-info* (servdat-port *db-serv-info*))))
	;; first we verify port and interface, update *db-serv-info* in need be.
	(cond
	 ((> tries num-tries-allowed)
	  (debug:print 0 *default-log-port* "rmt:keep-running, giving up after trying for several minutes.")
	  (exit 1))
	 ((not *db-serv-info*)
	  (thread-sleep! 0.25)
	  (loop curr-host curr-port (+ tries 1)))
	 ((or (not last-host)(not last-port))
	  (debug:print 0 *default-log-port* "rmt:keep-running, still no interface, tries="tries)
	  (thread-sleep! 0.25)
	  (loop curr-host curr-port (+ tries 1)))
	 ((or (not (equal? last-host curr-host))
	      (not (equal? last-port curr-port)))
	  (debug:print-info 0 *default-log-port* "WARNING: interface changed, refreshing iface and port info")
	  (thread-sleep! 0.25)
	  (loop curr-host curr-port (+ tries 1)))
	 ((< (- (current-seconds) stime) 1) ;; keep up the looping until at least 3 seconds have passed
	  (thread-sleep! 0.5)
	  (loop curr-host curr-port (+ tries 1)))
	 (else
	  (rmt:get-signature) ;; sets *my-signature* as side effect
	  (servdat-status-set! *db-serv-info* 'interface-stable)
	  (debug:print 0 *default-log-port*
		       "SERVER STARTED: " curr-host
		       ":" curr-port
		       " AT " (current-seconds) " server signature: " *my-signature*
		       " with "(servdat-trynum *db-serv-info*)" port changes")
	  (flush-output *default-log-port*)
	  #t))))))

;; run rmt:keep-running in a parallel thread to monitor that the db is being 
;; used and to shutdown after sometime if it is not.
;;
(define (rmt:keep-running dbname) 
  ;; if none running or if > 20 seconds since 
  ;; server last used then start shutdown
  ;; This thread waits for the server to come alive
  (debug:print-info 0 *default-log-port* "Starting the sync-back, keep alive thread in server")

  (let* ((sinfo             *db-serv-info*)
	 (server-start-time (current-seconds))
	 (pkts-dir          (get-pkts-dir))
	 (server-key        (rmt:get-signature)) ;; This servers key
	 (is-main           (equal? (args:get-arg "-db") ".db/main.db"))
	 (last-access       0)
	 (server-timeout    (server:expiration-timeout))
	 (shutdown-server-sequence (lambda (host port)
				     (set! *unclean-shutdown* #f) ;; Should not be needed anymore
				     (debug:print-info 0 *default-log-port* "Starting to shutdown the server. pid="(current-process-id))
				     ;; (rmt:server-shutdown host port) -- called in on-exit
				     ;; (portlogger:open-run-close portlogger:set-port port "released") called in on-exit
				     (exit)))
	 (timed-out?        (lambda ()
			      (<= (+ last-access server-timeout)
				  (current-seconds)))))
    (servdat-dbfile-set! *db-serv-info* (args:get-arg "-db"))
    ;; main and run db servers have both got wait logic (could/should merge it)
    (if is-main
	(rmt:wait-for-server pkts-dir dbname server-key)
	(rmt:wait-for-stable-interface))
    ;; this is our forever loop
    (let* ((iface (servdat-host *db-serv-info*))
	   (port  (servdat-port *db-serv-info*))
	   (uconn (servdat-uconn *db-serv-info*)))
      (let loop ((count          0)
		 (bad-sync-count 0)
		 (start-time     (current-milliseconds)))
	(if (and (not is-main)
		 (common:low-noise-print 60 "servdat-status"))
	    (debug:print-info 0 *default-log-port* "servdat-status is " (servdat-status *db-serv-info*)))

	(mutex-lock! *heartbeat-mutex*)
	;; set up the database handle
	(if (not *dbstruct-db*) ;; no db opened yet, open the db and register with main if appropriate
	      (let ((watchdog (bdat-watchdog *bdat*)))
		(debug:print 0 *default-log-port* "SERVER: dbprep")
		(db:setup dbname) ;; sets *dbstruct-db* as side effect
		(servdat-status-set! *db-serv-info* 'db-opened)
		;; IFF I'm not main, call into main and register self
		(if (not is-main)
		    (let ((res (rmt:register-server sinfo
						    *toppath* iface port
						    server-key dbname)))
		      (if res ;; we are the server
			  (servdat-status-set! *db-serv-info* 'have-interface-and-db)
			  ;; now check that the db locker is alive, clear it out if not
			  (let* ((serv-info (rmt:server-info *toppath* dbname)))
			    (match serv-info
				   ((host port servkey pid ipaddr apath dbpath)
				    (if (not (server-ready? uconn (conc host":"port) servkey))
					(begin
					  (debug:print-info 0 *default-log-port* "Server registered but not alive. Removing and trying again.")
					  (rmt:deregister-server sinfo apath host port servkey dbpath) ;; servkey pid ipaddr apath dbpath)
					  (loop (+ count 1) bad-sync-count start-time))))
				   (else
				    (debug:print 0 *default-log-port* "We are not the server for "dbname", exiting. Server info is: "serv-info)
				    (exit)))))))
		(debug:print 0 *default-log-port*
			     "SERVER: running, db "dbname" opened, megatest version: "
			   (common:get-full-version))
	      ;; start the watchdog

	      ;; is this really needed?
	      
	      #;(if watchdog
		  (if (not (member (thread-state watchdog)
				   '(ready running blocked
					   sleeping dead)))
		      (begin
			(debug:print-info 0 *default-log-port* "Starting watchdog thread (in state "(thread-state watchdog)")")
			(thread-start! watchdog))
		      (debug:print-info 0 *default-log-port* "Not starting watchdog thread (in state "(thread-state watchdog)")"))
		  (debug:print 0 *default-log-port* "ERROR: *watchdog* not setup, cannot start it."))
	      #;(loop (+ count 1) bad-sync-count start-time)
	      ))
	
	(db:sync-inmem->disk *dbstruct-db* *toppath* dbname force-sync: #t)
	
	(mutex-unlock! *heartbeat-mutex*)
	
	;; when things go wrong we don't want to be doing the various
	;; queries too often so we strive to run this stuff only every
	;; four seconds or so.
	(let* ((sync-time (- (current-milliseconds) start-time))
	       (rem-time  (quotient (- 4000 sync-time) 1000)))
	  (if (and (<= rem-time 4)
		   (>  rem-time 0))
	      (thread-sleep! rem-time)))
    
	;; Transfer *db-last-access* to last-access to use in checking that we are still alive
	(set! last-access *db-last-access*)
	
	(if (< count 1) ;; 3x3 = 9 secs aprox
	    (loop (+ count 1) bad-sync-count (current-milliseconds)))
	
	(if (common:low-noise-print 60 "dbstats")
	    (begin
	      (debug:print 0 *default-log-port* "Server stats:")
	      (db:print-current-query-stats)))
	(let* ((hrs-since-start  (/ (- (current-seconds) server-start-time) 3600)))
	  (cond
	   ((not *server-run*)
	    (debug:print-info 0 *default-log-port* "*server-run* set to #f. Shutting down.")
	    (shutdown-server-sequence (get-host-name) port))
	   ((timed-out?)
	    (debug:print-info 0 *default-log-port* "Server timed out. seconds since last db access: " (- (current-seconds) last-access))
	    (shutdown-server-sequence (get-host-name) port))
	   ((and *server-run*
		 (or (not (timed-out?))
		     (if is-main ;; do not exit if there are other servers (keep main open until all others gone)
			 (> (rmt:get-count-servers sinfo *toppath*) 1)
			 #f)))
	    (if (common:low-noise-print 120 "server continuing")
		(debug:print-info 0 *default-log-port* "Server continuing, seconds since last db access: " (- (current-seconds) last-access)))
	    (loop 0 bad-sync-count (current-milliseconds)))
	   (else
	    (set! *unclean-shutdown* #f)
	    (debug:print-info 0 *default-log-port* "Server timed out. seconds since last db access: " (- (current-seconds) last-access))
	    (shutdown-server-sequence (get-host-name) port)
	    #;(debug:print-info 0 *default-log-port* "Sending 'quit to server, received: "
			      (open-send-receive-nn (conc iface":"port)      ;; do this here and not in server-shutdown
						    (sexpr->string 'quit))))))))))

(define (rmt:get-reasonable-hostname)
  (let* ((inhost (or (args:get-arg "-server") "-")))
    (if (equal? inhost "-")
	(get-host-name)
	inhost)))

;; Call this to start the actual server
;;
;; all routes though here end in exit ...
;;
;; This is the point at which servers are started
;;
(define (rmt:server-launch dbname)
  (debug:print-info 0 *default-log-port* "Entered rmt:server-launch")
  (let* ((th2 (make-thread (lambda ()
			     (debug:print-info 0 *default-log-port* "Server run thread started")
			     (rmt:run (rmt:get-reasonable-hostname)))
			   "Server run"))
	 (th3 (make-thread (lambda ()
			     (debug:print-info 0 *default-log-port* "Server monitor thread started")
			     (if (args:get-arg "-server")
				 (rmt:keep-running dbname)))
			     "Keep running")))
    (thread-start! th2)
    (thread-sleep! 0.252) ;; give the server time to settle before starting the keep-running monitor.
    (thread-start! th3)
    (set! *didsomething* #t)
    (thread-join! th2)
    (thread-join! th3))
  #f)
	    
;;======================================================================
;;  S E R V E R   -  D I R E C T   C A L L S
;;======================================================================

(define (rmt:kill-server run-id)
  (rmt:send-receive 'kill-server #f (list run-id)))

(define (rmt:start-server run-id)
  (rmt:send-receive 'start-server #f (list run-id)))

(define (rmt:server-info apath dbname)
  (rmt:send-receive 'get-server-info #f (list apath dbname)))

;;======================================================================
;; Nanomsg transport
;;======================================================================

#;(define (is-port-in-use port-num)
  (let* ((ret #f))
    (let-values (((inp oup pid)
		  (process "netstat" (list  "-tulpn" ))))
      (let loop ((inl (read-line inp)))
        (if (not (eof-object? inl))
            (begin 
	      (if (string-search (regexp (conc ":" port-num)) inl)
		  (begin
					;(print "Output: "  inl)
		    (set! ret  #t))
		  (loop (read-line inp)))))))
    ret))

#;(define (open-nn-connection host-port)
  (let ((req  (make-req-socket))
        (uri  (conc "tcp://" host-port)))
    (nng-dial req uri)
    (socket-set! req 'nng/recvtimeo 2000)
    req))

#;(define (send-receive-nn req msg)
  (nng-send req msg)
  (nng-recv req))

#;(define (close-nn-connection req)
  (nng-close! req))
  
;; ;; open connection to server, send message, close connection
;; ;;
;; (define (open-send-close-nn host-port msg #!key (timeout 3) ) ;; default timeout is 3 seconds
;;   (let ((req  (make-req-socket 'req))
;;         (uri  (conc "tcp://" host-port))
;;         (res  #f)
;;         ;; (contacts (alist-ref 'contact attrib))
;;         ;; (mode (alist-ref 'mode attrib))
;; 	)
;;     (socket-set! req 'nng/recvtimeo 2000)
;;     (handle-exceptions
;;      exn
;;      (let ((emsg ((condition-property-accessor 'exn 'message) exn)))
;;        ;; Send notification       
;;        (debug:print 0 *default-log-port* "ERROR: Failed to connect / send to " uri " message was \"" emsg "\"" )
;;        #f)
;;      (nng-dial req uri)
;;      ;; (print "Connected to the server " )
;;      (nng-send req msg)
;;      ;; (print "Request Sent")  
;;      (let* ((th1  (make-thread (lambda ()
;;                                  (let ((resp (nng-recv req)))
;;                                    (nng-close! req)
;;                                    (set! res (if (equal? resp "ok")
;;                                                  #t
;;                                                  #f))))
;;                                "recv thread"))
;;             (th2 (make-thread (lambda ()
;;                                 (thread-sleep! timeout)
;;                                 (thread-terminate! th1))
;; 			      "timer thread")))
;;        (thread-start! th1)
;;        (thread-start! th2)
;;        (thread-join! th1)
;;        res))))
;; 
#;(define (open-send-receive-nn host-port msg #!key (timeout 3) ) ;; default timeout is 3 seconds
  (let ((req  (make-req-socket))
        (uri  (conc "tcp://" host-port))
        (res  #f)) 
    (handle-exceptions
     exn
     (let ((emsg ((condition-property-accessor 'exn 'message) exn)))
       ;; Send notification      
       (debug:print 0 *default-log-port* "ERROR: Failed to connect / send to " uri " message was \"" emsg "\", exn=" exn)
       #f)
     (nng-dial req uri)
     (nng-send req msg)
     (let* ((th1  (make-thread (lambda ()
                                 (let ((resp (nng-recv req)))
                                   (nng-close! req)
                                   ;; (print resp)
                                   (set! res resp)))
                               "recv thread"))
            (th2 (make-thread (lambda ()
                                (thread-sleep! timeout)
                                (thread-terminate! th1))
                             "timer thread")))
       (thread-start! th1)
       (thread-start! th2)
       (thread-join! th1)
       res))))

;;======================================================================
;; S E R V E R   U T I L I T I E S 
;;======================================================================

;; run ping in separate process, safest way in some cases
;;
#;(define (server:ping-server ifaceport)
  (with-input-from-pipe 
   (conc (common:get-megatest-exe) " -ping " ifaceport)
   (lambda ()
     (let loop ((inl (read-line))
		(res "NOREPLY"))
       (if (eof-object? inl)
	   (case (string->symbol res)
	     ((NOREPLY)  #f)
	     ((LOGIN_OK) #t)
	     (else       #f))
	   (loop (read-line) inl))))))

;; NOT USED (well, ok, reference in rpc-transport but otherwise not used).
;;
#;(define (server:login toppath)
  (lambda (toppath)
    (set! *db-last-access* (current-seconds)) ;; might not be needed.
    (if (equal? *toppath* toppath)
	#t
	#f)))

;; (define server:sync-lock-token "SERVER_SYNC_LOCK")
;; (define (server:release-sync-lock)
;;   (db:no-sync-del! *no-sync-db* server:sync-lock-token))
;; (define (server:have-sync-lock?)
;;   (let* ((have-lock-pair (db:no-sync-get-lock *no-sync-db* server:sync-lock-token))
;;          (have-lock?     (car have-lock-pair))
;;          (lock-time      (cdr have-lock-pair))
;;          (lock-age       (- (current-seconds) lock-time)))
;;     (cond
;;      (have-lock? #t)
;;      ((>lock-age
;;        (* 3 (configf:lookup-number *configdat* "server" "minimum-intersync-delay" default: 180)))
;;       (server:release-sync-lock)
;;       (server:have-sync-lock?))
;;      (else #f))))

)

Modified ulex/ulex.scm from [42b648b50c] to [c73b8e0289].

1
2
3

4
5
6
7
8
9
10
1
2

3
4
5
6
7
8
9
10


-
+







;; ulex: Distributed sqlite3 db
;;;
;; Copyright (C) 2018 Matt Welland
;; Copyright (C) 2018-2021 Matt Welland
;; Redistribution and use in source and binary forms, with or without
;; modification, is permitted.
;;
;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
;; ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE
21
22
23
24
25
26
27
28
29
30
31







32


33
34
35
36
37
38
39
40
41
42
43



















44
45
46





47
48
49
50
51










52
53
54

55
56
57












58
59
60
61
62







63
64
65


66
67
68
69
70
71
72
73











74
75
76
77
78
79
80
81















82
83
84
85
86
87













88
89
90
91
92
93





94
95
96
97





98
99
100

101
102




103
104
105
106
107
108







109
110
111
112
113







114
115
116
117
118
119









120






121















122



123
124








125

126

127


128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159


160
161

162
163
164
165
166
167
168












169
170
171
172
173

















174
175
176
177
178
179
180
181
182
183
184

























185














186


187
188
189







190


191
192
193
194
195
196








197
198
199



200
201
202
203
204
205
206














207
208
209
210






211
212
213
214
215
216







217
218
219






220
221

222
223
224
225
226
227



























228
229
230
231
232
233
234













235
236
237
238











239
240


241
242
243
244
245
246
247

248
249
250

251
252
253
254
255
256
257

258
259
260
261
262
263
264
265
266
267

















268
269

270
271
272
273
274










275
276
277
278
279
280
281
282
283
284
285
286
287
288
289











290
291
292
293
294
295
296







297
298
299
300
301
302
303
304
305
306





















307
308
309
310
311




312
313
314
315
316
317
318











319
320
321








322
323



324
325
326
327
328
329
330
331








332
333
334
335
336




337
338
339
340
341
342
343
344



















345
346
347


348
349
350
351
352
353
354
355
356
357
358
359

360
361
362

363
364
365
366
367
368
369
370
371
372

373
374
375
376
377
378
379
380
381
382
383
384
385
386
387

388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842

843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862

863
864
865
866

867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785

1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983
1984
1985
1986
1987
1988
1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
2034
2035
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
2047
2048
2049
2050
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098
2099
2100
2101
2102
2103
2104
2105
2106
2107
2108
2109
2110
2111
2112
2113
2114
2115
2116
2117
2118
2119
2120
2121
2122
2123
2124
2125
2126
2127
2128
2129
2130
2131
2132
2133
2134
2135
2136
2137
2138
2139
2140
2141
2142
2143
2144
2145
2146
2147
2148
2149
2150
2151
2152
2153
2154
2155
2156
2157
2158
2159
2160
2161
2162
2163
2164
2165
2166
2167
2168
2169
2170
2171
2172
2173
2174
2175
2176
2177
2178
2179
2180
2181
2182
2183
2184
2185
2186
2187
2188
2189
2190
2191
2192
2193
2194
2195
2196
2197
2198
2199
2200
2201
2202
2203
2204
2205
2206
2207
2208
2209
2210
2211
2212
2213
2214
2215
2216
2217
2218
2219
2220
2221
2222
2223
2224
2225
2226
2227
2228
2229
2230
2231
2232
2233
2234
2235
2236
2237
2238
2239
2240
2241
2242
2243
2244
2245
2246
2247
2248
2249
2250
2251
2252

21
22
23
24
25
26
27




28
29
30
31
32
33
34
35
36
37











38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56



57
58
59
60
61





62
63
64
65
66
67
68
69
70
71



72



73
74
75
76
77
78
79
80
81
82
83
84





85
86
87
88
89
90
91
92


93
94








95
96
97
98
99
100
101
102
103
104
105








106
107
108
109
110
111
112
113
114
115
116
117
118
119
120






121
122
123
124
125
126
127
128
129
130
131
132
133






134
135
136
137
138




139
140
141
142
143

144

145
146
147
148
149
150
151






152
153
154
155
156
157
158
159




160
161
162
163
164
165
166


167



168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183

184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202


203
204
205
206
207
208
209
210
211
212

213

214
215
216































217
218


219

220





221
222
223
224
225
226
227
228
229
230
231
232





233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249











250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274

275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291



292
293
294
295
296
297
298

299
300
301





302
303
304
305
306
307
308
309



310
311
312







313
314
315
316
317
318
319
320
321
322
323
324
325
326




327
328
329
330
331
332






333
334
335
336
337
338
339



340
341
342
343
344
345


346






347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374






375
376
377
378
379
380
381
382
383
384
385
386
387




388
389
390
391
392
393
394
395
396
397
398


399
400
401






402



403
404
405





406
407
408








409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425


426
427




428
429
430
431
432
433
434
435
436
437















438
439
440
441
442
443
444
445
446
447
448







449
450
451
452
453
454
455










456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476





477
478
479
480







481
482
483
484
485
486
487
488
489
490
491



492
493
494
495
496
497
498
499


500
501
502








503
504
505
506
507
508
509
510





511
512
513
514








515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549

550

551

552


553
554
555
556
557

558

559






560
561
562
563
564




565







































































































































































































































































































































































































































































566




















567




568























































































































































































































































































































































































































































































































































































































































































































































































































































































































































569



















































































































































































































































































































































































































































































570







-
-
-
-
+
+
+
+
+
+
+

+
+
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
+
+
+
+
+
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
-
-
-
+
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
-
-
+
+
+
+
+
+
+

-
-
+
+
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
-
-
-
+
+
+
+
+
-
-
-
-
+
+
+
+
+
-

-
+


+
+
+
+
-
-
-
-
-
-
+
+
+
+
+
+
+

-
-
-
-
+
+
+
+
+
+
+
-
-

-
-
-
+
+
+
+
+
+
+
+
+

+
+
+
+
+
+
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+

+
+
+
-
-
+
+
+
+
+
+
+
+

+
-
+
-
+
+

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
-
-
+
-

-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+

+
+
-
-
-
+
+
+
+
+
+
+
-
+
+

-
-
-
-
-
+
+
+
+
+
+
+
+
-
-
-
+
+
+
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
-
+
+
+
+
+
+
-
-
-
-
-
-
+
+
+
+
+
+
+
-
-
-
+
+
+
+
+
+
-
-
+
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+

-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
-
-
+
+

-
-
-
-
-
-
+
-
-
-
+


-
-
-
-
-
+


-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
+

-
-
-
-
+
+
+
+
+
+
+
+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
-
-
-
-
-
-
-
+
+
+
+
+
+
+
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
-
-
+
+
+
+
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
-
-
-
+
+
+
+
+
+
+
+
-
-
+
+
+
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
-
-
-
-
-
+
+
+
+
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+



+
+











-
+
-

-
+
-
-





-

-
+
-
-
-
-
-
-





-
-
-
-
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
-
-
-
-
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
;; ABOUT:
;;   See README in the distribution at https://www.kiatoa.com/fossils/ulex
;; NOTES:
;;   Why sql-de-lite and not say, dbi?  - performance mostly, then simplicity.
;;
;;======================================================================

(use mailbox)

(module ulex
 *
(module ulex
	*
	#;(
     
     ;; NOTE: looking for the handler proc - find the run-listener :)
     
     run-listener     ;; (run-listener handler-proc [port]) => uconn

     ;; NOTE: handler-proc params;
     ;;       (handler-proc rem-host-port qrykey cmd params)
(import scheme posix chicken data-structures ports extras files mailbox)
(import srfi-18 pkts matchable regex
	typed-records srfi-69 srfi-1
	srfi-4 regex-case
	(prefix sqlite3 sqlite3:)
	foreign
	tcp6
	;; ulex-netutil
	hostinfo
	)


     send-receive     ;; (send-receive uconn host-port cmd data)

     ;; NOTE: cmd can be any plain text symbol except for these;
     ;;         'ping 'ack 'goodbye 'response
     
     set-work-handler ;; (set-work-handler proc)

     wait-and-close   ;; (wait-and-close uconn)

     ulex-listener?
     
     ;; needed to get the interface:port that was automatically found
     udat-port
     udat-host-port
     
     ;; for testing only
     ;; pp-uconn
     
;; make it a global? Well, it is local to area module

(define *captain-pktspec*
     ;; parameters
     work-method   ;; parameter; 'threads, 'mailbox, 'limited, 'direct
     return-method ;; parameter; 'mailbox, 'polling, 'direct
     )

  `((captain (host     . h)
	     (port     . p)
	     (pid      . i)
	     (ipaddr   . a)
	     )
(import scheme
	chicken.base
	chicken.file
	chicken.io
	chicken.time
	chicken.condition
	chicken.string
	chicken.sort
	chicken.pretty-print
	
    #;(data   (hostname . h)  ;; sender hostname
	    (port     . p)  ;; sender port
	    (ipaddr   . a)  ;; sender ip
	address-info
	    (hostkey  . k)  ;; sending host key - store info at server under this key
	    (servkey  . s)  ;; server key - this needs to match at server end or reject the msg
	    (format   . f)  ;; sb=serialized-base64, t=text, sx=sexpr, j=json
	mailbox
	matchable
	;; queues
	regex
	regex-case
	simple-exceptions
	s11n
	srfi-1
	srfi-18
	srfi-4
	srfi-69
	system-information
	    (data     . d)  ;; base64 encoded slln data
	    )))

;; struct for keeping track of our world

	tcp6
	typed-records
	)

;; udat struct, used by both caller and callee
;; instantiated as uconn by convention
;;
(defstruct udat
  ;; captain info
  (captain-address #f)
  ;; the listener side
  (port #f)
  (captain-host    #f)
  (captain-port    #f)
  (captain-pid     #f)
  (captain-lease   0)    ;; time (unix epoc) seconds when the lease is up
  (ulex-dir        (conc (get-environment-variable "HOME") "/.ulex"))
  (cpkts-dir       (conc (get-environment-variable "HOME") "/.ulex/pkts"))
  (cpkt-spec       *captain-pktspec*)
  ;; this processes info
  (host-port #f)
  (socket #f)
  ;; the peers
  (peers  (make-hash-table)) ;; host:port->peer
  ;; work handling
  (work-queue (make-mailbox))
  (work-proc  #f)                ;; set by user
  (cnum       0)                 ;; cookie number
  (mboxes     (make-hash-table)) ;; for the replies
  (avail-cmboxes '())            ;; list of (<cookie> . <mbox>) for re-use
  ;; threads
  (my-cpkt-key     #f)   ;; put Z card here when I create a pkt for myself as captain
  (my-address      #f)
  (my-hostname     #f)
  (my-port         #f)
  (my-pid          (current-process-id))
  (my-dbs          '())
  ;; server and handler thread
  (serv-listener   #f)                 ;; this processes server info
  (numthreads 10)
  (cmd-thread #f)
  (work-queue-thread #f)
  (num-threads-running 0)
  ) 

;; Parameters

;; work-method:
(define work-method (make-parameter 'mailbox))
;;    mailbox - all rdat goes through mailbox
;;    threads - all rdat immediately executed in new thread
;;    direct  - no queuing
;;

  (handler-thread  #f)
  (mboxes          (make-hash-table))  ;; key => mbox
  ;; other servers
  (peers           (make-hash-table))  ;; host-port => peer record
  (dbowners        (make-hash-table))  ;; dbfile => host-port
  (handlers        (make-hash-table))  ;; dbfile => proc
;; return-method, return the result to waiting send-receive:
(define return-method (make-parameter 'mailbox))
;;    mailbox - create a mailbox and use it for passing returning results to send-receive
;;    polling - put the result in a hash table keyed by qrykey and send-receive can poll it for result
;;    direct  - no queuing, result is passed back in single tcp connection
;;

;; ;; struct for keeping track of others we are talking to
;; ;;
;; (defstruct pdat
;;   (host-port  #f)
;;   (conns      '()) ;; list of pcon structs, pop one off when calling the peer
;;   )
  ;; (outgoing-conns  (make-hash-table))  ;; host:port -> conn
  (work-queue      (make-queue))       ;; most stuff goes here
  ;; (fast-queue      (make-queue))       ;; super quick stuff goes here (e.g. ping)
  (busy            #f)                 ;; is either of the queues busy, use to switch between queuing tasks or doing immediately
  ;; app info
  (appname         #f)
;; 
;; ;; struct for peer connections, keep track of expiration etc.
;; ;;
;; (defstruct pcon
;;   (inp #f)
  (dbtypes         (make-hash-table))  ;; this should be an alist but hash is easier. dbtype => [ initproc syncproc ]
  ;; cookies
  (cnum            0) ;; cookie num
  )
;;   (oup #f)
;;   (exp (+ (current-seconds) 59)) ;; expires at this time, set to (+ (current-seconds) 59)
;;   (lifetime (+ (current-seconds) 600)) ;; throw away and create new after five minutes
;;   )


;;======================================================================
;; NEW APPROACH
;; listener
;;======================================================================

;; is uconn a ulex connector (listener)
;;
(define (ulex-listener? uconn)
  (udat? uconn))
;;  start-server-find-port  ;; gotta have a server port ready from the very begining

;; udata    - all the connection info, captain, server, ulex db etc. MUST BE PASSED IN
;; dbpath   - full path and filename of the db to talk to or a symbol naming the db?
;; callname - the remote call to execute
;; params   - parameters to pass to the remote call

;; create a tcp listener and return a populated udat struct with
;; my port, address, hostname, pid etc.
;; return #f if fail to find a port to allocate.
;;
;;  if udata-in is #f create the record
;;  if there is already a serv-listener return the udata
;;
(define (remote-call udata dbpath dbtype callname . params)
  (start-server-find-port udata) ;; ensure we have a local server
  (find-or-setup-captain udata)
  ;; look at connect, process-request, send, send-receive
(define (setup-listener uconn #!optional (port 4242))
  (handle-exceptions
   exn
   (if (< port 65535)
       (setup-listener uconn (+ port 1))
       #f)
   (connect-listener uconn port)))
  (let-values (((cookie-key host-port)(get-db-owner udata dbpath dbtype)))
    (send-receive udata host-port callname cookie-key params)))

;;======================================================================
;; KEY FUNCTIONS - THESE ARE TOO BE EXPOSED AND USED
;;======================================================================
(define (connect-listener uconn port)
  ;; (tcp-listener-socket LISTENER)(socket-name so)
  ;; sockaddr-address, sockaddr-port, sockaddr->string
  (let* ((tlsn (tcp-listen port 1000 #f)) ;; (tcp-listen TCPPORT [BACKLOG [HOST]])
	 (addr (get-my-best-address))) ;; (hostinfo-addresses (host-information (current-hostname)))
    (udat-port-set!      uconn port)
    (udat-host-port-set! uconn (conc addr":"port))
    (udat-socket-set!    uconn tlsn)
    uconn))

;; run-listener does all the work of starting a listener in a thread
;; it then returns control
;;
(define (run-listener handler-proc #!optional (port-suggestion 4242))
  (let* ((uconn (make-udat)))
    (udat-work-proc-set! uconn handler-proc)
;; connection setup and management functions
    (if (setup-listener uconn port-suggestion)
	(let* ((th1 (make-thread (lambda ()(ulex-cmd-loop uconn)) "Ulex command loop"))
	       (th2 (make-thread (lambda ()
				   (case (work-method)
				     ((mailbox limited)
				      (process-work-queue uconn))))
				 "Ulex work queue processor")))
	  ;; (tcp-buffer-size 2048)
	  (thread-start! th1)
	  (thread-start! th2)
	  (udat-cmd-thread-set! uconn th1)
	  (udat-work-queue-thread-set! uconn th2)
	  (print "cmd loop and process workers started, listening on "(udat-host-port uconn)".")
	  uconn)
	(assert #f "ERROR: run-listener called without proper setup."))))

(define (wait-and-close uconn)
  (thread-join! (udat-cmd-thread uconn))
  (tcp-close (udat-socket uconn)))
;; This is the basic setup command. Must always be
;; called before connecting to a db using connect.

;;======================================================================
;; peers and connections
;;======================================================================

(define *send-mutex* (make-mutex))

;; send structured data to recipient
;;
;;  NOTE: qrykey is what was called the "cookie" previously
;; find or become the captain
;;
;; setup and return a ulex object
;;     retval tells send to expect and wait for return data (one line) and return it or time out
;;       this is for ping where we don't want to necessarily have set up our own server yet.
;;
(define (find-or-setup-captain udata)
  ;; see if we already have a captain and if the lease is ok
  (if (and (udat-captain-address udata)
	   (udat-captain-port    udata)
	   (< (current-seconds) (udat-captain-lease udata)))
      udata
      (let* ((cpkts (get-all-captain-pkts udata)) ;; read captain pkts
	     (captn (get-winning-pkt cpkts)))
	(if captn
	    (let* ((port   (alist-ref 'port   captn))
		   (host   (alist-ref 'host   captn))
		   (ipaddr (alist-ref 'ipaddr captn))
		   (pid    (alist-ref 'pid    captn))
		   (Z      (alist-ref 'Z      captn)))
	      (udat-captain-address-set! udata ipaddr)
	      (udat-captain-host-set!    udata host)
	      (udat-captain-port-set!    udata port)
	      (udat-captain-pid-set!     udata pid)
	      (udat-captain-lease-set!   udata (+ (current-seconds) 10))
	      (let-values (((success pingtime)(ping udata (conc ipaddr ":" port))))
		(if success
		    udata
		    (begin
		      (print "Found unreachable captain at " ipaddr ":" port ", removing pkt")
		      (remove-captain-pkt udata captn)
		      (find-or-setup-captain udata))))
	      (begin
		(setup-as-captain udata)  ;; this saves the thread to captain-thread and starts the thread
		(find-or-setup-captain udata)))))))

;; connect to a specific dbfile
;; NOTE: see below for beginnings of code to allow re-use of tcp connections
;;        - I believe (without substantial evidence) that re-using connections will
;;   - if already connected - return the dbowner host-port
;;   - ask the captain who to talk to for this db
;;          be beneficial ...
;;   - put the entry in the dbowners hash as dbfile => host-port
;;
(define (connect udata dbfname dbtype)
  (or (hash-table-ref/default (udat-dbowners udata) dbfname #f)
      (let-values (((success dbowner-host-port)(get-db-owner udata dbfname dbtype)))
	(if success
	    (begin
(define (send udata host-port qrykey cmd params)
  (let* ((my-host-port (udat-host-port udata))          ;; remote will return to this
	 (isme         #f #;(equal? host-port my-host-port)) ;; calling myself?
	 ;; dat is a self-contained work block that can be sent or handled locally
	 (dat          (list my-host-port qrykey cmd params #;(cons (current-seconds)(current-milliseconds)))))
    (cond
     (isme (ulex-handler udata dat)) ;; no transmission needed
     (else
      (handle-exceptions ;; TODO - MAKE THIS EXCEPTION CMD SPECIFIC?
	  exn
	  (message exn)
	(begin
	      ;; just clobber the record, this is the new data no matter what
	      (hash-table-set! (udat-dbowners udata) dbfname dbowner-host-port)
	      dbowner-host-port)
	    #f))))

	  ;; (mutex-lock! *send-mutex*) ;; DOESN'T SEEM TO HELP
	  (let-values (((inp oup)(tcp-connect host-port)))
	    (let ((res (if (and inp oup)
			   (begin
			     (serialize dat oup)
			     (close-output-port oup)
			     (deserialize inp)
			     )
			   (begin
			     (print "ERROR: send called but no receiver has been setup. Please call setup first!")
			     #f))))
	      (close-input-port inp)
	      ;; (mutex-unlock! *send-mutex*) ;; DOESN'T SEEM TO HELP
	      res)))))))) ;; res will always be 'ack unless return-method is direct

(define (send-via-polling uconn host-port cmd data)
  (let* ((qrykey (make-cookie uconn))
;; returns: success pingtime
;;
;; NOTE: causes the callee to store the info on this host along with the dbs this host currently owns
;;
(define (ping udata host-port)
  (let* ((start  (current-milliseconds))
	 (cookie (make-cookie udata))
	 (dbs    (udat-my-dbs udata))
	 (msg    (string-intersperse dbs " "))
	 (res (send udata host-port 'ping cookie msg retval: #t))
	 (delta (- (current-milliseconds) start)))
	 (sres   (send uconn host-port qrykey cmd data)))
    (case sres
      ((ack)
       (let loop ((start-time (current-milliseconds)))
	 (if (> (current-milliseconds)(+ start-time 10000)) ;; ten seconds timeout
	     (begin
	       (print "ULEX ERROR: timed out waiting for response from "host-port", "cmd" "data)
	       #f)
	     (let* ((result (hash-table-ref/default (udat-mboxes uconn) qrykey #f))) ;; NOTE: we are re-using mboxes hash
	       (if result ;; result is '(status . result-data) or #f for nothing yet
		   (begin
		     (hash-table-delete! (udat-mboxes uconn) qrykey)
		     (cdr result))
		   (begin
		     (thread-sleep! 0.01)
		     (loop start-time)))))))
      (else
       (print "ULEX ERROR: Communication failed? sres="sres)
       #f))))

(define (send-via-mailbox uconn host-port cmd data)
  (let* ((cmbox     (get-cmbox uconn)) ;; would it be better to keep a stack of mboxes to reuse?
	 (qrykey    (car cmbox))
	 (mbox      (cdr cmbox))
	 (mbox-time (current-milliseconds))
    (values (equal? res cookie) delta)))
	 (sres      (send uconn host-port qrykey cmd data))) ;; short res
    (if (eq? sres 'ack) ;; BUG: change to be less than server:expiration-timeout?
	(let* ((mbox-timeout-secs    120 #;(if (eq? 'primordial (thread-name (current-thread)))
				     #f
				     120)) ;; timeout)
	       (mbox-timeout-result 'MBOX_TIMEOUT)
	       (res                  (mailbox-receive! mbox mbox-timeout-secs mbox-timeout-result))
	       (mbox-receive-time    (current-milliseconds)))
	  ;; (put-cmbox uconn cmbox) ;; reuse mbox and cookie. is it worth it?
	  (hash-table-delete! (udat-mboxes uconn) qrykey)
	  (if (eq? res 'MBOX_TIMEOUT)
	      (begin
		(print "WARNING: mbox timed out for query "cmd", with data "data
		       ", waiting for response from "host-port".")

		;; here it might make sense to clean up connection records and force clean start?
		;; NO. The progam using ulex needs to do the reset. Right thing here is exception
;; returns: success pingtime
;;
;; NOTE: causes all references to this worker to be wiped out in the
		
		#f)  ;; convert to raising exception?
	      res))
	(begin
	  (print "ERROR: Communication failed? Got "sres)
	  #f))))
  
;; callee (ususally the captain)
;; send a request to the given host-port and register a mailbox in udata
;; wait for the mailbox data and return it
;;
(define (goodbye-ping udata host-port)
  (let* ((start  (current-milliseconds))
	 (cookie (make-cookie udata))
	 (dbs    (udat-my-dbs udata))
	 (res (send udata host-port 'goodbye cookie "nomsg" retval: #t))
(define (send-receive uconn host-port cmd data)
  (let* ((start-time (current-milliseconds))
	 (result     (cond
		      ((member cmd '(ping goodbye)) ;; these are immediate
		       (send uconn host-port 'ping cmd data))
		      ((eq? (work-method) 'direct)
		       ;; the result from send will be the actual result, not an 'ack
		       (send uconn host-port 'direct cmd data))
	 (delta (- (current-milliseconds) start)))
    (values (equal? res cookie) delta)))

		      (else
		       (case (return-method)
			 ((polling)
(define (goodbye-captain udata)
  (let* ((host-port (udat-captain-host-port udata)))
    (if host-port
	(goodbye-ping udata host-port)
	(values #f -1))))

(define (get-db-owner udata dbname dbtype)
			  (send-via-polling uconn host-port cmd data))
			 ((mailbox) 
			  (send-via-mailbox uconn host-port cmd data))
			 (else
			  (print "ULEX ERROR: unrecognised return-method "(return-method)".")
			  #f)))))
	 (duration    (- (current-milliseconds) start-time)))
    ;; this is ONLY for development and debugging. It will be removed once Ulex is stable.
    (if (< 5000 duration)
	(print "ULEX WARNING: round-trip took "(inexact->exact (round (/ duration 1000)))
	       " seconds; "cmd", host-port="host-port", data="data))
    result))
    

  (let* ((host-port (udat-captain-host-port udata)))
    (if host-port
	(let* ((cookie (make-cookie udata))
	       (msg    #f) ;; (conc dbname " " dbtype))
;;======================================================================
;; responder side
;;======================================================================

;; take a request, rdat, and if not immediate put it in the work queue
;;
	       (params `(,dbname ,dbtype))
	       (res    (send udata host-port 'db-owner cookie msg
			     params: params retval: #t)))
	  (match (string-split res)
	    ((retcookie owner-host-port)
	     (values (equal? retcookie cookie) owner-host-port))))
;; Reserved cmds; ack ping goodbye response
;;
(define (ulex-handler uconn rdat)
  (assert (list? rdat) "FATAL: ulex-handler give rdat as not list")
  (match rdat ;;  (string-split controldat)
    ((rem-host-port qrykey cmd params);; timedata)
     ;; (print "ulex-handler got: "rem-host-port" qrykey: "qrykey" cmd: "cmd" params: "params)
	(values #f -1))))

;; called in ulex-handler to dispatch work, called on the workers side
     (case cmd
       ;; ((ack )(print "Got ack! But why? Should NOT get here.") 'ack)
       ((ping)
	;; (print "Got Ping!")
	;; (add-to-work-queue uconn rdat)
	'ack)
;;     calls (proc params data)
;;     returns result with cookie
       ((goodbye)
;;
;; pdat is the info of the caller, used to send the result data
;; prockey is key into udat-handlers hash dereferencing a proc
;; procparam is a first param handed to proc - often to do further derefrencing
;; NOTE: params is intended to be a list of strings, encoding on data
;;       is up to the user but data must be a single line
	;; just clear out references to the caller. NOT COMPLETE
	(add-to-work-queue uconn rdat)
	'ack)
       ((response) ;; this is a result from remote processing, send it as mail ...
	(case (return-method)
	  ((polling)
	   (hash-table-set! (udat-mboxes uconn) qrykey (cons 'ok params))
	   'ack)
	  ((mailbox)
	   (let ((mbox (hash-table-ref/default (udat-mboxes uconn) qrykey #f)))
	     (if mbox
		 (begin
		   (mailbox-send! mbox params) ;; params here is our result
		   'ack)
		 (begin
		   (print "ERROR: received result but no associated mbox for cookie "qrykey)
		   'no-mbox-found))))
	  (else (print "ULEX ERROR: unrecognised return-method "(return-method))
		'bad-return-method)))
       (else ;; generic request - hand it to the work queue
	(add-to-work-queue uconn rdat)
	'ack)))
    (else
     (print "ULEX ERROR: bad rdat "rdat)
     'bad-rdat)))

;; given an already set up uconn start the cmd-loop
;;
(define (process-request udata pdat dbname cookie prockey procparam data)
  (let* ((dbrec (ulex-open-db udata dbname))     ;; this will be a dbconn record, looks for in udata first
	 (proc  (hash-table-ref udata prockey)))
    (let* ((result (proc dbrec procparam data)))
      result)))

(define (ulex-cmd-loop uconn)
  (let* ((serv-listener (udat-socket uconn))
	 (listener      (lambda ()
			  (let loop ((state 'start))
			    (let-values (((inp oup)(tcp-accept serv-listener)))
			      ;; (mutex-lock! *send-mutex*) ;; DOESN'T SEEM TO HELP
			      (let* ((rdat  (deserialize inp)) ;; '(my-host-port qrykey cmd params)
				     (resp  (ulex-handler uconn rdat)))
				(serialize resp oup)
				(close-input-port inp)
				(close-output-port oup)
				;; (mutex-unlock! *send-mutex*) ;; DOESN'T SEEM TO HELP
				)
;; remote-request - send to remote to process in process-request
;; uconn comes from a call to connect and can be used instead of calling connect again
;; uconn is the host-port to call
;; we send dbname to the worker so they know which file to open
			      (loop state))))))
    ;; start N of them
    (let loop ((thnum   0)
	       (threads '()))
      (if (< thnum 100)
	  (let* ((th (make-thread listener (conc "listener" thnum))))
	    (thread-start! th)
	    (loop (+ thnum 1)
		  (cons th threads)))
	  (map thread-join! threads)))))

;; data must be a string with no newlines, it will be handed to the proc
;; at the remote site unchanged. It is up to the user to encode/decode it's contents
;; add a proc to the cmd list, these are done symetrically (i.e. in all instances)
;; so that the proc can be dereferenced remotely
;;
;;   rtype: immediate, read-only, normal, low-priority
;; 
(define (remote-request udata uconn rtype dbname prockey procparam data)
  (let* ((cookie    (make-cookie udata)))
    (send-receive udata uconn rtype cookie data `(,prockey procparam))))

(define (set-work-handler uconn proc)
(define (ulex-open-db udata dbname)
  #f)

  (udat-work-proc-set! uconn proc))

;;======================================================================
;; Ulex db
;;
;;   - track who is captain, lease expire time
;;   - track who owns what db, lease
;;
;; work queues - this is all happening on the listener side
;;======================================================================

;;
;;
(define (ulex-dbfname)
  (let ((dbdir (conc (get-environment-variable "HOME") "/.ulex")))
    (if (not (file-exists? dbdir))
	(create-directory dbdir #t))
    (conc dbdir "/network.db")))
	 
;; rdat is (rem-host-port qrykey cmd params)
					     
(define (add-to-work-queue uconn rdat)
  #;(queue-add! (udat-work-queue uconn) rdat)
  (case (work-method)
    ((threads)
     (thread-start! (make-thread (lambda ()
				   (do-work uconn rdat))
				 "worker thread")))
    ((mailbox)
     (mailbox-send! (udat-work-queue uconn) rdat))
    ((direct)
     (do-work uconn rdat))
    (else
     (print "ULEX ERROR: work-method "(work-method)" not recognised, using mailbox.")
     (mailbox-send! (udat-work-queue uconn) rdat))))
     
;; always goes in ~/.ulex/network.db
;; role is captain, adjutant, node
;; move the logic to return the result somewhere else?
;;
(define (ulexdb-setup)
  (let* ((dbfname (ulex-dbfname))
	 (have-db (file-exists? dbfname))
	 (db      (sqlite3:open-database dbfname)))
(define (do-work uconn rdat)
  (let* ((proc (udat-work-proc uconn))) ;; get it each time - conceivebly it could change
    ;; put this following into a do-work procedure
    (match rdat
      ((rem-host-port qrykey cmd params)
       (let* ((start-time (current-milliseconds))
	      (result (proc rem-host-port qrykey cmd params))
	      (end-time (current-milliseconds))
	      (run-time (- end-time start-time)))
	 (case (work-method)
    (sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 136000))
    (sqlite3:execute db "PRAGMA synchronous = 0;")
    (if (not have-db)
	(sqlite3:with-transaction
	 db
	 (lambda ()
	   (for-each
	    (lambda (stmt)
	      (if stmt (sqlite3:execute db stmt)))
	    `("CREATE TABLE IF NOT EXISTS nodes
                 (id INTEGER PRIMARY KEY,
                  role  TEXT NOT NULL,
                  host  TEXT NOT NULL,
                  port TEXT NOT NULL,
                  ipadr TEXT NOT NULL,
	   ((direct) result)
	   (else
	    (if (> run-time 1000)(print "ULEX: Warning, work "cmd", "params" done in "run-time" ms"))
	    ;; send 'response as cmd and result as params
	    (send uconn rem-host-port qrykey 'response result) ;; could check for ack
	    (let* ((duration (- (current-milliseconds) end-time)))
	      (if (> duration 500)(print "ULEX: Warning, response sent back to "rem-host-port" for "qrykey" in "duration"ms")))))))
      (MBOX_TIMEOUT 'do-work-timeout)
      (else
       (print "ERROR: rdat "rdat", did not match rem-host-port qrykey cmd params")))))

                  pid   INTEGER NOT NULL,
                  zcard TEXT NOT NULL,
                  regtime INTEGER DEFAULT (strftime('%s','now')),
                  lease_thru INTEGER DEFAULT (strftime('%s','now')),
                  last_update INTEGER DEFAULT (strftime('%s','now')));"
	      "CREATE TRIGGER  IF NOT EXISTS update_nodes_trigger AFTER UPDATE ON nodes
                             FOR EACH ROW
;; NEW APPROACH:
;;   
(define (process-work-queue uconn) 
  (let ((wqueue (udat-work-queue uconn))
	(proc   (udat-work-proc  uconn))
	(numthr (udat-numthreads uconn)))
    (let loop ((thnum    1)
                               BEGIN 
                                 UPDATE nodes SET last_update=(strftime('%s','now'))
                                   WHERE id=old.id;
                               END;"
	      "CREATE TABLE IF NOT EXISTS dbs
                 (id INTEGER PRIMARY KEY,
                  dbname TEXT NOT NULL,
                  dbfile TEXT NOT NULL,
                  dbtype TEXT NOT NULL,
                  host_port TEXT NOT NULL,
	       (threads '()))
      (let ((thlst (cons (make-thread (lambda ()
					(let work-loop ()
					  (let ((rdat (mailbox-receive! wqueue 24000 'MBOX_TIMEOUT)))
					    (do-work uconn rdat))
					  (work-loop)))
				      (conc "work thread " thnum))
			 threads)))
	(if (< thnum numthr)
	    (loop (+ thnum 1)
		  thlst)
	    (begin
	      (print "ULEX: Starting "(length thlst)" worker threads.")
	      (map thread-start! thlst)
	      (print "ULEX: Threads started. Joining all.")
	      (map thread-join! thlst)))))))

;; below was to enable re-use of connections. This seems non-trivial so for
;; now lets open on each call
;;
;; ;; given host-port get or create peer struct
                  regtime INTEGER DEFAULT (strftime('%s','now')),
                  lease_thru INTEGER DEFAULT (strftime('%s','now')),
                  last_update INTEGER DEFAULT (strftime('%s','now')));"
	      "CREATE TRIGGER  IF NOT EXISTS update_dbs_trigger AFTER UPDATE ON dbs
                             FOR EACH ROW
;; ;;
;; (define (udat-get-peer uconn host-port)
;;   (or (hash-table-ref/default (udat-peers uconn) host-port #f)
;;       ;; no peer, so create pdat and init it
                               BEGIN 
                                 UPDATE dbs SET last_update=(strftime('%s','now'))
                                   WHERE id=old.id;
                               END;")))))
    db))

(define (get-host-port-lease db dbfname)
;;       
;;       ;; NEED stack of connections, pop and use; inp, oup,
;;       ;; creation_time (remove and create new if over 24hrs old
;;       ;; 
;;       (let ((pdat (make-pdat host-port: host-port)))
;; 	(hash-table-set! (udat-peers uconn) host-port pdat)
;; 	pdat)))
;; 
;; ;; is pcon alive
;; 
;; ;; given host-port and pdat get a pcon
  (sqlite3:fold-row
   (lambda (rem host-port lease-thru)
     (list host-port lease-thru))
;; ;;
;; (define (pdat-get-pcon pdat host-port)
;;   (let loop ((conns (pdat-conns pdat)))
;;     (if (null? conns) ;; none? make and return - do NOT add - it will be pushed back on list later
;; 	(init-pcon (make-pcon))
;; 	(let* ((conn (pop conns)))
;; 	  
;; ;; given host-port get a pcon struct
   #f db "SELECT host_port,lease_thru FROM dbs WHERE dbfile = ?" dbfname))
  
;; ;;
;; (define (udat-get-pcon 
      
(define (register-captain db host ipadr port pid zcard #!key (lease 20))
  (let* ((dbfname (ulex-dbfname))
	 (host-port  (conc host ":" port)))
    (sqlite3:with-transaction
     db
     (lambda ()
       (match (get-host-port-lease db dbfname)
	 ((host-port lease-thru)
;;======================================================================
;; misc utils
;;======================================================================

(define (make-cookie uconn)
  (let ((newcnum (+ (udat-cnum uconn) 1)))
    (udat-cnum-set! uconn newcnum)
    (conc (udat-host-port uconn) ":"
	  (if (> (current-seconds) lease-thru)
	      (begin
		(sqlite3:execute db "UPDATE dbs SET host_port=?,lease_thru=? WHERE dbname=?"
				 (conc host ":" port)
				 (+ (current-seconds) lease)
	  newcnum)))

;; cookie/mboxes

				 dbfname)
		#t)
	      #f))
	 (#f  (sqlite3:execute db "INSERT INTO dbs (dbname,dbfile,dbtype,host_port,lease_thru) VALUES (?,?,?,?,?)"
			       "captain" dbfname "captain" host-port (+ (current-seconds) lease)))
	 (else (print "ERROR: Unrecognised result from fold-row")
	       (exit 1)))))))
							    
;; we store each mbox with a cookie (<cookie> . <mbox>)
;;
(define (get-cmbox uconn)
  (if (null? (udat-avail-cmboxes uconn))
      (let ((cookie (make-cookie uconn))
	    (mbox   (make-mailbox)))
	(hash-table-set! (udat-mboxes uconn) cookie mbox)
	`(,cookie . ,mbox))
      (let ((cmbox (car (udat-avail-cmboxes uconn))))
	(udat-avail-cmboxes-set! uconn (cdr (udat-avail-cmboxes uconn)))
	cmbox)))

(define (put-cmbox uconn cmbox)
  (udat-avail-cmboxes-set! uconn (cons cmbox (udat-avail-cmboxes uconn))))

(define (pp-uconn uconn)
  (pp (udat->alist uconn)))

  
;;======================================================================
;; network utilities
;;======================================================================

;; NOTE: Look at address-info egg as alternative to some of this

(define (rate-ip ipaddr)
  (regex-case ipaddr
    ( "^127\\..*" _ 0 )
    ( "^(10\\.0|192\\.168)\\..*" _ 1 )
    ( else 2 ) ))

;; Change this to bias for addresses with a reasonable broadcast value?
;;
(define (ip-pref-less? a b)
  (> (rate-ip a) (rate-ip b)))
  


(define (get-my-best-address)
  (let ((all-my-addresses (get-all-ips))
  (let ((all-my-addresses (get-all-ips)))
        ;;(all-my-addresses-old (vector->list (hostinfo-addresses (hostname->hostinfo (get-host-name)))))
        )
    (cond
     ((null? all-my-addresses)
      (get-host-name))                                          ;; no interfaces?
     ((eq? (length all-my-addresses) 1)
      (car all-my-addresses))                      ;; only one to choose from, just go with it
     
     (else
      (car (sort all-my-addresses ip-pref-less?)))
      (car (sort all-my-addresses ip-pref-less?))))))
     ;; (else 
     ;;  (ip->string (car (filter (lambda (x)                      ;; take any but 127.
     ;;    			 (not (eq? (u8vector-ref x 0) 127)))
     ;;    		       all-my-addresses))))

     )))

(define (get-all-ips-sorted)
  (sort (get-all-ips) ip-pref-less?))

(define (get-all-ips)
  (map ip->string (vector->list 
		   (hostinfo-addresses
		    (host-information (current-hostname))))))

  (map address-info-host
(define (udat-my-host-port udata)
  (if (and (udat-my-address udata)(udat-my-port udata))
      (conc (udat-my-address udata) ":" (udat-my-port udata))
      #f))

(define (udat-captain-host-port udata)
  (if (and (udat-captain-address udata)(udat-captain-port udata))
      (conc (udat-captain-address udata) ":" (udat-captain-port udata))
      #f))

(define (udat-get-peer udata host-port)
  (hash-table-ref/default (udat-peers udata) host-port #f))

;; struct for keeping track of others we are talking to

(defstruct peer
  (addr-port       #f)
  (hostname        #f)
  (pid             #f)
  ;; (inp             #f)
  ;; (oup             #f)
  (dbs            '()) ;; list of databases this peer is currently handling
  )

(defstruct work
  (peer-dat   #f)
  (handlerkey #f)
  (qrykey     #f)
  (data       #f)
  (start      (current-milliseconds)))

#;(defstruct dbowner
  (pdat        #f)
  (last-update (current-seconds)))

;;======================================================================
;; Captain functions
;;======================================================================

;; NB// This needs to be started in a thread
;;
;; setup to be a captain
;;   - local server MUST be started already
;;   - create pkt
;;   - start server port handler
;;
(define (setup-as-captain udata)
  (if (create-captain-pkt udata)
      (let* ((my-addr (udat-my-address udata))
	     (my-port (udat-my-port    udata))
	     (th (make-thread (lambda ()
				(ulex-handler-loop udata)) "Captain handler")))
	(udat-handler-thread-set! udata th)
	(udat-captain-address-set! udata my-addr)
	(udat-captain-port-set!    udata my-port)
	(thread-start! th))
      (begin
	(print "ERROR: failed to create captain pkt")
	#f)))

;; given a pkts dir read 
;;
(define (get-all-captain-pkts udata)
  (let* ((pktsdir       (let ((d (udat-cpkts-dir udata)))
			  (if (file-exists? d)
			      d
			      (begin
				(create-directory d #t)
				d))))
	 (all-pkt-files (glob (conc pktsdir "/*.pkt")))
	 (pkt-spec      (udat-cpkt-spec udata)))
    (map (lambda (pkt-file)
	   (read-pkt->alist pkt-file pktspec: pkt-spec))
	 all-pkt-files)))

;; sort by D then Z, return one, choose the oldest then
;; differentiate if needed using the Z key
;;l
(define (get-winning-pkt pkts)
  (if (null? pkts)
      #f
      (car (sort pkts (lambda (a b)
			(let ((ad (string->number (alist-ref 'D a)))
			      (bd (string->number (alist-ref 'D b))))
			  (if (eq? a b)
			      (let ((az (alist-ref 'Z a))
				    (bz (alist-ref 'Z b)))
				(string>=? az bz))
			      (> ad bd))))))))

;; put the host, ip, port and pid into a pkt in
;; the captain pkts dir
;;  - assumes user has already fired up a server
;;    which will be in the udata struct
;;
(define (create-captain-pkt udata)
  (if (not (udat-serv-listener udata))
      (begin
	(print "ERROR: create-captain-pkt called with out a listener")
	#f)
      (let* ((pktdat `((port   . ,(udat-my-port udata))
		       (host   . ,(udat-my-hostname udata))
		       (ipaddr . ,(udat-my-address udata))
		       (pid    . ,(udat-my-pid     udata))))
	     (pktdir  (udat-cpkts-dir udata))
	     (pktspec (udat-cpkt-spec udata))
	     )
	(udat-my-cpkt-key-set!
	 udata
	 (write-alist->pkt
	  pktdir
	  pktdat
	  pktspec: pktspec
	  ptype:   'captain))
	(udat-my-cpkt-key udata))))

;; remove pkt associated with captn (the Z key .pkt)
;;
(define (remove-captain-pkt udata captn)
  (let ((Z       (alist-ref 'Z captn))
	(cpktdir (udat-cpkts-dir udata)))
    (delete-file* (conc cpktdir "/" Z ".pkt"))))

;; call all known peers and tell them to delete their info on the captain
;; thus forcing them to re-read pkts and connect to a new captain
;; call this when the captain needs to exit and if an older captain is
;; detected. Due to delays in sending file meta data in NFS multiple
;; captains can be initiated in a "Storm of Captains", book soon to be
;; on Amazon
;;
(define (drop-captain udata)
  (let* ((peers (hash-table-keys (udat-peers udata)))
	 (cookie (make-cookie udata)))
    (for-each
     (lambda (host-port)
       (send udata host-port 'dropcaptain cookie "nomsg" retval: #t))
     peers)))

;;======================================================================
;; server primitives
;;======================================================================

(define (make-cookie udata)
  (let ((newcnum (+ (udat-cnum udata) 1)))
    (udat-cnum-set! udata newcnum)
    (conc (udat-my-address udata) ":"
	  (udat-my-port    udata) "-"
	  (udat-my-pid     udata) "-"
	  newcnum)))

;; create a tcp listener and return a populated udat struct with
;; my port, address, hostname, pid etc.
;; return #f if fail to find a port to allocate.
;;
;;  if udata-in is #f create the record
;;  if there is already a serv-listener return the udata
;;
(define (start-server-find-port udata-in #!optional (port 4242))
  (let ((udata (or udata-in (make-udat))))
    (if (udat-serv-listener udata) ;; TODO - add check that the listener is alive and ready?
	udata
	(handle-exceptions
	    exn
	  (if (< port 65535)
	      (start-server-find-port udata (+ port 1))
	      #f)
	  (connect-server udata port)))))

(define (connect-server udata port)
  ;; (tcp-listener-socket LISTENER)(socket-name so)
  ;; sockaddr-address, sockaddr-port, sockaddr->string
  (let* ((tlsn (tcp-listen port 1000 #f)) ;; (tcp-listen TCPPORT [BACKLOG [HOST]])
	 (addr (get-my-best-address))) ;; (hostinfo-addresses (host-information (current-hostname)))
    (udat-my-address-set!    udata addr)
    (udat-my-port-set!       udata port)
    (udat-my-hostname-set!   udata (get-host-name))
    (udat-serv-listener-set! udata tlsn)
    udata))

(define (get-peer-dat udata host-port #!optional (hostname #f)(pid #f))
  (let* ((pdat (or (udat-get-peer udata host-port)
		   (handle-exceptions ;; ERROR - MAKE THIS EXCEPTION HANDLER MORE SPECIFIC
		    exn
		    #f
		    (let ((npdat (make-peer addr-port: host-port)))
		      (if hostname (peer-hostname-set! npdat hostname))
		      (if pid (peer-pid-set! npdat pid))
		      npdat)))))
    pdat))

;; send structured data to recipient
;;
;;  NOTE: qrykey is what was called the "cookie" previously
;;
;;     retval tells send to expect and wait for return data (one line) and return it or time out
;;       this is for ping where we don't want to necessarily have set up our own server yet.
;;
(define (send udata host-port handler qrykey data
	      #!key (hostname #f)(pid #f)(params '())(retval #f))
  (let* ((my-host-port (udat-my-host-port udata))
	 (isme         (equal? host-port my-host-port)) ;; am I calling
							;; myself?
	 (dat          (list
			handler              ;; " "
			my-host-port         ;; " "
			(udat-my-pid  udata) ;; " "
			qrykey
			params ;;(if (null? params) "" (conc " "
			       ;;(string-intersperse params " ")))
			)))
    ;; (print "send isme is " (if isme "true!" "false!") ",
    ;; my-host-port: " my-host-port ", host-port: " host-port)
    (if isme
	(ulex-handler udata dat data)
	(handle-exceptions ;; ERROR - MAKE THIS EXCEPTION HANDLER MORE
			   ;; SPECIFIC
	    exn
	    #f 
	  (let-values (((inp oup)(tcp-connect host-port)))
	    ;;
	    ;; CONTROL LINE:
	    ;;    handlerkey host:port pid qrykey params ...
	    ;;
	    (let ((res
		   (if (and inp oup)
		       (let* ()
			 (if my-host-port
			     (begin
			       (write dat  oup)
			       (write data oup) ;; send as sexpr
			       ;; (print "Sent dat: " dat " data: " data)
			       (if retval
				   (read inp)
				   #t))
			     (begin
			       (print "ERROR: send called but no receiver has been setup. Please call setup first!")
			       #f))
			 ;; NOTE: DO NOT BE TEMPTED TO LOOK AT ANY DATA ON INP HERE!
			 ;;       (there is a listener for handling that)
			 )
		       #f))) ;; #f means failed to connect and send
	      (close-input-port inp)
	      (close-output-port oup)
	      res))))))

;; send a request to the given host-port and register a mailbox in udata
;; wait for the mailbox data and return it
;;
(define (send-receive udata host-port handler qrykey data #!key (hostname #f)(pid #f)(params '())(timeout 20))
  (let ((mbox      (make-mailbox))
	(mbox-time (current-milliseconds))
	(mboxes    (udat-mboxes udata)))
    (hash-table-set! mboxes qrykey mbox)
    (if (send udata host-port handler qrykey data hostname: hostname pid: pid params: params)
	(let* ((mbox-timeout-secs    timeout)
	       (mbox-timeout-result 'MBOX_TIMEOUT)
	       (res                  (mailbox-receive! mbox mbox-timeout-secs mbox-timeout-result))
	       (mbox-receive-time    (current-milliseconds)))
	  (hash-table-delete! mboxes qrykey)
	  (if (eq? res 'MBOX_TIMEOUT)
	      #f
	      res))
	#f))) ;; #f means failed to communicate

;; 
(define (ulex-handler udata controldat data)
  (print "controldat: " controldat " data: " data)
  (match controldat ;;  (string-split controldat)
    ((handlerkey host-port pid qrykey params ...)
     ;; (print "handlerkey: " handlerkey " host-port: " host-port " pid: " pid " qrykey: " qrykey " params: " params)
     (case handlerkey ;; (string->symbol handlerkey)
       ((ack)(print "Got ack!"))
       ((ping) ;; special case - return result immediately on the same connection
	(let* ((proc  (hash-table-ref/default (udat-handlers udata) 'ping #f))
	       (val   (if proc (proc) "gotping"))
	       (peer  (make-peer addr-port: host-port pid: pid))
	       (dbshash (udat-dbowners udata)))
	  (peer-dbs-set! peer params) ;; params for ping is list of dbs owned by pinger
	  (for-each (lambda (dbfile)
		      (hash-table-set! dbshash dbfile host-port)) ;; WRONG?
		    params) ;; register each db in the dbshash
	  (if (not (hash-table-exists? (udat-peers udata) host-port))
	      (hash-table-set! (udat-peers udata) host-port peer)) ;; save the details of this caller in peers
	  qrykey)) ;; End of ping
       ((goodbye)
	;; remove all traces of the caller in db ownership etc.
	(let* ((peer  (hash-table-ref/default (udat-peers udata) host-port #f))
	       (dbs   (if peer (peer-dbs peer) '()))
	       (dbshash (udat-dbowners udata)))
	  (for-each (lambda (dbfile)(hash-table-delete! dbshash dbfile)) dbs)
	  (hash-table-delete! (udat-peers udata) host-port)
	  qrykey))
       ((dropcaptain)
	;; remove all traces of the captain
	(udat-captain-address-set! udata #f)
	(udat-captain-host-set!    udata #f)
	(udat-captain-port-set!    udata #f)
	(udat-captain-pid-set!     udata #f)
	qrykey)
       ((rucaptain) ;; remote is asking if I'm the captain
	(if (udat-my-cpkt-key udata) "yes" "no"))
       ((db-owner) ;; given a db name who do I send my queries to
	;; look up the file in handlers, if have an entry ping them to be sure
	;; they are still alive and then return that host:port.
	;; if no handler found or if the ping fails pick from peers the oldest that
	;; is managing the fewest dbs
	(match params
	  ((dbfile dbtype)
	   (let* ((owner-host-port (hash-table-ref/default (udat-dbowners udata) dbfile #f)))
	     (if owner-host-port
		 (conc qrykey " " owner-host-port)
		 (let* ((pdat (or (hash-table-ref/default (udat-peers udata) host-port #f) ;; no owner - caller gets to own it!
				  (make-peer addr-port: host-port pid: pid dbs: `(,dbfile)))))
		   (hash-table-set! (udat-peers udata) host-port pdat)
		   (hash-table-set! (udat-dbowners udata) dbfile host-port)
		   (conc qrykey " " host-port)))))
	  (else (conc qrykey " BADDATA"))))
       ;; for work items:
       ;;    handler is one of; immediate, read-only, read-write, high-priority
       ((immediate read-only normal low-priority) ;; do this work immediately
	;; host-port (caller), pid (caller), qrykey (cookie), params <= all from first line
	;; data => a single line encoded however you want, or should I build json into it?
	(print "handlerkey=" handlerkey)
	(let* ((pdat (get-peer-dat udata host-port)))
	  (match params ;; dbfile prockey procparam
	    ((dbfile prockey procparam)
	     (case handlerkey
	       ((immediate read-only)
		(process-request udata pdat dbfile qrykey prockey procparam data))
	       ((normal low-priority) ;; split off later and add logic to support low priority
		(add-to-work-queue udata pdat dbfile qrykey prockey procparam data))
	       (else
		#f)))
	    (else
	     (print "INFO: params=" params " handlerkey=" handlerkey " controldat=" controldat)
	     #f))))
       (else
	;; (add-to-work-queue udata (get-peer-dat udata host-port) handlerkey qrykey data)
	#f)))
    (else
     (print "BAD DATA? controldat=" controldat " data=" data)
     #f)));; handles the incoming messages and dispatches to queues

;;
(define (ulex-handler-loop udata)
  (let* ((serv-listener (udat-serv-listener udata)))
    ;; data comes as two lines
    ;;   handlerkey resp-addr:resp-port hostname pid qrykey [dbpath/dbfile.db]
    ;;   data
    (let loop ((state 'start))
      (let-values (((inp oup)(tcp-accept serv-listener)))
	(let* ((controldat (read inp))
	       (data       (read inp))
	       (resp       (ulex-handler udata controldat data)))
	  (if resp (write resp oup))
	  (close-input-port inp)
	  (close-output-port oup))
	(loop state)))))

;; add a proc to the handler list, these are done symetrically (i.e. in all instances)
;; so that the proc can be dereferenced remotely
;;
(define (register-handler udata key proc)
  (hash-table-set! (udat-handlers udata) key proc))


;;======================================================================
;; work queues
;;======================================================================

(define (add-to-work-queue udata peer-dat handlerkey qrykey data)
  (let ((wdat (make-work peer-dat: peer-dat handlerkey: handlerkey qrykey: qrykey data: data)))
    (if (udat-busy udata)
	(queue-add! (udat-work-queue udata) wdat)
	(process-work udata wdat)) ;; passing in wdat tells process-work to first process the passed in wdat
    ))

(define (do-work udata wdat)
  #f)

(define (process-work udata #!optional wdat)
  (if wdat (do-work udata wdat)) ;; process wdat
  (let ((wqueue (udat-work-queue udata)))
    (if (not (queue-empty? wqueue))
	(let loop ((wd (queue-remove! wqueue)))
	  (do-work udata wd)
	  (if (not (queue-empty? wqueue))
	      (loop (queue-remove! wqueue)))))))

;;======================================================================
;; Generic db handling
;;   setup a inmem db instance
;;   open connection to on-disk db
;;   sync on-disk db to inmem
;;   get lock in on-disk db for dbowner of this db
;;   put sync-proc, init-proc, on-disk handle, inmem handle in dbconn stuct
;;   return the stuct
;;======================================================================

(defstruct dbconn
  (fname  #f)
  (inmem  #f)
  (conn   #f)
  (sync   #f) ;; sync proc
  (init   #f) ;; init proc
  (lastsync (current-seconds))
  )

(defstruct dbinfo
  (initproc #f)
  (syncproc #f))

;; open inmem and disk database
;;   init with initproc
;;   return db struct
;;
;;   appname; megatest, ulex or something else.
;;
(define (setup-db-connection udata fname-in appname dbtype)
  (let* ((is-ulex (eq? appname 'ulex))
	 (dbinf   (if is-ulex ;; ulex is a built-in special case
		      (make-dbinfo initproc: ulexdb-init syncproc: ulexdb-sync)
		      (hash-table-ref/default (udat-dbtypes udata) dbtype #f)))
	 (initproc (dbinfo-initproc dbinf))
	 (syncproc (dbinfo-syncproc dbinf))
	 (fname   (if is-ulex
		      (conc (udat-ulex-dir udata) "/ulex.db")
		      fname-in))
	 (inmem-db (open-and-initdb udata #f 'inmem (dbinfo-initproc dbinf)))
	 (disk-db  (open-and-initdb udata fname 'disk (dbinfo-initproc dbinf))))
    (make-dbconn inmem: inmem-db conn: disk-db sync: syncproc init: initproc)))

;; dest='inmem or 'disk
;;
(define (open-and-initdb udata filename dest init-proc)
  (let* ((inmem    (eq? dest 'inmem))
	 (dbfile   (if inmem
		       ":INMEM:"
		       filename))
	 (dbexists (if inmem #t (file-exists? dbfile)))
	 (db       (sqlite3:open-database dbfile)))
    (sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 136000))
    (if (not dbexists)
	(init-proc db))
    db))


;;======================================================================
;; Previous Ulex db stuff
;;======================================================================

(define (ulexdb-init db inmem)
  (sqlite3:with-transaction
   db
   (lambda ()
       (filter (lambda (x)
     (for-each
      (lambda (stmt)
	(if stmt (sqlite3:execute db stmt)))
      `("CREATE TABLE IF NOT EXISTS processes 
                 (id INTEGER PRIMARY KEY,
                  host  TEXT NOT NULL,
                  ipadr TEXT NOT NULL,
                  port  INTEGER NOT NULL,
                  pid   INTEGER NOT NULL,
                  regtime INTEGER DEFAULT (strftime('%s','now')),
                  last_update INTEGER DEFAULT (strftime('%s','now')));"
	(if inmem
	    "CREATE TRIGGER  IF NOT EXISTS update_proces_trigger AFTER UPDATE ON processes
                             FOR EACH ROW
                               BEGIN 
                                 UPDATE processes SET last_update=(strftime('%s','now'))
                                   WHERE id=old.id;
                               END;"
	    #f))))))

		 (equal? (address-info-type x) "tcp"))
;; open databases, do initial sync
(define (ulexdb-sync dbconndat udata)
  #f)

	       (address-infos (get-host-name)))))

) ;; END OF ULEX


;;; ;;======================================================================
;;; ;; D E B U G   H E L P E R S
;;; ;;======================================================================
;;;     
;;; (define (dbg> . args)
;;;   (with-output-to-port (current-error-port)
;;;     (lambda ()
;;;       (apply print "dbg> " args))))
;;; 
;;; (define (debug-pp . args)
;;;   (if (get-environment-variable "ULEX_DEBUG")
;;;       (with-output-to-port (current-error-port)
;;; 	(lambda ()
;;; 	  (apply pp args)))))
;;; 
;;; (define *default-debug-port* (current-error-port))
;;; 
;;; (define (sdbg> fn stage-name stage-start stage-end start-time . message)
;;;   (if (get-environment-variable "ULEX_DEBUG")
;;;       (with-output-to-port *default-debug-port* 
;;; 	(lambda ()
;;; 	  (apply print "ulex:" fn " " stage-name " took " (- (if stage-end stage-end (current-milliseconds)) stage-start) " ms. "
;;; 		 (if start-time
;;; 		     (conc "total time " (- (current-milliseconds) start-time)
;;; 			   " ms.")
;;; 		     "")
;;; 		 message
;;; 		 )))))

;;======================================================================
;; M A C R O S
;;======================================================================
;; iup callbacks are not dumping the stack, this is a work-around
;;

;; Some of these routines use:
;;
;;     http://www.cs.toronto.edu/~gfb/scheme/simple-macros.html
;;
;; Syntax for defining macros in a simple style similar to function definiton,
;;  when there is a single pattern for the argument list and there are no keywords.
;;
;; (define-simple-syntax (name arg ...) body ...)
;;
;; 
;; (define-syntax define-simple-syntax
;;   (syntax-rules ()
;;     ((_ (name arg ...) body ...)
;;      (define-syntax name (syntax-rules () ((name arg ...) (begin body ...)))))))
;; 
;; (define-simple-syntax (catch-and-dump proc procname)
;;   (handle-exceptions
;;    exn
;;    (begin
;;      (print-call-chain (current-error-port))
;;      (with-output-to-port (current-error-port)
;;        (lambda ()
;;          (print ((condition-property-accessor 'exn 'message) exn))
;;          (print "Callback error in " procname)
;;          (print "Full condition info:\n" (condition->list exn)))))
;;    (proc)))
;; 
;; 
;;======================================================================
;;  R E C O R D S
;;======================================================================

;;; ;; information about me as a server
;;; ;;
;;; (defstruct area
;;;   ;; about this area
;;;   (useportlogger #f)
;;;   (lowport       32768)
;;;   (server-type   'auto)  ;; auto=create up to five servers/pkts, main=create pkts, passive=no pkt (unless there are no pkts at all)
;;;   (conn          #f)
;;;   (port          #f)
;;;   (myaddr        (get-my-best-address))
;;;   pktid          ;; get pkt from hosts table if needed
;;;   pktfile
;;;   pktsdir
;;;   dbdir
;;;   (dbhandles     (make-hash-table)) ;; fname => list-of-dbh, NOTE: Should really never need more than one?
;;;   (mutex         (make-mutex))
;;;   (rtable        (make-hash-table)) ;; registration table of available actions
;;;   (dbs           (make-hash-table)) ;; filename => random number, used for choosing what dbs I serve
;;;   ;; about other servers
;;;   (hosts         (make-hash-table)) ;; key => hostdat
;;;   (hoststats     (make-hash-table)) ;; key => alist of fname => ( qcount . qtime )
;;;   (reqs          (make-hash-table)) ;; uri => queue
;;;   ;; work queues
;;;   (wqueues       (make-hash-table)) ;; fname => qdat
;;;   (stats         (make-hash-table)) ;; fname => totalqueries
;;;   (last-srvup    (current-seconds)) ;; last time we updated the known servers
;;;   (cookie2mbox   (make-hash-table)) ;; map cookie for outstanding request to mailbox of awaiting call
;;;   (ready #f)
;;;   (health        (make-hash-table)) ;; ipaddr:port => num failed pings since last good ping
;;;   )
;;; 
;;; ;; host stats
;;; ;;
;;; (defstruct hostdat
;;;   (pkt      #f)
;;;   (dbload   (make-hash-table))  ;; "dbfile.db" => queries/min
;;;   (hostload #f)                 ;; normalized load ( 5min load / numcpus )
;;;   )
;;; 
;;; ;; dbdat
;;; ;;
;;; (defstruct dbdat
;;;   (dbh    #f)
;;;   (fname  #f)
;;;   (write-access #f)
;;;   (sths   (make-hash-table))  ;; hash mapping query strings to handles
;;;   )
;;; 
;;; ;; qdat
;;; ;;
;;; (defstruct qdat
;;;   (writeq  (make-queue))
;;;   (readq   (make-queue))
;;;   (rwq     (make-queue))
;;;   (logq    (make-queue)) ;; do we need a queue for logging? yes, if we use sqlite3 db for logging
;;;   (osshort (make-queue))
;;;   (oslong  (make-queue))
;;;   (misc    (make-queue)) ;; used for things like ping-full
;;;   )
;;; 
;;; ;; calldat
;;; ;;
;;; (defstruct calldat
;;;   (ctype 'dbwrite)
;;;   (obj   #f)              ;; this would normally be an SQL statement e.g. SELECT, INSERT etc.
;;;   (rtime (current-milliseconds)))
;;; 
;;; ;; make it a global? Well, it is local to area module
;;; 
;;; (define *pktspec*
;;;   `((server (hostname . h)
;;; 	    (port     . p)
;;; 	    (pid      . i)
;;; 	    (ipaddr   . a)
;;; 	    )
;;;     (data   (hostname . h)  ;; sender hostname
;;; 	    (port     . p)  ;; sender port
;;; 	    (ipaddr   . a)  ;; sender ip
;;; 	    (hostkey  . k)  ;; sending host key - store info at server under this key
;;; 	    (servkey  . s)  ;; server key - this needs to match at server end or reject the msg
;;; 	    (format   . f)  ;; sb=serialized-base64, t=text, sx=sexpr, j=json
;;; 	    (data     . d)  ;; base64 encoded slln data
;;; 	    )))
;;; 
;;; ;; work item
;;; ;;
;;; (defstruct witem
;;;   (rhost #f)   ;; return host
;;;   (ripaddr #f) ;; return ipaddr
;;;   (rport #f)   ;; return port
;;;   (servkey #f) ;; the packet representing the client of this workitem, used by final send-message
;;;   (rdat  #f)   ;; the request - usually an sql query, type is rdat
;;;   (action #f)  ;; the action: immediate, dbwrite, dbread,oslong, osshort
;;;   (cookie #f)  ;; cookie id for response
;;;   (data   #f)  ;; the data payload, i.e. parameters
;;;   (result #f)  ;; the result from processing the data
;;;   (caller #f)) ;; the calling peer according to rpc itself
;;; 
;;; (define (trim-pktid pktid)
;;;   (if (string? pktid)
;;;       (substring pktid 0 4)
;;;       "nopkt"))
;;; 
;;; (define (any->number num)
;;;   (cond
;;;    ((number? num) num)
;;;    ((string? num) (string->number num))
;;;    (else num)))
;;; 
;;; (use trace)
;;; (trace-call-sites #t)
;;; 
;;; ;;======================================================================
;;; ;; D A T A B A S E   H A N D L I N G 
;;; ;;======================================================================
;;; 
;;; ;; look in dbhandles for a db, return it, else return #f
;;; ;;
;;; (define (get-dbh acfg fname)
;;;   (let ((dbh-lst (hash-table-ref/default (area-dbhandles acfg) fname '())))
;;;     (if (null? dbh-lst)
;;; 	(begin
;;; 	  ;; (print "opening db for " fname)
;;; 	  (open-db acfg fname)) ;; Note that the handles get put back in the queue in the save-dbh calls
;;; 	(let ((rem-lst (cdr dbh-lst)))
;;; 	  ;; (print "re-using saved connection for " fname)
;;; 	  (hash-table-set! (area-dbhandles acfg) fname rem-lst)
;;; 	  (car dbh-lst)))))
;;; 
;;; (define (save-dbh acfg fname dbdat)
;;;     ;; (print "saving dbh for " fname)
;;;     (hash-table-set! (area-dbhandles acfg) fname (cons dbdat (hash-table-ref/default (area-dbhandles acfg) fname '()))))
;;; 
;;; ;; open the database, if never before opened init it. put the handle in the
;;; ;; open db's hash table
;;; ;; returns: the dbdat
;;; ;;
;;; (define (open-db acfg fname)
;;;   (let* ((fullname     (conc (area-dbdir acfg) "/" fname))
;;; 	 (exists       (file-exists? fullname))
;;; 	 (write-access (if exists
;;; 			   (file-write-access? fullname)
;;; 			   (file-write-access? (area-dbdir acfg))))
;;; 	 (db           (sqlite3:open-database fullname))
;;; 	 (handler      (sqlite3:make-busy-timeout 136000))
;;; 	 )
;;;     (sqlite3:set-busy-handler! db handler)
;;;     (sqlite3:execute db "PRAGMA synchronous = 0;")
;;;     (if (not exists) ;; need to init the db
;;; 	(if write-access
;;; 	    (let ((isql (get-rsql acfg 'dbinitsql))) ;; get the init sql statements
;;; 	      ;; (sqlite3:with-transaction
;;; 	      ;;  db
;;; 	      ;;  (lambda ()
;;; 		 (if isql
;;; 		     (for-each
;;; 		      (lambda (sql)
;;; 			(sqlite3:execute db sql))
;;; 		      isql)))
;;; 	    (print "ERROR: no write access to " (area-dbdir acfg))))
;;;     (make-dbdat dbh: db fname: fname write-access: write-access)))
;;; 
;;; ;; This is a low-level command to retrieve or to prepare, save and return a prepared statment
;;; ;; you must extract the db handle
;;; ;;
;;; (define (get-sth db cache stmt)
;;;   (if (hash-table-exists? cache stmt)
;;;       (begin
;;; 	;; (print "Reusing cached stmt for " stmt)
;;; 	(hash-table-ref/default cache stmt #f))
;;;       (let ((sth (sqlite3:prepare db stmt)))
;;; 	(hash-table-set! cache stmt sth)
;;; 	;; (print "prepared stmt for " stmt)
;;; 	sth)))
;;; 
;;; ;; a little more expensive but does all the tedious deferencing - only use if you don't already
;;; ;; have dbdat and db sitting around
;;; ;;
;;; (define (full-get-sth acfg fname stmt)
;;;   (let* ((dbdat  (get-dbh acfg fname))
;;; 	 (db     (dbdat-dbh dbdat))
;;; 	 (sths   (dbdat-sths dbdat)))
;;;     (get-sth db sths stmt)))
;;; 
;;; ;; write to a db
;;; ;; acfg: area data
;;; ;; rdat: request data
;;; ;; hdat: (host . port)
;;; ;;
;;; ;; (define (dbwrite acfg rdat hdat data-in)
;;; ;;   (let* ((dbname (car data-in))
;;; ;; 	 (dbdat  (get-dbh acfg dbname))
;;; ;; 	 (db     (dbdat-dbh dbdat))
;;; ;; 	 (sths   (dbdat-sths dbdat))
;;; ;; 	 (stmt   (calldat-obj rdat))
;;; ;; 	 (sth    (get-sth db sths stmt))
;;; ;; 	 (data   (cdr data-in)))
;;; ;;     (print "dbname: " dbname " acfg: " acfg " rdat: " (calldat->alist rdat) " hdat: " hdat " data: " data)
;;; ;;     (print "dbdat: " (dbdat->alist dbdat))
;;; ;;     (apply sqlite3:execute sth data)
;;; ;;     (save-dbh acfg dbname dbdat)
;;; ;;     #t
;;; ;;     ))
;;; 
;;; (define (finalize-all-db-handles acfg)
;;;   (let* ((dbhandles (area-dbhandles acfg))  ;; dbhandles is hash of fname ==> dbdat
;;; 	 (num       0))
;;;     (for-each
;;;      (lambda (area-name)
;;;        (print "Closing handles for " area-name)
;;;        (let ((dbdats (hash-table-ref/default dbhandles area-name '())))
;;; 	 (for-each
;;; 	  (lambda (dbdat)
;;; 	    ;; first close all statement handles
;;; 	    (for-each
;;; 	     (lambda (sth)
;;; 	       (sqlite3:finalize! sth)
;;; 	       (set! num (+ num 1)))
;;; 	     (hash-table-values (dbdat-sths dbdat)))
;;; 	    ;; now close the dbh
;;; 	    (set! num (+ num 1))
;;; 	    (sqlite3:finalize! (dbdat-dbh dbdat)))
;;; 	  dbdats)))
;;;      (hash-table-keys dbhandles))
;;;     (print "FINALIZED " num " dbhandles")))
;;; 
;;; ;;======================================================================
;;; ;; W O R K   Q U E U E   H A N D L I N G 
;;; ;;======================================================================
;;; 
;;; (define (register-db-as-mine acfg dbname)
;;;   (let ((ht (area-dbs acfg)))
;;;     (if (not (hash-table-ref/default ht dbname #f))
;;; 	(hash-table-set! ht dbname (random 10000)))))
;;; 	
;;; (define (work-queue-add acfg fname witem)
;;;   (let* ((work-queue-start (current-milliseconds))
;;; 	 (action           (witem-action witem)) ;; NB the action is the index into the rdat actions
;;; 	 (qdat             (or (hash-table-ref/default (area-wqueues acfg) fname #f)
;;; 			       (let ((newqdat (make-qdat)))
;;; 				 (hash-table-set! (area-wqueues acfg) fname newqdat)
;;; 				 newqdat)))
;;; 	 (rdat             (hash-table-ref/default (area-rtable acfg) action #f)))
;;;     (if rdat
;;; 	(queue-add!
;;; 	 (case (calldat-ctype rdat)
;;; 	   ((dbwrite)   (register-db-as-mine acfg fname)(qdat-writeq qdat))
;;; 	   ((dbread)    (register-db-as-mine acfg fname)(qdat-readq  qdat))
;;; 	   ((dbrw)      (register-db-as-mine acfg fname)(qdat-rwq    qdat))
;;; 	   ((oslong)    (qdat-oslong qdat))
;;; 	   ((osshort)   (qdat-osshort qdat))
;;; 	   ((full-ping) (qdat-misc  qdat))
;;; 	   (else
;;; 	    (print "ERROR: no queue for " action ". Adding to dbwrite queue.")
;;; 	    (qdat-writeq qdat)))
;;; 	 witem)
;;; 	(case action
;;; 	  ((full-ping)(qdat-misc qdat))
;;; 	  (else
;;; 	   (print "ERROR: No action " action " was registered"))))
;;;     (sdbg> "work-queue-add" "queue-add" work-queue-start #f #f)
;;;     #t)) ;; for now, simply return #t to indicate request got to the queue
;;; 
;;; (define (doqueue acfg q fname dbdat dbh)
;;;   ;; (print "doqueue: " fname)
;;;   (let* ((start-time (current-milliseconds))
;;; 	 (qlen       (queue-length q)))
;;;     (if (> qlen 1)
;;; 	(print "Processing queue of length " qlen))
;;;     (let loop ((count      0)
;;; 	       (responses '()))
;;;       (let ((delta (- (current-milliseconds) start-time)))
;;; 	(if (or (queue-empty? q)
;;; 		(> delta 400)) ;; stop working on this queue after 400ms have passed
;;; 	    (list count delta responses) ;; return count, delta and responses list
;;; 	    (let* ((witem  (queue-remove! q))
;;; 		   (action (witem-action witem))
;;; 		   (rdat   (witem-rdat   witem))
;;; 		   (stmt   (calldat-obj rdat))
;;; 		   (sth    (full-get-sth acfg fname stmt))
;;; 		   (ctype  (calldat-ctype rdat))
;;; 		   (data   (witem-data   witem))
;;; 		   (cookie (witem-cookie witem)))
;;; 	      ;; do the processing and save the result in witem-result
;;; 	      (witem-result-set!
;;; 	       witem
;;; 	       (case ctype ;; action
;;; 		 ((noblockwrite) ;; blind write, no ack of success returned
;;; 		  (apply sqlite3:execute sth data)
;;; 		  (sqlite3:last-insert-rowid dbh))
;;; 		 ((dbwrite)      ;; blocking write   
;;; 		  (apply sqlite3:execute sth data)
;;; 		  #t)
;;; 		 ((dbread) ;; TODO: consider breaking this up and shipping in pieces for large query
;;; 		  (apply sqlite3:map-row (lambda x x) sth data))
;;; 		 ((full-ping)  'full-ping)
;;; 		 (else (print "Not ready for action " action) #f)))
;;; 	      (loop (add1 count)
;;; 		    (if cookie
;;; 			(cons witem responses)
;;; 			responses))))))))
;;; 
;;; ;; do up to 400ms of processing on each queue
;;; ;; - the work-queue-processor will allow the max 1200ms of work to complete but it will flag as overloaded
;;; ;; 
;;; (define (process-db-queries acfg fname)
;;;   (if (hash-table-exists? (area-wqueues acfg) fname)
;;;       (let* ((process-db-queries-start-time (current-milliseconds))
;;; 	     (qdat             (hash-table-ref/default (area-wqueues acfg) fname #f))
;;; 	     (queue-sym->queue (lambda (queue-sym)
;;; 				 (case queue-sym  ;; lookup the queue from qdat given a name (symbol)
;;; 				   ((wqueue)  (qdat-writeq qdat))
;;; 				   ((rqueue)  (qdat-readq  qdat))
;;; 				   ((rwqueue) (qdat-rwq    qdat))
;;; 				   ((misc)    (qdat-misc   qdat))
;;; 				   (else #f))))
;;; 	     (dbdat   (get-dbh acfg fname))
;;; 	     (dbh     (if (dbdat? dbdat)(dbdat-dbh dbdat) #f))
;;; 	     (nowtime (current-seconds)))
;;; 	;; handle the queues that require a transaction
;;; 	;;
;;; 	(map ;; 
;;; 	 (lambda (queue-sym)
;;; 	   ;; (print "processing queue " queue-sym)
;;; 	   (let* ((queue (queue-sym->queue queue-sym)))
;;; 	     (if (not (queue-empty? queue))
;;; 		 (let ((responses
;;; 			(sqlite3:with-transaction ;; todo - catch exceptions...
;;; 			 dbh
;;; 			 (lambda ()
;;; 			   (let* ((res (doqueue acfg queue fname dbdat dbh))) ;; this does the work!
;;; 			     ;; (print "res=" res)
;;; 			     (match res
;;; 			      ((count delta responses)
;;; 			       (update-stats acfg fname queue-sym delta count)
;;; 			       (sdbg> "process-db-queries" "sqlite3-transaction" process-db-queries-start-time #f #f)
;;; 			       responses) ;; return responses
;;; 			      (else
;;; 			       (print "ERROR: bad return data from doqueue " res)))
;;; 			     )))))
;;; 		   ;; having completed the transaction, send the responses.
;;; 		   ;; (print "INFO: sending " (length responses) " responses.")
;;; 		   (let loop ((responses-left responses))
;;; 		     (cond
;;; 		      ((null? responses-left)  #t)
;;; 		      (else
;;; 		       (let* ((witem    (car responses-left))
;;; 			      (response (cdr responses-left)))  
;;; 			 (call-deliver-response acfg (witem-ripaddr witem)(witem-rport witem)
;;; 						(witem-cookie witem)(witem-result witem)))
;;; 		       (loop (cdr responses-left))))))
;;; 		 )))
;;; 	 '(wqueue rwqueue rqueue))
;;; 	
;;; 	;; handle misc queue
;;; 	;;
;;; 	;; (print "processing misc queue")
;;; 	(let ((queue (queue-sym->queue 'misc)))
;;; 	  (doqueue acfg queue fname dbdat dbh))
;;; 	;; ....
;;; 	(save-dbh acfg fname dbdat)
;;; 	#t ;; just to let the tests know we got here
;;; 	)
;;;       #f ;; nothing processed
;;;       ))
;;; 
;;; ;; run all queues in parallel per db but sequentially per queue for that db.
;;; ;;  - process the queues every 500 or so ms
;;; ;;  - allow for long running queries to continue but all other activities for that
;;; ;;    db will be blocked.
;;; ;;
;;; (define (work-queue-processor acfg)
;;;   (let* ((threads (make-hash-table))) ;; fname => thread
;;;     (let loop ((fnames      (hash-table-keys (area-wqueues acfg)))
;;; 	       (target-time (+ (current-milliseconds) 50)))
;;;       ;;(if (not (null? fnames))(print "Processing for these databases: " fnames))
;;;       (for-each
;;;        (lambda (fname)
;;; 	 ;; (print "processing for " fname)
;;; 	 ;;(process-db-queries acfg fname))
;;; 	 (let ((th (hash-table-ref/default threads fname #f)))
;;; 	   (if (and th (not (member (thread-state th) '(dead terminated))))
;;; 	       (begin
;;; 		 (print "WARNING: worker thread for " fname " is taking a long time.")
;;; 		 (print "Thread is in state " (thread-state th)))
;;; 	       (let ((th1 (make-thread (lambda ()
;;; 					 ;; (catch-and-dump
;;; 					 ;;  (lambda ()
;;; 					    ;; (print "Process queries for " fname)
;;; 					    (let ((start-time (current-milliseconds)))
;;; 					      (process-db-queries acfg fname)
;;; 					      ;; (thread-sleep! 0.01) ;; need the thread to take at least some time
;;; 					      (hash-table-delete! threads fname)) ;; no mutexes?
;;; 					    fname)
;;; 					  "th1"))) ;; ))
;;; 		 (hash-table-set! threads fname th1)
;;; 		 (thread-start! th1)))))
;;;        fnames)
;;;       ;; (thread-sleep! 0.1) ;; give the threads some time to process requests
;;;       ;; burn time until 400ms is up
;;;       (let ((now-time (current-milliseconds)))
;;; 	(if (< now-time target-time)
;;; 	    (let ((delta (- target-time now-time)))
;;; 	      (thread-sleep! (/ delta 1000)))))
;;;       (loop (hash-table-keys (area-wqueues acfg))
;;; 	    (+ (current-milliseconds) 50)))))
;;; 
;;; ;;======================================================================
;;; ;; S T A T S   G A T H E R I N G
;;; ;;======================================================================
;;; 
;;; (defstruct stat
;;;   (qcount-avg  0)                  ;; coarse running average
;;;   (qtime-avg   0)                  ;; coarse running average
;;;   (qcount      0)                  ;; total
;;;   (qtime       0)                  ;; total
;;;   (last-qcount 0)                  ;; last 
;;;   (last-qtime  0)                  ;; last
;;;   (dbs        '())                 ;; list of db files handled by this node
;;;   (when        0))                 ;; when the last query happened - seconds
;;; 
;;; 
;;; (define (update-stats acfg fname bucket duration numqueries)
;;;   (let* ((key   fname) ;; for now do not use bucket. Was: (conc fname "-" bucket)) ;; lazy but good enough
;;; 	 (stats (or (hash-table-ref/default (area-stats acfg) key #f)
;;; 		    (let ((newstats (make-stat)))
;;; 		      (hash-table-set! (area-stats acfg) key newstats)
;;; 		      newstats))))
;;;     ;; when the last query happended (used to remove the fname from the active list)
;;;     (stat-when-set! stats (current-seconds))
;;;     ;; last values
;;;     (stat-last-qcount-set! stats numqueries)
;;;     (stat-last-qtime-set!  stats duration)
;;;     ;; total over process lifetime
;;;     (stat-qcount-set! stats (+ (stat-qcount stats) numqueries))
;;;     (stat-qtime-set!  stats (+ (stat-qtime  stats) duration))
;;;     ;; coarse average
;;;     (stat-qcount-avg-set! stats (/ (+ (stat-qcount-avg stats) numqueries) 2))
;;;     (stat-qtime-avg-set!  stats (/ (+ (stat-qtime-avg  stats) duration)   2))
;;; 
;;;     ;; here is where we add the stats for a given dbfile
;;;     (if (not (member fname (stat-dbs stats)))
;;; 	(stat-dbs-set! stats (cons fname (stat-dbs stats))))
;;; 
;;;     ))
;;; 
;;; ;;======================================================================
;;; ;; S E R V E R   S T U F F 
;;; ;;======================================================================
;;; 
;;; ;; this does NOT return!
;;; ;;
;;; (define (find-free-port-and-open acfg)
;;;   (let ((port (or (area-port acfg) 3200)))
;;;     (handle-exceptions
;;; 	exn
;;; 	(begin
;;; 	  (print "INFO: cannot bind to port " (rpc:default-server-port) ", trying next port")
;;; 	  (area-port-set! acfg (+ port 1))
;;; 	  (find-free-port-and-open acfg))
;;;       (rpc:default-server-port port)
;;;       (area-port-set! acfg port)
;;;       (tcp-read-timeout 120000)
;;;       ;; ((rpc:make-server (tcp-listen port)) #t)
;;;       (tcp-listen (rpc:default-server-port)
;;;       ))))
;;; 
;;; ;; register this node by putting a packet into the pkts dir.
;;; ;; look for other servers
;;; ;; contact other servers and compile list of servers
;;; ;; there are two types of server
;;; ;;     main servers - dashboards, runners and dedicated servers - need pkt
;;; ;;     passive servers - test executers, step calls, list-runs - no pkt
;;; ;;
;;; (define (register-node acfg hostip port-num)
;;;   ;;(mutex-lock! (area-mutex acfg))
;;;   (let* ((server-type  (area-server-type acfg)) ;; auto, main, passive (no pkt created)
;;; 	 (best-ip      (or hostip (get-my-best-address)))
;;; 	 (mtdir        (area-dbdir acfg))
;;; 	 (pktdir       (area-pktsdir acfg))) ;; conc mtdir "/.server-pkts")))
;;;     (print "Registering node " best-ip ":" port-num)
;;;     (if (not mtdir) ;; require a home for this node to put or find databases
;;; 	#f
;;; 	(begin
;;; 	  (if  (not (directory? pktdir))(create-directory pktdir))
;;; 	  ;; server is started, now create pkt if needed
;;; 	  (print "Starting server in " server-type " mode with port " port-num)
;;; 	  (if (member server-type '(auto main)) ;; TODO: if auto, count number of servers registers, if > 3 then don't put out a pkt
;;; 	      (begin
;;; 		(area-pktid-set! acfg
;;; 				 (write-alist->pkt
;;; 				  pktdir 
;;; 				  `((hostname . ,(get-host-name))
;;; 				    (ipaddr   . ,best-ip)
;;; 				    (port     . ,port-num)
;;; 				    (pid      . ,(current-process-id)))
;;; 				  pktspec: *pktspec*
;;; 				  ptype:   'server))
;;; 		(area-pktfile-set! acfg (conc pktdir "/" (area-pktid acfg) ".pkt"))))
;;; 	  (area-port-set!    acfg port-num)
;;; 	  #;(mutex-unlock! (area-mutex acfg))))))
;;; 
;;; (define *cookie-seqnum* 0)
;;; (define (make-cookie key)
;;;   (set! *cookie-seqnum* (add1 *cookie-seqnum*))
;;;   ;;(print "MAKE COOKIE CALLED -- on "servkey"-"*cookie-seqnum*)
;;;   (conc key "-" *cookie-seqnum*)
;;;   )
;;; 
;;; ;; dispatch locally if possible
;;; ;;
;;; (define (call-deliver-response acfg ipaddr port cookie data)
;;;   (if (and (equal? (area-myaddr acfg) ipaddr)
;;; 	   (equal? (area-port     acfg) port))
;;;       (deliver-response acfg cookie data)
;;;       ((rpc:procedure 'response ipaddr port) cookie data)))
;;; 
;;; (define (deliver-response acfg cookie data)
;;;   (let ((deliver-response-start (current-milliseconds)))
;;;     (thread-start! (make-thread
;;; 		    (lambda ()
;;; 		      (let loop ((tries-left 5))
;;; 			;;(print "TOP OF DELIVER_RESPONSE LOOP; triesleft="tries-left)
;;; 			;;(pp (hash-table->alist (area-cookie2mbox acfg)))
;;; 			(let* ((mbox (hash-table-ref/default (area-cookie2mbox acfg) cookie #f)))
;;; 			  (cond
;;; 			   ((eq? 0 tries-left)
;;; 			    (print "ulex:deliver-response: I give up. Mailbox never appeared. cookie="cookie)
;;; 			    )
;;; 			   (mbox
;;; 			    ;;(print "got mbox="mbox"  got data="data"  send.")
;;; 			    (mailbox-send! mbox data))
;;; 			   (else
;;; 			    ;;(print "no mbox yet.  look for "cookie)
;;; 			    (thread-sleep! (/ (- 6 tries-left) 10))
;;; 			    (loop (sub1 tries-left))))))
;;; 		      ;; (debug-pp (list (conc "ulex:deliver-response took " (- (current-milliseconds) deliver-response-start) " ms, cookie=" cookie " data=") data))
;;; 		      (sdbg> "deliver-response" "mailbox-send" deliver-response-start #f #f cookie)
;;; 		      )
;;; 		    (conc "deliver-response thread for cookie="cookie))))
;;;   #t)
;;; 
;;; ;; action:
;;; ;;   immediate - quick actions, no need to put in queues
;;; ;;   dbwrite   - put in dbwrite queue
;;; ;;   dbread    - put in dbread queue
;;; ;;   oslong    - os actions, e.g. du, that could take a long time
;;; ;;   osshort   - os actions that should be quick, e.g. df
;;; ;;
;;; (define (request acfg from-ipaddr from-port servkey action cookie fname params) ;; std-peer-handler
;;;   ;; NOTE: Use rpc:current-peer for getting return address
;;;   (let* ((std-peer-handler-start (current-milliseconds))
;;; 	 ;; (raw-data               (alist-ref 'data     dat))
;;; 	 (rdat                   (hash-table-ref/default
;;; 				  (area-rtable acfg) action #f)) ;; this looks up the sql query or other details indexed by the action
;;; 	 (witem                  (make-witem ripaddr: from-ipaddr ;; rhost:   from-host   
;;; 					     rport:   from-port   action:  action
;;; 					     rdat:    rdat        cookie:  cookie
;;; 					     servkey: servkey     data:    params ;; TODO - rename data to params
;;; 					     caller:  (rpc:current-peer))))
;;;     (if (not (equal? servkey (area-pktid acfg)))
;;; 	`(#f . ,(conc "I don't know you servkey=" servkey ", pktid=" (area-pktid acfg))) ;; immediately return this
;;; 	(let* ((ctype (if rdat 
;;; 			  (calldat-ctype rdat) ;; is this necessary? these should be identical
;;; 			  action)))
;;; 	  (sdbg> "std-peer-handler" "immediate" std-peer-handler-start #f #f)
;;; 	  (case ctype
;;; 	    ;; (dbwrite acfg rdat (cons from-ipaddr from-port) data)))
;;; 	    ((full-ping)  `(#t  "ack to full ping"        ,(work-queue-add acfg fname witem) ,cookie))
;;; 	    ((response)   `(#t  "ack from requestor"      ,(deliver-response acfg fname params)))
;;; 	    ((dbwrite)    `(#t  "db write submitted"      ,(work-queue-add acfg fname witem) ,cookie))
;;; 	    ((dbread)     `(#t  "db read submitted"       ,(work-queue-add acfg fname witem) ,cookie  ))
;;; 	    ((dbrw)       `(#t  "db read/write submitted" ,cookie))
;;; 	    ((osshort)    `(#t  "os short submitted"      ,cookie))
;;; 	    ((oslong)     `(#t  "os long submitted"       ,cookie))
;;; 	    (else         `(#f  "unrecognised action"     ,ctype)))))))
;;; 
;;; ;; Call this to start the actual server
;;; ;;
;;; ;; start_server
;;; ;;
;;; ;;   mode: '
;;; ;;   handler: proc which takes pktrecieved as argument
;;; ;;
;;; 
;;; (define (start-server acfg)
;;;   (let* ((conn (find-free-port-and-open acfg))
;;; 	 (port (area-port acfg)))
;;;     (rpc:publish-procedure!
;;;      'delist-db
;;;      (lambda (fname)
;;;        (hash-table-delete! (area-dbs acfg) fname)))
;;;     (rpc:publish-procedure!
;;;      'calling-addr
;;;      (lambda ()
;;;        (rpc:current-peer)))
;;;     (rpc:publish-procedure!
;;;      'ping
;;;      (lambda ()(real-ping acfg)))
;;;     (rpc:publish-procedure!
;;;      'request
;;;      (lambda (from-addr from-port servkey action cookie dbname params)
;;;        (request acfg from-addr from-port servkey action cookie dbname params)))
;;;     (rpc:publish-procedure!
;;;      'response
;;;      (lambda (cookie res-dat)
;;;        (deliver-response acfg cookie res-dat)))
;;;     (area-ready-set! acfg #t)
;;;     (area-conn-set! acfg conn)
;;;     ((rpc:make-server conn) #f)));; ((tcp-listen (rpc:default-server-port)) #t)
;;; 
;;; 
;;; (define (launch acfg) ;;  #!optional (proc std-peer-handler))
;;;   (print "starting launch")
;;;   (update-known-servers acfg) ;; gotta do this on every start (thus why limit number of publicised servers)
;;;   #;(let ((original-handler (current-exception-handler))) ;; is th
;;;     (lambda (exception)
;;;       (server-exit-procedure)
;;;       (original-handler exception)))
;;;   (on-exit (lambda ()
;;; 	     (shutdown acfg))) ;; (finalize-all-db-handles acfg)))
;;;   ;; set up the rpc handler
;;;   (let* ((th1  (make-thread
;;; 		(lambda ()(start-server acfg))
;;; 		"server thread"))
;;; 	 (th2   (make-thread
;;; 		 (lambda ()
;;; 		   (print "th2 starting")
;;; 		   (let loop ()
;;; 		     (work-queue-processor acfg)
;;; 		     (print "work-queue-processor crashed!")
;;; 		     (loop)))
;;; 		 "work queue thread")))
;;;     (thread-start! th1)
;;;     (thread-start! th2)
;;;     (let loop ()
;;;       (thread-sleep! 0.025)
;;;       (if (area-ready acfg)
;;; 	  #t
;;; 	  (loop)))
;;;     ;; attempt to fix my address
;;;     (let* ((all-addr (get-all-ips-sorted)))	     ;; could use (tcp-addresses conn)?
;;;       (let loop ((rem-addrs all-addr))
;;; 	(if (null? rem-addrs)
;;; 	    (begin
;;; 	      (print "ERROR: Failed to figure out the ip address of myself as a server. Giving up.")
;;; 	      (exit 1)) ;; BUG Changeme to raising an exception
;;; 		
;;; 	    (let* ((addr      (car rem-addrs))
;;; 		   (good-addr (handle-exceptions
;;; 				  exn
;;; 				  #f
;;; 				((rpc:procedure 'calling-addr addr (area-port acfg))))))
;;; 	      (if good-addr
;;; 		  (begin
;;; 		    (print "Got good-addr of " good-addr)
;;; 		    (area-myaddr-set! acfg good-addr))
;;; 		  (loop (cdr rem-addrs)))))))
;;;     (register-node acfg (area-myaddr acfg)(area-port acfg))
;;;     (print "INFO: Server started on " (area-myaddr acfg) ":" (area-port acfg))
;;;     ;; (update-known-servers acfg) ;; gotta do this on every start (thus why limit number of publicised servers)
;;;     ))
;;; 
;;; (define (clear-server-pkt acfg)
;;;   (let ((pktf (area-pktfile acfg)))
;;;     (if pktf (delete-file* pktf))))
;;; 
;;; (define (shutdown acfg)
;;;   (let (;;(conn (area-conn    acfg))
;;; 	(pktf (area-pktfile acfg))
;;; 	(port (area-port    acfg)))
;;;     (if pktf (delete-file* pktf))
;;;     (send-all "imshuttingdown")
;;;     ;; (rpc:close-all-connections!) ;; don't know if this is actually needed
;;;     (finalize-all-db-handles acfg)))
;;; 
;;; (define (send-all msg)
;;;   #f)
;;; 
;;; ;; given a area record look up all the packets
;;; ;;
;;; (define (get-all-server-pkts acfg)
;;;   (let ((all-pkt-files (glob (conc (area-pktsdir acfg) "/*.pkt"))))
;;;     (map (lambda (pkt-file)
;;; 	   (read-pkt->alist pkt-file pktspec: *pktspec*))
;;; 	 all-pkt-files)))
;;; 
;;; #;((Z . "9a0212302295a19610d5796fce0370fa130758e9")
;;;   (port . "34827")
;;;   (pid . "28748")
;;;   (hostname . "zeus")
;;;   (T . "server")
;;;   (D . "1549427032.0"))
;;; 
;;; #;(define (get-my-best-address)
;;;   (let ((all-my-addresses (get-all-ips))) ;; (vector->list (hostinfo-addresses (hostname->hostinfo (get-host-name))))))
;;;     (cond
;;;      ((null? all-my-addresses)
;;;       (get-host-name))                                          ;; no interfaces?
;;;      ((eq? (length all-my-addresses) 1)
;;;       (ip->string (car all-my-addresses)))                      ;; only one to choose from, just go with it
;;;      (else 
;;;       (ip->string (car (filter (lambda (x)                      ;; take any but 127.
;;; 				 (not (eq? (u8vector-ref x 0) 127)))
;;; 			       all-my-addresses)))))))
;;; 
;;; ;; whoami? I am my pkt
;;; ;;
;;; (define (whoami? acfg)
;;;   (hash-table-ref/default (area-hosts acfg)(area-pktid acfg) #f))
;;; 
;;; ;;======================================================================
;;; ;; "Client side" operations
;;; ;;======================================================================
;;; 
;;; (define (safe-call call-key host port . params)
;;;   (handle-exceptions
;;;    exn
;;;    (begin
;;;      (print "Call " call-key " to " host ":" port " failed")
;;;      #f)
;;;    (apply (rpc:procedure call-key host port) params)))
;;; 
;;; ;; ;; convert to/from string / sexpr
;;; ;; 
;;; ;; (define (string->sexpr str)
;;; ;;   (if (string? str)
;;; ;;       (with-input-from-string str read)
;;; ;;       str))
;;; ;; 
;;; ;; (define (sexpr->string s)
;;; ;;   (with-output-to-string (lambda ()(write s))))
;;; 
;;; ;; is the server alive?
;;; ;;
;;; (define (ping acfg host port)
;;;   (let* ((myaddr     (area-myaddr acfg))
;;; 	 (myport     (area-port   acfg))
;;; 	 (start-time (current-milliseconds))
;;; 	 (res        (if (and (equal? myaddr host)
;;; 			      (equal? myport port))
;;; 			 (real-ping acfg)
;;; 			 ((rpc:procedure 'ping host port)))))
;;;     (cons (- (current-milliseconds) start-time)
;;; 	  res)))
;;; 
;;; ;; returns ( ipaddr port alist-fname=>randnum )
;;; (define (real-ping acfg)
;;;   `(,(area-myaddr acfg) ,(area-port acfg) ,(get-host-stats acfg)))
;;; 
;;; ;; is the server alive AND the queues processing?
;;; ;;
;;; #;(define (full-ping acfg servpkt)
;;;   (let* ((start-time (current-milliseconds))
;;; 	 (res        (send-message acfg servpkt '(full-ping) 'full-ping)))
;;;     (cons (- (current-milliseconds) start-time)
;;; 	  res))) ;; (equal? res "got ping"))))
;;; 
;;; 
;;; ;; look up all pkts and get the server id (the hash), port, host/ip
;;; ;; store this info in acfg
;;; ;; return the number of responsive servers found
;;; ;;
;;; ;; DO NOT VERIFY THAT THE SERVER IS ALIVE HERE. This is called at times where the current server is not yet alive and cannot ping itself
;;; ;;
;;; (define (update-known-servers acfg)
;;;   ;; readll all pkts
;;;   ;; foreach pkt; if it isn't me ping the server; if alive, add to hosts hash, else rm the pkt
;;;   (let* ((start-time (current-milliseconds))
;;; 	 (all-pkts  (delete-duplicates
;;; 		     (append (get-all-server-pkts acfg)
;;; 			     (hash-table-values (area-hosts acfg)))))
;;; 	 (hostshash (area-hosts acfg))
;;; 	 (my-id     (area-pktid acfg))
;;; 	 (pktsdir   (area-pktsdir acfg)) ;; needed to remove pkts from non-responsive servers
;;; 	 (numsrvs   0)
;;; 	 (delpkt    (lambda (pktsdir sid)
;;; 		      (print "clearing out server " sid)
;;; 		      (delete-file* (conc pktsdir "/" sid ".pkt"))
;;; 		      (hash-table-delete! hostshash sid))))
;;;     (area-last-srvup-set! acfg (current-seconds))
;;;     (for-each
;;;      (lambda (servpkt)
;;;        (if (list? servpkt)
;;; 	   ;; (pp servpkt)
;;; 	   (let* ((shost (alist-ref 'ipaddr servpkt))
;;; 		  (sport (any->number (alist-ref 'port servpkt)))
;;; 		  (res   (handle-exceptions
;;; 			  exn
;;; 			  (begin
;;; 			    ;; (print "INFO: bad server on " shost ":" sport)
;;; 			    #f)
;;; 			  (ping acfg shost sport)))
;;; 		  (sid   (alist-ref 'Z servpkt)) ;; Z code is our name for the server
;;; 		  (url   (conc shost ":" sport))
;;; 		  )
;;; 	     #;(if (or (not res)
;;; 		     (null? res))
;;; 		 (begin
;;; 		   (print "STRANGE: ping of " url " gave " res)))
;;; 	     
;;; 	     ;; (print "Got " res " from " shost ":" sport)
;;; 	     (match res
;;; 		    ((qduration . payload)
;;; 		     ;; (print "Server pkt:" (alist-ref 'ipaddr servpkt) ":" (alist-ref 'port servpkt)
;;; 		     ;;        (if payload
;;; 		     ;;            "Success" "Fail"))
;;; 		     (match payload
;;; 			    ((host port stats)
;;; 			     ;; (print "From " host ":" port " got stats: " stats)
;;; 			     (if (and host port stats)
;;; 				 (let ((url (conc host ":" port)))
;;; 				   (hash-table-set! hostshash sid servpkt)
;;; 				   ;; store based on host:port
;;; 				   (hash-table-set! (area-hoststats acfg) sid stats))
;;; 				 (print "missing data from the server, not sure what that means!"))
;;; 			     (set! numsrvs (+ numsrvs 1)))
;;; 			    (#f
;;; 			     (print "Removing pkt " sid " due to #f from server or failed ping")
;;; 			     (delpkt pktsdir sid))
;;; 			    (else
;;; 			     (print "Got ")(pp res)(print " from server ")(pp servpkt) " but response did not match (#f/#t . msg)")))
;;; 		    (else
;;; 		     ;; here we delete the pkt - can't reach the server, remove it
;;; 		     ;; however this logic is inadequate. we should mark the server as checked
;;; 		     ;; and not good, if it happens a second time - then remove the pkt
;;; 		     ;; or something similar. I.e. don't be too quick to assume the server is wedged or dead
;;; 		     ;; could be it is simply too busy to reply
;;; 		     (let ((bad-pings (hash-table-ref/default (area-health acfg) url 0)))
;;; 		       (if (> bad-pings 1) ;; two bad pings - remove pkt
;;; 			   (begin
;;; 			     (print "INFO: " bad-pings " bad responses from " url ", deleting pkt " sid)
;;; 			     (delpkt pktsdir sid))
;;; 			   (begin
;;; 			     (print "INFO: " bad-pings " bad responses from " shost ":" sport " not deleting pkt yet")
;;; 			     (hash-table-set! (area-health acfg)
;;; 					      url
;;; 					      (+ (hash-table-ref/default (area-health acfg) url 0) 1))
;;; 			     ))
;;; 		       ))))
;;; 	   ;; servpkt is not actually a pkt?
;;; 	   (begin
;;; 	     (print "Bad pkt " servpkt))))
;;;      all-pkts)
;;;     (sdbg> "update-known-servers" "end" start-time #f #f " found " numsrvs
;;; 	   " servers, pkts: " (map (lambda (p)
;;; 				     (alist-ref 'Z p))
;;; 				   all-pkts))

;;;     numsrvs))
;;; 
;;; (defstruct srvstat
;;;   (numfiles 0)   ;; number of db files handled by this server - subtract 1 for the db being currently looked at
;;;   (randnum  #f)  ;; tie breaker number assigned to by the server itself - applies only to the db under consideration
;;;   (pkt      #f)) ;; the server pkt
;;; 
;;; ;;(define (srv->srvstat srvpkt)
;;;   
;;; ;; Get the server best for given dbname and key
;;; ;;
;;; ;;   NOTE: key is not currently used. The key points to the kind of query, this may be useful for directing read-only queries.
;;; ;;
;;; (define (get-best-server acfg dbname key)
;;;   (let* (;; (servers (hash-table-values (area-hosts acfg)))
;;; 	 (servers     (area-hosts acfg))
;;; 	 (skeys       (sort (hash-table-keys servers) string>=?)) ;; a stable listing
;;; 	 (start-time  (current-milliseconds))
;;; 	 (srvstats    (make-hash-table))  ;; srvid => srvstat
;;; 	 (url         (conc (area-myaddr acfg) ":" (area-port acfg))))
;;;     ;; (print "scores for " dbname ": " (map (lambda (k)(cons k (calc-server-score acfg dbname k))) skeys))
;;;     (if (null? skeys)
;;; 	(if (> (update-known-servers acfg) 0)
;;; 	    (get-best-server acfg dbname key) ;; some risk of infinite loop here, TODO add try counter
;;; 	    (begin
;;; 	      (print "ERROR: no server found!") ;; since this process is also a server this should never happen
;;; 	      #f))
;;; 	(begin
;;; 	  ;; (print "in get-best-server with skeys=" skeys)
;;; 	  (if (> (- (current-seconds) (area-last-srvup acfg)) 10)
;;; 	      (begin
;;; 		(update-known-servers acfg)
;;; 		(sdbg> "get-best-server" "update-known-servers" start-time #f #f)))
;;; 
;;; 	  ;; for each server look at the list of dbfiles, total number of dbs being handled
;;; 	  ;; and the rand number, save the best host
;;; 	  ;; also do a delist-db for each server dbfile not used
;;; 	  (let* ((best-server       #f)
;;; 		 (servers-to-delist (make-hash-table)))
;;; 	    (for-each
;;; 	     (lambda (srvid)
;;; 	       (let* ((server    (hash-table-ref/default servers srvid #f))
;;; 		      (stats     (hash-table-ref/default (area-hoststats acfg) srvid '(()))))
;;; 		 ;; (print "stats: " stats)
;;;  		 (if server
;;; 		     (let* ((dbweights (car stats))
;;; 			    (srvload   (length (filter (lambda (x)(not (equal? dbname (car x)))) dbweights)))
;;; 			    (dbrec     (alist-ref dbname dbweights equal?))  ;; get the pair with fname . randscore
;;; 			    (randnum   (if dbrec
;;; 					   dbrec ;; (cdr dbrec)
;;; 					   0)))
;;; 		       (hash-table-set! srvstats srvid (make-srvstat numfiles: srvload randnum: randnum pkt: server))))))
;;; 	     skeys)
;;; 	    
;;; 	    (let* ((sorted    (sort (hash-table-values srvstats) 
;;; 				    (lambda (a b)
;;; 				      (let ((numfiles-a (srvstat-numfiles a))
;;; 					    (numfiles-b (srvstat-numfiles b))
;;; 					    (randnum-a  (srvstat-randnum a))
;;; 					    (randnum-b  (srvstat-randnum b)))
;;; 					(if (< numfiles-a numfiles-b) ;; Note, I don't think adding an offset works here. Goal was only move file handling to a different server if it has 2 less
;;; 					    #t
;;; 					    (if (and (equal? numfiles-a numfiles-b)
;;; 						     (< randnum-a randnum-b))
;;; 						#t
;;; 						#f))))))
;;; 		   (best      (if (null? sorted)
;;; 				  (begin
;;; 				    (print "ERROR: should never be null due to self as server.")
;;; 				    #f)
;;; 				  (srvstat-pkt (car sorted)))))
;;; 	      #;(print "SERVER(" url "): " dbname ": " (map (lambda (srv)
;;; 							    (let ((p (srvstat-pkt srv)))
;;; 							      (conc (alist-ref 'ipaddr p) ":" (alist-ref 'port p)
;;; 								    "(" (srvstat-numfiles srv)","(srvstat-randnum srv)")")))
;;; 							    sorted))
;;; 	      best))))))
;;;     
;;;     ;; send out an "I'm about to exit notice to all known servers"
;;;     ;;
;;; (define (death-imminent acfg)
;;;   '())
;;; 
;;; ;;======================================================================
;;; ;; U L E X  -  T H E   I N T E R E S T I N G   S T U F F ! !
;;; ;;======================================================================
;;; 
;;; ;; register a handler
;;; ;;   NOTES:
;;; ;;     dbinitsql   is reserved for a list of sql statements for initializing the db
;;; ;;     dbinitfn    is reserved for a db init function, if exists called after dbinitsql
;;; ;;     
;;; (define (register acfg key obj #!optional (ctype 'dbwrite))
;;;   (let ((ht (area-rtable acfg)))
;;;     (if (hash-table-exists? ht key)
;;; 	(print "WARNING: redefinition of entry " key))
;;;     (hash-table-set! ht key (make-calldat obj: obj ctype: ctype))))
;;; 
;;; ;; usage: register-batch acfg '((key1 . sql1) (key2 . sql2) ... )
;;; ;; NB// obj is often an sql query
;;; ;;
;;; (define (register-batch acfg ctype data)
;;;   (let ((ht (area-rtable acfg)))
;;;     (map (lambda (dat)
;;; 	   (hash-table-set! ht (car dat)(make-calldat obj: (cdr dat) ctype: ctype)))
;;; 	 data)))
;;; 
;;; (define (initialize-area-calls-from-specfile area specfile)
;;;   (let* ((callspec (with-input-from-file specfile read )))
;;;     (for-each (lambda (group)
;;;                 (register-batch
;;;                  area
;;;                  (car group)
;;;                  (cdr group)))
;;;               callspec)))
;;; 
;;; ;; get-rentry
;;; ;;
;;; (define (get-rentry acfg key)
;;;   (hash-table-ref/default (area-rtable acfg) key #f))
;;; 
;;; (define (get-rsql acfg key)
;;;   (let ((cdat (get-rentry acfg key)))
;;;     (if cdat
;;; 	(calldat-obj cdat)
;;; 	#f)))
;;; 
;;; 
;;; 
;;; ;; blocking call:
;;; ;;    client                         server
;;; ;;    ------                         ------
;;; ;;    call()
;;; ;;    send-message()
;;; ;;    nmsg-send()
;;; ;;                                   nmsg-receive()
;;; ;;                                   nmsg-respond(ack,cookie)
;;; ;;    ack, cookie
;;; ;;    mbox-thread-wait(cookie)
;;; ;;                                   nmsg-send(client,cookie,result)
;;; ;;        nmsg-respond(ack)
;;; ;;        return result
;;; ;;
;;; ;; reserved action:
;;; ;;    'immediate
;;; ;;    'dbinitsql
;;; ;;
;;; (define (call acfg dbname action params #!optional (count 0))
;;;   (let* ((call-start-time     (current-milliseconds))
;;; 	 (srv                 (get-best-server acfg dbname action))
;;; 	 (post-get-start-time (current-milliseconds))
;;; 	 (rdat                (hash-table-ref/default (area-rtable acfg) action #f))
;;; 	 (myid                (trim-pktid (area-pktid acfg)))
;;; 	 (srvid               (trim-pktid (alist-ref 'Z srv)))
;;; 	 (cookie              (make-cookie myid)))
;;;     (sdbg> "call" "get-best-server" call-start-time #f call-start-time " from: " myid " to server: " srvid " for " dbname " action: " action " params: " params " rdat: " rdat)
;;;     (print "INFO: call to " (alist-ref 'ipaddr srv) ":" (alist-ref 'port srv) " from " (area-myaddr acfg) ":" (area-port acfg) " for " dbname)
;;;     (if (and srv rdat) ;; need both to dispatch a request
;;; 	(let* ((ripaddr  (alist-ref 'ipaddr srv))
;;; 	       (rsrvid   (alist-ref 'Z srv))
;;; 	       (rport    (any->number (alist-ref 'port   srv)))
;;; 	       (res-full (if (and (equal? ripaddr (area-myaddr acfg))
;;; 				  (equal? rport   (area-port acfg)))
;;; 			     (request acfg ripaddr rport (area-pktid acfg) action cookie dbname params)
;;; 			     (safe-call 'request ripaddr rport
;;; 					(area-myaddr acfg)
;;; 					(area-port   acfg)
;;; 					#;(area-pktid acfg)
;;; 					rsrvid
;;; 					action cookie dbname params))))
;;; 	  ;; (print "res-full: " res-full)
;;; 	  (match res-full
;;; 	    ((response-ok response-msg rem ...)
;;; 	     (let* ((send-message-time (current-milliseconds))
;;; 		    ;; (match res-full
;;; 		    ;;  ((response-ok response-msg)
;;; 		    ;; (response-ok  (car res-full))
;;; 		    ;; (response-msg (cadr res-full)
;;; 		    )
;;; 	       ;; (res (take res-full 3))) ;; ctype == action, TODO: converge on one term <<=== what was this? BUG 
;;; 	       ;; (print "ulex:call: send-message took " (- send-message-time post-get-start-time) " ms params=" params)
;;; 	       (sdbg> "call" "send-message" post-get-start-time #f call-start-time)
;;; 	       (cond
;;; 		((not response-ok) #f)
;;; 		((member response-msg '("db read submitted" "db write submitted"))
;;; 		 (let* ((cookie-id   (cadddr res-full))
;;; 			(mbox        (make-mailbox))
;;; 			(mbox-time   (current-milliseconds)))
;;; 		   (hash-table-set! (area-cookie2mbox acfg) cookie-id mbox)
;;; 		   (let* ((mbox-timeout-secs    20)
;;; 			  (mbox-timeout-result 'MBOX_TIMEOUT)
;;; 			  (res                  (mailbox-receive! mbox mbox-timeout-secs mbox-timeout-result))
;;; 			  (mbox-receive-time    (current-milliseconds)))
;;; 		     (hash-table-delete! (area-cookie2mbox acfg) cookie-id)
;;; 		     (sdbg> "call" "mailbox-receive" mbox-time #f call-start-time " from: " myid " to server: " srvid " for " dbname)
;;; 		     ;; (print "ulex:call mailbox-receive took " (- mbox-receive-time mbox-time) "ms params=" params)
;;; 		     res)))
;;; 		(else
;;; 		 (print "Unhandled response \""response-msg"\"")
;;; 		 #f))
;;; 	       ;; depending on what action (i.e. ctype) is we will block here waiting for
;;; 	       ;; all the data (mechanism to be determined)
;;; 	       ;;
;;; 	       ;; if res is a "working on it" then wait
;;; 	       ;;    wait for result
;;; 	       ;; mailbox thread wait on 
;;; 	       
;;; 	       ;; if res is a "can't help you" then try a different server
;;; 	       ;; if res is a "ack" (e.g. for one-shot requests) then return res
;;; 	       ))
;;; 	    (else
;;; 	     (if (< count 10)
;;; 		 (let* ((url (conc (alist-ref 'ipaddr srv) ":" (alist-ref 'port srv))))
;;; 		   (thread-sleep! 1)
;;; 		   (print "ERROR: Bad result from " url ", dbname: " dbname ", action: " action ", params: " params ". Trying again in 1 second.")
;;; 		   (call acfg dbname action params (+ count 1)))
;;; 		 (begin
;;; 		   (error (conc "ERROR: " count " tries, still have improper response res-full=" res-full)))))))
;;; 	(begin
;;; 	  (if (not rdat)
;;; 	      (print "ERROR: action " action " not registered.")
;;; 	      (if (< count 10)
;;; 		 (begin
;;; 		   (thread-sleep! 1)
;;; 		   (area-hosts-set! acfg (make-hash-table)) ;; clear out all known hosts
;;; 		   (print "ERROR: no server found, srv=" srv ", trying again in 1 seconds")
;;; 		   (call acfg dbname action params (+ count 1)))
;;; 		 (begin
;;; 		   (error (conc "ERROR: no server found after 10 tries, srv=" srv ", giving up."))
;;; 		   #;(error "No server available"))))))))
;;; 
;;; 
;;; ;;======================================================================
;;; ;; U T I L I T I E S 
;;; ;;======================================================================
;;; 
;;; ;; get a signature for identifing this process
;;; ;;
;;; (define (get-process-signature)
;;;   (cons (get-host-name)(current-process-id)))
;;; 
;;; ;;======================================================================
;;; ;; S Y S T E M   S T U F F
;;; ;;======================================================================
;;; 
;;; ;; get normalized cpu load by reading from /proc/loadavg and
;;; ;; /proc/cpuinfo return all three values and the number of real cpus
;;; ;; and the number of threads returns alist '((adj-cpu-load
;;; ;; . normalized-proc-load) ... etc.  keys: adj-proc-load,
;;; ;; adj-core-load, 1m-load, 5m-load, 15m-load
;;; ;;
;;; (define (get-normalized-cpu-load)
;;;   (let ((res (get-normalized-cpu-load-raw))
;;; 	(default `((adj-proc-load . 2) ;; there is no right answer
;;; 		   (adj-core-load . 2)
;;; 		   (1m-load       . 2)
;;; 		   (5m-load       . 0) ;; causes a large delta - thus causing default of throttling if stuff goes wrong
;;; 		   (15m-load      . 0)
;;; 		   (proc          . 1)
;;; 		   (core          . 1)
;;; 		   (phys          . 1)
;;; 		   (error         . #t))))
;;;     (cond
;;;      ((and (list? res)
;;; 	   (> (length res) 2))
;;;       res)
;;;      ((eq? res #f)   default) ;; add messages?
;;;      ((eq? res #f) default)   ;; this would be the #eof
;;;      (else default))))
;;; 
;;; (define (get-normalized-cpu-load-raw)
;;;   (let* ((actual-host           (get-host-name))) ;; #f is localhost
;;;     (let ((data  (append 
;;; 		  (with-input-from-file "/proc/loadavg" read-lines)
;;; 		  (with-input-from-file "/proc/cpuinfo" read-lines)
;;; 		  (list "end")))
;;; 	  (load-rx  (regexp "^([\\d\\.]+)\\s+([\\d\\.]+)\\s+([\\d\\.]+)\\s+.*$"))
;;; 	  (proc-rx  (regexp "^processor\\s+:\\s+(\\d+)\\s*$"))
;;; 	  (core-rx  (regexp "^core id\\s+:\\s+(\\d+)\\s*$"))
;;; 	  (phys-rx  (regexp "^physical id\\s+:\\s+(\\d+)\\s*$"))
;;; 	  (max-num  (lambda (p n)(max (string->number p) n))))
;;;       ;; (print "data=" data)
;;;       (if (null? data) ;; something went wrong
;;; 	  #f
;;; 	  (let loop ((hed      (car data))
;;; 		     (tal      (cdr data))
;;; 		     (loads    #f)
;;; 		     (proc-num 0)  ;; processor includes threads
;;; 		     (phys-num 0)  ;; physical chip on motherboard
;;; 		     (core-num 0)) ;; core
;;; 	    ;; (print hed ", " loads ", " proc-num ", " phys-num ", " core-num)
;;; 	    (if (null? tal) ;; have all our data, calculate normalized load and return result
;;; 		(let* ((act-proc (+ proc-num 1))
;;; 		       (act-phys (+ phys-num 1))
;;; 		       (act-core (+ core-num 1))
;;; 		       (adj-proc-load (/ (car loads) act-proc))
;;; 		       (adj-core-load (/ (car loads) act-core))
;;; 		       (result
;;; 			(append (list (cons 'adj-proc-load adj-proc-load)
;;; 				      (cons 'adj-core-load adj-core-load))
;;; 				(list (cons '1m-load (car loads))
;;; 				      (cons '5m-load (cadr loads))
;;; 				      (cons '15m-load (caddr loads)))
;;; 				(list (cons 'proc act-proc)
;;; 				      (cons 'core act-core)
;;; 				      (cons 'phys act-phys)))))
;;; 		  result)
;;; 		(regex-case
;;; 		    hed
;;; 		  (load-rx  ( x l1 l5 l15 ) (loop (car tal)(cdr tal)(map string->number (list l1 l5 l15)) proc-num phys-num core-num))
;;; 		  (proc-rx  ( x p         ) (loop (car tal)(cdr tal) loads           (max-num p proc-num) phys-num core-num))
;;; 		  (phys-rx  ( x p         ) (loop (car tal)(cdr tal) loads           proc-num (max-num p phys-num) core-num))
;;; 		  (core-rx  ( x c         ) (loop (car tal)(cdr tal) loads           proc-num phys-num (max-num c core-num)))
;;; 		  (else 
;;; 		   (begin
;;; 		     ;; (print "NO MATCH: " hed)
;;; 		     (loop (car tal)(cdr tal) loads proc-num phys-num core-num))))))))))
;;; 
;;; (define (get-host-stats acfg)
;;;   (let ((stats-hash (area-stats acfg)))
;;;     ;; use this opportunity to remove references to dbfiles which have not been accessed in a while
;;;     (for-each
;;;      (lambda (dbname)
;;;        (let* ((stats       (hash-table-ref stats-hash dbname))
;;; 	      (last-access (stat-when stats)))
;;; 	 (if (and (> last-access 0)                             ;; if zero then there has been no access
;;; 		  (> (- (current-seconds) last-access) 10))     ;; not used in ten seconds
;;; 	     (begin
;;; 	       (print "Removing " dbname " from stats list")
;;; 	       (hash-table-delete! stats-hash dbname) ;; remove from stats hash
;;; 	       (stat-dbs-set! stats (hash-table-keys stats))))))
;;;      (hash-table-keys stats-hash))
;;;     
;;;     `(,(hash-table->alist (area-dbs acfg)) ;; dbname => randnum
;;;       ,(map (lambda (dbname)  ;; dbname is the db name
;;; 	      (cons dbname (stat-when (hash-table-ref stats-hash dbname))))
;;; 	    (hash-table-keys stats-hash))
;;;       (cpuload . ,(get-normalized-cpu-load)))))
;;;     #;(stats   . ,(map (lambda (k) ;; create an alist from the stats data
;;; 		       (cons k (stat->alist (hash-table-ref (area-stats acfg) k))))
;;; 		     (hash-table-keys (area-stats acfg))))
;;; 
;;; #;(trace
;;;  ;; assv
;;;  ;; cdr
;;;  ;; caar
;;;  ;; ;; cdr
;;;  ;; call
;;;  ;; finalize-all-db-handles
;;;  ;; get-all-server-pkts
;;;  ;; get-normalized-cpu-load
;;;  ;; get-normalized-cpu-load-raw
;;;  ;; launch
;;;  ;; nmsg-send
;;;  ;; process-db-queries
;;;  ;; receive-message
;;;  ;; std-peer-handler
;;;  ;; update-known-servers
;;;  ;; work-queue-processor
;;;  )
;;; 
;;; ;;======================================================================
;;; ;; netutil
;;; ;;   move this back to ulex-netutil.scm someday?
;;; ;;======================================================================
;;; 
;;; ;; #include <stdio.h>
;;; ;; #include <netinet/in.h>
;;; ;; #include <string.h>
;;; ;; #include <arpa/inet.h>
;;; 
;;; (foreign-declare "#include \"sys/types.h\"")
;;; (foreign-declare "#include \"sys/socket.h\"")
;;; (foreign-declare "#include \"ifaddrs.h\"")
;;; (foreign-declare "#include \"arpa/inet.h\"")
;;; 
;;; ;; get IP addresses from ALL interfaces
;;; (define get-all-ips
;;;   (foreign-safe-lambda* scheme-object ()
;;;     "
;;; 
;;; // from https://stackoverflow.com/questions/17909401/linux-c-get-default-interfaces-ip-address :
;;; 
;;; 
;;;     C_word lst = C_SCHEME_END_OF_LIST, len, str, *a;
;;; //    struct ifaddrs *ifa, *i;
;;; //    struct sockaddr *sa;
;;; 
;;;     struct ifaddrs * ifAddrStruct = NULL;
;;;     struct ifaddrs * ifa = NULL;
;;;     void * tmpAddrPtr = NULL;
;;; 
;;;     if ( getifaddrs(&ifAddrStruct) != 0)
;;;       C_return(C_SCHEME_FALSE);
;;; 
;;; //    for (i = ifa; i != NULL; i = i->ifa_next) {
;;;     for (ifa = ifAddrStruct; ifa != NULL; ifa = ifa->ifa_next) {
;;;         if (ifa->ifa_addr->sa_family==AF_INET) { // Check it is
;;;             // a valid IPv4 address
;;;             tmpAddrPtr = &((struct sockaddr_in *)ifa->ifa_addr)->sin_addr;
;;;             char addressBuffer[INET_ADDRSTRLEN];
;;;             inet_ntop(AF_INET, tmpAddrPtr, addressBuffer, INET_ADDRSTRLEN);
;;; //            printf(\"%s IP Address %s\\n\", ifa->ifa_name, addressBuffer);
;;;             len = strlen(addressBuffer);
;;;             a = C_alloc(C_SIZEOF_PAIR + C_SIZEOF_STRING(len));
;;;             str = C_string(&a, len, addressBuffer);
;;;             lst = C_a_pair(&a, str, lst);
;;;         } 
;;; 
;;; //        else if (ifa->ifa_addr->sa_family==AF_INET6) { // Check it is
;;; //            // a valid IPv6 address
;;; //            tmpAddrPtr = &((struct sockaddr_in6 *)ifa->ifa_addr)->sin6_addr;
;;; //            char addressBuffer[INET6_ADDRSTRLEN];
;;; //            inet_ntop(AF_INET6, tmpAddrPtr, addressBuffer, INET6_ADDRSTRLEN);
;;; ////            printf(\"%s IP Address %s\\n\", ifa->ifa_name, addressBuffer);
;;; //            len = strlen(addressBuffer);
;;; //            a = C_alloc(C_SIZEOF_PAIR + C_SIZEOF_STRING(len));
;;; //            str = C_string(&a, len, addressBuffer);
;;; //            lst = C_a_pair(&a, str, lst);
;;; //       }
;;; 
;;; //       else {
;;; //         printf(\" not an IPv4 address\\n\");
;;; //       }
;;; 
;;;     }
;;; 
;;;     freeifaddrs(ifa);
;;;     C_return(lst);
;;; 
;;; "))
;;; 
;;; ;; Change this to bias for addresses with a reasonable broadcast value?
;;; ;;
;;; (define (ip-pref-less? a b)
;;;   (let* ((rate (lambda (ipstr)
;;;                  (regex-case ipstr
;;;                              ( "^127\\." _ 0 )
;;;                              ( "^(10\\.0|192\\.168\\.)\\..*" _ 1 )
;;;                              ( else 2 ) ))))
;;;     (< (rate a) (rate b))))
;;;   
;;; 
;;; (define (get-my-best-address)
;;;   (let ((all-my-addresses (get-all-ips))
;;;         ;;(all-my-addresses-old (vector->list (hostinfo-addresses (hostname->hostinfo (get-host-name)))))
;;;         )
;;;     (cond
;;;      ((null? all-my-addresses)
;;;       (get-host-name))                                          ;; no interfaces?
;;;      ((eq? (length all-my-addresses) 1)
;;;       (car all-my-addresses))                      ;; only one to choose from, just go with it
;;;      
;;;      (else
;;;       (car (sort all-my-addresses ip-pref-less?)))
;;;      ;; (else 
;;;      ;;  (ip->string (car (filter (lambda (x)                      ;; take any but 127.
;;;      ;;    			 (not (eq? (u8vector-ref x 0) 127)))
;;;      ;;    		       all-my-addresses))))
;;; 
;;;      )))
;;; 
;;; (define (get-all-ips-sorted)
;;;   (sort (get-all-ips) ip-pref-less?))
;;; 
;;; 

)

Modified utils/nbfake from [3dc733e001] to [3e514ddfd2].

37
38
39
40
41
42
43

44
45
46
47
48
49
50
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51







+







nbfake <command to run>

nbfake behavior can be changed by setting the following env vars:
   NBFAKE_HOST       SSH to \$NBFAKE_HOST and run command
   NBFAKE_LOG        Logfile for nbfake output
   NB_WASH_GROUPS    comma-separated list of groups to wash into
   NB_WASH_ENABLED   must be set in order to enable wash groups
   NBFAKE_QUIET      set to suppress informational output

__EOF
  exit
fi

#==============================================================================
# Setup
85
86
87
88
89
90
91

92
93
94
95
96
97

98
99
100
101
102
103
104
105
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108







+






+








   WASHCMD="wash -q -n $grouplist -X"
fi

#==============================================================================
# Run and log
#==============================================================================

if [[ -z "$NBFAKE_QUIET" ]];then
cat <<__EOF >&2
#======================================================================
# NBFAKE logging command to: $MY_NBFAKE_LOG
#     $WASHCMD $*
#======================================================================
__EOF
fi

if [[ -z "$MY_NBFAKE_HOST" ]]; then
  # Run locally
  sh -c "cd $CURRWD;export DISPLAY=$DISPLAY; export PATH=\"$PATH\"; nohup $WASHCMD $* >> $MY_NBFAKE_LOG 2>&1 &"
else
  # run remotely
  ssh -X -n -f $MY_NBFAKE_HOST "sh -c \"cd $CURRWD;export DISPLAY=$DISPLAY; export PATH=\"$PATH\"; nohup $WASHCMD $* >> $MY_NBFAKE_LOG 2>&1 &\""
fi