Megatest

Diff
Login

Differences From Artifact [13d1c5f978]:

To Artifact [659d29d212]:


132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164

165
166
167
168
169




170
171
172
173
174
175
176
177



178
179
180
181
182
183
184
;;
;;  grep define ../rmt.scm | grep rmt: |perl -pi -e 's/\(define\s+\((\S+)\W.*$/\1/'|sort -u

;;======================================================================
;;  S U P P O R T   F U N C T I O N S
;;======================================================================

;; if a server is either running or in the process of starting call client:setup
;; else return #f to let the calling proc know that there is no server available
;;
(define (rmt:get-connection-info areapath #!key (area-dat #f)) ;; TODO: push areapath down.
  (let* ((runremote (or area-dat *runremote*))
	 (cinfo     (if (remote? runremote)
			(remote-conndat runremote)
			#f)))
	  (if cinfo
	      cinfo
	      (if (server:check-if-running areapath)
		  (client:setup areapath)
		  #f))))

(defstruct rmt:remote
  (conns (make-hash-table)) ;; apath/dbname => rmt:conn
  )

(defstruct rmt:conn
  (apath    #f)
  (dbname   #f)
  (fullname #f)
  (hostport #f)
  (lastmsg  0)
  (expires  0))


(define *rmt:remote* (make-rmt:remote))

;; do we have a connection to apath dbname and
;; is it not expired? then return it
;;




(define (rmt:get-existing-live-conn remote apath dbname)
  (let* ((fullname (db:dbname->path apath dbname))
	 (conn     (hash-table-ref/default (rmt:remote-conns remote) fullname #f)))
    (if (and conn
	     (> (current-seconds) (rmt:conn-expires conn)))
	conn
	#f)))




;; looks for a connection to main
;; connections for other servers happens by requesting from main
;;
(define (rmt:open-main-connection remote apath)
  (let* ((pktsdir     (get-pkts-dir apath))
	 (all-srvpkts (get-all-server-pkts pktsdir *srvpktspec*))
	 (viable-srvs (get-viable-servers all-srvpkts apath))







|
|
|
|
|
|
|
|
|
|
|
|
|













>





>
>
>
>
|







>
>
>







132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
;;
;;  grep define ../rmt.scm | grep rmt: |perl -pi -e 's/\(define\s+\((\S+)\W.*$/\1/'|sort -u

;;======================================================================
;;  S U P P O R T   F U N C T I O N S
;;======================================================================

;; ;; if a server is either running or in the process of starting call client:setup
;; ;; else return #f to let the calling proc know that there is no server available
;; ;;
;; (define (rmt:get-connection-info areapath #!key (area-dat #f)) ;; TODO: push areapath down.
;;   (let* ((runremote (or area-dat *runremote*))
;; 	 (cinfo     (if (remote? runremote)
;; 			(remote-conndat runremote)
;; 			#f)))
;; 	  (if cinfo
;; 	      cinfo
;; 	      (if (server:check-if-running areapath)
;; 		  (client:setup areapath)
;; 		  #f))))

(defstruct rmt:remote
  (conns (make-hash-table)) ;; apath/dbname => rmt:conn
  )

(defstruct rmt:conn
  (apath    #f)
  (dbname   #f)
  (fullname #f)
  (hostport #f)
  (lastmsg  0)
  (expires  0))

;; replaces *runremote*
(define *rmt:remote* (make-rmt:remote))

;; do we have a connection to apath dbname and
;; is it not expired? then return it
;;
;; else setup a connection
;;
;; if that fails, return '(#f "some reason") ;; NB// convert to raising an exception
;;
(define (rmt:get-connection remote apath dbname)
  (let* ((fullname (db:dbname->path apath dbname))
	 (conn     (hash-table-ref/default (rmt:remote-conns remote) fullname #f)))
    (if (and conn
	     (> (current-seconds) (rmt:conn-expires conn)))
	conn
	#f)))


;; 	(rmt:general-open-connection remote apath dbname))))

;; looks for a connection to main
;; connections for other servers happens by requesting from main
;;
(define (rmt:open-main-connection remote apath)
  (let* ((pktsdir     (get-pkts-dir apath))
	 (all-srvpkts (get-all-server-pkts pktsdir *srvpktspec*))
	 (viable-srvs (get-viable-servers all-srvpkts apath))
204
205
206
207
208
209
210


211
212


213

214
215



216
217
218
219
220

221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238

239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
				hostport: srv-addr
				lastmsg: (current-seconds)
				expires: (+ (current-seconds) 60) ;; this needs to be gathered during the ping
				))
	      (start-main-srv)))
	(start-main-srv))))



(define (rmt:general-open-connection remote apath dbname)
  (let  ((mainconn (rmt:get-existing-live-conn remote apath (db:run-id->dbname #f))))


    (if (not mainconn)(rmt:open-main-connection remote apath))

    ;; TODO - call main for connection info
    ))



    
    
;;======================================================================

;; RA => e.g. usage (rmt:send-receive 'get-var #f (list varname))

;;
(define (rmt:send-receive cmd rid params #!key (attemptnum 1)(area-dat #f))
  (if (not *rmt:remote*)(set! *rmt:remote* (make-rmt:remote)))
  (let* ((apath *toppath*)
	 (conns *rmt:remote*)
	 (dbname (db:run-id->dbname rid)))
    (rmt:send-receive-real conns apath dbname rid params)))

;; db is at apath/.db/dbname, rid is an intermediary solution and will be removed
;; sometime in the future
;;
(define (rmt:send-receive-real remote apath dbname rid params)
  ;; do we have a connection to the needed db?
  ;; has the connection expired?
  (let connloop ((conn (rmt:get-existing-live-conn remote apath dbname)))
    (if (not conn)
	(connloop (rmt:general-open-connection remote apath dbname))
	(begin

	  #t ;; here we do the actual connection work
	  ))))
    
  
;; ;; ;; start attemptnum at 1 so the modulo below works as expected
;; ;;   #;(common:telemetry-log (conc "rmt:"(->string cmd))
;; ;;                         payload: `((rid . ,rid)
;; ;;                                    (params . ,params)))
;; ;;                           
;; ;;   (if (> attemptnum 2)
;; ;;       (debug:print 0 *default-log-port* "INFO: attemptnum in rmt:send-receive is " attemptnum))
;; ;;     
;; ;;   (cond
;; ;;    ((> attemptnum 2) (thread-sleep! 0.053))
;; ;;    ((> attemptnum 10) (thread-sleep! 0.5))
;; ;;    ((> attemptnum 20) (thread-sleep! 1)))
;; ;;   (if (and (> attemptnum 5) (= 0 (modulo attemptnum 15)))  
;; ;;     (begin (server:run *toppath*) (thread-sleep! 3))) 
;; ;;   
;; ;;   
;; ;;   ;;DOT digraph megatest_state_status {
;; ;;   ;;DOT   ranksep=0;
;; ;;   ;;DOT   // rankdir=LR;
;; ;;   ;;DOT   node [shape="box"];
;; ;;   ;;DOT "rmt:send-receive" -> MUTEXLOCK;
;; ;;   ;;DOT { edge [style=invis];"case 1" -> "case 2" -> "case 3" -> "case 4" -> "case 5" -> "case 6" -> "case 7" -> "case 8" -> "case 9" -> "case 10" -> "case 11"; }
;; ;;   ;; do all the prep locked under the rmt-mutex
;; ;;   (mutex-lock! *rmt-mutex*)
;; ;;   
;; ;;   ;; 1. check if server is started IFF cmd is a write OR if we are not on the homehost, store in runremote
;; ;;   ;; 2. check the age of the connections. refresh the connection if it is older than timeout-20 seconds.
;; ;;   ;; 3. do the query, if on homehost use local access
;; ;;   ;;
;; ;;   (let* ((start-time    (current-seconds)) ;; snapshot time so all use cases get same value
;; ;;          (areapath      *toppath*);; TODO - resolve from dbstruct to be compatible with multiple areas
;; ;; 	 (runremote     (or area-dat
;; ;; 			    *runremote*))
;; ;;          (attemptnum    (+ 1 attemptnum))
;; ;; 	 (readonly-mode (rmtmod:calc-ro-mode runremote *toppath*)))
;; ;; 
;; ;;     ;; DOT INIT_RUNREMOTE; // leaving off - doesn't really add to the clarity
;; ;;     ;; DOT MUTEXLOCK -> INIT_RUNREMOTE [label="no remote?"];
;; ;;     ;; DOT INIT_RUNREMOTE -> MUTEXLOCK;
;; ;;     ;; ensure we have a record for our connection for given area
;; ;;     (if (not runremote)                   ;; can remove this one. should never get here.         
;; ;; 	(begin
;; ;; 	  (set! *runremote* (make-and-init-remote))
;; ;;           (let* ((server-info (remote-server-info *runremote*))) 
;; ;;             (if server-info
;; ;; 		(begin
;; ;; 			(remote-server-url-set! *runremote* (server:record->url server-info))
;; ;; 			(remote-server-id-set! *runremote* (server:record->id server-info)))))  
;; ;; 	  (set! runremote   *runremote*))) ;; new runremote will come from this on next iteration
;; ;;     
;; ;;     ;; DOT SET_HOMEHOST; // leaving off - doesn't really add to the clarity
;; ;;     ;; DOT MUTEXLOCK -> SET_HOMEHOST [label="no homehost?"];
;; ;;     ;; DOT SET_HOMEHOST -> MUTEXLOCK;
;; ;;     ;; ensure we have a homehost record
;; ;;     (if (not (pair? (remote-hh-dat runremote)))  ;; not on homehost
;; ;; 	(thread-sleep! 0.1) ;; since we shouldn't get here, delay a little
;; ;; 	(remote-hh-dat-set! runremote (common:get-homehost)))
;; ;;     
;; ;;     ;;(print "BB> readonly-mode is "readonly-mode" dbfile is "dbfile)
;; ;;     (cond
;; ;;      ;;DOT EXIT;
;; ;;      ;;DOT MUTEXLOCK -> EXIT [label="> 15 attempts"]; {rank=same "case 1" "EXIT" }
;; ;;      ;; give up if more than 150 attempts
;; ;;      ((> attemptnum 150)
;; ;;       (debug:print 0 *default-log-port* "ERROR: 150 tries to start/connect to server. Giving up.")
;; ;;       (exit 1))
;; ;; 
;; ;;      ;;DOT CASE2 [label="local\nreadonly\nquery"];
;; ;;      ;;DOT MUTEXLOCK -> CASE2; {rank=same "case 2" CASE2}
;; ;;      ;;DOT CASE2 -> "rmt:open-qry-close-locally";
;; ;;      ;; readonly mode, read request-  handle it - case 2
;; ;;      ((and readonly-mode
;; ;;            (member cmd api:read-only-queries)) 
;; ;;       (mutex-unlock! *rmt-mutex*)
;; ;;       (debug:print-info 12 *default-log-port* "rmt:send-receive, case 2")
;; ;;       (rmt:open-qry-close-locally cmd 0 params)
;; ;;       )
;; ;; 
;; ;;      ;;DOT CASE3 [label="write in\nread-only mode"];
;; ;;      ;;DOT MUTEXLOCK -> CASE3 [label="readonly\nmode?"]; {rank=same "case 3" CASE3}
;; ;;      ;;DOT CASE3 -> "#f";
;; ;;      ;; readonly mode, write request.  Do nothing, return #f
;; ;;      (readonly-mode (extras-readonly-mode *rmt-mutex* *default-log-port* cmd params))
;; ;; 
;; ;;      ;; This block was for pre-emptively resetting the connection if there had been no communication for some time.
;; ;;      ;; I don't think it adds any value. If the server is not there, just fail and start a new connection.
;; ;;      ;; also, the expire-time calculation might not be correct. We want, time-since-last-server-access > (server:get-timeout)
;; ;;      ;;
;; ;;      ;;DOT CASE4 [label="reset\nconnection"];
;; ;;      ;;DOT MUTEXLOCK -> CASE4 [label="have connection,\nlast_access > expire_time"]; {rank=same "case 4" CASE4}
;; ;;      ;;DOT CASE4 -> "rmt:send-receive";
;; ;;      ;; reset the connection if it has been unused too long
;; ;;      ((and runremote
;; ;;            (remote-conndat runremote)
;; ;; 	   (> (current-seconds) ;; if it has been more than server-timeout seconds since last contact, close this connection and start a new on
;; ;; 	      (+ (http-transport:server-dat-get-last-access (remote-conndat runremote))
;; ;; 		 (remote-server-timeout runremote))))
;; ;;       (debug:print-info 0 *default-log-port* "Connection to " (remote-server-url runremote) " expired due to no accesses, forcing new connection.")
;; ;;       (http-transport:close-connections area-dat: runremote)
;; ;;       (remote-conndat-set! runremote #f) ;; invalidate the connection, thus forcing a new connection.
;; ;;       (mutex-unlock! *rmt-mutex*)
;; ;;       (rmt:send-receive cmd rid params attemptnum: attemptnum))
;; ;;      
;; ;;      ;;DOT CASE5 [label="local\nread"];
;; ;;      ;;DOT MUTEXLOCK -> CASE5 [label="server not required,\non homehost,\nread-only query"]; {rank=same "case 5" CASE5};
;; ;;      ;;DOT CASE5 -> "rmt:open-qry-close-locally";
;; ;; 
;; ;;      ;; on homehost and this is a read
;; ;;      ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required
;; ;; 	   (cdr (remote-hh-dat runremote))       ;; on homehost
;; ;;            (member cmd api:read-only-queries))   ;; this is a read
;; ;;       (mutex-unlock! *rmt-mutex*)
;; ;;       (debug:print-info 12 *default-log-port* "rmt:send-receive, case  5")
;; ;;       (rmt:open-qry-close-locally cmd 0 params))
;; ;; 
;; ;;      ;;DOT CASE6 [label="init\nremote"];
;; ;;      ;;DOT MUTEXLOCK -> CASE6 [label="on homehost,\nwrite query,\nhave server,\ncan't reach it"]; {rank=same "case 6" CASE6};
;; ;;      ;;DOT CASE6 -> "rmt:send-receive";
;; ;;      ;; on homehost and this is a write, we already have a server, but server has died
;; ;;      ((and (cdr (remote-hh-dat runremote))           ;; on homehost
;; ;;            (not (member cmd api:read-only-queries))  ;; this is a write
;; ;;            (remote-server-url runremote)             ;; have a server
;; ;;            (not (server:ping (remote-server-url runremote) (remote-server-id runremote))))  ;; server has died. NOTE: this is not a cheap call! Need better approach.
;; ;;       (set! *runremote* (make-and-init-remote))
;; ;;       (let* ((server-info (remote-server-info *runremote*))) 
;; ;;             (if server-info
;; ;; 		(begin
;; ;; 		  (remote-server-url-set! *runremote* (server:record->url server-info))
;; ;;                   (remote-server-id-set! *runremote* (server:record->id server-info)))))
;; ;;       (remote-force-server-set! runremote (common:force-server?))
;; ;;       (mutex-unlock! *rmt-mutex*)
;; ;;       (debug:print-info 12 *default-log-port* "rmt:send-receive, case  6")
;; ;;       (rmt:send-receive cmd rid params attemptnum: attemptnum))
;; ;; 
;; ;;      ;;DOT CASE7 [label="homehost\nwrite"];
;; ;;      ;;DOT MUTEXLOCK -> CASE7 [label="server not required,\non homehost,\na write,\nhave a server"]; {rank=same "case 7" CASE7};
;; ;;      ;;DOT CASE7 -> "rmt:open-qry-close-locally";
;; ;;      ;; on homehost and this is a write, we already have a server
;; ;;      ((and (not (remote-force-server runremote))     ;; honor forced use of server, i.e. server NOT required
;; ;; 	   (cdr (remote-hh-dat runremote))           ;; on homehost
;; ;;            (not (member cmd api:read-only-queries))  ;; this is a write
;; ;;            (remote-server-url runremote))            ;; have a server
;; ;;       (mutex-unlock! *rmt-mutex*)
;; ;;       (debug:print-info 12 *default-log-port* "rmt:send-receive, case  4.1")
;; ;;       (rmt:open-qry-close-locally cmd 0 params))
;; ;; 
;; ;;      ;;DOT CASE8 [label="force\nserver"];
;; ;;      ;;DOT MUTEXLOCK -> CASE8 [label="server not required,\nhave homehost info,\nno connection yet,\nnot a read-only query"]; {rank=same "case 8" CASE8};
;; ;;      ;;DOT CASE8 -> "rmt:open-qry-close-locally";
;; ;;      ;;  on homehost, no server contact made and this is a write, passively start a server 
;; ;;      ((and (not (remote-force-server runremote))     ;; honor forced use of server, i.e. server NOT required
;; ;; 	   (cdr (remote-hh-dat runremote))           ;; have homehost
;; ;;            (not (remote-server-url runremote))       ;; no connection yet
;; ;; 	   (not (member cmd api:read-only-queries))) ;; not a read-only query
;; ;;       (debug:print-info 12 *default-log-port* "rmt:send-receive, case  8")
;; ;;       (let ((server-info  (server:check-if-running *toppath*))) ;; (server:read-dotserver->url *toppath*))) ;; (server:check-if-running *toppath*))) ;; Do NOT want to run server:check-if-running - very expensive to do for every write call
;; ;; 	(if server-info
;; ;; 	    (begin
;; ;;               (remote-server-url-set! runremote (server:record->url server-info)) ;; the string can be consumed by the client setup if needed
;; ;;               (remote-server-id-set! runremote (server:record->id server-info)))  
;; ;; 	    (if (common:force-server?)
;; ;; 		(server:start-and-wait *toppath*)
;; ;; 		(server:kind-run *toppath*)))
;; ;;       (remote-force-server-set! runremote (common:force-server?))
;; ;;       (mutex-unlock! *rmt-mutex*)
;; ;;       (debug:print-info 12 *default-log-port* "rmt:send-receive, case  8.1")
;; ;;       (rmt:open-qry-close-locally cmd 0 params)))
;; ;; 
;; ;;      ;;DOT CASE9 [label="force server\nnot on homehost"];
;; ;;      ;;DOT MUTEXLOCK -> CASE9 [label="no connection\nand either require server\nor not on homehost"]; {rank=same "case 9" CASE9};
;; ;;      ;;DOT CASE9 -> "start\nserver" -> "rmt:send-receive";
;; ;;      ((or (and (remote-force-server runremote)              ;; we are forcing a server and don't yet have a connection to one
;; ;; 	       (not (remote-conndat runremote)))
;; ;; 	  (and (not (cdr (remote-hh-dat runremote)))        ;; not on a homehost 
;; ;; 	       (not (remote-conndat runremote))))           ;; and no connection
;; ;;       (debug:print-info 12 *default-log-port* "rmt:send-receive, case 9, hh-dat: " (remote-hh-dat runremote) " conndat: " (remote-conndat runremote))
;; ;;       (mutex-unlock! *rmt-mutex*)
;; ;;       (if (not (server:check-if-running *toppath*)) ;; who knows, maybe one has started up?
;; ;; 	  (server:start-and-wait *toppath*))
;; ;;       (remote-conndat-set! runremote (rmt:get-connection-info *toppath*)) ;; calls client:setup which calls client:setup-http
;; ;;       (rmt:send-receive cmd rid params attemptnum: attemptnum)) ;; TODO: add back-off timeout as
;; ;; 
;; ;;      ;;DOT CASE10 [label="on homehost"];
;; ;;      ;;DOT MUTEXLOCK -> CASE10 [label="server not required,\non homehost"]; {rank=same "case 10" CASE10};
;; ;;      ;;DOT CASE10 -> "rmt:open-qry-close-locally";
;; ;;      ;; all set up if get this far, dispatch the query
;; ;;      ((and (not (remote-force-server runremote))
;; ;; 	   (cdr (remote-hh-dat runremote))) ;; we are on homehost
;; ;;       (mutex-unlock! *rmt-mutex*)
;; ;;       (debug:print-info 12 *default-log-port* "rmt:send-receive, case 10")
;; ;;       (rmt:open-qry-close-locally cmd (if rid rid 0) params))
;; ;; 
;; ;;      ;;DOT CASE11 [label="send_receive"];
;; ;;      ;;DOT MUTEXLOCK -> CASE11 [label="else"]; {rank=same "case 11" CASE11};
;; ;;      ;;DOT CASE11 -> "rmt:send-receive" [label="call failed"];
;; ;;      ;;DOT CASE11 -> "RESULT" [label="call succeeded"];
;; ;;      ;; not on homehost, do server query
;; ;;      (else (extras-case-11 *default-log-port* runremote cmd params attemptnum rid)))))
;; ;;     ;;DOT }

;; bunch of small functions factored out of send-receive to make debug easier
;;

;; No Title 
;; Error: (vector-ref) out of range
;; #(#<condition: (exn type)> (#("db.scm:3740: regex#regexp" #f #f) #("db.scm:3739: regex#string-substitute" #f #f) #("db.scm:3738: base64#base64-decode" #f #f) #("db.scm:3737: z3#z3:decode-buffer" #f #f) #("db.scm:3736: with-input-from-string" #f #f) #("db.scm:3741: s11n#deserialize" #f #f) #("api.scm:374: api:execute-requests" #f #f) #("api.scm:139: call-with-current-continuation" #f #f) #("api.scm:139: with-exception-handler" #f #f) #("api.scm:139: ##sys#call-with-values" #f #f) #("api.scm:158: string->symbol" #f #f) #("api.scm:160: current-milliseconds" #f #f) #("api.scm:161: dbr:dbstruct-read-only" #f #f) #("api.scm:139: k15" #f #f) #("api.scm:139: g19" #f #f) #("api.scm:142: get-call-chain" #f #f)) #("get-test-info-by-id" (1102 507299)))
;; 6
;; 
;; 	Call history:
;; 
;; 	http-transport.scm:306: thread-terminate!	  
;; 	http-transport.scm:307: debug:print-info	  
;; 	common_records.scm:235: debug:debug-mode	  
;; 	rmt.scm:259: k587	  
;; 	rmt.scm:259: g591	  
;; 	rmt.scm:276: http-transport:server-dat-update-last-access	  
;; 	http-transport.scm:364: current-seconds	  
;; 	rmt.scm:282: debug:print-info	  
;; 	common_records.scm:235: debug:debug-mode	  
;; 	rmt.scm:283: mutex-unlock!	  
;; 	rmt.scm:287: extras-transport-succeded	  	<--
;; +-----------------------------------------------------------------------------+
;; | Exit Status    : 70  
;;  

(define (extras-case-11 *default-log-port* runremote cmd params attemptnum rid)
  ;; (mutex-unlock! *rmt-mutex*)
  (debug:print-info 12 *default-log-port* "rmt:send-receive, case  9")
  ;; (mutex-lock! *rmt-mutex*)
  (let* ((conninfo (remote-conndat runremote))
	 (dat-in   (condition-case ;; handling here has
		    ;; caused a lot of
		    ;; problems. However it
		    ;; is needed to deal with
		    ;; attemtped
		    ;; communication to
		    ;; servers that have gone
		    ;; away
		    (http-transport:send-receive 0 conninfo cmd params)
		    ((servermismatch)  (vector #f "Server id mismatch" ))
		    ((commfail)(vector #f "communications fail"))
		    ((exn)(vector #f "other fail" (print-call-chain)))))
	 (dat      (if (and (vector? dat-in) ;; ... check it is a correct size
			    (> (vector-length dat-in) 1))
		       dat-in
		       (vector #f (conc "communications fail (type 2), dat-in=" dat-in))))
	 (success  (if (vector? dat) (vector-ref dat 0) #f))
	 (res      (if (vector? dat) (vector-ref dat 1) #f)))
    (if (and (vector? conninfo) (< 5 (vector-length conninfo)))
	(http-transport:server-dat-update-last-access conninfo) ;; refresh access time
	(begin
	  (debug:print 0 *default-log-port* "INFO: Should not get here! conninfo=" conninfo)
	  (set! conninfo #f)
	  (remote-conndat-set! *runremote* #f) ;; NOTE: *runremote* is global copy of runremote. Purpose: factor out global.
	  (http-transport:close-connections  area-dat: runremote)))
    (debug:print-info 13 *default-log-port* "rmt:send-receive, case  9. conninfo=" conninfo " dat=" dat " runremote = " runremote)
    (mutex-unlock! *rmt-mutex*)
    (if success ;; success only tells us that the transport was
	;; successful, have to examine the data to see if
	;; there was a detected issue at the other end
	(extras-transport-succeded *default-log-port* *rmt-mutex* attemptnum runremote res params rid cmd)
	(begin
           (debug:print-error 0 *default-log-port* " dat=" dat) 
           (extras-transport-failed *default-log-port* *rmt-mutex* attemptnum runremote cmd rid params))
	)))

(define (rmt:print-db-stats)
  (let ((fmtstr "~40a~7-d~9-d~20,2-f")) ;; "~20,2-f"
    (debug:print 18 *default-log-port* "DB Stats\n========")
    (debug:print 18 *default-log-port* (format #f "~40a~8a~10a~10a" "Cmd" "Count" "TotTime" "Avg"))
    (for-each (lambda (cmd)
		(let ((cmd-dat (hash-table-ref *db-stats* cmd)))
		  (debug:print 18 *default-log-port* (format #f fmtstr cmd (vector-ref cmd-dat 0) (vector-ref cmd-dat 1) (/ (vector-ref cmd-dat 1)(vector-ref cmd-dat 0))))))







>
>

|
>
>
|
>
|
<
>
>
>
|
|


<
>












<
<
<
<
|
<
>
|
|
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
|
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227

228
229
230
231
232
233
234

235
236
237
238
239
240
241
242
243
244
245
246
247




248

249
250
251









































































































































































































252


































































253
254
255
256
257
258
259
				hostport: srv-addr
				lastmsg: (current-seconds)
				expires: (+ (current-seconds) 60) ;; this needs to be gathered during the ping
				))
	      (start-main-srv)))
	(start-main-srv))))

;; NB// remote is a rmt:remote struct
;;
(define (rmt:general-open-connection remote apath dbname)
  (let  ((mainconn (rmt:get-connection remote apath (db:run-id->dbname #f))))
    (if (not mainconn)
	(begin
	  (rmt:open-main-connection remote apath)
	  (thread-sleep! 1)
	  (rmt:general-open-connection remote apath dbname))

	;; we have a connection to main, ask for contact info for dbname
	(let* ((res (http-transport:send-receive mainconn "x" 'get-server `(,apath ,dbname))))
	  (print "rmt:general-open-connection got res="res)))))
	  

;;======================================================================


;; Defaults to 
;;
(define (rmt:send-receive cmd rid params #!key (attemptnum 1)(area-dat #f))
  (if (not *rmt:remote*)(set! *rmt:remote* (make-rmt:remote)))
  (let* ((apath *toppath*)
	 (conns *rmt:remote*)
	 (dbname (db:run-id->dbname rid)))
    (rmt:send-receive-real conns apath dbname rid params)))

;; db is at apath/.db/dbname, rid is an intermediary solution and will be removed
;; sometime in the future
;;
(define (rmt:send-receive-real remote apath dbname rid params)




  (let* ((conn (rmt:get-connection remote apath dbname)))

    (assert conn "FATAL: Unable to connect to db "apath"/"dbname)
    #t ;; here we do the actual connection work
    ))









































































































































































































    


































































(define (rmt:print-db-stats)
  (let ((fmtstr "~40a~7-d~9-d~20,2-f")) ;; "~20,2-f"
    (debug:print 18 *default-log-port* "DB Stats\n========")
    (debug:print 18 *default-log-port* (format #f "~40a~8a~10a~10a" "Cmd" "Count" "TotTime" "Avg"))
    (for-each (lambda (cmd)
		(let ((cmd-dat (hash-table-ref *db-stats* cmd)))
		  (debug:print 18 *default-log-port* (format #f fmtstr cmd (vector-ref cmd-dat 0) (vector-ref cmd-dat 1) (/ (vector-ref cmd-dat 1)(vector-ref cmd-dat 0))))))
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
			     (if (> tot 10)
				 (cons newmax-cmd currmax)
				 (cons 'none 0))
			     (loop (car tal)(cdr tal) newmax-cmd currmax)))))))
    (mutex-unlock! *db-stats-mutex*)
    res))

(define (rmt:open-qry-close-locally cmd run-id params #!key (remretries 5))
  (let* ((qry-is-write   (not (member cmd api:read-only-queries)))
	 (db-file-path   (common:get-db-tmp-area)) ;;  0))
	 (dbstruct-local (db:setup #t))  ;; make-dbr:dbstruct path:  dbdir local: #t)))
	 (read-only      (not (file-writable? db-file-path)))
	 (start          (current-milliseconds))
	 (resdat         (if (not (and read-only qry-is-write))
			     (let ((v (api:execute-requests dbstruct-local (vector (symbol->string cmd) params))))
			       (handle-exceptions ;; there has been a long history of receiving strange errors from values returned by the client when things go wrong..
				exn               ;;  This is an attempt to detect that situation and recover gracefully
				(begin
				  (debug:print 0 *default-log-port* "ERROR: bad data from server " v " message: "  ((condition-property-accessor 'exn 'message) exn) ", exn=" exn)
				  (vector #t '())) ;; should always get a vector but if something goes wrong return a dummy
				(if (and (vector? v)
					 (> (vector-length v) 1))
				    (let ((newvec (vector (vector-ref v 0)(vector-ref v 1))))
				      newvec)           ;; by copying the vector while inside the error handler we should force the detection of a corrupted record
				    (vector #t '()))))  ;; we could also check that the returned types are valid
			     (vector #t '())))
	 (success        (vector-ref resdat 0))
	 (res            (vector-ref resdat 1))
	 (duration       (- (current-milliseconds) start)))
    (if (and read-only qry-is-write)
        (debug:print 0 *default-log-port* "ERROR: attempt to write to read-only database ignored. cmd=" cmd))
    (if (not success)
	(if (> remretries 0)
	    (begin
	      (debug:print-error 0 *default-log-port* "local query failed. Trying again.")
	      (thread-sleep! (/ (pseudo-random-integer 5000) 1000)) ;; some random delay 
	      (rmt:open-qry-close-locally cmd run-id params remretries: (- remretries 1)))
	    (begin
	      (debug:print-error 0 *default-log-port* "too many retries in rmt:open-qry-close-locally, giving up")
	      #f))
	(begin
	  ;; (rmt:update-db-stats run-id cmd params duration)
	  ;; mark this run as dirty if this was a write, the watchdog is responsible for syncing it
	  (if qry-is-write
	      (let ((start-time (current-seconds)))
		(mutex-lock! *db-multi-sync-mutex*)
		(set! *db-last-access* start-time)  ;; THIS IS PROBABLY USELESS? (we are on a client)
                (mutex-unlock! *db-multi-sync-mutex*)))))
    res))

;;======================================================================
;;
;; A C T U A L   A P I   C A L L S  
;;
;;======================================================================








<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







283
284
285
286
287
288
289










































290
291
292
293
294
295
296
			     (if (> tot 10)
				 (cons newmax-cmd currmax)
				 (cons 'none 0))
			     (loop (car tal)(cdr tal) newmax-cmd currmax)))))))
    (mutex-unlock! *db-stats-mutex*)
    res))












































;;======================================================================
;;
;; A C T U A L   A P I   C A L L S  
;;
;;======================================================================

1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200

(define (rmt:test-set-archive-block-id run-id test-id archive-block-id)
  (rmt:send-receive 'test-set-archive-block-id run-id (list run-id test-id archive-block-id)))

(define (rmt:test-get-archive-block-info archive-block-id)
  (rmt:send-receive 'test-get-archive-block-info #f (list archive-block-id)))


(define (rmtmod:calc-ro-mode runremote *toppath*)
  (if (and runremote
	   (remote-ro-mode-checked runremote))
      (remote-ro-mode runremote)
      (let* ((dbfile  (conc *toppath* "/megatest.db"))
	     (ro-mode (not (file-writable? dbfile)))) ;; TODO: use dbstruct or runremote to figure this out in future
	(if runremote
	    (begin
	      (remote-ro-mode-set! runremote ro-mode)
	      (remote-ro-mode-checked-set! runremote #t)
	      ro-mode)
	    ro-mode))))

(define (extras-readonly-mode rmt-mutex log-port cmd params)
  (mutex-unlock! rmt-mutex)
  (debug:print-info 12 log-port "rmt:send-receive, case 3")
  (debug:print 0 log-port "WARNING: write transaction requested on a readonly area.  cmd="cmd" params="params)
  #f)

(define (extras-transport-failed *default-log-port* *rmt-mutex* attemptnum runremote cmd rid params)
  (debug:print 0 *default-log-port* "WARNING: communication failed. Trying again, try num: " attemptnum)
  (mutex-lock! *rmt-mutex*)
  (remote-conndat-set!    runremote #f)
  (http-transport:close-connections area-dat: runremote)
  (remote-server-url-set! runremote #f)
  (mutex-unlock! *rmt-mutex*)
  (debug:print-info 12 *default-log-port* "rmt:send-receive, case  9.1")
  (rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1)))
  
(define (extras-transport-succeded *default-log-port* *rmt-mutex* attemptnum runremote res params rid cmd)
  (if (and (vector? res)
	   (eq? (vector-length res) 2)
	   (eq? (vector-ref res 1) 'overloaded)) ;; since we are
						 ;; looking at the
						 ;; data to carry the
						 ;; error we'll use a
						 ;; fairly obtuse
						 ;; combo to minimise
						 ;; the chances of
						 ;; some sort of
						 ;; collision.  this
						 ;; is the case where
						 ;; the returned data
						 ;; is bad or the
						 ;; server is
						 ;; overloaded and we
						 ;; want to ease off
						 ;; the queries
      (let ((wait-delay (+ attemptnum (* attemptnum 10))))
	(debug:print 0 *default-log-port* "WARNING: server is overloaded. Delaying " wait-delay " seconds and trying call again.")
	(mutex-lock! *rmt-mutex*)
	(http-transport:close-connections area-dat: runremote)
	(set! *runremote* #f) ;; force starting over
	(mutex-unlock! *rmt-mutex*)
	(thread-sleep! wait-delay)
	(rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1)))
      res)) ;; All good, return res

#;(set-functions rmt:send-receive                       remote-server-url-set!
	       http-transport:close-connections	      remote-conndat-set!
	       debug:print                            debug:print-info
	       remote-ro-mode                         remote-ro-mode-set!
	       remote-ro-mode-checked-set!            remote-ro-mode-checked)

;; gets mtpg-run-id and syncs the record if different
;;
(define (tasks:run-id->mtpg-run-id dbh cached-info run-id area-info smallest-last-update-time)
  (let* ((runs-ht (hash-table-ref cached-info 'runs))
	 (runinf  (hash-table-ref/default runs-ht run-id #f))
         (area-id (vector-ref area-info 0)))
       (if runinf







<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







824
825
826
827
828
829
830

































































831
832
833
834
835
836
837

(define (rmt:test-set-archive-block-id run-id test-id archive-block-id)
  (rmt:send-receive 'test-set-archive-block-id run-id (list run-id test-id archive-block-id)))

(define (rmt:test-get-archive-block-info archive-block-id)
  (rmt:send-receive 'test-get-archive-block-info #f (list archive-block-id)))


































































;; gets mtpg-run-id and syncs the record if different
;;
(define (tasks:run-id->mtpg-run-id dbh cached-info run-id area-info smallest-last-update-time)
  (let* ((runs-ht (hash-table-ref cached-info 'runs))
	 (runinf  (hash-table-ref/default runs-ht run-id #f))
         (area-id (vector-ref area-info 0)))
       (if runinf
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
				    (delete-file* pkt-file)
				    (if (and dbfile
					     (string-match ".*/main.db$" dbfile))
					(begin
					  (debug:print-info 0 *default-log-port* "Releasing lock for "dbfile)
					  (db:with-lock-db (servdat-dbfile *server-info*)
							   (lambda (dbh dbfile)
							     (db:release-lock dbh)))))))
			      (if (bdat-task-db *bdat*)    ;; TODO: Check that this is correct for task db
				  (let ((db (cdr (bdat-task-db *bdat*))))
				    (if (sqlite3:database? db)
					(begin
					  (sqlite3:interrupt! db)
					  (sqlite3:finalize! db #t)
					  (bdat-task-db-set! *bdat* #f)))))







|







1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
				    (delete-file* pkt-file)
				    (if (and dbfile
					     (string-match ".*/main.db$" dbfile))
					(begin
					  (debug:print-info 0 *default-log-port* "Releasing lock for "dbfile)
					  (db:with-lock-db (servdat-dbfile *server-info*)
							   (lambda (dbh dbfile)
							     (db:release-lock dbh dbfile)))))))
			      (if (bdat-task-db *bdat*)    ;; TODO: Check that this is correct for task db
				  (let ((db (cdr (bdat-task-db *bdat*))))
				    (if (sqlite3:database? db)
					(begin
					  (sqlite3:interrupt! db)
					  (sqlite3:finalize! db #t)
					  (bdat-task-db-set! *bdat* #f)))))
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888
1889
1890
1891
	  (let ((num-ok (length (server:get-best (server:get-list areapath)))))
	    (if (and (> try-num 0)  ;; first time through simply wait a little while then try again
		     (< num-ok 1))  ;; if there are no decent candidates for servers then try starting a new one
		(server:kind-run areapath))
	    (thread-sleep! 5)
	    (loop (server:check-if-running areapath)
		  (+ try-num 1)))))))

(define (make-and-init-remote)
  (make-remote ;; hh-dat:      (common:get-homehost)
	       server-info:  (if *toppath* (server:check-if-running *toppath*) #f)
	       server-timeout: (server:expiration-timeout)))




;; called in megatest.scm, host-port is string hostname:port
;;
;; NOTE: This is NOT called directly from clients as not all transports support a client running
;;       in the same process as the server.
;;
(define (server:ping host port server-id #!key (do-exit #f))







<
<
<
<
<
<
<
<







1507
1508
1509
1510
1511
1512
1513








1514
1515
1516
1517
1518
1519
1520
	  (let ((num-ok (length (server:get-best (server:get-list areapath)))))
	    (if (and (> try-num 0)  ;; first time through simply wait a little while then try again
		     (< num-ok 1))  ;; if there are no decent candidates for servers then try starting a new one
		(server:kind-run areapath))
	    (thread-sleep! 5)
	    (loop (server:check-if-running areapath)
		  (+ try-num 1)))))))









;; called in megatest.scm, host-port is string hostname:port
;;
;; NOTE: This is NOT called directly from clients as not all transports support a client running
;;       in the same process as the server.
;;
(define (server:ping host port server-id #!key (do-exit #f))