Megatest

Check-in [ff60788828]
Login
Overview
Comment:Limit backoff delay, check that param is a condition
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.80
Files: files | file ages | folders
SHA1: ff6078882826dee93922de63033e6794de14a609
User & Date: matt on 2023-04-12 08:47:36
Other Links: branch diff | manifest | tags
Context
2023-04-12
10:19
merged fork check-in: 5d3a86162e user: mmgraham tags: v1.80
08:47
Limit backoff delay, check that param is a condition check-in: ff60788828 user: matt tags: v1.80
06:59
Fix random issue with directory creation when parallel startup causes collisions check-in: 07ee6da7ba user: matt tags: v1.80
Changes

Modified tcp-transportmod.scm from [fb31b50fe1] to [1211c35eaf].

240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
		result)))
	    (else ;; did not receive properly formated result
	     (if (not res) ;; tt:handler is telling us that communication failed
		 (let* ((host    (tt-conn-host conn))
			(port    (tt-conn-port conn))
			;; (dbfname (tt-conn-port conn)) ;; 192.168.0.127:4242-726924:4.db
			(pid     (tt-conn-pid  conn))
			(servinf (conc areapath"/.servinfo/"host":"port"-"pid":"dbfname)))
		   (hash-table-set! (tt-conns ttdat) dbfname #f)
		   (if (file-exists? servinf)
		       (begin
			 (if (< attemptnum 3)
			     (begin
			       (thread-sleep! 0.25)
			       (tt:handler ttdat cmd run-id params (+ attemptnum 1) area-dat areapath readonly-mode dbfname testsuite mtexe))







|







240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
		result)))
	    (else ;; did not receive properly formated result
	     (if (not res) ;; tt:handler is telling us that communication failed
		 (let* ((host    (tt-conn-host conn))
			(port    (tt-conn-port conn))
			;; (dbfname (tt-conn-port conn)) ;; 192.168.0.127:4242-726924:4.db
			(pid     (tt-conn-pid  conn))
			(servinf (tt-servinf-file ttdat))) ;; (conc areapath"/.servinfo/"host":"port"-"pid":"dbfname))) ;; TODO, use (server:get-servinfo-dir areapath)
		   (hash-table-set! (tt-conns ttdat) dbfname #f)
		   (if (file-exists? servinf)
		       (begin
			 (if (< attemptnum 3)
			     (begin
			       (thread-sleep! 0.25)
			       (tt:handler ttdat cmd run-id params (+ attemptnum 1) area-dat areapath readonly-mode dbfname testsuite mtexe))
263
264
265
266
267
268
269

270
271
272
273
274
275
276
277
278
279
280
281
				       #f
				       (delete-file* servinf))
				     (tt:handler ttdat cmd run-id params (+ attemptnum 1) area-dat areapath readonly-mode dbfname testsuite mtexe))
				   (begin
				     ;; start server - addressed in client-connect-to-server
				     ;; delay        - addressed in client-connect-to-server
				     ;; try again

				     (tt:handler ttdat cmd run-id params (+ attemptnum 1) area-dat areapath readonly-mode dbfname testsuite mtexe))
				   ))))
		       (begin ;; no server file, delay and try again
			 (debug:print 0 *default-log-port* "INFO: connection to server "host":"port" broken for "dbfname", but do not see servinf file "servinf)
			 (thread-sleep! 1)
			 (tt:handler ttdat cmd run-id params (+ attemptnum 1) area-dat areapath readonly-mode dbfname testsuite mtexe))))
		 (begin ;; this case is where res is malformed. Probably should abort
		   (assert #f "FATAL: tt:handler received bad data "res)
		   ;; (debug:print 0 *default-log-port* "INFO: got corrupt data from server "host":"port", "res", for "dbfname", will try again.")
		   ;; (tt:handler ttdat cmd run-id params (+ attemptnum 1) area-dat areapath readonly-mode dbfname testsuite mtexe)
		   )))))
	(begin







>




|







263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
				       #f
				       (delete-file* servinf))
				     (tt:handler ttdat cmd run-id params (+ attemptnum 1) area-dat areapath readonly-mode dbfname testsuite mtexe))
				   (begin
				     ;; start server - addressed in client-connect-to-server
				     ;; delay        - addressed in client-connect-to-server
				     ;; try again
				     (thread-sleep! 0.25) ;; dunno, I think this needs to be here
				     (tt:handler ttdat cmd run-id params (+ attemptnum 1) area-dat areapath readonly-mode dbfname testsuite mtexe))
				   ))))
		       (begin ;; no server file, delay and try again
			 (debug:print 0 *default-log-port* "INFO: connection to server "host":"port" broken for "dbfname", but do not see servinf file "servinf)
			 (thread-sleep! 0.5)
			 (tt:handler ttdat cmd run-id params (+ attemptnum 1) area-dat areapath readonly-mode dbfname testsuite mtexe))))
		 (begin ;; this case is where res is malformed. Probably should abort
		   (assert #f "FATAL: tt:handler received bad data "res)
		   ;; (debug:print 0 *default-log-port* "INFO: got corrupt data from server "host":"port", "res", for "dbfname", will try again.")
		   ;; (tt:handler ttdat cmd run-id params (+ attemptnum 1) area-dat areapath readonly-mode dbfname testsuite mtexe)
		   )))))
	(begin
372
373
374
375
376
377
378


379
380
381
382
383
384
385

386
387
388
389
390
391
392

(define (tt:send-receive-direct host port dat #!key (ping-mode #f)(tries-remaining 25))
  (assert (number? port) "FATAL: tt:send-receive-direct called with port not a number "port)
  (tt:backoff-decr-and-wait host port)
  (let* ((retry          (lambda ()
			   (tt:send-receive-direct host port dat tries-remaining: (- tries-remaining 1))))
	 (full-err-print (lambda (exn msg)


			   (pp (condition->list exn) *default-log-port*)
			   (pp dat *default-log-port*)
			   (debug:print 0 *default-log-port* msg
					", error: "     ((condition-property-accessor 'exn 'message)   exn)
					", arguments: " ((condition-property-accessor 'exn 'arguments) exn)
					", location: "  ((condition-property-accessor 'exn 'location)  exn)
					))))

    (condition-case
     (let-values (((inp oup)(tcp-connect host port)))
       (let ((res (if (and inp oup)
		      (begin
			(serialize dat oup)
			(close-output-port oup)
			(deserialize inp))







>
>
|
|
|
|
|
|
|
>







373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396

(define (tt:send-receive-direct host port dat #!key (ping-mode #f)(tries-remaining 25))
  (assert (number? port) "FATAL: tt:send-receive-direct called with port not a number "port)
  (tt:backoff-decr-and-wait host port)
  (let* ((retry          (lambda ()
			   (tt:send-receive-direct host port dat tries-remaining: (- tries-remaining 1))))
	 (full-err-print (lambda (exn msg)
			   (if (condition? exn)
			       (begin
				 (pp (condition->list exn) *default-log-port*)
				 (pp dat *default-log-port*)
				 (debug:print 0 *default-log-port* msg
					      ", error: "     ((condition-property-accessor 'exn 'message)   exn)
					      ", arguments: " ((condition-property-accessor 'exn 'arguments) exn)
					      ", location: "  ((condition-property-accessor 'exn 'location)  exn)
					      ))
			       (debug:print 0 *default-log-port* msg "(note: exn="exn", is not a condition object.")))))
    (condition-case
     (let-values (((inp oup)(tcp-connect host port)))
       (let ((res (if (and inp oup)
		      (begin
			(serialize dat oup)
			(close-output-port oup)
			(deserialize inp))
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
	  (if ping-mode
	      #f
	      (cond
	       ((>  tries-remaining 4) ;; server likely defunct
		(tt:backoff-incr host port)
		#f)
	       ((>= tries-remaining 0)
		(let* ((backoff-delay (* (- 26 tries-remaining) 0.1)))
		  (debug:print 0 *default-log-port* "WARNING: TCP overload, trying again in "backoff-delay"s.")
		  (thread-sleep! backoff-delay)
		  (tt:backoff-incr host port)
		  (retry))
		(assert #f "FATAL: Too many retries in tt:send-receive-direct"))
	       (else #f))))
     (exn ()







|







414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
	  (if ping-mode
	      #f
	      (cond
	       ((>  tries-remaining 4) ;; server likely defunct
		(tt:backoff-incr host port)
		#f)
	       ((>= tries-remaining 0)
		(let* ((backoff-delay (max (* (- 26 tries-remaining) 0.1) 1.0)))
		  (debug:print 0 *default-log-port* "WARNING: TCP overload, trying again in "backoff-delay"s.")
		  (thread-sleep! backoff-delay)
		  (tt:backoff-incr host port)
		  (retry))
		(assert #f "FATAL: Too many retries in tt:send-receive-direct"))
	       (else #f))))
     (exn ()