Megatest

Check-in [ff60788828]
Login
Overview
Comment:Limit backoff delay, check that param is a condition
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.80
Files: files | file ages | folders
SHA1: ff6078882826dee93922de63033e6794de14a609
User & Date: matt on 2023-04-12 08:47:36
Other Links: branch diff | manifest | tags
Context
2023-04-12
10:19
merged fork check-in: 5d3a86162e user: mmgraham tags: v1.80
08:47
Limit backoff delay, check that param is a condition check-in: ff60788828 user: matt tags: v1.80
06:59
Fix random issue with directory creation when parallel startup causes collisions check-in: 07ee6da7ba user: matt tags: v1.80
Changes

Modified tcp-transportmod.scm from [fb31b50fe1] to [1211c35eaf].

240
241
242
243
244
245
246
247

248
249
250
251
252
253
254
240
241
242
243
244
245
246

247
248
249
250
251
252
253
254







-
+







		result)))
	    (else ;; did not receive properly formated result
	     (if (not res) ;; tt:handler is telling us that communication failed
		 (let* ((host    (tt-conn-host conn))
			(port    (tt-conn-port conn))
			;; (dbfname (tt-conn-port conn)) ;; 192.168.0.127:4242-726924:4.db
			(pid     (tt-conn-pid  conn))
			(servinf (conc areapath"/.servinfo/"host":"port"-"pid":"dbfname)))
			(servinf (tt-servinf-file ttdat))) ;; (conc areapath"/.servinfo/"host":"port"-"pid":"dbfname))) ;; TODO, use (server:get-servinfo-dir areapath)
		   (hash-table-set! (tt-conns ttdat) dbfname #f)
		   (if (file-exists? servinf)
		       (begin
			 (if (< attemptnum 3)
			     (begin
			       (thread-sleep! 0.25)
			       (tt:handler ttdat cmd run-id params (+ attemptnum 1) area-dat areapath readonly-mode dbfname testsuite mtexe))
263
264
265
266
267
268
269

270
271
272
273
274

275
276
277
278
279
280
281
263
264
265
266
267
268
269
270
271
272
273
274

275
276
277
278
279
280
281
282







+




-
+







				       #f
				       (delete-file* servinf))
				     (tt:handler ttdat cmd run-id params (+ attemptnum 1) area-dat areapath readonly-mode dbfname testsuite mtexe))
				   (begin
				     ;; start server - addressed in client-connect-to-server
				     ;; delay        - addressed in client-connect-to-server
				     ;; try again
				     (thread-sleep! 0.25) ;; dunno, I think this needs to be here
				     (tt:handler ttdat cmd run-id params (+ attemptnum 1) area-dat areapath readonly-mode dbfname testsuite mtexe))
				   ))))
		       (begin ;; no server file, delay and try again
			 (debug:print 0 *default-log-port* "INFO: connection to server "host":"port" broken for "dbfname", but do not see servinf file "servinf)
			 (thread-sleep! 1)
			 (thread-sleep! 0.5)
			 (tt:handler ttdat cmd run-id params (+ attemptnum 1) area-dat areapath readonly-mode dbfname testsuite mtexe))))
		 (begin ;; this case is where res is malformed. Probably should abort
		   (assert #f "FATAL: tt:handler received bad data "res)
		   ;; (debug:print 0 *default-log-port* "INFO: got corrupt data from server "host":"port", "res", for "dbfname", will try again.")
		   ;; (tt:handler ttdat cmd run-id params (+ attemptnum 1) area-dat areapath readonly-mode dbfname testsuite mtexe)
		   )))))
	(begin
372
373
374
375
376
377
378


379
380
381
382
383
384
385








386
387
388
389
390
391
392
373
374
375
376
377
378
379
380
381







382
383
384
385
386
387
388
389
390
391
392
393
394
395
396







+
+
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+








(define (tt:send-receive-direct host port dat #!key (ping-mode #f)(tries-remaining 25))
  (assert (number? port) "FATAL: tt:send-receive-direct called with port not a number "port)
  (tt:backoff-decr-and-wait host port)
  (let* ((retry          (lambda ()
			   (tt:send-receive-direct host port dat tries-remaining: (- tries-remaining 1))))
	 (full-err-print (lambda (exn msg)
			   (if (condition? exn)
			       (begin
			   (pp (condition->list exn) *default-log-port*)
			   (pp dat *default-log-port*)
			   (debug:print 0 *default-log-port* msg
					", error: "     ((condition-property-accessor 'exn 'message)   exn)
					", arguments: " ((condition-property-accessor 'exn 'arguments) exn)
					", location: "  ((condition-property-accessor 'exn 'location)  exn)
					))))
				 (pp (condition->list exn) *default-log-port*)
				 (pp dat *default-log-port*)
				 (debug:print 0 *default-log-port* msg
					      ", error: "     ((condition-property-accessor 'exn 'message)   exn)
					      ", arguments: " ((condition-property-accessor 'exn 'arguments) exn)
					      ", location: "  ((condition-property-accessor 'exn 'location)  exn)
					      ))
			       (debug:print 0 *default-log-port* msg "(note: exn="exn", is not a condition object.")))))
    (condition-case
     (let-values (((inp oup)(tcp-connect host port)))
       (let ((res (if (and inp oup)
		      (begin
			(serialize dat oup)
			(close-output-port oup)
			(deserialize inp))
410
411
412
413
414
415
416
417

418
419
420
421
422
423
424
414
415
416
417
418
419
420

421
422
423
424
425
426
427
428







-
+







	  (if ping-mode
	      #f
	      (cond
	       ((>  tries-remaining 4) ;; server likely defunct
		(tt:backoff-incr host port)
		#f)
	       ((>= tries-remaining 0)
		(let* ((backoff-delay (* (- 26 tries-remaining) 0.1)))
		(let* ((backoff-delay (max (* (- 26 tries-remaining) 0.1) 1.0)))
		  (debug:print 0 *default-log-port* "WARNING: TCP overload, trying again in "backoff-delay"s.")
		  (thread-sleep! backoff-delay)
		  (tt:backoff-incr host port)
		  (retry))
		(assert #f "FATAL: Too many retries in tt:send-receive-direct"))
	       (else #f))))
     (exn ()