Megatest

Check-in [4860a4e6aa]
Login
Overview
Comment:Clean up uses of tcp. tcp6 did not seem to work. Increased tcp backlog (didn't seem to help) and improved backoff
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.80
Files: files | file ages | folders
SHA1: 4860a4e6aa32078091539c9cb3a2d212283f635e
User & Date: matt on 2023-04-09 14:45:02
Other Links: branch diff | manifest | tags
Context
2023-04-09
21:07
Use run-id in queries looking for test info. Can't assume test-ids are unique. Added separate exception handler for serialization to help with debug (still causes grief when hit but at least you can find the issue but looking in server logs). check-in: ea060a034b user: matt tags: v1.80
14:45
Clean up uses of tcp. tcp6 did not seem to work. Increased tcp backlog (didn't seem to help) and improved backoff check-in: 4860a4e6aa user: matt tags: v1.80
2023-04-08
11:48
Added better fallback on communication failure. check-in: 7dcbd017e3 user: matt tags: v1.80
Changes

Modified db.scm from [a9fc779d25] to [cf4037913b].

37
38
39
40
41
42
43
44

45
46
47
48
49
50
51
37
38
39
40
41
42
43

44
45
46
47
48
49
50
51







-
+







(declare (uses rmtmod))

(import commonmod
	(prefix mtargs args:))

(use (srfi 18)
     extras
     tcp
     ;; tcp
     stack
     (prefix sqlite3 sqlite3:)
     srfi-1
     posix
     regex
     regex-case
     srfi-69

Modified megatest.scm from [1aad76e09c] to [6f4fa87a54].

81
82
83
84
85
86
87
88
89
90



91
92
93
94
95
96
97
81
82
83
84
85
86
87



88
89
90
91
92
93
94
95
96
97







-
-
-
+
+
+








(include "common_records.scm")
(include "key_records.scm")
(include "db_records.scm")
(include "run_records.scm")
(include "megatest-fossil-hash.scm")

(use (prefix sqlite3 sqlite3:) srfi-1 posix regex regex-case srfi-69 (prefix base64 base64:)
     readline apropos json http-client directory-utils typed-records
     http-client srfi-18 extras format tcp-server tcp)
(use (prefix sqlite3 sqlite3:) srfi-1 posix regex regex-case srfi-69 (prefix base64 base64:))
(use readline apropos json http-client directory-utils typed-records)
(use http-client srfi-18 extras format tcp-server tcp)

;; Added for csv stuff - will be removed
;;
(use sparse-vectors)

(require-library mutils)

Modified portlogger.scm from [0097927637] to [3334cefb6f].

23
24
25
26
27
28
29
30

31
32
33
34
35
36
37
23
24
25
26
27
28
29

30
31
32
33
34
35
36
37







-
+







(declare (uses dbmod))

(module portlogger
*

(import scheme chicken data-structures)
(import srfi-1 posix srfi-69 hostinfo dot-locking z3
	(srfi 18) extras tcp s11n)
	(srfi 18) extras s11n)
(import (prefix sqlite3 sqlite3:))
(import debugprint dbmod)
;; lsof -i

(define (portlogger:open-db fname)
  (let* ((avail    (tasks:wait-on-journal fname 5 remove: #t)) ;; wait up to about 10 seconds for the journal to go away
	 (exists   (file-exists? fname))

Modified server.scm from [a78488d9e1] to [39953c681c].

25
26
27
28
29
30
31
32

33
34
35


36
37
38
39
40
41
42
43
25
26
27
28
29
30
31

32



33
34

35
36
37
38
39
40
41







-
+
-
-
-
+
+
-







(declare (uses tasks)) ;; tasks are where stuff is maintained about what is running.
;; (declare (uses synchash))
;;(declare (uses rpc-transport))
(declare (uses launch))
;; (declare (uses daemon))
(declare (uses mtargs))

(require-extension (srfi 18) extras tcp s11n)
(use (srfi 18) extras s11n)

(use srfi-1 posix regex regex-case srfi-69 hostinfo md5 message-digest
     directory-utils posix-extras matchable utils)
(use srfi-1 posix regex regex-case srfi-69 hostinfo md5 message-digest)
(use directory-utils posix-extras matchable utils)

(use spiffy uri-common intarweb http-client spiffy-request-vars)

(import commonmod
	debugprint
	(prefix mtargs args:))

(include "common_records.scm")

Modified tcp-transportmod.scm from [32789e6d1a] to [574d8a43e3].

21
22
23
24
25
26
27
28

29
30
31
32
33
34
35
21
22
23
24
25
26
27

28
29
30
31
32
33
34
35







-
+







(declare (unit tcp-transportmod))
(declare (uses debugprint))
(declare (uses commonmod))
(declare (uses dbfile))
(declare (uses dbmod))
(declare (uses portlogger))

(use address-info)
(use address-info tcp)

(module tcp-transportmod
	*
	
  (import scheme
	  (prefix sqlite3 sqlite3:)
	  chicken
180
181
182
183
184
185
186
187

188
189
190
191
192
193
194
180
181
182
183
184
185
186

187
188
189
190
191
192
193
194







-
+







		   (debug:print-info 0 *default-log-port* "No server found. Starting one for run-id "run-id" in dbfile "dbfname)
		   (server-start-proc)
		   (tt-last-serv-start-set! ttdat (current-seconds))))
	     (thread-sleep! 1)
	     (tt:client-connect-to-server ttdat dbfname run-id testsuite)))))))

(define (tt:ping host port server-id #!optional (tries-left 5))
  (let*  ((res      (tt:send-receive-direct host port `(ping #f #f #f))) ;; please send me your server-id
  (let*  ((res      (tt:send-receive-direct host port `(ping #f #f #f) ping-mode: #t)) ;; please send me your server-id
	  (try-again (lambda ()
		       (if (> tries-left 0)
			   (begin
			     (thread-sleep! 1)
			     (tt:ping host port server-id (- tries-left 1)))
			   #f))))
    ;;
321
322
323
324
325
326
327
328

329
330
331
332
333
334
335
336
337
338
339
340
341
342




































343
344
345
346
347
348
349
321
322
323
324
325
326
327

328
329













330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372







-
+

-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+







(define (tt:send-receive ttdat conn cmd run-id params)
  (let* ((host-port (tt-conn-host-port conn)) ;; (conc (tt-conn-host conn)":"(tt-conn-port conn)))
	 (host      (tt-conn-host conn))
	 (port      (tt-conn-port conn))
	 (dat       (list cmd run-id params #f))) ;; no meta data yet
    (tt:send-receive-direct host port dat)))

(define (tt:send-receive-direct host port dat)
(define (tt:send-receive-direct host port dat #!key (ping-mode #f)(tries-remaining 25))
  (assert (number? port) "FATAL: tt:send-receive-direct called with port not a number "port)
  (handle-exceptions
      exn
    #f ;; Add condition-case or better handling here
    (let-values (((inp oup)(tcp-connect host port)))
      (let ((res (if (and inp oup)
		     (begin
		       (serialize dat oup)
		       (close-output-port oup)
		       (deserialize inp))
		     )))
	(close-input-port inp)
	res))))

  (let* ((retry          (lambda ()
			   (tt:send-receive-direct host port dat tries-remaining: (- tries-remaining 1))))
	 (full-err-print (lambda (exn)
			   (pp (condition->list exn) *default-log-port*)
			   (pp dat *default-log-port*)
			   (debug:print 0 *default-log-port*
					", error: "     ((condition-property-accessor 'exn 'message)   exn)
					", arguments: " ((condition-property-accessor 'exn 'arguments) exn)
					", location: "  ((condition-property-accessor 'exn 'location)  exn)
					))))
    (condition-case
     (let-values (((inp oup)(tcp-connect host port)))
       (let ((res (if (and inp oup)
		      (begin
			(serialize dat oup)
			(close-output-port oup)
			(deserialize inp))
		      )))
	 (close-input-port inp)
	 res))
     (exn (io-error)
	  (full-err-print exn)
	  (debug:print 0 *default-log-port* exn "ERROR: i/o error")
	  #f)
     (exn (i/o net)
	  (if ping-mode
	      #f
	      (if (>= tries-remaining 0)
		  (let* ((backoff-delay (* (- 26 tries-remaining) 0.5)))
		    (debug:print 0 *default-log-port* "WARNING: TCP overload, trying again in "backoff-delay"s.")
		    (thread-sleep! backoff-delay)
		    (retry))
		  (assert #f "FATAL: Too many retries in tt:send-receive-direct"))))
     (exn ()
	  (full-err-print exn)
	  #f))))


;;======================================================================
;; server
;;======================================================================

(define (tt:sync-dbs ttdat)
699
700
701
702
703
704
705
706

707
708
709
710
711
712
713
722
723
724
725
726
727
728

729
730
731
732
733
734
735
736







-
+







	    (setup-listener-portlogger uconn))
	  #f)
      (connect-listener uconn port))))

(define (connect-listener uconn port)
  ;; (tcp-listener-socket LISTENER)(socket-name so)
  ;; sockaddr-address, sockaddr-port, sockaddr->string
  (let* ((tlsn (tcp-listen port 1000 #f)) ;; (tcp-listen TCPPORT [BACKLOG [HOST]])
  (let* ((tlsn (tcp-listen port 10000 #f)) ;; (tcp-listen TCPPORT [BACKLOG [HOST]])
	 (addr  (tt:get-best-guess-address (get-host-name)))) ;; (get-my-best-address))) ;; (hostinfo-addresses (host-information (current-hostname)))
    (tt-port-set!      uconn port)
    (tt-host-set!      uconn addr)
    (tt-host-port-set! uconn (conc addr":"port))
    (tt-socket-set!    uconn tlsn)
    uconn))

Modified tests.scm from [1a4573f7da] to [6fa611f761].

30
31
32
33
34
35
36
37

38
39

40
41
42
43
44
45
46
47
48
49
30
31
32
33
34
35
36

37
38

39



40
41
42
43
44
45
46







-
+

-
+
-
-
-







(declare (uses commonmod))
(declare (uses items))
(declare (uses runconfig))
(declare (uses server))
(declare (uses mtargs))
(declare (uses rmtmod))

(use sqlite3 srfi-1 posix regex regex-case srfi-69 dot-locking tcp directory-utils)
(use sqlite3 srfi-1 posix regex regex-case srfi-69 dot-locking directory-utils)
(import (prefix sqlite3 sqlite3:))
(import commonmod
(import commonmod (prefix mtargs args:) debugprint rmtmod)
	(prefix mtargs args:)
	debugprint
	rmtmod)
(require-library stml)

(include "common_records.scm")
(include "key_records.scm")
(include "db_records.scm")
(include "run_records.scm")
(include "test_records.scm")