Megatest

Check-in [4860a4e6aa]
Login
Overview
Comment:Clean up uses of tcp. tcp6 did not seem to work. Increased tcp backlog (didn't seem to help) and improved backoff
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.80
Files: files | file ages | folders
SHA1: 4860a4e6aa32078091539c9cb3a2d212283f635e
User & Date: matt on 2023-04-09 14:45:02
Other Links: branch diff | manifest | tags
Context
2023-04-09
21:07
Use run-id in queries looking for test info. Can't assume test-ids are unique. Added separate exception handler for serialization to help with debug (still causes grief when hit but at least you can find the issue but looking in server logs). check-in: ea060a034b user: matt tags: v1.80
14:45
Clean up uses of tcp. tcp6 did not seem to work. Increased tcp backlog (didn't seem to help) and improved backoff check-in: 4860a4e6aa user: matt tags: v1.80
2023-04-08
11:48
Added better fallback on communication failure. check-in: 7dcbd017e3 user: matt tags: v1.80
Changes

Modified db.scm from [a9fc779d25] to [cf4037913b].

37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
(declare (uses rmtmod))

(import commonmod
	(prefix mtargs args:))

(use (srfi 18)
     extras
     tcp
     stack
     (prefix sqlite3 sqlite3:)
     srfi-1
     posix
     regex
     regex-case
     srfi-69







|







37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
(declare (uses rmtmod))

(import commonmod
	(prefix mtargs args:))

(use (srfi 18)
     extras
     ;; tcp
     stack
     (prefix sqlite3 sqlite3:)
     srfi-1
     posix
     regex
     regex-case
     srfi-69

Modified megatest.scm from [1aad76e09c] to [6f4fa87a54].

81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97

(include "common_records.scm")
(include "key_records.scm")
(include "db_records.scm")
(include "run_records.scm")
(include "megatest-fossil-hash.scm")

(use (prefix sqlite3 sqlite3:) srfi-1 posix regex regex-case srfi-69 (prefix base64 base64:)
     readline apropos json http-client directory-utils typed-records
     http-client srfi-18 extras format tcp-server tcp)

;; Added for csv stuff - will be removed
;;
(use sparse-vectors)

(require-library mutils)








|
|
|







81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97

(include "common_records.scm")
(include "key_records.scm")
(include "db_records.scm")
(include "run_records.scm")
(include "megatest-fossil-hash.scm")

(use (prefix sqlite3 sqlite3:) srfi-1 posix regex regex-case srfi-69 (prefix base64 base64:))
(use readline apropos json http-client directory-utils typed-records)
(use http-client srfi-18 extras format tcp-server tcp)

;; Added for csv stuff - will be removed
;;
(use sparse-vectors)

(require-library mutils)

Modified portlogger.scm from [0097927637] to [3334cefb6f].

23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
(declare (uses dbmod))

(module portlogger
*

(import scheme chicken data-structures)
(import srfi-1 posix srfi-69 hostinfo dot-locking z3
	(srfi 18) extras tcp s11n)
(import (prefix sqlite3 sqlite3:))
(import debugprint dbmod)
;; lsof -i

(define (portlogger:open-db fname)
  (let* ((avail    (tasks:wait-on-journal fname 5 remove: #t)) ;; wait up to about 10 seconds for the journal to go away
	 (exists   (file-exists? fname))







|







23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
(declare (uses dbmod))

(module portlogger
*

(import scheme chicken data-structures)
(import srfi-1 posix srfi-69 hostinfo dot-locking z3
	(srfi 18) extras s11n)
(import (prefix sqlite3 sqlite3:))
(import debugprint dbmod)
;; lsof -i

(define (portlogger:open-db fname)
  (let* ((avail    (tasks:wait-on-journal fname 5 remove: #t)) ;; wait up to about 10 seconds for the journal to go away
	 (exists   (file-exists? fname))

Modified server.scm from [a78488d9e1] to [39953c681c].

25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
(declare (uses tasks)) ;; tasks are where stuff is maintained about what is running.
;; (declare (uses synchash))
;;(declare (uses rpc-transport))
(declare (uses launch))
;; (declare (uses daemon))
(declare (uses mtargs))

(require-extension (srfi 18) extras tcp s11n)

(use srfi-1 posix regex regex-case srfi-69 hostinfo md5 message-digest
     directory-utils posix-extras matchable utils)

(use spiffy uri-common intarweb http-client spiffy-request-vars)

(import commonmod
	debugprint
	(prefix mtargs args:))

(include "common_records.scm")







|
<
|
|
<







25
26
27
28
29
30
31
32

33
34

35
36
37
38
39
40
41
(declare (uses tasks)) ;; tasks are where stuff is maintained about what is running.
;; (declare (uses synchash))
;;(declare (uses rpc-transport))
(declare (uses launch))
;; (declare (uses daemon))
(declare (uses mtargs))

(use (srfi 18) extras s11n)

(use srfi-1 posix regex regex-case srfi-69 hostinfo md5 message-digest)
(use directory-utils posix-extras matchable utils)

(use spiffy uri-common intarweb http-client spiffy-request-vars)

(import commonmod
	debugprint
	(prefix mtargs args:))

(include "common_records.scm")

Modified tcp-transportmod.scm from [32789e6d1a] to [574d8a43e3].

21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
(declare (unit tcp-transportmod))
(declare (uses debugprint))
(declare (uses commonmod))
(declare (uses dbfile))
(declare (uses dbmod))
(declare (uses portlogger))

(use address-info)

(module tcp-transportmod
	*
	
  (import scheme
	  (prefix sqlite3 sqlite3:)
	  chicken







|







21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
(declare (unit tcp-transportmod))
(declare (uses debugprint))
(declare (uses commonmod))
(declare (uses dbfile))
(declare (uses dbmod))
(declare (uses portlogger))

(use address-info tcp)

(module tcp-transportmod
	*
	
  (import scheme
	  (prefix sqlite3 sqlite3:)
	  chicken
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
		   (debug:print-info 0 *default-log-port* "No server found. Starting one for run-id "run-id" in dbfile "dbfname)
		   (server-start-proc)
		   (tt-last-serv-start-set! ttdat (current-seconds))))
	     (thread-sleep! 1)
	     (tt:client-connect-to-server ttdat dbfname run-id testsuite)))))))

(define (tt:ping host port server-id #!optional (tries-left 5))
  (let*  ((res      (tt:send-receive-direct host port `(ping #f #f #f))) ;; please send me your server-id
	  (try-again (lambda ()
		       (if (> tries-left 0)
			   (begin
			     (thread-sleep! 1)
			     (tt:ping host port server-id (- tries-left 1)))
			   #f))))
    ;;







|







180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
		   (debug:print-info 0 *default-log-port* "No server found. Starting one for run-id "run-id" in dbfile "dbfname)
		   (server-start-proc)
		   (tt-last-serv-start-set! ttdat (current-seconds))))
	     (thread-sleep! 1)
	     (tt:client-connect-to-server ttdat dbfname run-id testsuite)))))))

(define (tt:ping host port server-id #!optional (tries-left 5))
  (let*  ((res      (tt:send-receive-direct host port `(ping #f #f #f) ping-mode: #t)) ;; please send me your server-id
	  (try-again (lambda ()
		       (if (> tries-left 0)
			   (begin
			     (thread-sleep! 1)
			     (tt:ping host port server-id (- tries-left 1)))
			   #f))))
    ;;
321
322
323
324
325
326
327
328
329
330

331







332
333
334
335
336
337
338
339
340
341
342















343
344
345
346
347
348
349
(define (tt:send-receive ttdat conn cmd run-id params)
  (let* ((host-port (tt-conn-host-port conn)) ;; (conc (tt-conn-host conn)":"(tt-conn-port conn)))
	 (host      (tt-conn-host conn))
	 (port      (tt-conn-port conn))
	 (dat       (list cmd run-id params #f))) ;; no meta data yet
    (tt:send-receive-direct host port dat)))

(define (tt:send-receive-direct host port dat)
  (assert (number? port) "FATAL: tt:send-receive-direct called with port not a number "port)
  (handle-exceptions

      exn







    #f ;; Add condition-case or better handling here
    (let-values (((inp oup)(tcp-connect host port)))
      (let ((res (if (and inp oup)
		     (begin
		       (serialize dat oup)
		       (close-output-port oup)
		       (deserialize inp))
		     )))
	(close-input-port inp)
	res))))


















;;======================================================================
;; server
;;======================================================================

(define (tt:sync-dbs ttdat)







|

|
>
|
>
>
>
>
>
>
>
|
|
|
|
|
|
|
|
|
|
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
(define (tt:send-receive ttdat conn cmd run-id params)
  (let* ((host-port (tt-conn-host-port conn)) ;; (conc (tt-conn-host conn)":"(tt-conn-port conn)))
	 (host      (tt-conn-host conn))
	 (port      (tt-conn-port conn))
	 (dat       (list cmd run-id params #f))) ;; no meta data yet
    (tt:send-receive-direct host port dat)))

(define (tt:send-receive-direct host port dat #!key (ping-mode #f)(tries-remaining 25))
  (assert (number? port) "FATAL: tt:send-receive-direct called with port not a number "port)
  (let* ((retry          (lambda ()
			   (tt:send-receive-direct host port dat tries-remaining: (- tries-remaining 1))))
	 (full-err-print (lambda (exn)
			   (pp (condition->list exn) *default-log-port*)
			   (pp dat *default-log-port*)
			   (debug:print 0 *default-log-port*
					", error: "     ((condition-property-accessor 'exn 'message)   exn)
					", arguments: " ((condition-property-accessor 'exn 'arguments) exn)
					", location: "  ((condition-property-accessor 'exn 'location)  exn)
					))))
    (condition-case
     (let-values (((inp oup)(tcp-connect host port)))
       (let ((res (if (and inp oup)
		      (begin
			(serialize dat oup)
			(close-output-port oup)
			(deserialize inp))
		      )))
	 (close-input-port inp)
	 res))
     (exn (io-error)
	  (full-err-print exn)
	  (debug:print 0 *default-log-port* exn "ERROR: i/o error")
	  #f)
     (exn (i/o net)
	  (if ping-mode
	      #f
	      (if (>= tries-remaining 0)
		  (let* ((backoff-delay (* (- 26 tries-remaining) 0.5)))
		    (debug:print 0 *default-log-port* "WARNING: TCP overload, trying again in "backoff-delay"s.")
		    (thread-sleep! backoff-delay)
		    (retry))
		  (assert #f "FATAL: Too many retries in tt:send-receive-direct"))))
     (exn ()
	  (full-err-print exn)
	  #f))))


;;======================================================================
;; server
;;======================================================================

(define (tt:sync-dbs ttdat)
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
	    (setup-listener-portlogger uconn))
	  #f)
      (connect-listener uconn port))))

(define (connect-listener uconn port)
  ;; (tcp-listener-socket LISTENER)(socket-name so)
  ;; sockaddr-address, sockaddr-port, sockaddr->string
  (let* ((tlsn (tcp-listen port 1000 #f)) ;; (tcp-listen TCPPORT [BACKLOG [HOST]])
	 (addr  (tt:get-best-guess-address (get-host-name)))) ;; (get-my-best-address))) ;; (hostinfo-addresses (host-information (current-hostname)))
    (tt-port-set!      uconn port)
    (tt-host-set!      uconn addr)
    (tt-host-port-set! uconn (conc addr":"port))
    (tt-socket-set!    uconn tlsn)
    uconn))








|







722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
	    (setup-listener-portlogger uconn))
	  #f)
      (connect-listener uconn port))))

(define (connect-listener uconn port)
  ;; (tcp-listener-socket LISTENER)(socket-name so)
  ;; sockaddr-address, sockaddr-port, sockaddr->string
  (let* ((tlsn (tcp-listen port 10000 #f)) ;; (tcp-listen TCPPORT [BACKLOG [HOST]])
	 (addr  (tt:get-best-guess-address (get-host-name)))) ;; (get-my-best-address))) ;; (hostinfo-addresses (host-information (current-hostname)))
    (tt-port-set!      uconn port)
    (tt-host-set!      uconn addr)
    (tt-host-port-set! uconn (conc addr":"port))
    (tt-socket-set!    uconn tlsn)
    uconn))

Modified tests.scm from [1a4573f7da] to [6fa611f761].

30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
(declare (uses commonmod))
(declare (uses items))
(declare (uses runconfig))
(declare (uses server))
(declare (uses mtargs))
(declare (uses rmtmod))

(use sqlite3 srfi-1 posix regex regex-case srfi-69 dot-locking tcp directory-utils)
(import (prefix sqlite3 sqlite3:))
(import commonmod
	(prefix mtargs args:)
	debugprint
	rmtmod)
(require-library stml)

(include "common_records.scm")
(include "key_records.scm")
(include "db_records.scm")
(include "run_records.scm")
(include "test_records.scm")







|

|
<
<
<







30
31
32
33
34
35
36
37
38
39



40
41
42
43
44
45
46
(declare (uses commonmod))
(declare (uses items))
(declare (uses runconfig))
(declare (uses server))
(declare (uses mtargs))
(declare (uses rmtmod))

(use sqlite3 srfi-1 posix regex regex-case srfi-69 dot-locking directory-utils)
(import (prefix sqlite3 sqlite3:))
(import commonmod (prefix mtargs args:) debugprint rmtmod)



(require-library stml)

(include "common_records.scm")
(include "key_records.scm")
(include "db_records.scm")
(include "run_records.scm")
(include "test_records.scm")