Megatest

Changes On Branch v1.80-revolution-multi-server
Login

Changes In Branch v1.80-revolution-multi-server Excluding Merge-Ins

This is equivalent to a diff from b9d51df3ee to 92d02e9bba

2023-12-16
19:51
Merged b9d5 Leaf check-in: 92d02e9bba user: matt tags: v1.80-revolution-multi-server
19:50
Merged 7372 check-in: f605e2b0d5 user: matt tags: v1.80-revolution-multi-server
2023-12-13
15:49
Improved dashboard performance check-in: b229b3f7b0 user: mrwellan tags: v1.80-revolution
13:06
Moved the addition of /.mtdb for db paths up to db:setup, and removed it from other places. Initial implementation of -cleanup-db. check-in: b9d51df3ee user: mmgraham tags: v1.80-revolution
2023-12-08
15:27
Added extra info condition->list check-in: 73727dc595 user: mrwellan tags: v1.80-revolution

Modified rmt.scm from [c3f010a183] to [478ec22375].

96
97
98
99
100
101
102
103












104
105
106
107
108
109
110
				#f
				(lambda ()
				  ;; (debug:print-info 0 *default-log-port* "starting server for dbfname: "dbfname)
				  (rmt:start-server ;; tt:server-process-run
				   areapath
				   testsuite ;; (dbfile:testsuite-name)
				   mtexe
				   run-id)))))












    ;; here we look at ttdat, if dbfname is NOT main.db we check that a conn exists for it
    ;; and if there is no conn we first send a request to the main.db server to start a
    ;; server for the dbfname.
    #;(if (and (not is-main)(not conn)) ;; no existing connection to non-main server, call in a start up request
	(begin
	  (server-start-proc)
	  (thread-sleep! 1)))







|
>
>
>
>
>
>
>
>
>
>
>
>







96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
				#f
				(lambda ()
				  ;; (debug:print-info 0 *default-log-port* "starting server for dbfname: "dbfname)
				  (rmt:start-server ;; tt:server-process-run
				   areapath
				   testsuite ;; (dbfile:testsuite-name)
				   mtexe
				   run-id))))
	 ;; current method does not take advantage of simply getting the list of
	 ;; servers from no-sync db. srv-get-proc would be a first step but is not used yet
	 (srv-get-proc (lambda ()
			 (let* ((candidates (rmt:get-process-options "server" dbfname))
				(ccount     (length candidates)))
			   (case ccount
			     ((0) #f) ;; need to call rmt:start-server
			     ((1) (car candidates))
			     (else
			      (if (> (random 100) 50)
				  (car candidates)
				  (cadr candidates))))))))
    ;; here we look at ttdat, if dbfname is NOT main.db we check that a conn exists for it
    ;; and if there is no conn we first send a request to the main.db server to start a
    ;; server for the dbfname.
    #;(if (and (not is-main)(not conn)) ;; no existing connection to non-main server, call in a start up request
	(begin
	  (server-start-proc)
	  (thread-sleep! 1)))

Modified tcp-transportmod.scm from [5cef09f36d] to [108f76cedd].

149
150
151
152
153
154
155
156
157
158
159

160
161
162
163
164
165
166
				   testsuite ;; (dbfile:testsuite-name)
				   (common:find-local-megatest)
				   run-id)))))
    (if conn
	(begin 
          (debug:print-info 2 *default-log-port* "already connected to a server")
           conn) ;; we are already connected to the server
	(let* ((sdats (tt:get-server-info-sorted ttdat dbfname))
	       (sdat  (if (null? sdats)
			  #f
			  (car sdats))))

	  (match sdat
	    ((host port start-time server-id pid dbfname2 servinffile)
	     (assert (equal? dbfname dbfname2) "FATAL: read server info from wrong file.")
             (debug:print-info 2 *default-log-port* "no conn - in match servinffile:" servinffile)
	     (let* ((host-port (conc host":"port))
		    (conn (make-tt-conn
			   host: host







|
|
|
|
>







149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
				   testsuite ;; (dbfile:testsuite-name)
				   (common:find-local-megatest)
				   run-id)))))
    (if conn
	(begin 
          (debug:print-info 2 *default-log-port* "already connected to a server")
           conn) ;; we are already connected to the server
	(let* (;; (sdats (tt:get-server-info-sorted ttdat dbfname))
	       ;; (sdat  (if (null? sdats)
	       ;;	  #f
	       ;;	  (car sdats))))
	       (sdat (tt:get-valid-server-random ttdat dbfname)))
	  (match sdat
	    ((host port start-time server-id pid dbfname2 servinffile)
	     (assert (equal? dbfname dbfname2) "FATAL: read server info from wrong file.")
             (debug:print-info 2 *default-log-port* "no conn - in match servinffile:" servinffile)
	     (let* ((host-port (conc host":"port))
		    (conn (make-tt-conn
			   host: host
347
348
349
350
351
352
353
354



















355
356
357
358
359
360
361
     (lambda (rec)
       (if (or (> (length sorted) 1)
	       (common:low-noise-print 120 "server info sorted"))
	   (debug:print 2 *default-log-port* "SERVER #"count": "(string-intersperse (map conc sorted) ", ")))
       (set! count (+ count 1)))
     sorted)
    sorted))
    



















(define (tt:send-receive ttdat conn cmd run-id params)
  (let* ((host-port (tt-conn-host-port conn)) ;; (conc (tt-conn-host conn)":"(tt-conn-port conn)))
	 (host      (tt-conn-host conn))
	 (port      (tt-conn-port conn))
	 (dat       (list cmd run-id params #f))) ;; no meta data yet
    (tt:send-receive-direct host port dat)))








|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
     (lambda (rec)
       (if (or (> (length sorted) 1)
	       (common:low-noise-print 120 "server info sorted"))
	   (debug:print 2 *default-log-port* "SERVER #"count": "(string-intersperse (map conc sorted) ", ")))
       (set! count (+ count 1)))
     sorted)
    sorted))

(define (tt:get-valid-server-random ttdat dbfname)
  (let* ((candidates (tt:get-server-info-sorted ttdat dbfname))
	 (numc       (length candidates)))
    (case numc
      ((0) #f)
      ((1) (car candidates))
      (else
       (let* ((firsthost        (caar candidates))
	      (valid-candidates (filter (lambda (x)(equal? (car x) firsthost)) candidates))
	      (numvalid         (length valid-candidates)))
	 (case numvalid
	   ((0) (debug:print 0 *default-log-port* "ERROR: code issue, filter broke?") #f)
	   ((1) (car valid-candidates))
	   (else
	    ;; expand logic here to support more than two servers
	    (if (> (random 100) 50)
		(car valid-candidates)
		(cadr valid-candidates)))))))))
   
(define (tt:send-receive ttdat conn cmd run-id params)
  (let* ((host-port (tt-conn-host-port conn)) ;; (conc (tt-conn-host conn)":"(tt-conn-port conn)))
	 (host      (tt-conn-host conn))
	 (port      (tt-conn-port conn))
	 (dat       (list cmd run-id params #f))) ;; no meta data yet
    (tt:send-receive-direct host port dat)))

500
501
502
503
504
505
506



507

508
509
510
511
512
513
514
515
		;; contact servers via ping, if no response remove the .servinfo file
		(let loop ((servrs     servers)
			   (prime-host #f)
			   (result    '()))
		  (if (null? servrs)
		      (reverse result)
		      (let* ((servdat (car servrs)))



			(match servdat

			     ((host port startseconds server-id servinfofile)
			      (let* ((ping-res  (tt:timed-ping host port server-id))
				     (good-ping (match ping-res
						   ((result . ping-time)
						    (not result)) ;; we couldn't reach the server or it was not a megatest server
						   (else #f))) ;; the ping failed completely?
				     (same-host (or (not prime-host) ;; i.e. this is the first host
						    (equal? prime-host host)))







>
>
>

>
|







520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
		;; contact servers via ping, if no response remove the .servinfo file
		(let loop ((servrs     servers)
			   (prime-host #f)
			   (result    '()))
		  (if (null? servrs)
		      (reverse result)
		      (let* ((servdat (car servrs)))
	;;; INFO: (0) 23:08:10 ERROR: bad servinfo record
	;;; "(127.0.1.1 36797 1701662813.0 88fff570fa3996d6082df8a1875e6cb1 15462 6.db /home/matt/data/megatest/ext-tests/sixtyfivek/.servinfo/127.0.1.1:36797-15462:6.db)"
			     
			(match servdat
			  ;; host port startt server-id pid dbfname servinffilr
			  ((host port startseconds server-id pid dbfname servinfofile)
			      (let* ((ping-res  (tt:timed-ping host port server-id))
				     (good-ping (match ping-res
						   ((result . ping-time)
						    (not result)) ;; we couldn't reach the server or it was not a megatest server
						   (else #f))) ;; the ping failed completely?
				     (same-host (or (not prime-host) ;; i.e. this is the first host
						    (equal? prime-host host)))