Megatest

Diff
Login

Differences From Artifact [ec84ec4c9e]:

To Artifact [98befc0e3b]:


130
131
132
133
134
135
136

137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170

171
172
173
174

175
176
177
178
179
180
181
182
183

184
185
186
187
188
189
190


191

192
193
194
195
196
197
198
;; (max-connections 4096) 

;; do all the busy work of finding and setting up conn for
;; connecting to a server
;; 
(define (tt:client-connect-to-server ttdat dbfname run-id testsuite)
  (assert (tt:valid-run-id run-id dbfname) "FATAL: invalid run-id "run-id)

  (let* ((conn (hash-table-ref/default (tt-conns ttdat) dbfname #f))
	 (server-start-proc (lambda ()
			      (tt:server-process-run
			       (tt-areapath ttdat)
			       testsuite ;; (dbfile:testsuite-name)
			       (common:find-local-megatest)
			       dbfname ;; run-id
			       ))))
    (if conn
	(begin 
          ; (debug:print-info 0 *default-log-port* "already connected to the server")
           conn) ;; we are already connected to the server
	(let* ((sdat (tt:get-current-server-info ttdat dbfname)))
	  (match sdat
	    ((host port start-time server-id pid dbfname2 servinffile)
	     (assert (equal? dbfname dbfname2) "FATAL: read server info from wrong file.")
             ;(debug:print-info 0 *default-log-port* "in match servinffile:" servinffile)
	     (let* ((host-port (conc host":"port))
		    (conn (make-tt-conn
			   host: host
			   port: port
			   host-port: host-port
			   dbfname: dbfname
			   servinf-file: servinffile
			   server-id: server-id
			   server-start: start-time
			   pid: pid)))
	       ;; verify we can talk to this server
	       (let* ((result   (tt:timed-ping host port server-id))
		      (ping-res (car result))
		      (ping     (cdr result)))
                 (debug:print-info 0 *default-log-port* "ping time: " ping)
		 (case ping-res
		   ((running)

		    (hash-table-set! (tt-conns ttdat) dbfname conn) ;;; is this ok to save before validating that the connection is good?
		    conn)
		   ((starting)
		    (thread-sleep! 0.5)

		    (tt:client-connect-to-server ttdat dbfname run-id testsuite))
		   (else
		    (let* ((curr-secs (current-seconds)))
		      ;; rm the (last server) would go here
		      (if (> (- curr-secs (tt-last-serv-start ttdat)) 10)
			  (begin
			    (tt-last-serv-start-set! ttdat curr-secs)
			    (server-start-proc))) ;; start server if 30 sec since last attempt
		      (thread-sleep! 1)

		      (tt:client-connect-to-server ttdat dbfname run-id testsuite)))))))
	    (else ;; no good server found, if haven't started server in > 5 secs, start another
	     (if (> (- (current-seconds) (tt-last-serv-start ttdat)) 5) ;; BUG - grow this number really do not want to swamp the machine with servers
		 (begin
		   (debug:print-info 0 *default-log-port* "No server found. Starting one for run-id "run-id" in dbfile "dbfname)
		   (server-start-proc)
		   (tt-last-serv-start-set! ttdat (current-seconds))))


	     (thread-sleep! 1)

	     (tt:client-connect-to-server ttdat dbfname run-id testsuite)))))))

(define (tt:timed-ping host port server-id)
  (let* ((start-time (current-milliseconds))
	 (result     (tt:ping host port server-id)))
    (cons result (- (current-milliseconds) start-time))))
    







>










|





|














|


>



|
>







|

>


|

|

|
>
>

>







130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
;; (max-connections 4096) 

;; do all the busy work of finding and setting up conn for
;; connecting to a server
;; 
(define (tt:client-connect-to-server ttdat dbfname run-id testsuite)
  (assert (tt:valid-run-id run-id dbfname) "FATAL: invalid run-id "run-id)
  (debug:print-info 2 *default-log-port* "tt:client-connect-to-server " dbfname " " run-id)
  (let* ((conn (hash-table-ref/default (tt-conns ttdat) dbfname #f))
	 (server-start-proc (lambda ()
			      (tt:server-process-run
			       (tt-areapath ttdat)
			       testsuite ;; (dbfile:testsuite-name)
			       (common:find-local-megatest)
			       dbfname ;; run-id
			       ))))
    (if conn
	(begin 
          (debug:print-info 2 *default-log-port* "already connected to a server")
           conn) ;; we are already connected to the server
	(let* ((sdat (tt:get-current-server-info ttdat dbfname)))
	  (match sdat
	    ((host port start-time server-id pid dbfname2 servinffile)
	     (assert (equal? dbfname dbfname2) "FATAL: read server info from wrong file.")
             (debug:print-info 2 *default-log-port* "no conn - in match servinffile:" servinffile)
	     (let* ((host-port (conc host":"port))
		    (conn (make-tt-conn
			   host: host
			   port: port
			   host-port: host-port
			   dbfname: dbfname
			   servinf-file: servinffile
			   server-id: server-id
			   server-start: start-time
			   pid: pid)))
	       ;; verify we can talk to this server
	       (let* ((result   (tt:timed-ping host port server-id))
		      (ping-res (car result))
		      (ping     (cdr result)))
                 (debug:print-info 2 *default-log-port* "host " host " port " port " ping time: " ping " result " ping-res)
		 (case ping-res
		   ((running)
                    (debug:print-info 2 *default-log-port* "Setting conn = " conn " in hash table")
		    (hash-table-set! (tt-conns ttdat) dbfname conn) ;;; is this ok to save before validating that the connection is good?
		    conn)
		   ((starting)
		    (thread-sleep! 2)
                    (debug:print-info 0 *default-log-port* "server for " dbfname " is in starting state, retrying connect")
		    (tt:client-connect-to-server ttdat dbfname run-id testsuite))
		   (else
		    (let* ((curr-secs (current-seconds)))
		      ;; rm the (last server) would go here
		      (if (> (- curr-secs (tt-last-serv-start ttdat)) 10)
			  (begin
			    (tt-last-serv-start-set! ttdat curr-secs)
			    (server-start-proc))) ;; start server if 10 sec since last attempt
		      (thread-sleep! 1)
                      (debug:print-info 2 *default-log-port* "server ping result was neither running nor starting. Retrying connect")
		      (tt:client-connect-to-server ttdat dbfname run-id testsuite)))))))
	    (else ;; no good server found, if haven't started server in > 5 secs, start another
	     (if (> (- (current-seconds) (tt-last-serv-start ttdat)) 3) ;; BUG - grow this number really do not want to swamp the machine with servers
		 (begin
		   (debug:print-info 0 *default-log-port* "Starting server for "dbfname)
		   (server-start-proc)
		   (tt-last-serv-start-set! ttdat (current-seconds))
                   (thread-sleep! 3)
                   ))
	     (thread-sleep! 1)
             (debug:print-info 0 *default-log-port* "Connect to server for " dbfname)
	     (tt:client-connect-to-server ttdat dbfname run-id testsuite)))))))

(define (tt:timed-ping host port server-id)
  (let* ((start-time (current-milliseconds))
	 (result     (tt:ping host port server-id)))
    (cons result (- (current-milliseconds) start-time))))
    
222
223
224
225
226
227
228

229
230
231
232
233
234
235
       (try-again)))))

;; client side handler
;;
;;(tt:handler #<tt> get-keys #f () 2 #f "/home/matt/data/megatest/ext-tests" #f "main.db" "ext-tests" "/home/matt/data/megatest/bin/.22.04/../megatest")
;;
(define (tt:handler ttdat cmd run-id params attemptnum area-dat areapath readonly-mode dbfname testsuite mtexe)

  ;; NOTE: areapath is passed in and in tt struct. We'll use passed in value for now.
  (let* ((conn (tt:client-connect-to-server ttdat dbfname run-id testsuite))) ;; (hash-table-ref/default (tt-conns ttdat) dbfname #f)))
    (if conn
	;; have connection, call the server
	(let* ((res (tt:send-receive ttdat conn cmd run-id params)))
	  ;; res is (status errmsg result meta)
         ; (debug:print 0 *default-log-port* "conn:" conn " res: " res)







>







229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
       (try-again)))))

;; client side handler
;;
;;(tt:handler #<tt> get-keys #f () 2 #f "/home/matt/data/megatest/ext-tests" #f "main.db" "ext-tests" "/home/matt/data/megatest/bin/.22.04/../megatest")
;;
(define (tt:handler ttdat cmd run-id params attemptnum area-dat areapath readonly-mode dbfname testsuite mtexe)
  (debug:print 2 *default-log-port* "tt:handler cmd: " cmd " run-id: " run-id " attemptnum: " attemptnum)
  ;; NOTE: areapath is passed in and in tt struct. We'll use passed in value for now.
  (let* ((conn (tt:client-connect-to-server ttdat dbfname run-id testsuite))) ;; (hash-table-ref/default (tt-conns ttdat) dbfname #f)))
    (if conn
	;; have connection, call the server
	(let* ((res (tt:send-receive ttdat conn cmd run-id params)))
	  ;; res is (status errmsg result meta)
         ; (debug:print 0 *default-log-port* "conn:" conn " res: " res)
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
				     ;; start server - addressed in client-connect-to-server
				     ;; delay        - addressed in client-connect-to-server
				     ;; try again
				     (thread-sleep! 0.25) ;; dunno, I think this needs to be here
				     (tt:handler ttdat cmd run-id params (+ attemptnum 1) area-dat areapath readonly-mode dbfname testsuite mtexe))
				   ))))
		       (begin ;; no server file, delay and try again
			 (debug:print 0 *default-log-port* "INFO: connection to server "host":"port" broken for "dbfname", but do not see servinf file "servinf)
			 (thread-sleep! 0.5)
			 (tt:handler ttdat cmd run-id params (+ attemptnum 1) area-dat areapath readonly-mode dbfname testsuite mtexe))))
		 (begin ;; this case is where res is malformed. Probably should abort
		   (assert #f "FATAL: tt:handler received bad data "res)
		   ;; (debug:print 0 *default-log-port* "INFO: got corrupt data from server "host":"port", "res", for "dbfname", will try again.")
		   ;; (tt:handler ttdat cmd run-id params (+ attemptnum 1) area-dat areapath readonly-mode dbfname testsuite mtexe)
		   )))))







|







292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
				     ;; start server - addressed in client-connect-to-server
				     ;; delay        - addressed in client-connect-to-server
				     ;; try again
				     (thread-sleep! 0.25) ;; dunno, I think this needs to be here
				     (tt:handler ttdat cmd run-id params (+ attemptnum 1) area-dat areapath readonly-mode dbfname testsuite mtexe))
				   ))))
		       (begin ;; no server file, delay and try again
			 (debug:print 2 *default-log-port* "INFO: connection to server "host":"port" broken for "dbfname", but do not see servinf file "servinf)
			 (thread-sleep! 0.5)
			 (tt:handler ttdat cmd run-id params (+ attemptnum 1) area-dat areapath readonly-mode dbfname testsuite mtexe))))
		 (begin ;; this case is where res is malformed. Probably should abort
		   (assert #f "FATAL: tt:handler received bad data "res)
		   ;; (debug:print 0 *default-log-port* "INFO: got corrupt data from server "host":"port", "res", for "dbfname", will try again.")
		   ;; (tt:handler ttdat cmd run-id params (+ attemptnum 1) area-dat areapath readonly-mode dbfname testsuite mtexe)
		   )))))
463
464
465
466
467
468
469

470
471
472
473
474
475
476
;; This is the routine called in megatest.scm to start a server
;;
;; Server viability is checked in keep-running. Blindly start and run here.
;;
(define (tt:start-server areapath run-id dbfname-in handler keys)
  (assert areapath "FATAL: areapath not provided for tt:start-server")
  ;; is there already a server for this dbfile? Then exit.

  (let* ((ttdat   (make-tt areapath: areapath))
	 (dbfname (or dbfname-in (dbmod:run-id->dbfname run-id)))
	 (servers (tt:find-server areapath dbfname))) ;; should use tt:get-current-server-info instead
    (if (> (length servers) 4)
	(begin
	  (debug:print 0 *default-log-port* "INFO: found server(s) already running for db "dbfname", "(string-intersperse servers ",")" Exiting.")
	  (exit))







>







471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
;; This is the routine called in megatest.scm to start a server
;;
;; Server viability is checked in keep-running. Blindly start and run here.
;;
(define (tt:start-server areapath run-id dbfname-in handler keys)
  (assert areapath "FATAL: areapath not provided for tt:start-server")
  ;; is there already a server for this dbfile? Then exit.
  (debug:print 2 *default-log-port* "tt:start-server: " dbfname-in)
  (let* ((ttdat   (make-tt areapath: areapath))
	 (dbfname (or dbfname-in (dbmod:run-id->dbfname run-id)))
	 (servers (tt:find-server areapath dbfname))) ;; should use tt:get-current-server-info instead
    (if (> (length servers) 4)
	(begin
	  (debug:print 0 *default-log-port* "INFO: found server(s) already running for db "dbfname", "(string-intersperse servers ",")" Exiting.")
	  (exit))
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
			     " -server - ";; (or target-host "-")
			     " -m testsuite:"testsuite
			     " -db "dbfname ;; (dbmod:run-id->dbfname run-id)
			     " " profile-mode
			     (conc " >> " logfile " 2>&1 &"))))
	    ;; we want the remote server to start in *toppath* so push there
	    ;; (push-directory areapath) ;; use cd in the command line instead
	    (debug:print 0 *default-log-port* "INFO: Trying to start server in tcp mode (" cmdln ") at "(common:human-time)" for "areapath)
	    ;; (debug:print 0 *default-log-port* "INFO: starting server at " (common:human-time))

	    (system cmdln)
	    ;; ;; use below to go back to nbfake - nbfake does cause trouble ...
	    ;; (setenv "NBFAKE_QUIET" "yes") ;; BUG: change to with-environment-variable ...
	    ;; (setenv "NBFAKE_LOG" logfile)
	    ;; (system (conc "cd "areapath" ; nbfake " cmdln))







|







798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
			     " -server - ";; (or target-host "-")
			     " -m testsuite:"testsuite
			     " -db "dbfname ;; (dbmod:run-id->dbfname run-id)
			     " " profile-mode
			     (conc " >> " logfile " 2>&1 &"))))
	    ;; we want the remote server to start in *toppath* so push there
	    ;; (push-directory areapath) ;; use cd in the command line instead
	    (debug:print 2 *default-log-port* "INFO: Trying to start server in tcp mode (" cmdln ") at "(common:human-time)" for "areapath)
	    ;; (debug:print 0 *default-log-port* "INFO: starting server at " (common:human-time))

	    (system cmdln)
	    ;; ;; use below to go back to nbfake - nbfake does cause trouble ...
	    ;; (setenv "NBFAKE_QUIET" "yes") ;; BUG: change to with-environment-variable ...
	    ;; (setenv "NBFAKE_LOG" logfile)
	    ;; (system (conc "cd "areapath" ; nbfake " cmdln))