︙ | | |
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
|
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
|
+
+
-
+
-
+
-
+
-
+
+
-
+
+
-
+
+
-
+
-
+
-
+
+
+
+
|
;; (max-connections 4096)
;; do all the busy work of finding and setting up conn for
;; connecting to a server
;;
(define (tt:client-connect-to-server ttdat dbfname run-id testsuite)
(assert (tt:valid-run-id run-id dbfname) "FATAL: invalid run-id "run-id)
(debug:print-info 2 *default-log-port* "tt:client-connect-to-server " dbfname " " run-id)
(let* ((conn (hash-table-ref/default (tt-conns ttdat) dbfname #f))
(server-start-proc (lambda ()
(tt:server-process-run
(tt-areapath ttdat)
testsuite ;; (dbfile:testsuite-name)
(common:find-local-megatest)
dbfname ;; run-id
run-id))))
))))
(if conn
(begin
; (debug:print-info 0 *default-log-port* "already connected to the server")
(debug:print-info 2 *default-log-port* "already connected to a server")
conn) ;; we are already connected to the server
(let* ((sdat (tt:get-current-server-info ttdat dbfname)))
(match sdat
((host port start-time server-id pid dbfname2 servinffile)
(assert (equal? dbfname dbfname2) "FATAL: read server info from wrong file.")
;(debug:print-info 0 *default-log-port* "in match servinffile:" servinffile)
(debug:print-info 2 *default-log-port* "no conn - in match servinffile:" servinffile)
(let* ((host-port (conc host":"port))
(conn (make-tt-conn
host: host
port: port
host-port: host-port
dbfname: dbfname
servinf-file: servinffile
server-id: server-id
server-start: start-time
pid: pid)))
;; verify we can talk to this server
(let* ((result (tt:timed-ping host port server-id))
(ping-res (car result))
(ping (cdr result)))
(debug:print-info 0 *default-log-port* "ping time: " ping)
(debug:print-info 2 *default-log-port* "host " host " port " port " ping time: " ping " result " ping-res)
(case ping-res
((running)
(debug:print-info 2 *default-log-port* "Setting conn = " conn " in hash table")
(hash-table-set! (tt-conns ttdat) dbfname conn) ;;; is this ok to save before validating that the connection is good?
conn)
((starting)
(thread-sleep! 0.5)
(thread-sleep! 2)
(debug:print-info 0 *default-log-port* "server for " dbfname " is in starting state, retrying connect")
(tt:client-connect-to-server ttdat dbfname run-id testsuite))
(else
(let* ((curr-secs (current-seconds)))
;; rm the (last server) would go here
(if (> (- curr-secs (tt-last-serv-start ttdat)) 10)
(begin
(tt-last-serv-start-set! ttdat curr-secs)
(server-start-proc))) ;; start server if 30 sec since last attempt
(server-start-proc))) ;; start server if 10 sec since last attempt
(thread-sleep! 1)
(debug:print-info 2 *default-log-port* "server ping result was neither running nor starting. Retrying connect")
(tt:client-connect-to-server ttdat dbfname run-id testsuite)))))))
(else ;; no good server found, if haven't started server in > 5 secs, start another
(if (> (- (current-seconds) (tt-last-serv-start ttdat)) 5) ;; BUG - grow this number really do not want to swamp the machine with servers
(if (> (- (current-seconds) (tt-last-serv-start ttdat)) 3) ;; BUG - grow this number really do not want to swamp the machine with servers
(begin
(debug:print-info 0 *default-log-port* "No server found. Starting one for run-id "run-id" in dbfile "dbfname)
(debug:print-info 0 *default-log-port* "Starting server for "dbfname)
(server-start-proc)
(tt-last-serv-start-set! ttdat (current-seconds))))
(tt-last-serv-start-set! ttdat (current-seconds))
(thread-sleep! 3)
))
(thread-sleep! 1)
(debug:print-info 0 *default-log-port* "Connect to server for " dbfname)
(tt:client-connect-to-server ttdat dbfname run-id testsuite)))))))
(define (tt:timed-ping host port server-id)
(let* ((start-time (current-milliseconds))
(result (tt:ping host port server-id)))
(cons result (- (current-milliseconds) start-time))))
|
︙ | | |
221
222
223
224
225
226
227
228
229
230
231
232
233
234
|
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
|
+
|
(try-again)))))
;; client side handler
;;
;;(tt:handler #<tt> get-keys #f () 2 #f "/home/matt/data/megatest/ext-tests" #f "main.db" "ext-tests" "/home/matt/data/megatest/bin/.22.04/../megatest")
;;
(define (tt:handler ttdat cmd run-id params attemptnum area-dat areapath readonly-mode dbfname testsuite mtexe)
(debug:print 2 *default-log-port* "tt:handler cmd: " cmd " run-id: " run-id " attemptnum: " attemptnum)
;; NOTE: areapath is passed in and in tt struct. We'll use passed in value for now.
(let* ((conn (tt:client-connect-to-server ttdat dbfname run-id testsuite))) ;; (hash-table-ref/default (tt-conns ttdat) dbfname #f)))
(if conn
;; have connection, call the server
(let* ((res (tt:send-receive ttdat conn cmd run-id params)))
;; res is (status errmsg result meta)
; (debug:print 0 *default-log-port* "conn:" conn " res: " res)
|
︙ | | |
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
|
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
|
-
+
|
;; start server - addressed in client-connect-to-server
;; delay - addressed in client-connect-to-server
;; try again
(thread-sleep! 0.25) ;; dunno, I think this needs to be here
(tt:handler ttdat cmd run-id params (+ attemptnum 1) area-dat areapath readonly-mode dbfname testsuite mtexe))
))))
(begin ;; no server file, delay and try again
(debug:print 0 *default-log-port* "INFO: connection to server "host":"port" broken for "dbfname", but do not see servinf file "servinf)
(debug:print 2 *default-log-port* "INFO: connection to server "host":"port" broken for "dbfname", but do not see servinf file "servinf)
(thread-sleep! 0.5)
(tt:handler ttdat cmd run-id params (+ attemptnum 1) area-dat areapath readonly-mode dbfname testsuite mtexe))))
(begin ;; this case is where res is malformed. Probably should abort
(assert #f "FATAL: tt:handler received bad data "res)
;; (debug:print 0 *default-log-port* "INFO: got corrupt data from server "host":"port", "res", for "dbfname", will try again.")
;; (tt:handler ttdat cmd run-id params (+ attemptnum 1) area-dat areapath readonly-mode dbfname testsuite mtexe)
)))))
|
︙ | | |
462
463
464
465
466
467
468
469
470
471
472
473
474
475
|
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
|
+
|
;; This is the routine called in megatest.scm to start a server
;;
;; Server viability is checked in keep-running. Blindly start and run here.
;;
(define (tt:start-server areapath run-id dbfname-in handler keys)
(assert areapath "FATAL: areapath not provided for tt:start-server")
;; is there already a server for this dbfile? Then exit.
(debug:print 2 *default-log-port* "tt:start-server: " dbfname-in)
(let* ((ttdat (make-tt areapath: areapath))
(dbfname (or dbfname-in (dbmod:run-id->dbfname run-id)))
(servers (tt:find-server areapath dbfname))) ;; should use tt:get-current-server-info instead
(if (> (length servers) 4)
(begin
(debug:print 0 *default-log-port* "INFO: found server(s) already running for db "dbfname", "(string-intersperse servers ",")" Exiting.")
(exit))
|
︙ | | |
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
|
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
|
-
+
+
+
-
+
|
bad-dat)))))))))
;; Given an area path, start a server process ### NOTE ### > file 2>&1
;; if the target-host is set
;; try running on that host
;; incidental: rotate logs in logs/ dir.
;;
(define (tt:server-process-run areapath testsuite mtexe run-id #!key (profile-mode "")) ;; areapath is *toppath* for a given testsuite area
(define (tt:server-process-run areapath testsuite mtexe
dbfname ;; run-id
#!key (profile-mode "")) ;; areapath is *toppath* for a given testsuite area
(assert areapath "FATAL: tt:server-process-run called without areapath defined.")
(assert testsuite "FATAL: tt:server-process-run called without testsuite defined.")
(assert mtexe "FATAL: tt:server-process-run called without mtexe defined.")
;; mtest -server - -m testsuite:ext-tests -db 6.db
(let* ((dbfname (dbmod:run-id->dbfname run-id))
(let* (;; (dbfname (dbmod:run-id->dbfname run-id))
(load (get-normalized-cpu-load))
(trying (length (tt:find-server areapath dbfname)))
(nrun (number-of-processes-running (conc "mtest.*server.*"testsuite".*"dbfname))))
(cond
((> load 2.0)
(debug:print 0 *default-log-port* "Normalized load "load" on " (get-host-name) " is over the limit of 2.0. Not starting a server.")
(thread-sleep! 1))
|
︙ | | |
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
|
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
|
-
+
|
" -server - ";; (or target-host "-")
" -m testsuite:"testsuite
" -db "dbfname ;; (dbmod:run-id->dbfname run-id)
" " profile-mode
(conc " >> " logfile " 2>&1 &"))))
;; we want the remote server to start in *toppath* so push there
;; (push-directory areapath) ;; use cd in the command line instead
(debug:print 0 *default-log-port* "INFO: Trying to start server in tcp mode (" cmdln ") at "(common:human-time)" for "areapath)
(debug:print 2 *default-log-port* "INFO: Trying to start server in tcp mode (" cmdln ") at "(common:human-time)" for "areapath)
;; (debug:print 0 *default-log-port* "INFO: starting server at " (common:human-time))
(system cmdln)
;; ;; use below to go back to nbfake - nbfake does cause trouble ...
;; (setenv "NBFAKE_QUIET" "yes") ;; BUG: change to with-environment-variable ...
;; (setenv "NBFAKE_LOG" logfile)
;; (system (conc "cd "areapath" ; nbfake " cmdln))
|
︙ | | |