︙ | | |
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
|
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
|
+
-
+
-
+
+
+
+
+
-
+
-
+
|
(socket #f)
(thread #f)
(host-port #f)
(cmd-thread #f)
(ro-mode #f)
(ro-mode-checked #f)
(last-access (current-seconds))
(servinf-file #f)
)
(define (tt:make-remote areapath)
(make-tt areapath: areapath))
;; do all the busy work of finding and setting up conn for
;; connecting to a server
;;
(define (tt:client-connect-to-server ttdat dbfname run-id testsuite)
(let* ((conn (hash-table-ref/default (tt-conns ttdat) dbfname #f))
(server-start-proc (lambda ()
(tt:server-process-run
(tt-areapath ttdat)
testsuite ;; (dbfile:testsuite-name)
(common:find-local-megatest)
run-id))))
(if conn
conn ;; we are already connected to the server
(let* ((sdat (tt:get-current-server-info ttdat dbfname run-id)))
(let* ((sdat (tt:get-current-server-info ttdat dbfname)))
(match sdat
((host port start-time server-id pid dbfname2)
((host port start-time server-id pid dbfname2 servinffile)
(assert (equal? dbfname dbfname2) "FATAL: read server info from wrong file.")
(let* ((host-port (conc host":"port))
(conn (make-tt-conn
host: host
port: port
host-port: host-port
dbfname: dbfname
servinf-file: servinffile
server-id: server-id
server-start: start-time
pid: pid)))
(hash-table-set! (tt-conns ttdat) dbfname conn)
;; verify we can talk to this server
(if (tt:ping host port server-id)
conn
(begin
;; rm the (last server) would go here
(server-start-proc)
(thread-sleep! 1)
(tt:client-connect-to-server ttdat dbfname run-id testsuite)))))
(else
(debug:print-info 0 *default-log-port* "Number of records did not match expected. Bad server info?")
(server-start-proc)
(thread-sleep! 1)
(tt:client-connect-to-server ttdat dbfname run-id testsuite)))))))
(define (tt:ping host port server-id)
(let* ((res (tt:send-receive-direct host port `(ping #f #f #f)))) ;; please send me your server-id
;;
;; need two threads, one a 5 second timer
;;
(match res
((status errmsg result meta)
(if (equal? result server-id)
(begin
(debug:print 0 *default-log-port* "Ping to "host":"port" successful.")
#t ;; then we are good
#t) ;; then we are good
(begin
(debug:print 0 *default-log-port* "WARNING: server-id does not match, expected: "server-id", got: "result)
#f)))
(else
(debug:print 0 *default-log-port* "res not in form (status errmsg resutl meta), got: "res)
;; (debug:print 0 *default-log-port* "res not in form (status errmsg result meta), got: "res)
#f))))
;; client side handler
;;
(define (tt:handler ttdat cmd run-id params attemptnum area-dat areapath readonly-mode dbfname testsuite mtexe)
;; NOTE: areapath is passed in and in tt struct. We'll use passed in value for now.
(let* ((conn (tt:client-connect-to-server ttdat dbfname run-id testsuite))) ;; (hash-table-ref/default (tt-conns ttdat) dbfname #f)))
|
︙ | | |
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
|
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
|
+
+
+
+
+
+
+
+
+
+
+
+
+
-
+
+
+
+
|
;; (thread-sleep! 1)
;; (tt:handler ttdat cmd run-id params attemptnum area-dat areapath
;; readonly-mode dbfname testsuite mtexe)))))))
(define (tt:bid-for-servership run-id)
#f)
;; gets server info and appends path to server file
;; sorts by age, oldest first
;;
;; returns list of (host port startseconds server-id servinfofile)
;;
(define (tt:get-server-info-sorted ttdat dbfname)
(let* ((areapath (tt-areapath ttdat))
(sfiles (tt:find-server areapath dbfname))
(sdats (filter car (map tt:server-get-info sfiles))) ;; first element is #f if the file disappeared while being read
(sorted (sort sdats (lambda (a b)
(< (list-ref a 2)(list-ref b 2))))))
sorted))
(define (tt:get-current-server-info ttdat dbfname run-id)
(define (tt:get-current-server-info ttdat dbfname)
(assert (tt-areapath ttdat) "FATAL: areapath not set in ttdat.")
;;
;; TODO - replace most of below with tt;get-server-info-sorted
;;
(let* ((areapath (tt-areapath ttdat))
(sfiles (tt:find-server areapath dbfname))
(sdats (filter car (map tt:server-get-info sfiles))) ;; first element is #f if the file disappeared while being read
(sorted (sort sdats (lambda (a b)
(< (list-ref a 2)(list-ref b 2))))))
(if (null? sorted)
#f ;; we'll want to wait until extra servers have exited
|
︙ | | |
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
|
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
|
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
+
+
+
|
;; start the listener and start responding to requests
;;
;; NOTE: organise by dbfname, not run-id so we don't need
;; to pull in more modules
;;
;; This is the routine called in megatest.scm to start a server
;;
;; Server viability is checked in keep-running. Blindly start and run here.
;;
(define (tt:start-server areapath run-id dbfname handler keys)
(assert areapath "FATAL: areapath not provided for tt:start-server")
;; is there already a server for this dbfile? Then exit.
(let* ((ttdat (make-tt areapath: areapath))
(servers (tt:find-server areapath dbfname))) ;; should use tt:get-current-server-info instead
(if (null? servers)
(let* ((dbstruct (dbmod:open-dbmoddb areapath run-id (dbfile:db-init-proc) keys)))
(tt-handler-set! ttdat (handler dbstruct))
(let* ((tcp-thread (make-thread
(lambda ()
(tt:start-tcp-server ttdat)) ;; start the tcp-server which applies handler to incoming data
"tcp-server-thread"))
(run-thread (make-thread
(lambda ()
(tt:keep-running ttdat dbfname dbstruct)))))
(thread-start! tcp-thread)
(thread-start! run-thread)
(thread-join! run-thread) ;; run thread will exit on timeout or other conditions
;;
;; set a flag here to tell tcp-thread to stop running
;;
;; (thread-join! tcp-thread) ;; can't wait
;;
;; remove the servinfo file
;;
;; close the database, remove lock in on-disk db
;;
;; close the listener ports
;;
(exit)))
(begin
(debug:print 0 *default-log-port* "INFO: found server(s) already running for db "dbfname", "(string-intersperse servers ",")" Exiting.")
(exit)))))
(let* ((ttdat (make-tt areapath: areapath)))
;; (servers (tt:find-server areapath dbfname))) ;; should use tt:get-current-server-info instead
;; (if (null? servers)
(let* ((dbstruct (dbmod:open-dbmoddb areapath run-id (dbfile:db-init-proc) keys)))
(tt-handler-set! ttdat (handler dbstruct))
(let* ((tcp-thread (make-thread
(lambda ()
(tt:start-tcp-server ttdat)) ;; start the tcp-server which applies handler to incoming data
"tcp-server-thread"))
(run-thread (make-thread
(lambda ()
(tt:keep-running ttdat dbfname dbstruct)))))
(thread-start! tcp-thread)
(thread-start! run-thread)
(thread-join! run-thread) ;; run thread will exit on timeout or other conditions
;;
;; set a flag here to tell tcp-thread to stop running
;;
;; (thread-join! tcp-thread) ;; can't wait
;;
;; remove the servinfo file
;;
;; close the database, remove lock in on-disk db
;;
;; close the listener ports
;;
(exit)))
;;(begin
;; (debug:print 0 *default-log-port* "INFO: found server(s) already running for db "dbfname", "(string-intersperse servers ",")" Exiting.")
;; (exit)))))
))
(define (tt:keep-running ttdat dbfname dbstruct)
;; verfiy conn for ready
;; listener socket has been started by this stage
(thread-sleep! 1)
(let* ((cleanup (lambda ()
(if (tt-cleanup-proc ttdat)
((tt-cleanup-proc ttdat))))))
(let loop ((count 0))
(if (> count 60)
(begin
(debug:print 0 *default-log-port* "FATAL: Could not start a tcp server, giving up.")
(exit 1))
(if (not (tt-port ttdat)) ;; no connection yet
(let* ((last-update (dbr:dbstruct-last-update dbstruct))
(curr-secs (current-seconds)))
(if (> (- curr-secs last-update) 3) ;; every 3-4 seconds
(begin
((dbr:dbstruct-sync-proc dbstruct) last-update)
(dbr:dbstruct-last-update-set! curr-secs)))
(thread-sleep! 1)
(loop (+ count 1))))))
(tt:create-server-registration-file ttdat dbfname)
;; now start watching the last-access, if it hasn't been touched
;; in over ten seconds we exit
(let loop ()
(if (< (- (current-seconds) (tt-last-access ttdat)) 60)
(begin
(thread-sleep! 2)
(loop))))
(let loop ((count 0))
(if (> count 60)
(begin
(debug:print 0 *default-log-port* "FATAL: Could not start a tcp server, giving up.")
(exit 1))
(if (not (tt-port ttdat)) ;; no connection yet
(let* ((last-update (dbr:dbstruct-last-update dbstruct))
(curr-secs (current-seconds)))
(if (> (- curr-secs last-update) 3) ;; every 3-4 seconds
(begin
((dbr:dbstruct-sync-proc dbstruct) last-update)
(dbr:dbstruct-last-update-set! curr-secs)))
(thread-sleep! 1)
(loop (+ count 1))))))
(tt:create-server-registration-file ttdat dbfname)
;; now start watching the last-access, if it hasn't been touched
;; in over ten seconds we exit
(thread-sleep! 1)
(let loop ()
(let* ((servers (tt:get-server-info-sorted ttdat dbfname))
(ok (cond
((null? servers) #f) ;; not ok
((equal? (list-ref (car servers) 6) ;; compare the servinfofile
(tt-servinf-file ttdat))
(debug:print-info 0 *default-log-port* "Keep running, I'm the top server.")
#t)
(else
(debug:print-info 0 *default-log-port* "I'm not the lead server: "servers)
(let* ((leadsrv (car servers)))
(match leadsrv
((host port startseconds server-id pid dbfname servinfofile)
(if (tt:ping host port server-id)
#f ;; not the server, but all good, want to exit
(begin
;; what to do here? remove the offending file?
(debug:print-info 0 *default-log-port* "Removing apparently dead server info file: "servinfofile)
(delete-file* servinfofile)
#t ;; not the server but the server is not reachable
)))
(else
(debug:print 0 *default-log-port* "BAD SERVER RECORD: "leadsrv)
(assert #f "Bad server record "leadsrv))))))))
(if (not ok)
(begin
(cleanup)
(exit)))
(if (< (- (current-seconds) (tt-last-access ttdat)) 60)
(begin
(thread-sleep! 5)
(loop)))))
(if (tt-cleanup-proc ttdat)
((tt-cleanup-proc ttdat)))
(debug:print 0 *default-log-port* "INFO: Server timed out, exiting."))
(cleanup)
(debug:print 0 *default-log-port* "INFO: Server timed out, exiting.")))
;; ;; given an already set up uconn start the cmd-loop
;; ;;
;; (define (tt:cmd-loop ttdat)
;; (let* ((serv-listener (-socket uconn))
;; (listener (lambda ()
;; (let loop ((state 'start))
;; (let-values (((inp oup)(tcp-accept serv-listener)))
|
︙ | | |
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
|
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
|
+
-
+
-
+
|
(port (tt-port ttdat))
(servinf (conc servdir"/"host":"port"-"(current-process-id)":"dbfname))
(serv-id (tt:mk-signature areapath))
(clean-proc (lambda ()
(delete-file* servinf))))
(assert (and host port) "FATAL: tt:create-server-registration-file called with no conn, dbfname="dbfname)
(tt-cleanup-proc-set! ttdat clean-proc)
(tt-servinf-file-set! ttdat servinf)
(with-output-to-file servinf
(lambda ()
(print "SERVER STARTED: "host":"port" AT "(current-seconds)" server-id: "serv-id" pid: "(current-process-id)" dbfname: "dbfname)))
serv-id))
;; find valid server
;; get servers listed, last part of name must match :<dbfname>
;; if more than one, wait one second and look again
;; future: ping oldest, if alive remove other :<dbfname> files
;;
(define (tt:find-server areapath dbfname)
(let* ((servdir (tt:get-servinfo-dir areapath))
(sfiles (glob (conc servdir"/*:"dbfname))))
sfiles))
;; given a path to a server info file return: host port startseconds server-id
;; given a path to a server info file return: host port startseconds server-id pid dbfname logf
;; example of what it's looking for in the log file:
;; SERVER STARTED: 10.38.175.67:50216 AT 1616502350.0 server-id: 4907e90fc55c7a09694e3f658c639cf4
;;
(define (tt:server-get-info logf)
(let ((server-rx (regexp "^SERVER STARTED: (\\S+):(\\d+) AT ([\\d\\.]+) server-id: (\\S+) pid: (\\d+) dbfname: (\\S+)")) ;; SERVER STARTED: host:port AT timesecs server id
(dbprep-rx (regexp "^SERVER: dbprep"))
(dbprep-found 0)
(bad-dat (list #f #f #f #f #f #f)))
(bad-dat (list #f #f #f #f #f #f logf)))
(handle-exceptions
exn
(begin
;; WARNING: this is potentially dangerous to blanket ignore the errors
(if (file-exists? logf)
(debug:print-info 2 *default-log-port* "Unable to get server info from "logf", exn=" exn))
bad-dat) ;; no idea what went wrong, call it a bad server
|
︙ | | |
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
|
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
|
-
+
+
|
(match mlst
((_ host port start server-id pid dbfname)
(list host
(string->number port)
(string->number start)
server-id
(string->number pid)
dbfname))
dbfname
logf))
(else
(debug:print 0 *default-log-port* "ERROR: did not recognise SERVER line info "mlst)
bad-dat))))
(begin
(if dbprep-found
(begin
(debug:print-info 2 *default-log-port* "Server is in dbprep at " (common:human-time))
|
︙ | | |
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
|
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
|
+
+
|
" " profile-mode
))) ;; (conc " >> " logfile " 2>&1 &")))))
;; we want the remote server to start in *toppath* so push there
;; (push-directory areapath) ;; use cd in the command line instead
(debug:print 0 *default-log-port* "INFO: Trying to start server in tcp mode (" cmdln ") at "(common:human-time)"...")
;; (debug:print 0 *default-log-port* "INFO: starting server at " (common:human-time))
(setenv "NBFAKE_QUIET" "yes") ;; BUG: change to with-environment-variable ...
(setenv "NBFAKE_LOG" logfile)
(system (conc "cd "areapath" ; nbfake " cmdln))
(unsetenv "NBFAKE_QUIET")
(unsetenv "NBFAKE_LOG")
;;(pop-directory)
))
;;======================================================================
;; tcp connection stuff
;;======================================================================
|
︙ | | |