Overview
Comment: | wip. better but now there are run-away issues in ext-tests: |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | v1.80-tcp-inmem |
Files: | files | file ages | folders |
SHA1: |
94e5d1fb4383e49101ca5f5c81c76473 |
User & Date: | matt on 2023-02-20 14:17:06 |
Other Links: | branch diff | manifest | tags |
Context
2023-02-20
| ||
20:05 | Hacky solution for no-sync db check-in: b8f2578046 user: matt tags: v1.80-tcp-inmem | |
14:17 | wip. better but now there are run-away issues in ext-tests: check-in: 94e5d1fb43 user: matt tags: v1.80-tcp-inmem | |
10:32 | wip check-in: 0ac0c8e72f user: matt tags: v1.80-tcp-inmem | |
Changes
Modified tcp-transportmod.scm from [c554113ebe] to [b2bb58190e].
︙ | ︙ | |||
95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 | (socket #f) (thread #f) (host-port #f) (cmd-thread #f) (ro-mode #f) (ro-mode-checked #f) (last-access (current-seconds)) ) (define (tt:make-remote areapath) (make-tt areapath: areapath)) ;; do all the busy work of finding and setting up conn for ;; connecting to a server ;; (define (tt:client-connect-to-server ttdat dbfname run-id testsuite) (let* ((conn (hash-table-ref/default (tt-conns ttdat) dbfname #f)) (server-start-proc (lambda () (tt:server-process-run (tt-areapath ttdat) testsuite ;; (dbfile:testsuite-name) (common:find-local-megatest) run-id)))) (if conn conn ;; we are already connected to the server | > | | > > > > | | | 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 | (socket #f) (thread #f) (host-port #f) (cmd-thread #f) (ro-mode #f) (ro-mode-checked #f) (last-access (current-seconds)) (servinf-file #f) ) (define (tt:make-remote areapath) (make-tt areapath: areapath)) ;; do all the busy work of finding and setting up conn for ;; connecting to a server ;; (define (tt:client-connect-to-server ttdat dbfname run-id testsuite) (let* ((conn (hash-table-ref/default (tt-conns ttdat) dbfname #f)) (server-start-proc (lambda () (tt:server-process-run (tt-areapath ttdat) testsuite ;; (dbfile:testsuite-name) (common:find-local-megatest) run-id)))) (if conn conn ;; we are already connected to the server (let* ((sdat (tt:get-current-server-info ttdat dbfname))) (match sdat ((host port start-time server-id pid dbfname2 servinffile) (assert (equal? dbfname dbfname2) "FATAL: read server info from wrong file.") (let* ((host-port (conc host":"port)) (conn (make-tt-conn host: host port: port host-port: host-port dbfname: dbfname servinf-file: servinffile server-id: server-id server-start: start-time pid: pid))) (hash-table-set! (tt-conns ttdat) dbfname conn) ;; verify we can talk to this server (if (tt:ping host port server-id) conn (begin ;; rm the (last server) would go here (server-start-proc) (thread-sleep! 1) (tt:client-connect-to-server ttdat dbfname run-id testsuite))))) (else (debug:print-info 0 *default-log-port* "Number of records did not match expected. Bad server info?") (server-start-proc) (thread-sleep! 1) (tt:client-connect-to-server ttdat dbfname run-id testsuite))))))) (define (tt:ping host port server-id) (let* ((res (tt:send-receive-direct host port `(ping #f #f #f)))) ;; please send me your server-id ;; ;; need two threads, one a 5 second timer ;; (match res ((status errmsg result meta) (if (equal? result server-id) (begin (debug:print 0 *default-log-port* "Ping to "host":"port" successful.") #t) ;; then we are good (begin (debug:print 0 *default-log-port* "WARNING: server-id does not match, expected: "server-id", got: "result) #f))) (else ;; (debug:print 0 *default-log-port* "res not in form (status errmsg result meta), got: "res) #f)))) ;; client side handler ;; (define (tt:handler ttdat cmd run-id params attemptnum area-dat areapath readonly-mode dbfname testsuite mtexe) ;; NOTE: areapath is passed in and in tt struct. We'll use passed in value for now. (let* ((conn (tt:client-connect-to-server ttdat dbfname run-id testsuite))) ;; (hash-table-ref/default (tt-conns ttdat) dbfname #f))) |
︙ | ︙ | |||
206 207 208 209 210 211 212 | ;; (thread-sleep! 1) ;; (tt:handler ttdat cmd run-id params attemptnum area-dat areapath ;; readonly-mode dbfname testsuite mtexe))))))) (define (tt:bid-for-servership run-id) #f) | > > > > > > > > > > > > > | > > > | 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 | ;; (thread-sleep! 1) ;; (tt:handler ttdat cmd run-id params attemptnum area-dat areapath ;; readonly-mode dbfname testsuite mtexe))))))) (define (tt:bid-for-servership run-id) #f) ;; gets server info and appends path to server file ;; sorts by age, oldest first ;; ;; returns list of (host port startseconds server-id servinfofile) ;; (define (tt:get-server-info-sorted ttdat dbfname) (let* ((areapath (tt-areapath ttdat)) (sfiles (tt:find-server areapath dbfname)) (sdats (filter car (map tt:server-get-info sfiles))) ;; first element is #f if the file disappeared while being read (sorted (sort sdats (lambda (a b) (< (list-ref a 2)(list-ref b 2)))))) sorted)) (define (tt:get-current-server-info ttdat dbfname) (assert (tt-areapath ttdat) "FATAL: areapath not set in ttdat.") ;; ;; TODO - replace most of below with tt;get-server-info-sorted ;; (let* ((areapath (tt-areapath ttdat)) (sfiles (tt:find-server areapath dbfname)) (sdats (filter car (map tt:server-get-info sfiles))) ;; first element is #f if the file disappeared while being read (sorted (sort sdats (lambda (a b) (< (list-ref a 2)(list-ref b 2)))))) (if (null? sorted) #f ;; we'll want to wait until extra servers have exited |
︙ | ︙ | |||
255 256 257 258 259 260 261 262 263 264 | ;; start the listener and start responding to requests ;; ;; NOTE: organise by dbfname, not run-id so we don't need ;; to pull in more modules ;; ;; This is the routine called in megatest.scm to start a server ;; (define (tt:start-server areapath run-id dbfname handler keys) (assert areapath "FATAL: areapath not provided for tt:start-server") ;; is there already a server for this dbfile? Then exit. | > > | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | > > > > | | | | | | | | | | | | | | | | | | > | > > > > > > > > > > > > > > > > > > > > > > > > > > > > | | | | < | | > | 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 | ;; start the listener and start responding to requests ;; ;; NOTE: organise by dbfname, not run-id so we don't need ;; to pull in more modules ;; ;; This is the routine called in megatest.scm to start a server ;; ;; Server viability is checked in keep-running. Blindly start and run here. ;; (define (tt:start-server areapath run-id dbfname handler keys) (assert areapath "FATAL: areapath not provided for tt:start-server") ;; is there already a server for this dbfile? Then exit. (let* ((ttdat (make-tt areapath: areapath))) ;; (servers (tt:find-server areapath dbfname))) ;; should use tt:get-current-server-info instead ;; (if (null? servers) (let* ((dbstruct (dbmod:open-dbmoddb areapath run-id (dbfile:db-init-proc) keys))) (tt-handler-set! ttdat (handler dbstruct)) (let* ((tcp-thread (make-thread (lambda () (tt:start-tcp-server ttdat)) ;; start the tcp-server which applies handler to incoming data "tcp-server-thread")) (run-thread (make-thread (lambda () (tt:keep-running ttdat dbfname dbstruct))))) (thread-start! tcp-thread) (thread-start! run-thread) (thread-join! run-thread) ;; run thread will exit on timeout or other conditions ;; ;; set a flag here to tell tcp-thread to stop running ;; ;; (thread-join! tcp-thread) ;; can't wait ;; ;; remove the servinfo file ;; ;; close the database, remove lock in on-disk db ;; ;; close the listener ports ;; (exit))) ;;(begin ;; (debug:print 0 *default-log-port* "INFO: found server(s) already running for db "dbfname", "(string-intersperse servers ",")" Exiting.") ;; (exit))))) )) (define (tt:keep-running ttdat dbfname dbstruct) ;; verfiy conn for ready ;; listener socket has been started by this stage (thread-sleep! 1) (let* ((cleanup (lambda () (if (tt-cleanup-proc ttdat) ((tt-cleanup-proc ttdat)))))) (let loop ((count 0)) (if (> count 60) (begin (debug:print 0 *default-log-port* "FATAL: Could not start a tcp server, giving up.") (exit 1)) (if (not (tt-port ttdat)) ;; no connection yet (let* ((last-update (dbr:dbstruct-last-update dbstruct)) (curr-secs (current-seconds))) (if (> (- curr-secs last-update) 3) ;; every 3-4 seconds (begin ((dbr:dbstruct-sync-proc dbstruct) last-update) (dbr:dbstruct-last-update-set! curr-secs))) (thread-sleep! 1) (loop (+ count 1)))))) (tt:create-server-registration-file ttdat dbfname) ;; now start watching the last-access, if it hasn't been touched ;; in over ten seconds we exit (thread-sleep! 1) (let loop () (let* ((servers (tt:get-server-info-sorted ttdat dbfname)) (ok (cond ((null? servers) #f) ;; not ok ((equal? (list-ref (car servers) 6) ;; compare the servinfofile (tt-servinf-file ttdat)) (debug:print-info 0 *default-log-port* "Keep running, I'm the top server.") #t) (else (debug:print-info 0 *default-log-port* "I'm not the lead server: "servers) (let* ((leadsrv (car servers))) (match leadsrv ((host port startseconds server-id pid dbfname servinfofile) (if (tt:ping host port server-id) #f ;; not the server, but all good, want to exit (begin ;; what to do here? remove the offending file? (debug:print-info 0 *default-log-port* "Removing apparently dead server info file: "servinfofile) (delete-file* servinfofile) #t ;; not the server but the server is not reachable ))) (else (debug:print 0 *default-log-port* "BAD SERVER RECORD: "leadsrv) (assert #f "Bad server record "leadsrv)))))))) (if (not ok) (begin (cleanup) (exit))) (if (< (- (current-seconds) (tt-last-access ttdat)) 60) (begin (thread-sleep! 5) (loop))))) (cleanup) (debug:print 0 *default-log-port* "INFO: Server timed out, exiting."))) ;; ;; given an already set up uconn start the cmd-loop ;; ;; ;; (define (tt:cmd-loop ttdat) ;; (let* ((serv-listener (-socket uconn)) ;; (listener (lambda () ;; (let loop ((state 'start)) ;; (let-values (((inp oup)(tcp-accept serv-listener))) |
︙ | ︙ | |||
378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 | (port (tt-port ttdat)) (servinf (conc servdir"/"host":"port"-"(current-process-id)":"dbfname)) (serv-id (tt:mk-signature areapath)) (clean-proc (lambda () (delete-file* servinf)))) (assert (and host port) "FATAL: tt:create-server-registration-file called with no conn, dbfname="dbfname) (tt-cleanup-proc-set! ttdat clean-proc) (with-output-to-file servinf (lambda () (print "SERVER STARTED: "host":"port" AT "(current-seconds)" server-id: "serv-id" pid: "(current-process-id)" dbfname: "dbfname))) serv-id)) ;; find valid server ;; get servers listed, last part of name must match :<dbfname> ;; if more than one, wait one second and look again ;; future: ping oldest, if alive remove other :<dbfname> files ;; (define (tt:find-server areapath dbfname) (let* ((servdir (tt:get-servinfo-dir areapath)) (sfiles (glob (conc servdir"/*:"dbfname)))) sfiles)) | > | | | 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 | (port (tt-port ttdat)) (servinf (conc servdir"/"host":"port"-"(current-process-id)":"dbfname)) (serv-id (tt:mk-signature areapath)) (clean-proc (lambda () (delete-file* servinf)))) (assert (and host port) "FATAL: tt:create-server-registration-file called with no conn, dbfname="dbfname) (tt-cleanup-proc-set! ttdat clean-proc) (tt-servinf-file-set! ttdat servinf) (with-output-to-file servinf (lambda () (print "SERVER STARTED: "host":"port" AT "(current-seconds)" server-id: "serv-id" pid: "(current-process-id)" dbfname: "dbfname))) serv-id)) ;; find valid server ;; get servers listed, last part of name must match :<dbfname> ;; if more than one, wait one second and look again ;; future: ping oldest, if alive remove other :<dbfname> files ;; (define (tt:find-server areapath dbfname) (let* ((servdir (tt:get-servinfo-dir areapath)) (sfiles (glob (conc servdir"/*:"dbfname)))) sfiles)) ;; given a path to a server info file return: host port startseconds server-id pid dbfname logf ;; example of what it's looking for in the log file: ;; SERVER STARTED: 10.38.175.67:50216 AT 1616502350.0 server-id: 4907e90fc55c7a09694e3f658c639cf4 ;; (define (tt:server-get-info logf) (let ((server-rx (regexp "^SERVER STARTED: (\\S+):(\\d+) AT ([\\d\\.]+) server-id: (\\S+) pid: (\\d+) dbfname: (\\S+)")) ;; SERVER STARTED: host:port AT timesecs server id (dbprep-rx (regexp "^SERVER: dbprep")) (dbprep-found 0) (bad-dat (list #f #f #f #f #f #f logf))) (handle-exceptions exn (begin ;; WARNING: this is potentially dangerous to blanket ignore the errors (if (file-exists? logf) (debug:print-info 2 *default-log-port* "Unable to get server info from "logf", exn=" exn)) bad-dat) ;; no idea what went wrong, call it a bad server |
︙ | ︙ | |||
431 432 433 434 435 436 437 | (match mlst ((_ host port start server-id pid dbfname) (list host (string->number port) (string->number start) server-id (string->number pid) | | > | 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 | (match mlst ((_ host port start server-id pid dbfname) (list host (string->number port) (string->number start) server-id (string->number pid) dbfname logf)) (else (debug:print 0 *default-log-port* "ERROR: did not recognise SERVER line info "mlst) bad-dat)))) (begin (if dbprep-found (begin (debug:print-info 2 *default-log-port* "Server is in dbprep at " (common:human-time)) |
︙ | ︙ | |||
466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 | " " profile-mode ))) ;; (conc " >> " logfile " 2>&1 &"))))) ;; we want the remote server to start in *toppath* so push there ;; (push-directory areapath) ;; use cd in the command line instead (debug:print 0 *default-log-port* "INFO: Trying to start server in tcp mode (" cmdln ") at "(common:human-time)"...") ;; (debug:print 0 *default-log-port* "INFO: starting server at " (common:human-time)) (setenv "NBFAKE_QUIET" "yes") ;; BUG: change to with-environment-variable ... (system (conc "cd "areapath" ; nbfake " cmdln)) (unsetenv "NBFAKE_QUIET") ;;(pop-directory) )) ;;====================================================================== ;; tcp connection stuff ;;====================================================================== | > > | 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 | " " profile-mode ))) ;; (conc " >> " logfile " 2>&1 &"))))) ;; we want the remote server to start in *toppath* so push there ;; (push-directory areapath) ;; use cd in the command line instead (debug:print 0 *default-log-port* "INFO: Trying to start server in tcp mode (" cmdln ") at "(common:human-time)"...") ;; (debug:print 0 *default-log-port* "INFO: starting server at " (common:human-time)) (setenv "NBFAKE_QUIET" "yes") ;; BUG: change to with-environment-variable ... (setenv "NBFAKE_LOG" logfile) (system (conc "cd "areapath" ; nbfake " cmdln)) (unsetenv "NBFAKE_QUIET") (unsetenv "NBFAKE_LOG") ;;(pop-directory) )) ;;====================================================================== ;; tcp connection stuff ;;====================================================================== |
︙ | ︙ |