Overview
Comment: | Improve some of the logic around servers |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | v1.80 |
Files: | files | file ages | folders |
SHA1: |
87813c75d277c0130b42616fd865e1a0 |
User & Date: | mrwellan on 2023-03-02 21:32:23 |
Other Links: | branch diff | manifest | tags |
Context
2023-03-03
| ||
02:16 | Speculative fix for wrong server access issue check-in: 0f348aa84b user: mrwellan tags: v1.80 | |
2023-03-02
| ||
21:32 | Improve some of the logic around servers check-in: 87813c75d2 user: mrwellan tags: v1.80 | |
19:19 | Better message check-in: c3879943f0 user: mrwellan tags: v1.80 | |
Changes
Modified tcp-transportmod.scm from [99ffeec5af] to [6b42c5c5cc].
︙ | ︙ | |||
215 216 217 218 219 220 221 | (port (tt-conn-port conn)) ;; (dbfname (tt-conn-port conn)) ;; 192.168.0.127:4242-726924:4.db (pid (tt-conn-pid conn)) (servinf (conc areapath"/.servinfo/"host":"port"-"pid":"dbfname))) (hash-table-set! (tt-conns ttdat) dbfname #f) (if (file-exists? servinf) (begin | | > > > > | | 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 | (port (tt-conn-port conn)) ;; (dbfname (tt-conn-port conn)) ;; 192.168.0.127:4242-726924:4.db (pid (tt-conn-pid conn)) (servinf (conc areapath"/.servinfo/"host":"port"-"pid":"dbfname))) (hash-table-set! (tt-conns ttdat) dbfname #f) (if (file-exists? servinf) (begin (debug:print 0 *default-log-port* "INFO: no ping response from server "host":"port" for "dbfname) (if (and (file-exists? servinf) (> (- (current-seconds)(file-modification-time servinf)) 60)) (begin (debug:print 0 *default-log-port* "INFO: "servinf" file seems old and no ping response, removing it.") (delete-file* servinf)))) (debug:print 0 *default-log-port* "INFO: connection to server "host":"port" broken for "dbfname", but do not see servinf file "servinf)) (tt:handler ttdat cmd run-id params (+ attemptnum 1) area-dat areapath readonly-mode dbfname testsuite mtexe)) (assert #f "FATAL: tt:handler received bad data "res))))) (begin (thread-sleep! 1) ;; give it a rest and try again (tt:handler ttdat cmd run-id params attemptnum area-dat areapath readonly-mode dbfname testsuite mtexe))))) |
︙ | ︙ | |||
370 371 372 373 374 375 376 | (thread-sleep! 0.05) ;; any real need for delay here? (let loop () (let* ((servers (tt:get-server-info-sorted ttdat dbfname)) (ok (cond ((null? servers) #f) ;; not ok ((equal? (list-ref (car servers) 6) ;; compare the servinfofile (tt-servinf-file ttdat)) | | > | > > | | 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 | (thread-sleep! 0.05) ;; any real need for delay here? (let loop () (let* ((servers (tt:get-server-info-sorted ttdat dbfname)) (ok (cond ((null? servers) #f) ;; not ok ((equal? (list-ref (car servers) 6) ;; compare the servinfofile (tt-servinf-file ttdat)) (debug:print-info 0 *default-log-port* "Keep running, I'm the top server on "(tt-host ttdat)":"(tt-port ttdat)) (if db-locked-in #t (let* ((lockinfo (dbfile:with-no-sync-db nosyncdbpath (lambda (db) (db:no-sync-get-lock db dbfname)))) (success (car lockinfo))) (if success (begin (debug:print 0 *default-log-port* "Got server lock for "dbfname) (set! db-locked-in #t) #t) (begin (debug:print 0 *default-log-port* "Failed to get server lock for "dbfname) #f))))) (else (debug:print-info 0 *default-log-port* "I'm not the lead server: "servers) (let* ((leadsrv (car servers))) (match leadsrv ((host port startseconds server-id pid dbfname servinfofile) (if (tt:ping host port server-id) #f ;; not the server, but all good, want to exit (if (and (file-exists? servinfofile) (> (- (current-seconds)(file-modification-time servinfofile)) 15)) (begin ;; can't ping and file has been on disk 5 seconds, go ahead and try to remove it (debug:print-info 0 *default-log-port* "Removing apparently dead server info file: "servinfofile) (delete-file* servinfofile) #t) ;; not the server but the server is not reachable (begin (debug:print 0 *default-log-port* "I'm not the server but will try again since "servinfofile" is fresh") #t)))) (else ;; should never get here (debug:print 0 *default-log-port* "BAD SERVER RECORD: "leadsrv) (assert #f "Bad server record "leadsrv)))))))) (if ok ;; (if (> *api-process-request-count* 0) ;; have requests in flight ;; (tt-last-access-set! ttdat (current-seconds))) (tt-last-access-set! ttdat *db-last-access*) ;; bit silly, just use db-last-access |
︙ | ︙ | |||
614 615 616 617 618 619 620 | ;; if there is already a serv-listener return the udata ;; (define (setup-listener uconn #!optional (port 4242)) (assert (tt? uconn) "FATAL: setup-listener called with wrong struct "uconn) (handle-exceptions exn (if (< port 65535) | > > | | 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 | ;; if there is already a serv-listener return the udata ;; (define (setup-listener uconn #!optional (port 4242)) (assert (tt? uconn) "FATAL: setup-listener called with wrong struct "uconn) (handle-exceptions exn (if (< port 65535) (begin (thread-sleep! 0.25) (setup-listener uconn (+ port 1))) #f) (connect-listener uconn port))) (define (connect-listener uconn port) ;; (tcp-listener-socket LISTENER)(socket-name so) ;; sockaddr-address, sockaddr-port, sockaddr->string (let* ((tlsn (tcp-listen port 1000 #f)) ;; (tcp-listen TCPPORT [BACKLOG [HOST]]) |
︙ | ︙ |