Overview
Comment: | Start on increasing allowed number of servers for a db file |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | v1.81-multi-server |
Files: | files | file ages | folders |
SHA1: |
66e4d2383a4e8dd4687c951e30d7d2a8 |
User & Date: | mrwellan on 2024-07-02 13:55:35 |
Other Links: | branch diff | manifest | tags |
Context
2024-07-03
| ||
19:06 | Multi-servers working. Needs polish but the machine didn't overload on > 10 parallel runs of sixtyfivek check-in: 16d75bb8f1 user: matt tags: v1.81-multi-server | |
2024-07-02
| ||
13:55 | Start on increasing allowed number of servers for a db file check-in: 66e4d2383a user: mrwellan tags: v1.81-multi-server | |
2024-07-01
| ||
17:27 | Changed version to v1.8101 check-in: 6b8bacba76 user: mmgraham tags: v1.81, v1.8101 | |
Changes
Modified tcp-transportmod.scm from [2d80f8e52f] to [f5588f1f60].
︙ | ︙ | |||
301 302 303 304 305 306 307 | (else ;; (debug:print 0 *default-log-port* "res not in form (status errmsg result meta), got: "res) (try-again))))) ;; client side handler ;; ;;(tt:handler #<tt> get-keys #f () 2 #f "/home/matt/data/megatest/ext-tests" #f "main.db" "ext-tests" "/home/matt/data/megatest/bin/.22.04/../megatest") | | | 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 | (else ;; (debug:print 0 *default-log-port* "res not in form (status errmsg result meta), got: "res) (try-again))))) ;; client side handler ;; ;;(tt:handler #<tt> get-keys #f () 2 #f "/home/matt/data/megatest/ext-tests" #f "main.db" "ext-tests" "/home/matt/data/megatest/bin/.22.04/../megatest") ;;g (define (tt:handler ttdat cmd run-id params attemptnum readonly-mode dbfname testsuite mtexe server-start-proc) ;; connect-to-server will start a server if needed. (let* ((areapath (tt-areapath ttdat)) (conn (tt:client-connect-to-server ttdat dbfname run-id testsuite server-start-proc))) ;; looks up conn keyed by dbfname (if conn ;; have connection, call the server (let* ((res (tt:send-receive ttdat conn cmd run-id params))) |
︙ | ︙ | |||
592 593 594 595 596 597 598 | #f (caar good-srvrs)))) ;; by here we have a trustworthy list of servers and we have removed the .servinfo file for any unresponsive servers ;; and the list is in good-srvrs (cond ((not home-host) ;; no servers yet, go ahead and start (debug:print-info 0 *default-log-port* "No servers yet, starting on "(get-host-name))) | | | 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 | #f (caar good-srvrs)))) ;; by here we have a trustworthy list of servers and we have removed the .servinfo file for any unresponsive servers ;; and the list is in good-srvrs (cond ((not home-host) ;; no servers yet, go ahead and start (debug:print-info 0 *default-log-port* "No servers yet, starting on "(get-host-name))) ((> (length good-srvrs) 3) ;; don't need more, just exit (debug:print-info 0 *default-log-port* "Have "(length good-srvrs)", no need for more, exiting.") (exit)) ((not (equal? home-host (get-host-name))) ;; there is a home-host and we are not on it (debug:print-info 0 *default-log-port* "Prime main server is on host "home-host", but we are on host "(get-host-name)", exiting.") (exit)) (else (debug:print-info 0 *default-log-port* "Starting on host "(get-host-name)", along with "(length good-srvrs)" other servers."))) |
︙ | ︙ | |||
620 621 622 623 624 625 626 627 628 629 630 631 632 633 | (thread-sleep! 0.25) (loop (+ count 1))) (begin (debug:print 0 *default-log-port* "ERROR: (tt-port ttdat) no port set! Exiting.") (exit))))) ;; create a servinfo file start keep-running (debug:print 0 *default-log-port* "Creating servinfo file for " dbfname) (tt:create-server-registration-file ttdat dbfname) (procinf-status-set! *procinf* "running") (tt-state-set! ttdat 'running) (dbfile:with-no-sync-db nosyncdbpath (lambda (nsdb) | > > | 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 | (thread-sleep! 0.25) (loop (+ count 1))) (begin (debug:print 0 *default-log-port* "ERROR: (tt-port ttdat) no port set! Exiting.") (exit))))) ;; create a servinfo file start keep-running ;; On WSL there seems to be a race condition where the .servinfo file ;; is not created fast enough (debug:print 0 *default-log-port* "Creating servinfo file for " dbfname) (tt:create-server-registration-file ttdat dbfname) (procinf-status-set! *procinf* "running") (tt-state-set! ttdat 'running) (dbfile:with-no-sync-db nosyncdbpath (lambda (nsdb) |
︙ | ︙ | |||
662 663 664 665 666 667 668 669 670 671 672 673 674 | #f (caar servers))) (my-index (list-index (lambda (x) (equal? (list-ref x 6) (tt-servinf-file ttdat))) servers)) (ok (cond ((not *server-run*) (debug:print 0 *default-log-port* "WARNING: received a stop server from client by remote request.") #f) ((null? servers) (debug:print 0 *default-log-port* "WARNING: no servinfo files found, this cannot be.") #f) ;; not ok | > > > | | | 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 | #f (caar servers))) (my-index (list-index (lambda (x) (equal? (list-ref x 6) (tt-servinf-file ttdat))) servers)) (ok (cond ((not my-index) (debug:print 0 *default-log-port* "WARNING: Apparently I don't exist.") 'not-yet) ;; keep trying ? ((not *server-run*) (debug:print 0 *default-log-port* "WARNING: received a stop server from client by remote request.") #f) ((null? servers) (debug:print 0 *default-log-port* "WARNING: no servinfo files found, this cannot be.") #f) ;; not ok ((> my-index 3) (debug:print 0 *default-log-port* "WARNING: there are more than three servers ahead of me, I'm not needed, exiting.") #f) ;; not ok to not be in first three ((eq? (tt-state ttdat) 'running) #t) ;; we are good to keep going ((> (- (current-seconds) start-time) 30) (debug:print 0 *default-log-port* "WARNING: over 30 seconds and not yet in runnning mode. Exiting.") #f) (else #t)))) (if ok |
︙ | ︙ | |||
726 727 728 729 730 731 732 | (servinf (conc servdir"/"host":"port"-"(current-process-id)":"dbfname)) (serv-id (tt:mk-signature areapath))) (assert (and host port) "FATAL: tt:create-server-registration-file called with no conn, dbfname="dbfname) (tt-servinf-file-set! ttdat servinf) (with-output-to-file servinf (lambda () (print "SERVER STARTED: "host":"port" AT "(current-seconds)" server-id: "serv-id" pid: "(current-process-id)" dbfname: "dbfname))) | > > > > > > > | | 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 | (servinf (conc servdir"/"host":"port"-"(current-process-id)":"dbfname)) (serv-id (tt:mk-signature areapath))) (assert (and host port) "FATAL: tt:create-server-registration-file called with no conn, dbfname="dbfname) (tt-servinf-file-set! ttdat servinf) (with-output-to-file servinf (lambda () (print "SERVER STARTED: "host":"port" AT "(current-seconds)" server-id: "serv-id" pid: "(current-process-id)" dbfname: "dbfname))) (let loop ((count 0)) (if (not (file-exists? servinf)) (begin (debug:print 0 *default-log-port* "WARNING: file "servinf" was created but it doesn't show up on disk! We'll try again.") (thread-sleep! 1) (if (< count 10) (loop (+ count 1)))))) serv-id)) ;; find valid server ;; get servers listed, last part of name must match :<dbfname> ;; if more than one, wait one second and look again ;; future: ping oldest, if alive remove other :<dbfname> files ;; (define (tt:find-server areapath dbfname) |
︙ | ︙ | |||
827 828 829 830 831 832 833 | (debug:print 0 *default-log-port* "Normalized load "load" on " (get-host-name) " is over the limit of 2.0. Not starting a server. Please reduce the load on "(get-host-name)" by killing some processes") (thread-sleep! 1) #f) ((> nrun 100) (debug:print 0 *default-log-port* nrun" servers running on " (get-host-name) ", not starting another.") (thread-sleep! 1) #f) | | | 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 | (debug:print 0 *default-log-port* "Normalized load "load" on " (get-host-name) " is over the limit of 2.0. Not starting a server. Please reduce the load on "(get-host-name)" by killing some processes") (thread-sleep! 1) #f) ((> nrun 100) (debug:print 0 *default-log-port* nrun" servers running on " (get-host-name) ", not starting another.") (thread-sleep! 1) #f) ((> trying 3) (debug:print 0 *default-log-port* trying" servers registered in .servinfo dir. not starting another.") (thread-sleep! 1) #f) (else (if (not (file-exists? (conc areapath"/logs"))) (create-directory (conc areapath"/logs") #t)) (let* ((logfile (conc areapath "/logs/server-"dbfname"-"(current-process-id)".log")) ;; -" curr-pid "-" target-host ".log")) |
︙ | ︙ |