Overview
Comment: | No more starting servers for not main.db |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | v1.80-revolution |
Files: | files | file ages | folders |
SHA1: |
0710176e691632fa3a64ae786e907aa7 |
User & Date: | matt on 2023-11-12 18:51:56 |
Other Links: | branch diff | manifest | tags |
Context
2023-11-13
| ||
14:55 | Trimmed and cleaned up some rmt and tcp-transport code in prep for rip up and retry check-in: aa4d6364a1 user: mrwellan tags: v1.80-revolution | |
2023-11-12
| ||
18:51 | No more starting servers for not main.db check-in: 0710176e69 user: matt tags: v1.80-revolution | |
2023-11-11
| ||
18:35 | Cherrypicked cddccb0, 4310, (kinda) passes list-runs check-in: 0e0e9f5e2d user: matt tags: v1.80-revolution | |
Changes
Modified tcp-transportmod.scm from [f3861613c7] to [f8e0cc5988].
︙ | ︙ | |||
133 134 135 136 137 138 139 140 141 142 143 | ;; connecting to a server ;; (define (tt:client-connect-to-server ttdat dbfname run-id testsuite) (assert (tt:valid-run-id run-id dbfname) "FATAL: invalid run-id "run-id) (debug:print-info 2 *default-log-port* "tt:client-connect-to-server " dbfname " " run-id) (let* ((conn (hash-table-ref/default (tt-conns ttdat) dbfname #f)) (server-start-proc (lambda () (tt:server-process-run (tt-areapath ttdat) testsuite ;; (dbfile:testsuite-name) (common:find-local-megatest) | > > > | > | 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 | ;; connecting to a server ;; (define (tt:client-connect-to-server ttdat dbfname run-id testsuite) (assert (tt:valid-run-id run-id dbfname) "FATAL: invalid run-id "run-id) (debug:print-info 2 *default-log-port* "tt:client-connect-to-server " dbfname " " run-id) (let* ((conn (hash-table-ref/default (tt-conns ttdat) dbfname #f)) (server-start-proc (lambda () (print "dbfname: "dbfname) (assert (equal? dbfname "main.db") ;; only main.db is started here "FATAL: called server-start-proc for db other than main.db") (tt:server-process-run (tt-areapath ttdat) testsuite ;; (dbfile:testsuite-name) (common:find-local-megatest) run-id)) )) (if conn (begin (debug:print-info 2 *default-log-port* "already connected to a server") conn) ;; we are already connected to the server (let* ((sdat (tt:get-current-server-info ttdat dbfname))) (match sdat ((host port start-time server-id pid dbfname2 servinffile) |
︙ | ︙ | |||
714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 | ;; get servers listed, last part of name must match :<dbfname> ;; if more than one, wait one second and look again ;; future: ping oldest, if alive remove other :<dbfname> files ;; (define (tt:find-server areapath dbfname) (let* ((servdir (tt:get-servinfo-dir areapath)) (sfiles (glob (conc servdir"/*:"dbfname)))) sfiles)) ;; given a path to a server info file return: host port startseconds server-id pid dbfname logf ;; example of what it's looking for in the log file: ;; SERVER STARTED: 10.38.175.67:50216 AT 1616502350.0 server-id: 4907e90fc55c7a09694e3f658c639cf4 ;; (define (tt:server-get-info logf) (let ((server-rx (regexp "^SERVER STARTED: (\\S+):(\\d+) AT ([\\d\\.]+) server-id: (\\S+) pid: (\\d+) dbfname: (\\S+)")) ;; SERVER STARTED: host:port AT timesecs server id (dbprep-rx (regexp "^SERVER: dbprep")) (dbprep-found 0) (bad-dat (list #f #f #f #f #f #f logf))) (let ((fdat (handle-exceptions exn (begin | > > > > > | | > | 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 | ;; get servers listed, last part of name must match :<dbfname> ;; if more than one, wait one second and look again ;; future: ping oldest, if alive remove other :<dbfname> files ;; (define (tt:find-server areapath dbfname) (let* ((servdir (tt:get-servinfo-dir areapath)) (sfiles (glob (conc servdir"/*:"dbfname)))) ;; filter the files here by looking in processes table (if we are not main.db) ;; and or look at the time stamp on the servinfo file, a running server will ;; touch the file every minute (again, this will only apply for main.db) sfiles)) ;; given a path to a server info file return: host port startseconds server-id pid dbfname logf ;; example of what it's looking for in the log file: ;; SERVER STARTED: 10.38.175.67:50216 AT 1616502350.0 server-id: 4907e90fc55c7a09694e3f658c639cf4 ;; (define (tt:server-get-info logf) (let ((server-rx (regexp "^SERVER STARTED: (\\S+):(\\d+) AT ([\\d\\.]+) server-id: (\\S+) pid: (\\d+) dbfname: (\\S+)")) ;; SERVER STARTED: host:port AT timesecs server id (dbprep-rx (regexp "^SERVER: dbprep")) (dbprep-found 0) (bad-dat (list #f #f #f #f #f #f logf))) (let ((fdat (handle-exceptions exn (begin ;; BUG, TODO: add err checking, for now blanket ignore the errors? (debug:print-info 0 *default-log-port* "Unable to get server info from "logf ", exn="(condition->list exn)) '()) ;; no idea what went wrong, call it a bad server, return empty list (with-input-from-file logf read-lines)))) (if (null? fdat) ;; bad data, return bad-dat bad-dat (let loop ((inl (car fdat)) (tail (cdr fdat)) (lnum 0)) |
︙ | ︙ | |||
771 772 773 774 775 776 777 | (define (tt:server-process-run areapath testsuite mtexe run-id #!key (profile-mode "")) ;; areapath is *toppath* for a given testsuite area (assert areapath "FATAL: tt:server-process-run called without areapath defined.") (assert testsuite "FATAL: tt:server-process-run called without testsuite defined.") (assert mtexe "FATAL: tt:server-process-run called without mtexe defined.") ;; mtest -server - -m testsuite:ext-tests -db 6.db (let* ((dbfname (dbmod:run-id->dbfname run-id)) (load (get-normalized-cpu-load)) | | > | | > | > | | > | 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 | (define (tt:server-process-run areapath testsuite mtexe run-id #!key (profile-mode "")) ;; areapath is *toppath* for a given testsuite area (assert areapath "FATAL: tt:server-process-run called without areapath defined.") (assert testsuite "FATAL: tt:server-process-run called without testsuite defined.") (assert mtexe "FATAL: tt:server-process-run called without mtexe defined.") ;; mtest -server - -m testsuite:ext-tests -db 6.db (let* ((dbfname (dbmod:run-id->dbfname run-id)) (load (get-normalized-cpu-load)) (srvrs (tt:find-server areapath dbfname)) (trying (length srvrs)) (nrun (number-of-processes-running (conc "mtest.*server.*"testsuite".*"dbfname)))) (cond ((> load 2.0) (debug:print 0 *default-log-port* "Normalized load "load" on " (get-host-name) " is over the limit of 2.0. Not starting a server. Please reduce the load on "(get-host-name)" by killing some processes") (thread-sleep! 1) #f) ((> nrun 100) (debug:print 0 *default-log-port* nrun" servers running on " (get-host-name) ", not starting another.") (thread-sleep! 1) #f) ((> trying 2) (debug:print 0 *default-log-port* trying" servers registered in .servinfo dir. not starting another.") (thread-sleep! 1) #f) (else (if (not (file-exists? (conc areapath"/logs"))) (create-directory (conc areapath"/logs") #t)) (let* ((logfile (conc areapath "/logs/server-"dbfname"-"(current-process-id)".log")) ;; -" curr-pid "-" target-host ".log")) (cmdln (conc mtexe " -startdir "areapath |
︙ | ︙ | |||
809 810 811 812 813 814 815 | ;; (setenv "NBFAKE_QUIET" "yes") ;; BUG: change to with-environment-variable ... ;; (setenv "NBFAKE_LOG" logfile) ;; (system (conc "cd "areapath" ; nbfake " cmdln)) ;; (unsetenv "NBFAKE_QUIET") ;; (unsetenv "NBFAKE_LOG") ;;(pop-directory) | | | 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 | ;; (setenv "NBFAKE_QUIET" "yes") ;; BUG: change to with-environment-variable ... ;; (setenv "NBFAKE_LOG" logfile) ;; (system (conc "cd "areapath" ; nbfake " cmdln)) ;; (unsetenv "NBFAKE_QUIET") ;; (unsetenv "NBFAKE_LOG") ;;(pop-directory) #t))))) ;;====================================================================== ;; tcp connection stuff ;;====================================================================== ;; find a port and start tcp-server. This only starts the tcp portion of ;; the server, look at (tt:start-server ...) above for the entry point |
︙ | ︙ |