Changes In Branch v1.80 Through [8e5977eca9] Excluding Merge-Ins
This is equivalent to a diff from 60b31fb56a to 8e5977eca9
2024-05-07
| ||
16:39 | Cherry picked postcmd and precmd for tests check-in: 40cce970c0 user: mrwellan tags: v1.80 | |
2023-11-09
| ||
14:17 | Pulled the todo back for creating the evolution branch. Leaf check-in: 0c56685866 user: mrwellan tags: v1.80-evolution | |
2023-11-06
| ||
12:29 | Specialized server - revolution Closed-Leaf check-in: a2bab764c1 user: mrwellan tags: v1.80-revolution | |
03:22 | Start all servers (rebased) Leaf check-in: 8a1f055698 user: matt tags: v1.80-start-all | |
2023-11-01
| ||
14:26 | Changed megatest version to 1.8019 check-in: 8e5977eca9 user: icfadm tags: v1.80, v1.8019 | |
2023-10-31
| ||
12:22 | Delete .servinfo files for servers that are no longer running check-in: 1993359947 user: mmgraham tags: v1.80 | |
2023-10-20
| ||
05:12 |
Merged fork
This node ran run-core-tests.sh through kill-rerun - a pretty good result. Dashboard comes up quickly also. check-in: e607892c7d user: mrwellan tags: v1.80 | |
04:57 | Merged fork check-in: 53900a0d02 user: mrwellan tags: v1.80-start-all | |
2023-10-19
| ||
18:55 | changed a debug msg to level2, increased delay from 0.5 to 2 secs Leaf check-in: 60b31fb56a user: mmgraham tags: v1.80-processes | |
16:09 | moved make-tmpdir-name into commonmod check-in: 1624c400a9 user: mmgraham tags: v1.80-processes | |
Modified api.scm from [5fa313076b] to [13a08c65d1].
︙ | ︙ | |||
369 370 371 372 373 374 375 | ((dec-var) (apply db:dec-var dbstruct params)) ((del-var) (apply db:del-var dbstruct params)) ((add-var) (apply db:add-var dbstruct params)) ((insert-run) (apply db:insert-run dbstruct params)) ;; STEPS | > | > | 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 | ((dec-var) (apply db:dec-var dbstruct params)) ((del-var) (apply db:del-var dbstruct params)) ((add-var) (apply db:add-var dbstruct params)) ((insert-run) (apply db:insert-run dbstruct params)) ;; STEPS ((teststep-set-status!) ;; (apply db:teststep-set-status! dbstruct params)) (db:add-cached-write dbstruct db:teststep-set-status! run-id params)) ((delete-steps-for-test!) (apply db:delete-steps-for-test! dbstruct params)) ;; TEST DATA ((test-data-rollup) (apply db:test-data-rollup dbstruct params)) ((csv->test-data) (apply db:csv->test-data dbstruct params)) ;; MISC |
︙ | ︙ |
Modified dashboard.scm from [92015a98e3] to [2fba186660].
︙ | ︙ | |||
3110 3111 3112 3113 3114 3115 3116 | exn (begin (debug:print 2 *default-log-port* "WARNING: error in accessing databases in get-youngest-run-db-mod-time: " ((condition-property-accessor 'exn 'message) exn) " db-dir="dbdir ", exn=" exn) (current-seconds)) ;; something went wrong - just print an error and return current-seconds (common:max (map (lambda (filen) (file-modification-time filen)) | | | 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 | exn (begin (debug:print 2 *default-log-port* "WARNING: error in accessing databases in get-youngest-run-db-mod-time: " ((condition-property-accessor 'exn 'message) exn) " db-dir="dbdir ", exn=" exn) (current-seconds)) ;; something went wrong - just print an error and return current-seconds (common:max (map (lambda (filen) (file-modification-time filen)) (cons (conc dbdir "/main.db") (glob (conc dbdir "/?.db"))))))) (define (dashboard:monitor-changed? commondat tabdat) (let* ((run-update-time (current-seconds)) (monitor-db-path (dboard:tabdat-monitor-db-path tabdat)) (monitor-modtime (if (and monitor-db-path (common:file-exists? monitor-db-path)) (file-modification-time monitor-db-path) -1))) |
︙ | ︙ |
Modified db.scm from [b1837f1312] to [0a367c507f].
︙ | ︙ | |||
2737 2738 2739 2740 2741 2742 2743 | (sqlite3:execute db "INSERT OR REPLACE into test_steps (test_id,stepname,state,status,event_time,comment,logfile) VALUES(?,?,?,?,?,?,?);" test-id teststep-name state-in status-in (current-seconds) (if comment comment "") (if logfile logfile ""))))) | < < | 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 | (sqlite3:execute db "INSERT OR REPLACE into test_steps (test_id,stepname,state,status,event_time,comment,logfile) VALUES(?,?,?,?,?,?,?);" test-id teststep-name state-in status-in (current-seconds) (if comment comment "") (if logfile logfile ""))))) (define (db:delete-steps-for-test! dbstruct run-id test-id) ;; TODO: figure out why status is the key field rather than state (note: CONSTRAINT test_steps_constraint UNIQUE (test_id,stepname,state) ) (db:with-db dbstruct run-id #t (lambda (dbdat db) |
︙ | ︙ | |||
4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 | (begin (debug:print 0 *default-log-port* "WARNING: path given, " outputfile " is relative, prefixing with current directory") (conc (current-directory) "/" outputfile))) results) ;; brutal clean up (dbfile:add-dbdat dbstruct #f dbdat) (system "rm -rf tempdir"))) ;; (db:extract-ods-file db "outputfile.ods" '(("sysname" "%")("fsname" "%")("datapath" "%")) "%") ;;====================================================================== ;; moving watch dogs here due to dependencies ;;====================================================================== | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 | (begin (debug:print 0 *default-log-port* "WARNING: path given, " outputfile " is relative, prefixing with current directory") (conc (current-directory) "/" outputfile))) results) ;; brutal clean up (dbfile:add-dbdat dbstruct #f dbdat) (system "rm -rf tempdir"))) ;;====================================================================== ;; cached writes stuff ;;====================================================================== (define (db:add-cached-write dbstruct proc run-id params) (debug:print 0 *default-log-port* "Adding cached write for run-id "run-id" params " params) (mutex-lock! *cached-writes-mutex*) (let* ((hkey (cons dbstruct run-id)) (cached-writes-queue (hash-table-ref/default *cached-writes-queues* hkey '()))) (hash-table-set! *cached-writes-queues* hkey (cons (list proc params) cached-writes-queue))) (if (not *cached-writes-flag*) (begin (set! *cached-writes-flag* #t) (thread-start! (make-thread (lambda () (debug:print 0 *default-log-port* "process cached writes thread started.") (thread-sleep! 1) (db:process-cached-writes-queue)))))) (mutex-unlock! *cached-writes-mutex*)) (define (db:process-cached-writes-queue) (mutex-lock! *cached-writes-mutex*) (hash-table-for-each *cached-writes-queues* (lambda (hkey writes-list) (let* ((dbstruct (car hkey)) (run-id (cdr hkey))) (debug:print 0 *default-log-port* "Processing "(length writes-list)" cached writes for run "run-id) (db:with-db dbstruct run-id #t (lambda (dbdat db) (sqlite3:with-transaction db (lambda () (for-each (lambda (queued-write) (match queued-write ((proc params)(apply proc dbstruct params)) (else (assert #f "BAD queued-write")))) writes-list))) (hash-table-delete! *cached-writes-queues* hkey)))))) (set! *cached-writes-flag* #f) (mutex-unlock! *cached-writes-mutex*)) ;; (db:extract-ods-file db "outputfile.ods" '(("sysname" "%")("fsname" "%")("datapath" "%")) "%") ;;====================================================================== ;; moving watch dogs here due to dependencies ;;====================================================================== |
︙ | ︙ |
Modified dbfile.scm from [b5eea0764a] to [5eebf6582a].
︙ | ︙ | |||
1577 1578 1579 1580 1581 1582 1583 1584 1585 | (result (or stmth (let* ((newstmth (sqlite3:prepare db stmt))) ;; (db:hoh-set! stmt-cache db stmt newstmth) (hash-table-set! stmt-cache stmt newstmth) newstmth)))) (mutex-unlock! *get-cache-stmth-mutex*) result)) ) | > > > > > > > > > > | 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 | (result (or stmth (let* ((newstmth (sqlite3:prepare db stmt))) ;; (db:hoh-set! stmt-cache db stmt newstmth) (hash-table-set! stmt-cache stmt newstmth) newstmth)))) (mutex-unlock! *get-cache-stmth-mutex*) result)) ;;====================================================================== ;; cached writes - run list of procs inside transaction ;; NOTE: this only works because we have once database per process ;;====================================================================== (define *cached-writes-mutex* (make-mutex)) (define *cached-writes-flag* #f) (define *cached-writes-queues* (make-hash-table)) ;; dbstruct->list of writes ) |
Modified dbmod.scm from [9f0ce614a3] to [00037bb607].
︙ | ︙ | |||
117 118 119 120 121 122 123 | (debug:print-info 0 *default-log-port* "dbmod:with-db, database is busy, will try "count" more times.") (thread-sleep! 1) (loop (- count 1))) (begin (debug:print-info 0 *default-log-port* "dbmod:with-db, database is busy, giving up.") (exit 1)))) (exn () | | > | > | 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 | (debug:print-info 0 *default-log-port* "dbmod:with-db, database is busy, will try "count" more times.") (thread-sleep! 1) (loop (- count 1))) (begin (debug:print-info 0 *default-log-port* "dbmod:with-db, database is busy, giving up.") (exit 1)))) (exn () (dbfile:print-err exn "ERROR: Unknown error with db for run-id " run-id", message: " ((condition-property-accessor 'exn 'message) exn) ", details: "(condition->list exn)) (exit 2)))))) (if use-mutex (mutex-unlock! *db-with-db-mutex*)) res))) (define (db:with-db dbstruct run-id w/r proc . params) (dbmod:with-db dbstruct run-id w/r proc params)) |
︙ | ︙ |
Modified megatest-version.scm from [5a374d2bf1] to [93d0d09127].
︙ | ︙ | |||
16 17 18 19 20 21 22 | ;; along with Megatest. If not, see <http://www.gnu.org/licenses/>. ;; Always use two or four digit decimal ;; 1.01, 1.02...1.10,1.11,1.1101 ... 1.99,2.00.. ;; (declare (unit megatest-version)) | | | 16 17 18 19 20 21 22 23 | ;; along with Megatest. If not, see <http://www.gnu.org/licenses/>. ;; Always use two or four digit decimal ;; 1.01, 1.02...1.10,1.11,1.1101 ... 1.99,2.00.. ;; (declare (unit megatest-version)) (define megatest-version 1.8019) |
Modified megatest.scm from [af8974dd23] to [b8a61f70e3].
︙ | ︙ | |||
997 998 999 1000 1001 1002 1003 | (ttdat (make-tt areapath: *toppath*)) ) (format #t fmtstr "DB" "host:port" "PID" "age" "last mod" "state") (for-each (lambda (dbfile) (let* ( (dbfname (conc (pathname-file dbfile) ".db")) | < < < | 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 | (ttdat (make-tt areapath: *toppath*)) ) (format #t fmtstr "DB" "host:port" "PID" "age" "last mod" "state") (for-each (lambda (dbfile) (let* ( (dbfname (conc (pathname-file dbfile) ".db")) ) (let ( (sinfos (tt:get-server-info-sorted ttdat dbfname)) ) (for-each (lambda (sinfo) (let* ( (db (list-ref sinfo 5)) |
︙ | ︙ | |||
1026 1027 1028 1029 1030 1031 1032 | ) (format #t fmtstr db (conc host ":" port) pid age last-mod state) ) ) sinfos ) ) | < < < | 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 | ) (format #t fmtstr db (conc host ":" port) pid age last-mod state) ) ) sinfos ) ) ) ) dbfiles ) (set! *didsomething* #t) (exit) ) |
︙ | ︙ | |||
1077 1078 1079 1080 1081 1082 1083 | (age (seconds->hr-min-sec (- (current-seconds) (list-ref sinfo 2)))) (last-mod (seconds->string (list-ref sinfo 2))) (killed (system (conc "ssh " host " kill " pid " > /dev/null"))) (dummy2 (sleep 1)) (state (if (> (system (conc "ssh " host " ps " pid " > /dev/null")) 0) "dead" "alive")) ) (format #t fmtstr db (conc host ":" port) pid age last-mod state) | | | 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 | (age (seconds->hr-min-sec (- (current-seconds) (list-ref sinfo 2)))) (last-mod (seconds->string (list-ref sinfo 2))) (killed (system (conc "ssh " host " kill " pid " > /dev/null"))) (dummy2 (sleep 1)) (state (if (> (system (conc "ssh " host " ps " pid " > /dev/null")) 0) "dead" "alive")) ) (format #t fmtstr db (conc host ":" port) pid age last-mod state) (delete-file* sfile) ) ) sinfos ) ) ) sfiles |
︙ | ︙ |
Modified rmt.scm from [564930aec3] to [b6600b2d34].
︙ | ︙ | |||
70 71 72 73 74 75 76 | (define *send-receive-mutex* (make-mutex)) ;; should have separate mutex per run-id ;; RA => e.g. usage (rmt:send-receive 'get-var #f (list varname)) ;; (define (rmt:send-receive cmd rid params #!key (attemptnum 1)(area-dat #f)) ;; start attemptnum at 1 so the modulo below works as expected (assert *toppath* "FATAL: rmt:send-receive called with *toppath* not set.") | | < < < < < < < < < < < < < < < < < < < < < < < | 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 | (define *send-receive-mutex* (make-mutex)) ;; should have separate mutex per run-id ;; RA => e.g. usage (rmt:send-receive 'get-var #f (list varname)) ;; (define (rmt:send-receive cmd rid params #!key (attemptnum 1)(area-dat #f)) ;; start attemptnum at 1 so the modulo below works as expected (assert *toppath* "FATAL: rmt:send-receive called with *toppath* not set.") ;; 1. check if server is started IFF cmd is a write OR if we are not on the homehost, store in runremote ;; 2. check the age of the connections. refresh the connection if it is older than timeout-20 seconds. ;; 3. do the query, if on homehost use local access ;; (let* ((start-time (current-seconds)) ;; snapshot time so all use cases get same value (areapath *toppath*);; TODO - resolve from dbstruct to be compatible with multiple areas (runremote (or area-dat |
︙ | ︙ |
Modified tcp-transportmod.scm from [cc561d90e9] to [157488cd36].
︙ | ︙ | |||
230 231 232 233 234 235 236 237 238 239 240 241 242 243 | ;; client side handler ;; ;;(tt:handler #<tt> get-keys #f () 2 #f "/home/matt/data/megatest/ext-tests" #f "main.db" "ext-tests" "/home/matt/data/megatest/bin/.22.04/../megatest") ;; (define (tt:handler ttdat cmd run-id params attemptnum area-dat areapath readonly-mode dbfname testsuite mtexe) (debug:print 2 *default-log-port* "tt:handler cmd: " cmd " run-id: " run-id " attemptnum: " attemptnum) ;; NOTE: areapath is passed in and in tt struct. We'll use passed in value for now. (let* ((conn (tt:client-connect-to-server ttdat dbfname run-id testsuite))) ;; (hash-table-ref/default (tt-conns ttdat) dbfname #f))) (if conn ;; have connection, call the server (let* ((res (tt:send-receive ttdat conn cmd run-id params))) ;; res is (status errmsg result meta) ; (debug:print 0 *default-log-port* "conn:" conn " res: " res) (match res | > | 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 | ;; client side handler ;; ;;(tt:handler #<tt> get-keys #f () 2 #f "/home/matt/data/megatest/ext-tests" #f "main.db" "ext-tests" "/home/matt/data/megatest/bin/.22.04/../megatest") ;; (define (tt:handler ttdat cmd run-id params attemptnum area-dat areapath readonly-mode dbfname testsuite mtexe) (debug:print 2 *default-log-port* "tt:handler cmd: " cmd " run-id: " run-id " attemptnum: " attemptnum) ;; NOTE: areapath is passed in and in tt struct. We'll use passed in value for now. ;; connect-to-server will start a server if needed. (let* ((conn (tt:client-connect-to-server ttdat dbfname run-id testsuite))) ;; (hash-table-ref/default (tt-conns ttdat) dbfname #f))) (if conn ;; have connection, call the server (let* ((res (tt:send-receive ttdat conn cmd run-id params))) ;; res is (status errmsg result meta) ; (debug:print 0 *default-log-port* "conn:" conn " res: " res) (match res |
︙ | ︙ | |||
258 259 260 261 262 263 264 | ((loaded) (debug:print 0 *default-log-port* "WARNING: server for "dbfname" is loaded, slowing queries.") (tt:backoff-incr (tt-conn-host conn)(tt-conn-port conn)) result) ;; (tt:handler ttdat cmd run-id params (+ attemptnum 1) area-dat areapath readonly-mode dbfname testsuite mtexe)) (else result))) (else ;; did not receive properly formated result | | | 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 | ((loaded) (debug:print 0 *default-log-port* "WARNING: server for "dbfname" is loaded, slowing queries.") (tt:backoff-incr (tt-conn-host conn)(tt-conn-port conn)) result) ;; (tt:handler ttdat cmd run-id params (+ attemptnum 1) area-dat areapath readonly-mode dbfname testsuite mtexe)) (else result))) (else ;; did not receive properly formated result (if (not res) ;; tt:send-receive telling us that communication failed (let* ((host (tt-conn-host conn)) (port (tt-conn-port conn)) ;; (dbfname (tt-conn-port conn)) ;; 192.168.0.127:4242-726924:4.db (pid (tt-conn-pid conn)) ;;(servinf (tt-conn-servinf-file conn))) (servinf (tt-servinf-file ttdat))) ;; (conc areapath"/.servinfo/"host":"port"-"pid":"dbfname))) ;; TODO, use (server:get-servinfo-dir areapath) (hash-table-set! (tt-conns ttdat) dbfname #f) |
︙ | ︙ | |||
291 292 293 294 295 296 297 | ;; start server - addressed in client-connect-to-server ;; delay - addressed in client-connect-to-server ;; try again (thread-sleep! 0.25) ;; dunno, I think this needs to be here (tt:handler ttdat cmd run-id params (+ attemptnum 1) area-dat areapath readonly-mode dbfname testsuite mtexe)) )))) (begin ;; no server file, delay and try again | | | 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 | ;; start server - addressed in client-connect-to-server ;; delay - addressed in client-connect-to-server ;; try again (thread-sleep! 0.25) ;; dunno, I think this needs to be here (tt:handler ttdat cmd run-id params (+ attemptnum 1) area-dat areapath readonly-mode dbfname testsuite mtexe)) )))) (begin ;; no server file, delay and try again (debug:print 2 *default-log-port* "INFO: connection to server "host":"port" broken for "dbfname", no servinf file. Server exited? ") (thread-sleep! 0.5) (tt:handler ttdat cmd run-id params (+ attemptnum 1) area-dat areapath readonly-mode dbfname testsuite mtexe)))) (begin ;; this case is where res is malformed. Probably should abort (assert #f "FATAL: tt:handler received bad data "res) ;; (debug:print 0 *default-log-port* "INFO: got corrupt data from server "host":"port", "res", for "dbfname", will try again.") ;; (tt:handler ttdat cmd run-id params (+ attemptnum 1) area-dat areapath readonly-mode dbfname testsuite mtexe) ))))) |
︙ | ︙ | |||
474 475 476 477 478 479 480 | (define (tt:start-server areapath run-id dbfname-in handler keys) (assert areapath "FATAL: areapath not provided for tt:start-server") ;; is there already a server for this dbfile? Then exit. (debug:print 2 *default-log-port* "tt:start-server: " dbfname-in) (let* ((ttdat (make-tt areapath: areapath)) (dbfname (or dbfname-in (dbmod:run-id->dbfname run-id))) (servers (tt:find-server areapath dbfname))) ;; should use tt:get-current-server-info instead | > | | 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 | (define (tt:start-server areapath run-id dbfname-in handler keys) (assert areapath "FATAL: areapath not provided for tt:start-server") ;; is there already a server for this dbfile? Then exit. (debug:print 2 *default-log-port* "tt:start-server: " dbfname-in) (let* ((ttdat (make-tt areapath: areapath)) (dbfname (or dbfname-in (dbmod:run-id->dbfname run-id))) (servers (tt:find-server areapath dbfname))) ;; should use tt:get-current-server-info instead (debug:print 0 *default-log-port* "Found " (length servers) " already running for " dbfname) (if (> (length servers) 0) (begin (debug:print 0 *default-log-port* "INFO: found server(s) already running for db "dbfname", "(string-intersperse servers ",")" Exiting.") (exit)) (let* ((dbstruct (dbmod:open-dbmoddb areapath run-id dbfname (dbfile:db-init-proc) keys))) (tt-handler-set! ttdat (handler dbstruct)) (let* ((tcp-thread (make-thread (lambda () |
︙ | ︙ | |||
535 536 537 538 539 540 541 | (if (tt-cleanup-proc ttdat) ((tt-cleanup-proc ttdat))) (dbfile:with-no-sync-db nosyncdbpath (lambda (db) (let* ((dbtmpname (dbr:dbstruct-dbtmpname dbstruct))) (debug:print-info 0 *default-log-port* "Running clean up, including removing db file "dbtmpname) (db:no-sync-del! db dbfname) | < | | 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 | (if (tt-cleanup-proc ttdat) ((tt-cleanup-proc ttdat))) (dbfile:with-no-sync-db nosyncdbpath (lambda (db) (let* ((dbtmpname (dbr:dbstruct-dbtmpname dbstruct))) (debug:print-info 0 *default-log-port* "Running clean up, including removing db file "dbtmpname) (db:no-sync-del! db dbfname) )))))) (set! *server-info* ttdat) (let loop ((count 0)) (if (> count 240) (begin (debug:print 0 *default-log-port* "FATAL: Could not start a tcp server, giving up.") (exit 1)) (if (not (tt-port ttdat)) ;; no connection yet |
︙ | ︙ | |||
583 584 585 586 587 588 589 590 591 592 593 594 595 596 | (debug:print 0 *default-log-port* "Failed to get server lock for "dbfname) #f)))))) (if (and res (common:low-noise-print 120 "top server message")) (debug:print-info 0 *default-log-port* "Keep running, I'm the top server for " dbfname" on "(tt-host ttdat)":"(tt-port ttdat))) res)) (else (debug:print-info 0 *default-log-port* "I'm not the lead server: "servers) (let* ((leadsrv (car servers))) (match leadsrv ((host port startseconds server-id pid dbfname servinfofile) (let* ((result (tt:timed-ping host port server-id)) (res (car result)) (ping (cdr result))) | > | 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 | (debug:print 0 *default-log-port* "Failed to get server lock for "dbfname) #f)))))) (if (and res (common:low-noise-print 120 "top server message")) (debug:print-info 0 *default-log-port* "Keep running, I'm the top server for " dbfname" on "(tt-host ttdat)":"(tt-port ttdat))) res)) (else ;; wrong servinfo file (debug:print-info 0 *default-log-port* "I'm not the lead server: "servers) (let* ((leadsrv (car servers))) (match leadsrv ((host port startseconds server-id pid dbfname servinfofile) (let* ((result (tt:timed-ping host port server-id)) (res (car result)) (ping (cdr result))) |
︙ | ︙ | |||
711 712 713 714 715 716 717 | ;; find valid server ;; get servers listed, last part of name must match :<dbfname> ;; if more than one, wait one second and look again ;; future: ping oldest, if alive remove other :<dbfname> files ;; (define (tt:find-server areapath dbfname) (let* ((servdir (tt:get-servinfo-dir areapath)) | | > > > > > > > > > > > > > > > > | > > > | 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 | ;; find valid server ;; get servers listed, last part of name must match :<dbfname> ;; if more than one, wait one second and look again ;; future: ping oldest, if alive remove other :<dbfname> files ;; (define (tt:find-server areapath dbfname) (let* ((servdir (tt:get-servinfo-dir areapath)) (sfiles (glob (conc servdir"/*:"dbfname))) (good-files '())) (for-each (lambda (sfile) (let* ((sinfo (tt:server-get-info sfile)) (host (list-ref sinfo 0)) (port (list-ref sinfo 1)) (server-id (list-ref sinfo 3)) (pid (list-ref sinfo 4)) (status (system (conc "ssh " host " ps " pid " > /dev/null"))) ) (if (= status 0) (set! good-files (cons sfile good-files)) (delete-file* sfile) ) ) ) sfiles ) (debug:print-info 2 *default-log-port* "tt:find-server: good-files: " good-files " sfiles: " sfiles) good-files)) ;; given a path to a server info file return: host port startseconds server-id pid dbfname logf ;; example of what it's looking for in the log file: ;; SERVER STARTED: 10.38.175.67:50216 AT 1616502350.0 server-id: 4907e90fc55c7a09694e3f658c639cf4 ;; (define (tt:server-get-info logf) (let ((server-rx (regexp "^SERVER STARTED: (\\S+):(\\d+) AT ([\\d\\.]+) server-id: (\\S+) pid: (\\d+) dbfname: (\\S+)")) ;; SERVER STARTED: host:port AT timesecs server id |
︙ | ︙ |