Overview
Comment: | wip to clean up server shutdown process |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | v1.6584-nanomsg |
Files: | files | file ages | folders |
SHA1: |
e75192ae303f9ab465ffb69b6c46b67d |
User & Date: | matt on 2021-06-18 05:44:26 |
Other Links: | branch diff | manifest | tags |
Context
2021-06-18
| ||
22:29 | Added clean exit for nn using 'quit check-in: 48b415009f user: matt tags: v1.6584-nanomsg | |
05:44 | wip to clean up server shutdown process check-in: e75192ae30 user: matt tags: v1.6584-nanomsg | |
05:05 | Removed need for watchdog check-in: b1c8817ecf user: matt tags: v1.6584-nanomsg | |
Changes
Modified commonmod.scm from [d20588a683] to [917d82cc3a].
︙ | ︙ | |||
226 227 228 229 230 231 232 233 234 235 236 | (define *home-host* #f) ;; (define *total-non-write-delay* 0) (define *heartbeat-mutex* (make-mutex)) (define *api-process-request-count* 0) (define *max-api-process-requests* 0) (define *server-overloaded* #f) (define *writes-total-delay* 0) ;; client (define *rmt-mutex* (make-mutex)) ;; remote access calls mutex | > < < < | 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 | (define *home-host* #f) ;; (define *total-non-write-delay* 0) (define *heartbeat-mutex* (make-mutex)) (define *api-process-request-count* 0) (define *max-api-process-requests* 0) (define *server-overloaded* #f) (define *writes-total-delay* 0) (define *unclean-shutdown* #t) ;; flag to clear on clean shutdown ;; client (define *rmt-mutex* (make-mutex)) ;; remote access calls mutex ;; KEY info ;; (define *target* (make-hash-table)) ;; cache the target here; target is keyval1/keyval2/.../keyvalN (define *keys* (make-hash-table)) ;; cache the keys here (define *keyvals* (make-hash-table)) (define *toptest-paths* (make-hash-table)) ;; cache toptest path settings here (define *test-paths* (make-hash-table)) ;; cache test-id to test run paths here (define *test-ids* (make-hash-table)) ;; cache run-id, testname, and item-path => test-id |
︙ | ︙ |
Modified rmtmod.scm from [be3b150966] to [dca8b0d5fd].
︙ | ︙ | |||
1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 | (define (common:version-changed?) (not (equal? (common:get-last-run-version) (common:version-signature)))) (define (common:api-changed?) (not (equal? (substring (->string megatest-version) 0 4) (substring (conc (common:get-last-run-version)) 0 4)))) (define (std-exit-procedure) ;;(common:telemetry-log-close) (on-exit (lambda () 0)) ;;(debug:print-info 13 *default-log-port* "std-exit-procedure called; *time-to-exit*="*time-to-exit*) (let ((no-hurry (if (bdat-time-to-exit *bdat*) ;; hurry up #f (begin (bdat-time-to-exit-set! *bdat* #t) #t)))) (debug:print-info 4 *default-log-port* "starting exit process, finalizing databases.") (if (and no-hurry (debug:debug-mode 18)) (rmt:print-db-stats)) (let ((th1 (make-thread (lambda () ;; thread for cleaning up, give it five seconds (let* ((start-time (current-seconds))) | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | < < | < < < < < < | < < < < < < < < < < < < < < < < | 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 | (define (common:version-changed?) (not (equal? (common:get-last-run-version) (common:version-signature)))) (define (common:api-changed?) (not (equal? (substring (->string megatest-version) 0 4) (substring (conc (common:get-last-run-version)) 0 4)))) (define (rmt:server-shutdown) (let ((dbfile (servdat-dbfile *server-info*))) (debug:print-info 0 *default-log-port* "dbfile is "dbfile) (if dbfile (let* ((am-server (args:get-arg "-server")) (dbfile (args:get-arg "-db")) (apath *toppath*)) ;; do a final sync here (debug:print-info 0 *default-log-port* "Doing final sync for "apath" "dbfile" at "(current-seconds)) (db:sync-inmem->disk *dbstruct-db* apath dbfile force-sync: #t) (if am-server (if (string-match ".*/main.db$" dbfile) (let ((pkt-file (conc (get-pkts-dir *toppath*) "/" (servdat-uuid *server-info*) ".pkt"))) (debug:print-info 0 *default-log-port* "removing pkt "pkt-file) (delete-file* pkt-file) (debug:print-info 0 *default-log-port* "Releasing lock for "dbfile) (db:with-lock-db (servdat-dbfile *server-info*) (lambda (dbh dbfile) (db:release-lock dbh dbfile)))) (let* ((sdat *server-info*) ;; we have a run-id server (host (servdat-host sdat)) (port (servdat-port sdat)) (uuid (servdat-uuid sdat))) (if (not (string-match ".db/main.db" (args:get-arg "-db"))) (let* ((res (rmt:deregister-server *rmt:remote* ;; TODO/BUG: why is this requiring *rmt:remote*? *toppath* (servdat-host *server-info*) ;; iface (servdat-port *server-info*) (servdat-uuid *server-info*) (current-process-id) ))) (debug:print-info 0 *default-log-port* "deregistered-server, res="res))) (debug:print-info 0 *default-log-port* "deregistering server "host":"port" with uuid "uuid) ))))))) (define (std-exit-procedure) ;;(common:telemetry-log-close) (on-exit (lambda () 0)) ;;(debug:print-info 13 *default-log-port* "std-exit-procedure called; *time-to-exit*="*time-to-exit*) (let ((no-hurry (if (bdat-time-to-exit *bdat*) ;; hurry up #f (begin (bdat-time-to-exit-set! *bdat* #t) #t)))) (debug:print-info 4 *default-log-port* "starting exit process, finalizing databases.") (if (and no-hurry (debug:debug-mode 18)) (rmt:print-db-stats)) (let ((th1 (make-thread (lambda () ;; thread for cleaning up, give it five seconds (let* ((start-time (current-seconds))) (if (and *server-info* *unclean-shutdown*) (rmt:server-shutdown)) (debug:print-info 0 *default-log-port* "Shutdown activities completed in "(- (current-seconds) start-time)" seconds")) ;; (if *dbstruct-db* (db:close-all *dbstruct-db*)) ;; one second allocated #;(if (bdat-task-db *bdat*) ;; TODO: Check that this is correct for task db (let ((db (cdr (bdat-task-db *bdat*)))) (if (sqlite3:database? db) (begin (debug:print-info 0 *default-log-port* "Closing down task db "db) |
︙ | ︙ | |||
1589 1590 1591 1592 1593 1594 1595 | (servdat-port-set! *server-info* port) (servdat-status-set! *server-info* 'trying-port) (servdat-trynum-set! *server-info* (+ (servdat-trynum *server-info*) 1))) (set! *server-info* (make-servdat host: ipaddrstr port: port))) (let* ((rep (rmt:try-start-server ipaddrstr port))) (let loop ((instr (nn-recv rep))) (let* ((data (string->sexpr instr)) | > > | > > | | | > | 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 | (servdat-port-set! *server-info* port) (servdat-status-set! *server-info* 'trying-port) (servdat-trynum-set! *server-info* (+ (servdat-trynum *server-info*) 1))) (set! *server-info* (make-servdat host: ipaddrstr port: port))) (let* ((rep (rmt:try-start-server ipaddrstr port))) (let loop ((instr (nn-recv rep))) (let* ((data (string->sexpr instr)) (res (case data ((quit) 'quit) (else (api:process-request *dbstruct-db* data)))) (resdat (sexpr->string res))) (if (not (eq? res 'quit)) (begin (set! *db-last-access* (current-seconds)) (nn-send rep resdat) (loop (nn-recv rep))))))) ;; server exit stuff here (let* ((portnum (servdat-port *server-info*))) (portlogger:open-run-close portlogger:set-port portnum "released") (debug:print 1 *default-log-port* "INFO: server has been stopped")))) (define (rmt:try-start-server ipaddrstr portnum) (if *server-info* ;; update the server info as we might be trying next port (begin |
︙ | ︙ | |||
2137 2138 2139 2140 2141 2142 2143 | (> (+ last-access server-timeout) (current-seconds))) (if (common:low-noise-print 120 "server continuing") (debug:print-info 0 *default-log-port* "Server continuing, seconds since last db access: " (- (current-seconds) last-access))) (loop 0 bad-sync-count (current-milliseconds))) (else (debug:print-info 0 *default-log-port* "Server timed out. seconds since last db access: " (- (current-seconds) last-access)) | | | | | | | | | | | > | 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 | (> (+ last-access server-timeout) (current-seconds))) (if (common:low-noise-print 120 "server continuing") (debug:print-info 0 *default-log-port* "Server continuing, seconds since last db access: " (- (current-seconds) last-access))) (loop 0 bad-sync-count (current-milliseconds))) (else (debug:print-info 0 *default-log-port* "Server timed out. seconds since last db access: " (- (current-seconds) last-access)) ;; send self 'quit here (http-transport:server-shutdown port)))))))) (define (http-transport:server-shutdown port) (begin ;;(BB> "http-transport:server-shutdown called") (debug:print-info 0 *default-log-port* "Starting to shutdown the server. pid="(current-process-id)) ;; ;; start_shutdown ;; ;; deregister the server (rmt:server-shutdown) (set! *unclean-shutdown* #f) (bdat-time-to-exit-set! *bdat* #t) ;; tell on-exit to be fast as we've already cleaned up ;; (portlogger:open-run-close portlogger:set-port port "released") ;; done in rmt:run (thread-sleep! 1) ;; (debug:print-info 0 *default-log-port* "Max cached queries was " *max-cache-size*) ;; (debug:print-info 0 *default-log-port* "Number of cached writes " *number-of-writes*) |
︙ | ︙ |