Overview
Comment: | Remove case 6 from rmt:send-receive. This was pinging the server constantly and opening additional connections that don't appear to be closed. Also, just let the connection die, it should retry. |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | v1.70-nohomehost |
Files: | files | file ages | folders |
SHA1: |
08646db430e863506fed170998c073b4 |
User & Date: | matt on 2022-11-22 06:36:06 |
Other Links: | branch diff | manifest | tags |
Context
2022-11-22
| ||
07:49 | Keep more servers around but let them also expire quickly if not used check-in: c0ef1c5bfa user: matt tags: v1.70-nohomehost | |
06:36 | Remove case 6 from rmt:send-receive. This was pinging the server constantly and opening additional connections that don't appear to be closed. Also, just let the connection die, it should retry. check-in: 08646db430 user: matt tags: v1.70-nohomehost | |
02:57 | sixyfivek is running pretty good check-in: d1ac2665ef user: matt tags: v1.70-nohomehost | |
Changes
Modified db.scm from [32f96d6d77] to [e18bcc992d].
︙ | ︙ | |||
4610 4611 4612 4613 4614 4615 4616 | (let ((db (cdr *task-db*))) (if (sqlite3:database? db) (begin (sqlite3:interrupt! db) (sqlite3:finalize! db #t) ;; (vector-set! *task-db* 0 #f) (set! *task-db* #f))))) | > > > > | | 4610 4611 4612 4613 4614 4615 4616 4617 4618 4619 4620 4621 4622 4623 4624 4625 4626 4627 4628 | (let ((db (cdr *task-db*))) (if (sqlite3:database? db) (begin (sqlite3:interrupt! db) (sqlite3:finalize! db #t) ;; (vector-set! *task-db* 0 #f) (set! *task-db* #f))))) (if (and (not (args:get-arg "-server")) *runremote*) (begin (debug:print-info 0 *default-log-port* "Closing all client connections...") (http-client#close-all-connections!))) ;; (if (and *runremote* ;; (remote-conndat *runremote*)) ;; (begin ;; (http-client#close-all-connections!))) ;; for http-client (if (not (eq? *default-log-port* (current-error-port))) (close-output-port *default-log-port*)) (set! *default-log-port* (current-error-port))) "Cleanup db exit thread")) |
︙ | ︙ |
Modified http-transport.scm from [acc168fde7] to [9bfccef351].
︙ | ︙ | |||
283 284 285 286 287 288 289 290 291 292 293 294 295 296 | (if (debug:debug-mode 1) (debug:print-info 0 *default-log-port* "couldn't talk to server, trying again ...") (begin (debug:print 0 *default-log-port* "WARNING: failure in with-input-from-request to " fullurl ".") (debug:print 0 *default-log-port* " message: " msg ", exn=" exn) (debug:print 0 *default-log-port* " cmd: " cmd " params: " params " key:" (or server-id "thekey")) (debug:print 0 *default-log-port* " call-chain: " call-chain))) (set! *runremote* #f) (set! runremote #f) ;; (if runremote ;; (remote-conndat-set! runremote #f)) ;; Killing associated server to allow clean retry.") ;; (tasks:kill-server-run-id run-id) ;; better to kill the server in the logic that called this routine? (mutex-unlock! *http-mutex*) | > | 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 | (if (debug:debug-mode 1) (debug:print-info 0 *default-log-port* "couldn't talk to server, trying again ...") (begin (debug:print 0 *default-log-port* "WARNING: failure in with-input-from-request to " fullurl ".") (debug:print 0 *default-log-port* " message: " msg ", exn=" exn) (debug:print 0 *default-log-port* " cmd: " cmd " params: " params " key:" (or server-id "thekey")) (debug:print 0 *default-log-port* " call-chain: " call-chain))) ;; what if another thread is communicating ok? Can't happen due to mutex (set! *runremote* #f) (set! runremote #f) ;; (if runremote ;; (remote-conndat-set! runremote #f)) ;; Killing associated server to allow clean retry.") ;; (tasks:kill-server-run-id run-id) ;; better to kill the server in the logic that called this routine? (mutex-unlock! *http-mutex*) |
︙ | ︙ | |||
389 390 391 392 393 394 395 | (begin (print-call-chain (current-error-port)) (debug:print-error 0 *default-log-port* "call to http-transport:server-dat-update-last-access with non-vector!!")))) ;; ;; connect ;; | | > | 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 | (begin (print-call-chain (current-error-port)) (debug:print-error 0 *default-log-port* "call to http-transport:server-dat-update-last-access with non-vector!!")))) ;; ;; connect ;; (define (http-transport:client-connect iface port server-id) (debug:print-info 0 *default-log-port* "Connecting to client at "iface":"port", with server-id "server-id) (let* ((api-url (conc "http://" iface ":" port "/api")) (api-uri (uri-reference (conc "http://" iface ":" port "/api"))) (api-req (make-request method: 'POST uri: api-uri)) (server-dat (vector iface port api-uri api-url api-req (current-seconds) server-id))) server-dat)) |
︙ | ︙ |
Modified rmt.scm from [f32646f514] to [91ffe1108a].
︙ | ︙ | |||
194 195 196 197 198 199 200 | (debug:print-info 12 *default-log-port* "rmt:send-receive, case 5") (rmt:open-qry-close-locally cmd 0 params)) ;;DOT CASE6 [label="init\nremote"]; ;;DOT MUTEXLOCK -> CASE6 [label="on homehost,\nwrite query,\nhave server,\ncan't reach it"]; {rank=same "case 6" CASE6}; ;;DOT CASE6 -> "rmt:send-receive"; ;; on homehost and this is a write, we already have a server, but server has died | > > > | | 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 | (debug:print-info 12 *default-log-port* "rmt:send-receive, case 5") (rmt:open-qry-close-locally cmd 0 params)) ;;DOT CASE6 [label="init\nremote"]; ;;DOT MUTEXLOCK -> CASE6 [label="on homehost,\nwrite query,\nhave server,\ncan't reach it"]; {rank=same "case 6" CASE6}; ;;DOT CASE6 -> "rmt:send-receive"; ;; on homehost and this is a write, we already have a server, but server has died ;; reinstate this keep-alive section but inject a time condition into the (add ... #;((and (cdr (remote-hh-dat runremote)) ;; on homehost (not (member cmd api:read-only-queries)) ;; this is a write (remote-server-url runremote) ;; have a server (not (server:ping (remote-server-url runremote) (remote-server-id runremote)))) ;; server has died. NOTE: this is not a cheap call! Need better approach. (debug:print 0 *default-log-port* "WARNING: server appears to have died, trying to reconnect, case 6") (http-transport:close-connections area-dat: runremote) ;; make sure to clean up (set! *runremote* (make-remote)) (let* ((server-info (remote-server-info *runremote*))) |
︙ | ︙ |