Overview
Comment: | Added a little time spread on client starting servers - try to avoid startup storms |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | try-nanomsg |
Files: | files | file ages | folders |
SHA1: | b3a83a0cec1ff4307b0218ec05dd90c0 |
User & Date: | matt on 2014-11-29 21:50:06 |
Other Links: | branch diff | manifest | tags |
Context
2014-11-29
| ||
22:44 | Re-enable closing connections if open longer than the server timeout and no accesses (Closed-Leaf check-in: cec4ee3511, user: matt, tags: try-nanomsg) | |
21:50 | Added a little time spread on client starting servers - try to avoid startup storms (check-in: b3a83a0cec, user: matt, tags: try-nanomsg) | |
20:33 | http transport with try-nanomsg changes completed first pass Validation 100% (check-in: b260c4c5d9, user: matt, tags: try-nanomsg) | |
Changes
Modified client.scm from [8f8fe7296f] to [72d1a98b4a].
︙ | ︙ | |||
90 91 92 93 94 95 96 97 | (hash-table-delete! *runremote* run-id) (tasks:kill-server-run-id run-id) (tasks:server-force-clean-run-record (db:delay-if-busy tdbdat) run-id (tasks:hostinfo-get-interface server-dat) (tasks:hostinfo-get-port server-dat) " client:setup (server-dat = #t)") (server:try-running run-id) | > > > | | > | | 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 | (hash-table-delete! *runremote* run-id) (tasks:kill-server-run-id run-id) (tasks:server-force-clean-run-record (db:delay-if-busy tdbdat) run-id (tasks:hostinfo-get-interface server-dat) (tasks:hostinfo-get-port server-dat) " client:setup (server-dat = #t)") (if (> remaining-tries 8) (thread-sleep! (+ 1 (random 5))) ;; spread out the starts a little (thread-sleep! (+ 15 (random 20)))) ;; it isn't going well. give it plenty of time (server:try-running run-id) (thread-sleep! 5) ;; give server a little time to start up (client:setup run-id remaining-tries: (- remaining-tries 1)) ))) (begin ;; no server registered (let ((num-available (tasks:num-in-available-state (db:dbdat-get-db tdbdat) run-id))) (debug:print-info 0 "client:setup, no server registered, remaining-tries=" remaining-tries " num-available=" num-available) (if (< num-available 2) (server:try-running run-id)) (thread-sleep! (+ 5 (random (- 20 remaining-tries)))) ;; give server a little time to start up, randomize a little to avoid start storms. (client:setup run-id remaining-tries: (- remaining-tries 1))))))))) ;; (let ((host-info (hash-table-ref/default *runremote* run-id #f))) ;; (if host-info ;; this is a bit circular. the host-info *is* the start-res FIXME ;; (let* ((iface (http-transport:server-dat-get-iface host-info)) ;; (port (http-transport:server-dat-get-port host-info)) ;; (start-res (case *transport-type* |
︙ | ︙ |
Modified http-transport.scm from [4218ba432d] to [012f18812a].
︙ | ︙ | |||
285 286 287 288 289 290 291 | exn (begin (set! success #f) (debug:print 0 "WARNING: failure in with-input-from-request to " fullurl ".") (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn)) (hash-table-delete! *runremote* run-id) ;; Killing associated server to allow clean retry.") | | > > | 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 | exn (begin (set! success #f) (debug:print 0 "WARNING: failure in with-input-from-request to " fullurl ".") (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn)) (hash-table-delete! *runremote* run-id) ;; Killing associated server to allow clean retry.") (tasks:kill-server-run-id run-id) ;; better to kill the server in the logic that called this routine? (signal (make-composite-condition (make-property-condition 'commfail 'message "failed to connect to server"))) #f) (with-input-from-request ;; was dat fullurl (list (cons 'key "thekey") (cons 'cmd cmd) (cons 'params sparams)) read-string)) |
︙ | ︙ |
Modified rmt.scm from [fbd1a17708] to [df66a53f8b].
︙ | ︙ | |||
85 86 87 88 89 90 91 | ;; (mutex-lock! *send-receive-mutex*) (let* ((run-id (if rid rid 0)) (connection-info (rmt:get-connection-info run-id))) ;; the nmsg method does the encoding under the hood (the http method should be changed to do this also) (if connection-info ;; use the server if have connection info (let* ((dat (case *transport-type* | > | > | 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 | ;; (mutex-lock! *send-receive-mutex*) (let* ((run-id (if rid rid 0)) (connection-info (rmt:get-connection-info run-id))) ;; the nmsg method does the encoding under the hood (the http method should be changed to do this also) (if connection-info ;; use the server if have connection info (let* ((dat (case *transport-type* ((http)(condition-case (http-transport:client-api-send-receive run-id connection-info cmd params) ((commfail)(vector #f "communications fail")))) ((nmsg)(condition-case (nmsg-transport:client-api-send-receive run-id connection-info cmd params) ((timeout)(vector #f "timeout talking to server")))) (else (exit)))) (success (if (and dat (vector? dat)) (vector-ref dat 0) #f)) (res (if (and dat (vector? dat)) (vector-ref dat 1) #f))) (http-transport:server-dat-update-last-access connection-info) |
︙ | ︙ |