Overview
Comment: | another pass to allow distinct per-run-id transports to be used |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | rpc-transport |
Files: | files | file ages | folders |
SHA1: |
140ed85cfb2c2fa4457c4979f04b2745 |
User & Date: | bjbarcla on 2016-11-03 16:52:49 |
Other Links: | branch diff | manifest | tags |
Context
2016-11-03
| ||
21:31 | rpc-transport:client-setup implemented and tested check-in: 9102de0262 user: bjbarcla tags: rpc-transport | |
16:52 | another pass to allow distinct per-run-id transports to be used check-in: 140ed85cfb user: bjbarcla tags: rpc-transport | |
2016-11-02
| ||
03:09 | made changes to fix tcp-buffer size to 0 so rpc worked. small fight here between http-transport and rpc-transport here... hopefully this patch does the job. check-in: 58d1011733 user: bjbarcla tags: rpc-transport | |
Changes
Modified client.scm from [69eebee107] to [a2787b3361].
︙ | ︙ | |||
69 70 71 72 73 74 75 | (if (< num-available 2) (server:try-running run-id)) (thread-sleep! (+ 5 (random (- 20 remaining-tries)))) ;; give server a little time to start up, randomize a little to avoid start storms. (client:setup run-id remaining-tries: (- remaining-tries 1)))))) ((http)(client:setup-http run-id server-dat remaining-tries)) ;; ((rpc) (rpc-transport:client-setup run-id)) ;;(client:setup-rpc run-id)) rpc not implemented; want to see a failure here for now. (else | | | 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 | (if (< num-available 2) (server:try-running run-id)) (thread-sleep! (+ 5 (random (- 20 remaining-tries)))) ;; give server a little time to start up, randomize a little to avoid start storms. (client:setup run-id remaining-tries: (- remaining-tries 1)))))) ((http)(client:setup-http run-id server-dat remaining-tries)) ;; ((rpc) (rpc-transport:client-setup run-id)) ;;(client:setup-rpc run-id)) rpc not implemented; want to see a failure here for now. (else (debug:print-error 0 *default-log-port* "(6) Transport [" transport "] specified for run-id [" run-id "] is not implemented in client:setup. Cannot proceed.") (exit 1))))) ;; client:setup-http ;; ;; For http transport, robustly ensure an advertised-running server is actually working and responding, and ;; establish tcp connection to server. For servers marked running but not responding, kill them and clear from mdb |
︙ | ︙ | |||
91 92 93 94 95 96 97 | (ping-res (rmt:login-no-auto-client-setup start-res run-id))) (if (and start-res ping-res) (begin (hash-table-set! *runremote* run-id start-res) ;; side-effect - *runremote* cache init fpr rmt:* (debug:print-info 2 *default-log-port* "connected to " (http-transport:server-dat-make-url start-res)) start-res) (begin ;; login failed but have a server record, clean out the record and try again | | | | 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 | (ping-res (rmt:login-no-auto-client-setup start-res run-id))) (if (and start-res ping-res) (begin (hash-table-set! *runremote* run-id start-res) ;; side-effect - *runremote* cache init fpr rmt:* (debug:print-info 2 *default-log-port* "connected to " (http-transport:server-dat-make-url start-res)) start-res) (begin ;; login failed but have a server record, clean out the record and try again (debug:print-info 0 *default-log-port* "client:setup-http, login failed, will attempt to start server ... start-res=" start-res ", run-id=" run-id ", server-dat=" server-dat) (http-transport:close-connections run-id) (hash-table-delete! *runremote* run-id) (tasks:kill-server-run-id run-id) (tasks:bb-server-force-clean-run-record run-id iface port " client:setup-http (server-dat = #t)") (if (> remaining-tries 8) (thread-sleep! (+ 1 (random 5))) ;; spread out the starts a little (thread-sleep! (+ 15 (random 20)))) ;; it isn't going well. give it plenty of time (server:try-running run-id) (thread-sleep! 5) ;; give server a little time to start up (client:setup run-id remaining-tries: (- remaining-tries 1)) )))) |
︙ | ︙ |
Modified common.scm from [87c3dc36b9] to [20fc73cdb7].
︙ | ︙ | |||
75 76 77 78 79 80 81 | (define *inmemdb* #f) (define *task-db* #f) ;; (vector db path-to-db) (define *db-access-allowed* #t) ;; flag to allow access (define *db-access-mutex* (make-mutex)) ;; SERVER (define *my-client-signature* #f) | | > > | > | 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 | (define *inmemdb* #f) (define *task-db* #f) ;; (vector db path-to-db) (define *db-access-allowed* #t) ;; flag to allow access (define *db-access-mutex* (make-mutex)) ;; SERVER (define *my-client-signature* #f) ;; default preference for transport-type is set here ;; (define *transport-type* 'http) ;; override with [server] transport http|rpc (define *runremote* (make-hash-table)) ;; if set up for server communication this will hold <host port> (define *max-cache-size* 0) (define *logged-in-clients* (make-hash-table)) (define *client-non-blocking-mode* #f) (define *server-id* #f) (define *server-info* #f) (define *time-to-exit* #f) |
︙ | ︙ |
Modified launch.scm from [af266dccde] to [7e23bea701].
︙ | ︙ | |||
1120 1121 1122 1123 1124 1125 1126 | (create-directory work-area #t) (debug:print 0 *default-log-port* "WARNING: No disk work area specified - running in the test directory under tmp_run"))) (set! cmdparms (base64:base64-encode (z3:encode-buffer (with-output-to-string (lambda () ;; (list 'hosts hosts) (write (list (list 'testpath test-path) | | | 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 | (create-directory work-area #t) (debug:print 0 *default-log-port* "WARNING: No disk work area specified - running in the test directory under tmp_run"))) (set! cmdparms (base64:base64-encode (z3:encode-buffer (with-output-to-string (lambda () ;; (list 'hosts hosts) (write (list (list 'testpath test-path) (list 'transport (conc (rmt:run-id->transport-type run-id))) ;; (list 'serverinf *server-info*) (list 'toppath *toppath*) (list 'work-area work-area) (list 'test-name test-name) (list 'runscript runscript) (list 'run-id run-id ) (list 'test-id test-id ) |
︙ | ︙ |
Modified rmt.scm from [fa9d72cfee] to [905d4e405c].
︙ | ︙ | |||
11 12 13 14 15 16 17 | (use json format) ;; RADT => purpose of json format?? (declare (unit rmt)) (declare (uses api)) (declare (uses tdb)) (declare (uses http-transport)) | | > | 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 | (use json format) ;; RADT => purpose of json format?? (declare (unit rmt)) (declare (uses api)) (declare (uses tdb)) (declare (uses http-transport)) (declare (uses rpc-transport)) ;; ;; THESE ARE ALL CALLED ON THE CLIENT SIDE!!! ;; ;; ;; For debugging add the following to ~/.megatestrc ;; ;; (require-library trace) |
︙ | ︙ | |||
66 67 68 69 70 71 72 73 74 75 76 77 78 | (if cinfo cinfo ;; NB// can cache the answer for server running for 10 seconds ... ;; ;; (and (not (rmt:write-frequency-over-limit? cmd run-id)) (if (tasks:server-running-or-starting? (db:delay-if-busy (tasks:open-db)) run-id) (client:setup run-id) #f)))) (define *send-receive-mutex* (make-mutex)) ;; should have separate mutex per run-id ;; RA => e.g. usage (rmt:send-receive 'get-var #f (list varname)) ;; (define (rmt:send-receive cmd rid params #!key (attemptnum 1)) ;; start attemptnum at 1 so the modulo below works as expected | > > > > > > > > > > > > < < > | > > > | | > | > | > | > > > > > > > | | > > > > > > | | | 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 | (if cinfo cinfo ;; NB// can cache the answer for server running for 10 seconds ... ;; ;; (and (not (rmt:write-frequency-over-limit? cmd run-id)) (if (tasks:server-running-or-starting? (db:delay-if-busy (tasks:open-db)) run-id) (client:setup run-id) #f)))) (define (rmt:run-id->transport-type rid) (let* ((run-id (if rid rid 0)) (connection-info (hash-table-ref/default *runremote* run-id #f))) ;; the nmsg method does the encoding under the hood (the http method should be changed to do this also) (if connection-info ;; if we already have a connection for this run-id, use that precendent ;; use the server if have connection info (let* ((transport-type (vector-ref connection-info 6))) ;; BB: assumes all transport-type'-servertdat vector's item 6 ids transport type transport-type) ;; otherwise pick the global default as preference. *transport-type*))) (define *send-receive-mutex* (make-mutex)) ;; should have separate mutex per run-id ;; RA => e.g. usage (rmt:send-receive 'get-var #f (list varname)) ;; (define (rmt:send-receive cmd rid params #!key (attemptnum 1)) ;; start attemptnum at 1 so the modulo below works as expected ;; side-effect: clean out old connections ;; (mutex-lock! *db-multi-sync-mutex*) (let ((expire-time (- (current-seconds) (server:get-timeout) 10))) ;; don't forget the 10 second margin (for-each (lambda (run-id) (let ((connection (hash-table-ref/default *runremote* run-id #f))) (if (and (vector? connection) (< (http-transport:server-dat-get-last-access connection) expire-time)) ;; BB> BBTODO: make this generic, not http transport specific. (begin (debug:print-info 0 *default-log-port* "Discarding connection to server for run-id " run-id ", too long between accesses") (hash-table-delete! *runremote* run-id))))) (hash-table-keys *runremote*))) ;; (mutex-unlock! *db-multi-sync-mutex*) ;; (mutex-lock! *send-receive-mutex*) (let* ((run-id (if rid rid 0)) (connection-info (rmt:get-connection-info run-id))) ;; the nmsg method does the encoding under the hood (the http method should be changed to do this also) (if connection-info ;; use the server if have connection info (let* ((transport-type (rmt:run-id->transport-type run-id)) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Here, we make request to remote server ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (dat (case transport-type ;; BB: replaced *transport-type* global with run-id specific transport-type ((http)(condition-case (http-transport:client-api-send-receive run-id connection-info cmd params) ((commfail)(vector #f "communications fail")) ((exn)(vector #f "other fail")))) ;;((rpc) (rpc-transport:client-api-send-receive run-id connection-info cmd params)) ;; BB: let us error out for now (else (debug:print-error 0 *default-log-port* "(1) Transport [" transport-type "] specified for run-id [" run-id "] is not implemented in rmt:send-receive. Cannot proceed.") (vector #f (conc "transport ["transport-type"] unimplemented"))))) (success (if (vector? dat) (vector-ref dat 0) #f)) (res (if (vector? dat) (vector-ref dat 1) #f))) (if (vector? connection-info)(http-transport:server-dat-update-last-access connection-info)) ;; BB> BBTODO: make this generic, not http transport specific. (if success (begin ;; (mutex-unlock! *send-receive-mutex*) (case transport-type ((http rpc) res) ;; (db:string->obj res)) (else (debug:print-error 0 *default-log-port* "(2) Transport [" transport-type "] specified for run-id [" run-id "] is not implemented in rmt:send-receive. Cannot proceed. Also unexpected since this branch follows success which would follow a suported transport...") #f) ;; ((nmsg) res) )) ;; (vector-ref res 1))) (begin ;; let ((new-connection-info (client:setup run-id))) (debug:print 0 *default-log-port* "WARNING: Communication failed, trying call to rmt:send-receive again.") (case transport-type ((http) (hash-table-delete! *runremote* run-id) ;; don't keep using the same connection ;; NOTE: killing server causes this process to block forever. No idea why. Dec 2. ;; (if (eq? (modulo attemptnum 5) 0) ;; (tasks:kill-server-run-id run-id tag: "api-send-receive-failed")) ;; (mutex-unlock! *send-receive-mutex*) ;; close the mutex here to allow other threads access to communications (tasks:start-and-wait-for-server (tasks:open-db) run-id 15) ;; (nmsg-transport:client-api-send-receive run-id connection-info cmd param remtries: (- remtries 1)))))) ;; no longer killing the server in http-transport:client-api-send-receive ;; may kill it here but what are the criteria? ;; start with three calls then kill server ;; (if (eq? attemptnum 3)(tasks:kill-server-run-id run-id)) ;; (thread-sleep! 2) (rmt:send-receive cmd run-id params attemptnum: (+ attemptnum 1))) (else (debug:print-error 0 *default-log-port* "(3) Transport [" transport-type "] specified for run-id [" run-id "] is not implemented in rmt:send-receive. Cannot proceed.") #f))))) ;; no connection info; try to start a server, or access locally if no ;; server and the query is read-only ;; ;; Note: The tasks db was checked for a server in starting mode in the rmt:get-connection-info call ;; (if (and (< attemptnum 15) (member cmd api:write-queries)) (let* ((faststart (configf:lookup *configdat* "server" "faststart"))) (hash-table-delete! *runremote* run-id) ;; (mutex-unlock! *send-receive-mutex*) (if (and faststart (equal? faststart "no")) (begin (tasks:start-and-wait-for-server (db:delay-if-busy (tasks:open-db)) run-id 10) (thread-sleep! (random 5)) ;; give some time to settle and minimize collison? (rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1))) |
︙ | ︙ | |||
313 314 315 316 317 318 319 | (define (rmt:login run-id) (rmt:send-receive 'login run-id (list *toppath* megatest-version run-id *my-client-signature*))) ;; This login does no retries under the hood - it acts a bit like a ping. ;; Deprecated for nmsg-transport. ;; (define (rmt:login-no-auto-client-setup connection-info run-id) | | | > > > | | 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 | (define (rmt:login run-id) (rmt:send-receive 'login run-id (list *toppath* megatest-version run-id *my-client-signature*))) ;; This login does no retries under the hood - it acts a bit like a ping. ;; Deprecated for nmsg-transport. ;; (define (rmt:login-no-auto-client-setup connection-info run-id) (case (rmt:run-id->transport-type run-id) ((http)(rmt:send-receive-no-auto-client-setup connection-info 'login run-id (list *toppath* megatest-version run-id *my-client-signature*))) (else (debug:print-error 0 *default-log-port* "(4) Transport [" *transport-type* "] specified for run-id [" run-id "] is not implemented in rmt:send-receive. Cannot proceed.") (exit 1)))) ;; hand off a call to one of the db:queries statements ;; added run-id to make looking up the correct db possible ;; (define (rmt:general-call stmtname run-id . params) (rmt:send-receive 'general-call run-id (append (list stmtname run-id) params))) |
︙ | ︙ |
Modified server.scm from [fc86462d4a] to [1d7dcc4237].
︙ | ︙ | |||
70 71 72 73 74 75 76 | (or (args:get-arg "-transport") (configf:lookup *configdat* "server" "transport") "http")))) (set! *transport-type* ttype) ttype)) ;; Get the transport -- DO NOT call this from client code. In client code, this is run-id sensitive and not a global | | | 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 | (or (args:get-arg "-transport") (configf:lookup *configdat* "server" "transport") "http")))) (set! *transport-type* ttype) ttype)) ;; Get the transport -- DO NOT call this from client code. In client code, this is run-id sensitive and not a global ;; For code communicating with existing run-id with a server, use: (rmt:run-id->transport-type run-id) (define (server:get-transport) (if *transport-type* *transport-type* (server:set-transport))) ;; Generate a unique signature for this server (define (server:mk-signature) |
︙ | ︙ | |||
192 193 194 195 196 197 198 | (trycount 0)) (if server ;; note: client:start will set *runremote*. this needs to be changed ;; also, client:start will login to the server, also need to change that. ;; ;; client:start returns #t if login was successful. ;; | > | | | | > | | 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 | (trycount 0)) (if server ;; note: client:start will set *runremote*. this needs to be changed ;; also, client:start will login to the server, also need to change that. ;; ;; client:start returns #t if login was successful. ;; (let* ((transport-type (rmt:run-id->transport-type run-id)) (res (case transport-type ((http)(server:ping-server run-id (tasks:hostinfo-get-interface server) (tasks:hostinfo-get-port server))) (else (debug:print-error 0 *default-log-port* "(5) Transport [" transport-type "] specified for run-id [" run-id "] is not implemented in rmt:send-receive. Cannot proceed.") (exit 1))))) ;; if the server didn't respond we must remove the record (if res #t (begin (debug:print-info 0 *default-log-port* "server at " server " not responding, removing record") (tasks:server-force-clean-running-records-for-run-id (db:delay-if-busy tdbdat) run-id " server:check-if-running") |
︙ | ︙ |
Modified tasks.scm from [afe54500ce] to [133d4feb50].
︙ | ︙ | |||
335 336 337 338 339 340 341 342 343 344 345 346 347 348 | ;; BB> bb opinion - want to push responsibility into api (encapsulation), like waiting if db is busy and finding the db handle in the first place. why should the caller need to be concerned?? If my opinion carries, we'll remove the bb- and make other needful adjustments. (define (bb-mdb-inserter mdb-expecting-proc mdbless-args) (let ((mdb (db:delay-if-busy (tasks:open-db)))) (apply mdb-expecting-proc (cons mdb mdbless-args)))) (define (tasks:bb-server-lock-slot . args) (bb-mdb-inserter tasks:server-lock-slot args)) (define (tasks:bb-server-set-interface-port . args) (bb-mdb-inserter tasks:server-set-interface-port args)) (define (tasks:bb-server-am-i-the-server? . args) | > > > | 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 | ;; BB> bb opinion - want to push responsibility into api (encapsulation), like waiting if db is busy and finding the db handle in the first place. why should the caller need to be concerned?? If my opinion carries, we'll remove the bb- and make other needful adjustments. (define (bb-mdb-inserter mdb-expecting-proc mdbless-args) (let ((mdb (db:delay-if-busy (tasks:open-db)))) (apply mdb-expecting-proc (cons mdb mdbless-args)))) (define (tasks:bb-server-force-clean-run-record . args) (bb-mdb-inserter tasks:server-force-clean-run-record args)) (define (tasks:bb-server-lock-slot . args) (bb-mdb-inserter tasks:server-lock-slot args)) (define (tasks:bb-server-set-interface-port . args) (bb-mdb-inserter tasks:server-set-interface-port args)) (define (tasks:bb-server-am-i-the-server? . args) |
︙ | ︙ |