Overview
Comment: | got rpc to work... at least one call from megatest -repl :) |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | rpc-transport |
Files: | files | file ages | folders |
SHA1: |
985c43c44cdbd283e6e22b4c014edfa1 |
User & Date: | bjbarcla on 2016-11-04 17:54:28 |
Other Links: | branch diff | manifest | tags |
Context
2016-11-04
| ||
18:52 | changed default transport to rpc check-in: 122f376d3c user: bjbarcla tags: rpc-transport | |
17:54 | got rpc to work... at least one call from megatest -repl :) check-in: 985c43c44c user: bjbarcla tags: rpc-transport | |
2016-11-03
| ||
21:31 | rpc-transport:client-setup implemented and tested check-in: 9102de0262 user: bjbarcla tags: rpc-transport | |
Changes
Modified common.scm from [20fc73cdb7] to [86bc64eb87].
︙ | ︙ | |||
78 79 80 81 82 83 84 | (define *db-access-mutex* (make-mutex)) ;; SERVER (define *my-client-signature* #f) ;; default preference for transport-type is set here ;; | | | 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 | (define *db-access-mutex* (make-mutex)) ;; SERVER (define *my-client-signature* #f) ;; default preference for transport-type is set here ;; (define *transport-type* 'rpc) ;; override with [server] transport http|rpc (define *runremote* (make-hash-table)) ;; if set up for server communication this will hold <host port> (define *max-cache-size* 0) (define *logged-in-clients* (make-hash-table)) (define *client-non-blocking-mode* #f) (define *server-id* #f) (define *server-info* #f) |
︙ | ︙ |
Modified rmt.scm from [905d4e405c] to [7835a76c1e].
︙ | ︙ | |||
77 78 79 80 81 82 83 | (connection-info (hash-table-ref/default *runremote* run-id #f))) ;; the nmsg method does the encoding under the hood (the http method should be changed to do this also) (if connection-info ;; if we already have a connection for this run-id, use that precendent ;; use the server if have connection info (let* ((transport-type (vector-ref connection-info 6))) ;; BB: assumes all transport-type'-servertdat vector's item 6 ids transport type transport-type) | | | 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 | (connection-info (hash-table-ref/default *runremote* run-id #f))) ;; the nmsg method does the encoding under the hood (the http method should be changed to do this also) (if connection-info ;; if we already have a connection for this run-id, use that precendent ;; use the server if have connection info (let* ((transport-type (vector-ref connection-info 6))) ;; BB: assumes all transport-type'-servertdat vector's item 6 ids transport type transport-type) ;; otherwise pick the global default as preference. (set in common.scm) *transport-type*))) (define *send-receive-mutex* (make-mutex)) ;; should have separate mutex per run-id ;; RA => e.g. usage (rmt:send-receive 'get-var #f (list varname)) ;; (define (rmt:send-receive cmd rid params #!key (attemptnum 1)) ;; start attemptnum at 1 so the modulo below works as expected |
︙ | ︙ | |||
115 116 117 118 119 120 121 | ;; Here, we make request to remote server ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (dat (case transport-type ;; BB: replaced *transport-type* global with run-id specific transport-type ((http)(condition-case (http-transport:client-api-send-receive run-id connection-info cmd params) ((commfail)(vector #f "communications fail")) ((exn)(vector #f "other fail")))) | | | < | | | 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 | ;; Here, we make request to remote server ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (dat (case transport-type ;; BB: replaced *transport-type* global with run-id specific transport-type ((http)(condition-case (http-transport:client-api-send-receive run-id connection-info cmd params) ((commfail)(vector #f "communications fail")) ((exn)(vector #f "other fail")))) ((rpc) (rpc-transport:client-api-send-receive run-id connection-info cmd params)) ;; BB: let us error out for now (else (debug:print-error 0 *default-log-port* "(1) Transport [" transport-type "] specified for run-id [" run-id "] is not implemented in rmt:send-receive. Cannot proceed." (symbol? transport-type)) (vector #f (conc "transport ["transport-type"] unimplemented"))))) (success (if (vector? dat) (vector-ref dat 0) #f)) (res (if (vector? dat) (vector-ref dat 1) #f))) (if (vector? connection-info)(http-transport:server-dat-update-last-access connection-info)) ;; BB> BBTODO: make this generic, not http transport specific. (if success (begin ;; (mutex-unlock! *send-receive-mutex*) (case transport-type ((http rpc) res) ;; (db:string->obj res)) (else (debug:print-error 0 *default-log-port* "(2) Transport [" transport-type "] specified for run-id [" run-id "] is not implemented in rmt:send-receive. Cannot proceed. Also unexpected since this branch follows success which would follow a suported transport...") #f) )) ;; (vector-ref res 1))) ;; no success... (begin ;; let ((new-connection-info (client:setup run-id))) (debug:print 0 *default-log-port* "WARNING: Communication failed, trying call to rmt:send-receive again.") (case transport-type ((http rpc) (hash-table-delete! *runremote* run-id) ;; don't keep using the same connection ;; NOTE: killing server causes this process to block forever. No idea why. Dec 2. ;; (if (eq? (modulo attemptnum 5) 0) ;; (tasks:kill-server-run-id run-id tag: "api-send-receive-failed")) ;; (mutex-unlock! *send-receive-mutex*) ;; close the mutex here to allow other threads access to communications (tasks:start-and-wait-for-server (tasks:open-db) run-id 15) ;; (nmsg-transport:client-api-send-receive run-id connection-info cmd param remtries: (- remtries 1)))))) |
︙ | ︙ |
Modified rpc-transport.scm from [0b43c472fb] to [73278da0e8].
︙ | ︙ | |||
25 26 27 28 29 30 31 32 | (include "db_records.scm") (define *heartbeat-mutex* (make-mutex)) (define *server-loop-heart-beat* (current-seconds)) ;; procstr is the name of the procedure to be called as a string (define (rpc-transport:autoremote procstr params) | > > > > > > > > > > > > > > > > > > > > > > | | | | | | | | > > > > > > > > > > > | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | > > > > > > > | | > > > | > | > > | > > > > > > > > > | > > > > > > > > > | | > > > > > > > > > > > > > > > > > | 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 | (include "db_records.scm") (define *heartbeat-mutex* (make-mutex)) (define *server-loop-heart-beat* (current-seconds)) ;; procstr is the name of the procedure to be called as a string (define (rpc-transport:autoremote procstr params) (print "BB> rpc-transport:autoremote entered with procstr="procstr" params="params" string?"(string? procstr)" symbol?"(symbol? procstr)" list?"(list? params) ) (let* ((procsym (if (symbol? procstr) procstr (string->symbol (->string procstr)))) (res (begin (print "BB>before apply") (apply (eval procsym) params)))) (print "BB> after apply; rpc-transport res="res) res )) ;; rpc receiver (define (rpc-transport:api-exec cmd params) (BB> "rpc-transport:api-exec cmd="cmd" params="params" inmemdb="*inmemdb*) (let* ( (resdat (api:execute-requests *inmemdb* (vector cmd params))) ;; #( flag result ) (flag (vector-ref resdat 0)) (res (vector-ref resdat 1))) (BB> "rpc-transport:api-exec flag="flag" res="res) res)) ;; (handle-exceptions ;; exn ;; (begin ;; (debug:print 0 *default-log-port* "Remote failed for " proc " " params " exn="exn) ;; (apply (eval (string->symbol procstr)) params)) ;; ;; (if *runremote* ;; ;; (apply (eval (string->symbol (conc "remote:" procstr))) params) ;; (apply (eval (string->symbol procstr)) params))) ;; retry an operation (depends on srfi-18) ;; ================== ;; idea here is to avoid spending time on coding retrying something. Trying to be generic here. ;; ;; Exception handling: ;; ------------------- ;; if evaluating the thunk results in exception, it will be retried. ;; on last try, if final-failure-returns-actual is true, the exception will be re-thrown to caller. ;; ;; look at options below #!key to see how to configure behavior ;; ;; (define (retry-thunk the-thunk #!key ;;;; options below (accept-result? (lambda (x) x)) ;; retry if predicate applied to thunk's result is false (retries 4) ;; how many tries (failure-value #f) ;; return this on final failure, unless following option is enabled: (final-failure-returns-actual #f) ;; on failure, on the last try, just return the result, not failure-value (retry-delay 0.1) ;; delay between tries (back-off-factor 1) ;; multiply retry-delay by this factor on retry (random-delay 0.1) ;; add a random portion of this value to wait (chatty #f) ;; print status as we go, for debugging. ) (when chatty (print) (print "Entered retry-thunk") (print "-=-=-=-=-=-")) (let* ((guarded-thunk ;; we are guarding the thunk against exceptions. We will record whether result of evaluation is an exception or a regular result. (lambda () (let* ((EXCEPTION (gensym)) ;; using gensym to avoid potential collision (res (condition-case (the-thunk) ;; this is what we are guarding the execution of [x () (cons EXCEPTION x)] ))) (cond ((and (pair? res) (eq? (car res) EXCEPTION)) (if chatty (print " - the-thunk threw exception >"(cdr res)"<")) (cons 'exception (cdr res))) (else (if chatty (print " - the-thunk returned result >"res"<")) (cons 'regular-result res))))))) (let loop ((guarded-res (guarded-thunk)) (retries-left retries) (fail-wait retry-delay)) (if chatty (print " ==========")) (let* ((wait-time (+ fail-wait (+ (* fail-wait back-off-factor) (* random-delay (/ (random 1024) 1024) )))) (res-type (car guarded-res)) (res-value (cdr guarded-res))) (cond ((and (eq? res-type 'regular-result) (accept-result? res-value)) (if chatty (print " + return result that satisfied accept-result? >"res-value"<")) res-value) ((> retries-left 0) (if chatty (print " - sleep "wait-time)) (thread-sleep! wait-time) (if chatty (print " + retry ["retries-left" tries left]")) (loop (guarded-thunk) (sub1 retries-left) wait-time)) ((eq? res-type 'regular-result) (if final-failure-returns-actual (begin (if chatty (print " + last try failed- return the result >"res-value"<")) res-value) (begin (if chatty (print " + last try failed- return canned failure value >"failure-value"<")) failure-value))) (else ;; no retries left; result was not accepted and res-type can only be 'exception (if final-failure-returns-actual (begin (if chatty (print " + last try failed with exception- re-throw it >"res-value"<")) (abort res-value)); re-raise the exception. TODO: find a way for call-history to show as though from entry to this function (begin (if chatty (print " + last try failed with exception- return canned failure value >"failure-value"<")) failure-value)))))))) (define (rpc-transport:server-shutdown server-id rpc:listener #!key (from-on-exit #f)) (BB> "rpc-transport:server-shutdown entered.") (on-exit (lambda () #t)) ;; turn off on-exit stuff ;;(tcp-close rpc:listener) ;; gotta exit nicely ;;(tasks:bb-server-set-state! server-id "stopped") ;; TODO: (low) the following is extraordinaritly slow. Maybe we don't even need portlogger for rpc anyway?? the exception-based failover when ports are taken is fast! ;;(BB> "before plog rel") ;;(portlogger:open-run-close portlogger:set-port (rpc:default-server-port) "released") (set! *time-to-exit* #t) (BB> "before db:sync-touched") (if *inmemdb* (db:sync-touched *inmemdb* *run-id* force-sync: #t)) (BB> "before bb-server-delete-record") (tasks:bb-server-delete-record server-id " rpc-transport:keep-running complete") (BB> "Before (exit) (from-on-exit="from-on-exit")") (unless from-on-exit (exit)) ;; sometimes we hang (around) here with 100% cpu. (BB> "After") ;; strace reveals endless: ;; getrusage(RUSAGE_SELF, {ru_utime={413, 917868}, ru_stime={0, 60003}, ...}) = 0 ;; getrusage(RUSAGE_SELF, {ru_utime={414, 9874}, ru_stime={0, 60003}, ...}) = 0 ;; getrusage(RUSAGE_SELF, {ru_utime={414, 13874}, ru_stime={0, 60003}, ...}) = 0 ;; getrusage(RUSAGE_SELF, {ru_utime={414, 105880}, ru_stime={0, 60003}, ...}) = 0 ;; getrusage(RUSAGE_SELF, {ru_utime={414, 109880}, ru_stime={0, 60003}, ...}) = 0 ;; getrusage(RUSAGE_SELF, {ru_utime={414, 201886}, ru_stime={0, 60003}, ...}) = 0 ;; getrusage(RUSAGE_SELF, {ru_utime={414, 205886}, ru_stime={0, 60003}, ...}) = 0 ;; getrusage(RUSAGE_SELF, {ru_utime={414, 297892}, ru_stime={0, 60003}, ...}) = 0 ;; getrusage(RUSAGE_SELF, {ru_utime={414, 301892}, ru_stime={0, 60003}, ...}) = 0 ;; getrusage(RUSAGE_SELF, {ru_utime={414, 393898}, ru_stime={0, 60003}, ...}) = 0 ;; getrusage(RUSAGE_SELF, {ru_utime={414, 397898}, ru_stime={0, 60003}, ...}) = 0 ;; make a post to chicken-users w/ http://paste.call-cc.org/paste?id=60a4b66a29ccf7d11359ea866db642c970735978 (if from-on-exit ;; avoid above condition! End current process externally since 1 in 20 (exit)'s result in hung, 100% cpu zombies. (see above) (system (conc "kill -9 "(current-process-id)))) ) ;; all routes though here end in exit ... ;; ;; start_server? ;; |
︙ | ︙ | |||
91 92 93 94 95 96 97 98 99 100 101 102 103 104 | (debug:print 0 *default-log-port* "INFO: Server for run-id " run-id " already running") (exit 0)) ;; let's get a server-id for this server ;; if at first we do not suceed, try 3 more times. (let ((server-id (retry-thunk (lambda () (tasks:bb-server-lock-slot run-id 'rpc)) retries: 4))) (when (not server-id) ;; dang we couldn't get a server-id. ;; since we didn't get the server lock we are going to clean up and bail out (debug:print-info 2 *default-log-port* "INFO: server pid=" (current-process-id) ", hostname=" (get-host-name) " not starting due to other candidates ahead in start queue") (tasks:bb-server-delete-records-for-this-pid " rpc-transport:launch") (exit 1)) | > | 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 | (debug:print 0 *default-log-port* "INFO: Server for run-id " run-id " already running") (exit 0)) ;; let's get a server-id for this server ;; if at first we do not suceed, try 3 more times. (let ((server-id (retry-thunk (lambda () (tasks:bb-server-lock-slot run-id 'rpc)) chatty: #t retries: 4))) (when (not server-id) ;; dang we couldn't get a server-id. ;; since we didn't get the server lock we are going to clean up and bail out (debug:print-info 2 *default-log-port* "INFO: server pid=" (current-process-id) ", hostname=" (get-host-name) " not starting due to other candidates ahead in start queue") (tasks:bb-server-delete-records-for-this-pid " rpc-transport:launch") (exit 1)) |
︙ | ︙ | |||
112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 | server-id) (exit))) (define *rpc-listener-port* #f) (define *rpc-listener-port-bind-timestamp* #f) (define *on-exit-flag #f) (define (rpc-transport:run hostn run-id server-id) (BB> "rpc-transport:run fired for hostn="hostn" run-id="run-id" server-id="server-id) (debug:print 2 *default-log-port* "Attempting to start the rpc server ...") ;; (trace rpc:publish-procedure!) ;;====================================================================== ;; start of publish-procedure section ;;====================================================================== (rpc:publish-procedure! 'server:login server:login) ;; this allows client to validate it is the same megatest instance as the server. No security here, just making sure we're in the right room. (BB> "published 'testing") (rpc:publish-procedure! 'testing (lambda () (BB> "Current-peer=["(rpc:current-peer)"]") (BB> "published rpc proc 'testing was invoked") "Just testing")) ;; procedure to receive arbitrary API request from client's rpc:send-receive/rpc-transport:client-api-send-receive (rpc:publish-procedure! 'rpc-transport:autoremote rpc-transport:autoremote) ;; can use this to run most anything at the remote | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | < < < < < | 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 | server-id) (exit))) (define *rpc-listener-port* #f) (define *rpc-listener-port-bind-timestamp* #f) (define *on-exit-flag #f) (define (rpc-transport:server-dat-get-iface vec) (vector-ref vec 0)) (define (rpc-transport:server-dat-get-port vec) (vector-ref vec 1)) (define (rpc-transport:server-dat-get-last-access vec) (vector-ref vec 5)) (define (rpc-transport:server-dat-get-transport vec) (vector-ref vec 6)) (define (rpc-transport:server-dat-update-last-access vec) (if (vector? vec) (vector-set! vec 5 (current-seconds)) (begin (print-call-chain (current-error-port)) (debug:print-error 0 *default-log-port* "call to rpc-transport:server-dat-update-last-access with non-vector!!")))) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; this client-side procedure makes rpc call to server and returns result ;; (define (rpc-transport:client-api-send-receive run-id serverdat cmd params #!key (numretries 3)) (let* ((iface (rpc-transport:server-dat-get-iface serverdat)) (port (rpc-transport:server-dat-get-port serverdat)) (res #f) (run-remote (rpc:procedure 'rpc-transport:autoremote iface port)) (api-exec (rpc:procedure 'api-exec iface port)) (send-receive (lambda () (tcp-buffer-size 0) (BB> "Entered SR run-id="run-id" cmd="cmd" params="params" iface="iface" port="port) (set! res (retry-thunk (lambda () (condition-case ;;(vector #t (run-remote cmd params)) (vector 'success (api-exec cmd params)) [x (exn i/o net) (vector 'comms-fail (conc "communications fail ["(->string x)"]") x)] [x () (vector 'other-fail "other fail ["(->string x)"]" x)])) chatty: #t accept-result?: (lambda(x) (and (vector? x) (vector-ref x 0))) retries: 4 back-off-factor: 1.5 random-wait: 0.2 retry-delay: 0.1 final-failure-returns-actual: #t)) (BB> "Leaving SR w/ "res) res )) (th1 (make-thread send-receive "send-receive")) (time-out-reached #f) (time-out (lambda () (thread-sleep! 45) (set! time-out-reached #t) (thread-terminate! th1) #f)) (th2 (make-thread time-out "time out"))) (thread-start! th1) (thread-start! th2) (thread-join! th1) (thread-terminate! th2) (debug:print-info 11 *default-log-port* "got res=" res) (if (vector? res) (case (vector-ref res 0) ((success) (vector #t (vector-ref res 1))) ((comms-fail) (debug:print 0 *default-log-port* "WARNING: comms failure for rpc request") ;;(debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) (vector #f (vector-ref res 1))) (else (BB> "res="res) (debug:print-error 0 *default-log-port* "error occured at server, info=" (vector-ref res 1)) (debug:print 0 *default-log-port* " client call chain:") (print-call-chain (current-error-port)) (debug:print 0 *default-log-port* " server call chain:") (pp (vector-ref res 1) (current-error-port)) (signal (vector-ref res 2)))) (signal (make-composite-condition (make-property-condition 'timeout 'message "nmsg-transport:client-api-send-receive-raw timed out talking to server")))))) (define (rpc-transport:run hostn run-id server-id) (BB> "rpc-transport:run fired for hostn="hostn" run-id="run-id" server-id="server-id) (debug:print 2 *default-log-port* "Attempting to start the rpc server ...") ;; (trace rpc:publish-procedure!) ;;====================================================================== ;; start of publish-procedure section ;;====================================================================== (rpc:publish-procedure! 'server:login server:login) ;; this allows client to validate it is the same megatest instance as the server. No security here, just making sure we're in the right room. (BB> "published 'testing") (rpc:publish-procedure! 'testing (lambda () (BB> "Current-peer=["(rpc:current-peer)"]") (BB> "published rpc proc 'testing was invoked") "Just testing")) ;; procedure to receive arbitrary API request from client's rpc:send-receive/rpc-transport:client-api-send-receive (rpc:publish-procedure! 'rpc-transport:autoremote rpc-transport:autoremote) ;; can use this to run most anything at the remote (rpc:publish-procedure! 'api-exec rpc-transport:api-exec) ;;====================================================================== ;; end of publish-procedure section ;;====================================================================== (let* ((db #f) (hostname (let ((res (get-host-name))) (BB> "hostname="res) res)) (server-start-time (current-seconds)) (server-timeout (server:get-timeout)) (ipaddrstr (let* ((ipstr (if (string=? "-" hostn) ;; (string-intersperse (map number->string (u8vector->list (hostname->ip hostname))) ".") (server:get-best-guess-address hostname) |
︙ | ︙ |