Index: client.scm ================================================================== --- client.scm +++ client.scm @@ -69,11 +69,11 @@ (begin (let ((num-available (tasks:bb-num-in-available-state run-id))) (debug:print-info 0 *default-log-port* "client:setup, no server registered, remaining-tries=" remaining-tries " num-available=" num-available) (if (< num-available 2) (server:try-running run-id)) - (thread-sleep! (+ 5 (random (- 20 remaining-tries)))) ;; give server a little time to start up, randomize a little to avoid start storms. + (thread-sleep! (+ 2 (random (- 20 remaining-tries)))) ;; give server a little time to start up, randomize a little to avoid start storms. (client:setup run-id remaining-tries: (- remaining-tries 1)))))) ((http) (client:setup-http run-id server-dat remaining-tries)) ((rpc) (rpc-transport:client-setup run-id server-dat remtries: remaining-tries)) (else (debug:print-error 0 *default-log-port* "(6) Transport [" @@ -111,148 +111,5 @@ (server:try-running run-id) (thread-sleep! 5) ;; give server a little time to start up (client:setup run-id remaining-tries: (- remaining-tries 1)) )))) -;; ((rpc) (rpc-transport:client-setup run-id remaining-tries: remaining-tries failed-connects: failed-connects)) ;;(client:setup-rpc run-id)) -;; ((http)(client:setup-http run-id remaining-tries: remaining-tries failed-connects: failed-connects)) -;; (else (rpc-transport:client-setup run-id remaining-tries: remaining-tries failed-connects: failed-connects)))) ;; (client:setup-rpc run-id)))) -;; -;; (define (client:login-no-auto-setup server-info run-id) -;; (case (server:get-transport) -;; ((rpc) (rpc:login-no-auto-client-setup server-info run-id)) -;; ((http) (rmt:login-no-auto-client-setup server-info run-id)) -;; (else (rpc:login-no-auto-client-setup server-info run-id)))) -;; -;; (define (client:setup-rpc run-id) -;; (debug:print 0 *default-log-port* "INFO: client:setup remaining-tries=" remaining-tries) -;; (if (<= remaining-tries 0) -;; (begin -;; (debug:print-error 0 *default-log-port* "failed to start or connect to server for run-id " run-id) -;; (exit 1)) -;; (let ((host-info (hash-table-ref/default *runremote* run-id #f))) -;; (debug:print-info 0 *default-log-port* "client:setup host-info=" host-info ", remaining-tries=" remaining-tries) -;; (if host-info -;; (let* ((iface (car host-info)) -;; (port (cadr host-info)) -;; (start-res (client:connect iface port)) -;; ;; (ping-res (server:ping-server run-id iface port)) -;; (ping-res (client:login-no-auto-setup start-res run-id))) -;; (if ping-res ;; sucessful login? -;; (begin -;; (hash-table-set! *runremote* run-id start-res) -;; start-res) ;; return the server info -;; (if (member remaining-tries '(3 4 6)) -;; (begin ;; login failed -;; (debug:print 25 *default-log-port* "INFO: client:setup start-res=" start-res ", run-id=" run-id ", server-dat=" host-info) -;; (hash-table-delete! *runremote* run-id) -;; (open-run-close tasks:server-force-clean-run-record -;; tasks:open-db -;; run-id -;; (car host-info) -;; (cadr host-info) -;; " client:setup (host-info=#t)") -;; (thread-sleep! 5) -;; (client:setup run-id remaining-tries: 10)) ;; (- remaining-tries 1))) -;; (begin -;; (debug:print 25 *default-log-port* "INFO: client:setup failed to connect, start-res=" start-res ", run-id=" run-id ", host-info=" host-info) -;; (thread-sleep! 5) -;; (client:setup run-id remaining-tries: (- remaining-tries 1)))))) -;; ;; YUK: rename server-dat here -;; (let* ((server-dat (open-run-close tasks:get-server-info tasks:open-db run-id))) -;; (debug:print-info 0 *default-log-port* "client:setup server-dat=" server-dat ", remaining-tries=" remaining-tries) -;; (if server-dat -;; (let* ((iface (tasks:hostinfo-get-interface server-dat)) -;; (port (tasks:hostinfo-get-port server-dat)) -;; (start-res (http-transport:client-connect iface port)) -;; ;; (ping-res (server:ping-server run-id iface port)) -;; (ping-res (rmt:login-no-auto-client-setup start-res run-id))) -;; (if start-res -;; (begin -;; (hash-table-set! *runremote* run-id start-res) -;; start-res) -;; (if (member remaining-tries '(2 5)) -;; (begin ;; login failed -;; (debug:print 25 *default-log-port* "INFO: client:setup start-res=" start-res ", run-id=" run-id ", server-dat=" server-dat) -;; (hash-table-delete! *runremote* run-id) -;; (open-run-close tasks:server-force-clean-run-record -;; tasks:open-db -;; run-id -;; (tasks:hostinfo-get-interface server-dat) -;; (tasks:hostinfo-get-port server-dat) -;; " client:setup (server-dat = #t)") -;; (thread-sleep! 2) -;; (server:try-running run-id) -;; (thread-sleep! 10) ;; give server a little time to start up -;; (client:setup run-id remaining-tries: 10)) ;; (- remaining-tries 1))) -;; (begin -;; (debug:print 25 *default-log-port* "INFO: client:setup start-res=" start-res ", run-id=" run-id ", server-dat=" server-dat) -;; (thread-sleep! 5) -;; (client:setup run-id remaining-tries: (- remaining-tries 1)))))) -;; (begin ;; no server registered -;; (if (eq? remaining-tries 2) -;; (begin -;; ;; (open-run-close tasks:server-clean-out-old-records-for-run-id tasks:open-db run-id " client:setup (server-dat=#f)") -;; (client:setup run-id remaining-tries: 10)) -;; (begin -;; (thread-sleep! 2) -;; (debug:print 25 *default-log-port* "INFO: client:setup start-res (not defined here), run-id=" run-id ", server-dat=" server-dat) -;; (if (< (open-run-close tasks:num-in-available-state tasks:open-db run-id) 3) -;; (begin -;; ;; (open-run-close tasks:server-clean-out-old-records-for-run-id tasks:open-db run-id " client:setup (server-dat=#f)") -;; (server:try-running run-id))) -;; (thread-sleep! 10) ;; give server a little time to start up -;; (client:setup run-id remaining-tries: (- remaining-tries 1))))))))))) - -;; Do all the connection work, look up the transport type and set up the -;; connection if required. -;; -;; There are two scenarios. -;; 1. We are a test manager and we received *transport-type* and *runremote* via cmdline -;; 2. We are a run tests, list runs or other interactive process and we must figure out -;; *transport-type* and *runremote* from the monitor.db -;; -;; client:setup -;; -;; lookup_server, need to remove *runremote* stuff -;; - -;; BB: commenting out orphan code. -;; -;; ;; keep this as a function to ease future -;; (define (client:start run-id server-info) -;; (http-transport:client-connect (tasks:hostinfo-get-interface server-info) -;; (tasks:hostinfo-get-port server-info))) - -;; ;; client:signal-handler -;; (define (client:signal-handler signum) -;; (signal-mask! signum) -;; (set! *time-to-exit* #t) -;; (handle-exceptions -;; exn -;; (debug:print 0 *default-log-port* " ... exiting ...") -;; (let ((th1 (make-thread (lambda () -;; "") ;; do nothing for now (was flush out last call if applicable) -;; "eat response")) -;; (th2 (make-thread (lambda () -;; (debug:print-error 0 *default-log-port* "Received ^C, attempting clean exit. Please be patient and wait a few seconds before hitting ^C again.") -;; (thread-sleep! 1) ;; give the flush one second to do it's stuff -;; (debug:print 0 *default-log-port* " Done.") -;; (exit 4)) -;; "exit on ^C timer"))) -;; (thread-start! th2) -;; (thread-start! th1) -;; (thread-join! th2)))) -;; -;; ;; client:launch -;; ;; Need to set the signal handler somewhere other than here as this -;; ;; routine will go away. -;; ;; -;; (define (client:launch run-id) -;; (set-signal-handler! signal/int client:signal-handler) -;; (set-signal-handler! signal/term client:signal-handler) -;; (if (client:setup run-id) -;; (debug:print-info 2 *default-log-port* "connected as client") -;; (begin -;; (debug:print-error 0 *default-log-port* "Failed to connect as client") -;; (exit)))) -;; Index: rpc-transport.scm ================================================================== --- rpc-transport.scm +++ rpc-transport.scm @@ -241,18 +241,22 @@ (print-call-chain (current-error-port)) (debug:print-error 0 *default-log-port* "call to rpc-transport:server-dat-update-last-access with non-vector!!")))) (define *api-exec-ht* (make-hash-table)) - +(define *api-exec-mutex* (make-mutex)) ;; let's see if caching the rpc stub curbs thread-profusion on server side (define (rpc-transport:get-api-exec iface port) + (mutex-lock! *api-exec-mutex*) (let* ((lu (hash-table-ref/default *api-exec-ht* (cons iface port) #f))) (if lu - lu + (begin + (mutex-unlock! *api-exec-mutex*) + lu) (let ((res (rpc:procedure 'api-exec iface port))) (hash-table-set! *api-exec-ht* (cons iface port) res) + (mutex-unlock! *api-exec-mutex*) res)))) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; this client-side procedure makes rpc call to server and returns result ;; @@ -277,11 +281,11 @@ [x (exn i/o net) (vector 'comms-fail (conc "communications fail ["(->string x)"]") x)] [x () (vector 'other-fail "other fail ["(->string x)"]" x)])) chatty: #f accept-result?: (lambda(x) (and (vector? x) (vector-ref x 0))) - retries: 4 + retries: 8 back-off-factor: 1.5 random-wait: 0.2 retry-delay: 0.1 final-failure-returns-actual: #t)) ;;(BB> "HEY res="res) @@ -303,11 +307,13 @@ ;;(BB> "alt got res="res) (debug:print-info 11 *default-log-port* "got res=" res) (if (vector? res) (case (vector-ref res 0) ((success) (vector #t (vector-ref res 1))) - ((comms-fail) + ( + (comms-fail other-fail) + ;;(comms-fail) (debug:print 0 *default-log-port* "WARNING: comms failure for rpc request >>"res"<<") ;;(debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) (vector #f (vector-ref res 1))) (else (debug:print-error 0 *default-log-port* "error occured at server, info=" (vector-ref res 1))