Overview
Comment: | Cleaned up client:start |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | try-nanomsg |
Files: | files | file ages | folders |
SHA1: |
f64f2f8ca80622ad2f123a7a1e5fc9ab |
User & Date: | matt on 2014-11-26 22:49:33 |
Other Links: | branch diff | manifest | tags |
Context
2014-11-26
| ||
23:09 | nanomsg transport fixed. check-in: 675b89e392 user: matt tags: try-nanomsg | |
22:49 | Cleaned up client:start check-in: f64f2f8ca8 user: matt tags: try-nanomsg | |
21:29 | Add timeout on all remote calls check-in: 245a3a0b6d user: matt tags: try-nanomsg | |
Changes
Modified client.scm from [ec1ead835f] to [0a4fff7154].
︙ | ︙ | |||
51 52 53 54 55 56 57 | ;; 2. We are a run tests, list runs or other interactive process and we must figure out ;; *transport-type* and *runremote* from the monitor.db ;; ;; client:setup ;; ;; lookup_server, need to remove *runremote* stuff ;; | | < < < < < < < < < < < < < < < < < < < < < < < < < < < | | | | | | | | | | | | > > > | | | | | | | | | | | | | | | | < | | | | | | < | < | | | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 | ;; 2. We are a run tests, list runs or other interactive process and we must figure out ;; *transport-type* and *runremote* from the monitor.db ;; ;; client:setup ;; ;; lookup_server, need to remove *runremote* stuff ;; (define (client:setup run-id #!key (remaining-tries 2) (failed-connects 0)) (debug:print-info 2 "client:setup remaining-tries=" remaining-tries) (let* ((tdbdat (tasks:open-db))) (if (<= remaining-tries 0) (begin (debug:print 0 "ERROR: failed to start or connect to server for run-id " run-id) (exit 1)) (let* ((server-dat (tasks:get-server (db:delay-if-busy tdbdat) run-id))) (debug:print-info 4 "client:setup server-dat=" server-dat ", remaining-tries=" remaining-tries) (if server-dat (let* ((iface (tasks:hostinfo-get-interface server-dat)) (hostname (tasks:hostinfo-get-hostname server-dat)) (port (tasks:hostinfo-get-port server-dat)) (start-res (case *transport-type* ((http)(http-transport:client-connect iface port)) ((nmsg)(nmsg-transport:client-connect hostname port)))) (ping-res (case *transport-type* ((http)(rmt:login-no-auto-client-setup start-res run-id)) ((nmsg)(let ((logininfo (rmt:login-no-auto-client-setup start-res run-id))) (if logininfo (vector-ref (vector-ref logininfo 1) 1) #f)))))) (if (and start-res ping-res) (begin (hash-table-set! *runremote* run-id start-res) (debug:print-info 2 "connected to " (http-transport:server-dat-make-url start-res)) start-res) (begin ;; login failed but have a server record, clean out the record and try again (debug:print-info 0 "client:setup, login failed, will attempt to start server ... start-res=" start-res ", run-id=" run-id ", server-dat=" server-dat) (case *transport-type* ((http)(http-transport:close-connections run-id))) (hash-table-delete! *runremote* run-id) (tasks:server-force-clean-run-record (db:delay-if-busy tdbdat) run-id (tasks:hostinfo-get-interface server-dat) (tasks:hostinfo-get-port server-dat) " client:setup (server-dat = #t)") (server:try-running run-id) (thread-sleep! 5) ;; give server a little time to start up (client:setup run-id remaining-tries: (- remaining-tries 1))))) (begin ;; no server registered (let ((num-available (tasks:num-in-available-state (db:dbdat-get-db tdbdat) run-id))) (debug:print-info 0 "client:setup, no server registered, remaining-tries=" remaining-tries " num-available=" num-available) (if (< num-available 2) (server:try-running run-id)) (thread-sleep! 5) ;; give server a little time to start up (client:setup run-id remaining-tries: (- remaining-tries 1))))))))) ;; (let ((host-info (hash-table-ref/default *runremote* run-id #f))) ;; (if host-info ;; this is a bit circular. the host-info *is* the start-res FIXME ;; (let* ((iface (http-transport:server-dat-get-iface host-info)) ;; (port (http-transport:server-dat-get-port host-info)) ;; (start-res (case *transport-type* ;; ((http)(http-transport:client-connect iface port)) ;; ((nmsg)(nmsg-transport:client-connect iface port)) ;; (http-transport:server-dat-get-socket host-info)) ;; (else #f))) ;; (ping-res (case *transport-type* ;; ((http)(rmt:login-no-auto-client-setup start-res run-id)) ;; ((nmsg)(let ((logininfo (rmt:login-no-auto-client-setup start-res run-id))) ;; (if logininfo ;; (vector-ref (vector-ref logininfo 1) 1) ;; #f))) ;; (else #f)))) ;; (if ping-res ;; sucessful login? ;; (begin ;; (debug:print-info 2 "client:setup, ping is good using host-info=" host-info ", remaining-tries=" remaining-tries) ;; start-res) ;; return the server info ;; ;; have host info but no ping. shutdown the current connection and try again ;; (begin ;; login failed ;; (debug:print-info 1 "client:setup, ping is bad for start-res=" start-res " and *runremote*=" host-info) ;; (case *transport-type* ;; ((http)(http-transport:close-connections run-id))) ;; (hash-table-delete! *runremote* run-id) ;; (if (< remaining-tries 8) ;; (thread-sleep! 5) ;; (thread-sleep! 1)) ;; (client:setup run-id remaining-tries: (- remaining-tries 1))))) ;; ;; YUK: rename server-dat here ;; ;; keep this as a function to ease future (define (client:start run-id server-info) (http-transport:client-connect (tasks:hostinfo-get-interface server-info) (tasks:hostinfo-get-port server-info))) ;; client:signal-handler |
︙ | ︙ |
Modified rmt.scm from [ac5c069146] to [e68219c025].
︙ | ︙ | |||
93 94 95 96 97 98 99 | (else (exit)))) (success (if (and dat (vector? dat)) (vector-ref dat 0) #f)) (res (if (and dat (vector? dat)) (vector-ref dat 1) #f))) (http-transport:server-dat-update-last-access connection-info) (if success (case *transport-type* ((http)(db:string->obj res)) | | | | > > | | 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 | (else (exit)))) (success (if (and dat (vector? dat)) (vector-ref dat 0) #f)) (res (if (and dat (vector? dat)) (vector-ref dat 1) #f))) (http-transport:server-dat-update-last-access connection-info) (if success (case *transport-type* ((http)(db:string->obj res)) ((nmsg)(vector-ref res 1))) (begin ;; let ((new-connection-info (client:setup run-id))) (debug:print 0 "WARNING: Communication failed, trying call to http-transport:client-api-send-receive again.") ;; (case *transport-type* ;; ((nmsg)(nn-close (http-transport:server-dat-get-socket connection-info)))) (hash-table-delete! *runremote* run-id) ;; don't keep using the same connection (tasks:kill-server-run-id run-id tag: "api-send-receive-failed") (tasks:start-and-wait-for-server (tasks:open-db) run-id 15) ;; (nmsg-transport:client-api-send-receive run-id connection-info cmd param remtries: (- remtries 1)))))) ;; no longer killing the server in http-transport:client-api-send-receive ;; may kill it here but what are the criteria? ;; start with three calls then kill server ;; (if (eq? attemptnum 3)(tasks:kill-server-run-id run-id)) ;; (thread-sleep! 2) (rmt:send-receive cmd run-id params attemptnum: (+ attemptnum 1)))))) (if (and (< attemptnum 10) (tasks:need-server run-id)) (begin (tasks:start-and-wait-for-server (db:delay-if-busy (tasks:open-db)) run-id 10) (hash-table-delete! *runremote* run-id) (client:setup run-id) (rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1))) (rmt:open-qry-close-locally cmd run-id params)))) (define (rmt:update-db-stats run-id rawcmd params duration) (mutex-lock! *db-stats-mutex*) (handle-exceptions exn (begin |
︙ | ︙ | |||
244 245 246 247 248 249 250 | (define (rmt:login run-id) (rmt:send-receive 'login run-id (list *toppath* megatest-version run-id *my-client-signature*))) ;; This login does no retries under the hood - it acts a bit like a ping. ;; (define (rmt:login-no-auto-client-setup connection-info run-id) | > | > | | 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 | (define (rmt:login run-id) (rmt:send-receive 'login run-id (list *toppath* megatest-version run-id *my-client-signature*))) ;; This login does no retries under the hood - it acts a bit like a ping. ;; (define (rmt:login-no-auto-client-setup connection-info run-id) (case *transport-type* ((http)(rmt:send-receive-no-auto-client-setup connection-info 'login run-id (list *toppath* megatest-version run-id *my-client-signature*))) ((nmsg)(nmsg-transport:client-api-send-receive run-id connection-info 'login (list *toppath* megatest-version run-id *my-client-signature*))))) ;; hand off a call to one of the db:queries statements ;; added run-id to make looking up the correct db possible ;; (define (rmt:general-call stmtname run-id . params) (rmt:send-receive 'general-call run-id (append (list stmtname run-id) params))) (define (rmt:sync-inmem->db run-id) |
︙ | ︙ |
Modified tests/fullrun/megatest.config from [7518de8980] to [26d5518426].
︙ | ︙ | |||
131 132 133 134 135 136 137 | # If the server can't be started on this port it will try the next port until # it succeeds port 8080 # This server will keep running this number of hours after last access. # Three minutes is 0.05 hours # timeout 0.025 | | | 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 | # If the server can't be started on this port it will try the next port until # it succeeds port 8080 # This server will keep running this number of hours after last access. # Three minutes is 0.05 hours # timeout 0.025 timeout 0.061 # Server is required - slower but more resistant to Sqlite issues. required yes # Start server when average query takes longer than this # server-query-threshold 55500 server-query-threshold -1 |
︙ | ︙ |