Overview
Comment: | still broke but differently |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | v1.65-wip |
Files: | files | file ages | folders |
SHA1: |
0869d10a938cbcba1b028d87fd7a6cc8 |
User & Date: | matt on 2019-10-02 00:05:38 |
Other Links: | branch diff | manifest | tags |
Context
2019-10-02
| ||
17:46 | Remove muffs from *toppath* in calc-ro-mode. Added trace to rmtmod.scm while debugging. check-in: dc7ab217fe user: mrwellan tags: v1.65-wip | |
00:05 | still broke but differently check-in: 0869d10a93 user: matt tags: v1.65-wip | |
2019-10-01
| ||
22:54 | Broke it with mutex deadlock check-in: 71a141e42e user: matt tags: v1.65-wip | |
Changes
Modified rmt.scm from [5ad0893a6b] to [a8b59bdcdd].
︙ | ︙ | |||
74 75 76 77 78 79 80 | (if (member cmd '(blah)) (begin (mutex-lock! *send-receive-mutex*) (if (not *runremote*)(set! *runremote* (make-remote))) (let ((ulex:conn (remote-ulex:conn *runremote*))) (if (not ulex:conn)(remote-ulex:conn-set! *runremote* (rmtmod:setup-ulex *toppath*))) (rmtmod:send-receive-ulex ulex:conn cmd rid params attemptnum area-dat))) | | | | 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 | (if (member cmd '(blah)) (begin (mutex-lock! *send-receive-mutex*) (if (not *runremote*)(set! *runremote* (make-remote))) (let ((ulex:conn (remote-ulex:conn *runremote*))) (if (not ulex:conn)(remote-ulex:conn-set! *runremote* (rmtmod:setup-ulex *toppath*))) (rmtmod:send-receive-ulex ulex:conn cmd rid params attemptnum area-dat))) (rmt:send-receive-orig *default-log-port* runremote *rmt-mutex* areapath *db-multi-sync-mutex* cmd rid params attemptnum: attemptnum area-dat: area-dat ro-queries: api:read-only-queries)))) ;; RA => e.g. usage (rmt:send-receive 'get-var #f (list varname)) ;; ;; add multi-sync-mutex ;; (define (rmt:send-receive-orig log-port runremote rmt-mutex toppath multi-sync-mutex cmd rid params #!key (attemptnum 1)(area-dat #f)) ;; start attemptnum at 1 so the modulo below works as expected #;(common:telemetry-log (conc "rmt:"(->string cmd)) payload: `((rid . ,rid) (params . ,params))) ;; do all the prep locked under the rmt-mutex ;;(mutex-lock! rmt-mutex) ;; 1. check if server is started IFF cmd is a write OR if we are not on the homehost, store in runremote ;; 2. check the age of the connections. refresh the connection if it is older than timeout-20 seconds. ;; 3. do the query, if on homehost use local access ;; (let* ((start-time (current-seconds)) ;; snapshot time so all use cases get same value |
︙ | ︙ | |||
113 114 115 116 117 118 119 | ((> attemptnum 15) (debug:print 0 log-port "ERROR: 15 tries to start/connect to server. Giving up.") (exit 1)) ;; readonly mode, read request- handle it - case 2 ((and readonly-mode (member cmd api:read-only-queries)) | | | | | | | | | | | | | | | 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 | ((> attemptnum 15) (debug:print 0 log-port "ERROR: 15 tries to start/connect to server. Giving up.") (exit 1)) ;; readonly mode, read request- handle it - case 2 ((and readonly-mode (member cmd api:read-only-queries)) ;; (mutex-unlock! rmt-mutex) (debug:print-info 12 log-port "rmt:send-receive, case 2") (rmt:open-qry-close-locally log-port multi-sync-mutex cmd 0 params ro-queries: api:read-only-queries) ) ;; readonly mode, write request. Do nothing, return #f (readonly-mode (extras-readonly-mode rmt-mutex log-port cmd params)) ;; This block was for pre-emptively resetting the connection if there had been no communication for some time. ;; I don't think it adds any value. If the server is not there, just fail and start a new connection. ;; also, the expire-time calculation might not be correct. We want, time-since-last-server-access > (server:get-timeout) ;; ;; reset the connection if it has been unused too long ((and runremote (remote-conndat runremote) (> (current-seconds) ;; if it has been more than server-timeout seconds since last contact, close this connection and start a new on (+ (http-transport:server-dat-get-last-access (remote-conndat runremote)) (remote-server-timeout runremote)))) (debug:print-info 0 log-port "Connection to " (remote-server-url runremote) " expired due to no accesses, forcing new connection.") (http-transport:close-connections area-dat: runremote) (remote-conndat-set! runremote #f) ;; invalidate the connection, thus forcing a new connection. ;; (mutex-unlock! rmt-mutex) (rmt:send-receive-orig log-port runremote rmt-mutex toppath multi-sync-mutex cmd rid params attemptnum: attemptnum)) ;; on homehost and this is a read ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required (cdr (remote-hh-dat runremote)) ;; on homehost (member cmd api:read-only-queries)) ;; this is a read ;; (mutex-unlock! rmt-mutex) (debug:print-info 12 log-port "rmt:send-receive, case 5") (rmt:open-qry-close-locally log-port multi-sync-mutex cmd 0 params ro-queries: api:read-only-queries)) ;; on homehost and this is a write, we already have a server, but server has died ((and (cdr (remote-hh-dat runremote)) ;; on homehost (not (member cmd api:read-only-queries)) ;; this is a write (remote-server-url runremote) ;; have a server (not (server:ping (remote-server-url runremote)))) ;; server has died. NOTE: this is not a cheap call! Need better approach. (set! *runremote* (make-remote)) (remote-force-server-set! runremote (common:force-server?)) ;; (mutex-unlock! rmt-mutex) (debug:print-info 12 log-port "rmt:send-receive, case 6") (rmt:send-receive-orig log-port runremote rmt-mutex toppath multi-sync-mutex cmd rid params attemptnum: attemptnum)) ;; on homehost and this is a write, we already have a server ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required (cdr (remote-hh-dat runremote)) ;; on homehost (not (member cmd api:read-only-queries)) ;; this is a write (remote-server-url runremote)) ;; have a server ;;(mutex-unlock! rmt-mutex) (debug:print-info 12 log-port "rmt:send-receive, case 4.1") (rmt:open-qry-close-locally log-port multi-sync-mutex cmd 0 params ro-queries: api:read-only-queries)) ;; on homehost, no server contact made and this is a write, passively start a server ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required (cdr (remote-hh-dat runremote)) ;; have homehost (not (remote-server-url runremote)) ;; no connection yet (not (member cmd api:read-only-queries))) ;; not a read-only query (debug:print-info 12 log-port "rmt:send-receive, case 8") (let ((server-url (server:check-if-running toppath))) ;; (server:read-dotserver->url toppath))) ;; (server:check-if-running toppath))) ;; Do NOT want to run server:check-if-running - very expensive to do for every write call (if server-url (remote-server-url-set! runremote server-url) ;; the string can be consumed by the client setup if needed (if (common:force-server?) (server:start-and-wait toppath) (server:kind-run toppath)))) (remote-force-server-set! runremote (common:force-server?)) ;; (mutex-unlock! rmt-mutex) (debug:print-info 12 log-port "rmt:send-receive, case 8.1") (rmt:open-qry-close-locally log-port multi-sync-mutex cmd 0 params ro-queries: api:read-only-queries)) ((or (and (remote-force-server runremote) ;; we are forcing a server and don't yet have a connection to one (not (remote-conndat runremote))) (and (not (cdr (remote-hh-dat runremote))) ;; not on a homehost (not (remote-conndat runremote)))) ;; and no connection (debug:print-info 12 log-port "rmt:send-receive, case 9, hh-dat: " (remote-hh-dat runremote) " conndat: " (remote-conndat runremote)) ;;(mutex-unlock! rmt-mutex) (if (not (server:check-if-running toppath)) ;; who knows, maybe one has started up? (server:start-and-wait toppath)) (remote-conndat-set! runremote (rmt:get-connection-info toppath)) ;; calls client:setup which calls client:setup-http (rmt:send-receive-orig log-port runremote rmt-mutex toppath multi-sync-mutex cmd rid params attemptnum: attemptnum)) ;; TODO: add back-off timeout as ;; all set up if get this far, dispatch the query ((and (not (remote-force-server runremote)) (cdr (remote-hh-dat runremote))) ;; we are on homehost ;;(mutex-unlock! rmt-mutex) (debug:print-info 12 log-port "rmt:send-receive, case 10") (rmt:open-qry-close-locally log-port multi-sync-mutex cmd (if rid rid 0) params ro-queries: api:read-only-queries)) ;; not on homehost, do server query (else (extras-case-11 log-port runremote cmd params attemptnum rid))))) ;; bunch of small functions factored out of send-receive to make debug easier ;; |
︙ | ︙ |
Modified rmtmod.scm from [dce20d5361] to [fd791a8953].
︙ | ︙ | |||
73 74 75 76 77 78 79 | ;; db stuff for local db access (set! db:dbfile-path dbfile-path) (set! db:setup dbsetup) (set! apt:execute-requests exec-req) (set! api:read-only-queries read-only-queries) ) | | | | 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 | ;; db stuff for local db access (set! db:dbfile-path dbfile-path) (set! db:setup dbsetup) (set! apt:execute-requests exec-req) (set! api:read-only-queries read-only-queries) ) (define (rmt:open-qry-close-locally log-port multi-sync-mutex cmd run-id params #!key (ro-queries '())(remretries 5)) (let* ((qry-is-write (not (member cmd ro-queries))) (db-file-path (db:dbfile-path)) ;; 0)) (dbstruct-local (db:setup #t)) ;; make-dbr:dbstruct path: dbdir local: #t))) (read-only (not (file-write-access? db-file-path))) (start (current-milliseconds)) (resdat (if (not (and read-only qry-is-write)) (let ((v (api:execute-requests dbstruct-local (vector (symbol->string cmd) params)))) (handle-exceptions ;; there has been a long history of receiving strange errors from values returned by the client when things go wrong.. |
︙ | ︙ | |||
102 103 104 105 106 107 108 | (if (and read-only qry-is-write) (debug:print 0 log-port "ERROR: attempt to write to read-only database ignored. cmd=" cmd)) (if (not success) (if (> remretries 0) (begin (debug:print-error 0 log-port "local query failed. Trying again.") (thread-sleep! (/ (random 5000) 1000)) ;; some random delay | | | 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 | (if (and read-only qry-is-write) (debug:print 0 log-port "ERROR: attempt to write to read-only database ignored. cmd=" cmd)) (if (not success) (if (> remretries 0) (begin (debug:print-error 0 log-port "local query failed. Trying again.") (thread-sleep! (/ (random 5000) 1000)) ;; some random delay (rmt:open-qry-close-locally log-port multi-sync-mutex cmd run-id params ro-queries: ro-queries remretries: (- remretries 1))) (begin (debug:print-error 0 log-port "too many retries in rmt:open-qry-close-locally, giving up") #f)) (begin ;; (rmt:update-db-stats run-id cmd params duration) ;; mark this run as dirty if this was a write, the watchdog is responsible for syncing it #;(if qry-is-write |
︙ | ︙ | |||
132 133 134 135 136 137 138 | (begin (remote-ro-mode-set! runremote ro-mode) (remote-ro-mode-checked-set! runremote #t) ro-mode) ro-mode)))) (define (extras-readonly-mode rmt-mutex log-port cmd params) | | | | | 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 | (begin (remote-ro-mode-set! runremote ro-mode) (remote-ro-mode-checked-set! runremote #t) ro-mode) ro-mode)))) (define (extras-readonly-mode rmt-mutex log-port cmd params) ;;(mutex-unlock! rmt-mutex) (debug:print-info 12 log-port "rmt:send-receive, case 3") (debug:print 0 log-port "WARNING: write transaction requested on a readonly area. cmd="cmd" params="params) #f) (define (extras-transport-failed *default-log-port* *rmt-mutex* attemptnum runremote cmd rid params) (debug:print 0 *default-log-port* "WARNING: communication failed. Trying again, try num: " attemptnum) ;;(mutex-lock! *rmt-mutex*) (remote-conndat-set! runremote #f) (http-transport:close-connections area-dat: runremote) (remote-server-url-set! runremote #f) ;;(mutex-unlock! *rmt-mutex*) (debug:print-info 12 *default-log-port* "rmt:send-receive, case 9.1") (rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1))) (define (extras-transport-succeded *default-log-port* *rmt-mutex* attemptnum runremote res params rid cmd) (if (and (vector? res) (eq? (vector-length res) 2) (eq? (vector-ref res 1) 'overloaded)) ;; since we are |
︙ | ︙ | |||
168 169 170 171 172 173 174 | ;; is bad or the ;; server is ;; overloaded and we ;; want to ease off ;; the queries (let ((wait-delay (+ attemptnum (* attemptnum 10)))) (debug:print 0 *default-log-port* "WARNING: server is overloaded. Delaying " wait-delay " seconds and trying call again.") | | | | 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 | ;; is bad or the ;; server is ;; overloaded and we ;; want to ease off ;; the queries (let ((wait-delay (+ attemptnum (* attemptnum 10)))) (debug:print 0 *default-log-port* "WARNING: server is overloaded. Delaying " wait-delay " seconds and trying call again.") ;;(mutex-lock! *rmt-mutex*) (http-transport:close-connections area-dat: runremote) (set! *runremote* #f) ;; force starting over ;;(mutex-unlock! *rmt-mutex*) (thread-sleep! wait-delay) (rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1))) res)) ;; All good, return res ;;====================================================================== ;; ulex and steps stuff ;;====================================================================== |
︙ | ︙ |