Overview
Comment: | Fixed couple issues with the throttle by api load but still not sure it is working right. Cleaned up few other transport things. |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | multi-server-hack |
Files: | files | file ages | folders |
SHA1: |
e7dcebe6869ab0bfa639c16b78ac1fc8 |
User & Date: | matt on 2017-03-24 18:22:57 |
Other Links: | branch diff | manifest | tags |
Context
2017-03-24
| ||
23:35 | Merged in multi-server-hack check-in: 99a40be38a user: matt tags: v1.64 | |
22:18 | non-good commit of merge multi-server-hack into v1.64 Leaf check-in: 42e2fcd1c8 user: matt tags: private (unpublished) | |
18:22 | Fixed couple issues with the throttle by api load but still not sure it is working right. Cleaned up few other transport things. Closed-Leaf check-in: e7dcebe686 user: matt tags: multi-server-hack | |
15:55 | Default to treating a log as recent instead of ancient. This might be part of the run-away servers issue check-in: 31427e83dd user: matt tags: multi-server-hack | |
Changes
Modified api.scm from [466ca51286] to [510d0b0df2].
︙ | ︙ | |||
114 115 116 117 118 119 120 | ;; - keep it simple, only return the actual result of the call, i.e. no meta info here ;; ;; - returns #( flag result ) ;; (define (api:execute-requests dbstruct dat) (handle-exceptions exn | | < | | | | | | | | | | | | 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 | ;; - keep it simple, only return the actual result of the call, i.e. no meta info here ;; ;; - returns #( flag result ) ;; (define (api:execute-requests dbstruct dat) (handle-exceptions exn (let ((call-chain (get-call-chain))) (debug:print 0 *default-log-port* "WARNING: api:execute-requests received an exception from peer") (print-call-chain (current-error-port)) (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) (vector #f (vector exn call-chain dat))) ;; return some stuff for debug if an exception happens (cond ((not (vector? dat)) ;; it is an error to not receive a vector (vector #f (vector #f "remote must be called with a vector"))) ((> *api-process-request-count* 20) ;; 20) 'overloaded) ;; the inner vector is what gets returned. nope, don't know why. please refactor! (else (let* ((cmd-in (vector-ref dat 0)) (cmd (if (symbol? cmd-in) cmd-in (string->symbol cmd-in))) (params (vector-ref dat 1)) (start-t (current-milliseconds)) (readonly-mode (dbr:dbstruct-read-only dbstruct)) (readonly-command (member cmd api:read-only-queries)) (writecmd-in-readonly-mode (and readonly-mode (not readonly-command))) (res (if writecmd-in-readonly-mode (conc "attempt to run write command "cmd" on a read-only database") (case cmd ;;=============================================== ;; READ/WRITE QUERIES |
︙ | ︙ | |||
270 271 272 273 274 275 276 | ((sdb-qry) (apply sdb:qry params)) ((ping) (current-process-id)) ;; TESTMETA ((testmeta-get-record) (apply db:testmeta-get-record dbstruct params)) ;; TASKS | | > > > > | | | | | > | > > | 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 | ((sdb-qry) (apply sdb:qry params)) ((ping) (current-process-id)) ;; TESTMETA ((testmeta-get-record) (apply db:testmeta-get-record dbstruct params)) ;; TASKS ((find-task-queue-records) (apply tasks:find-task-queue-records dbstruct params)) (else (debug:print 0 *default-log-port* "ERROR: bad api call " cmd) (conc "ERROR: BAD api call " cmd)))))) ;; save all stats (let ((delta-t (- (current-milliseconds) start-t))) (hash-table-set! *db-api-call-time* cmd (cons delta-t (hash-table-ref/default *db-api-call-time* cmd '())))) (if writecmd-in-readonly-mode (vector #f res) (vector #t res))))))) ;; http-server send-response ;; api:process-request ;; db:* ;; ;; NB// Runs on the server as part of the server loop ;; (define (api:process-request dbstruct $) ;; the $ is the request vars proc (set! *api-process-request-count* (+ *api-process-request-count* 1)) (let* ((cmd ($ 'cmd)) (paramsj ($ 'params)) (params (db:string->obj paramsj transport: 'http)) ;; incoming data from the POST (or is it a GET?) (resdat (api:execute-requests dbstruct (vector cmd params))) ;; process the request, resdat = #( flag result ) (success (vector-ref resdat 0)) (res (vector-ref resdat 1))) ;; (vector flag payload), get the payload, ignore the flag (why?) (if (not success) (debug:print 0 *default-log-port* "ERROR: success flag is #f for " cmd " with params " params)) (if (> *api-process-request-count* *max-api-process-requests*) (set! *max-api-process-requests* *api-process-request-count*)) (set! *api-process-request-count* (- *api-process-request-count* 1)) ;; This can be here but needs controls to ensure it doesn't run more than every 4 seconds ;; (rmt:dat->json-str ;; (if (or (string? res) ;; (list? res) ;; (number? res) ;; (boolean? res)) ;; res ;; (list "ERROR, not string, list, number or boolean" 1 cmd params res))))) (db:obj->string res transport: 'http))) |
Modified http-transport.scm from [9751cbc3b5] to [9769576fcd].
︙ | ︙ | |||
264 265 266 267 268 269 270 | (th2 (make-thread time-out "time out"))) (thread-start! th1) (thread-start! th2) (thread-join! th1) (thread-terminate! th2) (debug:print-info 11 *default-log-port* "got res=" res) (if (vector? res) | | | | 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 | (th2 (make-thread time-out "time out"))) (thread-start! th1) (thread-start! th2) (thread-join! th1) (thread-terminate! th2) (debug:print-info 11 *default-log-port* "got res=" res) (if (vector? res) (if (vector-ref res 0) ;; this is the first flag or the second flag? res ;; this is the *inner* vector? seriously? why? (if (debug:debug-mode 11) (begin ;; note: this code also called in nmsg-transport - consider consolidating it (debug:print-error 11 *default-log-port* "error occured at server, info=" (vector-ref res 2)) (debug:print 11 *default-log-port* " client call chain:") (print-call-chain (current-error-port)) (debug:print 11 *default-log-port* " server call chain:") (pp (vector-ref res 1) (current-error-port)) |
︙ | ︙ |
Modified rmt.scm from [a6b31e3da3] to [c559126227].
︙ | ︙ | |||
195 196 197 198 199 200 201 | (else (debug:print 0 *default-log-port* "ERROR: transport " (remote-transport runremote) " not supported") (exit)))) (success (if (vector? dat) (vector-ref dat 0) #f)) (res (if (vector? dat) (vector-ref dat 1) #f))) (if (vector? conninfo)(http-transport:server-dat-update-last-access conninfo)) ;; refresh access time ;; (mutex-unlock! *rmt-mutex*) | | < < < | > | < < < < | > | | | | | < | | | | 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 | (else (debug:print 0 *default-log-port* "ERROR: transport " (remote-transport runremote) " not supported") (exit)))) (success (if (vector? dat) (vector-ref dat 0) #f)) (res (if (vector? dat) (vector-ref dat 1) #f))) (if (vector? conninfo)(http-transport:server-dat-update-last-access conninfo)) ;; refresh access time ;; (mutex-unlock! *rmt-mutex*) (debug:print-info 13 *default-log-port* "rmt:send-receive, case 9. conninfo=" conninfo " dat=" dat " runremote = " runremote) (mutex-unlock! *rmt-mutex*) (if success ;; success only tells us that the transport was successful, have to examine the data to see if there was a detected issue at the other end (if (and (symbol? res) (eq? res 'overloaded)) (let ((wait-delay (+ attemptnum (* attemptnum 10)))) (debug:print 0 *default-log-port* "WARNING: server is overloaded. Delaying " wait-delay " seconds and trying call again.") (thread-sleep! wait-delay) (rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1))) res) ;; All good, return res (begin (debug:print 0 *default-log-port* "WARNING: communication failed. Trying again, try num: " attemptnum) (remote-conndat-set! runremote #f) (remote-server-url-set! runremote #f) (debug:print-info 12 *default-log-port* "rmt:send-receive, case 9.1") (if (not (server:check-if-running *toppath*)) (server:start-and-wait *toppath*)) (rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1))))))))) ;; (define (rmt:update-db-stats run-id rawcmd params duration) ;; (mutex-lock! *db-stats-mutex*) ;; (handle-exceptions ;; exn ;; (begin ;; (debug:print 0 *default-log-port* "WARNING: stats collection failed in update-db-stats") |
︙ | ︙ |
Modified tests.scm from [bccd138868] to [897e386321].
︙ | ︙ | |||
484 485 486 487 488 489 490 | (tests:generate-html-summary-for-iterated-test run-id test-id test-name outputfilename)) (common:simple-file-release-lock lockf) (change-directory orig-dir) ;; NB// tests:test-set-toplog! is remote internal... (tests:test-set-toplog! run-id test-name outputfilename)) ;; didn't get the lock, check to see if current update started later than this ;; update, if so we can exit without doing any work | > > > | | 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 | (tests:generate-html-summary-for-iterated-test run-id test-id test-name outputfilename)) (common:simple-file-release-lock lockf) (change-directory orig-dir) ;; NB// tests:test-set-toplog! is remote internal... (tests:test-set-toplog! run-id test-name outputfilename)) ;; didn't get the lock, check to see if current update started later than this ;; update, if so we can exit without doing any work (if (> my-start-time (handle-exceptions exn 0 (file-modification-time lockf))) ;; we started since current re-gen in flight, delay a little and try again (begin (debug:print-info 1 *default-log-port* "Waiting to update " outputfilename ", another test currently updating it") (thread-sleep! (+ 5 (random 5))) ;; delay between 5 and 10 seconds (loop (common:simple-file-lock lockf)))))))))) (define (tests:generate-html-summary-for-iterated-test run-id test-id test-name outputfilename) |
︙ | ︙ |