Overview
Comment: | Tentative improvements to server starting - separate out server start from db start (dbprep). |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | v1.60 |
Files: | files | file ages | folders |
SHA1: |
f8c63429f726b99d5759e257fd6002aa |
User & Date: | matt on 2014-09-09 23:06:00 |
Other Links: | branch diff | manifest | tags |
Context
2014-09-10
| ||
13:40 | Server bypass options increased. Now should bypass the server unless in 'running' or 'dbprep' mode check-in: e29bdd1210 user: mrwellan tags: v1.60 | |
2014-09-09
| ||
23:06 | Tentative improvements to server starting - separate out server start from db start (dbprep). check-in: f8c63429f7 user: matt tags: v1.60 | |
2014-09-05
| ||
11:53 | Use nbfake for launching servers check-in: d8c9e3b4ca user: mrwellan tags: v1.60 | |
Changes
Modified http-transport.scm from [36a92be4c5] to [4bbddddd32].
︙ | ︙ | |||
263 264 265 266 267 268 269 270 271 272 273 274 275 276 | (retry-request? (lambda (request) #f)) ;; send the data and get the response ;; extract the needed info from the http data and ;; process and return it. (let* ((send-recieve (lambda () (mutex-lock! *http-mutex*) (set! res (with-input-from-request ;; was dat fullurl (list (cons 'key "thekey") (cons 'cmd cmd) (cons 'params params)) read-string)) ;; Shouldn't this be a call to the managed call-all-connections stuff above? | > > | 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 | (retry-request? (lambda (request) #f)) ;; send the data and get the response ;; extract the needed info from the http data and ;; process and return it. (let* ((send-recieve (lambda () (mutex-lock! *http-mutex*) ;; (condition-case (with-input-from-request "http://localhost"; #f read-lines) ;; ((exn http client-error) e (print e))) (set! res (with-input-from-request ;; was dat fullurl (list (cons 'key "thekey") (cons 'cmd cmd) (cons 'params params)) read-string)) ;; Shouldn't this be a call to the managed call-all-connections stuff above? |
︙ | ︙ | |||
374 375 376 377 378 379 380 381 382 383 384 385 386 387 | (debug:print 0 "SYNC: time= " sync-time ", rem-time=" rem-time) ;; ;; set_running after our first pass through and start the db ;; (if (eq? server-state 'available) (begin (set! *inmemdb* (db:setup run-id)) (tasks:server-set-state! tdb server-id "running"))) (if (and (<= rem-time 4) (> rem-time 0)) (thread-sleep! rem-time) (thread-sleep! 4))) ;; fallback for if the math is changed ... | > > | 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 | (debug:print 0 "SYNC: time= " sync-time ", rem-time=" rem-time) ;; ;; set_running after our first pass through and start the db ;; (if (eq? server-state 'available) (begin (tasks:server-set-state! tdb server-id "dbprep") (thread-sleep! 5) ;; give some margin for queries to complete before switching from file based access to server based access (set! *inmemdb* (db:setup run-id)) (tasks:server-set-state! tdb server-id "running"))) (if (and (<= rem-time 4) (> rem-time 0)) (thread-sleep! rem-time) (thread-sleep! 4))) ;; fallback for if the math is changed ... |
︙ | ︙ |
Modified rmt.scm from [caf7bcfa2c] to [124a9a8be5].
︙ | ︙ | |||
67 68 69 70 71 72 73 | ;; bypass starting the server. ;; ;; NB// can cache the answer for server running for 10 seconds ... ;; (if (and (not (rmt:write-frequency-over-limit? cmd run-id)) (not (open-run-close tasks:server-running-or-starting? tasks:open-db run-id))) #f | | | | < | | 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 | ;; bypass starting the server. ;; ;; NB// can cache the answer for server running for 10 seconds ... ;; (if (and (not (rmt:write-frequency-over-limit? cmd run-id)) (not (open-run-close tasks:server-running-or-starting? tasks:open-db run-id))) #f (let loop ((numtries 2)) (let ((res (client:setup run-id))) (if res (hash-table-ref/default *runremote* run-id #f) ;; client:setup filled this in (hopefully) (if (> numtries 0) (begin ;; junk records can cause stuckness here. use this time to ;; clean out (open-run-close tasks:server-clean-out-old-records-for-run-id tasks:open-db run-id "auto-start-clean-up") (thread-sleep! 1) (loop (- numtries 1))) #f) ;; try couple times to start server - give up and do local queries ))))))) (jparams (db:obj->string params))) (if connection-info (let ((res (http-transport:client-api-send-receive run-id connection-info cmd jparams))) (if res (db:string->obj res) (let ((new-connection-info (client:setup run-id))) (debug:print 0 "WARNING: Communication failed, trying call to http-transport:client-api-send-receive again.") |
︙ | ︙ |
Modified tasks.scm from [9d8c491acc] to [929251744a].
︙ | ︙ | |||
167 168 169 170 171 172 173 | (lambda (num-running) (set! res num-running)) mdb "SELECT count(id) FROM servers WHERE run_id != 0 AND state = 'running';") res)) (define (tasks:server-clean-out-old-records-for-run-id mdb run-id tag) | | | | 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 | (lambda (num-running) (set! res num-running)) mdb "SELECT count(id) FROM servers WHERE run_id != 0 AND state = 'running';") res)) (define (tasks:server-clean-out-old-records-for-run-id mdb run-id tag) (sqlite3:execute mdb "UPDATE servers SET state=?,heartbeat=strftime('%s','now') WHERE state in ('available','dbprep','shutting-down') AND (strftime('%s','now') - start_time) > 50 AND run_id=?;" (conc "defunct" tag) run-id)) (define (tasks:server-force-clean-running-records-for-run-id mdb run-id tag) (sqlite3:execute mdb "UPDATE servers SET state=?,heartbeat=strftime('%s','now') WHERE state = 'running' AND run_id=?;" (conc "defunct" tag) run-id)) (define (tasks:server-force-clean-run-record mdb run-id iface port tag) (sqlite3:execute mdb "UPDATE servers SET state=?,heartbeat=strftime('%s','now') WHERE state = 'running' AND run_id=? AND interface=? AND port=?;" (conc "defunct" tag) run-id iface port)) (define (tasks:server-delete-records-for-this-pid mdb tag) (sqlite3:execute mdb "UPDATE servers SET state=?,heartbeat=strftime('%s','now') WHERE hostname=? AND pid=?;" (conc "defunct" tag) (get-host-name) (current-process-id))) (define (tasks:server-delete-record mdb server-id tag) (sqlite3:execute mdb "UPDATE servers SET state=?,heartbeat=strftime('%s','now') WHERE id=?;" (conc "defunct" tag) server-id) ;; use this opportuntity to clean out records over one month old or over 10 minutes old with port = -1 (i.e. a never used placeholder) (sqlite3:execute mdb "DELETE FROM servers WHERE state not in ('running','shutting-down','dbprep') AND (strftime('%s','now') - start_time) > 2628000;") (sqlite3:execute mdb "DELETE FROM servers WHERE state like 'defunct%' AND port=-1 AND (strftime('%s','now') - start_time) > 600;") ) (define (tasks:server-set-state! mdb server-id state) (sqlite3:execute mdb "UPDATE servers SET state=?,heartbeat=strftime('%s','now') WHERE id=?;" state server-id)) (define (tasks:server-set-interface-port mdb server-id interface port) |
︙ | ︙ | |||
262 263 264 265 266 267 268 | (let* ((header (list "id" "hostname" "pid" "interface" "port" "pubport" "state" "run_id" "priority" "start_time")) (selstr (string-intersperse header ",")) (res '())) (sqlite3:for-each-row (lambda (a . b) (set! res (cons (apply vector a b) res))) mdb | | | 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 | (let* ((header (list "id" "hostname" "pid" "interface" "port" "pubport" "state" "run_id" "priority" "start_time")) (selstr (string-intersperse header ",")) (res '())) (sqlite3:for-each-row (lambda (a . b) (set! res (cons (apply vector a b) res))) mdb (conc "SELECT " selstr " FROM servers WHERE run_id=? AND state in ('available','running','dbprep') ORDER BY start_time DESC;") run-id) (vector header res))) (define (tasks:get-server mdb run-id) (let ((res #f) (best #f)) (sqlite3:for-each-row |
︙ | ︙ | |||
285 286 287 288 289 290 291 | res)) (define (tasks:server-running-or-starting? mdb run-id) (let ((res #f)) (sqlite3:for-each-row (lambda (id) (set! res id)) | | | | 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 | res)) (define (tasks:server-running-or-starting? mdb run-id) (let ((res #f)) (sqlite3:for-each-row (lambda (id) (set! res id)) mdb ;; NEEDS dbprep ADDED "SELECT id FROM servers WHERE run_id=? AND (state = 'running' OR (state = 'dbprep' AND (strftime('%s','now') - start_time) < 60));" run-id) res)) (define (tasks:get-all-servers mdb) (let ((res '())) (sqlite3:for-each-row (lambda (id pid hostname interface port pubport start-time priority state mt-version last-update transport run-id) ;; 0 1 2 3 4 5 6 7 8 9 10 11 12 |
︙ | ︙ | |||
331 332 333 334 335 336 337 338 339 340 341 342 343 344 | (if (equal? hostname (get-host-name)) (begin (debug:print-info 1 "Sending signal/term to " pid " on " hostname) (process-signal pid signal/term) ;; local machine, send sig term (thread-sleep! 5) ;; give it five seconds to die peacefully then do a brutal kill (process-signal pid signal/kill)) (debug:print 0 "WARNING: Can't kill frozen server on remote host " hostname)))))) ;;====================================================================== ;; Tasks and Task monitors ;;====================================================================== ;;====================================================================== | > | 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 | (if (equal? hostname (get-host-name)) (begin (debug:print-info 1 "Sending signal/term to " pid " on " hostname) (process-signal pid signal/term) ;; local machine, send sig term (thread-sleep! 5) ;; give it five seconds to die peacefully then do a brutal kill (process-signal pid signal/kill)) (debug:print 0 "WARNING: Can't kill frozen server on remote host " hostname)))))) ;;====================================================================== ;; Tasks and Task monitors ;;====================================================================== ;;====================================================================== |
︙ | ︙ |
tests/installall/config/megatest.config.dat became a regular file with contents [736a5da885].
tests/installall/config/runconfigs.config.dat became a regular file with contents [3b8f260acb].