Overview
Comment: | Random port assignment, minimize re-use of ports |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | v1.60 |
Files: | files | file ages | folders |
SHA1: |
bbd7cb0f5a1d5d9963c1b00c2ea56637 |
User & Date: | matt on 2014-02-24 19:44:31 |
Other Links: | branch diff | manifest | tags |
Context
2014-02-24
| ||
22:15 | Added debugging tags to server state changes. Cleaned up dashboard to display new server data. check-in: 904e5f7d6c user: matt tags: v1.60 | |
19:44 | Random port assignment, minimize re-use of ports check-in: bbd7cb0f5a user: matt tags: v1.60 | |
16:46 | Trying to get reliable server starts in the face of ports, hosts, kills etc. What fun check-in: cb995a99df user: mrwellan tags: v1.60 | |
Changes
Modified docs/manual/howto.txt from [d3633d3bbf] to [b28c4b0da6].
︙ | ︙ | |||
55 56 57 58 59 60 61 | The runscript method is a brute force way to run scripts where the user is responsible for setting STATE and STATUS ------------------- runscript main.csh ------------------- | > > > > > > > > | 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 | The runscript method is a brute force way to run scripts where the user is responsible for setting STATE and STATUS ------------------- runscript main.csh ------------------- Debugging Server Problems ~~~~~~~~~~~~~~~~~~~~~~~~~ ---------------- sudo lsof -i sudo netstat -lptu sudo netstat -tulpn ---------------- |
Modified http-transport.scm from [78b88f80d9] to [f8416b573d].
︙ | ︙ | |||
389 390 391 392 393 394 395 | (debug:print-info 0 "Average non-cached time " (if (eq? *number-non-write-queries* 0) "n/a (no queries)" (/ *total-non-write-delay* *number-non-write-queries*)) " ms") (debug:print-info 0 "Server shutdown complete. Exiting") | | | > | 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 | (debug:print-info 0 "Average non-cached time " (if (eq? *number-non-write-queries* 0) "n/a (no queries)" (/ *total-non-write-delay* *number-non-write-queries*)) " ms") (debug:print-info 0 "Server shutdown complete. Exiting") (tasks:server-delete-record tdb server-id) (exit)))))) ;; all routes though here end in exit ... ;; ;; start_server? ;; (define (http-transport:launch run-id) (set! *run-id* run-id) (if (args:get-arg "-daemonize") (daemon:ize)) (if (server:check-if-running run-id) (begin (debug:print 0 "INFO: Server for run-id " run-id " already running") (exit 0))) (let ((server-id (open-run-close tasks:server-lock-slot tasks:open-db run-id))) (if (not server-id) (begin ;; since we didn't get the server lock we are going to clean up and bail out (debug:print-info 2 "INFO: server pid=" (current-process-id) ", hostname=" (get-host-name) " not starting due to other candidates ahead in start queue") (open-run-close tasks:server-delete-records-for-this-pid tasks:open-db) ) (let* ((th2 (make-thread (lambda () (http-transport:run (if (args:get-arg "-server") (args:get-arg "-server") "-") run-id server-id)) "Server run")) |
︙ | ︙ |
Modified tasks.scm from [0a12e30788] to [8d2eda9003].
︙ | ︙ | |||
126 127 128 129 130 131 132 | (set! res num-in-queue)) mdb "SELECT count(id) FROM servers WHERE run_id=? AND state = 'available';" run-id) res)) (define (tasks:server-clean-out-old-records-for-run-id mdb run-id) | | | | > > > > > > > > < < < < < < > > > | > > > | | | | | | | | > | | | | > > > > | > > | | 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 | (set! res num-in-queue)) mdb "SELECT count(id) FROM servers WHERE run_id=? AND state = 'available';" run-id) res)) (define (tasks:server-clean-out-old-records-for-run-id mdb run-id) (sqlite3:execute mdb "UPDATE servers SET state='defunct' WHERE state in ('available','shutting-down') AND (strftime('%s','now') - start_time) > 300 AND run_id=?;" run-id)) (define (tasks:server-force-clean-running-records-for-run-id mdb run-id) (sqlite3:execute mdb "UPDATE servers SET state='defunct' WHERE state = 'running' AND run_id=?;" run-id)) (define (tasks:server-force-clean-run-record mdb run-id iface port) (sqlite3:execute mdb "UPDATE servers SET state='defunct' WHERE state = 'running' AND run_id=? AND interface=? AND port=?;" run-id iface port)) (define (tasks:server-delete-records-for-this-pid mdb) (sqlite3:execute mdb "UPDATE servers SET state='defunct' WHERE hostname=? AND pid=?;" (get-host-name) (current-process-id))) (define (tasks:server-delete-record mdb server-id) (sqlite3:execute mdb "UPDATE servers SET state='defunct' WHERE id=?;" server-id) ;; use this opportuntity to clean out records over one month old (sqlite3:execute mdb "DELETE FROM servers WHERE state not in ('running','shutting-down') AND (strftime('%s','now') - start_time) > 2628000;")) (define (tasks:server-set-state! mdb server-id state) (sqlite3:execute mdb "UPDATE servers SET state=? WHERE id=?;" state server-id)) (define (tasks:server-set-interface-port mdb server-id interface port) (sqlite3:execute mdb "UPDATE servers SET interface=?,port=? WHERE id=?;" interface port server-id)) ;; Get random port not used in long time ;; (define (tasks:server-get-next-port mdb) (let* ((lownum 30000) (highnum 64000) (used-ports '()) (get-rand-port (lambda () (+ lownum (random (- highnum lownum))))) (port-param (if (and (args:get-arg "-port") (string->number (args:get-arg "-port"))) (string->number (args:get-arg "-port")) #f)) ;; (config-port (if (and (config-lookup *configdat* "server" "port") ;; (string->number (config-lookup *configdat* "server" "port"))) ;; (string->number (config-lookup *configdat* "server" "port")) ;; #f)) ) (sqlite3:for-each-row (lambda (port) (set! used-ports (cons port used-ports))) mdb "SELECT port FROM servers;") (cond ((and port-param res) (if (> res port-param) res port-param)) (port-param port-param) ;; ((and config-port res) (if (> res config-port) res config-port)) ;; (config-port config-port) (else (let loop ((port (get-rand-port)) (remtries 100)) (if (member port used-ports) (if (> remtries 0) (loop (get-rand-port)(- remtries 1)) (get-rand-port)) port)))))) (define (tasks:server-am-i-the-server? mdb run-id) (let* ((all (tasks:server-get-servers-vying-for-run-id mdb run-id)) (first (if (null? all) (begin (debug:print 0 "ERROR: no servers listed, should be at least one by now.") (sqlite3:finalize! mdb) (exit 1)) |
︙ | ︙ |