Overview
Comment: | Fixed issue with server record not reflecting actual server when have port collisions. |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | development | v1.5415 |
Files: | files | file ages | folders |
SHA1: |
99ca17a0cc61ca660eaa51f6305bcf67 |
User & Date: | mrwellan on 2013-04-22 16:38:28 |
Other Links: | branch diff | manifest | tags |
Context
2013-04-23
| ||
08:06 | Released version v1.5415 check-in: a97c05c022 user: mrwellan tags: trunk, v1.5415 | |
2013-04-22
| ||
23:38 | Trying fork instead of system for launching server, added better guesser for ip address to bind to (should bind to all?) check-in: 1b71a45029 user: matt tags: development | |
19:47 | Merged dev into runcontrol check-in: df98c96bb1 user: matt tags: runcontrol | |
16:38 | Fixed issue with server record not reflecting actual server when have port collisions. check-in: 99ca17a0cc user: mrwellan tags: development, v1.5415 | |
2013-04-17
| ||
15:28 | Converted test steps to matrix, added browsing of log check-in: d187e07c8b user: mrwellan tags: development, v1.5414 | |
Changes
Modified db.scm from [a540e1036f] to [cc06b656a5].
︙ | ︙ | |||
1273 1274 1275 1276 1277 1278 1279 | (list item-path "")))) (cdb:client-call serverdat 'register-test #t *default-numtries* run-id test-name item-path))) (define (cdb:flush-queue serverdat) (cdb:client-call serverdat 'flush #f *default-numtries*)) (define (cdb:kill-server serverdat) | | | 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 | (list item-path "")))) (cdb:client-call serverdat 'register-test #t *default-numtries* run-id test-name item-path))) (define (cdb:flush-queue serverdat) (cdb:client-call serverdat 'flush #f *default-numtries*)) (define (cdb:kill-server serverdat) (cdb:client-call serverdat 'killserver #t *default-numtries*)) (define (cdb:roll-up-pass-fail-counts serverdat run-id test-name item-path status) (cdb:client-call serverdat 'immediate #f *default-numtries* open-run-close db:roll-up-pass-fail-counts #f run-id test-name item-path status)) (define (cdb:get-test-info serverdat run-id test-name item-path) (cdb:client-call serverdat 'immediate #f *default-numtries* open-run-close db:get-test-info #f run-id test-name item-path)) |
︙ | ︙ | |||
1331 1332 1333 1334 1335 1336 1337 | (define db:special-queries '(rollup-tests-pass-fail db:roll-up-pass-fail-counts login immediate flush sync set-verbosity | | > | 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 | (define db:special-queries '(rollup-tests-pass-fail db:roll-up-pass-fail-counts login immediate flush sync set-verbosity killserver )) ;; not used, intended to indicate to run in calling process (define db:run-local-queries '()) ;; rollup-tests-pass-fail)) (define (db:process-cached-writes db) (let ((queries (make-hash-table)) (data #f)) |
︙ | ︙ | |||
1466 1467 1468 1469 1470 1471 1472 | (debug:print-info 7 "Received " response " from wrapped write") (server:reply return-address qry-sig response response)) ;; otherwise if appropriate flush the queue (this is a read or complex query) (begin (cond ((member stmt-key db:special-queries) (let ((starttime (current-milliseconds))) | | | 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 | (debug:print-info 7 "Received " response " from wrapped write") (server:reply return-address qry-sig response response)) ;; otherwise if appropriate flush the queue (this is a read or complex query) (begin (cond ((member stmt-key db:special-queries) (let ((starttime (current-milliseconds))) (debug:print-info 9 "Handling special statement " stmt-key) (case stmt-key ((immediate) ;; This is a read or mixed read-write query, must clear the cache (case *transport-type* ((http) (mutex-lock! *db:process-queue-mutex*) (db:process-cached-writes db) |
︙ | ︙ |
Modified http-transport.scm from [161d181983] to [c1f69a9798].
︙ | ︙ | |||
119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 | (begin (print-error-message exn) (if (< portnum 9000) (begin (print "WARNING: failed to start on portnum: " portnum ", trying next port") (thread-sleep! 0.1) ;; (open-run-close tasks:remove-server-records tasks:open-db) (http-transport:try-start-server ipaddrstr (+ portnum 1))) (print "ERROR: Tried and tried but could not start the server"))) (set! *runremote* (list ipaddrstr portnum)) ;; (open-run-close tasks:remove-server-records tasks:open-db) (open-run-close tasks:server-register tasks:open-db (current-process-id) ipaddrstr portnum 0 'live 'http) (print "INFO: Trying to start server on " ipaddrstr ":" portnum) ;; This starts the spiffy server (start-server port: portnum) (print "INFO: server has been stopped"))) ;;====================================================================== ;; S E R V E R U T I L I T I E S ;;====================================================================== ;;====================================================================== | > > > | 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 | (begin (print-error-message exn) (if (< portnum 9000) (begin (print "WARNING: failed to start on portnum: " portnum ", trying next port") (thread-sleep! 0.1) ;; (open-run-close tasks:remove-server-records tasks:open-db) (open-run-close tasks:server-delete tasks:open-db ipaddrstr portnum) (http-transport:try-start-server ipaddrstr (+ portnum 1))) (print "ERROR: Tried and tried but could not start the server"))) ;; any error in following steps will result in a retry (set! *runremote* (list ipaddrstr portnum)) ;; (open-run-close tasks:remove-server-records tasks:open-db) (open-run-close tasks:server-register tasks:open-db (current-process-id) ipaddrstr portnum 0 'live 'http) (print "INFO: Trying to start server on " ipaddrstr ":" portnum) ;; This starts the spiffy server (start-server port: portnum) (open-run-close tasks:server-delete tasks:open-db ipaddrstr portnum) (print "INFO: server has been stopped"))) ;;====================================================================== ;; S E R V E R U T I L I T I E S ;;====================================================================== ;;====================================================================== |
︙ | ︙ | |||
227 228 229 230 231 232 233 234 235 236 237 238 239 240 | (thread-sleep! 4) ;; no need to do this very often ;; NB// sync currently does NOT return queue-length (let () ;; (queue-len (cdb:client-call server-info 'sync #t 1))) ;; (print "Server running, count is " count) (if (< count 1) ;; 3x3 = 9 secs aprox (loop (+ count 1))) ;; NOTE: Get rid of this mechanism! It really is not needed... (tasks:server-update-heartbeat tdb spid) ;; (if ;; (or (> numrunning 0) ;; stay alive for two days after last access (mutex-lock! *heartbeat-mutex*) (set! last-access *last-db-access*) (mutex-unlock! *heartbeat-mutex*) | > > > > > > > > > > > > | 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 | (thread-sleep! 4) ;; no need to do this very often ;; NB// sync currently does NOT return queue-length (let () ;; (queue-len (cdb:client-call server-info 'sync #t 1))) ;; (print "Server running, count is " count) (if (< count 1) ;; 3x3 = 9 secs aprox (loop (+ count 1))) ;; Check that iface and port have not changed (can happen if server port collides) (mutex-lock! *heartbeat-mutex*) (set! sdat *runremote*) (mutex-unlock! *heartbeat-mutex*) (if (not (equal? sdat (list iface port))) (begin (debug:print-info 1 "interface changed, refreshing iface and port info") (set! iface (car sdat)) (set! port (cadr sdat)) (set! spid (tasks:server-get-server-id tdb #f iface port #f)))) ;; NOTE: Get rid of this mechanism! It really is not needed... (tasks:server-update-heartbeat tdb spid) ;; (if ;; (or (> numrunning 0) ;; stay alive for two days after last access (mutex-lock! *heartbeat-mutex*) (set! last-access *last-db-access*) (mutex-unlock! *heartbeat-mutex*) |
︙ | ︙ |
Modified megatest-version.scm from [139eab9acd] to [24437845eb].
1 2 3 4 5 | ;; Always use two digit decimal ;; 1.01, 1.02...1.10,1.11 ... 1.99,2.00.. (declare (unit megatest-version)) | | | 1 2 3 4 5 6 7 | ;; Always use two digit decimal ;; 1.01, 1.02...1.10,1.11 ... 1.99,2.00.. (declare (unit megatest-version)) (define megatest-version 1.5415) |
Modified megatest.scm from [f7af8ee8db] to [2419ab740a].
︙ | ︙ | |||
31 32 33 34 35 36 37 | (include "common_records.scm") (include "key_records.scm") (include "db_records.scm") (include "megatest-fossil-hash.scm") ;; (use trace dot-locking) ;; (trace | | < < < < > > > > > > > | | < | | | < < | | | 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 | (include "common_records.scm") (include "key_records.scm") (include "db_records.scm") (include "megatest-fossil-hash.scm") ;; (use trace dot-locking) ;; (trace ;; cdb:client-call ;; cdb:remote-run ;; cdb:test-set-status-state ;; change-directory ;; db:process-queue-item ;; db:test-get-logfile-info ;; db:teststep-set-status! ;; nice-path ;; obtain-dot-lock ;; open-run-close ;; read-config ;; runs:can-run-more-tests ;; sqlite3:execute ;; sqlite3:for-each-row ;; tests:check-waiver-eligibility ;; tests:summarize-items ;; tests:test-set-status! ;; thread-sleep! ;;) (define help (conc " Megatest, documentation at http://www.kiatoa.com/fossils/megatest version " megatest-version " license GPL, Copyright Matt Welland 2006-2012 |
︙ | ︙ | |||
128 129 130 131 132 133 134 135 136 137 138 139 140 141 | -setvars VAR1=val1,VAR2=val2 : Add environment variables to a run NB// these are overwritten by values set in config files. -server -|hostname : start the server (reduces contention on megatest.db), use - to automatically figure out hostname -transport http|zmq : use http or zmq for transport (default is http) -daemonize : fork into background and disconnect from stdin/out -list-servers : list the servers -repl : start a repl (useful for extending megatest) -load file.scm : load and run file.scm Spreadsheet generation -extract-ods fname.ods : extract an open document spreadsheet from the database -pathmod path : insert path, i.e. path/runame/itempath/logfile.html will clear the field if no rundir/testname/itempath/logfile | > | 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 | -setvars VAR1=val1,VAR2=val2 : Add environment variables to a run NB// these are overwritten by values set in config files. -server -|hostname : start the server (reduces contention on megatest.db), use - to automatically figure out hostname -transport http|zmq : use http or zmq for transport (default is http) -daemonize : fork into background and disconnect from stdin/out -list-servers : list the servers -stop-server id : stop server specified by id (see output of -list-servers) -repl : start a repl (useful for extending megatest) -load file.scm : load and run file.scm Spreadsheet generation -extract-ods fname.ods : extract an open document spreadsheet from the database -pathmod path : insert path, i.e. path/runame/itempath/logfile.html will clear the field if no rundir/testname/itempath/logfile |
︙ | ︙ | |||
151 152 153 154 155 156 157 | megatest -test-files 'logs/*.log' -target ubuntu/n%/no% :runname w49% -testpatt test_mt% Called as " (string-intersperse (argv) " ") " Version " megatest-version ", built from " megatest-fossil-hash )) ;; -gui : start a gui interface ;; -config fname : override the runconfig file with fname | < | 152 153 154 155 156 157 158 159 160 161 162 163 164 165 | megatest -test-files 'logs/*.log' -target ubuntu/n%/no% :runname w49% -testpatt test_mt% Called as " (string-intersperse (argv) " ") " Version " megatest-version ", built from " megatest-fossil-hash )) ;; -gui : start a gui interface ;; -config fname : override the runconfig file with fname ;; process args (define remargs (args:get-args (argv) (list "-runtests" ;; run a specific test "-config" ;; override the config file name "-execute" ;; run the command encoded in the base64 parameter |
︙ | ︙ | |||
191 192 193 194 195 196 197 | ":value" ":expected" ":tol" ":units" ;; misc "-server" "-transport" | | | 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 | ":value" ":expected" ":tol" ":units" ;; misc "-server" "-transport" "-stop-server" "-port" "-extract-ods" "-pathmod" "-env2file" "-setvars" "-set-state-status" "-debug" ;; for *verbosity* > 2 |
︙ | ︙ | |||
323 324 325 326 327 328 329 | ;; (server:launch (string->symbol (args:get-arg "-transport" "http")))) (system (conc (car (argv)) " -server - -daemonize -transport " (args:get-arg "-transport" "http"))) (thread-sleep! 3)) ;; give the server a few seconds to start (debug:print 0 "INFO: Servers already running " servers) ))))) | | | | | | > > > < < < | | 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 | ;; (server:launch (string->symbol (args:get-arg "-transport" "http")))) (system (conc (car (argv)) " -server - -daemonize -transport " (args:get-arg "-transport" "http"))) (thread-sleep! 3)) ;; give the server a few seconds to start (debug:print 0 "INFO: Servers already running " servers) ))))) (if (or (args:get-arg "-list-servers") (args:get-arg "-stop-server")) (let ((tl (setup-for-run))) (if tl (let* ((servers (open-run-close tasks:get-all-servers tasks:open-db)) (fmtstr "~5a~8a~8a~20a~20a~10a~10a~10a~10a~10a\n") (servers-to-kill '()) (killinfo (args:get-arg "-stop-server")) (khost-port (if killinfo (if (substring-index ":" killinfo)(string-split ":") #f) #f)) (sid (if killinfo (if (substring-index ":" killinfo) #f (string->number killinfo)) #f))) (format #t fmtstr "Id" "MTver" "Pid" "Host" "Interface" "OutPort" "InPort" "LastBeat" "State" "Transport") (format #t fmtstr "==" "=====" "===" "====" "=========" "=======" "======" "========" "=====" "=========") (for-each (lambda (server) (let* ((id (vector-ref server 0)) (pid (vector-ref server 1)) (hostname (vector-ref server 2)) (interface (vector-ref server 3)) (pullport (vector-ref server 4)) (pubport (vector-ref server 5)) (start-time (vector-ref server 6)) (priority (vector-ref server 7)) |
︙ | ︙ | |||
359 360 361 362 363 364 365 | ;; no need to login as status of #t indicates we are connecting to correct ;; server (if (equal? state "dead") (if (> last-update (* 25 60 60)) ;; keep records around for slighly over a day. (open-run-close tasks:server-deregister tasks:open-db hostname pullport: pullport pid: pid action: 'delete)) (if (> last-update 20) ;; Mark as dead if not updated in last 20 seconds (open-run-close tasks:server-deregister tasks:open-db hostname pullport: pullport pid: pid))) | < | > > > > | 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 | ;; no need to login as status of #t indicates we are connecting to correct ;; server (if (equal? state "dead") (if (> last-update (* 25 60 60)) ;; keep records around for slighly over a day. (open-run-close tasks:server-deregister tasks:open-db hostname pullport: pullport pid: pid action: 'delete)) (if (> last-update 20) ;; Mark as dead if not updated in last 20 seconds (open-run-close tasks:server-deregister tasks:open-db hostname pullport: pullport pid: pid))) (format #t fmtstr id mt-ver pid hostname interface pullport pubport last-update (if status "alive" "dead") transport) (if (equal? id sid) (begin (debug:print-info 0 "Attempting to stop server with pid " pid) (tasks:kill-server status hostname pullport pid transport))))) servers) (debug:print-info 1 "Done with listservers") (set! *didsomething* #t) (exit) ;; must do, would have to add checks to many/all calls below ) (exit))) ;; if not list or kill then start a client (if appropriate) |
︙ | ︙ |
Modified tasks.scm from [16cec1c8c7] to [e04939ec8c].
︙ | ︙ | |||
21 22 23 24 25 26 27 | ;;====================================================================== ;; Tasks db ;;====================================================================== (define (tasks:open-db) (let* ((dbpath (conc *toppath* "/monitor.db")) (exists (file-exists? dbpath)) | < < < < < < < < | 21 22 23 24 25 26 27 28 29 30 31 32 33 34 | ;;====================================================================== ;; Tasks db ;;====================================================================== (define (tasks:open-db) (let* ((dbpath (conc *toppath* "/monitor.db")) (exists (file-exists? dbpath)) (mdb (sqlite3:open-database dbpath)) ;; (never-give-up-open-db dbpath)) (handler (make-busy-timeout 36000))) (sqlite3:set-busy-handler! mdb handler) (sqlite3:execute mdb (conc "PRAGMA synchronous = 0;")) (if (not exists) (begin (sqlite3:execute mdb "CREATE TABLE IF NOT EXISTS tasks_queue (id INTEGER PRIMARY KEY, |
︙ | ︙ | |||
123 124 125 126 127 128 129 130 131 132 133 134 135 136 | (case action ((delete)(sqlite3:execute mdb "DELETE FROM servers WHERE hostname=? AND port=?;" hostname port)) (else (sqlite3:execute mdb "UPDATE servers SET state='dead' WHERE hostname=? AND port=?;" hostname port))) (debug:print 0 "ERROR: tasks:server-deregister called with neither pid nor port specified")))) (define (tasks:server-deregister-self mdb hostname) (tasks:server-deregister mdb hostname pid: (current-process-id))) (define (tasks:server-get-server-id mdb hostname iface port pid) (debug:print-info 12 "tasks:server-get-server-id " mdb " " hostname " " iface " " port " " pid) (let ((res #f)) (sqlite3:for-each-row (lambda (id) (set! res id)) | > > > > | 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 | (case action ((delete)(sqlite3:execute mdb "DELETE FROM servers WHERE hostname=? AND port=?;" hostname port)) (else (sqlite3:execute mdb "UPDATE servers SET state='dead' WHERE hostname=? AND port=?;" hostname port))) (debug:print 0 "ERROR: tasks:server-deregister called with neither pid nor port specified")))) (define (tasks:server-deregister-self mdb hostname) (tasks:server-deregister mdb hostname pid: (current-process-id))) ;; need a simple call for robustly removing records given host and port (define (tasks:server-delete mdb hostname port) (tasks:server-deregister mdb hostname port: port action: 'delete)) (define (tasks:server-get-server-id mdb hostname iface port pid) (debug:print-info 12 "tasks:server-get-server-id " mdb " " hostname " " iface " " port " " pid) (let ((res #f)) (sqlite3:for-each-row (lambda (id) (set! res id)) |
︙ | ︙ | |||
227 228 229 230 231 232 233 | ;; (if (null? tal) ;; #f ;; (loop (car tal)(cdr tal)))))))))) (define (tasks:remove-server-records mdb) (sqlite3:execute mdb "DELETE FROM servers;")) | | | > | > | > > > > | | 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 | ;; (if (null? tal) ;; #f ;; (loop (car tal)(cdr tal)))))))))) (define (tasks:remove-server-records mdb) (sqlite3:execute mdb "DELETE FROM servers;")) (define (tasks:mark-server hostname port pid state transport) (if port (open-run-close tasks:server-deregister tasks:open-db hostname port: port) (open-run-close tasks:server-deregister tasks:open-db hostname pid: pid))) (define (tasks:kill-server status hostname port pid transport) (debug:print-info 1 "Removing defunct server record for " hostname ":" port) (if port (open-run-close tasks:server-deregister tasks:open-db hostname port: port) (open-run-close tasks:server-deregister tasks:open-db hostname pid: pid)) (if status ;; #t means alive (begin (if (equal? hostname (get-host-name)) (handle-exceptions exn (debug:print-info 0 "server may or may not be dead, check for megatest -server running as pid " pid "\n" " EXCEPTION: " ((condition-property-accessor 'exn 'message) exn)) (debug:print 1 "Sending signal/term to " pid " on " hostname) (process-signal pid signal/term) (thread-sleep! 5) ;; give it five seconds to die peacefully then do a brutal kill ;;(process-signal pid signal/kill) ) ;; local machine, send sig term (begin (debug:print-info 1 "Stopping remote servers not yet supported.")))) ;; (debug:print-info 1 "Telling alive server on " hostname ":" port " to commit servercide") ;; (let ((serverdat (list hostname port))) ;; (case (string->symbol transport) ;; ((http)(http-transport:client-connect hostname port)) ;; (else (debug:print "ERROR: remote stopping servers of type " transport " not supported yet"))) ;; (cdb:kill-server serverdat))))) ;; remote machine, try telling server to commit suicide (begin (if status (if (equal? hostname (get-host-name)) (begin (debug:print-info 1 "Sending signal/term to " pid " on " hostname) (process-signal pid signal/term) ;; local machine, send sig term (thread-sleep! 5) ;; give it five seconds to die peacefully then do a brutal kill |
︙ | ︙ |
Modified tests/fullrun/megatest.config from [c5fdf7800f] to [d25787fc32].
︙ | ︙ | |||
55 56 57 58 59 60 61 | # XTERM [system xterm] # RUNDEAD [system exit 56] [server] # If the server can't be started on this port it will try the next port until # it succeeds | | | 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 | # XTERM [system xterm] # RUNDEAD [system exit 56] [server] # If the server can't be started on this port it will try the next port until # it succeeds port 8080 # This server will keep running this number of hours after last access. # Three minutes is 0.05 hours timeout 0.05 ## disks are: ## name host:/path/to/area |
︙ | ︙ |