Changes In Branch re-re-factor-server Through [fb46b13dc5] Excluding Merge-Ins
This is equivalent to a diff from 559228be40 to fb46b13dc5
2014-02-15
| ||
22:41 | Streamlined port sequencing for per-run db servers check-in: c6b8e8a9d8 user: matt tags: re-re-factor-server | |
22:02 | Cleaned up server start sequence check-in: fb46b13dc5 user: matt tags: re-re-factor-server | |
2014-02-13
| ||
16:51 | Added pdf and regenerated Closed-Leaf check-in: 6d98d0aa7d user: mrwellan tags: inmem-per-run-db-per-run-server | |
2014-02-11
| ||
00:05 | Added more files to the map check-in: a1ef0f490c user: matt tags: re-re-factor-server | |
2014-02-10
| ||
19:56 | Re-re-factor server handling check-in: f68ed29f16 user: matt tags: re-re-factor-server | |
2014-02-09
| ||
23:54 | Added more detail to server/client flow check-in: 559228be40 user: matt tags: inmem-per-run-db-per-run-server | |
22:55 | Updated docs with proposed server spec check-in: 180fe4e32d user: matt tags: inmem-per-run-db-per-run-server | |
Modified client.scm from [d859fde28d] to [c71df3eef9].
︙ | ︙ | |||
48 49 50 51 52 53 54 55 56 57 58 59 60 61 | ;; ;; There are two scenarios. ;; 1. We are a test manager and we received *transport-type* and *runremote* via cmdline ;; 2. We are a run tests, list runs or other interactive process and we must figure out ;; *transport-type* and *runremote* from the monitor.db ;; ;; client:setup (define (client:setup run-id #!key (remaining-tries 3)) (if (not *toppath*) (if (not (setup-for-run)) (begin (debug:print 0 "ERROR: failed to find megatest.config, exiting") (exit)))) ;; (push-directory *toppath*) ;; This is probably NOT needed | > > > | 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 | ;; ;; There are two scenarios. ;; 1. We are a test manager and we received *transport-type* and *runremote* via cmdline ;; 2. We are a run tests, list runs or other interactive process and we must figure out ;; *transport-type* and *runremote* from the monitor.db ;; ;; client:setup ;; ;; lookup_server, need to remove *runremote* stuff ;; (define (client:setup run-id #!key (remaining-tries 3)) (if (not *toppath*) (if (not (setup-for-run)) (begin (debug:print 0 "ERROR: failed to find megatest.config, exiting") (exit)))) ;; (push-directory *toppath*) ;; This is probably NOT needed |
︙ | ︙ | |||
80 81 82 83 84 85 86 | (begin (debug:print 0 "ERROR: Expected to be able to connect to a server by now. No server available for run-id = " run-id) (exit 1))) (begin (hash-table-set! *runremote* run-id hostinfo) (debug:print-info 11 "CLIENT SETUP, hostinfo=" hostinfo) (debug:print-info 11 "Using transport type of " transport (if hostinfo (conc " to connect to " hostinfo) "")) | > > > | | | | | | | | | | | | | | < | 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 | (begin (debug:print 0 "ERROR: Expected to be able to connect to a server by now. No server available for run-id = " run-id) (exit 1))) (begin (hash-table-set! *runremote* run-id hostinfo) (debug:print-info 11 "CLIENT SETUP, hostinfo=" hostinfo) (debug:print-info 11 "Using transport type of " transport (if hostinfo (conc " to connect to " hostinfo) "")) (client:start run-id transport server-info))))))) (define (client:start run-id transport server-info) (case transport ;; ((fs)(if (not *megatest-db*)(set! *megatest-db* (open-db)))) ((http) ;; this saves the server-info in the *runremote* hash and returns it (http-transport:client-connect run-id (tasks:hostinfo-get-interface server-info) (tasks:hostinfo-get-port server-info))) ((zmq) (zmq-transport:client-connect (tasks:hostinfo-get-interface server-info) (tasks:hostinfo-get-port server-info) (tasks:hostinfo-get-pubport server-info))) (else ;; default to fs (debug:print 0 "ERROR: unrecognised transport type " transport ) #f))) ;; client:signal-handler (define (client:signal-handler signum) (handle-exceptions exn (debug:print " ... exiting ...") (let ((th1 (make-thread (lambda () |
︙ | ︙ |
Modified docs/manual/server.dot from [4efd80e71a] to [2cf0263449].
︙ | ︙ | |||
44 45 46 47 48 49 50 | ping_server -> exit [label=alive]; ping_server -> remove_server_record [label=dead]; remove_server_record -> set_available; set_available -> avail_delay [label="delay 3s"]; avail_delay -> "first_in_queue?"; "first_in_queue?" -> set_running [label=yes]; | | > | > > | 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 | ping_server -> exit [label=alive]; ping_server -> remove_server_record [label=dead]; remove_server_record -> set_available; set_available -> avail_delay [label="delay 3s"]; avail_delay -> "first_in_queue?"; "first_in_queue?" -> set_running [label=yes]; set_running -> get_next_port -> handle_requests; "first_in_queue?" -> "dead_entry_in_queue?" [label=no]; "dead_entry_in_queue?" -> "server_running?" [label=no]; "dead_entry_in_queue?" -> "remove_dead_entries" [label=yes]; remove_dead_entries -> "server_running?"; handle_requests -> start_shutdown [label="no traffic"]; handle_requests -> shutdown_request; start_shutdown -> shutdown_delay; shutdown_request -> shutdown_delay; shutdown_delay -> exit; |
︙ | ︙ |
Added docs/results.pdf version [8c482a4606].
cannot compute difference between binary files
Modified http-transport.scm from [cb9f17b39f] to [043ad30c04].
︙ | ︙ | |||
152 153 154 155 156 157 158 159 160 161 162 163 164 165 | exn (begin (print-error-message exn) (if (< portnum 9000) (begin (debug:print 0 "WARNING: failed to start on portnum: " portnum ", trying next port") (thread-sleep! 0.1) (http-transport:try-start-server ipaddrstr (+ portnum 1) server-id)) (print "ERROR: Tried and tried but could not start the server"))) ;; any error in following steps will result in a retry (set! *server-info* (list ipaddrstr portnum)) (open-run-close tasks:server-set-interface-port tasks:open-db server-id | > > > | 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 | exn (begin (print-error-message exn) (if (< portnum 9000) (begin (debug:print 0 "WARNING: failed to start on portnum: " portnum ", trying next port") (thread-sleep! 0.1) ;; get_next_port goes here (http-transport:try-start-server ipaddrstr (+ portnum 1) server-id)) (print "ERROR: Tried and tried but could not start the server"))) ;; any error in following steps will result in a retry (set! *server-info* (list ipaddrstr portnum)) (open-run-close tasks:server-set-interface-port tasks:open-db server-id |
︙ | ︙ | |||
369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 | (thread-start! th1) (thread-start! th2) (thread-join! th1) (thread-terminate! th2) (debug:print-info 11 "got res=" res) res))))) (define (http-transport:client-connect run-id iface port) (let* ((login-res #f) (uri-dat (make-request method: 'POST uri: (uri-reference (conc "http://" iface ":" port "/ctrl")))) (uri-api-dat (make-request method: 'POST uri: (uri-reference (conc "http://" iface ":" port "/api")))) (serverdat (list iface port uri-dat uri-api-dat))) (hash-table-set! *runremote* run-id serverdat) ;; may or may not be good ... (set! login-res (rmt:login run-id)) (if (and (list? login-res) (car login-res)) (begin (debug:print-info 2 "Logged in and connected to " iface ":" port) (hash-table-set! *runremote* run-id serverdat) serverdat) (begin (debug:print-info 0 "ERROR: Failed to login or connect to " iface ":" port) | > > > | | 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 | (thread-start! th1) (thread-start! th2) (thread-join! th1) (thread-terminate! th2) (debug:print-info 11 "got res=" res) res))))) ;; ;; connect ;; (define (http-transport:client-connect run-id iface port) (let* ((login-res #f) (uri-dat (make-request method: 'POST uri: (uri-reference (conc "http://" iface ":" port "/ctrl")))) (uri-api-dat (make-request method: 'POST uri: (uri-reference (conc "http://" iface ":" port "/api")))) (serverdat (list iface port uri-dat uri-api-dat))) (hash-table-set! *runremote* run-id serverdat) ;; may or may not be good ... (set! login-res (rmt:login run-id)) (if (and (list? login-res) (car login-res)) (begin (debug:print-info 2 "Logged in and connected to " iface ":" port) (hash-table-set! *runremote* run-id serverdat) serverdat) (begin (debug:print-info 0 "ERROR: Failed to login or connect to " iface ":" port) #f)))) ;; run http-transport:keep-running in a parallel thread to monitor that the db is being ;; used and to shutdown after sometime if it is not. ;; (define (http-transport:keep-running server-id) ;; if none running or if > 20 seconds since ;; server last used then start shutdown |
︙ | ︙ | |||
419 420 421 422 423 424 425 426 427 428 429 430 431 432 | (tdb (tasks:open-db)) (server-timeout (let ((tmo (config-lookup *configdat* "server" "timeout"))) (if (and (string? tmo) (string->number tmo)) (* 60 60 (string->number tmo)) ;; default to three days (* 3 24 60 60))))) (tasks:server-set-state! tdb server-id "running") (let loop ((count 0)) ;; Use this opportunity to sync the inmemdb to db (let ((start-time (current-milliseconds)) (sync-time #f) (rem-time #f)) (if *inmemdb* (db:sync-touched *inmemdb* force-sync: #t)) | > > > | 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 | (tdb (tasks:open-db)) (server-timeout (let ((tmo (config-lookup *configdat* "server" "timeout"))) (if (and (string? tmo) (string->number tmo)) (* 60 60 (string->number tmo)) ;; default to three days (* 3 24 60 60))))) ;; ;; set_running ;; (tasks:server-set-state! tdb server-id "running") (let loop ((count 0)) ;; Use this opportunity to sync the inmemdb to db (let ((start-time (current-milliseconds)) (sync-time #f) (rem-time #f)) (if *inmemdb* (db:sync-touched *inmemdb* force-sync: #t)) |
︙ | ︙ | |||
453 454 455 456 457 458 459 | (begin (debug:print-info 0 "interface changed, refreshing iface and port info") (set! iface (car sdat)) (set! port (cadr sdat)))) ;; NOTE: Get rid of this mechanism! It really is not needed... ;; (open-run-close tasks:server-update-heartbeat tasks:open-db spid) | > > > > | > > > > > > | 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 | (begin (debug:print-info 0 "interface changed, refreshing iface and port info") (set! iface (car sdat)) (set! port (cadr sdat)))) ;; NOTE: Get rid of this mechanism! It really is not needed... ;; (open-run-close tasks:server-update-heartbeat tasks:open-db spid) ;; ;; NOT USED ANY MORE ;; ;; (tasks:server-update-heartbeat tdb server-id) ;; (if ;; (or (> numrunning 0) ;; stay alive for two days after last access ;; Transfer *last-db-access* to last-access to use in checking that we are still alive (mutex-lock! *heartbeat-mutex*) (set! last-access *last-db-access*) (mutex-unlock! *heartbeat-mutex*) ;; (debug:print 11 "last-access=" last-access ", server-timeout=" server-timeout) ;; ;; no_traffic ;; (if (and *server-run* (> (+ last-access server-timeout) (current-seconds))) (begin (debug:print-info 0 "Server continuing, seconds since last db access: " (- (current-seconds) last-access)) (loop 0)) (begin (debug:print-info 0 "Starting to shutdown the server.") ;; need to delete only *my* server entry (future use) (set! *time-to-exit* #t) (if *inmemdb* (db:sync-touched *inmemdb* force-sync: #t)) ;; ;; start_shutdown ;; ( tasks:server-set-state! tdb server-id "shutting-down") (thread-sleep! 5) (debug:print-info 0 "Max cached queries was " *max-cache-size*) (debug:print-info 0 "Number of cached writes " *number-of-writes*) (debug:print-info 0 "Average cached write time " (if (eq? *number-of-writes* 0) "n/a (no writes)" |
︙ | ︙ | |||
496 497 498 499 500 501 502 503 504 | *number-non-write-queries*)) " ms") (debug:print-info 0 "Server shutdown complete. Exiting") (tasks:server-delete-record! tdb server-id) (exit)))))) ;; all routes though here end in exit ... (define (http-transport:launch run-id) (set! *run-id* run-id) | > > > < < < < < < > > > < | | | | | | | | | | | | | | | | < < | | 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 | *number-non-write-queries*)) " ms") (debug:print-info 0 "Server shutdown complete. Exiting") (tasks:server-delete-record! tdb server-id) (exit)))))) ;; all routes though here end in exit ... ;; ;; start_server? ;; (define (http-transport:launch run-id) (set! *run-id* run-id) (if (args:get-arg "-daemonize") (daemon:ize)) ;; ;; set_available ;; (let ((server-id (open-run-close tasks:server-lock-slot tasks:open-db run-id))) (if (not server-id) (begin (debug:print-info 2 "INFO: server pid=" (current-process-id) ", hostname=" (get-host-name) " not starting due to other candidates ahead in start queue") (open-run-close tasks:server-delete-records-for-this-pid tasks:open-db)) (let* ((th2 (make-thread (lambda () (http-transport:run (if (args:get-arg "-server") (args:get-arg "-server") "-") run-id server-id)) "Server run")) (th3 (make-thread (lambda () (http-transport:keep-running server-id)) "Keep running"))) ;; Database connection (set! *inmemdb* (db:setup run-id)) (thread-start! th2) (thread-start! th3) (set! *didsomething* #t) (thread-join! th2) (exit))))) (define (http-transport:server-signal-handler signum) (handle-exceptions exn (debug:print " ... exiting ...") (let ((th1 (make-thread (lambda () (thread-sleep! 1)) |
︙ | ︙ |
Modified server.scm from [29b0c253ff] to [ba9371a66f].
︙ | ︙ | |||
40 41 42 43 44 45 46 47 | ;; S E R V E R ;;====================================================================== ;; Call this to start the actual server ;; ;; all routes though here end in exit ... (define (server:launch transport run-id) | > > > > | < | < | < < | < | | | | | | 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 | ;; S E R V E R ;;====================================================================== ;; Call this to start the actual server ;; ;; all routes though here end in exit ... ;; ;; start_server ;; (define (server:launch transport run-id) (let ((server-running (server:check-if-running run-id transport))) (if server-running ;; a server is already running (exit) (case transport ((http) (http-transport:launch run-id)) ((zmq) (zmq-transport:launch run-id)) (else (debug:print "WARNING: unrecognised transport " transport) (exit)))))) ;;====================================================================== ;; Q U E U E M A N A G E M E N T ;;====================================================================== ;; We don't want to flush the queue if it was just flushed (define *server:last-write-flush* (current-milliseconds)) |
︙ | ︙ | |||
143 144 145 146 147 148 149 | (thread-sleep! 4))) (if (< trycount 10) (loop (open-run-close tasks:get-server tasks:open-db run-id) (+ trycount 1)) (debug:print 0 "WARNING: Couldn't start or find a server."))) (debug:print 2 "INFO: Server(s) running " servers) ))) | > > > > > > > > > | 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 | (thread-sleep! 4))) (if (< trycount 10) (loop (open-run-close tasks:get-server tasks:open-db run-id) (+ trycount 1)) (debug:print 0 "WARNING: Couldn't start or find a server."))) (debug:print 2 "INFO: Server(s) running " servers) ))) (define (server:check-if-running run-id transport) (let loop ((server (open-run-close tasks:get-server tasks:open-db run-id)) (trycount 0)) (if server ;; note: client:start will set *runremote*. this needs to be changed ;; also, client:start will login to the server, also need to change that. (client:start run-id transport server) #f))) |
Modified tasks.scm from [db6ec670d8] to [620093d5f8].
︙ | ︙ | |||
101 102 103 104 105 106 107 | (tasks:server-am-i-the-server? mdb run-id))) ;; register that this server may come online (first to register goes though with the process) (define (tasks:server-set-available mdb run-id) (sqlite3:execute mdb "INSERT INTO servers (pid,hostname,port,pubport,start_time, priority,state,mt_version,heartbeat, interface,transport,run_id) | | | 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 | (tasks:server-am-i-the-server? mdb run-id))) ;; register that this server may come online (first to register goes though with the process) (define (tasks:server-set-available mdb run-id) (sqlite3:execute mdb "INSERT INTO servers (pid,hostname,port,pubport,start_time, priority,state,mt_version,heartbeat, interface,transport,run_id) VALUES(?, ?, ?, ?, strftime('%s','now'), ?, ?, ?,-1,?, ?, ?);" (current-process-id) ;; pid (get-host-name) ;; hostname -1 ;; port -1 ;; pubport (random 1000) ;; priority (used a tiebreaker on get-available) "available" ;; state (common:version-signature) ;; mt_version |
︙ | ︙ |
Added utils/plot-code.scm version [de4d05b676].
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 | #!/mfs/pkgs/chicken/4.8.0.5/bin/csi -nbq (use regex srfi-69 srfi-13) (define targs #f) (define files (cddddr (argv))) (let ((targdat (cadddr (argv)))) (if (equal? targdat "-") (set! targs files) (set! targs (string-split targdat ",")))) (define filedat-defns (make-hash-table)) (define filedat-usages (make-hash-table)) (define defn-rx (regexp "^\\s*\\(define\\s+\\(([^\\s\\)]+).*")) (define all-regexs (make-hash-table)) (define all-fns '()) (define (print-err . data) (with-output-to-port (current-error-port) (lambda () (apply print data)))) (print-err "Making graph for files: " (string-intersperse targs ", ")) (print-err "Looking at files: " (string-intersperse files ", ")) ;; Gather the functions ;; (for-each (lambda (fname) (print-err "Processing file " fname) (with-input-from-file fname (lambda () (let loop ((inl (read-line))) (if (not (eof-object? inl)) (let ((match (string-match defn-rx inl))) (if match (let ((fnname (cadr match))) ;; (print " " fnname) (set! all-fns (cons fnname all-fns)) (hash-table-set! filedat-defns fname (cons fnname (hash-table-ref/default filedat-defns fname '()))) )) (loop (read-line)))))))) files) ;; fill up the regex hash (print-err "Make the huge regex hash") (for-each (lambda (fnname) (hash-table-set! all-regexs fnname (regexp (conc "^(|.*[^a-zA-Z]+)" fnname "([^a-zA-Z]+|)$")))) (cons "toplevel" all-fns)) (define breadcrumbs (make-hash-table)) (define (have-function inl) (let loop ((hed (car all-fns)) (tal (cdr all-fns))) (if (string-contains inl hed) #t (if (null? tal) #f (loop (car tal)(cdr tal)))))) (define (look-for-all-calls inl fnname) (if (have-function inl) ;; (string-search have-function-rx inl) (let loop ((hed (car all-fns)) (tal (cdr all-fns)) (res '())) (let ((match (string-match (hash-table-ref all-regexs hed) inl))) (if match (let ((newres (cons hed res))) (if (null? tal) newres (loop (car tal) (cdr tal) newres))) (if (null? tal) res (loop (car tal)(cdr tal) res))))) '())) ;; Gather the usages (print "digraph G {") (define curr-cluster-num 0) (define function-calls '()) (for-each (lambda (fname) (let ((last-func #f)) (print-err "Processing file " fname) (print "subgraph cluster_" curr-cluster-num " {") (set! curr-cluster-num (+ curr-cluster-num 1)) (with-input-from-file fname (lambda () (with-output-to-port (current-error-port) (lambda () (print "Analyzing file " fname))) (print "label=\"" fname "\";") (let loop ((inl (read-line)) (fnname "toplevel") (allcalls '())) (if (eof-object? inl) (begin (set! function-calls (cons (list fnname allcalls) function-calls)) (for-each (lambda (call-name) (hash-table-set! breadcrumbs call-name #t)) allcalls) (print-err "function: " fnname " allcalls: " allcalls)) (let ((match (string-match defn-rx inl))) (if match (let ((func-name (cadr match))) (if last-func (print "\"" func-name "\" -> \"" last-func "\";") (print "\"" func-name "\";")) (set! last-func func-name) (hash-table-set! breadcrumbs func-name #t) (loop (read-line) func-name allcalls)) (let ((calls (look-for-all-calls inl fnname))) (loop (read-line) fnname (append allcalls calls))))))))) (print "}"))) targs) (print-err "breadcrumbs: " (hash-table-keys breadcrumbs)) (print-err "function-calls: " function-calls) (for-each (lambda (function-call) (print-err "function-call: " function-call) (let ((fnname (car function-call)) (calls (cadr function-call))) (for-each (lambda (callname) (print (if (hash-table-ref/default breadcrumbs callname #f) "" "// ") "\"" fnname "\" -> \"" callname "\";")) calls))) function-calls) (print "}") (exit) |