Overview
Comment: | Fine tune/cleanup of server start and restart logic |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | v1.60 |
Files: | files | file ages | folders |
SHA1: |
268ec8267446dd1d36757d0f178ff01d |
User & Date: | matt on 2014-03-13 22:41:20 |
Other Links: | branch diff | manifest | tags |
Context
2014-03-13
| ||
23:21 | Tightened up run constraints until load management is more consistent check-in: 0faa89e84d user: matt tags: v1.60 | |
22:41 | Fine tune/cleanup of server start and restart logic check-in: 268ec82674 user: matt tags: v1.60 | |
2014-03-12
| ||
00:43 | Add some time back to client restarts check-in: a7747154c2 user: matt tags: v1.60 | |
Changes
Modified client.scm from [9e7c8a3060] to [933a52eee6].
︙ | ︙ | |||
52 53 54 55 56 57 58 | ;; *transport-type* and *runremote* from the monitor.db ;; ;; client:setup ;; ;; lookup_server, need to remove *runremote* stuff ;; (define (client:setup run-id #!key (remaining-tries 10) (failed-connects 0)) | | < < > > | | | 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 | ;; *transport-type* and *runremote* from the monitor.db ;; ;; client:setup ;; ;; lookup_server, need to remove *runremote* stuff ;; (define (client:setup run-id #!key (remaining-tries 10) (failed-connects 0)) (debug:print-info 0 "client:setup remaining-tries=" remaining-tries) (if (<= remaining-tries 0) (begin (debug:print 0 "ERROR: failed to start or connect to server for run-id " run-id) (exit 1)) (let ((host-info (hash-table-ref/default *runremote* run-id #f))) (if host-info (let* ((iface (http-transport:server-dat-get-iface host-info)) (port (http-transport:server-dat-get-port host-info)) (start-res (http-transport:client-connect iface port)) (ping-res (rmt:login-no-auto-client-setup start-res run-id))) (if ping-res ;; sucessful login? (begin (debug:print-info 0 "client:setup, ping is good using host-info=" host-info ", remaining-tries=" remaining-tries) ;; Why add the close-connections here? ;; (http-transport:close-connections run-id) (hash-table-set! *runremote* run-id start-res) start-res) ;; return the server info (if (member remaining-tries '(9 6 4 2)) (begin ;; login failed (debug:print-info 0 "client:setup, ping is bad for start-res=" start-res " and *runremote*=" host-info) (http-transport:close-connections run-id) (hash-table-delete! *runremote* run-id) (open-run-close tasks:server-force-clean-run-record tasks:open-db run-id iface port |
︙ | ︙ | |||
96 97 98 99 100 101 102 | ;; YUK: rename server-dat here (let* ((server-dat (open-run-close tasks:get-server tasks:open-db run-id))) (debug:print-info 0 "client:setup server-dat=" server-dat ", remaining-tries=" remaining-tries) (if server-dat (let* ((iface (tasks:hostinfo-get-interface server-dat)) (port (tasks:hostinfo-get-port server-dat)) (start-res (http-transport:client-connect iface port)) | < | > > | | | > | < > | | | 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 | ;; YUK: rename server-dat here (let* ((server-dat (open-run-close tasks:get-server tasks:open-db run-id))) (debug:print-info 0 "client:setup server-dat=" server-dat ", remaining-tries=" remaining-tries) (if server-dat (let* ((iface (tasks:hostinfo-get-interface server-dat)) (port (tasks:hostinfo-get-port server-dat)) (start-res (http-transport:client-connect iface port)) (ping-res (rmt:login-no-auto-client-setup start-res run-id))) (if (and start-res ping-res) (begin (hash-table-set! *runremote* run-id start-res) (debug:print-info 0 "connected to " (http-transport:server-dat-make-url start-res))) start-res) (if (member remaining-tries '(2 5)) (begin ;; login failed (debug:print-info 0 "client:setup, login failed, will attempt to start server ... start-res=" start-res ", run-id=" run-id ", server-dat=" server-dat) (http-transport:close-connections run-id) (hash-table-delete! *runremote* run-id) (open-run-close tasks:server-force-clean-run-record tasks:open-db run-id (tasks:hostinfo-get-interface server-dat) (tasks:hostinfo-get-port server-dat) " client:setup (server-dat = #t)") (thread-sleep! 2) (server:try-running run-id) (thread-sleep! 10) ;; give server a little time to start up (client:setup run-id remaining-tries: 10)) ;; (- remaining-tries 1))) (begin (debug:print 25 "INFO: client:setup start-res=" start-res ", run-id=" run-id ", server-dat=" server-dat) (thread-sleep! 5) (client:setup run-id remaining-tries: (- remaining-tries 1))))) (begin ;; no server registered (if (member remaining-tries '(2)) (begin (debug:print-info 0 "no server registered, remaining-tries=" remaining-tries ", try running client:setup again") ;; (open-run-close tasks:server-clean-out-old-records-for-run-id tasks:open-db run-id " client:setup (server-dat=#f)") (client:setup run-id remaining-tries: remaining-tries)) (let ((num-available (open-run-close tasks:num-in-available-state tasks:open-db run-id))) (thread-sleep! 2) (debug:print-info 0 "client:setup, no server registered, remaining-tries=" remaining-tries " num-available=" num-available) (if (< num-available 2) (begin ;; (open-run-close tasks:server-clean-out-old-records-for-run-id tasks:open-db run-id " client:setup (server-dat=#f)") (server:try-running run-id))) (thread-sleep! 10) ;; give server a little time to start up (client:setup run-id remaining-tries: (- remaining-tries 1))))))))))) ;; keep this as a function to ease future |
︙ | ︙ |
Modified http-transport.scm from [ffb8788cf7] to [14ceeefe67].
︙ | ︙ | |||
288 289 290 291 292 293 294 295 296 297 298 299 300 301 | (define (make-http-transport:server-dat)(make-vector 5)) (define (http-transport:server-dat-get-iface vec) (vector-ref vec 0)) (define (http-transport:server-dat-get-port vec) (vector-ref vec 1)) (define (http-transport:server-dat-get-api-uri vec) (vector-ref vec 2)) (define (http-transport:server-dat-get-api-url vec) (vector-ref vec 3)) (define (http-transport:server-dat-get-api-req vec) (vector-ref vec 4)) ;; ;; connect ;; (define (http-transport:client-connect iface port) (let* ((api-url (conc "http://" iface ":" port "/api")) (api-uri (uri-reference (conc "http://" iface ":" port "/api"))) | > > > > > > > > > | 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 | (define (make-http-transport:server-dat)(make-vector 5)) (define (http-transport:server-dat-get-iface vec) (vector-ref vec 0)) (define (http-transport:server-dat-get-port vec) (vector-ref vec 1)) (define (http-transport:server-dat-get-api-uri vec) (vector-ref vec 2)) (define (http-transport:server-dat-get-api-url vec) (vector-ref vec 3)) (define (http-transport:server-dat-get-api-req vec) (vector-ref vec 4)) (define (http-transport:server-dat-make-url vec) (if (and (http-transport:server-dat-get-iface vec) (http-transport:server-dat-get-port vec)) (conc "http://" (http-transport:server-dat-get-iface vec) ":" (http-transport:server-dat-get-port vec)) #f)) ;; ;; connect ;; (define (http-transport:client-connect iface port) (let* ((api-url (conc "http://" iface ":" port "/api")) (api-uri (uri-reference (conc "http://" iface ":" port "/api"))) |
︙ | ︙ |
Modified megatest.scm from [4877ac5bd9] to [965c358e86].
︙ | ︙ | |||
342 343 344 345 346 347 348 | x " => ")) (common:get-disks) ) "\n")) (set! *didsomething* #t))) (if (args:get-arg "-ping") | | | | | | | > | > < > > | | 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 | x " => ")) (common:get-disks) ) "\n")) (set! *didsomething* #t))) (if (args:get-arg "-ping") (let* ((run-id (string->number (args:get-arg "-run-id"))) (host-port (let ((slst (string-split (args:get-arg "-ping") ":"))) (if (eq? (length slst) 2) (list (car slst)(string->number (cadr slst))) #f))) (toppath (setup-for-run)) (server-db-dat (if (not host-port)(open-run-close tasks:get-server tasks:open-db run-id) #f))) (if (not run-id) (begin (debug:print 0 "ERROR: must specify run-id when doing ping, -run-id n") (print "ERROR: No run-id") (exit 1)) (if (and (not host-port) (not server-db-dat)) (begin (print "ERROR: bad host:port") (exit 1)) (let* ((iface (if host-port (car host-port) (tasks:hostinfo-get-interface server-db-dat))) (port (if host-port (cadr host-port)(tasks:hostinfo-get-port server-db-dat))) (server-dat (http-transport:client-connect iface port)) (login-res (rmt:login-no-auto-client-setup server-dat run-id))) (if (and (list? login-res) (car login-res)) (begin (print "LOGIN_OK") (exit 0)) (begin |
︙ | ︙ | |||
795 796 797 798 799 800 801 | ;;====================================================================== ;; Get test paths matching target, runname, and testpatt (if (or (args:get-arg "-test-files")(args:get-arg "-test-paths")) ;; if we are in a test use the MT_CMDINFO data (if (getenv "MT_CMDINFO") (let* ((startingdir (current-directory)) (cmdinfo (read (open-input-string (base64:base64-decode (getenv "MT_CMDINFO"))))) | < | 798 799 800 801 802 803 804 805 806 807 808 809 810 811 | ;;====================================================================== ;; Get test paths matching target, runname, and testpatt (if (or (args:get-arg "-test-files")(args:get-arg "-test-paths")) ;; if we are in a test use the MT_CMDINFO data (if (getenv "MT_CMDINFO") (let* ((startingdir (current-directory)) (cmdinfo (read (open-input-string (base64:base64-decode (getenv "MT_CMDINFO"))))) (testpath (assoc/default 'testpath cmdinfo)) (test-name (assoc/default 'test-name cmdinfo)) (runscript (assoc/default 'runscript cmdinfo)) (db-host (assoc/default 'db-host cmdinfo)) (run-id (assoc/default 'run-id cmdinfo)) (itemdat (assoc/default 'itemdat cmdinfo)) (state (args:get-arg ":state")) |
︙ | ︙ | |||
843 844 845 846 847 848 849 | ;;====================================================================== ;; Archive tests matching target, runname, and testpatt (if (args:get-arg "-archive") ;; if we are in a test use the MT_CMDINFO data (if (getenv "MT_CMDINFO") (let* ((startingdir (current-directory)) (cmdinfo (read (open-input-string (base64:base64-decode (getenv "MT_CMDINFO"))))) | < | 845 846 847 848 849 850 851 852 853 854 855 856 857 858 | ;;====================================================================== ;; Archive tests matching target, runname, and testpatt (if (args:get-arg "-archive") ;; if we are in a test use the MT_CMDINFO data (if (getenv "MT_CMDINFO") (let* ((startingdir (current-directory)) (cmdinfo (read (open-input-string (base64:base64-decode (getenv "MT_CMDINFO"))))) (testpath (assoc/default 'testpath cmdinfo)) (test-name (assoc/default 'test-name cmdinfo)) (runscript (assoc/default 'runscript cmdinfo)) (db-host (assoc/default 'db-host cmdinfo)) (run-id (assoc/default 'run-id cmdinfo)) (itemdat (assoc/default 'itemdat cmdinfo)) (state (args:get-arg ":state")) |
︙ | ︙ | |||
922 923 924 925 926 927 928 | (define (megatest:step step state status logfile msg) (if (not (getenv "MT_CMDINFO")) (begin (debug:print 0 "ERROR: MT_CMDINFO env var not set, -step must be called *inside* a megatest invoked environment!") (exit 5)) (let* ((cmdinfo (read (open-input-string (base64:base64-decode (getenv "MT_CMDINFO"))))) | < | 923 924 925 926 927 928 929 930 931 932 933 934 935 936 | (define (megatest:step step state status logfile msg) (if (not (getenv "MT_CMDINFO")) (begin (debug:print 0 "ERROR: MT_CMDINFO env var not set, -step must be called *inside* a megatest invoked environment!") (exit 5)) (let* ((cmdinfo (read (open-input-string (base64:base64-decode (getenv "MT_CMDINFO"))))) (testpath (assoc/default 'testpath cmdinfo)) (test-name (assoc/default 'test-name cmdinfo)) (runscript (assoc/default 'runscript cmdinfo)) (db-host (assoc/default 'db-host cmdinfo)) (run-id (assoc/default 'run-id cmdinfo)) (test-id (assoc/default 'test-id cmdinfo)) (itemdat (assoc/default 'itemdat cmdinfo)) |
︙ | ︙ | |||
969 970 971 972 973 974 975 | (args:get-arg "-summarize-items")) (if (not (getenv "MT_CMDINFO")) (begin (debug:print 0 "ERROR: MT_CMDINFO env var not set, commands -test-status, -runstep and -setlog must be called *inside* a megatest environment!") (exit 5)) (let* ((startingdir (current-directory)) (cmdinfo (read (open-input-string (base64:base64-decode (getenv "MT_CMDINFO"))))) | < | 969 970 971 972 973 974 975 976 977 978 979 980 981 982 | (args:get-arg "-summarize-items")) (if (not (getenv "MT_CMDINFO")) (begin (debug:print 0 "ERROR: MT_CMDINFO env var not set, commands -test-status, -runstep and -setlog must be called *inside* a megatest environment!") (exit 5)) (let* ((startingdir (current-directory)) (cmdinfo (read (open-input-string (base64:base64-decode (getenv "MT_CMDINFO"))))) (testpath (assoc/default 'testpath cmdinfo)) (test-name (assoc/default 'test-name cmdinfo)) (runscript (assoc/default 'runscript cmdinfo)) (db-host (assoc/default 'db-host cmdinfo)) (run-id (assoc/default 'run-id cmdinfo)) (test-id (assoc/default 'test-id cmdinfo)) (itemdat (assoc/default 'itemdat cmdinfo)) |
︙ | ︙ |
Modified rmt.scm from [33f581f7a5] to [d0d3068feb].
︙ | ︙ | |||
52 53 54 55 56 57 58 | (loop (- numtries 1))) (begin (debug:print 0 "ERROR: 100 tries and no server, giving up") (exit 1))))))))) (jparams (db:obj->string params)) (res (http-transport:client-api-send-receive run-id connection-info cmd jparams))) (if res | | | < | 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 | (loop (- numtries 1))) (begin (debug:print 0 "ERROR: 100 tries and no server, giving up") (exit 1))))))))) (jparams (db:obj->string params)) (res (http-transport:client-api-send-receive run-id connection-info cmd jparams))) (if res (db:string->obj res) (let ((new-connection-info (client:setup run-id))) (debug:print 0 "WARNING: Communication failed, trying call to http-transport:client-api-send-receive again.") (rmt:send-receive cmd run-id params))))) (define (rmt:send-receive-no-auto-client-setup connection-info cmd run-id params) (let* ((run-id (if run-id run-id 0)) (jparams (db:obj->string params)) ;; (rmt:dat->json-str params)) (res (http-transport:client-api-send-receive run-id connection-info cmd jparams))) (if res (db:string->obj res) res))) ;; Wrap json library for strings (why the ports crap in the first place?) (define (rmt:dat->json-str dat) (with-output-to-string (lambda () (json-write dat)))) |
︙ | ︙ |
Modified server.scm from [307679905c] to [fd1955db21].
︙ | ︙ | |||
120 121 122 123 124 125 126 | (trycount 0)) (if server ;; note: client:start will set *runremote*. this needs to be changed ;; also, client:start will login to the server, also need to change that. ;; ;; client:start returns #t if login was successful. ;; | | > > > | 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 | (trycount 0)) (if server ;; note: client:start will set *runremote*. this needs to be changed ;; also, client:start will login to the server, also need to change that. ;; ;; client:start returns #t if login was successful. ;; (let ((res (server:ping-server run-id (tasks:hostinfo-get-interface server) (tasks:hostinfo-get-port server)))) ;; if the server didn't respond we must remove the record (if res #t (begin (debug:print-info 0 "server at " server " not responding, removing record") (open-run-close tasks:server-force-clean-running-records-for-run-id tasks:open-db run-id " server:check-if-running") res))) #f))) (define (server:ping-server run-id iface port) (with-input-from-pipe |
︙ | ︙ |