Overview
Comment: | Clean up that broke stuff :( - reapply needed |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | broken-fixes |
Files: | files | file ages | folders |
SHA1: |
6e33de13e069e95b680ee5b8430bb718 |
User & Date: | mrwellan on 2014-02-18 13:28:29 |
Other Links: | branch diff | manifest | tags |
Context
2014-03-03
| ||
08:56 | Bringing these changes forward to verify they were accounted for Closed-Leaf check-in: b137ace97d user: mrwellan tags: broken-fixes | |
2014-02-18
| ||
13:28 | Clean up that broke stuff :( - reapply needed check-in: 6e33de13e0 user: mrwellan tags: broken-fixes | |
06:48 | Merging in old v1.60 branch to create new v1.60 branch check-in: 7d4d4f4f88 user: mrwellan tags: v1.60 | |
Changes
Modified client.scm from [5cb1c0c7dc] to [f6d1b77f60].
︙ | ︙ | |||
52 53 54 55 56 57 58 | ;; *transport-type* and *runremote* from the monitor.db ;; ;; client:setup ;; ;; lookup_server, need to remove *runremote* stuff ;; (define (client:setup run-id #!key (remaining-tries 10)) | < > | | | | | < < > | | | | | | | | | < < > | | | 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 | ;; *transport-type* and *runremote* from the monitor.db ;; ;; client:setup ;; ;; lookup_server, need to remove *runremote* stuff ;; (define (client:setup run-id #!key (remaining-tries 10)) (if (<= remaining-tries 0) (begin (debug:print 0 "ERROR: failed to start or connect to server for run-id " run-id) (exit 1)) (let ((server-dat (and run-id (hash-table-ref/default *runremote* run-id #f)))) (thread-sleep! 1) ;; try to avoid race conditons (if server-dat (let ((new-dat (http-transport:client-connect run-id ;; NB// confusion over server-dat and connection result! (car server-dat) (cadr server-dat)))) (if new-dat ;; sucessful login? new-dat (begin ;; login failed (debug:print 0 "INFO: login failed in client:setup with existing server-dat: " server-dat ", new-dat: " new-dat ", cleaning out records and then trying again") (hash-table-delete! *runremote* run-id) (open-run-close tasks:server-force-clean-run-record tasks:open-db run-id (car server-dat) (cadr server-dat)) (thread-sleep! 5) (client:setup run-id remaining-tries: (- remaining-tries 1))))) (let* ((server-info (open-run-close tasks:get-server tasks:open-db run-id))) (if server-info (let ((new-dat (http-transport:client-connect run-id (tasks:hostinfo-get-interface server-info) (tasks:hostinfo-get-port server-info)))) (if new-dat new-dat (begin ;; login failed (debug:print 0 "INFO: login failed in client:setup with no existing server-dat: " server-dat ", new-dat: " new-dat ", and server-info: " server-info ", cleaning out records and then trying again") (hash-table-delete! *runremote* run-id) (open-run-close tasks:server-force-clean-run-record tasks:open-db run-id (tasks:hostinfo-get-interface server-dat) (tasks:hostinfo-get-port server-dat)) ;; (thread-sleep! 2) (server:try-running run-id) (thread-sleep! 5) ;; give server a little time to start up (client:setup run-id remaining-tries: (- remaining-tries 1))))) (begin ;; no server registered ;; (thread-sleep! 2) (server:try-running run-id) (thread-sleep! 5) ;; give server a little time to start up (client:setup run-id remaining-tries: (- remaining-tries 1))))))))) ;; keep this as a function to ease future (define (client:start run-id server-info) (http-transport:client-connect run-id |
︙ | ︙ |
Modified http-transport.scm from [fdad451b60] to [38152c3968].
︙ | ︙ | |||
265 266 267 268 269 270 271 | ;; ;; connect ;; (define (http-transport:client-connect run-id iface port) (let* ((uri-dat (make-request method: 'POST uri: (uri-reference (conc "http://" iface ":" port "/ctrl")))) (uri-api-dat (make-request method: 'POST uri: (uri-reference (conc "http://" iface ":" port "/api")))) | | | | | | | 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 | ;; ;; connect ;; (define (http-transport:client-connect run-id iface port) (let* ((uri-dat (make-request method: 'POST uri: (uri-reference (conc "http://" iface ":" port "/ctrl")))) (uri-api-dat (make-request method: 'POST uri: (uri-reference (conc "http://" iface ":" port "/api")))) (server-dat (list iface port uri-dat uri-api-dat)) (login-res (rmt:login-no-auto-client-setup server-dat run-id))) ;; (hash-table-set! *runremote* run-id serverdat) ;; may or may not be good ... (if (and (list? login-res) (car login-res)) (begin (debug:print-info 2 "Logged in and connected to " iface ":" port) (hash-table-set! *runremote* run-id server-dat) server-dat) (begin (debug:print-info 0 "ERROR: Failed to login or connect to " iface ":" port) #f)))) ;; run http-transport:keep-running in a parallel thread to monitor that the db is being ;; used and to shutdown after sometime if it is not. ;; |
︙ | ︙ |
Modified rmt.scm from [a5c523b577] to [2624718f57].
︙ | ︙ | |||
42 43 44 45 46 47 48 | (connection-info (let ((cinfo (hash-table-ref/default *runremote* run-id #f))) (if cinfo cinfo (let loop ((numtries 100)) (thread-sleep! 1) (let ((res (client:setup run-id))) (if res | | | 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 | (connection-info (let ((cinfo (hash-table-ref/default *runremote* run-id #f))) (if cinfo cinfo (let loop ((numtries 100)) (thread-sleep! 1) (let ((res (client:setup run-id))) (if res (hash-table-ref *runremote* run-id) ;; client:setup filled this in (hopefully) (if (> numtries 0) (loop (- numtries 1)) (begin (debug:print 0 "ERROR: 100 tries and no server, giving up") (exit 1))))))))) (jparams (db:obj->string params)) (res (http-transport:client-api-send-receive run-id connection-info cmd jparams))) |
︙ | ︙ |
Modified server.scm from [8eb4730569] to [b8b02ced57].
︙ | ︙ | |||
105 106 107 108 109 110 111 | (define (server:reply return-addr query-sig success/fail result) (db:obj->string (vector success/fail query-sig result))) ;; > file 2>&1 (define (server:try-running run-id) (let* ((rand-name (random 100)) (cmdln (conc (if (getenv "MT_MEGATEST") (getenv "MT_MEGATEST") "megatest") | | > > | | | | > > > > | 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 | (define (server:reply return-addr query-sig success/fail result) (db:obj->string (vector success/fail query-sig result))) ;; > file 2>&1 (define (server:try-running run-id) (let* ((rand-name (random 100)) (cmdln (conc (if (getenv "MT_MEGATEST") (getenv "MT_MEGATEST") "megatest") " -server - -run-id " run-id " name=" rand-name " > " *toppath* "/db/" run-id ".log 2>&1 &"))) ;; ".log &" ))) (debug:print 0 "INFO: Starting server (" cmdln ") as none running ...") (push-directory *toppath*) (system cmdln) (pop-directory))) (define (server:check-if-running run-id) (let loop ((server-info (open-run-close tasks:get-server tasks:open-db run-id)) (trycount 0)) (thread-sleep! 2) (if server-info ;; note: client:start will set *runremote*. this needs to be changed ;; also, client:start will login to the server, also need to change that. ;; ;; client:start returns #t if login was successful. ;; (let ((res (http-transport:client-connect run-id (tasks:hostinfo-get-interface server-info) (tasks:hostinfo-get-port server-info)))) ;; if the server didn't respond we must remove the record (if res res (begin (debug:print 0 "WARNING: running server not reachable, removing record: " server-info) (open-run-close tasks:server-force-clean-running-records-for-run-id tasks:open-db run-id) res))) #f))) |
Modified tasks.scm from [19f1225d86] to [6da0e18c74].
︙ | ︙ | |||
93 94 95 96 97 98 99 | (define (tasks:hostinfo-get-hostname vec) (vector-ref vec 6)) (define (tasks:server-lock-slot mdb run-id) (tasks:server-clean-out-old-records-for-run-id mdb run-id) (if (< (tasks:num-in-available-state mdb run-id) 4) (begin (tasks:server-set-available mdb run-id) | | | 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 | (define (tasks:hostinfo-get-hostname vec) (vector-ref vec 6)) (define (tasks:server-lock-slot mdb run-id) (tasks:server-clean-out-old-records-for-run-id mdb run-id) (if (< (tasks:num-in-available-state mdb run-id) 4) (begin (tasks:server-set-available mdb run-id) (thread-sleep! 0.2) ;; Try removing this. It may not be needed. (tasks:server-am-i-the-server? mdb run-id)) #f)) ;; register that this server may come online (first to register goes though with the process) (define (tasks:server-set-available mdb run-id) (sqlite3:execute mdb |
︙ | ︙ | |||
126 127 128 129 130 131 132 | (set! res num-in-queue)) mdb "SELECT count(id) FROM servers WHERE run_id=?;" run-id) res)) (define (tasks:server-clean-out-old-records-for-run-id mdb run-id) | | | 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 | (set! res num-in-queue)) mdb "SELECT count(id) FROM servers WHERE run_id=?;" run-id) res)) (define (tasks:server-clean-out-old-records-for-run-id mdb run-id) (sqlite3:execute mdb "DELETE FROM servers WHERE state in ('available','shutting-down') AND (strftime('%s','now') - start_time) > 100 AND run_id=?;" run-id)) (define (tasks:server-force-clean-running-records-for-run-id mdb run-id) (sqlite3:execute mdb "DELETE FROM servers WHERE state = 'running' AND run_id=?;" run-id)) (define (tasks:server-force-clean-run-record mdb run-id iface port) (sqlite3:execute mdb "DELETE FROM servers WHERE state = 'running' AND run_id=? AND interface=? AND port=?;" run-id iface port)) |
︙ | ︙ | |||
182 183 184 185 186 187 188 | (exit 1)) (car (db:get-rows all)))) (header (db:get-header all)) (id (db:get-value-by-header first header "id")) (hostname (db:get-value-by-header first header "hostname")) (pid (db:get-value-by-header first header "pid")) (priority (db:get-value-by-header first header "priority"))) | < > | | | | > > > | 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 | (exit 1)) (car (db:get-rows all)))) (header (db:get-header all)) (id (db:get-value-by-header first header "id")) (hostname (db:get-value-by-header first header "hostname")) (pid (db:get-value-by-header first header "pid")) (priority (db:get-value-by-header first header "priority"))) ;; for now a basic check. add tiebreaking by priority later (let* ((my-pid (current-process-id)) (res (if (and (equal? hostname (get-host-name)) (equal? pid my-pid)) id #f))) (debug:print 0 "INFO: am-i-the-server got record " first ", my-pid: " my-pid ", pid: " pid ", result: " res) res))) ;; Use: (db:get-value-by-header (car (db:get-rows dat)) (db:get-header dat) "fieldname") ;; to extract info from the structure returned ;; (define (tasks:server-get-servers-vying-for-run-id mdb run-id) (let* ((header (list "id" "hostname" "pid" "interface" "port" "pubport" "state" "run_id" "priority" "start_time")) (selstr (string-intersperse header ",")) |
︙ | ︙ |
Modified tests/fullrun/megatest.config from [c76d4b28b2] to [bc391c991d].
︙ | ︙ | |||
75 76 77 78 79 80 81 82 83 84 85 86 87 88 | [validvalues] state start end 0 1 - 2 status pass fail n/a 0 1 running - 2 # These are set before all tests, override them # in the testconfig [pre-launch-env-overrides] section [env-override] # This variable is honored by the loadrunner script. The value is in percent # a value of 200 will stop new jobs from starting. MAX_ALLOWED_LOAD 200 # MT_XTERM_CMD overrides the terminal command # MT_XTERM_CMD xterm -bg lightgreen -fg black | > | 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 | [validvalues] state start end 0 1 - 2 status pass fail n/a 0 1 running - 2 # These are set before all tests, override them # in the testconfig [pre-launch-env-overrides] section [env-override] # This variable is honored by the loadrunner script. The value is in percent # a value of 200 will stop new jobs from starting. MAX_ALLOWED_LOAD 200 # MT_XTERM_CMD overrides the terminal command # MT_XTERM_CMD xterm -bg lightgreen -fg black |
︙ | ︙ | |||
101 102 103 104 105 106 107 | # The empty var should have a definition with null string EMPTY_VAR WRAPPEDVAR This var should have the work blah thrice: \ blah \ blah | < < < < | 102 103 104 105 106 107 108 109 110 111 112 113 114 115 | # The empty var should have a definition with null string EMPTY_VAR WRAPPEDVAR This var should have the work blah thrice: \ blah \ blah # XTERM [system xterm] # RUNDEAD [system exit 56] [server] # If the server can't be started on this port it will try the next port until # it succeeds |
︙ | ︙ |