Overview
Comment: | Improved auto server reconnect in client.scm but it is still flakey under extreme load |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | v1.60 |
Files: | files | file ages | folders |
SHA1: |
b13bf213d405d1446f3ff22faac32c69 |
User & Date: | matt on 2014-02-18 21:16:23 |
Other Links: | branch diff | manifest | tags |
Context
2014-02-19
| ||
23:30 | Partial fix for issues with $MT_MEGATEST -test-files lookithome.log -target $MT_TARGET :runname $MT_RUNNAME -testpatt ez_fail check-in: 845c375e3f user: matt tags: v1.60 | |
2014-02-18
| ||
21:16 | Improved auto server reconnect in client.scm but it is still flakey under extreme load check-in: b13bf213d4 user: matt tags: v1.60 | |
19:41 | Clean up, removed old references to *runremote* check-in: 7590c8479a user: matt tags: v1.60 | |
Changes
Modified client.scm from [67235b3676] to [4e17077a64].
︙ | ︙ | |||
51 52 53 54 55 56 57 | ;; 2. We are a run tests, list runs or other interactive process and we must figure out ;; *transport-type* and *runremote* from the monitor.db ;; ;; client:setup ;; ;; lookup_server, need to remove *runremote* stuff ;; | | | | > > | | | | | | | | > > > | > > | | | | | | | | | | > > > | | 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 | ;; 2. We are a run tests, list runs or other interactive process and we must figure out ;; *transport-type* and *runremote* from the monitor.db ;; ;; client:setup ;; ;; lookup_server, need to remove *runremote* stuff ;; (define (client:setup run-id #!key (remaining-tries 20) (failed-connects 0)) (debug:print 0 "INFO: client:setup remaining-tries=" remaining-tries) (if (<= remaining-tries 0) (begin (debug:print 0 "ERROR: failed to start or connect to server for run-id " run-id) (exit 1)) (let ((server-dat (and run-id (hash-table-ref/default *runremote* run-id #f)))) (if server-dat (let ((start-res (http-transport:client-connect run-id ;; NB// confusion over server-dat and connection result! (car server-dat) (cadr server-dat)))) (if start-res ;; sucessful login? start-res (if (and (< remaining-tries 10) (odd? remaining-tries)) (begin ;; login failed (hash-table-delete! *runremote* run-id) (open-run-close tasks:server-force-clean-run-record tasks:open-db run-id (car server-dat) (cadr server-dat)) (thread-sleep! 5) (client:setup run-id remaining-tries: (- remaining-tries 1))) (begin (thread-sleep! 5) (client:setup run-id remaining-tries: (- remaining-tries 1)))))) (let* ((server-dat (open-run-close tasks:get-server tasks:open-db run-id))) (if server-dat (let ((start-res (http-transport:client-connect run-id (tasks:hostinfo-get-interface server-dat) (tasks:hostinfo-get-port server-dat)))) (if start-res start-res (if (and (< remaining-tries 10) (odd? remaining-tries)) (begin ;; login failed (hash-table-delete! *runremote* run-id) (open-run-close tasks:server-force-clean-run-record tasks:open-db run-id (tasks:hostinfo-get-interface server-dat) (tasks:hostinfo-get-port server-dat)) (thread-sleep! 2) (server:try-running run-id) (thread-sleep! 5) ;; give server a little time to start up (client:setup run-id remaining-tries: (- remaining-tries 1))) (begin (thread-sleep! 5) (client:setup run-id remaining-tries: (- remaining-tries 1)))))) (begin ;; no server registered (thread-sleep! 2) (server:try-running run-id) (thread-sleep! 5) ;; give server a little time to start up (client:setup run-id remaining-tries: (- remaining-tries 1))))))))) ;; keep this as a function to ease future |
︙ | ︙ |
Modified server.scm from [8eb4730569] to [3ef5cd2281].
︙ | ︙ | |||
105 106 107 108 109 110 111 | (define (server:reply return-addr query-sig success/fail result) (db:obj->string (vector success/fail query-sig result))) ;; > file 2>&1 (define (server:try-running run-id) (let* ((rand-name (random 100)) (cmdln (conc (if (getenv "MT_MEGATEST") (getenv "MT_MEGATEST") "megatest") | | > | 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 | (define (server:reply return-addr query-sig success/fail result) (db:obj->string (vector success/fail query-sig result))) ;; > file 2>&1 (define (server:try-running run-id) (let* ((rand-name (random 100)) (cmdln (conc (if (getenv "MT_MEGATEST") (getenv "MT_MEGATEST") "megatest") ;; " -server - -run-id " run-id " name=" rand-name " > " *toppath* "/db/" run-id "-" rand-name ".log 2>&1 &"))) " -server - -run-id " run-id " > " *toppath* "/db/" run-id ".log 2>&1 &"))) (debug:print 0 "INFO: Starting server (" cmdln ") as none running ...") (push-directory *toppath*) (system cmdln) (pop-directory))) (define (server:check-if-running run-id) (let loop ((server (open-run-close tasks:get-server tasks:open-db run-id)) |
︙ | ︙ |