Overview
Comment: | Completed server re-write |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | re-re-factor-server |
Files: | files | file ages | folders |
SHA1: |
cd8a4f1a4169081fd4f0fd827a933828 |
User & Date: | matt on 2014-02-17 21:11:03 |
Other Links: | branch diff | manifest | tags |
Context
2014-02-17
| ||
23:04 | Partial fix for borked server-dat Closed-Leaf check-in: 8ec315acf0 user: matt tags: re-re-factor-server | |
21:11 | Completed server re-write check-in: cd8a4f1a41 user: matt tags: re-re-factor-server | |
19:36 | Trimmed out some junk code and fixed some logic in the server start up sequencing check-in: 4a2103f62b user: matt tags: re-re-factor-server | |
Changes
Modified client.scm from [065a0a550e] to [5cb1c0c7dc].
︙ | ︙ | |||
51 52 53 54 55 56 57 | ;; 2. We are a run tests, list runs or other interactive process and we must figure out ;; *transport-type* and *runremote* from the monitor.db ;; ;; client:setup ;; ;; lookup_server, need to remove *runremote* stuff ;; | | > | | | > | | > | > | | 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 | ;; 2. We are a run tests, list runs or other interactive process and we must figure out ;; *transport-type* and *runremote* from the monitor.db ;; ;; client:setup ;; ;; lookup_server, need to remove *runremote* stuff ;; (define (client:setup run-id #!key (remaining-tries 10)) (debug:print 0 "INFO: client:setup remaining-tries=" remaining-tries) (if (<= remaining-tries 0) (begin (debug:print 0 "ERROR: failed to start or connect to server for run-id " run-id) (exit 1)) (let ((server-dat (and run-id (hash-table-ref/default *runremote* run-id #f)))) (if server-dat (let ((start-res (http-transport:client-connect run-id ;; NB// confusion over server-dat and connection result! (tasks:hostinfo-get-interface server-dat) (tasks:hostinfo-get-port server-dat)))) (if start-res ;; sucessful login? (begin (hash-table-set! *runremote* run-id start-res) start-res) (begin ;; login failed (hash-table-delete! *runremote* run-id) (open-run-close tasks:server-force-clean-run-record tasks:open-db run-id (tasks:hostinfo-get-interface server-dat) (tasks:hostinfo-get-port server-dat)) (thread-sleep! 5) (client:setup run-id remaining-tries: (- remaining-tries 1))))) (let* ((server-dat (open-run-close tasks:get-server tasks:open-db run-id))) (if server-dat (let ((start-res (http-transport:client-connect run-id (tasks:hostinfo-get-interface server-dat) (tasks:hostinfo-get-port server-dat)))) (if start-res (begin (hash-table-set! *runremote* run-id start-res) start-res) (begin ;; login failed (hash-table-delete! *runremote* run-id) (open-run-close tasks:server-force-clean-run-record tasks:open-db run-id (tasks:hostinfo-get-interface server-dat) (tasks:hostinfo-get-port server-dat)) (thread-sleep! 2) (server:try-running run-id) (thread-sleep! 5) ;; give server a little time to start up (client:setup run-id remaining-tries: (- remaining-tries 1))))) (begin ;; no server registered (thread-sleep! 2) (server:try-running run-id) (thread-sleep! 5) ;; give server a little time to start up (client:setup run-id remaining-tries: (- remaining-tries 1))))))))) ;; keep this as a function to ease future (define (client:start run-id server-info) (http-transport:client-connect run-id (tasks:hostinfo-get-interface server-info) (tasks:hostinfo-get-port server-info))) |
︙ | ︙ |
Modified http-transport.scm from [7e052a2b1c] to [d9e94ba5d7].
︙ | ︙ | |||
398 399 400 401 402 403 404 405 406 407 408 409 410 411 | ;; ;; start_server? ;; (define (http-transport:launch run-id) (set! *run-id* run-id) (if (args:get-arg "-daemonize") (daemon:ize)) (let ((server-id (open-run-close tasks:server-lock-slot tasks:open-db run-id))) (if (not server-id) (begin ;; since we didn't get the server lock we are going to clean up and bail out (debug:print-info 2 "INFO: server pid=" (current-process-id) ", hostname=" (get-host-name) " not starting due to other candidates ahead in start queue") (open-run-close tasks:server-delete-records-for-this-pid tasks:open-db)) (let* ((th2 (make-thread (lambda () | > > > > | 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 | ;; ;; start_server? ;; (define (http-transport:launch run-id) (set! *run-id* run-id) (if (args:get-arg "-daemonize") (daemon:ize)) (if (server:check-if-running run-id) (begin (debug:print 0 "INFO: Server for run-id " run-id " already running") (exit 0))) (let ((server-id (open-run-close tasks:server-lock-slot tasks:open-db run-id))) (if (not server-id) (begin ;; since we didn't get the server lock we are going to clean up and bail out (debug:print-info 2 "INFO: server pid=" (current-process-id) ", hostname=" (get-host-name) " not starting due to other candidates ahead in start queue") (open-run-close tasks:server-delete-records-for-this-pid tasks:open-db)) (let* ((th2 (make-thread (lambda () |
︙ | ︙ |
Modified rmt.scm from [deb40682ce] to [206f8532c1].
︙ | ︙ | |||
33 34 35 36 37 38 39 | ;;====================================================================== ;; S U P P O R T F U N C T I O N S ;;====================================================================== ;; cmd is a symbol ;; vars is a json string encoding the parameters for the call ;; | | > | > > > > > > > > > > > > | > | | 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 | ;;====================================================================== ;; S U P P O R T F U N C T I O N S ;;====================================================================== ;; cmd is a symbol ;; vars is a json string encoding the parameters for the call ;; (define (rmt:send-receive cmd rid params) (let* ((run-id (if rid rid 0)) (connection-info (let ((cinfo (hash-table-ref/default *runremote* run-id #f))) (if cinfo cinfo (let loop ((numtries 100)) (thread-sleep! 1) (let ((res (client:setup run-id))) (if res res (if (> numtries 0) (loop (- numtries 1)) (begin (debug:print 0 "ERROR: 100 tries and no server, giving up") (exit 1))))))))) (jparams (db:obj->string params)) (res (http-transport:client-api-send-receive run-id connection-info cmd jparams))) (if res (db:string->obj res) ;; (rmt:json-str->dat res) (let ((new-connection-info (client:setup run-id))) (debug:print 0 "WARNING: Communication failed, trying call to http-transport:client-api-send-receive again.") (rmt:send-receive cmd run-id params))))) (define (rmt:send-receive-no-auto-client-setup connection-info cmd run-id params) (let* ((run-id (if run-id run-id 0)) (jparams (db:obj->string params)) ;; (rmt:dat->json-str params)) (res (http-transport:client-api-send-receive run-id connection-info cmd jparams numretries: 3))) (if res (db:string->obj res) ;; (rmt:json-str->dat res) ;; this one does NOT keep trying res))) ;; Wrap json library for strings (why the ports crap in the first place?) |
︙ | ︙ |
Modified server.scm from [cffc1a2257] to [8eb4730569].
︙ | ︙ | |||
101 102 103 104 105 106 107 108 | ;; When using zmq this would send the message back (two step process) ;; with spiffy or rpc this simply returns the return data to be returned ;; (define (server:reply return-addr query-sig success/fail result) (db:obj->string (vector success/fail query-sig result))) (define (server:try-running run-id) | > > | | | 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 | ;; When using zmq this would send the message back (two step process) ;; with spiffy or rpc this simply returns the return data to be returned ;; (define (server:reply return-addr query-sig success/fail result) (db:obj->string (vector success/fail query-sig result))) ;; > file 2>&1 (define (server:try-running run-id) (let* ((rand-name (random 100)) (cmdln (conc (if (getenv "MT_MEGATEST") (getenv "MT_MEGATEST") "megatest") " -server - -run-id " run-id " name=" rand-name " > " *toppath* "/db/" run-id "-" rand-name ".log 2>&1 &"))) (debug:print 0 "INFO: Starting server (" cmdln ") as none running ...") (push-directory *toppath*) (system cmdln) (pop-directory))) (define (server:check-if-running run-id) (let loop ((server (open-run-close tasks:get-server tasks:open-db run-id)) |
︙ | ︙ |
Modified tasks.scm from [c13323f697] to [19f1225d86].
︙ | ︙ | |||
90 91 92 93 94 95 96 | (define (tasks:hostinfo-get-pubport vec) (vector-ref vec 3)) (define (tasks:hostinfo-get-transport vec) (vector-ref vec 4)) (define (tasks:hostinfo-get-pid vec) (vector-ref vec 5)) (define (tasks:hostinfo-get-hostname vec) (vector-ref vec 6)) (define (tasks:server-lock-slot mdb run-id) (tasks:server-clean-out-old-records-for-run-id mdb run-id) | < | | 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 | (define (tasks:hostinfo-get-pubport vec) (vector-ref vec 3)) (define (tasks:hostinfo-get-transport vec) (vector-ref vec 4)) (define (tasks:hostinfo-get-pid vec) (vector-ref vec 5)) (define (tasks:hostinfo-get-hostname vec) (vector-ref vec 6)) (define (tasks:server-lock-slot mdb run-id) (tasks:server-clean-out-old-records-for-run-id mdb run-id) (if (< (tasks:num-in-available-state mdb run-id) 4) (begin (tasks:server-set-available mdb run-id) (thread-sleep! 2) ;; Try removing this. It may not be needed. (tasks:server-am-i-the-server? mdb run-id)) #f)) ;; register that this server may come online (first to register goes though with the process) (define (tasks:server-set-available mdb run-id) (sqlite3:execute mdb "INSERT INTO servers (pid,hostname,port,pubport,start_time, priority,state,mt_version,heartbeat, interface,transport,run_id) VALUES(?, ?, ?, ?, strftime('%s','now'), ?, ?, ?,-1,?, ?, ?);" |
︙ | ︙ |
Modified tests/Makefile from [f03136dea1] to [488ee3eee2].
︙ | ︙ | |||
20 21 22 23 24 25 26 | NEWTARGET = "$(OS)/$(FS)/$(VER)" TARGET = "ubuntu/nfs/none" all : test1 test2 test3 test4 test5 test6 test7 test8 test9 server : cd ..;make;make install | | | 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 | NEWTARGET = "$(OS)/$(FS)/$(VER)" TARGET = "ubuntu/nfs/none" all : test1 test2 test3 test4 test5 test6 test7 test8 test9 server : cd ..;make;make install cd fullrun;../../bin/megatest -server - -debug $(DEBUG) -run-id $(RUNID) stopserver : cd ..;make && make install cd fullrun;$(MEGATEST) -stop-server 0 repl : cd ..;make && make install |
︙ | ︙ |