Megatest

Check-in [d2a221e401]
Login
Overview
Comment:Experimental fix for starting up servers on communication failure
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | inmem-per-run-db-per-run-server
Files: files | file ages | folders
SHA1: d2a221e401e573a31568ed06ab41707259ba124a
User & Date: matt on 2014-01-30 00:52:46
Other Links: branch diff | manifest | tags
Context
2014-01-30
19:52
Fixed db access for dashboard check-in: ba97cc54c6 user: matt tags: inmem-per-run-db-per-run-server
00:52
Experimental fix for starting up servers on communication failure check-in: d2a221e401 user: matt tags: inmem-per-run-db-per-run-server
2014-01-29
21:03
Fixed currentisblah.sh grep export of CURRENT check-in: 39d8c4920a user: matt tags: inmem-per-run-db-per-run-server
Changes

Modified http-transport.scm from [383d241383] to [ea6604d38f].

291
292
293
294
295
296
297
298

299
300
301
302
303
304
305
306
307
308
309
310

311
312

313
314
315
316
317
318
319
291
292
293
294
295
296
297

298
299
300
301
302
303
304
305
306
307
308
309

310
311

312
313
314
315
316
317
318
319







-
+











-
+

-
+







	   (debug:print-info 11 "match=" match)
	   (let ((final (cadr match)))
	     (debug:print-info 11 "final=" final)
	     final)))))))

;; Send "cmd" with json payload "params" to serverdat and receive result
;;
(define (http-transport:client-api-send-receive serverdat cmd params #!key (numretries 30))
(define (http-transport:client-api-send-receive run-id serverdat cmd params #!key (numretries 30))
  (let* ((fullurl    (if (list? serverdat)
			 (cadddr serverdat) ;; this is the uri for /api
			 (begin
			   (debug:print 0 "FATAL ERROR: http-transport:client-send-receive called with no server info")
			   (exit 1))))
	 (res        #f))
    (handle-exceptions
     exn
     (begin
       ;; TODO: Send this output to a log file so it isn't lost when running as daemon
       (print "ERROR IN http-transport:client-send-receive " ((condition-property-accessor 'exn 'message) exn))
       (thread-sleep! 2)
       (server:ensure-running run-id)
       (if (> numretries 0)
	   (http-transport:client-api-send-receive serverdat cmd params numretries: (- numretries 1))))
	   (http-transport:client-api-send-receive run-id serverdat cmd params numretries: (- numretries 1))))
     (begin
       (debug:print-info 11 "fullurl=" fullurl "\n")
       ;; set up the http-client here
       (max-retry-attempts 5)
       ;; consider all requests indempotent
       (retry-request? (lambda (request)
			 #t))   ;;  		 (thread-sleep! (/ (if (> numretries 100) 100 numretries) 10))

Modified rmt.scm from [c769930165] to [c7c7b5a349].

43
44
45
46
47
48
49
50

51
52
53
54
55
56
57
43
44
45
46
47
48
49

50
51
52
53
54
55
56
57







-
+







     (debug:print 0 "ERROR: Not yet (re)supported")
     (exit 1))
    ((fs http)
     ;; if run-id is #f send the request to run-id = 0 server. This will be for main.db
     ;;
     (let* ((connection-info (client:setup (if run-id run-id 0)))
	    (jparams         (db:obj->string params)) ;; (rmt:dat->json-str params))
	    (res (http-transport:client-api-send-receive connection-info cmd jparams)))
	    (res (http-transport:client-api-send-receive run-id connection-info cmd jparams)))
       (if res
	   (db:string->obj res) ;; (rmt:json-str->dat res)
	   (begin
	     (debug:print 0 "ERROR: Bad value from http-transport:client-api-send-receive " res)
	     #f))))
    (else
     (debug:print 0 "ERROR: Transport " *transport-type* " not yet (re)supported")

Modified server.scm from [15b87d66c1] to [25a04c111c].

1
2
3
4
5
6
7
8
9
10
11
12
13

14
15
16
17
18
19
20
1
2
3
4
5
6
7
8
9
10
11
12

13
14
15
16
17
18
19
20












-
+








;; Copyright 2006-2012, Matthew Welland.
;; 
;;  This program is made available under the GNU GPL version 2.0 or
;;  greater. See the accompanying file COPYING for details.
;; 
;;  This program is distributed WITHOUT ANY WARRANTY; without even the
;;  implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
;;  PURPOSE.

(require-extension (srfi 18) extras tcp s11n)

(use srfi-1 posix regex regex-case srfi-69 hostinfo md5 message-digest)
(use srfi-1 posix regex regex-case srfi-69 hostinfo md5 message-digest directory-utils)
;; (use zmq)

(use spiffy uri-common intarweb http-client spiffy-request-vars)

(declare (unit server))

(declare (uses common))
123
124
125
126
127
128
129
130

131
132
133
134

135

136
137
138
139
140
141
142
143
144
145
146
147
123
124
125
126
127
128
129

130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149







-
+




+

+












  (let loop ((servers  (open-run-close tasks:get-server tasks:open-db run-id))
	     (trycount 0))
    (if (or (not servers)
	    (null? servers))
	(begin
	  (if (even? trycount) ;; just do the server start every other time through this loop (every 8 seconds)
	      (let ((cmdln (conc (if (getenv "MT_MEGATEST") (getenv "MT_MEGATEST") "megatest")
				 " -server - -daemonize -run-id " run-id)))
				 " -server - -run-id " run-id " &> " run-id ".log &")))
		(debug:print 0 "INFO: Starting server (" cmdln ") as none running ...")
		;; (server:launch (string->symbol (args:get-arg "-transport" "http"))))
		;; no need to use fork, no need to do the list-servers trick. Just start the damn server, it will exit on it's own
		;; if there is an existing server
		(push-directory *toppath*)
		(system cmdln)
		(pop-directory)
		(thread-sleep! 3)
		;; (process-run (car (argv)) (list "-server" "-" "-daemonize" "-transport" (args:get-arg "-transport" "http")))
		)
	      (begin
		(debug:print-info 0 "Waiting for server to start")
		(thread-sleep! 4)))
	  (if (< trycount 10)
	      (loop (open-run-close tasks:get-server tasks:open-db run-id) 
		    (+ trycount 1))
	      (debug:print 0 "WARNING: Couldn't start or find a server.")))
	(debug:print 2 "INFO: Server(s) running " servers)
	)))