Megatest

Check-in [8c3d4217c8]
Login
Overview
Comment:Merged in refactoring of http-transport
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.54
Files: files | file ages | folders
SHA1: 8c3d4217c809b399f2468bed5b89ddb5511515bc
User & Date: matt on 2013-05-08 00:42:16
Other Links: branch diff | manifest | tags
Context
2013-05-08
10:58
Improved timeout handling in client/server communication check-in: be36b8399e user: mrwellan tags: v1.54
00:42
Merged in refactoring of http-transport check-in: 8c3d4217c8 user: matt tags: v1.54
00:34
Added timeout on no server. Auto starts a server on timeout Closed-Leaf check-in: bf5639be89 user: matt tags: refactor-http-transport
2013-05-07
23:40
Added mutex and close-all-connections to http transport check-in: 9681d97154 user: matt tags: v1.54
Changes

Modified client.scm from [6a286c8420] to [8b3b6e88e1].

15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
(require-extension (srfi 18) extras tcp s11n)

(use sqlite3 srfi-1 posix regex regex-case srfi-69 hostinfo md5 message-digest)
;; (use zmq)

(import (prefix sqlite3 sqlite3:))

(use spiffy uri-common intarweb http-client spiffy-request-vars)

(declare (unit client))

(declare (uses common))
(declare (uses db))
(declare (uses tasks)) ;; tasks are where stuff is maintained about what is running.








|







15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
(require-extension (srfi 18) extras tcp s11n)

(use sqlite3 srfi-1 posix regex regex-case srfi-69 hostinfo md5 message-digest)
;; (use zmq)

(import (prefix sqlite3 sqlite3:))

(use spiffy uri-common intarweb http-client spiffy-request-vars uri-common intarweb)

(declare (unit client))

(declare (uses common))
(declare (uses db))
(declare (uses tasks)) ;; tasks are where stuff is maintained about what is running.

73
74
75
76
77
78
79
80






81
82
83
84
85
86
87
    			       (string->symbol (tasks:hostinfo-get-transport hostinfo))
			       'fs))
    ;; ;; DEBUG STUFF
    ;; (if (eq? *transport-type* 'fs)(begin (print "ERROR!!!!!!! refusing to run with transport " *transport-type*)(exit 99)))
    
    (debug:print-info 11 "Using transport type of " *transport-type* (if hostinfo (conc " to connect to " hostinfo) ""))
    (case *transport-type* 
      ((fs)(if (not *megatest-db*)(set! *megatest-db* (open-db))))






      ((http)
       (http-transport:client-connect (tasks:hostinfo-get-interface hostinfo)
				      (tasks:hostinfo-get-port hostinfo)))
      ((zmq)
       (zmq-transport:client-connect (tasks:hostinfo-get-interface hostinfo)
				     (tasks:hostinfo-get-port      hostinfo)
				     (tasks:hostinfo-get-pubport   hostinfo)))







|
>
>
>
>
>
>







73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
    			       (string->symbol (tasks:hostinfo-get-transport hostinfo))
			       'fs))
    ;; ;; DEBUG STUFF
    ;; (if (eq? *transport-type* 'fs)(begin (print "ERROR!!!!!!! refusing to run with transport " *transport-type*)(exit 99)))
    
    (debug:print-info 11 "Using transport type of " *transport-type* (if hostinfo (conc " to connect to " hostinfo) ""))
    (case *transport-type* 
      ((fs) ;; (if (not *megatest-db*)(set! *megatest-db* (open-db))))
       ;; we are not doing fs any longer. let's cheat and start up a server
       (set! *transport-type* #f)
       (system "megatest -list-servers | grep alive || megatest -server - -daemonize && sleep 3")
       (thread-sleep! 1)
       (if (> numtries 0)
	   (client:setup numtries: (- numtries 1))))
      ((http)
       (http-transport:client-connect (tasks:hostinfo-get-interface hostinfo)
				      (tasks:hostinfo-get-port hostinfo)))
      ((zmq)
       (zmq-transport:client-connect (tasks:hostinfo-get-interface hostinfo)
				     (tasks:hostinfo-get-port      hostinfo)
				     (tasks:hostinfo-get-pubport   hostinfo)))

Modified db.scm from [98d99ca28b] to [113f8f5bae].

1262
1263
1264
1265
1266
1267
1268


1269
1270



1271
1272
1273
1274
1275
1276
1277
	    (query-sig   (message-digest-string (md5-primitive) (conc qtype immediate params)))
	    (zdat        (db:obj->string (vector client-sig qtype immediate query-sig params (current-seconds))))) ;; (with-output-to-string (lambda ()(serialize params))))
       (debug:print-info 11 "zdat=" zdat)
       (let* ((res  #f)
	      (rawdat      (http-transport:client-send-receive serverdat zdat))
	      (tmp         #f))
	 (debug:print-info 11 "Sent " zdat ", received " rawdat)


	 (set! tmp (db:string->obj rawdat))
	 (vector-ref tmp 2))))



    ((zmq)
     (handle-exceptions
      exn
      (begin
	(debug:print-info 0 "cdb:client-call timeout or error. Trying again in 5 seconds")
	(thread-sleep! 5) 
	(if (> numretries 0)(apply cdb:client-call serverdat qtype immediate (- numretries 1) params)))







>
>
|
|
>
>
>







1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
	    (query-sig   (message-digest-string (md5-primitive) (conc qtype immediate params)))
	    (zdat        (db:obj->string (vector client-sig qtype immediate query-sig params (current-seconds))))) ;; (with-output-to-string (lambda ()(serialize params))))
       (debug:print-info 11 "zdat=" zdat)
       (let* ((res  #f)
	      (rawdat      (http-transport:client-send-receive serverdat zdat))
	      (tmp         #f))
	 (debug:print-info 11 "Sent " zdat ", received " rawdat)
	 (if rawdat
	     (begin
	       (set! tmp (db:string->obj rawdat))
	       (vector-ref tmp 2))
	     (begin
	       (debug:print 0 "ERROR: Communication with the server failed. Exiting if possible")
	       (exit 1))))))
    ((zmq)
     (handle-exceptions
      exn
      (begin
	(debug:print-info 0 "cdb:client-call timeout or error. Trying again in 5 seconds")
	(thread-sleep! 5) 
	(if (> numretries 0)(apply cdb:client-call serverdat qtype immediate (- numretries 1) params)))

Modified http-transport.scm from [768cc35d08] to [f8890a14ba].

9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
;;  PURPOSE.

(require-extension (srfi 18) extras tcp s11n)

(use sqlite3 srfi-1 posix regex regex-case srfi-69 hostinfo md5 message-digest)
(import (prefix sqlite3 sqlite3:))

(use spiffy uri-common intarweb http-client spiffy-request-vars)

;; Configurations for server
(tcp-buffer-size 2048)
(max-connections 2048) 

(declare (unit http-transport))








|







9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
;;  PURPOSE.

(require-extension (srfi 18) extras tcp s11n)

(use sqlite3 srfi-1 posix regex regex-case srfi-69 hostinfo md5 message-digest)
(import (prefix sqlite3 sqlite3:))

(use spiffy uri-common intarweb http-client spiffy-request-vars  uri-common intarweb)

;; Configurations for server
(tcp-buffer-size 2048)
(max-connections 2048) 

(declare (unit http-transport))

144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168


169
170
171
172
173
174
175
176

177
178
179
180


181
182
183
184
185
186

187
188
189
190
191
192
193

194
195
196
197
198
199
200
201










202
203
204
205
206
207
208
209
210

211
212
213
214
215
216
217
218
219
220

221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238

239
240
241
242
243
244
245
	 (print "ERROR: Tried and tried but could not start the server")))
   ;; any error in following steps will result in a retry
   (set! *runremote* (list ipaddrstr portnum))
   ;; (open-run-close tasks:remove-server-records tasks:open-db)
   (open-run-close tasks:server-register 
		   tasks:open-db 
		   (current-process-id)
		   ipaddrstr portnum 0 'live 'http)
   (print "INFO: Trying to start server on " ipaddrstr ":" portnum)
   ;; This starts the spiffy server
   ;; NEED WAY TO SET IP TO #f TO BIND ALL
   (start-server bind-address: ipaddrstr port: portnum)
   (open-run-close tasks:server-delete tasks:open-db ipaddrstr portnum)
   (print "INFO: server has been stopped")))

;;======================================================================
;; S E R V E R   U T I L I T I E S 
;;======================================================================

;;======================================================================
;; C L I E N T S
;;======================================================================

(define *http-mutex* (make-mutex))



;; <html>
;; <head></head>
;; <body>1 Hello, world! Goodbye Dolly</body></html>
;; Send msg to serverdat and receive result
(define (http-transport:client-send-receive serverdat msg)
  (let* ((url        (http-transport:make-server-url serverdat))
	 (fullurl    (conc url "/ctrl")) ;; (conc url "/?dat=" msg)))
	 (numretries 0))     

    (handle-exceptions
     exn
     (begin 
       (debug:print  0 "WARNING: possible communication error")


       (if (< numretries 200)
	   (http-transport:client-send-receive serverdat msg)))
     (begin
       (debug:print-info 11 "fullurl=" fullurl "\n")
       ;; set up the http-client here
       (max-retry-attempts 100)

       (retry-request? (lambda (request)
			 (thread-sleep! (/ (if (> numretries 100) 100 numretries) 10))
			 (set! numretries (+ numretries 1))
			 #t))
       ;; send the data and get the response
       ;; extract the needed info from the http data and 
       ;; process and return it.

       (mutex-lock! *http-mutex*)
       (let* ((res   (with-input-from-request fullurl 
					      ;; #f
					      ;; msg 
					      (list (cons 'dat msg)) 
					      read-string)))
	 (close-all-connections!) 
	 (mutex-unlock! *http-mutex*)










	 (debug:print-info 11 "got res=" res)
	 (let ((match (string-search (regexp "<body>(.*)<.body>") res)))
	   (debug:print-info 11 "match=" match)
	   (let ((final (cadr match)))
	     (debug:print-info 11 "final=" final)
	     final)))))))

(define (http-transport:client-connect iface port)
  (let* ((login-res   #f)

	 (serverdat   (list iface port)))
    (set! login-res (client:login serverdat))
    (if (and (not (null? login-res))
	     (car login-res))
	(begin
	  (debug:print-info 2 "Logged in and connected to " iface ":" port)
	  (set! *runremote* serverdat)
	  serverdat)
	(begin
	  (debug:print-info 0 "Failed to login or connect to " iface ":" port)

	  (set! *runremote* #f)
	  (set! *transport-type* 'fs)
	  #f))))


;; run http-transport:keep-running in a parallel thread to monitor that the db is being 
;; used and to shutdown after sometime if it is not.
;;
(define (http-transport:keep-running)
  ;; if none running or if > 20 seconds since 
  ;; server last used then start shutdown
  ;; This thread waits for the server to come alive
  (let* ((server-info (let loop ()
                        (let ((sdat #f))
                          (mutex-lock! *heartbeat-mutex*)
                          (set! sdat *runremote*)
                          (mutex-unlock! *heartbeat-mutex*)
                          (if sdat sdat

                              (begin
                                (sleep 4)
                                (loop))))))
         (iface       (car server-info))
         (port        (cadr server-info))
         (last-access 0)
	 (tdb         (tasks:open-db))







|
|




|











>
>





|
|
|
>


|
<
>
>
|




|
>

|
|
|



>
|
|
|
<
|
|
|
|
>
>
>
>
>
>
>
>
>
>









>
|








|
>
|
|
|














|
>







144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182

183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202

203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
	 (print "ERROR: Tried and tried but could not start the server")))
   ;; any error in following steps will result in a retry
   (set! *runremote* (list ipaddrstr portnum))
   ;; (open-run-close tasks:remove-server-records tasks:open-db)
   (open-run-close tasks:server-register 
		   tasks:open-db 
		   (current-process-id)
		   ipaddrstr portnum 0 'startup 'http)
   (debug:print 1 "INFO: Trying to start server on " ipaddrstr ":" portnum)
   ;; This starts the spiffy server
   ;; NEED WAY TO SET IP TO #f TO BIND ALL
   (start-server bind-address: ipaddrstr port: portnum)
   (open-run-close tasks:server-delete tasks:open-db ipaddrstr portnum)
   (debug:print 1 "INFO: server has been stopped")))

;;======================================================================
;; S E R V E R   U T I L I T I E S 
;;======================================================================

;;======================================================================
;; C L I E N T S
;;======================================================================

(define *http-mutex* (make-mutex))

;; (system "megatest -list-servers | grep alive || megatest -server - -daemonize && sleep 4")

;; <html>
;; <head></head>
;; <body>1 Hello, world! Goodbye Dolly</body></html>
;; Send msg to serverdat and receive result
(define (http-transport:client-send-receive serverdat msg)
  (let* (;; (url        (http-transport:make-server-url serverdat))
	 (fullurl    (caddr serverdat)) ;; (conc url "/ctrl")) ;; (conc url "/?dat=" msg)))
	 (numretries 10)
	 (res        #f))
    (handle-exceptions
     exn
     (begin

       (print "ERROR IN http-transport:client-send-receive " ((condition-property-accessor 'exn 'message) exn))
       (thread-sleep! 2)
       (if (> numretries 0)
	   (http-transport:client-send-receive serverdat msg)))
     (begin
       (debug:print-info 11 "fullurl=" fullurl "\n")
       ;; set up the http-client here
       (max-retry-attempts 5)
       ;; consider all requests indempotent
       (retry-request? (lambda (request)
			 #t))   ;;  		 (thread-sleep! (/ (if (> numretries 100) 100 numretries) 10))
       (set! numretries (- numretries 1))
       ;;  		 #t))
       ;; send the data and get the response
       ;; extract the needed info from the http data and 
       ;; process and return it.
       (let* ((send-recieve (lambda ()
			      (mutex-lock! *http-mutex*)
			      (set! res (with-input-from-request 
					 fullurl 

					 (list (cons 'dat msg)) 
					 read-string))
			      (close-all-connections!) 
			      (mutex-unlock! *http-mutex*)))
	      (time-out     (lambda ()
			      (thread-sleep! 5)
			      (if (not res)
				  (debug:print 0 "ERROR: communication with the server timed out. Exiting."))))
	      (th1 (make-thread send-recieve "with-input-from-request"))
	      (th2 (make-thread time-out     "time out")))
	 (thread-start! th1)
	 (thread-start! th2)
	 (thread-join! th1)
	 (thread-terminate! th2)
	 (debug:print-info 11 "got res=" res)
	 (let ((match (string-search (regexp "<body>(.*)<.body>") res)))
	   (debug:print-info 11 "match=" match)
	   (let ((final (cadr match)))
	     (debug:print-info 11 "final=" final)
	     final)))))))

(define (http-transport:client-connect iface port)
  (let* ((login-res   #f)
	 (uri-dat     (make-request method: 'POST uri: (uri-reference (conc "http://" iface ":" port "/ctrl"))))
	 (serverdat   (list iface port uri-dat)))
    (set! login-res (client:login serverdat))
    (if (and (not (null? login-res))
	     (car login-res))
	(begin
	  (debug:print-info 2 "Logged in and connected to " iface ":" port)
	  (set! *runremote* serverdat)
	  serverdat)
	(begin
	  (debug:print-info 0 "ERROR: Failed to login or connect to " iface ":" port)
	  (exit 1)))))
;; 	  (set! *runremote* #f)
;; 	  (set! *transport-type* 'fs)
;; 	  #f))))


;; run http-transport:keep-running in a parallel thread to monitor that the db is being 
;; used and to shutdown after sometime if it is not.
;;
(define (http-transport:keep-running)
  ;; if none running or if > 20 seconds since 
  ;; server last used then start shutdown
  ;; This thread waits for the server to come alive
  (let* ((server-info (let loop ()
                        (let ((sdat #f))
                          (mutex-lock! *heartbeat-mutex*)
                          (set! sdat *runremote*)
                          (mutex-unlock! *heartbeat-mutex*)
                          (if sdat
			      sdat
                              (begin
                                (sleep 4)
                                (loop))))))
         (iface       (car server-info))
         (port        (cadr server-info))
         (last-access 0)
	 (tdb         (tasks:open-db))

Modified tasks.scm from [4adf87fded] to [86ea9ba13f].

24
25
26
27
28
29
30
31
32
33
34
35
36
37
38

(define (tasks:open-db)
  (let* ((dbpath  (conc *toppath* "/monitor.db"))
	 (exists  (file-exists? dbpath))
	 (mdb     (sqlite3:open-database dbpath)) ;; (never-give-up-open-db dbpath))
	 (handler (make-busy-timeout 36000)))
    (sqlite3:set-busy-handler! mdb handler)
    (sqlite3:execute mdb (conc "PRAGMA synchronous = 1;"))
    (if (not exists)
	(begin
	  (sqlite3:execute mdb "CREATE TABLE IF NOT EXISTS tasks_queue (id INTEGER PRIMARY KEY,
                                action TEXT DEFAULT '',
                                owner TEXT,
                                state TEXT DEFAULT 'new',
                                target TEXT DEFAULT '',







|







24
25
26
27
28
29
30
31
32
33
34
35
36
37
38

(define (tasks:open-db)
  (let* ((dbpath  (conc *toppath* "/monitor.db"))
	 (exists  (file-exists? dbpath))
	 (mdb     (sqlite3:open-database dbpath)) ;; (never-give-up-open-db dbpath))
	 (handler (make-busy-timeout 36000)))
    (sqlite3:set-busy-handler! mdb handler)
    (sqlite3:execute mdb (conc "PRAGMA synchronous = 0;"))
    (if (not exists)
	(begin
	  (sqlite3:execute mdb "CREATE TABLE IF NOT EXISTS tasks_queue (id INTEGER PRIMARY KEY,
                                action TEXT DEFAULT '',
                                owner TEXT,
                                state TEXT DEFAULT 'new',
                                target TEXT DEFAULT '',
140
141
142
143
144
145
146






147
148
149
150
151
152
153
154
	 (debug:print 0 "ERROR: tasks:server-get-server-id needs (hostname and pid) OR (iface and port) OR (hostname and port)")
	 "SELECT id FROM servers WHERE pid=-999;")))
     (if hostname hostname iface)(if pid pid port))
    res))

(define (tasks:server-update-heartbeat mdb server-id)
  (debug:print-info 0 "Heart beat update of server id=" server-id)






  (sqlite3:execute mdb "UPDATE servers SET heartbeat=strftime('%s','now') WHERE id=?;" server-id))

;; alive servers keep the heartbeat field upto date with seconds every 6 or so seconds
(define (tasks:server-alive? mdb server-id #!key (iface #f)(hostname #f)(port #f)(pid #f))
  (let* ((server-id  (if server-id 
			 server-id
			 (tasks:server-get-server-id mdb hostname iface port pid)))
	 (heartbeat-delta 99e9))







>
>
>
>
>
>
|







140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
	 (debug:print 0 "ERROR: tasks:server-get-server-id needs (hostname and pid) OR (iface and port) OR (hostname and port)")
	 "SELECT id FROM servers WHERE pid=-999;")))
     (if hostname hostname iface)(if pid pid port))
    res))

(define (tasks:server-update-heartbeat mdb server-id)
  (debug:print-info 0 "Heart beat update of server id=" server-id)
  (handle-exceptions
   exn
   (begin
     (debug:print 0 "WARNING: probable timeout on monitor.db access")
     (thread-sleep! 1)
     (tasks:server-update-heartbeat mdb server-id))
   (sqlite3:execute mdb "UPDATE servers SET heartbeat=strftime('%s','now') WHERE id=?;" server-id)))

;; alive servers keep the heartbeat field upto date with seconds every 6 or so seconds
(define (tasks:server-alive? mdb server-id #!key (iface #f)(hostname #f)(port #f)(pid #f))
  (let* ((server-id  (if server-id 
			 server-id
			 (tasks:server-get-server-id mdb hostname iface port pid)))
	 (heartbeat-delta 99e9))