Megatest

Check-in [47cf920bfd]
Login
Overview
Comment:Merged v1.60-no-compile into v1.60
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.60
Files: files | file ages | folders
SHA1: 47cf920bfd5be1845fffe772b9963be1cbe3e80c
User & Date: matt on 2014-03-11 22:20:35
Other Links: branch diff | manifest | tags
Context
2014-03-12
00:43
Add some time back to client restarts check-in: a7747154c2 user: matt tags: v1.60
2014-03-11
22:20
Merged v1.60-no-compile into v1.60 check-in: 47cf920bfd user: matt tags: v1.60
22:17
Fixed http-transport start-on-dead issue Closed-Leaf check-in: 21e4cbf850 user: matt tags: v1.60-no-compile
2014-03-10
16:18
Added better but still too slow startup of server when prev-running server is stuck check-in: acfb29f22e user: mrwellan tags: v1.60
Changes

Modified client.scm from [ef3271835b] to [19438c49e1].

60
61
62
63
64
65
66
67
68
69
70
71
72
73

74
75
76
77
78

79
80
81
82
83
84
85

86
87
88
89
90
91
92
93
  (if (<= remaining-tries 0)
      (begin
	(debug:print 0 "ERROR: failed to start or connect to server for run-id " run-id)
	(exit 1))
      (let ((host-info (hash-table-ref/default *runremote* run-id #f)))
	(debug:print-info 0 "client:setup host-info=" host-info ", remaining-tries=" remaining-tries)
	(if host-info
	    (let* ((iface     (car  host-info))
		   (port      (cadr host-info))
		   (start-res (http-transport:client-connect iface port))
		   ;; (ping-res  (server:ping-server run-id iface port))
		   (ping-res  (rmt:login-no-auto-client-setup start-res run-id)))
	      (if ping-res   ;; sucessful login?
		  (begin

		    (hash-table-set! *runremote* run-id start-res)
		    start-res)  ;; return the server info
		  (if (member remaining-tries '(3 4 6))
		      (begin    ;; login failed
			(debug:print 25 "INFO: client:setup start-res=" start-res ", run-id=" run-id ", server-dat=" host-info)

			(hash-table-delete! *runremote* run-id)
			(open-run-close tasks:server-force-clean-run-record
			 		tasks:open-db
			 		run-id 
			 		(car  host-info)
			 		(cadr host-info)
					" client:setup (host-info=#t)")

			(thread-sleep! 5)
			(client:setup run-id remaining-tries: 10)) ;; (- remaining-tries 1)))
		      (begin
			(debug:print 25 "INFO: client:setup failed to connect, start-res=" start-res ", run-id=" run-id ", host-info=" host-info)
			(thread-sleep! 5)
			(client:setup run-id remaining-tries: (- remaining-tries 1))))))
	    ;; YUK: rename server-dat here
	    (let* ((server-dat (open-run-close tasks:get-server tasks:open-db run-id)))







|
|





>


|


>




|
|

>
|







60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
  (if (<= remaining-tries 0)
      (begin
	(debug:print 0 "ERROR: failed to start or connect to server for run-id " run-id)
	(exit 1))
      (let ((host-info (hash-table-ref/default *runremote* run-id #f)))
	(debug:print-info 0 "client:setup host-info=" host-info ", remaining-tries=" remaining-tries)
	(if host-info
	    (let* ((iface     (http-transport:server-dat-get-iface host-info))
		   (port      (http-transport:server-dat-get-port  host-info))
		   (start-res (http-transport:client-connect iface port))
		   ;; (ping-res  (server:ping-server run-id iface port))
		   (ping-res  (rmt:login-no-auto-client-setup start-res run-id)))
	      (if ping-res   ;; sucessful login?
		  (begin
		    (http-transport:close-connections run-id)
		    (hash-table-set! *runremote* run-id start-res)
		    start-res)  ;; return the server info
		  (if (member remaining-tries '(9 6 4 2))
		      (begin    ;; login failed
			(debug:print 25 "INFO: client:setup start-res=" start-res ", run-id=" run-id ", server-dat=" host-info)
			(http-transport:close-connections run-id)
			(hash-table-delete! *runremote* run-id)
			(open-run-close tasks:server-force-clean-run-record
			 		tasks:open-db
			 		run-id 
			 		iface
			 		port
					" client:setup (host-info=#t)")
			(if (< remaining-tries 8)
			    (thread-sleep! 5))
			(client:setup run-id remaining-tries: 10)) ;; (- remaining-tries 1)))
		      (begin
			(debug:print 25 "INFO: client:setup failed to connect, start-res=" start-res ", run-id=" run-id ", host-info=" host-info)
			(thread-sleep! 5)
			(client:setup run-id remaining-tries: (- remaining-tries 1))))))
	    ;; YUK: rename server-dat here
	    (let* ((server-dat (open-run-close tasks:get-server tasks:open-db run-id)))

Modified http-transport.scm from [cda5622689] to [ffb8788cf7].

91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
				   (send-response body:    (api:process-request db $) ;; the $ is the request vars proc
						  headers: '((content-type text/plain)))
				   (mutex-lock! *heartbeat-mutex*)
				   (set! *last-db-access* (current-seconds))
				   (mutex-unlock! *heartbeat-mutex*))
				  ;; This is the /ctrl path where data is handed to the server and
				  ;; responses 
				  ((equal? (uri-path (request-uri (current-request)))
					   '(/ "ctrl"))
				   (let* ((packet (db:string->obj dat))
					  (qtype  (cdb:packet-get-qtype packet)))
				     (debug:print-info 12 "server=> received packet=" packet)
				     (if (not (member qtype '(sync ping)))
					 (begin
					   (mutex-lock! *heartbeat-mutex*)
					   (set! *last-db-access* (current-seconds))
					   (mutex-unlock! *heartbeat-mutex*)))
				     ;; (mutex-lock! *db:process-queue-mutex*) ;; trying a mutex
				     ;; (set! res (open-run-close db:process-queue-item open-db packet))
				     (set! res (db:process-queue-item db packet))
				     ;; (mutex-unlock! *db:process-queue-mutex*)
				     (debug:print-info 11 "Return value from db:process-queue-item is " res)
				     (send-response body: (conc "<head>ctrl data</head>\n<body>"
								res
								"</body>")
						    headers: '((content-type text/plain)))))
				  ((equal? (uri-path (request-uri (current-request))) 
					   '(/ ""))
				   (send-response body: (http-transport:main-page)))
				  ((equal? (uri-path (request-uri (current-request))) 
					   '(/ "runs"))
				   (send-response body: (http-transport:main-page)))
				  ((equal? (uri-path (request-uri (current-request))) 







|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|







91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
				   (send-response body:    (api:process-request db $) ;; the $ is the request vars proc
						  headers: '((content-type text/plain)))
				   (mutex-lock! *heartbeat-mutex*)
				   (set! *last-db-access* (current-seconds))
				   (mutex-unlock! *heartbeat-mutex*))
				  ;; This is the /ctrl path where data is handed to the server and
				  ;; responses 
				  ;; ((equal? (uri-path (request-uri (current-request)))
				  ;;          '(/ "ctrl"))
				  ;;  (let* ((packet (db:string->obj dat))
				  ;;         (qtype  (cdb:packet-get-qtype packet)))
				  ;;    (debug:print-info 12 "server=> received packet=" packet)
				  ;;    (if (not (member qtype '(sync ping)))
				  ;;        (begin
				  ;;          (mutex-lock! *heartbeat-mutex*)
				  ;;          (set! *last-db-access* (current-seconds))
				  ;;          (mutex-unlock! *heartbeat-mutex*)))
				  ;;    ;; (mutex-lock! *db:process-queue-mutex*) ;; trying a mutex
				  ;;    ;; (set! res (open-run-close db:process-queue-item open-db packet))
				  ;;    (set! res (db:process-queue-item db packet))
				  ;;    ;; (mutex-unlock! *db:process-queue-mutex*)
				  ;;    (debug:print-info 11 "Return value from db:process-queue-item is " res)
				  ;;    (send-response body: (conc "<head>ctrl data</head>\n<body>"
				  ;;       			res
				  ;;       			"</body>")
				  ;;       	    headers: '((content-type text/plain)))))
				  ((equal? (uri-path (request-uri (current-request))) 
					   '(/ ""))
				   (send-response body: (http-transport:main-page)))
				  ((equal? (uri-path (request-uri (current-request))) 
					   '(/ "runs"))
				   (send-response body: (http-transport:main-page)))
				  ((equal? (uri-path (request-uri (current-request))) 
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
(define (http-transport:inc-requests-and-prep-to-close-all-connections)
  (mutex-lock! *http-mutex*)
  (set! *http-requests-in-progress* (+ 1 *http-requests-in-progress*)))

;; Send "cmd" with json payload "params" to serverdat and receive result
;;
(define (http-transport:client-api-send-receive run-id serverdat cmd params #!key (numretries 3))
  (let* ((fullurl    (if (list? serverdat)
			 (list-ref serverdat 4) ;; (cadddr serverdat) ;; this is the uri for /api
			 (begin
			   (debug:print 0 "FATAL ERROR: http-transport:client-api-send-receive called with no server info")
			   (exit 1))))
	 (res        #f))
    (handle-exceptions
     exn
     (if (> numretries 0)
	 (begin
	   (mutex-unlock! *http-mutex*)
	   (thread-sleep! 2)
	   (close-all-connections!)
	   (debug:print 0 "WARNING: Failed to communicate with server, trying again, numretries left: " numretries)
	   (http-transport:client-api-send-receive run-id serverdat cmd params numretries: (- numretries 1)))
	 (begin
	   (mutex-unlock! *http-mutex*)
	   #f))
     (begin







|
|









|







219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
(define (http-transport:inc-requests-and-prep-to-close-all-connections)
  (mutex-lock! *http-mutex*)
  (set! *http-requests-in-progress* (+ 1 *http-requests-in-progress*)))

;; Send "cmd" with json payload "params" to serverdat and receive result
;;
(define (http-transport:client-api-send-receive run-id serverdat cmd params #!key (numretries 3))
  (let* ((fullurl    (if (vector? serverdat)
			 (http-transport:server-dat-get-api-req serverdat)
			 (begin
			   (debug:print 0 "FATAL ERROR: http-transport:client-api-send-receive called with no server info")
			   (exit 1))))
	 (res        #f))
    (handle-exceptions
     exn
     (if (> numretries 0)
	 (begin
	   (mutex-unlock! *http-mutex*)
	   (thread-sleep! 1)
	   (close-all-connections!)
	   (debug:print 0 "WARNING: Failed to communicate with server, trying again, numretries left: " numretries)
	   (http-transport:client-api-send-receive run-id serverdat cmd params numretries: (- numretries 1)))
	 (begin
	   (mutex-unlock! *http-mutex*)
	   #f))
     (begin
271
272
273
274
275
276
277


















278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
	 (thread-start! th1)
	 (thread-start! th2)
	 (thread-join! th1)
	 (thread-terminate! th2)
	 (debug:print-info 11 "got res=" res)
	 res)))))



















;;
;; connect
;;
(define (http-transport:client-connect iface port)
  (let* ((api-url     (conc "http://" iface ":" port "/api"))
	 (uri-dat     (make-request method: 'POST uri: (uri-reference (conc "http://" iface ":" port "/ctrl"))))
	 ;; (uri-dat     (make-request method: 'GET uri: (uri-reference (conc "http://" iface ":" port "/ctrl"))))
	 (uri-api-dat (make-request method: 'POST uri: api-url)) ;; (uri-reference (conc "http://" iface ":" port "/api"))))
	 ;; (uri-api-dat (make-request method: 'GET uri: (uri-reference (conc "http://" iface ":" port "/api"))))
	 (server-dat  (list iface port uri-dat uri-api-dat api-url)))
;;	 (login-res   (server:ping-server run-id server-dat))) ;; login-no-auto-client-setup server-dat run-id)))
    server-dat))
;;     (if (and (list? login-res)
;; 	     (car login-res))
;; 	(begin
;; 	  (hash-table-set! *runremote* run-id server-dat)
;; 	  (debug:print-info 2 "Logged in and connected to " iface ":" port)
;; 	  (hash-table-set! *runremote* run-id server-dat)
;; 	  server-dat)
;; 	(begin
;; 	  (debug:print-info 0 "ERROR: Failed to login or connect to " iface ":" port)
;; 	  #f))))

;; run http-transport:keep-running in a parallel thread to monitor that the db is being 
;; used and to shutdown after sometime if it is not.
;;
(define (http-transport:keep-running server-id)
  ;; if none running or if > 20 seconds since 
  ;; server last used then start shutdown







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>




|
|
<
|
<
|
<

<
<
<
<
<
<
<
<
<
<







271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301

302

303

304










305
306
307
308
309
310
311
	 (thread-start! th1)
	 (thread-start! th2)
	 (thread-join! th1)
	 (thread-terminate! th2)
	 (debug:print-info 11 "got res=" res)
	 res)))))

;; careful closing of connections stored in *runremote*
;;
(define (http-transport:close-connections run-id)
  (let* ((server-dat (hash-table-ref/default *runremote* run-id #f)))
    (if (vector? server-dat)
	(let ((api-dat (http-transport:server-dat-get-api-uri server-dat)))
	  (close-connection! api-dat)
	  #t)
	#f)))


(define (make-http-transport:server-dat)(make-vector 5))
(define (http-transport:server-dat-get-iface         vec)    (vector-ref  vec 0))
(define (http-transport:server-dat-get-port          vec)    (vector-ref  vec 1))
(define (http-transport:server-dat-get-api-uri       vec)    (vector-ref  vec 2))
(define (http-transport:server-dat-get-api-url       vec)    (vector-ref  vec 3))
(define (http-transport:server-dat-get-api-req       vec)    (vector-ref  vec 4))

;;
;; connect
;;
(define (http-transport:client-connect iface port)
  (let* ((api-url      (conc "http://" iface ":" port "/api"))
	 (api-uri      (uri-reference (conc "http://" iface ":" port "/api")))

	 (api-req      (make-request method: 'POST uri: api-uri))

	 (server-dat   (vector iface port api-uri api-url api-req)))

    server-dat))











;; run http-transport:keep-running in a parallel thread to monitor that the db is being 
;; used and to shutdown after sometime if it is not.
;;
(define (http-transport:keep-running server-id)
  ;; if none running or if > 20 seconds since 
  ;; server last used then start shutdown

Modified tests/fullrun/config/mt_include_1.config from [e1668a96e9] to [798352a4c6].

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
[setup]
# exectutable /path/to/megatest
max_concurrent_jobs 500

linktree #{getenv MT_RUN_AREA_HOME}/tmp/mt_links

[jobtools]
useshell yes
# ## launcher launches jobs, the job is managed on the target host
## by megatest, comment out launcher to run local
# workhosts localhost hermes
# launcher exec nbfake
# launcher nbfake
launcher loadrunner
# launcher echo
# launcher nbfind
# launcher nodanggood
# launcher nbload

## use "xterm -e csi -- " as a launcher to examine the launch environment.
## exit with (exit)


|









|
|







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
[setup]
# exectutable /path/to/megatest
max_concurrent_jobs 20

linktree #{getenv MT_RUN_AREA_HOME}/tmp/mt_links

[jobtools]
useshell yes
# ## launcher launches jobs, the job is managed on the target host
## by megatest, comment out launcher to run local
# workhosts localhost hermes
# launcher exec nbfake
launcher nbfake
# launcher loadrunner
# launcher echo
# launcher nbfind
# launcher nodanggood
# launcher nbload

## use "xterm -e csi -- " as a launcher to examine the launch environment.
## exit with (exit)

Modified tests/fullrun/megatest.config from [68ba577ce2] to [385c046203].

114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
# If the server can't be started on this port it will try the next port until
# it succeeds
port 8080

# This server will keep running this number of hours after last access. 
# Three minutes is 0.05 hours
# timeout 0.025
timeout 0.25

## disks are:
## name host:/path/to/area
## -or-
## name /path/to/area
[disks]
disk0 /foobarbazz







|







114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
# If the server can't be started on this port it will try the next port until
# it succeeds
port 8080

# This server will keep running this number of hours after last access. 
# Three minutes is 0.05 hours
# timeout 0.025
timeout 0.025

## disks are:
## name host:/path/to/area
## -or-
## name /path/to/area
[disks]
disk0 /foobarbazz