Overview
Context
Changes
Modified client.scm
from [5cb1c0c7dc]
to [f6d1b77f60].
︙ | | |
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
|
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
|
-
+
-
-
-
-
-
+
+
+
+
+
-
-
+
-
-
+
+
-
-
-
-
-
-
-
+
+
+
+
+
+
+
-
-
+
-
+
-
+
|
;; *transport-type* and *runremote* from the monitor.db
;;
;; client:setup
;;
;; lookup_server, need to remove *runremote* stuff
;;
(define (client:setup run-id #!key (remaining-tries 10))
(debug:print 0 "INFO: client:setup remaining-tries=" remaining-tries)
(if (<= remaining-tries 0)
(begin
(debug:print 0 "ERROR: failed to start or connect to server for run-id " run-id)
(exit 1))
(let ((server-dat (and run-id (hash-table-ref/default *runremote* run-id #f))))
(thread-sleep! 1) ;; try to avoid race conditons
(if server-dat
(let ((start-res (http-transport:client-connect run-id ;; NB// confusion over server-dat and connection result!
(tasks:hostinfo-get-interface server-dat)
(tasks:hostinfo-get-port server-dat))))
(if start-res ;; sucessful login?
(begin
(let ((new-dat (http-transport:client-connect run-id ;; NB// confusion over server-dat and connection result!
(car server-dat)
(cadr server-dat))))
(if new-dat ;; sucessful login?
new-dat
(hash-table-set! *runremote* run-id start-res)
start-res)
(begin ;; login failed
(debug:print 0 "INFO: login failed in client:setup with existing server-dat: " server-dat ", new-dat: " new-dat ", cleaning out records and then trying again")
(hash-table-delete! *runremote* run-id)
(open-run-close tasks:server-force-clean-run-record
tasks:open-db
run-id
(tasks:hostinfo-get-interface server-dat)
(tasks:hostinfo-get-port server-dat))
(car server-dat)
(cadr server-dat))
(thread-sleep! 5)
(client:setup run-id remaining-tries: (- remaining-tries 1)))))
(let* ((server-dat (open-run-close tasks:get-server tasks:open-db run-id)))
(if server-dat
(let ((start-res (http-transport:client-connect run-id
(tasks:hostinfo-get-interface server-dat)
(tasks:hostinfo-get-port server-dat))))
(if start-res
(begin
(let* ((server-info (open-run-close tasks:get-server tasks:open-db run-id)))
(if server-info
(let ((new-dat (http-transport:client-connect run-id
(tasks:hostinfo-get-interface server-info)
(tasks:hostinfo-get-port server-info))))
(if new-dat
new-dat
(hash-table-set! *runremote* run-id start-res)
start-res)
(begin ;; login failed
(debug:print 0 "INFO: login failed in client:setup with no existing server-dat: " server-dat ", new-dat: " new-dat ", and server-info: " server-info ", cleaning out records and then trying again")
(hash-table-delete! *runremote* run-id)
(open-run-close tasks:server-force-clean-run-record
tasks:open-db
run-id
(tasks:hostinfo-get-interface server-dat)
(tasks:hostinfo-get-port server-dat))
(thread-sleep! 2)
;; (thread-sleep! 2)
(server:try-running run-id)
(thread-sleep! 5) ;; give server a little time to start up
(client:setup run-id remaining-tries: (- remaining-tries 1)))))
(begin ;; no server registered
(thread-sleep! 2)
;; (thread-sleep! 2)
(server:try-running run-id)
(thread-sleep! 5) ;; give server a little time to start up
(client:setup run-id remaining-tries: (- remaining-tries 1)))))))))
;; keep this as a function to ease future
(define (client:start run-id server-info)
(http-transport:client-connect run-id
|
︙ | | |
Modified http-transport.scm
from [fdad451b60]
to [38152c3968].
︙ | | |
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
|
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
|
-
-
-
+
+
+
-
-
+
+
|
;;
;; connect
;;
(define (http-transport:client-connect run-id iface port)
(let* ((uri-dat (make-request method: 'POST uri: (uri-reference (conc "http://" iface ":" port "/ctrl"))))
(uri-api-dat (make-request method: 'POST uri: (uri-reference (conc "http://" iface ":" port "/api"))))
(serverdat (list iface port uri-dat uri-api-dat))
(login-res (rmt:login-no-auto-client-setup serverdat run-id)))
(hash-table-set! *runremote* run-id serverdat) ;; may or may not be good ...
(server-dat (list iface port uri-dat uri-api-dat))
(login-res (rmt:login-no-auto-client-setup server-dat run-id)))
;; (hash-table-set! *runremote* run-id serverdat) ;; may or may not be good ...
(if (and (list? login-res)
(car login-res))
(begin
(debug:print-info 2 "Logged in and connected to " iface ":" port)
(hash-table-set! *runremote* run-id serverdat)
serverdat)
(hash-table-set! *runremote* run-id server-dat)
server-dat)
(begin
(debug:print-info 0 "ERROR: Failed to login or connect to " iface ":" port)
#f))))
;; run http-transport:keep-running in a parallel thread to monitor that the db is being
;; used and to shutdown after sometime if it is not.
;;
|
︙ | | |
Modified rmt.scm
from [a5c523b577]
to [2624718f57].
︙ | | |
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
|
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
|
-
+
|
(connection-info (let ((cinfo (hash-table-ref/default *runremote* run-id #f)))
(if cinfo
cinfo
(let loop ((numtries 100))
(thread-sleep! 1)
(let ((res (client:setup run-id)))
(if res
(hash-table-ref/default *runremote* run-id #f) ;; client:setup filled this in (hopefully)
(hash-table-ref *runremote* run-id) ;; client:setup filled this in (hopefully)
(if (> numtries 0)
(loop (- numtries 1))
(begin
(debug:print 0 "ERROR: 100 tries and no server, giving up")
(exit 1)))))))))
(jparams (db:obj->string params))
(res (http-transport:client-api-send-receive run-id connection-info cmd jparams)))
|
︙ | | |
Modified server.scm
from [8eb4730569]
to [b8b02ced57].
︙ | | |
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
|
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
|
-
+
+
+
-
-
+
+
-
+
-
+
+
+
+
+
|
(define (server:reply return-addr query-sig success/fail result)
(db:obj->string (vector success/fail query-sig result)))
;; > file 2>&1
(define (server:try-running run-id)
(let* ((rand-name (random 100))
(cmdln (conc (if (getenv "MT_MEGATEST") (getenv "MT_MEGATEST") "megatest")
" -server - -run-id " run-id " name=" rand-name " > " *toppath* "/db/" run-id "-" rand-name ".log 2>&1 &")))
" -server - -run-id " run-id " name=" rand-name " > " *toppath* "/db/" run-id
".log 2>&1 &")))
;; ".log &" )))
(debug:print 0 "INFO: Starting server (" cmdln ") as none running ...")
(push-directory *toppath*)
(system cmdln)
(pop-directory)))
(define (server:check-if-running run-id)
(let loop ((server (open-run-close tasks:get-server tasks:open-db run-id))
(trycount 0))
(let loop ((server-info (open-run-close tasks:get-server tasks:open-db run-id))
(trycount 0))
(thread-sleep! 2)
(if server
(if server-info
;; note: client:start will set *runremote*. this needs to be changed
;; also, client:start will login to the server, also need to change that.
;;
;; client:start returns #t if login was successful.
;;
(let ((res (client:start run-id server)))
(let ((res (http-transport:client-connect
run-id
(tasks:hostinfo-get-interface server-info)
(tasks:hostinfo-get-port server-info))))
;; if the server didn't respond we must remove the record
(if res
res
(begin
(debug:print 0 "WARNING: running server not reachable, removing record: " server-info)
(open-run-close tasks:server-force-clean-running-records-for-run-id tasks:open-db run-id)
res)))
#f)))
|
Modified tasks.scm
from [19f1225d86]
to [6da0e18c74].
︙ | | |
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
|
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
|
-
+
|
(define (tasks:hostinfo-get-hostname vec) (vector-ref vec 6))
(define (tasks:server-lock-slot mdb run-id)
(tasks:server-clean-out-old-records-for-run-id mdb run-id)
(if (< (tasks:num-in-available-state mdb run-id) 4)
(begin
(tasks:server-set-available mdb run-id)
(thread-sleep! 2) ;; Try removing this. It may not be needed.
(thread-sleep! 0.2) ;; Try removing this. It may not be needed.
(tasks:server-am-i-the-server? mdb run-id))
#f))
;; register that this server may come online (first to register goes though with the process)
(define (tasks:server-set-available mdb run-id)
(sqlite3:execute
mdb
|
︙ | | |
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
|
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
|
-
+
|
(set! res num-in-queue))
mdb
"SELECT count(id) FROM servers WHERE run_id=?;"
run-id)
res))
(define (tasks:server-clean-out-old-records-for-run-id mdb run-id)
(sqlite3:execute mdb "DELETE FROM servers WHERE state in ('available','shutting-down') AND (strftime('%s','now') - start_time) > 10 AND run_id=?;" run-id))
(sqlite3:execute mdb "DELETE FROM servers WHERE state in ('available','shutting-down') AND (strftime('%s','now') - start_time) > 100 AND run_id=?;" run-id))
(define (tasks:server-force-clean-running-records-for-run-id mdb run-id)
(sqlite3:execute mdb "DELETE FROM servers WHERE state = 'running' AND run_id=?;" run-id))
(define (tasks:server-force-clean-run-record mdb run-id iface port)
(sqlite3:execute mdb "DELETE FROM servers WHERE state = 'running' AND run_id=? AND interface=? AND port=?;"
run-id iface port))
|
︙ | | |
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
|
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
|
-
+
-
-
-
-
+
+
+
+
+
+
+
|
(exit 1))
(car (db:get-rows all))))
(header (db:get-header all))
(id (db:get-value-by-header first header "id"))
(hostname (db:get-value-by-header first header "hostname"))
(pid (db:get-value-by-header first header "pid"))
(priority (db:get-value-by-header first header "priority")))
(debug:print 0 "INFO: am-i-the-server got record " first)
;; for now a basic check. add tiebreaking by priority later
(let* ((my-pid (current-process-id))
(if (and (equal? hostname (get-host-name))
(equal? pid (current-process-id)))
id
#f)))
(res (if (and (equal? hostname (get-host-name))
(equal? pid my-pid))
id
#f)))
(debug:print 0 "INFO: am-i-the-server got record " first ", my-pid: " my-pid ", pid: " pid ", result: " res)
res)))
;; Use: (db:get-value-by-header (car (db:get-rows dat)) (db:get-header dat) "fieldname")
;; to extract info from the structure returned
;;
(define (tasks:server-get-servers-vying-for-run-id mdb run-id)
(let* ((header (list "id" "hostname" "pid" "interface" "port" "pubport" "state" "run_id" "priority" "start_time"))
(selstr (string-intersperse header ","))
|
︙ | | |
Modified tests/fullrun/megatest.config
from [c76d4b28b2]
to [bc391c991d].
︙ | | |
75
76
77
78
79
80
81
82
83
84
85
86
87
88
|
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
|
+
|
[validvalues]
state start end 0 1 - 2
status pass fail n/a 0 1 running - 2
# These are set before all tests, override them
# in the testconfig [pre-launch-env-overrides] section
[env-override]
# This variable is honored by the loadrunner script. The value is in percent
# a value of 200 will stop new jobs from starting.
MAX_ALLOWED_LOAD 200
# MT_XTERM_CMD overrides the terminal command
# MT_XTERM_CMD xterm -bg lightgreen -fg black
|
︙ | | |
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
|
102
103
104
105
106
107
108
109
110
111
112
113
114
115
|
-
-
-
-
|
# The empty var should have a definition with null string
EMPTY_VAR
WRAPPEDVAR This var should have the work blah thrice: \
blah \
blah
# Set MAX_ALLOWED_LOAD for nbload. 150 percent is a good value.
MAX_ALLOWED_LOAD 150
# XTERM [system xterm]
# RUNDEAD [system exit 56]
[server]
# If the server can't be started on this port it will try the next port until
# it succeeds
|
︙ | | |