Overview
Context
Changes
Modified client.scm
from [aa964f7d14]
to [ec1ead835f].
︙ | | |
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
|
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
|
-
+
|
(port (http-transport:server-dat-get-port host-info))
(start-res (case *transport-type*
((http)(http-transport:client-connect iface port))
((nmsg) host-info) ;; (http-transport:server-dat-get-socket host-info))
(else #f)))
(ping-res (case *transport-type*
((http)(rmt:login-no-auto-client-setup start-res run-id))
((nmsg)(nmsg-transport:ping iface port timeout: 2 socket: ))
((nmsg)(nmsg-transport:ping iface port timeout: 2 socket: #t))
(else #f))))
(if ping-res ;; sucessful login?
(begin
(debug:print-info 2 "client:setup, ping is good using host-info=" host-info ", remaining-tries=" remaining-tries)
start-res) ;; return the server info
;; have host info but no ping. shutdown the current connection and try again
(begin ;; login failed
|
︙ | | |
Modified http-transport.scm
from [37e9804757]
to [8c622aa861].
︙ | | |
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
|
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
|
-
-
-
-
-
-
|
;; (debug:print 11 "last-access=" last-access ", server-timeout=" server-timeout)
;;
;; no_traffic, no running tests, if server 0, no running servers
;;
;; (let ((wait-on-running (configf:lookup *configdat* "server" "wait-on-running"))) ;; wait on running tasks (if not true then exit on time out)
;;
(if (and *server-run*
;; (or
(> (+ last-access server-timeout)
(current-seconds)))
;; (and (eq? run-id 0)
;; (> (tasks:num-servers-non-zero-running tdb) 0))
;; (and (not (eq? run-id 0)) ;; only makes sense in non-zero run-id servers
;; (> (db:get-count-tests-actually-running *inmemdb* run-id) 0))
;; ))
(begin
(debug:print-info 0 "Server continuing, seconds since last db access: " (- (current-seconds) last-access))
;;
;; Consider implementing some smarts here to re-insert the record or kill self is
;; the db indicates so
;;
;; (if (tasks:server-am-i-the-server? tdb run-id)
|
︙ | | |
Modified nmsg-transport.scm
from [987e090f8d]
to [b2990ea4bd].
︙ | | |
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
|
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
|
-
+
+
+
+
+
+
+
+
+
+
-
+
-
-
-
-
-
-
+
-
+
+
-
+
+
+
|
sdat)
(begin
(thread-sleep! 0.5)
(loop))))))
(iface (car server-info))
(port (cadr server-info))
(last-access 0)
(tdbdat (tasks:open-db)))
(tdbdat (tasks:open-db))
(server-timeout (let ((tmo (configf:lookup *configdat* "server" "timeout")))
(if (and (string? tmo)
(string->number tmo))
(* 60 60 (string->number tmo))
;; (* 3 24 60 60) ;; default to three days
(* 60 1) ;; default to one minute
;; (* 60 60 25) ;; default to 25 hours
))))
(print "Keep-running got server pid " server-id ", using iface " iface " and port " port)
(let loop ((count 0))
(thread-sleep! 4) ;; no need to do this very often
;; NB// sync currently does NOT return queue-length
(let () ;; (queue-len (cdb:client-call server-info 'sync #t 1)))
;; (print "Server running, count is " count)
(if (< count 1) ;; 3x3 = 9 secs aprox
(loop (+ count 1)))
;; (if ;; (or (> numrunning 0) ;; stay alive for two days after last access
(mutex-lock! *heartbeat-mutex*)
(set! last-access *last-db-access*)
(mutex-unlock! *heartbeat-mutex*)
(if (and *server-run*
(if (> (+ last-access
(> (+ last-access server-timeout)
;; (* 50 60 60) ;; 48 hrs
;; 60 ;; one minute
;; (* 60 60) ;; one hour
(* 45 60) ;; 45 minutes, until the db deletion bug is fixed.
)
(current-seconds))
(current-seconds)))
(begin
(debug:print-info 0 "Server continuing, seconds since last db access: " (- (current-seconds) last-access))
(loop 0))
(begin
(debug:print-info 0 "Starting to shutdown the server.")
(set! *time-to-exit* #t)
(tasks:server-delete-record (db:delay-if-busy tdbdat) server-id " http-transport:keep-running")
(debug:print-info 0 "Server shutdown complete. Exiting")
;; (exit)
(exit)
))))))
;;======================================================================
;; C L I E N T S
;;======================================================================
(define (nmsg-transport:client-connect iface portnum)
(let* ((reqsoc (nmsg-transport:ping iface portnum return-socket: #t)))
(vector iface portnum #f #f #f (current-seconds) reqsoc)))
(define (nmsg-transport:client-api-send-receive run-id connection-info cmd param)
(mutex-lock! *http-mutex*)
(let ((packet (vector cmd param))
(reqsoc (http-transport:server-dat-get-socket connection-info)))
(nn-send reqsoc (db:obj->string packet transport: 'nmsg))
(db:string->obj (nn-recv reqsoc) transport: 'nmsg)))
(let ((res (db:string->obj (nn-recv reqsoc) transport: 'nmsg)))
(mutex-unlock! *http-mutex*)
res)))
;;======================================================================
;; J U N K
;;======================================================================
;; DO NOT USE
;;
|
︙ | | |
Modified server.scm
from [57814a5cf8]
to [3a939720aa].
︙ | | |
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
|
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
|
+
-
-
-
+
+
+
+
+
+
|
(trycount 0))
(if server
;; note: client:start will set *runremote*. this needs to be changed
;; also, client:start will login to the server, also need to change that.
;;
;; client:start returns #t if login was successful.
;;
(let ((res (case *transport-type*
(let ((res (server:ping-server run-id
(tasks:hostinfo-get-interface server)
(tasks:hostinfo-get-port server))))
((http)(server:ping-server run-id
(tasks:hostinfo-get-interface server)
(tasks:hostinfo-get-port server)))
((nmsg)(nmsg-transport:ping (tasks:hostinfo-get-interface server)
(tasks:hostinfo-get-port server)
timeout: 2)))))
;; if the server didn't respond we must remove the record
(if res
#t
(begin
(debug:print-info 0 "server at " server " not responding, removing record")
(tasks:server-force-clean-running-records-for-run-id (db:delay-if-busy tdbdat) run-id
" server:check-if-running")
|
︙ | | |
Modified tests/Makefile
from [a8b041cd34]
to [608cf0bdb8].
︙ | | |
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
|
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
|
-
+
-
+
-
+
-
+
-
+
|
# Some simple checks for bootstrapping and run loop logic
test9 : minsetup test9a test9b test9c test9d test9e
test9a :
@echo Run super-simple mintest e, no waitons.
cd mintest;$(DASHBOARD)&
cd mintest;$(MEGATEST) -preclean -runtests e -target $(VER) :runname `date +%H.%M.%S` -debug $(DEBUG)
cd mintest;$(MEGATEST) -preclean -runtests e -target $(VER) -runname $(shell date +%H.%M.%S) -debug $(DEBUG)
test9b :
@echo Run simple mintest d with one waiton c
cd mintest;$(MEGATEST) -preclean -runtests d -target $(VER) :runname `date +%H.%M.%S` -debug $(DEBUG)
cd mintest;$(MEGATEST) -preclean -runtests d -target $(VER) -runname `date +%H.%M.%S` -debug $(DEBUG)
test9c :
@echo Run mintest a with full waiton chain a -> b -> c -> d -> e
cd mintest;$(MEGATEST) -preclean -runtests a -target $(VER) :runname `date +%H.%M.%S` -debug $(DEBUG)
cd mintest;$(MEGATEST) -preclean -runtests a -target $(VER) -runname `date +%H.%M.%S` -debug $(DEBUG)
test9d :
@echo Run an itemized test with no items
cd mintest;$(MEGATEST) -preclean -runtests g -target $(VER) :runname `date +%H.%M.%S` -debug $(DEBUG)
cd mintest;$(MEGATEST) -preclean -runtests g -target $(VER) -runname `date +%H.%M.%S` -debug $(DEBUG)
test9e :
@echo Run mintest a1 with full waiton chain with d1fail: a1 -> b1 -> c1 -> d1fail -> e1
cd mintest;$(MEGATEST) -preclean -runtests a1 -target $(VER) :runname `date +%H.%M.%S` -debug $(DEBUG)
cd mintest;$(MEGATEST) -preclean -runtests a1 -target $(VER) -runname `date +%H.%M.%S` -debug $(DEBUG)
test10 :
@echo Run a bunch of different targets simultaneously
(cd fullrun;$(MEGATEST) -server - ;sleep 2)&
for targ in mint/btrfs/mintdir sunos/sshfs/loc; do \
(cd fullrun;$(MEGATEST) -preclean -runtests priority_10_waiton_1 -target $$targ :runname $(RUNNAME) &); done
for sys in ubuntu suse redhat debian;do \
|
︙ | | |
Modified tests/fullrun/megatest.config
from [79e8e68f6b]
to [558689506a].
︙ | | |
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
|
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
|
-
+
-
+
+
|
# This server will keep running this number of hours after last access.
# Three minutes is 0.05 hours
# timeout 0.025
timeout 0.01
# Server is required - slower but more resistant to Sqlite issues.
# required yes
required yes
# Start server when average query takes longer than this
server-query-threshold 55500
# server-query-threshold 55500
server-query-threshold -1
# daemonize yes
# hostname #{scheme (get-host-name)}
## disks are:
## name host:/path/to/area
## -or-
|
︙ | | |
Modified tests/mintest/megatest.config
from [158955d103]
to [74b434d2c6].
1
2
3
4
5
6
7
8
9
10
11
12
13
|
1
2
3
4
5
6
7
8
9
10
11
12
13
|
-
+
|
[fields]
X TEXT
[setup]
max_concurrent_jobs 50
linktree #{getenv PWD}/linktree
linktree #{getenv MT_RUN_AREA_HOME}/linktree
transport http
[server]
port 8090
[jobtools]
useshell yes
|
︙ | | |