Overview
Context
Changes
Modified megatest-version.scm
from [94672ddb9c]
to [c0fe9af7ba].
1
2
3
4
5
6
7
|
1
2
3
4
5
6
7
|
-
+
|
;; Always use two digit decimal
;; 1.01, 1.02...1.10,1.11 ... 1.99,2.00..
(declare (unit megatest-version))
(define megatest-version 1.5107)
(define megatest-version 1.5108)
|
Modified megatest.scm
from [c4c3f3d52b]
to [5c2117b3b6].
︙ | | |
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
|
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
|
-
-
-
-
+
+
+
+
+
+
-
+
|
(state (vector-ref server 7))
(mt-ver (vector-ref server 8))
(status (open-run-close tasks:server-alive? tasks:open-db #f hostname: hostname port: port))
(killed #f)
(zmq-socket (if status (server:client-connect hostname port) #f)))
;; no need to login as status of #t indicates we are connecting to correct
;; server
(if (or (not status) ;; no point in keeping dead records in the db
(and khost-port ;; kill by host/port
(equal? hostname (car khost-port))
(equal? port (string->number (cadr khost-port)))))
(if (not status) ;; no point in keeping dead records in the db
(open-run-close tasks:server-deregister tasks:open-db hostname port: port pid: pid))
(if (and khost-port ;; kill by host/port
(equal? hostname (car khost-port))
(equal? port (string->number (cadr khost-port))))
(tasks:kill-server status hostname port pid))
(if (and kpid
(equal? hostname (car khost-port))
(equal? hostname (get-host-name))
(equal? kpid pid)) ;;; YEP, ALL WITH PID WILL BE KILLED!!!
(tasks:kill-server status hostname #f pid))
(format #t fmtstr id mt-ver pid hostname interface port start-time priority
(if status "alive" "dead"))))
servers)
(debug:print-info 1 "Done with listservers")
|
︙ | | |
Modified server.scm
from [ebf5897ef4]
to [d8bc67a0d6].
︙ | | |
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
|
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
|
-
+
|
(mutex-lock! *heartbeat-mutex*)
(set! server-loop-heartbeat *server-loop-heart-beat*)
(set! server-info *server-info*)
(mutex-unlock! *heartbeat-mutex*)
;; The logic here is that if the server loop gets stuck blocked in working
;; we don't want to update our heartbeat
(set! pulse (- (current-seconds) server-loop-heartbeat))
(debug:print-info 1 "Heartbeat period is " pulse " seconds on " (cadr server-info) ":" (caddr server-info) ", last db access is " *last-db-access*)
(debug:print-info 1 "Heartbeat period is " pulse " seconds on " (cadr server-info) ":" (caddr server-info) ", last db access is " (- (current-seconds) *last-db-access*) " seconds ago")
(if (> pulse 11) ;; must stay less than 10 seconds
(begin
(debug:print 0 "ERROR: Heartbeat failed, committing servercide")
(exit))
(open-run-close tasks:server-update-heartbeat tasks:open-db (car server-info)))
(if (or (> numrunning 0) ;; stay alive for two days after last access
(> (+ *last-db-access*
|
︙ | | |
Modified tasks.scm
from [6c83278de0]
to [67418262aa].
︙ | | |
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
|
122
123
124
125
126
127
128
129
130
131
132
133
134
135
|
-
|
server-id
(tasks:server-get-server-id mdb hostname port pid)))
(heartbeat-delta 99e9))
(sqlite3:for-each-row
(lambda (delta)
(set! heartbeat-delta delta))
mdb "SELECT strftime('%s','now')-heartbeat FROM servers WHERE id=?;" server-id)
(debug:print 1 "Found heartbeat-delta of " heartbeat-delta " for server with id " server-id)
(< heartbeat-delta 10)))
(define (tasks:client-register mdb pid hostname cmdline)
(sqlite3:execute
mdb
"INSERT OR REPLACE INTO clients (server_id,pid,hostname,cmdline,login_time) VALUES(?,?,?,?,strftime('%s','now'));")
(tasks:server-get-server-id mdb hostname #f pid)
|
︙ | | |
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
|
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
|
-
-
+
+
+
-
-
-
-
-
+
+
+
+
+
+
+
|
(if (null? tal)
#f
(loop (car tal)(cdr tal))))))))))
(define (tasks:kill-server status hostname port pid)
(debug:print-info 1 "Removing defunct server record for " hostname ":" port)
(if port
(open-run-close tasks:server-deregister tasks:open-db hostname port: port)
(open-run-close tasks:server-deregister tasks:open-db hostname pid: pid))
(open-run-close tasks:server-deregister tasks:open-db hostname port: port)
(open-run-close tasks:server-deregister tasks:open-db hostname pid: pid))
(if status ;; #t means alive
(begin
(if (equal? hostname (get-host-name))
(handle-exceptions
(begin
(debug:print 1 "Sending signal/term to " pid " on " hostname)
(process-signal pid signal/term)
(thread-sleep! 5) ;; give it five seconds to die peacefully then do a brutal kill
(process-signal pid signal/kill)) ;; local machine, send sig term
exn
(debug:print-info 0 "server may or may not be dead, check for megatest -server running as pid " pid "\n"
" EXCEPTION: " ((condition-property-accessor 'exn 'message) exn))
(debug:print 1 "Sending signal/term to " pid " on " hostname)
(process-signal pid signal/term)
(thread-sleep! 5) ;; give it five seconds to die peacefully then do a brutal kill
(process-signal pid signal/kill)) ;; local machine, send sig term
(begin
(debug:print-info 1 "Telling alive server on " hostname ":" port " to commit servercide")
(cdb:kill-server zmq-socket)))) ;; remote machine, try telling server to commit suicide
(begin
(if status
(if (equal? hostname (get-host-name))
(begin
|
︙ | | |