This is equivalent to a diff from
86a3d1148e
to dd4e2a6ea3
Modified api.scm
from [4fa67bb6bd]
to [e3eb999523].
1
2
3
4
5
6
7
|
1
2
3
4
5
6
7
8
9
|
+
+
|
;;======================================================================
;; Copyright 2006-2013, Matthew Welland.
;;
;; This file is part of Megatest.
;;
;; Megatest is free software: you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
|
︙ | | |
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
|
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
|
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
|
;; api:process-request
;; db:*
;;
;; NB// Runs on the server as part of the server loop
;;
(define (api:process-request dbstruct $) ;; the $ is the request vars proc
(set! *api-process-request-count* (+ *api-process-request-count* 1))
(debug:print 4 *default-log-port* "server-id:" *server-id*)
(let* ((cmd ($ 'cmd))
(paramsj ($ 'params))
(key ($ 'key))
(params (db:string->obj paramsj transport: 'http)) ;; incoming data from the POST (or is it a GET?)
(resdat (api:execute-requests dbstruct (vector cmd params))) ;; process the request, resdat = #( flag result )
(success (vector-ref resdat 0))
(res (vector-ref resdat 1))) ;; (vector flag payload), get the payload, ignore the flag (why?)
(if (not success)
(debug:print 0 *default-log-port* "ERROR: success flag is #f for " cmd " with params " params))
(if (> *api-process-request-count* *max-api-process-requests*)
(set! *max-api-process-requests* *api-process-request-count*))
(set! *api-process-request-count* (- *api-process-request-count* 1))
;; This can be here but needs controls to ensure it doesn't run more than every 4 seconds
;; (rmt:dat->json-str
;; (if (or (string? res)
;; (list? res)
;; (number? res)
;; (boolean? res))
;; res
;; (list "ERROR, not string, list, number or boolean" 1 cmd params res)))))
(db:obj->string res transport: 'http)))
(params (db:string->obj paramsj transport: 'http))) ;; incoming data from the POST (or is it a GET?)
(debug:print 4 *default-log-port* "cmd:" cmd " with params " params "key " key)
(if (equal? key *server-id*)
(let* ((resdat (api:execute-requests dbstruct (vector cmd params))) ;; process the request, resdat = #( flag result )
(success (vector-ref resdat 0))
(res (vector-ref resdat 1))) ;; (vector flag payload), get the payload, ignore the flag (why?)
(debug:print 4 *default-log-port* "res:" res)
(if (not success)
(debug:print 0 *default-log-port* "ERROR: success flag is #f for " cmd " with params " params))
(if (> *api-process-request-count* *max-api-process-requests*)
(set! *max-api-process-requests* *api-process-request-count*))
(set! *api-process-request-count* (- *api-process-request-count* 1))
;; This can be here but needs controls to ensure it doesn't run more than every 4 seconds
;; (rmt:dat->json-str
;; (if (or (string? res)
;; (list? res)
;; (number? res)
;; (boolean? res))
;; res
;; (list "ERROR, not string, list, number or boolean" 1 cmd params res)))))
(db:obj->string res transport: 'http))
(begin
(debug:print 0 *default-log-port* "Server refused to process request. Sever id mismatch. recived " key " expected: " *server-id* ".\nOther arguments recived: cmd=" cmd " params = " params)
(db:obj->string (conc "Server refused to process request server-id mismatch: " key ", " *server-id*) transport: 'http)))))
|
| |
Modified client.scm
from [9da8d7475d]
to [dc4c7b41e8].
︙ | | |
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
|
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
|
-
+
+
+
-
-
+
+
+
+
+
+
+
-
+
|
;; through them searching for a good one.
;;
(let* ((server-dat (server:get-rand-best areapath)) ;; (server:get-first-best areapath))
(runremote (or area-dat *runremote*)))
(if (not server-dat) ;; no server found
(client:setup-http areapath remaining-tries: (- remaining-tries 1))
(let ((host (cadr server-dat))
(port (caddr server-dat)))
(port (caddr server-dat))
(server-id (caddr (cddr server-dat))))
(debug:print-info 4 *default-log-port* "client:setup server-dat=" server-dat ", remaining-tries=" remaining-tries)
(if (and (not area-dat)
(not *runremote*))
(begin
(set! *runremote* (make-remote)))
(if (and host port)
(set! *runremote* (make-remote))
(let* ((server-info (remote-server-info *runremote*)))
(if server-info
(begin
(remote-server-url-set! *runremote* (server:record->url server-info))
(remote-server-id-set! *runremote* (server:record->id server-info)))))))
(if (and host port server-id)
(let* ((start-res (case *transport-type*
((http)(http-transport:client-connect host port))))
((http)(http-transport:client-connect host port server-id))))
(ping-res (case *transport-type*
((http)(rmt:login-no-auto-client-setup start-res)))))
(if (and start-res
ping-res)
(let ((runremote (or area-dat *runremote*))) ;; it might have been generated only a few statements ago
(remote-conndat-set! runremote start-res) ;; (hash-table-set! runremote run-id start-res)
(debug:print-info 2 *default-log-port* "connected to " (http-transport:server-dat-make-url start-res))
|
︙ | | |
Modified common.scm
from [33c7316880]
to [2f158b8f7d].
︙ | | |
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
|
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
|
+
+
-
+
|
(else "FAIL")))
(define (common:logpro-exit-code->test-status exit-code)
(status-sym->string (common:logpro-exit-code->status-sym exit-code)))
(defstruct remote
(hh-dat (common:get-homehost)) ;; homehost record ( addr . hhflag )
(server-url #f) ;; (server:check-if-running *toppath*) #f))
(server-id #f)
(server-url (if *toppath* (server:check-if-running *toppath*))) ;; (server:check-if-running *toppath*) #f))
(server-info (if *toppath* (server:check-if-running *toppath*)))
(last-server-check 0) ;; last time we checked to see if the server was alive
(conndat #f)
(transport *transport-type*)
(server-timeout (server:expiration-timeout))
(force-server #f)
(ro-mode #f)
(ro-mode-checked #f)) ;; flag that indicates we have checked for ro-mode
|
︙ | | |
Modified db.scm
from [fb3a18f52f]
to [5fbac67d93].
︙ | | |
4340
4341
4342
4343
4344
4345
4346
4347
4348
4349
4350
4351
4352
4353
4354
4355
4356
4357
4358
4359
4360
4361
|
4340
4341
4342
4343
4344
4345
4346
4347
4348
4349
4350
4351
4352
4353
4354
4355
4356
4357
4358
4359
4360
4361
4362
|
-
+
+
-
+
|
flush
sync
set-verbosity
killserver
))
(define (db:login dbstruct calling-path calling-version client-signature)
(cond
(cond
((not (equal? calling-path *toppath*))
(list #f "Login failed due to mismatch paths: " calling-path ", " *toppath*))
;; ((not (equal? *run-id* run-id))
;; (list #f "Login failed due to mismatch run-id: " run-id ", " *run-id*))
((not (equal? megatest-version calling-version))
(list #t (conc "Login warning due to mismatch megatest version: " calling-version ", " megatest-version)))
(else
(else
(hash-table-set! *logged-in-clients* client-signature (current-seconds))
'(#t "successful login"))))
(define (db:general-call dbstruct stmtname params)
(let ((query (let ((q (alist-ref (if (string? stmtname)
(string->symbol stmtname)
stmtname)
|
︙ | | |
Modified dcommon.scm
from [0db7864f6b]
to [dbcf309f44].
︙ | | |
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
|
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
|
-
+
|
;; (iup:attribute-set! servers-matrix (conc "0:" colnum) colname)
;; (set! colnum (+ 1 colnum)))
;; colnames)
(set! rownum 1)
(for-each
(lambda (server)
(set! colnum 0)
(match-let (((mod-time host port start-time pid)
(match-let (((mod-time host port start-time server-id pid)
server))
(let* ((uptime (- (current-seconds) mod-time))
(runtime (if start-time
(- mod-time start-time)
0))
(vals (list "-" ;; (vector-ref server 0) ;; Id
"-" ;; (vector-ref server 9) ;; MT-Ver
|
︙ | | |
Modified http-transport.scm
from [67489ed9ab]
to [c4772ba536].
︙ | | |
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
|
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
|
-
-
+
+
+
+
+
+
+
+
|
(http-transport:server-dat-get-api-req serverdat)
(begin
(debug:print 0 *default-log-port* "FATAL ERROR: http-transport:client-api-send-receive called with no server info")
(exit 1))))
(res (vector #f "uninitialized"))
(success #t)
(sparams (db:obj->string params transport: 'http))
(runremote (or area-dat *runremote*)))
(debug:print-info 11 *default-log-port* "fullurl=" fullurl ", cmd=" cmd ", params=" params ", run-id=" run-id "\n")
(runremote (or area-dat *runremote*))
(server-id (if (vector? serverdat)
(http-transport:server-dat-get-server-id serverdat)
(begin
(debug:print 0 *default-log-port* "FATAL ERROR: http-transport:client-api-send-receive called with no server info")
(exit 1)))))
(debug:print-info 11 *default-log-port* "cmd=" cmd " fullurl=" fullurl " server-id=" server-id " current time:" (current-seconds))
;; set up the http-client here
(max-retry-attempts 1)
;; consider all requests indempotent
(retry-request? (lambda (request)
#f))
;; send the data and get the response
;; extract the needed info from the http data and
|
︙ | | |
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
|
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
|
-
+
-
+
+
-
|
(msg ((condition-property-accessor 'exn 'message) exn)))
(set! success #f)
(if (debug:debug-mode 1)
(debug:print-info 0 *default-log-port* "couldn't talk to server, trying again ...")
(begin
(debug:print 0 *default-log-port* "WARNING: failure in with-input-from-request to " fullurl ".")
(debug:print 0 *default-log-port* " message: " msg ", exn=" exn)
(debug:print 0 *default-log-port* " cmd: " cmd " params: " params)
(debug:print 0 *default-log-port* " cmd: " cmd " params: " params " key:" (or server-id "thekey"))
(debug:print 0 *default-log-port* " call-chain: " call-chain)))
(if runremote
(remote-conndat-set! runremote #f))
;; Killing associated server to allow clean retry.")
;; (tasks:kill-server-run-id run-id) ;; better to kill the server in the logic that called this routine?
(mutex-unlock! *http-mutex*)
;;; (signal (make-composite-condition
;;; (make-property-condition 'commfail 'message "failed to connect to server")))
;;; "communications failed"
(db:obj->string #f))
(with-input-from-request ;; was dat
fullurl
(list (cons 'key (or *server-id* "thekey"))
(list (cons 'key (or server-id "thekey"))
(cons 'cmd cmd)
(cons 'params sparams))
read-string))
transport: 'http)
0)) ;; added this speculatively
;; Shouldn't this be a call to the managed call-all-connections stuff above?
(close-all-connections!)
(mutex-unlock! *http-mutex*)
))
(time-out (lambda ()
(thread-sleep! 45)
(debug:print 0 *default-log-port* "WARNING: send-receive took more than 45 seconds!!")
#f))
(th1 (make-thread send-recieve "with-input-from-request"))
(th2 (make-thread time-out "time out")))
(thread-start! th1)
(thread-start! th2)
(thread-join! th1)
(vector-set! res 0 success)
(thread-terminate! th2)
(debug:print-info 11 *default-log-port* "got res=" res)
(if (vector? res)
(if (vector-ref res 0) ;; this is the first flag or the second flag?
res ;; this is the *inner* vector? seriously? why?
(if (debug:debug-mode 11)
(let ((call-chain (get-call-chain))) ;; note: this code also called in nmsg-transport - consider consolidating it
(print-call-chain (current-error-port))
(debug:print-error 11 *default-log-port* "error above occured at server, res=" res) ;; " message: " ((condition-property-accessor 'exn 'message) exn))
|
︙ | | |
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
|
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
|
-
+
+
-
+
-
+
|
(define (make-http-transport:server-dat)(make-vector 6))
(define (http-transport:server-dat-get-iface vec) (vector-ref vec 0))
(define (http-transport:server-dat-get-port vec) (vector-ref vec 1))
(define (http-transport:server-dat-get-api-uri vec) (vector-ref vec 2))
(define (http-transport:server-dat-get-api-url vec) (vector-ref vec 3))
(define (http-transport:server-dat-get-api-req vec) (vector-ref vec 4))
(define (http-transport:server-dat-get-last-access vec) (vector-ref vec 5))
(define (http-transport:server-dat-get-socket vec) (vector-ref vec 6))
;(define (http-transport:server-dat-get-socket vec) (vector-ref vec 6))
(define (http-transport:server-dat-get-server-id vec) (vector-ref vec 6))
(define (http-transport:server-dat-make-url vec)
(if (and (http-transport:server-dat-get-iface vec)
(http-transport:server-dat-get-port vec))
(conc "http://"
(http-transport:server-dat-get-iface vec)
":"
(http-transport:server-dat-get-port vec))
#f))
(define (http-transport:server-dat-update-last-access vec)
(if (vector? vec)
(vector-set! vec 5 (current-seconds))
(begin
(print-call-chain (current-error-port))
(debug:print-error 0 *default-log-port* "call to http-transport:server-dat-update-last-access with non-vector!!"))))
;;
;; connect
;;
(define (http-transport:client-connect iface port)
(define (http-transport:client-connect iface port server-id)
(let* ((api-url (conc "http://" iface ":" port "/api"))
(api-uri (uri-reference (conc "http://" iface ":" port "/api")))
(api-req (make-request method: 'POST uri: api-uri))
(server-dat (vector iface port api-uri api-url api-req (current-seconds))))
(server-dat (vector iface port api-uri api-url api-req (current-seconds) server-id)))
server-dat))
;; run http-transport:keep-running in a parallel thread to monitor that the db is being
;; used and to shutdown after sometime if it is not.
;;
(define (http-transport:keep-running)
;; if none running or if > 20 seconds since
|
︙ | | |
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
|
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
|
+
+
+
-
+
+
+
+
-
+
|
(if (not (equal? sdat (list iface port)))
(let ((new-iface (car sdat))
(new-port (cadr sdat)))
(debug:print-info 0 *default-log-port* "WARNING: interface changed, refreshing iface and port info")
(set! iface new-iface)
(set! port new-port)
(if (not *server-id*)
(set! *server-id* (server:mk-signature)))
(debug:print 0 *default-log-port* (current-seconds) (current-directory) (current-process-id) (argv))
(debug:print 0 *default-log-port* "SERVER STARTED: " iface ":" port " AT " (current-seconds))
(debug:print 0 *default-log-port* "SERVER STARTED: " iface ":" port " AT " (current-seconds) " server-id: " *server-id*)
(flush-output *default-log-port*)))
;; Transfer *db-last-access* to last-access to use in checking that we are still alive
(mutex-lock! *heartbeat-mutex*)
(set! last-access *db-last-access*)
(mutex-unlock! *heartbeat-mutex*)
(if (common:low-noise-print 120 (conc "server running on " iface ":" port))
(begin
(if (not *server-id*)
(set! *server-id* (server:mk-signature)))
(debug:print 0 *default-log-port* (current-seconds) (current-directory) (current-process-id) (argv))
(debug:print 0 *default-log-port* "SERVER STARTED: " iface ":" port " AT " (current-seconds))
(debug:print 0 *default-log-port* "SERVER STARTED: " iface ":" port " AT " (current-seconds) " server-id: " *server-id*)
(flush-output *default-log-port*)))
(if (common:low-noise-print 60 "dbstats")
(begin
(debug:print 0 *default-log-port* "Server stats:")
(db:print-current-query-stats)))
(let* ((hrs-since-start (/ (- (current-seconds) server-start-time) 3600)))
(cond
|
︙ | | |
Modified megatest.scm
from [0e58f17e0f]
to [e69eff1234].
︙ | | |
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
|
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
|
-
+
|
(if out-file (close-output-port out-port))
(exit) ;; yes, bending the rules here - need to exit since this is a utility
))
(if (args:get-arg "-ping")
(let* ((server-id (string->number (args:get-arg "-ping"))) ;; extract run-id (i.e. no ":"
(host:port (args:get-arg "-ping")))
(server:ping (or server-id host:port) do-exit: #t)))
(server:ping (or server-id host:port) #f do-exit: #t)))
;;======================================================================
;; Capture, save and manipulate environments
;;======================================================================
;; NOTE: Keep these above the section where the server or client code is setup
|
︙ | | |
Modified rmt.scm
from [39d97c528a]
to [88741f6ca9].
︙ | | |
98
99
100
101
102
103
104
105
106
107
108
109
110
111
|
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
|
+
+
+
+
+
|
;; DOT INIT_RUNREMOTE; // leaving off - doesn't really add to the clarity
;; DOT MUTEXLOCK -> INIT_RUNREMOTE [label="no remote?"];
;; DOT INIT_RUNREMOTE -> MUTEXLOCK;
;; ensure we have a record for our connection for given area
(if (not runremote) ;; can remove this one. should never get here.
(begin
(set! *runremote* (make-remote))
(let* ((server-info (remote-server-info *runremote*)))
(if server-info
(begin
(remote-server-url-set! *runremote* (server:record->url server-info))
(remote-server-id-set! *runremote* (server:record->id server-info)))))
(set! runremote *runremote*))) ;; new runremote will come from this on next iteration
;; DOT SET_HOMEHOST; // leaving off - doesn't really add to the clarity
;; DOT MUTEXLOCK -> SET_HOMEHOST [label="no homehost?"];
;; DOT SET_HOMEHOST -> MUTEXLOCK;
;; ensure we have a homehost record
(if (not (pair? (remote-hh-dat runremote))) ;; not on homehost
|
︙ | | |
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
|
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
|
-
+
+
+
+
+
+
|
;;DOT CASE6 [label="init\nremote"];
;;DOT MUTEXLOCK -> CASE6 [label="on homehost,\nwrite query,\nhave server,\ncan't reach it"]; {rank=same "case 6" CASE6};
;;DOT CASE6 -> "rmt:send-receive";
;; on homehost and this is a write, we already have a server, but server has died
((and (cdr (remote-hh-dat runremote)) ;; on homehost
(not (member cmd api:read-only-queries)) ;; this is a write
(remote-server-url runremote) ;; have a server
(not (server:ping (remote-server-url runremote)))) ;; server has died. NOTE: this is not a cheap call! Need better approach.
(not (server:ping (remote-server-url runremote) (remote-server-id runremote)))) ;; server has died. NOTE: this is not a cheap call! Need better approach.
(set! *runremote* (make-remote))
(let* ((server-info (remote-server-info *runremote*)))
(if server-info
(begin
(remote-server-url-set! *runremote* (server:record->url server-info))
(remote-server-id-set! *runremote* (server:record->id server-info)))))
(remote-force-server-set! runremote (common:force-server?))
(mutex-unlock! *rmt-mutex*)
(debug:print-info 12 *default-log-port* "rmt:send-receive, case 6")
(rmt:send-receive cmd rid params attemptnum: attemptnum))
;;DOT CASE7 [label="homehost\nwrite"];
;;DOT MUTEXLOCK -> CASE7 [label="server not required,\non homehost,\na write,\nhave a server"]; {rank=same "case 7" CASE7};
|
︙ | | |
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
|
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
|
-
-
-
+
+
+
+
+
-
+
-
+
|
;;DOT CASE8 -> "rmt:open-qry-close-locally";
;; on homehost, no server contact made and this is a write, passively start a server
((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required
(cdr (remote-hh-dat runremote)) ;; have homehost
(not (remote-server-url runremote)) ;; no connection yet
(not (member cmd api:read-only-queries))) ;; not a read-only query
(debug:print-info 12 *default-log-port* "rmt:send-receive, case 8")
(let ((server-url (server:check-if-running *toppath*))) ;; (server:read-dotserver->url *toppath*))) ;; (server:check-if-running *toppath*))) ;; Do NOT want to run server:check-if-running - very expensive to do for every write call
(if server-url
(remote-server-url-set! runremote server-url) ;; the string can be consumed by the client setup if needed
(let ((server-info (server:check-if-running *toppath*))) ;; (server:read-dotserver->url *toppath*))) ;; (server:check-if-running *toppath*))) ;; Do NOT want to run server:check-if-running - very expensive to do for every write call
(if server-info
(begin
(remote-server-url-set! runremote (server:record->url server-info)) ;; the string can be consumed by the client setup if needed
(remote-server-id-set! runremote (server:record->id server-info)))
(if (common:force-server?)
(server:start-and-wait *toppath*)
(server:kind-run *toppath*))))
(server:kind-run *toppath*)))
(remote-force-server-set! runremote (common:force-server?))
(mutex-unlock! *rmt-mutex*)
(debug:print-info 12 *default-log-port* "rmt:send-receive, case 8.1")
(rmt:open-qry-close-locally cmd 0 params))
(rmt:open-qry-close-locally cmd 0 params)))
;;DOT CASE9 [label="force server\nnot on homehost"];
;;DOT MUTEXLOCK -> CASE9 [label="no connection\nand either require server\nor not on homehost"]; {rank=same "case 9" CASE9};
;;DOT CASE9 -> "start\nserver" -> "rmt:send-receive";
((or (and (remote-force-server runremote) ;; we are forcing a server and don't yet have a connection to one
(not (remote-conndat runremote)))
(and (not (cdr (remote-hh-dat runremote))) ;; not on a homehost
|
︙ | | |
Modified server.scm
from [7b2af2dc7e]
to [d960326e6f].
︙ | | |
91
92
93
94
95
96
97
98
99
100
101
102
103
104
|
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
|
+
|
;; Generate a unique signature for this server
(define (server:mk-signature)
(message-digest-string (md5-primitive)
(with-output-to-string
(lambda ()
(write (list (current-directory)
(current-process-id)
(argv)))))))
;; When using zmq this would send the message back (two step process)
;; with spiffy or rpc this simply returns the return data to be returned
;;
(define (server:reply return-addr query-sig success/fail result)
(debug:print-info 11 *default-log-port* "server:reply return-addr=" return-addr ", result=" result)
|
︙ | | |
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
|
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
|
+
-
+
-
+
-
+
-
+
-
-
+
+
+
|
(system (conc "nbfake " cmdln))
(unsetenv "TARGETHOST_LOGF")
(if (get-environment-variable "TARGETHOST")(unsetenv "TARGETHOST"))
(thread-join! log-rotate)
(pop-directory)))
;; given a path to a server log return: host port startseconds
;; any changes to number of elements returned by this fuction will dirctly affect server:record->url,server:record->id,server:kill,server:get-num-alive which uses match let
;;
(define (server:logf-get-start-info logf)
(let ((rx (regexp "^SERVER STARTED: (\\S+):(\\d+) AT ([\\d\\.]+)"))) ;; SERVER STARTED: host:port AT timesecs
(let ((rx (regexp "^SERVER STARTED: (\\S+):(\\d+) AT ([\\d\\.]+) server-id: (\\S+)"))) ;; SERVER STARTED: host:port AT timesecs server id
(handle-exceptions
exn
(begin
(print "failed to get server info from " logf ", exn=" exn)
(list #f #f #f)) ;; no idea what went wrong, call it a bad server
(list #f #f #f #f)) ;; no idea what went wrong, call it a bad server
(with-input-from-file
logf
(lambda ()
(let loop ((inl (read-line))
(lnum 0))
(if (not (eof-object? inl))
(let ((mlst (string-match rx inl)))
(if (not mlst)
(if (< lnum 500) ;; give up if more than 500 lines of server log read
(loop (read-line)(+ lnum 1))
(list #f #f #f))
(list #f #f #f #f))
(let ((dat (cdr mlst)))
(list (car dat) ;; host
(string->number (cadr dat)) ;; port
(string->number (caddr dat))))))
(list #f #f #f))))))))
(string->number (caddr dat))
(cadr (cddr dat))))))
(list #f #f #f #f))))))))
;; get a list of servers with all relevant data
;; ( mod-time host port start-time pid )
;;
(define (server:get-list areapath #!key (limit #f))
(let ((fname-rx (regexp "^(|.*/)server-(\\d+)-(\\S+).log$"))
(day-seconds (* 24 60 60)))
|
︙ | | |
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
|
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
|
-
+
-
+
-
+
|
'()
(let loop ((hed (car server-logs))
(tal (cdr server-logs))
(res '()))
(let* ((mod-time (handle-exceptions
exn
(begin
(print "failed to get modification time on " hed ", exn=" exn)
(debug:print 0 *default-log-port* "failed to get modification time on " hed ", exn=" exn)
(current-seconds)) ;; 0
(file-modification-time hed))) ;; default to *very* old so log gets ignored if deleted
(down-time (- (current-seconds) mod-time))
(serv-dat (if (or (< num-serv-logs 10)
(< down-time 900)) ;; day-seconds))
(server:logf-get-start-info hed)
'())) ;; don't waste time processing server files not touched in the 15 minutes if there are more than ten servers to look at
(serv-rec (cons mod-time serv-dat))
(fmatch (string-match fname-rx hed))
(pid (if fmatch (string->number (list-ref fmatch 2)) #f))
(new-res (if (null? serv-dat)
res
(cons (append serv-rec (list pid)) res))))
(cons (append serv-rec (list pid)) res)))) ;; any changes to number of elements in new-res will dirctly affect server:record->url,server:record->id,server:kill,server:get-num-alive which uses match let
(if (null? tal)
(if (and limit
(> (length new-res) limit))
new-res ;; (take new-res limit) <= need intelligent sorting before this will work
new-res)
(loop (car tal)(cdr tal) new-res)))))))))
(define (server:get-num-alive srvlst)
(let ((num-alive 0))
(for-each
(lambda (server)
(match-let (((mod-time host port start-time pid)
(match-let (((mod-time host port start-time server-id pid)
server))
(let* ((uptime (- (current-seconds) mod-time))
(runtime (if start-time
(- mod-time start-time)
0)))
(if (< uptime 5)(set! num-alive (+ num-alive 1))))))
srvlst)
|
︙ | | |
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
|
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
|
+
+
+
+
+
+
-
+
|
(if (and (list? srvrs)
(not (null? srvrs)))
(let* ((len (length srvrs))
(idx (random len)))
(list-ref srvrs idx))
#f)))
(define (server:record->id servr)
(match-let (((mod-time host port start-time server-id pid)
servr))
(if server-id
server-id
#f)))
(define (server:record->url servr)
(match-let (((mod-time host port start-time pid)
(match-let (((mod-time host port start-time server-id pid)
servr))
(if (and host port)
(conc host ":" port)
#f)))
(define (server:get-client-signature) ;; BB> why is this proc named "get-"? it returns nothing -- set! has not return value.
(if *my-client-signature* *my-client-signature*
|
︙ | | |
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
|
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
|
-
+
-
+
-
+
-
-
+
+
-
+
-
+
|
(common:simple-file-release-lock lock-file)))
(hash-table-set! *server-kind-run* areapath (list (+ call-num 1)(current-seconds))))))
;; this one seems to be the general entry point
;;
(define (server:start-and-wait areapath #!key (timeout 60))
(let ((give-up-time (+ (current-seconds) timeout)))
(let loop ((server-url (server:check-if-running areapath))
(let loop ((server-info (server:check-if-running areapath))
(try-num 0))
(if (or server-url
(if (or server-info
(> (current-seconds) give-up-time)) ;; server-url will be #f if no server available.
server-url
(server:record->url server-info)
(let ((num-ok (length (server:get-best (server:get-list areapath)))))
(if (and (> try-num 0) ;; first time through simply wait a little while then try again
(< num-ok 1)) ;; if there are no decent candidates for servers then try starting a new one
(server:kind-run areapath))
(thread-sleep! 5)
(loop (server:check-if-running areapath)
(+ try-num 1)))))))
(define server:try-running server:run) ;; there is no more per-run servers ;; REMOVE ME. BUG.
(define (server:get-num-servers #!key (numservers 2))
(let ((ns (string->number
(or (configf:lookup *configdat* "server" "numservers") "notanumber"))))
(or ns numservers)))
;; no longer care if multiple servers are started by accident. older servers will drop off in time.
;;
(define (server:check-if-running areapath) ;; #!key (numservers "2"))
(let* ((ns (server:get-num-servers))
(servers (server:get-best (server:get-list areapath))))
;; (print "servers: " servers " ns: " ns)
(if (or (and servers
(null? servers))
(not servers)
(and (list? servers)
(< (length servers) (random ns)))) ;; somewhere between 0 and numservers
#f
(let loop ((hed (car servers))
(tal (cdr servers)))
(let ((res (server:check-server hed)))
(if res
res
hed
(if (null? tal)
#f
(loop (car tal)(cdr tal)))))))))
;; ping the given server
;;
(define (server:check-server server-record)
(let* ((server-url (server:record->url server-record))
(server-id (server:record->id server-record))
(res (case *transport-type*
((http)(server:ping server-url))
((http)(server:ping server-url server-id))
;; ((nmsg)(nmsg-transport:ping (tasks:hostinfo-get-interface server)
)))
(if res
server-url
#f)))
(define (server:kill servr)
(match-let (((mod-time hostname port start-time pid)
servr))
(tasks:kill-server hostname pid)))
;; called in megatest.scm, host-port is string hostname:port
;;
;; NOTE: This is NOT called directly from clients as not all transports support a client running
;; in the same process as the server.
;;
(define (server:ping host-port-in #!key (do-exit #f))
(define (server:ping host-port-in server-id #!key (do-exit #f))
(let ((host:port (if (not host-port-in) ;; use read-dotserver to find
#f ;; (server:check-if-running *toppath*)
;; (if (number? host-port-in) ;; we were handed a server-id
;; (let ((srec (tasks:get-server-by-id (db:delay-if-busy (tasks:open-db)) host-port-in)))
;; ;; (print "srec: " srec " host-port-in: " host-port-in)
;; (if srec
;; (conc (vector-ref srec 3) ":" (vector-ref srec 4))
|
︙ | | |
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
|
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
|
-
+
|
(begin
(if host-port-in
(debug:print 0 *default-log-port* "ERROR: bad host:port"))
(if do-exit (exit 1))
#f)
(let* ((iface (car host-port))
(port (cadr host-port))
(server-dat (http-transport:client-connect iface port))
(server-dat (http-transport:client-connect iface port server-id))
(login-res (rmt:login-no-auto-client-setup server-dat)))
(if (and (list? login-res)
(car login-res))
(begin
;; (print "LOGIN_OK")
(if do-exit (exit 0))
#t)
|
︙ | | |
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
|
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
|
+
+
+
-
+
-
+
|
(delete-file* staging-file)
(let* ((start-time (current-milliseconds))
(res (system sync-cmd))
(dbbackupfile (conc mtdbfile ".backup"))
(res2
(cond
((eq? 0 res )
(handle-exceptions
exn
#f
(if (file-exists? dbbackupfile)
(delete-file* dbbackupfile)
)
(if (eq? 0 (file-size sync-log))
(delete-file sync-log))
(delete-file* sync-log))
(system (conc "/bin/mv " staging-file " " mtdbfile))
(set! last-sync-seconds (/ (- (current-milliseconds) start-time) 1000))
(set! off-time (calculate-off-time
last-sync-seconds
(cond
((and (number? duty-cycle) (> duty-cycle 0) (< duty-cycle 1))
duty-cycle)
(else
(debug:print 0 *default-log-port* "WARNING: ["(common:human-time)"] server.sync-duty-cycle is invalid. Should be a number between 0 and 1, but "duty-cycle" was specified. Using default value: "default-duty-cycle)
default-duty-cycle))))
(debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" SYNC took "last-sync-seconds" sec")
(debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" SYNC took "last-sync-seconds" sec ; with duty-cycle of "duty-cycle" off time is now "off-time)
'sync-completed)
'sync-completed))
(else
(system (conc "/bin/cp "sync-log" "sync-log".fail"))
(debug:print 0 *default-log-port* "ERROR: ["(common:human-time)"] Sync failed. See log at "sync-log".fail")
(if (file-exists? (conc mtdbfile ".backup"))
(system (conc "/bin/cp "mtdbfile ".backup " mtdbfile)))
#f))))
(common:simple-file-release-lock lockfile)
|
︙ | | |
Modified tasks.scm
from [b621e9649f]
to [7786cfb808].
︙ | | |
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
|
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
|
-
+
-
+
|
#f)))
(if step-id
(begin
(if pgdb-test-id
(begin
(if pgdb-step-id
(begin
(debug:print-info 1 *default-log-port* "Updating existing test-step with test-id: " test-id " and step-id " step-id " pgdb test id: " pgdb-test-id " pgdb step id " pgdb-step-id )
(debug:print-info 4 *default-log-port* "Updating existing test-step with test-id: " test-id " and step-id " step-id " pgdb test id: " pgdb-test-id " pgdb step id " pgdb-step-id )
(let* ((pgdb-last-update (pgdb:get-test-step-last-update dbh pgdb-step-id)))
(if (and (> last-update pgdb-last-update) (or (not smallest-time) (< last-update smallest-time)))
(hash-table-set! smallest-last-update-time "smallest-time" last-update)))
(pgdb:update-test-step dbh pgdb-step-id pgdb-test-id stepname state status event_time comment logfile last-update))
(begin
(debug:print-info 1 *default-log-port* "Inserting test-step with test-id: " test-id " and step-id " step-id " pgdb test id: " pgdb-test-id)
(debug:print-info 4 *default-log-port* "Inserting test-step with test-id: " test-id " and step-id " step-id " pgdb test id: " pgdb-test-id)
(if (or (not smallest-time) (< last-update smallest-time))
(hash-table-set! smallest-last-update-time "smallest-time" last-update))
(pgdb:insert-test-step dbh pgdb-test-id stepname state status event_time comment logfile last-update )
(set! pgdb-step-id (pgdb:get-test-step-id dbh pgdb-test-id stepname state))))
(hash-table-set! step-ht step-id pgdb-step-id ))
(debug:print-info 1 *default-log-port* "Error: Test not cashed")))
(debug:print-info 1 *default-log-port* "Error: Could not get test step info for step id " test-step-id )))) ;; this is a wierd senario need to debug
|
︙ | | |
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
|
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
|
-
+
-
+
|
#f)))
(if data-id
(begin
(if pgdb-test-id
(begin
(if pgdb-data-id
(begin
(debug:print-info 1 *default-log-port* "Updating existing test-data with test-id: " test-id " and data-id " data-id " pgdb test id: " pgdb-test-id " pgdb data id " pgdb-data-id)
(debug:print-info 4 *default-log-port* "Updating existing test-data with test-id: " test-id " and data-id " data-id " pgdb test id: " pgdb-test-id " pgdb data id " pgdb-data-id)
(let* ((pgdb-last-update (pgdb:get-test-data-last-update dbh pgdb-data-id)))
(if (and (> last-update pgdb-last-update) (or (not smallest-time) (< last-update smallest-time)))
(hash-table-set! smallest-last-update-time "smallest-time" last-update)))
(pgdb:update-test-data dbh pgdb-data-id pgdb-test-id category variable value expected tol units comment status type last-update))
(begin
(debug:print-info 1 *default-log-port* "Inserting test-data with test-id: " test-id " and data-id " data-id " pgdb test id: " pgdb-test-id)
(debug:print-info 4 *default-log-port* "Inserting test-data with test-id: " test-id " and data-id " data-id " pgdb test id: " pgdb-test-id)
(if (handle-exceptions
exn
(begin (print-call-chain)
(print ((condition-property-accessor 'exn 'message) exn))
#f)
(pgdb:insert-test-data dbh pgdb-test-id category variable value expected tol units comment status type last-update))
|
︙ | | |
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
|
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
|
-
+
-
+
|
;; "id" "run_id" "testname" "state" "status" "event_time"
;; "host" "cpuload" "diskfree" "uname" "rundir" "item_path"
;; "run_duration" "final_logf" "comment" "shortdir" "attemptnum" "archived"
(if pgdb-run-id
(begin
(if pgdb-test-id ;; have a record
(begin ;; let ((key-name (conc run-id "/" test-name "/" item-path)))
(debug:print-info 0 *default-log-port* "Updating existing test with run-id: " run-id " and test-id: " test-id " pgdb run id: " pgdb-run-id " pgdb-test-id " pgdb-test-id)
(debug:print-info 4 *default-log-port* "Updating existing test with run-id: " run-id " and test-id: " test-id " pgdb run id: " pgdb-run-id " pgdb-test-id " pgdb-test-id)
(let* ((pgdb-last-update (pgdb:get-test-last-update dbh pgdb-test-id)))
(if (and (> last-update pgdb-last-update) (or (not smallest-time) (< last-update smallest-time))) ;;if last-update is same as pgdb-last-update then it is safe to assume the records are identical and we can use a larger last update time.
(hash-table-set! smallest-last-update-time "smallest-time" last-update)))
(pgdb:update-test dbh pgdb-test-id pgdb-run-id test-name item-path state status host cpuload diskfree uname run-dir log-file run-duration comment event-time archived last-update pid))
(begin
(debug:print-info 0 *default-log-port* "Inserting test with run-id: " run-id " and test-id: " test-id " pgdb run id: " pgdb-run-id)
(debug:print-info 4 *default-log-port* "Inserting test with run-id: " run-id " and test-id: " test-id " pgdb run id: " pgdb-run-id)
(pgdb:insert-test dbh pgdb-run-id test-name item-path state status host cpuload diskfree uname run-dir log-file run-duration comment event-time archived last-update pid)
(if (or (not smallest-time) (< last-update smallest-time))
(hash-table-set! smallest-last-update-time "smallest-time" last-update))
(set! pgdb-test-id (pgdb:get-test-id dbh pgdb-run-id test-name item-path))))
(hash-table-set! test-ht test-id pgdb-test-id))
(debug:print-info 1 *default-log-port* "WARNING: Skipping run with run-id:" run-id ". This run was created after privious sync and removed before this sync."))))
test-ids)))
|
︙ | | |