Megatest

Diff
Login

Differences From Artifact [7b2af2dc7e]:

To Artifact [d960326e6f]:


91
92
93
94
95
96
97

98
99
100
101
102
103
104
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105







+







	    
;; Generate a unique signature for this server
(define (server:mk-signature)
  (message-digest-string (md5-primitive) 
			 (with-output-to-string
			   (lambda ()
			     (write (list (current-directory)
                                          (current-process-id)
					  (argv)))))))

;; When using zmq this would send the message back (two step process)
;; with spiffy or rpc this simply returns the return data to be returned
;; 
(define (server:reply return-addr query-sig success/fail result)
  (debug:print-info 11 *default-log-port* "server:reply return-addr=" return-addr ", result=" result)
161
162
163
164
165
166
167

168

169
170

171
172
173
174
175

176
177
178
179
180
181
182
183
184
185
186

187
188
189
190
191



192
193
194
195
196
197
198
162
163
164
165
166
167
168
169

170
171

172
173
174
175
176

177
178
179
180
181
182
183
184
185
186
187

188
189
190
191


192
193
194
195
196
197
198
199
200
201







+
-
+

-
+




-
+










-
+



-
-
+
+
+







    (system (conc "nbfake " cmdln))
    (unsetenv "TARGETHOST_LOGF")
    (if (get-environment-variable "TARGETHOST")(unsetenv "TARGETHOST"))
    (thread-join! log-rotate)
    (pop-directory)))

;; given a path to a server log return: host port startseconds
;; any changes to number of elements returned by this fuction will dirctly affect server:record->url,server:record->id,server:kill,server:get-num-alive which uses match let 
;;

(define (server:logf-get-start-info logf)
  (let ((rx (regexp "^SERVER STARTED: (\\S+):(\\d+) AT ([\\d\\.]+)"))) ;; SERVER STARTED: host:port AT timesecs
  (let ((rx (regexp "^SERVER STARTED: (\\S+):(\\d+) AT ([\\d\\.]+) server-id: (\\S+)"))) ;; SERVER STARTED: host:port AT timesecs server id
    (handle-exceptions
	exn
      (begin
	(print "failed to get server info from " logf ", exn=" exn)
	(list #f #f #f)) ;; no idea what went wrong, call it a bad server
	(list #f #f #f #f)) ;; no idea what went wrong, call it a bad server
      (with-input-from-file
	  logf
	(lambda ()
	  (let loop ((inl  (read-line))
		     (lnum 0))
	    (if (not (eof-object? inl))
		(let ((mlst (string-match rx inl)))
		  (if (not mlst)
		      (if (< lnum 500) ;; give up if more than 500 lines of server log read
			  (loop (read-line)(+ lnum 1))
			  (list #f #f #f))
			  (list #f #f #f #f))
		      (let ((dat  (cdr mlst)))
			(list (car dat) ;; host
			      (string->number (cadr dat)) ;; port
			      (string->number (caddr dat))))))
		(list #f #f #f))))))))
			      (string->number (caddr dat))
                              (cadr (cddr dat))))))
		(list #f #f #f #f))))))))

;; get a list of servers with all relevant data
;; ( mod-time host port start-time pid )
;;
(define (server:get-list areapath #!key (limit #f))
  (let ((fname-rx    (regexp "^(|.*/)server-(\\d+)-(\\S+).log$"))
	(day-seconds (* 24 60 60)))
215
216
217
218
219
220
221
222

223
224
225
226
227
228
229
230
231
232
233
234
235

236
237
238
239
240
241
242
243
244
245
246
247

248
249
250
251
252
253
254
218
219
220
221
222
223
224

225
226
227
228
229
230
231
232
233
234
235
236
237

238
239
240
241
242
243
244
245
246
247
248
249

250
251
252
253
254
255
256
257







-
+












-
+











-
+







	      '()
	      (let loop ((hed  (car server-logs))
			 (tal  (cdr server-logs))
			 (res '()))
		(let* ((mod-time  (handle-exceptions
				   exn
				   (begin
				     (print "failed to get modification time on " hed ", exn=" exn)
				     (debug:print 0 *default-log-port* "failed to get modification time on " hed ", exn=" exn)
				     (current-seconds)) ;; 0
				   (file-modification-time hed))) ;; default to *very* old so log gets ignored if deleted
		       (down-time (- (current-seconds) mod-time))
		       (serv-dat  (if (or (< num-serv-logs 10)
				  	  (< down-time 900)) ;; day-seconds))
				      (server:logf-get-start-info hed)
				      '())) ;; don't waste time processing server files not touched in the 15 minutes if there are more than ten servers to look at
		       (serv-rec (cons mod-time serv-dat))
		       (fmatch   (string-match fname-rx hed))
		       (pid      (if fmatch (string->number (list-ref fmatch 2)) #f))
		       (new-res  (if (null? serv-dat)
				     res
				     (cons (append serv-rec (list pid)) res))))
				     (cons (append serv-rec (list pid)) res)))) ;; any changes to number of elements in new-res will dirctly affect server:record->url,server:record->id,server:kill,server:get-num-alive which uses match let 
		  (if (null? tal)
		      (if (and limit
			       (> (length new-res) limit))
			  new-res ;; (take new-res limit)  <= need intelligent sorting before this will work
			  new-res)
		      (loop (car tal)(cdr tal) new-res)))))))))

(define (server:get-num-alive srvlst)
  (let ((num-alive 0))
    (for-each
     (lambda (server)
       (match-let (((mod-time host port start-time pid)
       (match-let (((mod-time host port start-time server-id pid)
		    server))
	 (let* ((uptime  (- (current-seconds) mod-time))
		(runtime (if start-time
			     (- mod-time start-time)
			     0)))
	   (if (< uptime 5)(set! num-alive (+ num-alive 1))))))
     srvlst)
303
304
305
306
307
308
309






310
311
312

313
314
315
316
317
318
319
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320

321
322
323
324
325
326
327
328







+
+
+
+
+
+


-
+







    (if (and (list? srvrs)
	     (not (null? srvrs)))
	(let* ((len (length srvrs))
	       (idx (random len)))
	  (list-ref srvrs idx))
	#f)))

(define (server:record->id servr)
  (match-let (((mod-time host port start-time server-id pid)
	       servr))
    (if server-id
	server-id
	#f)))

(define (server:record->url servr)
  (match-let (((mod-time host port start-time pid)
  (match-let (((mod-time host port start-time server-id pid)
	       servr))
    (if (and host port)
	(conc host ":" port)
	#f)))

(define (server:get-client-signature) ;; BB> why is this proc named "get-"?  it returns nothing -- set! has not return value.
  (if *my-client-signature* *my-client-signature*
376
377
378
379
380
381
382
383

384
385

386
387

388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419

420
421
422
423
424
425
426
427

428
429

430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446

447
448
449
450
451
452
453
385
386
387
388
389
390
391

392
393

394
395

396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416

417
418
419
420
421
422
423
424
425
426

427
428
429
430
431
432
433
434
435
436
437

438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454

455
456
457
458
459
460
461
462







-
+

-
+

-
+




















-










-
+








+

-
+
















-
+







		  (common:simple-file-release-lock lock-file)))
	(hash-table-set! *server-kind-run* areapath (list (+ call-num 1)(current-seconds))))))

;; this one seems to be the general entry point
;;
(define (server:start-and-wait areapath #!key (timeout 60))
  (let ((give-up-time (+ (current-seconds) timeout)))
    (let loop ((server-url (server:check-if-running areapath))
    (let loop ((server-info (server:check-if-running areapath))
	       (try-num    0))
      (if (or server-url
      (if (or server-info
	      (> (current-seconds) give-up-time)) ;; server-url will be #f if no server available.
	  server-url
	  (server:record->url server-info)
	  (let ((num-ok (length (server:get-best (server:get-list areapath)))))
	    (if (and (> try-num 0)  ;; first time through simply wait a little while then try again
		     (< num-ok 1))  ;; if there are no decent candidates for servers then try starting a new one
		(server:kind-run areapath))
	    (thread-sleep! 5)
	    (loop (server:check-if-running areapath)
		  (+ try-num 1)))))))

(define server:try-running server:run) ;; there is no more per-run servers ;; REMOVE ME. BUG.

(define (server:get-num-servers #!key (numservers 2))
  (let ((ns (string->number
	     (or (configf:lookup *configdat* "server" "numservers") "notanumber"))))
    (or ns numservers)))

;; no longer care if multiple servers are started by accident. older servers will drop off in time.
;;
(define (server:check-if-running areapath) ;;  #!key (numservers "2"))
  (let* ((ns            (server:get-num-servers))
	 (servers       (server:get-best (server:get-list areapath))))
    ;; (print "servers: " servers " ns: " ns)
    (if (or (and servers
		 (null? servers))
	    (not servers)
	    (and (list? servers)
		 (< (length servers) (random ns)))) ;; somewhere between 0 and numservers
        #f
        (let loop ((hed (car servers))
                   (tal (cdr servers)))
          (let ((res (server:check-server hed)))
            (if res
                res
                hed
                (if (null? tal)
                    #f
                    (loop (car tal)(cdr tal)))))))))

;; ping the given server
;;
(define (server:check-server server-record)
  (let* ((server-url (server:record->url server-record))
         (server-id (server:record->id server-record)) 
         (res        (case *transport-type*
                       ((http)(server:ping server-url))
                       ((http)(server:ping server-url server-id))
                       ;; ((nmsg)(nmsg-transport:ping (tasks:hostinfo-get-interface server)
                       )))
    (if res
        server-url
	#f)))

(define (server:kill servr)
  (match-let (((mod-time hostname port start-time pid)
	       servr))
    (tasks:kill-server hostname pid)))

;; called in megatest.scm, host-port is string hostname:port
;;
;; NOTE: This is NOT called directly from clients as not all transports support a client running
;;       in the same process as the server.
;;
(define (server:ping host-port-in #!key (do-exit #f))
(define (server:ping host-port-in server-id #!key (do-exit #f))
  (let ((host:port (if (not host-port-in) ;; use read-dotserver to find
		       #f ;; (server:check-if-running *toppath*)
		;; (if (number? host-port-in) ;; we were handed a server-id
		;; 	   (let ((srec (tasks:get-server-by-id (db:delay-if-busy (tasks:open-db)) host-port-in)))
		;; 	     ;; (print "srec: " srec " host-port-in: " host-port-in)
		;; 	     (if srec
		;; 		 (conc (vector-ref srec 3) ":" (vector-ref srec 4))
465
466
467
468
469
470
471
472

473
474
475
476
477
478
479
474
475
476
477
478
479
480

481
482
483
484
485
486
487
488







-
+







	  (begin
	    (if host-port-in
		(debug:print 0 *default-log-port*  "ERROR: bad host:port"))
	    (if do-exit (exit 1))
	    #f)
	  (let* ((iface      (car host-port))
		 (port       (cadr host-port))
		 (server-dat (http-transport:client-connect iface port))
		 (server-dat (http-transport:client-connect iface port server-id))
		 (login-res  (rmt:login-no-auto-client-setup server-dat)))
	    (if (and (list? login-res)
		     (car login-res))
		(begin
		  ;; (print "LOGIN_OK")
		  (if do-exit (exit 0))
		  #t)
587
588
589
590
591
592
593



594
595
596
597
598

599
600
601
602
603
604
605
606
607
608
609
610
611
612
613

614
615
616
617
618
619
620
596
597
598
599
600
601
602
603
604
605
606
607
608
609

610
611
612
613
614
615
616
617
618
619
620
621
622
623
624

625
626
627
628
629
630
631
632







+
+
+




-
+














-
+







		               (delete-file* staging-file)
		               (let* ((start-time (current-milliseconds))
                                      (res (system sync-cmd))
                                      (dbbackupfile (conc mtdbfile ".backup"))
                                      (res2 
                                       (cond
                                        ((eq? 0 res )
                                         (handle-exceptions
                                            exn
                                            #f
                                         (if (file-exists? dbbackupfile)
		                           (delete-file* dbbackupfile)
                                         )
                                         (if (eq? 0 (file-size sync-log))
                                             (delete-file sync-log))
                                             (delete-file* sync-log))
		                         (system (conc "/bin/mv " staging-file " " mtdbfile))
                                         
                                         (set! last-sync-seconds (/ (- (current-milliseconds) start-time) 1000))
                                         (set! off-time (calculate-off-time
                                                         last-sync-seconds
                                                         (cond
                                                          ((and (number? duty-cycle) (> duty-cycle 0) (< duty-cycle 1))
                                                           duty-cycle)
                                                          (else
                                                           (debug:print 0 *default-log-port* "WARNING: ["(common:human-time)"] server.sync-duty-cycle is invalid.  Should be a number between 0 and 1, but "duty-cycle" was specified.  Using default value: "default-duty-cycle)
                                                           default-duty-cycle))))
                                         
                                         (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" SYNC took "last-sync-seconds" sec")
                                         (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" SYNC took "last-sync-seconds" sec ; with duty-cycle of "duty-cycle" off time is now "off-time)
                                         'sync-completed)
                                         'sync-completed))
                                        (else
                                         (system (conc "/bin/cp "sync-log" "sync-log".fail"))
                                         (debug:print 0 *default-log-port* "ERROR: ["(common:human-time)"] Sync failed. See log at "sync-log".fail")
                                         (if (file-exists? (conc mtdbfile ".backup"))
                                             (system (conc "/bin/cp "mtdbfile ".backup " mtdbfile)))
                                         #f))))
                                 (common:simple-file-release-lock lockfile)