Megatest

Diff
Login

Differences From Artifact [1ca825cede]:

To Artifact [a6f9fa170f]:


203
204
205
206
207
208
209



210
211
212







213
214
215
216




217
218
219
220
221
222
223
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222




223
224
225
226
227
228
229
230
231
232
233







+
+
+



+
+
+
+
+
+
+
-
-
-
-
+
+
+
+







				   run-id)))))
    (if conn
	(begin 
          (debug:print-info 2 *default-log-port* "already connected to a server for " dbfname)
           conn) ;; we are already connected to the server

	;; no conn

	;; find server with lowest number of threads running (i.e. lowest load)
	;;
        (let* ((sdats (tt:get-server-info-sorted ttdat dbfname))
	       (sdat  (if (null? sdats)
			  #f
			  ;; choose server with lowest threads count
			  (car (sort sdats
				     (lambda (a b)
				       (let* ((load-a (tt:get-server-threads a))
					      (load-b (tt:get-server-threads b)))
					 (< load-a load-b))))))))
				     
			  (let ((indx (max (random (- (length sdats) 1)) 0)))
			    (list-ref sdats indx)))))
	   (debug:print-info 2 *default-log-port* "found sdat " sdat)
           (match sdat
	  ;; (let ((indx (max (random (- (length sdats) 1)) 0)))
	  ;;    (list-ref sdats indx)))))
	  (debug:print-info 1 *default-log-port* "found sdat " sdat" from sdats: "sdats)
          (match sdat
	    ((host port start-time server-id pid dbfname2 servinffile)
	     (assert (equal? dbfname dbfname2) "FATAL: read server info from wrong file.")
             (debug:print-info 2 *default-log-port* "no conn - in match servinffile:" servinffile)
	     (let* ((host-port (conc host":"port))
		    (conn (make-tt-conn
			   host: host
			   port: port
274
275
276
277
278
279
280























281
282
283
284
285
286
287
288
289
290
291
292
293
294

295
296
297
298
299

300

301
302
303
304
305
306
307
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334

335
336
337
338
339
340
341
342







+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+














+





+
-
+








;; returns ( result . ping_time )
(define (tt:timed-ping host port server-id)
  (let* ((start-time (current-milliseconds))
	 (result     (tt:ping host port server-id)))
    (cons result (- (current-milliseconds) start-time))))
    
(define *server-load* (make-hash-table))

(define (tt:save-server-meta host port meta)
  (hash-table-set! *server-load* (conc host":"port) meta))

(define (tt:get-server-threads dat)
  (let* ((host (car  dat))
	 (port (cadr dat))
	 (meta (tt:get-server-meta host port #t)))
    (if (list? meta)
	(alist-ref 'sload meta)
	#f)))
 
;; lazy get, does not auto-refresh meta, this might be a problem
;;
(define (tt:get-server-meta host port #!optional (do-ping #f))
  (let* ((meta (hash-table-ref/default *server-load* (conc host":"port) #f)))
    (if (and (not meta)
	     do-ping)
	(begin
	  (tt:timed-ping host port #f)
	  (hash-table-ref/default *server-load* (conc host":"port) #f))
	meta)))

(define (tt:ping host port server-id #!optional (tries-left 5))
  (let*  ((res      (tt:send-receive-direct host port `(ping #f #f #f) ping-mode: #t)) ;; please send me your server-id
	  (try-again (lambda ()
		       (if (> tries-left 0)
			   (begin
			     (thread-sleep! 1)
			     (tt:ping host port server-id (- tries-left 1)))
			   #f))))
    ;;
    ;; need two threads, one a 5 second timer
    ;;
    (match res
      ((status errmsg result meta)
       (tt:save-server-meta host port meta)
       (if (equal? result server-id)
	   (let* ((server-state (alist-ref 'sstate meta)))
	     ;; (debug:print 0 *default-log-port* "Ping to "host":"port" successful.")
	     (or server-state 'unk)) ;; then we are good
	   (begin
	     (if server-id
	     (debug:print 0 *default-log-port* "WARNING: server-id does not match, expected: "server-id", got: "result)
		 (debug:print 0 *default-log-port* "WARNING: server-id does not match, expected: "server-id", got: "result))
	     #f)))
      (else
       ;; (debug:print 0 *default-log-port* "res not in form (status errmsg result meta), got: "res)
       (try-again)))))

;; client side handler
;;
385
386
387
388
389
390
391


392
393
394
395
396
397
398
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435







+
+







		   )))))
	(begin
	  (thread-sleep! 1) ;; no conn yet set up, give it a rest and try again
	  (tt:handler ttdat cmd run-id params attemptnum readonly-mode dbfname testsuite mtexe server-start-proc)))))

;; gets server info and appends path to server file
;; sorts by age, --oldest-- now newest first
;;
;; move the ping here?
;;
;; returns list of (host port startseconds server-id servinfofile)
;;
(define (tt:get-server-info-sorted ttdat dbfname)
  (let* ((areapath (tt-areapath ttdat))
	 (sfiles   (tt:find-server areapath dbfname))
	 (sdats    (filter car (map tt:server-get-info sfiles))) ;; first element is #f if the file disappeared while being read
598
599
600
601
602
603
604

605
606
607
608
609
610
611
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649







+







			      (loop (cdr servrs) prime-host result)) ;; drop 
			     )))))
	       (home-host (if (null? good-srvrs)
			      #f
			      (caar good-srvrs))))
	  ;; by here we have a trustworthy list of servers and we have removed the .servinfo file for any unresponsive servers
	  ;; and the list is in good-srvrs
	  ;;
	  (cond
	   ((not home-host) ;; no servers yet, go ahead and start
	    (debug:print-info 0 *default-log-port* "No servers yet, starting on "(get-host-name)))
	   ((> (length good-srvrs) 3) ;; don't need more, just exit
	    (debug:print-info 0 *default-log-port* "Have "(length good-srvrs)", no need for more, exiting.")
	    (exit))
	   ((not (equal? home-host (get-host-name))) ;; there is a home-host and we are not on it
763
764
765
766
767
768
769


770

771
772
773
774
775
776
777
801
802
803
804
805
806
807
808
809

810
811
812
813
814
815
816
817







+
+
-
+







		(debug:print 0 *default-log-port* "WARNING: file "servinf" was not created.")
		(loop (+ count 1))))))
    serv-id))

;; find valid server
;; get servers listed, last part of name must match :<dbfname>
;; if more than one, wait one second and look again
;; 
;; NOTE: this only gets the servinfo data, no network activity here
;; future: ping oldest, if alive remove other :<dbfname> files
;;       i.e. no ping etc.
;;
(define (tt:find-server areapath dbfname)
  (let* ((servdir  (tt:get-servinfo-dir areapath))
	 (sfiles   (glob (conc servdir"/*:"dbfname)))
	 (goodfiles '()))

    ;; filter the files here by looking in processes table (if we are not main.db)