Index: api.scm ================================================================== --- api.scm +++ api.scm @@ -273,11 +273,11 @@ ;; (thread-sleep! 0.5)) (normal-proc cmd run-id params)) (else (normal-proc cmd run-id params)))) (meta (case cmd - ((ping) `((sstate . ,server-state))) + ((ping) `((sstate . ,server-state)(sload . ,numthreads))) (else `((wait . ,delay-wait))))) (payload (list status errmsg result meta))) ;; (cmd run-id params meta) (db:add-stats cmd run-id params (- (current-milliseconds) start-t)) payload)) Index: tcp-transportmod.scm ================================================================== --- tcp-transportmod.scm +++ tcp-transportmod.scm @@ -205,17 +205,27 @@ (begin (debug:print-info 2 *default-log-port* "already connected to a server for " dbfname) conn) ;; we are already connected to the server ;; no conn + + ;; find server with lowest number of threads running (i.e. lowest load) + ;; (let* ((sdats (tt:get-server-info-sorted ttdat dbfname)) (sdat (if (null? sdats) #f - (let ((indx (max (random (- (length sdats) 1)) 0))) - (list-ref sdats indx))))) - (debug:print-info 2 *default-log-port* "found sdat " sdat) - (match sdat + ;; choose server with lowest threads count + (car (sort sdats + (lambda (a b) + (let* ((load-a (tt:get-server-threads a)) + (load-b (tt:get-server-threads b))) + (< load-a load-b)))))))) + + ;; (let ((indx (max (random (- (length sdats) 1)) 0))) + ;; (list-ref sdats indx))))) + (debug:print-info 1 *default-log-port* "found sdat " sdat" from sdats: "sdats) + (match sdat ((host port start-time server-id pid dbfname2 servinffile) (assert (equal? dbfname dbfname2) "FATAL: read server info from wrong file.") (debug:print-info 2 *default-log-port* "no conn - in match servinffile:" servinffile) (let* ((host-port (conc host":"port)) (conn (make-tt-conn @@ -276,10 +286,33 @@ (define (tt:timed-ping host port server-id) (let* ((start-time (current-milliseconds)) (result (tt:ping host port server-id))) (cons result (- (current-milliseconds) start-time)))) +(define *server-load* (make-hash-table)) + +(define (tt:save-server-meta host port meta) + (hash-table-set! *server-load* (conc host":"port) meta)) + +(define (tt:get-server-threads dat) + (let* ((host (car dat)) + (port (cadr dat)) + (meta (tt:get-server-meta host port #t))) + (if (list? meta) + (alist-ref 'sload meta) + #f))) + +;; lazy get, does not auto-refresh meta, this might be a problem +;; +(define (tt:get-server-meta host port #!optional (do-ping #f)) + (let* ((meta (hash-table-ref/default *server-load* (conc host":"port) #f))) + (if (and (not meta) + do-ping) + (begin + (tt:timed-ping host port #f) + (hash-table-ref/default *server-load* (conc host":"port) #f)) + meta))) (define (tt:ping host port server-id #!optional (tries-left 5)) (let* ((res (tt:send-receive-direct host port `(ping #f #f #f) ping-mode: #t)) ;; please send me your server-id (try-again (lambda () (if (> tries-left 0) @@ -290,16 +323,18 @@ ;; ;; need two threads, one a 5 second timer ;; (match res ((status errmsg result meta) + (tt:save-server-meta host port meta) (if (equal? result server-id) (let* ((server-state (alist-ref 'sstate meta))) ;; (debug:print 0 *default-log-port* "Ping to "host":"port" successful.") (or server-state 'unk)) ;; then we are good (begin - (debug:print 0 *default-log-port* "WARNING: server-id does not match, expected: "server-id", got: "result) + (if server-id + (debug:print 0 *default-log-port* "WARNING: server-id does not match, expected: "server-id", got: "result)) #f))) (else ;; (debug:print 0 *default-log-port* "res not in form (status errmsg result meta), got: "res) (try-again))))) @@ -387,10 +422,12 @@ (thread-sleep! 1) ;; no conn yet set up, give it a rest and try again (tt:handler ttdat cmd run-id params attemptnum readonly-mode dbfname testsuite mtexe server-start-proc))))) ;; gets server info and appends path to server file ;; sorts by age, --oldest-- now newest first +;; +;; move the ping here? ;; ;; returns list of (host port startseconds server-id servinfofile) ;; (define (tt:get-server-info-sorted ttdat dbfname) (let* ((areapath (tt-areapath ttdat)) @@ -600,10 +637,11 @@ (home-host (if (null? good-srvrs) #f (caar good-srvrs)))) ;; by here we have a trustworthy list of servers and we have removed the .servinfo file for any unresponsive servers ;; and the list is in good-srvrs + ;; (cond ((not home-host) ;; no servers yet, go ahead and start (debug:print-info 0 *default-log-port* "No servers yet, starting on "(get-host-name))) ((> (length good-srvrs) 3) ;; don't need more, just exit (debug:print-info 0 *default-log-port* "Have "(length good-srvrs)", no need for more, exiting.") @@ -765,11 +803,13 @@ serv-id)) ;; find valid server ;; get servers listed, last part of name must match : ;; if more than one, wait one second and look again -;; future: ping oldest, if alive remove other : files +;; +;; NOTE: this only gets the servinfo data, no network activity here +;; i.e. no ping etc. ;; (define (tt:find-server areapath dbfname) (let* ((servdir (tt:get-servinfo-dir areapath)) (sfiles (glob (conc servdir"/*:"dbfname))) (goodfiles '()))