Index: api.scm ================================================================== --- api.scm +++ api.scm @@ -248,11 +248,13 @@ 0))) (set! *api-process-request-count* newcount) (set! *db-last-access* (current-seconds)) (match indat ((cmd run-id params meta) - (let* ((status (cond + (let* ((ttdat *server-info*) + (server-state (tt-state ttdat)) + (status (cond ((> newcount 30) 'busy) ((> newcount 15) 'loaded) (else 'ok))) (errmsg (case status ((busy) (conc "Server overloaded, "newcount" threads in flight")) @@ -264,11 +266,13 @@ (else (case cmd ((ping) *server-signature*) (else (api:dispatch-request dbstruct cmd run-id params)))))) - (meta `((wait . ,delay-wait))) + (meta (case cmd + ((ping) `((sstate . ,server-state))) + (else `((wait . ,delay-wait))))) (payload (list status errmsg result meta))) (set! *api-process-request-count* (- *api-process-request-count* 1)) (serialize payload))) (else (assert #f "FATAL: failed to deserialize indat "indat)))))) Index: tcp-transportmod.scm ================================================================== --- tcp-transportmod.scm +++ tcp-transportmod.scm @@ -84,10 +84,11 @@ (defstruct tt ;; client related (conns (make-hash-table)) ;; dbfname -> conn ;; server related + (state 'starting) (areapath #f) (host #f) (port #f) (conn #f) (cleanup-proc #f) @@ -101,10 +102,13 @@ (last-access (current-seconds)) (servinf-file #f) (last-serv-start 0) ) +;; make ttdat visible +(define *server-info* #f) + (define (tt:make-remote areapath) (make-tt areapath: areapath)) ;; 1 ... or #f (define (tt:valid-run-id run-id) @@ -139,20 +143,26 @@ server-id: server-id server-start: start-time pid: pid))) (hash-table-set! (tt-conns ttdat) dbfname conn) ;; verify we can talk to this server - (if (tt:ping host port server-id) - conn - (let* ((curr-secs (current-seconds))) - ;; rm the (last server) would go here - (if (> (- curr-secs (tt-last-serv-start ttdat)) 10) - (begin - (tt-last-serv-start-set! ttdat curr-secs) - (server-start-proc))) ;; don't try and start server unless 30 sec has gone by since last attempt - (thread-sleep! 1) - (tt:client-connect-to-server ttdat dbfname run-id testsuite))))) + (let* ((ping-res (tt:ping host port server-id))) + (case ping-res + ((running) conn) + ((starting) + (thread-sleep! 0.5) + (tt:client-connect-to-server ttdat dbfname run-id testsuite)) + (else + (let* ((curr-secs (current-seconds))) + ;; rm the (last server) would go here + (if (> (- curr-secs (tt-last-serv-start ttdat)) 10) + (begin + (tt-last-serv-start-set! ttdat curr-secs) + (server-start-proc))) ;; start server if 30 sec since last attempt + (thread-sleep! 1) + (tt:client-connect-to-server ttdat dbfname run-id testsuite))))))) + (else (if (> (- (current-seconds) (tt-last-serv-start ttdat)) 5) ;; really do not want to swamp the machine with servers (begin (debug:print-info 0 *default-log-port* "No server found. Starting one for run-id "run-id" in dbfile "dbfname) (server-start-proc) @@ -166,13 +176,13 @@ ;; need two threads, one a 5 second timer ;; (match res ((status errmsg result meta) (if (equal? result server-id) - (begin + (let* ((server-state (alist-ref 'sstate meta))) ;; (debug:print 0 *default-log-port* "Ping to "host":"port" successful.") - #t) ;; then we are good + (or server-state 'unk)) ;; then we are good (begin (debug:print 0 *default-log-port* "WARNING: server-id does not match, expected: "server-id", got: "result) #f))) (else ;; (debug:print 0 *default-log-port* "res not in form (status errmsg result meta), got: "res) @@ -353,10 +363,11 @@ (if (tt-cleanup-proc ttdat) ((tt-cleanup-proc ttdat))) (dbfile:with-no-sync-db nosyncdbpath (lambda (db) (db:no-sync-del! db dbfname)))))) + (set! *server-info* ttdat) (let loop ((count 0)) (if (> count 240) (begin (debug:print 0 *default-log-port* "FATAL: Could not start a tcp server, giving up.") (exit 1)) @@ -385,10 +396,11 @@ (lambda (db) (db:no-sync-get-lock db dbfname)))) (success (car lockinfo))) (if success (begin + (tt-state-set! ttdat 'running) (debug:print 0 *default-log-port* "Got server lock for "dbfname) (set! db-locked-in #t) #t) (begin (debug:print 0 *default-log-port* "Failed to get server lock for "dbfname) @@ -470,10 +482,11 @@ ;; ;; (define (tt:shutdown-server ttdat) (let* ((cleanproc (tt-cleanup-proc ttdat))) + (tt-state-set! ttdat 'shutdown) (if cleanproc (cleanproc)) (tcp-close (tt-socket ttdat)) ;; close up ports here )) ;; (define (wait-and-close uconn)