Index: api.scm ================================================================== --- api.scm +++ api.scm @@ -238,16 +238,69 @@ *api-threads*))) (define (api:get-count-threads-alive) (length *api-threads*)) - -;; indat is (cmd run-id params meta) +;; ========================================================================================================================================== +;; api:tcp-dispatch-request-make-handler is a complex TCP request handler that manages server load, dispatches requests, and ensures that the +;; server's state is consistent with the incoming requests. It is designed to be used in a concurrent environment where multiple requests are +;; being handled simultaneously. +;; +;; It processes incoming requests and dispatches them accordingly. +;; The function takes a dbstruct argument, which is a structure representing the database. +;; +;; 1. The function asserts that global variable `*toppath*` is set. +;; +;; 2. It checks if `*server-signature*` is not set, and if so, it sets it using the `tt:mk-signature` function with `*toppath*` as an argument. +;; The `*server-signature*` is used to identify the server instance. +;; +;; 3. The function returns a lambda function that takes `indat` as an argument. indat is (cmd run-id params meta) This lambda is the actual +;; request handler that will be called with the incoming data. +;; +;; 4. Inside the lambda, the current thread is registered with `api:register-thread`. +;; +;; 5. Several local variables are initialized: +;; - `newcount`: A counter for the number of requests being processed. +;; - `numthreads`: The number of alive threads handling requests. +;; - `delay-wait`: A calculated delay based on the number of requests. +;; +;; 6. A `normal-proc` lambda is defined to handle the incoming command (`cmd`), `run-id`, and `params`. It uses a `case` statement to handle +;; different commands. If the command is "ping", it returns the server signature. Otherwise, it dispatches the request using +;; `api:dispatch-request`. +;; +;; 7. The function updates the `*api-process-request-count*` and `*db-last-access*` global variables. +;; +;; 8. It checks if the number of requests (`newcount`) does not match the number of threads (`numthreads`) and performs cleanup and debugging +;; if necessary. +;; +;; 9. The `match` expression is used to destructure `indat` into its components (`cmd`, `run-id`, `params`, `meta`). +;; +;; 10. Several local variables are set based on the destructured data and the current server state: +;; - `db-ok`: Checks if the database file name matches the expected one for the given `run-id`. +;; - `ttdat`: Retrieves server information. +;; - `server-state`: Gets the current state of the server. +;; - `status`: Determines the server's load status based on `newcount`. +;; - `errmsg`: Generates an error message based on the server's status. +;; - `result`: Processes the command based on the server's status. +;; +;; 11. The `meta` variable is updated with additional information based on the command. +;; +;; 12. The `payload` is constructed, which includes the status, error message, result, and meta information. +;; +;; 13. The `*api-process-request-count*` is decremented, as the request has been processed. +;; +;; 14. The current thread is unregistered with `api:unregister-thread`. +;; +;; 15. Finally, the `payload` is returned, which would be the response to the incoming request. +;; +;; Nothing should be printed within the lambda because it interacts with the current input/output ports, which could interfere with the +;; request/response flow. ;; -;; WARNING: Do not print anything in the lambda of this function as it -;; reads/writes to current in/out port +;; The `else` clause at the end of the `match` expression asserts a fatal error if `indat` cannot be deserialized, indicating that the incoming +;; data is not in the expected format. ;; + (define (api:tcp-dispatch-request-make-handler dbstruct) ;; cmd run-id params) (assert *toppath* "FATAL: api:tcp-dispatch-request-make-handler called but *toppath* not set.") (if (not *server-signature*) (set! *server-signature* (tt:mk-signature *toppath*))) (lambda (indat) Index: common.scm ================================================================== --- common.scm +++ common.scm @@ -302,14 +302,11 @@ ;; transport to be used ;; http - use http-transport ;; http-read-cached - use http-transport for writes but in-mem cached for reads (rmode 'http) - (hh-dat (let ((res (or (server:choose-server *toppath* 'homehost) - (cons #f #f)))) - (assert (pair? res)(conc "FATAL: hh-dat should be a pair, got "res)) - res)) + (hh-dat (common:get-homehost)) ;; homehost record ( addr . hhflag ) (server-url #f) ;; (server:check-if-running *toppath*) #f)) (server-id #f) (server-info #f) ;; (if *toppath* (server:check-if-running *toppath*) #f)) (last-server-check 0) ;; last time we checked to see if the server was alive (connect-time (current-seconds)) ;; when we first connected @@ -1292,10 +1289,73 @@ ;;====================================================================== ;; logic for getting homehost. Returns (host . at-home) ;; IF *toppath* is not set, wait up to five seconds trying every two seconds ;; (this is to accomodate the watchdog) ;; +(define (common:get-homehost #!key (trynum 5)) + ;; called often especially at start up. use mutex to eliminate collisions + (mutex-lock! *homehost-mutex*) + (cond + (*home-host* + (mutex-unlock! *homehost-mutex*) + *home-host*) + ((not *toppath*) + (mutex-unlock! *homehost-mutex*) + (launch:setup) ;; safely mutexed now + (if (> trynum 0) + (begin + (thread-sleep! 2) + (common:get-homehost trynum: (- trynum 1))) + #f)) + (else + (let* ((currhost (get-host-name)) + (bestadrs (server:get-best-guess-address currhost)) + ;; first look in config, then look in file .homehost, create it if not found + (homehost (or (configf:lookup *configdat* "server" "homehost" ) + (handle-exceptions + exn + (if (> trynum 0) + (let ((delay-time (* (- 5 trynum) 5))) + (mutex-unlock! *homehost-mutex*) + (debug:print 0 *default-log-port* "ERROR: ["(common:human-time)"] Failed to read .homehost file, delaying " + delay-time " seconds and trying again, message: " ((condition-property-accessor 'exn 'message) exn) + ", exn=" exn) + (thread-sleep! delay-time) + (common:get-homehost trynum: (- trynum 1))) + (begin + (mutex-unlock! *homehost-mutex*) + (debug:print 0 *default-log-port* "ERROR: ["(common:human-time) + "] Failed to read .homehost file after trying five times. Giving up and exiting, message: " + ((condition-property-accessor 'exn 'message) exn)) + (exit 1))) + (let ((hhf (conc *toppath* "/.homehost"))) + (if (common:file-exists? hhf) + (with-input-from-file hhf read-line) + (if (file-write-access? *toppath*) + (begin + (debug:print 0 *default-log-port* "No .homehost file found. Setting it to the current machine") + (with-output-to-file hhf + (lambda () + (print bestadrs))) + (begin + (mutex-unlock! *homehost-mutex*) + (car (common:get-homehost)))) + #f)))))) + (at-home (or (equal? homehost currhost) + (equal? homehost bestadrs)))) + (set! *home-host* (cons homehost at-home)) + (mutex-unlock! *homehost-mutex*) + *home-host*)))) + +;;====================================================================== +;; am I on the homehost? +;; +(define (common:on-homehost?) + (let ((hh (common:get-homehost))) + (if hh + (cdr hh) + #f))) ;;====================================================================== ;; do we honor the caches of the config files? ;; (define (common:use-cache?) Index: dashboard.scm ================================================================== --- dashboard.scm +++ dashboard.scm @@ -3842,15 +3842,17 @@ (debug:print 0 *default-log-port* "Failed to find megatest.config, exiting") (exit 1) ) ) - #;(if (not (common:on-homehost?)) + (if (not (common:on-homehost?)) (begin - (debug:print 0 *default-log-port* "WARNING: You are starting the dashboard on a machine that is not the homehost:" (server:get-homehost)) + (debug:print 0 *default-log-port* "WARNING: You are starting the dashboard on a machine that is not the homehost:" (car (common:get-homehost))) (debug:print 0 *default-log-port* "It will be slower.") - )) + ) + (debug:print 0 *default-log-port* "Dashboard started on the homehost: " (car (common:get-homehost))) + ) (if (and (common:file-exists? mtdb-path) (file-write-access? mtdb-path)) (if (not (args:get-arg "-skip-version-check")) Index: megatest.scm ================================================================== --- megatest.scm +++ megatest.scm @@ -973,10 +973,15 @@ (if (args:get-arg "-server") (let* (;; (run-id (args:get-arg "-run-id")) (dbfname (args:get-arg "-db")) (tl (launch:setup)) (keys (keys:config-get-fields *configdat*))) + (if (not (common:on-homehost?)) + (begin + (debug:print 0 *default-log-port* "Attempt to start a server on a machine that is not the homehost. Aborting") + (exit + ))) (case (rmt:transport-mode) ((tcp) (let* ((timeout (server:expiration-timeout))) (debug:print 0 *default-log-port* "INFO: Starting server for " dbfname " using tcp method with server timeout of "timeout) (tt-server-timeout-param timeout) @@ -1970,10 +1975,12 @@ (args:get-arg "-run") (args:get-arg "-rerun-clean") (args:get-arg "-rerun-all") (args:get-arg "-runtests") (args:get-arg "-kill-rerun")) + (begin + (common:get-homehost) ;; set the .homehost if it's not set. (let ((need-clean (or (args:get-arg "-rerun-clean") (args:get-arg "-rerun-all"))) (orig-cmdline (string-intersperse (argv) " "))) (general-run-call "-runall" @@ -1999,11 +2006,11 @@ orig-cmdline))))) (debug:print 0 *default-log-port* "ORIG: " orig-cmdline) (debug:print 0 *default-log-port* "NEW: " newcmdline) (system newcmdline))) run-specs)) - (handle-run-requests target runname keys keyvals need-clean)))))) + (handle-run-requests target runname keys keyvals need-clean))))))) ;;====================================================================== ;; run one test ;;======================================================================